fixing typos
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58 #include <vnet/interface_output.h>
59
60 /** @brief IPv4 lookup node.
61     @node ip4-lookup
62
63     This is the main IPv4 lookup dispatch node.
64
65     @param vm vlib_main_t corresponding to the current thread
66     @param node vlib_node_runtime_t
67     @param frame vlib_frame_t whose contents should be dispatched
68
69     @par Graph mechanics: buffer metadata, next index usage
70
71     @em Uses:
72     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
73         - Indicates the @c sw_if_index value of the interface that the
74           packet was received on.
75     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
76         - When the value is @c ~0 then the node performs a longest prefix
77           match (LPM) for the packet destination address in the FIB attached
78           to the receive interface.
79         - Otherwise perform LPM for the packet destination address in the
80           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
81           value (0, 1, ...) and not a VRF id.
82
83     @em Sets:
84     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
85         - The lookup result adjacency index.
86
87     <em>Next Index:</em>
88     - Dispatches the packet to the node index found in
89       ip_adjacency_t @c adj->lookup_next_index
90       (where @c adj is the lookup result adjacency).
91 */
92 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
93                                 vlib_frame_t * frame)
94 {
     /* All per-packet work is delegated to ip4_lookup_inline(); its return
        value is passed straight back to the caller.  The final argument is
        the flag named in the inline comment below, passed here as 0. */
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
/* Forward declaration of the trace formatter referenced by the node
   registrations that follow. */
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
/* Registration of the "ip4-lookup" graph node: frame elements are u32,
   traces are printed with format_ip4_lookup_trace, and the next-node table
   is IP4_LOOKUP_NEXT_NODES with IP_LOOKUP_N_NEXT entries. */
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
/*
 * ip4-load-balance node function.
 *
 * For every buffer in the frame:
 *  - read the load-balance index from vnet_buffer(b)->ip.adj_index[VLIB_TX];
 *  - pick a bucket: by flow hash when the load-balance has more than one
 *    bucket, bucket 0 otherwise;
 *  - overwrite ip.adj_index[VLIB_TX] with the chosen bucket's dpoi_index;
 *  - increment the per-load-balance "via" combined counter by one packet and
 *    the buffer chain length;
 *  - enqueue the buffer to the chosen bucket's dpoi_next_node.
 *
 * Returns frame->n_vectors.
 */
114 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
115                                       vlib_node_runtime_t * node,
116                                       vlib_frame_t * frame)
117 {
118   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
119   u32 n_left_from, n_left_to_next, *from, *to_next;
120   ip_lookup_next_t next;
121   u32 thread_index = vm->thread_index;
122
123   from = vlib_frame_vector_args (frame);
124   n_left_from = frame->n_vectors;
125   next = node->cached_next_index;
126
127   while (n_left_from > 0)
128     {
129       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
130
131
          /* Dual loop: two buffers per iteration.  At least four must remain
             because from[2] and from[3] are prefetched for the next pass. */
132       while (n_left_from >= 4 && n_left_to_next >= 2)
133         {
134           ip_lookup_next_t next0, next1;
135           const load_balance_t *lb0, *lb1;
136           vlib_buffer_t *p0, *p1;
137           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
138           const ip4_header_t *ip0, *ip1;
139           const dpo_id_t *dpo0, *dpo1;
140
141           /* Prefetch next iteration. */
142           {
143             vlib_buffer_t *p2, *p3;
144
145             p2 = vlib_get_buffer (vm, from[2]);
146             p3 = vlib_get_buffer (vm, from[3]);
147
148             vlib_prefetch_buffer_header (p2, STORE);
149             vlib_prefetch_buffer_header (p3, STORE);
150
151             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
152             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
153           }
154
              /* Speculatively enqueue both buffers to the cached next index;
                 vlib_validate_buffer_enqueue_x2 below is given the real next
                 indices for each buffer. */
155           pi0 = to_next[0] = from[0];
156           pi1 = to_next[1] = from[1];
157
158           from += 2;
159           n_left_from -= 2;
160           to_next += 2;
161           n_left_to_next -= 2;
162
163           p0 = vlib_get_buffer (vm, pi0);
164           p1 = vlib_get_buffer (vm, pi1);
165
166           ip0 = vlib_buffer_get_current (p0);
167           ip1 = vlib_buffer_get_current (p1);
168           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
169           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
170
171           lb0 = load_balance_get (lbi0);
172           lb1 = load_balance_get (lbi1);
173
174           /*
175            * This node serves via-FIB (recursive) load-balances: a non-zero flow
176            * hash already present in the buffer metadata is re-used rather than
177            * recomputed, but shifted right by one first, so that successive levels
178            * of the recursion graph do not select on the same bits (polarisation).
179            */
180           hc0 = hc1 = 0;
181
182           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
183             {
184               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
185                 {
186                   hc0 = vnet_buffer (p0)->ip.flow_hash =
187                     vnet_buffer (p0)->ip.flow_hash >> 1;
188                 }
189               else
190                 {
191                   hc0 = vnet_buffer (p0)->ip.flow_hash =
192                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
193                 }
194               dpo0 = load_balance_get_fwd_bucket
195                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
196             }
197           else
198             {
199               dpo0 = load_balance_get_bucket_i (lb0, 0);
200             }
201           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
202             {
203               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
204                 {
205                   hc1 = vnet_buffer (p1)->ip.flow_hash =
206                     vnet_buffer (p1)->ip.flow_hash >> 1;
207                 }
208               else
209                 {
210                   hc1 = vnet_buffer (p1)->ip.flow_hash =
211                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
212                 }
213               dpo1 = load_balance_get_fwd_bucket
214                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
215             }
216           else
217             {
218               dpo1 = load_balance_get_bucket_i (lb1, 0);
219             }
220
221           next0 = dpo0->dpoi_next_node;
222           next1 = dpo1->dpoi_next_node;
223
              /* Hand the chosen bucket's object index to the next node. */
224           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
225           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
226
              /* Counters are indexed by the incoming load-balance index. */
227           vlib_increment_combined_counter
228             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
229           vlib_increment_combined_counter
230             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
231
232           vlib_validate_buffer_enqueue_x2 (vm, node, next,
233                                            to_next, n_left_to_next,
234                                            pi0, pi1, next0, next1);
235         }
236
          /* Single loop: same processing as above, one buffer at a time. */
237       while (n_left_from > 0 && n_left_to_next > 0)
238         {
239           ip_lookup_next_t next0;
240           const load_balance_t *lb0;
241           vlib_buffer_t *p0;
242           u32 pi0, lbi0, hc0;
243           const ip4_header_t *ip0;
244           const dpo_id_t *dpo0;
245
246           pi0 = from[0];
247           to_next[0] = pi0;
248           from += 1;
249           to_next += 1;
250           n_left_to_next -= 1;
251           n_left_from -= 1;
252
253           p0 = vlib_get_buffer (vm, pi0);
254
255           ip0 = vlib_buffer_get_current (p0);
256           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
257
258           lb0 = load_balance_get (lbi0);
259
260           hc0 = 0;
261           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
262             {
263               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
264                 {
265                   hc0 = vnet_buffer (p0)->ip.flow_hash =
266                     vnet_buffer (p0)->ip.flow_hash >> 1;
267                 }
268               else
269                 {
270                   hc0 = vnet_buffer (p0)->ip.flow_hash =
271                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
272                 }
273               dpo0 = load_balance_get_fwd_bucket
274                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
275             }
276           else
277             {
278               dpo0 = load_balance_get_bucket_i (lb0, 0);
279             }
280
281           next0 = dpo0->dpoi_next_node;
282           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
283
284           vlib_increment_combined_counter
285             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
286
287           vlib_validate_buffer_enqueue_x1 (vm, node, next,
288                                            to_next, n_left_to_next,
289                                            pi0, next0);
290         }
291
292       vlib_put_next_frame (vm, node, next, n_left_to_next);
293     }
294
      /* Tracing is done once for the whole frame, after all enqueues. */
295   if (node->flags & VLIB_NODE_FLAG_TRACE)
296     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
297
298   return frame->n_vectors;
299 }
300
/* Registration of the "ip4-load-balance" graph node.  It lists no next nodes
   of its own; it is declared a sibling of "ip4-lookup" and re-uses that
   node's trace formatter. */
301 /* *INDENT-OFF* */
302 VLIB_REGISTER_NODE (ip4_load_balance_node) =
303 {
304   .name = "ip4-load-balance",
305   .vector_size = sizeof (u32),
306   .sibling_of = "ip4-lookup",
307   .format_trace = format_ip4_lookup_trace,
308 };
309 /* *INDENT-ON* */
310
311 #ifndef CLIB_MARCH_VARIANT
312 /* Get the first IPv4 address configured on sw_if_index (unnumbered
   interfaces are honored).  Returns 0 when no address is found.  If
   result_ia is non-NULL it receives the matching ip_interface_address_t,
   or 0 when no address was found. */
313 ip4_address_t *
314 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
315                              ip_interface_address_t ** result_ia)
316 {
317   ip_lookup_main_t *lm = &im->lookup_main;
318   ip_interface_address_t *ia = 0;
319   ip4_address_t *result = 0;
320
321   /* *INDENT-OFF* */
322   foreach_ip_interface_address
323     (lm, ia, sw_if_index,
324      1 /* honor unnumbered */ ,
325      ({
326        ip4_address_t * a =
327          ip_interface_address_get_address (lm, ia);
328        result = a;
         /* only the first address is wanted: stop iterating */
329        break;
330      }));
331   /* *INDENT-ON* */
332   if (result_ia)
333     *result_ia = result ? ia : 0;
334   return result;
335 }
336
/*
 * (Re)install the FIB_SOURCE_INTERFACE entry for the subnet-broadcast
 * prefix 'pfx' in table 'fib_index'.
 *
 * Any existing entry from that source is removed first.  Then, depending on
 * the interface's VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST flag:
 *  - set:   a path via ADJ_BCAST_ADDR out of sw_if_index is installed;
 *  - clear: a drop entry, exempt from loose uRPF, is installed.
 */
337 static void
338 ip4_add_subnet_bcast_route (u32 fib_index,
339                             fib_prefix_t *pfx,
340                             u32 sw_if_index)
341 {
342   vnet_sw_interface_flags_t iflags;
343
344   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
345
346   fib_table_entry_special_remove(fib_index,
347                                  pfx,
348                                  FIB_SOURCE_INTERFACE);
349
350   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
351     {
352       fib_table_entry_update_one_path (fib_index, pfx,
353                                        FIB_SOURCE_INTERFACE,
354                                        FIB_ENTRY_FLAG_NONE,
355                                        DPO_PROTO_IP4,
356                                        /* next-hop: the broadcast adjacency address */
357                                        &ADJ_BCAST_ADDR,
358                                        sw_if_index,
359                                        // invalid FIB index
360                                        ~0,
361                                        1,
362                                        // no out-label stack
363                                        NULL,
364                                        FIB_ROUTE_PATH_FLAG_NONE);
365     }
366   else
367     {
368         fib_table_entry_special_add(fib_index,
369                                     pfx,
370                                     FIB_SOURCE_INTERFACE,
371                                     (FIB_ENTRY_FLAG_DROP |
372                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
373     }
374 }
375
/*
 * Install the FIB entries that go with interface address 'a' on
 * sw_if_index, in table 'fib_index':
 *  - prefix length <= 30: the connected/attached subnet route, a /32 drop
 *    for the network address, and the /32 subnet-broadcast route;
 *  - prefix length == 31: a /32 attached route to the other end of the link;
 *  - in all cases: the interface's own /32 as a connected, local route,
 *    preceded by a classify entry when the interface has a classify table.
 */
376 static void
377 ip4_add_interface_routes (u32 sw_if_index,
378                           ip4_main_t * im, u32 fib_index,
379                           ip_interface_address_t * a)
380 {
381   ip_lookup_main_t *lm = &im->lookup_main;
382   ip4_address_t *address = ip_interface_address_get_address (lm, a);
383   fib_prefix_t pfx = {
384     .fp_len = a->address_length,
385     .fp_proto = FIB_PROTOCOL_IP4,
386     .fp_addr.ip4 = *address,
387   };
388
389   if (pfx.fp_len <= 30)
390     {
391       /* a /30 or shorter - add a glean for the network address */
392       fib_table_entry_update_one_path (fib_index, &pfx,
393                                        FIB_SOURCE_INTERFACE,
394                                        (FIB_ENTRY_FLAG_CONNECTED |
395                                         FIB_ENTRY_FLAG_ATTACHED),
396                                        DPO_PROTO_IP4,
397                                        /* No next-hop address */
398                                        NULL,
399                                        sw_if_index,
400                                        // invalid FIB index
401                                        ~0,
402                                        1,
403                                        // no out-label stack
404                                        NULL,
405                                        FIB_ROUTE_PATH_FLAG_NONE);
406
407       /* Add the network address as a drop and the subnet-broadcast address
         via ip4_add_subnet_bcast_route(); each one is skipped when it is
         equal to the interface address itself. */
408       fib_prefix_t net_pfx = {
409         .fp_len = 32,
410         .fp_proto = FIB_PROTOCOL_IP4,
411         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
412       };
413       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
414         fib_table_entry_special_add(fib_index,
415                                     &net_pfx,
416                                     FIB_SOURCE_INTERFACE,
417                                     (FIB_ENTRY_FLAG_DROP |
418                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
          /* set all host bits: network address -> subnet-broadcast address */
419       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
420       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
421         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
422     }
423   else if (pfx.fp_len == 31)
424     {
          /* flipping the lowest address bit (network byte order) yields the
             peer's address on a /31 */
425       u32 mask = clib_host_to_net_u32(1);
426       fib_prefix_t net_pfx = pfx;
427
428       net_pfx.fp_len = 32;
429       net_pfx.fp_addr.ip4.as_u32 ^= mask;
430
431       /* a /31 - add the other end as an attached host */
432       fib_table_entry_update_one_path (fib_index, &net_pfx,
433                                        FIB_SOURCE_INTERFACE,
434                                        (FIB_ENTRY_FLAG_ATTACHED),
435                                        DPO_PROTO_IP4,
436                                        &net_pfx.fp_addr,
437                                        sw_if_index,
438                                        // invalid FIB index
439                                        ~0,
440                                        1,
441                                        NULL,
442                                        FIB_ROUTE_PATH_FLAG_NONE);
443     }
      /* from here on pfx is the interface's own address as a /32 */
444   pfx.fp_len = 32;
445
446   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
447     {
448       u32 classify_table_index =
449         lm->classify_table_index_by_sw_if_index[sw_if_index];
450       if (classify_table_index != (u32) ~ 0)
451         {
452           dpo_id_t dpo = DPO_INVALID;
453
454           dpo_set (&dpo,
455                    DPO_CLASSIFY,
456                    DPO_PROTO_IP4,
457                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
458
459           fib_table_entry_special_dpo_add (fib_index,
460                                            &pfx,
461                                            FIB_SOURCE_CLASSIFY,
462                                            FIB_ENTRY_FLAG_NONE, &dpo);
463           dpo_reset (&dpo);
464         }
465     }
466
      /* the local (for-us) route for the interface address */
467   fib_table_entry_update_one_path (fib_index, &pfx,
468                                    FIB_SOURCE_INTERFACE,
469                                    (FIB_ENTRY_FLAG_CONNECTED |
470                                     FIB_ENTRY_FLAG_LOCAL),
471                                    DPO_PROTO_IP4,
472                                    &pfx.fp_addr,
473                                    sw_if_index,
474                                    // invalid FIB index
475                                    ~0,
476                                    1, NULL,
477                                    FIB_ROUTE_PATH_FLAG_NONE);
478 }
479
/*
 * Remove the FIB_SOURCE_INTERFACE entries that belong to an interface
 * address of the given length in table 'fib_index':
 *  - length <= 30: the /32 network-address and subnet-broadcast entries
 *    (each only when it differs from the interface address) and the subnet
 *    route itself;
 *  - length == 31: the /32 for the other end of the link;
 *  - in all cases: the interface's own /32.
 */
480 static void
481 ip4_del_interface_routes (ip4_main_t * im,
482                           u32 fib_index,
483                           ip4_address_t * address, u32 address_length)
484 {
485   fib_prefix_t pfx = {
486     .fp_len = address_length,
487     .fp_proto = FIB_PROTOCOL_IP4,
488     .fp_addr.ip4 = *address,
489   };
490
491   if (pfx.fp_len <= 30)
492     {
          /* start with the network address (host bits cleared) */
493       fib_prefix_t net_pfx = {
494         .fp_len = 32,
495         .fp_proto = FIB_PROTOCOL_IP4,
496         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
497       };
498       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
499         fib_table_entry_special_remove(fib_index,
500                                        &net_pfx,
501                                        FIB_SOURCE_INTERFACE);
          /* then the subnet-broadcast address (host bits set) */
502       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
503       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
504         fib_table_entry_special_remove(fib_index,
505                                        &net_pfx,
506                                        FIB_SOURCE_INTERFACE);
507       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
508     }
509     else if (pfx.fp_len == 31)
510     {
          /* the peer on a /31 differs only in the lowest address bit */
511       u32 mask = clib_host_to_net_u32(1);
512       fib_prefix_t net_pfx = pfx;
513
514       net_pfx.fp_len = 32;
515       net_pfx.fp_addr.ip4.as_u32 ^= mask;
516
517       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
518     }
519
      /* finally the interface's own /32 */
520   pfx.fp_len = 32;
521   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
522 }
523
/*
 * Reference-counted enable/disable of IPv4 on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] counts the enables.  Only the
 * first enable (0 -> 1) and the last disable (1 -> 0) go on to toggle the
 * "ip4-not-enabled" feature on the ip4-unicast and ip4-multicast arcs; the
 * feature is passed !is_enable, i.e. it is switched the opposite way to IPv4
 * itself.  A disable with a zero count trips the ASSERT below.
 */
524 void
525 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
526 {
527   ip4_main_t *im = &ip4_main;
528
      /* make sure a counter exists for this interface, initialised to 0 */
529   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
530
531   /*
532    * enable/disable only on the 1<->0 transition
533    */
534   if (is_enable)
535     {
536       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
537         return;
538     }
539   else
540     {
541       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
542       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
543         return;
544     }
545   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
546                                !is_enable, 0, 0);
547
548
549   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
550                                sw_if_index, !is_enable, 0, 0);
551 }
552
/*
 * Add (is_del == 0) or delete (is_del != 0) an IPv4 address/length on an
 * interface and keep the FIB in step.
 *
 * Steps:
 *  - refuse sw_if_index 0 (local0);
 *  - on add, reject an address whose subnet overlaps any address already
 *    configured on an interface in the same FIB table (sets
 *    vnm->api_errno to VNET_API_ERROR_DUPLICATE_IF_ADDRESS);
 *  - update the interface address pool via ip_interface_address_add_del();
 *  - enable/disable IPv4 on the interface and add/remove the interface
 *    routes;
 *  - call the registered add/del callbacks, but only when the address pool
 *    actually changed size.
 *
 * Returns 0 on success or a clib_error_t describing the failure.  The
 * temporary addr_fib vector is freed on every path that allocated it.
 * 'vm' is not used by this function.
 */
553 static clib_error_t *
554 ip4_add_del_interface_address_internal (vlib_main_t * vm,
555                                         u32 sw_if_index,
556                                         ip4_address_t * address,
557                                         u32 address_length, u32 is_del)
558 {
559   vnet_main_t *vnm = vnet_get_main ();
560   ip4_main_t *im = &ip4_main;
561   ip_lookup_main_t *lm = &im->lookup_main;
562   clib_error_t *error = 0;
563   u32 if_address_index, elts_before;
564   ip4_address_fib_t ip4_af, *addr_fib = 0;
565
566   /* local0 interface doesn't support IP addressing  */
567   if (sw_if_index == 0)
568     {
569       return
570        clib_error_create ("local0 interface doesn't support IP addressing");
571     }
572
573   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
574   ip4_addr_fib_init (&ip4_af, address,
575                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
      /* addr_fib is a heap vector from here on: every exit below must go
         through 'done' so that it is freed */
576   vec_add1 (addr_fib, ip4_af);
577
578   /*
579    * there is no support for adj-fib handling in the presence of overlapping
580    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
581    * most routers do.
582    */
583   /* *INDENT-OFF* */
584   if (!is_del)
585     {
586       /* When adding an address check that it does not conflict
587          with an existing address on any interface in this table. */
588       ip_interface_address_t *ia;
589       vnet_sw_interface_t *sif;
590
591       pool_foreach(sif, vnm->interface_main.sw_interfaces,
592       ({
593           if (im->fib_index_by_sw_if_index[sw_if_index] ==
594               im->fib_index_by_sw_if_index[sif->sw_if_index])
595             {
596               foreach_ip_interface_address
597                 (&im->lookup_main, ia, sif->sw_if_index,
598                  0 /* honor unnumbered */ ,
599                  ({
600                    ip4_address_t * x =
601                      ip_interface_address_get_address
602                      (&im->lookup_main, ia);
                     /* conflict if either prefix covers the other address */
603                    if (ip4_destination_matches_route
604                        (im, address, x, ia->address_length) ||
605                        ip4_destination_matches_route (im,
606                                                       x,
607                                                       address,
608                                                       address_length))
609                      {
610                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
611
                         /* do not return from here: addr_fib would leak */
612                        error =
613                          clib_error_create
614                          ("failed to add %U which conflicts with %U for interface %U",
615                           format_ip4_address_and_length, address,
616                           address_length,
617                           format_ip4_address_and_length, x,
618                           ia->address_length,
619                           format_vnet_sw_if_index_name, vnm,
620                           sif->sw_if_index);
                         goto done;
621                      }
622                  }));
623             }
624       }));
625     }
626   /* *INDENT-ON* */
627
      /* remember the pool size so a duplicate add/del can be detected below */
628   elts_before = pool_elts (lm->if_address_pool);
629
630   error = ip_interface_address_add_del
631     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
632   if (error)
633     goto done;
634
635   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
636
637   if (is_del)
638     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
639   else
640     ip4_add_interface_routes (sw_if_index,
641                               im, ip4_af.fib_index,
642                               pool_elt_at_index
643                               (lm->if_address_pool, if_address_index));
644
645   /* Notify callbacks only if the pool grew/shrank; an unchanged pool size
       means the address was a duplicate. */
646   if (elts_before != pool_elts (lm->if_address_pool))
647     {
648       ip4_add_del_interface_address_callback_t *cb;
649       vec_foreach (cb, im->add_del_interface_address_callbacks)
650         cb->function (im, cb->function_opaque, sw_if_index,
651                       address, address_length, if_address_index, is_del);
652     }
653
654 done:
655   vec_free (addr_fib);
656   return error;
657 }
658
/* Public entry point for adding (is_del == 0) or deleting an interface
   IPv4 address.  Forwards all arguments unchanged to
   ip4_add_del_interface_address_internal() and returns its result. */
659 clib_error_t *
660 ip4_add_del_interface_address (vlib_main_t * vm,
661                                u32 sw_if_index,
662                                ip4_address_t * address,
663                                u32 address_length, u32 is_del)
664 {
665   return ip4_add_del_interface_address_internal
666     (vm, sw_if_index, address, address_length, is_del);
667 }
668
/*
 * Refresh the subnet-broadcast route of every address on sw_if_index whose
 * prefix length is <= 30.
 *
 * Note that 'enable' is not read in this function: the forward-or-drop
 * choice is made inside ip4_add_subnet_bcast_route() from the interface's
 * current flags.
 */
669 void
670 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
671 {
672   ip_interface_address_t *ia;
673   ip4_main_t *im;
674
675   im = &ip4_main;
676
677   /*
678    * when directed broadcast is enabled, the subnet broadcast route will forward
679    * packets using an adjacency with a broadcast MAC. otherwise it drops
680    */
681   /* *INDENT-OFF* */
682   foreach_ip_interface_address(&im->lookup_main, ia,
683                                sw_if_index, 0,
684      ({
685        if (ia->address_length <= 30)
686          {
687            ip4_address_t *ipa;
688
689            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
690
               /* interface address with all host bits set, as a /32:
                  the subnet-broadcast address */
691            fib_prefix_t pfx = {
692              .fp_len = 32,
693              .fp_proto = FIB_PROTOCOL_IP4,
694              .fp_addr = {
695                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
696              },
697            };
698
699            ip4_add_subnet_bcast_route
700              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
701                                                   sw_if_index),
702               &pfx, sw_if_index);
703          }
704      }));
705   /* *INDENT-ON* */
706 }
707 #endif
708
709 /* Built-in ip4 unicast rx feature path definition */
/*
 * The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
 * "ip4-lookup".  The .runs_before constraints below chain the features as:
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify ->
 *   ipsec4-input-feature -> vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 * "ip4-source-and-port-range-check-rx" is only constrained to run before
 * "ip4-policer-classify", and "ip4-not-enabled" only before "ip4-lookup".
 */
710 /* *INDENT-OFF* */
711 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
712 {
713   .arc_name = "ip4-unicast",
714   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
715   .last_in_arc = "ip4-lookup",
716   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
717 };
718
719 VNET_FEATURE_INIT (ip4_flow_classify, static) =
720 {
721   .arc_name = "ip4-unicast",
722   .node_name = "ip4-flow-classify",
723   .runs_before = VNET_FEATURES ("ip4-inacl"),
724 };
725
726 VNET_FEATURE_INIT (ip4_inacl, static) =
727 {
728   .arc_name = "ip4-unicast",
729   .node_name = "ip4-inacl",
730   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
731 };
732
733 VNET_FEATURE_INIT (ip4_source_check_1, static) =
734 {
735   .arc_name = "ip4-unicast",
736   .node_name = "ip4-source-check-via-rx",
737   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
738 };
739
740 VNET_FEATURE_INIT (ip4_source_check_2, static) =
741 {
742   .arc_name = "ip4-unicast",
743   .node_name = "ip4-source-check-via-any",
744   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
745 };
746
747 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
748 {
749   .arc_name = "ip4-unicast",
750   .node_name = "ip4-source-and-port-range-check-rx",
751   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
752 };
753
754 VNET_FEATURE_INIT (ip4_policer_classify, static) =
755 {
756   .arc_name = "ip4-unicast",
757   .node_name = "ip4-policer-classify",
758   .runs_before = VNET_FEATURES ("ipsec4-input-feature"),
759 };
760
761 VNET_FEATURE_INIT (ip4_ipsec, static) =
762 {
763   .arc_name = "ip4-unicast",
764   .node_name = "ipsec4-input-feature",
765   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
766 };
767
768 VNET_FEATURE_INIT (ip4_vpath, static) =
769 {
770   .arc_name = "ip4-unicast",
771   .node_name = "vpath-input-ip4",
772   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
773 };
774
775 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
776 {
777   .arc_name = "ip4-unicast",
778   .node_name = "ip4-vxlan-bypass",
779   .runs_before = VNET_FEATURES ("ip4-lookup"),
780 };
781
782 VNET_FEATURE_INIT (ip4_not_enabled, static) =
783 {
784   .arc_name = "ip4-unicast",
785   .node_name = "ip4-not-enabled",
786   .runs_before = VNET_FEATURES ("ip4-lookup"),
787 };
788
789 VNET_FEATURE_INIT (ip4_lookup, static) =
790 {
791   .arc_name = "ip4-unicast",
792   .node_name = "ip4-lookup",
793   .runs_before = 0,     /* not before any other features */
794 };
795
796 /* Built-in ip4 multicast rx feature path definition */
/*
 * The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
 * "ip4-mfib-forward-lookup".  "vpath-input-ip4" and "ip4-not-enabled" are
 * each constrained only to run before that last feature.
 */
797 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
798 {
799   .arc_name = "ip4-multicast",
800   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
801   .last_in_arc = "ip4-mfib-forward-lookup",
802   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
803 };
804
805 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
806 {
807   .arc_name = "ip4-multicast",
808   .node_name = "vpath-input-ip4",
809   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
810 };
811
812 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
813 {
814   .arc_name = "ip4-multicast",
815   .node_name = "ip4-not-enabled",
816   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
817 };
818
819 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
820 {
821   .arc_name = "ip4-multicast",
822   .node_name = "ip4-mfib-forward-lookup",
823   .runs_before = 0,     /* last feature */
824 };
825
826 /* Built-in ip4 tx (output) feature arc definition */
/*
 * The arc starts at "ip4-rewrite" / "ip4-midchain" / "ip4-dvr-dpo" and ends
 * at "interface-output".  The .runs_before constraints below chain the
 * features as:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec4-output-feature -> interface-output
 */
827 VNET_FEATURE_ARC_INIT (ip4_output, static) =
828 {
829   .arc_name = "ip4-output",
830   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
831   .last_in_arc = "interface-output",
832   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
833 };
834
835 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
836 {
837   .arc_name = "ip4-output",
838   .node_name = "ip4-source-and-port-range-check-tx",
839   .runs_before = VNET_FEATURES ("ip4-outacl"),
840 };
841
842 VNET_FEATURE_INIT (ip4_outacl, static) =
843 {
844   .arc_name = "ip4-output",
845   .node_name = "ip4-outacl",
846   .runs_before = VNET_FEATURES ("ipsec4-output-feature"),
847 };
848
849 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
850 {
851   .arc_name = "ip4-output",
852   .node_name = "ipsec4-output-feature",
853   .runs_before = VNET_FEATURES ("interface-output"),
854 };
855
856 /* Last feature of the ip4-output arc */
857 VNET_FEATURE_INIT (ip4_interface_output, static) =
858 {
859   .arc_name = "ip4-output",
860   .node_name = "interface-output",
861   .runs_before = 0,     /* not before any other features */
862 };
863 /* *INDENT-ON* */
864
865 static clib_error_t *
866 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
867 {
868   ip4_main_t *im = &ip4_main;
869
870   /* Fill in lookup tables with default table (0). */
871   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
872   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
873
874   if (!is_add)
875     {
876       ip4_main_t *im4 = &ip4_main;
877       ip_lookup_main_t *lm4 = &im4->lookup_main;
878       ip_interface_address_t *ia = 0;
879       ip4_address_t *address;
880       vlib_main_t *vm = vlib_get_main ();
881
882       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
883       /* *INDENT-OFF* */
884       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
885       ({
886         address = ip_interface_address_get_address (lm4, ia);
887         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
888       }));
889       /* *INDENT-ON* */
890     }
891
892   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
893                                is_add, 0, 0);
894
895   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
896                                sw_if_index, is_add, 0, 0);
897
898   return /* no error */ 0;
899 }
900
901 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
902
/*
 * Global IP4 main: the single ip4_main_t instance referenced throughout this
 * file.  The definition is compiled only when CLIB_MARCH_VARIANT is not
 * defined, so builds of this file that define that macro do not emit a
 * second copy of the object.
 */
#ifndef CLIB_MARCH_VARIANT
ip4_main_t ip4_main;
#endif /* CLIB_MARCH_VARIANT */
907
908 static clib_error_t *
909 ip4_lookup_init (vlib_main_t * vm)
910 {
911   ip4_main_t *im = &ip4_main;
912   clib_error_t *error;
913   uword i;
914
915   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
916     return error;
917   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
918     return (error);
919   if ((error = vlib_call_init_function (vm, fib_module_init)))
920     return error;
921   if ((error = vlib_call_init_function (vm, mfib_module_init)))
922     return error;
923
924   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
925     {
926       u32 m;
927
928       if (i < 32)
929         m = pow2_mask (i) << (32 - i);
930       else
931         m = ~0;
932       im->fib_masks[i] = clib_host_to_net_u32 (m);
933     }
934
935   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
936
937   /* Create FIB with index 0 and table id of 0. */
938   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
939                                      FIB_SOURCE_DEFAULT_ROUTE);
940   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
941                                       MFIB_SOURCE_DEFAULT_ROUTE);
942
943   {
944     pg_node_t *pn;
945     pn = pg_get_node (ip4_lookup_node.index);
946     pn->unformat_edit = unformat_pg_ip4_header;
947   }
948
949   {
950     ethernet_arp_header_t h;
951
952     clib_memset (&h, 0, sizeof (h));
953
954 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
955 #define _8(f,v) h.f = v;
956     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
957     _16 (l3_type, ETHERNET_TYPE_IP4);
958     _8 (n_l2_address_bytes, 6);
959     _8 (n_l3_address_bytes, 4);
960     _16 (opcode, ETHERNET_ARP_OPCODE_request);
961 #undef _16
962 #undef _8
963
964     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
965                                /* data */ &h,
966                                sizeof (h),
967                                /* alloc chunk size */ 8,
968                                "ip4 arp");
969   }
970
971   return error;
972 }
973
974 VLIB_INIT_FUNCTION (ip4_lookup_init);
975
976 typedef struct
977 {
978   /* Adjacency taken. */
979   u32 dpo_index;
980   u32 flow_hash;
981   u32 fib_index;
982
983   /* Packet data, possibly *after* rewrite. */
984   u8 packet_data[64 - 1 * sizeof (u32)];
985 }
986 ip4_forward_next_trace_t;
987
988 #ifndef CLIB_MARCH_VARIANT
989 u8 *
990 format_ip4_forward_next_trace (u8 * s, va_list * args)
991 {
992   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
993   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
994   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
995   u32 indent = format_get_indent (s);
996   s = format (s, "%U%U",
997               format_white_space, indent,
998               format_ip4_header, t->packet_data, sizeof (t->packet_data));
999   return s;
1000 }
1001 #endif
1002
1003 static u8 *
1004 format_ip4_lookup_trace (u8 * s, va_list * args)
1005 {
1006   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1007   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1008   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1009   u32 indent = format_get_indent (s);
1010
1011   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1012               t->fib_index, t->dpo_index, t->flow_hash);
1013   s = format (s, "\n%U%U",
1014               format_white_space, indent,
1015               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1016   return s;
1017 }
1018
1019 static u8 *
1020 format_ip4_rewrite_trace (u8 * s, va_list * args)
1021 {
1022   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1023   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1024   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1025   u32 indent = format_get_indent (s);
1026
1027   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1028               t->fib_index, t->dpo_index, format_ip_adjacency,
1029               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1030   s = format (s, "\n%U%U",
1031               format_white_space, indent,
1032               format_ip_adjacency_packet_data,
1033               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1034   return s;
1035 }
1036
1037 #ifndef CLIB_MARCH_VARIANT
1038 /* Common trace function for all ip4-forward next nodes. */
1039 void
1040 ip4_forward_next_trace (vlib_main_t * vm,
1041                         vlib_node_runtime_t * node,
1042                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1043 {
1044   u32 *from, n_left;
1045   ip4_main_t *im = &ip4_main;
1046
1047   n_left = frame->n_vectors;
1048   from = vlib_frame_vector_args (frame);
1049
1050   while (n_left >= 4)
1051     {
1052       u32 bi0, bi1;
1053       vlib_buffer_t *b0, *b1;
1054       ip4_forward_next_trace_t *t0, *t1;
1055
1056       /* Prefetch next iteration. */
1057       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1058       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1059
1060       bi0 = from[0];
1061       bi1 = from[1];
1062
1063       b0 = vlib_get_buffer (vm, bi0);
1064       b1 = vlib_get_buffer (vm, bi1);
1065
1066       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1067         {
1068           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1069           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1070           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1071           t0->fib_index =
1072             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1073              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1074             vec_elt (im->fib_index_by_sw_if_index,
1075                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1076
1077           clib_memcpy_fast (t0->packet_data,
1078                             vlib_buffer_get_current (b0),
1079                             sizeof (t0->packet_data));
1080         }
1081       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1082         {
1083           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1084           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1085           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1086           t1->fib_index =
1087             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1088              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1089             vec_elt (im->fib_index_by_sw_if_index,
1090                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1091           clib_memcpy_fast (t1->packet_data, vlib_buffer_get_current (b1),
1092                             sizeof (t1->packet_data));
1093         }
1094       from += 2;
1095       n_left -= 2;
1096     }
1097
1098   while (n_left >= 1)
1099     {
1100       u32 bi0;
1101       vlib_buffer_t *b0;
1102       ip4_forward_next_trace_t *t0;
1103
1104       bi0 = from[0];
1105
1106       b0 = vlib_get_buffer (vm, bi0);
1107
1108       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1109         {
1110           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1111           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1112           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1113           t0->fib_index =
1114             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1115              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1116             vec_elt (im->fib_index_by_sw_if_index,
1117                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1118           clib_memcpy_fast (t0->packet_data, vlib_buffer_get_current (b0),
1119                             sizeof (t0->packet_data));
1120         }
1121       from += 1;
1122       n_left -= 1;
1123     }
1124 }
1125
1126 /* Compute TCP/UDP/ICMP4 checksum in software. */
1127 u16
1128 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1129                               ip4_header_t * ip0)
1130 {
1131   ip_csum_t sum0;
1132   u32 ip_header_length, payload_length_host_byte_order;
1133   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1134   u16 sum16;
1135   void *data_this_buffer;
1136
1137   /* Initialize checksum with ip header. */
1138   ip_header_length = ip4_header_bytes (ip0);
1139   payload_length_host_byte_order =
1140     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1141   sum0 =
1142     clib_host_to_net_u32 (payload_length_host_byte_order +
1143                           (ip0->protocol << 16));
1144
1145   if (BITS (uword) == 32)
1146     {
1147       sum0 =
1148         ip_csum_with_carry (sum0,
1149                             clib_mem_unaligned (&ip0->src_address, u32));
1150       sum0 =
1151         ip_csum_with_carry (sum0,
1152                             clib_mem_unaligned (&ip0->dst_address, u32));
1153     }
1154   else
1155     sum0 =
1156       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1157
1158   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1159   data_this_buffer = (void *) ip0 + ip_header_length;
1160   n_ip_bytes_this_buffer =
1161     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1162   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1163     {
1164       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1165         n_ip_bytes_this_buffer - ip_header_length : 0;
1166     }
1167   while (1)
1168     {
1169       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1170       n_bytes_left -= n_this_buffer;
1171       if (n_bytes_left == 0)
1172         break;
1173
1174       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1175       p0 = vlib_get_buffer (vm, p0->next_buffer);
1176       data_this_buffer = vlib_buffer_get_current (p0);
1177       n_this_buffer = p0->current_length;
1178     }
1179
1180   sum16 = ~ip_csum_fold (sum0);
1181
1182   return sum16;
1183 }
1184
1185 u32
1186 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1187 {
1188   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1189   udp_header_t *udp0;
1190   u16 sum16;
1191
1192   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1193           || ip0->protocol == IP_PROTOCOL_UDP);
1194
1195   udp0 = (void *) (ip0 + 1);
1196   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1197     {
1198       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1199                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1200       return p0->flags;
1201     }
1202
1203   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1204
1205   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1206                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1207
1208   return p0->flags;
1209 }
1210 #endif
1211
1212 /* *INDENT-OFF* */
1213 VNET_FEATURE_ARC_INIT (ip4_local) =
1214 {
1215   .arc_name  = "ip4-local",
1216   .start_nodes = VNET_FEATURES ("ip4-local"),
1217   .last_in_arc = "ip4-local-end-of-arc",
1218 };
1219 /* *INDENT-ON* */
1220
1221 static inline void
1222 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1223                             ip4_header_t * ip, u8 is_udp, u8 * error,
1224                             u8 * good_tcp_udp)
1225 {
1226   u32 flags0;
1227   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1228   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1229   if (is_udp)
1230     {
1231       udp_header_t *udp;
1232       u32 ip_len, udp_len;
1233       i32 len_diff;
1234       udp = ip4_next_header (ip);
1235       /* Verify UDP length. */
1236       ip_len = clib_net_to_host_u16 (ip->length);
1237       udp_len = clib_net_to_host_u16 (udp->length);
1238
1239       len_diff = ip_len - udp_len;
1240       *good_tcp_udp &= len_diff >= 0;
1241       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1242     }
1243 }
1244
/*
 * Checksum-state predicates on a buffer pointer _b.  Every argument and
 * every expansion is fully parenthesised so the macros can be negated or
 * combined with other operators without changing their meaning.
 */

/* Non-zero when TCP or UDP checksum offload is requested for the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum was neither computed already nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1256
1257 static inline void
1258 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1259                          ip4_header_t * ih, u8 * error)
1260 {
1261   u8 is_udp, is_tcp_udp, good_tcp_udp;
1262
1263   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1264   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1265
1266   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1267     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1268   else
1269     good_tcp_udp = ip4_local_csum_is_valid (b);
1270
1271   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1272   *error = (is_tcp_udp && !good_tcp_udp
1273             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1274 }
1275
1276 static inline void
1277 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1278                             ip4_header_t ** ih, u8 * error)
1279 {
1280   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1281
1282   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1283   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1284
1285   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1286   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1287
1288   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1289   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1290
1291   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1292                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1293     {
1294       if (is_tcp_udp[0])
1295         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1296                                     &good_tcp_udp[0]);
1297       if (is_tcp_udp[1])
1298         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1299                                     &good_tcp_udp[1]);
1300     }
1301
1302   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1303               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1304   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1305               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1306 }
1307
1308 static inline void
1309 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1310                               vlib_buffer_t * b, u16 * next, u8 error,
1311                               u8 head_of_feature_arc)
1312 {
1313   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1314   u32 next_index;
1315
1316   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1317   b->error = error ? error_node->errors[error] : 0;
1318   if (head_of_feature_arc)
1319     {
1320       next_index = *next;
1321       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1322         {
1323           vnet_feature_arc_start (arc_index,
1324                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1325                                   &next_index, b);
1326           *next = next_index;
1327         }
1328     }
1329 }
1330
1331 typedef struct
1332 {
1333   ip4_address_t src;
1334   u32 lbi;
1335   u8 error;
1336   u8 first;
1337 } ip4_local_last_check_t;
1338
1339 static inline void
1340 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1341                      ip4_local_last_check_t * last_check, u8 * error0)
1342 {
1343   ip4_fib_mtrie_leaf_t leaf0;
1344   ip4_fib_mtrie_t *mtrie0;
1345   const dpo_id_t *dpo0;
1346   load_balance_t *lb0;
1347   u32 lbi0;
1348
1349   vnet_buffer (b)->ip.fib_index =
1350     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1351     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1352
1353   if (PREDICT_FALSE (last_check->first ||
1354                      (last_check->src.as_u32 != ip0->src_address.as_u32)))
1355     {
1356       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1357       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1358       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1359       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1360       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1361
1362       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1363       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1364
1365       lb0 = load_balance_get (lbi0);
1366       dpo0 = load_balance_get_bucket_i (lb0, 0);
1367
1368       /*
1369        * Must have a route to source otherwise we drop the packet.
1370        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1371        *
1372        * The checks are:
1373        *  - the source is a recieve => it's from us => bogus, do this
1374        *    first since it sets a different error code.
1375        *  - uRPF check for any route to source - accept if passes.
1376        *  - allow packets destined to the broadcast address from unknown sources
1377        */
1378
1379       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1380                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1381                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1382       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1383                   && !fib_urpf_check_size (lb0->lb_urpf)
1384                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1385                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1386
1387       last_check->src.as_u32 = ip0->src_address.as_u32;
1388       last_check->lbi = lbi0;
1389       last_check->error = *error0;
1390     }
1391   else
1392     {
1393       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1394       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1395       *error0 = last_check->error;
1396       last_check->first = 0;
1397     }
1398 }
1399
1400 static inline void
1401 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1402                         ip4_local_last_check_t * last_check, u8 * error)
1403 {
1404   ip4_fib_mtrie_leaf_t leaf[2];
1405   ip4_fib_mtrie_t *mtrie[2];
1406   const dpo_id_t *dpo[2];
1407   load_balance_t *lb[2];
1408   u32 not_last_hit;
1409   u32 lbi[2];
1410
1411   not_last_hit = last_check->first;
1412   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1413   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1414
1415   vnet_buffer (b[0])->ip.fib_index =
1416     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1417     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1418     vnet_buffer (b[0])->ip.fib_index;
1419
1420   vnet_buffer (b[1])->ip.fib_index =
1421     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1422     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1423     vnet_buffer (b[1])->ip.fib_index;
1424
1425   if (PREDICT_FALSE (not_last_hit))
1426     {
1427       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1428       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1429
1430       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1431       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1432
1433       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1434                                            &ip[0]->src_address, 2);
1435       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1436                                            &ip[1]->src_address, 2);
1437
1438       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1439                                            &ip[0]->src_address, 3);
1440       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1441                                            &ip[1]->src_address, 3);
1442
1443       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1444       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1445
1446       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1447       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1448
1449       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1450       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1451
1452       lb[0] = load_balance_get (lbi[0]);
1453       lb[1] = load_balance_get (lbi[1]);
1454
1455       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1456       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1457
1458       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1459                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1460                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1461       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1462                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1463                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1464                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1465
1466       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1467                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1468                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1469       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1470                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1471                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1472                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1473
1474       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1475       last_check->lbi = lbi[1];
1476       last_check->error = error[1];
1477     }
1478   else
1479     {
1480       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1481       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1482
1483       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1484       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1485
1486       error[0] = last_check->error;
1487       error[1] = last_check->error;
1488       last_check->first = 0;
1489     }
1490 }
1491
/*
 * Packet classes returned by ip4_local_classify().  Callers test the value
 * for truth to decide whether to skip the L4 checks, so the L4 class must
 * stay 0.
 */
enum ip_local_packet_type_e
{
  /* Not a fragment and not flagged as NATed. */
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  /* Buffer carries VNET_BUFFER_F_IS_NATED. */
  IP_LOCAL_PACKET_TYPE_NAT,
  /* IP fragment. */
  IP_LOCAL_PACKET_TYPE_FRAG,
};
1498
1499 /**
1500  * Determine packet type and next node.
1501  *
1502  * The expectation is that all packets that are not L4 will skip
1503  * checksums and source checks.
1504  */
1505 always_inline u8
1506 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1507 {
1508   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1509
1510   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1511     {
1512       *next = IP_LOCAL_NEXT_REASSEMBLY;
1513       return IP_LOCAL_PACKET_TYPE_FRAG;
1514     }
1515   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1516     {
1517       *next = lm->local_next_by_ip_protocol[ip->protocol];
1518       return IP_LOCAL_PACKET_TYPE_NAT;
1519     }
1520
1521   *next = lm->local_next_by_ip_protocol[ip->protocol];
1522   return IP_LOCAL_PACKET_TYPE_L4;
1523 }
1524
1525 static inline uword
1526 ip4_local_inline (vlib_main_t * vm,
1527                   vlib_node_runtime_t * node,
1528                   vlib_frame_t * frame, int head_of_feature_arc)
1529 {
1530   u32 *from, n_left_from;
1531   vlib_node_runtime_t *error_node =
1532     vlib_node_get_runtime (vm, ip4_input_node.index);
1533   u16 nexts[VLIB_FRAME_SIZE], *next;
1534   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1535   ip4_header_t *ip[2];
1536   u8 error[2], pt[2];
1537
1538   ip4_local_last_check_t last_check = {
1539     /*
1540      * 0.0.0.0 can appear as the source address of an IP packet,
1541      * as can any other address, hence the need to use the 'first'
1542      * member to make sure the .lbi is initialised for the first
1543      * packet.
1544      */
1545     .src = {.as_u32 = 0},
1546     .lbi = ~0,
1547     .error = IP4_ERROR_UNKNOWN_PROTOCOL,
1548     .first = 1,
1549   };
1550
1551   from = vlib_frame_vector_args (frame);
1552   n_left_from = frame->n_vectors;
1553
1554   if (node->flags & VLIB_NODE_FLAG_TRACE)
1555     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1556
1557   vlib_get_buffers (vm, from, bufs, n_left_from);
1558   b = bufs;
1559   next = nexts;
1560
1561   while (n_left_from >= 6)
1562     {
1563       u8 not_batch = 0;
1564
1565       /* Prefetch next iteration. */
1566       {
1567         vlib_prefetch_buffer_header (b[4], LOAD);
1568         vlib_prefetch_buffer_header (b[5], LOAD);
1569
1570         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1571         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1572       }
1573
1574       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1575
1576       ip[0] = vlib_buffer_get_current (b[0]);
1577       ip[1] = vlib_buffer_get_current (b[1]);
1578
1579       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1580       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1581
1582       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1583       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1584
1585       not_batch = pt[0] ^ pt[1];
1586
1587       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1588         goto skip_checks;
1589
1590       if (PREDICT_TRUE (not_batch == 0))
1591         {
1592           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1593           ip4_local_check_src_x2 (b, ip, &last_check, error);
1594         }
1595       else
1596         {
1597           if (!pt[0])
1598             {
1599               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1600               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1601             }
1602           if (!pt[1])
1603             {
1604               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1605               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1606             }
1607         }
1608
1609     skip_checks:
1610
1611       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1612                                     head_of_feature_arc);
1613       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1614                                     head_of_feature_arc);
1615
1616       b += 2;
1617       next += 2;
1618       n_left_from -= 2;
1619     }
1620
1621   while (n_left_from > 0)
1622     {
1623       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1624
1625       ip[0] = vlib_buffer_get_current (b[0]);
1626       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1627       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1628
1629       if (head_of_feature_arc == 0 || pt[0])
1630         goto skip_check;
1631
1632       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1633       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1634
1635     skip_check:
1636
1637       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1638                                     head_of_feature_arc);
1639
1640       b += 1;
1641       next += 1;
1642       n_left_from -= 1;
1643     }
1644
1645   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1646   return frame->n_vectors;
1647 }
1648
1649 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1650                                vlib_frame_t * frame)
1651 {
1652   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1653 }
1654
1655 /* *INDENT-OFF* */
1656 VLIB_REGISTER_NODE (ip4_local_node) =
1657 {
1658   .name = "ip4-local",
1659   .vector_size = sizeof (u32),
1660   .format_trace = format_ip4_forward_next_trace,
1661   .n_next_nodes = IP_LOCAL_N_NEXT,
1662   .next_nodes =
1663   {
1664     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1665     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1666     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1667     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1668     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1669   },
1670 };
1671 /* *INDENT-ON* */
1672
1673
1674 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1675                                           vlib_node_runtime_t * node,
1676                                           vlib_frame_t * frame)
1677 {
1678   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1679 }
1680
1681 /* *INDENT-OFF* */
1682 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1683   .name = "ip4-local-end-of-arc",
1684   .vector_size = sizeof (u32),
1685
1686   .format_trace = format_ip4_forward_next_trace,
1687   .sibling_of = "ip4-local",
1688 };
1689
1690 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1691   .arc_name = "ip4-local",
1692   .node_name = "ip4-local-end-of-arc",
1693   .runs_before = 0, /* not before any other features */
1694 };
1695 /* *INDENT-ON* */
1696
1697 #ifndef CLIB_MARCH_VARIANT
1698 void
1699 ip4_register_protocol (u32 protocol, u32 node_index)
1700 {
1701   vlib_main_t *vm = vlib_get_main ();
1702   ip4_main_t *im = &ip4_main;
1703   ip_lookup_main_t *lm = &im->lookup_main;
1704
1705   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1706   lm->local_next_by_ip_protocol[protocol] =
1707     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1708 }
1709 #endif
1710
1711 static clib_error_t *
1712 show_ip_local_command_fn (vlib_main_t * vm,
1713                           unformat_input_t * input, vlib_cli_command_t * cmd)
1714 {
1715   ip4_main_t *im = &ip4_main;
1716   ip_lookup_main_t *lm = &im->lookup_main;
1717   int i;
1718
1719   vlib_cli_output (vm, "Protocols handled by ip4_local");
1720   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1721     {
1722       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1723         {
1724           u32 node_index = vlib_get_node (vm,
1725                                           ip4_local_node.index)->
1726             next_nodes[lm->local_next_by_ip_protocol[i]];
1727           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1728                            node_index);
1729         }
1730     }
1731   return 0;
1732 }
1733
1734
1735
1736 /*?
1737  * Display the set of protocols handled by the local IPv4 stack.
1738  *
1739  * @cliexpar
1740  * Example of how to display local protocol table:
1741  * @cliexstart{show ip local}
1742  * Protocols handled by ip4_local
1743  * 1
1744  * 17
1745  * 47
1746  * @cliexend
1747 ?*/
1748 /* *INDENT-OFF* */
/* Binds the "show ip local" CLI path to show_ip_local_command_fn (). */
1749 VLIB_CLI_COMMAND (show_ip_local, static) =
1750 {
1751   .path = "show ip local",
1752   .function = show_ip_local_command_fn,
1753   .short_help = "show ip local",
1754 };
1755 /* *INDENT-ON* */
1756
1757 always_inline uword
1758 ip4_arp_inline (vlib_main_t * vm,
1759                 vlib_node_runtime_t * node,
1760                 vlib_frame_t * frame, int is_glean)
1761 {
1762   vnet_main_t *vnm = vnet_get_main ();
1763   ip4_main_t *im = &ip4_main;
1764   ip_lookup_main_t *lm = &im->lookup_main;
1765   u32 *from, *to_next_drop;
1766   uword n_left_from, n_left_to_next_drop, next_index;
1767   u32 thread_index = vm->thread_index;
1768   u64 seed;
1769
1770   if (node->flags & VLIB_NODE_FLAG_TRACE)
1771     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1772
1773   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1774
1775   from = vlib_frame_vector_args (frame);
1776   n_left_from = frame->n_vectors;
1777   next_index = node->cached_next_index;
1778   if (next_index == IP4_ARP_NEXT_DROP)
1779     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1780
1781   while (n_left_from > 0)
1782     {
1783       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1784                            to_next_drop, n_left_to_next_drop);
1785
1786       while (n_left_from > 0 && n_left_to_next_drop > 0)
1787         {
1788           u32 pi0, bi0, adj_index0, sw_if_index0;
1789           ip_adjacency_t *adj0;
1790           vlib_buffer_t *p0, *b0;
1791           ip4_address_t resolve0;
1792           ethernet_arp_header_t *h0;
1793           vnet_hw_interface_t *hw_if0;
1794           u64 r0;
1795
1796           pi0 = from[0];
1797           p0 = vlib_get_buffer (vm, pi0);
1798
1799           from += 1;
1800           n_left_from -= 1;
1801           to_next_drop[0] = pi0;
1802           to_next_drop += 1;
1803           n_left_to_next_drop -= 1;
1804
1805           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1806           adj0 = adj_get (adj_index0);
1807
1808           if (is_glean)
1809             {
1810               /* resolve the packet's destination */
1811               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1812               resolve0 = ip0->dst_address;
1813             }
1814           else
1815             {
1816               /* resolve the incomplete adj */
1817               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1818             }
1819
1820           /* combine the address and interface for the hash key */
1821           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1822           r0 = (u64) resolve0.data_u32 << 32;
1823           r0 |= sw_if_index0;
1824
1825           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1826             {
1827               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1828               continue;
1829             }
1830
1831           /*
1832            * the adj has been updated to a rewrite but the node the DPO that got
1833            * us here hasn't - yet. no big deal. we'll drop while we wait.
1834            */
1835           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1836             {
1837               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1838               continue;
1839             }
1840
1841           /*
1842            * Can happen if the control-plane is programming tables
1843            * with traffic flowing; at least that's today's lame excuse.
1844            */
1845           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1846               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1847             {
1848               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1849               continue;
1850             }
1851           /* Send ARP request. */
1852           h0 =
1853             vlib_packet_template_get_packet (vm,
1854                                              &im->ip4_arp_request_packet_template,
1855                                              &bi0);
1856           b0 = vlib_get_buffer (vm, bi0);
1857
1858           /* copy the persistent fields from the original */
1859           clib_memcpy_fast (b0->opaque2, p0->opaque2, sizeof (p0->opaque2));
1860
1861           /* Seems we're out of buffers */
1862           if (PREDICT_FALSE (!h0))
1863             {
1864               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1865               continue;
1866             }
1867
1868           /* Add rewrite/encap string for ARP packet. */
1869           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1870
1871           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1872
1873           /* Src ethernet address in ARP header. */
1874           mac_address_from_bytes (&h0->ip4_over_ethernet[0].mac,
1875                                   hw_if0->hw_address);
1876           if (is_glean)
1877             {
1878               /* The interface's source address is stashed in the Glean Adj */
1879               h0->ip4_over_ethernet[0].ip4 =
1880                 adj0->sub_type.glean.receive_addr.ip4;
1881             }
1882           else
1883             {
1884               /* Src IP address in ARP header. */
1885               if (ip4_src_address_for_packet (lm, sw_if_index0,
1886                                               &h0->ip4_over_ethernet[0].ip4))
1887                 {
1888                   /* No source address available */
1889                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1890                   vlib_buffer_free (vm, &bi0, 1);
1891                   continue;
1892                 }
1893             }
1894           h0->ip4_over_ethernet[1].ip4 = resolve0;
1895
1896           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1897
1898           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1899           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1900           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1901
1902           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1903
1904           vlib_set_next_frame_buffer (vm, node,
1905                                       adj0->rewrite_header.next_index, bi0);
1906         }
1907
1908       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1909     }
1910
1911   return frame->n_vectors;
1912 }
1913
/* "ip4-arp" node function: runs the shared worker with is_glean = 0. */
1914 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1915                              vlib_frame_t * frame)
1916 {
1917   return (ip4_arp_inline (vm, node, frame, 0));
1918 }
1919
/* "ip4-glean" node function: runs the shared worker with is_glean = 1. */
1920 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1921                                vlib_frame_t * frame)
1922 {
1923   return (ip4_arp_inline (vm, node, frame, 1));
1924 }
1925
/* Error counter names, indexed by IP4_ARP_ERROR_*; used by both nodes below. */
1926 static char *ip4_arp_error_strings[] = {
1927   [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
1928   [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
1929   [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
1930   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1931   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1932   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1933 };
1934
1935 /* *INDENT-OFF* */
/*
 * Only the drop next is declared statically for either node; the worker
 * sends ARP requests to adj->rewrite_header.next_index.
 */
1936 VLIB_REGISTER_NODE (ip4_arp_node) =
1937 {
1938   .name = "ip4-arp",
1939   .vector_size = sizeof (u32),
1940   .format_trace = format_ip4_forward_next_trace,
1941   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1942   .error_strings = ip4_arp_error_strings,
1943   .n_next_nodes = IP4_ARP_N_NEXT,
1944   .next_nodes =
1945   {
1946     [IP4_ARP_NEXT_DROP] = "error-drop",
1947   },
1948 };
1949
1950 VLIB_REGISTER_NODE (ip4_glean_node) =
1951 {
1952   .name = "ip4-glean",
1953   .vector_size = sizeof (u32),
1954   .format_trace = format_ip4_forward_next_trace,
1955   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1956   .error_strings = ip4_arp_error_strings,
1957   .n_next_nodes = IP4_ARP_N_NEXT,
1958   .next_nodes = {
1959   [IP4_ARP_NEXT_DROP] = "error-drop",
1960   },
1961 };
1962 /* *INDENT-ON* */
1963
/* X-macro list of the IP4_ARP_ERROR_* suffixes handled by arp_notrace_init (). */
1964 #define foreach_notrace_ip4_arp_error           \
1965 _(THROTTLED)                                    \
1966 _(RESOLVED)                                     \
1967 _(NO_BUFFERS)                                   \
1968 _(REQUEST_SENT)                                 \
1969 _(NON_ARP_ADJ)                                  \
1970 _(NO_SOURCE_ADDRESS)
1971
/*
 * Init function: for every error in foreach_notrace_ip4_arp_error, calls
 * vnet_pcap_drop_trace_filter_add_del () with is_add = 1 on the matching
 * entry of the ip4-arp node's error table.  Always returns 0.
 *
 * NOTE(review): only ip4_arp_node's runtime is consulted here; ip4-glean
 * registers the same error strings but is not visited - confirm whether
 * that is intended.
 */
1972 static clib_error_t *
1973 arp_notrace_init (vlib_main_t * vm)
1974 {
1975   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1976
1977   /* don't trace ARP request packets */
1978 #define _(a)                                    \
1979     vnet_pcap_drop_trace_filter_add_del         \
1980         (rt->errors[IP4_ARP_ERROR_##a],         \
1981          1 /* is_add */);
1982   foreach_notrace_ip4_arp_error;
1983 #undef _
1984   return 0;
1985 }
1986
1987 VLIB_INIT_FUNCTION (arp_notrace_init);
1988
1989
1990 #ifndef CLIB_MARCH_VARIANT
1991 /* Send an ARP request to see if given destination is reachable on given interface. */
1992 clib_error_t *
1993 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1994                     u8 refresh)
1995 {
1996   vnet_main_t *vnm = vnet_get_main ();
1997   ip4_main_t *im = &ip4_main;
1998   ethernet_arp_header_t *h;
1999   ip4_address_t *src;
2000   ip_interface_address_t *ia;
2001   ip_adjacency_t *adj;
2002   vnet_hw_interface_t *hi;
2003   vnet_sw_interface_t *si;
2004   vlib_buffer_t *b;
2005   adj_index_t ai;
2006   u32 bi = 0;
2007   u8 unicast_rewrite = 0;
2008
2009   si = vnet_get_sw_interface (vnm, sw_if_index);
2010
2011   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2012     {
2013       return clib_error_return (0, "%U: interface %U down",
2014                                 format_ip4_address, dst,
2015                                 format_vnet_sw_if_index_name, vnm,
2016                                 sw_if_index);
2017     }
2018
2019   src =
2020     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2021   if (!src)
2022     {
2023       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2024       return clib_error_return
2025         (0,
2026          "no matching interface address for destination %U (interface %U)",
2027          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2028          sw_if_index);
2029     }
2030
2031   h = vlib_packet_template_get_packet (vm,
2032                                        &im->ip4_arp_request_packet_template,
2033                                        &bi);
2034
2035   if (!h)
2036     return clib_error_return (0, "ARP request packet allocation failed");
2037
2038   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2039   if (PREDICT_FALSE (!hi->hw_address))
2040     {
2041       return clib_error_return (0, "%U: interface %U do not support ip probe",
2042                                 format_ip4_address, dst,
2043                                 format_vnet_sw_if_index_name, vnm,
2044                                 sw_if_index);
2045     }
2046
2047   mac_address_from_bytes (&h->ip4_over_ethernet[0].mac, hi->hw_address);
2048
2049   h->ip4_over_ethernet[0].ip4 = src[0];
2050   h->ip4_over_ethernet[1].ip4 = dst[0];
2051
2052   b = vlib_get_buffer (vm, bi);
2053   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2054     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2055
2056   ip46_address_t nh = {
2057     .ip4 = *dst,
2058   };
2059
2060   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2061                             VNET_LINK_IP4, &nh, sw_if_index);
2062   adj = adj_get (ai);
2063
2064   /* Peer has been previously resolved, retrieve glean adj instead */
2065   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2066     {
2067       if (refresh)
2068         unicast_rewrite = 1;
2069       else
2070         {
2071           adj_unlock (ai);
2072           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2073                                       VNET_LINK_IP4, sw_if_index, &nh);
2074           adj = adj_get (ai);
2075         }
2076     }
2077
2078   /* Add encapsulation string for software interface (e.g. ethernet header). */
2079   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2080   if (unicast_rewrite)
2081     {
2082       u16 *etype = vlib_buffer_get_current (b) - 2;
2083       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2084     }
2085   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2086
2087   {
2088     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2089     u32 *to_next = vlib_frame_vector_args (f);
2090     to_next[0] = bi;
2091     f->n_vectors = 1;
2092     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2093   }
2094
2095   adj_unlock (ai);
2096   return /* no error */ 0;
2097 }
2098 #endif
2099
/*
 * Next-node slots of the rewrite nodes; the values are stored in the u16
 * nexts[] array by ip4_mtu_check () and ip4_ttl_and_checksum_check ().
 */
2100 typedef enum
2101 {
2102   IP4_REWRITE_NEXT_DROP,
2103   IP4_REWRITE_NEXT_ICMP_ERROR,
2104   IP4_REWRITE_NEXT_FRAGMENT,
2105   IP4_REWRITE_N_NEXT            /* Last */
2106 } ip4_rewrite_next_t;
2107
2108 /**
2109  * The bits of an IPv4 address to mask to construct a multicast
2110  * MAC address
2111  */
2112 #if CLIB_ARCH_IS_BIG_ENDIAN
2113 #define IP4_MCAST_ADDR_MASK 0x007fffff
2114 #else
2115 #define IP4_MCAST_ADDR_MASK 0xffff7f00
2116 #endif
2117
2118 always_inline void
2119 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2120                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2121 {
2122   if (packet_len > adj_packet_bytes)
2123     {
2124       *error = IP4_ERROR_MTU_EXCEEDED;
2125       if (df)
2126         {
2127           icmp4_error_set_vnet_buffer
2128             (b, ICMP4_destination_unreachable,
2129              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2130              adj_packet_bytes);
2131           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2132         }
2133       else
2134         {
2135           /* IP fragmentation */
2136           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2137                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2138           *next = IP4_REWRITE_NEXT_FRAGMENT;
2139         }
2140     }
2141 }
2142
2143 /* Decrement TTL & update checksum.
2144    Works either endian, so no need for byte swap. */
2145 static_always_inline void
2146 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2147                             u32 * error)
2148 {
2149   i32 ttl;
2150   u32 checksum;
2151   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2152     {
2153       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2154       return;
2155     }
2156
2157   ttl = ip->ttl;
2158
2159   /* Input node should have reject packets with ttl 0. */
2160   ASSERT (ip->ttl > 0);
2161
2162   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2163   checksum += checksum >= 0xffff;
2164
2165   ip->checksum = checksum;
2166   ttl -= 1;
2167   ip->ttl = ttl;
2168
2169   /*
2170    * If the ttl drops below 1 when forwarding, generate
2171    * an ICMP response.
2172    */
2173   if (PREDICT_FALSE (ttl <= 0))
2174     {
2175       *error = IP4_ERROR_TIME_EXPIRED;
2176       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2177       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2178                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2179                                    0);
2180       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2181     }
2182
2183   /* Verify checksum. */
2184   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2185           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2186 }
2187
2188
/**
 * Shared worker behind the IPv4 rewrite / midchain / mcast node functions.
 *
 * For each buffer: run ip4_ttl_and_checksum_check (), compare the length
 * against the adjacency's max_l3_packet_bytes (for buffers flagged
 * VNET_BUFFER_F_GSO, when do_gso is set, gso_mtu_sz () supplies the length)
 * and, when is_mcast is set, flag packets whose RX interface equals the
 * adjacency's interface with IP4_ERROR_SAME_INTERFACE.
 *
 * If no error was raised, current_data / current_length are adjusted by the
 * rewrite length, the TX sw_if_index is set and the next index is taken from
 * the adjacency (via vnet_feature_arc_start () when the adjacency has
 * VNET_REWRITE_HAS_FEATURES).  Otherwise b->error is taken from the
 * ip4-input node's error table and the next index stays at whatever the
 * checks stored in nexts[] (IP4_REWRITE_NEXT_DROP by default).
 * vnet_rewrite_*_header () is invoked whether or not an error was raised.
 *
 * @param vm          vlib main of the current thread
 * @param node        runtime of the calling node
 * @param frame       frame of buffer indices to process
 * @param do_counters non-zero to prefetch and bump the per-adjacency counters
 * @param is_midchain non-zero to call calc_checksums () and the adjacency's
 *                    midchain fixup_func, when one is set
 * @param is_mcast    non-zero to apply the same-interface check and
 *                    vnet_ip_mcast_fixup_header ()
 * @param do_gso      non-zero to honour VNET_BUFFER_F_GSO in the MTU check
 * @return frame->n_vectors
 */
2189 always_inline uword
2190 ip4_rewrite_inline_with_gso (vlib_main_t * vm,
2191                              vlib_node_runtime_t * node,
2192                              vlib_frame_t * frame,
2193                              int do_counters, int is_midchain, int is_mcast,
2194                              int do_gso)
2195 {
2196   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2197   u32 *from = vlib_frame_vector_args (frame);
2198   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2199   u16 nexts[VLIB_FRAME_SIZE], *next;
2200   u32 n_left_from;
2201   vlib_node_runtime_t *error_node =
2202     vlib_node_get_runtime (vm, ip4_input_node.index);
2203
2204   n_left_from = frame->n_vectors;
2205   u32 thread_index = vm->thread_index;
2206
  /* Resolve all buffer pointers up front and default every next to drop. */
2207   vlib_get_buffers (vm, from, bufs, n_left_from);
2208   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2209
  /* Prime the pipeline: headers of buffers 2..5 for the dual loop below. */
2210   if (n_left_from >= 6)
2211     {
2212       int i;
2213       for (i = 2; i < 6; i++)
2214         vlib_prefetch_buffer_header (bufs[i], LOAD);
2215     }
2216
2217   next = nexts;
2218   b = bufs;
  /*
   * Dual loop: handles b[0] and b[1] per iteration while prefetching the
   * headers of b[6], b[7] and the packet data of b[2], b[3]; it therefore
   * needs at least 8 buffers left.
   */
2219   while (n_left_from >= 8)
2220     {
2221       ip_adjacency_t *adj0, *adj1;
2222       ip4_header_t *ip0, *ip1;
2223       u32 rw_len0, error0, adj_index0;
2224       u32 rw_len1, error1, adj_index1;
2225       u32 tx_sw_if_index0, tx_sw_if_index1;
2226       u8 *p;
2227
2228       vlib_prefetch_buffer_header (b[6], LOAD);
2229       vlib_prefetch_buffer_header (b[7], LOAD);
2230
2231       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2232       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2233
2234       /*
2235        * pre-fetch the per-adjacency counters
2236        */
2237       if (do_counters)
2238         {
2239           vlib_prefetch_combined_counter (&adjacency_counters,
2240                                           thread_index, adj_index0);
2241           vlib_prefetch_combined_counter (&adjacency_counters,
2242                                           thread_index, adj_index1);
2243         }
2244
2245       ip0 = vlib_buffer_get_current (b[0]);
2246       ip1 = vlib_buffer_get_current (b[1]);
2247
2248       error0 = error1 = IP4_ERROR_NONE;
2249
2250       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2251       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2252
2253       /* Rewrite packet header and updates lengths. */
2254       adj0 = adj_get (adj_index0);
2255       adj1 = adj_get (adj_index1);
2256
2257       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2258       rw_len0 = adj0[0].rewrite_header.data_bytes;
2259       rw_len1 = adj1[0].rewrite_header.data_bytes;
2260       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2261       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2262
2263       p = vlib_buffer_get_current (b[2]);
2264       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2265       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2266
2267       p = vlib_buffer_get_current (b[3]);
2268       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2269       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2270
2271       /* Check MTU of outgoing interface. */
2272       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2273       u16 ip1_len = clib_net_to_host_u16 (ip1->length);
2274
2275       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2276         ip0_len = gso_mtu_sz (b[0]);
2277       if (do_gso && (b[1]->flags & VNET_BUFFER_F_GSO))
2278         ip1_len = gso_mtu_sz (b[1]);
2279
2280       ip4_mtu_check (b[0], ip0_len,
2281                      adj0[0].rewrite_header.max_l3_packet_bytes,
2282                      ip0->flags_and_fragment_offset &
2283                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2284                      next + 0, &error0);
2285       ip4_mtu_check (b[1], ip1_len,
2286                      adj1[0].rewrite_header.max_l3_packet_bytes,
2287                      ip1->flags_and_fragment_offset &
2288                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2289                      next + 1, &error1);
2290
2291       if (is_mcast)
2292         {
2293           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2294                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2295                     IP4_ERROR_SAME_INTERFACE : error0);
2296           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2297                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2298                     IP4_ERROR_SAME_INTERFACE : error1);
2299         }
2300
2301       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2302        * to see the IP header */
2303       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2304         {
2305           u32 next_index = adj0[0].rewrite_header.next_index;
2306           b[0]->current_data -= rw_len0;
2307           b[0]->current_length += rw_len0;
2308           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2309           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2310
2311           if (PREDICT_FALSE
2312               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2313             vnet_feature_arc_start (lm->output_feature_arc_index,
2314                                     tx_sw_if_index0, &next_index, b[0]);
2315           next[0] = next_index;
2316         }
2317       else
2318         {
2319           b[0]->error = error_node->errors[error0];
2320         }
2321       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2322         {
2323           u32 next_index = adj1[0].rewrite_header.next_index;
2324           b[1]->current_data -= rw_len1;
2325           b[1]->current_length += rw_len1;
2326
2327           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2328           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2329
2330           if (PREDICT_FALSE
2331               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2332             vnet_feature_arc_start (lm->output_feature_arc_index,
2333                                     tx_sw_if_index1, &next_index, b[1]);
2334           next[1] = next_index;
2335         }
2336       else
2337         {
2338           b[1]->error = error_node->errors[error1];
2339         }
2340       if (is_midchain)
2341         {
2342           calc_checksums (vm, b[0]);
2343           calc_checksums (vm, b[1]);
2344         }
2345       /* Guess we are only writing on simple Ethernet header. */
2346       vnet_rewrite_two_headers (adj0[0], adj1[0],
2347                                 ip0, ip1, sizeof (ethernet_header_t));
2348
2349       /*
2350        * Bump the per-adjacency counters
2351        */
2352       if (do_counters)
2353         {
2354           vlib_increment_combined_counter
2355             (&adjacency_counters,
2356              thread_index,
2357              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2358
2359           vlib_increment_combined_counter
2360             (&adjacency_counters,
2361              thread_index,
2362              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2363         }
2364
2365       if (is_midchain)
2366         {
2367           if (adj0->sub_type.midchain.fixup_func)
2368             adj0->sub_type.midchain.fixup_func
2369               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2370           if (adj1->sub_type.midchain.fixup_func)
2371             adj1->sub_type.midchain.fixup_func
2372               (vm, adj1, b[1], adj1->sub_type.midchain.fixup_data);
2373         }
2374
2375       if (is_mcast)
2376         {
2377           /*
2378            * copy bytes from the IP address into the MAC rewrite
2379            */
2380           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2381                                       adj0->rewrite_header.dst_mcast_offset,
2382                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2383           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2384                                       adj1->rewrite_header.dst_mcast_offset,
2385                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2386         }
2387
2388       next += 2;
2389       b += 2;
2390       n_left_from -= 2;
2391     }
2392
  /* Single loop: same steps as above, one buffer at a time, no prefetch of
   * following buffers. */
2393   while (n_left_from > 0)
2394     {
2395       ip_adjacency_t *adj0;
2396       ip4_header_t *ip0;
2397       u32 rw_len0, adj_index0, error0;
2398       u32 tx_sw_if_index0;
2399
2400       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2401
2402       adj0 = adj_get (adj_index0);
2403
2404       if (do_counters)
2405         vlib_prefetch_combined_counter (&adjacency_counters,
2406                                         thread_index, adj_index0);
2407
2408       ip0 = vlib_buffer_get_current (b[0]);
2409
2410       error0 = IP4_ERROR_NONE;
2411
2412       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2413
2414
2415       /* Update packet buffer attributes/set output interface. */
2416       rw_len0 = adj0[0].rewrite_header.data_bytes;
2417       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2418
2419       /* Check MTU of outgoing interface. */
2420       u16 ip0_len = clib_net_to_host_u16 (ip0->length);
2421       if (do_gso && (b[0]->flags & VNET_BUFFER_F_GSO))
2422         ip0_len = gso_mtu_sz (b[0]);
2423
2424       ip4_mtu_check (b[0], ip0_len,
2425                      adj0[0].rewrite_header.max_l3_packet_bytes,
2426                      ip0->flags_and_fragment_offset &
2427                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2428                      next + 0, &error0);
2429
2430       if (is_mcast)
2431         {
2432           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2433                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2434                     IP4_ERROR_SAME_INTERFACE : error0);
2435         }
2436
2437       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2438        * to see the IP header */
2439       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2440         {
2441           u32 next_index = adj0[0].rewrite_header.next_index;
2442           b[0]->current_data -= rw_len0;
2443           b[0]->current_length += rw_len0;
2444           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2445           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2446
2447           if (PREDICT_FALSE
2448               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2449             vnet_feature_arc_start (lm->output_feature_arc_index,
2450                                     tx_sw_if_index0, &next_index, b[0]);
2451           next[0] = next_index;
2452         }
2453       else
2454         {
2455           b[0]->error = error_node->errors[error0];
2456         }
2457       if (is_midchain)
2458         {
2459           calc_checksums (vm, b[0]);
2460         }
2461       /* Guess we are only writing on simple Ethernet header. */
2462       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2463
2464       if (do_counters)
2465         vlib_increment_combined_counter
2466           (&adjacency_counters,
2467            thread_index, adj_index0, 1,
2468            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2469
2470       if (is_midchain)
2471         {
2472           if (adj0->sub_type.midchain.fixup_func)
2473             adj0->sub_type.midchain.fixup_func
2474               (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2475         }
2476
2477       if (is_mcast)
2478         {
2479           /*
2480            * copy bytes from the IP address into the MAC rewrite
2481            */
2482           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2483                                       adj0->rewrite_header.dst_mcast_offset,
2484                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2485         }
2486
2487       next += 1;
2488       b += 1;
2489       n_left_from -= 1;
2490     }
2491
2492
2493   /* Need to do trace after rewrites to pick up new packet data. */
2494   if (node->flags & VLIB_NODE_FLAG_TRACE)
2495     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2496
2497   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2498   return frame->n_vectors;
2499 }
2500
2501 always_inline uword
2502 ip4_rewrite_inline (vlib_main_t * vm,
2503                     vlib_node_runtime_t * node,
2504                     vlib_frame_t * frame,
2505                     int do_counters, int is_midchain, int is_mcast)
2506 {
2507   vnet_main_t *vnm = vnet_get_main ();
2508   if (PREDICT_FALSE (vnm->interface_main.gso_interface_count > 0))
2509     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2510                                         is_midchain, is_mcast,
2511                                         1 /* do_gso */ );
2512   else
2513     return ip4_rewrite_inline_with_gso (vm, node, frame, do_counters,
2514                                         is_midchain, is_mcast,
2515                                         0 /* no do_gso */ );
2516 }
2517
2518
2519 /** @brief IPv4 rewrite node.
2520     @node ip4-rewrite
2521
2522     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2523     header checksum, fetch the ip adjacency, check the outbound mtu,
2524     apply the adjacency rewrite, and send pkts to the adjacency
2525     rewrite header's rewrite_next_index.
2526
2527     @param vm vlib_main_t corresponding to the current thread
2528     @param node vlib_node_runtime_t
2529     @param frame vlib_frame_t whose contents should be dispatched
2530
2531     @par Graph mechanics: buffer metadata, next index usage
2532
2533     @em Uses:
2534     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2535         - the rewrite adjacency index
2536     - <code>adj->lookup_next_index</code>
2537         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2538           the packet will be dropped.
2539     - <code>adj->rewrite_header</code>
2540         - Rewrite string length, rewrite string, next_index
2541
2542     @em Sets:
2543     - <code>b->current_data, b->current_length</code>
2544         - Updated net of applying the rewrite string
2545
2546     <em>Next Indices:</em>
2547     - <code> adj->rewrite_header.next_index </code>
2548       or @c ip4-drop
2549 */
2550
2551 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2552                                  vlib_frame_t * frame)
2553 {
2554   if (adj_are_counters_enabled ())
2555     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2556   else
2557     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2558 }
2559
2560 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2561                                        vlib_node_runtime_t * node,
2562                                        vlib_frame_t * frame)
2563 {
2564   if (adj_are_counters_enabled ())
2565     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2566   else
2567     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2568 }
2569
2570 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2571                                   vlib_node_runtime_t * node,
2572                                   vlib_frame_t * frame)
2573 {
2574   if (adj_are_counters_enabled ())
2575     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2576   else
2577     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2578 }
2579
2580 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2581                                        vlib_node_runtime_t * node,
2582                                        vlib_frame_t * frame)
2583 {
2584   if (adj_are_counters_enabled ())
2585     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2586   else
2587     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2588 }
2589
2590 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2591                                         vlib_node_runtime_t * node,
2592                                         vlib_frame_t * frame)
2593 {
2594   if (adj_are_counters_enabled ())
2595     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2596   else
2597     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2598 }
2599
2600 /* *INDENT-OFF* */
2601 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2602   .name = "ip4-rewrite",
2603   .vector_size = sizeof (u32),
2604
2605   .format_trace = format_ip4_rewrite_trace,
2606
2607   .n_next_nodes = IP4_REWRITE_N_NEXT,
2608   .next_nodes = {
2609     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2610     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2611     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2612   },
2613 };
2614
2615 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2616   .name = "ip4-rewrite-bcast",
2617   .vector_size = sizeof (u32),
2618
2619   .format_trace = format_ip4_rewrite_trace,
2620   .sibling_of = "ip4-rewrite",
2621 };
2622
2623 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2624   .name = "ip4-rewrite-mcast",
2625   .vector_size = sizeof (u32),
2626
2627   .format_trace = format_ip4_rewrite_trace,
2628   .sibling_of = "ip4-rewrite",
2629 };
2630
2631 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2632   .name = "ip4-mcast-midchain",
2633   .vector_size = sizeof (u32),
2634
2635   .format_trace = format_ip4_rewrite_trace,
2636   .sibling_of = "ip4-rewrite",
2637 };
2638
2639 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2640   .name = "ip4-midchain",
2641   .vector_size = sizeof (u32),
2642   .format_trace = format_ip4_forward_next_trace,
2643   .sibling_of =  "ip4-rewrite",
2644 };
2645 /* *INDENT-ON* */
2646
2647 static int
2648 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2649 {
2650   ip4_fib_mtrie_t *mtrie0;
2651   ip4_fib_mtrie_leaf_t leaf0;
2652   u32 lbi0;
2653
2654   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2655
2656   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2657   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2658   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2659
2660   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2661
2662   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2663 }
2664
2665 static clib_error_t *
2666 test_lookup_command_fn (vlib_main_t * vm,
2667                         unformat_input_t * input, vlib_cli_command_t * cmd)
2668 {
2669   ip4_fib_t *fib;
2670   u32 table_id = 0;
2671   f64 count = 1;
2672   u32 n;
2673   int i;
2674   ip4_address_t ip4_base_address;
2675   u64 errors = 0;
2676
2677   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2678     {
2679       if (unformat (input, "table %d", &table_id))
2680         {
2681           /* Make sure the entry exists. */
2682           fib = ip4_fib_get (table_id);
2683           if ((fib) && (fib->index != table_id))
2684             return clib_error_return (0, "<fib-index> %d does not exist",
2685                                       table_id);
2686         }
2687       else if (unformat (input, "count %f", &count))
2688         ;
2689
2690       else if (unformat (input, "%U",
2691                          unformat_ip4_address, &ip4_base_address))
2692         ;
2693       else
2694         return clib_error_return (0, "unknown input `%U'",
2695                                   format_unformat_error, input);
2696     }
2697
2698   n = count;
2699
2700   for (i = 0; i < n; i++)
2701     {
2702       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2703         errors++;
2704
2705       ip4_base_address.as_u32 =
2706         clib_host_to_net_u32 (1 +
2707                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2708     }
2709
2710   if (errors)
2711     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2712   else
2713     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2714
2715   return 0;
2716 }
2717
2718 /*?
2719  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2720  * given FIB table to determine if there is a conflict with the
2721  * adjacency table. The fib-id can be determined by using the
2722  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2723  * of 0 is used.
2724  *
2725  * @todo This command uses fib-id, other commands use table-id (not
2726  * just a name, they are different indexes). Would like to change this
2727  * to table-id for consistency.
2728  *
2729  * @cliexpar
2730  * Example of how to run the test lookup command:
2731  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2732  * No errors in 2 lookups
2733  * @cliexend
2734 ?*/
2735 /* *INDENT-OFF* */
2736 VLIB_CLI_COMMAND (lookup_test_command, static) =
2737 {
2738   .path = "test lookup",
2739   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2740   .function = test_lookup_command_fn,
2741 };
2742 /* *INDENT-ON* */
2743
2744 #ifndef CLIB_MARCH_VARIANT
2745 int
2746 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2747 {
2748   u32 fib_index;
2749
2750   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2751
2752   if (~0 == fib_index)
2753     return VNET_API_ERROR_NO_SUCH_FIB;
2754
2755   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2756                                   flow_hash_config);
2757
2758   return 0;
2759 }
2760 #endif
2761
2762 static clib_error_t *
2763 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2764                              unformat_input_t * input,
2765                              vlib_cli_command_t * cmd)
2766 {
2767   int matched = 0;
2768   u32 table_id = 0;
2769   u32 flow_hash_config = 0;
2770   int rv;
2771
2772   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2773     {
2774       if (unformat (input, "table %d", &table_id))
2775         matched = 1;
2776 #define _(a,v) \
2777     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2778       foreach_flow_hash_bit
2779 #undef _
2780         else
2781         break;
2782     }
2783
2784   if (matched == 0)
2785     return clib_error_return (0, "unknown input `%U'",
2786                               format_unformat_error, input);
2787
2788   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2789   switch (rv)
2790     {
2791     case 0:
2792       break;
2793
2794     case VNET_API_ERROR_NO_SUCH_FIB:
2795       return clib_error_return (0, "no such FIB table %d", table_id);
2796
2797     default:
2798       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2799       break;
2800     }
2801
2802   return 0;
2803 }
2804
2805 /*?
2806  * Configure the set of IPv4 fields used by the flow hash.
2807  *
2808  * @cliexpar
2809  * Example of how to set the flow hash on a given table:
2810  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2811  * Example of how to display the configured flow hash:
2812  * @cliexstart{show ip fib}
2813  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2814  * 0.0.0.0/0
2815  *   unicast-ip4-chain
2816  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2817  *     [0] [@0]: dpo-drop ip6
2818  * 0.0.0.0/32
2819  *   unicast-ip4-chain
2820  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2821  *     [0] [@0]: dpo-drop ip6
2822  * 224.0.0.0/8
2823  *   unicast-ip4-chain
2824  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2825  *     [0] [@0]: dpo-drop ip6
2826  * 6.0.1.2/32
2827  *   unicast-ip4-chain
2828  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2829  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2830  * 7.0.0.1/32
2831  *   unicast-ip4-chain
2832  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2833  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2834  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2835  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2836  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2837  * 240.0.0.0/8
2838  *   unicast-ip4-chain
2839  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2840  *     [0] [@0]: dpo-drop ip6
2841  * 255.255.255.255/32
2842  *   unicast-ip4-chain
2843  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2844  *     [0] [@0]: dpo-drop ip6
2845  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2846  * 0.0.0.0/0
2847  *   unicast-ip4-chain
2848  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2849  *     [0] [@0]: dpo-drop ip6
2850  * 0.0.0.0/32
2851  *   unicast-ip4-chain
2852  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2853  *     [0] [@0]: dpo-drop ip6
2854  * 172.16.1.0/24
2855  *   unicast-ip4-chain
2856  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2857  *     [0] [@4]: ipv4-glean: af_packet0
2858  * 172.16.1.1/32
2859  *   unicast-ip4-chain
2860  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2861  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2862  * 172.16.1.2/32
2863  *   unicast-ip4-chain
2864  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2865  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2866  * 172.16.2.0/24
2867  *   unicast-ip4-chain
2868  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2869  *     [0] [@4]: ipv4-glean: af_packet1
2870  * 172.16.2.1/32
2871  *   unicast-ip4-chain
2872  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2873  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2874  * 224.0.0.0/8
2875  *   unicast-ip4-chain
2876  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2877  *     [0] [@0]: dpo-drop ip6
2878  * 240.0.0.0/8
2879  *   unicast-ip4-chain
2880  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2881  *     [0] [@0]: dpo-drop ip6
2882  * 255.255.255.255/32
2883  *   unicast-ip4-chain
2884  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2885  *     [0] [@0]: dpo-drop ip6
2886  * @cliexend
2887 ?*/
2888 /* *INDENT-OFF* */
2889 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2890 {
2891   .path = "set ip flow-hash",
2892   .short_help =
2893   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2894   .function = set_ip_flow_hash_command_fn,
2895 };
2896 /* *INDENT-ON* */
2897
2898 #ifndef CLIB_MARCH_VARIANT
2899 int
2900 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2901                              u32 table_index)
2902 {
2903   vnet_main_t *vnm = vnet_get_main ();
2904   vnet_interface_main_t *im = &vnm->interface_main;
2905   ip4_main_t *ipm = &ip4_main;
2906   ip_lookup_main_t *lm = &ipm->lookup_main;
2907   vnet_classify_main_t *cm = &vnet_classify_main;
2908   ip4_address_t *if_addr;
2909
2910   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2911     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2912
2913   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2914     return VNET_API_ERROR_NO_SUCH_ENTRY;
2915
2916   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2917   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2918
2919   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2920
2921   if (NULL != if_addr)
2922     {
2923       fib_prefix_t pfx = {
2924         .fp_len = 32,
2925         .fp_proto = FIB_PROTOCOL_IP4,
2926         .fp_addr.ip4 = *if_addr,
2927       };
2928       u32 fib_index;
2929
2930       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2931                                                        sw_if_index);
2932
2933
2934       if (table_index != (u32) ~ 0)
2935         {
2936           dpo_id_t dpo = DPO_INVALID;
2937
2938           dpo_set (&dpo,
2939                    DPO_CLASSIFY,
2940                    DPO_PROTO_IP4,
2941                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2942
2943           fib_table_entry_special_dpo_add (fib_index,
2944                                            &pfx,
2945                                            FIB_SOURCE_CLASSIFY,
2946                                            FIB_ENTRY_FLAG_NONE, &dpo);
2947           dpo_reset (&dpo);
2948         }
2949       else
2950         {
2951           fib_table_entry_special_remove (fib_index,
2952                                           &pfx, FIB_SOURCE_CLASSIFY);
2953         }
2954     }
2955
2956   return 0;
2957 }
2958 #endif
2959
2960 static clib_error_t *
2961 set_ip_classify_command_fn (vlib_main_t * vm,
2962                             unformat_input_t * input,
2963                             vlib_cli_command_t * cmd)
2964 {
2965   u32 table_index = ~0;
2966   int table_index_set = 0;
2967   u32 sw_if_index = ~0;
2968   int rv;
2969
2970   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2971     {
2972       if (unformat (input, "table-index %d", &table_index))
2973         table_index_set = 1;
2974       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2975                          vnet_get_main (), &sw_if_index))
2976         ;
2977       else
2978         break;
2979     }
2980
2981   if (table_index_set == 0)
2982     return clib_error_return (0, "classify table-index must be specified");
2983
2984   if (sw_if_index == ~0)
2985     return clib_error_return (0, "interface / subif must be specified");
2986
2987   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2988
2989   switch (rv)
2990     {
2991     case 0:
2992       break;
2993
2994     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2995       return clib_error_return (0, "No such interface");
2996
2997     case VNET_API_ERROR_NO_SUCH_ENTRY:
2998       return clib_error_return (0, "No such classifier table");
2999     }
3000   return 0;
3001 }
3002
3003 /*?
3004  * Assign a classification table to an interface. The classification
3005  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3006  * commands. Once the table is created, use this command to filter packets
3007  * on an interface.
3008  *
3009  * @cliexpar
3010  * Example of how to assign a classification table to an interface:
3011  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3012 ?*/
3013 /* *INDENT-OFF* */
3014 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3015 {
3016     .path = "set ip classify",
3017     .short_help =
3018     "set ip classify intfc <interface> table-index <classify-idx>",
3019     .function = set_ip_classify_command_fn,
3020 };
3021 /* *INDENT-ON* */
3022
3023 static clib_error_t *
3024 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3025 {
3026   ip4_main_t *im = &ip4_main;
3027   uword heapsize = 0;
3028
3029   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3030     {
3031       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3032         ;
3033       else
3034         return clib_error_return (0,
3035                                   "invalid heap-size parameter `%U'",
3036                                   format_unformat_error, input);
3037     }
3038
3039   im->mtrie_heap_size = heapsize;
3040
3041   return 0;
3042 }
3043
3044 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3045
3046 /*
3047  * fd.io coding-style-patch-verification: ON
3048  *
3049  * Local Variables:
3050  * eval: (c-set-style "gnu")
3051  * End:
3052  */