IP load-balance; trace at the end of the node so the flow hash used is displayed
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
92                                 vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .name = "ip4-lookup",
106   .vector_size = sizeof (u32),
107   .format_trace = format_ip4_lookup_trace,
108   .n_next_nodes = IP_LOOKUP_N_NEXT,
109   .next_nodes = IP4_LOOKUP_NEXT_NODES,
110 };
111 /* *INDENT-ON* */
112
113 VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
114                                       vlib_node_runtime_t * node,
115                                       vlib_frame_t * frame)
116 {
117   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
118   u32 n_left_from, n_left_to_next, *from, *to_next;
119   ip_lookup_next_t next;
120   u32 thread_index = vm->thread_index;
121
122   from = vlib_frame_vector_args (frame);
123   n_left_from = frame->n_vectors;
124   next = node->cached_next_index;
125
126   while (n_left_from > 0)
127     {
128       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
129
130
131       while (n_left_from >= 4 && n_left_to_next >= 2)
132         {
133           ip_lookup_next_t next0, next1;
134           const load_balance_t *lb0, *lb1;
135           vlib_buffer_t *p0, *p1;
136           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
137           const ip4_header_t *ip0, *ip1;
138           const dpo_id_t *dpo0, *dpo1;
139
140           /* Prefetch next iteration. */
141           {
142             vlib_buffer_t *p2, *p3;
143
144             p2 = vlib_get_buffer (vm, from[2]);
145             p3 = vlib_get_buffer (vm, from[3]);
146
147             vlib_prefetch_buffer_header (p2, STORE);
148             vlib_prefetch_buffer_header (p3, STORE);
149
150             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
151             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
152           }
153
154           pi0 = to_next[0] = from[0];
155           pi1 = to_next[1] = from[1];
156
157           from += 2;
158           n_left_from -= 2;
159           to_next += 2;
160           n_left_to_next -= 2;
161
162           p0 = vlib_get_buffer (vm, pi0);
163           p1 = vlib_get_buffer (vm, pi1);
164
165           ip0 = vlib_buffer_get_current (p0);
166           ip1 = vlib_buffer_get_current (p1);
167           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
168           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
169
170           lb0 = load_balance_get (lbi0);
171           lb1 = load_balance_get (lbi1);
172
173           /*
174            * this node is for via FIBs we can re-use the hash value from the
175            * to node if present.
176            * We don't want to use the same hash value at each level in the recursion
177            * graph as that would lead to polarisation
178            */
179           hc0 = hc1 = 0;
180
181           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
182             {
183               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
184                 {
185                   hc0 = vnet_buffer (p0)->ip.flow_hash =
186                     vnet_buffer (p0)->ip.flow_hash >> 1;
187                 }
188               else
189                 {
190                   hc0 = vnet_buffer (p0)->ip.flow_hash =
191                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
192                 }
193               dpo0 = load_balance_get_fwd_bucket
194                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
195             }
196           else
197             {
198               dpo0 = load_balance_get_bucket_i (lb0, 0);
199             }
200           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
201             {
202               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
203                 {
204                   hc1 = vnet_buffer (p1)->ip.flow_hash =
205                     vnet_buffer (p1)->ip.flow_hash >> 1;
206                 }
207               else
208                 {
209                   hc1 = vnet_buffer (p1)->ip.flow_hash =
210                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
211                 }
212               dpo1 = load_balance_get_fwd_bucket
213                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
214             }
215           else
216             {
217               dpo1 = load_balance_get_bucket_i (lb1, 0);
218             }
219
220           next0 = dpo0->dpoi_next_node;
221           next1 = dpo1->dpoi_next_node;
222
223           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
225
226           vlib_increment_combined_counter
227             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
228           vlib_increment_combined_counter
229             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
230
231           vlib_validate_buffer_enqueue_x2 (vm, node, next,
232                                            to_next, n_left_to_next,
233                                            pi0, pi1, next0, next1);
234         }
235
236       while (n_left_from > 0 && n_left_to_next > 0)
237         {
238           ip_lookup_next_t next0;
239           const load_balance_t *lb0;
240           vlib_buffer_t *p0;
241           u32 pi0, lbi0, hc0;
242           const ip4_header_t *ip0;
243           const dpo_id_t *dpo0;
244
245           pi0 = from[0];
246           to_next[0] = pi0;
247           from += 1;
248           to_next += 1;
249           n_left_to_next -= 1;
250           n_left_from -= 1;
251
252           p0 = vlib_get_buffer (vm, pi0);
253
254           ip0 = vlib_buffer_get_current (p0);
255           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
256
257           lb0 = load_balance_get (lbi0);
258
259           hc0 = 0;
260           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
261             {
262               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
263                 {
264                   hc0 = vnet_buffer (p0)->ip.flow_hash =
265                     vnet_buffer (p0)->ip.flow_hash >> 1;
266                 }
267               else
268                 {
269                   hc0 = vnet_buffer (p0)->ip.flow_hash =
270                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
271                 }
272               dpo0 = load_balance_get_fwd_bucket
273                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
274             }
275           else
276             {
277               dpo0 = load_balance_get_bucket_i (lb0, 0);
278             }
279
280           next0 = dpo0->dpoi_next_node;
281           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
282
283           vlib_increment_combined_counter
284             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
285
286           vlib_validate_buffer_enqueue_x1 (vm, node, next,
287                                            to_next, n_left_to_next,
288                                            pi0, next0);
289         }
290
291       vlib_put_next_frame (vm, node, next, n_left_to_next);
292     }
293
294   if (node->flags & VLIB_NODE_FLAG_TRACE)
295     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
296
297   return frame->n_vectors;
298 }
299
300 /* *INDENT-OFF* */
301 VLIB_REGISTER_NODE (ip4_load_balance_node) =
302 {
303   .name = "ip4-load-balance",
304   .vector_size = sizeof (u32),
305   .sibling_of = "ip4-lookup",
306   .format_trace = format_ip4_lookup_trace,
307 };
308 /* *INDENT-ON* */
309
310 #ifndef CLIB_MARCH_VARIANT
311 /* get first interface address */
312 ip4_address_t *
313 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
314                              ip_interface_address_t ** result_ia)
315 {
316   ip_lookup_main_t *lm = &im->lookup_main;
317   ip_interface_address_t *ia = 0;
318   ip4_address_t *result = 0;
319
320   /* *INDENT-OFF* */
321   foreach_ip_interface_address
322     (lm, ia, sw_if_index,
323      1 /* honor unnumbered */ ,
324      ({
325        ip4_address_t * a =
326          ip_interface_address_get_address (lm, ia);
327        result = a;
328        break;
329      }));
330   /* *INDENT-OFF* */
331   if (result_ia)
332     *result_ia = result ? ia : 0;
333   return result;
334 }
335
336 static void
337 ip4_add_subnet_bcast_route (u32 fib_index,
338                             fib_prefix_t *pfx,
339                             u32 sw_if_index)
340 {
341   vnet_sw_interface_flags_t iflags;
342
343   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
344
345   fib_table_entry_special_remove(fib_index,
346                                  pfx,
347                                  FIB_SOURCE_INTERFACE);
348
349   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
350     {
351       fib_table_entry_update_one_path (fib_index, pfx,
352                                        FIB_SOURCE_INTERFACE,
353                                        FIB_ENTRY_FLAG_NONE,
354                                        DPO_PROTO_IP4,
355                                        /* No next-hop address */
356                                        &ADJ_BCAST_ADDR,
357                                        sw_if_index,
358                                        // invalid FIB index
359                                        ~0,
360                                        1,
361                                        // no out-label stack
362                                        NULL,
363                                        FIB_ROUTE_PATH_FLAG_NONE);
364     }
365   else
366     {
367         fib_table_entry_special_add(fib_index,
368                                     pfx,
369                                     FIB_SOURCE_INTERFACE,
370                                     (FIB_ENTRY_FLAG_DROP |
371                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
372     }
373 }
374
375 static void
376 ip4_add_interface_routes (u32 sw_if_index,
377                           ip4_main_t * im, u32 fib_index,
378                           ip_interface_address_t * a)
379 {
380   ip_lookup_main_t *lm = &im->lookup_main;
381   ip4_address_t *address = ip_interface_address_get_address (lm, a);
382   fib_prefix_t pfx = {
383     .fp_len = a->address_length,
384     .fp_proto = FIB_PROTOCOL_IP4,
385     .fp_addr.ip4 = *address,
386   };
387
388   if (pfx.fp_len <= 30)
389     {
390       /* a /30 or shorter - add a glean for the network address */
391       fib_table_entry_update_one_path (fib_index, &pfx,
392                                        FIB_SOURCE_INTERFACE,
393                                        (FIB_ENTRY_FLAG_CONNECTED |
394                                         FIB_ENTRY_FLAG_ATTACHED),
395                                        DPO_PROTO_IP4,
396                                        /* No next-hop address */
397                                        NULL,
398                                        sw_if_index,
399                                        // invalid FIB index
400                                        ~0,
401                                        1,
402                                        // no out-label stack
403                                        NULL,
404                                        FIB_ROUTE_PATH_FLAG_NONE);
405
406       /* Add the two broadcast addresses as drop */
407       fib_prefix_t net_pfx = {
408         .fp_len = 32,
409         .fp_proto = FIB_PROTOCOL_IP4,
410         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
411       };
412       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
413         fib_table_entry_special_add(fib_index,
414                                     &net_pfx,
415                                     FIB_SOURCE_INTERFACE,
416                                     (FIB_ENTRY_FLAG_DROP |
417                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
418       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
419       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
421     }
422   else if (pfx.fp_len == 31)
423     {
424       u32 mask = clib_host_to_net_u32(1);
425       fib_prefix_t net_pfx = pfx;
426
427       net_pfx.fp_len = 32;
428       net_pfx.fp_addr.ip4.as_u32 ^= mask;
429
430       /* a /31 - add the other end as an attached host */
431       fib_table_entry_update_one_path (fib_index, &net_pfx,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &net_pfx.fp_addr,
436                                        sw_if_index,
437                                        // invalid FIB index
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443   pfx.fp_len = 32;
444
445   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
446     {
447       u32 classify_table_index =
448         lm->classify_table_index_by_sw_if_index[sw_if_index];
449       if (classify_table_index != (u32) ~ 0)
450         {
451           dpo_id_t dpo = DPO_INVALID;
452
453           dpo_set (&dpo,
454                    DPO_CLASSIFY,
455                    DPO_PROTO_IP4,
456                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
457
458           fib_table_entry_special_dpo_add (fib_index,
459                                            &pfx,
460                                            FIB_SOURCE_CLASSIFY,
461                                            FIB_ENTRY_FLAG_NONE, &dpo);
462           dpo_reset (&dpo);
463         }
464     }
465
466   fib_table_entry_update_one_path (fib_index, &pfx,
467                                    FIB_SOURCE_INTERFACE,
468                                    (FIB_ENTRY_FLAG_CONNECTED |
469                                     FIB_ENTRY_FLAG_LOCAL),
470                                    DPO_PROTO_IP4,
471                                    &pfx.fp_addr,
472                                    sw_if_index,
473                                    // invalid FIB index
474                                    ~0,
475                                    1, NULL,
476                                    FIB_ROUTE_PATH_FLAG_NONE);
477 }
478
479 static void
480 ip4_del_interface_routes (ip4_main_t * im,
481                           u32 fib_index,
482                           ip4_address_t * address, u32 address_length)
483 {
484   fib_prefix_t pfx = {
485     .fp_len = address_length,
486     .fp_proto = FIB_PROTOCOL_IP4,
487     .fp_addr.ip4 = *address,
488   };
489
490   if (pfx.fp_len <= 30)
491     {
492       fib_prefix_t net_pfx = {
493         .fp_len = 32,
494         .fp_proto = FIB_PROTOCOL_IP4,
495         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
496       };
497       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
498         fib_table_entry_special_remove(fib_index,
499                                        &net_pfx,
500                                        FIB_SOURCE_INTERFACE);
501       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
502       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
503         fib_table_entry_special_remove(fib_index,
504                                        &net_pfx,
505                                        FIB_SOURCE_INTERFACE);
506       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
507     }
508     else if (pfx.fp_len == 31)
509     {
510       u32 mask = clib_host_to_net_u32(1);
511       fib_prefix_t net_pfx = pfx;
512
513       net_pfx.fp_len = 32;
514       net_pfx.fp_addr.ip4.as_u32 ^= mask;
515
516       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
517     }
518
519   pfx.fp_len = 32;
520   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
521 }
522
523 void
524 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
525 {
526   ip4_main_t *im = &ip4_main;
527
528   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
529
530   /*
531    * enable/disable only on the 1<->0 transition
532    */
533   if (is_enable)
534     {
535       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
536         return;
537     }
538   else
539     {
540       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
541       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
542         return;
543     }
544   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
545                                !is_enable, 0, 0);
546
547
548   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
549                                sw_if_index, !is_enable, 0, 0);
550 }
551
552 static clib_error_t *
553 ip4_add_del_interface_address_internal (vlib_main_t * vm,
554                                         u32 sw_if_index,
555                                         ip4_address_t * address,
556                                         u32 address_length, u32 is_del)
557 {
558   vnet_main_t *vnm = vnet_get_main ();
559   ip4_main_t *im = &ip4_main;
560   ip_lookup_main_t *lm = &im->lookup_main;
561   clib_error_t *error = 0;
562   u32 if_address_index, elts_before;
563   ip4_address_fib_t ip4_af, *addr_fib = 0;
564
565   /* local0 interface doesn't support IP addressing  */
566   if (sw_if_index == 0)
567     {
568       return
569        clib_error_create ("local0 interface doesn't support IP addressing");
570     }
571
572   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
573   ip4_addr_fib_init (&ip4_af, address,
574                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
575   vec_add1 (addr_fib, ip4_af);
576
577   /*
578    * there is no support for adj-fib handling in the presence of overlapping
579    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
580    * most routers do.
581    */
582   /* *INDENT-OFF* */
583   if (!is_del)
584     {
585       /* When adding an address check that it does not conflict
586          with an existing address on any interface in this table. */
587       ip_interface_address_t *ia;
588       vnet_sw_interface_t *sif;
589
590       pool_foreach(sif, vnm->interface_main.sw_interfaces,
591       ({
592           if (im->fib_index_by_sw_if_index[sw_if_index] ==
593               im->fib_index_by_sw_if_index[sif->sw_if_index])
594             {
595               foreach_ip_interface_address
596                 (&im->lookup_main, ia, sif->sw_if_index,
597                  0 /* honor unnumbered */ ,
598                  ({
599                    ip4_address_t * x =
600                      ip_interface_address_get_address
601                      (&im->lookup_main, ia);
602                    if (ip4_destination_matches_route
603                        (im, address, x, ia->address_length) ||
604                        ip4_destination_matches_route (im,
605                                                       x,
606                                                       address,
607                                                       address_length))
608                      {
609                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
610
611                        return
612                          clib_error_create
613                          ("failed to add %U which conflicts with %U for interface %U",
614                           format_ip4_address_and_length, address,
615                           address_length,
616                           format_ip4_address_and_length, x,
617                           ia->address_length,
618                           format_vnet_sw_if_index_name, vnm,
619                           sif->sw_if_index);
620                      }
621                  }));
622             }
623       }));
624     }
625   /* *INDENT-ON* */
626
627   elts_before = pool_elts (lm->if_address_pool);
628
629   error = ip_interface_address_add_del
630     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
631   if (error)
632     goto done;
633
634   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
635
636   if (is_del)
637     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
638   else
639     ip4_add_interface_routes (sw_if_index,
640                               im, ip4_af.fib_index,
641                               pool_elt_at_index
642                               (lm->if_address_pool, if_address_index));
643
644   /* If pool did not grow/shrink: add duplicate address. */
645   if (elts_before != pool_elts (lm->if_address_pool))
646     {
647       ip4_add_del_interface_address_callback_t *cb;
648       vec_foreach (cb, im->add_del_interface_address_callbacks)
649         cb->function (im, cb->function_opaque, sw_if_index,
650                       address, address_length, if_address_index, is_del);
651     }
652
653 done:
654   vec_free (addr_fib);
655   return error;
656 }
657
658 clib_error_t *
659 ip4_add_del_interface_address (vlib_main_t * vm,
660                                u32 sw_if_index,
661                                ip4_address_t * address,
662                                u32 address_length, u32 is_del)
663 {
664   return ip4_add_del_interface_address_internal
665     (vm, sw_if_index, address, address_length, is_del);
666 }
667
668 void
669 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
670 {
671   ip_interface_address_t *ia;
672   ip4_main_t *im;
673
674   im = &ip4_main;
675
676   /*
677    * when directed broadcast is enabled, the subnet braodcast route will forward
678    * packets using an adjacency with a broadcast MAC. otherwise it drops
679    */
680   /* *INDENT-OFF* */
681   foreach_ip_interface_address(&im->lookup_main, ia,
682                                sw_if_index, 0,
683      ({
684        if (ia->address_length <= 30)
685          {
686            ip4_address_t *ipa;
687
688            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
689
690            fib_prefix_t pfx = {
691              .fp_len = 32,
692              .fp_proto = FIB_PROTOCOL_IP4,
693              .fp_addr = {
694                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
695              },
696            };
697
698            ip4_add_subnet_bcast_route
699              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
700                                                   sw_if_index),
701               &pfx, sw_if_index);
702          }
703      }));
704   /* *INDENT-ON* */
705 }
706 #endif
707
708 /* Built-in ip4 unicast rx feature path definition */
709 /* *INDENT-OFF* */
710 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
711 {
712   .arc_name = "ip4-unicast",
713   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
714   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
715 };
716
717 VNET_FEATURE_INIT (ip4_flow_classify, static) =
718 {
719   .arc_name = "ip4-unicast",
720   .node_name = "ip4-flow-classify",
721   .runs_before = VNET_FEATURES ("ip4-inacl"),
722 };
723
724 VNET_FEATURE_INIT (ip4_inacl, static) =
725 {
726   .arc_name = "ip4-unicast",
727   .node_name = "ip4-inacl",
728   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
729 };
730
731 VNET_FEATURE_INIT (ip4_source_check_1, static) =
732 {
733   .arc_name = "ip4-unicast",
734   .node_name = "ip4-source-check-via-rx",
735   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
736 };
737
738 VNET_FEATURE_INIT (ip4_source_check_2, static) =
739 {
740   .arc_name = "ip4-unicast",
741   .node_name = "ip4-source-check-via-any",
742   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
743 };
744
745 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
746 {
747   .arc_name = "ip4-unicast",
748   .node_name = "ip4-source-and-port-range-check-rx",
749   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
750 };
751
752 VNET_FEATURE_INIT (ip4_policer_classify, static) =
753 {
754   .arc_name = "ip4-unicast",
755   .node_name = "ip4-policer-classify",
756   .runs_before = VNET_FEATURES ("ipsec4-input"),
757 };
758
759 VNET_FEATURE_INIT (ip4_ipsec, static) =
760 {
761   .arc_name = "ip4-unicast",
762   .node_name = "ipsec4-input",
763   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
764 };
765
766 VNET_FEATURE_INIT (ip4_vpath, static) =
767 {
768   .arc_name = "ip4-unicast",
769   .node_name = "vpath-input-ip4",
770   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
771 };
772
773 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
774 {
775   .arc_name = "ip4-unicast",
776   .node_name = "ip4-vxlan-bypass",
777   .runs_before = VNET_FEATURES ("ip4-lookup"),
778 };
779
780 VNET_FEATURE_INIT (ip4_not_enabled, static) =
781 {
782   .arc_name = "ip4-unicast",
783   .node_name = "ip4-not-enabled",
784   .runs_before = VNET_FEATURES ("ip4-lookup"),
785 };
786
787 VNET_FEATURE_INIT (ip4_lookup, static) =
788 {
789   .arc_name = "ip4-unicast",
790   .node_name = "ip4-lookup",
791   .runs_before = 0,     /* not before any other features */
792 };
793
794 /* Built-in ip4 multicast rx feature path definition */
795 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
796 {
797   .arc_name = "ip4-multicast",
798   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
799   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
800 };
801
802 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
803 {
804   .arc_name = "ip4-multicast",
805   .node_name = "vpath-input-ip4",
806   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
807 };
808
809 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
810 {
811   .arc_name = "ip4-multicast",
812   .node_name = "ip4-not-enabled",
813   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
814 };
815
816 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
817 {
818   .arc_name = "ip4-multicast",
819   .node_name = "ip4-mfib-forward-lookup",
820   .runs_before = 0,     /* last feature */
821 };
822
823 /* Source and port-range check ip4 tx feature path definition */
824 VNET_FEATURE_ARC_INIT (ip4_output, static) =
825 {
826   .arc_name = "ip4-output",
827   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
828   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
829 };
830
831 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
832 {
833   .arc_name = "ip4-output",
834   .node_name = "ip4-source-and-port-range-check-tx",
835   .runs_before = VNET_FEATURES ("ip4-outacl"),
836 };
837
838 VNET_FEATURE_INIT (ip4_outacl, static) =
839 {
840   .arc_name = "ip4-output",
841   .node_name = "ip4-outacl",
842   .runs_before = VNET_FEATURES ("ipsec4-output"),
843 };
844
845 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
846 {
847   .arc_name = "ip4-output",
848   .node_name = "ipsec4-output",
849   .runs_before = VNET_FEATURES ("interface-output"),
850 };
851
852 /* Built-in ip4 tx feature path definition */
853 VNET_FEATURE_INIT (ip4_interface_output, static) =
854 {
855   .arc_name = "ip4-output",
856   .node_name = "interface-output",
857   .runs_before = 0,     /* not before any other features */
858 };
859 /* *INDENT-ON* */
860
861 static clib_error_t *
862 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
863 {
864   ip4_main_t *im = &ip4_main;
865
866   /* Fill in lookup tables with default table (0). */
867   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
868   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
869
870   if (!is_add)
871     {
872       ip4_main_t *im4 = &ip4_main;
873       ip_lookup_main_t *lm4 = &im4->lookup_main;
874       ip_interface_address_t *ia = 0;
875       ip4_address_t *address;
876       vlib_main_t *vm = vlib_get_main ();
877
878       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
879       /* *INDENT-OFF* */
880       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
881       ({
882         address = ip_interface_address_get_address (lm4, ia);
883         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
884       }));
885       /* *INDENT-ON* */
886     }
887
888   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
889                                is_add, 0, 0);
890
891   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
892                                sw_if_index, is_add, 0, 0);
893
894   return /* no error */ 0;
895 }
896
897 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
898
899 /* Global IP4 main. */
900 ip4_main_t ip4_main;
901
902 static clib_error_t *
903 ip4_lookup_init (vlib_main_t * vm)
904 {
905   ip4_main_t *im = &ip4_main;
906   clib_error_t *error;
907   uword i;
908
909   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
910     return error;
911   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
912     return (error);
913   if ((error = vlib_call_init_function (vm, fib_module_init)))
914     return error;
915   if ((error = vlib_call_init_function (vm, mfib_module_init)))
916     return error;
917
918   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
919     {
920       u32 m;
921
922       if (i < 32)
923         m = pow2_mask (i) << (32 - i);
924       else
925         m = ~0;
926       im->fib_masks[i] = clib_host_to_net_u32 (m);
927     }
928
929   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
930
931   /* Create FIB with index 0 and table id of 0. */
932   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
933                                      FIB_SOURCE_DEFAULT_ROUTE);
934   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
935                                       MFIB_SOURCE_DEFAULT_ROUTE);
936
937   {
938     pg_node_t *pn;
939     pn = pg_get_node (ip4_lookup_node.index);
940     pn->unformat_edit = unformat_pg_ip4_header;
941   }
942
943   {
944     ethernet_arp_header_t h;
945
946     clib_memset (&h, 0, sizeof (h));
947
948     /* Set target ethernet address to all zeros. */
949     clib_memset (h.ip4_over_ethernet[1].ethernet, 0,
950                  sizeof (h.ip4_over_ethernet[1].ethernet));
951
952 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
953 #define _8(f,v) h.f = v;
954     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
955     _16 (l3_type, ETHERNET_TYPE_IP4);
956     _8 (n_l2_address_bytes, 6);
957     _8 (n_l3_address_bytes, 4);
958     _16 (opcode, ETHERNET_ARP_OPCODE_request);
959 #undef _16
960 #undef _8
961
962     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
963                                /* data */ &h,
964                                sizeof (h),
965                                /* alloc chunk size */ 8,
966                                "ip4 arp");
967   }
968
969   return error;
970 }
971
972 VLIB_INIT_FUNCTION (ip4_lookup_init);
973
974 typedef struct
975 {
976   /* Adjacency taken. */
977   u32 dpo_index;
978   u32 flow_hash;
979   u32 fib_index;
980
981   /* Packet data, possibly *after* rewrite. */
982   u8 packet_data[64 - 1 * sizeof (u32)];
983 }
984 ip4_forward_next_trace_t;
985
986 #ifndef CLIB_MARCH_VARIANT
987 u8 *
988 format_ip4_forward_next_trace (u8 * s, va_list * args)
989 {
990   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
991   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
992   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
993   u32 indent = format_get_indent (s);
994   s = format (s, "%U%U",
995               format_white_space, indent,
996               format_ip4_header, t->packet_data, sizeof (t->packet_data));
997   return s;
998 }
999 #endif
1000
1001 static u8 *
1002 format_ip4_lookup_trace (u8 * s, va_list * args)
1003 {
1004   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1005   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1006   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1007   u32 indent = format_get_indent (s);
1008
1009   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1010               t->fib_index, t->dpo_index, t->flow_hash);
1011   s = format (s, "\n%U%U",
1012               format_white_space, indent,
1013               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1014   return s;
1015 }
1016
1017 static u8 *
1018 format_ip4_rewrite_trace (u8 * s, va_list * args)
1019 {
1020   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1021   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1022   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1023   u32 indent = format_get_indent (s);
1024
1025   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1026               t->fib_index, t->dpo_index, format_ip_adjacency,
1027               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1028   s = format (s, "\n%U%U",
1029               format_white_space, indent,
1030               format_ip_adjacency_packet_data,
1031               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1032   return s;
1033 }
1034
1035 #ifndef CLIB_MARCH_VARIANT
1036 /* Common trace function for all ip4-forward next nodes. */
1037 void
1038 ip4_forward_next_trace (vlib_main_t * vm,
1039                         vlib_node_runtime_t * node,
1040                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1041 {
1042   u32 *from, n_left;
1043   ip4_main_t *im = &ip4_main;
1044
1045   n_left = frame->n_vectors;
1046   from = vlib_frame_vector_args (frame);
1047
1048   while (n_left >= 4)
1049     {
1050       u32 bi0, bi1;
1051       vlib_buffer_t *b0, *b1;
1052       ip4_forward_next_trace_t *t0, *t1;
1053
1054       /* Prefetch next iteration. */
1055       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1056       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1057
1058       bi0 = from[0];
1059       bi1 = from[1];
1060
1061       b0 = vlib_get_buffer (vm, bi0);
1062       b1 = vlib_get_buffer (vm, bi1);
1063
1064       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1065         {
1066           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1067           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1068           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1069           t0->fib_index =
1070             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1071              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1072             vec_elt (im->fib_index_by_sw_if_index,
1073                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1074
1075           clib_memcpy (t0->packet_data,
1076                        vlib_buffer_get_current (b0),
1077                        sizeof (t0->packet_data));
1078         }
1079       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1080         {
1081           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1082           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1083           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1084           t1->fib_index =
1085             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1086              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1087             vec_elt (im->fib_index_by_sw_if_index,
1088                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1089           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1090                        sizeof (t1->packet_data));
1091         }
1092       from += 2;
1093       n_left -= 2;
1094     }
1095
1096   while (n_left >= 1)
1097     {
1098       u32 bi0;
1099       vlib_buffer_t *b0;
1100       ip4_forward_next_trace_t *t0;
1101
1102       bi0 = from[0];
1103
1104       b0 = vlib_get_buffer (vm, bi0);
1105
1106       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1107         {
1108           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1109           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1110           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1111           t0->fib_index =
1112             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1113              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1114             vec_elt (im->fib_index_by_sw_if_index,
1115                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1116           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1117                        sizeof (t0->packet_data));
1118         }
1119       from += 1;
1120       n_left -= 1;
1121     }
1122 }
1123
1124 /* Compute TCP/UDP/ICMP4 checksum in software. */
1125 u16
1126 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1127                               ip4_header_t * ip0)
1128 {
1129   ip_csum_t sum0;
1130   u32 ip_header_length, payload_length_host_byte_order;
1131   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1132   u16 sum16;
1133   void *data_this_buffer;
1134
1135   /* Initialize checksum with ip header. */
1136   ip_header_length = ip4_header_bytes (ip0);
1137   payload_length_host_byte_order =
1138     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1139   sum0 =
1140     clib_host_to_net_u32 (payload_length_host_byte_order +
1141                           (ip0->protocol << 16));
1142
1143   if (BITS (uword) == 32)
1144     {
1145       sum0 =
1146         ip_csum_with_carry (sum0,
1147                             clib_mem_unaligned (&ip0->src_address, u32));
1148       sum0 =
1149         ip_csum_with_carry (sum0,
1150                             clib_mem_unaligned (&ip0->dst_address, u32));
1151     }
1152   else
1153     sum0 =
1154       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1155
1156   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1157   data_this_buffer = (void *) ip0 + ip_header_length;
1158   n_ip_bytes_this_buffer =
1159     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1160   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1161     {
1162       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1163         n_ip_bytes_this_buffer - ip_header_length : 0;
1164     }
1165   while (1)
1166     {
1167       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1168       n_bytes_left -= n_this_buffer;
1169       if (n_bytes_left == 0)
1170         break;
1171
1172       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1173       p0 = vlib_get_buffer (vm, p0->next_buffer);
1174       data_this_buffer = vlib_buffer_get_current (p0);
1175       n_this_buffer = p0->current_length;
1176     }
1177
1178   sum16 = ~ip_csum_fold (sum0);
1179
1180   return sum16;
1181 }
1182
1183 u32
1184 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1185 {
1186   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1187   udp_header_t *udp0;
1188   u16 sum16;
1189
1190   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1191           || ip0->protocol == IP_PROTOCOL_UDP);
1192
1193   udp0 = (void *) (ip0 + 1);
1194   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1195     {
1196       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1197                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1198       return p0->flags;
1199     }
1200
1201   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1202
1203   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1204                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1205
1206   return p0->flags;
1207 }
1208 #endif
1209
1210 /* *INDENT-OFF* */
1211 VNET_FEATURE_ARC_INIT (ip4_local) =
1212 {
1213   .arc_name  = "ip4-local",
1214   .start_nodes = VNET_FEATURES ("ip4-local"),
1215 };
1216 /* *INDENT-ON* */
1217
1218 static inline void
1219 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1220                             ip4_header_t * ip, u8 is_udp, u8 * error,
1221                             u8 * good_tcp_udp)
1222 {
1223   u32 flags0;
1224   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1225   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1226   if (is_udp)
1227     {
1228       udp_header_t *udp;
1229       u32 ip_len, udp_len;
1230       i32 len_diff;
1231       udp = ip4_next_header (ip);
1232       /* Verify UDP length. */
1233       ip_len = clib_net_to_host_u16 (ip->length);
1234       udp_len = clib_net_to_host_u16 (udp->length);
1235
1236       len_diff = ip_len - udp_len;
1237       *good_tcp_udp &= len_diff >= 0;
1238       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1239     }
1240 }
1241
/*
 * Checksum-state predicates on a buffer's flags.
 *
 * Each expansion and each argument is fully parenthesised so the macros can
 * be negated, compared or combined with other operators without changing
 * their meaning.
 */

/* Non-zero when the buffer is flagged for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check,
 * i.e. the checksum is neither already computed nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, otherwise 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1253
1254 static inline void
1255 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1256                          ip4_header_t * ih, u8 * error)
1257 {
1258   u8 is_udp, is_tcp_udp, good_tcp_udp;
1259
1260   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1261   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1262
1263   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1264     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1265   else
1266     good_tcp_udp = ip4_local_csum_is_valid (b);
1267
1268   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1269   *error = (is_tcp_udp && !good_tcp_udp
1270             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1271 }
1272
1273 static inline void
1274 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1275                             ip4_header_t ** ih, u8 * error)
1276 {
1277   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1278
1279   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1280   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1281
1282   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1283   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1284
1285   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1286   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1287
1288   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1289                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1290     {
1291       if (is_tcp_udp[0])
1292         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1293                                     &good_tcp_udp[0]);
1294       if (is_tcp_udp[1])
1295         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1296                                     &good_tcp_udp[1]);
1297     }
1298
1299   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1300               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1301   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1302               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1303 }
1304
1305 static inline void
1306 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1307                               vlib_buffer_t * b, u16 * next, u8 error,
1308                               u8 head_of_feature_arc)
1309 {
1310   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1311   u32 next_index;
1312
1313   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1314   b->error = error ? error_node->errors[error] : 0;
1315   if (head_of_feature_arc)
1316     {
1317       next_index = *next;
1318       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1319         {
1320           vnet_feature_arc_start (arc_index,
1321                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1322                                   &next_index, b);
1323           *next = next_index;
1324         }
1325     }
1326 }
1327
1328 typedef struct
1329 {
1330   ip4_address_t src;
1331   u32 lbi;
1332   u8 error;
1333 } ip4_local_last_check_t;
1334
1335 static inline void
1336 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1337                      ip4_local_last_check_t * last_check, u8 * error0)
1338 {
1339   ip4_fib_mtrie_leaf_t leaf0;
1340   ip4_fib_mtrie_t *mtrie0;
1341   const dpo_id_t *dpo0;
1342   load_balance_t *lb0;
1343   u32 lbi0;
1344
1345   vnet_buffer (b)->ip.fib_index =
1346     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1347     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1348
1349   if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1350     {
1351       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1352       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1353       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1354       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1355       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1356
1357       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1358       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1359
1360       lb0 = load_balance_get (lbi0);
1361       dpo0 = load_balance_get_bucket_i (lb0, 0);
1362
1363       /*
1364        * Must have a route to source otherwise we drop the packet.
1365        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1366        *
1367        * The checks are:
1368        *  - the source is a recieve => it's from us => bogus, do this
1369        *    first since it sets a different error code.
1370        *  - uRPF check for any route to source - accept if passes.
1371        *  - allow packets destined to the broadcast address from unknown sources
1372        */
1373
1374       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1375                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1376                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1377       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1378                   && !fib_urpf_check_size (lb0->lb_urpf)
1379                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1380                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1381
1382       last_check->src.as_u32 = ip0->src_address.as_u32;
1383       last_check->lbi = lbi0;
1384       last_check->error = *error0;
1385     }
1386   else
1387     {
1388       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1389       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1390       *error0 = last_check->error;
1391     }
1392 }
1393
1394 static inline void
1395 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1396                         ip4_local_last_check_t * last_check, u8 * error)
1397 {
1398   ip4_fib_mtrie_leaf_t leaf[2];
1399   ip4_fib_mtrie_t *mtrie[2];
1400   const dpo_id_t *dpo[2];
1401   load_balance_t *lb[2];
1402   u32 not_last_hit = 0;
1403   u32 lbi[2];
1404
1405   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1406   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1407
1408   vnet_buffer (b[0])->ip.fib_index =
1409     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1410     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1411     vnet_buffer (b[0])->ip.fib_index;
1412
1413   vnet_buffer (b[1])->ip.fib_index =
1414     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1415     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1416     vnet_buffer (b[1])->ip.fib_index;
1417
1418   if (PREDICT_FALSE (not_last_hit))
1419     {
1420       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1421       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1422
1423       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1424       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1425
1426       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1427                                            &ip[0]->src_address, 2);
1428       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1429                                            &ip[1]->src_address, 2);
1430
1431       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1432                                            &ip[0]->src_address, 3);
1433       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1434                                            &ip[1]->src_address, 3);
1435
1436       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1437       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1438
1439       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1440       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1441
1442       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1443       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1444
1445       lb[0] = load_balance_get (lbi[0]);
1446       lb[1] = load_balance_get (lbi[1]);
1447
1448       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1449       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1450
1451       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1452                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1453                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1454       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1455                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1456                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1457                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1458
1459       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1460                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1461                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1462       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1463                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1464                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1465                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1466
1467       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1468       last_check->lbi = lbi[1];
1469       last_check->error = error[1];
1470     }
1471   else
1472     {
1473       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1474       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1475
1476       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1477       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1478
1479       error[0] = last_check->error;
1480       error[1] = last_check->error;
1481     }
1482 }
1483
/*
 * Packet classes distinguished by ip4-local.  The L4 class must stay 0:
 * ip4_local_inline() tests the class as a boolean to decide whether the
 * checksum and source checks are skipped.
 */
enum ip_local_packet_type_e
{
  IP_LOCAL_PACKET_TYPE_L4 = 0,
  IP_LOCAL_PACKET_TYPE_NAT = 1,
  IP_LOCAL_PACKET_TYPE_FRAG = 2,
};
1490
1491 /**
1492  * Determine packet type and next node.
1493  *
1494  * The expectation is that all packets that are not L4 will skip
1495  * checksums and source checks.
1496  */
1497 always_inline u8
1498 ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
1499 {
1500   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1501
1502   if (PREDICT_FALSE (ip4_is_fragment (ip)))
1503     {
1504       *next = IP_LOCAL_NEXT_REASSEMBLY;
1505       return IP_LOCAL_PACKET_TYPE_FRAG;
1506     }
1507   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_IS_NATED))
1508     {
1509       *next = lm->local_next_by_ip_protocol[ip->protocol];
1510       return IP_LOCAL_PACKET_TYPE_NAT;
1511     }
1512
1513   *next = lm->local_next_by_ip_protocol[ip->protocol];
1514   return IP_LOCAL_PACKET_TYPE_L4;
1515 }
1516
1517 static inline uword
1518 ip4_local_inline (vlib_main_t * vm,
1519                   vlib_node_runtime_t * node,
1520                   vlib_frame_t * frame, int head_of_feature_arc)
1521 {
1522   u32 *from, n_left_from;
1523   vlib_node_runtime_t *error_node =
1524     vlib_node_get_runtime (vm, ip4_input_node.index);
1525   u16 nexts[VLIB_FRAME_SIZE], *next;
1526   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1527   ip4_header_t *ip[2];
1528   u8 error[2], pt[2];
1529
1530   ip4_local_last_check_t last_check = {
1531     .src = {.as_u32 = 0},
1532     .lbi = ~0,
1533     .error = IP4_ERROR_UNKNOWN_PROTOCOL
1534   };
1535
1536   from = vlib_frame_vector_args (frame);
1537   n_left_from = frame->n_vectors;
1538
1539   if (node->flags & VLIB_NODE_FLAG_TRACE)
1540     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1541
1542   vlib_get_buffers (vm, from, bufs, n_left_from);
1543   b = bufs;
1544   next = nexts;
1545
1546   while (n_left_from >= 6)
1547     {
1548       u8 not_batch = 0;
1549
1550       /* Prefetch next iteration. */
1551       {
1552         vlib_prefetch_buffer_header (b[4], LOAD);
1553         vlib_prefetch_buffer_header (b[5], LOAD);
1554
1555         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1556         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1557       }
1558
1559       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1560
1561       ip[0] = vlib_buffer_get_current (b[0]);
1562       ip[1] = vlib_buffer_get_current (b[1]);
1563
1564       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1565       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1566
1567       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1568       pt[1] = ip4_local_classify (b[1], ip[1], &next[1]);
1569
1570       not_batch = pt[0] ^ pt[1];
1571
1572       if (head_of_feature_arc == 0 || (pt[0] && not_batch == 0))
1573         goto skip_checks;
1574
1575       if (PREDICT_TRUE (not_batch == 0))
1576         {
1577           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1578           ip4_local_check_src_x2 (b, ip, &last_check, error);
1579         }
1580       else
1581         {
1582           if (!pt[0])
1583             {
1584               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1585               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1586             }
1587           if (!pt[1])
1588             {
1589               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1590               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1591             }
1592         }
1593
1594     skip_checks:
1595
1596       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1597                                     head_of_feature_arc);
1598       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1599                                     head_of_feature_arc);
1600
1601       b += 2;
1602       next += 2;
1603       n_left_from -= 2;
1604     }
1605
1606   while (n_left_from > 0)
1607     {
1608       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1609
1610       ip[0] = vlib_buffer_get_current (b[0]);
1611       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1612       pt[0] = ip4_local_classify (b[0], ip[0], &next[0]);
1613
1614       if (head_of_feature_arc == 0 || pt[0])
1615         goto skip_check;
1616
1617       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1618       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1619
1620     skip_check:
1621
1622       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1623                                     head_of_feature_arc);
1624
1625       b += 1;
1626       next += 1;
1627       n_left_from -= 1;
1628     }
1629
1630   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1631   return frame->n_vectors;
1632 }
1633
1634 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1635                                vlib_frame_t * frame)
1636 {
1637   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1638 }
1639
1640 /* *INDENT-OFF* */
1641 VLIB_REGISTER_NODE (ip4_local_node) =
1642 {
1643   .name = "ip4-local",
1644   .vector_size = sizeof (u32),
1645   .format_trace = format_ip4_forward_next_trace,
1646   .n_next_nodes = IP_LOCAL_N_NEXT,
1647   .next_nodes =
1648   {
1649     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1650     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1651     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1652     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1653     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1654   },
1655 };
1656 /* *INDENT-ON* */
1657
1658
1659 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1660                                           vlib_node_runtime_t * node,
1661                                           vlib_frame_t * frame)
1662 {
1663   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1664 }
1665
1666 /* *INDENT-OFF* */
1667 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1668   .name = "ip4-local-end-of-arc",
1669   .vector_size = sizeof (u32),
1670
1671   .format_trace = format_ip4_forward_next_trace,
1672   .sibling_of = "ip4-local",
1673 };
1674
1675 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1676   .arc_name = "ip4-local",
1677   .node_name = "ip4-local-end-of-arc",
1678   .runs_before = 0, /* not before any other features */
1679 };
1680 /* *INDENT-ON* */
1681
1682 #ifndef CLIB_MARCH_VARIANT
1683 void
1684 ip4_register_protocol (u32 protocol, u32 node_index)
1685 {
1686   vlib_main_t *vm = vlib_get_main ();
1687   ip4_main_t *im = &ip4_main;
1688   ip_lookup_main_t *lm = &im->lookup_main;
1689
1690   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1691   lm->local_next_by_ip_protocol[protocol] =
1692     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1693 }
1694 #endif
1695
1696 static clib_error_t *
1697 show_ip_local_command_fn (vlib_main_t * vm,
1698                           unformat_input_t * input, vlib_cli_command_t * cmd)
1699 {
1700   ip4_main_t *im = &ip4_main;
1701   ip_lookup_main_t *lm = &im->lookup_main;
1702   int i;
1703
1704   vlib_cli_output (vm, "Protocols handled by ip4_local");
1705   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1706     {
1707       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1708         {
1709           u32 node_index = vlib_get_node (vm,
1710                                           ip4_local_node.index)->
1711             next_nodes[lm->local_next_by_ip_protocol[i]];
1712           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1713                            node_index);
1714         }
1715     }
1716   return 0;
1717 }
1718
1719
1720
1721 /*?
1722  * Display the set of protocols handled by the local IPv4 stack.
1723  *
1724  * @cliexpar
1725  * Example of how to display local protocol table:
1726  * @cliexstart{show ip local}
1727  * Protocols handled by ip4_local
1728  * 1
1729  * 17
1730  * 47
1731  * @cliexend
1732 ?*/
1733 /* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
  .path = "show ip local",                      /* command path typed at the CLI */
  .function = show_ip_local_command_fn,         /* handler that prints the table */
  .short_help = "show ip local",
};
1740 /* *INDENT-ON* */
1741
1742 always_inline uword
1743 ip4_arp_inline (vlib_main_t * vm,
1744                 vlib_node_runtime_t * node,
1745                 vlib_frame_t * frame, int is_glean)
1746 {
1747   vnet_main_t *vnm = vnet_get_main ();
1748   ip4_main_t *im = &ip4_main;
1749   ip_lookup_main_t *lm = &im->lookup_main;
1750   u32 *from, *to_next_drop;
1751   uword n_left_from, n_left_to_next_drop, next_index;
1752   u32 thread_index = vm->thread_index;
1753   u64 seed;
1754
1755   if (node->flags & VLIB_NODE_FLAG_TRACE)
1756     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1757
1758   seed = throttle_seed (&im->arp_throttle, thread_index, vlib_time_now (vm));
1759
1760   from = vlib_frame_vector_args (frame);
1761   n_left_from = frame->n_vectors;
1762   next_index = node->cached_next_index;
1763   if (next_index == IP4_ARP_NEXT_DROP)
1764     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1765
1766   while (n_left_from > 0)
1767     {
1768       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1769                            to_next_drop, n_left_to_next_drop);
1770
1771       while (n_left_from > 0 && n_left_to_next_drop > 0)
1772         {
1773           u32 pi0, bi0, adj_index0, sw_if_index0;
1774           ip_adjacency_t *adj0;
1775           vlib_buffer_t *p0, *b0;
1776           ip4_address_t resolve0;
1777           ethernet_arp_header_t *h0;
1778           vnet_hw_interface_t *hw_if0;
1779           u64 r0;
1780
1781           pi0 = from[0];
1782           p0 = vlib_get_buffer (vm, pi0);
1783
1784           from += 1;
1785           n_left_from -= 1;
1786           to_next_drop[0] = pi0;
1787           to_next_drop += 1;
1788           n_left_to_next_drop -= 1;
1789
1790           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1791           adj0 = adj_get (adj_index0);
1792
1793           if (is_glean)
1794             {
1795               /* resolve the packet's destination */
1796               ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1797               resolve0 = ip0->dst_address;
1798             }
1799           else
1800             {
1801               /* resolve the incomplete adj */
1802               resolve0 = adj0->sub_type.nbr.next_hop.ip4;
1803             }
1804
1805           /* combine the address and interface for the hash key */
1806           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1807           r0 = (u64) resolve0.data_u32 << 32;
1808           r0 |= sw_if_index0;
1809
1810           if (throttle_check (&im->arp_throttle, thread_index, r0, seed))
1811             {
1812               p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
1813               continue;
1814             }
1815
1816           /*
1817            * the adj has been updated to a rewrite but the node the DPO that got
1818            * us here hasn't - yet. no big deal. we'll drop while we wait.
1819            */
1820           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1821             {
1822               p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
1823               continue;
1824             }
1825
1826           /*
1827            * Can happen if the control-plane is programming tables
1828            * with traffic flowing; at least that's today's lame excuse.
1829            */
1830           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1831               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1832             {
1833               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1834               continue;
1835             }
1836           /* Send ARP request. */
1837           h0 =
1838             vlib_packet_template_get_packet (vm,
1839                                              &im->ip4_arp_request_packet_template,
1840                                              &bi0);
1841
1842           /* Seems we're out of buffers */
1843           if (PREDICT_FALSE (!h0))
1844             {
1845               p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
1846               continue;
1847             }
1848
1849           /* Add rewrite/encap string for ARP packet. */
1850           vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1851
1852           hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1853
1854           /* Src ethernet address in ARP header. */
1855           clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1856                        hw_if0->hw_address,
1857                        sizeof (h0->ip4_over_ethernet[0].ethernet));
1858           if (is_glean)
1859             {
1860               /* The interface's source address is stashed in the Glean Adj */
1861               h0->ip4_over_ethernet[0].ip4 =
1862                 adj0->sub_type.glean.receive_addr.ip4;
1863             }
1864           else
1865             {
1866               /* Src IP address in ARP header. */
1867               if (ip4_src_address_for_packet (lm, sw_if_index0,
1868                                               &h0->ip4_over_ethernet[0].ip4))
1869                 {
1870                   /* No source address available */
1871                   p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1872                   vlib_buffer_free (vm, &bi0, 1);
1873                   continue;
1874                 }
1875             }
1876           h0->ip4_over_ethernet[1].ip4 = resolve0;
1877
1878           p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
1879
1880           vlib_buffer_copy_trace_flag (vm, p0, bi0);
1881           b0 = vlib_get_buffer (vm, bi0);
1882           VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1883           vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1884
1885           vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1886
1887           vlib_set_next_frame_buffer (vm, node,
1888                                       adj0->rewrite_header.next_index, bi0);
1889         }
1890
1891       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1892     }
1893
1894   return frame->n_vectors;
1895 }
1896
1897 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1898                              vlib_frame_t * frame)
1899 {
1900   return (ip4_arp_inline (vm, node, frame, 0));
1901 }
1902
1903 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1904                                vlib_frame_t * frame)
1905 {
1906   return (ip4_arp_inline (vm, node, frame, 1));
1907 }
1908
/* Counter descriptions for the ip4-arp and ip4-glean nodes, indexed by
 * IP4_ARP_ERROR_* code. Both node registrations share this table. */
static char *ip4_arp_error_strings[] = {
  [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
  [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
  [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
  [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
  [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
  [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
};
1917
1918 /* *INDENT-OFF* */
/* Graph node registration for "ip4-arp". The only statically declared
 * next node is the drop path. */
VLIB_REGISTER_NODE (ip4_arp_node) =
{
  .name = "ip4-arp",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes =
  {
    [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};

/* Graph node registration for "ip4-glean"; same error strings and next
 * nodes as "ip4-arp". */
VLIB_REGISTER_NODE (ip4_glean_node) =
{
  .name = "ip4-glean",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_errors = ARRAY_LEN (ip4_arp_error_strings),
  .error_strings = ip4_arp_error_strings,
  .n_next_nodes = IP4_ARP_N_NEXT,
  .next_nodes = {
  [IP4_ARP_NEXT_DROP] = "error-drop",
  },
};
1945 /* *INDENT-ON* */
1946
/* X-macro list of the IP4_ARP_ERROR_* suffixes that arp_notrace_init adds
 * to the pcap drop-trace filter. */
#define foreach_notrace_ip4_arp_error           \
_(THROTTLED)                                    \
_(RESOLVED)                                     \
_(NO_BUFFERS)                                   \
_(REQUEST_SENT)                                 \
_(NON_ARP_ADJ)                                  \
_(NO_SOURCE_ADDRESS)
1954
1955 static clib_error_t *
1956 arp_notrace_init (vlib_main_t * vm)
1957 {
1958   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1959
1960   /* don't trace ARP request packets */
1961 #define _(a)                                    \
1962     vnet_pcap_drop_trace_filter_add_del         \
1963         (rt->errors[IP4_ARP_ERROR_##a],         \
1964          1 /* is_add */);
1965   foreach_notrace_ip4_arp_error;
1966 #undef _
1967   return 0;
1968 }
1969
1970 VLIB_INIT_FUNCTION (arp_notrace_init);
1971
1972
1973 #ifndef CLIB_MARCH_VARIANT
1974 /* Send an ARP request to see if given destination is reachable on given interface. */
1975 clib_error_t *
1976 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1977                     u8 refresh)
1978 {
1979   vnet_main_t *vnm = vnet_get_main ();
1980   ip4_main_t *im = &ip4_main;
1981   ethernet_arp_header_t *h;
1982   ip4_address_t *src;
1983   ip_interface_address_t *ia;
1984   ip_adjacency_t *adj;
1985   vnet_hw_interface_t *hi;
1986   vnet_sw_interface_t *si;
1987   vlib_buffer_t *b;
1988   adj_index_t ai;
1989   u32 bi = 0;
1990   u8 unicast_rewrite = 0;
1991
1992   si = vnet_get_sw_interface (vnm, sw_if_index);
1993
1994   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1995     {
1996       return clib_error_return (0, "%U: interface %U down",
1997                                 format_ip4_address, dst,
1998                                 format_vnet_sw_if_index_name, vnm,
1999                                 sw_if_index);
2000     }
2001
2002   src =
2003     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2004   if (!src)
2005     {
2006       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2007       return clib_error_return
2008         (0,
2009          "no matching interface address for destination %U (interface %U)",
2010          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2011          sw_if_index);
2012     }
2013
2014   h = vlib_packet_template_get_packet (vm,
2015                                        &im->ip4_arp_request_packet_template,
2016                                        &bi);
2017
2018   if (!h)
2019     return clib_error_return (0, "ARP request packet allocation failed");
2020
2021   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2022   if (PREDICT_FALSE (!hi->hw_address))
2023     {
2024       return clib_error_return (0, "%U: interface %U do not support ip probe",
2025                                 format_ip4_address, dst,
2026                                 format_vnet_sw_if_index_name, vnm,
2027                                 sw_if_index);
2028     }
2029
2030   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2031                sizeof (h->ip4_over_ethernet[0].ethernet));
2032
2033   h->ip4_over_ethernet[0].ip4 = src[0];
2034   h->ip4_over_ethernet[1].ip4 = dst[0];
2035
2036   b = vlib_get_buffer (vm, bi);
2037   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2038     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2039
2040   ip46_address_t nh = {
2041     .ip4 = *dst,
2042   };
2043
2044   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2045                             VNET_LINK_IP4, &nh, sw_if_index);
2046   adj = adj_get (ai);
2047
2048   /* Peer has been previously resolved, retrieve glean adj instead */
2049   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2050     {
2051       if (refresh)
2052         unicast_rewrite = 1;
2053       else
2054         {
2055           adj_unlock (ai);
2056           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2057                                       VNET_LINK_IP4, sw_if_index, &nh);
2058           adj = adj_get (ai);
2059         }
2060     }
2061
2062   /* Add encapsulation string for software interface (e.g. ethernet header). */
2063   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2064   if (unicast_rewrite)
2065     {
2066       u16 *etype = vlib_buffer_get_current (b) - 2;
2067       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2068     }
2069   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2070
2071   {
2072     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2073     u32 *to_next = vlib_frame_vector_args (f);
2074     to_next[0] = bi;
2075     f->n_vectors = 1;
2076     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2077   }
2078
2079   adj_unlock (ai);
2080   return /* no error */ 0;
2081 }
2082 #endif
2083
/* Next-node slots of the ip4-rewrite node; its sibling nodes share them. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP,        /* default disposition for every packet */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* TTL expired, or MTU exceeded with DF set */
  IP4_REWRITE_NEXT_FRAGMENT,    /* MTU exceeded and fragmentation allowed */
  IP4_REWRITE_N_NEXT            /* Last */
} ip4_rewrite_next_t;
2091
2092 /**
 * The bits of an IPv4 address to mask to construct a multicast
2094  * MAC address
2095  */
/* The two constants are byte-swapped forms of each other, so the same
 * address bits are selected on either host byte order. */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2101
2102 always_inline void
2103 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2104                u16 adj_packet_bytes, bool df, u16 * next, u32 * error)
2105 {
2106   if (packet_len > adj_packet_bytes)
2107     {
2108       *error = IP4_ERROR_MTU_EXCEEDED;
2109       if (df)
2110         {
2111           icmp4_error_set_vnet_buffer
2112             (b, ICMP4_destination_unreachable,
2113              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2114              adj_packet_bytes);
2115           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2116         }
2117       else
2118         {
2119           /* IP fragmentation */
2120           ip_frag_set_vnet_buffer (b, adj_packet_bytes,
2121                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2122           *next = IP4_REWRITE_NEXT_FRAGMENT;
2123         }
2124     }
2125 }
2126
2127 /* Decrement TTL & update checksum.
2128    Works either endian, so no need for byte swap. */
2129 static_always_inline void
2130 ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
2131                             u32 * error)
2132 {
2133   i32 ttl;
2134   u32 checksum;
2135   if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
2136     {
2137       b->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2138       return;
2139     }
2140
2141   ttl = ip->ttl;
2142
2143   /* Input node should have reject packets with ttl 0. */
2144   ASSERT (ip->ttl > 0);
2145
2146   checksum = ip->checksum + clib_host_to_net_u16 (0x0100);
2147   checksum += checksum >= 0xffff;
2148
2149   ip->checksum = checksum;
2150   ttl -= 1;
2151   ip->ttl = ttl;
2152
2153   /*
2154    * If the ttl drops below 1 when forwarding, generate
2155    * an ICMP response.
2156    */
2157   if (PREDICT_FALSE (ttl <= 0))
2158     {
2159       *error = IP4_ERROR_TIME_EXPIRED;
2160       vnet_buffer (b)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2161       icmp4_error_set_vnet_buffer (b, ICMP4_time_exceeded,
2162                                    ICMP4_time_exceeded_ttl_exceeded_in_transit,
2163                                    0);
2164       *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2165     }
2166
2167   /* Verify checksum. */
2168   ASSERT ((ip->checksum == ip4_header_checksum (ip)) ||
2169           (b->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2170 }
2171
2172
2173 always_inline uword
2174 ip4_rewrite_inline (vlib_main_t * vm,
2175                     vlib_node_runtime_t * node,
2176                     vlib_frame_t * frame,
2177                     int do_counters, int is_midchain, int is_mcast)
2178 {
2179   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2180   u32 *from = vlib_frame_vector_args (frame);
2181   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2182   u16 nexts[VLIB_FRAME_SIZE], *next;
2183   u32 n_left_from;
2184   vlib_node_runtime_t *error_node =
2185     vlib_node_get_runtime (vm, ip4_input_node.index);
2186
2187   n_left_from = frame->n_vectors;
2188   u32 thread_index = vm->thread_index;
2189
2190   vlib_get_buffers (vm, from, bufs, n_left_from);
2191   clib_memset_u16 (nexts, IP4_REWRITE_NEXT_DROP, n_left_from);
2192
2193   if (n_left_from >= 6)
2194     {
2195       int i;
2196       for (i = 0; i < 6; i++)
2197         vlib_prefetch_buffer_header (bufs[i], LOAD);
2198     }
2199
2200   next = nexts;
2201   b = bufs;
2202   while (n_left_from >= 8)
2203     {
2204       ip_adjacency_t *adj0, *adj1;
2205       ip4_header_t *ip0, *ip1;
2206       u32 rw_len0, error0, adj_index0;
2207       u32 rw_len1, error1, adj_index1;
2208       u32 tx_sw_if_index0, tx_sw_if_index1;
2209       u8 *p;
2210
2211       vlib_prefetch_buffer_header (b[6], LOAD);
2212       vlib_prefetch_buffer_header (b[7], LOAD);
2213
2214       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2215       adj_index1 = vnet_buffer (b[1])->ip.adj_index[VLIB_TX];
2216
2217       /*
2218        * pre-fetch the per-adjacency counters
2219        */
2220       if (do_counters)
2221         {
2222           vlib_prefetch_combined_counter (&adjacency_counters,
2223                                           thread_index, adj_index0);
2224           vlib_prefetch_combined_counter (&adjacency_counters,
2225                                           thread_index, adj_index1);
2226         }
2227
2228       ip0 = vlib_buffer_get_current (b[0]);
2229       ip1 = vlib_buffer_get_current (b[1]);
2230
2231       error0 = error1 = IP4_ERROR_NONE;
2232
2233       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2234       ip4_ttl_and_checksum_check (b[1], ip1, next + 1, &error1);
2235
2236       /* Rewrite packet header and updates lengths. */
2237       adj0 = adj_get (adj_index0);
2238       adj1 = adj_get (adj_index1);
2239
2240       /* Worth pipelining. No guarantee that adj0,1 are hot... */
2241       rw_len0 = adj0[0].rewrite_header.data_bytes;
2242       rw_len1 = adj1[0].rewrite_header.data_bytes;
2243       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2244       vnet_buffer (b[1])->ip.save_rewrite_length = rw_len1;
2245
2246       p = vlib_buffer_get_current (b[2]);
2247       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2248       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2249
2250       p = vlib_buffer_get_current (b[3]);
2251       CLIB_PREFETCH (p - CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES, STORE);
2252       CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
2253
2254       /* Check MTU of outgoing interface. */
2255       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2256                      adj0[0].rewrite_header.max_l3_packet_bytes,
2257                      ip0->flags_and_fragment_offset &
2258                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2259                      next + 0, &error0);
2260       ip4_mtu_check (b[1], clib_net_to_host_u16 (ip1->length),
2261                      adj1[0].rewrite_header.max_l3_packet_bytes,
2262                      ip1->flags_and_fragment_offset &
2263                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2264                      next + 1, &error1);
2265
2266       if (is_mcast)
2267         {
2268           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2269                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2270                     IP4_ERROR_SAME_INTERFACE : error0);
2271           error1 = ((adj1[0].rewrite_header.sw_if_index ==
2272                      vnet_buffer (b[1])->sw_if_index[VLIB_RX]) ?
2273                     IP4_ERROR_SAME_INTERFACE : error1);
2274         }
2275
2276       b[0]->error = error_node->errors[error0];
2277       b[1]->error = error_node->errors[error1];
2278       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2279        * to see the IP headerr */
2280       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2281         {
2282           u32 next_index = adj0[0].rewrite_header.next_index;
2283           b[0]->current_data -= rw_len0;
2284           b[0]->current_length += rw_len0;
2285           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2286           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2287
2288           if (PREDICT_FALSE
2289               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2290             vnet_feature_arc_start (lm->output_feature_arc_index,
2291                                     tx_sw_if_index0, &next_index, b[0]);
2292           next[0] = next_index;
2293         }
2294       if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2295         {
2296           u32 next_index = adj1[0].rewrite_header.next_index;
2297           b[1]->current_data -= rw_len1;
2298           b[1]->current_length += rw_len1;
2299
2300           tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2301           vnet_buffer (b[1])->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2302
2303           if (PREDICT_FALSE
2304               (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2305             vnet_feature_arc_start (lm->output_feature_arc_index,
2306                                     tx_sw_if_index1, &next_index, b[1]);
2307           next[1] = next_index;
2308         }
2309
2310       /* Guess we are only writing on simple Ethernet header. */
2311       vnet_rewrite_two_headers (adj0[0], adj1[0],
2312                                 ip0, ip1, sizeof (ethernet_header_t));
2313
2314       /*
2315        * Bump the per-adjacency counters
2316        */
2317       if (do_counters)
2318         {
2319           vlib_increment_combined_counter
2320             (&adjacency_counters,
2321              thread_index,
2322              adj_index0, 1, vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2323
2324           vlib_increment_combined_counter
2325             (&adjacency_counters,
2326              thread_index,
2327              adj_index1, 1, vlib_buffer_length_in_chain (vm, b[1]) + rw_len1);
2328         }
2329
2330       if (is_midchain)
2331         {
2332           adj0->sub_type.midchain.fixup_func
2333             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2334           adj1->sub_type.midchain.fixup_func
2335             (vm, adj1, b[1], adj0->sub_type.midchain.fixup_data);
2336         }
2337
2338       if (is_mcast)
2339         {
2340           /*
2341            * copy bytes from the IP address into the MAC rewrite
2342            */
2343           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2344                                       adj0->rewrite_header.dst_mcast_offset,
2345                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2346           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2347                                       adj0->rewrite_header.dst_mcast_offset,
2348                                       &ip1->dst_address.as_u32, (u8 *) ip1);
2349         }
2350
2351       next += 2;
2352       b += 2;
2353       n_left_from -= 2;
2354     }
2355
2356   while (n_left_from > 0)
2357     {
2358       ip_adjacency_t *adj0;
2359       ip4_header_t *ip0;
2360       u32 rw_len0, adj_index0, error0;
2361       u32 tx_sw_if_index0;
2362
2363       adj_index0 = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
2364
2365       adj0 = adj_get (adj_index0);
2366
2367       if (do_counters)
2368         vlib_prefetch_combined_counter (&adjacency_counters,
2369                                         thread_index, adj_index0);
2370
2371       ip0 = vlib_buffer_get_current (b[0]);
2372
2373       error0 = IP4_ERROR_NONE;
2374
2375       ip4_ttl_and_checksum_check (b[0], ip0, next + 0, &error0);
2376
2377
2378       /* Update packet buffer attributes/set output interface. */
2379       rw_len0 = adj0[0].rewrite_header.data_bytes;
2380       vnet_buffer (b[0])->ip.save_rewrite_length = rw_len0;
2381
2382       /* Check MTU of outgoing interface. */
2383       ip4_mtu_check (b[0], clib_net_to_host_u16 (ip0->length),
2384                      adj0[0].rewrite_header.max_l3_packet_bytes,
2385                      ip0->flags_and_fragment_offset &
2386                      clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2387                      next + 0, &error0);
2388
2389       if (is_mcast)
2390         {
2391           error0 = ((adj0[0].rewrite_header.sw_if_index ==
2392                      vnet_buffer (b[0])->sw_if_index[VLIB_RX]) ?
2393                     IP4_ERROR_SAME_INTERFACE : error0);
2394         }
2395       b[0]->error = error_node->errors[error0];
2396
2397       /* Don't adjust the buffer for ttl issue; icmp-error node wants
2398        * to see the IP headerr */
2399       if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2400         {
2401           u32 next_index = adj0[0].rewrite_header.next_index;
2402           b[0]->current_data -= rw_len0;
2403           b[0]->current_length += rw_len0;
2404           tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2405           vnet_buffer (b[0])->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2406
2407           if (PREDICT_FALSE
2408               (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2409             vnet_feature_arc_start (lm->output_feature_arc_index,
2410                                     tx_sw_if_index0, &next_index, b[0]);
2411           next[0] = next_index;
2412         }
2413
2414       /* Guess we are only writing on simple Ethernet header. */
2415       vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2416
2417       if (do_counters)
2418         vlib_increment_combined_counter
2419           (&adjacency_counters,
2420            thread_index, adj_index0, 1,
2421            vlib_buffer_length_in_chain (vm, b[0]) + rw_len0);
2422
2423       if (is_midchain)
2424         {
2425           adj0->sub_type.midchain.fixup_func
2426             (vm, adj0, b[0], adj0->sub_type.midchain.fixup_data);
2427         }
2428
2429       if (is_mcast)
2430         {
2431           /*
2432            * copy bytes from the IP address into the MAC rewrite
2433            */
2434           vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2435                                       adj0->rewrite_header.dst_mcast_offset,
2436                                       &ip0->dst_address.as_u32, (u8 *) ip0);
2437         }
2438
2439       next += 1;
2440       b += 1;
2441       n_left_from -= 1;
2442     }
2443
2444
2445   /* Need to do trace after rewrites to pick up new packet data. */
2446   if (node->flags & VLIB_NODE_FLAG_TRACE)
2447     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2448
2449   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2450   return frame->n_vectors;
2451 }
2452
2453
2454 /** @brief IPv4 rewrite node.
2455     @node ip4-rewrite
2456
2457     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2458     header checksum, fetch the ip adjacency, check the outbound mtu,
2459     apply the adjacency rewrite, and send pkts to the adjacency
2460     rewrite header's rewrite_next_index.
2461
2462     @param vm vlib_main_t corresponding to the current thread
2463     @param node vlib_node_runtime_t
2464     @param frame vlib_frame_t whose contents should be dispatched
2465
2466     @par Graph mechanics: buffer metadata, next index usage
2467
2468     @em Uses:
2469     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2470         - the rewrite adjacency index
2471     - <code>adj->lookup_next_index</code>
2472         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2473           the packet will be dropped.
2474     - <code>adj->rewrite_header</code>
2475         - Rewrite string length, rewrite string, next_index
2476
2477     @em Sets:
2478     - <code>b->current_data, b->current_length</code>
2479         - Updated net of applying the rewrite string
2480
2481     <em>Next Indices:</em>
2482     - <code> adj->rewrite_header.next_index </code>
2483       or @c ip4-drop
2484 */
2485
2486 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2487                                  vlib_frame_t * frame)
2488 {
2489   if (adj_are_counters_enabled ())
2490     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2491   else
2492     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2493 }
2494
2495 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2496                                        vlib_node_runtime_t * node,
2497                                        vlib_frame_t * frame)
2498 {
2499   if (adj_are_counters_enabled ())
2500     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2501   else
2502     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2503 }
2504
2505 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2506                                   vlib_node_runtime_t * node,
2507                                   vlib_frame_t * frame)
2508 {
2509   if (adj_are_counters_enabled ())
2510     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2511   else
2512     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2513 }
2514
2515 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2516                                        vlib_node_runtime_t * node,
2517                                        vlib_frame_t * frame)
2518 {
2519   if (adj_are_counters_enabled ())
2520     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2521   else
2522     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2523 }
2524
2525 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2526                                         vlib_node_runtime_t * node,
2527                                         vlib_frame_t * frame)
2528 {
2529   if (adj_are_counters_enabled ())
2530     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2531   else
2532     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2533 }
2534
2535 /* *INDENT-OFF* */
/* "ip4-rewrite" declares the next nodes; the other registrations below are
 * declared as its siblings. */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
  .name = "ip4-rewrite",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,

  .n_next_nodes = IP4_REWRITE_N_NEXT,
  .next_nodes = {
    [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
    [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
    [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
  },
};

VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
  .name = "ip4-rewrite-bcast",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};

VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
  .name = "ip4-rewrite-mcast",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};

VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
  .name = "ip4-mcast-midchain",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  .sibling_of = "ip4-rewrite",
};

/* NOTE(review): this is the only rewrite sibling that uses
 * format_ip4_forward_next_trace rather than format_ip4_rewrite_trace --
 * confirm that is intended. */
VLIB_REGISTER_NODE (ip4_midchain_node) = {
  .name = "ip4-midchain",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .sibling_of =  "ip4-rewrite",
};
/* *INDENT-ON* */
2581
2582 static int
2583 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2584 {
2585   ip4_fib_mtrie_t *mtrie0;
2586   ip4_fib_mtrie_leaf_t leaf0;
2587   u32 lbi0;
2588
2589   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2590
2591   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2592   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2593   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2594
2595   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2596
2597   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2598 }
2599
2600 static clib_error_t *
2601 test_lookup_command_fn (vlib_main_t * vm,
2602                         unformat_input_t * input, vlib_cli_command_t * cmd)
2603 {
2604   ip4_fib_t *fib;
2605   u32 table_id = 0;
2606   f64 count = 1;
2607   u32 n;
2608   int i;
2609   ip4_address_t ip4_base_address;
2610   u64 errors = 0;
2611
2612   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2613     {
2614       if (unformat (input, "table %d", &table_id))
2615         {
2616           /* Make sure the entry exists. */
2617           fib = ip4_fib_get (table_id);
2618           if ((fib) && (fib->index != table_id))
2619             return clib_error_return (0, "<fib-index> %d does not exist",
2620                                       table_id);
2621         }
2622       else if (unformat (input, "count %f", &count))
2623         ;
2624
2625       else if (unformat (input, "%U",
2626                          unformat_ip4_address, &ip4_base_address))
2627         ;
2628       else
2629         return clib_error_return (0, "unknown input `%U'",
2630                                   format_unformat_error, input);
2631     }
2632
2633   n = count;
2634
2635   for (i = 0; i < n; i++)
2636     {
2637       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2638         errors++;
2639
2640       ip4_base_address.as_u32 =
2641         clib_host_to_net_u32 (1 +
2642                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2643     }
2644
2645   if (errors)
2646     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2647   else
2648     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2649
2650   return 0;
2651 }
2652
2653 /*?
2654  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2655  * given FIB table to determine if there is a conflict with the
2656  * adjacency table. The fib-id can be determined by using the
2657  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2658  * of 0 is used.
2659  *
2660  * @todo This command uses fib-id, other commands use table-id (not
2661  * just a name, they are different indexes). Would like to change this
2662  * to table-id for consistency.
2663  *
2664  * @cliexpar
2665  * Example of how to run the test lookup command:
2666  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2667  * No errors in 2 lookups
2668  * @cliexend
2669 ?*/
2670 /* *INDENT-OFF* */
2671 VLIB_CLI_COMMAND (lookup_test_command, static) =
2672 {
2673   .path = "test lookup",
2674   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2675   .function = test_lookup_command_fn,
2676 };
2677 /* *INDENT-ON* */
2678
2679 #ifndef CLIB_MARCH_VARIANT
2680 int
2681 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2682 {
2683   u32 fib_index;
2684
2685   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2686
2687   if (~0 == fib_index)
2688     return VNET_API_ERROR_NO_SUCH_FIB;
2689
2690   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2691                                   flow_hash_config);
2692
2693   return 0;
2694 }
2695 #endif
2696
2697 static clib_error_t *
2698 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2699                              unformat_input_t * input,
2700                              vlib_cli_command_t * cmd)
2701 {
2702   int matched = 0;
2703   u32 table_id = 0;
2704   u32 flow_hash_config = 0;
2705   int rv;
2706
2707   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2708     {
2709       if (unformat (input, "table %d", &table_id))
2710         matched = 1;
2711 #define _(a,v) \
2712     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2713       foreach_flow_hash_bit
2714 #undef _
2715         else
2716         break;
2717     }
2718
2719   if (matched == 0)
2720     return clib_error_return (0, "unknown input `%U'",
2721                               format_unformat_error, input);
2722
2723   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2724   switch (rv)
2725     {
2726     case 0:
2727       break;
2728
2729     case VNET_API_ERROR_NO_SUCH_FIB:
2730       return clib_error_return (0, "no such FIB table %d", table_id);
2731
2732     default:
2733       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2734       break;
2735     }
2736
2737   return 0;
2738 }
2739
2740 /*?
2741  * Configure the set of IPv4 fields used by the flow hash.
2742  *
2743  * @cliexpar
2744  * Example of how to set the flow hash on a given table:
2745  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2747  * @cliexstart{show ip fib}
2748  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2749  * 0.0.0.0/0
2750  *   unicast-ip4-chain
2751  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2752  *     [0] [@0]: dpo-drop ip6
2753  * 0.0.0.0/32
2754  *   unicast-ip4-chain
2755  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2756  *     [0] [@0]: dpo-drop ip6
2757  * 224.0.0.0/8
2758  *   unicast-ip4-chain
2759  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2760  *     [0] [@0]: dpo-drop ip6
2761  * 6.0.1.2/32
2762  *   unicast-ip4-chain
2763  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2764  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2765  * 7.0.0.1/32
2766  *   unicast-ip4-chain
2767  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2768  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2769  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2770  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2771  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2772  * 240.0.0.0/8
2773  *   unicast-ip4-chain
2774  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2775  *     [0] [@0]: dpo-drop ip6
2776  * 255.255.255.255/32
2777  *   unicast-ip4-chain
2778  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2779  *     [0] [@0]: dpo-drop ip6
2780  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2781  * 0.0.0.0/0
2782  *   unicast-ip4-chain
2783  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2784  *     [0] [@0]: dpo-drop ip6
2785  * 0.0.0.0/32
2786  *   unicast-ip4-chain
2787  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2788  *     [0] [@0]: dpo-drop ip6
2789  * 172.16.1.0/24
2790  *   unicast-ip4-chain
2791  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2792  *     [0] [@4]: ipv4-glean: af_packet0
2793  * 172.16.1.1/32
2794  *   unicast-ip4-chain
2795  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2796  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2797  * 172.16.1.2/32
2798  *   unicast-ip4-chain
2799  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2800  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2801  * 172.16.2.0/24
2802  *   unicast-ip4-chain
2803  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2804  *     [0] [@4]: ipv4-glean: af_packet1
2805  * 172.16.2.1/32
2806  *   unicast-ip4-chain
2807  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2808  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2809  * 224.0.0.0/8
2810  *   unicast-ip4-chain
2811  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2812  *     [0] [@0]: dpo-drop ip6
2813  * 240.0.0.0/8
2814  *   unicast-ip4-chain
2815  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2816  *     [0] [@0]: dpo-drop ip6
2817  * 255.255.255.255/32
2818  *   unicast-ip4-chain
2819  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2820  *     [0] [@0]: dpo-drop ip6
2821  * @cliexend
2822 ?*/
2823 /* *INDENT-OFF* */
2824 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2825 {
2826   .path = "set ip flow-hash",
2827   .short_help =
2828   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2829   .function = set_ip_flow_hash_command_fn,
2830 };
2831 /* *INDENT-ON* */
2832
2833 #ifndef CLIB_MARCH_VARIANT
2834 int
2835 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2836                              u32 table_index)
2837 {
2838   vnet_main_t *vnm = vnet_get_main ();
2839   vnet_interface_main_t *im = &vnm->interface_main;
2840   ip4_main_t *ipm = &ip4_main;
2841   ip_lookup_main_t *lm = &ipm->lookup_main;
2842   vnet_classify_main_t *cm = &vnet_classify_main;
2843   ip4_address_t *if_addr;
2844
2845   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2846     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2847
2848   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2849     return VNET_API_ERROR_NO_SUCH_ENTRY;
2850
2851   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2852   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2853
2854   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2855
2856   if (NULL != if_addr)
2857     {
2858       fib_prefix_t pfx = {
2859         .fp_len = 32,
2860         .fp_proto = FIB_PROTOCOL_IP4,
2861         .fp_addr.ip4 = *if_addr,
2862       };
2863       u32 fib_index;
2864
2865       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2866                                                        sw_if_index);
2867
2868
2869       if (table_index != (u32) ~ 0)
2870         {
2871           dpo_id_t dpo = DPO_INVALID;
2872
2873           dpo_set (&dpo,
2874                    DPO_CLASSIFY,
2875                    DPO_PROTO_IP4,
2876                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2877
2878           fib_table_entry_special_dpo_add (fib_index,
2879                                            &pfx,
2880                                            FIB_SOURCE_CLASSIFY,
2881                                            FIB_ENTRY_FLAG_NONE, &dpo);
2882           dpo_reset (&dpo);
2883         }
2884       else
2885         {
2886           fib_table_entry_special_remove (fib_index,
2887                                           &pfx, FIB_SOURCE_CLASSIFY);
2888         }
2889     }
2890
2891   return 0;
2892 }
2893 #endif
2894
2895 static clib_error_t *
2896 set_ip_classify_command_fn (vlib_main_t * vm,
2897                             unformat_input_t * input,
2898                             vlib_cli_command_t * cmd)
2899 {
2900   u32 table_index = ~0;
2901   int table_index_set = 0;
2902   u32 sw_if_index = ~0;
2903   int rv;
2904
2905   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2906     {
2907       if (unformat (input, "table-index %d", &table_index))
2908         table_index_set = 1;
2909       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2910                          vnet_get_main (), &sw_if_index))
2911         ;
2912       else
2913         break;
2914     }
2915
2916   if (table_index_set == 0)
2917     return clib_error_return (0, "classify table-index must be specified");
2918
2919   if (sw_if_index == ~0)
2920     return clib_error_return (0, "interface / subif must be specified");
2921
2922   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2923
2924   switch (rv)
2925     {
2926     case 0:
2927       break;
2928
2929     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2930       return clib_error_return (0, "No such interface");
2931
2932     case VNET_API_ERROR_NO_SUCH_ENTRY:
2933       return clib_error_return (0, "No such classifier table");
2934     }
2935   return 0;
2936 }
2937
2938 /*?
2939  * Assign a classification table to an interface. The classification
2940  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2942  * on an interface.
2943  *
2944  * @cliexpar
2945  * Example of how to assign a classification table to an interface:
2946  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2947 ?*/
2948 /* *INDENT-OFF* */
2949 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2950 {
2951     .path = "set ip classify",
2952     .short_help =
2953     "set ip classify intfc <interface> table-index <classify-idx>",
2954     .function = set_ip_classify_command_fn,
2955 };
2956 /* *INDENT-ON* */
2957
2958 static clib_error_t *
2959 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2960 {
2961   ip4_main_t *im = &ip4_main;
2962   uword heapsize = 0;
2963
2964   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2965     {
2966       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2967         ;
2968       else
2969         return clib_error_return (0,
2970                                   "invalid heap-size parameter `%U'",
2971                                   format_unformat_error, input);
2972     }
2973
2974   im->mtrie_heap_size = heapsize;
2975
2976   return 0;
2977 }
2978
2979 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2980
2981 /*
2982  * fd.io coding-style-patch-verification: ON
2983  *
2984  * Local Variables:
2985  * eval: (c-set-style "gnu")
2986  * End:
2987  */