Mcast rewrite optimisations
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 #include <vnet/ip/ip4_forward.h>
57
58 /** @brief IPv4 lookup node.
59     @node ip4-lookup
60
61     This is the main IPv4 lookup dispatch node.
62
63     @param vm vlib_main_t corresponding to the current thread
64     @param node vlib_node_runtime_t
65     @param frame vlib_frame_t whose contents should be dispatched
66
67     @par Graph mechanics: buffer metadata, next index usage
68
69     @em Uses:
70     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
71         - Indicates the @c sw_if_index value of the interface that the
72           packet was received on.
73     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
74         - When the value is @c ~0 then the node performs a longest prefix
75           match (LPM) for the packet destination address in the FIB attached
76           to the receive interface.
77         - Otherwise perform LPM for the packet destination address in the
78           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
79           value (0, 1, ...) and not a VRF id.
80
81     @em Sets:
82     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
83         - The lookup result adjacency index.
84
85     <em>Next Index:</em>
86     - Dispatches the packet to the node index found in
87       ip_adjacency_t @c adj->lookup_next_index
88       (where @c adj is the lookup result adjacency).
89 */
90 static uword
91 ip4_lookup (vlib_main_t * vm,
92             vlib_node_runtime_t * node, vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .function = ip4_lookup,
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
115
116 always_inline uword
117 ip4_load_balance (vlib_main_t * vm,
118                   vlib_node_runtime_t * node, vlib_frame_t * frame)
119 {
120   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
121   u32 n_left_from, n_left_to_next, *from, *to_next;
122   ip_lookup_next_t next;
123   u32 thread_index = vlib_get_thread_index ();
124
125   from = vlib_frame_vector_args (frame);
126   n_left_from = frame->n_vectors;
127   next = node->cached_next_index;
128
129   if (node->flags & VLIB_NODE_FLAG_TRACE)
130     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
131
132   while (n_left_from > 0)
133     {
134       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
135
136
137       while (n_left_from >= 4 && n_left_to_next >= 2)
138         {
139           ip_lookup_next_t next0, next1;
140           const load_balance_t *lb0, *lb1;
141           vlib_buffer_t *p0, *p1;
142           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
143           const ip4_header_t *ip0, *ip1;
144           const dpo_id_t *dpo0, *dpo1;
145
146           /* Prefetch next iteration. */
147           {
148             vlib_buffer_t *p2, *p3;
149
150             p2 = vlib_get_buffer (vm, from[2]);
151             p3 = vlib_get_buffer (vm, from[3]);
152
153             vlib_prefetch_buffer_header (p2, STORE);
154             vlib_prefetch_buffer_header (p3, STORE);
155
156             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
157             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
158           }
159
160           pi0 = to_next[0] = from[0];
161           pi1 = to_next[1] = from[1];
162
163           from += 2;
164           n_left_from -= 2;
165           to_next += 2;
166           n_left_to_next -= 2;
167
168           p0 = vlib_get_buffer (vm, pi0);
169           p1 = vlib_get_buffer (vm, pi1);
170
171           ip0 = vlib_buffer_get_current (p0);
172           ip1 = vlib_buffer_get_current (p1);
173           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
174           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
175
176           lb0 = load_balance_get (lbi0);
177           lb1 = load_balance_get (lbi1);
178
179           /*
180            * this node is for via FIBs we can re-use the hash value from the
181            * to node if present.
182            * We don't want to use the same hash value at each level in the recursion
183            * graph as that would lead to polarisation
184            */
185           hc0 = hc1 = 0;
186
187           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
188             {
189               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
190                 {
191                   hc0 = vnet_buffer (p0)->ip.flow_hash =
192                     vnet_buffer (p0)->ip.flow_hash >> 1;
193                 }
194               else
195                 {
196                   hc0 = vnet_buffer (p0)->ip.flow_hash =
197                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
198                 }
199               dpo0 = load_balance_get_fwd_bucket
200                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
201             }
202           else
203             {
204               dpo0 = load_balance_get_bucket_i (lb0, 0);
205             }
206           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
207             {
208               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
209                 {
210                   hc1 = vnet_buffer (p1)->ip.flow_hash =
211                     vnet_buffer (p1)->ip.flow_hash >> 1;
212                 }
213               else
214                 {
215                   hc1 = vnet_buffer (p1)->ip.flow_hash =
216                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
217                 }
218               dpo1 = load_balance_get_fwd_bucket
219                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
220             }
221           else
222             {
223               dpo1 = load_balance_get_bucket_i (lb1, 0);
224             }
225
226           next0 = dpo0->dpoi_next_node;
227           next1 = dpo1->dpoi_next_node;
228
229           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
230           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
231
232           vlib_increment_combined_counter
233             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
234           vlib_increment_combined_counter
235             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
236
237           vlib_validate_buffer_enqueue_x2 (vm, node, next,
238                                            to_next, n_left_to_next,
239                                            pi0, pi1, next0, next1);
240         }
241
242       while (n_left_from > 0 && n_left_to_next > 0)
243         {
244           ip_lookup_next_t next0;
245           const load_balance_t *lb0;
246           vlib_buffer_t *p0;
247           u32 pi0, lbi0, hc0;
248           const ip4_header_t *ip0;
249           const dpo_id_t *dpo0;
250
251           pi0 = from[0];
252           to_next[0] = pi0;
253           from += 1;
254           to_next += 1;
255           n_left_to_next -= 1;
256           n_left_from -= 1;
257
258           p0 = vlib_get_buffer (vm, pi0);
259
260           ip0 = vlib_buffer_get_current (p0);
261           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
262
263           lb0 = load_balance_get (lbi0);
264
265           hc0 = 0;
266           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
267             {
268               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
269                 {
270                   hc0 = vnet_buffer (p0)->ip.flow_hash =
271                     vnet_buffer (p0)->ip.flow_hash >> 1;
272                 }
273               else
274                 {
275                   hc0 = vnet_buffer (p0)->ip.flow_hash =
276                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
277                 }
278               dpo0 = load_balance_get_fwd_bucket
279                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
280             }
281           else
282             {
283               dpo0 = load_balance_get_bucket_i (lb0, 0);
284             }
285
286           next0 = dpo0->dpoi_next_node;
287           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
288
289           vlib_increment_combined_counter
290             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
291
292           vlib_validate_buffer_enqueue_x1 (vm, node, next,
293                                            to_next, n_left_to_next,
294                                            pi0, next0);
295         }
296
297       vlib_put_next_frame (vm, node, next, n_left_to_next);
298     }
299
300   return frame->n_vectors;
301 }
302
303 /* *INDENT-OFF* */
304 VLIB_REGISTER_NODE (ip4_load_balance_node) =
305 {
306   .function = ip4_load_balance,
307   .name = "ip4-load-balance",
308   .vector_size = sizeof (u32),
309   .sibling_of = "ip4-lookup",
310   .format_trace =
311   format_ip4_lookup_trace,
312 };
313 /* *INDENT-ON* */
314
315 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
316
317 /* get first interface address */
318 ip4_address_t *
319 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
320                              ip_interface_address_t ** result_ia)
321 {
322   ip_lookup_main_t *lm = &im->lookup_main;
323   ip_interface_address_t *ia = 0;
324   ip4_address_t *result = 0;
325
326   /* *INDENT-OFF* */
327   foreach_ip_interface_address
328     (lm, ia, sw_if_index,
329      1 /* honor unnumbered */ ,
330      ({
331        ip4_address_t * a =
332          ip_interface_address_get_address (lm, ia);
333        result = a;
334        break;
335      }));
336   /* *INDENT-OFF* */
337   if (result_ia)
338     *result_ia = result ? ia : 0;
339   return result;
340 }
341
342 static void
343 ip4_add_interface_routes (u32 sw_if_index,
344                           ip4_main_t * im, u32 fib_index,
345                           ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip4_address_t *address = ip_interface_address_get_address (lm, a);
349   fib_prefix_t pfx = {
350     .fp_len = a->address_length,
351     .fp_proto = FIB_PROTOCOL_IP4,
352     .fp_addr.ip4 = *address,
353   };
354
355   if (pfx.fp_len <= 30)
356     {
357       /* a /30 or shorter - add a glean for the network address */
358       fib_table_entry_update_one_path (fib_index, &pfx,
359                                        FIB_SOURCE_INTERFACE,
360                                        (FIB_ENTRY_FLAG_CONNECTED |
361                                         FIB_ENTRY_FLAG_ATTACHED),
362                                        DPO_PROTO_IP4,
363                                        /* No next-hop address */
364                                        NULL,
365                                        sw_if_index,
366                                        // invalid FIB index
367                                        ~0,
368                                        1,
369                                        // no out-label stack
370                                        NULL,
371                                        FIB_ROUTE_PATH_FLAG_NONE);
372
373       /* Add the two broadcast addresses as drop */
374       fib_prefix_t net_pfx = {
375         .fp_len = 32,
376         .fp_proto = FIB_PROTOCOL_IP4,
377         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
378       };
379       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
380         fib_table_entry_special_add(fib_index,
381                                     &net_pfx,
382                                     FIB_SOURCE_INTERFACE,
383                                     (FIB_ENTRY_FLAG_DROP |
384                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
385       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
386       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
387         fib_table_entry_special_add(fib_index,
388                                     &net_pfx,
389                                     FIB_SOURCE_INTERFACE,
390                                     (FIB_ENTRY_FLAG_DROP |
391                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
392     }
393   else if (pfx.fp_len == 31)
394     {
395       u32 mask = clib_host_to_net_u32(1);
396       fib_prefix_t net_pfx = pfx;
397
398       net_pfx.fp_len = 32;
399       net_pfx.fp_addr.ip4.as_u32 ^= mask;
400
401       /* a /31 - add the other end as an attached host */
402       fib_table_entry_update_one_path (fib_index, &net_pfx,
403                                        FIB_SOURCE_INTERFACE,
404                                        (FIB_ENTRY_FLAG_ATTACHED),
405                                        DPO_PROTO_IP4,
406                                        &net_pfx.fp_addr,
407                                        sw_if_index,
408                                        // invalid FIB index
409                                        ~0,
410                                        1,
411                                        NULL,
412                                        FIB_ROUTE_PATH_FLAG_NONE);
413     }
414   pfx.fp_len = 32;
415
416   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
417     {
418       u32 classify_table_index =
419         lm->classify_table_index_by_sw_if_index[sw_if_index];
420       if (classify_table_index != (u32) ~ 0)
421         {
422           dpo_id_t dpo = DPO_INVALID;
423
424           dpo_set (&dpo,
425                    DPO_CLASSIFY,
426                    DPO_PROTO_IP4,
427                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
428
429           fib_table_entry_special_dpo_add (fib_index,
430                                            &pfx,
431                                            FIB_SOURCE_CLASSIFY,
432                                            FIB_ENTRY_FLAG_NONE, &dpo);
433           dpo_reset (&dpo);
434         }
435     }
436
437   fib_table_entry_update_one_path (fib_index, &pfx,
438                                    FIB_SOURCE_INTERFACE,
439                                    (FIB_ENTRY_FLAG_CONNECTED |
440                                     FIB_ENTRY_FLAG_LOCAL),
441                                    DPO_PROTO_IP4,
442                                    &pfx.fp_addr,
443                                    sw_if_index,
444                                    // invalid FIB index
445                                    ~0,
446                                    1, NULL,
447                                    FIB_ROUTE_PATH_FLAG_NONE);
448 }
449
450 static void
451 ip4_del_interface_routes (ip4_main_t * im,
452                           u32 fib_index,
453                           ip4_address_t * address, u32 address_length)
454 {
455   fib_prefix_t pfx = {
456     .fp_len = address_length,
457     .fp_proto = FIB_PROTOCOL_IP4,
458     .fp_addr.ip4 = *address,
459   };
460
461   if (pfx.fp_len <= 30)
462     {
463       fib_prefix_t net_pfx = {
464         .fp_len = 32,
465         .fp_proto = FIB_PROTOCOL_IP4,
466         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
467       };
468       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
469         fib_table_entry_special_remove(fib_index,
470                                        &net_pfx,
471                                        FIB_SOURCE_INTERFACE);
472       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
473       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
474         fib_table_entry_special_remove(fib_index,
475                                        &net_pfx,
476                                        FIB_SOURCE_INTERFACE);
477       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
478     }
479     else if (pfx.fp_len == 31)
480     {
481       u32 mask = clib_host_to_net_u32(1);
482       fib_prefix_t net_pfx = pfx;
483
484       net_pfx.fp_len = 32;
485       net_pfx.fp_addr.ip4.as_u32 ^= mask;
486
487       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
488     }
489
490   pfx.fp_len = 32;
491   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
492 }
493
494 void
495 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
496 {
497   ip4_main_t *im = &ip4_main;
498
499   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
500
501   /*
502    * enable/disable only on the 1<->0 transition
503    */
504   if (is_enable)
505     {
506       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
507         return;
508     }
509   else
510     {
511       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
512       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
513         return;
514     }
515   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
516                                !is_enable, 0, 0);
517
518
519   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
520                                sw_if_index, !is_enable, 0, 0);
521 }
522
523 static clib_error_t *
524 ip4_add_del_interface_address_internal (vlib_main_t * vm,
525                                         u32 sw_if_index,
526                                         ip4_address_t * address,
527                                         u32 address_length, u32 is_del)
528 {
529   vnet_main_t *vnm = vnet_get_main ();
530   ip4_main_t *im = &ip4_main;
531   ip_lookup_main_t *lm = &im->lookup_main;
532   clib_error_t *error = 0;
533   u32 if_address_index, elts_before;
534   ip4_address_fib_t ip4_af, *addr_fib = 0;
535
536   /* local0 interface doesn't support IP addressing  */
537   if (sw_if_index == 0)
538     {
539       return
540        clib_error_create ("local0 interface doesn't support IP addressing");
541     }
542
543   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
544   ip4_addr_fib_init (&ip4_af, address,
545                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
546   vec_add1 (addr_fib, ip4_af);
547
548   /* FIXME-LATER
549    * there is no support for adj-fib handling in the presence of overlapping
550    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
551    * most routers do.
552    */
553   /* *INDENT-OFF* */
554   if (!is_del)
555     {
556       /* When adding an address check that it does not conflict
557          with an existing address. */
558       ip_interface_address_t *ia;
559       foreach_ip_interface_address
560         (&im->lookup_main, ia, sw_if_index,
561          0 /* honor unnumbered */ ,
562          ({
563            ip4_address_t * x =
564              ip_interface_address_get_address
565              (&im->lookup_main, ia);
566            if (ip4_destination_matches_route
567                (im, address, x, ia->address_length) ||
568                ip4_destination_matches_route (im,
569                                               x,
570                                               address,
571                                               address_length))
572              return
573                clib_error_create
574                ("failed to add %U which conflicts with %U for interface %U",
575                 format_ip4_address_and_length, address,
576                 address_length,
577                 format_ip4_address_and_length, x,
578                 ia->address_length,
579                 format_vnet_sw_if_index_name, vnm,
580                 sw_if_index);
581          }));
582     }
583   /* *INDENT-ON* */
584
585   elts_before = pool_elts (lm->if_address_pool);
586
587   error = ip_interface_address_add_del
588     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
589   if (error)
590     goto done;
591
592   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
593
594   if (is_del)
595     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
596   else
597     ip4_add_interface_routes (sw_if_index,
598                               im, ip4_af.fib_index,
599                               pool_elt_at_index
600                               (lm->if_address_pool, if_address_index));
601
602   /* If pool did not grow/shrink: add duplicate address. */
603   if (elts_before != pool_elts (lm->if_address_pool))
604     {
605       ip4_add_del_interface_address_callback_t *cb;
606       vec_foreach (cb, im->add_del_interface_address_callbacks)
607         cb->function (im, cb->function_opaque, sw_if_index,
608                       address, address_length, if_address_index, is_del);
609     }
610
611 done:
612   vec_free (addr_fib);
613   return error;
614 }
615
616 clib_error_t *
617 ip4_add_del_interface_address (vlib_main_t * vm,
618                                u32 sw_if_index,
619                                ip4_address_t * address,
620                                u32 address_length, u32 is_del)
621 {
622   return ip4_add_del_interface_address_internal
623     (vm, sw_if_index, address, address_length, is_del);
624 }
625
626 /* Built-in ip4 unicast rx feature path definition */
627 /* *INDENT-OFF* */
628 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
629 {
630   .arc_name = "ip4-unicast",
631   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
632   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
633 };
634
635 VNET_FEATURE_INIT (ip4_flow_classify, static) =
636 {
637   .arc_name = "ip4-unicast",
638   .node_name = "ip4-flow-classify",
639   .runs_before = VNET_FEATURES ("ip4-inacl"),
640 };
641
642 VNET_FEATURE_INIT (ip4_inacl, static) =
643 {
644   .arc_name = "ip4-unicast",
645   .node_name = "ip4-inacl",
646   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
647 };
648
649 VNET_FEATURE_INIT (ip4_source_check_1, static) =
650 {
651   .arc_name = "ip4-unicast",
652   .node_name = "ip4-source-check-via-rx",
653   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
654 };
655
656 VNET_FEATURE_INIT (ip4_source_check_2, static) =
657 {
658   .arc_name = "ip4-unicast",
659   .node_name = "ip4-source-check-via-any",
660   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
661 };
662
663 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
664 {
665   .arc_name = "ip4-unicast",
666   .node_name = "ip4-source-and-port-range-check-rx",
667   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
668 };
669
670 VNET_FEATURE_INIT (ip4_policer_classify, static) =
671 {
672   .arc_name = "ip4-unicast",
673   .node_name = "ip4-policer-classify",
674   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
675 };
676
677 VNET_FEATURE_INIT (ip4_ipsec, static) =
678 {
679   .arc_name = "ip4-unicast",
680   .node_name = "ipsec-input-ip4",
681   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
682 };
683
684 VNET_FEATURE_INIT (ip4_vpath, static) =
685 {
686   .arc_name = "ip4-unicast",
687   .node_name = "vpath-input-ip4",
688   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
689 };
690
691 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
692 {
693   .arc_name = "ip4-unicast",
694   .node_name = "ip4-vxlan-bypass",
695   .runs_before = VNET_FEATURES ("ip4-lookup"),
696 };
697
698 VNET_FEATURE_INIT (ip4_not_enabled, static) =
699 {
700   .arc_name = "ip4-unicast",
701   .node_name = "ip4-not-enabled",
702   .runs_before = VNET_FEATURES ("ip4-lookup"),
703 };
704
705 VNET_FEATURE_INIT (ip4_lookup, static) =
706 {
707   .arc_name = "ip4-unicast",
708   .node_name = "ip4-lookup",
709   .runs_before = 0,     /* not before any other features */
710 };
711
712 /* Built-in ip4 multicast rx feature path definition */
713 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
714 {
715   .arc_name = "ip4-multicast",
716   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
717   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
718 };
719
720 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
721 {
722   .arc_name = "ip4-multicast",
723   .node_name = "vpath-input-ip4",
724   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
725 };
726
727 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
728 {
729   .arc_name = "ip4-multicast",
730   .node_name = "ip4-not-enabled",
731   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
732 };
733
734 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
735 {
736   .arc_name = "ip4-multicast",
737   .node_name = "ip4-mfib-forward-lookup",
738   .runs_before = 0,     /* last feature */
739 };
740
741 /* Source and port-range check ip4 tx feature path definition */
742 VNET_FEATURE_ARC_INIT (ip4_output, static) =
743 {
744   .arc_name = "ip4-output",
745   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
746   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
747 };
748
749 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
750 {
751   .arc_name = "ip4-output",
752   .node_name = "ip4-source-and-port-range-check-tx",
753   .runs_before = VNET_FEATURES ("ip4-outacl"),
754 };
755
756 VNET_FEATURE_INIT (ip4_outacl, static) =
757 {
758   .arc_name = "ip4-output",
759   .node_name = "ip4-outacl",
760   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
761 };
762
763 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
764 {
765   .arc_name = "ip4-output",
766   .node_name = "ipsec-output-ip4",
767   .runs_before = VNET_FEATURES ("interface-output"),
768 };
769
770 /* Built-in ip4 tx feature path definition */
771 VNET_FEATURE_INIT (ip4_interface_output, static) =
772 {
773   .arc_name = "ip4-output",
774   .node_name = "interface-output",
775   .runs_before = 0,     /* not before any other features */
776 };
777 /* *INDENT-ON* */
778
779 static clib_error_t *
780 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
781 {
782   ip4_main_t *im = &ip4_main;
783
784   /* Fill in lookup tables with default table (0). */
785   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
786   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
787
788   if (!is_add)
789     {
790       ip4_main_t *im4 = &ip4_main;
791       ip_lookup_main_t *lm4 = &im4->lookup_main;
792       ip_interface_address_t *ia = 0;
793       ip4_address_t *address;
794       vlib_main_t *vm = vlib_get_main ();
795
796       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
797       /* *INDENT-OFF* */
798       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
799       ({
800         address = ip_interface_address_get_address (lm4, ia);
801         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
802       }));
803       /* *INDENT-ON* */
804     }
805
806   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
807                                is_add, 0, 0);
808
809   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
810                                sw_if_index, is_add, 0, 0);
811
812   return /* no error */ 0;
813 }
814
815 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
816
817 /* Global IP4 main. */
818 ip4_main_t ip4_main;
819
820 clib_error_t *
821 ip4_lookup_init (vlib_main_t * vm)
822 {
823   ip4_main_t *im = &ip4_main;
824   clib_error_t *error;
825   uword i;
826
827   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
828     return error;
829   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
830     return (error);
831   if ((error = vlib_call_init_function (vm, fib_module_init)))
832     return error;
833   if ((error = vlib_call_init_function (vm, mfib_module_init)))
834     return error;
835
836   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
837     {
838       u32 m;
839
840       if (i < 32)
841         m = pow2_mask (i) << (32 - i);
842       else
843         m = ~0;
844       im->fib_masks[i] = clib_host_to_net_u32 (m);
845     }
846
847   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
848
849   /* Create FIB with index 0 and table id of 0. */
850   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
851                                      FIB_SOURCE_DEFAULT_ROUTE);
852   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
853                                       MFIB_SOURCE_DEFAULT_ROUTE);
854
855   {
856     pg_node_t *pn;
857     pn = pg_get_node (ip4_lookup_node.index);
858     pn->unformat_edit = unformat_pg_ip4_header;
859   }
860
861   {
862     ethernet_arp_header_t h;
863
864     memset (&h, 0, sizeof (h));
865
866     /* Set target ethernet address to all zeros. */
867     memset (h.ip4_over_ethernet[1].ethernet, 0,
868             sizeof (h.ip4_over_ethernet[1].ethernet));
869
870 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
871 #define _8(f,v) h.f = v;
872     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
873     _16 (l3_type, ETHERNET_TYPE_IP4);
874     _8 (n_l2_address_bytes, 6);
875     _8 (n_l3_address_bytes, 4);
876     _16 (opcode, ETHERNET_ARP_OPCODE_request);
877 #undef _16
878 #undef _8
879
880     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
881                                /* data */ &h,
882                                sizeof (h),
883                                /* alloc chunk size */ 8,
884                                "ip4 arp");
885   }
886
887   return error;
888 }
889
890 VLIB_INIT_FUNCTION (ip4_lookup_init);
891
892 typedef struct
893 {
894   /* Adjacency taken. */
895   u32 dpo_index;
896   u32 flow_hash;
897   u32 fib_index;
898
899   /* Packet data, possibly *after* rewrite. */
900   u8 packet_data[64 - 1 * sizeof (u32)];
901 }
902 ip4_forward_next_trace_t;
903
904 u8 *
905 format_ip4_forward_next_trace (u8 * s, va_list * args)
906 {
907   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
908   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
909   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
910   u32 indent = format_get_indent (s);
911   s = format (s, "%U%U",
912               format_white_space, indent,
913               format_ip4_header, t->packet_data, sizeof (t->packet_data));
914   return s;
915 }
916
917 static u8 *
918 format_ip4_lookup_trace (u8 * s, va_list * args)
919 {
920   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
921   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
922   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
923   u32 indent = format_get_indent (s);
924
925   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
926               t->fib_index, t->dpo_index, t->flow_hash);
927   s = format (s, "\n%U%U",
928               format_white_space, indent,
929               format_ip4_header, t->packet_data, sizeof (t->packet_data));
930   return s;
931 }
932
933 static u8 *
934 format_ip4_rewrite_trace (u8 * s, va_list * args)
935 {
936   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
937   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
938   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
939   u32 indent = format_get_indent (s);
940
941   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
942               t->fib_index, t->dpo_index, format_ip_adjacency,
943               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
944   s = format (s, "\n%U%U",
945               format_white_space, indent,
946               format_ip_adjacency_packet_data,
947               t->dpo_index, t->packet_data, sizeof (t->packet_data));
948   return s;
949 }
950
951 /* Common trace function for all ip4-forward next nodes. */
952 void
953 ip4_forward_next_trace (vlib_main_t * vm,
954                         vlib_node_runtime_t * node,
955                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
956 {
957   u32 *from, n_left;
958   ip4_main_t *im = &ip4_main;
959
960   n_left = frame->n_vectors;
961   from = vlib_frame_vector_args (frame);
962
963   while (n_left >= 4)
964     {
965       u32 bi0, bi1;
966       vlib_buffer_t *b0, *b1;
967       ip4_forward_next_trace_t *t0, *t1;
968
969       /* Prefetch next iteration. */
970       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
971       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
972
973       bi0 = from[0];
974       bi1 = from[1];
975
976       b0 = vlib_get_buffer (vm, bi0);
977       b1 = vlib_get_buffer (vm, bi1);
978
979       if (b0->flags & VLIB_BUFFER_IS_TRACED)
980         {
981           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
982           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
983           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
984           t0->fib_index =
985             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
986              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
987             vec_elt (im->fib_index_by_sw_if_index,
988                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
989
990           clib_memcpy (t0->packet_data,
991                        vlib_buffer_get_current (b0),
992                        sizeof (t0->packet_data));
993         }
994       if (b1->flags & VLIB_BUFFER_IS_TRACED)
995         {
996           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
997           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
998           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
999           t1->fib_index =
1000             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1001              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1002             vec_elt (im->fib_index_by_sw_if_index,
1003                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1004           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1005                        sizeof (t1->packet_data));
1006         }
1007       from += 2;
1008       n_left -= 2;
1009     }
1010
1011   while (n_left >= 1)
1012     {
1013       u32 bi0;
1014       vlib_buffer_t *b0;
1015       ip4_forward_next_trace_t *t0;
1016
1017       bi0 = from[0];
1018
1019       b0 = vlib_get_buffer (vm, bi0);
1020
1021       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1022         {
1023           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1024           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1025           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1026           t0->fib_index =
1027             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1028              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1029             vec_elt (im->fib_index_by_sw_if_index,
1030                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1031           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1032                        sizeof (t0->packet_data));
1033         }
1034       from += 1;
1035       n_left -= 1;
1036     }
1037 }
1038
1039 /* Compute TCP/UDP/ICMP4 checksum in software. */
1040 u16
1041 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1042                               ip4_header_t * ip0)
1043 {
1044   ip_csum_t sum0;
1045   u32 ip_header_length, payload_length_host_byte_order;
1046   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1047   u16 sum16;
1048   void *data_this_buffer;
1049
1050   /* Initialize checksum with ip header. */
1051   ip_header_length = ip4_header_bytes (ip0);
1052   payload_length_host_byte_order =
1053     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1054   sum0 =
1055     clib_host_to_net_u32 (payload_length_host_byte_order +
1056                           (ip0->protocol << 16));
1057
1058   if (BITS (uword) == 32)
1059     {
1060       sum0 =
1061         ip_csum_with_carry (sum0,
1062                             clib_mem_unaligned (&ip0->src_address, u32));
1063       sum0 =
1064         ip_csum_with_carry (sum0,
1065                             clib_mem_unaligned (&ip0->dst_address, u32));
1066     }
1067   else
1068     sum0 =
1069       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1070
1071   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1072   data_this_buffer = (void *) ip0 + ip_header_length;
1073   n_ip_bytes_this_buffer =
1074     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1075   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1076     {
1077       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1078         n_ip_bytes_this_buffer - ip_header_length : 0;
1079     }
1080   while (1)
1081     {
1082       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1083       n_bytes_left -= n_this_buffer;
1084       if (n_bytes_left == 0)
1085         break;
1086
1087       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1088       p0 = vlib_get_buffer (vm, p0->next_buffer);
1089       data_this_buffer = vlib_buffer_get_current (p0);
1090       n_this_buffer = p0->current_length;
1091     }
1092
1093   sum16 = ~ip_csum_fold (sum0);
1094
1095   return sum16;
1096 }
1097
1098 u32
1099 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1100 {
1101   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1102   udp_header_t *udp0;
1103   u16 sum16;
1104
1105   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1106           || ip0->protocol == IP_PROTOCOL_UDP);
1107
1108   udp0 = (void *) (ip0 + 1);
1109   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1110     {
1111       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1112                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1113       return p0->flags;
1114     }
1115
1116   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1117
1118   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1119                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1120
1121   return p0->flags;
1122 }
1123
1124 /* *INDENT-OFF* */
1125 VNET_FEATURE_ARC_INIT (ip4_local) =
1126 {
1127   .arc_name  = "ip4-local",
1128   .start_nodes = VNET_FEATURES ("ip4-local"),
1129 };
1130 /* *INDENT-ON* */
1131
1132 static inline void
1133 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1134                        u8 is_udp, u8 * error, u8 * good_tcp_udp)
1135 {
1136   u32 flags0;
1137   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1138   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1139   if (is_udp)
1140     {
1141       udp_header_t *udp;
1142       u32 ip_len, udp_len;
1143       i32 len_diff;
1144       udp = ip4_next_header (ip);
1145       /* Verify UDP length. */
1146       ip_len = clib_net_to_host_u16 (ip->length);
1147       udp_len = clib_net_to_host_u16 (udp->length);
1148
1149       len_diff = ip_len - udp_len;
1150       *good_tcp_udp &= len_diff >= 0;
1151       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1152     }
1153 }
1154
/*
 * Non-zero when a packet needs its L4 checksum verified in software:
 * it is TCP or UDP (is_tcp_udp) and none of the "checksum already
 * computed" / "TCP checksum offload" / "UDP checksum offload" bits is
 * set in flags.  Both arguments are parenthesized so that compound
 * expressions can be passed safely.
 */
#define ip4_local_do_l4_check(is_tcp_udp, flags)                        \
    ((is_tcp_udp) && !((flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED     \
    || (flags) & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM                        \
    || (flags) & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
1159
1160 static inline uword
1161 ip4_local_inline (vlib_main_t * vm,
1162                   vlib_node_runtime_t * node,
1163                   vlib_frame_t * frame, int head_of_feature_arc)
1164 {
1165   ip4_main_t *im = &ip4_main;
1166   ip_lookup_main_t *lm = &im->lookup_main;
1167   ip_local_next_t next_index;
1168   u32 *from, *to_next, n_left_from, n_left_to_next;
1169   vlib_node_runtime_t *error_node =
1170     vlib_node_get_runtime (vm, ip4_input_node.index);
1171   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1172
1173   from = vlib_frame_vector_args (frame);
1174   n_left_from = frame->n_vectors;
1175   next_index = node->cached_next_index;
1176
1177   if (node->flags & VLIB_NODE_FLAG_TRACE)
1178     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1179
1180   while (n_left_from > 0)
1181     {
1182       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1183
1184       while (n_left_from >= 4 && n_left_to_next >= 2)
1185         {
1186           vlib_buffer_t *p0, *p1;
1187           ip4_header_t *ip0, *ip1;
1188           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1189           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1190           const dpo_id_t *dpo0, *dpo1;
1191           const load_balance_t *lb0, *lb1;
1192           u32 pi0, next0, fib_index0, lbi0;
1193           u32 pi1, next1, fib_index1, lbi1;
1194           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1195           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1196           u32 sw_if_index0, sw_if_index1;
1197
1198           pi0 = to_next[0] = from[0];
1199           pi1 = to_next[1] = from[1];
1200           from += 2;
1201           n_left_from -= 2;
1202           to_next += 2;
1203           n_left_to_next -= 2;
1204
1205           next0 = next1 = IP_LOCAL_NEXT_DROP;
1206           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1207
1208           p0 = vlib_get_buffer (vm, pi0);
1209           p1 = vlib_get_buffer (vm, pi1);
1210
1211           ip0 = vlib_buffer_get_current (p0);
1212           ip1 = vlib_buffer_get_current (p1);
1213
1214           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1215           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1216
1217           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1218           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1219
1220           proto0 = ip0->protocol;
1221           proto1 = ip1->protocol;
1222
1223           if (head_of_feature_arc == 0)
1224             goto skip_checks;
1225
1226           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1227           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1228           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1229           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1230
1231           good_tcp_udp0 =
1232             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1233              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1234                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1235           good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1236                            || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1237                                || p1->flags &
1238                                VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1239
1240           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
1241                              || ip4_local_do_l4_check (is_tcp_udp1,
1242                                                        p1->flags)))
1243             {
1244               if (is_tcp_udp0)
1245                 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1246                                        &good_tcp_udp0);
1247               if (is_tcp_udp1)
1248                 ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
1249                                        &good_tcp_udp1);
1250             }
1251
1252           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1253           error0 = (is_tcp_udp0 && !good_tcp_udp0
1254                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1255           error1 = (is_tcp_udp1 && !good_tcp_udp1
1256                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1257
1258           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1259           fib_index0 =
1260             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1261              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1262
1263           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1264           fib_index1 =
1265             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1266              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1267
1268           /* TODO maybe move to lookup? */
1269           vnet_buffer (p0)->ip.fib_index = fib_index0;
1270           vnet_buffer (p1)->ip.fib_index = fib_index1;
1271
1272           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1273           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1274
1275           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1276           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1277           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1278                                              2);
1279           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1280                                              2);
1281           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1282                                              3);
1283           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1284                                              3);
1285
1286           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1287             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1288           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1289
1290           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1291             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1292           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1293
1294           lb0 = load_balance_get (lbi0);
1295           lb1 = load_balance_get (lbi1);
1296           dpo0 = load_balance_get_bucket_i (lb0, 0);
1297           dpo1 = load_balance_get_bucket_i (lb1, 0);
1298
1299           /*
1300            * Must have a route to source otherwise we drop the packet.
1301            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1302            *
1303            * The checks are:
1304            *  - the source is a recieve => it's from us => bogus, do this
1305            *    first since it sets a different error code.
1306            *  - uRPF check for any route to source - accept if passes.
1307            *  - allow packets destined to the broadcast address from unknown sources
1308            */
1309           if (p0->flags & VNET_BUFFER_F_IS_NATED)
1310             goto skip_check0;
1311
1312           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1313                      dpo0->dpoi_type == DPO_RECEIVE) ?
1314                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1315           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1316                      !fib_urpf_check_size (lb0->lb_urpf) &&
1317                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1318                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1319
1320         skip_check0:
1321           if (p1->flags & VNET_BUFFER_F_IS_NATED)
1322             goto skip_checks;
1323
1324           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1325                      dpo1->dpoi_type == DPO_RECEIVE) ?
1326                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1327           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1328                      !fib_urpf_check_size (lb1->lb_urpf) &&
1329                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1330                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1331
1332         skip_checks:
1333
1334           next0 = lm->local_next_by_ip_protocol[proto0];
1335           next1 = lm->local_next_by_ip_protocol[proto1];
1336
1337           next0 =
1338             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1339           next1 =
1340             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1341
1342           p0->error = error0 ? error_node->errors[error0] : 0;
1343           p1->error = error1 ? error_node->errors[error1] : 0;
1344
1345           if (head_of_feature_arc)
1346             {
1347               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1348                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1349               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1350                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1351             }
1352
1353           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1354                                            n_left_to_next, pi0, pi1,
1355                                            next0, next1);
1356         }
1357
1358       while (n_left_from > 0 && n_left_to_next > 0)
1359         {
1360           vlib_buffer_t *p0;
1361           ip4_header_t *ip0;
1362           ip4_fib_mtrie_t *mtrie0;
1363           ip4_fib_mtrie_leaf_t leaf0;
1364           u32 pi0, next0, fib_index0, lbi0;
1365           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1366           load_balance_t *lb0;
1367           const dpo_id_t *dpo0;
1368           u32 sw_if_index0;
1369
1370           pi0 = to_next[0] = from[0];
1371           from += 1;
1372           n_left_from -= 1;
1373           to_next += 1;
1374           n_left_to_next -= 1;
1375
1376           next0 = IP_LOCAL_NEXT_DROP;
1377           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1378
1379           p0 = vlib_get_buffer (vm, pi0);
1380           ip0 = vlib_buffer_get_current (p0);
1381           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1382           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1383
1384           proto0 = ip0->protocol;
1385
1386           if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
1387             goto skip_check;
1388
1389           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1390           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1391
1392           good_tcp_udp0 =
1393             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1394              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1395                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1396
1397           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
1398             {
1399               ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1400                                      &good_tcp_udp0);
1401             }
1402
1403           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1404           error0 = (is_tcp_udp0 && !good_tcp_udp0
1405                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1406
1407           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1408           fib_index0 =
1409             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1410              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1411           vnet_buffer (p0)->ip.fib_index = fib_index0;
1412           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1413           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1414           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1415                                              2);
1416           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1417                                              3);
1418           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1419           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1420           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1421
1422           lb0 = load_balance_get (lbi0);
1423           dpo0 = load_balance_get_bucket_i (lb0, 0);
1424
1425           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1426                      dpo0->dpoi_type == DPO_RECEIVE) ?
1427                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1428           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1429                      !fib_urpf_check_size (lb0->lb_urpf) &&
1430                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1431                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1432
1433         skip_check:
1434           next0 = lm->local_next_by_ip_protocol[proto0];
1435           next0 =
1436             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1437
1438           p0->error = error0 ? error_node->errors[error0] : 0;
1439
1440           if (head_of_feature_arc)
1441             {
1442               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1443                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1444             }
1445
1446           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1447                                            n_left_to_next, pi0, next0);
1448         }
1449       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1450     }
1451
1452   return frame->n_vectors;
1453 }
1454
1455 static uword
1456 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1457 {
1458   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1459 }
1460
1461 /* *INDENT-OFF* */
1462 VLIB_REGISTER_NODE (ip4_local_node) =
1463 {
1464   .function = ip4_local,
1465   .name = "ip4-local",
1466   .vector_size = sizeof (u32),
1467   .format_trace = format_ip4_forward_next_trace,
1468   .n_next_nodes = IP_LOCAL_N_NEXT,
1469   .next_nodes =
1470   {
1471     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1472     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1473     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1474     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1475   },
1476 };
1477 /* *INDENT-ON* */
1478
1479 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1480
1481 static uword
1482 ip4_local_end_of_arc (vlib_main_t * vm,
1483                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1484 {
1485   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1486 }
1487
1488 /* *INDENT-OFF* */
1489 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1490   .function = ip4_local_end_of_arc,
1491   .name = "ip4-local-end-of-arc",
1492   .vector_size = sizeof (u32),
1493
1494   .format_trace = format_ip4_forward_next_trace,
1495   .sibling_of = "ip4-local",
1496 };
1497
1498 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1499
1500 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1501   .arc_name = "ip4-local",
1502   .node_name = "ip4-local-end-of-arc",
1503   .runs_before = 0, /* not before any other features */
1504 };
1505 /* *INDENT-ON* */
1506
1507 void
1508 ip4_register_protocol (u32 protocol, u32 node_index)
1509 {
1510   vlib_main_t *vm = vlib_get_main ();
1511   ip4_main_t *im = &ip4_main;
1512   ip_lookup_main_t *lm = &im->lookup_main;
1513
1514   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1515   lm->local_next_by_ip_protocol[protocol] =
1516     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1517 }
1518
1519 static clib_error_t *
1520 show_ip_local_command_fn (vlib_main_t * vm,
1521                           unformat_input_t * input, vlib_cli_command_t * cmd)
1522 {
1523   ip4_main_t *im = &ip4_main;
1524   ip_lookup_main_t *lm = &im->lookup_main;
1525   int i;
1526
1527   vlib_cli_output (vm, "Protocols handled by ip4_local");
1528   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1529     {
1530       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1531         {
1532           u32 node_index = vlib_get_node (vm,
1533                                           ip4_local_node.index)->
1534             next_nodes[lm->local_next_by_ip_protocol[i]];
1535           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1536                            node_index);
1537         }
1538     }
1539   return 0;
1540 }
1541
1542
1543
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 * Each entry is printed as '<em>protocol number</em>: <em>next node
 * name</em>'; the example below lists the protocol numbers only.
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1
 * 17
 * 47
 * @cliexend
?*/
1556 /* *INDENT-OFF* */
1557 VLIB_CLI_COMMAND (show_ip_local, static) =
1558 {
1559   .path = "show ip local",
1560   .function = show_ip_local_command_fn,
1561   .short_help = "show ip local",
1562 };
1563 /* *INDENT-ON* */
1564
1565 always_inline uword
1566 ip4_arp_inline (vlib_main_t * vm,
1567                 vlib_node_runtime_t * node,
1568                 vlib_frame_t * frame, int is_glean)
1569 {
1570   vnet_main_t *vnm = vnet_get_main ();
1571   ip4_main_t *im = &ip4_main;
1572   ip_lookup_main_t *lm = &im->lookup_main;
1573   u32 *from, *to_next_drop;
1574   uword n_left_from, n_left_to_next_drop, next_index;
1575   static f64 time_last_seed_change = -1e100;
1576   static u32 hash_seeds[3];
1577   static uword hash_bitmap[256 / BITS (uword)];
1578   f64 time_now;
1579
1580   if (node->flags & VLIB_NODE_FLAG_TRACE)
1581     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1582
1583   time_now = vlib_time_now (vm);
1584   if (time_now - time_last_seed_change > 1e-3)
1585     {
1586       uword i;
1587       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1588                                             sizeof (hash_seeds));
1589       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1590         hash_seeds[i] = r[i];
1591
      /* Mark all hash keys as not seen before. */
1593       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1594         hash_bitmap[i] = 0;
1595
1596       time_last_seed_change = time_now;
1597     }
1598
1599   from = vlib_frame_vector_args (frame);
1600   n_left_from = frame->n_vectors;
1601   next_index = node->cached_next_index;
1602   if (next_index == IP4_ARP_NEXT_DROP)
1603     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1604
1605   while (n_left_from > 0)
1606     {
1607       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1608                            to_next_drop, n_left_to_next_drop);
1609
1610       while (n_left_from > 0 && n_left_to_next_drop > 0)
1611         {
1612           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1613           ip_adjacency_t *adj0;
1614           vlib_buffer_t *p0;
1615           ip4_header_t *ip0;
1616           uword bm0;
1617
1618           pi0 = from[0];
1619
1620           p0 = vlib_get_buffer (vm, pi0);
1621
1622           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1623           adj0 = adj_get (adj_index0);
1624           ip0 = vlib_buffer_get_current (p0);
1625
1626           a0 = hash_seeds[0];
1627           b0 = hash_seeds[1];
1628           c0 = hash_seeds[2];
1629
1630           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1631           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1632
1633           if (is_glean)
1634             {
1635               /*
1636                * this is the Glean case, so we are ARPing for the
1637                * packet's destination
1638                */
1639               a0 ^= ip0->dst_address.data_u32;
1640             }
1641           else
1642             {
1643               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1644             }
1645           b0 ^= sw_if_index0;
1646
1647           hash_v3_mix32 (a0, b0, c0);
1648           hash_v3_finalize32 (a0, b0, c0);
1649
1650           c0 &= BITS (hash_bitmap) - 1;
1651           m0 = (uword) 1 << (c0 % BITS (uword));
1652           c0 = c0 / BITS (uword);
1653
1654           bm0 = hash_bitmap[c0];
1655           drop0 = (bm0 & m0) != 0;
1656
1657           /* Mark it as seen. */
1658           hash_bitmap[c0] = bm0 | m0;
1659
1660           from += 1;
1661           n_left_from -= 1;
1662           to_next_drop[0] = pi0;
1663           to_next_drop += 1;
1664           n_left_to_next_drop -= 1;
1665
1666           p0->error =
1667             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1668                          IP4_ARP_ERROR_REQUEST_SENT];
1669
1670           /*
1671            * the adj has been updated to a rewrite but the node the DPO that got
1672            * us here hasn't - yet. no big deal. we'll drop while we wait.
1673            */
1674           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1675             continue;
1676
1677           if (drop0)
1678             continue;
1679
1680           /*
1681            * Can happen if the control-plane is programming tables
1682            * with traffic flowing; at least that's today's lame excuse.
1683            */
1684           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1685               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1686             {
1687               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1688             }
1689           else
1690             /* Send ARP request. */
1691             {
1692               u32 bi0 = 0;
1693               vlib_buffer_t *b0;
1694               ethernet_arp_header_t *h0;
1695               vnet_hw_interface_t *hw_if0;
1696
1697               h0 =
1698                 vlib_packet_template_get_packet (vm,
1699                                                  &im->ip4_arp_request_packet_template,
1700                                                  &bi0);
1701
1702               /* Seems we're out of buffers */
1703               if (PREDICT_FALSE (!h0))
1704                 continue;
1705
1706               /* Add rewrite/encap string for ARP packet. */
1707               vnet_rewrite_one_header (adj0[0], h0,
1708                                        sizeof (ethernet_header_t));
1709
1710               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1711
1712               /* Src ethernet address in ARP header. */
1713               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1714                            hw_if0->hw_address,
1715                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1716
1717               if (is_glean)
1718                 {
1719                   /* The interface's source address is stashed in the Glean Adj */
1720                   h0->ip4_over_ethernet[0].ip4 =
1721                     adj0->sub_type.glean.receive_addr.ip4;
1722
1723                   /* Copy in destination address we are requesting. This is the
1724                    * glean case, so it's the packet's destination.*/
1725                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1726                     ip0->dst_address.data_u32;
1727                 }
1728               else
1729                 {
1730                   /* Src IP address in ARP header. */
1731                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1732                                                   &h0->
1733                                                   ip4_over_ethernet[0].ip4))
1734                     {
1735                       /* No source address available */
1736                       p0->error =
1737                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1738                       vlib_buffer_free (vm, &bi0, 1);
1739                       continue;
1740                     }
1741
1742                   /* Copy in destination address we are requesting from the
1743                      incomplete adj */
1744                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1745                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1746                 }
1747
1748               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1749               b0 = vlib_get_buffer (vm, bi0);
1750               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1751               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1752
1753               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1754
1755               vlib_set_next_frame_buffer (vm, node,
1756                                           adj0->rewrite_header.next_index,
1757                                           bi0);
1758             }
1759         }
1760
1761       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1762     }
1763
1764   return frame->n_vectors;
1765 }
1766
1767 static uword
1768 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1769 {
1770   return (ip4_arp_inline (vm, node, frame, 0));
1771 }
1772
1773 static uword
1774 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1775 {
1776   return (ip4_arp_inline (vm, node, frame, 1));
1777 }
1778
1779 static char *ip4_arp_error_strings[] = {
1780   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1781   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1782   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1783   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1784   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1785   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1786 };
1787
1788 /* *INDENT-OFF* */
1789 VLIB_REGISTER_NODE (ip4_arp_node) =
1790 {
1791   .function = ip4_arp,
1792   .name = "ip4-arp",
1793   .vector_size = sizeof (u32),
1794   .format_trace = format_ip4_forward_next_trace,
1795   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1796   .error_strings = ip4_arp_error_strings,
1797   .n_next_nodes = IP4_ARP_N_NEXT,
1798   .next_nodes =
1799   {
1800     [IP4_ARP_NEXT_DROP] = "error-drop",
1801   },
1802 };
1803
1804 VLIB_REGISTER_NODE (ip4_glean_node) =
1805 {
1806   .function = ip4_glean,
1807   .name = "ip4-glean",
1808   .vector_size = sizeof (u32),
1809   .format_trace = format_ip4_forward_next_trace,
1810   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1811   .error_strings = ip4_arp_error_strings,
1812   .n_next_nodes = IP4_ARP_N_NEXT,
1813   .next_nodes = {
1814   [IP4_ARP_NEXT_DROP] = "error-drop",
1815   },
1816 };
1817 /* *INDENT-ON* */
1818
1819 #define foreach_notrace_ip4_arp_error           \
1820 _(DROP)                                         \
1821 _(REQUEST_SENT)                                 \
1822 _(REPLICATE_DROP)                               \
1823 _(REPLICATE_FAIL)
1824
1825 clib_error_t *
1826 arp_notrace_init (vlib_main_t * vm)
1827 {
1828   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1829
1830   /* don't trace ARP request packets */
1831 #define _(a)                                    \
1832     vnet_pcap_drop_trace_filter_add_del         \
1833         (rt->errors[IP4_ARP_ERROR_##a],         \
1834          1 /* is_add */);
1835   foreach_notrace_ip4_arp_error;
1836 #undef _
1837   return 0;
1838 }
1839
1840 VLIB_INIT_FUNCTION (arp_notrace_init);
1841
1842
1843 /* Send an ARP request to see if given destination is reachable on given interface. */
1844 clib_error_t *
1845 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
1846 {
1847   vnet_main_t *vnm = vnet_get_main ();
1848   ip4_main_t *im = &ip4_main;
1849   ethernet_arp_header_t *h;
1850   ip4_address_t *src;
1851   ip_interface_address_t *ia;
1852   ip_adjacency_t *adj;
1853   vnet_hw_interface_t *hi;
1854   vnet_sw_interface_t *si;
1855   vlib_buffer_t *b;
1856   adj_index_t ai;
1857   u32 bi = 0;
1858
1859   si = vnet_get_sw_interface (vnm, sw_if_index);
1860
1861   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1862     {
1863       return clib_error_return (0, "%U: interface %U down",
1864                                 format_ip4_address, dst,
1865                                 format_vnet_sw_if_index_name, vnm,
1866                                 sw_if_index);
1867     }
1868
1869   src =
1870     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1871   if (!src)
1872     {
1873       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1874       return clib_error_return
1875         (0,
1876          "no matching interface address for destination %U (interface %U)",
1877          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
1878          sw_if_index);
1879     }
1880
1881   h = vlib_packet_template_get_packet (vm,
1882                                        &im->ip4_arp_request_packet_template,
1883                                        &bi);
1884
1885   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1886   if (PREDICT_FALSE (!hi->hw_address))
1887     {
1888       return clib_error_return (0, "%U: interface %U do not support ip probe",
1889                                 format_ip4_address, dst,
1890                                 format_vnet_sw_if_index_name, vnm,
1891                                 sw_if_index);
1892     }
1893
1894   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
1895                sizeof (h->ip4_over_ethernet[0].ethernet));
1896
1897   h->ip4_over_ethernet[0].ip4 = src[0];
1898   h->ip4_over_ethernet[1].ip4 = dst[0];
1899
1900   b = vlib_get_buffer (vm, bi);
1901   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1902     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1903
1904   ip46_address_t nh = {
1905     .ip4 = *dst,
1906   };
1907
1908   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
1909                             VNET_LINK_IP4, &nh, sw_if_index);
1910   adj = adj_get (ai);
1911
1912   /* Peer has been previously resolved, retrieve glean adj instead */
1913   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
1914     {
1915       adj_unlock (ai);
1916       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
1917                                   VNET_LINK_IP4, sw_if_index, &nh);
1918       adj = adj_get (ai);
1919     }
1920
1921   /* Add encapsulation string for software interface (e.g. ethernet header). */
1922   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1923   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1924
1925   {
1926     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
1927     u32 *to_next = vlib_frame_vector_args (f);
1928     to_next[0] = bi;
1929     f->n_vectors = 1;
1930     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1931   }
1932
1933   adj_unlock (ai);
1934   return /* no error */ 0;
1935 }
1936
/* Indices into the static next-node table of the ip4-rewrite node. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
1942
/**
 * The bits of an IPv4 address to mask in order to construct a multicast
 * MAC address.  The constant is chosen per host byte order.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1952
1953 always_inline uword
1954 ip4_rewrite_inline (vlib_main_t * vm,
1955                     vlib_node_runtime_t * node,
1956                     vlib_frame_t * frame,
1957                     int do_counters, int is_midchain, int is_mcast)
1958 {
1959   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1960   u32 *from = vlib_frame_vector_args (frame);
1961   u32 n_left_from, n_left_to_next, *to_next, next_index;
1962   vlib_node_runtime_t *error_node =
1963     vlib_node_get_runtime (vm, ip4_input_node.index);
1964
1965   n_left_from = frame->n_vectors;
1966   next_index = node->cached_next_index;
1967   u32 thread_index = vlib_get_thread_index ();
1968
1969   while (n_left_from > 0)
1970     {
1971       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1972
1973       while (n_left_from >= 4 && n_left_to_next >= 2)
1974         {
1975           ip_adjacency_t *adj0, *adj1;
1976           vlib_buffer_t *p0, *p1;
1977           ip4_header_t *ip0, *ip1;
1978           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
1979           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
1980           u32 tx_sw_if_index0, tx_sw_if_index1;
1981
1982           /* Prefetch next iteration. */
1983           {
1984             vlib_buffer_t *p2, *p3;
1985
1986             p2 = vlib_get_buffer (vm, from[2]);
1987             p3 = vlib_get_buffer (vm, from[3]);
1988
1989             vlib_prefetch_buffer_header (p2, STORE);
1990             vlib_prefetch_buffer_header (p3, STORE);
1991
1992             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
1993             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
1994           }
1995
1996           pi0 = to_next[0] = from[0];
1997           pi1 = to_next[1] = from[1];
1998
1999           from += 2;
2000           n_left_from -= 2;
2001           to_next += 2;
2002           n_left_to_next -= 2;
2003
2004           p0 = vlib_get_buffer (vm, pi0);
2005           p1 = vlib_get_buffer (vm, pi1);
2006
2007           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2008           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2009
2010           /*
2011            * pre-fetch the per-adjacency counters
2012            */
2013           if (do_counters)
2014             {
2015               vlib_prefetch_combined_counter (&adjacency_counters,
2016                                               thread_index, adj_index0);
2017               vlib_prefetch_combined_counter (&adjacency_counters,
2018                                               thread_index, adj_index1);
2019             }
2020
2021           ip0 = vlib_buffer_get_current (p0);
2022           ip1 = vlib_buffer_get_current (p1);
2023
2024           error0 = error1 = IP4_ERROR_NONE;
2025           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2026
2027           /* Decrement TTL & update checksum.
2028              Works either endian, so no need for byte swap. */
2029           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2030             {
2031               i32 ttl0 = ip0->ttl;
2032
2033               /* Input node should have reject packets with ttl 0. */
2034               ASSERT (ip0->ttl > 0);
2035
2036               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2037               checksum0 += checksum0 >= 0xffff;
2038
2039               ip0->checksum = checksum0;
2040               ttl0 -= 1;
2041               ip0->ttl = ttl0;
2042
2043               /*
2044                * If the ttl drops below 1 when forwarding, generate
2045                * an ICMP response.
2046                */
2047               if (PREDICT_FALSE (ttl0 <= 0))
2048                 {
2049                   error0 = IP4_ERROR_TIME_EXPIRED;
2050                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2051                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2052                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2053                                                0);
2054                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2055                 }
2056
2057               /* Verify checksum. */
2058               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2059                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2060             }
2061           else
2062             {
2063               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2064             }
2065           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2066             {
2067               i32 ttl1 = ip1->ttl;
2068
2069               /* Input node should have reject packets with ttl 0. */
2070               ASSERT (ip1->ttl > 0);
2071
2072               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2073               checksum1 += checksum1 >= 0xffff;
2074
2075               ip1->checksum = checksum1;
2076               ttl1 -= 1;
2077               ip1->ttl = ttl1;
2078
2079               /*
2080                * If the ttl drops below 1 when forwarding, generate
2081                * an ICMP response.
2082                */
2083               if (PREDICT_FALSE (ttl1 <= 0))
2084                 {
2085                   error1 = IP4_ERROR_TIME_EXPIRED;
2086                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2087                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2088                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2089                                                0);
2090                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2091                 }
2092
2093               /* Verify checksum. */
2094               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2095                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2096             }
2097           else
2098             {
2099               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2100             }
2101
2102           /* Rewrite packet header and updates lengths. */
2103           adj0 = adj_get (adj_index0);
2104           adj1 = adj_get (adj_index1);
2105
2106           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2107           rw_len0 = adj0[0].rewrite_header.data_bytes;
2108           rw_len1 = adj1[0].rewrite_header.data_bytes;
2109           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2110           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2111
2112           /* Check MTU of outgoing interface. */
2113           if (vlib_buffer_length_in_chain (vm, p0) >
2114               adj0[0].rewrite_header.max_l3_packet_bytes)
2115             {
2116               error0 = IP4_ERROR_MTU_EXCEEDED;
2117               next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2118               icmp4_error_set_vnet_buffer
2119                 (p0, ICMP4_destination_unreachable,
2120                  ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2121                  0);
2122             }
2123           if (vlib_buffer_length_in_chain (vm, p1) >
2124               adj1[0].rewrite_header.max_l3_packet_bytes)
2125             {
2126               error1 = IP4_ERROR_MTU_EXCEEDED;
2127               next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2128               icmp4_error_set_vnet_buffer
2129                 (p1, ICMP4_destination_unreachable,
2130                  ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2131                  0);
2132             }
2133
2134           if (is_mcast)
2135             {
2136               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2137                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2138                         IP4_ERROR_SAME_INTERFACE : error0);
2139               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2140                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2141                         IP4_ERROR_SAME_INTERFACE : error1);
2142             }
2143
2144           p0->error = error_node->errors[error0];
2145           p1->error = error_node->errors[error1];
2146           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2147            * to see the IP headerr */
2148           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2149             {
2150               next0 = adj0[0].rewrite_header.next_index;
2151               p0->current_data -= rw_len0;
2152               p0->current_length += rw_len0;
2153               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2154               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2155
2156               if (PREDICT_FALSE
2157                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2158                 vnet_feature_arc_start (lm->output_feature_arc_index,
2159                                         tx_sw_if_index0, &next0, p0);
2160             }
2161           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2162             {
2163               next1 = adj1[0].rewrite_header.next_index;
2164               p1->current_data -= rw_len1;
2165               p1->current_length += rw_len1;
2166
2167               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2168               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2169
2170               if (PREDICT_FALSE
2171                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2172                 vnet_feature_arc_start (lm->output_feature_arc_index,
2173                                         tx_sw_if_index1, &next1, p1);
2174             }
2175
2176           /* Guess we are only writing on simple Ethernet header. */
2177           vnet_rewrite_two_headers (adj0[0], adj1[0],
2178                                     ip0, ip1, sizeof (ethernet_header_t));
2179
2180           /*
2181            * Bump the per-adjacency counters
2182            */
2183           if (do_counters)
2184             {
2185               vlib_increment_combined_counter
2186                 (&adjacency_counters,
2187                  thread_index,
2188                  adj_index0, 1,
2189                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2190
2191               vlib_increment_combined_counter
2192                 (&adjacency_counters,
2193                  thread_index,
2194                  adj_index1, 1,
2195                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2196             }
2197
2198           if (is_midchain)
2199             {
2200               adj0->sub_type.midchain.fixup_func
2201                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2202               adj1->sub_type.midchain.fixup_func
2203                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2204             }
2205           if (is_mcast)
2206             {
2207               /*
2208                * copy bytes from the IP address into the MAC rewrite
2209                */
2210               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2211                                           adj0->
2212                                           rewrite_header.dst_mcast_offset,
2213                                           &ip0->dst_address.as_u32,
2214                                           (u8 *) ip0);
2215               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2216                                           adj0->
2217                                           rewrite_header.dst_mcast_offset,
2218                                           &ip1->dst_address.as_u32,
2219                                           (u8 *) ip1);
2220             }
2221
2222           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2223                                            to_next, n_left_to_next,
2224                                            pi0, pi1, next0, next1);
2225         }
2226
2227       while (n_left_from > 0 && n_left_to_next > 0)
2228         {
2229           ip_adjacency_t *adj0;
2230           vlib_buffer_t *p0;
2231           ip4_header_t *ip0;
2232           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2233           u32 tx_sw_if_index0;
2234
2235           pi0 = to_next[0] = from[0];
2236
2237           p0 = vlib_get_buffer (vm, pi0);
2238
2239           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2240
2241           adj0 = adj_get (adj_index0);
2242
2243           ip0 = vlib_buffer_get_current (p0);
2244
2245           error0 = IP4_ERROR_NONE;
2246           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2247
2248           /* Decrement TTL & update checksum. */
2249           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2250             {
2251               i32 ttl0 = ip0->ttl;
2252
2253               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2254
2255               checksum0 += checksum0 >= 0xffff;
2256
2257               ip0->checksum = checksum0;
2258
2259               ASSERT (ip0->ttl > 0);
2260
2261               ttl0 -= 1;
2262
2263               ip0->ttl = ttl0;
2264
2265               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2266                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2267
2268               if (PREDICT_FALSE (ttl0 <= 0))
2269                 {
2270                   /*
2271                    * If the ttl drops below 1 when forwarding, generate
2272                    * an ICMP response.
2273                    */
2274                   error0 = IP4_ERROR_TIME_EXPIRED;
2275                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2276                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2277                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2278                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2279                                                0);
2280                 }
2281             }
2282           else
2283             {
2284               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2285             }
2286
2287           if (do_counters)
2288             vlib_prefetch_combined_counter (&adjacency_counters,
2289                                             thread_index, adj_index0);
2290
2291           /* Guess we are only writing on simple Ethernet header. */
2292           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2293           if (is_mcast)
2294             {
2295               /*
2296                * copy bytes from the IP address into the MAC rewrite
2297                */
2298               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2299                                           adj0->
2300                                           rewrite_header.dst_mcast_offset,
2301                                           &ip0->dst_address.as_u32,
2302                                           (u8 *) ip0);
2303             }
2304
2305           /* Update packet buffer attributes/set output interface. */
2306           rw_len0 = adj0[0].rewrite_header.data_bytes;
2307           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2308
2309           if (do_counters)
2310             vlib_increment_combined_counter
2311               (&adjacency_counters,
2312                thread_index, adj_index0, 1,
2313                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2314
2315           /* Check MTU of outgoing interface. */
2316           if (vlib_buffer_length_in_chain (vm, p0) >
2317               adj0[0].rewrite_header.max_l3_packet_bytes)
2318             {
2319               error0 = IP4_ERROR_MTU_EXCEEDED;
2320               next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2321               icmp4_error_set_vnet_buffer
2322                 (p0, ICMP4_destination_unreachable,
2323                  ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2324                  0);
2325             }
2326           if (is_mcast)
2327             {
2328               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2329                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2330                         IP4_ERROR_SAME_INTERFACE : error0);
2331             }
2332           p0->error = error_node->errors[error0];
2333
2334           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2335            * to see the IP headerr */
2336           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2337             {
2338               p0->current_data -= rw_len0;
2339               p0->current_length += rw_len0;
2340               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2341
2342               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2343               next0 = adj0[0].rewrite_header.next_index;
2344
2345               if (is_midchain)
2346                 {
2347                   adj0->sub_type.midchain.fixup_func
2348                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2349                 }
2350
2351               if (PREDICT_FALSE
2352                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2353                 vnet_feature_arc_start (lm->output_feature_arc_index,
2354                                         tx_sw_if_index0, &next0, p0);
2355
2356             }
2357
2358           from += 1;
2359           n_left_from -= 1;
2360           to_next += 1;
2361           n_left_to_next -= 1;
2362
2363           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2364                                            to_next, n_left_to_next,
2365                                            pi0, next0);
2366         }
2367
2368       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2369     }
2370
2371   /* Need to do trace after rewrites to pick up new packet data. */
2372   if (node->flags & VLIB_NODE_FLAG_TRACE)
2373     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2374
2375   return frame->n_vectors;
2376 }
2377
2378
2379 /** @brief IPv4 rewrite node.
2380     @node ip4-rewrite
2381
2382     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2383     header checksum, fetch the ip adjacency, check the outbound mtu,
2384     apply the adjacency rewrite, and send pkts to the adjacency
2385     rewrite header's rewrite_next_index.
2386
2387     @param vm vlib_main_t corresponding to the current thread
2388     @param node vlib_node_runtime_t
2389     @param frame vlib_frame_t whose contents should be dispatched
2390
2391     @par Graph mechanics: buffer metadata, next index usage
2392
2393     @em Uses:
2394     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2395         - the rewrite adjacency index
2396     - <code>adj->lookup_next_index</code>
2397         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2398           the packet will be dropped.
2399     - <code>adj->rewrite_header</code>
2400         - Rewrite string length, rewrite string, next_index
2401
2402     @em Sets:
2403     - <code>b->current_data, b->current_length</code>
2404         - Updated net of applying the rewrite string
2405
2406     <em>Next Indices:</em>
2407     - <code> adj->rewrite_header.next_index </code>
2408       or @c ip4-drop
2409 */
2410 static uword
2411 ip4_rewrite (vlib_main_t * vm,
2412              vlib_node_runtime_t * node, vlib_frame_t * frame)
2413 {
2414   if (adj_are_counters_enabled ())
2415     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2416   else
2417     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2418 }
2419
2420 static uword
2421 ip4_midchain (vlib_main_t * vm,
2422               vlib_node_runtime_t * node, vlib_frame_t * frame)
2423 {
2424   if (adj_are_counters_enabled ())
2425     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2426   else
2427     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2428 }
2429
2430 static uword
2431 ip4_rewrite_mcast (vlib_main_t * vm,
2432                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2433 {
2434   if (adj_are_counters_enabled ())
2435     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2436   else
2437     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2438 }
2439
2440 static uword
2441 ip4_mcast_midchain (vlib_main_t * vm,
2442                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2443 {
2444   if (adj_are_counters_enabled ())
2445     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2446   else
2447     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2448 }
2449
2450 /* *INDENT-OFF* */
2451 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2452   .function = ip4_rewrite,
2453   .name = "ip4-rewrite",
2454   .vector_size = sizeof (u32),
2455
2456   .format_trace = format_ip4_rewrite_trace,
2457
2458   .n_next_nodes = 2,
2459   .next_nodes = {
2460     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2461     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2462   },
2463 };
2464 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2465
2466 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2467   .function = ip4_rewrite_mcast,
2468   .name = "ip4-rewrite-mcast",
2469   .vector_size = sizeof (u32),
2470
2471   .format_trace = format_ip4_rewrite_trace,
2472   .sibling_of = "ip4-rewrite",
2473 };
2474 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2475
2476 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2477   .function = ip4_mcast_midchain,
2478   .name = "ip4-mcast-midchain",
2479   .vector_size = sizeof (u32),
2480
2481   .format_trace = format_ip4_rewrite_trace,
2482   .sibling_of = "ip4-rewrite",
2483 };
2484 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2485
2486 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2487   .function = ip4_midchain,
2488   .name = "ip4-midchain",
2489   .vector_size = sizeof (u32),
2490   .format_trace = format_ip4_forward_next_trace,
2491   .sibling_of =  "ip4-rewrite",
2492 };
2493 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* *INDENT-ON* */
2495
2496 int
2497 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2498 {
2499   ip4_fib_mtrie_t *mtrie0;
2500   ip4_fib_mtrie_leaf_t leaf0;
2501   u32 lbi0;
2502
2503   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2504
2505   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2506   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2507   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2508
2509   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2510
2511   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2512 }
2513
2514 static clib_error_t *
2515 test_lookup_command_fn (vlib_main_t * vm,
2516                         unformat_input_t * input, vlib_cli_command_t * cmd)
2517 {
2518   ip4_fib_t *fib;
2519   u32 table_id = 0;
2520   f64 count = 1;
2521   u32 n;
2522   int i;
2523   ip4_address_t ip4_base_address;
2524   u64 errors = 0;
2525
2526   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2527     {
2528       if (unformat (input, "table %d", &table_id))
2529         {
2530           /* Make sure the entry exists. */
2531           fib = ip4_fib_get (table_id);
2532           if ((fib) && (fib->index != table_id))
2533             return clib_error_return (0, "<fib-index> %d does not exist",
2534                                       table_id);
2535         }
2536       else if (unformat (input, "count %f", &count))
2537         ;
2538
2539       else if (unformat (input, "%U",
2540                          unformat_ip4_address, &ip4_base_address))
2541         ;
2542       else
2543         return clib_error_return (0, "unknown input `%U'",
2544                                   format_unformat_error, input);
2545     }
2546
2547   n = count;
2548
2549   for (i = 0; i < n; i++)
2550     {
2551       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2552         errors++;
2553
2554       ip4_base_address.as_u32 =
2555         clib_host_to_net_u32 (1 +
2556                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2557     }
2558
2559   if (errors)
2560     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2561   else
2562     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2563
2564   return 0;
2565 }
2566
2567 /*?
2568  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2569  * given FIB table to determine if there is a conflict with the
2570  * adjacency table. The fib-id can be determined by using the
2571  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2572  * of 0 is used.
2573  *
2574  * @todo This command uses fib-id, other commands use table-id (not
2575  * just a name, they are different indexes). Would like to change this
2576  * to table-id for consistency.
2577  *
2578  * @cliexpar
2579  * Example of how to run the test lookup command:
2580  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2581  * No errors in 2 lookups
2582  * @cliexend
2583 ?*/
2584 /* *INDENT-OFF* */
2585 VLIB_CLI_COMMAND (lookup_test_command, static) =
2586 {
2587   .path = "test lookup",
2588   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2589   .function = test_lookup_command_fn,
2590 };
2591 /* *INDENT-ON* */
2592
2593 int
2594 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2595 {
2596   u32 fib_index;
2597
2598   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2599
2600   if (~0 == fib_index)
2601     return VNET_API_ERROR_NO_SUCH_FIB;
2602
2603   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2604                                   flow_hash_config);
2605
2606   return 0;
2607 }
2608
2609 static clib_error_t *
2610 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2611                              unformat_input_t * input,
2612                              vlib_cli_command_t * cmd)
2613 {
2614   int matched = 0;
2615   u32 table_id = 0;
2616   u32 flow_hash_config = 0;
2617   int rv;
2618
2619   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2620     {
2621       if (unformat (input, "table %d", &table_id))
2622         matched = 1;
2623 #define _(a,v) \
2624     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2625       foreach_flow_hash_bit
2626 #undef _
2627         else
2628         break;
2629     }
2630
2631   if (matched == 0)
2632     return clib_error_return (0, "unknown input `%U'",
2633                               format_unformat_error, input);
2634
2635   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2636   switch (rv)
2637     {
2638     case 0:
2639       break;
2640
2641     case VNET_API_ERROR_NO_SUCH_FIB:
2642       return clib_error_return (0, "no such FIB table %d", table_id);
2643
2644     default:
2645       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2646       break;
2647     }
2648
2649   return 0;
2650 }
2651
2652 /*?
2653  * Configure the set of IPv4 fields used by the flow hash.
2654  *
2655  * @cliexpar
2656  * Example of how to set the flow hash on a given table:
2657  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2659  * @cliexstart{show ip fib}
2660  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2661  * 0.0.0.0/0
2662  *   unicast-ip4-chain
2663  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2664  *     [0] [@0]: dpo-drop ip6
2665  * 0.0.0.0/32
2666  *   unicast-ip4-chain
2667  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2668  *     [0] [@0]: dpo-drop ip6
2669  * 224.0.0.0/8
2670  *   unicast-ip4-chain
2671  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2672  *     [0] [@0]: dpo-drop ip6
2673  * 6.0.1.2/32
2674  *   unicast-ip4-chain
2675  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2676  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2677  * 7.0.0.1/32
2678  *   unicast-ip4-chain
2679  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2680  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2681  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2682  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2683  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2684  * 240.0.0.0/8
2685  *   unicast-ip4-chain
2686  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2687  *     [0] [@0]: dpo-drop ip6
2688  * 255.255.255.255/32
2689  *   unicast-ip4-chain
2690  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2691  *     [0] [@0]: dpo-drop ip6
2692  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2693  * 0.0.0.0/0
2694  *   unicast-ip4-chain
2695  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2696  *     [0] [@0]: dpo-drop ip6
2697  * 0.0.0.0/32
2698  *   unicast-ip4-chain
2699  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2700  *     [0] [@0]: dpo-drop ip6
2701  * 172.16.1.0/24
2702  *   unicast-ip4-chain
2703  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2704  *     [0] [@4]: ipv4-glean: af_packet0
2705  * 172.16.1.1/32
2706  *   unicast-ip4-chain
2707  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2708  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2709  * 172.16.1.2/32
2710  *   unicast-ip4-chain
2711  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2712  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2713  * 172.16.2.0/24
2714  *   unicast-ip4-chain
2715  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2716  *     [0] [@4]: ipv4-glean: af_packet1
2717  * 172.16.2.1/32
2718  *   unicast-ip4-chain
2719  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2720  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2721  * 224.0.0.0/8
2722  *   unicast-ip4-chain
2723  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2724  *     [0] [@0]: dpo-drop ip6
2725  * 240.0.0.0/8
2726  *   unicast-ip4-chain
2727  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2728  *     [0] [@0]: dpo-drop ip6
2729  * 255.255.255.255/32
2730  *   unicast-ip4-chain
2731  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2732  *     [0] [@0]: dpo-drop ip6
2733  * @cliexend
2734 ?*/
2735 /* *INDENT-OFF* */
2736 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2737 {
2738   .path = "set ip flow-hash",
2739   .short_help =
2740   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2741   .function = set_ip_flow_hash_command_fn,
2742 };
2743 /* *INDENT-ON* */
2744
2745 int
2746 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2747                              u32 table_index)
2748 {
2749   vnet_main_t *vnm = vnet_get_main ();
2750   vnet_interface_main_t *im = &vnm->interface_main;
2751   ip4_main_t *ipm = &ip4_main;
2752   ip_lookup_main_t *lm = &ipm->lookup_main;
2753   vnet_classify_main_t *cm = &vnet_classify_main;
2754   ip4_address_t *if_addr;
2755
2756   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2757     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2758
2759   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2760     return VNET_API_ERROR_NO_SUCH_ENTRY;
2761
2762   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2763   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2764
2765   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2766
2767   if (NULL != if_addr)
2768     {
2769       fib_prefix_t pfx = {
2770         .fp_len = 32,
2771         .fp_proto = FIB_PROTOCOL_IP4,
2772         .fp_addr.ip4 = *if_addr,
2773       };
2774       u32 fib_index;
2775
2776       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2777                                                        sw_if_index);
2778
2779
2780       if (table_index != (u32) ~ 0)
2781         {
2782           dpo_id_t dpo = DPO_INVALID;
2783
2784           dpo_set (&dpo,
2785                    DPO_CLASSIFY,
2786                    DPO_PROTO_IP4,
2787                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2788
2789           fib_table_entry_special_dpo_add (fib_index,
2790                                            &pfx,
2791                                            FIB_SOURCE_CLASSIFY,
2792                                            FIB_ENTRY_FLAG_NONE, &dpo);
2793           dpo_reset (&dpo);
2794         }
2795       else
2796         {
2797           fib_table_entry_special_remove (fib_index,
2798                                           &pfx, FIB_SOURCE_CLASSIFY);
2799         }
2800     }
2801
2802   return 0;
2803 }
2804
2805 static clib_error_t *
2806 set_ip_classify_command_fn (vlib_main_t * vm,
2807                             unformat_input_t * input,
2808                             vlib_cli_command_t * cmd)
2809 {
2810   u32 table_index = ~0;
2811   int table_index_set = 0;
2812   u32 sw_if_index = ~0;
2813   int rv;
2814
2815   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2816     {
2817       if (unformat (input, "table-index %d", &table_index))
2818         table_index_set = 1;
2819       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2820                          vnet_get_main (), &sw_if_index))
2821         ;
2822       else
2823         break;
2824     }
2825
2826   if (table_index_set == 0)
2827     return clib_error_return (0, "classify table-index must be specified");
2828
2829   if (sw_if_index == ~0)
2830     return clib_error_return (0, "interface / subif must be specified");
2831
2832   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2833
2834   switch (rv)
2835     {
2836     case 0:
2837       break;
2838
2839     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2840       return clib_error_return (0, "No such interface");
2841
2842     case VNET_API_ERROR_NO_SUCH_ENTRY:
2843       return clib_error_return (0, "No such classifier table");
2844     }
2845   return 0;
2846 }
2847
2848 /*?
2849  * Assign a classification table to an interface. The classification
2850  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
2852  * on an interface.
2853  *
2854  * @cliexpar
2855  * Example of how to assign a classification table to an interface:
2856  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2857 ?*/
2858 /* *INDENT-OFF* */
2859 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
2860 {
2861     .path = "set ip classify",
2862     .short_help =
2863     "set ip classify intfc <interface> table-index <classify-idx>",
2864     .function = set_ip_classify_command_fn,
2865 };
2866 /* *INDENT-ON* */
2867
2868 static clib_error_t *
2869 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2870 {
2871   ip4_main_t *im = &ip4_main;
2872   uword heapsize = 0;
2873
2874   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2875     {
2876       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2877         ;
2878       else
2879         return clib_error_return (0,
2880                                   "invalid heap-size parameter `%U'",
2881                                   format_unformat_error, input);
2882     }
2883
2884   im->mtrie_heap_size = heapsize;
2885
2886   return 0;
2887 }
2888
2889 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2890
2891 /*
2892  * fd.io coding-style-patch-verification: ON
2893  *
2894  * Local Variables:
2895  * eval: (c-set-style "gnu")
2896  * End:
2897  */