VPP-1283: IPv4 PMTU missing MTU value in ICMP4 message.
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 #include <vnet/ip/ip4_forward.h>
57
58 /** @brief IPv4 lookup node.
59     @node ip4-lookup
60
61     This is the main IPv4 lookup dispatch node.
62
63     @param vm vlib_main_t corresponding to the current thread
64     @param node vlib_node_runtime_t
65     @param frame vlib_frame_t whose contents should be dispatched
66
67     @par Graph mechanics: buffer metadata, next index usage
68
69     @em Uses:
70     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
71         - Indicates the @c sw_if_index value of the interface that the
72           packet was received on.
73     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
74         - When the value is @c ~0 then the node performs a longest prefix
75           match (LPM) for the packet destination address in the FIB attached
76           to the receive interface.
77         - Otherwise perform LPM for the packet destination address in the
78           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
79           value (0, 1, ...) and not a VRF id.
80
81     @em Sets:
82     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
83         - The lookup result adjacency index.
84
85     <em>Next Index:</em>
86     - Dispatches the packet to the node index found in
87       ip_adjacency_t @c adj->lookup_next_index
88       (where @c adj is the lookup result adjacency).
89 */
90 static uword
91 ip4_lookup (vlib_main_t * vm,
92             vlib_node_runtime_t * node, vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
100 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
101
102 /* *INDENT-OFF* */
103 VLIB_REGISTER_NODE (ip4_lookup_node) =
104 {
105   .function = ip4_lookup,
106   .name = "ip4-lookup",
107   .vector_size = sizeof (u32),
108   .format_trace = format_ip4_lookup_trace,
109   .n_next_nodes = IP_LOOKUP_N_NEXT,
110   .next_nodes = IP4_LOOKUP_NEXT_NODES,
111 };
112 /* *INDENT-ON* */
113
114 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
115
116 always_inline uword
117 ip4_load_balance (vlib_main_t * vm,
118                   vlib_node_runtime_t * node, vlib_frame_t * frame)
119 {
120   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
121   u32 n_left_from, n_left_to_next, *from, *to_next;
122   ip_lookup_next_t next;
123   u32 thread_index = vlib_get_thread_index ();
124
125   from = vlib_frame_vector_args (frame);
126   n_left_from = frame->n_vectors;
127   next = node->cached_next_index;
128
129   if (node->flags & VLIB_NODE_FLAG_TRACE)
130     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
131
132   while (n_left_from > 0)
133     {
134       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
135
136
137       while (n_left_from >= 4 && n_left_to_next >= 2)
138         {
139           ip_lookup_next_t next0, next1;
140           const load_balance_t *lb0, *lb1;
141           vlib_buffer_t *p0, *p1;
142           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
143           const ip4_header_t *ip0, *ip1;
144           const dpo_id_t *dpo0, *dpo1;
145
146           /* Prefetch next iteration. */
147           {
148             vlib_buffer_t *p2, *p3;
149
150             p2 = vlib_get_buffer (vm, from[2]);
151             p3 = vlib_get_buffer (vm, from[3]);
152
153             vlib_prefetch_buffer_header (p2, STORE);
154             vlib_prefetch_buffer_header (p3, STORE);
155
156             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
157             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
158           }
159
160           pi0 = to_next[0] = from[0];
161           pi1 = to_next[1] = from[1];
162
163           from += 2;
164           n_left_from -= 2;
165           to_next += 2;
166           n_left_to_next -= 2;
167
168           p0 = vlib_get_buffer (vm, pi0);
169           p1 = vlib_get_buffer (vm, pi1);
170
171           ip0 = vlib_buffer_get_current (p0);
172           ip1 = vlib_buffer_get_current (p1);
173           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
174           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
175
176           lb0 = load_balance_get (lbi0);
177           lb1 = load_balance_get (lbi1);
178
179           /*
180            * this node is for via FIBs we can re-use the hash value from the
181            * to node if present.
182            * We don't want to use the same hash value at each level in the recursion
183            * graph as that would lead to polarisation
184            */
185           hc0 = hc1 = 0;
186
187           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
188             {
189               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
190                 {
191                   hc0 = vnet_buffer (p0)->ip.flow_hash =
192                     vnet_buffer (p0)->ip.flow_hash >> 1;
193                 }
194               else
195                 {
196                   hc0 = vnet_buffer (p0)->ip.flow_hash =
197                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
198                 }
199               dpo0 = load_balance_get_fwd_bucket
200                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
201             }
202           else
203             {
204               dpo0 = load_balance_get_bucket_i (lb0, 0);
205             }
206           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
207             {
208               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
209                 {
210                   hc1 = vnet_buffer (p1)->ip.flow_hash =
211                     vnet_buffer (p1)->ip.flow_hash >> 1;
212                 }
213               else
214                 {
215                   hc1 = vnet_buffer (p1)->ip.flow_hash =
216                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
217                 }
218               dpo1 = load_balance_get_fwd_bucket
219                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
220             }
221           else
222             {
223               dpo1 = load_balance_get_bucket_i (lb1, 0);
224             }
225
226           next0 = dpo0->dpoi_next_node;
227           next1 = dpo1->dpoi_next_node;
228
229           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
230           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
231
232           vlib_increment_combined_counter
233             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
234           vlib_increment_combined_counter
235             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
236
237           vlib_validate_buffer_enqueue_x2 (vm, node, next,
238                                            to_next, n_left_to_next,
239                                            pi0, pi1, next0, next1);
240         }
241
242       while (n_left_from > 0 && n_left_to_next > 0)
243         {
244           ip_lookup_next_t next0;
245           const load_balance_t *lb0;
246           vlib_buffer_t *p0;
247           u32 pi0, lbi0, hc0;
248           const ip4_header_t *ip0;
249           const dpo_id_t *dpo0;
250
251           pi0 = from[0];
252           to_next[0] = pi0;
253           from += 1;
254           to_next += 1;
255           n_left_to_next -= 1;
256           n_left_from -= 1;
257
258           p0 = vlib_get_buffer (vm, pi0);
259
260           ip0 = vlib_buffer_get_current (p0);
261           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
262
263           lb0 = load_balance_get (lbi0);
264
265           hc0 = 0;
266           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
267             {
268               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
269                 {
270                   hc0 = vnet_buffer (p0)->ip.flow_hash =
271                     vnet_buffer (p0)->ip.flow_hash >> 1;
272                 }
273               else
274                 {
275                   hc0 = vnet_buffer (p0)->ip.flow_hash =
276                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
277                 }
278               dpo0 = load_balance_get_fwd_bucket
279                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
280             }
281           else
282             {
283               dpo0 = load_balance_get_bucket_i (lb0, 0);
284             }
285
286           next0 = dpo0->dpoi_next_node;
287           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
288
289           vlib_increment_combined_counter
290             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
291
292           vlib_validate_buffer_enqueue_x1 (vm, node, next,
293                                            to_next, n_left_to_next,
294                                            pi0, next0);
295         }
296
297       vlib_put_next_frame (vm, node, next, n_left_to_next);
298     }
299
300   return frame->n_vectors;
301 }
302
303 /* *INDENT-OFF* */
304 VLIB_REGISTER_NODE (ip4_load_balance_node) =
305 {
306   .function = ip4_load_balance,
307   .name = "ip4-load-balance",
308   .vector_size = sizeof (u32),
309   .sibling_of = "ip4-lookup",
310   .format_trace =
311   format_ip4_lookup_trace,
312 };
313 /* *INDENT-ON* */
314
315 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
316
317 /* get first interface address */
318 ip4_address_t *
319 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
320                              ip_interface_address_t ** result_ia)
321 {
322   ip_lookup_main_t *lm = &im->lookup_main;
323   ip_interface_address_t *ia = 0;
324   ip4_address_t *result = 0;
325
326   /* *INDENT-OFF* */
327   foreach_ip_interface_address
328     (lm, ia, sw_if_index,
329      1 /* honor unnumbered */ ,
330      ({
331        ip4_address_t * a =
332          ip_interface_address_get_address (lm, ia);
333        result = a;
334        break;
335      }));
336   /* *INDENT-OFF* */
337   if (result_ia)
338     *result_ia = result ? ia : 0;
339   return result;
340 }
341
342 static void
343 ip4_add_interface_routes (u32 sw_if_index,
344                           ip4_main_t * im, u32 fib_index,
345                           ip_interface_address_t * a)
346 {
347   ip_lookup_main_t *lm = &im->lookup_main;
348   ip4_address_t *address = ip_interface_address_get_address (lm, a);
349   fib_prefix_t pfx = {
350     .fp_len = a->address_length,
351     .fp_proto = FIB_PROTOCOL_IP4,
352     .fp_addr.ip4 = *address,
353   };
354
355   if (pfx.fp_len <= 30)
356     {
357       /* a /30 or shorter - add a glean for the network address */
358       fib_table_entry_update_one_path (fib_index, &pfx,
359                                        FIB_SOURCE_INTERFACE,
360                                        (FIB_ENTRY_FLAG_CONNECTED |
361                                         FIB_ENTRY_FLAG_ATTACHED),
362                                        DPO_PROTO_IP4,
363                                        /* No next-hop address */
364                                        NULL,
365                                        sw_if_index,
366                                        // invalid FIB index
367                                        ~0,
368                                        1,
369                                        // no out-label stack
370                                        NULL,
371                                        FIB_ROUTE_PATH_FLAG_NONE);
372
373       /* Add the two broadcast addresses as drop */
374       fib_prefix_t net_pfx = {
375         .fp_len = 32,
376         .fp_proto = FIB_PROTOCOL_IP4,
377         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
378       };
379       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
380         fib_table_entry_special_add(fib_index,
381                                     &net_pfx,
382                                     FIB_SOURCE_INTERFACE,
383                                     (FIB_ENTRY_FLAG_DROP |
384                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
385       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
386       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
387         fib_table_entry_special_add(fib_index,
388                                     &net_pfx,
389                                     FIB_SOURCE_INTERFACE,
390                                     (FIB_ENTRY_FLAG_DROP |
391                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
392     }
393   else if (pfx.fp_len == 31)
394     {
395       u32 mask = clib_host_to_net_u32(1);
396       fib_prefix_t net_pfx = pfx;
397
398       net_pfx.fp_len = 32;
399       net_pfx.fp_addr.ip4.as_u32 ^= mask;
400
401       /* a /31 - add the other end as an attached host */
402       fib_table_entry_update_one_path (fib_index, &net_pfx,
403                                        FIB_SOURCE_INTERFACE,
404                                        (FIB_ENTRY_FLAG_ATTACHED),
405                                        DPO_PROTO_IP4,
406                                        &net_pfx.fp_addr,
407                                        sw_if_index,
408                                        // invalid FIB index
409                                        ~0,
410                                        1,
411                                        NULL,
412                                        FIB_ROUTE_PATH_FLAG_NONE);
413     }
414   pfx.fp_len = 32;
415
416   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
417     {
418       u32 classify_table_index =
419         lm->classify_table_index_by_sw_if_index[sw_if_index];
420       if (classify_table_index != (u32) ~ 0)
421         {
422           dpo_id_t dpo = DPO_INVALID;
423
424           dpo_set (&dpo,
425                    DPO_CLASSIFY,
426                    DPO_PROTO_IP4,
427                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
428
429           fib_table_entry_special_dpo_add (fib_index,
430                                            &pfx,
431                                            FIB_SOURCE_CLASSIFY,
432                                            FIB_ENTRY_FLAG_NONE, &dpo);
433           dpo_reset (&dpo);
434         }
435     }
436
437   fib_table_entry_update_one_path (fib_index, &pfx,
438                                    FIB_SOURCE_INTERFACE,
439                                    (FIB_ENTRY_FLAG_CONNECTED |
440                                     FIB_ENTRY_FLAG_LOCAL),
441                                    DPO_PROTO_IP4,
442                                    &pfx.fp_addr,
443                                    sw_if_index,
444                                    // invalid FIB index
445                                    ~0,
446                                    1, NULL,
447                                    FIB_ROUTE_PATH_FLAG_NONE);
448 }
449
450 static void
451 ip4_del_interface_routes (ip4_main_t * im,
452                           u32 fib_index,
453                           ip4_address_t * address, u32 address_length)
454 {
455   fib_prefix_t pfx = {
456     .fp_len = address_length,
457     .fp_proto = FIB_PROTOCOL_IP4,
458     .fp_addr.ip4 = *address,
459   };
460
461   if (pfx.fp_len <= 30)
462     {
463       fib_prefix_t net_pfx = {
464         .fp_len = 32,
465         .fp_proto = FIB_PROTOCOL_IP4,
466         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
467       };
468       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
469         fib_table_entry_special_remove(fib_index,
470                                        &net_pfx,
471                                        FIB_SOURCE_INTERFACE);
472       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
473       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
474         fib_table_entry_special_remove(fib_index,
475                                        &net_pfx,
476                                        FIB_SOURCE_INTERFACE);
477       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
478     }
479     else if (pfx.fp_len == 31)
480     {
481       u32 mask = clib_host_to_net_u32(1);
482       fib_prefix_t net_pfx = pfx;
483
484       net_pfx.fp_len = 32;
485       net_pfx.fp_addr.ip4.as_u32 ^= mask;
486
487       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
488     }
489
490   pfx.fp_len = 32;
491   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
492 }
493
494 void
495 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
496 {
497   ip4_main_t *im = &ip4_main;
498
499   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
500
501   /*
502    * enable/disable only on the 1<->0 transition
503    */
504   if (is_enable)
505     {
506       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
507         return;
508     }
509   else
510     {
511       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
512       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
513         return;
514     }
515   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
516                                !is_enable, 0, 0);
517
518
519   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
520                                sw_if_index, !is_enable, 0, 0);
521 }
522
523 static clib_error_t *
524 ip4_add_del_interface_address_internal (vlib_main_t * vm,
525                                         u32 sw_if_index,
526                                         ip4_address_t * address,
527                                         u32 address_length, u32 is_del)
528 {
529   vnet_main_t *vnm = vnet_get_main ();
530   ip4_main_t *im = &ip4_main;
531   ip_lookup_main_t *lm = &im->lookup_main;
532   clib_error_t *error = 0;
533   u32 if_address_index, elts_before;
534   ip4_address_fib_t ip4_af, *addr_fib = 0;
535
536   /* local0 interface doesn't support IP addressing  */
537   if (sw_if_index == 0)
538     {
539       return
540        clib_error_create ("local0 interface doesn't support IP addressing");
541     }
542
543   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
544   ip4_addr_fib_init (&ip4_af, address,
545                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
546   vec_add1 (addr_fib, ip4_af);
547
548   /*
549    * there is no support for adj-fib handling in the presence of overlapping
550    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
551    * most routers do.
552    */
553   /* *INDENT-OFF* */
554   if (!is_del)
555     {
556       /* When adding an address check that it does not conflict
557          with an existing address on any interface in this table. */
558       ip_interface_address_t *ia;
559       vnet_sw_interface_t *sif;
560
561       pool_foreach(sif, vnm->interface_main.sw_interfaces,
562       ({
563           if (im->fib_index_by_sw_if_index[sw_if_index] ==
564               im->fib_index_by_sw_if_index[sif->sw_if_index])
565             {
566               foreach_ip_interface_address
567                 (&im->lookup_main, ia, sif->sw_if_index,
568                  0 /* honor unnumbered */ ,
569                  ({
570                    ip4_address_t * x =
571                      ip_interface_address_get_address
572                      (&im->lookup_main, ia);
573                    if (ip4_destination_matches_route
574                        (im, address, x, ia->address_length) ||
575                        ip4_destination_matches_route (im,
576                                                       x,
577                                                       address,
578                                                       address_length))
579                      {
580                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
581
582                        return
583                          clib_error_create
584                          ("failed to add %U which conflicts with %U for interface %U",
585                           format_ip4_address_and_length, address,
586                           address_length,
587                           format_ip4_address_and_length, x,
588                           ia->address_length,
589                           format_vnet_sw_if_index_name, vnm,
590                           sif->sw_if_index);
591                      }
592                  }));
593             }
594       }));
595     }
596   /* *INDENT-ON* */
597
598   elts_before = pool_elts (lm->if_address_pool);
599
600   error = ip_interface_address_add_del
601     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
602   if (error)
603     goto done;
604
605   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
606
607   if (is_del)
608     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
609   else
610     ip4_add_interface_routes (sw_if_index,
611                               im, ip4_af.fib_index,
612                               pool_elt_at_index
613                               (lm->if_address_pool, if_address_index));
614
615   /* If pool did not grow/shrink: add duplicate address. */
616   if (elts_before != pool_elts (lm->if_address_pool))
617     {
618       ip4_add_del_interface_address_callback_t *cb;
619       vec_foreach (cb, im->add_del_interface_address_callbacks)
620         cb->function (im, cb->function_opaque, sw_if_index,
621                       address, address_length, if_address_index, is_del);
622     }
623
624 done:
625   vec_free (addr_fib);
626   return error;
627 }
628
629 clib_error_t *
630 ip4_add_del_interface_address (vlib_main_t * vm,
631                                u32 sw_if_index,
632                                ip4_address_t * address,
633                                u32 address_length, u32 is_del)
634 {
635   return ip4_add_del_interface_address_internal
636     (vm, sw_if_index, address, address_length, is_del);
637 }
638
639 /* Built-in ip4 unicast rx feature path definition */
640 /* *INDENT-OFF* */
641 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
642 {
643   .arc_name = "ip4-unicast",
644   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
645   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
646 };
647
648 VNET_FEATURE_INIT (ip4_flow_classify, static) =
649 {
650   .arc_name = "ip4-unicast",
651   .node_name = "ip4-flow-classify",
652   .runs_before = VNET_FEATURES ("ip4-inacl"),
653 };
654
655 VNET_FEATURE_INIT (ip4_inacl, static) =
656 {
657   .arc_name = "ip4-unicast",
658   .node_name = "ip4-inacl",
659   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
660 };
661
662 VNET_FEATURE_INIT (ip4_source_check_1, static) =
663 {
664   .arc_name = "ip4-unicast",
665   .node_name = "ip4-source-check-via-rx",
666   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
667 };
668
669 VNET_FEATURE_INIT (ip4_source_check_2, static) =
670 {
671   .arc_name = "ip4-unicast",
672   .node_name = "ip4-source-check-via-any",
673   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
674 };
675
676 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
677 {
678   .arc_name = "ip4-unicast",
679   .node_name = "ip4-source-and-port-range-check-rx",
680   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
681 };
682
683 VNET_FEATURE_INIT (ip4_policer_classify, static) =
684 {
685   .arc_name = "ip4-unicast",
686   .node_name = "ip4-policer-classify",
687   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
688 };
689
690 VNET_FEATURE_INIT (ip4_ipsec, static) =
691 {
692   .arc_name = "ip4-unicast",
693   .node_name = "ipsec-input-ip4",
694   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
695 };
696
697 VNET_FEATURE_INIT (ip4_vpath, static) =
698 {
699   .arc_name = "ip4-unicast",
700   .node_name = "vpath-input-ip4",
701   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
702 };
703
704 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
705 {
706   .arc_name = "ip4-unicast",
707   .node_name = "ip4-vxlan-bypass",
708   .runs_before = VNET_FEATURES ("ip4-lookup"),
709 };
710
711 VNET_FEATURE_INIT (ip4_not_enabled, static) =
712 {
713   .arc_name = "ip4-unicast",
714   .node_name = "ip4-not-enabled",
715   .runs_before = VNET_FEATURES ("ip4-lookup"),
716 };
717
718 VNET_FEATURE_INIT (ip4_lookup, static) =
719 {
720   .arc_name = "ip4-unicast",
721   .node_name = "ip4-lookup",
722   .runs_before = 0,     /* not before any other features */
723 };
724
725 /* Built-in ip4 multicast rx feature path definition */
726 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
727 {
728   .arc_name = "ip4-multicast",
729   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
730   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
731 };
732
733 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
734 {
735   .arc_name = "ip4-multicast",
736   .node_name = "vpath-input-ip4",
737   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
738 };
739
740 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
741 {
742   .arc_name = "ip4-multicast",
743   .node_name = "ip4-not-enabled",
744   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
745 };
746
747 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
748 {
749   .arc_name = "ip4-multicast",
750   .node_name = "ip4-mfib-forward-lookup",
751   .runs_before = 0,     /* last feature */
752 };
753
754 /* Source and port-range check ip4 tx feature path definition */
755 VNET_FEATURE_ARC_INIT (ip4_output, static) =
756 {
757   .arc_name = "ip4-output",
758   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
759   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
760 };
761
762 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
763 {
764   .arc_name = "ip4-output",
765   .node_name = "ip4-source-and-port-range-check-tx",
766   .runs_before = VNET_FEATURES ("ip4-outacl"),
767 };
768
769 VNET_FEATURE_INIT (ip4_outacl, static) =
770 {
771   .arc_name = "ip4-output",
772   .node_name = "ip4-outacl",
773   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
774 };
775
776 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
777 {
778   .arc_name = "ip4-output",
779   .node_name = "ipsec-output-ip4",
780   .runs_before = VNET_FEATURES ("interface-output"),
781 };
782
783 /* Built-in ip4 tx feature path definition */
784 VNET_FEATURE_INIT (ip4_interface_output, static) =
785 {
786   .arc_name = "ip4-output",
787   .node_name = "interface-output",
788   .runs_before = 0,     /* not before any other features */
789 };
790 /* *INDENT-ON* */
791
792 static clib_error_t *
793 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
794 {
795   ip4_main_t *im = &ip4_main;
796
797   /* Fill in lookup tables with default table (0). */
798   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
799   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
800
801   if (!is_add)
802     {
803       ip4_main_t *im4 = &ip4_main;
804       ip_lookup_main_t *lm4 = &im4->lookup_main;
805       ip_interface_address_t *ia = 0;
806       ip4_address_t *address;
807       vlib_main_t *vm = vlib_get_main ();
808
809       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
810       /* *INDENT-OFF* */
811       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
812       ({
813         address = ip_interface_address_get_address (lm4, ia);
814         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
815       }));
816       /* *INDENT-ON* */
817     }
818
819   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
820                                is_add, 0, 0);
821
822   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
823                                sw_if_index, is_add, 0, 0);
824
825   return /* no error */ 0;
826 }
827
828 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
829
830 /* Global IP4 main. */
831 ip4_main_t ip4_main;
832
833 clib_error_t *
834 ip4_lookup_init (vlib_main_t * vm)
835 {
836   ip4_main_t *im = &ip4_main;
837   clib_error_t *error;
838   uword i;
839
840   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
841     return error;
842   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
843     return (error);
844   if ((error = vlib_call_init_function (vm, fib_module_init)))
845     return error;
846   if ((error = vlib_call_init_function (vm, mfib_module_init)))
847     return error;
848
849   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
850     {
851       u32 m;
852
853       if (i < 32)
854         m = pow2_mask (i) << (32 - i);
855       else
856         m = ~0;
857       im->fib_masks[i] = clib_host_to_net_u32 (m);
858     }
859
860   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
861
862   /* Create FIB with index 0 and table id of 0. */
863   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
864                                      FIB_SOURCE_DEFAULT_ROUTE);
865   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
866                                       MFIB_SOURCE_DEFAULT_ROUTE);
867
868   {
869     pg_node_t *pn;
870     pn = pg_get_node (ip4_lookup_node.index);
871     pn->unformat_edit = unformat_pg_ip4_header;
872   }
873
874   {
875     ethernet_arp_header_t h;
876
877     memset (&h, 0, sizeof (h));
878
879     /* Set target ethernet address to all zeros. */
880     memset (h.ip4_over_ethernet[1].ethernet, 0,
881             sizeof (h.ip4_over_ethernet[1].ethernet));
882
883 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
884 #define _8(f,v) h.f = v;
885     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
886     _16 (l3_type, ETHERNET_TYPE_IP4);
887     _8 (n_l2_address_bytes, 6);
888     _8 (n_l3_address_bytes, 4);
889     _16 (opcode, ETHERNET_ARP_OPCODE_request);
890 #undef _16
891 #undef _8
892
893     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
894                                /* data */ &h,
895                                sizeof (h),
896                                /* alloc chunk size */ 8,
897                                "ip4 arp");
898   }
899
900   return error;
901 }
902
903 VLIB_INIT_FUNCTION (ip4_lookup_init);
904
/* Per-packet trace record filled in by ip4_forward_next_trace() and
   decoded by the format_ip4_*_trace functions. */
typedef struct
{
  /* Adjacency taken. */
  u32 dpo_index;
  /* Copy of the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is set, otherwise the FIB
     index bound to the buffer's RX interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
916
917 u8 *
918 format_ip4_forward_next_trace (u8 * s, va_list * args)
919 {
920   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
921   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
922   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
923   u32 indent = format_get_indent (s);
924   s = format (s, "%U%U",
925               format_white_space, indent,
926               format_ip4_header, t->packet_data, sizeof (t->packet_data));
927   return s;
928 }
929
930 static u8 *
931 format_ip4_lookup_trace (u8 * s, va_list * args)
932 {
933   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
934   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
935   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
936   u32 indent = format_get_indent (s);
937
938   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
939               t->fib_index, t->dpo_index, t->flow_hash);
940   s = format (s, "\n%U%U",
941               format_white_space, indent,
942               format_ip4_header, t->packet_data, sizeof (t->packet_data));
943   return s;
944 }
945
946 static u8 *
947 format_ip4_rewrite_trace (u8 * s, va_list * args)
948 {
949   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
950   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
951   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
952   u32 indent = format_get_indent (s);
953
954   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
955               t->fib_index, t->dpo_index, format_ip_adjacency,
956               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
957   s = format (s, "\n%U%U",
958               format_white_space, indent,
959               format_ip_adjacency_packet_data,
960               t->dpo_index, t->packet_data, sizeof (t->packet_data));
961   return s;
962 }
963
964 /* Common trace function for all ip4-forward next nodes. */
965 void
966 ip4_forward_next_trace (vlib_main_t * vm,
967                         vlib_node_runtime_t * node,
968                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
969 {
970   u32 *from, n_left;
971   ip4_main_t *im = &ip4_main;
972
973   n_left = frame->n_vectors;
974   from = vlib_frame_vector_args (frame);
975
976   while (n_left >= 4)
977     {
978       u32 bi0, bi1;
979       vlib_buffer_t *b0, *b1;
980       ip4_forward_next_trace_t *t0, *t1;
981
982       /* Prefetch next iteration. */
983       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
984       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
985
986       bi0 = from[0];
987       bi1 = from[1];
988
989       b0 = vlib_get_buffer (vm, bi0);
990       b1 = vlib_get_buffer (vm, bi1);
991
992       if (b0->flags & VLIB_BUFFER_IS_TRACED)
993         {
994           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
995           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
996           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
997           t0->fib_index =
998             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
999              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1000             vec_elt (im->fib_index_by_sw_if_index,
1001                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1002
1003           clib_memcpy (t0->packet_data,
1004                        vlib_buffer_get_current (b0),
1005                        sizeof (t0->packet_data));
1006         }
1007       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1008         {
1009           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1010           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1011           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1012           t1->fib_index =
1013             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1014              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1015             vec_elt (im->fib_index_by_sw_if_index,
1016                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1017           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1018                        sizeof (t1->packet_data));
1019         }
1020       from += 2;
1021       n_left -= 2;
1022     }
1023
1024   while (n_left >= 1)
1025     {
1026       u32 bi0;
1027       vlib_buffer_t *b0;
1028       ip4_forward_next_trace_t *t0;
1029
1030       bi0 = from[0];
1031
1032       b0 = vlib_get_buffer (vm, bi0);
1033
1034       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1035         {
1036           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1037           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1038           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1039           t0->fib_index =
1040             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1041              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1042             vec_elt (im->fib_index_by_sw_if_index,
1043                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1044           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1045                        sizeof (t0->packet_data));
1046         }
1047       from += 1;
1048       n_left -= 1;
1049     }
1050 }
1051
1052 /* Compute TCP/UDP/ICMP4 checksum in software. */
1053 u16
1054 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1055                               ip4_header_t * ip0)
1056 {
1057   ip_csum_t sum0;
1058   u32 ip_header_length, payload_length_host_byte_order;
1059   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1060   u16 sum16;
1061   void *data_this_buffer;
1062
1063   /* Initialize checksum with ip header. */
1064   ip_header_length = ip4_header_bytes (ip0);
1065   payload_length_host_byte_order =
1066     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1067   sum0 =
1068     clib_host_to_net_u32 (payload_length_host_byte_order +
1069                           (ip0->protocol << 16));
1070
1071   if (BITS (uword) == 32)
1072     {
1073       sum0 =
1074         ip_csum_with_carry (sum0,
1075                             clib_mem_unaligned (&ip0->src_address, u32));
1076       sum0 =
1077         ip_csum_with_carry (sum0,
1078                             clib_mem_unaligned (&ip0->dst_address, u32));
1079     }
1080   else
1081     sum0 =
1082       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1083
1084   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1085   data_this_buffer = (void *) ip0 + ip_header_length;
1086   n_ip_bytes_this_buffer =
1087     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1088   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1089     {
1090       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1091         n_ip_bytes_this_buffer - ip_header_length : 0;
1092     }
1093   while (1)
1094     {
1095       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1096       n_bytes_left -= n_this_buffer;
1097       if (n_bytes_left == 0)
1098         break;
1099
1100       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1101       p0 = vlib_get_buffer (vm, p0->next_buffer);
1102       data_this_buffer = vlib_buffer_get_current (p0);
1103       n_this_buffer = p0->current_length;
1104     }
1105
1106   sum16 = ~ip_csum_fold (sum0);
1107
1108   return sum16;
1109 }
1110
1111 u32
1112 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1113 {
1114   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1115   udp_header_t *udp0;
1116   u16 sum16;
1117
1118   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1119           || ip0->protocol == IP_PROTOCOL_UDP);
1120
1121   udp0 = (void *) (ip0 + 1);
1122   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1123     {
1124       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1125                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1126       return p0->flags;
1127     }
1128
1129   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1130
1131   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1132                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1133
1134   return p0->flags;
1135 }
1136
/* *INDENT-OFF* */
/* Declares the "ip4-local" feature arc; its only start node is the
   "ip4-local" node. */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
};
/* *INDENT-ON* */
1144
1145 static inline void
1146 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1147                        u8 is_udp, u8 * error, u8 * good_tcp_udp)
1148 {
1149   u32 flags0;
1150   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1151   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1152   if (is_udp)
1153     {
1154       udp_header_t *udp;
1155       u32 ip_len, udp_len;
1156       i32 len_diff;
1157       udp = ip4_next_header (ip);
1158       /* Verify UDP length. */
1159       ip_len = clib_net_to_host_u16 (ip->length);
1160       udp_len = clib_net_to_host_u16 (udp->length);
1161
1162       len_diff = ip_len - udp_len;
1163       *good_tcp_udp &= len_diff >= 0;
1164       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1165     }
1166 }
1167
/* True when a packet needs its L4 checksum validated in software: it is
   TCP or UDP and its buffer flags carry none of "checksum already
   computed", "TCP checksum offload" or "UDP checksum offload".
   Both arguments are parenthesized so that expressions such as a | b can
   be passed safely, and flags is evaluated only once. */
#define ip4_local_do_l4_check(is_tcp_udp, flags)                        \
    ((is_tcp_udp) && !((flags) & (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED    \
                                  | VNET_BUFFER_F_OFFLOAD_TCP_CKSUM     \
                                  | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)))
1172
/*
 * Worker shared by ip4-local and ip4-local-end-of-arc.
 *
 * For every buffer in frame the next node is taken from
 * lm->local_next_by_ip_protocol[] indexed by the IP protocol, unless an
 * error was found, in which case the buffer goes to IP_LOCAL_NEXT_DROP.
 *
 * When head_of_feature_arc is non-zero the packet is first checked:
 *  - TCP/UDP checksum (validated in software only when the buffer flags do
 *    not already say it was computed or offloaded);
 *  - a source-address lookup in the FIB mtrie, rejecting sources that
 *    resolve to a receive DPO and sources that fail the uRPF size check
 *    (unless the destination is 255.255.255.255);
 * and packets without an error are then handed to
 * vnet_feature_arc_start() on the ip4-local arc.
 * When head_of_feature_arc is zero all of these checks are skipped.
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL is the initial value of error0/error1 and is
 * used throughout as the "no error found so far" marker.  Error counters
 * are taken from the runtime of ip4_input_node (error_node).
 *
 * Returns frame->n_vectors.
 */
static inline uword
ip4_local_inline (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame, int head_of_feature_arc)
{
  ip4_main_t *im = &ip4_main;
  ip_lookup_main_t *lm = &im->lookup_main;
  ip_local_next_t next_index;
  u32 *from, *to_next, n_left_from, n_left_to_next;
  vlib_node_runtime_t *error_node =
    vlib_node_get_runtime (vm, ip4_input_node.index);
  u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next_index = node->cached_next_index;

  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);

      /* Dual loop: two packets per iteration. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          vlib_buffer_t *p0, *p1;
          ip4_header_t *ip0, *ip1;
          ip4_fib_mtrie_t *mtrie0, *mtrie1;
          ip4_fib_mtrie_leaf_t leaf0, leaf1;
          const dpo_id_t *dpo0, *dpo1;
          const load_balance_t *lb0, *lb1;
          u32 pi0, next0, fib_index0, lbi0;
          u32 pi1, next1, fib_index1, lbi1;
          u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
          u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
          u32 sw_if_index0, sw_if_index1;

          /* Speculatively enqueue both buffers to the current next frame. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];
          from += 2;
          n_left_from -= 2;
          to_next += 2;
          n_left_to_next -= 2;

          next0 = next1 = IP_LOCAL_NEXT_DROP;
          error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);

          vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
          vnet_buffer (p1)->l3_hdr_offset = p1->current_data;

          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
          sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];

          proto0 = ip0->protocol;
          proto1 = ip1->protocol;

          /* Not at the head of the arc: skip every check below. */
          if (head_of_feature_arc == 0)
            goto skip_checks;

          is_udp0 = proto0 == IP_PROTOCOL_UDP;
          is_udp1 = proto1 == IP_PROTOCOL_UDP;
          is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
          is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;

          /* Start from what the buffer flags already say: checksum known
             correct, or left to TCP/UDP checksum offload. */
          good_tcp_udp0 =
            (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
             || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
                 || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
          good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
                           || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
                               || p1->flags &
                               VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;

          /* If either packet needs a software check, validate every
             TCP/UDP packet of the pair. */
          if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
                             || ip4_local_do_l4_check (is_tcp_udp1,
                                                       p1->flags)))
            {
              if (is_tcp_udp0)
                ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
                                       &good_tcp_udp0);
              if (is_tcp_udp1)
                ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
                                       &good_tcp_udp1);
            }

          /* The UDP checksum error code is obtained as TCP code + 1.
             NOTE(review): ip4_local_validate_l4() also clears
             good_tcp_udp on a UDP length failure, so the error it stored
             is replaced by the checksum error here. */
          ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
          error0 = (is_tcp_udp0 && !good_tcp_udp0
                    ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
          error1 = (is_tcp_udp1 && !good_tcp_udp1
                    ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);

          /* FIB index: bound to the RX interface unless sw_if_index[VLIB_TX]
             has been set on the buffer, in which case that value is used. */
          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
          fib_index0 =
            (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
             (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];

          fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
          fib_index1 =
            (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
             (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];

          /* TODO maybe move to lookup? */
          vnet_buffer (p0)->ip.fib_index = fib_index0;
          vnet_buffer (p1)->ip.fib_index = fib_index1;

          mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
          mtrie1 = &ip4_fib_get (fib_index1)->mtrie;

          /* Look up the SOURCE address of each packet. */
          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
          leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
                                             2);
          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
                                             2);
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
                                             3);
          leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
                                             3);

          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
            ip4_fib_mtrie_leaf_get_adj_index (leaf0);
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;

          vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
            ip4_fib_mtrie_leaf_get_adj_index (leaf1);
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;

          lb0 = load_balance_get (lbi0);
          lb1 = load_balance_get (lbi1);
          dpo0 = load_balance_get_bucket_i (lb0, 0);
          dpo1 = load_balance_get_bucket_i (lb1, 0);

          /*
           * Must have a route to source otherwise we drop the packet.
           * ip4 broadcasts are accepted, e.g. to make dhcp client work
           *
           * The checks are:
           *  - the source is a receive => it's from us => bogus, do this
           *    first since it sets a different error code.
           *  - uRPF check for any route to source - accept if passes.
           *  - allow packets destined to the broadcast address from unknown sources
           */
          /* Buffers flagged as NATed skip the source checks only. */
          if (p0->flags & VNET_BUFFER_F_IS_NATED)
            goto skip_check0;

          error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
                     dpo0->dpoi_type == DPO_RECEIVE) ?
                    IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
          error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
                     !fib_urpf_check_size (lb0->lb_urpf) &&
                     ip0->dst_address.as_u32 != 0xFFFFFFFF)
                    ? IP4_ERROR_SRC_LOOKUP_MISS : error0);

        skip_check0:
          if (p1->flags & VNET_BUFFER_F_IS_NATED)
            goto skip_checks;

          error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
                     dpo1->dpoi_type == DPO_RECEIVE) ?
                    IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
          error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
                     !fib_urpf_check_size (lb1->lb_urpf) &&
                     ip1->dst_address.as_u32 != 0xFFFFFFFF)
                    ? IP4_ERROR_SRC_LOOKUP_MISS : error1);

        skip_checks:

          /* Protocol-specific next node, overridden by drop on any error. */
          next0 = lm->local_next_by_ip_protocol[proto0];
          next1 = lm->local_next_by_ip_protocol[proto1];

          next0 =
            error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
          next1 =
            error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;

          p0->error = error0 ? error_node->errors[error0] : 0;
          p1->error = error1 ? error_node->errors[error1] : 0;

          /* Only packets without an error enter the feature arc. */
          if (head_of_feature_arc)
            {
              if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
                vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
              if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
                vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
            }

          vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, pi1,
                                           next0, next1);
        }

      /* Single loop: remaining packets, one per iteration. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          vlib_buffer_t *p0;
          ip4_header_t *ip0;
          ip4_fib_mtrie_t *mtrie0;
          ip4_fib_mtrie_leaf_t leaf0;
          u32 pi0, next0, fib_index0, lbi0;
          u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
          load_balance_t *lb0;
          const dpo_id_t *dpo0;
          u32 sw_if_index0;

          pi0 = to_next[0] = from[0];
          from += 1;
          n_left_from -= 1;
          to_next += 1;
          n_left_to_next -= 1;

          next0 = IP_LOCAL_NEXT_DROP;
          error0 = IP4_ERROR_UNKNOWN_PROTOCOL;

          p0 = vlib_get_buffer (vm, pi0);
          ip0 = vlib_buffer_get_current (p0);
          vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
          sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];

          proto0 = ip0->protocol;

          /* NOTE(review): here a NATed buffer skips the L4 checksum
             validation and the fib_index/adj_index bookkeeping as well,
             whereas the dual loop above skips only the source checks for
             it - confirm the difference is intended. */
          if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
            goto skip_check;

          is_udp0 = proto0 == IP_PROTOCOL_UDP;
          is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;

          good_tcp_udp0 =
            (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
             || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
                 || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;

          if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
            {
              ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
                                     &good_tcp_udp0);
            }

          ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
          error0 = (is_tcp_udp0 && !good_tcp_udp0
                    ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);

          fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
          fib_index0 =
            (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
             (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
          vnet_buffer (p0)->ip.fib_index = fib_index0;
          mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
          /* Source-address lookup, as in the dual loop. */
          leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
                                             2);
          leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
                                             3);
          lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
          vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;

          lb0 = load_balance_get (lbi0);
          dpo0 = load_balance_get_bucket_i (lb0, 0);

          /* Source resolves to a receive DPO => spoofed local packet;
             otherwise require the uRPF size check to pass unless the
             destination is the limited broadcast address. */
          error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
                     dpo0->dpoi_type == DPO_RECEIVE) ?
                    IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
          error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
                     !fib_urpf_check_size (lb0->lb_urpf) &&
                     ip0->dst_address.as_u32 != 0xFFFFFFFF)
                    ? IP4_ERROR_SRC_LOOKUP_MISS : error0);

        skip_check:
          next0 = lm->local_next_by_ip_protocol[proto0];
          next0 =
            error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;

          p0->error = error0 ? error_node->errors[error0] : 0;

          if (head_of_feature_arc)
            {
              if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
                vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
            }

          vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
                                           n_left_to_next, pi0, next0);
        }
      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
    }

  return frame->n_vectors;
}
1467
1468 static uword
1469 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1470 {
1471   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1472 }
1473
/* *INDENT-OFF* */
/* Graph node registration for "ip4-local": dispatch function ip4_local,
   traces rendered with format_ip4_forward_next_trace, and the fixed next
   nodes indexed by the IP_LOCAL_NEXT_* values. */
VLIB_REGISTER_NODE (ip4_local_node) =
{
  .function = ip4_local,
  .name = "ip4-local",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_next_nodes = IP_LOCAL_N_NEXT,
  .next_nodes =
  {
    [IP_LOCAL_NEXT_DROP] = "ip4-drop",
    [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
    [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
    [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
  },
};
/* *INDENT-ON* */

VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1493
1494 static uword
1495 ip4_local_end_of_arc (vlib_main_t * vm,
1496                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1497 {
1498   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1499 }
1500
/* *INDENT-OFF* */
/* Graph node registration for "ip4-local-end-of-arc".  It is declared as
   a sibling of "ip4-local" rather than listing next nodes of its own. */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
  .function = ip4_local_end_of_arc,
  .name = "ip4-local-end-of-arc",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_forward_next_trace,
  .sibling_of = "ip4-local",
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)

/* Registers "ip4-local-end-of-arc" as a feature on the "ip4-local" arc. */
VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
  .arc_name = "ip4-local",
  .node_name = "ip4-local-end-of-arc",
  .runs_before = 0, /* not before any other features */
};
/* *INDENT-ON* */
1519
1520 void
1521 ip4_register_protocol (u32 protocol, u32 node_index)
1522 {
1523   vlib_main_t *vm = vlib_get_main ();
1524   ip4_main_t *im = &ip4_main;
1525   ip_lookup_main_t *lm = &im->lookup_main;
1526
1527   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1528   lm->local_next_by_ip_protocol[protocol] =
1529     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1530 }
1531
1532 static clib_error_t *
1533 show_ip_local_command_fn (vlib_main_t * vm,
1534                           unformat_input_t * input, vlib_cli_command_t * cmd)
1535 {
1536   ip4_main_t *im = &ip4_main;
1537   ip_lookup_main_t *lm = &im->lookup_main;
1538   int i;
1539
1540   vlib_cli_output (vm, "Protocols handled by ip4_local");
1541   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1542     {
1543       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1544         {
1545           u32 node_index = vlib_get_node (vm,
1546                                           ip4_local_node.index)->
1547             next_nodes[lm->local_next_by_ip_protocol[i]];
1548           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1549                            node_index);
1550         }
1551     }
1552   return 0;
1553 }
1554
1555
1556
/*?
 * Display the set of protocols handled by the local IPv4 stack.
 *
 * Each entry is printed as '<em>protocol-number</em>: <em>node-name</em>',
 * where the node is the graph node ip4-local hands that protocol to.
 * The listing below shows only the protocol numbers.
 *
 * @cliexpar
 * Example of how to display local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1
 * 17
 * 47
 * @cliexend
?*/
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
  .path = "show ip local",
  .function = show_ip_local_command_fn,
  .short_help = "show ip local",
};
/* *INDENT-ON* */
1577
1578 always_inline uword
1579 ip4_arp_inline (vlib_main_t * vm,
1580                 vlib_node_runtime_t * node,
1581                 vlib_frame_t * frame, int is_glean)
1582 {
1583   vnet_main_t *vnm = vnet_get_main ();
1584   ip4_main_t *im = &ip4_main;
1585   ip_lookup_main_t *lm = &im->lookup_main;
1586   u32 *from, *to_next_drop;
1587   uword n_left_from, n_left_to_next_drop, next_index;
1588   static f64 time_last_seed_change = -1e100;
1589   static u32 hash_seeds[3];
1590   static uword hash_bitmap[256 / BITS (uword)];
1591   f64 time_now;
1592
1593   if (node->flags & VLIB_NODE_FLAG_TRACE)
1594     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1595
1596   time_now = vlib_time_now (vm);
1597   if (time_now - time_last_seed_change > 1e-3)
1598     {
1599       uword i;
1600       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1601                                             sizeof (hash_seeds));
1602       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1603         hash_seeds[i] = r[i];
1604
1605       /* Mark all hash keys as been no-seen before. */
1606       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1607         hash_bitmap[i] = 0;
1608
1609       time_last_seed_change = time_now;
1610     }
1611
1612   from = vlib_frame_vector_args (frame);
1613   n_left_from = frame->n_vectors;
1614   next_index = node->cached_next_index;
1615   if (next_index == IP4_ARP_NEXT_DROP)
1616     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1617
1618   while (n_left_from > 0)
1619     {
1620       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1621                            to_next_drop, n_left_to_next_drop);
1622
1623       while (n_left_from > 0 && n_left_to_next_drop > 0)
1624         {
1625           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1626           ip_adjacency_t *adj0;
1627           vlib_buffer_t *p0;
1628           ip4_header_t *ip0;
1629           uword bm0;
1630
1631           pi0 = from[0];
1632
1633           p0 = vlib_get_buffer (vm, pi0);
1634
1635           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1636           adj0 = adj_get (adj_index0);
1637           ip0 = vlib_buffer_get_current (p0);
1638
1639           a0 = hash_seeds[0];
1640           b0 = hash_seeds[1];
1641           c0 = hash_seeds[2];
1642
1643           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1644           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1645
1646           if (is_glean)
1647             {
1648               /*
1649                * this is the Glean case, so we are ARPing for the
1650                * packet's destination
1651                */
1652               a0 ^= ip0->dst_address.data_u32;
1653             }
1654           else
1655             {
1656               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1657             }
1658           b0 ^= sw_if_index0;
1659
1660           hash_v3_mix32 (a0, b0, c0);
1661           hash_v3_finalize32 (a0, b0, c0);
1662
1663           c0 &= BITS (hash_bitmap) - 1;
1664           m0 = (uword) 1 << (c0 % BITS (uword));
1665           c0 = c0 / BITS (uword);
1666
1667           bm0 = hash_bitmap[c0];
1668           drop0 = (bm0 & m0) != 0;
1669
1670           /* Mark it as seen. */
1671           hash_bitmap[c0] = bm0 | m0;
1672
1673           from += 1;
1674           n_left_from -= 1;
1675           to_next_drop[0] = pi0;
1676           to_next_drop += 1;
1677           n_left_to_next_drop -= 1;
1678
1679           p0->error =
1680             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1681                          IP4_ARP_ERROR_REQUEST_SENT];
1682
1683           /*
1684            * the adj has been updated to a rewrite but the node the DPO that got
1685            * us here hasn't - yet. no big deal. we'll drop while we wait.
1686            */
1687           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1688             continue;
1689
1690           if (drop0)
1691             continue;
1692
1693           /*
1694            * Can happen if the control-plane is programming tables
1695            * with traffic flowing; at least that's today's lame excuse.
1696            */
1697           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1698               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1699             {
1700               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1701             }
1702           else
1703             /* Send ARP request. */
1704             {
1705               u32 bi0 = 0;
1706               vlib_buffer_t *b0;
1707               ethernet_arp_header_t *h0;
1708               vnet_hw_interface_t *hw_if0;
1709
1710               h0 =
1711                 vlib_packet_template_get_packet (vm,
1712                                                  &im->ip4_arp_request_packet_template,
1713                                                  &bi0);
1714
1715               /* Seems we're out of buffers */
1716               if (PREDICT_FALSE (!h0))
1717                 continue;
1718
1719               /* Add rewrite/encap string for ARP packet. */
1720               vnet_rewrite_one_header (adj0[0], h0,
1721                                        sizeof (ethernet_header_t));
1722
1723               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1724
1725               /* Src ethernet address in ARP header. */
1726               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1727                            hw_if0->hw_address,
1728                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1729
1730               if (is_glean)
1731                 {
1732                   /* The interface's source address is stashed in the Glean Adj */
1733                   h0->ip4_over_ethernet[0].ip4 =
1734                     adj0->sub_type.glean.receive_addr.ip4;
1735
1736                   /* Copy in destination address we are requesting. This is the
1737                    * glean case, so it's the packet's destination.*/
1738                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1739                     ip0->dst_address.data_u32;
1740                 }
1741               else
1742                 {
1743                   /* Src IP address in ARP header. */
1744                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1745                                                   &h0->
1746                                                   ip4_over_ethernet[0].ip4))
1747                     {
1748                       /* No source address available */
1749                       p0->error =
1750                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1751                       vlib_buffer_free (vm, &bi0, 1);
1752                       continue;
1753                     }
1754
1755                   /* Copy in destination address we are requesting from the
1756                      incomplete adj */
1757                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1758                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1759                 }
1760
1761               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1762               b0 = vlib_get_buffer (vm, bi0);
1763               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1764               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1765
1766               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1767
1768               vlib_set_next_frame_buffer (vm, node,
1769                                           adj0->rewrite_header.next_index,
1770                                           bi0);
1771             }
1772         }
1773
1774       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1775     }
1776
1777   return frame->n_vectors;
1778 }
1779
1780 static uword
1781 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1782 {
1783   return (ip4_arp_inline (vm, node, frame, 0));
1784 }
1785
1786 static uword
1787 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1788 {
1789   return (ip4_arp_inline (vm, node, frame, 1));
1790 }
1791
1792 static char *ip4_arp_error_strings[] = {
1793   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1794   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1795   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1796   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1797   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1798   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1799 };
1800
1801 /* *INDENT-OFF* */
1802 VLIB_REGISTER_NODE (ip4_arp_node) =
1803 {
1804   .function = ip4_arp,
1805   .name = "ip4-arp",
1806   .vector_size = sizeof (u32),
1807   .format_trace = format_ip4_forward_next_trace,
1808   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1809   .error_strings = ip4_arp_error_strings,
1810   .n_next_nodes = IP4_ARP_N_NEXT,
1811   .next_nodes =
1812   {
1813     [IP4_ARP_NEXT_DROP] = "error-drop",
1814   },
1815 };
1816
1817 VLIB_REGISTER_NODE (ip4_glean_node) =
1818 {
1819   .function = ip4_glean,
1820   .name = "ip4-glean",
1821   .vector_size = sizeof (u32),
1822   .format_trace = format_ip4_forward_next_trace,
1823   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1824   .error_strings = ip4_arp_error_strings,
1825   .n_next_nodes = IP4_ARP_N_NEXT,
1826   .next_nodes = {
1827   [IP4_ARP_NEXT_DROP] = "error-drop",
1828   },
1829 };
1830 /* *INDENT-ON* */
1831
1832 #define foreach_notrace_ip4_arp_error           \
1833 _(DROP)                                         \
1834 _(REQUEST_SENT)                                 \
1835 _(REPLICATE_DROP)                               \
1836 _(REPLICATE_FAIL)
1837
1838 clib_error_t *
1839 arp_notrace_init (vlib_main_t * vm)
1840 {
1841   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1842
1843   /* don't trace ARP request packets */
1844 #define _(a)                                    \
1845     vnet_pcap_drop_trace_filter_add_del         \
1846         (rt->errors[IP4_ARP_ERROR_##a],         \
1847          1 /* is_add */);
1848   foreach_notrace_ip4_arp_error;
1849 #undef _
1850   return 0;
1851 }
1852
1853 VLIB_INIT_FUNCTION (arp_notrace_init);
1854
1855
1856 /* Send an ARP request to see if given destination is reachable on given interface. */
1857 clib_error_t *
1858 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
1859 {
1860   vnet_main_t *vnm = vnet_get_main ();
1861   ip4_main_t *im = &ip4_main;
1862   ethernet_arp_header_t *h;
1863   ip4_address_t *src;
1864   ip_interface_address_t *ia;
1865   ip_adjacency_t *adj;
1866   vnet_hw_interface_t *hi;
1867   vnet_sw_interface_t *si;
1868   vlib_buffer_t *b;
1869   adj_index_t ai;
1870   u32 bi = 0;
1871
1872   si = vnet_get_sw_interface (vnm, sw_if_index);
1873
1874   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1875     {
1876       return clib_error_return (0, "%U: interface %U down",
1877                                 format_ip4_address, dst,
1878                                 format_vnet_sw_if_index_name, vnm,
1879                                 sw_if_index);
1880     }
1881
1882   src =
1883     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
1884   if (!src)
1885     {
1886       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
1887       return clib_error_return
1888         (0,
1889          "no matching interface address for destination %U (interface %U)",
1890          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
1891          sw_if_index);
1892     }
1893
1894   h = vlib_packet_template_get_packet (vm,
1895                                        &im->ip4_arp_request_packet_template,
1896                                        &bi);
1897
1898   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
1899   if (PREDICT_FALSE (!hi->hw_address))
1900     {
1901       return clib_error_return (0, "%U: interface %U do not support ip probe",
1902                                 format_ip4_address, dst,
1903                                 format_vnet_sw_if_index_name, vnm,
1904                                 sw_if_index);
1905     }
1906
1907   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
1908                sizeof (h->ip4_over_ethernet[0].ethernet));
1909
1910   h->ip4_over_ethernet[0].ip4 = src[0];
1911   h->ip4_over_ethernet[1].ip4 = dst[0];
1912
1913   b = vlib_get_buffer (vm, bi);
1914   vnet_buffer (b)->sw_if_index[VLIB_RX] =
1915     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
1916
1917   ip46_address_t nh = {
1918     .ip4 = *dst,
1919   };
1920
1921   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
1922                             VNET_LINK_IP4, &nh, sw_if_index);
1923   adj = adj_get (ai);
1924
1925   /* Peer has been previously resolved, retrieve glean adj instead */
1926   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
1927     {
1928       adj_unlock (ai);
1929       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
1930                                   VNET_LINK_IP4, sw_if_index, &nh);
1931       adj = adj_get (ai);
1932     }
1933
1934   /* Add encapsulation string for software interface (e.g. ethernet header). */
1935   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
1936   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
1937
1938   {
1939     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
1940     u32 *to_next = vlib_frame_vector_args (f);
1941     to_next[0] = bi;
1942     f->n_vectors = 1;
1943     vlib_put_frame_to_node (vm, hi->output_node_index, f);
1944   }
1945
1946   adj_unlock (ai);
1947   return /* no error */ 0;
1948 }
1949
/*
 * Static next-node slots of ip4-rewrite and its sibling nodes.  Successfully
 * rewritten packets use the adjacency's own next_index instead.
 */
typedef enum
{
  /* Packet failed a check and is dropped */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Packet failed a check and an ICMP error is to be generated */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
1955
/**
 * The bits of an IPv4 address that are kept (masked in) when constructing
 * a multicast MAC address.  The constant is spelled per host byte order
 * because it is applied to the address as stored in the packet.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
1965
1966 always_inline void
1967 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
1968                u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
1969 {
1970   if (packet_len > adj_packet_bytes)
1971     {
1972       *error = IP4_ERROR_MTU_EXCEEDED;
1973       if (df)
1974         {
1975           icmp4_error_set_vnet_buffer
1976             (b, ICMP4_destination_unreachable,
1977              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
1978              adj_packet_bytes);
1979           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
1980         }
1981       else
1982         {
1983           /* Add support for fragmentation here */
1984           *next = IP4_REWRITE_NEXT_DROP;
1985         }
1986     }
1987 }
1988
1989 always_inline uword
1990 ip4_rewrite_inline (vlib_main_t * vm,
1991                     vlib_node_runtime_t * node,
1992                     vlib_frame_t * frame,
1993                     int do_counters, int is_midchain, int is_mcast)
1994 {
1995   ip_lookup_main_t *lm = &ip4_main.lookup_main;
1996   u32 *from = vlib_frame_vector_args (frame);
1997   u32 n_left_from, n_left_to_next, *to_next, next_index;
1998   vlib_node_runtime_t *error_node =
1999     vlib_node_get_runtime (vm, ip4_input_node.index);
2000
2001   n_left_from = frame->n_vectors;
2002   next_index = node->cached_next_index;
2003   u32 thread_index = vlib_get_thread_index ();
2004
2005   while (n_left_from > 0)
2006     {
2007       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2008
2009       while (n_left_from >= 4 && n_left_to_next >= 2)
2010         {
2011           ip_adjacency_t *adj0, *adj1;
2012           vlib_buffer_t *p0, *p1;
2013           ip4_header_t *ip0, *ip1;
2014           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2015           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2016           u32 tx_sw_if_index0, tx_sw_if_index1;
2017
2018           /* Prefetch next iteration. */
2019           {
2020             vlib_buffer_t *p2, *p3;
2021
2022             p2 = vlib_get_buffer (vm, from[2]);
2023             p3 = vlib_get_buffer (vm, from[3]);
2024
2025             vlib_prefetch_buffer_header (p2, STORE);
2026             vlib_prefetch_buffer_header (p3, STORE);
2027
2028             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2029             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2030           }
2031
2032           pi0 = to_next[0] = from[0];
2033           pi1 = to_next[1] = from[1];
2034
2035           from += 2;
2036           n_left_from -= 2;
2037           to_next += 2;
2038           n_left_to_next -= 2;
2039
2040           p0 = vlib_get_buffer (vm, pi0);
2041           p1 = vlib_get_buffer (vm, pi1);
2042
2043           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2044           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2045
2046           /*
2047            * pre-fetch the per-adjacency counters
2048            */
2049           if (do_counters)
2050             {
2051               vlib_prefetch_combined_counter (&adjacency_counters,
2052                                               thread_index, adj_index0);
2053               vlib_prefetch_combined_counter (&adjacency_counters,
2054                                               thread_index, adj_index1);
2055             }
2056
2057           ip0 = vlib_buffer_get_current (p0);
2058           ip1 = vlib_buffer_get_current (p1);
2059
2060           error0 = error1 = IP4_ERROR_NONE;
2061           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2062
2063           /* Decrement TTL & update checksum.
2064              Works either endian, so no need for byte swap. */
2065           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2066             {
2067               i32 ttl0 = ip0->ttl;
2068
2069               /* Input node should have reject packets with ttl 0. */
2070               ASSERT (ip0->ttl > 0);
2071
2072               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2073               checksum0 += checksum0 >= 0xffff;
2074
2075               ip0->checksum = checksum0;
2076               ttl0 -= 1;
2077               ip0->ttl = ttl0;
2078
2079               /*
2080                * If the ttl drops below 1 when forwarding, generate
2081                * an ICMP response.
2082                */
2083               if (PREDICT_FALSE (ttl0 <= 0))
2084                 {
2085                   error0 = IP4_ERROR_TIME_EXPIRED;
2086                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2087                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2088                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2089                                                0);
2090                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2091                 }
2092
2093               /* Verify checksum. */
2094               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2095                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2096             }
2097           else
2098             {
2099               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2100             }
2101           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2102             {
2103               i32 ttl1 = ip1->ttl;
2104
2105               /* Input node should have reject packets with ttl 0. */
2106               ASSERT (ip1->ttl > 0);
2107
2108               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2109               checksum1 += checksum1 >= 0xffff;
2110
2111               ip1->checksum = checksum1;
2112               ttl1 -= 1;
2113               ip1->ttl = ttl1;
2114
2115               /*
2116                * If the ttl drops below 1 when forwarding, generate
2117                * an ICMP response.
2118                */
2119               if (PREDICT_FALSE (ttl1 <= 0))
2120                 {
2121                   error1 = IP4_ERROR_TIME_EXPIRED;
2122                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2123                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2124                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2125                                                0);
2126                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2127                 }
2128
2129               /* Verify checksum. */
2130               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2131                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2132             }
2133           else
2134             {
2135               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2136             }
2137
2138           /* Rewrite packet header and updates lengths. */
2139           adj0 = adj_get (adj_index0);
2140           adj1 = adj_get (adj_index1);
2141
2142           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2143           rw_len0 = adj0[0].rewrite_header.data_bytes;
2144           rw_len1 = adj1[0].rewrite_header.data_bytes;
2145           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2146           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2147
2148           /* Check MTU of outgoing interface. */
2149           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2150                          adj0[0].rewrite_header.max_l3_packet_bytes,
2151                          ip0->flags_and_fragment_offset &
2152                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2153                          &next0, &error0);
2154           ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2155                          adj1[0].rewrite_header.max_l3_packet_bytes,
2156                          ip1->flags_and_fragment_offset &
2157                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2158                          &next1, &error1);
2159
2160           if (is_mcast)
2161             {
2162               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2163                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2164                         IP4_ERROR_SAME_INTERFACE : error0);
2165               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2166                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2167                         IP4_ERROR_SAME_INTERFACE : error1);
2168             }
2169
2170           p0->error = error_node->errors[error0];
2171           p1->error = error_node->errors[error1];
2172           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2173            * to see the IP headerr */
2174           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2175             {
2176               next0 = adj0[0].rewrite_header.next_index;
2177               p0->current_data -= rw_len0;
2178               p0->current_length += rw_len0;
2179               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2180               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2181
2182               if (PREDICT_FALSE
2183                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2184                 vnet_feature_arc_start (lm->output_feature_arc_index,
2185                                         tx_sw_if_index0, &next0, p0);
2186             }
2187           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2188             {
2189               next1 = adj1[0].rewrite_header.next_index;
2190               p1->current_data -= rw_len1;
2191               p1->current_length += rw_len1;
2192
2193               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2194               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2195
2196               if (PREDICT_FALSE
2197                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2198                 vnet_feature_arc_start (lm->output_feature_arc_index,
2199                                         tx_sw_if_index1, &next1, p1);
2200             }
2201
2202           /* Guess we are only writing on simple Ethernet header. */
2203           vnet_rewrite_two_headers (adj0[0], adj1[0],
2204                                     ip0, ip1, sizeof (ethernet_header_t));
2205
2206           /*
2207            * Bump the per-adjacency counters
2208            */
2209           if (do_counters)
2210             {
2211               vlib_increment_combined_counter
2212                 (&adjacency_counters,
2213                  thread_index,
2214                  adj_index0, 1,
2215                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2216
2217               vlib_increment_combined_counter
2218                 (&adjacency_counters,
2219                  thread_index,
2220                  adj_index1, 1,
2221                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2222             }
2223
2224           if (is_midchain)
2225             {
2226               adj0->sub_type.midchain.fixup_func
2227                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2228               adj1->sub_type.midchain.fixup_func
2229                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2230             }
2231           if (is_mcast)
2232             {
2233               /*
2234                * copy bytes from the IP address into the MAC rewrite
2235                */
2236               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2237                                           adj0->
2238                                           rewrite_header.dst_mcast_offset,
2239                                           &ip0->dst_address.as_u32,
2240                                           (u8 *) ip0);
2241               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2242                                           adj0->
2243                                           rewrite_header.dst_mcast_offset,
2244                                           &ip1->dst_address.as_u32,
2245                                           (u8 *) ip1);
2246             }
2247
2248           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2249                                            to_next, n_left_to_next,
2250                                            pi0, pi1, next0, next1);
2251         }
2252
2253       while (n_left_from > 0 && n_left_to_next > 0)
2254         {
2255           ip_adjacency_t *adj0;
2256           vlib_buffer_t *p0;
2257           ip4_header_t *ip0;
2258           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2259           u32 tx_sw_if_index0;
2260
2261           pi0 = to_next[0] = from[0];
2262
2263           p0 = vlib_get_buffer (vm, pi0);
2264
2265           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2266
2267           adj0 = adj_get (adj_index0);
2268
2269           ip0 = vlib_buffer_get_current (p0);
2270
2271           error0 = IP4_ERROR_NONE;
2272           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2273
2274           /* Decrement TTL & update checksum. */
2275           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2276             {
2277               i32 ttl0 = ip0->ttl;
2278
2279               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2280
2281               checksum0 += checksum0 >= 0xffff;
2282
2283               ip0->checksum = checksum0;
2284
2285               ASSERT (ip0->ttl > 0);
2286
2287               ttl0 -= 1;
2288
2289               ip0->ttl = ttl0;
2290
2291               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2292                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2293
2294               if (PREDICT_FALSE (ttl0 <= 0))
2295                 {
2296                   /*
2297                    * If the ttl drops below 1 when forwarding, generate
2298                    * an ICMP response.
2299                    */
2300                   error0 = IP4_ERROR_TIME_EXPIRED;
2301                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2302                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2303                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2304                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2305                                                0);
2306                 }
2307             }
2308           else
2309             {
2310               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2311             }
2312
2313           if (do_counters)
2314             vlib_prefetch_combined_counter (&adjacency_counters,
2315                                             thread_index, adj_index0);
2316
2317           /* Guess we are only writing on simple Ethernet header. */
2318           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2319           if (is_mcast)
2320             {
2321               /*
2322                * copy bytes from the IP address into the MAC rewrite
2323                */
2324               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2325                                           adj0->
2326                                           rewrite_header.dst_mcast_offset,
2327                                           &ip0->dst_address.as_u32,
2328                                           (u8 *) ip0);
2329             }
2330
2331           /* Update packet buffer attributes/set output interface. */
2332           rw_len0 = adj0[0].rewrite_header.data_bytes;
2333           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2334
2335           if (do_counters)
2336             vlib_increment_combined_counter
2337               (&adjacency_counters,
2338                thread_index, adj_index0, 1,
2339                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2340
2341           /* Check MTU of outgoing interface. */
2342           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2343                          adj0[0].rewrite_header.max_l3_packet_bytes,
2344                          ip0->flags_and_fragment_offset &
2345                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2346                          &next0, &error0);
2347
2348           if (is_mcast)
2349             {
2350               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2351                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2352                         IP4_ERROR_SAME_INTERFACE : error0);
2353             }
2354           p0->error = error_node->errors[error0];
2355
2356           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2357            * to see the IP headerr */
2358           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2359             {
2360               p0->current_data -= rw_len0;
2361               p0->current_length += rw_len0;
2362               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2363
2364               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2365               next0 = adj0[0].rewrite_header.next_index;
2366
2367               if (is_midchain)
2368                 {
2369                   adj0->sub_type.midchain.fixup_func
2370                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2371                 }
2372
2373               if (PREDICT_FALSE
2374                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2375                 vnet_feature_arc_start (lm->output_feature_arc_index,
2376                                         tx_sw_if_index0, &next0, p0);
2377
2378             }
2379
2380           from += 1;
2381           n_left_from -= 1;
2382           to_next += 1;
2383           n_left_to_next -= 1;
2384
2385           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2386                                            to_next, n_left_to_next,
2387                                            pi0, next0);
2388         }
2389
2390       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2391     }
2392
2393   /* Need to do trace after rewrites to pick up new packet data. */
2394   if (node->flags & VLIB_NODE_FLAG_TRACE)
2395     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2396
2397   return frame->n_vectors;
2398 }
2399
2400
2401 /** @brief IPv4 rewrite node.
2402     @node ip4-rewrite
2403
2404     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2405     header checksum, fetch the ip adjacency, check the outbound mtu,
2406     apply the adjacency rewrite, and send pkts to the adjacency
2407     rewrite header's rewrite_next_index.
2408
2409     @param vm vlib_main_t corresponding to the current thread
2410     @param node vlib_node_runtime_t
2411     @param frame vlib_frame_t whose contents should be dispatched
2412
2413     @par Graph mechanics: buffer metadata, next index usage
2414
2415     @em Uses:
2416     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2417         - the rewrite adjacency index
2418     - <code>adj->lookup_next_index</code>
2419         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2420           the packet will be dropped.
2421     - <code>adj->rewrite_header</code>
2422         - Rewrite string length, rewrite string, next_index
2423
2424     @em Sets:
2425     - <code>b->current_data, b->current_length</code>
2426         - Updated net of applying the rewrite string
2427
2428     <em>Next Indices:</em>
2429     - <code> adj->rewrite_header.next_index </code>
2430       or @c ip4-drop
2431 */
2432 static uword
2433 ip4_rewrite (vlib_main_t * vm,
2434              vlib_node_runtime_t * node, vlib_frame_t * frame)
2435 {
2436   if (adj_are_counters_enabled ())
2437     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2438   else
2439     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2440 }
2441
2442 static uword
2443 ip4_midchain (vlib_main_t * vm,
2444               vlib_node_runtime_t * node, vlib_frame_t * frame)
2445 {
2446   if (adj_are_counters_enabled ())
2447     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2448   else
2449     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2450 }
2451
2452 static uword
2453 ip4_rewrite_mcast (vlib_main_t * vm,
2454                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2455 {
2456   if (adj_are_counters_enabled ())
2457     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2458   else
2459     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2460 }
2461
2462 static uword
2463 ip4_mcast_midchain (vlib_main_t * vm,
2464                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2465 {
2466   if (adj_are_counters_enabled ())
2467     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2468   else
2469     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2470 }
2471
2472 /* *INDENT-OFF* */
2473 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2474   .function = ip4_rewrite,
2475   .name = "ip4-rewrite",
2476   .vector_size = sizeof (u32),
2477
2478   .format_trace = format_ip4_rewrite_trace,
2479
2480   .n_next_nodes = 2,
2481   .next_nodes = {
2482     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2483     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2484   },
2485 };
2486 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2487
2488 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2489   .function = ip4_rewrite_mcast,
2490   .name = "ip4-rewrite-mcast",
2491   .vector_size = sizeof (u32),
2492
2493   .format_trace = format_ip4_rewrite_trace,
2494   .sibling_of = "ip4-rewrite",
2495 };
2496 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2497
/*
 * Registration of the "ip4-mcast-midchain" graph node (file-local, per the
 * "static" argument); its dispatch function is ip4_mcast_midchain.
 */
VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
  .function = ip4_mcast_midchain,
  .name = "ip4-mcast-midchain",
  .vector_size = sizeof (u32),

  .format_trace = format_ip4_rewrite_trace,
  /* No next nodes listed here; declared a sibling of "ip4-rewrite"
     (presumably inheriting its next-node arcs -- confirm in vlib). */
  .sibling_of = "ip4-rewrite",
};
VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2507
/*
 * Registration of the "ip4-midchain" graph node; its dispatch function is
 * ip4_midchain.
 */
VLIB_REGISTER_NODE (ip4_midchain_node) = {
  .function = ip4_midchain,
  .name = "ip4-midchain",
  .vector_size = sizeof (u32),
  /* NOTE(review): the other rewrite-family nodes in this group use
     format_ip4_rewrite_trace; confirm this formatter is intended. */
  .format_trace = format_ip4_forward_next_trace,
  /* Declared a sibling of "ip4-rewrite" (presumably inheriting its
     next-node arcs -- confirm in vlib). */
  .sibling_of =  "ip4-rewrite",
};
VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2516 /* *INDENT-ON* */
2517
2518 int
2519 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2520 {
2521   ip4_fib_mtrie_t *mtrie0;
2522   ip4_fib_mtrie_leaf_t leaf0;
2523   u32 lbi0;
2524
2525   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2526
2527   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2528   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2529   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2530
2531   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2532
2533   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2534 }
2535
2536 static clib_error_t *
2537 test_lookup_command_fn (vlib_main_t * vm,
2538                         unformat_input_t * input, vlib_cli_command_t * cmd)
2539 {
2540   ip4_fib_t *fib;
2541   u32 table_id = 0;
2542   f64 count = 1;
2543   u32 n;
2544   int i;
2545   ip4_address_t ip4_base_address;
2546   u64 errors = 0;
2547
2548   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2549     {
2550       if (unformat (input, "table %d", &table_id))
2551         {
2552           /* Make sure the entry exists. */
2553           fib = ip4_fib_get (table_id);
2554           if ((fib) && (fib->index != table_id))
2555             return clib_error_return (0, "<fib-index> %d does not exist",
2556                                       table_id);
2557         }
2558       else if (unformat (input, "count %f", &count))
2559         ;
2560
2561       else if (unformat (input, "%U",
2562                          unformat_ip4_address, &ip4_base_address))
2563         ;
2564       else
2565         return clib_error_return (0, "unknown input `%U'",
2566                                   format_unformat_error, input);
2567     }
2568
2569   n = count;
2570
2571   for (i = 0; i < n; i++)
2572     {
2573       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2574         errors++;
2575
2576       ip4_base_address.as_u32 =
2577         clib_host_to_net_u32 (1 +
2578                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2579     }
2580
2581   if (errors)
2582     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2583   else
2584     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2585
2586   return 0;
2587 }
2588
2589 /*?
2590  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2591  * given FIB table to determine if there is a conflict with the
2592  * adjacency table. The fib-id can be determined by using the
2593  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2594  * of 0 is used.
2595  *
2596  * @todo This command uses fib-id, other commands use table-id (not
2597  * just a name, they are different indexes). Would like to change this
2598  * to table-id for consistency.
2599  *
2600  * @cliexpar
2601  * Example of how to run the test lookup command:
2602  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2603  * No errors in 2 lookups
2604  * @cliexend
2605 ?*/
2606 /* *INDENT-OFF* */
VLIB_CLI_COMMAND (lookup_test_command, static) =
{
  /* "test lookup" is handled by test_lookup_command_fn. */
  .path = "test lookup",
  .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
  .function = test_lookup_command_fn,
};
2613 /* *INDENT-ON* */
2614
2615 int
2616 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2617 {
2618   u32 fib_index;
2619
2620   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2621
2622   if (~0 == fib_index)
2623     return VNET_API_ERROR_NO_SUCH_FIB;
2624
2625   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2626                                   flow_hash_config);
2627
2628   return 0;
2629 }
2630
2631 static clib_error_t *
2632 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2633                              unformat_input_t * input,
2634                              vlib_cli_command_t * cmd)
2635 {
2636   int matched = 0;
2637   u32 table_id = 0;
2638   u32 flow_hash_config = 0;
2639   int rv;
2640
2641   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2642     {
2643       if (unformat (input, "table %d", &table_id))
2644         matched = 1;
2645 #define _(a,v) \
2646     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2647       foreach_flow_hash_bit
2648 #undef _
2649         else
2650         break;
2651     }
2652
2653   if (matched == 0)
2654     return clib_error_return (0, "unknown input `%U'",
2655                               format_unformat_error, input);
2656
2657   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2658   switch (rv)
2659     {
2660     case 0:
2661       break;
2662
2663     case VNET_API_ERROR_NO_SUCH_FIB:
2664       return clib_error_return (0, "no such FIB table %d", table_id);
2665
2666     default:
2667       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2668       break;
2669     }
2670
2671   return 0;
2672 }
2673
2674 /*?
2675  * Configure the set of IPv4 fields used by the flow hash.
2676  *
2677  * @cliexpar
2678  * Example of how to set the flow hash on a given table:
2679  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2680  * Example of how to display the configured flow hash:
2681  * @cliexstart{show ip fib}
2682  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2683  * 0.0.0.0/0
2684  *   unicast-ip4-chain
2685  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2686  *     [0] [@0]: dpo-drop ip6
2687  * 0.0.0.0/32
2688  *   unicast-ip4-chain
2689  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2690  *     [0] [@0]: dpo-drop ip6
2691  * 224.0.0.0/8
2692  *   unicast-ip4-chain
2693  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2694  *     [0] [@0]: dpo-drop ip6
2695  * 6.0.1.2/32
2696  *   unicast-ip4-chain
2697  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2698  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2699  * 7.0.0.1/32
2700  *   unicast-ip4-chain
2701  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2702  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2703  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2704  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2705  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2706  * 240.0.0.0/8
2707  *   unicast-ip4-chain
2708  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2709  *     [0] [@0]: dpo-drop ip6
2710  * 255.255.255.255/32
2711  *   unicast-ip4-chain
2712  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2713  *     [0] [@0]: dpo-drop ip6
2714  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2715  * 0.0.0.0/0
2716  *   unicast-ip4-chain
2717  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2718  *     [0] [@0]: dpo-drop ip6
2719  * 0.0.0.0/32
2720  *   unicast-ip4-chain
2721  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2722  *     [0] [@0]: dpo-drop ip6
2723  * 172.16.1.0/24
2724  *   unicast-ip4-chain
2725  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2726  *     [0] [@4]: ipv4-glean: af_packet0
2727  * 172.16.1.1/32
2728  *   unicast-ip4-chain
2729  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2730  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2731  * 172.16.1.2/32
2732  *   unicast-ip4-chain
2733  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2734  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2735  * 172.16.2.0/24
2736  *   unicast-ip4-chain
2737  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2738  *     [0] [@4]: ipv4-glean: af_packet1
2739  * 172.16.2.1/32
2740  *   unicast-ip4-chain
2741  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2742  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2743  * 224.0.0.0/8
2744  *   unicast-ip4-chain
2745  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2746  *     [0] [@0]: dpo-drop ip6
2747  * 240.0.0.0/8
2748  *   unicast-ip4-chain
2749  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2750  *     [0] [@0]: dpo-drop ip6
2751  * 255.255.255.255/32
2752  *   unicast-ip4-chain
2753  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2754  *     [0] [@0]: dpo-drop ip6
2755  * @cliexend
2756 ?*/
2757 /* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
{
  /* "set ip flow-hash" is handled by set_ip_flow_hash_command_fn. */
  .path = "set ip flow-hash",
  .short_help =
  "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
  .function = set_ip_flow_hash_command_fn,
};
2765 /* *INDENT-ON* */
2766
2767 int
2768 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2769                              u32 table_index)
2770 {
2771   vnet_main_t *vnm = vnet_get_main ();
2772   vnet_interface_main_t *im = &vnm->interface_main;
2773   ip4_main_t *ipm = &ip4_main;
2774   ip_lookup_main_t *lm = &ipm->lookup_main;
2775   vnet_classify_main_t *cm = &vnet_classify_main;
2776   ip4_address_t *if_addr;
2777
2778   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2779     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2780
2781   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2782     return VNET_API_ERROR_NO_SUCH_ENTRY;
2783
2784   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2785   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2786
2787   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2788
2789   if (NULL != if_addr)
2790     {
2791       fib_prefix_t pfx = {
2792         .fp_len = 32,
2793         .fp_proto = FIB_PROTOCOL_IP4,
2794         .fp_addr.ip4 = *if_addr,
2795       };
2796       u32 fib_index;
2797
2798       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2799                                                        sw_if_index);
2800
2801
2802       if (table_index != (u32) ~ 0)
2803         {
2804           dpo_id_t dpo = DPO_INVALID;
2805
2806           dpo_set (&dpo,
2807                    DPO_CLASSIFY,
2808                    DPO_PROTO_IP4,
2809                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2810
2811           fib_table_entry_special_dpo_add (fib_index,
2812                                            &pfx,
2813                                            FIB_SOURCE_CLASSIFY,
2814                                            FIB_ENTRY_FLAG_NONE, &dpo);
2815           dpo_reset (&dpo);
2816         }
2817       else
2818         {
2819           fib_table_entry_special_remove (fib_index,
2820                                           &pfx, FIB_SOURCE_CLASSIFY);
2821         }
2822     }
2823
2824   return 0;
2825 }
2826
2827 static clib_error_t *
2828 set_ip_classify_command_fn (vlib_main_t * vm,
2829                             unformat_input_t * input,
2830                             vlib_cli_command_t * cmd)
2831 {
2832   u32 table_index = ~0;
2833   int table_index_set = 0;
2834   u32 sw_if_index = ~0;
2835   int rv;
2836
2837   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2838     {
2839       if (unformat (input, "table-index %d", &table_index))
2840         table_index_set = 1;
2841       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2842                          vnet_get_main (), &sw_if_index))
2843         ;
2844       else
2845         break;
2846     }
2847
2848   if (table_index_set == 0)
2849     return clib_error_return (0, "classify table-index must be specified");
2850
2851   if (sw_if_index == ~0)
2852     return clib_error_return (0, "interface / subif must be specified");
2853
2854   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
2855
2856   switch (rv)
2857     {
2858     case 0:
2859       break;
2860
2861     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
2862       return clib_error_return (0, "No such interface");
2863
2864     case VNET_API_ERROR_NO_SUCH_ENTRY:
2865       return clib_error_return (0, "No such classifier table");
2866     }
2867   return 0;
2868 }
2869
2870 /*?
2871  * Assign a classification table to an interface. The classification
2872  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
2873  * commands. Once the table is created, use this command to filter packets
2874  * on an interface.
2875  *
2876  * @cliexpar
2877  * Example of how to assign a classification table to an interface:
2878  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
2879 ?*/
2880 /* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_classify_command, static) =
{
    /* "set ip classify" is handled by set_ip_classify_command_fn. */
    .path = "set ip classify",
    .short_help =
    "set ip classify intfc <interface> table-index <classify-idx>",
    .function = set_ip_classify_command_fn,
};
2888 /* *INDENT-ON* */
2889
2890 static clib_error_t *
2891 ip4_config (vlib_main_t * vm, unformat_input_t * input)
2892 {
2893   ip4_main_t *im = &ip4_main;
2894   uword heapsize = 0;
2895
2896   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2897     {
2898       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
2899         ;
2900       else
2901         return clib_error_return (0,
2902                                   "invalid heap-size parameter `%U'",
2903                                   format_unformat_error, input);
2904     }
2905
2906   im->mtrie_heap_size = heapsize;
2907
2908   return 0;
2909 }
2910
2911 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
2912
2913 /*
2914  * fd.io coding-style-patch-verification: ON
2915  *
2916  * Local Variables:
2917  * eval: (c-set-style "gnu")
2918  * End:
2919  */