Thread-safe ARP / ND throttling
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 static uword
92 ip4_lookup (vlib_main_t * vm,
93             vlib_node_runtime_t * node, vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .function = ip4_lookup,
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
116
117 static uword
118 ip4_load_balance (vlib_main_t * vm,
119                   vlib_node_runtime_t * node, vlib_frame_t * frame)
120 {
121   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
122   u32 n_left_from, n_left_to_next, *from, *to_next;
123   ip_lookup_next_t next;
124   u32 thread_index = vm->thread_index;
125
126   from = vlib_frame_vector_args (frame);
127   n_left_from = frame->n_vectors;
128   next = node->cached_next_index;
129
130   if (node->flags & VLIB_NODE_FLAG_TRACE)
131     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
132
133   while (n_left_from > 0)
134     {
135       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
136
137
138       while (n_left_from >= 4 && n_left_to_next >= 2)
139         {
140           ip_lookup_next_t next0, next1;
141           const load_balance_t *lb0, *lb1;
142           vlib_buffer_t *p0, *p1;
143           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
144           const ip4_header_t *ip0, *ip1;
145           const dpo_id_t *dpo0, *dpo1;
146
147           /* Prefetch next iteration. */
148           {
149             vlib_buffer_t *p2, *p3;
150
151             p2 = vlib_get_buffer (vm, from[2]);
152             p3 = vlib_get_buffer (vm, from[3]);
153
154             vlib_prefetch_buffer_header (p2, STORE);
155             vlib_prefetch_buffer_header (p3, STORE);
156
157             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
158             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
159           }
160
161           pi0 = to_next[0] = from[0];
162           pi1 = to_next[1] = from[1];
163
164           from += 2;
165           n_left_from -= 2;
166           to_next += 2;
167           n_left_to_next -= 2;
168
169           p0 = vlib_get_buffer (vm, pi0);
170           p1 = vlib_get_buffer (vm, pi1);
171
172           ip0 = vlib_buffer_get_current (p0);
173           ip1 = vlib_buffer_get_current (p1);
174           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
175           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
176
177           lb0 = load_balance_get (lbi0);
178           lb1 = load_balance_get (lbi1);
179
180           /*
181            * this node is for via FIBs we can re-use the hash value from the
182            * to node if present.
183            * We don't want to use the same hash value at each level in the recursion
184            * graph as that would lead to polarisation
185            */
186           hc0 = hc1 = 0;
187
188           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
189             {
190               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
191                 {
192                   hc0 = vnet_buffer (p0)->ip.flow_hash =
193                     vnet_buffer (p0)->ip.flow_hash >> 1;
194                 }
195               else
196                 {
197                   hc0 = vnet_buffer (p0)->ip.flow_hash =
198                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
199                 }
200               dpo0 = load_balance_get_fwd_bucket
201                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
202             }
203           else
204             {
205               dpo0 = load_balance_get_bucket_i (lb0, 0);
206             }
207           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
208             {
209               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
210                 {
211                   hc1 = vnet_buffer (p1)->ip.flow_hash =
212                     vnet_buffer (p1)->ip.flow_hash >> 1;
213                 }
214               else
215                 {
216                   hc1 = vnet_buffer (p1)->ip.flow_hash =
217                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
218                 }
219               dpo1 = load_balance_get_fwd_bucket
220                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
221             }
222           else
223             {
224               dpo1 = load_balance_get_bucket_i (lb1, 0);
225             }
226
227           next0 = dpo0->dpoi_next_node;
228           next1 = dpo1->dpoi_next_node;
229
230           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
231           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
232
233           vlib_increment_combined_counter
234             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
235           vlib_increment_combined_counter
236             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
237
238           vlib_validate_buffer_enqueue_x2 (vm, node, next,
239                                            to_next, n_left_to_next,
240                                            pi0, pi1, next0, next1);
241         }
242
243       while (n_left_from > 0 && n_left_to_next > 0)
244         {
245           ip_lookup_next_t next0;
246           const load_balance_t *lb0;
247           vlib_buffer_t *p0;
248           u32 pi0, lbi0, hc0;
249           const ip4_header_t *ip0;
250           const dpo_id_t *dpo0;
251
252           pi0 = from[0];
253           to_next[0] = pi0;
254           from += 1;
255           to_next += 1;
256           n_left_to_next -= 1;
257           n_left_from -= 1;
258
259           p0 = vlib_get_buffer (vm, pi0);
260
261           ip0 = vlib_buffer_get_current (p0);
262           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
263
264           lb0 = load_balance_get (lbi0);
265
266           hc0 = 0;
267           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
268             {
269               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
270                 {
271                   hc0 = vnet_buffer (p0)->ip.flow_hash =
272                     vnet_buffer (p0)->ip.flow_hash >> 1;
273                 }
274               else
275                 {
276                   hc0 = vnet_buffer (p0)->ip.flow_hash =
277                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
278                 }
279               dpo0 = load_balance_get_fwd_bucket
280                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
281             }
282           else
283             {
284               dpo0 = load_balance_get_bucket_i (lb0, 0);
285             }
286
287           next0 = dpo0->dpoi_next_node;
288           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
289
290           vlib_increment_combined_counter
291             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
292
293           vlib_validate_buffer_enqueue_x1 (vm, node, next,
294                                            to_next, n_left_to_next,
295                                            pi0, next0);
296         }
297
298       vlib_put_next_frame (vm, node, next, n_left_to_next);
299     }
300
301   return frame->n_vectors;
302 }
303
304 /* *INDENT-OFF* */
305 VLIB_REGISTER_NODE (ip4_load_balance_node) =
306 {
307   .function = ip4_load_balance,
308   .name = "ip4-load-balance",
309   .vector_size = sizeof (u32),
310   .sibling_of = "ip4-lookup",
311   .format_trace =
312   format_ip4_lookup_trace,
313 };
314 /* *INDENT-ON* */
315
316 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
317
318 /* get first interface address */
319 ip4_address_t *
320 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
321                              ip_interface_address_t ** result_ia)
322 {
323   ip_lookup_main_t *lm = &im->lookup_main;
324   ip_interface_address_t *ia = 0;
325   ip4_address_t *result = 0;
326
327   /* *INDENT-OFF* */
328   foreach_ip_interface_address
329     (lm, ia, sw_if_index,
330      1 /* honor unnumbered */ ,
331      ({
332        ip4_address_t * a =
333          ip_interface_address_get_address (lm, ia);
334        result = a;
335        break;
336      }));
337   /* *INDENT-OFF* */
338   if (result_ia)
339     *result_ia = result ? ia : 0;
340   return result;
341 }
342
343 static void
344 ip4_add_subnet_bcast_route (u32 fib_index,
345                             fib_prefix_t *pfx,
346                             u32 sw_if_index)
347 {
348   vnet_sw_interface_flags_t iflags;
349
350   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
351
352   fib_table_entry_special_remove(fib_index,
353                                  pfx,
354                                  FIB_SOURCE_INTERFACE);
355
356   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
357     {
358       fib_table_entry_update_one_path (fib_index, pfx,
359                                        FIB_SOURCE_INTERFACE,
360                                        FIB_ENTRY_FLAG_NONE,
361                                        DPO_PROTO_IP4,
362                                        /* No next-hop address */
363                                        &ADJ_BCAST_ADDR,
364                                        sw_if_index,
365                                        // invalid FIB index
366                                        ~0,
367                                        1,
368                                        // no out-label stack
369                                        NULL,
370                                        FIB_ROUTE_PATH_FLAG_NONE);
371     }
372   else
373     {
374         fib_table_entry_special_add(fib_index,
375                                     pfx,
376                                     FIB_SOURCE_INTERFACE,
377                                     (FIB_ENTRY_FLAG_DROP |
378                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
379     }
380 }
381
382 static void
383 ip4_add_interface_routes (u32 sw_if_index,
384                           ip4_main_t * im, u32 fib_index,
385                           ip_interface_address_t * a)
386 {
387   ip_lookup_main_t *lm = &im->lookup_main;
388   ip4_address_t *address = ip_interface_address_get_address (lm, a);
389   fib_prefix_t pfx = {
390     .fp_len = a->address_length,
391     .fp_proto = FIB_PROTOCOL_IP4,
392     .fp_addr.ip4 = *address,
393   };
394
395   if (pfx.fp_len <= 30)
396     {
397       /* a /30 or shorter - add a glean for the network address */
398       fib_table_entry_update_one_path (fib_index, &pfx,
399                                        FIB_SOURCE_INTERFACE,
400                                        (FIB_ENTRY_FLAG_CONNECTED |
401                                         FIB_ENTRY_FLAG_ATTACHED),
402                                        DPO_PROTO_IP4,
403                                        /* No next-hop address */
404                                        NULL,
405                                        sw_if_index,
406                                        // invalid FIB index
407                                        ~0,
408                                        1,
409                                        // no out-label stack
410                                        NULL,
411                                        FIB_ROUTE_PATH_FLAG_NONE);
412
413       /* Add the two broadcast addresses as drop */
414       fib_prefix_t net_pfx = {
415         .fp_len = 32,
416         .fp_proto = FIB_PROTOCOL_IP4,
417         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
418       };
419       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420         fib_table_entry_special_add(fib_index,
421                                     &net_pfx,
422                                     FIB_SOURCE_INTERFACE,
423                                     (FIB_ENTRY_FLAG_DROP |
424                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
425       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
426       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
427         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
428     }
429   else if (pfx.fp_len == 31)
430     {
431       u32 mask = clib_host_to_net_u32(1);
432       fib_prefix_t net_pfx = pfx;
433
434       net_pfx.fp_len = 32;
435       net_pfx.fp_addr.ip4.as_u32 ^= mask;
436
437       /* a /31 - add the other end as an attached host */
438       fib_table_entry_update_one_path (fib_index, &net_pfx,
439                                        FIB_SOURCE_INTERFACE,
440                                        (FIB_ENTRY_FLAG_ATTACHED),
441                                        DPO_PROTO_IP4,
442                                        &net_pfx.fp_addr,
443                                        sw_if_index,
444                                        // invalid FIB index
445                                        ~0,
446                                        1,
447                                        NULL,
448                                        FIB_ROUTE_PATH_FLAG_NONE);
449     }
450   pfx.fp_len = 32;
451
452   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
453     {
454       u32 classify_table_index =
455         lm->classify_table_index_by_sw_if_index[sw_if_index];
456       if (classify_table_index != (u32) ~ 0)
457         {
458           dpo_id_t dpo = DPO_INVALID;
459
460           dpo_set (&dpo,
461                    DPO_CLASSIFY,
462                    DPO_PROTO_IP4,
463                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
464
465           fib_table_entry_special_dpo_add (fib_index,
466                                            &pfx,
467                                            FIB_SOURCE_CLASSIFY,
468                                            FIB_ENTRY_FLAG_NONE, &dpo);
469           dpo_reset (&dpo);
470         }
471     }
472
473   fib_table_entry_update_one_path (fib_index, &pfx,
474                                    FIB_SOURCE_INTERFACE,
475                                    (FIB_ENTRY_FLAG_CONNECTED |
476                                     FIB_ENTRY_FLAG_LOCAL),
477                                    DPO_PROTO_IP4,
478                                    &pfx.fp_addr,
479                                    sw_if_index,
480                                    // invalid FIB index
481                                    ~0,
482                                    1, NULL,
483                                    FIB_ROUTE_PATH_FLAG_NONE);
484 }
485
486 static void
487 ip4_del_interface_routes (ip4_main_t * im,
488                           u32 fib_index,
489                           ip4_address_t * address, u32 address_length)
490 {
491   fib_prefix_t pfx = {
492     .fp_len = address_length,
493     .fp_proto = FIB_PROTOCOL_IP4,
494     .fp_addr.ip4 = *address,
495   };
496
497   if (pfx.fp_len <= 30)
498     {
499       fib_prefix_t net_pfx = {
500         .fp_len = 32,
501         .fp_proto = FIB_PROTOCOL_IP4,
502         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
503       };
504       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
505         fib_table_entry_special_remove(fib_index,
506                                        &net_pfx,
507                                        FIB_SOURCE_INTERFACE);
508       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
509       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
510         fib_table_entry_special_remove(fib_index,
511                                        &net_pfx,
512                                        FIB_SOURCE_INTERFACE);
513       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
514     }
515     else if (pfx.fp_len == 31)
516     {
517       u32 mask = clib_host_to_net_u32(1);
518       fib_prefix_t net_pfx = pfx;
519
520       net_pfx.fp_len = 32;
521       net_pfx.fp_addr.ip4.as_u32 ^= mask;
522
523       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
524     }
525
526   pfx.fp_len = 32;
527   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
528 }
529
530 void
531 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
532 {
533   ip4_main_t *im = &ip4_main;
534
535   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
536
537   /*
538    * enable/disable only on the 1<->0 transition
539    */
540   if (is_enable)
541     {
542       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
543         return;
544     }
545   else
546     {
547       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
548       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
549         return;
550     }
551   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
552                                !is_enable, 0, 0);
553
554
555   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
556                                sw_if_index, !is_enable, 0, 0);
557 }
558
559 static clib_error_t *
560 ip4_add_del_interface_address_internal (vlib_main_t * vm,
561                                         u32 sw_if_index,
562                                         ip4_address_t * address,
563                                         u32 address_length, u32 is_del)
564 {
565   vnet_main_t *vnm = vnet_get_main ();
566   ip4_main_t *im = &ip4_main;
567   ip_lookup_main_t *lm = &im->lookup_main;
568   clib_error_t *error = 0;
569   u32 if_address_index, elts_before;
570   ip4_address_fib_t ip4_af, *addr_fib = 0;
571
572   /* local0 interface doesn't support IP addressing  */
573   if (sw_if_index == 0)
574     {
575       return
576        clib_error_create ("local0 interface doesn't support IP addressing");
577     }
578
579   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
580   ip4_addr_fib_init (&ip4_af, address,
581                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
582   vec_add1 (addr_fib, ip4_af);
583
584   /*
585    * there is no support for adj-fib handling in the presence of overlapping
586    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
587    * most routers do.
588    */
589   /* *INDENT-OFF* */
590   if (!is_del)
591     {
592       /* When adding an address check that it does not conflict
593          with an existing address on any interface in this table. */
594       ip_interface_address_t *ia;
595       vnet_sw_interface_t *sif;
596
597       pool_foreach(sif, vnm->interface_main.sw_interfaces,
598       ({
599           if (im->fib_index_by_sw_if_index[sw_if_index] ==
600               im->fib_index_by_sw_if_index[sif->sw_if_index])
601             {
602               foreach_ip_interface_address
603                 (&im->lookup_main, ia, sif->sw_if_index,
604                  0 /* honor unnumbered */ ,
605                  ({
606                    ip4_address_t * x =
607                      ip_interface_address_get_address
608                      (&im->lookup_main, ia);
609                    if (ip4_destination_matches_route
610                        (im, address, x, ia->address_length) ||
611                        ip4_destination_matches_route (im,
612                                                       x,
613                                                       address,
614                                                       address_length))
615                      {
616                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
617
618                        return
619                          clib_error_create
620                          ("failed to add %U which conflicts with %U for interface %U",
621                           format_ip4_address_and_length, address,
622                           address_length,
623                           format_ip4_address_and_length, x,
624                           ia->address_length,
625                           format_vnet_sw_if_index_name, vnm,
626                           sif->sw_if_index);
627                      }
628                  }));
629             }
630       }));
631     }
632   /* *INDENT-ON* */
633
634   elts_before = pool_elts (lm->if_address_pool);
635
636   error = ip_interface_address_add_del
637     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
638   if (error)
639     goto done;
640
641   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
642
643   if (is_del)
644     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
645   else
646     ip4_add_interface_routes (sw_if_index,
647                               im, ip4_af.fib_index,
648                               pool_elt_at_index
649                               (lm->if_address_pool, if_address_index));
650
651   /* If pool did not grow/shrink: add duplicate address. */
652   if (elts_before != pool_elts (lm->if_address_pool))
653     {
654       ip4_add_del_interface_address_callback_t *cb;
655       vec_foreach (cb, im->add_del_interface_address_callbacks)
656         cb->function (im, cb->function_opaque, sw_if_index,
657                       address, address_length, if_address_index, is_del);
658     }
659
660 done:
661   vec_free (addr_fib);
662   return error;
663 }
664
665 clib_error_t *
666 ip4_add_del_interface_address (vlib_main_t * vm,
667                                u32 sw_if_index,
668                                ip4_address_t * address,
669                                u32 address_length, u32 is_del)
670 {
671   return ip4_add_del_interface_address_internal
672     (vm, sw_if_index, address, address_length, is_del);
673 }
674
675 void
676 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
677 {
678   ip_interface_address_t *ia;
679   ip4_main_t *im;
680
681   im = &ip4_main;
682
683   /*
684    * when directed broadcast is enabled, the subnet braodcast route will forward
685    * packets using an adjacency with a broadcast MAC. otherwise it drops
686    */
687   /* *INDENT-OFF* */
688   foreach_ip_interface_address(&im->lookup_main, ia,
689                                sw_if_index, 0,
690      ({
691        if (ia->address_length <= 30)
692          {
693            ip4_address_t *ipa;
694
695            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
696
697            fib_prefix_t pfx = {
698              .fp_len = 32,
699              .fp_proto = FIB_PROTOCOL_IP4,
700              .fp_addr = {
701                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
702              },
703            };
704
705            ip4_add_subnet_bcast_route
706              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
707                                                   sw_if_index),
708               &pfx, sw_if_index);
709          }
710      }));
711   /* *INDENT-ON* */
712 }
713
714 /* Built-in ip4 unicast rx feature path definition */
715 /* *INDENT-OFF* */
716 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
717 {
718   .arc_name = "ip4-unicast",
719   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
720   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
721 };
722
723 VNET_FEATURE_INIT (ip4_flow_classify, static) =
724 {
725   .arc_name = "ip4-unicast",
726   .node_name = "ip4-flow-classify",
727   .runs_before = VNET_FEATURES ("ip4-inacl"),
728 };
729
730 VNET_FEATURE_INIT (ip4_inacl, static) =
731 {
732   .arc_name = "ip4-unicast",
733   .node_name = "ip4-inacl",
734   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
735 };
736
737 VNET_FEATURE_INIT (ip4_source_check_1, static) =
738 {
739   .arc_name = "ip4-unicast",
740   .node_name = "ip4-source-check-via-rx",
741   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
742 };
743
744 VNET_FEATURE_INIT (ip4_source_check_2, static) =
745 {
746   .arc_name = "ip4-unicast",
747   .node_name = "ip4-source-check-via-any",
748   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
749 };
750
751 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
752 {
753   .arc_name = "ip4-unicast",
754   .node_name = "ip4-source-and-port-range-check-rx",
755   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
756 };
757
758 VNET_FEATURE_INIT (ip4_policer_classify, static) =
759 {
760   .arc_name = "ip4-unicast",
761   .node_name = "ip4-policer-classify",
762   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
763 };
764
765 VNET_FEATURE_INIT (ip4_ipsec, static) =
766 {
767   .arc_name = "ip4-unicast",
768   .node_name = "ipsec-input-ip4",
769   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
770 };
771
772 VNET_FEATURE_INIT (ip4_vpath, static) =
773 {
774   .arc_name = "ip4-unicast",
775   .node_name = "vpath-input-ip4",
776   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
777 };
778
779 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
780 {
781   .arc_name = "ip4-unicast",
782   .node_name = "ip4-vxlan-bypass",
783   .runs_before = VNET_FEATURES ("ip4-lookup"),
784 };
785
786 VNET_FEATURE_INIT (ip4_not_enabled, static) =
787 {
788   .arc_name = "ip4-unicast",
789   .node_name = "ip4-not-enabled",
790   .runs_before = VNET_FEATURES ("ip4-lookup"),
791 };
792
793 VNET_FEATURE_INIT (ip4_lookup, static) =
794 {
795   .arc_name = "ip4-unicast",
796   .node_name = "ip4-lookup",
797   .runs_before = 0,     /* not before any other features */
798 };
799
800 /* Built-in ip4 multicast rx feature path definition */
801 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
802 {
803   .arc_name = "ip4-multicast",
804   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
805   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
806 };
807
808 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
809 {
810   .arc_name = "ip4-multicast",
811   .node_name = "vpath-input-ip4",
812   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
813 };
814
815 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
816 {
817   .arc_name = "ip4-multicast",
818   .node_name = "ip4-not-enabled",
819   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
820 };
821
822 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
823 {
824   .arc_name = "ip4-multicast",
825   .node_name = "ip4-mfib-forward-lookup",
826   .runs_before = 0,     /* last feature */
827 };
828
829 /* Source and port-range check ip4 tx feature path definition */
830 VNET_FEATURE_ARC_INIT (ip4_output, static) =
831 {
832   .arc_name = "ip4-output",
833   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
834   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
835 };
836
837 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
838 {
839   .arc_name = "ip4-output",
840   .node_name = "ip4-source-and-port-range-check-tx",
841   .runs_before = VNET_FEATURES ("ip4-outacl"),
842 };
843
844 VNET_FEATURE_INIT (ip4_outacl, static) =
845 {
846   .arc_name = "ip4-output",
847   .node_name = "ip4-outacl",
848   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
849 };
850
851 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
852 {
853   .arc_name = "ip4-output",
854   .node_name = "ipsec-output-ip4",
855   .runs_before = VNET_FEATURES ("interface-output"),
856 };
857
858 /* Built-in ip4 tx feature path definition */
859 VNET_FEATURE_INIT (ip4_interface_output, static) =
860 {
861   .arc_name = "ip4-output",
862   .node_name = "interface-output",
863   .runs_before = 0,     /* not before any other features */
864 };
865 /* *INDENT-ON* */
866
867 static clib_error_t *
868 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
869 {
870   ip4_main_t *im = &ip4_main;
871
872   /* Fill in lookup tables with default table (0). */
873   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
874   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
875
876   if (!is_add)
877     {
878       ip4_main_t *im4 = &ip4_main;
879       ip_lookup_main_t *lm4 = &im4->lookup_main;
880       ip_interface_address_t *ia = 0;
881       ip4_address_t *address;
882       vlib_main_t *vm = vlib_get_main ();
883
884       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
885       /* *INDENT-OFF* */
886       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
887       ({
888         address = ip_interface_address_get_address (lm4, ia);
889         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
890       }));
891       /* *INDENT-ON* */
892     }
893
894   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
895                                is_add, 0, 0);
896
897   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
898                                sw_if_index, is_add, 0, 0);
899
900   return /* no error */ 0;
901 }
902
903 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
904
905 /* Global IP4 main. */
906 ip4_main_t ip4_main;
907
908 clib_error_t *
909 ip4_lookup_init (vlib_main_t * vm)
910 {
911   ip4_main_t *im = &ip4_main;
912   clib_error_t *error;
913   uword i;
914
915   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
916     return error;
917   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
918     return (error);
919   if ((error = vlib_call_init_function (vm, fib_module_init)))
920     return error;
921   if ((error = vlib_call_init_function (vm, mfib_module_init)))
922     return error;
923
924   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
925     {
926       u32 m;
927
928       if (i < 32)
929         m = pow2_mask (i) << (32 - i);
930       else
931         m = ~0;
932       im->fib_masks[i] = clib_host_to_net_u32 (m);
933     }
934
935   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
936
937   /* Create FIB with index 0 and table id of 0. */
938   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
939                                      FIB_SOURCE_DEFAULT_ROUTE);
940   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
941                                       MFIB_SOURCE_DEFAULT_ROUTE);
942
943   {
944     pg_node_t *pn;
945     pn = pg_get_node (ip4_lookup_node.index);
946     pn->unformat_edit = unformat_pg_ip4_header;
947   }
948
949   {
950     ethernet_arp_header_t h;
951
952     memset (&h, 0, sizeof (h));
953
954     /* Set target ethernet address to all zeros. */
955     memset (h.ip4_over_ethernet[1].ethernet, 0,
956             sizeof (h.ip4_over_ethernet[1].ethernet));
957
958 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
959 #define _8(f,v) h.f = v;
960     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
961     _16 (l3_type, ETHERNET_TYPE_IP4);
962     _8 (n_l2_address_bytes, 6);
963     _8 (n_l3_address_bytes, 4);
964     _16 (opcode, ETHERNET_ARP_OPCODE_request);
965 #undef _16
966 #undef _8
967
968     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
969                                /* data */ &h,
970                                sizeof (h),
971                                /* alloc chunk size */ 8,
972                                "ip4 arp");
973   }
974
975   return error;
976 }
977
978 VLIB_INIT_FUNCTION (ip4_lookup_init);
979
980 typedef struct
981 {
982   /* Adjacency taken. */
983   u32 dpo_index;
984   u32 flow_hash;
985   u32 fib_index;
986
987   /* Packet data, possibly *after* rewrite. */
988   u8 packet_data[64 - 1 * sizeof (u32)];
989 }
990 ip4_forward_next_trace_t;
991
992 u8 *
993 format_ip4_forward_next_trace (u8 * s, va_list * args)
994 {
995   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
996   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
997   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
998   u32 indent = format_get_indent (s);
999   s = format (s, "%U%U",
1000               format_white_space, indent,
1001               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1002   return s;
1003 }
1004
1005 static u8 *
1006 format_ip4_lookup_trace (u8 * s, va_list * args)
1007 {
1008   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1009   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1010   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1011   u32 indent = format_get_indent (s);
1012
1013   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1014               t->fib_index, t->dpo_index, t->flow_hash);
1015   s = format (s, "\n%U%U",
1016               format_white_space, indent,
1017               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1018   return s;
1019 }
1020
1021 static u8 *
1022 format_ip4_rewrite_trace (u8 * s, va_list * args)
1023 {
1024   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1025   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1026   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1027   u32 indent = format_get_indent (s);
1028
1029   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1030               t->fib_index, t->dpo_index, format_ip_adjacency,
1031               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1032   s = format (s, "\n%U%U",
1033               format_white_space, indent,
1034               format_ip_adjacency_packet_data,
1035               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1036   return s;
1037 }
1038
1039 /* Common trace function for all ip4-forward next nodes. */
1040 void
1041 ip4_forward_next_trace (vlib_main_t * vm,
1042                         vlib_node_runtime_t * node,
1043                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1044 {
1045   u32 *from, n_left;
1046   ip4_main_t *im = &ip4_main;
1047
1048   n_left = frame->n_vectors;
1049   from = vlib_frame_vector_args (frame);
1050
1051   while (n_left >= 4)
1052     {
1053       u32 bi0, bi1;
1054       vlib_buffer_t *b0, *b1;
1055       ip4_forward_next_trace_t *t0, *t1;
1056
1057       /* Prefetch next iteration. */
1058       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1059       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1060
1061       bi0 = from[0];
1062       bi1 = from[1];
1063
1064       b0 = vlib_get_buffer (vm, bi0);
1065       b1 = vlib_get_buffer (vm, bi1);
1066
1067       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1068         {
1069           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1070           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1071           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1072           t0->fib_index =
1073             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1074              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1075             vec_elt (im->fib_index_by_sw_if_index,
1076                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1077
1078           clib_memcpy (t0->packet_data,
1079                        vlib_buffer_get_current (b0),
1080                        sizeof (t0->packet_data));
1081         }
1082       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1083         {
1084           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1085           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1086           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1087           t1->fib_index =
1088             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1089              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1090             vec_elt (im->fib_index_by_sw_if_index,
1091                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1092           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1093                        sizeof (t1->packet_data));
1094         }
1095       from += 2;
1096       n_left -= 2;
1097     }
1098
1099   while (n_left >= 1)
1100     {
1101       u32 bi0;
1102       vlib_buffer_t *b0;
1103       ip4_forward_next_trace_t *t0;
1104
1105       bi0 = from[0];
1106
1107       b0 = vlib_get_buffer (vm, bi0);
1108
1109       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1110         {
1111           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1112           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1113           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1114           t0->fib_index =
1115             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1116              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1117             vec_elt (im->fib_index_by_sw_if_index,
1118                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1119           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1120                        sizeof (t0->packet_data));
1121         }
1122       from += 1;
1123       n_left -= 1;
1124     }
1125 }
1126
1127 /* Compute TCP/UDP/ICMP4 checksum in software. */
1128 u16
1129 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1130                               ip4_header_t * ip0)
1131 {
1132   ip_csum_t sum0;
1133   u32 ip_header_length, payload_length_host_byte_order;
1134   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1135   u16 sum16;
1136   void *data_this_buffer;
1137
1138   /* Initialize checksum with ip header. */
1139   ip_header_length = ip4_header_bytes (ip0);
1140   payload_length_host_byte_order =
1141     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1142   sum0 =
1143     clib_host_to_net_u32 (payload_length_host_byte_order +
1144                           (ip0->protocol << 16));
1145
1146   if (BITS (uword) == 32)
1147     {
1148       sum0 =
1149         ip_csum_with_carry (sum0,
1150                             clib_mem_unaligned (&ip0->src_address, u32));
1151       sum0 =
1152         ip_csum_with_carry (sum0,
1153                             clib_mem_unaligned (&ip0->dst_address, u32));
1154     }
1155   else
1156     sum0 =
1157       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1158
1159   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1160   data_this_buffer = (void *) ip0 + ip_header_length;
1161   n_ip_bytes_this_buffer =
1162     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1163   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1164     {
1165       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1166         n_ip_bytes_this_buffer - ip_header_length : 0;
1167     }
1168   while (1)
1169     {
1170       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1171       n_bytes_left -= n_this_buffer;
1172       if (n_bytes_left == 0)
1173         break;
1174
1175       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1176       p0 = vlib_get_buffer (vm, p0->next_buffer);
1177       data_this_buffer = vlib_buffer_get_current (p0);
1178       n_this_buffer = p0->current_length;
1179     }
1180
1181   sum16 = ~ip_csum_fold (sum0);
1182
1183   return sum16;
1184 }
1185
1186 u32
1187 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1188 {
1189   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1190   udp_header_t *udp0;
1191   u16 sum16;
1192
1193   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1194           || ip0->protocol == IP_PROTOCOL_UDP);
1195
1196   udp0 = (void *) (ip0 + 1);
1197   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1198     {
1199       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1200                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1201       return p0->flags;
1202     }
1203
1204   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1205
1206   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1207                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1208
1209   return p0->flags;
1210 }
1211
1212 /* *INDENT-OFF* */
1213 VNET_FEATURE_ARC_INIT (ip4_local) =
1214 {
1215   .arc_name  = "ip4-local",
1216   .start_nodes = VNET_FEATURES ("ip4-local"),
1217 };
1218 /* *INDENT-ON* */
1219
1220 static inline void
1221 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1222                             ip4_header_t * ip, u8 is_udp, u8 * error,
1223                             u8 * good_tcp_udp)
1224 {
1225   u32 flags0;
1226   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1227   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1228   if (is_udp)
1229     {
1230       udp_header_t *udp;
1231       u32 ip_len, udp_len;
1232       i32 len_diff;
1233       udp = ip4_next_header (ip);
1234       /* Verify UDP length. */
1235       ip_len = clib_net_to_host_u16 (ip->length);
1236       udp_len = clib_net_to_host_u16 (udp->length);
1237
1238       len_diff = ip_len - udp_len;
1239       *good_tcp_udp &= len_diff >= 0;
1240       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1241     }
1242 }
1243
/*
 * Checksum-state predicates on a buffer pointer.  Each one evaluates to
 * 0 or 1.  Arguments and expansions are fully parenthesized so the
 * macros can be negated, combined with other operators, or given an
 * expression such as "p + 1" as the buffer argument.
 */

/* True when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * True when a TCP/UDP packet still needs a software checksum check:
 * the checksum is neither already computed nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* True when the checksum is flagged correct or is offloaded. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1255
1256 static inline void
1257 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1258                          ip4_header_t * ih, u8 * error)
1259 {
1260   u8 is_udp, is_tcp_udp, good_tcp_udp;
1261
1262   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1263   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1264
1265   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1266     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1267   else
1268     good_tcp_udp = ip4_local_csum_is_valid (b);
1269
1270   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1271   *error = (is_tcp_udp && !good_tcp_udp
1272             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1273 }
1274
1275 static inline void
1276 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1277                             ip4_header_t ** ih, u8 * error)
1278 {
1279   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1280
1281   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1282   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1283
1284   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1285   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1286
1287   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1288   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1289
1290   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1291                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1292     {
1293       if (is_tcp_udp[0])
1294         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1295                                     &good_tcp_udp[0]);
1296       if (is_tcp_udp[1])
1297         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1298                                     &good_tcp_udp[1]);
1299     }
1300
1301   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1302               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1303   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1304               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1305 }
1306
1307 static inline void
1308 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1309                               vlib_buffer_t * b, u16 * next, u8 error,
1310                               u8 head_of_feature_arc)
1311 {
1312   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1313   u32 next_index;
1314
1315   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1316   b->error = error ? error_node->errors[error] : 0;
1317   if (head_of_feature_arc)
1318     {
1319       next_index = *next;
1320       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1321         {
1322           vnet_feature_arc_start (arc_index,
1323                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1324                                   &next_index, b);
1325           *next = next_index;
1326         }
1327     }
1328 }
1329
1330 typedef struct
1331 {
1332   ip4_address_t src;
1333   u32 lbi;
1334   u8 error;
1335 } ip4_local_last_check_t;
1336
1337 static inline void
1338 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1339                      ip4_local_last_check_t * last_check, u8 * error0)
1340 {
1341   ip4_fib_mtrie_leaf_t leaf0;
1342   ip4_fib_mtrie_t *mtrie0;
1343   const dpo_id_t *dpo0;
1344   load_balance_t *lb0;
1345   u32 lbi0;
1346
1347   vnet_buffer (b)->ip.fib_index =
1348     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1349     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1350
1351   if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1352     {
1353       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1354       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1355       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1356       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1357       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1358
1359       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1360       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1361
1362       lb0 = load_balance_get (lbi0);
1363       dpo0 = load_balance_get_bucket_i (lb0, 0);
1364
1365       /*
1366        * Must have a route to source otherwise we drop the packet.
1367        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1368        *
1369        * The checks are:
1370        *  - the source is a recieve => it's from us => bogus, do this
1371        *    first since it sets a different error code.
1372        *  - uRPF check for any route to source - accept if passes.
1373        *  - allow packets destined to the broadcast address from unknown sources
1374        */
1375
1376       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1377                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1378                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1379       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1380                   && !fib_urpf_check_size (lb0->lb_urpf)
1381                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1382                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1383
1384       last_check->src.as_u32 = ip0->src_address.as_u32;
1385       last_check->lbi = lbi0;
1386       last_check->error = *error0;
1387     }
1388   else
1389     {
1390       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1391       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1392       *error0 = last_check->error;
1393     }
1394 }
1395
1396 static inline void
1397 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1398                         ip4_local_last_check_t * last_check, u8 * error)
1399 {
1400   ip4_fib_mtrie_leaf_t leaf[2];
1401   ip4_fib_mtrie_t *mtrie[2];
1402   const dpo_id_t *dpo[2];
1403   load_balance_t *lb[2];
1404   u32 not_last_hit = 0;
1405   u32 lbi[2];
1406
1407   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1408   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1409
1410   vnet_buffer (b[0])->ip.fib_index =
1411     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1412     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1413     vnet_buffer (b[0])->ip.fib_index;
1414
1415   vnet_buffer (b[1])->ip.fib_index =
1416     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1417     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1418     vnet_buffer (b[1])->ip.fib_index;
1419
1420   if (PREDICT_FALSE (not_last_hit))
1421     {
1422       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1423       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1424
1425       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1426       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1427
1428       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1429                                            &ip[0]->src_address, 2);
1430       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1431                                            &ip[1]->src_address, 2);
1432
1433       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1434                                            &ip[0]->src_address, 3);
1435       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1436                                            &ip[1]->src_address, 3);
1437
1438       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1439       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1440
1441       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1442       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1443
1444       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1445       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1446
1447       lb[0] = load_balance_get (lbi[0]);
1448       lb[1] = load_balance_get (lbi[1]);
1449
1450       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1451       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1452
1453       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1454                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1455                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1456       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1457                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1458                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1459                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1460
1461       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1462                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1463                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1464       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1465                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1466                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1467                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1468
1469       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1470       last_check->lbi = lbi[1];
1471       last_check->error = error[1];
1472     }
1473   else
1474     {
1475       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1476       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1477
1478       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1479       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1480
1481       error[0] = last_check->error;
1482       error[1] = last_check->error;
1483     }
1484 }
1485
1486 static inline uword
1487 ip4_local_inline (vlib_main_t * vm,
1488                   vlib_node_runtime_t * node,
1489                   vlib_frame_t * frame, int head_of_feature_arc)
1490 {
1491   ip4_main_t *im = &ip4_main;
1492   ip_lookup_main_t *lm = &im->lookup_main;
1493   u32 *from, n_left_from;
1494   vlib_node_runtime_t *error_node =
1495     vlib_node_get_runtime (vm, ip4_input_node.index);
1496   u16 nexts[VLIB_FRAME_SIZE], *next;
1497   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1498   ip4_header_t *ip[2];
1499   u8 error[2];
1500
1501   ip4_local_last_check_t last_check = {
1502     .src = {.as_u32 = 0},
1503     .lbi = ~0,
1504     .error = IP4_ERROR_UNKNOWN_PROTOCOL
1505   };
1506
1507   from = vlib_frame_vector_args (frame);
1508   n_left_from = frame->n_vectors;
1509
1510   if (node->flags & VLIB_NODE_FLAG_TRACE)
1511     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1512
1513   vlib_get_buffers (vm, from, bufs, n_left_from);
1514   b = bufs;
1515   next = nexts;
1516
1517   while (n_left_from >= 6)
1518     {
1519       u32 is_nat, not_batch = 0;
1520
1521       /* Prefetch next iteration. */
1522       {
1523         vlib_prefetch_buffer_header (b[4], LOAD);
1524         vlib_prefetch_buffer_header (b[5], LOAD);
1525
1526         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1527         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1528       }
1529
1530       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1531
1532       ip[0] = vlib_buffer_get_current (b[0]);
1533       ip[1] = vlib_buffer_get_current (b[1]);
1534
1535       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1536       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1537
1538       is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
1539       not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
1540
1541       if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
1542         goto skip_checks;
1543
1544       if (PREDICT_TRUE (not_batch == 0))
1545         {
1546           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1547           ip4_local_check_src_x2 (b, ip, &last_check, error);
1548         }
1549       else
1550         {
1551           if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
1552             {
1553               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1554               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1555             }
1556           if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
1557             {
1558               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1559               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1560             }
1561         }
1562
1563     skip_checks:
1564
1565       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1566       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1567       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1568                                     head_of_feature_arc);
1569       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1570                                     head_of_feature_arc);
1571
1572       b += 2;
1573       next += 2;
1574       n_left_from -= 2;
1575     }
1576
1577   while (n_left_from > 0)
1578     {
1579       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1580
1581       ip[0] = vlib_buffer_get_current (b[0]);
1582       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1583
1584       if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
1585         goto skip_check;
1586
1587       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1588       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1589
1590     skip_check:
1591
1592       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1593       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1594                                     head_of_feature_arc);
1595
1596       b += 1;
1597       next += 1;
1598       n_left_from -= 1;
1599     }
1600
1601   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1602   return frame->n_vectors;
1603 }
1604
1605 static uword
1606 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1607 {
1608   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1609 }
1610
1611 /* *INDENT-OFF* */
1612 VLIB_REGISTER_NODE (ip4_local_node) =
1613 {
1614   .function = ip4_local,
1615   .name = "ip4-local",
1616   .vector_size = sizeof (u32),
1617   .format_trace = format_ip4_forward_next_trace,
1618   .n_next_nodes = IP_LOCAL_N_NEXT,
1619   .next_nodes =
1620   {
1621     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1622     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1623     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1624     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1625   },
1626 };
1627 /* *INDENT-ON* */
1628
1629 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1630
1631 static uword
1632 ip4_local_end_of_arc (vlib_main_t * vm,
1633                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1634 {
1635   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1636 }
1637
1638 /* *INDENT-OFF* */
1639 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1640   .function = ip4_local_end_of_arc,
1641   .name = "ip4-local-end-of-arc",
1642   .vector_size = sizeof (u32),
1643
1644   .format_trace = format_ip4_forward_next_trace,
1645   .sibling_of = "ip4-local",
1646 };
1647
1648 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1649
1650 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1651   .arc_name = "ip4-local",
1652   .node_name = "ip4-local-end-of-arc",
1653   .runs_before = 0, /* not before any other features */
1654 };
1655 /* *INDENT-ON* */
1656
1657 void
1658 ip4_register_protocol (u32 protocol, u32 node_index)
1659 {
1660   vlib_main_t *vm = vlib_get_main ();
1661   ip4_main_t *im = &ip4_main;
1662   ip_lookup_main_t *lm = &im->lookup_main;
1663
1664   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1665   lm->local_next_by_ip_protocol[protocol] =
1666     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1667 }
1668
1669 static clib_error_t *
1670 show_ip_local_command_fn (vlib_main_t * vm,
1671                           unformat_input_t * input, vlib_cli_command_t * cmd)
1672 {
1673   ip4_main_t *im = &ip4_main;
1674   ip_lookup_main_t *lm = &im->lookup_main;
1675   int i;
1676
1677   vlib_cli_output (vm, "Protocols handled by ip4_local");
1678   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1679     {
1680       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1681         {
1682           u32 node_index = vlib_get_node (vm,
1683                                           ip4_local_node.index)->
1684             next_nodes[lm->local_next_by_ip_protocol[i]];
1685           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1686                            node_index);
1687         }
1688     }
1689   return 0;
1690 }
1691
1692
1693
1694 /*?
1695  * Display the set of protocols handled by the local IPv4 stack.
1696  *
1697  * @cliexpar
1698  * Example of how to display local protocol table:
1699  * @cliexstart{show ip local}
1700  * Protocols handled by ip4_local
1701  * 1
1702  * 17
1703  * 47
1704  * @cliexend
1705 ?*/
1706 /* *INDENT-OFF* */
1707 VLIB_CLI_COMMAND (show_ip_local, static) =
1708 {
1709   .path = "show ip local",
1710   .function = show_ip_local_command_fn,
1711   .short_help = "show ip local",
1712 };
1713 /* *INDENT-ON* */
1714
1715 always_inline uword
1716 ip4_arp_inline (vlib_main_t * vm,
1717                 vlib_node_runtime_t * node,
1718                 vlib_frame_t * frame, int is_glean)
1719 {
1720   vnet_main_t *vnm = vnet_get_main ();
1721   ip4_main_t *im = &ip4_main;
1722   ip_lookup_main_t *lm = &im->lookup_main;
1723   u32 *from, *to_next_drop;
1724   uword n_left_from, n_left_to_next_drop, next_index;
1725   u32 thread_index = vm->thread_index;
1726   u32 seed;
1727   f64 time_now;
1728
1729   if (node->flags & VLIB_NODE_FLAG_TRACE)
1730     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1731
1732   time_now = vlib_time_now (vm);
1733   if (time_now - im->arp_throttle_last_seed_change_time[thread_index] > 1e-3)
1734     {
1735       (void) random_u32 (&im->arp_throttle_seeds[thread_index]);
1736       memset (im->arp_throttle_bitmaps[thread_index], 0,
1737               ARP_THROTTLE_BITS / BITS (u8));
1738
1739       im->arp_throttle_last_seed_change_time[thread_index] = time_now;
1740     }
1741   seed = im->arp_throttle_seeds[thread_index];
1742
1743   from = vlib_frame_vector_args (frame);
1744   n_left_from = frame->n_vectors;
1745   next_index = node->cached_next_index;
1746   if (next_index == IP4_ARP_NEXT_DROP)
1747     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1748
1749   while (n_left_from > 0)
1750     {
1751       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1752                            to_next_drop, n_left_to_next_drop);
1753
1754       while (n_left_from > 0 && n_left_to_next_drop > 0)
1755         {
1756           u32 pi0, adj_index0, r0, w0, sw_if_index0, drop0;
1757           uword m0;
1758           ip_adjacency_t *adj0;
1759           vlib_buffer_t *p0;
1760           ip4_header_t *ip0;
1761
1762           pi0 = from[0];
1763
1764           p0 = vlib_get_buffer (vm, pi0);
1765
1766           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1767           adj0 = adj_get (adj_index0);
1768           ip0 = vlib_buffer_get_current (p0);
1769
1770           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1771           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1772
1773           if (PREDICT_TRUE (is_glean))
1774             {
1775               /*
1776                * this is the Glean case, so we are ARPing for the
1777                * packet's destination
1778                */
1779               r0 = ip0->dst_address.data_u32;
1780             }
1781           else
1782             {
1783               r0 = adj0->sub_type.nbr.next_hop.ip4.data_u32;
1784             }
1785
1786           r0 ^= seed;
1787           /* Select bit number */
1788           r0 &= ARP_THROTTLE_BITS - 1;
1789           w0 = r0 / BITS (uword);
1790           m0 = (uword) 1 << (r0 % BITS (uword));
1791
1792           drop0 = (im->arp_throttle_bitmaps[thread_index][w0] & m0) != 0;
1793           im->arp_throttle_bitmaps[thread_index][w0] |= m0;
1794
1795           from += 1;
1796           n_left_from -= 1;
1797           to_next_drop[0] = pi0;
1798           to_next_drop += 1;
1799           n_left_to_next_drop -= 1;
1800
1801           p0->error =
1802             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1803                          IP4_ARP_ERROR_REQUEST_SENT];
1804
1805           /*
1806            * the adj has been updated to a rewrite but the node the DPO that got
1807            * us here hasn't - yet. no big deal. we'll drop while we wait.
1808            */
1809           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1810             continue;
1811
1812           if (drop0)
1813             continue;
1814
1815           /*
1816            * Can happen if the control-plane is programming tables
1817            * with traffic flowing; at least that's today's lame excuse.
1818            */
1819           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1820               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1821             {
1822               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1823             }
1824           else
1825             /* Send ARP request. */
1826             {
1827               u32 bi0 = 0;
1828               vlib_buffer_t *b0;
1829               ethernet_arp_header_t *h0;
1830               vnet_hw_interface_t *hw_if0;
1831
1832               h0 =
1833                 vlib_packet_template_get_packet (vm,
1834                                                  &im->ip4_arp_request_packet_template,
1835                                                  &bi0);
1836
1837               /* Seems we're out of buffers */
1838               if (PREDICT_FALSE (!h0))
1839                 continue;
1840
1841               /* Add rewrite/encap string for ARP packet. */
1842               vnet_rewrite_one_header (adj0[0], h0,
1843                                        sizeof (ethernet_header_t));
1844
1845               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1846
1847               /* Src ethernet address in ARP header. */
1848               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1849                            hw_if0->hw_address,
1850                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1851
1852               if (is_glean)
1853                 {
1854                   /* The interface's source address is stashed in the Glean Adj */
1855                   h0->ip4_over_ethernet[0].ip4 =
1856                     adj0->sub_type.glean.receive_addr.ip4;
1857
1858                   /* Copy in destination address we are requesting. This is the
1859                    * glean case, so it's the packet's destination.*/
1860                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1861                     ip0->dst_address.data_u32;
1862                 }
1863               else
1864                 {
1865                   /* Src IP address in ARP header. */
1866                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1867                                                   &h0->
1868                                                   ip4_over_ethernet[0].ip4))
1869                     {
1870                       /* No source address available */
1871                       p0->error =
1872                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1873                       vlib_buffer_free (vm, &bi0, 1);
1874                       continue;
1875                     }
1876
1877                   /* Copy in destination address we are requesting from the
1878                      incomplete adj */
1879                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1880                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1881                 }
1882
1883               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1884               b0 = vlib_get_buffer (vm, bi0);
1885               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1886               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1887
1888               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1889
1890               vlib_set_next_frame_buffer (vm, node,
1891                                           adj0->rewrite_header.next_index,
1892                                           bi0);
1893             }
1894         }
1895
1896       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1897     }
1898
1899   return frame->n_vectors;
1900 }
1901
1902 static uword
1903 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1904 {
1905   return (ip4_arp_inline (vm, node, frame, 0));
1906 }
1907
1908 static uword
1909 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1910 {
1911   return (ip4_arp_inline (vm, node, frame, 1));
1912 }
1913
1914 static char *ip4_arp_error_strings[] = {
1915   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1916   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1917   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1918   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1919   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1920   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1921 };
1922
1923 /* *INDENT-OFF* */
1924 VLIB_REGISTER_NODE (ip4_arp_node) =
1925 {
1926   .function = ip4_arp,
1927   .name = "ip4-arp",
1928   .vector_size = sizeof (u32),
1929   .format_trace = format_ip4_forward_next_trace,
1930   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1931   .error_strings = ip4_arp_error_strings,
1932   .n_next_nodes = IP4_ARP_N_NEXT,
1933   .next_nodes =
1934   {
1935     [IP4_ARP_NEXT_DROP] = "error-drop",
1936   },
1937 };
1938
1939 VLIB_REGISTER_NODE (ip4_glean_node) =
1940 {
1941   .function = ip4_glean,
1942   .name = "ip4-glean",
1943   .vector_size = sizeof (u32),
1944   .format_trace = format_ip4_forward_next_trace,
1945   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1946   .error_strings = ip4_arp_error_strings,
1947   .n_next_nodes = IP4_ARP_N_NEXT,
1948   .next_nodes = {
1949   [IP4_ARP_NEXT_DROP] = "error-drop",
1950   },
1951 };
1952 /* *INDENT-ON* */
1953
1954 #define foreach_notrace_ip4_arp_error           \
1955 _(DROP)                                         \
1956 _(REQUEST_SENT)                                 \
1957 _(REPLICATE_DROP)                               \
1958 _(REPLICATE_FAIL)
1959
1960 clib_error_t *
1961 arp_notrace_init (vlib_main_t * vm)
1962 {
1963   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1964
1965   /* don't trace ARP request packets */
1966 #define _(a)                                    \
1967     vnet_pcap_drop_trace_filter_add_del         \
1968         (rt->errors[IP4_ARP_ERROR_##a],         \
1969          1 /* is_add */);
1970   foreach_notrace_ip4_arp_error;
1971 #undef _
1972   return 0;
1973 }
1974
1975 VLIB_INIT_FUNCTION (arp_notrace_init);
1976
1977
1978 /* Send an ARP request to see if given destination is reachable on given interface. */
1979 clib_error_t *
1980 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1981                     u8 refresh)
1982 {
1983   vnet_main_t *vnm = vnet_get_main ();
1984   ip4_main_t *im = &ip4_main;
1985   ethernet_arp_header_t *h;
1986   ip4_address_t *src;
1987   ip_interface_address_t *ia;
1988   ip_adjacency_t *adj;
1989   vnet_hw_interface_t *hi;
1990   vnet_sw_interface_t *si;
1991   vlib_buffer_t *b;
1992   adj_index_t ai;
1993   u32 bi = 0;
1994   u8 unicast_rewrite = 0;
1995
1996   si = vnet_get_sw_interface (vnm, sw_if_index);
1997
1998   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1999     {
2000       return clib_error_return (0, "%U: interface %U down",
2001                                 format_ip4_address, dst,
2002                                 format_vnet_sw_if_index_name, vnm,
2003                                 sw_if_index);
2004     }
2005
2006   src =
2007     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2008   if (!src)
2009     {
2010       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2011       return clib_error_return
2012         (0,
2013          "no matching interface address for destination %U (interface %U)",
2014          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2015          sw_if_index);
2016     }
2017
2018   h = vlib_packet_template_get_packet (vm,
2019                                        &im->ip4_arp_request_packet_template,
2020                                        &bi);
2021
2022   if (!h)
2023     return clib_error_return (0, "ARP request packet allocation failed");
2024
2025   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2026   if (PREDICT_FALSE (!hi->hw_address))
2027     {
2028       return clib_error_return (0, "%U: interface %U do not support ip probe",
2029                                 format_ip4_address, dst,
2030                                 format_vnet_sw_if_index_name, vnm,
2031                                 sw_if_index);
2032     }
2033
2034   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2035                sizeof (h->ip4_over_ethernet[0].ethernet));
2036
2037   h->ip4_over_ethernet[0].ip4 = src[0];
2038   h->ip4_over_ethernet[1].ip4 = dst[0];
2039
2040   b = vlib_get_buffer (vm, bi);
2041   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2042     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2043
2044   ip46_address_t nh = {
2045     .ip4 = *dst,
2046   };
2047
2048   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2049                             VNET_LINK_IP4, &nh, sw_if_index);
2050   adj = adj_get (ai);
2051
2052   /* Peer has been previously resolved, retrieve glean adj instead */
2053   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2054     {
2055       if (refresh)
2056         unicast_rewrite = 1;
2057       else
2058         {
2059           adj_unlock (ai);
2060           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2061                                       VNET_LINK_IP4, sw_if_index, &nh);
2062           adj = adj_get (ai);
2063         }
2064     }
2065
2066   /* Add encapsulation string for software interface (e.g. ethernet header). */
2067   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2068   if (unicast_rewrite)
2069     {
2070       u16 *etype = vlib_buffer_get_current (b) - 2;
2071       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2072     }
2073   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2074
2075   {
2076     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2077     u32 *to_next = vlib_frame_vector_args (f);
2078     to_next[0] = bi;
2079     f->n_vectors = 1;
2080     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2081   }
2082
2083   adj_unlock (ai);
2084   return /* no error */ 0;
2085 }
2086
/* Next slots used by the IPv4 rewrite worker for packets that are not sent
 * to the adjacency's own rewrite_header.next_index. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP,        /* initial disposition, kept on error */
  IP4_REWRITE_NEXT_ICMP_ERROR,  /* TTL expired, or MTU exceeded with DF set */
  IP4_REWRITE_NEXT_FRAGMENT,    /* MTU exceeded, DF clear */
  IP4_REWRITE_N_NEXT,           /* Last */
} ip4_rewrite_next_t;
2094
/**
 * The bits of an IPv4 address (as held in memory, hence the two
 * endian-specific values) that are kept when constructing a multicast
 * MAC address.
 */
#if CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0x007fffff
#else
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#endif
2104
2105 always_inline void
2106 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2107                u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
2108 {
2109   if (packet_len > adj_packet_bytes)
2110     {
2111       *error = IP4_ERROR_MTU_EXCEEDED;
2112       if (df)
2113         {
2114           icmp4_error_set_vnet_buffer
2115             (b, ICMP4_destination_unreachable,
2116              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2117              adj_packet_bytes);
2118           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2119         }
2120       else
2121         {
2122           /* IP fragmentation */
2123           ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
2124                                    IP4_FRAG_NEXT_IP4_LOOKUP, 0);
2125           *next = IP4_REWRITE_NEXT_FRAGMENT;
2126         }
2127     }
2128 }
2129
2130 always_inline uword
2131 ip4_rewrite_inline (vlib_main_t * vm,
2132                     vlib_node_runtime_t * node,
2133                     vlib_frame_t * frame,
2134                     int do_counters, int is_midchain, int is_mcast)
2135 {
2136   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2137   u32 *from = vlib_frame_vector_args (frame);
2138   u32 n_left_from, n_left_to_next, *to_next, next_index;
2139   vlib_node_runtime_t *error_node =
2140     vlib_node_get_runtime (vm, ip4_input_node.index);
2141
2142   n_left_from = frame->n_vectors;
2143   next_index = node->cached_next_index;
2144   u32 thread_index = vm->thread_index;
2145
2146   while (n_left_from > 0)
2147     {
2148       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2149
2150       while (n_left_from >= 4 && n_left_to_next >= 2)
2151         {
2152           ip_adjacency_t *adj0, *adj1;
2153           vlib_buffer_t *p0, *p1;
2154           ip4_header_t *ip0, *ip1;
2155           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2156           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2157           u32 tx_sw_if_index0, tx_sw_if_index1;
2158
2159           /* Prefetch next iteration. */
2160           {
2161             vlib_buffer_t *p2, *p3;
2162
2163             p2 = vlib_get_buffer (vm, from[2]);
2164             p3 = vlib_get_buffer (vm, from[3]);
2165
2166             vlib_prefetch_buffer_header (p2, STORE);
2167             vlib_prefetch_buffer_header (p3, STORE);
2168
2169             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2170             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2171           }
2172
2173           pi0 = to_next[0] = from[0];
2174           pi1 = to_next[1] = from[1];
2175
2176           from += 2;
2177           n_left_from -= 2;
2178           to_next += 2;
2179           n_left_to_next -= 2;
2180
2181           p0 = vlib_get_buffer (vm, pi0);
2182           p1 = vlib_get_buffer (vm, pi1);
2183
2184           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2185           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2186
2187           /*
2188            * pre-fetch the per-adjacency counters
2189            */
2190           if (do_counters)
2191             {
2192               vlib_prefetch_combined_counter (&adjacency_counters,
2193                                               thread_index, adj_index0);
2194               vlib_prefetch_combined_counter (&adjacency_counters,
2195                                               thread_index, adj_index1);
2196             }
2197
2198           ip0 = vlib_buffer_get_current (p0);
2199           ip1 = vlib_buffer_get_current (p1);
2200
2201           error0 = error1 = IP4_ERROR_NONE;
2202           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2203
2204           /* Decrement TTL & update checksum.
2205              Works either endian, so no need for byte swap. */
2206           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2207             {
2208               i32 ttl0 = ip0->ttl;
2209
2210               /* Input node should have reject packets with ttl 0. */
2211               ASSERT (ip0->ttl > 0);
2212
2213               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2214               checksum0 += checksum0 >= 0xffff;
2215
2216               ip0->checksum = checksum0;
2217               ttl0 -= 1;
2218               ip0->ttl = ttl0;
2219
2220               /*
2221                * If the ttl drops below 1 when forwarding, generate
2222                * an ICMP response.
2223                */
2224               if (PREDICT_FALSE (ttl0 <= 0))
2225                 {
2226                   error0 = IP4_ERROR_TIME_EXPIRED;
2227                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2228                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2229                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2230                                                0);
2231                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2232                 }
2233
2234               /* Verify checksum. */
2235               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2236                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2237             }
2238           else
2239             {
2240               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2241             }
2242           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2243             {
2244               i32 ttl1 = ip1->ttl;
2245
2246               /* Input node should have reject packets with ttl 0. */
2247               ASSERT (ip1->ttl > 0);
2248
2249               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2250               checksum1 += checksum1 >= 0xffff;
2251
2252               ip1->checksum = checksum1;
2253               ttl1 -= 1;
2254               ip1->ttl = ttl1;
2255
2256               /*
2257                * If the ttl drops below 1 when forwarding, generate
2258                * an ICMP response.
2259                */
2260               if (PREDICT_FALSE (ttl1 <= 0))
2261                 {
2262                   error1 = IP4_ERROR_TIME_EXPIRED;
2263                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2264                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2265                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2266                                                0);
2267                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2268                 }
2269
2270               /* Verify checksum. */
2271               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2272                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2273             }
2274           else
2275             {
2276               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2277             }
2278
2279           /* Rewrite packet header and updates lengths. */
2280           adj0 = adj_get (adj_index0);
2281           adj1 = adj_get (adj_index1);
2282
2283           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2284           rw_len0 = adj0[0].rewrite_header.data_bytes;
2285           rw_len1 = adj1[0].rewrite_header.data_bytes;
2286           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2287           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2288
2289           /* Check MTU of outgoing interface. */
2290           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2291                          adj0[0].rewrite_header.max_l3_packet_bytes,
2292                          ip0->flags_and_fragment_offset &
2293                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2294                          &next0, &error0);
2295           ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2296                          adj1[0].rewrite_header.max_l3_packet_bytes,
2297                          ip1->flags_and_fragment_offset &
2298                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2299                          &next1, &error1);
2300
2301           if (is_mcast)
2302             {
2303               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2304                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2305                         IP4_ERROR_SAME_INTERFACE : error0);
2306               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2307                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2308                         IP4_ERROR_SAME_INTERFACE : error1);
2309             }
2310
2311           p0->error = error_node->errors[error0];
2312           p1->error = error_node->errors[error1];
2313           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2314            * to see the IP headerr */
2315           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2316             {
2317               next0 = adj0[0].rewrite_header.next_index;
2318               p0->current_data -= rw_len0;
2319               p0->current_length += rw_len0;
2320               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2321               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2322
2323               if (PREDICT_FALSE
2324                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2325                 vnet_feature_arc_start (lm->output_feature_arc_index,
2326                                         tx_sw_if_index0, &next0, p0);
2327             }
2328           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2329             {
2330               next1 = adj1[0].rewrite_header.next_index;
2331               p1->current_data -= rw_len1;
2332               p1->current_length += rw_len1;
2333
2334               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2335               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2336
2337               if (PREDICT_FALSE
2338                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2339                 vnet_feature_arc_start (lm->output_feature_arc_index,
2340                                         tx_sw_if_index1, &next1, p1);
2341             }
2342
2343           /* Guess we are only writing on simple Ethernet header. */
2344           vnet_rewrite_two_headers (adj0[0], adj1[0],
2345                                     ip0, ip1, sizeof (ethernet_header_t));
2346
2347           /*
2348            * Bump the per-adjacency counters
2349            */
2350           if (do_counters)
2351             {
2352               vlib_increment_combined_counter
2353                 (&adjacency_counters,
2354                  thread_index,
2355                  adj_index0, 1,
2356                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2357
2358               vlib_increment_combined_counter
2359                 (&adjacency_counters,
2360                  thread_index,
2361                  adj_index1, 1,
2362                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2363             }
2364
2365           if (is_midchain)
2366             {
2367               adj0->sub_type.midchain.fixup_func
2368                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2369               adj1->sub_type.midchain.fixup_func
2370                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2371             }
2372           if (is_mcast)
2373             {
2374               /*
2375                * copy bytes from the IP address into the MAC rewrite
2376                */
2377               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2378                                           adj0->
2379                                           rewrite_header.dst_mcast_offset,
2380                                           &ip0->dst_address.as_u32,
2381                                           (u8 *) ip0);
2382               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2383                                           adj0->
2384                                           rewrite_header.dst_mcast_offset,
2385                                           &ip1->dst_address.as_u32,
2386                                           (u8 *) ip1);
2387             }
2388
2389           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2390                                            to_next, n_left_to_next,
2391                                            pi0, pi1, next0, next1);
2392         }
2393
2394       while (n_left_from > 0 && n_left_to_next > 0)
2395         {
2396           ip_adjacency_t *adj0;
2397           vlib_buffer_t *p0;
2398           ip4_header_t *ip0;
2399           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2400           u32 tx_sw_if_index0;
2401
2402           pi0 = to_next[0] = from[0];
2403
2404           p0 = vlib_get_buffer (vm, pi0);
2405
2406           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2407
2408           adj0 = adj_get (adj_index0);
2409
2410           ip0 = vlib_buffer_get_current (p0);
2411
2412           error0 = IP4_ERROR_NONE;
2413           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2414
2415           /* Decrement TTL & update checksum. */
2416           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2417             {
2418               i32 ttl0 = ip0->ttl;
2419
2420               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2421
2422               checksum0 += checksum0 >= 0xffff;
2423
2424               ip0->checksum = checksum0;
2425
2426               ASSERT (ip0->ttl > 0);
2427
2428               ttl0 -= 1;
2429
2430               ip0->ttl = ttl0;
2431
2432               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2433                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2434
2435               if (PREDICT_FALSE (ttl0 <= 0))
2436                 {
2437                   /*
2438                    * If the ttl drops below 1 when forwarding, generate
2439                    * an ICMP response.
2440                    */
2441                   error0 = IP4_ERROR_TIME_EXPIRED;
2442                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2443                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2444                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2445                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2446                                                0);
2447                 }
2448             }
2449           else
2450             {
2451               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2452             }
2453
2454           if (do_counters)
2455             vlib_prefetch_combined_counter (&adjacency_counters,
2456                                             thread_index, adj_index0);
2457
2458           /* Guess we are only writing on simple Ethernet header. */
2459           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2460           if (is_mcast)
2461             {
2462               /*
2463                * copy bytes from the IP address into the MAC rewrite
2464                */
2465               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2466                                           adj0->
2467                                           rewrite_header.dst_mcast_offset,
2468                                           &ip0->dst_address.as_u32,
2469                                           (u8 *) ip0);
2470             }
2471
2472           /* Update packet buffer attributes/set output interface. */
2473           rw_len0 = adj0[0].rewrite_header.data_bytes;
2474           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2475
2476           if (do_counters)
2477             vlib_increment_combined_counter
2478               (&adjacency_counters,
2479                thread_index, adj_index0, 1,
2480                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2481
2482           /* Check MTU of outgoing interface. */
2483           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2484                          adj0[0].rewrite_header.max_l3_packet_bytes,
2485                          ip0->flags_and_fragment_offset &
2486                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2487                          &next0, &error0);
2488
2489           if (is_mcast)
2490             {
2491               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2492                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2493                         IP4_ERROR_SAME_INTERFACE : error0);
2494             }
2495           p0->error = error_node->errors[error0];
2496
2497           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2498            * to see the IP headerr */
2499           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2500             {
2501               p0->current_data -= rw_len0;
2502               p0->current_length += rw_len0;
2503               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2504
2505               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2506               next0 = adj0[0].rewrite_header.next_index;
2507
2508               if (is_midchain)
2509                 {
2510                   adj0->sub_type.midchain.fixup_func
2511                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2512                 }
2513
2514               if (PREDICT_FALSE
2515                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2516                 vnet_feature_arc_start (lm->output_feature_arc_index,
2517                                         tx_sw_if_index0, &next0, p0);
2518
2519             }
2520
2521           from += 1;
2522           n_left_from -= 1;
2523           to_next += 1;
2524           n_left_to_next -= 1;
2525
2526           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2527                                            to_next, n_left_to_next,
2528                                            pi0, next0);
2529         }
2530
2531       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2532     }
2533
2534   /* Need to do trace after rewrites to pick up new packet data. */
2535   if (node->flags & VLIB_NODE_FLAG_TRACE)
2536     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2537
2538   return frame->n_vectors;
2539 }
2540
2541
2542 /** @brief IPv4 rewrite node.
2543     @node ip4-rewrite
2544
2545     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2546     header checksum, fetch the ip adjacency, check the outbound mtu,
2547     apply the adjacency rewrite, and send pkts to the adjacency
2548     rewrite header's rewrite_next_index.
2549
2550     @param vm vlib_main_t corresponding to the current thread
2551     @param node vlib_node_runtime_t
2552     @param frame vlib_frame_t whose contents should be dispatched
2553
2554     @par Graph mechanics: buffer metadata, next index usage
2555
2556     @em Uses:
2557     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2558         - the rewrite adjacency index
2559     - <code>adj->lookup_next_index</code>
2560         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2561           the packet will be dropped.
2562     - <code>adj->rewrite_header</code>
2563         - Rewrite string length, rewrite string, next_index
2564
2565     @em Sets:
2566     - <code>b->current_data, b->current_length</code>
2567         - Updated net of applying the rewrite string
2568
2569     <em>Next Indices:</em>
2570     - <code> adj->rewrite_header.next_index </code>
2571       or @c ip4-drop
2572 */
2573 static uword
2574 ip4_rewrite (vlib_main_t * vm,
2575              vlib_node_runtime_t * node, vlib_frame_t * frame)
2576 {
2577   if (adj_are_counters_enabled ())
2578     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2579   else
2580     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2581 }
2582
2583 static uword
2584 ip4_rewrite_bcast (vlib_main_t * vm,
2585                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2586 {
2587   if (adj_are_counters_enabled ())
2588     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2589   else
2590     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2591 }
2592
2593 static uword
2594 ip4_midchain (vlib_main_t * vm,
2595               vlib_node_runtime_t * node, vlib_frame_t * frame)
2596 {
2597   if (adj_are_counters_enabled ())
2598     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2599   else
2600     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2601 }
2602
2603 static uword
2604 ip4_rewrite_mcast (vlib_main_t * vm,
2605                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2606 {
2607   if (adj_are_counters_enabled ())
2608     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2609   else
2610     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2611 }
2612
2613 static uword
2614 ip4_mcast_midchain (vlib_main_t * vm,
2615                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2616 {
2617   if (adj_are_counters_enabled ())
2618     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2619   else
2620     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2621 }
2622
2623 /* *INDENT-OFF* */
2624 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2625   .function = ip4_rewrite,
2626   .name = "ip4-rewrite",
2627   .vector_size = sizeof (u32),
2628
2629   .format_trace = format_ip4_rewrite_trace,
2630
2631   .n_next_nodes = IP4_REWRITE_N_NEXT,
2632   .next_nodes = {
2633     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2634     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2635     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2636   },
2637 };
2638
2639 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2640   .function = ip4_rewrite_bcast,
2641   .name = "ip4-rewrite-bcast",
2642   .vector_size = sizeof (u32),
2643
2644   .format_trace = format_ip4_rewrite_trace,
2645   .sibling_of = "ip4-rewrite",
2646 };
2647 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_bcast_node, ip4_rewrite_bcast)
2648
2649 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2650   .function = ip4_rewrite_mcast,
2651   .name = "ip4-rewrite-mcast",
2652   .vector_size = sizeof (u32),
2653
2654   .format_trace = format_ip4_rewrite_trace,
2655   .sibling_of = "ip4-rewrite",
2656 };
2657 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2658
2659 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2660   .function = ip4_mcast_midchain,
2661   .name = "ip4-mcast-midchain",
2662   .vector_size = sizeof (u32),
2663
2664   .format_trace = format_ip4_rewrite_trace,
2665   .sibling_of = "ip4-rewrite",
2666 };
2667 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2668
2669 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2670   .function = ip4_midchain,
2671   .name = "ip4-midchain",
2672   .vector_size = sizeof (u32),
2673   .format_trace = format_ip4_forward_next_trace,
2674   .sibling_of =  "ip4-rewrite",
2675 };
2676 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2677 /* *INDENT-ON* */
2678
2679 int
2680 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2681 {
2682   ip4_fib_mtrie_t *mtrie0;
2683   ip4_fib_mtrie_leaf_t leaf0;
2684   u32 lbi0;
2685
2686   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2687
2688   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2689   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2690   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2691
2692   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2693
2694   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2695 }
2696
2697 static clib_error_t *
2698 test_lookup_command_fn (vlib_main_t * vm,
2699                         unformat_input_t * input, vlib_cli_command_t * cmd)
2700 {
2701   ip4_fib_t *fib;
2702   u32 table_id = 0;
2703   f64 count = 1;
2704   u32 n;
2705   int i;
2706   ip4_address_t ip4_base_address;
2707   u64 errors = 0;
2708
2709   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2710     {
2711       if (unformat (input, "table %d", &table_id))
2712         {
2713           /* Make sure the entry exists. */
2714           fib = ip4_fib_get (table_id);
2715           if ((fib) && (fib->index != table_id))
2716             return clib_error_return (0, "<fib-index> %d does not exist",
2717                                       table_id);
2718         }
2719       else if (unformat (input, "count %f", &count))
2720         ;
2721
2722       else if (unformat (input, "%U",
2723                          unformat_ip4_address, &ip4_base_address))
2724         ;
2725       else
2726         return clib_error_return (0, "unknown input `%U'",
2727                                   format_unformat_error, input);
2728     }
2729
2730   n = count;
2731
2732   for (i = 0; i < n; i++)
2733     {
2734       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2735         errors++;
2736
2737       ip4_base_address.as_u32 =
2738         clib_host_to_net_u32 (1 +
2739                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2740     }
2741
2742   if (errors)
2743     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2744   else
2745     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2746
2747   return 0;
2748 }
2749
2750 /*?
2751  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2752  * given FIB table to determine if there is a conflict with the
2753  * adjacency table. The fib-id can be determined by using the
2754  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2755  * of 0 is used.
2756  *
2757  * @todo This command uses fib-id, other commands use table-id (not
2758  * just a name, they are different indexes). Would like to change this
2759  * to table-id for consistency.
2760  *
2761  * @cliexpar
2762  * Example of how to run the test lookup command:
2763  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2764  * No errors in 2 lookups
2765  * @cliexend
2766 ?*/
2767 /* *INDENT-OFF* */
2768 VLIB_CLI_COMMAND (lookup_test_command, static) =
2769 {
2770   .path = "test lookup",
2771   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2772   .function = test_lookup_command_fn,
2773 };
2774 /* *INDENT-ON* */
2775
2776 int
2777 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2778 {
2779   u32 fib_index;
2780
2781   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2782
2783   if (~0 == fib_index)
2784     return VNET_API_ERROR_NO_SUCH_FIB;
2785
2786   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2787                                   flow_hash_config);
2788
2789   return 0;
2790 }
2791
2792 static clib_error_t *
2793 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2794                              unformat_input_t * input,
2795                              vlib_cli_command_t * cmd)
2796 {
2797   int matched = 0;
2798   u32 table_id = 0;
2799   u32 flow_hash_config = 0;
2800   int rv;
2801
2802   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2803     {
2804       if (unformat (input, "table %d", &table_id))
2805         matched = 1;
2806 #define _(a,v) \
2807     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2808       foreach_flow_hash_bit
2809 #undef _
2810         else
2811         break;
2812     }
2813
2814   if (matched == 0)
2815     return clib_error_return (0, "unknown input `%U'",
2816                               format_unformat_error, input);
2817
2818   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2819   switch (rv)
2820     {
2821     case 0:
2822       break;
2823
2824     case VNET_API_ERROR_NO_SUCH_FIB:
2825       return clib_error_return (0, "no such FIB table %d", table_id);
2826
2827     default:
2828       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2829       break;
2830     }
2831
2832   return 0;
2833 }
2834
2835 /*?
2836  * Configure the set of IPv4 fields used by the flow hash.
2837  *
2838  * @cliexpar
2839  * Example of how to set the flow hash on a given table:
2840  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2841  * Example of how to display the configured flow hash:
2842  * @cliexstart{show ip fib}
2843  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2844  * 0.0.0.0/0
2845  *   unicast-ip4-chain
2846  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2847  *     [0] [@0]: dpo-drop ip6
2848  * 0.0.0.0/32
2849  *   unicast-ip4-chain
2850  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2851  *     [0] [@0]: dpo-drop ip6
2852  * 224.0.0.0/8
2853  *   unicast-ip4-chain
2854  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2855  *     [0] [@0]: dpo-drop ip6
2856  * 6.0.1.2/32
2857  *   unicast-ip4-chain
2858  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2859  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2860  * 7.0.0.1/32
2861  *   unicast-ip4-chain
2862  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2863  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2864  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2865  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2866  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2867  * 240.0.0.0/8
2868  *   unicast-ip4-chain
2869  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2870  *     [0] [@0]: dpo-drop ip6
2871  * 255.255.255.255/32
2872  *   unicast-ip4-chain
2873  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2874  *     [0] [@0]: dpo-drop ip6
2875  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2876  * 0.0.0.0/0
2877  *   unicast-ip4-chain
2878  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2879  *     [0] [@0]: dpo-drop ip6
2880  * 0.0.0.0/32
2881  *   unicast-ip4-chain
2882  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2883  *     [0] [@0]: dpo-drop ip6
2884  * 172.16.1.0/24
2885  *   unicast-ip4-chain
2886  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2887  *     [0] [@4]: ipv4-glean: af_packet0
2888  * 172.16.1.1/32
2889  *   unicast-ip4-chain
2890  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2891  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2892  * 172.16.1.2/32
2893  *   unicast-ip4-chain
2894  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2895  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2896  * 172.16.2.0/24
2897  *   unicast-ip4-chain
2898  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2899  *     [0] [@4]: ipv4-glean: af_packet1
2900  * 172.16.2.1/32
2901  *   unicast-ip4-chain
2902  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2903  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2904  * 224.0.0.0/8
2905  *   unicast-ip4-chain
2906  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2907  *     [0] [@0]: dpo-drop ip6
2908  * 240.0.0.0/8
2909  *   unicast-ip4-chain
2910  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2911  *     [0] [@0]: dpo-drop ip6
2912  * 255.255.255.255/32
2913  *   unicast-ip4-chain
2914  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2915  *     [0] [@0]: dpo-drop ip6
2916  * @cliexend
2917 ?*/
2918 /* *INDENT-OFF* */
2919 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2920 {
2921   .path = "set ip flow-hash",
2922   .short_help =
2923   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2924   .function = set_ip_flow_hash_command_fn,
2925 };
2926 /* *INDENT-ON* */
2927
2928 int
2929 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2930                              u32 table_index)
2931 {
2932   vnet_main_t *vnm = vnet_get_main ();
2933   vnet_interface_main_t *im = &vnm->interface_main;
2934   ip4_main_t *ipm = &ip4_main;
2935   ip_lookup_main_t *lm = &ipm->lookup_main;
2936   vnet_classify_main_t *cm = &vnet_classify_main;
2937   ip4_address_t *if_addr;
2938
2939   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2940     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2941
2942   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2943     return VNET_API_ERROR_NO_SUCH_ENTRY;
2944
2945   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2946   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2947
2948   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2949
2950   if (NULL != if_addr)
2951     {
2952       fib_prefix_t pfx = {
2953         .fp_len = 32,
2954         .fp_proto = FIB_PROTOCOL_IP4,
2955         .fp_addr.ip4 = *if_addr,
2956       };
2957       u32 fib_index;
2958
2959       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2960                                                        sw_if_index);
2961
2962
2963       if (table_index != (u32) ~ 0)
2964         {
2965           dpo_id_t dpo = DPO_INVALID;
2966
2967           dpo_set (&dpo,
2968                    DPO_CLASSIFY,
2969                    DPO_PROTO_IP4,
2970                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2971
2972           fib_table_entry_special_dpo_add (fib_index,
2973                                            &pfx,
2974                                            FIB_SOURCE_CLASSIFY,
2975                                            FIB_ENTRY_FLAG_NONE, &dpo);
2976           dpo_reset (&dpo);
2977         }
2978       else
2979         {
2980           fib_table_entry_special_remove (fib_index,
2981                                           &pfx, FIB_SOURCE_CLASSIFY);
2982         }
2983     }
2984
2985   return 0;
2986 }
2987
2988 static clib_error_t *
2989 set_ip_classify_command_fn (vlib_main_t * vm,
2990                             unformat_input_t * input,
2991                             vlib_cli_command_t * cmd)
2992 {
2993   u32 table_index = ~0;
2994   int table_index_set = 0;
2995   u32 sw_if_index = ~0;
2996   int rv;
2997
2998   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2999     {
3000       if (unformat (input, "table-index %d", &table_index))
3001         table_index_set = 1;
3002       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3003                          vnet_get_main (), &sw_if_index))
3004         ;
3005       else
3006         break;
3007     }
3008
3009   if (table_index_set == 0)
3010     return clib_error_return (0, "classify table-index must be specified");
3011
3012   if (sw_if_index == ~0)
3013     return clib_error_return (0, "interface / subif must be specified");
3014
3015   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3016
3017   switch (rv)
3018     {
3019     case 0:
3020       break;
3021
3022     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3023       return clib_error_return (0, "No such interface");
3024
3025     case VNET_API_ERROR_NO_SUCH_ENTRY:
3026       return clib_error_return (0, "No such classifier table");
3027     }
3028   return 0;
3029 }
3030
3031 /*?
3032  * Assign a classification table to an interface. The classification
3033  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3034  * commands. Once the table is created, use this command to filter packets
3035  * on an interface.
3036  *
3037  * @cliexpar
3038  * Example of how to assign a classification table to an interface:
3039  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3040 ?*/
3041 /* *INDENT-OFF* */
3042 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3043 {
3044     .path = "set ip classify",
3045     .short_help =
3046     "set ip classify intfc <interface> table-index <classify-idx>",
3047     .function = set_ip_classify_command_fn,
3048 };
3049 /* *INDENT-ON* */
3050
3051 static clib_error_t *
3052 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3053 {
3054   ip4_main_t *im = &ip4_main;
3055   uword heapsize = 0;
3056
3057   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3058     {
3059       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3060         ;
3061       else
3062         return clib_error_return (0,
3063                                   "invalid heap-size parameter `%U'",
3064                                   format_unformat_error, input);
3065     }
3066
3067   im->mtrie_heap_size = heapsize;
3068
3069   return 0;
3070 }
3071
3072 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3073
3074 /*
3075  * fd.io coding-style-patch-verification: ON
3076  *
3077  * Local Variables:
3078  * eval: (c-set-style "gnu")
3079  * End:
3080  */