IP directed broadcast
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 static uword
92 ip4_lookup (vlib_main_t * vm,
93             vlib_node_runtime_t * node, vlib_frame_t * frame)
94 {
95   return ip4_lookup_inline (vm, node, frame,
96                             /* lookup_for_responses_to_locally_received_packets */
97                             0);
98
99 }
100
101 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
102
103 /* *INDENT-OFF* */
104 VLIB_REGISTER_NODE (ip4_lookup_node) =
105 {
106   .function = ip4_lookup,
107   .name = "ip4-lookup",
108   .vector_size = sizeof (u32),
109   .format_trace = format_ip4_lookup_trace,
110   .n_next_nodes = IP_LOOKUP_N_NEXT,
111   .next_nodes = IP4_LOOKUP_NEXT_NODES,
112 };
113 /* *INDENT-ON* */
114
115 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
116
117 static uword
118 ip4_load_balance (vlib_main_t * vm,
119                   vlib_node_runtime_t * node, vlib_frame_t * frame)
120 {
121   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
122   u32 n_left_from, n_left_to_next, *from, *to_next;
123   ip_lookup_next_t next;
124   u32 thread_index = vm->thread_index;
125
126   from = vlib_frame_vector_args (frame);
127   n_left_from = frame->n_vectors;
128   next = node->cached_next_index;
129
130   if (node->flags & VLIB_NODE_FLAG_TRACE)
131     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
132
133   while (n_left_from > 0)
134     {
135       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
136
137
138       while (n_left_from >= 4 && n_left_to_next >= 2)
139         {
140           ip_lookup_next_t next0, next1;
141           const load_balance_t *lb0, *lb1;
142           vlib_buffer_t *p0, *p1;
143           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
144           const ip4_header_t *ip0, *ip1;
145           const dpo_id_t *dpo0, *dpo1;
146
147           /* Prefetch next iteration. */
148           {
149             vlib_buffer_t *p2, *p3;
150
151             p2 = vlib_get_buffer (vm, from[2]);
152             p3 = vlib_get_buffer (vm, from[3]);
153
154             vlib_prefetch_buffer_header (p2, STORE);
155             vlib_prefetch_buffer_header (p3, STORE);
156
157             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
158             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
159           }
160
161           pi0 = to_next[0] = from[0];
162           pi1 = to_next[1] = from[1];
163
164           from += 2;
165           n_left_from -= 2;
166           to_next += 2;
167           n_left_to_next -= 2;
168
169           p0 = vlib_get_buffer (vm, pi0);
170           p1 = vlib_get_buffer (vm, pi1);
171
172           ip0 = vlib_buffer_get_current (p0);
173           ip1 = vlib_buffer_get_current (p1);
174           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
175           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
176
177           lb0 = load_balance_get (lbi0);
178           lb1 = load_balance_get (lbi1);
179
180           /*
181            * this node is for via FIBs we can re-use the hash value from the
182            * to node if present.
183            * We don't want to use the same hash value at each level in the recursion
184            * graph as that would lead to polarisation
185            */
186           hc0 = hc1 = 0;
187
188           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
189             {
190               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
191                 {
192                   hc0 = vnet_buffer (p0)->ip.flow_hash =
193                     vnet_buffer (p0)->ip.flow_hash >> 1;
194                 }
195               else
196                 {
197                   hc0 = vnet_buffer (p0)->ip.flow_hash =
198                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
199                 }
200               dpo0 = load_balance_get_fwd_bucket
201                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
202             }
203           else
204             {
205               dpo0 = load_balance_get_bucket_i (lb0, 0);
206             }
207           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
208             {
209               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
210                 {
211                   hc1 = vnet_buffer (p1)->ip.flow_hash =
212                     vnet_buffer (p1)->ip.flow_hash >> 1;
213                 }
214               else
215                 {
216                   hc1 = vnet_buffer (p1)->ip.flow_hash =
217                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
218                 }
219               dpo1 = load_balance_get_fwd_bucket
220                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
221             }
222           else
223             {
224               dpo1 = load_balance_get_bucket_i (lb1, 0);
225             }
226
227           next0 = dpo0->dpoi_next_node;
228           next1 = dpo1->dpoi_next_node;
229
230           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
231           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
232
233           vlib_increment_combined_counter
234             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
235           vlib_increment_combined_counter
236             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
237
238           vlib_validate_buffer_enqueue_x2 (vm, node, next,
239                                            to_next, n_left_to_next,
240                                            pi0, pi1, next0, next1);
241         }
242
243       while (n_left_from > 0 && n_left_to_next > 0)
244         {
245           ip_lookup_next_t next0;
246           const load_balance_t *lb0;
247           vlib_buffer_t *p0;
248           u32 pi0, lbi0, hc0;
249           const ip4_header_t *ip0;
250           const dpo_id_t *dpo0;
251
252           pi0 = from[0];
253           to_next[0] = pi0;
254           from += 1;
255           to_next += 1;
256           n_left_to_next -= 1;
257           n_left_from -= 1;
258
259           p0 = vlib_get_buffer (vm, pi0);
260
261           ip0 = vlib_buffer_get_current (p0);
262           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
263
264           lb0 = load_balance_get (lbi0);
265
266           hc0 = 0;
267           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
268             {
269               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
270                 {
271                   hc0 = vnet_buffer (p0)->ip.flow_hash =
272                     vnet_buffer (p0)->ip.flow_hash >> 1;
273                 }
274               else
275                 {
276                   hc0 = vnet_buffer (p0)->ip.flow_hash =
277                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
278                 }
279               dpo0 = load_balance_get_fwd_bucket
280                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
281             }
282           else
283             {
284               dpo0 = load_balance_get_bucket_i (lb0, 0);
285             }
286
287           next0 = dpo0->dpoi_next_node;
288           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
289
290           vlib_increment_combined_counter
291             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
292
293           vlib_validate_buffer_enqueue_x1 (vm, node, next,
294                                            to_next, n_left_to_next,
295                                            pi0, next0);
296         }
297
298       vlib_put_next_frame (vm, node, next, n_left_to_next);
299     }
300
301   return frame->n_vectors;
302 }
303
304 /* *INDENT-OFF* */
305 VLIB_REGISTER_NODE (ip4_load_balance_node) =
306 {
307   .function = ip4_load_balance,
308   .name = "ip4-load-balance",
309   .vector_size = sizeof (u32),
310   .sibling_of = "ip4-lookup",
311   .format_trace =
312   format_ip4_lookup_trace,
313 };
314 /* *INDENT-ON* */
315
316 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
317
318 /* get first interface address */
319 ip4_address_t *
320 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
321                              ip_interface_address_t ** result_ia)
322 {
323   ip_lookup_main_t *lm = &im->lookup_main;
324   ip_interface_address_t *ia = 0;
325   ip4_address_t *result = 0;
326
327   /* *INDENT-OFF* */
328   foreach_ip_interface_address
329     (lm, ia, sw_if_index,
330      1 /* honor unnumbered */ ,
331      ({
332        ip4_address_t * a =
333          ip_interface_address_get_address (lm, ia);
334        result = a;
335        break;
336      }));
337   /* *INDENT-OFF* */
338   if (result_ia)
339     *result_ia = result ? ia : 0;
340   return result;
341 }
342
343 static void
344 ip4_add_subnet_bcast_route (u32 fib_index,
345                             fib_prefix_t *pfx,
346                             u32 sw_if_index)
347 {
348   vnet_sw_interface_flags_t iflags;
349
350   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
351
352   fib_table_entry_special_remove(fib_index,
353                                  pfx,
354                                  FIB_SOURCE_INTERFACE);
355
356   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
357     {
358       fib_table_entry_update_one_path (fib_index, pfx,
359                                        FIB_SOURCE_INTERFACE,
360                                        FIB_ENTRY_FLAG_NONE,
361                                        DPO_PROTO_IP4,
362                                        /* No next-hop address */
363                                        &ADJ_BCAST_ADDR,
364                                        sw_if_index,
365                                        // invalid FIB index
366                                        ~0,
367                                        1,
368                                        // no out-label stack
369                                        NULL,
370                                        FIB_ROUTE_PATH_FLAG_NONE);
371     }
372   else
373     {
374         fib_table_entry_special_add(fib_index,
375                                     pfx,
376                                     FIB_SOURCE_INTERFACE,
377                                     (FIB_ENTRY_FLAG_DROP |
378                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
379     }
380 }
381
382 static void
383 ip4_add_interface_routes (u32 sw_if_index,
384                           ip4_main_t * im, u32 fib_index,
385                           ip_interface_address_t * a)
386 {
387   ip_lookup_main_t *lm = &im->lookup_main;
388   ip4_address_t *address = ip_interface_address_get_address (lm, a);
389   fib_prefix_t pfx = {
390     .fp_len = a->address_length,
391     .fp_proto = FIB_PROTOCOL_IP4,
392     .fp_addr.ip4 = *address,
393   };
394
395   if (pfx.fp_len <= 30)
396     {
397       /* a /30 or shorter - add a glean for the network address */
398       fib_table_entry_update_one_path (fib_index, &pfx,
399                                        FIB_SOURCE_INTERFACE,
400                                        (FIB_ENTRY_FLAG_CONNECTED |
401                                         FIB_ENTRY_FLAG_ATTACHED),
402                                        DPO_PROTO_IP4,
403                                        /* No next-hop address */
404                                        NULL,
405                                        sw_if_index,
406                                        // invalid FIB index
407                                        ~0,
408                                        1,
409                                        // no out-label stack
410                                        NULL,
411                                        FIB_ROUTE_PATH_FLAG_NONE);
412
413       /* Add the two broadcast addresses as drop */
414       fib_prefix_t net_pfx = {
415         .fp_len = 32,
416         .fp_proto = FIB_PROTOCOL_IP4,
417         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
418       };
419       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420         fib_table_entry_special_add(fib_index,
421                                     &net_pfx,
422                                     FIB_SOURCE_INTERFACE,
423                                     (FIB_ENTRY_FLAG_DROP |
424                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
425       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
426       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
427         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
428     }
429   else if (pfx.fp_len == 31)
430     {
431       u32 mask = clib_host_to_net_u32(1);
432       fib_prefix_t net_pfx = pfx;
433
434       net_pfx.fp_len = 32;
435       net_pfx.fp_addr.ip4.as_u32 ^= mask;
436
437       /* a /31 - add the other end as an attached host */
438       fib_table_entry_update_one_path (fib_index, &net_pfx,
439                                        FIB_SOURCE_INTERFACE,
440                                        (FIB_ENTRY_FLAG_ATTACHED),
441                                        DPO_PROTO_IP4,
442                                        &net_pfx.fp_addr,
443                                        sw_if_index,
444                                        // invalid FIB index
445                                        ~0,
446                                        1,
447                                        NULL,
448                                        FIB_ROUTE_PATH_FLAG_NONE);
449     }
450   pfx.fp_len = 32;
451
452   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
453     {
454       u32 classify_table_index =
455         lm->classify_table_index_by_sw_if_index[sw_if_index];
456       if (classify_table_index != (u32) ~ 0)
457         {
458           dpo_id_t dpo = DPO_INVALID;
459
460           dpo_set (&dpo,
461                    DPO_CLASSIFY,
462                    DPO_PROTO_IP4,
463                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
464
465           fib_table_entry_special_dpo_add (fib_index,
466                                            &pfx,
467                                            FIB_SOURCE_CLASSIFY,
468                                            FIB_ENTRY_FLAG_NONE, &dpo);
469           dpo_reset (&dpo);
470         }
471     }
472
473   fib_table_entry_update_one_path (fib_index, &pfx,
474                                    FIB_SOURCE_INTERFACE,
475                                    (FIB_ENTRY_FLAG_CONNECTED |
476                                     FIB_ENTRY_FLAG_LOCAL),
477                                    DPO_PROTO_IP4,
478                                    &pfx.fp_addr,
479                                    sw_if_index,
480                                    // invalid FIB index
481                                    ~0,
482                                    1, NULL,
483                                    FIB_ROUTE_PATH_FLAG_NONE);
484 }
485
486 static void
487 ip4_del_interface_routes (ip4_main_t * im,
488                           u32 fib_index,
489                           ip4_address_t * address, u32 address_length)
490 {
491   fib_prefix_t pfx = {
492     .fp_len = address_length,
493     .fp_proto = FIB_PROTOCOL_IP4,
494     .fp_addr.ip4 = *address,
495   };
496
497   if (pfx.fp_len <= 30)
498     {
499       fib_prefix_t net_pfx = {
500         .fp_len = 32,
501         .fp_proto = FIB_PROTOCOL_IP4,
502         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
503       };
504       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
505         fib_table_entry_special_remove(fib_index,
506                                        &net_pfx,
507                                        FIB_SOURCE_INTERFACE);
508       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
509       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
510         fib_table_entry_special_remove(fib_index,
511                                        &net_pfx,
512                                        FIB_SOURCE_INTERFACE);
513       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
514     }
515     else if (pfx.fp_len == 31)
516     {
517       u32 mask = clib_host_to_net_u32(1);
518       fib_prefix_t net_pfx = pfx;
519
520       net_pfx.fp_len = 32;
521       net_pfx.fp_addr.ip4.as_u32 ^= mask;
522
523       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
524     }
525
526   pfx.fp_len = 32;
527   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
528 }
529
530 void
531 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
532 {
533   ip4_main_t *im = &ip4_main;
534
535   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
536
537   /*
538    * enable/disable only on the 1<->0 transition
539    */
540   if (is_enable)
541     {
542       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
543         return;
544     }
545   else
546     {
547       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
548       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
549         return;
550     }
551   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
552                                !is_enable, 0, 0);
553
554
555   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
556                                sw_if_index, !is_enable, 0, 0);
557 }
558
559 static clib_error_t *
560 ip4_add_del_interface_address_internal (vlib_main_t * vm,
561                                         u32 sw_if_index,
562                                         ip4_address_t * address,
563                                         u32 address_length, u32 is_del)
564 {
565   vnet_main_t *vnm = vnet_get_main ();
566   ip4_main_t *im = &ip4_main;
567   ip_lookup_main_t *lm = &im->lookup_main;
568   clib_error_t *error = 0;
569   u32 if_address_index, elts_before;
570   ip4_address_fib_t ip4_af, *addr_fib = 0;
571
572   /* local0 interface doesn't support IP addressing  */
573   if (sw_if_index == 0)
574     {
575       return
576        clib_error_create ("local0 interface doesn't support IP addressing");
577     }
578
579   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
580   ip4_addr_fib_init (&ip4_af, address,
581                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
582   vec_add1 (addr_fib, ip4_af);
583
584   /*
585    * there is no support for adj-fib handling in the presence of overlapping
586    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
587    * most routers do.
588    */
589   /* *INDENT-OFF* */
590   if (!is_del)
591     {
592       /* When adding an address check that it does not conflict
593          with an existing address on any interface in this table. */
594       ip_interface_address_t *ia;
595       vnet_sw_interface_t *sif;
596
597       pool_foreach(sif, vnm->interface_main.sw_interfaces,
598       ({
599           if (im->fib_index_by_sw_if_index[sw_if_index] ==
600               im->fib_index_by_sw_if_index[sif->sw_if_index])
601             {
602               foreach_ip_interface_address
603                 (&im->lookup_main, ia, sif->sw_if_index,
604                  0 /* honor unnumbered */ ,
605                  ({
606                    ip4_address_t * x =
607                      ip_interface_address_get_address
608                      (&im->lookup_main, ia);
609                    if (ip4_destination_matches_route
610                        (im, address, x, ia->address_length) ||
611                        ip4_destination_matches_route (im,
612                                                       x,
613                                                       address,
614                                                       address_length))
615                      {
616                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
617
618                        return
619                          clib_error_create
620                          ("failed to add %U which conflicts with %U for interface %U",
621                           format_ip4_address_and_length, address,
622                           address_length,
623                           format_ip4_address_and_length, x,
624                           ia->address_length,
625                           format_vnet_sw_if_index_name, vnm,
626                           sif->sw_if_index);
627                      }
628                  }));
629             }
630       }));
631     }
632   /* *INDENT-ON* */
633
634   elts_before = pool_elts (lm->if_address_pool);
635
636   error = ip_interface_address_add_del
637     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
638   if (error)
639     goto done;
640
641   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
642
643   if (is_del)
644     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
645   else
646     ip4_add_interface_routes (sw_if_index,
647                               im, ip4_af.fib_index,
648                               pool_elt_at_index
649                               (lm->if_address_pool, if_address_index));
650
651   /* If pool did not grow/shrink: add duplicate address. */
652   if (elts_before != pool_elts (lm->if_address_pool))
653     {
654       ip4_add_del_interface_address_callback_t *cb;
655       vec_foreach (cb, im->add_del_interface_address_callbacks)
656         cb->function (im, cb->function_opaque, sw_if_index,
657                       address, address_length, if_address_index, is_del);
658     }
659
660 done:
661   vec_free (addr_fib);
662   return error;
663 }
664
665 clib_error_t *
666 ip4_add_del_interface_address (vlib_main_t * vm,
667                                u32 sw_if_index,
668                                ip4_address_t * address,
669                                u32 address_length, u32 is_del)
670 {
671   return ip4_add_del_interface_address_internal
672     (vm, sw_if_index, address, address_length, is_del);
673 }
674
675 void
676 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
677 {
678   ip_interface_address_t *ia;
679   ip4_main_t *im;
680
681   im = &ip4_main;
682
683   /*
684    * when directed broadcast is enabled, the subnet braodcast route will forward
685    * packets using an adjacency with a broadcast MAC. otherwise it drops
686    */
687   /* *INDENT-OFF* */
688   foreach_ip_interface_address(&im->lookup_main, ia,
689                                sw_if_index, 0,
690      ({
691        if (ia->address_length <= 30)
692          {
693            ip4_address_t *ipa;
694
695            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
696
697            fib_prefix_t pfx = {
698              .fp_len = 32,
699              .fp_proto = FIB_PROTOCOL_IP4,
700              .fp_addr = {
701                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
702              },
703            };
704
705            ip4_add_subnet_bcast_route
706              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
707                                                   sw_if_index),
708               &pfx, sw_if_index);
709          }
710      }));
711   /* *INDENT-ON* */
712 }
713
714 /* Built-in ip4 unicast rx feature path definition */
715 /* *INDENT-OFF* */
716 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
717 {
718   .arc_name = "ip4-unicast",
719   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
720   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
721 };
722
723 VNET_FEATURE_INIT (ip4_flow_classify, static) =
724 {
725   .arc_name = "ip4-unicast",
726   .node_name = "ip4-flow-classify",
727   .runs_before = VNET_FEATURES ("ip4-inacl"),
728 };
729
730 VNET_FEATURE_INIT (ip4_inacl, static) =
731 {
732   .arc_name = "ip4-unicast",
733   .node_name = "ip4-inacl",
734   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
735 };
736
737 VNET_FEATURE_INIT (ip4_source_check_1, static) =
738 {
739   .arc_name = "ip4-unicast",
740   .node_name = "ip4-source-check-via-rx",
741   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
742 };
743
744 VNET_FEATURE_INIT (ip4_source_check_2, static) =
745 {
746   .arc_name = "ip4-unicast",
747   .node_name = "ip4-source-check-via-any",
748   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
749 };
750
751 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
752 {
753   .arc_name = "ip4-unicast",
754   .node_name = "ip4-source-and-port-range-check-rx",
755   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
756 };
757
758 VNET_FEATURE_INIT (ip4_policer_classify, static) =
759 {
760   .arc_name = "ip4-unicast",
761   .node_name = "ip4-policer-classify",
762   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
763 };
764
765 VNET_FEATURE_INIT (ip4_ipsec, static) =
766 {
767   .arc_name = "ip4-unicast",
768   .node_name = "ipsec-input-ip4",
769   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
770 };
771
772 VNET_FEATURE_INIT (ip4_vpath, static) =
773 {
774   .arc_name = "ip4-unicast",
775   .node_name = "vpath-input-ip4",
776   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
777 };
778
779 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
780 {
781   .arc_name = "ip4-unicast",
782   .node_name = "ip4-vxlan-bypass",
783   .runs_before = VNET_FEATURES ("ip4-lookup"),
784 };
785
786 VNET_FEATURE_INIT (ip4_not_enabled, static) =
787 {
788   .arc_name = "ip4-unicast",
789   .node_name = "ip4-not-enabled",
790   .runs_before = VNET_FEATURES ("ip4-lookup"),
791 };
792
793 VNET_FEATURE_INIT (ip4_lookup, static) =
794 {
795   .arc_name = "ip4-unicast",
796   .node_name = "ip4-lookup",
797   .runs_before = 0,     /* not before any other features */
798 };
799
800 /* Built-in ip4 multicast rx feature path definition */
801 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
802 {
803   .arc_name = "ip4-multicast",
804   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
805   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
806 };
807
808 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
809 {
810   .arc_name = "ip4-multicast",
811   .node_name = "vpath-input-ip4",
812   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
813 };
814
815 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
816 {
817   .arc_name = "ip4-multicast",
818   .node_name = "ip4-not-enabled",
819   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
820 };
821
822 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
823 {
824   .arc_name = "ip4-multicast",
825   .node_name = "ip4-mfib-forward-lookup",
826   .runs_before = 0,     /* last feature */
827 };
828
829 /* ip4 tx ("ip4-output") feature arc definition */
/*
 * The arc is entered from "ip4-rewrite", "ip4-midchain" and "ip4-dvr-dpo";
 * its index is written to ip4_main.lookup_main.output_feature_arc_index.
 * The runs_before constraints below order the features as:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec-output-ip4 -> interface-output
 */
830 VNET_FEATURE_ARC_INIT (ip4_output, static) =
831 {
832   .arc_name = "ip4-output",
833   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
834   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
835 };
836
/* Source and port-range check on tx; ordered before the output ACL. */
837 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
838 {
839   .arc_name = "ip4-output",
840   .node_name = "ip4-source-and-port-range-check-tx",
841   .runs_before = VNET_FEATURES ("ip4-outacl"),
842 };
843
/* Output ACL; ordered before IPsec output. */
844 VNET_FEATURE_INIT (ip4_outacl, static) =
845 {
846   .arc_name = "ip4-output",
847   .node_name = "ip4-outacl",
848   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
849 };
850
/* IPsec output; ordered before the final interface-output feature. */
851 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
852 {
853   .arc_name = "ip4-output",
854   .node_name = "ipsec-output-ip4",
855   .runs_before = VNET_FEATURES ("interface-output"),
856 };
857
858 /* Built-in ip4 tx feature: terminates the "ip4-output" arc */
859 VNET_FEATURE_INIT (ip4_interface_output, static) =
860 {
861   .arc_name = "ip4-output",
862   .node_name = "interface-output",
863   .runs_before = 0,     /* not before any other features */
864 };
865 /* *INDENT-ON* */
866
867 static clib_error_t *
868 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
869 {
870   ip4_main_t *im = &ip4_main;
871
872   /* Fill in lookup tables with default table (0). */
873   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
874   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
875
876   if (!is_add)
877     {
878       ip4_main_t *im4 = &ip4_main;
879       ip_lookup_main_t *lm4 = &im4->lookup_main;
880       ip_interface_address_t *ia = 0;
881       ip4_address_t *address;
882       vlib_main_t *vm = vlib_get_main ();
883
884       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
885       /* *INDENT-OFF* */
886       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
887       ({
888         address = ip_interface_address_get_address (lm4, ia);
889         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
890       }));
891       /* *INDENT-ON* */
892     }
893
894   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
895                                is_add, 0, 0);
896
897   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
898                                sw_if_index, is_add, 0, 0);
899
900   return /* no error */ 0;
901 }
902
903 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
904
905 /* Global IP4 main: the single ip4_main_t instance used throughout this file. */
906 ip4_main_t ip4_main;
907
908 clib_error_t *
909 ip4_lookup_init (vlib_main_t * vm)
910 {
911   ip4_main_t *im = &ip4_main;
912   clib_error_t *error;
913   uword i;
914
915   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
916     return error;
917   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
918     return (error);
919   if ((error = vlib_call_init_function (vm, fib_module_init)))
920     return error;
921   if ((error = vlib_call_init_function (vm, mfib_module_init)))
922     return error;
923
924   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
925     {
926       u32 m;
927
928       if (i < 32)
929         m = pow2_mask (i) << (32 - i);
930       else
931         m = ~0;
932       im->fib_masks[i] = clib_host_to_net_u32 (m);
933     }
934
935   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
936
937   /* Create FIB with index 0 and table id of 0. */
938   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
939                                      FIB_SOURCE_DEFAULT_ROUTE);
940   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
941                                       MFIB_SOURCE_DEFAULT_ROUTE);
942
943   {
944     pg_node_t *pn;
945     pn = pg_get_node (ip4_lookup_node.index);
946     pn->unformat_edit = unformat_pg_ip4_header;
947   }
948
949   {
950     ethernet_arp_header_t h;
951
952     memset (&h, 0, sizeof (h));
953
954     /* Set target ethernet address to all zeros. */
955     memset (h.ip4_over_ethernet[1].ethernet, 0,
956             sizeof (h.ip4_over_ethernet[1].ethernet));
957
958 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
959 #define _8(f,v) h.f = v;
960     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
961     _16 (l3_type, ETHERNET_TYPE_IP4);
962     _8 (n_l2_address_bytes, 6);
963     _8 (n_l3_address_bytes, 4);
964     _16 (opcode, ETHERNET_ARP_OPCODE_request);
965 #undef _16
966 #undef _8
967
968     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
969                                /* data */ &h,
970                                sizeof (h),
971                                /* alloc chunk size */ 8,
972                                "ip4 arp");
973   }
974
975   return error;
976 }
977
978 VLIB_INIT_FUNCTION (ip4_lookup_init);
979
/*
 * Per-packet trace record written by ip4_forward_next_trace() and read
 * by the format_ip4_*_trace() functions in this file.
 */
980 typedef struct
981 {
982   /* Adjacency taken. */
983   u32 dpo_index;
/* Copied from the buffer's ip.flow_hash. */
984   u32 flow_hash;
/*
 * Filled with the buffer's sw_if_index[VLIB_TX] when that is set (!= ~0),
 * otherwise with the FIB index bound to the RX interface.
 */
985   u32 fib_index;
986
987   /* Packet data, possibly *after* rewrite. */
988   u8 packet_data[64 - 1 * sizeof (u32)];
989 }
990 ip4_forward_next_trace_t;
991
992 u8 *
993 format_ip4_forward_next_trace (u8 * s, va_list * args)
994 {
995   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
996   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
997   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
998   u32 indent = format_get_indent (s);
999   s = format (s, "%U%U",
1000               format_white_space, indent,
1001               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1002   return s;
1003 }
1004
1005 static u8 *
1006 format_ip4_lookup_trace (u8 * s, va_list * args)
1007 {
1008   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1009   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1010   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1011   u32 indent = format_get_indent (s);
1012
1013   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1014               t->fib_index, t->dpo_index, t->flow_hash);
1015   s = format (s, "\n%U%U",
1016               format_white_space, indent,
1017               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1018   return s;
1019 }
1020
1021 static u8 *
1022 format_ip4_rewrite_trace (u8 * s, va_list * args)
1023 {
1024   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1025   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1026   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1027   u32 indent = format_get_indent (s);
1028
1029   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1030               t->fib_index, t->dpo_index, format_ip_adjacency,
1031               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1032   s = format (s, "\n%U%U",
1033               format_white_space, indent,
1034               format_ip_adjacency_packet_data,
1035               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1036   return s;
1037 }
1038
1039 /* Common trace function for all ip4-forward next nodes. */
1040 void
1041 ip4_forward_next_trace (vlib_main_t * vm,
1042                         vlib_node_runtime_t * node,
1043                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1044 {
1045   u32 *from, n_left;
1046   ip4_main_t *im = &ip4_main;
1047
1048   n_left = frame->n_vectors;
1049   from = vlib_frame_vector_args (frame);
1050
1051   while (n_left >= 4)
1052     {
1053       u32 bi0, bi1;
1054       vlib_buffer_t *b0, *b1;
1055       ip4_forward_next_trace_t *t0, *t1;
1056
1057       /* Prefetch next iteration. */
1058       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1059       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1060
1061       bi0 = from[0];
1062       bi1 = from[1];
1063
1064       b0 = vlib_get_buffer (vm, bi0);
1065       b1 = vlib_get_buffer (vm, bi1);
1066
1067       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1068         {
1069           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1070           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1071           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1072           t0->fib_index =
1073             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1074              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1075             vec_elt (im->fib_index_by_sw_if_index,
1076                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1077
1078           clib_memcpy (t0->packet_data,
1079                        vlib_buffer_get_current (b0),
1080                        sizeof (t0->packet_data));
1081         }
1082       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1083         {
1084           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1085           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1086           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1087           t1->fib_index =
1088             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1089              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1090             vec_elt (im->fib_index_by_sw_if_index,
1091                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1092           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1093                        sizeof (t1->packet_data));
1094         }
1095       from += 2;
1096       n_left -= 2;
1097     }
1098
1099   while (n_left >= 1)
1100     {
1101       u32 bi0;
1102       vlib_buffer_t *b0;
1103       ip4_forward_next_trace_t *t0;
1104
1105       bi0 = from[0];
1106
1107       b0 = vlib_get_buffer (vm, bi0);
1108
1109       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1110         {
1111           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1112           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1113           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1114           t0->fib_index =
1115             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1116              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1117             vec_elt (im->fib_index_by_sw_if_index,
1118                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1119           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1120                        sizeof (t0->packet_data));
1121         }
1122       from += 1;
1123       n_left -= 1;
1124     }
1125 }
1126
1127 /* Compute TCP/UDP/ICMP4 checksum in software. */
1128 u16
1129 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1130                               ip4_header_t * ip0)
1131 {
1132   ip_csum_t sum0;
1133   u32 ip_header_length, payload_length_host_byte_order;
1134   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1135   u16 sum16;
1136   void *data_this_buffer;
1137
1138   /* Initialize checksum with ip header. */
1139   ip_header_length = ip4_header_bytes (ip0);
1140   payload_length_host_byte_order =
1141     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1142   sum0 =
1143     clib_host_to_net_u32 (payload_length_host_byte_order +
1144                           (ip0->protocol << 16));
1145
1146   if (BITS (uword) == 32)
1147     {
1148       sum0 =
1149         ip_csum_with_carry (sum0,
1150                             clib_mem_unaligned (&ip0->src_address, u32));
1151       sum0 =
1152         ip_csum_with_carry (sum0,
1153                             clib_mem_unaligned (&ip0->dst_address, u32));
1154     }
1155   else
1156     sum0 =
1157       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1158
1159   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1160   data_this_buffer = (void *) ip0 + ip_header_length;
1161   n_ip_bytes_this_buffer =
1162     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1163   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1164     {
1165       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1166         n_ip_bytes_this_buffer - ip_header_length : 0;
1167     }
1168   while (1)
1169     {
1170       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1171       n_bytes_left -= n_this_buffer;
1172       if (n_bytes_left == 0)
1173         break;
1174
1175       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1176       p0 = vlib_get_buffer (vm, p0->next_buffer);
1177       data_this_buffer = vlib_buffer_get_current (p0);
1178       n_this_buffer = p0->current_length;
1179     }
1180
1181   sum16 = ~ip_csum_fold (sum0);
1182
1183   return sum16;
1184 }
1185
1186 u32
1187 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1188 {
1189   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1190   udp_header_t *udp0;
1191   u16 sum16;
1192
1193   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1194           || ip0->protocol == IP_PROTOCOL_UDP);
1195
1196   udp0 = (void *) (ip0 + 1);
1197   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1198     {
1199       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1200                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1201       return p0->flags;
1202     }
1203
1204   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1205
1206   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1207                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1208
1209   return p0->flags;
1210 }
1211
1212 /* *INDENT-OFF* */
/*
 * Declares the "ip4-local" feature arc, entered from the "ip4-local" node.
 * No arc_index_ptr is given; ip4_local_set_next_and_error() reads the
 * index from vnet_feat_arc_ip4_local.feature_arc_index instead.
 */
1213 VNET_FEATURE_ARC_INIT (ip4_local) =
1214 {
1215   .arc_name  = "ip4-local",
1216   .start_nodes = VNET_FEATURES ("ip4-local"),
1217 };
1218 /* *INDENT-ON* */
1219
1220 static inline void
1221 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1222                             ip4_header_t * ip, u8 is_udp, u8 * error,
1223                             u8 * good_tcp_udp)
1224 {
1225   u32 flags0;
1226   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1227   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1228   if (is_udp)
1229     {
1230       udp_header_t *udp;
1231       u32 ip_len, udp_len;
1232       i32 len_diff;
1233       udp = ip4_next_header (ip);
1234       /* Verify UDP length. */
1235       ip_len = clib_net_to_host_u16 (ip->length);
1236       udp_len = clib_net_to_host_u16 (udp->length);
1237
1238       len_diff = ip_len - udp_len;
1239       *good_tcp_udp &= len_diff >= 0;
1240       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1241     }
1242 }
1243
/*
 * Checksum-state predicates on a buffer pointer _b.  Every argument and
 * every expansion is fully parenthesized so the macros compose safely
 * with operators such as '!' or with pointer expressions like (b + 1).
 * Note: _b is evaluated more than once; do not pass an expression with
 * side effects.
 */

/* Non-zero when the buffer is flagged for TCP or UDP checksum offload. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
     || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check:
 * the checksum has not been computed yet and is not offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
     && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)            \
          || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
      || ip4_local_csum_is_offloaded (_b)) != 0)
1255
1256 static inline void
1257 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1258                          ip4_header_t * ih, u8 * error)
1259 {
1260   u8 is_udp, is_tcp_udp, good_tcp_udp;
1261
1262   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1263   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1264
1265   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1266     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1267   else
1268     good_tcp_udp = ip4_local_csum_is_valid (b);
1269
1270   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1271   *error = (is_tcp_udp && !good_tcp_udp
1272             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1273 }
1274
1275 static inline void
1276 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1277                             ip4_header_t ** ih, u8 * error)
1278 {
1279   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1280
1281   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1282   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1283
1284   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1285   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1286
1287   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1288   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1289
1290   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1291                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1292     {
1293       if (is_tcp_udp[0])
1294         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1295                                     &good_tcp_udp[0]);
1296       if (is_tcp_udp[1])
1297         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1298                                     &good_tcp_udp[1]);
1299     }
1300
1301   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1302               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1303   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1304               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1305 }
1306
1307 static inline void
1308 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1309                               vlib_buffer_t * b, u16 * next, u8 error,
1310                               u8 head_of_feature_arc)
1311 {
1312   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1313   u32 next_index;
1314
1315   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1316   b->error = error ? error_node->errors[error] : 0;
1317   if (head_of_feature_arc)
1318     {
1319       next_index = *next;
1320       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1321         {
1322           vnet_feature_arc_start (arc_index,
1323                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1324                                   &next_index, b);
1325           *next = next_index;
1326         }
1327     }
1328 }
1329
/*
 * One-entry cache of the most recent source-address check, shared by
 * ip4_local_check_src() and ip4_local_check_src_x2() across the packets
 * of one frame (ip4_local_inline() creates it per call).
 */
1330 typedef struct
1331 {
/* Source address the cached result belongs to. */
1332   ip4_address_t src;
/* Load-balance index found for src; ip4_local_inline() starts it at ~0. */
1333   u32 lbi;
/* Error value recorded when the entry was stored. */
1334   u8 error;
1335 } ip4_local_last_check_t;
1336
1337 static inline void
1338 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1339                      ip4_local_last_check_t * last_check, u8 * error0)
1340 {
1341   ip4_fib_mtrie_leaf_t leaf0;
1342   ip4_fib_mtrie_t *mtrie0;
1343   const dpo_id_t *dpo0;
1344   load_balance_t *lb0;
1345   u32 lbi0;
1346
1347   vnet_buffer (b)->ip.fib_index =
1348     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1349     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1350
1351   if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1352     {
1353       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1354       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1355       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1356       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1357       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1358
1359       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1360       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1361
1362       lb0 = load_balance_get (lbi0);
1363       dpo0 = load_balance_get_bucket_i (lb0, 0);
1364
1365       /*
1366        * Must have a route to source otherwise we drop the packet.
1367        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1368        *
1369        * The checks are:
1370        *  - the source is a recieve => it's from us => bogus, do this
1371        *    first since it sets a different error code.
1372        *  - uRPF check for any route to source - accept if passes.
1373        *  - allow packets destined to the broadcast address from unknown sources
1374        */
1375
1376       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1377                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1378                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1379       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1380                   && !fib_urpf_check_size (lb0->lb_urpf)
1381                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1382                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1383
1384       last_check->src.as_u32 = ip0->src_address.as_u32;
1385       last_check->lbi = lbi0;
1386       last_check->error = *error0;
1387     }
1388   else
1389     {
1390       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1391       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1392       *error0 = last_check->error;
1393     }
1394 }
1395
1396 static inline void
1397 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1398                         ip4_local_last_check_t * last_check, u8 * error)
1399 {
1400   ip4_fib_mtrie_leaf_t leaf[2];
1401   ip4_fib_mtrie_t *mtrie[2];
1402   const dpo_id_t *dpo[2];
1403   load_balance_t *lb[2];
1404   u32 not_last_hit = 0;
1405   u32 lbi[2];
1406
1407   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1408   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1409
1410   vnet_buffer (b[0])->ip.fib_index =
1411     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1412     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1413     vnet_buffer (b[0])->ip.fib_index;
1414
1415   vnet_buffer (b[1])->ip.fib_index =
1416     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1417     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1418     vnet_buffer (b[1])->ip.fib_index;
1419
1420   if (PREDICT_FALSE (not_last_hit))
1421     {
1422       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1423       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1424
1425       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1426       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1427
1428       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1429                                            &ip[0]->src_address, 2);
1430       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1431                                            &ip[1]->src_address, 2);
1432
1433       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1434                                            &ip[0]->src_address, 3);
1435       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1436                                            &ip[1]->src_address, 3);
1437
1438       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1439       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1440
1441       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1442       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1443
1444       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1445       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1446
1447       lb[0] = load_balance_get (lbi[0]);
1448       lb[1] = load_balance_get (lbi[1]);
1449
1450       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1451       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1452
1453       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1454                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1455                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1456       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1457                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1458                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1459                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1460
1461       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1462                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1463                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1464       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1465                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1466                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1467                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1468
1469       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1470       last_check->lbi = lbi[1];
1471       last_check->error = error[1];
1472     }
1473   else
1474     {
1475       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1476       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1477
1478       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1479       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1480
1481       error[0] = last_check->error;
1482       error[1] = last_check->error;
1483     }
1484 }
1485
/*
 * Shared body of the "ip4-local" and "ip4-local-end-of-arc" nodes.
 *
 * For every buffer in the frame the next index is taken from
 * lm->local_next_by_ip_protocol[], indexed by the IP protocol.  When
 * head_of_feature_arc is non-zero, buffers not flagged
 * VNET_BUFFER_F_IS_NATED additionally get the L4 checksum and source
 * address checks; when it is zero those checks are skipped.  The final
 * next/error for each buffer is decided by ip4_local_set_next_and_error().
 *
 * Returns the number of vectors in the frame.
 */
1486 static inline uword
1487 ip4_local_inline (vlib_main_t * vm,
1488                   vlib_node_runtime_t * node,
1489                   vlib_frame_t * frame, int head_of_feature_arc)
1490 {
1491   ip4_main_t *im = &ip4_main;
1492   ip_lookup_main_t *lm = &im->lookup_main;
1493   u32 *from, n_left_from;
/* Error values are looked up in the ip4-input node's error table. */
1494   vlib_node_runtime_t *error_node =
1495     vlib_node_get_runtime (vm, ip4_input_node.index);
1496   u16 nexts[VLIB_FRAME_SIZE], *next;
1497   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1498   ip4_header_t *ip[2];
1499   u8 error[2];
1500
/* Source-check cache, fresh for every frame. */
1501   ip4_local_last_check_t last_check = {
1502     .src = {.as_u32 = 0},
1503     .lbi = ~0,
1504     .error = IP4_ERROR_UNKNOWN_PROTOCOL
1505   };
1506
1507   from = vlib_frame_vector_args (frame);
1508   n_left_from = frame->n_vectors;
1509
1510   if (node->flags & VLIB_NODE_FLAG_TRACE)
1511     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1512
1513   vlib_get_buffers (vm, from, bufs, n_left_from);
1514   b = bufs;
1515   next = nexts;
1516
/*
 * Dual loop: two buffers per iteration.  At least six must remain because
 * b[4] and b[5] are prefetched.
 */
1517   while (n_left_from >= 6)
1518     {
1519       u32 is_nat, not_batch = 0;
1520
1521       /* Prefetch next iteration. */
1522       {
1523         vlib_prefetch_buffer_header (b[4], LOAD);
1524         vlib_prefetch_buffer_header (b[5], LOAD);
1525
1526         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1527         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1528       }
1529
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value. */
1530       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1531
1532       ip[0] = vlib_buffer_get_current (b[0]);
1533       ip[1] = vlib_buffer_get_current (b[1]);
1534
1535       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1536       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1537
/* not_batch becomes non-zero when only one of the pair is NAT-flagged. */
1538       is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
1539       not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
1540
/* No checks at the end of the arc, or when both buffers are NAT-flagged. */
1541       if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
1542         goto skip_checks;
1543
1544       if (PREDICT_TRUE (not_batch == 0))
1545         {
/* Neither buffer is NAT-flagged: check the pair together. */
1546           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1547           ip4_local_check_src_x2 (b, ip, &last_check, error);
1548         }
1549       else
1550         {
/* Mixed pair: check only the buffer that is not NAT-flagged. */
1551           if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
1552             {
1553               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1554               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1555             }
1556           if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
1557             {
1558               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1559               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1560             }
1561         }
1562
1563     skip_checks:
1564
/* Default next by protocol; may be overridden by the error/arc logic. */
1565       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1566       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1567       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1568                                     head_of_feature_arc);
1569       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1570                                     head_of_feature_arc);
1571
1572       b += 2;
1573       next += 2;
1574       n_left_from -= 2;
1575     }
1576
/* Single loop for the remaining buffers; same steps, one at a time. */
1577   while (n_left_from > 0)
1578     {
1579       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1580
1581       ip[0] = vlib_buffer_get_current (b[0]);
1582       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1583
1584       if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
1585         goto skip_check;
1586
1587       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1588       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1589
1590     skip_check:
1591
1592       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1593       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1594                                     head_of_feature_arc);
1595
1596       b += 1;
1597       next += 1;
1598       n_left_from -= 1;
1599     }
1600
/* Hand every buffer of the frame to the next index chosen for it. */
1601   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1602   return frame->n_vectors;
1603 }
1604
1605 static uword
1606 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1607 {
1608   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1609 }
1610
1611 /* *INDENT-OFF* */
/*
 * Registration of the "ip4-local" graph node.  Its static next nodes are
 * drop, punt, UDP lookup and ICMP input; ip4_register_protocol() adds
 * further next nodes at run time.
 */
1612 VLIB_REGISTER_NODE (ip4_local_node) =
1613 {
1614   .function = ip4_local,
1615   .name = "ip4-local",
1616   .vector_size = sizeof (u32),
1617   .format_trace = format_ip4_forward_next_trace,
1618   .n_next_nodes = IP_LOCAL_N_NEXT,
1619   .next_nodes =
1620   {
1621     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1622     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1623     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1624     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1625   },
1626 };
1627 /* *INDENT-ON* */
1628
1629 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1630
1631 static uword
1632 ip4_local_end_of_arc (vlib_main_t * vm,
1633                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1634 {
1635   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1636 }
1637
1638 /* *INDENT-OFF* */
/*
 * Registration of the "ip4-local-end-of-arc" node.  It declares itself a
 * sibling of "ip4-local" instead of listing next nodes of its own.
 */
1639 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1640   .function = ip4_local_end_of_arc,
1641   .name = "ip4-local-end-of-arc",
1642   .vector_size = sizeof (u32),
1643
1644   .format_trace = format_ip4_forward_next_trace,
1645   .sibling_of = "ip4-local",
1646 };
1647
1648 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1649
/* "ip4-local-end-of-arc" is the final feature on the "ip4-local" arc. */
1650 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1651   .arc_name = "ip4-local",
1652   .node_name = "ip4-local-end-of-arc",
1653   .runs_before = 0, /* not before any other features */
1654 };
1655 /* *INDENT-ON* */
1656
1657 void
1658 ip4_register_protocol (u32 protocol, u32 node_index)
1659 {
1660   vlib_main_t *vm = vlib_get_main ();
1661   ip4_main_t *im = &ip4_main;
1662   ip_lookup_main_t *lm = &im->lookup_main;
1663
1664   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1665   lm->local_next_by_ip_protocol[protocol] =
1666     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1667 }
1668
1669 static clib_error_t *
1670 show_ip_local_command_fn (vlib_main_t * vm,
1671                           unformat_input_t * input, vlib_cli_command_t * cmd)
1672 {
1673   ip4_main_t *im = &ip4_main;
1674   ip_lookup_main_t *lm = &im->lookup_main;
1675   int i;
1676
1677   vlib_cli_output (vm, "Protocols handled by ip4_local");
1678   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1679     {
1680       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1681         {
1682           u32 node_index = vlib_get_node (vm,
1683                                           ip4_local_node.index)->
1684             next_nodes[lm->local_next_by_ip_protocol[i]];
1685           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1686                            node_index);
1687         }
1688     }
1689   return 0;
1690 }
1691
1692
1693
1694 /*?
1695  * Display the set of protocols handled by the local IPv4 stack.
 *
 * Each protocol is printed on its own line as
 * '<em>protocol-number</em>: <em>node-name</em>', where the node name is
 * the ip4-local next node registered for that protocol. Protocols that
 * are still punted are not listed. The example below shows only the
 * protocol numbers.
1696  *
1697  * @cliexpar
1698  * Example of how to display local protocol table:
1699  * @cliexstart{show ip local}
1700  * Protocols handled by ip4_local
1701  * 1
1702  * 17
1703  * 47
1704  * @cliexend
1705 ?*/
1706 /* *INDENT-OFF* */
1707 VLIB_CLI_COMMAND (show_ip_local, static) =
1708 {
1709   .path = "show ip local",
1710   .function = show_ip_local_command_fn,
1711   .short_help = "show ip local",
1712 };
1713 /* *INDENT-ON* */
1714
1715 always_inline uword
1716 ip4_arp_inline (vlib_main_t * vm,
1717                 vlib_node_runtime_t * node,
1718                 vlib_frame_t * frame, int is_glean)
1719 {
1720   vnet_main_t *vnm = vnet_get_main ();
1721   ip4_main_t *im = &ip4_main;
1722   ip_lookup_main_t *lm = &im->lookup_main;
1723   u32 *from, *to_next_drop;
1724   uword n_left_from, n_left_to_next_drop, next_index;
1725   static f64 time_last_seed_change = -1e100;
1726   static u32 hash_seeds[3];
1727   static uword hash_bitmap[256 / BITS (uword)];
1728   f64 time_now;
1729
1730   if (node->flags & VLIB_NODE_FLAG_TRACE)
1731     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1732
1733   time_now = vlib_time_now (vm);
1734   if (time_now - time_last_seed_change > 1e-3)
1735     {
1736       uword i;
1737       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1738                                             sizeof (hash_seeds));
1739       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1740         hash_seeds[i] = r[i];
1741
1742       /* Mark all hash keys as been no-seen before. */
1743       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1744         hash_bitmap[i] = 0;
1745
1746       time_last_seed_change = time_now;
1747     }
1748
1749   from = vlib_frame_vector_args (frame);
1750   n_left_from = frame->n_vectors;
1751   next_index = node->cached_next_index;
1752   if (next_index == IP4_ARP_NEXT_DROP)
1753     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1754
1755   while (n_left_from > 0)
1756     {
1757       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1758                            to_next_drop, n_left_to_next_drop);
1759
1760       while (n_left_from > 0 && n_left_to_next_drop > 0)
1761         {
1762           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1763           ip_adjacency_t *adj0;
1764           vlib_buffer_t *p0;
1765           ip4_header_t *ip0;
1766           uword bm0;
1767
1768           pi0 = from[0];
1769
1770           p0 = vlib_get_buffer (vm, pi0);
1771
1772           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1773           adj0 = adj_get (adj_index0);
1774           ip0 = vlib_buffer_get_current (p0);
1775
1776           a0 = hash_seeds[0];
1777           b0 = hash_seeds[1];
1778           c0 = hash_seeds[2];
1779
1780           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1781           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1782
1783           if (is_glean)
1784             {
1785               /*
1786                * this is the Glean case, so we are ARPing for the
1787                * packet's destination
1788                */
1789               a0 ^= ip0->dst_address.data_u32;
1790             }
1791           else
1792             {
1793               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1794             }
1795           b0 ^= sw_if_index0;
1796
1797           hash_v3_mix32 (a0, b0, c0);
1798           hash_v3_finalize32 (a0, b0, c0);
1799
1800           c0 &= BITS (hash_bitmap) - 1;
1801           m0 = (uword) 1 << (c0 % BITS (uword));
1802           c0 = c0 / BITS (uword);
1803
1804           bm0 = hash_bitmap[c0];
1805           drop0 = (bm0 & m0) != 0;
1806
1807           /* Mark it as seen. */
1808           hash_bitmap[c0] = bm0 | m0;
1809
1810           from += 1;
1811           n_left_from -= 1;
1812           to_next_drop[0] = pi0;
1813           to_next_drop += 1;
1814           n_left_to_next_drop -= 1;
1815
1816           p0->error =
1817             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1818                          IP4_ARP_ERROR_REQUEST_SENT];
1819
1820           /*
1821            * the adj has been updated to a rewrite but the node the DPO that got
1822            * us here hasn't - yet. no big deal. we'll drop while we wait.
1823            */
1824           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1825             continue;
1826
1827           if (drop0)
1828             continue;
1829
1830           /*
1831            * Can happen if the control-plane is programming tables
1832            * with traffic flowing; at least that's today's lame excuse.
1833            */
1834           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1835               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1836             {
1837               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1838             }
1839           else
1840             /* Send ARP request. */
1841             {
1842               u32 bi0 = 0;
1843               vlib_buffer_t *b0;
1844               ethernet_arp_header_t *h0;
1845               vnet_hw_interface_t *hw_if0;
1846
1847               h0 =
1848                 vlib_packet_template_get_packet (vm,
1849                                                  &im->ip4_arp_request_packet_template,
1850                                                  &bi0);
1851
1852               /* Seems we're out of buffers */
1853               if (PREDICT_FALSE (!h0))
1854                 continue;
1855
1856               /* Add rewrite/encap string for ARP packet. */
1857               vnet_rewrite_one_header (adj0[0], h0,
1858                                        sizeof (ethernet_header_t));
1859
1860               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1861
1862               /* Src ethernet address in ARP header. */
1863               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1864                            hw_if0->hw_address,
1865                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1866
1867               if (is_glean)
1868                 {
1869                   /* The interface's source address is stashed in the Glean Adj */
1870                   h0->ip4_over_ethernet[0].ip4 =
1871                     adj0->sub_type.glean.receive_addr.ip4;
1872
1873                   /* Copy in destination address we are requesting. This is the
1874                    * glean case, so it's the packet's destination.*/
1875                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1876                     ip0->dst_address.data_u32;
1877                 }
1878               else
1879                 {
1880                   /* Src IP address in ARP header. */
1881                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1882                                                   &h0->
1883                                                   ip4_over_ethernet[0].ip4))
1884                     {
1885                       /* No source address available */
1886                       p0->error =
1887                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1888                       vlib_buffer_free (vm, &bi0, 1);
1889                       continue;
1890                     }
1891
1892                   /* Copy in destination address we are requesting from the
1893                      incomplete adj */
1894                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1895                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1896                 }
1897
1898               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1899               b0 = vlib_get_buffer (vm, bi0);
1900               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1901               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1902
1903               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1904
1905               vlib_set_next_frame_buffer (vm, node,
1906                                           adj0->rewrite_header.next_index,
1907                                           bi0);
1908             }
1909         }
1910
1911       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1912     }
1913
1914   return frame->n_vectors;
1915 }
1916
1917 static uword
1918 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1919 {
1920   return (ip4_arp_inline (vm, node, frame, 0));
1921 }
1922
1923 static uword
1924 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1925 {
1926   return (ip4_arp_inline (vm, node, frame, 1));
1927 }
1928
1929 static char *ip4_arp_error_strings[] = {
1930   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1931   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1932   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1933   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1934   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1935   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1936 };
1937
1938 /* *INDENT-OFF* */
1939 VLIB_REGISTER_NODE (ip4_arp_node) =
1940 {
1941   .function = ip4_arp,
1942   .name = "ip4-arp",
1943   .vector_size = sizeof (u32),
1944   .format_trace = format_ip4_forward_next_trace,
1945   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1946   .error_strings = ip4_arp_error_strings,
1947   .n_next_nodes = IP4_ARP_N_NEXT,
1948   .next_nodes =
1949   {
1950     [IP4_ARP_NEXT_DROP] = "error-drop",
1951   },
1952 };
1953
1954 VLIB_REGISTER_NODE (ip4_glean_node) =
1955 {
1956   .function = ip4_glean,
1957   .name = "ip4-glean",
1958   .vector_size = sizeof (u32),
1959   .format_trace = format_ip4_forward_next_trace,
1960   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1961   .error_strings = ip4_arp_error_strings,
1962   .n_next_nodes = IP4_ARP_N_NEXT,
1963   .next_nodes = {
1964   [IP4_ARP_NEXT_DROP] = "error-drop",
1965   },
1966 };
1967 /* *INDENT-ON* */
1968
1969 #define foreach_notrace_ip4_arp_error           \
1970 _(DROP)                                         \
1971 _(REQUEST_SENT)                                 \
1972 _(REPLICATE_DROP)                               \
1973 _(REPLICATE_FAIL)
1974
1975 clib_error_t *
1976 arp_notrace_init (vlib_main_t * vm)
1977 {
1978   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1979
1980   /* don't trace ARP request packets */
1981 #define _(a)                                    \
1982     vnet_pcap_drop_trace_filter_add_del         \
1983         (rt->errors[IP4_ARP_ERROR_##a],         \
1984          1 /* is_add */);
1985   foreach_notrace_ip4_arp_error;
1986 #undef _
1987   return 0;
1988 }
1989
1990 VLIB_INIT_FUNCTION (arp_notrace_init);
1991
1992
1993 /* Send an ARP request to see if given destination is reachable on given interface. */
1994 clib_error_t *
1995 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1996                     u8 refresh)
1997 {
1998   vnet_main_t *vnm = vnet_get_main ();
1999   ip4_main_t *im = &ip4_main;
2000   ethernet_arp_header_t *h;
2001   ip4_address_t *src;
2002   ip_interface_address_t *ia;
2003   ip_adjacency_t *adj;
2004   vnet_hw_interface_t *hi;
2005   vnet_sw_interface_t *si;
2006   vlib_buffer_t *b;
2007   adj_index_t ai;
2008   u32 bi = 0;
2009   u8 unicast_rewrite = 0;
2010
2011   si = vnet_get_sw_interface (vnm, sw_if_index);
2012
2013   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2014     {
2015       return clib_error_return (0, "%U: interface %U down",
2016                                 format_ip4_address, dst,
2017                                 format_vnet_sw_if_index_name, vnm,
2018                                 sw_if_index);
2019     }
2020
2021   src =
2022     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2023   if (!src)
2024     {
2025       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2026       return clib_error_return
2027         (0,
2028          "no matching interface address for destination %U (interface %U)",
2029          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2030          sw_if_index);
2031     }
2032
2033   h = vlib_packet_template_get_packet (vm,
2034                                        &im->ip4_arp_request_packet_template,
2035                                        &bi);
2036
2037   if (!h)
2038     return clib_error_return (0, "ARP request packet allocation failed");
2039
2040   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2041   if (PREDICT_FALSE (!hi->hw_address))
2042     {
2043       return clib_error_return (0, "%U: interface %U do not support ip probe",
2044                                 format_ip4_address, dst,
2045                                 format_vnet_sw_if_index_name, vnm,
2046                                 sw_if_index);
2047     }
2048
2049   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2050                sizeof (h->ip4_over_ethernet[0].ethernet));
2051
2052   h->ip4_over_ethernet[0].ip4 = src[0];
2053   h->ip4_over_ethernet[1].ip4 = dst[0];
2054
2055   b = vlib_get_buffer (vm, bi);
2056   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2057     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2058
2059   ip46_address_t nh = {
2060     .ip4 = *dst,
2061   };
2062
2063   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2064                             VNET_LINK_IP4, &nh, sw_if_index);
2065   adj = adj_get (ai);
2066
2067   /* Peer has been previously resolved, retrieve glean adj instead */
2068   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2069     {
2070       if (refresh)
2071         unicast_rewrite = 1;
2072       else
2073         {
2074           adj_unlock (ai);
2075           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2076                                       VNET_LINK_IP4, sw_if_index, &nh);
2077           adj = adj_get (ai);
2078         }
2079     }
2080
2081   /* Add encapsulation string for software interface (e.g. ethernet header). */
2082   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2083   if (unicast_rewrite)
2084     {
2085       u16 *etype = vlib_buffer_get_current (b) - 2;
2086       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2087     }
2088   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2089
2090   {
2091     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2092     u32 *to_next = vlib_frame_vector_args (f);
2093     to_next[0] = bi;
2094     f->n_vectors = 1;
2095     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2096   }
2097
2098   adj_unlock (ai);
2099   return /* no error */ 0;
2100 }
2101
/* Next indices used by the IPv4 rewrite path for packets that cannot
 * simply follow the adjacency's own next index. */
typedef enum
{
  /* default next; the rewrite path starts every packet here */
  IP4_REWRITE_NEXT_DROP,
  /* TTL expired, or MTU exceeded with don't-fragment set */
  IP4_REWRITE_NEXT_ICMP_ERROR,
  /* MTU exceeded and fragmentation is permitted */
  IP4_REWRITE_NEXT_FRAGMENT,
  /* Last: number of next indices */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
2109
/**
 * The bits of an IPv4 address (held in network byte order) that are
 * masked out to construct a multicast MAC address.
 */
#if !CLIB_ARCH_IS_BIG_ENDIAN
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2119
2120 always_inline void
2121 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2122                u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
2123 {
2124   if (packet_len > adj_packet_bytes)
2125     {
2126       *error = IP4_ERROR_MTU_EXCEEDED;
2127       if (df)
2128         {
2129           icmp4_error_set_vnet_buffer
2130             (b, ICMP4_destination_unreachable,
2131              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2132              adj_packet_bytes);
2133           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2134         }
2135       else
2136         {
2137           /* IP fragmentation */
2138           ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
2139                                    IP4_FRAG_NEXT_IP4_LOOKUP, 0);
2140           *next = IP4_REWRITE_NEXT_FRAGMENT;
2141         }
2142     }
2143 }
2144
2145 always_inline uword
2146 ip4_rewrite_inline (vlib_main_t * vm,
2147                     vlib_node_runtime_t * node,
2148                     vlib_frame_t * frame,
2149                     int do_counters, int is_midchain, int is_mcast)
2150 {
2151   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2152   u32 *from = vlib_frame_vector_args (frame);
2153   u32 n_left_from, n_left_to_next, *to_next, next_index;
2154   vlib_node_runtime_t *error_node =
2155     vlib_node_get_runtime (vm, ip4_input_node.index);
2156
2157   n_left_from = frame->n_vectors;
2158   next_index = node->cached_next_index;
2159   u32 thread_index = vm->thread_index;
2160
2161   while (n_left_from > 0)
2162     {
2163       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2164
2165       while (n_left_from >= 4 && n_left_to_next >= 2)
2166         {
2167           ip_adjacency_t *adj0, *adj1;
2168           vlib_buffer_t *p0, *p1;
2169           ip4_header_t *ip0, *ip1;
2170           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2171           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2172           u32 tx_sw_if_index0, tx_sw_if_index1;
2173
2174           /* Prefetch next iteration. */
2175           {
2176             vlib_buffer_t *p2, *p3;
2177
2178             p2 = vlib_get_buffer (vm, from[2]);
2179             p3 = vlib_get_buffer (vm, from[3]);
2180
2181             vlib_prefetch_buffer_header (p2, STORE);
2182             vlib_prefetch_buffer_header (p3, STORE);
2183
2184             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2185             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2186           }
2187
2188           pi0 = to_next[0] = from[0];
2189           pi1 = to_next[1] = from[1];
2190
2191           from += 2;
2192           n_left_from -= 2;
2193           to_next += 2;
2194           n_left_to_next -= 2;
2195
2196           p0 = vlib_get_buffer (vm, pi0);
2197           p1 = vlib_get_buffer (vm, pi1);
2198
2199           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2200           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2201
2202           /*
2203            * pre-fetch the per-adjacency counters
2204            */
2205           if (do_counters)
2206             {
2207               vlib_prefetch_combined_counter (&adjacency_counters,
2208                                               thread_index, adj_index0);
2209               vlib_prefetch_combined_counter (&adjacency_counters,
2210                                               thread_index, adj_index1);
2211             }
2212
2213           ip0 = vlib_buffer_get_current (p0);
2214           ip1 = vlib_buffer_get_current (p1);
2215
2216           error0 = error1 = IP4_ERROR_NONE;
2217           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2218
2219           /* Decrement TTL & update checksum.
2220              Works either endian, so no need for byte swap. */
2221           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2222             {
2223               i32 ttl0 = ip0->ttl;
2224
2225               /* Input node should have reject packets with ttl 0. */
2226               ASSERT (ip0->ttl > 0);
2227
2228               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2229               checksum0 += checksum0 >= 0xffff;
2230
2231               ip0->checksum = checksum0;
2232               ttl0 -= 1;
2233               ip0->ttl = ttl0;
2234
2235               /*
2236                * If the ttl drops below 1 when forwarding, generate
2237                * an ICMP response.
2238                */
2239               if (PREDICT_FALSE (ttl0 <= 0))
2240                 {
2241                   error0 = IP4_ERROR_TIME_EXPIRED;
2242                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2243                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2244                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2245                                                0);
2246                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2247                 }
2248
2249               /* Verify checksum. */
2250               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2251                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2252             }
2253           else
2254             {
2255               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2256             }
2257           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2258             {
2259               i32 ttl1 = ip1->ttl;
2260
2261               /* Input node should have reject packets with ttl 0. */
2262               ASSERT (ip1->ttl > 0);
2263
2264               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2265               checksum1 += checksum1 >= 0xffff;
2266
2267               ip1->checksum = checksum1;
2268               ttl1 -= 1;
2269               ip1->ttl = ttl1;
2270
2271               /*
2272                * If the ttl drops below 1 when forwarding, generate
2273                * an ICMP response.
2274                */
2275               if (PREDICT_FALSE (ttl1 <= 0))
2276                 {
2277                   error1 = IP4_ERROR_TIME_EXPIRED;
2278                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2279                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2280                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2281                                                0);
2282                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2283                 }
2284
2285               /* Verify checksum. */
2286               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2287                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2288             }
2289           else
2290             {
2291               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2292             }
2293
2294           /* Rewrite packet header and updates lengths. */
2295           adj0 = adj_get (adj_index0);
2296           adj1 = adj_get (adj_index1);
2297
2298           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2299           rw_len0 = adj0[0].rewrite_header.data_bytes;
2300           rw_len1 = adj1[0].rewrite_header.data_bytes;
2301           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2302           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2303
2304           /* Check MTU of outgoing interface. */
2305           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2306                          adj0[0].rewrite_header.max_l3_packet_bytes,
2307                          ip0->flags_and_fragment_offset &
2308                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2309                          &next0, &error0);
2310           ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2311                          adj1[0].rewrite_header.max_l3_packet_bytes,
2312                          ip1->flags_and_fragment_offset &
2313                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2314                          &next1, &error1);
2315
2316           if (is_mcast)
2317             {
2318               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2319                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2320                         IP4_ERROR_SAME_INTERFACE : error0);
2321               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2322                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2323                         IP4_ERROR_SAME_INTERFACE : error1);
2324             }
2325
2326           p0->error = error_node->errors[error0];
2327           p1->error = error_node->errors[error1];
2328           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2329            * to see the IP headerr */
2330           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2331             {
2332               next0 = adj0[0].rewrite_header.next_index;
2333               p0->current_data -= rw_len0;
2334               p0->current_length += rw_len0;
2335               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2336               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2337
2338               if (PREDICT_FALSE
2339                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2340                 vnet_feature_arc_start (lm->output_feature_arc_index,
2341                                         tx_sw_if_index0, &next0, p0);
2342             }
2343           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2344             {
2345               next1 = adj1[0].rewrite_header.next_index;
2346               p1->current_data -= rw_len1;
2347               p1->current_length += rw_len1;
2348
2349               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2350               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2351
2352               if (PREDICT_FALSE
2353                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2354                 vnet_feature_arc_start (lm->output_feature_arc_index,
2355                                         tx_sw_if_index1, &next1, p1);
2356             }
2357
2358           /* Guess we are only writing on simple Ethernet header. */
2359           vnet_rewrite_two_headers (adj0[0], adj1[0],
2360                                     ip0, ip1, sizeof (ethernet_header_t));
2361
2362           /*
2363            * Bump the per-adjacency counters
2364            */
2365           if (do_counters)
2366             {
2367               vlib_increment_combined_counter
2368                 (&adjacency_counters,
2369                  thread_index,
2370                  adj_index0, 1,
2371                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2372
2373               vlib_increment_combined_counter
2374                 (&adjacency_counters,
2375                  thread_index,
2376                  adj_index1, 1,
2377                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2378             }
2379
2380           if (is_midchain)
2381             {
2382               adj0->sub_type.midchain.fixup_func
2383                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2384               adj1->sub_type.midchain.fixup_func
2385                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2386             }
2387           if (is_mcast)
2388             {
2389               /*
2390                * copy bytes from the IP address into the MAC rewrite
2391                */
2392               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2393                                           adj0->
2394                                           rewrite_header.dst_mcast_offset,
2395                                           &ip0->dst_address.as_u32,
2396                                           (u8 *) ip0);
2397               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2398                                           adj0->
2399                                           rewrite_header.dst_mcast_offset,
2400                                           &ip1->dst_address.as_u32,
2401                                           (u8 *) ip1);
2402             }
2403
2404           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2405                                            to_next, n_left_to_next,
2406                                            pi0, pi1, next0, next1);
2407         }
2408
2409       while (n_left_from > 0 && n_left_to_next > 0)
2410         {
2411           ip_adjacency_t *adj0;
2412           vlib_buffer_t *p0;
2413           ip4_header_t *ip0;
2414           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2415           u32 tx_sw_if_index0;
2416
2417           pi0 = to_next[0] = from[0];
2418
2419           p0 = vlib_get_buffer (vm, pi0);
2420
2421           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2422
2423           adj0 = adj_get (adj_index0);
2424
2425           ip0 = vlib_buffer_get_current (p0);
2426
2427           error0 = IP4_ERROR_NONE;
2428           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2429
2430           /* Decrement TTL & update checksum. */
2431           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2432             {
2433               i32 ttl0 = ip0->ttl;
2434
2435               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2436
2437               checksum0 += checksum0 >= 0xffff;
2438
2439               ip0->checksum = checksum0;
2440
2441               ASSERT (ip0->ttl > 0);
2442
2443               ttl0 -= 1;
2444
2445               ip0->ttl = ttl0;
2446
2447               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2448                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2449
2450               if (PREDICT_FALSE (ttl0 <= 0))
2451                 {
2452                   /*
2453                    * If the ttl drops below 1 when forwarding, generate
2454                    * an ICMP response.
2455                    */
2456                   error0 = IP4_ERROR_TIME_EXPIRED;
2457                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2458                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2459                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2460                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2461                                                0);
2462                 }
2463             }
2464           else
2465             {
2466               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2467             }
2468
2469           if (do_counters)
2470             vlib_prefetch_combined_counter (&adjacency_counters,
2471                                             thread_index, adj_index0);
2472
2473           /* Guess we are only writing on simple Ethernet header. */
2474           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2475           if (is_mcast)
2476             {
2477               /*
2478                * copy bytes from the IP address into the MAC rewrite
2479                */
2480               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2481                                           adj0->
2482                                           rewrite_header.dst_mcast_offset,
2483                                           &ip0->dst_address.as_u32,
2484                                           (u8 *) ip0);
2485             }
2486
2487           /* Update packet buffer attributes/set output interface. */
2488           rw_len0 = adj0[0].rewrite_header.data_bytes;
2489           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2490
2491           if (do_counters)
2492             vlib_increment_combined_counter
2493               (&adjacency_counters,
2494                thread_index, adj_index0, 1,
2495                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2496
2497           /* Check MTU of outgoing interface. */
2498           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2499                          adj0[0].rewrite_header.max_l3_packet_bytes,
2500                          ip0->flags_and_fragment_offset &
2501                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2502                          &next0, &error0);
2503
2504           if (is_mcast)
2505             {
2506               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2507                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2508                         IP4_ERROR_SAME_INTERFACE : error0);
2509             }
2510           p0->error = error_node->errors[error0];
2511
2512           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2513            * to see the IP headerr */
2514           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2515             {
2516               p0->current_data -= rw_len0;
2517               p0->current_length += rw_len0;
2518               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2519
2520               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2521               next0 = adj0[0].rewrite_header.next_index;
2522
2523               if (is_midchain)
2524                 {
2525                   adj0->sub_type.midchain.fixup_func
2526                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2527                 }
2528
2529               if (PREDICT_FALSE
2530                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2531                 vnet_feature_arc_start (lm->output_feature_arc_index,
2532                                         tx_sw_if_index0, &next0, p0);
2533
2534             }
2535
2536           from += 1;
2537           n_left_from -= 1;
2538           to_next += 1;
2539           n_left_to_next -= 1;
2540
2541           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2542                                            to_next, n_left_to_next,
2543                                            pi0, next0);
2544         }
2545
2546       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2547     }
2548
2549   /* Need to do trace after rewrites to pick up new packet data. */
2550   if (node->flags & VLIB_NODE_FLAG_TRACE)
2551     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2552
2553   return frame->n_vectors;
2554 }
2555
2556
2557 /** @brief IPv4 rewrite node.
2558     @node ip4-rewrite
2559
2560     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2561     header checksum, fetch the ip adjacency, check the outbound mtu,
2562     apply the adjacency rewrite, and send pkts to the adjacency
2563     rewrite header's rewrite_next_index.
2564
2565     @param vm vlib_main_t corresponding to the current thread
2566     @param node vlib_node_runtime_t
2567     @param frame vlib_frame_t whose contents should be dispatched
2568
2569     @par Graph mechanics: buffer metadata, next index usage
2570
2571     @em Uses:
2572     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2573         - the rewrite adjacency index
2574     - <code>adj->lookup_next_index</code>
2575         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2576           the packet will be dropped.
2577     - <code>adj->rewrite_header</code>
2578         - Rewrite string length, rewrite string, next_index
2579
2580     @em Sets:
2581     - <code>b->current_data, b->current_length</code>
2582         - Updated net of applying the rewrite string
2583
2584     <em>Next Indices:</em>
2585     - <code> adj->rewrite_header.next_index </code>
2586       or @c ip4-drop
2587 */
2588 static uword
2589 ip4_rewrite (vlib_main_t * vm,
2590              vlib_node_runtime_t * node, vlib_frame_t * frame)
2591 {
2592   if (adj_are_counters_enabled ())
2593     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2594   else
2595     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2596 }
2597
2598 static uword
2599 ip4_rewrite_bcast (vlib_main_t * vm,
2600                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2601 {
2602   if (adj_are_counters_enabled ())
2603     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2604   else
2605     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2606 }
2607
2608 static uword
2609 ip4_midchain (vlib_main_t * vm,
2610               vlib_node_runtime_t * node, vlib_frame_t * frame)
2611 {
2612   if (adj_are_counters_enabled ())
2613     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2614   else
2615     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2616 }
2617
2618 static uword
2619 ip4_rewrite_mcast (vlib_main_t * vm,
2620                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2621 {
2622   if (adj_are_counters_enabled ())
2623     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2624   else
2625     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2626 }
2627
2628 static uword
2629 ip4_mcast_midchain (vlib_main_t * vm,
2630                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2631 {
2632   if (adj_are_counters_enabled ())
2633     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2634   else
2635     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2636 }
2637
2638 /* *INDENT-OFF* */
2639 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2640   .function = ip4_rewrite,
2641   .name = "ip4-rewrite",
2642   .vector_size = sizeof (u32),
2643
2644   .format_trace = format_ip4_rewrite_trace,
2645
2646   .n_next_nodes = IP4_REWRITE_N_NEXT,
2647   .next_nodes = {
2648     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2649     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2650     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2651   },
2652 };
2653
2654 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2655   .function = ip4_rewrite,
2656   .name = "ip4-rewrite-bcast",
2657   .vector_size = sizeof (u32),
2658
2659   .format_trace = format_ip4_rewrite_trace,
2660   .sibling_of = "ip4-rewrite",
2661 };
2662 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_bcast_node, ip4_rewrite_bcast)
2663
2664 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2665   .function = ip4_rewrite_mcast,
2666   .name = "ip4-rewrite-mcast",
2667   .vector_size = sizeof (u32),
2668
2669   .format_trace = format_ip4_rewrite_trace,
2670   .sibling_of = "ip4-rewrite",
2671 };
2672 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2673
2674 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2675   .function = ip4_mcast_midchain,
2676   .name = "ip4-mcast-midchain",
2677   .vector_size = sizeof (u32),
2678
2679   .format_trace = format_ip4_rewrite_trace,
2680   .sibling_of = "ip4-rewrite",
2681 };
2682 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2683
2684 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2685   .function = ip4_midchain,
2686   .name = "ip4-midchain",
2687   .vector_size = sizeof (u32),
2688   .format_trace = format_ip4_forward_next_trace,
2689   .sibling_of =  "ip4-rewrite",
2690 };
2691 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2692 /* *INDENT-ON */
2693
2694 int
2695 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2696 {
2697   ip4_fib_mtrie_t *mtrie0;
2698   ip4_fib_mtrie_leaf_t leaf0;
2699   u32 lbi0;
2700
2701   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2702
2703   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2704   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2705   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2706
2707   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2708
2709   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2710 }
2711
2712 static clib_error_t *
2713 test_lookup_command_fn (vlib_main_t * vm,
2714                         unformat_input_t * input, vlib_cli_command_t * cmd)
2715 {
2716   ip4_fib_t *fib;
2717   u32 table_id = 0;
2718   f64 count = 1;
2719   u32 n;
2720   int i;
2721   ip4_address_t ip4_base_address;
2722   u64 errors = 0;
2723
2724   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2725     {
2726       if (unformat (input, "table %d", &table_id))
2727         {
2728           /* Make sure the entry exists. */
2729           fib = ip4_fib_get (table_id);
2730           if ((fib) && (fib->index != table_id))
2731             return clib_error_return (0, "<fib-index> %d does not exist",
2732                                       table_id);
2733         }
2734       else if (unformat (input, "count %f", &count))
2735         ;
2736
2737       else if (unformat (input, "%U",
2738                          unformat_ip4_address, &ip4_base_address))
2739         ;
2740       else
2741         return clib_error_return (0, "unknown input `%U'",
2742                                   format_unformat_error, input);
2743     }
2744
2745   n = count;
2746
2747   for (i = 0; i < n; i++)
2748     {
2749       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2750         errors++;
2751
2752       ip4_base_address.as_u32 =
2753         clib_host_to_net_u32 (1 +
2754                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2755     }
2756
2757   if (errors)
2758     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2759   else
2760     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2761
2762   return 0;
2763 }
2764
2765 /*?
2766  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2767  * given FIB table to determine if there is a conflict with the
2768  * adjacency table. The fib-id can be determined by using the
2769  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2770  * of 0 is used.
2771  *
2772  * @todo This command uses fib-id, other commands use table-id (not
2773  * just a name, they are different indexes). Would like to change this
2774  * to table-id for consistency.
2775  *
2776  * @cliexpar
2777  * Example of how to run the test lookup command:
2778  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2779  * No errors in 2 lookups
2780  * @cliexend
2781 ?*/
2782 /* *INDENT-OFF* */
2783 VLIB_CLI_COMMAND (lookup_test_command, static) =
2784 {
2785   .path = "test lookup",
2786   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2787   .function = test_lookup_command_fn,
2788 };
2789 /* *INDENT-ON* */
2790
2791 int
2792 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2793 {
2794   u32 fib_index;
2795
2796   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2797
2798   if (~0 == fib_index)
2799     return VNET_API_ERROR_NO_SUCH_FIB;
2800
2801   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2802                                   flow_hash_config);
2803
2804   return 0;
2805 }
2806
2807 static clib_error_t *
2808 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2809                              unformat_input_t * input,
2810                              vlib_cli_command_t * cmd)
2811 {
2812   int matched = 0;
2813   u32 table_id = 0;
2814   u32 flow_hash_config = 0;
2815   int rv;
2816
2817   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2818     {
2819       if (unformat (input, "table %d", &table_id))
2820         matched = 1;
2821 #define _(a,v) \
2822     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2823       foreach_flow_hash_bit
2824 #undef _
2825         else
2826         break;
2827     }
2828
2829   if (matched == 0)
2830     return clib_error_return (0, "unknown input `%U'",
2831                               format_unformat_error, input);
2832
2833   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2834   switch (rv)
2835     {
2836     case 0:
2837       break;
2838
2839     case VNET_API_ERROR_NO_SUCH_FIB:
2840       return clib_error_return (0, "no such FIB table %d", table_id);
2841
2842     default:
2843       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2844       break;
2845     }
2846
2847   return 0;
2848 }
2849
2850 /*?
2851  * Configure the set of IPv4 fields used by the flow hash.
2852  *
2853  * @cliexpar
2854  * Example of how to set the flow hash on a given table:
2855  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2856  * Example of how to display the configured flow hash:
2857  * @cliexstart{show ip fib}
2858  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2859  * 0.0.0.0/0
2860  *   unicast-ip4-chain
2861  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2862  *     [0] [@0]: dpo-drop ip6
2863  * 0.0.0.0/32
2864  *   unicast-ip4-chain
2865  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2866  *     [0] [@0]: dpo-drop ip6
2867  * 224.0.0.0/8
2868  *   unicast-ip4-chain
2869  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2870  *     [0] [@0]: dpo-drop ip6
2871  * 6.0.1.2/32
2872  *   unicast-ip4-chain
2873  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2874  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2875  * 7.0.0.1/32
2876  *   unicast-ip4-chain
2877  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2878  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2879  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2880  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2881  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2882  * 240.0.0.0/8
2883  *   unicast-ip4-chain
2884  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2885  *     [0] [@0]: dpo-drop ip6
2886  * 255.255.255.255/32
2887  *   unicast-ip4-chain
2888  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2889  *     [0] [@0]: dpo-drop ip6
2890  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2891  * 0.0.0.0/0
2892  *   unicast-ip4-chain
2893  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2894  *     [0] [@0]: dpo-drop ip6
2895  * 0.0.0.0/32
2896  *   unicast-ip4-chain
2897  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2898  *     [0] [@0]: dpo-drop ip6
2899  * 172.16.1.0/24
2900  *   unicast-ip4-chain
2901  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2902  *     [0] [@4]: ipv4-glean: af_packet0
2903  * 172.16.1.1/32
2904  *   unicast-ip4-chain
2905  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2906  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2907  * 172.16.1.2/32
2908  *   unicast-ip4-chain
2909  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2910  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2911  * 172.16.2.0/24
2912  *   unicast-ip4-chain
2913  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2914  *     [0] [@4]: ipv4-glean: af_packet1
2915  * 172.16.2.1/32
2916  *   unicast-ip4-chain
2917  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2918  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2919  * 224.0.0.0/8
2920  *   unicast-ip4-chain
2921  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2922  *     [0] [@0]: dpo-drop ip6
2923  * 240.0.0.0/8
2924  *   unicast-ip4-chain
2925  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2926  *     [0] [@0]: dpo-drop ip6
2927  * 255.255.255.255/32
2928  *   unicast-ip4-chain
2929  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2930  *     [0] [@0]: dpo-drop ip6
2931  * @cliexend
2932 ?*/
2933 /* *INDENT-OFF* */
2934 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2935 {
2936   .path = "set ip flow-hash",
2937   .short_help =
2938   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2939   .function = set_ip_flow_hash_command_fn,
2940 };
2941 /* *INDENT-ON* */
2942
2943 int
2944 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2945                              u32 table_index)
2946 {
2947   vnet_main_t *vnm = vnet_get_main ();
2948   vnet_interface_main_t *im = &vnm->interface_main;
2949   ip4_main_t *ipm = &ip4_main;
2950   ip_lookup_main_t *lm = &ipm->lookup_main;
2951   vnet_classify_main_t *cm = &vnet_classify_main;
2952   ip4_address_t *if_addr;
2953
2954   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2955     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2956
2957   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2958     return VNET_API_ERROR_NO_SUCH_ENTRY;
2959
2960   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2961   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2962
2963   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2964
2965   if (NULL != if_addr)
2966     {
2967       fib_prefix_t pfx = {
2968         .fp_len = 32,
2969         .fp_proto = FIB_PROTOCOL_IP4,
2970         .fp_addr.ip4 = *if_addr,
2971       };
2972       u32 fib_index;
2973
2974       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2975                                                        sw_if_index);
2976
2977
2978       if (table_index != (u32) ~ 0)
2979         {
2980           dpo_id_t dpo = DPO_INVALID;
2981
2982           dpo_set (&dpo,
2983                    DPO_CLASSIFY,
2984                    DPO_PROTO_IP4,
2985                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2986
2987           fib_table_entry_special_dpo_add (fib_index,
2988                                            &pfx,
2989                                            FIB_SOURCE_CLASSIFY,
2990                                            FIB_ENTRY_FLAG_NONE, &dpo);
2991           dpo_reset (&dpo);
2992         }
2993       else
2994         {
2995           fib_table_entry_special_remove (fib_index,
2996                                           &pfx, FIB_SOURCE_CLASSIFY);
2997         }
2998     }
2999
3000   return 0;
3001 }
3002
3003 static clib_error_t *
3004 set_ip_classify_command_fn (vlib_main_t * vm,
3005                             unformat_input_t * input,
3006                             vlib_cli_command_t * cmd)
3007 {
3008   u32 table_index = ~0;
3009   int table_index_set = 0;
3010   u32 sw_if_index = ~0;
3011   int rv;
3012
3013   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3014     {
3015       if (unformat (input, "table-index %d", &table_index))
3016         table_index_set = 1;
3017       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3018                          vnet_get_main (), &sw_if_index))
3019         ;
3020       else
3021         break;
3022     }
3023
3024   if (table_index_set == 0)
3025     return clib_error_return (0, "classify table-index must be specified");
3026
3027   if (sw_if_index == ~0)
3028     return clib_error_return (0, "interface / subif must be specified");
3029
3030   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3031
3032   switch (rv)
3033     {
3034     case 0:
3035       break;
3036
3037     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3038       return clib_error_return (0, "No such interface");
3039
3040     case VNET_API_ERROR_NO_SUCH_ENTRY:
3041       return clib_error_return (0, "No such classifier table");
3042     }
3043   return 0;
3044 }
3045
3046 /*?
3047  * Assign a classification table to an interface. The classification
3048  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3049  * commands. Once the table is created, use this command to filter packets
3050  * on an interface.
3051  *
3052  * @cliexpar
3053  * Example of how to assign a classification table to an interface:
3054  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3055 ?*/
3056 /* *INDENT-OFF* */
3057 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3058 {
3059     .path = "set ip classify",
3060     .short_help =
3061     "set ip classify intfc <interface> table-index <classify-idx>",
3062     .function = set_ip_classify_command_fn,
3063 };
3064 /* *INDENT-ON* */
3065
3066 static clib_error_t *
3067 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3068 {
3069   ip4_main_t *im = &ip4_main;
3070   uword heapsize = 0;
3071
3072   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3073     {
3074       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3075         ;
3076       else
3077         return clib_error_return (0,
3078                                   "invalid heap-size parameter `%U'",
3079                                   format_unformat_error, input);
3080     }
3081
3082   im->mtrie_heap_size = heapsize;
3083
3084   return 0;
3085 }
3086
3087 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3088
3089 /*
3090  * fd.io coding-style-patch-verification: ON
3091  *
3092  * Local Variables:
3093  * eval: (c-set-style "gnu")
3094  * End:
3095  */