VPP-1392: VXLAN fails with IP fragmentation
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ip/ip_frag.h>
43 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
44 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
45 #include <vnet/ppp/ppp.h>
46 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
47 #include <vnet/api_errno.h>     /* for API error numbers */
48 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
50 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
51 #include <vnet/fib/ip4_fib.h>
52 #include <vnet/dpo/load_balance.h>
53 #include <vnet/dpo/load_balance_map.h>
54 #include <vnet/dpo/classify_dpo.h>
55 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
56
57 #include <vnet/ip/ip4_forward.h>
58
59 /** @brief IPv4 lookup node.
60     @node ip4-lookup
61
62     This is the main IPv4 lookup dispatch node.
63
64     @param vm vlib_main_t corresponding to the current thread
65     @param node vlib_node_runtime_t
66     @param frame vlib_frame_t whose contents should be dispatched
67
68     @par Graph mechanics: buffer metadata, next index usage
69
70     @em Uses:
71     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
72         - Indicates the @c sw_if_index value of the interface that the
73           packet was received on.
74     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
75         - When the value is @c ~0 then the node performs a longest prefix
76           match (LPM) for the packet destination address in the FIB attached
77           to the receive interface.
78         - Otherwise perform LPM for the packet destination address in the
79           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
80           value (0, 1, ...) and not a VRF id.
81
82     @em Sets:
83     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
84         - The lookup result adjacency index.
85
86     <em>Next Index:</em>
87     - Dispatches the packet to the node index found in
88       ip_adjacency_t @c adj->lookup_next_index
89       (where @c adj is the lookup result adjacency).
90 */
91 VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
92                                 vlib_frame_t * frame)
93 {
94   return ip4_lookup_inline (vm, node, frame,
95                             /* lookup_for_responses_to_locally_received_packets */
96                             0);
97
98 }
99
/* Trace formatter shared by the node registrations in this file; it is
   only declared here, the definition lives elsewhere. */
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);

/*
 * Graph-node registration for "ip4-lookup".
 * The next-node table is supplied by IP4_LOOKUP_NEXT_NODES and holds
 * IP_LOOKUP_N_NEXT entries.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
  .name = "ip4-lookup",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_lookup_trace,
  .n_next_nodes = IP_LOOKUP_N_NEXT,
  .next_nodes = IP4_LOOKUP_NEXT_NODES,
};
/* *INDENT-ON* */
112
/**
 * @brief ip4-load-balance node function.
 *
 * For every buffer in the frame:
 *  - read the load-balance index from
 *    vnet_buffer(b)->ip.adj_index[VLIB_TX];
 *  - pick one bucket of that load-balance object (by flow hash when the
 *    object has more than one bucket, bucket 0 otherwise);
 *  - overwrite vnet_buffer(b)->ip.adj_index[VLIB_TX] with the chosen
 *    DPO's index and enqueue the buffer to the DPO's next node;
 *  - bump the per-load-balance combined counter by one packet and the
 *    buffer chain length in bytes.
 *
 * @param vm    vlib_main_t of the current thread
 * @param node  runtime data of this node
 * @param frame frame holding the buffer indices to process
 * @return the number of vectors in @c frame
 */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
                                      vlib_node_runtime_t * node,
                                      vlib_frame_t * frame)
{
  vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
  u32 n_left_from, n_left_to_next, *from, *to_next;
  ip_lookup_next_t next;
  u32 thread_index = vm->thread_index;

  from = vlib_frame_vector_args (frame);
  n_left_from = frame->n_vectors;
  next = node->cached_next_index;

  /* Tracing is done for the whole frame up front. */
  if (node->flags & VLIB_NODE_FLAG_TRACE)
    ip4_forward_next_trace (vm, node, frame, VLIB_TX);

  while (n_left_from > 0)
    {
      vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);


      /* Dual loop: handles two buffers per iteration. At least four must
         remain because from[2] and from[3] are prefetched below. */
      while (n_left_from >= 4 && n_left_to_next >= 2)
        {
          ip_lookup_next_t next0, next1;
          const load_balance_t *lb0, *lb1;
          vlib_buffer_t *p0, *p1;
          u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
          const ip4_header_t *ip0, *ip1;
          const dpo_id_t *dpo0, *dpo1;

          /* Prefetch next iteration. */
          {
            vlib_buffer_t *p2, *p3;

            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);

            vlib_prefetch_buffer_header (p2, STORE);
            vlib_prefetch_buffer_header (p3, STORE);

            CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
            CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
          }

          /* Speculatively enqueue both buffers to the cached next node;
             vlib_validate_buffer_enqueue_x2 is given the real next
             indices at the end of the iteration. */
          pi0 = to_next[0] = from[0];
          pi1 = to_next[1] = from[1];

          from += 2;
          n_left_from -= 2;
          to_next += 2;
          n_left_to_next -= 2;

          p0 = vlib_get_buffer (vm, pi0);
          p1 = vlib_get_buffer (vm, pi1);

          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);
          /* On entry adj_index[VLIB_TX] holds a load-balance index. */
          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
          lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];

          lb0 = load_balance_get (lbi0);
          lb1 = load_balance_get (lbi1);

          /*
           * this node is for via FIBs: we can re-use the flow hash already
           * stored in the buffer, if there is one.
           * We don't want to use the same hash value at each level in the recursion
           * graph as that would lead to polarisation, hence the stored
           * hash is shifted right by one bit before it is used.
           */
          hc0 = hc1 = 0;

          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
                {
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    vnet_buffer (p0)->ip.flow_hash >> 1;
                }
              else
                {
                  /* No hash stored yet: compute one from the IP header and
                     remember it in the buffer. */
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
                }
              dpo0 = load_balance_get_fwd_bucket
                (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
            }
          else
            {
              /* Single bucket: no hashing needed. */
              dpo0 = load_balance_get_bucket_i (lb0, 0);
            }
          if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
            {
              if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
                {
                  hc1 = vnet_buffer (p1)->ip.flow_hash =
                    vnet_buffer (p1)->ip.flow_hash >> 1;
                }
              else
                {
                  hc1 = vnet_buffer (p1)->ip.flow_hash =
                    ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
                }
              dpo1 = load_balance_get_fwd_bucket
                (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo1 = load_balance_get_bucket_i (lb1, 0);
            }

          next0 = dpo0->dpoi_next_node;
          next1 = dpo1->dpoi_next_node;

          /* Hand the selected DPO's index to the next node. */
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
          vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;

          /* Count one packet and its full chain length against the
             load-balance object the buffer arrived with. */
          vlib_increment_combined_counter
            (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
          vlib_increment_combined_counter
            (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));

          vlib_validate_buffer_enqueue_x2 (vm, node, next,
                                           to_next, n_left_to_next,
                                           pi0, pi1, next0, next1);
        }

      /* Single loop: same processing, one buffer at a time, for whatever
         the dual loop left over. */
      while (n_left_from > 0 && n_left_to_next > 0)
        {
          ip_lookup_next_t next0;
          const load_balance_t *lb0;
          vlib_buffer_t *p0;
          u32 pi0, lbi0, hc0;
          const ip4_header_t *ip0;
          const dpo_id_t *dpo0;

          pi0 = from[0];
          to_next[0] = pi0;
          from += 1;
          to_next += 1;
          n_left_to_next -= 1;
          n_left_from -= 1;

          p0 = vlib_get_buffer (vm, pi0);

          ip0 = vlib_buffer_get_current (p0);
          lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];

          lb0 = load_balance_get (lbi0);

          hc0 = 0;
          if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
            {
              if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
                {
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    vnet_buffer (p0)->ip.flow_hash >> 1;
                }
              else
                {
                  hc0 = vnet_buffer (p0)->ip.flow_hash =
                    ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
                }
              dpo0 = load_balance_get_fwd_bucket
                (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
            }
          else
            {
              dpo0 = load_balance_get_bucket_i (lb0, 0);
            }

          next0 = dpo0->dpoi_next_node;
          vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;

          vlib_increment_combined_counter
            (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));

          vlib_validate_buffer_enqueue_x1 (vm, node, next,
                                           to_next, n_left_to_next,
                                           pi0, next0);
        }

      vlib_put_next_frame (vm, node, next, n_left_to_next);
    }

  return frame->n_vectors;
}
299
/*
 * Graph-node registration for "ip4-load-balance".
 * Declared as a sibling of "ip4-lookup" and uses the same trace formatter.
 */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
  .name = "ip4-load-balance",
  .vector_size = sizeof (u32),
  .sibling_of = "ip4-lookup",
  .format_trace = format_ip4_lookup_trace,
};
/* *INDENT-ON* */
309
310 #ifndef CLIB_MARCH_VARIANT
/**
 * @brief Get the first IPv4 address configured on an interface.
 *
 * Walks the interface's addresses with "honor unnumbered" set and stops
 * at the first one.
 *
 * @param im          IPv4 main structure
 * @param sw_if_index software interface to query
 * @param result_ia   optional out-parameter; when non-NULL it receives the
 *                    ip_interface_address_t of the returned address, or 0
 *                    when no address was found
 * @return pointer to the first address, or 0 if the walk visits none
 */
ip4_address_t *
ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
                             ip_interface_address_t ** result_ia)
{
  ip_lookup_main_t *lm = &im->lookup_main;
  ip_interface_address_t *ia = 0;
  ip4_address_t *result = 0;

  /* *INDENT-OFF* */
  foreach_ip_interface_address
    (lm, ia, sw_if_index,
     1 /* honor unnumbered */ ,
     ({
       ip4_address_t * a =
         ip_interface_address_get_address (lm, ia);
       result = a;
       /* only the first address is wanted */
       break;
     }));
  /* *INDENT-ON* */
  if (result_ia)
    *result_ia = result ? ia : 0;
  return result;
}
335
336 static void
337 ip4_add_subnet_bcast_route (u32 fib_index,
338                             fib_prefix_t *pfx,
339                             u32 sw_if_index)
340 {
341   vnet_sw_interface_flags_t iflags;
342
343   iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index);
344
345   fib_table_entry_special_remove(fib_index,
346                                  pfx,
347                                  FIB_SOURCE_INTERFACE);
348
349   if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST)
350     {
351       fib_table_entry_update_one_path (fib_index, pfx,
352                                        FIB_SOURCE_INTERFACE,
353                                        FIB_ENTRY_FLAG_NONE,
354                                        DPO_PROTO_IP4,
355                                        /* No next-hop address */
356                                        &ADJ_BCAST_ADDR,
357                                        sw_if_index,
358                                        // invalid FIB index
359                                        ~0,
360                                        1,
361                                        // no out-label stack
362                                        NULL,
363                                        FIB_ROUTE_PATH_FLAG_NONE);
364     }
365   else
366     {
367         fib_table_entry_special_add(fib_index,
368                                     pfx,
369                                     FIB_SOURCE_INTERFACE,
370                                     (FIB_ENTRY_FLAG_DROP |
371                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
372     }
373 }
374
375 static void
376 ip4_add_interface_routes (u32 sw_if_index,
377                           ip4_main_t * im, u32 fib_index,
378                           ip_interface_address_t * a)
379 {
380   ip_lookup_main_t *lm = &im->lookup_main;
381   ip4_address_t *address = ip_interface_address_get_address (lm, a);
382   fib_prefix_t pfx = {
383     .fp_len = a->address_length,
384     .fp_proto = FIB_PROTOCOL_IP4,
385     .fp_addr.ip4 = *address,
386   };
387
388   if (pfx.fp_len <= 30)
389     {
390       /* a /30 or shorter - add a glean for the network address */
391       fib_table_entry_update_one_path (fib_index, &pfx,
392                                        FIB_SOURCE_INTERFACE,
393                                        (FIB_ENTRY_FLAG_CONNECTED |
394                                         FIB_ENTRY_FLAG_ATTACHED),
395                                        DPO_PROTO_IP4,
396                                        /* No next-hop address */
397                                        NULL,
398                                        sw_if_index,
399                                        // invalid FIB index
400                                        ~0,
401                                        1,
402                                        // no out-label stack
403                                        NULL,
404                                        FIB_ROUTE_PATH_FLAG_NONE);
405
406       /* Add the two broadcast addresses as drop */
407       fib_prefix_t net_pfx = {
408         .fp_len = 32,
409         .fp_proto = FIB_PROTOCOL_IP4,
410         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
411       };
412       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
413         fib_table_entry_special_add(fib_index,
414                                     &net_pfx,
415                                     FIB_SOURCE_INTERFACE,
416                                     (FIB_ENTRY_FLAG_DROP |
417                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
418       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
419       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
420         ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index);
421     }
422   else if (pfx.fp_len == 31)
423     {
424       u32 mask = clib_host_to_net_u32(1);
425       fib_prefix_t net_pfx = pfx;
426
427       net_pfx.fp_len = 32;
428       net_pfx.fp_addr.ip4.as_u32 ^= mask;
429
430       /* a /31 - add the other end as an attached host */
431       fib_table_entry_update_one_path (fib_index, &net_pfx,
432                                        FIB_SOURCE_INTERFACE,
433                                        (FIB_ENTRY_FLAG_ATTACHED),
434                                        DPO_PROTO_IP4,
435                                        &net_pfx.fp_addr,
436                                        sw_if_index,
437                                        // invalid FIB index
438                                        ~0,
439                                        1,
440                                        NULL,
441                                        FIB_ROUTE_PATH_FLAG_NONE);
442     }
443   pfx.fp_len = 32;
444
445   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
446     {
447       u32 classify_table_index =
448         lm->classify_table_index_by_sw_if_index[sw_if_index];
449       if (classify_table_index != (u32) ~ 0)
450         {
451           dpo_id_t dpo = DPO_INVALID;
452
453           dpo_set (&dpo,
454                    DPO_CLASSIFY,
455                    DPO_PROTO_IP4,
456                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
457
458           fib_table_entry_special_dpo_add (fib_index,
459                                            &pfx,
460                                            FIB_SOURCE_CLASSIFY,
461                                            FIB_ENTRY_FLAG_NONE, &dpo);
462           dpo_reset (&dpo);
463         }
464     }
465
466   fib_table_entry_update_one_path (fib_index, &pfx,
467                                    FIB_SOURCE_INTERFACE,
468                                    (FIB_ENTRY_FLAG_CONNECTED |
469                                     FIB_ENTRY_FLAG_LOCAL),
470                                    DPO_PROTO_IP4,
471                                    &pfx.fp_addr,
472                                    sw_if_index,
473                                    // invalid FIB index
474                                    ~0,
475                                    1, NULL,
476                                    FIB_ROUTE_PATH_FLAG_NONE);
477 }
478
479 static void
480 ip4_del_interface_routes (ip4_main_t * im,
481                           u32 fib_index,
482                           ip4_address_t * address, u32 address_length)
483 {
484   fib_prefix_t pfx = {
485     .fp_len = address_length,
486     .fp_proto = FIB_PROTOCOL_IP4,
487     .fp_addr.ip4 = *address,
488   };
489
490   if (pfx.fp_len <= 30)
491     {
492       fib_prefix_t net_pfx = {
493         .fp_len = 32,
494         .fp_proto = FIB_PROTOCOL_IP4,
495         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
496       };
497       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
498         fib_table_entry_special_remove(fib_index,
499                                        &net_pfx,
500                                        FIB_SOURCE_INTERFACE);
501       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
502       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
503         fib_table_entry_special_remove(fib_index,
504                                        &net_pfx,
505                                        FIB_SOURCE_INTERFACE);
506       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
507     }
508     else if (pfx.fp_len == 31)
509     {
510       u32 mask = clib_host_to_net_u32(1);
511       fib_prefix_t net_pfx = pfx;
512
513       net_pfx.fp_len = 32;
514       net_pfx.fp_addr.ip4.as_u32 ^= mask;
515
516       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
517     }
518
519   pfx.fp_len = 32;
520   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
521 }
522
523 void
524 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
525 {
526   ip4_main_t *im = &ip4_main;
527
528   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
529
530   /*
531    * enable/disable only on the 1<->0 transition
532    */
533   if (is_enable)
534     {
535       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
536         return;
537     }
538   else
539     {
540       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
541       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
542         return;
543     }
544   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
545                                !is_enable, 0, 0);
546
547
548   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
549                                sw_if_index, !is_enable, 0, 0);
550 }
551
552 static clib_error_t *
553 ip4_add_del_interface_address_internal (vlib_main_t * vm,
554                                         u32 sw_if_index,
555                                         ip4_address_t * address,
556                                         u32 address_length, u32 is_del)
557 {
558   vnet_main_t *vnm = vnet_get_main ();
559   ip4_main_t *im = &ip4_main;
560   ip_lookup_main_t *lm = &im->lookup_main;
561   clib_error_t *error = 0;
562   u32 if_address_index, elts_before;
563   ip4_address_fib_t ip4_af, *addr_fib = 0;
564
565   /* local0 interface doesn't support IP addressing  */
566   if (sw_if_index == 0)
567     {
568       return
569        clib_error_create ("local0 interface doesn't support IP addressing");
570     }
571
572   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
573   ip4_addr_fib_init (&ip4_af, address,
574                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
575   vec_add1 (addr_fib, ip4_af);
576
577   /*
578    * there is no support for adj-fib handling in the presence of overlapping
579    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
580    * most routers do.
581    */
582   /* *INDENT-OFF* */
583   if (!is_del)
584     {
585       /* When adding an address check that it does not conflict
586          with an existing address on any interface in this table. */
587       ip_interface_address_t *ia;
588       vnet_sw_interface_t *sif;
589
590       pool_foreach(sif, vnm->interface_main.sw_interfaces,
591       ({
592           if (im->fib_index_by_sw_if_index[sw_if_index] ==
593               im->fib_index_by_sw_if_index[sif->sw_if_index])
594             {
595               foreach_ip_interface_address
596                 (&im->lookup_main, ia, sif->sw_if_index,
597                  0 /* honor unnumbered */ ,
598                  ({
599                    ip4_address_t * x =
600                      ip_interface_address_get_address
601                      (&im->lookup_main, ia);
602                    if (ip4_destination_matches_route
603                        (im, address, x, ia->address_length) ||
604                        ip4_destination_matches_route (im,
605                                                       x,
606                                                       address,
607                                                       address_length))
608                      {
609                        vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
610
611                        return
612                          clib_error_create
613                          ("failed to add %U which conflicts with %U for interface %U",
614                           format_ip4_address_and_length, address,
615                           address_length,
616                           format_ip4_address_and_length, x,
617                           ia->address_length,
618                           format_vnet_sw_if_index_name, vnm,
619                           sif->sw_if_index);
620                      }
621                  }));
622             }
623       }));
624     }
625   /* *INDENT-ON* */
626
627   elts_before = pool_elts (lm->if_address_pool);
628
629   error = ip_interface_address_add_del
630     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
631   if (error)
632     goto done;
633
634   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
635
636   if (is_del)
637     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
638   else
639     ip4_add_interface_routes (sw_if_index,
640                               im, ip4_af.fib_index,
641                               pool_elt_at_index
642                               (lm->if_address_pool, if_address_index));
643
644   /* If pool did not grow/shrink: add duplicate address. */
645   if (elts_before != pool_elts (lm->if_address_pool))
646     {
647       ip4_add_del_interface_address_callback_t *cb;
648       vec_foreach (cb, im->add_del_interface_address_callbacks)
649         cb->function (im, cb->function_opaque, sw_if_index,
650                       address, address_length, if_address_index, is_del);
651     }
652
653 done:
654   vec_free (addr_fib);
655   return error;
656 }
657
658 clib_error_t *
659 ip4_add_del_interface_address (vlib_main_t * vm,
660                                u32 sw_if_index,
661                                ip4_address_t * address,
662                                u32 address_length, u32 is_del)
663 {
664   return ip4_add_del_interface_address_internal
665     (vm, sw_if_index, address, address_length, is_del);
666 }
667
668 void
669 ip4_directed_broadcast (u32 sw_if_index, u8 enable)
670 {
671   ip_interface_address_t *ia;
672   ip4_main_t *im;
673
674   im = &ip4_main;
675
676   /*
677    * when directed broadcast is enabled, the subnet braodcast route will forward
678    * packets using an adjacency with a broadcast MAC. otherwise it drops
679    */
680   /* *INDENT-OFF* */
681   foreach_ip_interface_address(&im->lookup_main, ia,
682                                sw_if_index, 0,
683      ({
684        if (ia->address_length <= 30)
685          {
686            ip4_address_t *ipa;
687
688            ipa = ip_interface_address_get_address (&im->lookup_main, ia);
689
690            fib_prefix_t pfx = {
691              .fp_len = 32,
692              .fp_proto = FIB_PROTOCOL_IP4,
693              .fp_addr = {
694                .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]),
695              },
696            };
697
698            ip4_add_subnet_bcast_route
699              (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
700                                                   sw_if_index),
701               &pfx, sw_if_index);
702          }
703      }));
704   /* *INDENT-ON* */
705 }
706 #endif
707
/* Built-in ip4 unicast rx feature path definition */
/*
 * The arc starts at "ip4-input" / "ip4-input-no-checksum". Each feature
 * below names, via .runs_before, the feature(s) it must precede; together
 * the constraints give the order
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
 *   vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 * with ip4-source-and-port-range-check-rx constrained only to run before
 * ip4-policer-classify and ip4-not-enabled only before ip4-lookup.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec-input-ip4",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* This is the feature that ip4_sw_interface_enable_disable() and
   ip4_sw_interface_add_del() toggle per interface. */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
793
/* Built-in ip4 multicast rx feature path definition */
/*
 * Same start nodes as the unicast arc. Both vpath-input-ip4 and
 * ip4-not-enabled are constrained to run before ip4-mfib-forward-lookup,
 * which is the last feature on the arc.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
822
/* Source and port-range check ip4 tx feature path definition */
/*
 * The output arc starts at "ip4-rewrite", "ip4-midchain" and
 * "ip4-dvr-dpo". The constraints below give the order
 *   ip4-source-and-port-range-check-tx -> ip4-outacl ->
 *   ipsec-output-ip4 -> interface-output
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ip4-outacl"),
};

VNET_FEATURE_INIT (ip4_outacl, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-outacl",
  .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec-output-ip4",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
860
861 static clib_error_t *
862 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
863 {
864   ip4_main_t *im = &ip4_main;
865
866   /* Fill in lookup tables with default table (0). */
867   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
868   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
869
870   if (!is_add)
871     {
872       ip4_main_t *im4 = &ip4_main;
873       ip_lookup_main_t *lm4 = &im4->lookup_main;
874       ip_interface_address_t *ia = 0;
875       ip4_address_t *address;
876       vlib_main_t *vm = vlib_get_main ();
877
878       vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
879       /* *INDENT-OFF* */
880       foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
881       ({
882         address = ip_interface_address_get_address (lm4, ia);
883         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
884       }));
885       /* *INDENT-ON* */
886     }
887
888   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
889                                is_add, 0, 0);
890
891   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
892                                sw_if_index, is_add, 0, 0);
893
894   return /* no error */ 0;
895 }
896
897 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
898
899 /* Global IP4 main. */
900 ip4_main_t ip4_main;
901
902 static clib_error_t *
903 ip4_lookup_init (vlib_main_t * vm)
904 {
905   ip4_main_t *im = &ip4_main;
906   clib_error_t *error;
907   uword i;
908
909   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
910     return error;
911   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
912     return (error);
913   if ((error = vlib_call_init_function (vm, fib_module_init)))
914     return error;
915   if ((error = vlib_call_init_function (vm, mfib_module_init)))
916     return error;
917
918   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
919     {
920       u32 m;
921
922       if (i < 32)
923         m = pow2_mask (i) << (32 - i);
924       else
925         m = ~0;
926       im->fib_masks[i] = clib_host_to_net_u32 (m);
927     }
928
929   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
930
931   /* Create FIB with index 0 and table id of 0. */
932   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
933                                      FIB_SOURCE_DEFAULT_ROUTE);
934   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
935                                       MFIB_SOURCE_DEFAULT_ROUTE);
936
937   {
938     pg_node_t *pn;
939     pn = pg_get_node (ip4_lookup_node.index);
940     pn->unformat_edit = unformat_pg_ip4_header;
941   }
942
943   {
944     ethernet_arp_header_t h;
945
946     memset (&h, 0, sizeof (h));
947
948     /* Set target ethernet address to all zeros. */
949     memset (h.ip4_over_ethernet[1].ethernet, 0,
950             sizeof (h.ip4_over_ethernet[1].ethernet));
951
952 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
953 #define _8(f,v) h.f = v;
954     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
955     _16 (l3_type, ETHERNET_TYPE_IP4);
956     _8 (n_l2_address_bytes, 6);
957     _8 (n_l3_address_bytes, 4);
958     _16 (opcode, ETHERNET_ARP_OPCODE_request);
959 #undef _16
960 #undef _8
961
962     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
963                                /* data */ &h,
964                                sizeof (h),
965                                /* alloc chunk size */ 8,
966                                "ip4 arp");
967   }
968
969   return error;
970 }
971
972 VLIB_INIT_FUNCTION (ip4_lookup_init);
973
974 typedef struct
975 {
976   /* Adjacency taken. */
977   u32 dpo_index;
978   u32 flow_hash;
979   u32 fib_index;
980
981   /* Packet data, possibly *after* rewrite. */
982   u8 packet_data[64 - 1 * sizeof (u32)];
983 }
984 ip4_forward_next_trace_t;
985
986 #ifndef CLIB_MARCH_VARIANT
987 u8 *
988 format_ip4_forward_next_trace (u8 * s, va_list * args)
989 {
990   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
991   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
992   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
993   u32 indent = format_get_indent (s);
994   s = format (s, "%U%U",
995               format_white_space, indent,
996               format_ip4_header, t->packet_data, sizeof (t->packet_data));
997   return s;
998 }
999 #endif
1000
1001 static u8 *
1002 format_ip4_lookup_trace (u8 * s, va_list * args)
1003 {
1004   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1005   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1006   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1007   u32 indent = format_get_indent (s);
1008
1009   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1010               t->fib_index, t->dpo_index, t->flow_hash);
1011   s = format (s, "\n%U%U",
1012               format_white_space, indent,
1013               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1014   return s;
1015 }
1016
1017 static u8 *
1018 format_ip4_rewrite_trace (u8 * s, va_list * args)
1019 {
1020   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1021   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1022   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1023   u32 indent = format_get_indent (s);
1024
1025   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1026               t->fib_index, t->dpo_index, format_ip_adjacency,
1027               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1028   s = format (s, "\n%U%U",
1029               format_white_space, indent,
1030               format_ip_adjacency_packet_data,
1031               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1032   return s;
1033 }
1034
1035 #ifndef CLIB_MARCH_VARIANT
1036 /* Common trace function for all ip4-forward next nodes. */
1037 void
1038 ip4_forward_next_trace (vlib_main_t * vm,
1039                         vlib_node_runtime_t * node,
1040                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1041 {
1042   u32 *from, n_left;
1043   ip4_main_t *im = &ip4_main;
1044
1045   n_left = frame->n_vectors;
1046   from = vlib_frame_vector_args (frame);
1047
1048   while (n_left >= 4)
1049     {
1050       u32 bi0, bi1;
1051       vlib_buffer_t *b0, *b1;
1052       ip4_forward_next_trace_t *t0, *t1;
1053
1054       /* Prefetch next iteration. */
1055       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1056       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1057
1058       bi0 = from[0];
1059       bi1 = from[1];
1060
1061       b0 = vlib_get_buffer (vm, bi0);
1062       b1 = vlib_get_buffer (vm, bi1);
1063
1064       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1065         {
1066           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1067           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1068           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1069           t0->fib_index =
1070             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1071              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1072             vec_elt (im->fib_index_by_sw_if_index,
1073                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1074
1075           clib_memcpy (t0->packet_data,
1076                        vlib_buffer_get_current (b0),
1077                        sizeof (t0->packet_data));
1078         }
1079       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1080         {
1081           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1082           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1083           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1084           t1->fib_index =
1085             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1086              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1087             vec_elt (im->fib_index_by_sw_if_index,
1088                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1089           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1090                        sizeof (t1->packet_data));
1091         }
1092       from += 2;
1093       n_left -= 2;
1094     }
1095
1096   while (n_left >= 1)
1097     {
1098       u32 bi0;
1099       vlib_buffer_t *b0;
1100       ip4_forward_next_trace_t *t0;
1101
1102       bi0 = from[0];
1103
1104       b0 = vlib_get_buffer (vm, bi0);
1105
1106       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1107         {
1108           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1109           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1110           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1111           t0->fib_index =
1112             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1113              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1114             vec_elt (im->fib_index_by_sw_if_index,
1115                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1116           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1117                        sizeof (t0->packet_data));
1118         }
1119       from += 1;
1120       n_left -= 1;
1121     }
1122 }
1123
1124 /* Compute TCP/UDP/ICMP4 checksum in software. */
1125 u16
1126 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1127                               ip4_header_t * ip0)
1128 {
1129   ip_csum_t sum0;
1130   u32 ip_header_length, payload_length_host_byte_order;
1131   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1132   u16 sum16;
1133   void *data_this_buffer;
1134
1135   /* Initialize checksum with ip header. */
1136   ip_header_length = ip4_header_bytes (ip0);
1137   payload_length_host_byte_order =
1138     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1139   sum0 =
1140     clib_host_to_net_u32 (payload_length_host_byte_order +
1141                           (ip0->protocol << 16));
1142
1143   if (BITS (uword) == 32)
1144     {
1145       sum0 =
1146         ip_csum_with_carry (sum0,
1147                             clib_mem_unaligned (&ip0->src_address, u32));
1148       sum0 =
1149         ip_csum_with_carry (sum0,
1150                             clib_mem_unaligned (&ip0->dst_address, u32));
1151     }
1152   else
1153     sum0 =
1154       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1155
1156   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1157   data_this_buffer = (void *) ip0 + ip_header_length;
1158   n_ip_bytes_this_buffer =
1159     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1160   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1161     {
1162       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1163         n_ip_bytes_this_buffer - ip_header_length : 0;
1164     }
1165   while (1)
1166     {
1167       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1168       n_bytes_left -= n_this_buffer;
1169       if (n_bytes_left == 0)
1170         break;
1171
1172       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1173       p0 = vlib_get_buffer (vm, p0->next_buffer);
1174       data_this_buffer = vlib_buffer_get_current (p0);
1175       n_this_buffer = p0->current_length;
1176     }
1177
1178   sum16 = ~ip_csum_fold (sum0);
1179
1180   return sum16;
1181 }
1182
1183 u32
1184 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1185 {
1186   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1187   udp_header_t *udp0;
1188   u16 sum16;
1189
1190   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1191           || ip0->protocol == IP_PROTOCOL_UDP);
1192
1193   udp0 = (void *) (ip0 + 1);
1194   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1195     {
1196       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1197                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1198       return p0->flags;
1199     }
1200
1201   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1202
1203   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1204                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1205
1206   return p0->flags;
1207 }
1208 #endif
1209
1210 /* *INDENT-OFF* */
1211 VNET_FEATURE_ARC_INIT (ip4_local) =
1212 {
1213   .arc_name  = "ip4-local",
1214   .start_nodes = VNET_FEATURES ("ip4-local"),
1215 };
1216 /* *INDENT-ON* */
1217
1218 static inline void
1219 ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
1220                             ip4_header_t * ip, u8 is_udp, u8 * error,
1221                             u8 * good_tcp_udp)
1222 {
1223   u32 flags0;
1224   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1225   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1226   if (is_udp)
1227     {
1228       udp_header_t *udp;
1229       u32 ip_len, udp_len;
1230       i32 len_diff;
1231       udp = ip4_next_header (ip);
1232       /* Verify UDP length. */
1233       ip_len = clib_net_to_host_u16 (ip->length);
1234       udp_len = clib_net_to_host_u16 (udp->length);
1235
1236       len_diff = ip_len - udp_len;
1237       *good_tcp_udp &= len_diff >= 0;
1238       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1239     }
1240 }
1241
/*
 * Checksum state predicates on a buffer pointer _b.
 *
 * Every expansion and every use of the argument is parenthesized so the
 * macros can be negated, combined with other operators, or given a
 * pointer expression without changing their meaning.
 */

/* Non-zero when TCP or UDP checksum offload is flagged on the buffer. */
#define ip4_local_csum_is_offloaded(_b)                                 \
    (((_b)->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                    \
        || ((_b)->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))

/*
 * Non-zero when a TCP/UDP packet still needs a software checksum check,
 * i.e. the checksum is neither already computed nor offloaded.
 */
#define ip4_local_need_csum_check(is_tcp_udp, _b)                       \
    ((is_tcp_udp)                                                       \
        && !(((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)         \
            || ip4_local_csum_is_offloaded (_b)))

/* 1 when the checksum is flagged correct or is offloaded, else 0. */
#define ip4_local_csum_is_valid(_b)                                     \
    ((((_b)->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)                 \
        || ip4_local_csum_is_offloaded (_b)) != 0)
1253
1254 static inline void
1255 ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b,
1256                          ip4_header_t * ih, u8 * error)
1257 {
1258   u8 is_udp, is_tcp_udp, good_tcp_udp;
1259
1260   is_udp = ih->protocol == IP_PROTOCOL_UDP;
1261   is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP;
1262
1263   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b)))
1264     ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp);
1265   else
1266     good_tcp_udp = ip4_local_csum_is_valid (b);
1267
1268   ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1269   *error = (is_tcp_udp && !good_tcp_udp
1270             ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error);
1271 }
1272
1273 static inline void
1274 ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
1275                             ip4_header_t ** ih, u8 * error)
1276 {
1277   u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2];
1278
1279   is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP;
1280   is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP;
1281
1282   is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP;
1283   is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP;
1284
1285   good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]);
1286   good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]);
1287
1288   if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
1289                      || ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
1290     {
1291       if (is_tcp_udp[0])
1292         ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
1293                                     &good_tcp_udp[0]);
1294       if (is_tcp_udp[1])
1295         ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
1296                                     &good_tcp_udp[1]);
1297     }
1298
1299   error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ?
1300               IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]);
1301   error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ?
1302               IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]);
1303 }
1304
1305 static inline void
1306 ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
1307                               vlib_buffer_t * b, u16 * next, u8 error,
1308                               u8 head_of_feature_arc)
1309 {
1310   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1311   u32 next_index;
1312
1313   *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next;
1314   b->error = error ? error_node->errors[error] : 0;
1315   if (head_of_feature_arc)
1316     {
1317       next_index = *next;
1318       if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1319         {
1320           vnet_feature_arc_start (arc_index,
1321                                   vnet_buffer (b)->sw_if_index[VLIB_RX],
1322                                   &next_index, b);
1323           *next = next_index;
1324         }
1325     }
1326 }
1327
1328 typedef struct
1329 {
1330   ip4_address_t src;
1331   u32 lbi;
1332   u8 error;
1333 } ip4_local_last_check_t;
1334
1335 static inline void
1336 ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
1337                      ip4_local_last_check_t * last_check, u8 * error0)
1338 {
1339   ip4_fib_mtrie_leaf_t leaf0;
1340   ip4_fib_mtrie_t *mtrie0;
1341   const dpo_id_t *dpo0;
1342   load_balance_t *lb0;
1343   u32 lbi0;
1344
1345   vnet_buffer (b)->ip.fib_index =
1346     vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
1347     vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
1348
1349   if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32))
1350     {
1351       mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
1352       leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1353       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1354       leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1355       lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1356
1357       vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0;
1358       vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0;
1359
1360       lb0 = load_balance_get (lbi0);
1361       dpo0 = load_balance_get_bucket_i (lb0, 0);
1362
1363       /*
1364        * Must have a route to source otherwise we drop the packet.
1365        * ip4 broadcasts are accepted, e.g. to make dhcp client work
1366        *
1367        * The checks are:
1368        *  - the source is a recieve => it's from us => bogus, do this
1369        *    first since it sets a different error code.
1370        *  - uRPF check for any route to source - accept if passes.
1371        *  - allow packets destined to the broadcast address from unknown sources
1372        */
1373
1374       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1375                   && dpo0->dpoi_type == DPO_RECEIVE) ?
1376                  IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0);
1377       *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL
1378                   && !fib_urpf_check_size (lb0->lb_urpf)
1379                   && ip0->dst_address.as_u32 != 0xFFFFFFFF) ?
1380                  IP4_ERROR_SRC_LOOKUP_MISS : *error0);
1381
1382       last_check->src.as_u32 = ip0->src_address.as_u32;
1383       last_check->lbi = lbi0;
1384       last_check->error = *error0;
1385     }
1386   else
1387     {
1388       vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi;
1389       vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi;
1390       *error0 = last_check->error;
1391     }
1392 }
1393
1394 static inline void
1395 ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
1396                         ip4_local_last_check_t * last_check, u8 * error)
1397 {
1398   ip4_fib_mtrie_leaf_t leaf[2];
1399   ip4_fib_mtrie_t *mtrie[2];
1400   const dpo_id_t *dpo[2];
1401   load_balance_t *lb[2];
1402   u32 not_last_hit = 0;
1403   u32 lbi[2];
1404
1405   not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32;
1406   not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32;
1407
1408   vnet_buffer (b[0])->ip.fib_index =
1409     vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
1410     vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
1411     vnet_buffer (b[0])->ip.fib_index;
1412
1413   vnet_buffer (b[1])->ip.fib_index =
1414     vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
1415     vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
1416     vnet_buffer (b[1])->ip.fib_index;
1417
1418   if (PREDICT_FALSE (not_last_hit))
1419     {
1420       mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
1421       mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
1422
1423       leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
1424       leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
1425
1426       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1427                                            &ip[0]->src_address, 2);
1428       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1429                                            &ip[1]->src_address, 2);
1430
1431       leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
1432                                            &ip[0]->src_address, 3);
1433       leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
1434                                            &ip[1]->src_address, 3);
1435
1436       lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
1437       lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
1438
1439       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0];
1440       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0];
1441
1442       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1];
1443       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1];
1444
1445       lb[0] = load_balance_get (lbi[0]);
1446       lb[1] = load_balance_get (lbi[1]);
1447
1448       dpo[0] = load_balance_get_bucket_i (lb[0], 0);
1449       dpo[1] = load_balance_get_bucket_i (lb[1], 0);
1450
1451       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1452                    dpo[0]->dpoi_type == DPO_RECEIVE) ?
1453                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]);
1454       error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1455                    !fib_urpf_check_size (lb[0]->lb_urpf) &&
1456                    ip[0]->dst_address.as_u32 != 0xFFFFFFFF)
1457                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]);
1458
1459       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1460                    dpo[1]->dpoi_type == DPO_RECEIVE) ?
1461                   IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]);
1462       error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL &&
1463                    !fib_urpf_check_size (lb[1]->lb_urpf) &&
1464                    ip[1]->dst_address.as_u32 != 0xFFFFFFFF)
1465                   ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]);
1466
1467       last_check->src.as_u32 = ip[1]->src_address.as_u32;
1468       last_check->lbi = lbi[1];
1469       last_check->error = error[1];
1470     }
1471   else
1472     {
1473       vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi;
1474       vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi;
1475
1476       vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi;
1477       vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi;
1478
1479       error[0] = last_check->error;
1480       error[1] = last_check->error;
1481     }
1482 }
1483
1484 static inline uword
1485 ip4_local_inline (vlib_main_t * vm,
1486                   vlib_node_runtime_t * node,
1487                   vlib_frame_t * frame, int head_of_feature_arc)
1488 {
1489   ip4_main_t *im = &ip4_main;
1490   ip_lookup_main_t *lm = &im->lookup_main;
1491   u32 *from, n_left_from;
1492   vlib_node_runtime_t *error_node =
1493     vlib_node_get_runtime (vm, ip4_input_node.index);
1494   u16 nexts[VLIB_FRAME_SIZE], *next;
1495   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
1496   ip4_header_t *ip[2];
1497   u8 error[2];
1498
1499   ip4_local_last_check_t last_check = {
1500     .src = {.as_u32 = 0},
1501     .lbi = ~0,
1502     .error = IP4_ERROR_UNKNOWN_PROTOCOL
1503   };
1504
1505   from = vlib_frame_vector_args (frame);
1506   n_left_from = frame->n_vectors;
1507
1508   if (node->flags & VLIB_NODE_FLAG_TRACE)
1509     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1510
1511   vlib_get_buffers (vm, from, bufs, n_left_from);
1512   b = bufs;
1513   next = nexts;
1514
1515   while (n_left_from >= 6)
1516     {
1517       u32 is_nat, not_batch = 0;
1518
1519       /* Prefetch next iteration. */
1520       {
1521         vlib_prefetch_buffer_header (b[4], LOAD);
1522         vlib_prefetch_buffer_header (b[5], LOAD);
1523
1524         CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1525         CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD);
1526       }
1527
1528       error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL;
1529
1530       ip[0] = vlib_buffer_get_current (b[0]);
1531       ip[1] = vlib_buffer_get_current (b[1]);
1532
1533       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1534       vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data;
1535
1536       is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED;
1537       not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED);
1538
1539       if (head_of_feature_arc == 0 || (is_nat && not_batch == 0))
1540         goto skip_checks;
1541
1542       if (PREDICT_TRUE (not_batch == 0))
1543         {
1544           ip4_local_check_l4_csum_x2 (vm, b, ip, error);
1545           ip4_local_check_src_x2 (b, ip, &last_check, error);
1546         }
1547       else
1548         {
1549           if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED))
1550             {
1551               ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1552               ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1553             }
1554           if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED))
1555             {
1556               ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
1557               ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
1558             }
1559         }
1560
1561     skip_checks:
1562
1563       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1564       next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol];
1565       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1566                                     head_of_feature_arc);
1567       ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1],
1568                                     head_of_feature_arc);
1569
1570       b += 2;
1571       next += 2;
1572       n_left_from -= 2;
1573     }
1574
1575   while (n_left_from > 0)
1576     {
1577       error[0] = IP4_ERROR_UNKNOWN_PROTOCOL;
1578
1579       ip[0] = vlib_buffer_get_current (b[0]);
1580       vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data;
1581
1582       if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED))
1583         goto skip_check;
1584
1585       ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
1586       ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
1587
1588     skip_check:
1589
1590       next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
1591       ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0],
1592                                     head_of_feature_arc);
1593
1594       b += 1;
1595       next += 1;
1596       n_left_from -= 1;
1597     }
1598
1599   vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
1600   return frame->n_vectors;
1601 }
1602
1603 VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1604                                vlib_frame_t * frame)
1605 {
1606   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1607 }
1608
1609 /* *INDENT-OFF* */
1610 VLIB_REGISTER_NODE (ip4_local_node) =
1611 {
1612   .name = "ip4-local",
1613   .vector_size = sizeof (u32),
1614   .format_trace = format_ip4_forward_next_trace,
1615   .n_next_nodes = IP_LOCAL_N_NEXT,
1616   .next_nodes =
1617   {
1618     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1619     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1620     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1621     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1622   },
1623 };
1624 /* *INDENT-ON* */
1625
1626
1627 VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
1628                                           vlib_node_runtime_t * node,
1629                                           vlib_frame_t * frame)
1630 {
1631   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1632 }
1633
1634 /* *INDENT-OFF* */
1635 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
1636   .name = "ip4-local-end-of-arc",
1637   .vector_size = sizeof (u32),
1638
1639   .format_trace = format_ip4_forward_next_trace,
1640   .sibling_of = "ip4-local",
1641 };
1642
1643 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1644   .arc_name = "ip4-local",
1645   .node_name = "ip4-local-end-of-arc",
1646   .runs_before = 0, /* not before any other features */
1647 };
1648 /* *INDENT-ON* */
1649
1650 #ifndef CLIB_MARCH_VARIANT
1651 void
1652 ip4_register_protocol (u32 protocol, u32 node_index)
1653 {
1654   vlib_main_t *vm = vlib_get_main ();
1655   ip4_main_t *im = &ip4_main;
1656   ip_lookup_main_t *lm = &im->lookup_main;
1657
1658   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1659   lm->local_next_by_ip_protocol[protocol] =
1660     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1661 }
1662 #endif
1663
1664 static clib_error_t *
1665 show_ip_local_command_fn (vlib_main_t * vm,
1666                           unformat_input_t * input, vlib_cli_command_t * cmd)
1667 {
1668   ip4_main_t *im = &ip4_main;
1669   ip_lookup_main_t *lm = &im->lookup_main;
1670   int i;
1671
1672   vlib_cli_output (vm, "Protocols handled by ip4_local");
1673   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1674     {
1675       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1676         {
1677           u32 node_index = vlib_get_node (vm,
1678                                           ip4_local_node.index)->
1679             next_nodes[lm->local_next_by_ip_protocol[i]];
1680           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1681                            node_index);
1682         }
1683     }
1684   return 0;
1685 }
1686
1687
1688
1689 /*?
1690  * Display the set of protocols handled by the local IPv4 stack.
1691  *
1692  * @cliexpar
1693  * Example of how to display local protocol table:
1694  * @cliexstart{show ip local}
1695  * Protocols handled by ip4_local
1696  * 1
1697  * 17
1698  * 47
1699  * @cliexend
1700 ?*/
1701 /* *INDENT-OFF* */
1702 VLIB_CLI_COMMAND (show_ip_local, static) =
1703 {
1704   .path = "show ip local",
1705   .function = show_ip_local_command_fn,
1706   .short_help = "show ip local",
1707 };
1708 /* *INDENT-ON* */
1709
1710 always_inline uword
1711 ip4_arp_inline (vlib_main_t * vm,
1712                 vlib_node_runtime_t * node,
1713                 vlib_frame_t * frame, int is_glean)
1714 {
1715   vnet_main_t *vnm = vnet_get_main ();
1716   ip4_main_t *im = &ip4_main;
1717   ip_lookup_main_t *lm = &im->lookup_main;
1718   u32 *from, *to_next_drop;
1719   uword n_left_from, n_left_to_next_drop, next_index;
1720   u32 thread_index = vm->thread_index;
1721   u32 seed;
1722   f64 time_now;
1723
1724   if (node->flags & VLIB_NODE_FLAG_TRACE)
1725     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1726
1727   time_now = vlib_time_now (vm);
1728   if (time_now - im->arp_throttle_last_seed_change_time[thread_index] > 1e-3)
1729     {
1730       (void) random_u32 (&im->arp_throttle_seeds[thread_index]);
1731       memset (im->arp_throttle_bitmaps[thread_index], 0,
1732               ARP_THROTTLE_BITS / BITS (u8));
1733
1734       im->arp_throttle_last_seed_change_time[thread_index] = time_now;
1735     }
1736   seed = im->arp_throttle_seeds[thread_index];
1737
1738   from = vlib_frame_vector_args (frame);
1739   n_left_from = frame->n_vectors;
1740   next_index = node->cached_next_index;
1741   if (next_index == IP4_ARP_NEXT_DROP)
1742     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1743
1744   while (n_left_from > 0)
1745     {
1746       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1747                            to_next_drop, n_left_to_next_drop);
1748
1749       while (n_left_from > 0 && n_left_to_next_drop > 0)
1750         {
1751           u32 pi0, adj_index0, r0, w0, sw_if_index0, drop0;
1752           uword m0;
1753           ip_adjacency_t *adj0;
1754           vlib_buffer_t *p0;
1755           ip4_header_t *ip0;
1756
1757           pi0 = from[0];
1758
1759           p0 = vlib_get_buffer (vm, pi0);
1760
1761           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1762           adj0 = adj_get (adj_index0);
1763           ip0 = vlib_buffer_get_current (p0);
1764
1765           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1766           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1767
1768           if (PREDICT_TRUE (is_glean))
1769             {
1770               /*
1771                * this is the Glean case, so we are ARPing for the
1772                * packet's destination
1773                */
1774               r0 = ip0->dst_address.data_u32;
1775             }
1776           else
1777             {
1778               r0 = adj0->sub_type.nbr.next_hop.ip4.data_u32;
1779             }
1780
1781           r0 ^= seed;
1782           /* Select bit number */
1783           r0 &= ARP_THROTTLE_BITS - 1;
1784           w0 = r0 / BITS (uword);
1785           m0 = (uword) 1 << (r0 % BITS (uword));
1786
1787           drop0 = (im->arp_throttle_bitmaps[thread_index][w0] & m0) != 0;
1788           im->arp_throttle_bitmaps[thread_index][w0] |= m0;
1789
1790           from += 1;
1791           n_left_from -= 1;
1792           to_next_drop[0] = pi0;
1793           to_next_drop += 1;
1794           n_left_to_next_drop -= 1;
1795
1796           p0->error =
1797             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
1798                          IP4_ARP_ERROR_REQUEST_SENT];
1799
1800           /*
1801            * the adj has been updated to a rewrite but the node the DPO that got
1802            * us here hasn't - yet. no big deal. we'll drop while we wait.
1803            */
1804           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1805             continue;
1806
1807           if (drop0)
1808             continue;
1809
1810           /*
1811            * Can happen if the control-plane is programming tables
1812            * with traffic flowing; at least that's today's lame excuse.
1813            */
1814           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
1815               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1816             {
1817               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1818             }
1819           else
1820             /* Send ARP request. */
1821             {
1822               u32 bi0 = 0;
1823               vlib_buffer_t *b0;
1824               ethernet_arp_header_t *h0;
1825               vnet_hw_interface_t *hw_if0;
1826
1827               h0 =
1828                 vlib_packet_template_get_packet (vm,
1829                                                  &im->ip4_arp_request_packet_template,
1830                                                  &bi0);
1831
1832               /* Seems we're out of buffers */
1833               if (PREDICT_FALSE (!h0))
1834                 continue;
1835
1836               /* Add rewrite/encap string for ARP packet. */
1837               vnet_rewrite_one_header (adj0[0], h0,
1838                                        sizeof (ethernet_header_t));
1839
1840               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1841
1842               /* Src ethernet address in ARP header. */
1843               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
1844                            hw_if0->hw_address,
1845                            sizeof (h0->ip4_over_ethernet[0].ethernet));
1846
1847               if (is_glean)
1848                 {
1849                   /* The interface's source address is stashed in the Glean Adj */
1850                   h0->ip4_over_ethernet[0].ip4 =
1851                     adj0->sub_type.glean.receive_addr.ip4;
1852
1853                   /* Copy in destination address we are requesting. This is the
1854                    * glean case, so it's the packet's destination.*/
1855                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1856                     ip0->dst_address.data_u32;
1857                 }
1858               else
1859                 {
1860                   /* Src IP address in ARP header. */
1861                   if (ip4_src_address_for_packet (lm, sw_if_index0,
1862                                                   &h0->
1863                                                   ip4_over_ethernet[0].ip4))
1864                     {
1865                       /* No source address available */
1866                       p0->error =
1867                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1868                       vlib_buffer_free (vm, &bi0, 1);
1869                       continue;
1870                     }
1871
1872                   /* Copy in destination address we are requesting from the
1873                      incomplete adj */
1874                   h0->ip4_over_ethernet[1].ip4.data_u32 =
1875                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
1876                 }
1877
1878               vlib_buffer_copy_trace_flag (vm, p0, bi0);
1879               b0 = vlib_get_buffer (vm, bi0);
1880               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
1881               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1882
1883               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1884
1885               vlib_set_next_frame_buffer (vm, node,
1886                                           adj0->rewrite_header.next_index,
1887                                           bi0);
1888             }
1889         }
1890
1891       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1892     }
1893
1894   return frame->n_vectors;
1895 }
1896
1897 VLIB_NODE_FN (ip4_arp_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1898                              vlib_frame_t * frame)
1899 {
1900   return (ip4_arp_inline (vm, node, frame, 0));
1901 }
1902
1903 VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1904                                vlib_frame_t * frame)
1905 {
1906   return (ip4_arp_inline (vm, node, frame, 1));
1907 }
1908
1909 static char *ip4_arp_error_strings[] = {
1910   [IP4_ARP_ERROR_DROP] = "address overflow drops",
1911   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1912   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1913   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1914   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1915   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1916 };
1917
1918 /* *INDENT-OFF* */
1919 VLIB_REGISTER_NODE (ip4_arp_node) =
1920 {
1921   .name = "ip4-arp",
1922   .vector_size = sizeof (u32),
1923   .format_trace = format_ip4_forward_next_trace,
1924   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1925   .error_strings = ip4_arp_error_strings,
1926   .n_next_nodes = IP4_ARP_N_NEXT,
1927   .next_nodes =
1928   {
1929     [IP4_ARP_NEXT_DROP] = "error-drop",
1930   },
1931 };
1932
1933 VLIB_REGISTER_NODE (ip4_glean_node) =
1934 {
1935   .name = "ip4-glean",
1936   .vector_size = sizeof (u32),
1937   .format_trace = format_ip4_forward_next_trace,
1938   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1939   .error_strings = ip4_arp_error_strings,
1940   .n_next_nodes = IP4_ARP_N_NEXT,
1941   .next_nodes = {
1942   [IP4_ARP_NEXT_DROP] = "error-drop",
1943   },
1944 };
1945 /* *INDENT-ON* */
1946
1947 #define foreach_notrace_ip4_arp_error           \
1948 _(DROP)                                         \
1949 _(REQUEST_SENT)                                 \
1950 _(REPLICATE_DROP)                               \
1951 _(REPLICATE_FAIL)
1952
1953 static clib_error_t *
1954 arp_notrace_init (vlib_main_t * vm)
1955 {
1956   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
1957
1958   /* don't trace ARP request packets */
1959 #define _(a)                                    \
1960     vnet_pcap_drop_trace_filter_add_del         \
1961         (rt->errors[IP4_ARP_ERROR_##a],         \
1962          1 /* is_add */);
1963   foreach_notrace_ip4_arp_error;
1964 #undef _
1965   return 0;
1966 }
1967
1968 VLIB_INIT_FUNCTION (arp_notrace_init);
1969
1970
1971 #ifndef CLIB_MARCH_VARIANT
1972 /* Send an ARP request to see if given destination is reachable on given interface. */
1973 clib_error_t *
1974 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index,
1975                     u8 refresh)
1976 {
1977   vnet_main_t *vnm = vnet_get_main ();
1978   ip4_main_t *im = &ip4_main;
1979   ethernet_arp_header_t *h;
1980   ip4_address_t *src;
1981   ip_interface_address_t *ia;
1982   ip_adjacency_t *adj;
1983   vnet_hw_interface_t *hi;
1984   vnet_sw_interface_t *si;
1985   vlib_buffer_t *b;
1986   adj_index_t ai;
1987   u32 bi = 0;
1988   u8 unicast_rewrite = 0;
1989
1990   si = vnet_get_sw_interface (vnm, sw_if_index);
1991
1992   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
1993     {
1994       return clib_error_return (0, "%U: interface %U down",
1995                                 format_ip4_address, dst,
1996                                 format_vnet_sw_if_index_name, vnm,
1997                                 sw_if_index);
1998     }
1999
2000   src =
2001     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2002   if (!src)
2003     {
2004       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2005       return clib_error_return
2006         (0,
2007          "no matching interface address for destination %U (interface %U)",
2008          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2009          sw_if_index);
2010     }
2011
2012   h = vlib_packet_template_get_packet (vm,
2013                                        &im->ip4_arp_request_packet_template,
2014                                        &bi);
2015
2016   if (!h)
2017     return clib_error_return (0, "ARP request packet allocation failed");
2018
2019   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2020   if (PREDICT_FALSE (!hi->hw_address))
2021     {
2022       return clib_error_return (0, "%U: interface %U do not support ip probe",
2023                                 format_ip4_address, dst,
2024                                 format_vnet_sw_if_index_name, vnm,
2025                                 sw_if_index);
2026     }
2027
2028   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2029                sizeof (h->ip4_over_ethernet[0].ethernet));
2030
2031   h->ip4_over_ethernet[0].ip4 = src[0];
2032   h->ip4_over_ethernet[1].ip4 = dst[0];
2033
2034   b = vlib_get_buffer (vm, bi);
2035   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2036     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2037
2038   ip46_address_t nh = {
2039     .ip4 = *dst,
2040   };
2041
2042   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2043                             VNET_LINK_IP4, &nh, sw_if_index);
2044   adj = adj_get (ai);
2045
2046   /* Peer has been previously resolved, retrieve glean adj instead */
2047   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2048     {
2049       if (refresh)
2050         unicast_rewrite = 1;
2051       else
2052         {
2053           adj_unlock (ai);
2054           ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4,
2055                                       VNET_LINK_IP4, sw_if_index, &nh);
2056           adj = adj_get (ai);
2057         }
2058     }
2059
2060   /* Add encapsulation string for software interface (e.g. ethernet header). */
2061   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2062   if (unicast_rewrite)
2063     {
2064       u16 *etype = vlib_buffer_get_current (b) - 2;
2065       etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP);
2066     }
2067   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2068
2069   {
2070     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2071     u32 *to_next = vlib_frame_vector_args (f);
2072     to_next[0] = bi;
2073     f->n_vectors = 1;
2074     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2075   }
2076
2077   adj_unlock (ai);
2078   return /* no error */ 0;
2079 }
2080 #endif
2081
/* Next-node slots used by the IPv4 rewrite code in this file. */
typedef enum
{
  /* Initial disposition of every packet; kept when it is dropped */
  IP4_REWRITE_NEXT_DROP = 0,
  /* TTL expired, or MTU exceeded with don't-fragment set */
  IP4_REWRITE_NEXT_ICMP_ERROR,
  /* MTU exceeded and fragmentation allowed */
  IP4_REWRITE_NEXT_FRAGMENT,
  /* Number of slots above; must stay last */
  IP4_REWRITE_N_NEXT
} ip4_rewrite_next_t;
2089
/**
 * The bits of an IPv4 address to keep (mask in) when constructing a
 * multicast MAC address: the low-order 23 bits of the address, expressed
 * for a u32 that holds the address in network byte order.
 */
#if !(CLIB_ARCH_IS_BIG_ENDIAN)
#define IP4_MCAST_ADDR_MASK 0xffff7f00
#else
#define IP4_MCAST_ADDR_MASK 0x007fffff
#endif
2099
2100 always_inline void
2101 ip4_mtu_check (vlib_buffer_t * b, u16 packet_len,
2102                u16 adj_packet_bytes, bool df, u32 * next, u32 * error)
2103 {
2104   if (packet_len > adj_packet_bytes)
2105     {
2106       *error = IP4_ERROR_MTU_EXCEEDED;
2107       if (df)
2108         {
2109           icmp4_error_set_vnet_buffer
2110             (b, ICMP4_destination_unreachable,
2111              ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
2112              adj_packet_bytes);
2113           *next = IP4_REWRITE_NEXT_ICMP_ERROR;
2114         }
2115       else
2116         {
2117           /* IP fragmentation */
2118           ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes,
2119                                    IP4_FRAG_NEXT_IP4_REWRITE, 0);
2120           *next = IP4_REWRITE_NEXT_FRAGMENT;
2121         }
2122     }
2123 }
2124
2125 always_inline uword
2126 ip4_rewrite_inline (vlib_main_t * vm,
2127                     vlib_node_runtime_t * node,
2128                     vlib_frame_t * frame,
2129                     int do_counters, int is_midchain, int is_mcast)
2130 {
2131   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2132   u32 *from = vlib_frame_vector_args (frame);
2133   u32 n_left_from, n_left_to_next, *to_next, next_index;
2134   vlib_node_runtime_t *error_node =
2135     vlib_node_get_runtime (vm, ip4_input_node.index);
2136
2137   n_left_from = frame->n_vectors;
2138   next_index = node->cached_next_index;
2139   u32 thread_index = vm->thread_index;
2140
2141   while (n_left_from > 0)
2142     {
2143       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2144
2145       while (n_left_from >= 4 && n_left_to_next >= 2)
2146         {
2147           ip_adjacency_t *adj0, *adj1;
2148           vlib_buffer_t *p0, *p1;
2149           ip4_header_t *ip0, *ip1;
2150           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2151           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2152           u32 tx_sw_if_index0, tx_sw_if_index1;
2153
2154           /* Prefetch next iteration. */
2155           {
2156             vlib_buffer_t *p2, *p3;
2157
2158             p2 = vlib_get_buffer (vm, from[2]);
2159             p3 = vlib_get_buffer (vm, from[3]);
2160
2161             vlib_prefetch_buffer_header (p2, STORE);
2162             vlib_prefetch_buffer_header (p3, STORE);
2163
2164             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2165             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2166           }
2167
2168           pi0 = to_next[0] = from[0];
2169           pi1 = to_next[1] = from[1];
2170
2171           from += 2;
2172           n_left_from -= 2;
2173           to_next += 2;
2174           n_left_to_next -= 2;
2175
2176           p0 = vlib_get_buffer (vm, pi0);
2177           p1 = vlib_get_buffer (vm, pi1);
2178
2179           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2180           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2181
2182           /*
2183            * pre-fetch the per-adjacency counters
2184            */
2185           if (do_counters)
2186             {
2187               vlib_prefetch_combined_counter (&adjacency_counters,
2188                                               thread_index, adj_index0);
2189               vlib_prefetch_combined_counter (&adjacency_counters,
2190                                               thread_index, adj_index1);
2191             }
2192
2193           ip0 = vlib_buffer_get_current (p0);
2194           ip1 = vlib_buffer_get_current (p1);
2195
2196           error0 = error1 = IP4_ERROR_NONE;
2197           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2198
2199           /* Decrement TTL & update checksum.
2200              Works either endian, so no need for byte swap. */
2201           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2202             {
2203               i32 ttl0 = ip0->ttl;
2204
2205               /* Input node should have reject packets with ttl 0. */
2206               ASSERT (ip0->ttl > 0);
2207
2208               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2209               checksum0 += checksum0 >= 0xffff;
2210
2211               ip0->checksum = checksum0;
2212               ttl0 -= 1;
2213               ip0->ttl = ttl0;
2214
2215               /*
2216                * If the ttl drops below 1 when forwarding, generate
2217                * an ICMP response.
2218                */
2219               if (PREDICT_FALSE (ttl0 <= 0))
2220                 {
2221                   error0 = IP4_ERROR_TIME_EXPIRED;
2222                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2223                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2224                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2225                                                0);
2226                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2227                 }
2228
2229               /* Verify checksum. */
2230               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2231                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2232             }
2233           else
2234             {
2235               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2236             }
2237           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2238             {
2239               i32 ttl1 = ip1->ttl;
2240
2241               /* Input node should have reject packets with ttl 0. */
2242               ASSERT (ip1->ttl > 0);
2243
2244               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2245               checksum1 += checksum1 >= 0xffff;
2246
2247               ip1->checksum = checksum1;
2248               ttl1 -= 1;
2249               ip1->ttl = ttl1;
2250
2251               /*
2252                * If the ttl drops below 1 when forwarding, generate
2253                * an ICMP response.
2254                */
2255               if (PREDICT_FALSE (ttl1 <= 0))
2256                 {
2257                   error1 = IP4_ERROR_TIME_EXPIRED;
2258                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2259                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2260                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2261                                                0);
2262                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2263                 }
2264
2265               /* Verify checksum. */
2266               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2267                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2268             }
2269           else
2270             {
2271               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2272             }
2273
2274           /* Rewrite packet header and updates lengths. */
2275           adj0 = adj_get (adj_index0);
2276           adj1 = adj_get (adj_index1);
2277
2278           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2279           rw_len0 = adj0[0].rewrite_header.data_bytes;
2280           rw_len1 = adj1[0].rewrite_header.data_bytes;
2281           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2282           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2283
2284           /* Check MTU of outgoing interface. */
2285           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2286                          adj0[0].rewrite_header.max_l3_packet_bytes,
2287                          ip0->flags_and_fragment_offset &
2288                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2289                          &next0, &error0);
2290           ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length),
2291                          adj1[0].rewrite_header.max_l3_packet_bytes,
2292                          ip1->flags_and_fragment_offset &
2293                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2294                          &next1, &error1);
2295
2296           if (is_mcast)
2297             {
2298               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2299                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2300                         IP4_ERROR_SAME_INTERFACE : error0);
2301               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2302                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2303                         IP4_ERROR_SAME_INTERFACE : error1);
2304             }
2305
2306           p0->error = error_node->errors[error0];
2307           p1->error = error_node->errors[error1];
2308           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2309            * to see the IP headerr */
2310           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2311             {
2312               next0 = adj0[0].rewrite_header.next_index;
2313               p0->current_data -= rw_len0;
2314               p0->current_length += rw_len0;
2315               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2316               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2317
2318               if (PREDICT_FALSE
2319                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2320                 vnet_feature_arc_start (lm->output_feature_arc_index,
2321                                         tx_sw_if_index0, &next0, p0);
2322             }
2323           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2324             {
2325               next1 = adj1[0].rewrite_header.next_index;
2326               p1->current_data -= rw_len1;
2327               p1->current_length += rw_len1;
2328
2329               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2330               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2331
2332               if (PREDICT_FALSE
2333                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2334                 vnet_feature_arc_start (lm->output_feature_arc_index,
2335                                         tx_sw_if_index1, &next1, p1);
2336             }
2337
2338           /* Guess we are only writing on simple Ethernet header. */
2339           vnet_rewrite_two_headers (adj0[0], adj1[0],
2340                                     ip0, ip1, sizeof (ethernet_header_t));
2341
2342           /*
2343            * Bump the per-adjacency counters
2344            */
2345           if (do_counters)
2346             {
2347               vlib_increment_combined_counter
2348                 (&adjacency_counters,
2349                  thread_index,
2350                  adj_index0, 1,
2351                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2352
2353               vlib_increment_combined_counter
2354                 (&adjacency_counters,
2355                  thread_index,
2356                  adj_index1, 1,
2357                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2358             }
2359
2360           if (is_midchain)
2361             {
2362               adj0->sub_type.midchain.fixup_func
2363                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2364               adj1->sub_type.midchain.fixup_func
2365                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2366             }
2367           if (is_mcast)
2368             {
2369               /*
2370                * copy bytes from the IP address into the MAC rewrite
2371                */
2372               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2373                                           adj0->
2374                                           rewrite_header.dst_mcast_offset,
2375                                           &ip0->dst_address.as_u32,
2376                                           (u8 *) ip0);
2377               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2378                                           adj0->
2379                                           rewrite_header.dst_mcast_offset,
2380                                           &ip1->dst_address.as_u32,
2381                                           (u8 *) ip1);
2382             }
2383
2384           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2385                                            to_next, n_left_to_next,
2386                                            pi0, pi1, next0, next1);
2387         }
2388
2389       while (n_left_from > 0 && n_left_to_next > 0)
2390         {
2391           ip_adjacency_t *adj0;
2392           vlib_buffer_t *p0;
2393           ip4_header_t *ip0;
2394           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2395           u32 tx_sw_if_index0;
2396
2397           pi0 = to_next[0] = from[0];
2398
2399           p0 = vlib_get_buffer (vm, pi0);
2400
2401           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2402
2403           adj0 = adj_get (adj_index0);
2404
2405           ip0 = vlib_buffer_get_current (p0);
2406
2407           error0 = IP4_ERROR_NONE;
2408           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2409
2410           /* Decrement TTL & update checksum. */
2411           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2412             {
2413               i32 ttl0 = ip0->ttl;
2414
2415               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2416
2417               checksum0 += checksum0 >= 0xffff;
2418
2419               ip0->checksum = checksum0;
2420
2421               ASSERT (ip0->ttl > 0);
2422
2423               ttl0 -= 1;
2424
2425               ip0->ttl = ttl0;
2426
2427               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2428                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2429
2430               if (PREDICT_FALSE (ttl0 <= 0))
2431                 {
2432                   /*
2433                    * If the ttl drops below 1 when forwarding, generate
2434                    * an ICMP response.
2435                    */
2436                   error0 = IP4_ERROR_TIME_EXPIRED;
2437                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2438                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2439                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2440                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2441                                                0);
2442                 }
2443             }
2444           else
2445             {
2446               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2447             }
2448
2449           if (do_counters)
2450             vlib_prefetch_combined_counter (&adjacency_counters,
2451                                             thread_index, adj_index0);
2452
2453           /* Guess we are only writing on simple Ethernet header. */
2454           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2455           if (is_mcast)
2456             {
2457               /*
2458                * copy bytes from the IP address into the MAC rewrite
2459                */
2460               vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK,
2461                                           adj0->
2462                                           rewrite_header.dst_mcast_offset,
2463                                           &ip0->dst_address.as_u32,
2464                                           (u8 *) ip0);
2465             }
2466
2467           /* Update packet buffer attributes/set output interface. */
2468           rw_len0 = adj0[0].rewrite_header.data_bytes;
2469           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2470
2471           if (do_counters)
2472             vlib_increment_combined_counter
2473               (&adjacency_counters,
2474                thread_index, adj_index0, 1,
2475                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2476
2477           /* Check MTU of outgoing interface. */
2478           ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length),
2479                          adj0[0].rewrite_header.max_l3_packet_bytes,
2480                          ip0->flags_and_fragment_offset &
2481                          clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT),
2482                          &next0, &error0);
2483
2484           if (is_mcast)
2485             {
2486               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2487                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2488                         IP4_ERROR_SAME_INTERFACE : error0);
2489             }
2490           p0->error = error_node->errors[error0];
2491
2492           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2493            * to see the IP headerr */
2494           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2495             {
2496               p0->current_data -= rw_len0;
2497               p0->current_length += rw_len0;
2498               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2499
2500               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2501               next0 = adj0[0].rewrite_header.next_index;
2502
2503               if (is_midchain)
2504                 {
2505                   adj0->sub_type.midchain.fixup_func
2506                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2507                 }
2508
2509               if (PREDICT_FALSE
2510                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2511                 vnet_feature_arc_start (lm->output_feature_arc_index,
2512                                         tx_sw_if_index0, &next0, p0);
2513
2514             }
2515
2516           from += 1;
2517           n_left_from -= 1;
2518           to_next += 1;
2519           n_left_to_next -= 1;
2520
2521           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2522                                            to_next, n_left_to_next,
2523                                            pi0, next0);
2524         }
2525
2526       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2527     }
2528
2529   /* Need to do trace after rewrites to pick up new packet data. */
2530   if (node->flags & VLIB_NODE_FLAG_TRACE)
2531     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2532
2533   return frame->n_vectors;
2534 }
2535
2536
2537 /** @brief IPv4 rewrite node.
2538     @node ip4-rewrite
2539
2540     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2541     header checksum, fetch the ip adjacency, check the outbound mtu,
2542     apply the adjacency rewrite, and send pkts to the adjacency
2543     rewrite header's rewrite_next_index.
2544
2545     @param vm vlib_main_t corresponding to the current thread
2546     @param node vlib_node_runtime_t
2547     @param frame vlib_frame_t whose contents should be dispatched
2548
2549     @par Graph mechanics: buffer metadata, next index usage
2550
2551     @em Uses:
2552     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2553         - the rewrite adjacency index
2554     - <code>adj->lookup_next_index</code>
2555         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2556           the packet will be dropped.
2557     - <code>adj->rewrite_header</code>
2558         - Rewrite string length, rewrite string, next_index
2559
2560     @em Sets:
2561     - <code>b->current_data, b->current_length</code>
2562         - Updated net of applying the rewrite string
2563
2564     <em>Next Indices:</em>
2565     - <code> adj->rewrite_header.next_index </code>
2566       or @c ip4-drop
2567 */
2568
2569 VLIB_NODE_FN (ip4_rewrite_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
2570                                  vlib_frame_t * frame)
2571 {
2572   if (adj_are_counters_enabled ())
2573     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2574   else
2575     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2576 }
2577
2578 VLIB_NODE_FN (ip4_rewrite_bcast_node) (vlib_main_t * vm,
2579                                        vlib_node_runtime_t * node,
2580                                        vlib_frame_t * frame)
2581 {
2582   if (adj_are_counters_enabled ())
2583     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2584   else
2585     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2586 }
2587
2588 VLIB_NODE_FN (ip4_midchain_node) (vlib_main_t * vm,
2589                                   vlib_node_runtime_t * node,
2590                                   vlib_frame_t * frame)
2591 {
2592   if (adj_are_counters_enabled ())
2593     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2594   else
2595     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2596 }
2597
2598 VLIB_NODE_FN (ip4_rewrite_mcast_node) (vlib_main_t * vm,
2599                                        vlib_node_runtime_t * node,
2600                                        vlib_frame_t * frame)
2601 {
2602   if (adj_are_counters_enabled ())
2603     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2604   else
2605     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2606 }
2607
2608 VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
2609                                         vlib_node_runtime_t * node,
2610                                         vlib_frame_t * frame)
2611 {
2612   if (adj_are_counters_enabled ())
2613     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2614   else
2615     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2616 }
2617
2618 /* *INDENT-OFF* */
2619 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2620   .name = "ip4-rewrite",
2621   .vector_size = sizeof (u32),
2622
2623   .format_trace = format_ip4_rewrite_trace,
2624
2625   .n_next_nodes = IP4_REWRITE_N_NEXT,
2626   .next_nodes = {
2627     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2628     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2629     [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag",
2630   },
2631 };
2632
2633 VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = {
2634   .name = "ip4-rewrite-bcast",
2635   .vector_size = sizeof (u32),
2636
2637   .format_trace = format_ip4_rewrite_trace,
2638   .sibling_of = "ip4-rewrite",
2639 };
2640
2641 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2642   .name = "ip4-rewrite-mcast",
2643   .vector_size = sizeof (u32),
2644
2645   .format_trace = format_ip4_rewrite_trace,
2646   .sibling_of = "ip4-rewrite",
2647 };
2648
2649 VLIB_REGISTER_NODE (ip4_mcast_midchain_node) = {
2650   .name = "ip4-mcast-midchain",
2651   .vector_size = sizeof (u32),
2652
2653   .format_trace = format_ip4_rewrite_trace,
2654   .sibling_of = "ip4-rewrite",
2655 };
2656
2657 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2658   .name = "ip4-midchain",
2659   .vector_size = sizeof (u32),
2660   .format_trace = format_ip4_forward_next_trace,
2661   .sibling_of =  "ip4-rewrite",
2662 };
/* *INDENT-ON* */
2664
2665 static int
2666 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2667 {
2668   ip4_fib_mtrie_t *mtrie0;
2669   ip4_fib_mtrie_leaf_t leaf0;
2670   u32 lbi0;
2671
2672   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2673
2674   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2675   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2676   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2677
2678   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2679
2680   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2681 }
2682
2683 static clib_error_t *
2684 test_lookup_command_fn (vlib_main_t * vm,
2685                         unformat_input_t * input, vlib_cli_command_t * cmd)
2686 {
2687   ip4_fib_t *fib;
2688   u32 table_id = 0;
2689   f64 count = 1;
2690   u32 n;
2691   int i;
2692   ip4_address_t ip4_base_address;
2693   u64 errors = 0;
2694
2695   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2696     {
2697       if (unformat (input, "table %d", &table_id))
2698         {
2699           /* Make sure the entry exists. */
2700           fib = ip4_fib_get (table_id);
2701           if ((fib) && (fib->index != table_id))
2702             return clib_error_return (0, "<fib-index> %d does not exist",
2703                                       table_id);
2704         }
2705       else if (unformat (input, "count %f", &count))
2706         ;
2707
2708       else if (unformat (input, "%U",
2709                          unformat_ip4_address, &ip4_base_address))
2710         ;
2711       else
2712         return clib_error_return (0, "unknown input `%U'",
2713                                   format_unformat_error, input);
2714     }
2715
2716   n = count;
2717
2718   for (i = 0; i < n; i++)
2719     {
2720       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2721         errors++;
2722
2723       ip4_base_address.as_u32 =
2724         clib_host_to_net_u32 (1 +
2725                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2726     }
2727
2728   if (errors)
2729     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2730   else
2731     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2732
2733   return 0;
2734 }
2735
2736 /*?
2737  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2738  * given FIB table to determine if there is a conflict with the
2739  * adjacency table. The fib-id can be determined by using the
2740  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2741  * of 0 is used.
2742  *
2743  * @todo This command uses fib-id, other commands use table-id (not
2744  * just a name, they are different indexes). Would like to change this
2745  * to table-id for consistency.
2746  *
2747  * @cliexpar
2748  * Example of how to run the test lookup command:
2749  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2750  * No errors in 2 lookups
2751  * @cliexend
2752 ?*/
2753 /* *INDENT-OFF* */
2754 VLIB_CLI_COMMAND (lookup_test_command, static) =
2755 {
2756   .path = "test lookup",
2757   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2758   .function = test_lookup_command_fn,
2759 };
2760 /* *INDENT-ON* */
2761
2762 #ifndef CLIB_MARCH_VARIANT
2763 int
2764 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2765 {
2766   u32 fib_index;
2767
2768   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2769
2770   if (~0 == fib_index)
2771     return VNET_API_ERROR_NO_SUCH_FIB;
2772
2773   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2774                                   flow_hash_config);
2775
2776   return 0;
2777 }
2778 #endif
2779
2780 static clib_error_t *
2781 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2782                              unformat_input_t * input,
2783                              vlib_cli_command_t * cmd)
2784 {
2785   int matched = 0;
2786   u32 table_id = 0;
2787   u32 flow_hash_config = 0;
2788   int rv;
2789
2790   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2791     {
2792       if (unformat (input, "table %d", &table_id))
2793         matched = 1;
2794 #define _(a,v) \
2795     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2796       foreach_flow_hash_bit
2797 #undef _
2798         else
2799         break;
2800     }
2801
2802   if (matched == 0)
2803     return clib_error_return (0, "unknown input `%U'",
2804                               format_unformat_error, input);
2805
2806   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2807   switch (rv)
2808     {
2809     case 0:
2810       break;
2811
2812     case VNET_API_ERROR_NO_SUCH_FIB:
2813       return clib_error_return (0, "no such FIB table %d", table_id);
2814
2815     default:
2816       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2817       break;
2818     }
2819
2820   return 0;
2821 }
2822
2823 /*?
2824  * Configure the set of IPv4 fields used by the flow hash.
2825  *
2826  * @cliexpar
2827  * Example of how to set the flow hash on a given table:
2828  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
2830  * @cliexstart{show ip fib}
2831  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2832  * 0.0.0.0/0
2833  *   unicast-ip4-chain
2834  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2835  *     [0] [@0]: dpo-drop ip6
2836  * 0.0.0.0/32
2837  *   unicast-ip4-chain
2838  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2839  *     [0] [@0]: dpo-drop ip6
2840  * 224.0.0.0/8
2841  *   unicast-ip4-chain
2842  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2843  *     [0] [@0]: dpo-drop ip6
2844  * 6.0.1.2/32
2845  *   unicast-ip4-chain
2846  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2847  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2848  * 7.0.0.1/32
2849  *   unicast-ip4-chain
2850  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2851  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2852  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2853  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2854  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2855  * 240.0.0.0/8
2856  *   unicast-ip4-chain
2857  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2858  *     [0] [@0]: dpo-drop ip6
2859  * 255.255.255.255/32
2860  *   unicast-ip4-chain
2861  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2862  *     [0] [@0]: dpo-drop ip6
2863  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2864  * 0.0.0.0/0
2865  *   unicast-ip4-chain
2866  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2867  *     [0] [@0]: dpo-drop ip6
2868  * 0.0.0.0/32
2869  *   unicast-ip4-chain
2870  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2871  *     [0] [@0]: dpo-drop ip6
2872  * 172.16.1.0/24
2873  *   unicast-ip4-chain
2874  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2875  *     [0] [@4]: ipv4-glean: af_packet0
2876  * 172.16.1.1/32
2877  *   unicast-ip4-chain
2878  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2879  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2880  * 172.16.1.2/32
2881  *   unicast-ip4-chain
2882  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2883  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2884  * 172.16.2.0/24
2885  *   unicast-ip4-chain
2886  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2887  *     [0] [@4]: ipv4-glean: af_packet1
2888  * 172.16.2.1/32
2889  *   unicast-ip4-chain
2890  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2891  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2892  * 224.0.0.0/8
2893  *   unicast-ip4-chain
2894  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2895  *     [0] [@0]: dpo-drop ip6
2896  * 240.0.0.0/8
2897  *   unicast-ip4-chain
2898  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2899  *     [0] [@0]: dpo-drop ip6
2900  * 255.255.255.255/32
2901  *   unicast-ip4-chain
2902  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2903  *     [0] [@0]: dpo-drop ip6
2904  * @cliexend
2905 ?*/
2906 /* *INDENT-OFF* */
2907 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2908 {
2909   .path = "set ip flow-hash",
2910   .short_help =
2911   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
2912   .function = set_ip_flow_hash_command_fn,
2913 };
2914 /* *INDENT-ON* */
2915
2916 #ifndef CLIB_MARCH_VARIANT
2917 int
2918 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
2919                              u32 table_index)
2920 {
2921   vnet_main_t *vnm = vnet_get_main ();
2922   vnet_interface_main_t *im = &vnm->interface_main;
2923   ip4_main_t *ipm = &ip4_main;
2924   ip_lookup_main_t *lm = &ipm->lookup_main;
2925   vnet_classify_main_t *cm = &vnet_classify_main;
2926   ip4_address_t *if_addr;
2927
2928   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
2929     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
2930
2931   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
2932     return VNET_API_ERROR_NO_SUCH_ENTRY;
2933
2934   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
2935   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
2936
2937   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
2938
2939   if (NULL != if_addr)
2940     {
2941       fib_prefix_t pfx = {
2942         .fp_len = 32,
2943         .fp_proto = FIB_PROTOCOL_IP4,
2944         .fp_addr.ip4 = *if_addr,
2945       };
2946       u32 fib_index;
2947
2948       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
2949                                                        sw_if_index);
2950
2951
2952       if (table_index != (u32) ~ 0)
2953         {
2954           dpo_id_t dpo = DPO_INVALID;
2955
2956           dpo_set (&dpo,
2957                    DPO_CLASSIFY,
2958                    DPO_PROTO_IP4,
2959                    classify_dpo_create (DPO_PROTO_IP4, table_index));
2960
2961           fib_table_entry_special_dpo_add (fib_index,
2962                                            &pfx,
2963                                            FIB_SOURCE_CLASSIFY,
2964                                            FIB_ENTRY_FLAG_NONE, &dpo);
2965           dpo_reset (&dpo);
2966         }
2967       else
2968         {
2969           fib_table_entry_special_remove (fib_index,
2970                                           &pfx, FIB_SOURCE_CLASSIFY);
2971         }
2972     }
2973
2974   return 0;
2975 }
2976 #endif
2977
2978 static clib_error_t *
2979 set_ip_classify_command_fn (vlib_main_t * vm,
2980                             unformat_input_t * input,
2981                             vlib_cli_command_t * cmd)
2982 {
2983   u32 table_index = ~0;
2984   int table_index_set = 0;
2985   u32 sw_if_index = ~0;
2986   int rv;
2987
2988   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2989     {
2990       if (unformat (input, "table-index %d", &table_index))
2991         table_index_set = 1;
2992       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
2993                          vnet_get_main (), &sw_if_index))
2994         ;
2995       else
2996         break;
2997     }
2998
2999   if (table_index_set == 0)
3000     return clib_error_return (0, "classify table-index must be specified");
3001
3002   if (sw_if_index == ~0)
3003     return clib_error_return (0, "interface / subif must be specified");
3004
3005   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3006
3007   switch (rv)
3008     {
3009     case 0:
3010       break;
3011
3012     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3013       return clib_error_return (0, "No such interface");
3014
3015     case VNET_API_ERROR_NO_SUCH_ENTRY:
3016       return clib_error_return (0, "No such classifier table");
3017     }
3018   return 0;
3019 }
3020
3021 /*?
3022  * Assign a classification table to an interface. The classification
3023  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
 * on an interface.
3026  *
3027  * @cliexpar
3028  * Example of how to assign a classification table to an interface:
3029  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3030 ?*/
3031 /* *INDENT-OFF* */
3032 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3033 {
3034     .path = "set ip classify",
3035     .short_help =
3036     "set ip classify intfc <interface> table-index <classify-idx>",
3037     .function = set_ip_classify_command_fn,
3038 };
3039 /* *INDENT-ON* */
3040
3041 static clib_error_t *
3042 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3043 {
3044   ip4_main_t *im = &ip4_main;
3045   uword heapsize = 0;
3046
3047   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3048     {
3049       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3050         ;
3051       else
3052         return clib_error_return (0,
3053                                   "invalid heap-size parameter `%U'",
3054                                   format_unformat_error, input);
3055     }
3056
3057   im->mtrie_heap_size = heapsize;
3058
3059   return 0;
3060 }
3061
3062 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3063
3064 /*
3065  * fd.io coding-style-patch-verification: ON
3066  *
3067  * Local Variables:
3068  * eval: (c-set-style "gnu")
3069  * End:
3070  */