Fix VPP crash when sending ARP or probing a neighbor (VPP-917)
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 /**
57  * @file
58  * @brief IPv4 Forwarding.
59  *
60  * This file contains the source code for IPv4 forwarding.
61  */
62
63 void
64 ip4_forward_next_trace (vlib_main_t * vm,
65                         vlib_node_runtime_t * node,
66                         vlib_frame_t * frame,
67                         vlib_rx_or_tx_t which_adj_index);
68
/**
 * Inline worker used by ip4_lookup: resolves every buffer of the frame to
 * a load-balance object, picks one of its buckets and enqueues the buffer
 * to that bucket's next node.
 *
 * Per buffer the function
 *  - derives a FIB index from sw_if_index[VLIB_RX] through
 *    im->fib_index_by_sw_if_index, unless sw_if_index[VLIB_TX] is not ~0,
 *    in which case that value is taken as the FIB index;
 *  - obtains the load-balance index either from a three-step mtrie walk
 *    on the destination address or, when
 *    lookup_for_responses_to_locally_received_packets is non-zero, from
 *    ip.adj_index[VLIB_RX];
 *  - stores ip.flow_hash (0 unless the load-balance has more than one
 *    bucket) and writes the chosen bucket's dpoi_index to
 *    ip.adj_index[VLIB_TX];
 *  - increments load_balance_main.lbm_to_counters for the load-balance.
 *
 * @param vm    vlib_main_t corresponding to the current thread
 * @param node  runtime of the node being dispatched
 * @param frame frame holding the buffer indices to process
 * @param lookup_for_responses_to_locally_received_packets
 *              when non-zero, skip the mtrie walk and use the index
 *              already stored in ip.adj_index[VLIB_RX]
 * @return frame->n_vectors
 */
69 always_inline uword
70 ip4_lookup_inline (vlib_main_t * vm,
71                    vlib_node_runtime_t * node,
72                    vlib_frame_t * frame,
73                    int lookup_for_responses_to_locally_received_packets)
74 {
75   ip4_main_t *im = &ip4_main;
76   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
77   u32 n_left_from, n_left_to_next, *from, *to_next;
78   ip_lookup_next_t next;
79   u32 thread_index = vlib_get_thread_index ();
80
81   from = vlib_frame_vector_args (frame);
82   n_left_from = frame->n_vectors;
83   next = node->cached_next_index;
84
85   while (n_left_from > 0)
86     {
87       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
88
         /* Quad loop: four buffers per pass. Eight pending buffers are
            required because from[4..7] are prefetched for the next pass. */
89       while (n_left_from >= 8 && n_left_to_next >= 4)
90         {
91           vlib_buffer_t *p0, *p1, *p2, *p3;
92           ip4_header_t *ip0, *ip1, *ip2, *ip3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t *lb0, *lb1, *lb2, *lb3;
95           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           u32 pi0, fib_index0, lb_index0;
99           u32 pi1, fib_index1, lb_index1;
100           u32 pi2, fib_index2, lb_index2;
101           u32 pi3, fib_index3, lb_index3;
102           flow_hash_config_t flow_hash_config0, flow_hash_config1;
103           flow_hash_config_t flow_hash_config2, flow_hash_config3;
104           u32 hash_c0, hash_c1, hash_c2, hash_c3;
105           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
106
107           /* Prefetch next iteration. */
108           {
109             vlib_buffer_t *p4, *p5, *p6, *p7;
110
111             p4 = vlib_get_buffer (vm, from[4]);
112             p5 = vlib_get_buffer (vm, from[5]);
113             p6 = vlib_get_buffer (vm, from[6]);
114             p7 = vlib_get_buffer (vm, from[7]);
115
116             vlib_prefetch_buffer_header (p4, LOAD);
117             vlib_prefetch_buffer_header (p5, LOAD);
118             vlib_prefetch_buffer_header (p6, LOAD);
119             vlib_prefetch_buffer_header (p7, LOAD);
120
121             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
125           }
126
              /* Copy the four buffer indices into the current next frame up
                 front; the per-buffer next indices are handed to
                 vlib_validate_buffer_enqueue_x4 at the end of the pass. */
127           pi0 = to_next[0] = from[0];
128           pi1 = to_next[1] = from[1];
129           pi2 = to_next[2] = from[2];
130           pi3 = to_next[3] = from[3];
131
132           from += 4;
133           to_next += 4;
134           n_left_to_next -= 4;
135           n_left_from -= 4;
136
137           p0 = vlib_get_buffer (vm, pi0);
138           p1 = vlib_get_buffer (vm, pi1);
139           p2 = vlib_get_buffer (vm, pi2);
140           p3 = vlib_get_buffer (vm, pi3);
141
142           ip0 = vlib_buffer_get_current (p0);
143           ip1 = vlib_buffer_get_current (p1);
144           ip2 = vlib_buffer_get_current (p2);
145           ip3 = vlib_buffer_get_current (p3);
146
147           dst_addr0 = &ip0->dst_address;
148           dst_addr1 = &ip1->dst_address;
149           dst_addr2 = &ip2->dst_address;
150           dst_addr3 = &ip3->dst_address;
151
              /* FIB index: start with the one bound to the RX interface;
                 a sw_if_index[VLIB_TX] other than ~0 replaces it and is used
                 directly as the FIB index. */
152           fib_index0 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
155           fib_index1 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
158           fib_index2 =
159             vec_elt (im->fib_index_by_sw_if_index,
160                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
161           fib_index3 =
162             vec_elt (im->fib_index_by_sw_if_index,
163                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
164           fib_index0 =
165             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
167           fib_index1 =
168             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
170           fib_index2 =
171             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
172              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
173           fib_index3 =
174             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
175              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
176
177
              /* Destination lookup in the FIB's mtrie, done in three steps
                 (step_one, then steps 2 and 3). All three are skipped when
                 the load-balance index is taken from the buffer instead. */
178           if (!lookup_for_responses_to_locally_received_packets)
179             {
180               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
181               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
182               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
183               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
184
185               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
186               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
187               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
188               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
189             }
190
191           if (!lookup_for_responses_to_locally_received_packets)
192             {
193               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
194               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
195               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
196               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
197             }
198
199           if (!lookup_for_responses_to_locally_received_packets)
200             {
201               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
202               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
203               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
204               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
205             }
206
              /* Load-balance index: from buffer metadata in the
                 "responses to locally received packets" mode, otherwise
                 from the leaf the mtrie walk ended on. */
207           if (lookup_for_responses_to_locally_received_packets)
208             {
209               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
210               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
211               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
212               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
213             }
214           else
215             {
216               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
217               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
218               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
219               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
220             }
221
              /* Index 0 is never expected as a lookup result here. */
222           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
223           lb0 = load_balance_get (lb_index0);
224           lb1 = load_balance_get (lb_index1);
225           lb2 = load_balance_get (lb_index2);
226           lb3 = load_balance_get (lb_index3);
227
              /* The bucket selection below masks the hash with
                 lb_n_buckets_minus_1, hence the power-of-two checks. */
228           ASSERT (lb0->lb_n_buckets > 0);
229           ASSERT (is_pow2 (lb0->lb_n_buckets));
230           ASSERT (lb1->lb_n_buckets > 0);
231           ASSERT (is_pow2 (lb1->lb_n_buckets));
232           ASSERT (lb2->lb_n_buckets > 0);
233           ASSERT (is_pow2 (lb2->lb_n_buckets));
234           ASSERT (lb3->lb_n_buckets > 0);
235           ASSERT (is_pow2 (lb3->lb_n_buckets));
236
237           /* Use flow hash to compute multipath adjacency. */
              /* ip.flow_hash is reset to 0 first and only computed when the
                 load-balance has more than one bucket; with a single bucket
                 bucket 0 is taken and the stored hash stays 0. */
238           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
239           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
240           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
241           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
242           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
243             {
244               flow_hash_config0 = lb0->lb_hash_config;
245               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
246                 ip4_compute_flow_hash (ip0, flow_hash_config0);
247               dpo0 =
248                 load_balance_get_fwd_bucket (lb0,
249                                              (hash_c0 &
250                                               (lb0->lb_n_buckets_minus_1)));
251             }
252           else
253             {
254               dpo0 = load_balance_get_bucket_i (lb0, 0);
255             }
256           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
257             {
258               flow_hash_config1 = lb1->lb_hash_config;
259               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
260                 ip4_compute_flow_hash (ip1, flow_hash_config1);
261               dpo1 =
262                 load_balance_get_fwd_bucket (lb1,
263                                              (hash_c1 &
264                                               (lb1->lb_n_buckets_minus_1)));
265             }
266           else
267             {
268               dpo1 = load_balance_get_bucket_i (lb1, 0);
269             }
270           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
271             {
272               flow_hash_config2 = lb2->lb_hash_config;
273               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
274                 ip4_compute_flow_hash (ip2, flow_hash_config2);
275               dpo2 =
276                 load_balance_get_fwd_bucket (lb2,
277                                              (hash_c2 &
278                                               (lb2->lb_n_buckets_minus_1)));
279             }
280           else
281             {
282               dpo2 = load_balance_get_bucket_i (lb2, 0);
283             }
284           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
285             {
286               flow_hash_config3 = lb3->lb_hash_config;
287               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
288                 ip4_compute_flow_hash (ip3, flow_hash_config3);
289               dpo3 =
290                 load_balance_get_fwd_bucket (lb3,
291                                              (hash_c3 &
292                                               (lb3->lb_n_buckets_minus_1)));
293             }
294           else
295             {
296               dpo3 = load_balance_get_bucket_i (lb3, 0);
297             }
298
              /* The chosen bucket supplies both the next node and the index
                 left in the buffer for that node. */
299           next0 = dpo0->dpoi_next_node;
300           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
301           next1 = dpo1->dpoi_next_node;
302           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
303           next2 = dpo2->dpoi_next_node;
304           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
305           next3 = dpo3->dpoi_next_node;
306           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
307
              /* Bump the combined counter of each load-balance with 1 and the
                 buffer chain length (presumably packets and bytes). */
308           vlib_increment_combined_counter
309             (cm, thread_index, lb_index0, 1,
310              vlib_buffer_length_in_chain (vm, p0));
311           vlib_increment_combined_counter
312             (cm, thread_index, lb_index1, 1,
313              vlib_buffer_length_in_chain (vm, p1));
314           vlib_increment_combined_counter
315             (cm, thread_index, lb_index2, 1,
316              vlib_buffer_length_in_chain (vm, p2));
317           vlib_increment_combined_counter
318             (cm, thread_index, lb_index3, 1,
319              vlib_buffer_length_in_chain (vm, p3));
320
321           vlib_validate_buffer_enqueue_x4 (vm, node, next,
322                                            to_next, n_left_to_next,
323                                            pi0, pi1, pi2, pi3,
324                                            next0, next1, next2, next3);
325         }
326
          /* Single loop: same steps as above, one buffer at a time, for
             whatever the quad loop could not take. */
327       while (n_left_from > 0 && n_left_to_next > 0)
328         {
329           vlib_buffer_t *p0;
330           ip4_header_t *ip0;
331           ip_lookup_next_t next0;
332           const load_balance_t *lb0;
333           ip4_fib_mtrie_t *mtrie0;
334           ip4_fib_mtrie_leaf_t leaf0;
335           ip4_address_t *dst_addr0;
336           u32 pi0, fib_index0, lbi0;
337           flow_hash_config_t flow_hash_config0;
338           const dpo_id_t *dpo0;
339           u32 hash_c0;
340
341           pi0 = from[0];
342           to_next[0] = pi0;
343
344           p0 = vlib_get_buffer (vm, pi0);
345
346           ip0 = vlib_buffer_get_current (p0);
347
348           dst_addr0 = &ip0->dst_address;
349
350           fib_index0 =
351             vec_elt (im->fib_index_by_sw_if_index,
352                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
353           fib_index0 =
354             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
355              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
356
357           if (!lookup_for_responses_to_locally_received_packets)
358             {
359               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
360
361               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
362             }
363
364           if (!lookup_for_responses_to_locally_received_packets)
365             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
366
367           if (!lookup_for_responses_to_locally_received_packets)
368             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
369
370           if (lookup_for_responses_to_locally_received_packets)
371             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
372           else
373             {
374               /* Handle default route. */
375               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
376             }
377
378           ASSERT (lbi0);
379           lb0 = load_balance_get (lbi0);
380
381           ASSERT (lb0->lb_n_buckets > 0);
382           ASSERT (is_pow2 (lb0->lb_n_buckets));
383
384           /* Use flow hash to compute multipath adjacency. */
385           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
386           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
387             {
388               flow_hash_config0 = lb0->lb_hash_config;
389
390               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
391                 ip4_compute_flow_hash (ip0, flow_hash_config0);
392               dpo0 =
393                 load_balance_get_fwd_bucket (lb0,
394                                              (hash_c0 &
395                                               (lb0->lb_n_buckets_minus_1)));
396             }
397           else
398             {
399               dpo0 = load_balance_get_bucket_i (lb0, 0);
400             }
401
402           next0 = dpo0->dpoi_next_node;
403           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
404
405           vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
406                                            vlib_buffer_length_in_chain (vm,
407                                                                         p0));
408
409           from += 1;
410           to_next += 1;
411           n_left_to_next -= 1;
412           n_left_from -= 1;
413
              /* The buffer was written to the frame for 'next' before its
                 real next node was known. If it belongs elsewhere, give the
                 slot back (n_left_to_next += 1), close that frame, switch
                 'next' to next0 and enqueue the buffer on the new frame. */
414           if (PREDICT_FALSE (next0 != next))
415             {
416               n_left_to_next += 1;
417               vlib_put_next_frame (vm, node, next, n_left_to_next);
418               next = next0;
419               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
420               to_next[0] = pi0;
421               to_next += 1;
422               n_left_to_next -= 1;
423             }
424         }
425
426       vlib_put_next_frame (vm, node, next, n_left_to_next);
427     }
428
     /* The trace hook is called after all buffers have been processed. */
429   if (node->flags & VLIB_NODE_FLAG_TRACE)
430     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
431
432   return frame->n_vectors;
433 }
434
435 /** @brief IPv4 lookup node.
436     @node ip4-lookup
437
438     This is the main IPv4 lookup dispatch node.
439
440     @param vm vlib_main_t corresponding to the current thread
441     @param node vlib_node_runtime_t
442     @param frame vlib_frame_t whose contents should be dispatched
443
444     @par Graph mechanics: buffer metadata, next index usage
445
446     @em Uses:
447     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
448         - Indicates the @c sw_if_index value of the interface that the
449           packet was received on.
450     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
451         - When the value is @c ~0 then the node performs a longest prefix
452           match (LPM) for the packet destination address in the FIB attached
453           to the receive interface.
454         - Otherwise perform LPM for the packet destination address in the
455           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
456           value (0, 1, ...) and not a VRF id.
457
458     @em Sets:
459     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
460         - The lookup result adjacency index.
461
462     <em>Next Index:</em>
463     - Dispatches the packet to the node index found in
464       ip_adjacency_t @c adj->lookup_next_index
465       (where @c adj is the lookup result adjacency).
466 */
467 static uword
468 ip4_lookup (vlib_main_t * vm,
469             vlib_node_runtime_t * node, vlib_frame_t * frame)
470 {
471   return ip4_lookup_inline (vm, node, frame,
472                             /* lookup_for_responses_to_locally_received_packets */
473                             0);
474
475 }
476
477 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
478
479 VLIB_REGISTER_NODE (ip4_lookup_node) =
480 {
481 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
482     sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
483     IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
484
485 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
486
/**
 * Node function registered as "ip4-load-balance": resolves each buffer
 * through the load-balance whose index is already stored in
 * ip.adj_index[VLIB_TX].
 *
 * Bucket selection per buffer: with a single bucket, bucket 0 is used.
 * With several buckets, a non-zero ip.flow_hash left in the buffer is
 * shifted right by one bit and re-used; otherwise a fresh hash is computed
 * with the load-balance's hash config. In both multi-bucket cases the value
 * used is written back to ip.flow_hash. ip.adj_index[VLIB_TX] is then
 * overwritten with the bucket's dpoi_index and the buffer is enqueued to
 * the bucket's dpoi_next_node. load_balance_main.lbm_via_counters is
 * incremented for the load-balance the buffer arrived with.
 *
 * Unlike ip4_lookup_inline, the trace hook is invoked before the buffers
 * are processed.
 *
 * @param vm    vlib main of the calling thread
 * @param node  runtime of the node being dispatched
 * @param frame frame holding the buffer indices to process
 * @return frame->n_vectors
 */
487 always_inline uword
488 ip4_load_balance (vlib_main_t * vm,
489                   vlib_node_runtime_t * node, vlib_frame_t * frame)
490 {
491   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
492   u32 n_left_from, n_left_to_next, *from, *to_next;
493   ip_lookup_next_t next;
494   u32 thread_index = vlib_get_thread_index ();
495
496   from = vlib_frame_vector_args (frame);
497   n_left_from = frame->n_vectors;
498   next = node->cached_next_index;
499
      /* Trace first, i.e. before ip.adj_index[VLIB_TX] is rewritten below. */
500   if (node->flags & VLIB_NODE_FLAG_TRACE)
501     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
502
503   while (n_left_from > 0)
504     {
505       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
506
507
          /* Dual loop: two buffers per pass. Four pending buffers are
             required because from[2] and from[3] are prefetched. */
508       while (n_left_from >= 4 && n_left_to_next >= 2)
509         {
510           ip_lookup_next_t next0, next1;
511           const load_balance_t *lb0, *lb1;
512           vlib_buffer_t *p0, *p1;
513           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
514           const ip4_header_t *ip0, *ip1;
515           const dpo_id_t *dpo0, *dpo1;
516
517           /* Prefetch next iteration. */
518           {
519             vlib_buffer_t *p2, *p3;
520
521             p2 = vlib_get_buffer (vm, from[2]);
522             p3 = vlib_get_buffer (vm, from[3]);
523
524             vlib_prefetch_buffer_header (p2, STORE);
525             vlib_prefetch_buffer_header (p3, STORE);
526
527             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
528             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
529           }
530
531           pi0 = to_next[0] = from[0];
532           pi1 = to_next[1] = from[1];
533
534           from += 2;
535           n_left_from -= 2;
536           to_next += 2;
537           n_left_to_next -= 2;
538
539           p0 = vlib_get_buffer (vm, pi0);
540           p1 = vlib_get_buffer (vm, pi1);
541
542           ip0 = vlib_buffer_get_current (p0);
543           ip1 = vlib_buffer_get_current (p1);
              /* On entry adj_index[VLIB_TX] holds a load-balance index. */
544           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
545           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
546
547           lb0 = load_balance_get (lbi0);
548           lb1 = load_balance_get (lbi1);
549
550           /*
551            * this node is for via FIBs we can re-use the hash value from the
552            * to node if present.
553            * We don't want to use the same hash value at each level in the recursion
554            * graph as that would lead to polarisation
555            */
556           hc0 = hc1 = 0;
557
558           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
559             {
                  /* A non-zero stored hash is re-used after dropping its
                     lowest bit; a zero one is treated as "not computed". */
560               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
561                 {
562                   hc0 = vnet_buffer (p0)->ip.flow_hash =
563                     vnet_buffer (p0)->ip.flow_hash >> 1;
564                 }
565               else
566                 {
567                   hc0 = vnet_buffer (p0)->ip.flow_hash =
568                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
569                 }
570               dpo0 = load_balance_get_fwd_bucket
571                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
572             }
573           else
574             {
575               dpo0 = load_balance_get_bucket_i (lb0, 0);
576             }
577           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
578             {
579               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
580                 {
581                   hc1 = vnet_buffer (p1)->ip.flow_hash =
582                     vnet_buffer (p1)->ip.flow_hash >> 1;
583                 }
584               else
585                 {
586                   hc1 = vnet_buffer (p1)->ip.flow_hash =
587                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
588                 }
589               dpo1 = load_balance_get_fwd_bucket
590                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
591             }
592           else
593             {
594               dpo1 = load_balance_get_bucket_i (lb1, 0);
595             }
596
597           next0 = dpo0->dpoi_next_node;
598           next1 = dpo1->dpoi_next_node;
599
600           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
602
              /* Counted against the load-balance index read on entry, not
                 against the bucket just selected. */
603           vlib_increment_combined_counter
604             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
605           vlib_increment_combined_counter
606             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
607
608           vlib_validate_buffer_enqueue_x2 (vm, node, next,
609                                            to_next, n_left_to_next,
610                                            pi0, pi1, next0, next1);
611         }
612
          /* Single loop: same steps for the remaining buffers. */
613       while (n_left_from > 0 && n_left_to_next > 0)
614         {
615           ip_lookup_next_t next0;
616           const load_balance_t *lb0;
617           vlib_buffer_t *p0;
618           u32 pi0, lbi0, hc0;
619           const ip4_header_t *ip0;
620           const dpo_id_t *dpo0;
621
622           pi0 = from[0];
623           to_next[0] = pi0;
624           from += 1;
625           to_next += 1;
626           n_left_to_next -= 1;
627           n_left_from -= 1;
628
629           p0 = vlib_get_buffer (vm, pi0);
630
631           ip0 = vlib_buffer_get_current (p0);
632           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
633
634           lb0 = load_balance_get (lbi0);
635
636           hc0 = 0;
637           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
638             {
639               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
640                 {
641                   hc0 = vnet_buffer (p0)->ip.flow_hash =
642                     vnet_buffer (p0)->ip.flow_hash >> 1;
643                 }
644               else
645                 {
646                   hc0 = vnet_buffer (p0)->ip.flow_hash =
647                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
648                 }
649               dpo0 = load_balance_get_fwd_bucket
650                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
651             }
652           else
653             {
654               dpo0 = load_balance_get_bucket_i (lb0, 0);
655             }
656
657           next0 = dpo0->dpoi_next_node;
658           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
659
660           vlib_increment_combined_counter
661             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
662
663           vlib_validate_buffer_enqueue_x1 (vm, node, next,
664                                            to_next, n_left_to_next,
665                                            pi0, next0);
666         }
667
668       vlib_put_next_frame (vm, node, next, n_left_to_next);
669     }
670
671   return frame->n_vectors;
672 }
673
674 VLIB_REGISTER_NODE (ip4_load_balance_node) =
675 {
676 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
677     sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
678     format_ip4_lookup_trace,};
679
680 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
681
682 /* get first interface address */
683 ip4_address_t *
684 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
685                              ip_interface_address_t ** result_ia)
686 {
687   ip_lookup_main_t *lm = &im->lookup_main;
688   ip_interface_address_t *ia = 0;
689   ip4_address_t *result = 0;
690
691   /* *INDENT-OFF* */
692   foreach_ip_interface_address
693     (lm, ia, sw_if_index,
694      1 /* honor unnumbered */ ,
695      ({
696        ip4_address_t * a =
697          ip_interface_address_get_address (lm, ia);
698        result = a;
699        break;
700      }));
701   /* *INDENT-OFF* */
702   if (result_ia)
703     *result_ia = result ? ia : 0;
704   return result;
705 }
706
707 static void
708 ip4_add_interface_routes (u32 sw_if_index,
709                           ip4_main_t * im, u32 fib_index,
710                           ip_interface_address_t * a)
711 {
712   ip_lookup_main_t *lm = &im->lookup_main;
713   ip4_address_t *address = ip_interface_address_get_address (lm, a);
714   fib_prefix_t pfx = {
715     .fp_len = a->address_length,
716     .fp_proto = FIB_PROTOCOL_IP4,
717     .fp_addr.ip4 = *address,
718   };
719
720   if (pfx.fp_len <= 30)
721     {
722       /* a /30 or shorter - add a glean for the network address */
723       fib_table_entry_update_one_path (fib_index, &pfx,
724                                        FIB_SOURCE_INTERFACE,
725                                        (FIB_ENTRY_FLAG_CONNECTED |
726                                         FIB_ENTRY_FLAG_ATTACHED),
727                                        FIB_PROTOCOL_IP4,
728                                        /* No next-hop address */
729                                        NULL,
730                                        sw_if_index,
731                                        // invalid FIB index
732                                        ~0,
733                                        1,
734                                        // no out-label stack
735                                        NULL,
736                                        FIB_ROUTE_PATH_FLAG_NONE);
737
738       /* Add the two broadcast addresses as drop */
739       fib_prefix_t net_pfx = {
740         .fp_len = 32,
741         .fp_proto = FIB_PROTOCOL_IP4,
742         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
743       };
744       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
745         fib_table_entry_special_add(fib_index,
746                                     &net_pfx,
747                                     FIB_SOURCE_INTERFACE,
748                                     (FIB_ENTRY_FLAG_DROP |
749                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
750       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
751       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
752         fib_table_entry_special_add(fib_index,
753                                     &net_pfx,
754                                     FIB_SOURCE_INTERFACE,
755                                     (FIB_ENTRY_FLAG_DROP |
756                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
757     }
758   else if (pfx.fp_len == 31)
759     {
760       u32 mask = clib_host_to_net_u32(1);
761       fib_prefix_t net_pfx = pfx;
762
763       net_pfx.fp_len = 32;
764       net_pfx.fp_addr.ip4.as_u32 ^= mask;
765
766       /* a /31 - add the other end as an attached host */
767       fib_table_entry_update_one_path (fib_index, &net_pfx,
768                                        FIB_SOURCE_INTERFACE,
769                                        (FIB_ENTRY_FLAG_ATTACHED),
770                                        FIB_PROTOCOL_IP4,
771                                        &net_pfx.fp_addr,
772                                        sw_if_index,
773                                        // invalid FIB index
774                                        ~0,
775                                        1,
776                                        NULL,
777                                        FIB_ROUTE_PATH_FLAG_NONE);
778     }
779   pfx.fp_len = 32;
780
781   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
782     {
783       u32 classify_table_index =
784         lm->classify_table_index_by_sw_if_index[sw_if_index];
785       if (classify_table_index != (u32) ~ 0)
786         {
787           dpo_id_t dpo = DPO_INVALID;
788
789           dpo_set (&dpo,
790                    DPO_CLASSIFY,
791                    DPO_PROTO_IP4,
792                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
793
794           fib_table_entry_special_dpo_add (fib_index,
795                                            &pfx,
796                                            FIB_SOURCE_CLASSIFY,
797                                            FIB_ENTRY_FLAG_NONE, &dpo);
798           dpo_reset (&dpo);
799         }
800     }
801
802   fib_table_entry_update_one_path (fib_index, &pfx,
803                                    FIB_SOURCE_INTERFACE,
804                                    (FIB_ENTRY_FLAG_CONNECTED |
805                                     FIB_ENTRY_FLAG_LOCAL),
806                                    FIB_PROTOCOL_IP4,
807                                    &pfx.fp_addr,
808                                    sw_if_index,
809                                    // invalid FIB index
810                                    ~0,
811                                    1, NULL,
812                                    FIB_ROUTE_PATH_FLAG_NONE);
813 }
814
815 static void
816 ip4_del_interface_routes (ip4_main_t * im,
817                           u32 fib_index,
818                           ip4_address_t * address, u32 address_length)
819 {
820   fib_prefix_t pfx = {
821     .fp_len = address_length,
822     .fp_proto = FIB_PROTOCOL_IP4,
823     .fp_addr.ip4 = *address,
824   };
825
826   if (pfx.fp_len <= 30)
827     {
828       fib_prefix_t net_pfx = {
829         .fp_len = 32,
830         .fp_proto = FIB_PROTOCOL_IP4,
831         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
832       };
833       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
834         fib_table_entry_special_remove(fib_index,
835                                        &net_pfx,
836                                        FIB_SOURCE_INTERFACE);
837       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
838       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
839         fib_table_entry_special_remove(fib_index,
840                                        &net_pfx,
841                                        FIB_SOURCE_INTERFACE);
842       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
843     }
844     else if (pfx.fp_len == 31)
845     {
846       u32 mask = clib_host_to_net_u32(1);
847       fib_prefix_t net_pfx = pfx;
848
849       net_pfx.fp_len = 32;
850       net_pfx.fp_addr.ip4.as_u32 ^= mask;
851
852       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
853     }
854
855   pfx.fp_len = 32;
856   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
857 }
858
859 void
860 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
861 {
862   ip4_main_t *im = &ip4_main;
863
864   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
865
866   /*
867    * enable/disable only on the 1<->0 transition
868    */
869   if (is_enable)
870     {
871       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
872         return;
873     }
874   else
875     {
876       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
877       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
878         return;
879     }
880   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
881                                !is_enable, 0, 0);
882
883
884   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
885                                sw_if_index, !is_enable, 0, 0);
886 }
887
888 static clib_error_t *
889 ip4_add_del_interface_address_internal (vlib_main_t * vm,
890                                         u32 sw_if_index,
891                                         ip4_address_t * address,
892                                         u32 address_length, u32 is_del)
893 {
894   vnet_main_t *vnm = vnet_get_main ();
895   ip4_main_t *im = &ip4_main;
896   ip_lookup_main_t *lm = &im->lookup_main;
897   clib_error_t *error = 0;
898   u32 if_address_index, elts_before;
899   ip4_address_fib_t ip4_af, *addr_fib = 0;
900
901   /* local0 interface doesn't support IP addressing  */
902   if (sw_if_index == 0)
903     {
904       return
905        clib_error_create ("local0 interface doesn't support IP addressing");
906     }
907
908   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
909   ip4_addr_fib_init (&ip4_af, address,
910                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
911   vec_add1 (addr_fib, ip4_af);
912
913   /* FIXME-LATER
914    * there is no support for adj-fib handling in the presence of overlapping
915    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
916    * most routers do.
917    */
918   /* *INDENT-OFF* */
919   if (!is_del)
920     {
921       /* When adding an address check that it does not conflict
922          with an existing address. */
923       ip_interface_address_t *ia;
924       foreach_ip_interface_address
925         (&im->lookup_main, ia, sw_if_index,
926          0 /* honor unnumbered */ ,
927          ({
928            ip4_address_t * x =
929              ip_interface_address_get_address
930              (&im->lookup_main, ia);
931            if (ip4_destination_matches_route
932                (im, address, x, ia->address_length) ||
933                ip4_destination_matches_route (im,
934                                               x,
935                                               address,
936                                               address_length))
937              return
938                clib_error_create
939                ("failed to add %U which conflicts with %U for interface %U",
940                 format_ip4_address_and_length, address,
941                 address_length,
942                 format_ip4_address_and_length, x,
943                 ia->address_length,
944                 format_vnet_sw_if_index_name, vnm,
945                 sw_if_index);
946          }));
947     }
948   /* *INDENT-ON* */
949
950   elts_before = pool_elts (lm->if_address_pool);
951
952   error = ip_interface_address_add_del
953     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
954   if (error)
955     goto done;
956
957   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
958
959   if (is_del)
960     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
961   else
962     ip4_add_interface_routes (sw_if_index,
963                               im, ip4_af.fib_index,
964                               pool_elt_at_index
965                               (lm->if_address_pool, if_address_index));
966
967   /* If pool did not grow/shrink: add duplicate address. */
968   if (elts_before != pool_elts (lm->if_address_pool))
969     {
970       ip4_add_del_interface_address_callback_t *cb;
971       vec_foreach (cb, im->add_del_interface_address_callbacks)
972         cb->function (im, cb->function_opaque, sw_if_index,
973                       address, address_length, if_address_index, is_del);
974     }
975
976 done:
977   vec_free (addr_fib);
978   return error;
979 }
980
981 clib_error_t *
982 ip4_add_del_interface_address (vlib_main_t * vm,
983                                u32 sw_if_index,
984                                ip4_address_t * address,
985                                u32 address_length, u32 is_del)
986 {
987   return ip4_add_del_interface_address_internal
988     (vm, sw_if_index, address, address_length, is_del);
989 }
990
991 /* Built-in ip4 unicast rx feature path definition */
992 /* *INDENT-OFF* */
/*
 * The "ip4-unicast" arc starts at the ip4-input and ip4-input-no-checksum
 * nodes; .arc_index_ptr points at the ucast_feature_arc_index slot of
 * ip4_main.lookup_main.
 *
 * Each VNET_FEATURE_INIT below names one node on the arc and, through
 * .runs_before, the feature(s) it has to precede. Following the
 * declarations in this block the main chain is:
 *   ip4-flow-classify, ip4-inacl, ip4-source-check-via-rx,
 *   ip4-source-check-via-any, ip4-policer-classify, ipsec-input-ip4,
 *   vpath-input-ip4, ip4-vxlan-bypass, ip4-lookup.
 */
993 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
994 {
995   .arc_name = "ip4-unicast",
996   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
997   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
998 };
999
1000 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1001 {
1002   .arc_name = "ip4-unicast",
1003   .node_name = "ip4-flow-classify",
1004   .runs_before = VNET_FEATURES ("ip4-inacl"),
1005 };
1006
1007 VNET_FEATURE_INIT (ip4_inacl, static) =
1008 {
1009   .arc_name = "ip4-unicast",
1010   .node_name = "ip4-inacl",
1011   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1012 };
1013
1014 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1015 {
1016   .arc_name = "ip4-unicast",
1017   .node_name = "ip4-source-check-via-rx",
1018   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1019 };
1020
1021 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1022 {
1023   .arc_name = "ip4-unicast",
1024   .node_name = "ip4-source-check-via-any",
1025   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1026 };
1027
/* Only constrained to precede ip4-policer-classify; no ordering relative
   to the two source-check features is declared. */
1028 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1029 {
1030   .arc_name = "ip4-unicast",
1031   .node_name = "ip4-source-and-port-range-check-rx",
1032   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1033 };
1034
1035 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1036 {
1037   .arc_name = "ip4-unicast",
1038   .node_name = "ip4-policer-classify",
1039   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1040 };
1041
1042 VNET_FEATURE_INIT (ip4_ipsec, static) =
1043 {
1044   .arc_name = "ip4-unicast",
1045   .node_name = "ipsec-input-ip4",
1046   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1047 };
1048
1049 VNET_FEATURE_INIT (ip4_vpath, static) =
1050 {
1051   .arc_name = "ip4-unicast",
1052   .node_name = "vpath-input-ip4",
1053   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1054 };
1055
1056 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1057 {
1058   .arc_name = "ip4-unicast",
1059   .node_name = "ip4-vxlan-bypass",
1060   .runs_before = VNET_FEATURES ("ip4-lookup"),
1061 };
1062
/* "ip4-drop" is the feature that ip4_sw_interface_enable_disable() and
   ip4_sw_interface_add_del() toggle per interface on this arc; its only
   declared constraint is to run before ip4-lookup. */
1063 VNET_FEATURE_INIT (ip4_drop, static) =
1064 {
1065   .arc_name = "ip4-unicast",
1066   .node_name = "ip4-drop",
1067   .runs_before = VNET_FEATURES ("ip4-lookup"),
1068 };
1069
1070 VNET_FEATURE_INIT (ip4_lookup, static) =
1071 {
1072   .arc_name = "ip4-unicast",
1073   .node_name = "ip4-lookup",
1074   .runs_before = 0,     /* not before any other features */
1075 };
1076
1077 /* Built-in ip4 multicast rx feature path definition */
/*
 * The "ip4-multicast" arc starts at the same two input nodes as the
 * unicast arc; .arc_index_ptr points at the mcast_feature_arc_index slot
 * of ip4_main.lookup_main. vpath-input-ip4 and ip4-drop are each declared
 * to run before ip4-mfib-forward-lookup, which is the last feature.
 */
1078 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1079 {
1080   .arc_name = "ip4-multicast",
1081   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1082   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1083 };
1084
1085 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1086 {
1087   .arc_name = "ip4-multicast",
1088   .node_name = "vpath-input-ip4",
1089   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1090 };
1091
/* Multicast counterpart of the per-interface "ip4-drop" toggle. */
1092 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1093 {
1094   .arc_name = "ip4-multicast",
1095   .node_name = "ip4-drop",
1096   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1097 };
1098
1099 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1100 {
1101   .arc_name = "ip4-multicast",
1102   .node_name = "ip4-mfib-forward-lookup",
1103   .runs_before = 0,     /* last feature */
1104 };
1105
1106 /* ip4 tx (output) feature path definition; it carries the source and port-range tx check among others */
/*
 * The "ip4-output" arc starts at ip4-rewrite and ip4-midchain;
 * .arc_index_ptr points at the output_feature_arc_index slot of
 * ip4_main.lookup_main. Declared order:
 *   ip4-source-and-port-range-check-tx, ipsec-output-ip4, interface-output.
 */
1107 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1108 {
1109   .arc_name = "ip4-output",
1110   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1111   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1112 };
1113
1114 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1115 {
1116   .arc_name = "ip4-output",
1117   .node_name = "ip4-source-and-port-range-check-tx",
1118   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1119 };
1120
1121 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1122 {
1123   .arc_name = "ip4-output",
1124   .node_name = "ipsec-output-ip4",
1125   .runs_before = VNET_FEATURES ("interface-output"),
1126 };
1127
1128 /* Built-in ip4 tx feature path definition */
1129 VNET_FEATURE_INIT (ip4_interface_output, static) =
1130 {
1131   .arc_name = "ip4-output",
1132   .node_name = "interface-output",
1133   .runs_before = 0,     /* not before any other features */
1134 };
1135 /* *INDENT-ON* */
1136
1137 static clib_error_t *
1138 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1139 {
1140   ip4_main_t *im = &ip4_main;
1141
1142   /* Fill in lookup tables with default table (0). */
1143   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1144   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1145
1146   if (!is_add)
1147     {
1148       ip4_main_t *im4 = &ip4_main;
1149       ip_lookup_main_t *lm4 = &im4->lookup_main;
1150       ip_interface_address_t *ia = 0;
1151       ip4_address_t *address;
1152       vlib_main_t *vm = vlib_get_main ();
1153
1154       /* *INDENT-OFF* */
1155       foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1156       ({
1157         address = ip_interface_address_get_address (lm4, ia);
1158         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1159       }));
1160       /* *INDENT-ON* */
1161     }
1162
1163   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1164                                is_add, 0, 0);
1165
1166   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1167                                is_add, 0, 0);
1168
1169   return /* no error */ 0;
1170 }
1171
1172 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1173
1174 /* Global IP4 main: the single IPv4 state object; code in this file reaches it through &ip4_main. */
1175 ip4_main_t ip4_main;
1176
1177 clib_error_t *
1178 ip4_lookup_init (vlib_main_t * vm)
1179 {
1180   ip4_main_t *im = &ip4_main;
1181   clib_error_t *error;
1182   uword i;
1183
1184   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1185     return error;
1186
1187   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1188     {
1189       u32 m;
1190
1191       if (i < 32)
1192         m = pow2_mask (i) << (32 - i);
1193       else
1194         m = ~0;
1195       im->fib_masks[i] = clib_host_to_net_u32 (m);
1196     }
1197
1198   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1199
1200   /* Create FIB with index 0 and table id of 0. */
1201   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1202   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1203
1204   {
1205     pg_node_t *pn;
1206     pn = pg_get_node (ip4_lookup_node.index);
1207     pn->unformat_edit = unformat_pg_ip4_header;
1208   }
1209
1210   {
1211     ethernet_arp_header_t h;
1212
1213     memset (&h, 0, sizeof (h));
1214
1215     /* Set target ethernet address to all zeros. */
1216     memset (h.ip4_over_ethernet[1].ethernet, 0,
1217             sizeof (h.ip4_over_ethernet[1].ethernet));
1218
1219 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1220 #define _8(f,v) h.f = v;
1221     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1222     _16 (l3_type, ETHERNET_TYPE_IP4);
1223     _8 (n_l2_address_bytes, 6);
1224     _8 (n_l3_address_bytes, 4);
1225     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1226 #undef _16
1227 #undef _8
1228
1229     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1230                                /* data */ &h,
1231                                sizeof (h),
1232                                /* alloc chunk size */ 8,
1233                                "ip4 arp");
1234   }
1235
1236   return error;
1237 }
1238
1239 VLIB_INIT_FUNCTION (ip4_lookup_init);
1240
/*
 * Trace record written by ip4_forward_next_trace() and rendered by the
 * format_ip4_*_trace functions in this file.
 */
1241 typedef struct
1242 {
1243   /* Adjacency taken: copied from the buffer's ip.adj_index[] slot selected by the caller. */
1244   u32 dpo_index;
/* Copied from the buffer's ip.flow_hash. */
1245   u32 flow_hash;
/* The buffer's TX sw_if_index when that is not ~0, otherwise the FIB
   index bound to the buffer's RX interface. */
1246   u32 fib_index;
1247
1248   /* Packet data, possibly *after* rewrite; sized 64 bytes minus one u32. */
1249   u8 packet_data[64 - 1 * sizeof (u32)];
1250 }
1251 ip4_forward_next_trace_t;
1252
1253 u8 *
1254 format_ip4_forward_next_trace (u8 * s, va_list * args)
1255 {
1256   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1257   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1258   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1259   uword indent = format_get_indent (s);
1260   s = format (s, "%U%U",
1261               format_white_space, indent,
1262               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1263   return s;
1264 }
1265
1266 static u8 *
1267 format_ip4_lookup_trace (u8 * s, va_list * args)
1268 {
1269   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1270   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1271   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1272   uword indent = format_get_indent (s);
1273
1274   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1275               t->fib_index, t->dpo_index, t->flow_hash);
1276   s = format (s, "\n%U%U",
1277               format_white_space, indent,
1278               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1279   return s;
1280 }
1281
1282 static u8 *
1283 format_ip4_rewrite_trace (u8 * s, va_list * args)
1284 {
1285   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1286   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1287   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1288   uword indent = format_get_indent (s);
1289
1290   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1291               t->fib_index, t->dpo_index, format_ip_adjacency,
1292               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1293   s = format (s, "\n%U%U",
1294               format_white_space, indent,
1295               format_ip_adjacency_packet_data,
1296               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1297   return s;
1298 }
1299
1300 /* Common trace function for all ip4-forward next nodes. */
1301 void
1302 ip4_forward_next_trace (vlib_main_t * vm,
1303                         vlib_node_runtime_t * node,
1304                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1305 {
1306   u32 *from, n_left;
1307   ip4_main_t *im = &ip4_main;
1308
1309   n_left = frame->n_vectors;
1310   from = vlib_frame_vector_args (frame);
1311
1312   while (n_left >= 4)
1313     {
1314       u32 bi0, bi1;
1315       vlib_buffer_t *b0, *b1;
1316       ip4_forward_next_trace_t *t0, *t1;
1317
1318       /* Prefetch next iteration. */
1319       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1320       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1321
1322       bi0 = from[0];
1323       bi1 = from[1];
1324
1325       b0 = vlib_get_buffer (vm, bi0);
1326       b1 = vlib_get_buffer (vm, bi1);
1327
1328       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1329         {
1330           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1331           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1332           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1333           t0->fib_index =
1334             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1335              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1336             vec_elt (im->fib_index_by_sw_if_index,
1337                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1338
1339           clib_memcpy (t0->packet_data,
1340                        vlib_buffer_get_current (b0),
1341                        sizeof (t0->packet_data));
1342         }
1343       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1344         {
1345           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1346           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1347           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1348           t1->fib_index =
1349             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1350              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1351             vec_elt (im->fib_index_by_sw_if_index,
1352                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1353           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1354                        sizeof (t1->packet_data));
1355         }
1356       from += 2;
1357       n_left -= 2;
1358     }
1359
1360   while (n_left >= 1)
1361     {
1362       u32 bi0;
1363       vlib_buffer_t *b0;
1364       ip4_forward_next_trace_t *t0;
1365
1366       bi0 = from[0];
1367
1368       b0 = vlib_get_buffer (vm, bi0);
1369
1370       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1371         {
1372           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1373           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1374           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1375           t0->fib_index =
1376             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1377              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1378             vec_elt (im->fib_index_by_sw_if_index,
1379                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1380           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1381                        sizeof (t0->packet_data));
1382         }
1383       from += 1;
1384       n_left -= 1;
1385     }
1386 }
1387
1388 static uword
1389 ip4_drop_or_punt (vlib_main_t * vm,
1390                   vlib_node_runtime_t * node,
1391                   vlib_frame_t * frame, ip4_error_t error_code)
1392 {
1393   u32 *buffers = vlib_frame_vector_args (frame);
1394   uword n_packets = frame->n_vectors;
1395
1396   vlib_error_drop_buffers (vm, node, buffers,
1397                            /* stride */ 1,
1398                            n_packets,
1399                            /* next */ 0,
1400                            ip4_input_node.index, error_code);
1401
1402   if (node->flags & VLIB_NODE_FLAG_TRACE)
1403     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1404
1405   return n_packets;
1406 }
1407
1408 static uword
1409 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1410 {
1411   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
1412 }
1413
1414 static uword
1415 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1416 {
1417   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
1418 }
1419
1420 /* *INDENT-OFF* */
/*
 * Graph node registrations for "ip4-drop" and "ip4-punt". Both take
 * vectors of u32 buffer indices, share format_ip4_forward_next_trace as
 * trace formatter and have a single next node: "error-drop" for the drop
 * node, "error-punt" for the punt node.
 */
1421 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1422 {
1423   .function = ip4_drop,
1424   .name = "ip4-drop",
1425   .vector_size = sizeof (u32),
1426   .format_trace = format_ip4_forward_next_trace,
1427   .n_next_nodes = 1,
1428   .next_nodes = {
1429     [0] = "error-drop",
1430   },
1431 };
1432
1433 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1434
1435 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1436 {
1437   .function = ip4_punt,
1438   .name = "ip4-punt",
1439   .vector_size = sizeof (u32),
1440   .format_trace = format_ip4_forward_next_trace,
1441   .n_next_nodes = 1,
1442   .next_nodes = {
1443     [0] = "error-punt",
1444   },
1445 };
1446
1447 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1448 /* *INDENT-ON* */
1449
1450 /* Compute TCP/UDP/ICMP4 checksum in software. */
1451 u16
1452 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1453                               ip4_header_t * ip0)
1454 {
1455   ip_csum_t sum0;
1456   u32 ip_header_length, payload_length_host_byte_order;
1457   u32 n_this_buffer, n_bytes_left;
1458   u16 sum16;
1459   void *data_this_buffer;
1460
1461   /* Initialize checksum with ip header. */
1462   ip_header_length = ip4_header_bytes (ip0);
1463   payload_length_host_byte_order =
1464     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1465   sum0 =
1466     clib_host_to_net_u32 (payload_length_host_byte_order +
1467                           (ip0->protocol << 16));
1468
1469   if (BITS (uword) == 32)
1470     {
1471       sum0 =
1472         ip_csum_with_carry (sum0,
1473                             clib_mem_unaligned (&ip0->src_address, u32));
1474       sum0 =
1475         ip_csum_with_carry (sum0,
1476                             clib_mem_unaligned (&ip0->dst_address, u32));
1477     }
1478   else
1479     sum0 =
1480       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1481
1482   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1483   data_this_buffer = (void *) ip0 + ip_header_length;
1484   if (n_this_buffer + ip_header_length > p0->current_length)
1485     n_this_buffer =
1486       p0->current_length >
1487       ip_header_length ? p0->current_length - ip_header_length : 0;
1488   while (1)
1489     {
1490       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1491       n_bytes_left -= n_this_buffer;
1492       if (n_bytes_left == 0)
1493         break;
1494
1495       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1496       p0 = vlib_get_buffer (vm, p0->next_buffer);
1497       data_this_buffer = vlib_buffer_get_current (p0);
1498       n_this_buffer = p0->current_length;
1499     }
1500
1501   sum16 = ~ip_csum_fold (sum0);
1502
1503   return sum16;
1504 }
1505
1506 u32
1507 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1508 {
1509   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1510   udp_header_t *udp0;
1511   u16 sum16;
1512
1513   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1514           || ip0->protocol == IP_PROTOCOL_UDP);
1515
1516   udp0 = (void *) (ip0 + 1);
1517   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1518     {
1519       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1520                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1521       return p0->flags;
1522     }
1523
1524   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1525
1526   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1527                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1528
1529   return p0->flags;
1530 }
1531
1532 /* *INDENT-OFF* */
/* Feature arc "ip4-local", started from the ip4-local node. Unlike the arcs
   above it is registered without `static` and sets no .arc_index_ptr;
   ip4_local_inline() reads the index from
   vnet_feat_arc_ip4_local.feature_arc_index. */
1533 VNET_FEATURE_ARC_INIT (ip4_local) =
1534 {
1535   .arc_name  = "ip4-local",
1536   .start_nodes = VNET_FEATURES ("ip4-local"),
1537 };
1538 /* *INDENT-ON* */
1539
1540 static inline uword
1541 ip4_local_inline (vlib_main_t * vm,
1542                   vlib_node_runtime_t * node,
1543                   vlib_frame_t * frame, int head_of_feature_arc)
1544 {
1545   ip4_main_t *im = &ip4_main;
1546   ip_lookup_main_t *lm = &im->lookup_main;
1547   ip_local_next_t next_index;
1548   u32 *from, *to_next, n_left_from, n_left_to_next;
1549   vlib_node_runtime_t *error_node =
1550     vlib_node_get_runtime (vm, ip4_input_node.index);
1551   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1552
1553   from = vlib_frame_vector_args (frame);
1554   n_left_from = frame->n_vectors;
1555   next_index = node->cached_next_index;
1556
1557   if (node->flags & VLIB_NODE_FLAG_TRACE)
1558     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1559
1560   while (n_left_from > 0)
1561     {
1562       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1563
1564       while (n_left_from >= 4 && n_left_to_next >= 2)
1565         {
1566           vlib_buffer_t *p0, *p1;
1567           ip4_header_t *ip0, *ip1;
1568           udp_header_t *udp0, *udp1;
1569           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1570           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1571           const dpo_id_t *dpo0, *dpo1;
1572           const load_balance_t *lb0, *lb1;
1573           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1574           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1575           i32 len_diff0, len_diff1;
1576           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1577           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1578           u32 sw_if_index0, sw_if_index1;
1579
1580           pi0 = to_next[0] = from[0];
1581           pi1 = to_next[1] = from[1];
1582           from += 2;
1583           n_left_from -= 2;
1584           to_next += 2;
1585           n_left_to_next -= 2;
1586
1587           next0 = next1 = IP_LOCAL_NEXT_DROP;
1588
1589           p0 = vlib_get_buffer (vm, pi0);
1590           p1 = vlib_get_buffer (vm, pi1);
1591
1592           ip0 = vlib_buffer_get_current (p0);
1593           ip1 = vlib_buffer_get_current (p1);
1594
1595           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1596           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1597
1598           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1599           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1600
1601           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1602           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1603
1604           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1605           fib_index0 =
1606             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1607              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1608
1609           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1610           fib_index1 =
1611             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1612              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1613
1614           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1615           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1616
1617           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1618           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1619
1620           /* Treat IP frag packets as "experimental" protocol for now
1621              until support of IP frag reassembly is implemented */
1622           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1623           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1624
1625           if (head_of_feature_arc == 0)
1626             {
1627               error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1628               goto skip_checks;
1629             }
1630
1631           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1632           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1633           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1634           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1635
1636           flags0 = p0->flags;
1637           flags1 = p1->flags;
1638
1639           good_tcp_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1640           good_tcp_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1641
1642           udp0 = ip4_next_header (ip0);
1643           udp1 = ip4_next_header (ip1);
1644
1645           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1646           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1647           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1648
1649           /* Verify UDP length. */
1650           ip_len0 = clib_net_to_host_u16 (ip0->length);
1651           ip_len1 = clib_net_to_host_u16 (ip1->length);
1652           udp_len0 = clib_net_to_host_u16 (udp0->length);
1653           udp_len1 = clib_net_to_host_u16 (udp1->length);
1654
1655           len_diff0 = ip_len0 - udp_len0;
1656           len_diff1 = ip_len1 - udp_len1;
1657
1658           len_diff0 = is_udp0 ? len_diff0 : 0;
1659           len_diff1 = is_udp1 ? len_diff1 : 0;
1660
1661           if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1662                                & good_tcp_udp0 & good_tcp_udp1)))
1663             {
1664               if (is_tcp_udp0)
1665                 {
1666                   if (is_tcp_udp0
1667                       && !(flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
1668                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1669                   good_tcp_udp0 =
1670                     (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1671                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1672                 }
1673               if (is_tcp_udp1)
1674                 {
1675                   if (is_tcp_udp1
1676                       && !(flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
1677                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1678                   good_tcp_udp1 =
1679                     (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1680                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1681                 }
1682             }
1683
1684           good_tcp_udp0 &= len_diff0 >= 0;
1685           good_tcp_udp1 &= len_diff1 >= 0;
1686
1687           leaf0 =
1688             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1689           leaf1 =
1690             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1691
1692           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1693
1694           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1695           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1696
1697           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1698           error0 = (is_tcp_udp0 && !good_tcp_udp0
1699                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1700           error1 = (is_tcp_udp1 && !good_tcp_udp1
1701                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1702
1703           leaf0 =
1704             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1705           leaf1 =
1706             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1707
1708           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1709             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1710           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1711
1712           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1713             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1714           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1715
1716           lb0 = load_balance_get (lbi0);
1717           lb1 = load_balance_get (lbi1);
1718           dpo0 = load_balance_get_bucket_i (lb0, 0);
1719           dpo1 = load_balance_get_bucket_i (lb1, 0);
1720
1721           /*
1722            * Must have a route to source otherwise we drop the packet.
1723            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1724            *
1725            * The checks are:
1726            *  - the source is a recieve => it's from us => bogus, do this
1727            *    first since it sets a different error code.
1728            *  - uRPF check for any route to source - accept if passes.
1729            *  - allow packets destined to the broadcast address from unknown sources
1730            */
1731           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1732                      dpo0->dpoi_type == DPO_RECEIVE) ?
1733                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1734           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1735                      !fib_urpf_check_size (lb0->lb_urpf) &&
1736                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1737                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1738           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1739                      dpo1->dpoi_type == DPO_RECEIVE) ?
1740                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1741           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1742                      !fib_urpf_check_size (lb1->lb_urpf) &&
1743                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1744                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1745
1746         skip_checks:
1747
1748           next0 = lm->local_next_by_ip_protocol[proto0];
1749           next1 = lm->local_next_by_ip_protocol[proto1];
1750
1751           next0 =
1752             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1753           next1 =
1754             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1755
1756           p0->error = error0 ? error_node->errors[error0] : 0;
1757           p1->error = error1 ? error_node->errors[error1] : 0;
1758
1759           if (head_of_feature_arc)
1760             {
1761               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1762                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1763               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1764                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1765             }
1766
1767           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1768                                            n_left_to_next, pi0, pi1,
1769                                            next0, next1);
1770         }
1771
1772       while (n_left_from > 0 && n_left_to_next > 0)
1773         {
1774           vlib_buffer_t *p0;
1775           ip4_header_t *ip0;
1776           udp_header_t *udp0;
1777           ip4_fib_mtrie_t *mtrie0;
1778           ip4_fib_mtrie_leaf_t leaf0;
1779           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1780           i32 len_diff0;
1781           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1782           load_balance_t *lb0;
1783           const dpo_id_t *dpo0;
1784           u32 sw_if_index0;
1785
1786           pi0 = to_next[0] = from[0];
1787           from += 1;
1788           n_left_from -= 1;
1789           to_next += 1;
1790           n_left_to_next -= 1;
1791
1792           next0 = IP_LOCAL_NEXT_DROP;
1793
1794           p0 = vlib_get_buffer (vm, pi0);
1795
1796           ip0 = vlib_buffer_get_current (p0);
1797
1798           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1799
1800           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1801
1802           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1803
1804           fib_index0 =
1805             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1806              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1807
1808           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1809
1810           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1811
1812           /* Treat IP frag packets as "experimental" protocol for now
1813              until support of IP frag reassembly is implemented */
1814           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1815
1816           if (head_of_feature_arc == 0)
1817             {
1818               error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1819               goto skip_check;
1820             }
1821
1822           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1823           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1824
1825           flags0 = p0->flags;
1826
1827           good_tcp_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1828
1829           udp0 = ip4_next_header (ip0);
1830
1831           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1832           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1833
1834           /* Verify UDP length. */
1835           ip_len0 = clib_net_to_host_u16 (ip0->length);
1836           udp_len0 = clib_net_to_host_u16 (udp0->length);
1837
1838           len_diff0 = ip_len0 - udp_len0;
1839
1840           len_diff0 = is_udp0 ? len_diff0 : 0;
1841
1842           if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1843             {
1844               if (is_tcp_udp0)
1845                 {
1846                   if (is_tcp_udp0
1847                       && !(flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
1848                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1849                   good_tcp_udp0 =
1850                     (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1851                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1852                 }
1853             }
1854
1855           good_tcp_udp0 &= len_diff0 >= 0;
1856
1857           leaf0 =
1858             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1859
1860           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1861
1862           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1863
1864           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1865           error0 = (is_tcp_udp0 && !good_tcp_udp0
1866                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1867
1868           leaf0 =
1869             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1870
1871           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1872           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1873
1874           lb0 = load_balance_get (lbi0);
1875           dpo0 = load_balance_get_bucket_i (lb0, 0);
1876
1877           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1878             vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1879
1880           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1881                      dpo0->dpoi_type == DPO_RECEIVE) ?
1882                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1883           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1884                      !fib_urpf_check_size (lb0->lb_urpf) &&
1885                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1886                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1887
1888         skip_check:
1889
1890           next0 = lm->local_next_by_ip_protocol[proto0];
1891
1892           next0 =
1893             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1894
1895           p0->error = error0 ? error_node->errors[error0] : 0;
1896
1897           if (head_of_feature_arc)
1898             {
1899               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1900                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1901             }
1902
1903           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1904                                            n_left_to_next, pi0, next0);
1905
1906         }
1907
1908       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1909     }
1910
1911   return frame->n_vectors;
1912 }
1913
1914 static uword
1915 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1916 {
1917   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1918 }
1919
1920 /* *INDENT-OFF* */
1921 VLIB_REGISTER_NODE (ip4_local_node) =
1922 {
1923   .function = ip4_local,
1924   .name = "ip4-local",
1925   .vector_size = sizeof (u32),
1926   .format_trace = format_ip4_forward_next_trace,
1927   .n_next_nodes = IP_LOCAL_N_NEXT,
1928   .next_nodes =
1929   {
1930     [IP_LOCAL_NEXT_DROP] = "error-drop",
1931     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1932     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1933     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1934 };
1935 /* *INDENT-ON* */
1936
1937 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1938
1939 static uword
1940 ip4_local_end_of_arc (vlib_main_t * vm,
1941                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1942 {
1943   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1944 }
1945
1946 /* *INDENT-OFF* */
1947 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1948   .function = ip4_local_end_of_arc,
1949   .name = "ip4-local-end-of-arc",
1950   .vector_size = sizeof (u32),
1951
1952   .format_trace = format_ip4_forward_next_trace,
1953   .sibling_of = "ip4-local",
1954 };
1955
1956 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1957
1958 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1959   .arc_name = "ip4-local",
1960   .node_name = "ip4-local-end-of-arc",
1961   .runs_before = 0, /* not before any other features */
1962 };
1963 /* *INDENT-ON* */
1964
1965 void
1966 ip4_register_protocol (u32 protocol, u32 node_index)
1967 {
1968   vlib_main_t *vm = vlib_get_main ();
1969   ip4_main_t *im = &ip4_main;
1970   ip_lookup_main_t *lm = &im->lookup_main;
1971
1972   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1973   lm->local_next_by_ip_protocol[protocol] =
1974     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1975 }
1976
1977 static clib_error_t *
1978 show_ip_local_command_fn (vlib_main_t * vm,
1979                           unformat_input_t * input, vlib_cli_command_t * cmd)
1980 {
1981   ip4_main_t *im = &ip4_main;
1982   ip_lookup_main_t *lm = &im->lookup_main;
1983   int i;
1984
1985   vlib_cli_output (vm, "Protocols handled by ip4_local");
1986   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1987     {
1988       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1989         vlib_cli_output (vm, "%d", i);
1990     }
1991   return 0;
1992 }
1993
1994
1995
1996 /*?
1997  * Display the set of protocols handled by the local IPv4 stack.
1998  *
1999  * @cliexpar
2000  * Example of how to display local protocol table:
2001  * @cliexstart{show ip local}
2002  * Protocols handled by ip4_local
2003  * 1
2004  * 17
2005  * 47
2006  * @cliexend
2007 ?*/
2008 /* *INDENT-OFF* */
2009 VLIB_CLI_COMMAND (show_ip_local, static) =
2010 {
2011   .path = "show ip local",
2012   .function = show_ip_local_command_fn,
2013   .short_help = "show ip local",
2014 };
2015 /* *INDENT-ON* */
2016
2017 always_inline uword
2018 ip4_arp_inline (vlib_main_t * vm,
2019                 vlib_node_runtime_t * node,
2020                 vlib_frame_t * frame, int is_glean)
2021 {
2022   vnet_main_t *vnm = vnet_get_main ();
2023   ip4_main_t *im = &ip4_main;
2024   ip_lookup_main_t *lm = &im->lookup_main;
2025   u32 *from, *to_next_drop;
2026   uword n_left_from, n_left_to_next_drop, next_index;
2027   static f64 time_last_seed_change = -1e100;
2028   static u32 hash_seeds[3];
2029   static uword hash_bitmap[256 / BITS (uword)];
2030   f64 time_now;
2031
2032   if (node->flags & VLIB_NODE_FLAG_TRACE)
2033     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2034
2035   time_now = vlib_time_now (vm);
2036   if (time_now - time_last_seed_change > 1e-3)
2037     {
2038       uword i;
2039       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2040                                             sizeof (hash_seeds));
2041       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2042         hash_seeds[i] = r[i];
2043
2044       /* Mark all hash keys as been no-seen before. */
2045       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2046         hash_bitmap[i] = 0;
2047
2048       time_last_seed_change = time_now;
2049     }
2050
2051   from = vlib_frame_vector_args (frame);
2052   n_left_from = frame->n_vectors;
2053   next_index = node->cached_next_index;
2054   if (next_index == IP4_ARP_NEXT_DROP)
2055     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
2056
2057   while (n_left_from > 0)
2058     {
2059       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2060                            to_next_drop, n_left_to_next_drop);
2061
2062       while (n_left_from > 0 && n_left_to_next_drop > 0)
2063         {
2064           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2065           ip_adjacency_t *adj0;
2066           vlib_buffer_t *p0;
2067           ip4_header_t *ip0;
2068           uword bm0;
2069
2070           pi0 = from[0];
2071
2072           p0 = vlib_get_buffer (vm, pi0);
2073
2074           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2075           adj0 = adj_get (adj_index0);
2076           ip0 = vlib_buffer_get_current (p0);
2077
2078           a0 = hash_seeds[0];
2079           b0 = hash_seeds[1];
2080           c0 = hash_seeds[2];
2081
2082           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2083           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2084
2085           if (is_glean)
2086             {
2087               /*
2088                * this is the Glean case, so we are ARPing for the
2089                * packet's destination
2090                */
2091               a0 ^= ip0->dst_address.data_u32;
2092             }
2093           else
2094             {
2095               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2096             }
2097           b0 ^= sw_if_index0;
2098
2099           hash_v3_finalize32 (a0, b0, c0);
2100
2101           c0 &= BITS (hash_bitmap) - 1;
2102           c0 = c0 / BITS (uword);
2103           m0 = (uword) 1 << (c0 % BITS (uword));
2104
2105           bm0 = hash_bitmap[c0];
2106           drop0 = (bm0 & m0) != 0;
2107
2108           /* Mark it as seen. */
2109           hash_bitmap[c0] = bm0 | m0;
2110
2111           from += 1;
2112           n_left_from -= 1;
2113           to_next_drop[0] = pi0;
2114           to_next_drop += 1;
2115           n_left_to_next_drop -= 1;
2116
2117           p0->error =
2118             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2119                          IP4_ARP_ERROR_REQUEST_SENT];
2120
2121           /*
2122            * the adj has been updated to a rewrite but the node the DPO that got
2123            * us here hasn't - yet. no big deal. we'll drop while we wait.
2124            */
2125           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2126             continue;
2127
2128           if (drop0)
2129             continue;
2130
2131           /*
2132            * Can happen if the control-plane is programming tables
2133            * with traffic flowing; at least that's today's lame excuse.
2134            */
2135           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2136               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2137             {
2138               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2139             }
2140           else
2141             /* Send ARP request. */
2142             {
2143               u32 bi0 = 0;
2144               vlib_buffer_t *b0;
2145               ethernet_arp_header_t *h0;
2146               vnet_hw_interface_t *hw_if0;
2147
2148               h0 =
2149                 vlib_packet_template_get_packet (vm,
2150                                                  &im->ip4_arp_request_packet_template,
2151                                                  &bi0);
2152
2153               /* Add rewrite/encap string for ARP packet. */
2154               vnet_rewrite_one_header (adj0[0], h0,
2155                                        sizeof (ethernet_header_t));
2156
2157               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2158
2159               /* Src ethernet address in ARP header. */
2160               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2161                            hw_if0->hw_address,
2162                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2163
2164               if (is_glean)
2165                 {
2166                   /* The interface's source address is stashed in the Glean Adj */
2167                   h0->ip4_over_ethernet[0].ip4 =
2168                     adj0->sub_type.glean.receive_addr.ip4;
2169
2170                   /* Copy in destination address we are requesting. This is the
2171                    * glean case, so it's the packet's destination.*/
2172                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2173                     ip0->dst_address.data_u32;
2174                 }
2175               else
2176                 {
2177                   /* Src IP address in ARP header. */
2178                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2179                                                   &h0->
2180                                                   ip4_over_ethernet[0].ip4))
2181                     {
2182                       /* No source address available */
2183                       p0->error =
2184                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2185                       vlib_buffer_free (vm, &bi0, 1);
2186                       continue;
2187                     }
2188
2189                   /* Copy in destination address we are requesting from the
2190                      incomplete adj */
2191                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2192                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2193                 }
2194
2195               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2196               b0 = vlib_get_buffer (vm, bi0);
2197               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2198
2199               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2200
2201               vlib_set_next_frame_buffer (vm, node,
2202                                           adj0->rewrite_header.next_index,
2203                                           bi0);
2204             }
2205         }
2206
2207       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2208     }
2209
2210   return frame->n_vectors;
2211 }
2212
2213 static uword
2214 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2215 {
2216   return (ip4_arp_inline (vm, node, frame, 0));
2217 }
2218
2219 static uword
2220 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2221 {
2222   return (ip4_arp_inline (vm, node, frame, 1));
2223 }
2224
2225 static char *ip4_arp_error_strings[] = {
2226   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2227   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2228   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2229   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2230   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2231   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2232 };
2233
2234 VLIB_REGISTER_NODE (ip4_arp_node) =
2235 {
2236   .function = ip4_arp,.name = "ip4-arp",.vector_size =
2237     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2238     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2239     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2240   {
2241   [IP4_ARP_NEXT_DROP] = "error-drop",}
2242 ,};
2243
2244 VLIB_REGISTER_NODE (ip4_glean_node) =
2245 {
2246   .function = ip4_glean,.name = "ip4-glean",.vector_size =
2247     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2248     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2249     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2250   {
2251   [IP4_ARP_NEXT_DROP] = "error-drop",}
2252 ,};
2253
2254 #define foreach_notrace_ip4_arp_error           \
2255 _(DROP)                                         \
2256 _(REQUEST_SENT)                                 \
2257 _(REPLICATE_DROP)                               \
2258 _(REPLICATE_FAIL)
2259
2260 clib_error_t *
2261 arp_notrace_init (vlib_main_t * vm)
2262 {
2263   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2264
2265   /* don't trace ARP request packets */
2266 #define _(a)                                    \
2267     vnet_pcap_drop_trace_filter_add_del         \
2268         (rt->errors[IP4_ARP_ERROR_##a],         \
2269          1 /* is_add */);
2270   foreach_notrace_ip4_arp_error;
2271 #undef _
2272   return 0;
2273 }
2274
2275 VLIB_INIT_FUNCTION (arp_notrace_init);
2276
2277
2278 /* Send an ARP request to see if given destination is reachable on given interface. */
2279 clib_error_t *
2280 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2281 {
2282   vnet_main_t *vnm = vnet_get_main ();
2283   ip4_main_t *im = &ip4_main;
2284   ethernet_arp_header_t *h;
2285   ip4_address_t *src;
2286   ip_interface_address_t *ia;
2287   ip_adjacency_t *adj;
2288   vnet_hw_interface_t *hi;
2289   vnet_sw_interface_t *si;
2290   vlib_buffer_t *b;
2291   adj_index_t ai;
2292   u32 bi = 0;
2293
2294   si = vnet_get_sw_interface (vnm, sw_if_index);
2295
2296   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2297     {
2298       return clib_error_return (0, "%U: interface %U down",
2299                                 format_ip4_address, dst,
2300                                 format_vnet_sw_if_index_name, vnm,
2301                                 sw_if_index);
2302     }
2303
2304   src =
2305     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2306   if (!src)
2307     {
2308       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2309       return clib_error_return
2310         (0,
2311          "no matching interface address for destination %U (interface %U)",
2312          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2313          sw_if_index);
2314     }
2315
2316   ip46_address_t nh = {
2317     .ip4 = *dst,
2318   };
2319
2320   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2321                             VNET_LINK_IP4, &nh, sw_if_index);
2322   adj = adj_get (ai);
2323
2324   h = vlib_packet_template_get_packet (vm,
2325                                        &im->ip4_arp_request_packet_template,
2326                                        &bi);
2327
2328   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2329   if (PREDICT_FALSE (!hi->hw_address))
2330     {
2331       return clib_error_return (0, "%U: interface %U do not support ip probe",
2332                                 format_ip4_address, dst,
2333                                 format_vnet_sw_if_index_name, vnm,
2334                                 sw_if_index);
2335     }
2336
2337   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2338                sizeof (h->ip4_over_ethernet[0].ethernet));
2339
2340   h->ip4_over_ethernet[0].ip4 = src[0];
2341   h->ip4_over_ethernet[1].ip4 = dst[0];
2342
2343   b = vlib_get_buffer (vm, bi);
2344   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2345     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2346
2347   /* Add encapsulation string for software interface (e.g. ethernet header). */
2348   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2349   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2350
2351   {
2352     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2353     u32 *to_next = vlib_frame_vector_args (f);
2354     to_next[0] = bi;
2355     f->n_vectors = 1;
2356     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2357   }
2358
2359   adj_unlock (ai);
2360   return /* no error */ 0;
2361 }
2362
/* Next-node indices used by ip4_rewrite_inline (). */
typedef enum
{
  /* Initial next0/next1 value for every packet. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Selected when the TTL expires while forwarding. */
  IP4_REWRITE_NEXT_ICMP_ERROR,
} ip4_rewrite_next_t;
2368
2369 always_inline uword
2370 ip4_rewrite_inline (vlib_main_t * vm,
2371                     vlib_node_runtime_t * node,
2372                     vlib_frame_t * frame,
2373                     int do_counters, int is_midchain, int is_mcast)
2374 {
2375   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2376   u32 *from = vlib_frame_vector_args (frame);
2377   u32 n_left_from, n_left_to_next, *to_next, next_index;
2378   vlib_node_runtime_t *error_node =
2379     vlib_node_get_runtime (vm, ip4_input_node.index);
2380
2381   n_left_from = frame->n_vectors;
2382   next_index = node->cached_next_index;
2383   u32 thread_index = vlib_get_thread_index ();
2384
2385   while (n_left_from > 0)
2386     {
2387       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2388
2389       while (n_left_from >= 4 && n_left_to_next >= 2)
2390         {
2391           ip_adjacency_t *adj0, *adj1;
2392           vlib_buffer_t *p0, *p1;
2393           ip4_header_t *ip0, *ip1;
2394           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2395           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2396           u32 tx_sw_if_index0, tx_sw_if_index1;
2397
2398           /* Prefetch next iteration. */
2399           {
2400             vlib_buffer_t *p2, *p3;
2401
2402             p2 = vlib_get_buffer (vm, from[2]);
2403             p3 = vlib_get_buffer (vm, from[3]);
2404
2405             vlib_prefetch_buffer_header (p2, STORE);
2406             vlib_prefetch_buffer_header (p3, STORE);
2407
2408             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2409             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2410           }
2411
2412           pi0 = to_next[0] = from[0];
2413           pi1 = to_next[1] = from[1];
2414
2415           from += 2;
2416           n_left_from -= 2;
2417           to_next += 2;
2418           n_left_to_next -= 2;
2419
2420           p0 = vlib_get_buffer (vm, pi0);
2421           p1 = vlib_get_buffer (vm, pi1);
2422
2423           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2424           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2425
2426           /*
2427            * pre-fetch the per-adjacency counters
2428            */
2429           if (do_counters)
2430             {
2431               vlib_prefetch_combined_counter (&adjacency_counters,
2432                                               thread_index, adj_index0);
2433               vlib_prefetch_combined_counter (&adjacency_counters,
2434                                               thread_index, adj_index1);
2435             }
2436
2437           ip0 = vlib_buffer_get_current (p0);
2438           ip1 = vlib_buffer_get_current (p1);
2439
2440           error0 = error1 = IP4_ERROR_NONE;
2441           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2442
2443           /* Decrement TTL & update checksum.
2444              Works either endian, so no need for byte swap. */
2445           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2446             {
2447               i32 ttl0 = ip0->ttl;
2448
2449               /* Input node should have reject packets with ttl 0. */
2450               ASSERT (ip0->ttl > 0);
2451
2452               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2453               checksum0 += checksum0 >= 0xffff;
2454
2455               ip0->checksum = checksum0;
2456               ttl0 -= 1;
2457               ip0->ttl = ttl0;
2458
2459               /*
2460                * If the ttl drops below 1 when forwarding, generate
2461                * an ICMP response.
2462                */
2463               if (PREDICT_FALSE (ttl0 <= 0))
2464                 {
2465                   error0 = IP4_ERROR_TIME_EXPIRED;
2466                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2467                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2468                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2469                                                0);
2470                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2471                 }
2472
2473               /* Verify checksum. */
2474               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2475                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2476             }
2477           else
2478             {
2479               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2480             }
2481           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2482             {
2483               i32 ttl1 = ip1->ttl;
2484
2485               /* Input node should have reject packets with ttl 0. */
2486               ASSERT (ip1->ttl > 0);
2487
2488               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2489               checksum1 += checksum1 >= 0xffff;
2490
2491               ip1->checksum = checksum1;
2492               ttl1 -= 1;
2493               ip1->ttl = ttl1;
2494
2495               /*
2496                * If the ttl drops below 1 when forwarding, generate
2497                * an ICMP response.
2498                */
2499               if (PREDICT_FALSE (ttl1 <= 0))
2500                 {
2501                   error1 = IP4_ERROR_TIME_EXPIRED;
2502                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2503                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2504                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2505                                                0);
2506                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2507                 }
2508
2509               /* Verify checksum. */
2510               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2511                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2512             }
2513           else
2514             {
2515               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2516             }
2517
2518           /* Rewrite packet header and updates lengths. */
2519           adj0 = adj_get (adj_index0);
2520           adj1 = adj_get (adj_index1);
2521
2522           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2523           rw_len0 = adj0[0].rewrite_header.data_bytes;
2524           rw_len1 = adj1[0].rewrite_header.data_bytes;
2525           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2526           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2527
2528           /* Check MTU of outgoing interface. */
2529           error0 =
2530             (vlib_buffer_length_in_chain (vm, p0) >
2531              adj0[0].
2532              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2533              error0);
2534           error1 =
2535             (vlib_buffer_length_in_chain (vm, p1) >
2536              adj1[0].
2537              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2538              error1);
2539
2540           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2541            * to see the IP headerr */
2542           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2543             {
2544               next0 = adj0[0].rewrite_header.next_index;
2545               p0->current_data -= rw_len0;
2546               p0->current_length += rw_len0;
2547               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2548               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2549
2550               if (PREDICT_FALSE
2551                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2552                 vnet_feature_arc_start (lm->output_feature_arc_index,
2553                                         tx_sw_if_index0, &next0, p0);
2554             }
2555           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2556             {
2557               next1 = adj1[0].rewrite_header.next_index;
2558               p1->current_data -= rw_len1;
2559               p1->current_length += rw_len1;
2560
2561               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2562               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2563
2564               if (PREDICT_FALSE
2565                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2566                 vnet_feature_arc_start (lm->output_feature_arc_index,
2567                                         tx_sw_if_index1, &next1, p1);
2568             }
2569
2570           /* Guess we are only writing on simple Ethernet header. */
2571           vnet_rewrite_two_headers (adj0[0], adj1[0],
2572                                     ip0, ip1, sizeof (ethernet_header_t));
2573
2574           /*
2575            * Bump the per-adjacency counters
2576            */
2577           if (do_counters)
2578             {
2579               vlib_increment_combined_counter
2580                 (&adjacency_counters,
2581                  thread_index,
2582                  adj_index0, 1,
2583                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2584
2585               vlib_increment_combined_counter
2586                 (&adjacency_counters,
2587                  thread_index,
2588                  adj_index1, 1,
2589                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2590             }
2591
2592           if (is_midchain)
2593             {
2594               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2595               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2596             }
2597           if (is_mcast)
2598             {
2599               /*
2600                * copy bytes from the IP address into the MAC rewrite
2601                */
2602               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2603               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2604             }
2605
2606           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2607                                            to_next, n_left_to_next,
2608                                            pi0, pi1, next0, next1);
2609         }
2610
2611       while (n_left_from > 0 && n_left_to_next > 0)
2612         {
2613           ip_adjacency_t *adj0;
2614           vlib_buffer_t *p0;
2615           ip4_header_t *ip0;
2616           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2617           u32 tx_sw_if_index0;
2618
2619           pi0 = to_next[0] = from[0];
2620
2621           p0 = vlib_get_buffer (vm, pi0);
2622
2623           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2624
2625           adj0 = adj_get (adj_index0);
2626
2627           ip0 = vlib_buffer_get_current (p0);
2628
2629           error0 = IP4_ERROR_NONE;
2630           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2631
2632           /* Decrement TTL & update checksum. */
2633           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2634             {
2635               i32 ttl0 = ip0->ttl;
2636
2637               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2638
2639               checksum0 += checksum0 >= 0xffff;
2640
2641               ip0->checksum = checksum0;
2642
2643               ASSERT (ip0->ttl > 0);
2644
2645               ttl0 -= 1;
2646
2647               ip0->ttl = ttl0;
2648
2649               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2650                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2651
2652               if (PREDICT_FALSE (ttl0 <= 0))
2653                 {
2654                   /*
2655                    * If the ttl drops below 1 when forwarding, generate
2656                    * an ICMP response.
2657                    */
2658                   error0 = IP4_ERROR_TIME_EXPIRED;
2659                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2660                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2661                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2662                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2663                                                0);
2664                 }
2665             }
2666           else
2667             {
2668               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2669             }
2670
2671           if (do_counters)
2672             vlib_prefetch_combined_counter (&adjacency_counters,
2673                                             thread_index, adj_index0);
2674
2675           /* Guess we are only writing on simple Ethernet header. */
2676           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2677           if (is_mcast)
2678             {
2679               /*
2680                * copy bytes from the IP address into the MAC rewrite
2681                */
2682               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2683             }
2684
2685           /* Update packet buffer attributes/set output interface. */
2686           rw_len0 = adj0[0].rewrite_header.data_bytes;
2687           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2688
2689           if (do_counters)
2690             vlib_increment_combined_counter
2691               (&adjacency_counters,
2692                thread_index, adj_index0, 1,
2693                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2694
2695           /* Check MTU of outgoing interface. */
2696           error0 = (vlib_buffer_length_in_chain (vm, p0)
2697                     > adj0[0].rewrite_header.max_l3_packet_bytes
2698                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2699
2700           p0->error = error_node->errors[error0];
2701
2702           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2703            * to see the IP headerr */
2704           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2705             {
2706               p0->current_data -= rw_len0;
2707               p0->current_length += rw_len0;
2708               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2709
2710               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2711               next0 = adj0[0].rewrite_header.next_index;
2712
2713               if (is_midchain)
2714                 {
2715                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2716                 }
2717
2718               if (PREDICT_FALSE
2719                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2720                 vnet_feature_arc_start (lm->output_feature_arc_index,
2721                                         tx_sw_if_index0, &next0, p0);
2722
2723             }
2724
2725           from += 1;
2726           n_left_from -= 1;
2727           to_next += 1;
2728           n_left_to_next -= 1;
2729
2730           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2731                                            to_next, n_left_to_next,
2732                                            pi0, next0);
2733         }
2734
2735       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2736     }
2737
2738   /* Need to do trace after rewrites to pick up new packet data. */
2739   if (node->flags & VLIB_NODE_FLAG_TRACE)
2740     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2741
2742   return frame->n_vectors;
2743 }
2744
2745
2746 /** @brief IPv4 rewrite node.
2747     @node ip4-rewrite
2748
2749     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2750     header checksum, fetch the ip adjacency, check the outbound mtu,
2751     apply the adjacency rewrite, and send pkts to the adjacency
2752     rewrite header's rewrite_next_index.
2753
2754     @param vm vlib_main_t corresponding to the current thread
2755     @param node vlib_node_runtime_t
2756     @param frame vlib_frame_t whose contents should be dispatched
2757
2758     @par Graph mechanics: buffer metadata, next index usage
2759
2760     @em Uses:
2761     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2762         - the rewrite adjacency index
2763     - <code>adj->lookup_next_index</code>
2764         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2765           the packet will be dropped.
2766     - <code>adj->rewrite_header</code>
2767         - Rewrite string length, rewrite string, next_index
2768
2769     @em Sets:
2770     - <code>b->current_data, b->current_length</code>
2771         - Updated net of applying the rewrite string
2772
2773     <em>Next Indices:</em>
2774     - <code> adj->rewrite_header.next_index </code>
2775       or @c error-drop
2776 */
2777 static uword
2778 ip4_rewrite (vlib_main_t * vm,
2779              vlib_node_runtime_t * node, vlib_frame_t * frame)
2780 {
2781   if (adj_are_counters_enabled ())
2782     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2783   else
2784     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2785 }
2786
2787 static uword
2788 ip4_midchain (vlib_main_t * vm,
2789               vlib_node_runtime_t * node, vlib_frame_t * frame)
2790 {
2791   if (adj_are_counters_enabled ())
2792     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2793   else
2794     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2795 }
2796
2797 static uword
2798 ip4_rewrite_mcast (vlib_main_t * vm,
2799                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2800 {
2801   if (adj_are_counters_enabled ())
2802     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2803   else
2804     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2805 }
2806
2807 static uword
2808 ip4_mcast_midchain (vlib_main_t * vm,
2809                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2810 {
2811   if (adj_are_counters_enabled ())
2812     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2813   else
2814     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2815 }
2816
2817 /* *INDENT-OFF* */
2818 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2819   .function = ip4_rewrite,
2820   .name = "ip4-rewrite",
2821   .vector_size = sizeof (u32),
2822
2823   .format_trace = format_ip4_rewrite_trace,
2824
2825   .n_next_nodes = 2,
2826   .next_nodes = {
2827     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2828     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2829   },
2830 };
2831 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2832
2833 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2834   .function = ip4_rewrite_mcast,
2835   .name = "ip4-rewrite-mcast",
2836   .vector_size = sizeof (u32),
2837
2838   .format_trace = format_ip4_rewrite_trace,
2839   .sibling_of = "ip4-rewrite",
2840 };
2841 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2842
2843 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2844   .function = ip4_mcast_midchain,
2845   .name = "ip4-mcast-midchain",
2846   .vector_size = sizeof (u32),
2847
2848   .format_trace = format_ip4_rewrite_trace,
2849   .sibling_of = "ip4-rewrite",
2850 };
2851 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2852
2853 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2854   .function = ip4_midchain,
2855   .name = "ip4-midchain",
2856   .vector_size = sizeof (u32),
2857   .format_trace = format_ip4_forward_next_trace,
2858   .sibling_of =  "ip4-rewrite",
2859 };
2860 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* *INDENT-ON* */
2862
2863 static clib_error_t *
2864 add_del_interface_table (vlib_main_t * vm,
2865                          unformat_input_t * input, vlib_cli_command_t * cmd)
2866 {
2867   vnet_main_t *vnm = vnet_get_main ();
2868   ip_interface_address_t *ia;
2869   clib_error_t *error = 0;
2870   u32 sw_if_index, table_id;
2871
2872   sw_if_index = ~0;
2873
2874   if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2875     {
2876       error = clib_error_return (0, "unknown interface `%U'",
2877                                  format_unformat_error, input);
2878       goto done;
2879     }
2880
2881   if (unformat (input, "%d", &table_id))
2882     ;
2883   else
2884     {
2885       error = clib_error_return (0, "expected table id `%U'",
2886                                  format_unformat_error, input);
2887       goto done;
2888     }
2889
2890   /*
2891    * If the interface already has in IP address, then a change int
2892    * VRF is not allowed. The IP address applied must first be removed.
2893    * We do not do that automatically here, since VPP has no knowledge
2894    * of whether thoses subnets are valid in the destination VRF.
2895    */
2896   /* *INDENT-OFF* */
2897   foreach_ip_interface_address (&ip4_main.lookup_main,
2898                                 ia, sw_if_index,
2899                                 1 /* honor unnumbered */,
2900   ({
2901       ip4_address_t * a;
2902
2903       a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2904       error = clib_error_return (0, "interface %U has address %U",
2905                                  format_vnet_sw_if_index_name, vnm,
2906                                  sw_if_index,
2907                                  format_ip4_address, a);
2908       goto done;
2909    }));
2910    /* *INDENT-ON* */
2911
2912 {
2913   ip4_main_t *im = &ip4_main;
2914   u32 fib_index;
2915
2916   fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2917
2918   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2919   im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2920
2921   fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2922   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2923   im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2924 }
2925
2926 done:
2927 return error;
2928 }
2929
2930 /*?
2931  * Place the indicated interface into the supplied IPv4 FIB table (also known
2932  * as a VRF). If the FIB table does not exist, this command creates it. To
2933  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2934  * FIB table will only be displayed if a route has been added to the table, or
2935  * an IP Address is assigned to an interface in the table (which adds a route
2936  * automatically).
2937  *
2938  * @note IP addresses added after setting the interface IP table are added to
2939  * the indicated FIB table. If an IP address is added prior to changing the
2940  * table then this is an error. The control plane must remove these addresses
2941  * first and then change the table. VPP will not automatically move the
2942  * addresses from the old to the new table as it does not know the validity
2943  * of such a change.
2944  *
2945  * @cliexpar
2946  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2947  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2948  ?*/
2949 /* *INDENT-OFF* */
2950 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2951 {
2952   .path = "set interface ip table",
2953   .function = add_del_interface_table,
2954   .short_help = "set interface ip table <interface> <table-id>",
2955 };
2956 /* *INDENT-ON* */
2957
2958 int
2959 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2960 {
2961   ip4_fib_mtrie_t *mtrie0;
2962   ip4_fib_mtrie_leaf_t leaf0;
2963   u32 lbi0;
2964
2965   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2966
2967   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2968   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2969   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2970
2971   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2972
2973   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2974 }
2975
2976 static clib_error_t *
2977 test_lookup_command_fn (vlib_main_t * vm,
2978                         unformat_input_t * input, vlib_cli_command_t * cmd)
2979 {
2980   ip4_fib_t *fib;
2981   u32 table_id = 0;
2982   f64 count = 1;
2983   u32 n;
2984   int i;
2985   ip4_address_t ip4_base_address;
2986   u64 errors = 0;
2987
2988   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2989     {
2990       if (unformat (input, "table %d", &table_id))
2991         {
2992           /* Make sure the entry exists. */
2993           fib = ip4_fib_get (table_id);
2994           if ((fib) && (fib->index != table_id))
2995             return clib_error_return (0, "<fib-index> %d does not exist",
2996                                       table_id);
2997         }
2998       else if (unformat (input, "count %f", &count))
2999         ;
3000
3001       else if (unformat (input, "%U",
3002                          unformat_ip4_address, &ip4_base_address))
3003         ;
3004       else
3005         return clib_error_return (0, "unknown input `%U'",
3006                                   format_unformat_error, input);
3007     }
3008
3009   n = count;
3010
3011   for (i = 0; i < n; i++)
3012     {
3013       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3014         errors++;
3015
3016       ip4_base_address.as_u32 =
3017         clib_host_to_net_u32 (1 +
3018                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3019     }
3020
3021   if (errors)
3022     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3023   else
3024     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3025
3026   return 0;
3027 }
3028
3029 /*?
3030  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3031  * given FIB table to determine if there is a conflict with the
3032  * adjacency table. The fib-id can be determined by using the
3033  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3034  * of 0 is used.
3035  *
3036  * @todo This command uses fib-id, other commands use table-id (not
3037  * just a name, they are different indexes). Would like to change this
3038  * to table-id for consistency.
3039  *
3040  * @cliexpar
3041  * Example of how to run the test lookup command:
3042  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3043  * No errors in 2 lookups
3044  * @cliexend
3045 ?*/
3046 /* *INDENT-OFF* */
3047 VLIB_CLI_COMMAND (lookup_test_command, static) =
3048 {
3049   .path = "test lookup",
3050   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3051   .function = test_lookup_command_fn,
3052 };
3053 /* *INDENT-ON* */
3054
3055 int
3056 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3057 {
3058   u32 fib_index;
3059
3060   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
3061
3062   if (~0 == fib_index)
3063     return VNET_API_ERROR_NO_SUCH_FIB;
3064
3065   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
3066                                   flow_hash_config);
3067
3068   return 0;
3069 }
3070
3071 static clib_error_t *
3072 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3073                              unformat_input_t * input,
3074                              vlib_cli_command_t * cmd)
3075 {
3076   int matched = 0;
3077   u32 table_id = 0;
3078   u32 flow_hash_config = 0;
3079   int rv;
3080
3081   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3082     {
3083       if (unformat (input, "table %d", &table_id))
3084         matched = 1;
3085 #define _(a,v) \
3086     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3087       foreach_flow_hash_bit
3088 #undef _
3089         else
3090         break;
3091     }
3092
3093   if (matched == 0)
3094     return clib_error_return (0, "unknown input `%U'",
3095                               format_unformat_error, input);
3096
3097   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3098   switch (rv)
3099     {
3100     case 0:
3101       break;
3102
3103     case VNET_API_ERROR_NO_SUCH_FIB:
3104       return clib_error_return (0, "no such FIB table %d", table_id);
3105
3106     default:
3107       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3108       break;
3109     }
3110
3111   return 0;
3112 }
3113
3114 /*?
3115  * Configure the set of IPv4 fields used by the flow hash.
3116  *
3117  * @cliexpar
3118  * Example of how to set the flow hash on a given table:
3119  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of how to display the configured flow hash:
3121  * @cliexstart{show ip fib}
3122  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3123  * 0.0.0.0/0
3124  *   unicast-ip4-chain
3125  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3126  *     [0] [@0]: dpo-drop ip6
3127  * 0.0.0.0/32
3128  *   unicast-ip4-chain
3129  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3130  *     [0] [@0]: dpo-drop ip6
3131  * 224.0.0.0/8
3132  *   unicast-ip4-chain
3133  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3134  *     [0] [@0]: dpo-drop ip6
3135  * 6.0.1.2/32
3136  *   unicast-ip4-chain
3137  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3138  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3139  * 7.0.0.1/32
3140  *   unicast-ip4-chain
3141  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3142  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3143  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3144  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3145  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3146  * 240.0.0.0/8
3147  *   unicast-ip4-chain
3148  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3149  *     [0] [@0]: dpo-drop ip6
3150  * 255.255.255.255/32
3151  *   unicast-ip4-chain
3152  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3153  *     [0] [@0]: dpo-drop ip6
3154  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3155  * 0.0.0.0/0
3156  *   unicast-ip4-chain
3157  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3158  *     [0] [@0]: dpo-drop ip6
3159  * 0.0.0.0/32
3160  *   unicast-ip4-chain
3161  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3162  *     [0] [@0]: dpo-drop ip6
3163  * 172.16.1.0/24
3164  *   unicast-ip4-chain
3165  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3166  *     [0] [@4]: ipv4-glean: af_packet0
3167  * 172.16.1.1/32
3168  *   unicast-ip4-chain
3169  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3170  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3171  * 172.16.1.2/32
3172  *   unicast-ip4-chain
3173  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3174  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3175  * 172.16.2.0/24
3176  *   unicast-ip4-chain
3177  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3178  *     [0] [@4]: ipv4-glean: af_packet1
3179  * 172.16.2.1/32
3180  *   unicast-ip4-chain
3181  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3182  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3183  * 224.0.0.0/8
3184  *   unicast-ip4-chain
3185  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3186  *     [0] [@0]: dpo-drop ip6
3187  * 240.0.0.0/8
3188  *   unicast-ip4-chain
3189  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3190  *     [0] [@0]: dpo-drop ip6
3191  * 255.255.255.255/32
3192  *   unicast-ip4-chain
3193  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3194  *     [0] [@0]: dpo-drop ip6
3195  * @cliexend
3196 ?*/
3197 /* *INDENT-OFF* */
3198 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3199 {
3200   .path = "set ip flow-hash",
3201   .short_help =
3202   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3203   .function = set_ip_flow_hash_command_fn,
3204 };
3205 /* *INDENT-ON* */
3206
3207 int
3208 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3209                              u32 table_index)
3210 {
3211   vnet_main_t *vnm = vnet_get_main ();
3212   vnet_interface_main_t *im = &vnm->interface_main;
3213   ip4_main_t *ipm = &ip4_main;
3214   ip_lookup_main_t *lm = &ipm->lookup_main;
3215   vnet_classify_main_t *cm = &vnet_classify_main;
3216   ip4_address_t *if_addr;
3217
3218   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3219     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3220
3221   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3222     return VNET_API_ERROR_NO_SUCH_ENTRY;
3223
3224   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3225   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3226
3227   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3228
3229   if (NULL != if_addr)
3230     {
3231       fib_prefix_t pfx = {
3232         .fp_len = 32,
3233         .fp_proto = FIB_PROTOCOL_IP4,
3234         .fp_addr.ip4 = *if_addr,
3235       };
3236       u32 fib_index;
3237
3238       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3239                                                        sw_if_index);
3240
3241
3242       if (table_index != (u32) ~ 0)
3243         {
3244           dpo_id_t dpo = DPO_INVALID;
3245
3246           dpo_set (&dpo,
3247                    DPO_CLASSIFY,
3248                    DPO_PROTO_IP4,
3249                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3250
3251           fib_table_entry_special_dpo_add (fib_index,
3252                                            &pfx,
3253                                            FIB_SOURCE_CLASSIFY,
3254                                            FIB_ENTRY_FLAG_NONE, &dpo);
3255           dpo_reset (&dpo);
3256         }
3257       else
3258         {
3259           fib_table_entry_special_remove (fib_index,
3260                                           &pfx, FIB_SOURCE_CLASSIFY);
3261         }
3262     }
3263
3264   return 0;
3265 }
3266
3267 static clib_error_t *
3268 set_ip_classify_command_fn (vlib_main_t * vm,
3269                             unformat_input_t * input,
3270                             vlib_cli_command_t * cmd)
3271 {
3272   u32 table_index = ~0;
3273   int table_index_set = 0;
3274   u32 sw_if_index = ~0;
3275   int rv;
3276
3277   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3278     {
3279       if (unformat (input, "table-index %d", &table_index))
3280         table_index_set = 1;
3281       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3282                          vnet_get_main (), &sw_if_index))
3283         ;
3284       else
3285         break;
3286     }
3287
3288   if (table_index_set == 0)
3289     return clib_error_return (0, "classify table-index must be specified");
3290
3291   if (sw_if_index == ~0)
3292     return clib_error_return (0, "interface / subif must be specified");
3293
3294   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3295
3296   switch (rv)
3297     {
3298     case 0:
3299       break;
3300
3301     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3302       return clib_error_return (0, "No such interface");
3303
3304     case VNET_API_ERROR_NO_SUCH_ENTRY:
3305       return clib_error_return (0, "No such classifier table");
3306     }
3307   return 0;
3308 }
3309
3310 /*?
3311  * Assign a classification table to an interface. The classification
3312  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3314  * on an interface.
3315  *
3316  * @cliexpar
3317  * Example of how to assign a classification table to an interface:
3318  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3319 ?*/
3320 /* *INDENT-OFF* */
3321 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3322 {
3323     .path = "set ip classify",
3324     .short_help =
3325     "set ip classify intfc <interface> table-index <classify-idx>",
3326     .function = set_ip_classify_command_fn,
3327 };
3328 /* *INDENT-ON* */
3329
3330 /*
3331  * fd.io coding-style-patch-verification: ON
3332  *
3333  * Local Variables:
3334  * eval: (c-set-style "gnu")
3335  * End:
3336  */