Fix tcp multi buffer segments retransmission
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 /**
57  * @file
58  * @brief IPv4 Forwarding.
59  *
60  * This file contains the source code for IPv4 forwarding.
61  */
62
63 void
64 ip4_forward_next_trace (vlib_main_t * vm,
65                         vlib_node_runtime_t * node,
66                         vlib_frame_t * frame,
67                         vlib_rx_or_tx_t which_adj_index);
68
69 always_inline uword
70 ip4_lookup_inline (vlib_main_t * vm,
71                    vlib_node_runtime_t * node,
72                    vlib_frame_t * frame,
73                    int lookup_for_responses_to_locally_received_packets)
74 {
75   ip4_main_t *im = &ip4_main;
76   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
77   u32 n_left_from, n_left_to_next, *from, *to_next;
78   ip_lookup_next_t next;
79   u32 thread_index = vlib_get_thread_index ();
80
81   from = vlib_frame_vector_args (frame);
82   n_left_from = frame->n_vectors;
83   next = node->cached_next_index;
84
85   while (n_left_from > 0)
86     {
87       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
88
89       while (n_left_from >= 8 && n_left_to_next >= 4)
90         {
91           vlib_buffer_t *p0, *p1, *p2, *p3;
92           ip4_header_t *ip0, *ip1, *ip2, *ip3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t *lb0, *lb1, *lb2, *lb3;
95           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           u32 pi0, fib_index0, lb_index0;
99           u32 pi1, fib_index1, lb_index1;
100           u32 pi2, fib_index2, lb_index2;
101           u32 pi3, fib_index3, lb_index3;
102           flow_hash_config_t flow_hash_config0, flow_hash_config1;
103           flow_hash_config_t flow_hash_config2, flow_hash_config3;
104           u32 hash_c0, hash_c1, hash_c2, hash_c3;
105           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
106
107           /* Prefetch next iteration. */
108           {
109             vlib_buffer_t *p4, *p5, *p6, *p7;
110
111             p4 = vlib_get_buffer (vm, from[4]);
112             p5 = vlib_get_buffer (vm, from[5]);
113             p6 = vlib_get_buffer (vm, from[6]);
114             p7 = vlib_get_buffer (vm, from[7]);
115
116             vlib_prefetch_buffer_header (p4, LOAD);
117             vlib_prefetch_buffer_header (p5, LOAD);
118             vlib_prefetch_buffer_header (p6, LOAD);
119             vlib_prefetch_buffer_header (p7, LOAD);
120
121             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
125           }
126
127           pi0 = to_next[0] = from[0];
128           pi1 = to_next[1] = from[1];
129           pi2 = to_next[2] = from[2];
130           pi3 = to_next[3] = from[3];
131
132           from += 4;
133           to_next += 4;
134           n_left_to_next -= 4;
135           n_left_from -= 4;
136
137           p0 = vlib_get_buffer (vm, pi0);
138           p1 = vlib_get_buffer (vm, pi1);
139           p2 = vlib_get_buffer (vm, pi2);
140           p3 = vlib_get_buffer (vm, pi3);
141
142           ip0 = vlib_buffer_get_current (p0);
143           ip1 = vlib_buffer_get_current (p1);
144           ip2 = vlib_buffer_get_current (p2);
145           ip3 = vlib_buffer_get_current (p3);
146
147           dst_addr0 = &ip0->dst_address;
148           dst_addr1 = &ip1->dst_address;
149           dst_addr2 = &ip2->dst_address;
150           dst_addr3 = &ip3->dst_address;
151
152           fib_index0 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
155           fib_index1 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
158           fib_index2 =
159             vec_elt (im->fib_index_by_sw_if_index,
160                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
161           fib_index3 =
162             vec_elt (im->fib_index_by_sw_if_index,
163                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
164           fib_index0 =
165             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
167           fib_index1 =
168             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
170           fib_index2 =
171             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
172              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
173           fib_index3 =
174             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
175              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
176
177
178           if (!lookup_for_responses_to_locally_received_packets)
179             {
180               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
181               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
182               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
183               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
184
185               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
186               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
187               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
188               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
189             }
190
191           if (!lookup_for_responses_to_locally_received_packets)
192             {
193               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
194               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
195               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
196               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
197             }
198
199           if (!lookup_for_responses_to_locally_received_packets)
200             {
201               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
202               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
203               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
204               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
205             }
206
207           if (lookup_for_responses_to_locally_received_packets)
208             {
209               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
210               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
211               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
212               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
213             }
214           else
215             {
216               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
217               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
218               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
219               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
220             }
221
222           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
223           lb0 = load_balance_get (lb_index0);
224           lb1 = load_balance_get (lb_index1);
225           lb2 = load_balance_get (lb_index2);
226           lb3 = load_balance_get (lb_index3);
227
228           ASSERT (lb0->lb_n_buckets > 0);
229           ASSERT (is_pow2 (lb0->lb_n_buckets));
230           ASSERT (lb1->lb_n_buckets > 0);
231           ASSERT (is_pow2 (lb1->lb_n_buckets));
232           ASSERT (lb2->lb_n_buckets > 0);
233           ASSERT (is_pow2 (lb2->lb_n_buckets));
234           ASSERT (lb3->lb_n_buckets > 0);
235           ASSERT (is_pow2 (lb3->lb_n_buckets));
236
237           /* Use flow hash to compute multipath adjacency. */
238           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
239           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
240           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
241           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
242           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
243             {
244               flow_hash_config0 = lb0->lb_hash_config;
245               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
246                 ip4_compute_flow_hash (ip0, flow_hash_config0);
247               dpo0 =
248                 load_balance_get_fwd_bucket (lb0,
249                                              (hash_c0 &
250                                               (lb0->lb_n_buckets_minus_1)));
251             }
252           else
253             {
254               dpo0 = load_balance_get_bucket_i (lb0, 0);
255             }
256           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
257             {
258               flow_hash_config1 = lb1->lb_hash_config;
259               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
260                 ip4_compute_flow_hash (ip1, flow_hash_config1);
261               dpo1 =
262                 load_balance_get_fwd_bucket (lb1,
263                                              (hash_c1 &
264                                               (lb1->lb_n_buckets_minus_1)));
265             }
266           else
267             {
268               dpo1 = load_balance_get_bucket_i (lb1, 0);
269             }
270           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
271             {
272               flow_hash_config2 = lb2->lb_hash_config;
273               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
274                 ip4_compute_flow_hash (ip2, flow_hash_config2);
275               dpo2 =
276                 load_balance_get_fwd_bucket (lb2,
277                                              (hash_c2 &
278                                               (lb2->lb_n_buckets_minus_1)));
279             }
280           else
281             {
282               dpo2 = load_balance_get_bucket_i (lb2, 0);
283             }
284           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
285             {
286               flow_hash_config3 = lb3->lb_hash_config;
287               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
288                 ip4_compute_flow_hash (ip3, flow_hash_config3);
289               dpo3 =
290                 load_balance_get_fwd_bucket (lb3,
291                                              (hash_c3 &
292                                               (lb3->lb_n_buckets_minus_1)));
293             }
294           else
295             {
296               dpo3 = load_balance_get_bucket_i (lb3, 0);
297             }
298
299           next0 = dpo0->dpoi_next_node;
300           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
301           next1 = dpo1->dpoi_next_node;
302           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
303           next2 = dpo2->dpoi_next_node;
304           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
305           next3 = dpo3->dpoi_next_node;
306           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
307
308           vlib_increment_combined_counter
309             (cm, thread_index, lb_index0, 1,
310              vlib_buffer_length_in_chain (vm, p0));
311           vlib_increment_combined_counter
312             (cm, thread_index, lb_index1, 1,
313              vlib_buffer_length_in_chain (vm, p1));
314           vlib_increment_combined_counter
315             (cm, thread_index, lb_index2, 1,
316              vlib_buffer_length_in_chain (vm, p2));
317           vlib_increment_combined_counter
318             (cm, thread_index, lb_index3, 1,
319              vlib_buffer_length_in_chain (vm, p3));
320
321           vlib_validate_buffer_enqueue_x4 (vm, node, next,
322                                            to_next, n_left_to_next,
323                                            pi0, pi1, pi2, pi3,
324                                            next0, next1, next2, next3);
325         }
326
327       while (n_left_from > 0 && n_left_to_next > 0)
328         {
329           vlib_buffer_t *p0;
330           ip4_header_t *ip0;
331           ip_lookup_next_t next0;
332           const load_balance_t *lb0;
333           ip4_fib_mtrie_t *mtrie0;
334           ip4_fib_mtrie_leaf_t leaf0;
335           ip4_address_t *dst_addr0;
336           u32 pi0, fib_index0, lbi0;
337           flow_hash_config_t flow_hash_config0;
338           const dpo_id_t *dpo0;
339           u32 hash_c0;
340
341           pi0 = from[0];
342           to_next[0] = pi0;
343
344           p0 = vlib_get_buffer (vm, pi0);
345
346           ip0 = vlib_buffer_get_current (p0);
347
348           dst_addr0 = &ip0->dst_address;
349
350           fib_index0 =
351             vec_elt (im->fib_index_by_sw_if_index,
352                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
353           fib_index0 =
354             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
355              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
356
357           if (!lookup_for_responses_to_locally_received_packets)
358             {
359               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
360
361               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
362             }
363
364           if (!lookup_for_responses_to_locally_received_packets)
365             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
366
367           if (!lookup_for_responses_to_locally_received_packets)
368             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
369
370           if (lookup_for_responses_to_locally_received_packets)
371             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
372           else
373             {
374               /* Handle default route. */
375               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
376             }
377
378           ASSERT (lbi0);
379           lb0 = load_balance_get (lbi0);
380
381           ASSERT (lb0->lb_n_buckets > 0);
382           ASSERT (is_pow2 (lb0->lb_n_buckets));
383
384           /* Use flow hash to compute multipath adjacency. */
385           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
386           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
387             {
388               flow_hash_config0 = lb0->lb_hash_config;
389
390               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
391                 ip4_compute_flow_hash (ip0, flow_hash_config0);
392               dpo0 =
393                 load_balance_get_fwd_bucket (lb0,
394                                              (hash_c0 &
395                                               (lb0->lb_n_buckets_minus_1)));
396             }
397           else
398             {
399               dpo0 = load_balance_get_bucket_i (lb0, 0);
400             }
401
402           next0 = dpo0->dpoi_next_node;
403           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
404
405           vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
406                                            vlib_buffer_length_in_chain (vm,
407                                                                         p0));
408
409           from += 1;
410           to_next += 1;
411           n_left_to_next -= 1;
412           n_left_from -= 1;
413
414           if (PREDICT_FALSE (next0 != next))
415             {
416               n_left_to_next += 1;
417               vlib_put_next_frame (vm, node, next, n_left_to_next);
418               next = next0;
419               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
420               to_next[0] = pi0;
421               to_next += 1;
422               n_left_to_next -= 1;
423             }
424         }
425
426       vlib_put_next_frame (vm, node, next, n_left_to_next);
427     }
428
429   if (node->flags & VLIB_NODE_FLAG_TRACE)
430     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
431
432   return frame->n_vectors;
433 }
434
435 /** @brief IPv4 lookup node.
436     @node ip4-lookup
437
438     This is the main IPv4 lookup dispatch node.
439
440     @param vm vlib_main_t corresponding to the current thread
441     @param node vlib_node_runtime_t
442     @param frame vlib_frame_t whose contents should be dispatched
443
444     @par Graph mechanics: buffer metadata, next index usage
445
446     @em Uses:
447     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
448         - Indicates the @c sw_if_index value of the interface that the
449           packet was received on.
450     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
451         - When the value is @c ~0 then the node performs a longest prefix
452           match (LPM) for the packet destination address in the FIB attached
453           to the receive interface.
454         - Otherwise perform LPM for the packet destination address in the
455           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
456           value (0, 1, ...) and not a VRF id.
457
458     @em Sets:
459     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
460         - The lookup result adjacency index.
461
462     <em>Next Index:</em>
463     - Dispatches the packet to the node index found in
464       ip_adjacency_t @c adj->lookup_next_index
465       (where @c adj is the lookup result adjacency).
466 */
467 static uword
468 ip4_lookup (vlib_main_t * vm,
469             vlib_node_runtime_t * node, vlib_frame_t * frame)
470 {
471   return ip4_lookup_inline (vm, node, frame,
472                             /* lookup_for_responses_to_locally_received_packets */
473                             0);
474
475 }
476
477 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
478
479 VLIB_REGISTER_NODE (ip4_lookup_node) =
480 {
481 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
482     sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
483     IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
484
485 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
486
487 always_inline uword
488 ip4_load_balance (vlib_main_t * vm,
489                   vlib_node_runtime_t * node, vlib_frame_t * frame)
490 {
491   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
492   u32 n_left_from, n_left_to_next, *from, *to_next;
493   ip_lookup_next_t next;
494   u32 thread_index = vlib_get_thread_index ();
495
496   from = vlib_frame_vector_args (frame);
497   n_left_from = frame->n_vectors;
498   next = node->cached_next_index;
499
500   if (node->flags & VLIB_NODE_FLAG_TRACE)
501     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
502
503   while (n_left_from > 0)
504     {
505       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
506
507
508       while (n_left_from >= 4 && n_left_to_next >= 2)
509         {
510           ip_lookup_next_t next0, next1;
511           const load_balance_t *lb0, *lb1;
512           vlib_buffer_t *p0, *p1;
513           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
514           const ip4_header_t *ip0, *ip1;
515           const dpo_id_t *dpo0, *dpo1;
516
517           /* Prefetch next iteration. */
518           {
519             vlib_buffer_t *p2, *p3;
520
521             p2 = vlib_get_buffer (vm, from[2]);
522             p3 = vlib_get_buffer (vm, from[3]);
523
524             vlib_prefetch_buffer_header (p2, STORE);
525             vlib_prefetch_buffer_header (p3, STORE);
526
527             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
528             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
529           }
530
531           pi0 = to_next[0] = from[0];
532           pi1 = to_next[1] = from[1];
533
534           from += 2;
535           n_left_from -= 2;
536           to_next += 2;
537           n_left_to_next -= 2;
538
539           p0 = vlib_get_buffer (vm, pi0);
540           p1 = vlib_get_buffer (vm, pi1);
541
542           ip0 = vlib_buffer_get_current (p0);
543           ip1 = vlib_buffer_get_current (p1);
544           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
545           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
546
547           lb0 = load_balance_get (lbi0);
548           lb1 = load_balance_get (lbi1);
549
550           /*
551            * this node is for via FIBs we can re-use the hash value from the
552            * to node if present.
553            * We don't want to use the same hash value at each level in the recursion
554            * graph as that would lead to polarisation
555            */
556           hc0 = hc1 = 0;
557
558           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
559             {
560               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
561                 {
562                   hc0 = vnet_buffer (p0)->ip.flow_hash =
563                     vnet_buffer (p0)->ip.flow_hash >> 1;
564                 }
565               else
566                 {
567                   hc0 = vnet_buffer (p0)->ip.flow_hash =
568                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
569                 }
570               dpo0 = load_balance_get_fwd_bucket
571                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
572             }
573           else
574             {
575               dpo0 = load_balance_get_bucket_i (lb0, 0);
576             }
577           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
578             {
579               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
580                 {
581                   hc1 = vnet_buffer (p1)->ip.flow_hash =
582                     vnet_buffer (p1)->ip.flow_hash >> 1;
583                 }
584               else
585                 {
586                   hc1 = vnet_buffer (p1)->ip.flow_hash =
587                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
588                 }
589               dpo1 = load_balance_get_fwd_bucket
590                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
591             }
592           else
593             {
594               dpo1 = load_balance_get_bucket_i (lb1, 0);
595             }
596
597           next0 = dpo0->dpoi_next_node;
598           next1 = dpo1->dpoi_next_node;
599
600           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
602
603           vlib_increment_combined_counter
604             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
605           vlib_increment_combined_counter
606             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
607
608           vlib_validate_buffer_enqueue_x2 (vm, node, next,
609                                            to_next, n_left_to_next,
610                                            pi0, pi1, next0, next1);
611         }
612
613       while (n_left_from > 0 && n_left_to_next > 0)
614         {
615           ip_lookup_next_t next0;
616           const load_balance_t *lb0;
617           vlib_buffer_t *p0;
618           u32 pi0, lbi0, hc0;
619           const ip4_header_t *ip0;
620           const dpo_id_t *dpo0;
621
622           pi0 = from[0];
623           to_next[0] = pi0;
624           from += 1;
625           to_next += 1;
626           n_left_to_next -= 1;
627           n_left_from -= 1;
628
629           p0 = vlib_get_buffer (vm, pi0);
630
631           ip0 = vlib_buffer_get_current (p0);
632           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
633
634           lb0 = load_balance_get (lbi0);
635
636           hc0 = 0;
637           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
638             {
639               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
640                 {
641                   hc0 = vnet_buffer (p0)->ip.flow_hash =
642                     vnet_buffer (p0)->ip.flow_hash >> 1;
643                 }
644               else
645                 {
646                   hc0 = vnet_buffer (p0)->ip.flow_hash =
647                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
648                 }
649               dpo0 = load_balance_get_fwd_bucket
650                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
651             }
652           else
653             {
654               dpo0 = load_balance_get_bucket_i (lb0, 0);
655             }
656
657           next0 = dpo0->dpoi_next_node;
658           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
659
660           vlib_increment_combined_counter
661             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
662
663           vlib_validate_buffer_enqueue_x1 (vm, node, next,
664                                            to_next, n_left_to_next,
665                                            pi0, next0);
666         }
667
668       vlib_put_next_frame (vm, node, next, n_left_to_next);
669     }
670
671   return frame->n_vectors;
672 }
673
674 VLIB_REGISTER_NODE (ip4_load_balance_node) =
675 {
676 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
677     sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
678     format_ip4_lookup_trace,};
679
680 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
681
682 /* get first interface address */
683 ip4_address_t *
684 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
685                              ip_interface_address_t ** result_ia)
686 {
687   ip_lookup_main_t *lm = &im->lookup_main;
688   ip_interface_address_t *ia = 0;
689   ip4_address_t *result = 0;
690
691   /* *INDENT-OFF* */
692   foreach_ip_interface_address
693     (lm, ia, sw_if_index,
694      1 /* honor unnumbered */ ,
695      ({
696        ip4_address_t * a =
697          ip_interface_address_get_address (lm, ia);
698        result = a;
699        break;
700      }));
701   /* *INDENT-OFF* */
702   if (result_ia)
703     *result_ia = result ? ia : 0;
704   return result;
705 }
706
707 static void
708 ip4_add_interface_routes (u32 sw_if_index,
709                           ip4_main_t * im, u32 fib_index,
710                           ip_interface_address_t * a)
711 {
712   ip_lookup_main_t *lm = &im->lookup_main;
713   ip4_address_t *address = ip_interface_address_get_address (lm, a);
714   fib_prefix_t pfx = {
715     .fp_len = a->address_length,
716     .fp_proto = FIB_PROTOCOL_IP4,
717     .fp_addr.ip4 = *address,
718   };
719
720   if (pfx.fp_len <= 30)
721     {
722       /* a /30 or shorter - add a glean for the network address */
723       fib_table_entry_update_one_path (fib_index, &pfx,
724                                        FIB_SOURCE_INTERFACE,
725                                        (FIB_ENTRY_FLAG_CONNECTED |
726                                         FIB_ENTRY_FLAG_ATTACHED),
727                                        DPO_PROTO_IP4,
728                                        /* No next-hop address */
729                                        NULL,
730                                        sw_if_index,
731                                        // invalid FIB index
732                                        ~0,
733                                        1,
734                                        // no out-label stack
735                                        NULL,
736                                        FIB_ROUTE_PATH_FLAG_NONE);
737
738       /* Add the two broadcast addresses as drop */
739       fib_prefix_t net_pfx = {
740         .fp_len = 32,
741         .fp_proto = FIB_PROTOCOL_IP4,
742         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
743       };
744       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
745         fib_table_entry_special_add(fib_index,
746                                     &net_pfx,
747                                     FIB_SOURCE_INTERFACE,
748                                     (FIB_ENTRY_FLAG_DROP |
749                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
750       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
751       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
752         fib_table_entry_special_add(fib_index,
753                                     &net_pfx,
754                                     FIB_SOURCE_INTERFACE,
755                                     (FIB_ENTRY_FLAG_DROP |
756                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
757     }
758   else if (pfx.fp_len == 31)
759     {
760       u32 mask = clib_host_to_net_u32(1);
761       fib_prefix_t net_pfx = pfx;
762
763       net_pfx.fp_len = 32;
764       net_pfx.fp_addr.ip4.as_u32 ^= mask;
765
766       /* a /31 - add the other end as an attached host */
767       fib_table_entry_update_one_path (fib_index, &net_pfx,
768                                        FIB_SOURCE_INTERFACE,
769                                        (FIB_ENTRY_FLAG_ATTACHED),
770                                        DPO_PROTO_IP4,
771                                        &net_pfx.fp_addr,
772                                        sw_if_index,
773                                        // invalid FIB index
774                                        ~0,
775                                        1,
776                                        NULL,
777                                        FIB_ROUTE_PATH_FLAG_NONE);
778     }
779   pfx.fp_len = 32;
780
781   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
782     {
783       u32 classify_table_index =
784         lm->classify_table_index_by_sw_if_index[sw_if_index];
785       if (classify_table_index != (u32) ~ 0)
786         {
787           dpo_id_t dpo = DPO_INVALID;
788
789           dpo_set (&dpo,
790                    DPO_CLASSIFY,
791                    DPO_PROTO_IP4,
792                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
793
794           fib_table_entry_special_dpo_add (fib_index,
795                                            &pfx,
796                                            FIB_SOURCE_CLASSIFY,
797                                            FIB_ENTRY_FLAG_NONE, &dpo);
798           dpo_reset (&dpo);
799         }
800     }
801
802   fib_table_entry_update_one_path (fib_index, &pfx,
803                                    FIB_SOURCE_INTERFACE,
804                                    (FIB_ENTRY_FLAG_CONNECTED |
805                                     FIB_ENTRY_FLAG_LOCAL),
806                                    DPO_PROTO_IP4,
807                                    &pfx.fp_addr,
808                                    sw_if_index,
809                                    // invalid FIB index
810                                    ~0,
811                                    1, NULL,
812                                    FIB_ROUTE_PATH_FLAG_NONE);
813 }
814
815 static void
816 ip4_del_interface_routes (ip4_main_t * im,
817                           u32 fib_index,
818                           ip4_address_t * address, u32 address_length)
819 {
820   fib_prefix_t pfx = {
821     .fp_len = address_length,
822     .fp_proto = FIB_PROTOCOL_IP4,
823     .fp_addr.ip4 = *address,
824   };
825
826   if (pfx.fp_len <= 30)
827     {
828       fib_prefix_t net_pfx = {
829         .fp_len = 32,
830         .fp_proto = FIB_PROTOCOL_IP4,
831         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
832       };
833       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
834         fib_table_entry_special_remove(fib_index,
835                                        &net_pfx,
836                                        FIB_SOURCE_INTERFACE);
837       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
838       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
839         fib_table_entry_special_remove(fib_index,
840                                        &net_pfx,
841                                        FIB_SOURCE_INTERFACE);
842       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
843     }
844     else if (pfx.fp_len == 31)
845     {
846       u32 mask = clib_host_to_net_u32(1);
847       fib_prefix_t net_pfx = pfx;
848
849       net_pfx.fp_len = 32;
850       net_pfx.fp_addr.ip4.as_u32 ^= mask;
851
852       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
853     }
854
855   pfx.fp_len = 32;
856   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
857 }
858
859 void
860 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
861 {
862   ip4_main_t *im = &ip4_main;
863
864   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
865
866   /*
867    * enable/disable only on the 1<->0 transition
868    */
869   if (is_enable)
870     {
871       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
872         return;
873     }
874   else
875     {
876       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
877       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
878         return;
879     }
880   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
881                                !is_enable, 0, 0);
882
883
884   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
885                                sw_if_index, !is_enable, 0, 0);
886 }
887
888 static clib_error_t *
889 ip4_add_del_interface_address_internal (vlib_main_t * vm,
890                                         u32 sw_if_index,
891                                         ip4_address_t * address,
892                                         u32 address_length, u32 is_del)
893 {
894   vnet_main_t *vnm = vnet_get_main ();
895   ip4_main_t *im = &ip4_main;
896   ip_lookup_main_t *lm = &im->lookup_main;
897   clib_error_t *error = 0;
898   u32 if_address_index, elts_before;
899   ip4_address_fib_t ip4_af, *addr_fib = 0;
900
901   /* local0 interface doesn't support IP addressing  */
902   if (sw_if_index == 0)
903     {
904       return
905        clib_error_create ("local0 interface doesn't support IP addressing");
906     }
907
908   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
909   ip4_addr_fib_init (&ip4_af, address,
910                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
911   vec_add1 (addr_fib, ip4_af);
912
913   /* FIXME-LATER
914    * there is no support for adj-fib handling in the presence of overlapping
915    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
916    * most routers do.
917    */
918   /* *INDENT-OFF* */
919   if (!is_del)
920     {
921       /* When adding an address check that it does not conflict
922          with an existing address. */
923       ip_interface_address_t *ia;
924       foreach_ip_interface_address
925         (&im->lookup_main, ia, sw_if_index,
926          0 /* honor unnumbered */ ,
927          ({
928            ip4_address_t * x =
929              ip_interface_address_get_address
930              (&im->lookup_main, ia);
931            if (ip4_destination_matches_route
932                (im, address, x, ia->address_length) ||
933                ip4_destination_matches_route (im,
934                                               x,
935                                               address,
936                                               address_length))
937              return
938                clib_error_create
939                ("failed to add %U which conflicts with %U for interface %U",
940                 format_ip4_address_and_length, address,
941                 address_length,
942                 format_ip4_address_and_length, x,
943                 ia->address_length,
944                 format_vnet_sw_if_index_name, vnm,
945                 sw_if_index);
946          }));
947     }
948   /* *INDENT-ON* */
949
950   elts_before = pool_elts (lm->if_address_pool);
951
952   error = ip_interface_address_add_del
953     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
954   if (error)
955     goto done;
956
957   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
958
959   if (is_del)
960     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
961   else
962     ip4_add_interface_routes (sw_if_index,
963                               im, ip4_af.fib_index,
964                               pool_elt_at_index
965                               (lm->if_address_pool, if_address_index));
966
967   /* If pool did not grow/shrink: add duplicate address. */
968   if (elts_before != pool_elts (lm->if_address_pool))
969     {
970       ip4_add_del_interface_address_callback_t *cb;
971       vec_foreach (cb, im->add_del_interface_address_callbacks)
972         cb->function (im, cb->function_opaque, sw_if_index,
973                       address, address_length, if_address_index, is_del);
974     }
975
976 done:
977   vec_free (addr_fib);
978   return error;
979 }
980
981 clib_error_t *
982 ip4_add_del_interface_address (vlib_main_t * vm,
983                                u32 sw_if_index,
984                                ip4_address_t * address,
985                                u32 address_length, u32 is_del)
986 {
987   return ip4_add_del_interface_address_internal
988     (vm, sw_if_index, address, address_length, is_del);
989 }
990
991 /* Built-in ip4 unicast rx feature path definition */
992 /* *INDENT-OFF* */
993 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
994 {
995   .arc_name = "ip4-unicast",
996   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
997   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
998 };
999
1000 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1001 {
1002   .arc_name = "ip4-unicast",
1003   .node_name = "ip4-flow-classify",
1004   .runs_before = VNET_FEATURES ("ip4-inacl"),
1005 };
1006
1007 VNET_FEATURE_INIT (ip4_inacl, static) =
1008 {
1009   .arc_name = "ip4-unicast",
1010   .node_name = "ip4-inacl",
1011   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1012 };
1013
1014 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1015 {
1016   .arc_name = "ip4-unicast",
1017   .node_name = "ip4-source-check-via-rx",
1018   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1019 };
1020
1021 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1022 {
1023   .arc_name = "ip4-unicast",
1024   .node_name = "ip4-source-check-via-any",
1025   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1026 };
1027
1028 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1029 {
1030   .arc_name = "ip4-unicast",
1031   .node_name = "ip4-source-and-port-range-check-rx",
1032   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1033 };
1034
1035 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1036 {
1037   .arc_name = "ip4-unicast",
1038   .node_name = "ip4-policer-classify",
1039   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1040 };
1041
1042 VNET_FEATURE_INIT (ip4_ipsec, static) =
1043 {
1044   .arc_name = "ip4-unicast",
1045   .node_name = "ipsec-input-ip4",
1046   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1047 };
1048
1049 VNET_FEATURE_INIT (ip4_vpath, static) =
1050 {
1051   .arc_name = "ip4-unicast",
1052   .node_name = "vpath-input-ip4",
1053   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1054 };
1055
1056 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1057 {
1058   .arc_name = "ip4-unicast",
1059   .node_name = "ip4-vxlan-bypass",
1060   .runs_before = VNET_FEATURES ("ip4-lookup"),
1061 };
1062
1063 VNET_FEATURE_INIT (ip4_drop, static) =
1064 {
1065   .arc_name = "ip4-unicast",
1066   .node_name = "ip4-drop",
1067   .runs_before = VNET_FEATURES ("ip4-lookup"),
1068 };
1069
1070 VNET_FEATURE_INIT (ip4_lookup, static) =
1071 {
1072   .arc_name = "ip4-unicast",
1073   .node_name = "ip4-lookup",
1074   .runs_before = 0,     /* not before any other features */
1075 };
1076
1077 /* Built-in ip4 multicast rx feature path definition */
1078 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1079 {
1080   .arc_name = "ip4-multicast",
1081   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1082   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1083 };
1084
1085 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1086 {
1087   .arc_name = "ip4-multicast",
1088   .node_name = "vpath-input-ip4",
1089   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1090 };
1091
1092 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1093 {
1094   .arc_name = "ip4-multicast",
1095   .node_name = "ip4-drop",
1096   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1097 };
1098
1099 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1100 {
1101   .arc_name = "ip4-multicast",
1102   .node_name = "ip4-mfib-forward-lookup",
1103   .runs_before = 0,     /* last feature */
1104 };
1105
1106 /* Source and port-range check ip4 tx feature path definition */
1107 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1108 {
1109   .arc_name = "ip4-output",
1110   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1111   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1112 };
1113
1114 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1115 {
1116   .arc_name = "ip4-output",
1117   .node_name = "ip4-source-and-port-range-check-tx",
1118   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1119 };
1120
1121 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1122 {
1123   .arc_name = "ip4-output",
1124   .node_name = "ipsec-output-ip4",
1125   .runs_before = VNET_FEATURES ("interface-output"),
1126 };
1127
1128 /* Built-in ip4 tx feature path definition */
1129 VNET_FEATURE_INIT (ip4_interface_output, static) =
1130 {
1131   .arc_name = "ip4-output",
1132   .node_name = "interface-output",
1133   .runs_before = 0,     /* not before any other features */
1134 };
1135 /* *INDENT-ON* */
1136
1137 static clib_error_t *
1138 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1139 {
1140   ip4_main_t *im = &ip4_main;
1141
1142   /* Fill in lookup tables with default table (0). */
1143   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1144   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1145
1146   if (!is_add)
1147     {
1148       ip4_main_t *im4 = &ip4_main;
1149       ip_lookup_main_t *lm4 = &im4->lookup_main;
1150       ip_interface_address_t *ia = 0;
1151       ip4_address_t *address;
1152       vlib_main_t *vm = vlib_get_main ();
1153
1154       /* *INDENT-OFF* */
1155       foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1156       ({
1157         address = ip_interface_address_get_address (lm4, ia);
1158         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1159       }));
1160       /* *INDENT-ON* */
1161     }
1162
1163   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1164                                is_add, 0, 0);
1165
1166   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1167                                is_add, 0, 0);
1168
1169   return /* no error */ 0;
1170 }
1171
1172 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1173
1174 /* Global IP4 main. */
1175 ip4_main_t ip4_main;
1176
1177 clib_error_t *
1178 ip4_lookup_init (vlib_main_t * vm)
1179 {
1180   ip4_main_t *im = &ip4_main;
1181   clib_error_t *error;
1182   uword i;
1183
1184   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1185     return error;
1186
1187   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1188     {
1189       u32 m;
1190
1191       if (i < 32)
1192         m = pow2_mask (i) << (32 - i);
1193       else
1194         m = ~0;
1195       im->fib_masks[i] = clib_host_to_net_u32 (m);
1196     }
1197
1198   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1199
1200   /* Create FIB with index 0 and table id of 0. */
1201   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1202   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1203
1204   {
1205     pg_node_t *pn;
1206     pn = pg_get_node (ip4_lookup_node.index);
1207     pn->unformat_edit = unformat_pg_ip4_header;
1208   }
1209
1210   {
1211     ethernet_arp_header_t h;
1212
1213     memset (&h, 0, sizeof (h));
1214
1215     /* Set target ethernet address to all zeros. */
1216     memset (h.ip4_over_ethernet[1].ethernet, 0,
1217             sizeof (h.ip4_over_ethernet[1].ethernet));
1218
1219 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1220 #define _8(f,v) h.f = v;
1221     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1222     _16 (l3_type, ETHERNET_TYPE_IP4);
1223     _8 (n_l2_address_bytes, 6);
1224     _8 (n_l3_address_bytes, 4);
1225     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1226 #undef _16
1227 #undef _8
1228
1229     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1230                                /* data */ &h,
1231                                sizeof (h),
1232                                /* alloc chunk size */ 8,
1233                                "ip4 arp");
1234   }
1235
1236   return error;
1237 }
1238
1239 VLIB_INIT_FUNCTION (ip4_lookup_init);
1240
1241 typedef struct
1242 {
1243   /* Adjacency taken. */
1244   u32 dpo_index;
1245   u32 flow_hash;
1246   u32 fib_index;
1247
1248   /* Packet data, possibly *after* rewrite. */
1249   u8 packet_data[64 - 1 * sizeof (u32)];
1250 }
1251 ip4_forward_next_trace_t;
1252
1253 u8 *
1254 format_ip4_forward_next_trace (u8 * s, va_list * args)
1255 {
1256   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1257   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1258   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1259   uword indent = format_get_indent (s);
1260   s = format (s, "%U%U",
1261               format_white_space, indent,
1262               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1263   return s;
1264 }
1265
1266 static u8 *
1267 format_ip4_lookup_trace (u8 * s, va_list * args)
1268 {
1269   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1270   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1271   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1272   uword indent = format_get_indent (s);
1273
1274   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1275               t->fib_index, t->dpo_index, t->flow_hash);
1276   s = format (s, "\n%U%U",
1277               format_white_space, indent,
1278               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1279   return s;
1280 }
1281
1282 static u8 *
1283 format_ip4_rewrite_trace (u8 * s, va_list * args)
1284 {
1285   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1286   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1287   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1288   uword indent = format_get_indent (s);
1289
1290   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1291               t->fib_index, t->dpo_index, format_ip_adjacency,
1292               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1293   s = format (s, "\n%U%U",
1294               format_white_space, indent,
1295               format_ip_adjacency_packet_data,
1296               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1297   return s;
1298 }
1299
1300 /* Common trace function for all ip4-forward next nodes. */
1301 void
1302 ip4_forward_next_trace (vlib_main_t * vm,
1303                         vlib_node_runtime_t * node,
1304                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1305 {
1306   u32 *from, n_left;
1307   ip4_main_t *im = &ip4_main;
1308
1309   n_left = frame->n_vectors;
1310   from = vlib_frame_vector_args (frame);
1311
1312   while (n_left >= 4)
1313     {
1314       u32 bi0, bi1;
1315       vlib_buffer_t *b0, *b1;
1316       ip4_forward_next_trace_t *t0, *t1;
1317
1318       /* Prefetch next iteration. */
1319       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1320       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1321
1322       bi0 = from[0];
1323       bi1 = from[1];
1324
1325       b0 = vlib_get_buffer (vm, bi0);
1326       b1 = vlib_get_buffer (vm, bi1);
1327
1328       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1329         {
1330           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1331           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1332           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1333           t0->fib_index =
1334             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1335              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1336             vec_elt (im->fib_index_by_sw_if_index,
1337                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1338
1339           clib_memcpy (t0->packet_data,
1340                        vlib_buffer_get_current (b0),
1341                        sizeof (t0->packet_data));
1342         }
1343       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1344         {
1345           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1346           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1347           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1348           t1->fib_index =
1349             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1350              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1351             vec_elt (im->fib_index_by_sw_if_index,
1352                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1353           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1354                        sizeof (t1->packet_data));
1355         }
1356       from += 2;
1357       n_left -= 2;
1358     }
1359
1360   while (n_left >= 1)
1361     {
1362       u32 bi0;
1363       vlib_buffer_t *b0;
1364       ip4_forward_next_trace_t *t0;
1365
1366       bi0 = from[0];
1367
1368       b0 = vlib_get_buffer (vm, bi0);
1369
1370       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1371         {
1372           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1373           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1374           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1375           t0->fib_index =
1376             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1377              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1378             vec_elt (im->fib_index_by_sw_if_index,
1379                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1380           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1381                        sizeof (t0->packet_data));
1382         }
1383       from += 1;
1384       n_left -= 1;
1385     }
1386 }
1387
1388 static uword
1389 ip4_drop_or_punt (vlib_main_t * vm,
1390                   vlib_node_runtime_t * node,
1391                   vlib_frame_t * frame, ip4_error_t error_code)
1392 {
1393   u32 *buffers = vlib_frame_vector_args (frame);
1394   uword n_packets = frame->n_vectors;
1395
1396   vlib_error_drop_buffers (vm, node, buffers,
1397                            /* stride */ 1,
1398                            n_packets,
1399                            /* next */ 0,
1400                            ip4_input_node.index, error_code);
1401
1402   if (node->flags & VLIB_NODE_FLAG_TRACE)
1403     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1404
1405   return n_packets;
1406 }
1407
1408 static uword
1409 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1410 {
1411   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
1412 }
1413
1414 static uword
1415 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1416 {
1417   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
1418 }
1419
1420 /* *INDENT-OFF* */
1421 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1422 {
1423   .function = ip4_drop,
1424   .name = "ip4-drop",
1425   .vector_size = sizeof (u32),
1426   .format_trace = format_ip4_forward_next_trace,
1427   .n_next_nodes = 1,
1428   .next_nodes = {
1429     [0] = "error-drop",
1430   },
1431 };
1432
1433 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1434
1435 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1436 {
1437   .function = ip4_punt,
1438   .name = "ip4-punt",
1439   .vector_size = sizeof (u32),
1440   .format_trace = format_ip4_forward_next_trace,
1441   .n_next_nodes = 1,
1442   .next_nodes = {
1443     [0] = "error-punt",
1444   },
1445 };
1446
1447 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
/* *INDENT-ON* */
1449
1450 /* Compute TCP/UDP/ICMP4 checksum in software. */
1451 u16
1452 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1453                               ip4_header_t * ip0)
1454 {
1455   ip_csum_t sum0;
1456   u32 ip_header_length, payload_length_host_byte_order;
1457   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1458   u16 sum16;
1459   void *data_this_buffer;
1460
1461   /* Initialize checksum with ip header. */
1462   ip_header_length = ip4_header_bytes (ip0);
1463   payload_length_host_byte_order =
1464     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1465   sum0 =
1466     clib_host_to_net_u32 (payload_length_host_byte_order +
1467                           (ip0->protocol << 16));
1468
1469   if (BITS (uword) == 32)
1470     {
1471       sum0 =
1472         ip_csum_with_carry (sum0,
1473                             clib_mem_unaligned (&ip0->src_address, u32));
1474       sum0 =
1475         ip_csum_with_carry (sum0,
1476                             clib_mem_unaligned (&ip0->dst_address, u32));
1477     }
1478   else
1479     sum0 =
1480       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1481
1482   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1483   data_this_buffer = (void *) ip0 + ip_header_length;
1484   n_ip_bytes_this_buffer = p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1485   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1486     {
1487       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1488           n_ip_bytes_this_buffer - ip_header_length : 0;
1489     }
1490   while (1)
1491     {
1492       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1493       n_bytes_left -= n_this_buffer;
1494       if (n_bytes_left == 0)
1495         break;
1496
1497       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1498       p0 = vlib_get_buffer (vm, p0->next_buffer);
1499       data_this_buffer = vlib_buffer_get_current (p0);
1500       n_this_buffer = p0->current_length;
1501     }
1502
1503   sum16 = ~ip_csum_fold (sum0);
1504
1505   return sum16;
1506 }
1507
1508 u32
1509 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1510 {
1511   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1512   udp_header_t *udp0;
1513   u16 sum16;
1514
1515   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1516           || ip0->protocol == IP_PROTOCOL_UDP);
1517
1518   udp0 = (void *) (ip0 + 1);
1519   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1520     {
1521       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1522                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1523       return p0->flags;
1524     }
1525
1526   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1527
1528   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1529                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1530
1531   return p0->flags;
1532 }
1533
1534 /* *INDENT-OFF* */
1535 VNET_FEATURE_ARC_INIT (ip4_local) =
1536 {
1537   .arc_name  = "ip4-local",
1538   .start_nodes = VNET_FEATURES ("ip4-local"),
1539 };
1540 /* *INDENT-ON* */
1541
1542 static inline uword
1543 ip4_local_inline (vlib_main_t * vm,
1544                   vlib_node_runtime_t * node,
1545                   vlib_frame_t * frame, int head_of_feature_arc)
1546 {
1547   ip4_main_t *im = &ip4_main;
1548   ip_lookup_main_t *lm = &im->lookup_main;
1549   ip_local_next_t next_index;
1550   u32 *from, *to_next, n_left_from, n_left_to_next;
1551   vlib_node_runtime_t *error_node =
1552     vlib_node_get_runtime (vm, ip4_input_node.index);
1553   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1554
1555   from = vlib_frame_vector_args (frame);
1556   n_left_from = frame->n_vectors;
1557   next_index = node->cached_next_index;
1558
1559   if (node->flags & VLIB_NODE_FLAG_TRACE)
1560     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1561
1562   while (n_left_from > 0)
1563     {
1564       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1565
1566       while (n_left_from >= 4 && n_left_to_next >= 2)
1567         {
1568           vlib_buffer_t *p0, *p1;
1569           ip4_header_t *ip0, *ip1;
1570           udp_header_t *udp0, *udp1;
1571           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1572           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1573           const dpo_id_t *dpo0, *dpo1;
1574           const load_balance_t *lb0, *lb1;
1575           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1576           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1577           i32 len_diff0, len_diff1;
1578           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1579           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1580           u32 sw_if_index0, sw_if_index1;
1581
1582           pi0 = to_next[0] = from[0];
1583           pi1 = to_next[1] = from[1];
1584           from += 2;
1585           n_left_from -= 2;
1586           to_next += 2;
1587           n_left_to_next -= 2;
1588
1589           next0 = next1 = IP_LOCAL_NEXT_DROP;
1590
1591           p0 = vlib_get_buffer (vm, pi0);
1592           p1 = vlib_get_buffer (vm, pi1);
1593
1594           ip0 = vlib_buffer_get_current (p0);
1595           ip1 = vlib_buffer_get_current (p1);
1596
1597           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1598           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1599
1600           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1601           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1602
1603           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1604           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1605
1606           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1607           fib_index0 =
1608             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1609              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1610
1611           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1612           fib_index1 =
1613             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1614              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1615
1616           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1617           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1618
1619           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1620           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1621
1622           /* Treat IP frag packets as "experimental" protocol for now
1623              until support of IP frag reassembly is implemented */
1624           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1625           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1626
1627           if (head_of_feature_arc == 0)
1628             {
1629               error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1630               goto skip_checks;
1631             }
1632
1633           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1634           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1635           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1636           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1637
1638           flags0 = p0->flags;
1639           flags1 = p1->flags;
1640
1641           good_tcp_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1642           good_tcp_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1643
1644           udp0 = ip4_next_header (ip0);
1645           udp1 = ip4_next_header (ip1);
1646
1647           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1648           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1649           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1650
1651           /* Verify UDP length. */
1652           ip_len0 = clib_net_to_host_u16 (ip0->length);
1653           ip_len1 = clib_net_to_host_u16 (ip1->length);
1654           udp_len0 = clib_net_to_host_u16 (udp0->length);
1655           udp_len1 = clib_net_to_host_u16 (udp1->length);
1656
1657           len_diff0 = ip_len0 - udp_len0;
1658           len_diff1 = ip_len1 - udp_len1;
1659
1660           len_diff0 = is_udp0 ? len_diff0 : 0;
1661           len_diff1 = is_udp1 ? len_diff1 : 0;
1662
1663           if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1664                                & good_tcp_udp0 & good_tcp_udp1)))
1665             {
1666               if (is_tcp_udp0)
1667                 {
1668                   if (is_tcp_udp0
1669                       && !(flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
1670                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1671                   good_tcp_udp0 =
1672                     (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1673                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1674                 }
1675               if (is_tcp_udp1)
1676                 {
1677                   if (is_tcp_udp1
1678                       && !(flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
1679                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1680                   good_tcp_udp1 =
1681                     (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1682                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1683                 }
1684             }
1685
1686           good_tcp_udp0 &= len_diff0 >= 0;
1687           good_tcp_udp1 &= len_diff1 >= 0;
1688
1689           leaf0 =
1690             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1691           leaf1 =
1692             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1693
1694           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1695
1696           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1697           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1698
1699           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1700           error0 = (is_tcp_udp0 && !good_tcp_udp0
1701                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1702           error1 = (is_tcp_udp1 && !good_tcp_udp1
1703                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1704
1705           leaf0 =
1706             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1707           leaf1 =
1708             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1709
1710           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1711             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1712           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1713
1714           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1715             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1716           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1717
1718           lb0 = load_balance_get (lbi0);
1719           lb1 = load_balance_get (lbi1);
1720           dpo0 = load_balance_get_bucket_i (lb0, 0);
1721           dpo1 = load_balance_get_bucket_i (lb1, 0);
1722
1723           /*
1724            * Must have a route to source otherwise we drop the packet.
1725            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1726            *
1727            * The checks are:
1728            *  - the source is a recieve => it's from us => bogus, do this
1729            *    first since it sets a different error code.
1730            *  - uRPF check for any route to source - accept if passes.
1731            *  - allow packets destined to the broadcast address from unknown sources
1732            */
1733           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1734                      dpo0->dpoi_type == DPO_RECEIVE) ?
1735                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1736           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1737                      !fib_urpf_check_size (lb0->lb_urpf) &&
1738                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1739                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1740           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1741                      dpo1->dpoi_type == DPO_RECEIVE) ?
1742                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1743           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1744                      !fib_urpf_check_size (lb1->lb_urpf) &&
1745                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1746                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1747
1748         skip_checks:
1749
1750           next0 = lm->local_next_by_ip_protocol[proto0];
1751           next1 = lm->local_next_by_ip_protocol[proto1];
1752
1753           next0 =
1754             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1755           next1 =
1756             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1757
1758           p0->error = error0 ? error_node->errors[error0] : 0;
1759           p1->error = error1 ? error_node->errors[error1] : 0;
1760
1761           if (head_of_feature_arc)
1762             {
1763               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1764                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1765               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1766                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1767             }
1768
1769           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1770                                            n_left_to_next, pi0, pi1,
1771                                            next0, next1);
1772         }
1773
1774       while (n_left_from > 0 && n_left_to_next > 0)
1775         {
1776           vlib_buffer_t *p0;
1777           ip4_header_t *ip0;
1778           udp_header_t *udp0;
1779           ip4_fib_mtrie_t *mtrie0;
1780           ip4_fib_mtrie_leaf_t leaf0;
1781           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1782           i32 len_diff0;
1783           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1784           load_balance_t *lb0;
1785           const dpo_id_t *dpo0;
1786           u32 sw_if_index0;
1787
1788           pi0 = to_next[0] = from[0];
1789           from += 1;
1790           n_left_from -= 1;
1791           to_next += 1;
1792           n_left_to_next -= 1;
1793
1794           next0 = IP_LOCAL_NEXT_DROP;
1795
1796           p0 = vlib_get_buffer (vm, pi0);
1797
1798           ip0 = vlib_buffer_get_current (p0);
1799
1800           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1801
1802           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1803
1804           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1805
1806           fib_index0 =
1807             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1808              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1809
1810           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1811
1812           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1813
1814           /* Treat IP frag packets as "experimental" protocol for now
1815              until support of IP frag reassembly is implemented */
1816           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1817
1818           if (head_of_feature_arc == 0)
1819             {
1820               error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1821               goto skip_check;
1822             }
1823
1824           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1825           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1826
1827           flags0 = p0->flags;
1828
1829           good_tcp_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1830
1831           udp0 = ip4_next_header (ip0);
1832
1833           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1834           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1835
1836           /* Verify UDP length. */
1837           ip_len0 = clib_net_to_host_u16 (ip0->length);
1838           udp_len0 = clib_net_to_host_u16 (udp0->length);
1839
1840           len_diff0 = ip_len0 - udp_len0;
1841
1842           len_diff0 = is_udp0 ? len_diff0 : 0;
1843
1844           if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1845             {
1846               if (is_tcp_udp0)
1847                 {
1848                   if (is_tcp_udp0
1849                       && !(flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
1850                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1851                   good_tcp_udp0 =
1852                     (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1853                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1854                 }
1855             }
1856
1857           good_tcp_udp0 &= len_diff0 >= 0;
1858
1859           leaf0 =
1860             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1861
1862           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1863
1864           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1865
1866           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1867           error0 = (is_tcp_udp0 && !good_tcp_udp0
1868                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1869
1870           leaf0 =
1871             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1872
1873           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1874           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1875
1876           lb0 = load_balance_get (lbi0);
1877           dpo0 = load_balance_get_bucket_i (lb0, 0);
1878
1879           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1880             vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1881
1882           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1883                      dpo0->dpoi_type == DPO_RECEIVE) ?
1884                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1885           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1886                      !fib_urpf_check_size (lb0->lb_urpf) &&
1887                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1888                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1889
1890         skip_check:
1891
1892           next0 = lm->local_next_by_ip_protocol[proto0];
1893
1894           next0 =
1895             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1896
1897           p0->error = error0 ? error_node->errors[error0] : 0;
1898
1899           if (head_of_feature_arc)
1900             {
1901               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1902                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1903             }
1904
1905           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1906                                            n_left_to_next, pi0, next0);
1907
1908         }
1909
1910       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1911     }
1912
1913   return frame->n_vectors;
1914 }
1915
1916 static uword
1917 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1918 {
1919   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1920 }
1921
1922 /* *INDENT-OFF* */
1923 VLIB_REGISTER_NODE (ip4_local_node) =
1924 {
1925   .function = ip4_local,
1926   .name = "ip4-local",
1927   .vector_size = sizeof (u32),
1928   .format_trace = format_ip4_forward_next_trace,
1929   .n_next_nodes = IP_LOCAL_N_NEXT,
1930   .next_nodes =
1931   {
1932     [IP_LOCAL_NEXT_DROP] = "error-drop",
1933     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1934     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1935     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1936 };
1937 /* *INDENT-ON* */
1938
1939 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1940
1941 static uword
1942 ip4_local_end_of_arc (vlib_main_t * vm,
1943                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1944 {
1945   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1946 }
1947
1948 /* *INDENT-OFF* */
1949 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1950   .function = ip4_local_end_of_arc,
1951   .name = "ip4-local-end-of-arc",
1952   .vector_size = sizeof (u32),
1953
1954   .format_trace = format_ip4_forward_next_trace,
1955   .sibling_of = "ip4-local",
1956 };
1957
1958 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1959
1960 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1961   .arc_name = "ip4-local",
1962   .node_name = "ip4-local-end-of-arc",
1963   .runs_before = 0, /* not before any other features */
1964 };
1965 /* *INDENT-ON* */
1966
1967 void
1968 ip4_register_protocol (u32 protocol, u32 node_index)
1969 {
1970   vlib_main_t *vm = vlib_get_main ();
1971   ip4_main_t *im = &ip4_main;
1972   ip_lookup_main_t *lm = &im->lookup_main;
1973
1974   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1975   lm->local_next_by_ip_protocol[protocol] =
1976     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1977 }
1978
1979 static clib_error_t *
1980 show_ip_local_command_fn (vlib_main_t * vm,
1981                           unformat_input_t * input, vlib_cli_command_t * cmd)
1982 {
1983   ip4_main_t *im = &ip4_main;
1984   ip_lookup_main_t *lm = &im->lookup_main;
1985   int i;
1986
1987   vlib_cli_output (vm, "Protocols handled by ip4_local");
1988   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1989     {
1990       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1991         vlib_cli_output (vm, "%d", i);
1992     }
1993   return 0;
1994 }
1995
1996
1997
1998 /*?
1999  * Display the set of protocols handled by the local IPv4 stack.
2000  *
2001  * @cliexpar
2002  * Example of how to display local protocol table:
2003  * @cliexstart{show ip local}
2004  * Protocols handled by ip4_local
2005  * 1
2006  * 17
2007  * 47
2008  * @cliexend
2009 ?*/
2010 /* *INDENT-OFF* */
2011 VLIB_CLI_COMMAND (show_ip_local, static) =
2012 {
2013   .path = "show ip local",
2014   .function = show_ip_local_command_fn,
2015   .short_help = "show ip local",
2016 };
2017 /* *INDENT-ON* */
2018
2019 always_inline uword
2020 ip4_arp_inline (vlib_main_t * vm,
2021                 vlib_node_runtime_t * node,
2022                 vlib_frame_t * frame, int is_glean)
2023 {
2024   vnet_main_t *vnm = vnet_get_main ();
2025   ip4_main_t *im = &ip4_main;
2026   ip_lookup_main_t *lm = &im->lookup_main;
2027   u32 *from, *to_next_drop;
2028   uword n_left_from, n_left_to_next_drop, next_index;
2029   static f64 time_last_seed_change = -1e100;
2030   static u32 hash_seeds[3];
2031   static uword hash_bitmap[256 / BITS (uword)];
2032   f64 time_now;
2033
2034   if (node->flags & VLIB_NODE_FLAG_TRACE)
2035     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2036
2037   time_now = vlib_time_now (vm);
2038   if (time_now - time_last_seed_change > 1e-3)
2039     {
2040       uword i;
2041       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2042                                             sizeof (hash_seeds));
2043       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2044         hash_seeds[i] = r[i];
2045
2046       /* Mark all hash keys as been no-seen before. */
2047       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2048         hash_bitmap[i] = 0;
2049
2050       time_last_seed_change = time_now;
2051     }
2052
2053   from = vlib_frame_vector_args (frame);
2054   n_left_from = frame->n_vectors;
2055   next_index = node->cached_next_index;
2056   if (next_index == IP4_ARP_NEXT_DROP)
2057     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
2058
2059   while (n_left_from > 0)
2060     {
2061       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2062                            to_next_drop, n_left_to_next_drop);
2063
2064       while (n_left_from > 0 && n_left_to_next_drop > 0)
2065         {
2066           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2067           ip_adjacency_t *adj0;
2068           vlib_buffer_t *p0;
2069           ip4_header_t *ip0;
2070           uword bm0;
2071
2072           pi0 = from[0];
2073
2074           p0 = vlib_get_buffer (vm, pi0);
2075
2076           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2077           adj0 = adj_get (adj_index0);
2078           ip0 = vlib_buffer_get_current (p0);
2079
2080           a0 = hash_seeds[0];
2081           b0 = hash_seeds[1];
2082           c0 = hash_seeds[2];
2083
2084           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2085           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2086
2087           if (is_glean)
2088             {
2089               /*
2090                * this is the Glean case, so we are ARPing for the
2091                * packet's destination
2092                */
2093               a0 ^= ip0->dst_address.data_u32;
2094             }
2095           else
2096             {
2097               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2098             }
2099           b0 ^= sw_if_index0;
2100
2101           hash_v3_finalize32 (a0, b0, c0);
2102
2103           c0 &= BITS (hash_bitmap) - 1;
2104           c0 = c0 / BITS (uword);
2105           m0 = (uword) 1 << (c0 % BITS (uword));
2106
2107           bm0 = hash_bitmap[c0];
2108           drop0 = (bm0 & m0) != 0;
2109
2110           /* Mark it as seen. */
2111           hash_bitmap[c0] = bm0 | m0;
2112
2113           from += 1;
2114           n_left_from -= 1;
2115           to_next_drop[0] = pi0;
2116           to_next_drop += 1;
2117           n_left_to_next_drop -= 1;
2118
2119           p0->error =
2120             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2121                          IP4_ARP_ERROR_REQUEST_SENT];
2122
2123           /*
2124            * the adj has been updated to a rewrite but the node the DPO that got
2125            * us here hasn't - yet. no big deal. we'll drop while we wait.
2126            */
2127           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2128             continue;
2129
2130           if (drop0)
2131             continue;
2132
2133           /*
2134            * Can happen if the control-plane is programming tables
2135            * with traffic flowing; at least that's today's lame excuse.
2136            */
2137           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2138               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2139             {
2140               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2141             }
2142           else
2143             /* Send ARP request. */
2144             {
2145               u32 bi0 = 0;
2146               vlib_buffer_t *b0;
2147               ethernet_arp_header_t *h0;
2148               vnet_hw_interface_t *hw_if0;
2149
2150               h0 =
2151                 vlib_packet_template_get_packet (vm,
2152                                                  &im->ip4_arp_request_packet_template,
2153                                                  &bi0);
2154
2155               /* Add rewrite/encap string for ARP packet. */
2156               vnet_rewrite_one_header (adj0[0], h0,
2157                                        sizeof (ethernet_header_t));
2158
2159               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2160
2161               /* Src ethernet address in ARP header. */
2162               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2163                            hw_if0->hw_address,
2164                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2165
2166               if (is_glean)
2167                 {
2168                   /* The interface's source address is stashed in the Glean Adj */
2169                   h0->ip4_over_ethernet[0].ip4 =
2170                     adj0->sub_type.glean.receive_addr.ip4;
2171
2172                   /* Copy in destination address we are requesting. This is the
2173                    * glean case, so it's the packet's destination.*/
2174                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2175                     ip0->dst_address.data_u32;
2176                 }
2177               else
2178                 {
2179                   /* Src IP address in ARP header. */
2180                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2181                                                   &h0->
2182                                                   ip4_over_ethernet[0].ip4))
2183                     {
2184                       /* No source address available */
2185                       p0->error =
2186                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2187                       vlib_buffer_free (vm, &bi0, 1);
2188                       continue;
2189                     }
2190
2191                   /* Copy in destination address we are requesting from the
2192                      incomplete adj */
2193                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2194                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2195                 }
2196
2197               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2198               b0 = vlib_get_buffer (vm, bi0);
2199               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2200
2201               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2202
2203               vlib_set_next_frame_buffer (vm, node,
2204                                           adj0->rewrite_header.next_index,
2205                                           bi0);
2206             }
2207         }
2208
2209       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2210     }
2211
2212   return frame->n_vectors;
2213 }
2214
2215 static uword
2216 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2217 {
2218   return (ip4_arp_inline (vm, node, frame, 0));
2219 }
2220
2221 static uword
2222 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2223 {
2224   return (ip4_arp_inline (vm, node, frame, 1));
2225 }
2226
2227 static char *ip4_arp_error_strings[] = {
2228   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2229   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2230   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2231   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2232   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2233   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2234 };
2235
2236 VLIB_REGISTER_NODE (ip4_arp_node) =
2237 {
2238   .function = ip4_arp,.name = "ip4-arp",.vector_size =
2239     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2240     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2241     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2242   {
2243   [IP4_ARP_NEXT_DROP] = "error-drop",}
2244 ,};
2245
2246 VLIB_REGISTER_NODE (ip4_glean_node) =
2247 {
2248   .function = ip4_glean,.name = "ip4-glean",.vector_size =
2249     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2250     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2251     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2252   {
2253   [IP4_ARP_NEXT_DROP] = "error-drop",}
2254 ,};
2255
2256 #define foreach_notrace_ip4_arp_error           \
2257 _(DROP)                                         \
2258 _(REQUEST_SENT)                                 \
2259 _(REPLICATE_DROP)                               \
2260 _(REPLICATE_FAIL)
2261
2262 clib_error_t *
2263 arp_notrace_init (vlib_main_t * vm)
2264 {
2265   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2266
2267   /* don't trace ARP request packets */
2268 #define _(a)                                    \
2269     vnet_pcap_drop_trace_filter_add_del         \
2270         (rt->errors[IP4_ARP_ERROR_##a],         \
2271          1 /* is_add */);
2272   foreach_notrace_ip4_arp_error;
2273 #undef _
2274   return 0;
2275 }
2276
2277 VLIB_INIT_FUNCTION (arp_notrace_init);
2278
2279
2280 /* Send an ARP request to see if given destination is reachable on given interface. */
2281 clib_error_t *
2282 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2283 {
2284   vnet_main_t *vnm = vnet_get_main ();
2285   ip4_main_t *im = &ip4_main;
2286   ethernet_arp_header_t *h;
2287   ip4_address_t *src;
2288   ip_interface_address_t *ia;
2289   ip_adjacency_t *adj;
2290   vnet_hw_interface_t *hi;
2291   vnet_sw_interface_t *si;
2292   vlib_buffer_t *b;
2293   adj_index_t ai;
2294   u32 bi = 0;
2295
2296   si = vnet_get_sw_interface (vnm, sw_if_index);
2297
2298   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2299     {
2300       return clib_error_return (0, "%U: interface %U down",
2301                                 format_ip4_address, dst,
2302                                 format_vnet_sw_if_index_name, vnm,
2303                                 sw_if_index);
2304     }
2305
2306   src =
2307     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2308   if (!src)
2309     {
2310       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2311       return clib_error_return
2312         (0,
2313          "no matching interface address for destination %U (interface %U)",
2314          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2315          sw_if_index);
2316     }
2317
2318   ip46_address_t nh = {
2319     .ip4 = *dst,
2320   };
2321
2322   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2323                             VNET_LINK_IP4, &nh, sw_if_index);
2324   adj = adj_get (ai);
2325
2326   h = vlib_packet_template_get_packet (vm,
2327                                        &im->ip4_arp_request_packet_template,
2328                                        &bi);
2329
2330   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2331   if (PREDICT_FALSE (!hi->hw_address))
2332     {
2333       return clib_error_return (0, "%U: interface %U do not support ip probe",
2334                                 format_ip4_address, dst,
2335                                 format_vnet_sw_if_index_name, vnm,
2336                                 sw_if_index);
2337     }
2338
2339   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2340                sizeof (h->ip4_over_ethernet[0].ethernet));
2341
2342   h->ip4_over_ethernet[0].ip4 = src[0];
2343   h->ip4_over_ethernet[1].ip4 = dst[0];
2344
2345   b = vlib_get_buffer (vm, bi);
2346   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2347     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2348
2349   /* Add encapsulation string for software interface (e.g. ethernet header). */
2350   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2351   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2352
2353   {
2354     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2355     u32 *to_next = vlib_frame_vector_args (f);
2356     to_next[0] = bi;
2357     f->n_vectors = 1;
2358     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2359   }
2360
2361   adj_unlock (ai);
2362   return /* no error */ 0;
2363 }
2364
/* Next-node indices used by the ip4 rewrite path. */
typedef enum
{
  /* Default disposition before a next node is chosen. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Chosen when an ICMP error must be generated (e.g. TTL expired). */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
2370
2371 always_inline uword
2372 ip4_rewrite_inline (vlib_main_t * vm,
2373                     vlib_node_runtime_t * node,
2374                     vlib_frame_t * frame,
2375                     int do_counters, int is_midchain, int is_mcast)
2376 {
2377   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2378   u32 *from = vlib_frame_vector_args (frame);
2379   u32 n_left_from, n_left_to_next, *to_next, next_index;
2380   vlib_node_runtime_t *error_node =
2381     vlib_node_get_runtime (vm, ip4_input_node.index);
2382
2383   n_left_from = frame->n_vectors;
2384   next_index = node->cached_next_index;
2385   u32 thread_index = vlib_get_thread_index ();
2386
2387   while (n_left_from > 0)
2388     {
2389       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2390
2391       while (n_left_from >= 4 && n_left_to_next >= 2)
2392         {
2393           ip_adjacency_t *adj0, *adj1;
2394           vlib_buffer_t *p0, *p1;
2395           ip4_header_t *ip0, *ip1;
2396           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2397           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2398           u32 tx_sw_if_index0, tx_sw_if_index1;
2399
2400           /* Prefetch next iteration. */
2401           {
2402             vlib_buffer_t *p2, *p3;
2403
2404             p2 = vlib_get_buffer (vm, from[2]);
2405             p3 = vlib_get_buffer (vm, from[3]);
2406
2407             vlib_prefetch_buffer_header (p2, STORE);
2408             vlib_prefetch_buffer_header (p3, STORE);
2409
2410             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2411             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2412           }
2413
2414           pi0 = to_next[0] = from[0];
2415           pi1 = to_next[1] = from[1];
2416
2417           from += 2;
2418           n_left_from -= 2;
2419           to_next += 2;
2420           n_left_to_next -= 2;
2421
2422           p0 = vlib_get_buffer (vm, pi0);
2423           p1 = vlib_get_buffer (vm, pi1);
2424
2425           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2426           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2427
2428           /*
2429            * pre-fetch the per-adjacency counters
2430            */
2431           if (do_counters)
2432             {
2433               vlib_prefetch_combined_counter (&adjacency_counters,
2434                                               thread_index, adj_index0);
2435               vlib_prefetch_combined_counter (&adjacency_counters,
2436                                               thread_index, adj_index1);
2437             }
2438
2439           ip0 = vlib_buffer_get_current (p0);
2440           ip1 = vlib_buffer_get_current (p1);
2441
2442           error0 = error1 = IP4_ERROR_NONE;
2443           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2444
2445           /* Decrement TTL & update checksum.
2446              Works either endian, so no need for byte swap. */
2447           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2448             {
2449               i32 ttl0 = ip0->ttl;
2450
2451               /* Input node should have reject packets with ttl 0. */
2452               ASSERT (ip0->ttl > 0);
2453
2454               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2455               checksum0 += checksum0 >= 0xffff;
2456
2457               ip0->checksum = checksum0;
2458               ttl0 -= 1;
2459               ip0->ttl = ttl0;
2460
2461               /*
2462                * If the ttl drops below 1 when forwarding, generate
2463                * an ICMP response.
2464                */
2465               if (PREDICT_FALSE (ttl0 <= 0))
2466                 {
2467                   error0 = IP4_ERROR_TIME_EXPIRED;
2468                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2469                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2470                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2471                                                0);
2472                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2473                 }
2474
2475               /* Verify checksum. */
2476               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2477                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2478             }
2479           else
2480             {
2481               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2482             }
2483           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2484             {
2485               i32 ttl1 = ip1->ttl;
2486
2487               /* Input node should have reject packets with ttl 0. */
2488               ASSERT (ip1->ttl > 0);
2489
2490               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2491               checksum1 += checksum1 >= 0xffff;
2492
2493               ip1->checksum = checksum1;
2494               ttl1 -= 1;
2495               ip1->ttl = ttl1;
2496
2497               /*
2498                * If the ttl drops below 1 when forwarding, generate
2499                * an ICMP response.
2500                */
2501               if (PREDICT_FALSE (ttl1 <= 0))
2502                 {
2503                   error1 = IP4_ERROR_TIME_EXPIRED;
2504                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2505                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2506                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2507                                                0);
2508                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2509                 }
2510
2511               /* Verify checksum. */
2512               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2513                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2514             }
2515           else
2516             {
2517               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2518             }
2519
2520           /* Rewrite packet header and updates lengths. */
2521           adj0 = adj_get (adj_index0);
2522           adj1 = adj_get (adj_index1);
2523
2524           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2525           rw_len0 = adj0[0].rewrite_header.data_bytes;
2526           rw_len1 = adj1[0].rewrite_header.data_bytes;
2527           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2528           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2529
2530           /* Check MTU of outgoing interface. */
2531           error0 =
2532             (vlib_buffer_length_in_chain (vm, p0) >
2533              adj0[0].
2534              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2535              error0);
2536           error1 =
2537             (vlib_buffer_length_in_chain (vm, p1) >
2538              adj1[0].
2539              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2540              error1);
2541
2542           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2543            * to see the IP headerr */
2544           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2545             {
2546               next0 = adj0[0].rewrite_header.next_index;
2547               p0->current_data -= rw_len0;
2548               p0->current_length += rw_len0;
2549               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2550               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2551
2552               if (PREDICT_FALSE
2553                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2554                 vnet_feature_arc_start (lm->output_feature_arc_index,
2555                                         tx_sw_if_index0, &next0, p0);
2556             }
2557           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2558             {
2559               next1 = adj1[0].rewrite_header.next_index;
2560               p1->current_data -= rw_len1;
2561               p1->current_length += rw_len1;
2562
2563               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2564               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2565
2566               if (PREDICT_FALSE
2567                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2568                 vnet_feature_arc_start (lm->output_feature_arc_index,
2569                                         tx_sw_if_index1, &next1, p1);
2570             }
2571
2572           /* Guess we are only writing on simple Ethernet header. */
2573           vnet_rewrite_two_headers (adj0[0], adj1[0],
2574                                     ip0, ip1, sizeof (ethernet_header_t));
2575
2576           /*
2577            * Bump the per-adjacency counters
2578            */
2579           if (do_counters)
2580             {
2581               vlib_increment_combined_counter
2582                 (&adjacency_counters,
2583                  thread_index,
2584                  adj_index0, 1,
2585                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2586
2587               vlib_increment_combined_counter
2588                 (&adjacency_counters,
2589                  thread_index,
2590                  adj_index1, 1,
2591                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2592             }
2593
2594           if (is_midchain)
2595             {
2596               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2597               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2598             }
2599           if (is_mcast)
2600             {
2601               /*
2602                * copy bytes from the IP address into the MAC rewrite
2603                */
2604               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2605               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2606             }
2607
2608           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2609                                            to_next, n_left_to_next,
2610                                            pi0, pi1, next0, next1);
2611         }
2612
2613       while (n_left_from > 0 && n_left_to_next > 0)
2614         {
2615           ip_adjacency_t *adj0;
2616           vlib_buffer_t *p0;
2617           ip4_header_t *ip0;
2618           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2619           u32 tx_sw_if_index0;
2620
2621           pi0 = to_next[0] = from[0];
2622
2623           p0 = vlib_get_buffer (vm, pi0);
2624
2625           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2626
2627           adj0 = adj_get (adj_index0);
2628
2629           ip0 = vlib_buffer_get_current (p0);
2630
2631           error0 = IP4_ERROR_NONE;
2632           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2633
2634           /* Decrement TTL & update checksum. */
2635           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2636             {
2637               i32 ttl0 = ip0->ttl;
2638
2639               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2640
2641               checksum0 += checksum0 >= 0xffff;
2642
2643               ip0->checksum = checksum0;
2644
2645               ASSERT (ip0->ttl > 0);
2646
2647               ttl0 -= 1;
2648
2649               ip0->ttl = ttl0;
2650
2651               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2652                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2653
2654               if (PREDICT_FALSE (ttl0 <= 0))
2655                 {
2656                   /*
2657                    * If the ttl drops below 1 when forwarding, generate
2658                    * an ICMP response.
2659                    */
2660                   error0 = IP4_ERROR_TIME_EXPIRED;
2661                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2662                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2663                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2664                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2665                                                0);
2666                 }
2667             }
2668           else
2669             {
2670               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2671             }
2672
2673           if (do_counters)
2674             vlib_prefetch_combined_counter (&adjacency_counters,
2675                                             thread_index, adj_index0);
2676
2677           /* Guess we are only writing on simple Ethernet header. */
2678           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2679           if (is_mcast)
2680             {
2681               /*
2682                * copy bytes from the IP address into the MAC rewrite
2683                */
2684               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2685             }
2686
2687           /* Update packet buffer attributes/set output interface. */
2688           rw_len0 = adj0[0].rewrite_header.data_bytes;
2689           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2690
2691           if (do_counters)
2692             vlib_increment_combined_counter
2693               (&adjacency_counters,
2694                thread_index, adj_index0, 1,
2695                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2696
2697           /* Check MTU of outgoing interface. */
2698           error0 = (vlib_buffer_length_in_chain (vm, p0)
2699                     > adj0[0].rewrite_header.max_l3_packet_bytes
2700                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2701
2702           p0->error = error_node->errors[error0];
2703
2704           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2705            * to see the IP headerr */
2706           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2707             {
2708               p0->current_data -= rw_len0;
2709               p0->current_length += rw_len0;
2710               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2711
2712               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2713               next0 = adj0[0].rewrite_header.next_index;
2714
2715               if (is_midchain)
2716                 {
2717                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2718                 }
2719
2720               if (PREDICT_FALSE
2721                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2722                 vnet_feature_arc_start (lm->output_feature_arc_index,
2723                                         tx_sw_if_index0, &next0, p0);
2724
2725             }
2726
2727           from += 1;
2728           n_left_from -= 1;
2729           to_next += 1;
2730           n_left_to_next -= 1;
2731
2732           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2733                                            to_next, n_left_to_next,
2734                                            pi0, next0);
2735         }
2736
2737       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2738     }
2739
2740   /* Need to do trace after rewrites to pick up new packet data. */
2741   if (node->flags & VLIB_NODE_FLAG_TRACE)
2742     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2743
2744   return frame->n_vectors;
2745 }
2746
2747
2748 /** @brief IPv4 rewrite node.
2749     @node ip4-rewrite
2750
2751     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2752     header checksum, fetch the ip adjacency, check the outbound mtu,
2753     apply the adjacency rewrite, and send pkts to the adjacency
2754     rewrite header's rewrite_next_index.
2755
2756     @param vm vlib_main_t corresponding to the current thread
2757     @param node vlib_node_runtime_t
2758     @param frame vlib_frame_t whose contents should be dispatched
2759
2760     @par Graph mechanics: buffer metadata, next index usage
2761
2762     @em Uses:
2763     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2764         - the rewrite adjacency index
2765     - <code>adj->lookup_next_index</code>
2766         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2767           the packet will be dropped.
2768     - <code>adj->rewrite_header</code>
2769         - Rewrite string length, rewrite string, next_index
2770
2771     @em Sets:
2772     - <code>b->current_data, b->current_length</code>
2773         - Updated net of applying the rewrite string
2774
2775     <em>Next Indices:</em>
2776     - <code> adj->rewrite_header.next_index </code>
2777       or @c error-drop
2778 */
2779 static uword
2780 ip4_rewrite (vlib_main_t * vm,
2781              vlib_node_runtime_t * node, vlib_frame_t * frame)
2782 {
2783   if (adj_are_counters_enabled ())
2784     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2785   else
2786     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2787 }
2788
2789 static uword
2790 ip4_midchain (vlib_main_t * vm,
2791               vlib_node_runtime_t * node, vlib_frame_t * frame)
2792 {
2793   if (adj_are_counters_enabled ())
2794     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2795   else
2796     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2797 }
2798
2799 static uword
2800 ip4_rewrite_mcast (vlib_main_t * vm,
2801                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2802 {
2803   if (adj_are_counters_enabled ())
2804     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2805   else
2806     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2807 }
2808
2809 static uword
2810 ip4_mcast_midchain (vlib_main_t * vm,
2811                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2812 {
2813   if (adj_are_counters_enabled ())
2814     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2815   else
2816     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2817 }
2818
2819 /* *INDENT-OFF* */
2820 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2821   .function = ip4_rewrite,
2822   .name = "ip4-rewrite",
2823   .vector_size = sizeof (u32),
2824
2825   .format_trace = format_ip4_rewrite_trace,
2826
2827   .n_next_nodes = 2,
2828   .next_nodes = {
2829     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2830     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2831   },
2832 };
2833 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2834
2835 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2836   .function = ip4_rewrite_mcast,
2837   .name = "ip4-rewrite-mcast",
2838   .vector_size = sizeof (u32),
2839
2840   .format_trace = format_ip4_rewrite_trace,
2841   .sibling_of = "ip4-rewrite",
2842 };
2843 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2844
2845 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2846   .function = ip4_mcast_midchain,
2847   .name = "ip4-mcast-midchain",
2848   .vector_size = sizeof (u32),
2849
2850   .format_trace = format_ip4_rewrite_trace,
2851   .sibling_of = "ip4-rewrite",
2852 };
2853 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2854
2855 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2856   .function = ip4_midchain,
2857   .name = "ip4-midchain",
2858   .vector_size = sizeof (u32),
2859   .format_trace = format_ip4_forward_next_trace,
2860   .sibling_of =  "ip4-rewrite",
2861 };
2862 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2863 /* *INDENT-ON* */
2864
2865 static clib_error_t *
2866 add_del_interface_table (vlib_main_t * vm,
2867                          unformat_input_t * input, vlib_cli_command_t * cmd)
2868 {
2869   vnet_main_t *vnm = vnet_get_main ();
2870   ip_interface_address_t *ia;
2871   clib_error_t *error = 0;
2872   u32 sw_if_index, table_id;
2873
2874   sw_if_index = ~0;
2875
2876   if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2877     {
2878       error = clib_error_return (0, "unknown interface `%U'",
2879                                  format_unformat_error, input);
2880       goto done;
2881     }
2882
2883   if (unformat (input, "%d", &table_id))
2884     ;
2885   else
2886     {
2887       error = clib_error_return (0, "expected table id `%U'",
2888                                  format_unformat_error, input);
2889       goto done;
2890     }
2891
2892   /*
2893    * If the interface already has in IP address, then a change int
2894    * VRF is not allowed. The IP address applied must first be removed.
2895    * We do not do that automatically here, since VPP has no knowledge
2896    * of whether thoses subnets are valid in the destination VRF.
2897    */
2898   /* *INDENT-OFF* */
2899   foreach_ip_interface_address (&ip4_main.lookup_main,
2900                                 ia, sw_if_index,
2901                                 1 /* honor unnumbered */,
2902   ({
2903       ip4_address_t * a;
2904
2905       a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2906       error = clib_error_return (0, "interface %U has address %U",
2907                                  format_vnet_sw_if_index_name, vnm,
2908                                  sw_if_index,
2909                                  format_ip4_address, a);
2910       goto done;
2911    }));
2912    /* *INDENT-ON* */
2913
2914 {
2915   ip4_main_t *im = &ip4_main;
2916   u32 fib_index;
2917
2918   fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2919
2920   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2921   im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2922
2923   fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2924   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2925   im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2926 }
2927
2928 done:
2929 return error;
2930 }
2931
2932 /*?
2933  * Place the indicated interface into the supplied IPv4 FIB table (also known
2934  * as a VRF). If the FIB table does not exist, this command creates it. To
2935  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2936  * FIB table will only be displayed if a route has been added to the table, or
2937  * an IP Address is assigned to an interface in the table (which adds a route
2938  * automatically).
2939  *
2940  * @note IP addresses added after setting the interface IP table are added to
2941  * the indicated FIB table. If an IP address is added prior to changing the
2942  * table then this is an error. The control plane must remove these addresses
2943  * first and then change the table. VPP will not automatically move the
2944  * addresses from the old to the new table as it does not know the validity
2945  * of such a change.
2946  *
2947  * @cliexpar
2948  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2949  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2950  ?*/
2951 /* *INDENT-OFF* */
2952 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2953 {
2954   .path = "set interface ip table",
2955   .function = add_del_interface_table,
2956   .short_help = "set interface ip table <interface> <table-id>",
2957 };
2958 /* *INDENT-ON* */
2959
2960 int
2961 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2962 {
2963   ip4_fib_mtrie_t *mtrie0;
2964   ip4_fib_mtrie_leaf_t leaf0;
2965   u32 lbi0;
2966
2967   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2968
2969   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2970   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2971   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2972
2973   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2974
2975   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2976 }
2977
2978 static clib_error_t *
2979 test_lookup_command_fn (vlib_main_t * vm,
2980                         unformat_input_t * input, vlib_cli_command_t * cmd)
2981 {
2982   ip4_fib_t *fib;
2983   u32 table_id = 0;
2984   f64 count = 1;
2985   u32 n;
2986   int i;
2987   ip4_address_t ip4_base_address;
2988   u64 errors = 0;
2989
2990   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2991     {
2992       if (unformat (input, "table %d", &table_id))
2993         {
2994           /* Make sure the entry exists. */
2995           fib = ip4_fib_get (table_id);
2996           if ((fib) && (fib->index != table_id))
2997             return clib_error_return (0, "<fib-index> %d does not exist",
2998                                       table_id);
2999         }
3000       else if (unformat (input, "count %f", &count))
3001         ;
3002
3003       else if (unformat (input, "%U",
3004                          unformat_ip4_address, &ip4_base_address))
3005         ;
3006       else
3007         return clib_error_return (0, "unknown input `%U'",
3008                                   format_unformat_error, input);
3009     }
3010
3011   n = count;
3012
3013   for (i = 0; i < n; i++)
3014     {
3015       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3016         errors++;
3017
3018       ip4_base_address.as_u32 =
3019         clib_host_to_net_u32 (1 +
3020                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3021     }
3022
3023   if (errors)
3024     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3025   else
3026     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3027
3028   return 0;
3029 }
3030
3031 /*?
3032  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3033  * given FIB table to determine if there is a conflict with the
3034  * adjacency table. The fib-id can be determined by using the
3035  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3036  * of 0 is used.
3037  *
3038  * @todo This command uses fib-id, other commands use table-id (not
3039  * just a name, they are different indexes). Would like to change this
3040  * to table-id for consistency.
3041  *
3042  * @cliexpar
3043  * Example of how to run the test lookup command:
3044  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3045  * No errors in 2 lookups
3046  * @cliexend
3047 ?*/
3048 /* *INDENT-OFF* */
3049 VLIB_CLI_COMMAND (lookup_test_command, static) =
3050 {
3051   .path = "test lookup",
3052   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3053   .function = test_lookup_command_fn,
3054 };
3055 /* *INDENT-ON* */
3056
3057 int
3058 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3059 {
3060   u32 fib_index;
3061
3062   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
3063
3064   if (~0 == fib_index)
3065     return VNET_API_ERROR_NO_SUCH_FIB;
3066
3067   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
3068                                   flow_hash_config);
3069
3070   return 0;
3071 }
3072
3073 static clib_error_t *
3074 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3075                              unformat_input_t * input,
3076                              vlib_cli_command_t * cmd)
3077 {
3078   int matched = 0;
3079   u32 table_id = 0;
3080   u32 flow_hash_config = 0;
3081   int rv;
3082
3083   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3084     {
3085       if (unformat (input, "table %d", &table_id))
3086         matched = 1;
3087 #define _(a,v) \
3088     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3089       foreach_flow_hash_bit
3090 #undef _
3091         else
3092         break;
3093     }
3094
3095   if (matched == 0)
3096     return clib_error_return (0, "unknown input `%U'",
3097                               format_unformat_error, input);
3098
3099   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3100   switch (rv)
3101     {
3102     case 0:
3103       break;
3104
3105     case VNET_API_ERROR_NO_SUCH_FIB:
3106       return clib_error_return (0, "no such FIB table %d", table_id);
3107
3108     default:
3109       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3110       break;
3111     }
3112
3113   return 0;
3114 }
3115
3116 /*?
3117  * Configure the set of IPv4 fields used by the flow hash.
3118  *
3119  * @cliexpar
3120  * Example of how to set the flow hash on a given table:
3121  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3122  * Example of how to display the configured flow hash:
3123  * @cliexstart{show ip fib}
3124  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3125  * 0.0.0.0/0
3126  *   unicast-ip4-chain
3127  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3128  *     [0] [@0]: dpo-drop ip6
3129  * 0.0.0.0/32
3130  *   unicast-ip4-chain
3131  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3132  *     [0] [@0]: dpo-drop ip6
3133  * 224.0.0.0/8
3134  *   unicast-ip4-chain
3135  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3136  *     [0] [@0]: dpo-drop ip6
3137  * 6.0.1.2/32
3138  *   unicast-ip4-chain
3139  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3140  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3141  * 7.0.0.1/32
3142  *   unicast-ip4-chain
3143  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3144  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3145  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3146  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3147  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3148  * 240.0.0.0/8
3149  *   unicast-ip4-chain
3150  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3151  *     [0] [@0]: dpo-drop ip6
3152  * 255.255.255.255/32
3153  *   unicast-ip4-chain
3154  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3155  *     [0] [@0]: dpo-drop ip6
3156  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3157  * 0.0.0.0/0
3158  *   unicast-ip4-chain
3159  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3160  *     [0] [@0]: dpo-drop ip6
3161  * 0.0.0.0/32
3162  *   unicast-ip4-chain
3163  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3164  *     [0] [@0]: dpo-drop ip6
3165  * 172.16.1.0/24
3166  *   unicast-ip4-chain
3167  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3168  *     [0] [@4]: ipv4-glean: af_packet0
3169  * 172.16.1.1/32
3170  *   unicast-ip4-chain
3171  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3172  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3173  * 172.16.1.2/32
3174  *   unicast-ip4-chain
3175  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3176  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3177  * 172.16.2.0/24
3178  *   unicast-ip4-chain
3179  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3180  *     [0] [@4]: ipv4-glean: af_packet1
3181  * 172.16.2.1/32
3182  *   unicast-ip4-chain
3183  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3184  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3185  * 224.0.0.0/8
3186  *   unicast-ip4-chain
3187  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3188  *     [0] [@0]: dpo-drop ip6
3189  * 240.0.0.0/8
3190  *   unicast-ip4-chain
3191  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3192  *     [0] [@0]: dpo-drop ip6
3193  * 255.255.255.255/32
3194  *   unicast-ip4-chain
3195  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3196  *     [0] [@0]: dpo-drop ip6
3197  * @cliexend
3198 ?*/
3199 /* *INDENT-OFF* */
3200 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3201 {
3202   .path = "set ip flow-hash",
3203   .short_help =
3204   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3205   .function = set_ip_flow_hash_command_fn,
3206 };
3207 /* *INDENT-ON* */
3208
3209 int
3210 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3211                              u32 table_index)
3212 {
3213   vnet_main_t *vnm = vnet_get_main ();
3214   vnet_interface_main_t *im = &vnm->interface_main;
3215   ip4_main_t *ipm = &ip4_main;
3216   ip_lookup_main_t *lm = &ipm->lookup_main;
3217   vnet_classify_main_t *cm = &vnet_classify_main;
3218   ip4_address_t *if_addr;
3219
3220   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3221     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3222
3223   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3224     return VNET_API_ERROR_NO_SUCH_ENTRY;
3225
3226   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3227   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3228
3229   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3230
3231   if (NULL != if_addr)
3232     {
3233       fib_prefix_t pfx = {
3234         .fp_len = 32,
3235         .fp_proto = FIB_PROTOCOL_IP4,
3236         .fp_addr.ip4 = *if_addr,
3237       };
3238       u32 fib_index;
3239
3240       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3241                                                        sw_if_index);
3242
3243
3244       if (table_index != (u32) ~ 0)
3245         {
3246           dpo_id_t dpo = DPO_INVALID;
3247
3248           dpo_set (&dpo,
3249                    DPO_CLASSIFY,
3250                    DPO_PROTO_IP4,
3251                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3252
3253           fib_table_entry_special_dpo_add (fib_index,
3254                                            &pfx,
3255                                            FIB_SOURCE_CLASSIFY,
3256                                            FIB_ENTRY_FLAG_NONE, &dpo);
3257           dpo_reset (&dpo);
3258         }
3259       else
3260         {
3261           fib_table_entry_special_remove (fib_index,
3262                                           &pfx, FIB_SOURCE_CLASSIFY);
3263         }
3264     }
3265
3266   return 0;
3267 }
3268
3269 static clib_error_t *
3270 set_ip_classify_command_fn (vlib_main_t * vm,
3271                             unformat_input_t * input,
3272                             vlib_cli_command_t * cmd)
3273 {
3274   u32 table_index = ~0;
3275   int table_index_set = 0;
3276   u32 sw_if_index = ~0;
3277   int rv;
3278
3279   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3280     {
3281       if (unformat (input, "table-index %d", &table_index))
3282         table_index_set = 1;
3283       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3284                          vnet_get_main (), &sw_if_index))
3285         ;
3286       else
3287         break;
3288     }
3289
3290   if (table_index_set == 0)
3291     return clib_error_return (0, "classify table-index must be specified");
3292
3293   if (sw_if_index == ~0)
3294     return clib_error_return (0, "interface / subif must be specified");
3295
3296   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3297
3298   switch (rv)
3299     {
3300     case 0:
3301       break;
3302
3303     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3304       return clib_error_return (0, "No such interface");
3305
3306     case VNET_API_ERROR_NO_SUCH_ENTRY:
3307       return clib_error_return (0, "No such classifier table");
3308     }
3309   return 0;
3310 }
3311
3312 /*?
3313  * Assign a classification table to an interface. The classification
3314  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3315  * commands. Once the table is created, use this command to filter packets
3316  * on an interface.
3317  *
3318  * @cliexpar
3319  * Example of how to assign a classification table to an interface:
3320  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3321 ?*/
3322 /* *INDENT-OFF* */
3323 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3324 {
3325     .path = "set ip classify",
3326     .short_help =
3327     "set ip classify intfc <interface> table-index <classify-idx>",
3328     .function = set_ip_classify_command_fn,
3329 };
3330 /* *INDENT-ON* */
3331
3332 /*
3333  * fd.io coding-style-patch-verification: ON
3334  *
3335  * Local Variables:
3336  * eval: (c-set-style "gnu")
3337  * End:
3338  */