DVR: run L3 output features
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 /**
57  * @file
58  * @brief IPv4 Forwarding.
59  *
60  * This file contains the source code for IPv4 forwarding.
61  */
62
/**
 * Shared worker behind the IPv4 lookup node function.
 *
 * For every buffer index in @a frame this:
 *  - obtains a load-balance index, either by walking the FIB mtrie with the
 *    packet's destination address or, when
 *    @a lookup_for_responses_to_locally_received_packets is non-zero, by
 *    reading vnet_buffer(b)->ip.adj_index[VLIB_RX];
 *  - selects one bucket (DPO) of that load-balance, hashing the IP header
 *    only when the load-balance has more than one bucket;
 *  - writes the DPO index to vnet_buffer(b)->ip.adj_index[VLIB_TX] and the
 *    hash (or 0) to vnet_buffer(b)->ip.flow_hash;
 *  - increments the load-balance "to" combined counter by one packet and the
 *    buffer chain length;
 *  - enqueues the buffer to the DPO's dpoi_next_node.
 *
 * @param vm     vlib main of the calling thread
 * @param node   runtime of the node being dispatched
 * @param frame  frame holding the buffer indices to process
 * @param lookup_for_responses_to_locally_received_packets
 *               non-zero skips the mtrie walk (see above)
 * @return frame->n_vectors
 */
63 always_inline uword
64 ip4_lookup_inline (vlib_main_t * vm,
65                    vlib_node_runtime_t * node,
66                    vlib_frame_t * frame,
67                    int lookup_for_responses_to_locally_received_packets)
68 {
69   ip4_main_t *im = &ip4_main;
70   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
71   u32 n_left_from, n_left_to_next, *from, *to_next;
72   ip_lookup_next_t next;
73   u32 thread_index = vlib_get_thread_index ();
74
75   from = vlib_frame_vector_args (frame);
76   n_left_from = frame->n_vectors;
     /* Start by speculating that packets go where the last ones went. */
77   next = node->cached_next_index;
78
79   while (n_left_from > 0)
80     {
81       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
82
         /*
          * Quad loop: four buffers per pass. Eight must remain because
          * from[4..7] are dereferenced for prefetching below.
          */
83       while (n_left_from >= 8 && n_left_to_next >= 4)
84         {
85           vlib_buffer_t *p0, *p1, *p2, *p3;
86           ip4_header_t *ip0, *ip1, *ip2, *ip3;
87           ip_lookup_next_t next0, next1, next2, next3;
88           const load_balance_t *lb0, *lb1, *lb2, *lb3;
89           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
90           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
91           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
92           u32 pi0, fib_index0, lb_index0;
93           u32 pi1, fib_index1, lb_index1;
94           u32 pi2, fib_index2, lb_index2;
95           u32 pi3, fib_index3, lb_index3;
96           flow_hash_config_t flow_hash_config0, flow_hash_config1;
97           flow_hash_config_t flow_hash_config2, flow_hash_config3;
98           u32 hash_c0, hash_c1, hash_c2, hash_c3;
99           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
100
101           /* Prefetch next iteration. */
102           {
103             vlib_buffer_t *p4, *p5, *p6, *p7;
104
105             p4 = vlib_get_buffer (vm, from[4]);
106             p5 = vlib_get_buffer (vm, from[5]);
107             p6 = vlib_get_buffer (vm, from[6]);
108             p7 = vlib_get_buffer (vm, from[7]);
109
110             vlib_prefetch_buffer_header (p4, LOAD);
111             vlib_prefetch_buffer_header (p5, LOAD);
112             vlib_prefetch_buffer_header (p6, LOAD);
113             vlib_prefetch_buffer_header (p7, LOAD);
114
115             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
116             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
117             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
118             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
119           }
120
              /* Speculatively place all four buffers in the current frame. */
121           pi0 = to_next[0] = from[0];
122           pi1 = to_next[1] = from[1];
123           pi2 = to_next[2] = from[2];
124           pi3 = to_next[3] = from[3];
125
126           from += 4;
127           to_next += 4;
128           n_left_to_next -= 4;
129           n_left_from -= 4;
130
131           p0 = vlib_get_buffer (vm, pi0);
132           p1 = vlib_get_buffer (vm, pi1);
133           p2 = vlib_get_buffer (vm, pi2);
134           p3 = vlib_get_buffer (vm, pi3);
135
136           ip0 = vlib_buffer_get_current (p0);
137           ip1 = vlib_buffer_get_current (p1);
138           ip2 = vlib_buffer_get_current (p2);
139           ip3 = vlib_buffer_get_current (p3);
140
141           dst_addr0 = &ip0->dst_address;
142           dst_addr1 = &ip1->dst_address;
143           dst_addr2 = &ip2->dst_address;
144           dst_addr3 = &ip3->dst_address;
145
              /*
               * FIB selection: default to the FIB index recorded for the RX
               * interface; a sw_if_index[VLIB_TX] other than ~0 overrides it
               * and is used directly as the FIB index.
               */
146           fib_index0 =
147             vec_elt (im->fib_index_by_sw_if_index,
148                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
149           fib_index1 =
150             vec_elt (im->fib_index_by_sw_if_index,
151                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
152           fib_index2 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
158           fib_index0 =
159             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
160              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
161           fib_index1 =
162             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
163              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
164           fib_index2 =
165             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
167           fib_index3 =
168             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
170
171
              /*
               * Mtrie walk in three stages (step_one, then steps 2 and 3).
               * Each stage is issued for all four packets before the next
               * stage starts. The whole walk is skipped when the caller asked
               * for the locally-received-response shortcut.
               */
172           if (!lookup_for_responses_to_locally_received_packets)
173             {
174               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
175               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
176               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
177               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
178
179               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
180               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
181               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
182               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
183             }
184
185           if (!lookup_for_responses_to_locally_received_packets)
186             {
187               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
188               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
189               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
190               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
191             }
192
193           if (!lookup_for_responses_to_locally_received_packets)
194             {
195               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
196               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
197               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
198               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
199             }
200
              /*
               * Load-balance index: taken from the buffer's RX adjacency slot
               * in shortcut mode, otherwise from the final mtrie leaf.
               */
201           if (lookup_for_responses_to_locally_received_packets)
202             {
203               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
204               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
205               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
206               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
207             }
208           else
209             {
210               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
211               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
212               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
213               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
214             }
215
              /* A zero load-balance index is asserted never to occur. */
216           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
217           lb0 = load_balance_get (lb_index0);
218           lb1 = load_balance_get (lb_index1);
219           lb2 = load_balance_get (lb_index2);
220           lb3 = load_balance_get (lb_index3);
221
              /* The bucket masking below relies on a power-of-two bucket count. */
222           ASSERT (lb0->lb_n_buckets > 0);
223           ASSERT (is_pow2 (lb0->lb_n_buckets));
224           ASSERT (lb1->lb_n_buckets > 0);
225           ASSERT (is_pow2 (lb1->lb_n_buckets));
226           ASSERT (lb2->lb_n_buckets > 0);
227           ASSERT (is_pow2 (lb2->lb_n_buckets));
228           ASSERT (lb3->lb_n_buckets > 0);
229           ASSERT (is_pow2 (lb3->lb_n_buckets));
230
231           /* Use flow hash to compute multipath adjacency. */
              /*
               * The stored flow hash is reset to 0 first and only computed
               * when the load-balance has more than one bucket; single-bucket
               * load-balances take bucket 0 without hashing.
               */
232           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
233           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
234           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
235           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
236           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
237             {
238               flow_hash_config0 = lb0->lb_hash_config;
239               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, flow_hash_config0);
241               dpo0 =
242                 load_balance_get_fwd_bucket (lb0,
243                                              (hash_c0 &
244                                               (lb0->lb_n_buckets_minus_1)));
245             }
246           else
247             {
248               dpo0 = load_balance_get_bucket_i (lb0, 0);
249             }
250           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
251             {
252               flow_hash_config1 = lb1->lb_hash_config;
253               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
254                 ip4_compute_flow_hash (ip1, flow_hash_config1);
255               dpo1 =
256                 load_balance_get_fwd_bucket (lb1,
257                                              (hash_c1 &
258                                               (lb1->lb_n_buckets_minus_1)));
259             }
260           else
261             {
262               dpo1 = load_balance_get_bucket_i (lb1, 0);
263             }
264           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
265             {
266               flow_hash_config2 = lb2->lb_hash_config;
267               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
268                 ip4_compute_flow_hash (ip2, flow_hash_config2);
269               dpo2 =
270                 load_balance_get_fwd_bucket (lb2,
271                                              (hash_c2 &
272                                               (lb2->lb_n_buckets_minus_1)));
273             }
274           else
275             {
276               dpo2 = load_balance_get_bucket_i (lb2, 0);
277             }
278           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
279             {
280               flow_hash_config3 = lb3->lb_hash_config;
281               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
282                 ip4_compute_flow_hash (ip3, flow_hash_config3);
283               dpo3 =
284                 load_balance_get_fwd_bucket (lb3,
285                                              (hash_c3 &
286                                               (lb3->lb_n_buckets_minus_1)));
287             }
288           else
289             {
290               dpo3 = load_balance_get_bucket_i (lb3, 0);
291             }
292
              /* Next node and TX adjacency slot both come from the chosen DPO. */
293           next0 = dpo0->dpoi_next_node;
294           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
295           next1 = dpo1->dpoi_next_node;
296           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
297           next2 = dpo2->dpoi_next_node;
298           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
299           next3 = dpo3->dpoi_next_node;
300           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
301
              /* One packet plus the full chain length, per load-balance. */
302           vlib_increment_combined_counter
303             (cm, thread_index, lb_index0, 1,
304              vlib_buffer_length_in_chain (vm, p0));
305           vlib_increment_combined_counter
306             (cm, thread_index, lb_index1, 1,
307              vlib_buffer_length_in_chain (vm, p1));
308           vlib_increment_combined_counter
309             (cm, thread_index, lb_index2, 1,
310              vlib_buffer_length_in_chain (vm, p2));
311           vlib_increment_combined_counter
312             (cm, thread_index, lb_index3, 1,
313              vlib_buffer_length_in_chain (vm, p3));
314
              /*
               * NOTE(review): macro defined outside this file; presumably it
               * repairs the speculative enqueue for any buffer whose nextN
               * differs from `next` - confirm against the vlib headers.
               */
315           vlib_validate_buffer_enqueue_x4 (vm, node, next,
316                                            to_next, n_left_to_next,
317                                            pi0, pi1, pi2, pi3,
318                                            next0, next1, next2, next3);
319         }
320
          /* Single loop: same processing, one buffer at a time. */
321       while (n_left_from > 0 && n_left_to_next > 0)
322         {
323           vlib_buffer_t *p0;
324           ip4_header_t *ip0;
325           ip_lookup_next_t next0;
326           const load_balance_t *lb0;
327           ip4_fib_mtrie_t *mtrie0;
328           ip4_fib_mtrie_leaf_t leaf0;
329           ip4_address_t *dst_addr0;
330           u32 pi0, fib_index0, lbi0;
331           flow_hash_config_t flow_hash_config0;
332           const dpo_id_t *dpo0;
333           u32 hash_c0;
334
335           pi0 = from[0];
336           to_next[0] = pi0;
337
338           p0 = vlib_get_buffer (vm, pi0);
339
340           ip0 = vlib_buffer_get_current (p0);
341
342           dst_addr0 = &ip0->dst_address;
343
              /* RX-interface FIB unless sw_if_index[VLIB_TX] names another. */
344           fib_index0 =
345             vec_elt (im->fib_index_by_sw_if_index,
346                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
347           fib_index0 =
348             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
349              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
350
351           if (!lookup_for_responses_to_locally_received_packets)
352             {
353               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
354
355               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
356             }
357
358           if (!lookup_for_responses_to_locally_received_packets)
359             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
360
361           if (!lookup_for_responses_to_locally_received_packets)
362             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
363
364           if (lookup_for_responses_to_locally_received_packets)
365             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
366           else
367             {
368               /* Handle default route. */
369               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
370             }
371
372           ASSERT (lbi0);
373           lb0 = load_balance_get (lbi0);
374
375           ASSERT (lb0->lb_n_buckets > 0);
376           ASSERT (is_pow2 (lb0->lb_n_buckets));
377
378           /* Use flow hash to compute multipath adjacency. */
379           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
380           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
381             {
382               flow_hash_config0 = lb0->lb_hash_config;
383
384               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
385                 ip4_compute_flow_hash (ip0, flow_hash_config0);
386               dpo0 =
387                 load_balance_get_fwd_bucket (lb0,
388                                              (hash_c0 &
389                                               (lb0->lb_n_buckets_minus_1)));
390             }
391           else
392             {
393               dpo0 = load_balance_get_bucket_i (lb0, 0);
394             }
395
396           next0 = dpo0->dpoi_next_node;
397           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
398
399           vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
400                                            vlib_buffer_length_in_chain (vm,
401                                                                         p0));
402
403           from += 1;
404           to_next += 1;
405           n_left_to_next -= 1;
406           n_left_from -= 1;
407
              /*
               * Mis-speculation: give back the slot just taken, close the
               * current frame, switch to the frame for next0 and enqueue the
               * buffer there. `next` stays switched for later buffers.
               */
408           if (PREDICT_FALSE (next0 != next))
409             {
410               n_left_to_next += 1;
411               vlib_put_next_frame (vm, node, next, n_left_to_next);
412               next = next0;
413               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
414               to_next[0] = pi0;
415               to_next += 1;
416               n_left_to_next -= 1;
417             }
418         }
419
420       vlib_put_next_frame (vm, node, next, n_left_to_next);
421     }
422
      /* Tracing runs after forwarding, once for the whole frame. */
423   if (node->flags & VLIB_NODE_FLAG_TRACE)
424     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
425
426   return frame->n_vectors;
427 }
428
429 /** @brief IPv4 lookup node.
430     @node ip4-lookup
431
432     This is the main IPv4 lookup dispatch node.
433
434     @param vm vlib_main_t corresponding to the current thread
435     @param node vlib_node_runtime_t
436     @param frame vlib_frame_t whose contents should be dispatched
437
438     @par Graph mechanics: buffer metadata, next index usage
439
440     @em Uses:
441     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
442         - Indicates the @c sw_if_index value of the interface that the
443           packet was received on.
444     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
445         - When the value is @c ~0 then the node performs a longest prefix
446           match (LPM) for the packet destination address in the FIB attached
447           to the receive interface.
448         - Otherwise perform LPM for the packet destination address in the
449           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
450           value (0, 1, ...) and not a VRF id.
451
452     @em Sets:
453     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
454         - The lookup result adjacency index.
455
456     <em>Next Index:</em>
457     - Dispatches the packet to the node index found in
458       ip_adjacency_t @c adj->lookup_next_index
459       (where @c adj is the lookup result adjacency).
460 */
461 static uword
462 ip4_lookup (vlib_main_t * vm,
463             vlib_node_runtime_t * node, vlib_frame_t * frame)
464 {
465   return ip4_lookup_inline (vm, node, frame,
466                             /* lookup_for_responses_to_locally_received_packets */
467                             0);
468
469 }
470
/*
 * Forward declaration: the trace formatter is referenced by the node
 * registrations that follow, before any definition of it in this part of the
 * file.
 */
471 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
472
/*
 * Graph node registration for "ip4-lookup".
 *  - dispatch function: ip4_lookup
 *  - frame elements are u32 values (vector_size = sizeof (u32))
 *  - next nodes: IP_LOOKUP_N_NEXT entries taken from IP4_LOOKUP_NEXT_NODES
 *  - trace records are rendered by format_ip4_lookup_trace
 */
473 /* *INDENT-OFF* */
474 VLIB_REGISTER_NODE (ip4_lookup_node) =
475 {
476   .function = ip4_lookup,
477   .name = "ip4-lookup",
478   .vector_size = sizeof (u32),
479   .format_trace = format_ip4_lookup_trace,
480   .n_next_nodes = IP_LOOKUP_N_NEXT,
481   .next_nodes = IP4_LOOKUP_NEXT_NODES,
482 };
483 /* *INDENT-ON* */
484
/*
 * NOTE(review): macro defined outside this file; presumably it emits
 * per-CPU-architecture variants of ip4_lookup for this node - confirm
 * against the vlib headers.
 */
485 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
486
/**
 * Node function for "ip4-load-balance".
 *
 * Each buffer arrives with a load-balance index in
 * vnet_buffer(b)->ip.adj_index[VLIB_TX]. The function selects one bucket
 * (DPO) of that load-balance, overwrites adj_index[VLIB_TX] with the DPO's
 * index, increments the load-balance "via" combined counter by one packet
 * and the buffer chain length, and enqueues the buffer to the DPO's
 * dpoi_next_node.
 *
 * Bucket selection when the load-balance has more than one bucket:
 *  - a non-zero flow hash already stored on the buffer is shifted right by
 *    one bit, stored back and used;
 *  - otherwise a hash is computed from the IP header with the
 *    load-balance's hash config and stored on the buffer.
 * A single-bucket load-balance always yields bucket 0 and leaves the stored
 * flow hash untouched.
 *
 * @param vm     vlib main of the calling thread
 * @param node   runtime of the node being dispatched
 * @param frame  frame holding the buffer indices to process
 * @return frame->n_vectors
 */
487 always_inline uword
488 ip4_load_balance (vlib_main_t * vm,
489                   vlib_node_runtime_t * node, vlib_frame_t * frame)
490 {
491   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
492   u32 n_left_from, n_left_to_next, *from, *to_next;
493   ip_lookup_next_t next;
494   u32 thread_index = vlib_get_thread_index ();
495
496   from = vlib_frame_vector_args (frame);
497   n_left_from = frame->n_vectors;
498   next = node->cached_next_index;
499
      /* Tracing is done up front, before adj_index[VLIB_TX] is rewritten. */
500   if (node->flags & VLIB_NODE_FLAG_TRACE)
501     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
502
503   while (n_left_from > 0)
504     {
505       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
506
507
          /*
           * Dual loop: two buffers per pass. Four must remain because
           * from[2] and from[3] are dereferenced for prefetching below.
           */
508       while (n_left_from >= 4 && n_left_to_next >= 2)
509         {
510           ip_lookup_next_t next0, next1;
511           const load_balance_t *lb0, *lb1;
512           vlib_buffer_t *p0, *p1;
513           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
514           const ip4_header_t *ip0, *ip1;
515           const dpo_id_t *dpo0, *dpo1;
516
517           /* Prefetch next iteration. */
518           {
519             vlib_buffer_t *p2, *p3;
520
521             p2 = vlib_get_buffer (vm, from[2]);
522             p3 = vlib_get_buffer (vm, from[3]);
523
524             vlib_prefetch_buffer_header (p2, STORE);
525             vlib_prefetch_buffer_header (p3, STORE);
526
527             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
528             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
529           }
530
              /* Speculatively place both buffers in the current frame. */
531           pi0 = to_next[0] = from[0];
532           pi1 = to_next[1] = from[1];
533
534           from += 2;
535           n_left_from -= 2;
536           to_next += 2;
537           n_left_to_next -= 2;
538
539           p0 = vlib_get_buffer (vm, pi0);
540           p1 = vlib_get_buffer (vm, pi1);
541
542           ip0 = vlib_buffer_get_current (p0);
543           ip1 = vlib_buffer_get_current (p1);
              /* On entry the TX adjacency slot holds a load-balance index. */
544           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
545           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
546
547           lb0 = load_balance_get (lbi0);
548           lb1 = load_balance_get (lbi1);
549
550           /*
551            * this node is for via FIBs we can re-use the hash value from the
552            * to node if present.
553            * We don't want to use the same hash value at each level in the recursion
554            * graph as that would lead to polarisation
555            */
556           hc0 = hc1 = 0;
557
558           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
559             {
                  /* Reuse an existing hash, shifted so this level differs. */
560               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
561                 {
562                   hc0 = vnet_buffer (p0)->ip.flow_hash =
563                     vnet_buffer (p0)->ip.flow_hash >> 1;
564                 }
565               else
566                 {
567                   hc0 = vnet_buffer (p0)->ip.flow_hash =
568                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
569                 }
570               dpo0 = load_balance_get_fwd_bucket
571                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
572             }
573           else
574             {
575               dpo0 = load_balance_get_bucket_i (lb0, 0);
576             }
577           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
578             {
579               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
580                 {
581                   hc1 = vnet_buffer (p1)->ip.flow_hash =
582                     vnet_buffer (p1)->ip.flow_hash >> 1;
583                 }
584               else
585                 {
586                   hc1 = vnet_buffer (p1)->ip.flow_hash =
587                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
588                 }
589               dpo1 = load_balance_get_fwd_bucket
590                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
591             }
592           else
593             {
594               dpo1 = load_balance_get_bucket_i (lb1, 0);
595             }
596
597           next0 = dpo0->dpoi_next_node;
598           next1 = dpo1->dpoi_next_node;
599
              /* Replace the load-balance index with the chosen DPO's index. */
600           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
602
              /* Counted against the load-balance the buffer arrived with. */
603           vlib_increment_combined_counter
604             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
605           vlib_increment_combined_counter
606             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
607
              /*
               * NOTE(review): macro defined outside this file; presumably it
               * repairs the speculative enqueue when next0/next1 differ from
               * `next` - confirm against the vlib headers.
               */
608           vlib_validate_buffer_enqueue_x2 (vm, node, next,
609                                            to_next, n_left_to_next,
610                                            pi0, pi1, next0, next1);
611         }
612
          /* Single loop: same processing, one buffer at a time. */
613       while (n_left_from > 0 && n_left_to_next > 0)
614         {
615           ip_lookup_next_t next0;
616           const load_balance_t *lb0;
617           vlib_buffer_t *p0;
618           u32 pi0, lbi0, hc0;
619           const ip4_header_t *ip0;
620           const dpo_id_t *dpo0;
621
622           pi0 = from[0];
623           to_next[0] = pi0;
624           from += 1;
625           to_next += 1;
626           n_left_to_next -= 1;
627           n_left_from -= 1;
628
629           p0 = vlib_get_buffer (vm, pi0);
630
631           ip0 = vlib_buffer_get_current (p0);
632           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
633
634           lb0 = load_balance_get (lbi0);
635
636           hc0 = 0;
637           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
638             {
639               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
640                 {
641                   hc0 = vnet_buffer (p0)->ip.flow_hash =
642                     vnet_buffer (p0)->ip.flow_hash >> 1;
643                 }
644               else
645                 {
646                   hc0 = vnet_buffer (p0)->ip.flow_hash =
647                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
648                 }
649               dpo0 = load_balance_get_fwd_bucket
650                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
651             }
652           else
653             {
654               dpo0 = load_balance_get_bucket_i (lb0, 0);
655             }
656
657           next0 = dpo0->dpoi_next_node;
658           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
659
660           vlib_increment_combined_counter
661             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
662
663           vlib_validate_buffer_enqueue_x1 (vm, node, next,
664                                            to_next, n_left_to_next,
665                                            pi0, next0);
666         }
667
668       vlib_put_next_frame (vm, node, next, n_left_to_next);
669     }
670
671   return frame->n_vectors;
672 }
673
/*
 * Graph node registration for "ip4-load-balance".
 *  - dispatch function: ip4_load_balance
 *  - frame elements are u32 values (vector_size = sizeof (u32))
 *  - declared as a sibling of "ip4-lookup"; no next nodes are listed here
 *    (presumably the sibling relation supplies ip4-lookup's next-node set -
 *    confirm against the vlib node documentation)
 *  - trace records are rendered by format_ip4_lookup_trace
 */
674 /* *INDENT-OFF* */
675 VLIB_REGISTER_NODE (ip4_load_balance_node) =
676 {
677   .function = ip4_load_balance,
678   .name = "ip4-load-balance",
679   .vector_size = sizeof (u32),
680   .sibling_of = "ip4-lookup",
681   .format_trace =
682   format_ip4_lookup_trace,
683 };
684 /* *INDENT-ON* */
685
/*
 * NOTE(review): macro defined outside this file; presumably it emits
 * per-CPU-architecture variants of ip4_load_balance for this node - confirm
 * against the vlib headers.
 */
686 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
687
688 /* get first interface address */
689 ip4_address_t *
690 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
691                              ip_interface_address_t ** result_ia)
692 {
693   ip_lookup_main_t *lm = &im->lookup_main;
694   ip_interface_address_t *ia = 0;
695   ip4_address_t *result = 0;
696
697   /* *INDENT-OFF* */
698   foreach_ip_interface_address
699     (lm, ia, sw_if_index,
700      1 /* honor unnumbered */ ,
701      ({
702        ip4_address_t * a =
703          ip_interface_address_get_address (lm, ia);
704        result = a;
705        break;
706      }));
707   /* *INDENT-OFF* */
708   if (result_ia)
709     *result_ia = result ? ia : 0;
710   return result;
711 }
712
713 static void
714 ip4_add_interface_routes (u32 sw_if_index,
715                           ip4_main_t * im, u32 fib_index,
716                           ip_interface_address_t * a)
717 {
718   ip_lookup_main_t *lm = &im->lookup_main;
719   ip4_address_t *address = ip_interface_address_get_address (lm, a);
720   fib_prefix_t pfx = {
721     .fp_len = a->address_length,
722     .fp_proto = FIB_PROTOCOL_IP4,
723     .fp_addr.ip4 = *address,
724   };
725
726   if (pfx.fp_len <= 30)
727     {
728       /* a /30 or shorter - add a glean for the network address */
729       fib_table_entry_update_one_path (fib_index, &pfx,
730                                        FIB_SOURCE_INTERFACE,
731                                        (FIB_ENTRY_FLAG_CONNECTED |
732                                         FIB_ENTRY_FLAG_ATTACHED),
733                                        DPO_PROTO_IP4,
734                                        /* No next-hop address */
735                                        NULL,
736                                        sw_if_index,
737                                        // invalid FIB index
738                                        ~0,
739                                        1,
740                                        // no out-label stack
741                                        NULL,
742                                        FIB_ROUTE_PATH_FLAG_NONE);
743
744       /* Add the two broadcast addresses as drop */
745       fib_prefix_t net_pfx = {
746         .fp_len = 32,
747         .fp_proto = FIB_PROTOCOL_IP4,
748         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
749       };
750       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
751         fib_table_entry_special_add(fib_index,
752                                     &net_pfx,
753                                     FIB_SOURCE_INTERFACE,
754                                     (FIB_ENTRY_FLAG_DROP |
755                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
756       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
757       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
758         fib_table_entry_special_add(fib_index,
759                                     &net_pfx,
760                                     FIB_SOURCE_INTERFACE,
761                                     (FIB_ENTRY_FLAG_DROP |
762                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
763     }
764   else if (pfx.fp_len == 31)
765     {
766       u32 mask = clib_host_to_net_u32(1);
767       fib_prefix_t net_pfx = pfx;
768
769       net_pfx.fp_len = 32;
770       net_pfx.fp_addr.ip4.as_u32 ^= mask;
771
772       /* a /31 - add the other end as an attached host */
773       fib_table_entry_update_one_path (fib_index, &net_pfx,
774                                        FIB_SOURCE_INTERFACE,
775                                        (FIB_ENTRY_FLAG_ATTACHED),
776                                        DPO_PROTO_IP4,
777                                        &net_pfx.fp_addr,
778                                        sw_if_index,
779                                        // invalid FIB index
780                                        ~0,
781                                        1,
782                                        NULL,
783                                        FIB_ROUTE_PATH_FLAG_NONE);
784     }
785   pfx.fp_len = 32;
786
787   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
788     {
789       u32 classify_table_index =
790         lm->classify_table_index_by_sw_if_index[sw_if_index];
791       if (classify_table_index != (u32) ~ 0)
792         {
793           dpo_id_t dpo = DPO_INVALID;
794
795           dpo_set (&dpo,
796                    DPO_CLASSIFY,
797                    DPO_PROTO_IP4,
798                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
799
800           fib_table_entry_special_dpo_add (fib_index,
801                                            &pfx,
802                                            FIB_SOURCE_CLASSIFY,
803                                            FIB_ENTRY_FLAG_NONE, &dpo);
804           dpo_reset (&dpo);
805         }
806     }
807
808   fib_table_entry_update_one_path (fib_index, &pfx,
809                                    FIB_SOURCE_INTERFACE,
810                                    (FIB_ENTRY_FLAG_CONNECTED |
811                                     FIB_ENTRY_FLAG_LOCAL),
812                                    DPO_PROTO_IP4,
813                                    &pfx.fp_addr,
814                                    sw_if_index,
815                                    // invalid FIB index
816                                    ~0,
817                                    1, NULL,
818                                    FIB_ROUTE_PATH_FLAG_NONE);
819 }
820
821 static void
822 ip4_del_interface_routes (ip4_main_t * im,
823                           u32 fib_index,
824                           ip4_address_t * address, u32 address_length)
825 {
826   fib_prefix_t pfx = {
827     .fp_len = address_length,
828     .fp_proto = FIB_PROTOCOL_IP4,
829     .fp_addr.ip4 = *address,
830   };
831
832   if (pfx.fp_len <= 30)
833     {
834       fib_prefix_t net_pfx = {
835         .fp_len = 32,
836         .fp_proto = FIB_PROTOCOL_IP4,
837         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
838       };
839       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
840         fib_table_entry_special_remove(fib_index,
841                                        &net_pfx,
842                                        FIB_SOURCE_INTERFACE);
843       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
844       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
845         fib_table_entry_special_remove(fib_index,
846                                        &net_pfx,
847                                        FIB_SOURCE_INTERFACE);
848       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
849     }
850     else if (pfx.fp_len == 31)
851     {
852       u32 mask = clib_host_to_net_u32(1);
853       fib_prefix_t net_pfx = pfx;
854
855       net_pfx.fp_len = 32;
856       net_pfx.fp_addr.ip4.as_u32 ^= mask;
857
858       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
859     }
860
861   pfx.fp_len = 32;
862   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
863 }
864
865 void
866 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
867 {
868   ip4_main_t *im = &ip4_main;
869
870   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
871
872   /*
873    * enable/disable only on the 1<->0 transition
874    */
875   if (is_enable)
876     {
877       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
878         return;
879     }
880   else
881     {
882       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
883       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
884         return;
885     }
886   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
887                                !is_enable, 0, 0);
888
889
890   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
891                                sw_if_index, !is_enable, 0, 0);
892 }
893
894 static clib_error_t *
895 ip4_add_del_interface_address_internal (vlib_main_t * vm,
896                                         u32 sw_if_index,
897                                         ip4_address_t * address,
898                                         u32 address_length, u32 is_del)
899 {
900   vnet_main_t *vnm = vnet_get_main ();
901   ip4_main_t *im = &ip4_main;
902   ip_lookup_main_t *lm = &im->lookup_main;
903   clib_error_t *error = 0;
904   u32 if_address_index, elts_before;
905   ip4_address_fib_t ip4_af, *addr_fib = 0;
906
907   /* local0 interface doesn't support IP addressing  */
908   if (sw_if_index == 0)
909     {
910       return
911        clib_error_create ("local0 interface doesn't support IP addressing");
912     }
913
914   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
915   ip4_addr_fib_init (&ip4_af, address,
916                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
917   vec_add1 (addr_fib, ip4_af);
918
919   /* FIXME-LATER
920    * there is no support for adj-fib handling in the presence of overlapping
921    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
922    * most routers do.
923    */
924   /* *INDENT-OFF* */
925   if (!is_del)
926     {
927       /* When adding an address check that it does not conflict
928          with an existing address. */
929       ip_interface_address_t *ia;
930       foreach_ip_interface_address
931         (&im->lookup_main, ia, sw_if_index,
932          0 /* honor unnumbered */ ,
933          ({
934            ip4_address_t * x =
935              ip_interface_address_get_address
936              (&im->lookup_main, ia);
937            if (ip4_destination_matches_route
938                (im, address, x, ia->address_length) ||
939                ip4_destination_matches_route (im,
940                                               x,
941                                               address,
942                                               address_length))
943              return
944                clib_error_create
945                ("failed to add %U which conflicts with %U for interface %U",
946                 format_ip4_address_and_length, address,
947                 address_length,
948                 format_ip4_address_and_length, x,
949                 ia->address_length,
950                 format_vnet_sw_if_index_name, vnm,
951                 sw_if_index);
952          }));
953     }
954   /* *INDENT-ON* */
955
956   elts_before = pool_elts (lm->if_address_pool);
957
958   error = ip_interface_address_add_del
959     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
960   if (error)
961     goto done;
962
963   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
964
965   if (is_del)
966     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
967   else
968     ip4_add_interface_routes (sw_if_index,
969                               im, ip4_af.fib_index,
970                               pool_elt_at_index
971                               (lm->if_address_pool, if_address_index));
972
973   /* If pool did not grow/shrink: add duplicate address. */
974   if (elts_before != pool_elts (lm->if_address_pool))
975     {
976       ip4_add_del_interface_address_callback_t *cb;
977       vec_foreach (cb, im->add_del_interface_address_callbacks)
978         cb->function (im, cb->function_opaque, sw_if_index,
979                       address, address_length, if_address_index, is_del);
980     }
981
982 done:
983   vec_free (addr_fib);
984   return error;
985 }
986
987 clib_error_t *
988 ip4_add_del_interface_address (vlib_main_t * vm,
989                                u32 sw_if_index,
990                                ip4_address_t * address,
991                                u32 address_length, u32 is_del)
992 {
993   return ip4_add_del_interface_address_internal
994     (vm, sw_if_index, address, address_length, is_del);
995 }
996
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The arc is entered from the nodes listed in .start_nodes and its index
 * is stored through .arc_index_ptr.  Each VNET_FEATURE_INIT entry below
 * names one node on the arc (.node_name) and the node(s) it must precede
 * (.runs_before).  The declared constraints form the chain
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
 *   vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 *
 * with ip4-source-and-port-range-check-rx constrained only to run before
 * ip4-policer-classify, and ip4-drop only before ip4-lookup.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec-input-ip4",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/*
 * ip4-drop is switched on and off per interface by
 * ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del().
 */
VNET_FEATURE_INIT (ip4_drop, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-drop",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
1082
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * Entered from the same start nodes as the unicast arc; the arc index is
 * stored through .arc_index_ptr.  vpath-input-ip4 and ip4-drop are each
 * declared to run before ip4-mfib-forward-lookup, which is the last
 * feature on the arc.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/*
 * ip4-drop is switched on and off per interface by
 * ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del().
 */
VNET_FEATURE_INIT (ip4_mc_drop, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-drop",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1111
/*
 * ip4 tx (output) feature path definition.
 *
 * The arc is entered from ip4-rewrite, ip4-midchain and ip4-dvr-dpo; its
 * index is stored through .arc_index_ptr.  Declared ordering:
 *
 *   ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
 *   interface-output
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Source and port-range check on tx */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec-output-ip4",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1142
1143 static clib_error_t *
1144 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1145 {
1146   ip4_main_t *im = &ip4_main;
1147
1148   /* Fill in lookup tables with default table (0). */
1149   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1150   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1151
1152   if (!is_add)
1153     {
1154       ip4_main_t *im4 = &ip4_main;
1155       ip_lookup_main_t *lm4 = &im4->lookup_main;
1156       ip_interface_address_t *ia = 0;
1157       ip4_address_t *address;
1158       vlib_main_t *vm = vlib_get_main ();
1159
1160       /* *INDENT-OFF* */
1161       foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1162       ({
1163         address = ip_interface_address_get_address (lm4, ia);
1164         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1165       }));
1166       /* *INDENT-ON* */
1167     }
1168
1169   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1170                                is_add, 0, 0);
1171
1172   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1173                                is_add, 0, 0);
1174
1175   return /* no error */ 0;
1176 }
1177
1178 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1179
/*
 * Global IP4 main: the single ip4_main_t instance that the functions in
 * this file reach through &ip4_main.
 */
ip4_main_t ip4_main;
1182
1183 clib_error_t *
1184 ip4_lookup_init (vlib_main_t * vm)
1185 {
1186   ip4_main_t *im = &ip4_main;
1187   clib_error_t *error;
1188   uword i;
1189
1190   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1191     return error;
1192   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1193     return (error);
1194   if ((error = vlib_call_init_function (vm, fib_module_init)))
1195     return error;
1196   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1197     return error;
1198
1199   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1200     {
1201       u32 m;
1202
1203       if (i < 32)
1204         m = pow2_mask (i) << (32 - i);
1205       else
1206         m = ~0;
1207       im->fib_masks[i] = clib_host_to_net_u32 (m);
1208     }
1209
1210   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1211
1212   /* Create FIB with index 0 and table id of 0. */
1213   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1214                                      FIB_SOURCE_DEFAULT_ROUTE);
1215   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1216                                       MFIB_SOURCE_DEFAULT_ROUTE);
1217
1218   {
1219     pg_node_t *pn;
1220     pn = pg_get_node (ip4_lookup_node.index);
1221     pn->unformat_edit = unformat_pg_ip4_header;
1222   }
1223
1224   {
1225     ethernet_arp_header_t h;
1226
1227     memset (&h, 0, sizeof (h));
1228
1229     /* Set target ethernet address to all zeros. */
1230     memset (h.ip4_over_ethernet[1].ethernet, 0,
1231             sizeof (h.ip4_over_ethernet[1].ethernet));
1232
1233 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1234 #define _8(f,v) h.f = v;
1235     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1236     _16 (l3_type, ETHERNET_TYPE_IP4);
1237     _8 (n_l2_address_bytes, 6);
1238     _8 (n_l3_address_bytes, 4);
1239     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1240 #undef _16
1241 #undef _8
1242
1243     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1244                                /* data */ &h,
1245                                sizeof (h),
1246                                /* alloc chunk size */ 8,
1247                                "ip4 arp");
1248   }
1249
1250   return error;
1251 }
1252
1253 VLIB_INIT_FUNCTION (ip4_lookup_init);
1254
/*
 * Per-packet trace record filled in by ip4_forward_next_trace() and
 * rendered by the format_ip4_*_trace functions in this file.
 */
typedef struct
{
  /* Adjacency taken: copied from the buffer's ip.adj_index[] slot. */
  u32 dpo_index;
  /* Copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /*
   * The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
   * FIB index bound to the buffer's RX interface.  Note that
   * format_ip4_rewrite_trace() prints this field as "tx_sw_if_index".
   */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1266
1267 u8 *
1268 format_ip4_forward_next_trace (u8 * s, va_list * args)
1269 {
1270   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1271   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1272   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1273   u32 indent = format_get_indent (s);
1274   s = format (s, "%U%U",
1275               format_white_space, indent,
1276               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1277   return s;
1278 }
1279
1280 static u8 *
1281 format_ip4_lookup_trace (u8 * s, va_list * args)
1282 {
1283   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1284   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1285   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1286   u32 indent = format_get_indent (s);
1287
1288   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1289               t->fib_index, t->dpo_index, t->flow_hash);
1290   s = format (s, "\n%U%U",
1291               format_white_space, indent,
1292               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1293   return s;
1294 }
1295
1296 static u8 *
1297 format_ip4_rewrite_trace (u8 * s, va_list * args)
1298 {
1299   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1300   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1301   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1302   u32 indent = format_get_indent (s);
1303
1304   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1305               t->fib_index, t->dpo_index, format_ip_adjacency,
1306               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1307   s = format (s, "\n%U%U",
1308               format_white_space, indent,
1309               format_ip_adjacency_packet_data,
1310               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1311   return s;
1312 }
1313
1314 /* Common trace function for all ip4-forward next nodes. */
1315 void
1316 ip4_forward_next_trace (vlib_main_t * vm,
1317                         vlib_node_runtime_t * node,
1318                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1319 {
1320   u32 *from, n_left;
1321   ip4_main_t *im = &ip4_main;
1322
1323   n_left = frame->n_vectors;
1324   from = vlib_frame_vector_args (frame);
1325
1326   while (n_left >= 4)
1327     {
1328       u32 bi0, bi1;
1329       vlib_buffer_t *b0, *b1;
1330       ip4_forward_next_trace_t *t0, *t1;
1331
1332       /* Prefetch next iteration. */
1333       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1334       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1335
1336       bi0 = from[0];
1337       bi1 = from[1];
1338
1339       b0 = vlib_get_buffer (vm, bi0);
1340       b1 = vlib_get_buffer (vm, bi1);
1341
1342       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1343         {
1344           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1345           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1346           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1347           t0->fib_index =
1348             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1349              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1350             vec_elt (im->fib_index_by_sw_if_index,
1351                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1352
1353           clib_memcpy (t0->packet_data,
1354                        vlib_buffer_get_current (b0),
1355                        sizeof (t0->packet_data));
1356         }
1357       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1358         {
1359           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1360           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1361           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1362           t1->fib_index =
1363             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1364              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1365             vec_elt (im->fib_index_by_sw_if_index,
1366                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1367           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1368                        sizeof (t1->packet_data));
1369         }
1370       from += 2;
1371       n_left -= 2;
1372     }
1373
1374   while (n_left >= 1)
1375     {
1376       u32 bi0;
1377       vlib_buffer_t *b0;
1378       ip4_forward_next_trace_t *t0;
1379
1380       bi0 = from[0];
1381
1382       b0 = vlib_get_buffer (vm, bi0);
1383
1384       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1385         {
1386           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1387           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1388           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1389           t0->fib_index =
1390             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1391              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1392             vec_elt (im->fib_index_by_sw_if_index,
1393                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1394           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1395                        sizeof (t0->packet_data));
1396         }
1397       from += 1;
1398       n_left -= 1;
1399     }
1400 }
1401
1402 /* Compute TCP/UDP/ICMP4 checksum in software. */
1403 u16
1404 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1405                               ip4_header_t * ip0)
1406 {
1407   ip_csum_t sum0;
1408   u32 ip_header_length, payload_length_host_byte_order;
1409   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1410   u16 sum16;
1411   void *data_this_buffer;
1412
1413   /* Initialize checksum with ip header. */
1414   ip_header_length = ip4_header_bytes (ip0);
1415   payload_length_host_byte_order =
1416     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1417   sum0 =
1418     clib_host_to_net_u32 (payload_length_host_byte_order +
1419                           (ip0->protocol << 16));
1420
1421   if (BITS (uword) == 32)
1422     {
1423       sum0 =
1424         ip_csum_with_carry (sum0,
1425                             clib_mem_unaligned (&ip0->src_address, u32));
1426       sum0 =
1427         ip_csum_with_carry (sum0,
1428                             clib_mem_unaligned (&ip0->dst_address, u32));
1429     }
1430   else
1431     sum0 =
1432       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1433
1434   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1435   data_this_buffer = (void *) ip0 + ip_header_length;
1436   n_ip_bytes_this_buffer =
1437     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1438   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1439     {
1440       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1441         n_ip_bytes_this_buffer - ip_header_length : 0;
1442     }
1443   while (1)
1444     {
1445       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1446       n_bytes_left -= n_this_buffer;
1447       if (n_bytes_left == 0)
1448         break;
1449
1450       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1451       p0 = vlib_get_buffer (vm, p0->next_buffer);
1452       data_this_buffer = vlib_buffer_get_current (p0);
1453       n_this_buffer = p0->current_length;
1454     }
1455
1456   sum16 = ~ip_csum_fold (sum0);
1457
1458   return sum16;
1459 }
1460
1461 u32
1462 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1463 {
1464   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1465   udp_header_t *udp0;
1466   u16 sum16;
1467
1468   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1469           || ip0->protocol == IP_PROTOCOL_UDP);
1470
1471   udp0 = (void *) (ip0 + 1);
1472   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1473     {
1474       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1475                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1476       return p0->flags;
1477     }
1478
1479   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1480
1481   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1482                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1483
1484   return p0->flags;
1485 }
1486
/*
 * Feature arc "ip4-local", entered from the ip4-local node.  No
 * .arc_index_ptr is given here; ip4_local_inline() reads the index from
 * vnet_feat_arc_ip4_local.feature_arc_index.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
};
/* *INDENT-ON* */
1494
1495 static inline void
1496 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1497                        u8 is_udp, u8 * error, u8 * good_tcp_udp)
1498 {
1499   u32 flags0;
1500   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1501   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1502   if (is_udp)
1503     {
1504       udp_header_t *udp;
1505       u32 ip_len, udp_len;
1506       i32 len_diff;
1507       udp = ip4_next_header (ip);
1508       /* Verify UDP length. */
1509       ip_len = clib_net_to_host_u16 (ip->length);
1510       udp_len = clib_net_to_host_u16 (udp->length);
1511
1512       len_diff = ip_len - udp_len;
1513       *good_tcp_udp &= len_diff >= 0;
1514       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1515     }
1516 }
1517
/*
 * True when a TCP/UDP packet still needs its L4 checksum validated in
 * software: it is TCP/UDP and the buffer flags carry none of
 * L4_CHECKSUM_COMPUTED, OFFLOAD_TCP_CKSUM or OFFLOAD_UDP_CKSUM.
 * Both arguments are parenthesised so that expression arguments bind as
 * the caller intends; each flag test is parenthesised for clarity.
 */
#define ip4_local_do_l4_check(is_tcp_udp, flags)                        \
    ((is_tcp_udp) && !(((flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)   \
    || ((flags) & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM)                      \
    || ((flags) & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)))
1522
1523 static inline uword
1524 ip4_local_inline (vlib_main_t * vm,
1525                   vlib_node_runtime_t * node,
1526                   vlib_frame_t * frame, int head_of_feature_arc)
1527 {
1528   ip4_main_t *im = &ip4_main;
1529   ip_lookup_main_t *lm = &im->lookup_main;
1530   ip_local_next_t next_index;
1531   u32 *from, *to_next, n_left_from, n_left_to_next;
1532   vlib_node_runtime_t *error_node =
1533     vlib_node_get_runtime (vm, ip4_input_node.index);
1534   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1535
1536   from = vlib_frame_vector_args (frame);
1537   n_left_from = frame->n_vectors;
1538   next_index = node->cached_next_index;
1539
1540   if (node->flags & VLIB_NODE_FLAG_TRACE)
1541     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1542
1543   while (n_left_from > 0)
1544     {
1545       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1546
1547       while (n_left_from >= 4 && n_left_to_next >= 2)
1548         {
1549           vlib_buffer_t *p0, *p1;
1550           ip4_header_t *ip0, *ip1;
1551           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1552           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1553           const dpo_id_t *dpo0, *dpo1;
1554           const load_balance_t *lb0, *lb1;
1555           u32 pi0, next0, fib_index0, lbi0;
1556           u32 pi1, next1, fib_index1, lbi1;
1557           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1558           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1559           u32 sw_if_index0, sw_if_index1;
1560
1561           pi0 = to_next[0] = from[0];
1562           pi1 = to_next[1] = from[1];
1563           from += 2;
1564           n_left_from -= 2;
1565           to_next += 2;
1566           n_left_to_next -= 2;
1567
1568           next0 = next1 = IP_LOCAL_NEXT_DROP;
1569           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1570
1571           p0 = vlib_get_buffer (vm, pi0);
1572           p1 = vlib_get_buffer (vm, pi1);
1573
1574           ip0 = vlib_buffer_get_current (p0);
1575           ip1 = vlib_buffer_get_current (p1);
1576
1577           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1578           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1579
1580           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1581           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1582
1583           /* Treat IP frag packets as "experimental" protocol for now
1584              until support of IP frag reassembly is implemented */
1585           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1586           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1587
1588           if (head_of_feature_arc == 0)
1589             goto skip_checks;
1590
1591           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1592           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1593           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1594           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1595
1596           good_tcp_udp0 =
1597             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1598              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1599                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1600           good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1601                            || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1602                                || p1->flags &
1603                                VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1604
1605           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
1606                              || ip4_local_do_l4_check (is_tcp_udp1,
1607                                                        p1->flags)))
1608             {
1609               if (is_tcp_udp0)
1610                 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1611                                        &good_tcp_udp0);
1612               if (is_tcp_udp1)
1613                 ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
1614                                        &good_tcp_udp1);
1615             }
1616
1617           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1618           error0 = (is_tcp_udp0 && !good_tcp_udp0
1619                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1620           error1 = (is_tcp_udp1 && !good_tcp_udp1
1621                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1622
1623           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1624           fib_index0 =
1625             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1626              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1627
1628           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1629           fib_index1 =
1630             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1631              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1632
1633           /* TODO maybe move to lookup? */
1634           vnet_buffer (p0)->ip.fib_index = fib_index0;
1635           vnet_buffer (p1)->ip.fib_index = fib_index1;
1636
1637           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1638           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1639
1640           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1641           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1642           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1643                                              2);
1644           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1645                                              2);
1646           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1647                                              3);
1648           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1649                                              3);
1650
1651           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1652             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1653           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1654
1655           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1656             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1657           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1658
1659           lb0 = load_balance_get (lbi0);
1660           lb1 = load_balance_get (lbi1);
1661           dpo0 = load_balance_get_bucket_i (lb0, 0);
1662           dpo1 = load_balance_get_bucket_i (lb1, 0);
1663
1664           /*
1665            * Must have a route to source otherwise we drop the packet.
1666            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1667            *
1668            * The checks are:
1669            *  - the source is a recieve => it's from us => bogus, do this
1670            *    first since it sets a different error code.
1671            *  - uRPF check for any route to source - accept if passes.
1672            *  - allow packets destined to the broadcast address from unknown sources
1673            */
1674           if (p0->flags & VNET_BUFFER_F_IS_NATED)
1675             goto skip_check0;
1676
1677           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1678                      dpo0->dpoi_type == DPO_RECEIVE) ?
1679                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1680           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1681                      !fib_urpf_check_size (lb0->lb_urpf) &&
1682                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1683                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1684
1685         skip_check0:
1686           if (p1->flags & VNET_BUFFER_F_IS_NATED)
1687             goto skip_checks;
1688
1689           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1690                      dpo1->dpoi_type == DPO_RECEIVE) ?
1691                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1692           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1693                      !fib_urpf_check_size (lb1->lb_urpf) &&
1694                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1695                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1696
1697         skip_checks:
1698
1699           next0 = lm->local_next_by_ip_protocol[proto0];
1700           next1 = lm->local_next_by_ip_protocol[proto1];
1701
1702           next0 =
1703             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1704           next1 =
1705             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1706
1707           p0->error = error0 ? error_node->errors[error0] : 0;
1708           p1->error = error1 ? error_node->errors[error1] : 0;
1709
1710           if (head_of_feature_arc)
1711             {
1712               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1713                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1714               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1715                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1716             }
1717
1718           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1719                                            n_left_to_next, pi0, pi1,
1720                                            next0, next1);
1721         }
1722
1723       while (n_left_from > 0 && n_left_to_next > 0)
1724         {
1725           vlib_buffer_t *p0;
1726           ip4_header_t *ip0;
1727           ip4_fib_mtrie_t *mtrie0;
1728           ip4_fib_mtrie_leaf_t leaf0;
1729           u32 pi0, next0, fib_index0, lbi0;
1730           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1731           load_balance_t *lb0;
1732           const dpo_id_t *dpo0;
1733           u32 sw_if_index0;
1734
1735           pi0 = to_next[0] = from[0];
1736           from += 1;
1737           n_left_from -= 1;
1738           to_next += 1;
1739           n_left_to_next -= 1;
1740
1741           next0 = IP_LOCAL_NEXT_DROP;
1742           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1743
1744           p0 = vlib_get_buffer (vm, pi0);
1745           ip0 = vlib_buffer_get_current (p0);
1746           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1747           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1748
1749           /* Treat IP frag packets as "experimental" protocol for now
1750              until support of IP frag reassembly is implemented */
1751           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1752
1753           if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
1754             goto skip_check;
1755
1756           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1757           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1758
1759           good_tcp_udp0 =
1760             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1761              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1762                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1763
1764           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
1765             {
1766               ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1767                                      &good_tcp_udp0);
1768             }
1769
1770           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1771           error0 = (is_tcp_udp0 && !good_tcp_udp0
1772                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1773
1774           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1775           fib_index0 =
1776             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1777              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1778           vnet_buffer (p0)->ip.fib_index = fib_index0;
1779           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1780           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1781           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1782                                              2);
1783           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1784                                              3);
1785           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1786           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1787           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1788
1789           lb0 = load_balance_get (lbi0);
1790           dpo0 = load_balance_get_bucket_i (lb0, 0);
1791
1792           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1793                      dpo0->dpoi_type == DPO_RECEIVE) ?
1794                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1795           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1796                      !fib_urpf_check_size (lb0->lb_urpf) &&
1797                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1798                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1799
1800         skip_check:
1801           next0 = lm->local_next_by_ip_protocol[proto0];
1802           next0 =
1803             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1804
1805           p0->error = error0 ? error_node->errors[error0] : 0;
1806
1807           if (head_of_feature_arc)
1808             {
1809               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1810                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1811             }
1812
1813           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1814                                            n_left_to_next, pi0, next0);
1815         }
1816       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1817     }
1818
1819   return frame->n_vectors;
1820 }
1821
1822 static uword
1823 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1824 {
1825   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1826 }
1827
1828 /* *INDENT-OFF* */
1829 VLIB_REGISTER_NODE (ip4_local_node) =
1830 {
1831   .function = ip4_local,
1832   .name = "ip4-local",
1833   .vector_size = sizeof (u32),
1834   .format_trace = format_ip4_forward_next_trace,
1835   .n_next_nodes = IP_LOCAL_N_NEXT,
1836   .next_nodes =
1837   {
1838     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1839     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1840     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1841     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1842   },
1843 };
1844 /* *INDENT-ON* */
1845
1846 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1847
1848 static uword
1849 ip4_local_end_of_arc (vlib_main_t * vm,
1850                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1851 {
1852   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1853 }
1854
1855 /* *INDENT-OFF* */
1856 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1857   .function = ip4_local_end_of_arc,
1858   .name = "ip4-local-end-of-arc",
1859   .vector_size = sizeof (u32),
1860
1861   .format_trace = format_ip4_forward_next_trace,
1862   .sibling_of = "ip4-local",
1863 };
1864
1865 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1866
1867 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1868   .arc_name = "ip4-local",
1869   .node_name = "ip4-local-end-of-arc",
1870   .runs_before = 0, /* not before any other features */
1871 };
1872 /* *INDENT-ON* */
1873
1874 void
1875 ip4_register_protocol (u32 protocol, u32 node_index)
1876 {
1877   vlib_main_t *vm = vlib_get_main ();
1878   ip4_main_t *im = &ip4_main;
1879   ip_lookup_main_t *lm = &im->lookup_main;
1880
1881   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1882   lm->local_next_by_ip_protocol[protocol] =
1883     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1884 }
1885
1886 static clib_error_t *
1887 show_ip_local_command_fn (vlib_main_t * vm,
1888                           unformat_input_t * input, vlib_cli_command_t * cmd)
1889 {
1890   ip4_main_t *im = &ip4_main;
1891   ip_lookup_main_t *lm = &im->lookup_main;
1892   int i;
1893
1894   vlib_cli_output (vm, "Protocols handled by ip4_local");
1895   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1896     {
1897       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1898         {
1899           u32 node_index = vlib_get_node (vm,
1900                                           ip4_local_node.index)->
1901             next_nodes[lm->local_next_by_ip_protocol[i]];
1902           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1903                            node_index);
1904         }
1905     }
1906   return 0;
1907 }
1908
1909
1910
1911 /*?
1912  * Display the set of protocols handled by the local IPv4 stack.
1913  *
1914  * @cliexpar
1915  * Example of how to display local protocol table:
1916  * @cliexstart{show ip local}
1917  * Protocols handled by ip4_local
1918  * 1
1919  * 17
1920  * 47
1921  * @cliexend
1922 ?*/
1923 /* *INDENT-OFF* */
1924 VLIB_CLI_COMMAND (show_ip_local, static) =
1925 {
1926   .path = "show ip local",
1927   .function = show_ip_local_command_fn,
1928   .short_help = "show ip local",
1929 };
1930 /* *INDENT-ON* */
1931
1932 always_inline uword
1933 ip4_arp_inline (vlib_main_t * vm,
1934                 vlib_node_runtime_t * node,
1935                 vlib_frame_t * frame, int is_glean)
1936 {
1937   vnet_main_t *vnm = vnet_get_main ();
1938   ip4_main_t *im = &ip4_main;
1939   ip_lookup_main_t *lm = &im->lookup_main;
1940   u32 *from, *to_next_drop;
1941   uword n_left_from, n_left_to_next_drop, next_index;
1942   static f64 time_last_seed_change = -1e100;
1943   static u32 hash_seeds[3];
1944   static uword hash_bitmap[256 / BITS (uword)];
1945   f64 time_now;
1946
1947   if (node->flags & VLIB_NODE_FLAG_TRACE)
1948     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1949
1950   time_now = vlib_time_now (vm);
1951   if (time_now - time_last_seed_change > 1e-3)
1952     {
1953       uword i;
1954       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1955                                             sizeof (hash_seeds));
1956       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1957         hash_seeds[i] = r[i];
1958
1959       /* Mark all hash keys as been no-seen before. */
1960       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1961         hash_bitmap[i] = 0;
1962
1963       time_last_seed_change = time_now;
1964     }
1965
1966   from = vlib_frame_vector_args (frame);
1967   n_left_from = frame->n_vectors;
1968   next_index = node->cached_next_index;
1969   if (next_index == IP4_ARP_NEXT_DROP)
1970     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1971
1972   while (n_left_from > 0)
1973     {
1974       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1975                            to_next_drop, n_left_to_next_drop);
1976
1977       while (n_left_from > 0 && n_left_to_next_drop > 0)
1978         {
1979           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1980           ip_adjacency_t *adj0;
1981           vlib_buffer_t *p0;
1982           ip4_header_t *ip0;
1983           uword bm0;
1984
1985           pi0 = from[0];
1986
1987           p0 = vlib_get_buffer (vm, pi0);
1988
1989           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1990           adj0 = adj_get (adj_index0);
1991           ip0 = vlib_buffer_get_current (p0);
1992
1993           a0 = hash_seeds[0];
1994           b0 = hash_seeds[1];
1995           c0 = hash_seeds[2];
1996
1997           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1998           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1999
2000           if (is_glean)
2001             {
2002               /*
2003                * this is the Glean case, so we are ARPing for the
2004                * packet's destination
2005                */
2006               a0 ^= ip0->dst_address.data_u32;
2007             }
2008           else
2009             {
2010               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2011             }
2012           b0 ^= sw_if_index0;
2013
2014           hash_v3_mix32 (a0, b0, c0);
2015           hash_v3_finalize32 (a0, b0, c0);
2016
2017           c0 &= BITS (hash_bitmap) - 1;
2018           m0 = (uword) 1 << (c0 % BITS (uword));
2019           c0 = c0 / BITS (uword);
2020
2021           bm0 = hash_bitmap[c0];
2022           drop0 = (bm0 & m0) != 0;
2023
2024           /* Mark it as seen. */
2025           hash_bitmap[c0] = bm0 | m0;
2026
2027           from += 1;
2028           n_left_from -= 1;
2029           to_next_drop[0] = pi0;
2030           to_next_drop += 1;
2031           n_left_to_next_drop -= 1;
2032
2033           p0->error =
2034             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2035                          IP4_ARP_ERROR_REQUEST_SENT];
2036
2037           /*
2038            * the adj has been updated to a rewrite but the node the DPO that got
2039            * us here hasn't - yet. no big deal. we'll drop while we wait.
2040            */
2041           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2042             continue;
2043
2044           if (drop0)
2045             continue;
2046
2047           /*
2048            * Can happen if the control-plane is programming tables
2049            * with traffic flowing; at least that's today's lame excuse.
2050            */
2051           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2052               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2053             {
2054               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2055             }
2056           else
2057             /* Send ARP request. */
2058             {
2059               u32 bi0 = 0;
2060               vlib_buffer_t *b0;
2061               ethernet_arp_header_t *h0;
2062               vnet_hw_interface_t *hw_if0;
2063
2064               h0 =
2065                 vlib_packet_template_get_packet (vm,
2066                                                  &im->ip4_arp_request_packet_template,
2067                                                  &bi0);
2068
2069               /* Seems we're out of buffers */
2070               if (PREDICT_FALSE (!h0))
2071                 continue;
2072
2073               /* Add rewrite/encap string for ARP packet. */
2074               vnet_rewrite_one_header (adj0[0], h0,
2075                                        sizeof (ethernet_header_t));
2076
2077               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2078
2079               /* Src ethernet address in ARP header. */
2080               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2081                            hw_if0->hw_address,
2082                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2083
2084               if (is_glean)
2085                 {
2086                   /* The interface's source address is stashed in the Glean Adj */
2087                   h0->ip4_over_ethernet[0].ip4 =
2088                     adj0->sub_type.glean.receive_addr.ip4;
2089
2090                   /* Copy in destination address we are requesting. This is the
2091                    * glean case, so it's the packet's destination.*/
2092                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2093                     ip0->dst_address.data_u32;
2094                 }
2095               else
2096                 {
2097                   /* Src IP address in ARP header. */
2098                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2099                                                   &h0->
2100                                                   ip4_over_ethernet[0].ip4))
2101                     {
2102                       /* No source address available */
2103                       p0->error =
2104                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2105                       vlib_buffer_free (vm, &bi0, 1);
2106                       continue;
2107                     }
2108
2109                   /* Copy in destination address we are requesting from the
2110                      incomplete adj */
2111                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2112                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2113                 }
2114
2115               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2116               b0 = vlib_get_buffer (vm, bi0);
2117               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2118               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2119
2120               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2121
2122               vlib_set_next_frame_buffer (vm, node,
2123                                           adj0->rewrite_header.next_index,
2124                                           bi0);
2125             }
2126         }
2127
2128       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2129     }
2130
2131   return frame->n_vectors;
2132 }
2133
2134 static uword
2135 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2136 {
2137   return (ip4_arp_inline (vm, node, frame, 0));
2138 }
2139
2140 static uword
2141 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2142 {
2143   return (ip4_arp_inline (vm, node, frame, 1));
2144 }
2145
2146 static char *ip4_arp_error_strings[] = {
2147   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2148   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2149   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2150   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2151   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2152   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2153 };
2154
2155 /* *INDENT-OFF* */
2156 VLIB_REGISTER_NODE (ip4_arp_node) =
2157 {
2158   .function = ip4_arp,
2159   .name = "ip4-arp",
2160   .vector_size = sizeof (u32),
2161   .format_trace = format_ip4_forward_next_trace,
2162   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2163   .error_strings = ip4_arp_error_strings,
2164   .n_next_nodes = IP4_ARP_N_NEXT,
2165   .next_nodes =
2166   {
2167     [IP4_ARP_NEXT_DROP] = "error-drop",
2168   },
2169 };
2170
2171 VLIB_REGISTER_NODE (ip4_glean_node) =
2172 {
2173   .function = ip4_glean,
2174   .name = "ip4-glean",
2175   .vector_size = sizeof (u32),
2176   .format_trace = format_ip4_forward_next_trace,
2177   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2178   .error_strings = ip4_arp_error_strings,
2179   .n_next_nodes = IP4_ARP_N_NEXT,
2180   .next_nodes = {
2181   [IP4_ARP_NEXT_DROP] = "error-drop",
2182   },
2183 };
2184 /* *INDENT-ON* */
2185
2186 #define foreach_notrace_ip4_arp_error           \
2187 _(DROP)                                         \
2188 _(REQUEST_SENT)                                 \
2189 _(REPLICATE_DROP)                               \
2190 _(REPLICATE_FAIL)
2191
2192 clib_error_t *
2193 arp_notrace_init (vlib_main_t * vm)
2194 {
2195   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2196
2197   /* don't trace ARP request packets */
2198 #define _(a)                                    \
2199     vnet_pcap_drop_trace_filter_add_del         \
2200         (rt->errors[IP4_ARP_ERROR_##a],         \
2201          1 /* is_add */);
2202   foreach_notrace_ip4_arp_error;
2203 #undef _
2204   return 0;
2205 }
2206
2207 VLIB_INIT_FUNCTION (arp_notrace_init);
2208
2209
2210 /* Send an ARP request to see if given destination is reachable on given interface. */
2211 clib_error_t *
2212 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2213 {
2214   vnet_main_t *vnm = vnet_get_main ();
2215   ip4_main_t *im = &ip4_main;
2216   ethernet_arp_header_t *h;
2217   ip4_address_t *src;
2218   ip_interface_address_t *ia;
2219   ip_adjacency_t *adj;
2220   vnet_hw_interface_t *hi;
2221   vnet_sw_interface_t *si;
2222   vlib_buffer_t *b;
2223   adj_index_t ai;
2224   u32 bi = 0;
2225
2226   si = vnet_get_sw_interface (vnm, sw_if_index);
2227
2228   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2229     {
2230       return clib_error_return (0, "%U: interface %U down",
2231                                 format_ip4_address, dst,
2232                                 format_vnet_sw_if_index_name, vnm,
2233                                 sw_if_index);
2234     }
2235
2236   src =
2237     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2238   if (!src)
2239     {
2240       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2241       return clib_error_return
2242         (0,
2243          "no matching interface address for destination %U (interface %U)",
2244          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2245          sw_if_index);
2246     }
2247
2248   h = vlib_packet_template_get_packet (vm,
2249                                        &im->ip4_arp_request_packet_template,
2250                                        &bi);
2251
2252   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2253   if (PREDICT_FALSE (!hi->hw_address))
2254     {
2255       return clib_error_return (0, "%U: interface %U do not support ip probe",
2256                                 format_ip4_address, dst,
2257                                 format_vnet_sw_if_index_name, vnm,
2258                                 sw_if_index);
2259     }
2260
2261   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2262                sizeof (h->ip4_over_ethernet[0].ethernet));
2263
2264   h->ip4_over_ethernet[0].ip4 = src[0];
2265   h->ip4_over_ethernet[1].ip4 = dst[0];
2266
2267   b = vlib_get_buffer (vm, bi);
2268   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2269     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2270
2271   ip46_address_t nh = {
2272     .ip4 = *dst,
2273   };
2274
2275   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2276                             VNET_LINK_IP4, &nh, sw_if_index);
2277   adj = adj_get (ai);
2278
2279   /* Peer has been previously resolved, retrieve glean adj instead */
2280   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2281     {
2282       adj_unlock (ai);
2283       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
2284       adj = adj_get (ai);
2285     }
2286
2287   /* Add encapsulation string for software interface (e.g. ethernet header). */
2288   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2289   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2290
2291   {
2292     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2293     u32 *to_next = vlib_frame_vector_args (f);
2294     to_next[0] = bi;
2295     f->n_vectors = 1;
2296     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2297   }
2298
2299   adj_unlock (ai);
2300   return /* no error */ 0;
2301 }
2302
/* Next-node indices used by ip4_rewrite_inline. */
typedef enum
{
  /* Initial next of every packet in ip4_rewrite_inline. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Chosen when the TTL expires, so that an ICMP error can be generated. */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
2308
2309 always_inline uword
2310 ip4_rewrite_inline (vlib_main_t * vm,
2311                     vlib_node_runtime_t * node,
2312                     vlib_frame_t * frame,
2313                     int do_counters, int is_midchain, int is_mcast)
2314 {
2315   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2316   u32 *from = vlib_frame_vector_args (frame);
2317   u32 n_left_from, n_left_to_next, *to_next, next_index;
2318   vlib_node_runtime_t *error_node =
2319     vlib_node_get_runtime (vm, ip4_input_node.index);
2320
2321   n_left_from = frame->n_vectors;
2322   next_index = node->cached_next_index;
2323   u32 thread_index = vlib_get_thread_index ();
2324
2325   while (n_left_from > 0)
2326     {
2327       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2328
2329       while (n_left_from >= 4 && n_left_to_next >= 2)
2330         {
2331           ip_adjacency_t *adj0, *adj1;
2332           vlib_buffer_t *p0, *p1;
2333           ip4_header_t *ip0, *ip1;
2334           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2335           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2336           u32 tx_sw_if_index0, tx_sw_if_index1;
2337
2338           /* Prefetch next iteration. */
2339           {
2340             vlib_buffer_t *p2, *p3;
2341
2342             p2 = vlib_get_buffer (vm, from[2]);
2343             p3 = vlib_get_buffer (vm, from[3]);
2344
2345             vlib_prefetch_buffer_header (p2, STORE);
2346             vlib_prefetch_buffer_header (p3, STORE);
2347
2348             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2349             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2350           }
2351
2352           pi0 = to_next[0] = from[0];
2353           pi1 = to_next[1] = from[1];
2354
2355           from += 2;
2356           n_left_from -= 2;
2357           to_next += 2;
2358           n_left_to_next -= 2;
2359
2360           p0 = vlib_get_buffer (vm, pi0);
2361           p1 = vlib_get_buffer (vm, pi1);
2362
2363           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2364           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2365
2366           /*
2367            * pre-fetch the per-adjacency counters
2368            */
2369           if (do_counters)
2370             {
2371               vlib_prefetch_combined_counter (&adjacency_counters,
2372                                               thread_index, adj_index0);
2373               vlib_prefetch_combined_counter (&adjacency_counters,
2374                                               thread_index, adj_index1);
2375             }
2376
2377           ip0 = vlib_buffer_get_current (p0);
2378           ip1 = vlib_buffer_get_current (p1);
2379
2380           error0 = error1 = IP4_ERROR_NONE;
2381           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2382
2383           /* Decrement TTL & update checksum.
2384              Works either endian, so no need for byte swap. */
2385           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2386             {
2387               i32 ttl0 = ip0->ttl;
2388
2389               /* Input node should have reject packets with ttl 0. */
2390               ASSERT (ip0->ttl > 0);
2391
2392               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2393               checksum0 += checksum0 >= 0xffff;
2394
2395               ip0->checksum = checksum0;
2396               ttl0 -= 1;
2397               ip0->ttl = ttl0;
2398
2399               /*
2400                * If the ttl drops below 1 when forwarding, generate
2401                * an ICMP response.
2402                */
2403               if (PREDICT_FALSE (ttl0 <= 0))
2404                 {
2405                   error0 = IP4_ERROR_TIME_EXPIRED;
2406                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2407                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2408                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2409                                                0);
2410                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2411                 }
2412
2413               /* Verify checksum. */
2414               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2415                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2416             }
2417           else
2418             {
2419               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2420             }
2421           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2422             {
2423               i32 ttl1 = ip1->ttl;
2424
2425               /* Input node should have reject packets with ttl 0. */
2426               ASSERT (ip1->ttl > 0);
2427
2428               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2429               checksum1 += checksum1 >= 0xffff;
2430
2431               ip1->checksum = checksum1;
2432               ttl1 -= 1;
2433               ip1->ttl = ttl1;
2434
2435               /*
2436                * If the ttl drops below 1 when forwarding, generate
2437                * an ICMP response.
2438                */
2439               if (PREDICT_FALSE (ttl1 <= 0))
2440                 {
2441                   error1 = IP4_ERROR_TIME_EXPIRED;
2442                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2443                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2444                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2445                                                0);
2446                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2447                 }
2448
2449               /* Verify checksum. */
2450               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2451                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2452             }
2453           else
2454             {
2455               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2456             }
2457
2458           /* Rewrite packet header and updates lengths. */
2459           adj0 = adj_get (adj_index0);
2460           adj1 = adj_get (adj_index1);
2461
2462           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2463           rw_len0 = adj0[0].rewrite_header.data_bytes;
2464           rw_len1 = adj1[0].rewrite_header.data_bytes;
2465           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2466           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2467
2468           /* Check MTU of outgoing interface. */
2469           error0 =
2470             (vlib_buffer_length_in_chain (vm, p0) >
2471              adj0[0].
2472              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2473              error0);
2474           error1 =
2475             (vlib_buffer_length_in_chain (vm, p1) >
2476              adj1[0].
2477              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2478              error1);
2479
2480           if (is_mcast)
2481             {
2482               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2483                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2484                         IP4_ERROR_SAME_INTERFACE : error0);
2485               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2486                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2487                         IP4_ERROR_SAME_INTERFACE : error1);
2488             }
2489
2490           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2491            * to see the IP headerr */
2492           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2493             {
2494               next0 = adj0[0].rewrite_header.next_index;
2495               p0->current_data -= rw_len0;
2496               p0->current_length += rw_len0;
2497               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2498               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2499
2500               if (PREDICT_FALSE
2501                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2502                 vnet_feature_arc_start (lm->output_feature_arc_index,
2503                                         tx_sw_if_index0, &next0, p0);
2504             }
2505           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2506             {
2507               next1 = adj1[0].rewrite_header.next_index;
2508               p1->current_data -= rw_len1;
2509               p1->current_length += rw_len1;
2510
2511               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2512               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2513
2514               if (PREDICT_FALSE
2515                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2516                 vnet_feature_arc_start (lm->output_feature_arc_index,
2517                                         tx_sw_if_index1, &next1, p1);
2518             }
2519
2520           /* Guess we are only writing on simple Ethernet header. */
2521           vnet_rewrite_two_headers (adj0[0], adj1[0],
2522                                     ip0, ip1, sizeof (ethernet_header_t));
2523
2524           /*
2525            * Bump the per-adjacency counters
2526            */
2527           if (do_counters)
2528             {
2529               vlib_increment_combined_counter
2530                 (&adjacency_counters,
2531                  thread_index,
2532                  adj_index0, 1,
2533                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2534
2535               vlib_increment_combined_counter
2536                 (&adjacency_counters,
2537                  thread_index,
2538                  adj_index1, 1,
2539                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2540             }
2541
2542           if (is_midchain)
2543             {
2544               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2545               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2546             }
2547           if (is_mcast)
2548             {
2549               /*
2550                * copy bytes from the IP address into the MAC rewrite
2551                */
2552               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2553               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2554             }
2555
2556           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2557                                            to_next, n_left_to_next,
2558                                            pi0, pi1, next0, next1);
2559         }
2560
2561       while (n_left_from > 0 && n_left_to_next > 0)
2562         {
2563           ip_adjacency_t *adj0;
2564           vlib_buffer_t *p0;
2565           ip4_header_t *ip0;
2566           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2567           u32 tx_sw_if_index0;
2568
2569           pi0 = to_next[0] = from[0];
2570
2571           p0 = vlib_get_buffer (vm, pi0);
2572
2573           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2574
2575           adj0 = adj_get (adj_index0);
2576
2577           ip0 = vlib_buffer_get_current (p0);
2578
2579           error0 = IP4_ERROR_NONE;
2580           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2581
2582           /* Decrement TTL & update checksum. */
2583           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2584             {
2585               i32 ttl0 = ip0->ttl;
2586
2587               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2588
2589               checksum0 += checksum0 >= 0xffff;
2590
2591               ip0->checksum = checksum0;
2592
2593               ASSERT (ip0->ttl > 0);
2594
2595               ttl0 -= 1;
2596
2597               ip0->ttl = ttl0;
2598
2599               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2600                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2601
2602               if (PREDICT_FALSE (ttl0 <= 0))
2603                 {
2604                   /*
2605                    * If the ttl drops below 1 when forwarding, generate
2606                    * an ICMP response.
2607                    */
2608                   error0 = IP4_ERROR_TIME_EXPIRED;
2609                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2610                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2611                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2612                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2613                                                0);
2614                 }
2615             }
2616           else
2617             {
2618               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2619             }
2620
2621           if (do_counters)
2622             vlib_prefetch_combined_counter (&adjacency_counters,
2623                                             thread_index, adj_index0);
2624
2625           /* Guess we are only writing on simple Ethernet header. */
2626           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2627           if (is_mcast)
2628             {
2629               /*
2630                * copy bytes from the IP address into the MAC rewrite
2631                */
2632               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2633             }
2634
2635           /* Update packet buffer attributes/set output interface. */
2636           rw_len0 = adj0[0].rewrite_header.data_bytes;
2637           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2638
2639           if (do_counters)
2640             vlib_increment_combined_counter
2641               (&adjacency_counters,
2642                thread_index, adj_index0, 1,
2643                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2644
2645           /* Check MTU of outgoing interface. */
2646           error0 = (vlib_buffer_length_in_chain (vm, p0)
2647                     > adj0[0].rewrite_header.max_l3_packet_bytes
2648                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2649           if (is_mcast)
2650             {
2651               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2652                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2653                         IP4_ERROR_SAME_INTERFACE : error0);
2654             }
2655           p0->error = error_node->errors[error0];
2656
2657           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2658            * to see the IP headerr */
2659           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2660             {
2661               p0->current_data -= rw_len0;
2662               p0->current_length += rw_len0;
2663               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2664
2665               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2666               next0 = adj0[0].rewrite_header.next_index;
2667
2668               if (is_midchain)
2669                 {
2670                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2671                 }
2672
2673               if (PREDICT_FALSE
2674                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2675                 vnet_feature_arc_start (lm->output_feature_arc_index,
2676                                         tx_sw_if_index0, &next0, p0);
2677
2678             }
2679
2680           from += 1;
2681           n_left_from -= 1;
2682           to_next += 1;
2683           n_left_to_next -= 1;
2684
2685           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2686                                            to_next, n_left_to_next,
2687                                            pi0, next0);
2688         }
2689
2690       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2691     }
2692
2693   /* Need to do trace after rewrites to pick up new packet data. */
2694   if (node->flags & VLIB_NODE_FLAG_TRACE)
2695     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2696
2697   return frame->n_vectors;
2698 }
2699
2700
2701 /** @brief IPv4 rewrite node.
2702     @node ip4-rewrite
2703
2704     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2705     header checksum, fetch the ip adjacency, check the outbound mtu,
2706     apply the adjacency rewrite, and send pkts to the adjacency
2707     rewrite header's rewrite_next_index.
2708
2709     @param vm vlib_main_t corresponding to the current thread
2710     @param node vlib_node_runtime_t
2711     @param frame vlib_frame_t whose contents should be dispatched
2712
2713     @par Graph mechanics: buffer metadata, next index usage
2714
2715     @em Uses:
2716     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2717         - the rewrite adjacency index
2718     - <code>adj->lookup_next_index</code>
2719         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2720           the packet will be dropped.
2721     - <code>adj->rewrite_header</code>
2722         - Rewrite string length, rewrite string, next_index
2723
2724     @em Sets:
2725     - <code>b->current_data, b->current_length</code>
2726         - Updated net of applying the rewrite string
2727
2728     <em>Next Indices:</em>
2729     - <code> adj->rewrite_header.next_index </code>
2730       or @c ip4-drop
2731 */
2732 static uword
2733 ip4_rewrite (vlib_main_t * vm,
2734              vlib_node_runtime_t * node, vlib_frame_t * frame)
2735 {
2736   if (adj_are_counters_enabled ())
2737     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2738   else
2739     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2740 }
2741
2742 static uword
2743 ip4_midchain (vlib_main_t * vm,
2744               vlib_node_runtime_t * node, vlib_frame_t * frame)
2745 {
2746   if (adj_are_counters_enabled ())
2747     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2748   else
2749     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2750 }
2751
2752 static uword
2753 ip4_rewrite_mcast (vlib_main_t * vm,
2754                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2755 {
2756   if (adj_are_counters_enabled ())
2757     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2758   else
2759     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2760 }
2761
2762 static uword
2763 ip4_mcast_midchain (vlib_main_t * vm,
2764                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2765 {
2766   if (adj_are_counters_enabled ())
2767     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2768   else
2769     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2770 }
2771
2772 /* *INDENT-OFF* */
2773 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2774   .function = ip4_rewrite,
2775   .name = "ip4-rewrite",
2776   .vector_size = sizeof (u32),
2777
2778   .format_trace = format_ip4_rewrite_trace,
2779
2780   .n_next_nodes = 2,
2781   .next_nodes = {
2782     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2783     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2784   },
2785 };
2786 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2787
2788 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2789   .function = ip4_rewrite_mcast,
2790   .name = "ip4-rewrite-mcast",
2791   .vector_size = sizeof (u32),
2792
2793   .format_trace = format_ip4_rewrite_trace,
2794   .sibling_of = "ip4-rewrite",
2795 };
2796 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2797
2798 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2799   .function = ip4_mcast_midchain,
2800   .name = "ip4-mcast-midchain",
2801   .vector_size = sizeof (u32),
2802
2803   .format_trace = format_ip4_rewrite_trace,
2804   .sibling_of = "ip4-rewrite",
2805 };
2806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2807
2808 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2809   .function = ip4_midchain,
2810   .name = "ip4-midchain",
2811   .vector_size = sizeof (u32),
2812   .format_trace = format_ip4_forward_next_trace,
2813   .sibling_of =  "ip4-rewrite",
2814 };
2815 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2816 /* *INDENT-ON* */
2817
2818 int
2819 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2820 {
2821   ip4_fib_mtrie_t *mtrie0;
2822   ip4_fib_mtrie_leaf_t leaf0;
2823   u32 lbi0;
2824
2825   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2826
2827   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2828   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2829   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2830
2831   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2832
2833   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2834 }
2835
2836 static clib_error_t *
2837 test_lookup_command_fn (vlib_main_t * vm,
2838                         unformat_input_t * input, vlib_cli_command_t * cmd)
2839 {
2840   ip4_fib_t *fib;
2841   u32 table_id = 0;
2842   f64 count = 1;
2843   u32 n;
2844   int i;
2845   ip4_address_t ip4_base_address;
2846   u64 errors = 0;
2847
2848   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2849     {
2850       if (unformat (input, "table %d", &table_id))
2851         {
2852           /* Make sure the entry exists. */
2853           fib = ip4_fib_get (table_id);
2854           if ((fib) && (fib->index != table_id))
2855             return clib_error_return (0, "<fib-index> %d does not exist",
2856                                       table_id);
2857         }
2858       else if (unformat (input, "count %f", &count))
2859         ;
2860
2861       else if (unformat (input, "%U",
2862                          unformat_ip4_address, &ip4_base_address))
2863         ;
2864       else
2865         return clib_error_return (0, "unknown input `%U'",
2866                                   format_unformat_error, input);
2867     }
2868
2869   n = count;
2870
2871   for (i = 0; i < n; i++)
2872     {
2873       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2874         errors++;
2875
2876       ip4_base_address.as_u32 =
2877         clib_host_to_net_u32 (1 +
2878                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2879     }
2880
2881   if (errors)
2882     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2883   else
2884     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2885
2886   return 0;
2887 }
2888
2889 /*?
2890  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2891  * given FIB table to determine if there is a conflict with the
2892  * adjacency table. The fib-id can be determined by using the
2893  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2894  * of 0 is used.
2895  *
2896  * @todo This command uses fib-id, other commands use table-id (not
2897  * just a name, they are different indexes). Would like to change this
2898  * to table-id for consistency.
2899  *
2900  * @cliexpar
2901  * Example of how to run the test lookup command:
2902  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2903  * No errors in 2 lookups
2904  * @cliexend
2905 ?*/
2906 /* *INDENT-OFF* */
2907 VLIB_CLI_COMMAND (lookup_test_command, static) =
2908 {
2909   .path = "test lookup",
2910   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2911   .function = test_lookup_command_fn,
2912 };
2913 /* *INDENT-ON* */
2914
2915 int
2916 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2917 {
2918   u32 fib_index;
2919
2920   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2921
2922   if (~0 == fib_index)
2923     return VNET_API_ERROR_NO_SUCH_FIB;
2924
2925   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2926                                   flow_hash_config);
2927
2928   return 0;
2929 }
2930
2931 static clib_error_t *
2932 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2933                              unformat_input_t * input,
2934                              vlib_cli_command_t * cmd)
2935 {
2936   int matched = 0;
2937   u32 table_id = 0;
2938   u32 flow_hash_config = 0;
2939   int rv;
2940
2941   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2942     {
2943       if (unformat (input, "table %d", &table_id))
2944         matched = 1;
2945 #define _(a,v) \
2946     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2947       foreach_flow_hash_bit
2948 #undef _
2949         else
2950         break;
2951     }
2952
2953   if (matched == 0)
2954     return clib_error_return (0, "unknown input `%U'",
2955                               format_unformat_error, input);
2956
2957   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2958   switch (rv)
2959     {
2960     case 0:
2961       break;
2962
2963     case VNET_API_ERROR_NO_SUCH_FIB:
2964       return clib_error_return (0, "no such FIB table %d", table_id);
2965
2966     default:
2967       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2968       break;
2969     }
2970
2971   return 0;
2972 }
2973
2974 /*?
2975  * Configure the set of IPv4 fields used by the flow hash.
2976  *
2977  * @cliexpar
2978  * Example of how to set the flow hash on a given table:
2979  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2980  * Example of how to display the configured flow hash:
2981  * @cliexstart{show ip fib}
2982  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2983  * 0.0.0.0/0
2984  *   unicast-ip4-chain
2985  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2986  *     [0] [@0]: dpo-drop ip6
2987  * 0.0.0.0/32
2988  *   unicast-ip4-chain
2989  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2990  *     [0] [@0]: dpo-drop ip6
2991  * 224.0.0.0/8
2992  *   unicast-ip4-chain
2993  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2994  *     [0] [@0]: dpo-drop ip6
2995  * 6.0.1.2/32
2996  *   unicast-ip4-chain
2997  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2998  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2999  * 7.0.0.1/32
3000  *   unicast-ip4-chain
3001  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3002  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3003  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3004  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3005  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3006  * 240.0.0.0/8
3007  *   unicast-ip4-chain
3008  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3009  *     [0] [@0]: dpo-drop ip6
3010  * 255.255.255.255/32
3011  *   unicast-ip4-chain
3012  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3013  *     [0] [@0]: dpo-drop ip6
3014  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3015  * 0.0.0.0/0
3016  *   unicast-ip4-chain
3017  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3018  *     [0] [@0]: dpo-drop ip6
3019  * 0.0.0.0/32
3020  *   unicast-ip4-chain
3021  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3022  *     [0] [@0]: dpo-drop ip6
3023  * 172.16.1.0/24
3024  *   unicast-ip4-chain
3025  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3026  *     [0] [@4]: ipv4-glean: af_packet0
3027  * 172.16.1.1/32
3028  *   unicast-ip4-chain
3029  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3030  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3031  * 172.16.1.2/32
3032  *   unicast-ip4-chain
3033  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3034  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3035  * 172.16.2.0/24
3036  *   unicast-ip4-chain
3037  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3038  *     [0] [@4]: ipv4-glean: af_packet1
3039  * 172.16.2.1/32
3040  *   unicast-ip4-chain
3041  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3042  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3043  * 224.0.0.0/8
3044  *   unicast-ip4-chain
3045  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3046  *     [0] [@0]: dpo-drop ip6
3047  * 240.0.0.0/8
3048  *   unicast-ip4-chain
3049  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3050  *     [0] [@0]: dpo-drop ip6
3051  * 255.255.255.255/32
3052  *   unicast-ip4-chain
3053  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3054  *     [0] [@0]: dpo-drop ip6
3055  * @cliexend
3056 ?*/
3057 /* *INDENT-OFF* */
3058 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3059 {
3060   .path = "set ip flow-hash",
3061   .short_help =
3062   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3063   .function = set_ip_flow_hash_command_fn,
3064 };
3065 /* *INDENT-ON* */
3066
3067 int
3068 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3069                              u32 table_index)
3070 {
3071   vnet_main_t *vnm = vnet_get_main ();
3072   vnet_interface_main_t *im = &vnm->interface_main;
3073   ip4_main_t *ipm = &ip4_main;
3074   ip_lookup_main_t *lm = &ipm->lookup_main;
3075   vnet_classify_main_t *cm = &vnet_classify_main;
3076   ip4_address_t *if_addr;
3077
3078   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3079     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3080
3081   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3082     return VNET_API_ERROR_NO_SUCH_ENTRY;
3083
3084   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3085   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3086
3087   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3088
3089   if (NULL != if_addr)
3090     {
3091       fib_prefix_t pfx = {
3092         .fp_len = 32,
3093         .fp_proto = FIB_PROTOCOL_IP4,
3094         .fp_addr.ip4 = *if_addr,
3095       };
3096       u32 fib_index;
3097
3098       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3099                                                        sw_if_index);
3100
3101
3102       if (table_index != (u32) ~ 0)
3103         {
3104           dpo_id_t dpo = DPO_INVALID;
3105
3106           dpo_set (&dpo,
3107                    DPO_CLASSIFY,
3108                    DPO_PROTO_IP4,
3109                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3110
3111           fib_table_entry_special_dpo_add (fib_index,
3112                                            &pfx,
3113                                            FIB_SOURCE_CLASSIFY,
3114                                            FIB_ENTRY_FLAG_NONE, &dpo);
3115           dpo_reset (&dpo);
3116         }
3117       else
3118         {
3119           fib_table_entry_special_remove (fib_index,
3120                                           &pfx, FIB_SOURCE_CLASSIFY);
3121         }
3122     }
3123
3124   return 0;
3125 }
3126
3127 static clib_error_t *
3128 set_ip_classify_command_fn (vlib_main_t * vm,
3129                             unformat_input_t * input,
3130                             vlib_cli_command_t * cmd)
3131 {
3132   u32 table_index = ~0;
3133   int table_index_set = 0;
3134   u32 sw_if_index = ~0;
3135   int rv;
3136
3137   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3138     {
3139       if (unformat (input, "table-index %d", &table_index))
3140         table_index_set = 1;
3141       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3142                          vnet_get_main (), &sw_if_index))
3143         ;
3144       else
3145         break;
3146     }
3147
3148   if (table_index_set == 0)
3149     return clib_error_return (0, "classify table-index must be specified");
3150
3151   if (sw_if_index == ~0)
3152     return clib_error_return (0, "interface / subif must be specified");
3153
3154   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3155
3156   switch (rv)
3157     {
3158     case 0:
3159       break;
3160
3161     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3162       return clib_error_return (0, "No such interface");
3163
3164     case VNET_API_ERROR_NO_SUCH_ENTRY:
3165       return clib_error_return (0, "No such classifier table");
3166     }
3167   return 0;
3168 }
3169
3170 /*?
3171  * Assign a classification table to an interface. The classification
3172  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3173  * commands. Once the table is created, use this command to filter packets
3174  * on an interface.
3175  *
3176  * @cliexpar
3177  * Example of how to assign a classification table to an interface:
3178  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3179 ?*/
3180 /* *INDENT-OFF* */
3181 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3182 {
3183     .path = "set ip classify",
3184     .short_help =
3185     "set ip classify intfc <interface> table-index <classify-idx>",
3186     .function = set_ip_classify_command_fn,
3187 };
3188 /* *INDENT-ON* */
3189
3190 static clib_error_t *
3191 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3192 {
3193   ip4_main_t *im = &ip4_main;
3194   uword heapsize = 0;
3195
3196   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3197     {
3198       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3199         ;
3200       else
3201         return clib_error_return (0,
3202                                   "invalid heap-size parameter `%U'",
3203                                   format_unformat_error, input);
3204     }
3205
3206   im->mtrie_heap_size = heapsize;
3207
3208   return 0;
3209 }
3210
3211 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3212
3213 /*
3214  * fd.io coding-style-patch-verification: ON
3215  *
3216  * Local Variables:
3217  * eval: (c-set-style "gnu")
3218  * End:
3219  */