Add some indent-off to the node declarations in ip4-forward
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 /**
57  * @file
58  * @brief IPv4 Forwarding.
59  *
60  * This file contains the source code for IPv4 forwarding.
61  */
62
/**
 * @brief Shared worker for IPv4 lookup.
 *
 * For every buffer index in @c frame this function obtains a load-balance
 * index, selects one bucket (a DPO) of that load-balance, stores the
 * bucket's @c dpoi_index in
 * <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>, increments the
 * @c lbm_to_counters combined counter for the load-balance index and
 * enqueues the buffer to the bucket's @c dpoi_next_node.
 *
 * @param vm    vlib_main_t passed through to the buffer/frame helpers
 * @param node  node runtime; supplies the cached next index and trace flag
 * @param frame frame holding the buffer indices to process
 * @param lookup_for_responses_to_locally_received_packets
 *              when non-zero the mtrie walk is skipped and the load-balance
 *              index is read from
 *              <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
 *
 * @return @c frame->n_vectors
 */
63 always_inline uword
64 ip4_lookup_inline (vlib_main_t * vm,
65                    vlib_node_runtime_t * node,
66                    vlib_frame_t * frame,
67                    int lookup_for_responses_to_locally_received_packets)
68 {
69   ip4_main_t *im = &ip4_main;
     /* Counters below are indexed by (thread index, load-balance index). */
70   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
71   u32 n_left_from, n_left_to_next, *from, *to_next;
72   ip_lookup_next_t next;
73   u32 thread_index = vlib_get_thread_index ();
74
75   from = vlib_frame_vector_args (frame);
76   n_left_from = frame->n_vectors;
77   next = node->cached_next_index;
78
79   while (n_left_from > 0)
80     {
81       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
82
         /*
          * Quad loop: four buffers per pass. At least eight must remain so
          * that from[4..7] can be prefetched for the following pass.
          */
83       while (n_left_from >= 8 && n_left_to_next >= 4)
84         {
85           vlib_buffer_t *p0, *p1, *p2, *p3;
86           ip4_header_t *ip0, *ip1, *ip2, *ip3;
87           ip_lookup_next_t next0, next1, next2, next3;
88           const load_balance_t *lb0, *lb1, *lb2, *lb3;
89           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
90           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
91           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
92           u32 pi0, fib_index0, lb_index0;
93           u32 pi1, fib_index1, lb_index1;
94           u32 pi2, fib_index2, lb_index2;
95           u32 pi3, fib_index3, lb_index3;
96           flow_hash_config_t flow_hash_config0, flow_hash_config1;
97           flow_hash_config_t flow_hash_config2, flow_hash_config3;
98           u32 hash_c0, hash_c1, hash_c2, hash_c3;
99           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
100
101           /* Prefetch next iteration. */
102           {
103             vlib_buffer_t *p4, *p5, *p6, *p7;
104
105             p4 = vlib_get_buffer (vm, from[4]);
106             p5 = vlib_get_buffer (vm, from[5]);
107             p6 = vlib_get_buffer (vm, from[6]);
108             p7 = vlib_get_buffer (vm, from[7]);
109
110             vlib_prefetch_buffer_header (p4, LOAD);
111             vlib_prefetch_buffer_header (p5, LOAD);
112             vlib_prefetch_buffer_header (p6, LOAD);
113             vlib_prefetch_buffer_header (p7, LOAD);
114
115             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
116             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
117             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
118             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
119           }
120
              /*
               * Copy the four buffer indices into the current next frame up
               * front; vlib_validate_buffer_enqueue_x4 at the bottom of the
               * loop is handed next0..next3 to check that choice.
               */
121           pi0 = to_next[0] = from[0];
122           pi1 = to_next[1] = from[1];
123           pi2 = to_next[2] = from[2];
124           pi3 = to_next[3] = from[3];
125
126           from += 4;
127           to_next += 4;
128           n_left_to_next -= 4;
129           n_left_from -= 4;
130
131           p0 = vlib_get_buffer (vm, pi0);
132           p1 = vlib_get_buffer (vm, pi1);
133           p2 = vlib_get_buffer (vm, pi2);
134           p3 = vlib_get_buffer (vm, pi3);
135
136           ip0 = vlib_buffer_get_current (p0);
137           ip1 = vlib_buffer_get_current (p1);
138           ip2 = vlib_buffer_get_current (p2);
139           ip3 = vlib_buffer_get_current (p3);
140
141           dst_addr0 = &ip0->dst_address;
142           dst_addr1 = &ip1->dst_address;
143           dst_addr2 = &ip2->dst_address;
144           dst_addr3 = &ip3->dst_address;
145
              /* Start from the FIB index recorded for the RX interface ... */
146           fib_index0 =
147             vec_elt (im->fib_index_by_sw_if_index,
148                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
149           fib_index1 =
150             vec_elt (im->fib_index_by_sw_if_index,
151                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
152           fib_index2 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
              /*
               * ... but a sw_if_index[VLIB_TX] other than ~0 overrides it and
               * is used directly as the FIB index.
               */
158           fib_index0 =
159             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
160              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
161           fib_index1 =
162             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
163              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
164           fib_index2 =
165             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
167           fib_index3 =
168             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
170
171
              /*
               * mtrie walk on the destination address in three steps
               * (step_one, then lookup_step with 2 and with 3). All three
               * are skipped when the load-balance index is taken from the
               * buffer instead.
               */
172           if (!lookup_for_responses_to_locally_received_packets)
173             {
174               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
175               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
176               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
177               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
178
179               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
180               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
181               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
182               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
183             }
184
185           if (!lookup_for_responses_to_locally_received_packets)
186             {
187               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
188               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
189               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
190               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
191             }
192
193           if (!lookup_for_responses_to_locally_received_packets)
194             {
195               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
196               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
197               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
198               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
199             }
200
              /*
               * Load-balance index: either carried in the buffer metadata or
               * extracted from the final mtrie leaf.
               */
201           if (lookup_for_responses_to_locally_received_packets)
202             {
203               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
204               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
205               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
206               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
207             }
208           else
209             {
210               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
211               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
212               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
213               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
214             }
215
              /* A load-balance index of 0 is not expected for any buffer. */
216           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
217           lb0 = load_balance_get (lb_index0);
218           lb1 = load_balance_get (lb_index1);
219           lb2 = load_balance_get (lb_index2);
220           lb3 = load_balance_get (lb_index3);
221
              /*
               * The bucket count must be a non-zero power of two: the bucket
               * is picked below with (hash & lb_n_buckets_minus_1).
               */
222           ASSERT (lb0->lb_n_buckets > 0);
223           ASSERT (is_pow2 (lb0->lb_n_buckets));
224           ASSERT (lb1->lb_n_buckets > 0);
225           ASSERT (is_pow2 (lb1->lb_n_buckets));
226           ASSERT (lb2->lb_n_buckets > 0);
227           ASSERT (is_pow2 (lb2->lb_n_buckets));
228           ASSERT (lb3->lb_n_buckets > 0);
229           ASSERT (is_pow2 (lb3->lb_n_buckets));
230
231           /* Use flow hash to compute multipath adjacency. */
              /*
               * The stored flow hash is reset to 0 first and only computed
               * when the load-balance has more than one bucket; a single
               * bucket is taken directly.
               */
232           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
233           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
234           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
235           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
236           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
237             {
238               flow_hash_config0 = lb0->lb_hash_config;
239               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, flow_hash_config0);
241               dpo0 =
242                 load_balance_get_fwd_bucket (lb0,
243                                              (hash_c0 &
244                                               (lb0->lb_n_buckets_minus_1)));
245             }
246           else
247             {
248               dpo0 = load_balance_get_bucket_i (lb0, 0);
249             }
250           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
251             {
252               flow_hash_config1 = lb1->lb_hash_config;
253               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
254                 ip4_compute_flow_hash (ip1, flow_hash_config1);
255               dpo1 =
256                 load_balance_get_fwd_bucket (lb1,
257                                              (hash_c1 &
258                                               (lb1->lb_n_buckets_minus_1)));
259             }
260           else
261             {
262               dpo1 = load_balance_get_bucket_i (lb1, 0);
263             }
264           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
265             {
266               flow_hash_config2 = lb2->lb_hash_config;
267               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
268                 ip4_compute_flow_hash (ip2, flow_hash_config2);
269               dpo2 =
270                 load_balance_get_fwd_bucket (lb2,
271                                              (hash_c2 &
272                                               (lb2->lb_n_buckets_minus_1)));
273             }
274           else
275             {
276               dpo2 = load_balance_get_bucket_i (lb2, 0);
277             }
278           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
279             {
280               flow_hash_config3 = lb3->lb_hash_config;
281               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
282                 ip4_compute_flow_hash (ip3, flow_hash_config3);
283               dpo3 =
284                 load_balance_get_fwd_bucket (lb3,
285                                              (hash_c3 &
286                                               (lb3->lb_n_buckets_minus_1)));
287             }
288           else
289             {
290               dpo3 = load_balance_get_bucket_i (lb3, 0);
291             }
292
              /*
               * The chosen bucket DPO supplies both the next node and the
               * index recorded in adj_index[VLIB_TX].
               */
293           next0 = dpo0->dpoi_next_node;
294           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
295           next1 = dpo1->dpoi_next_node;
296           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
297           next2 = dpo2->dpoi_next_node;
298           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
299           next3 = dpo3->dpoi_next_node;
300           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
301
              /*
               * Count one packet and its buffer-chain length against each
               * load-balance index.
               */
302           vlib_increment_combined_counter
303             (cm, thread_index, lb_index0, 1,
304              vlib_buffer_length_in_chain (vm, p0));
305           vlib_increment_combined_counter
306             (cm, thread_index, lb_index1, 1,
307              vlib_buffer_length_in_chain (vm, p1));
308           vlib_increment_combined_counter
309             (cm, thread_index, lb_index2, 1,
310              vlib_buffer_length_in_chain (vm, p2));
311           vlib_increment_combined_counter
312             (cm, thread_index, lb_index3, 1,
313              vlib_buffer_length_in_chain (vm, p3));
314
315           vlib_validate_buffer_enqueue_x4 (vm, node, next,
316                                            to_next, n_left_to_next,
317                                            pi0, pi1, pi2, pi3,
318                                            next0, next1, next2, next3);
319         }
320
          /* Single loop: handle whatever is left one buffer at a time. */
321       while (n_left_from > 0 && n_left_to_next > 0)
322         {
323           vlib_buffer_t *p0;
324           ip4_header_t *ip0;
325           ip_lookup_next_t next0;
326           const load_balance_t *lb0;
327           ip4_fib_mtrie_t *mtrie0;
328           ip4_fib_mtrie_leaf_t leaf0;
329           ip4_address_t *dst_addr0;
330           u32 pi0, fib_index0, lbi0;
331           flow_hash_config_t flow_hash_config0;
332           const dpo_id_t *dpo0;
333           u32 hash_c0;
334
335           pi0 = from[0];
336           to_next[0] = pi0;
337
338           p0 = vlib_get_buffer (vm, pi0);
339
340           ip0 = vlib_buffer_get_current (p0);
341
342           dst_addr0 = &ip0->dst_address;
343
              /*
               * FIB index of the RX interface, overridden by
               * sw_if_index[VLIB_TX] when that is not ~0.
               */
344           fib_index0 =
345             vec_elt (im->fib_index_by_sw_if_index,
346                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
347           fib_index0 =
348             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
349              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
350
351           if (!lookup_for_responses_to_locally_received_packets)
352             {
353               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
354
355               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
356             }
357
358           if (!lookup_for_responses_to_locally_received_packets)
359             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
360
361           if (!lookup_for_responses_to_locally_received_packets)
362             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
363
364           if (lookup_for_responses_to_locally_received_packets)
365             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
366           else
367             {
368               /* Handle default route. */
369               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
370             }
371
372           ASSERT (lbi0);
373           lb0 = load_balance_get (lbi0);
374
375           ASSERT (lb0->lb_n_buckets > 0);
376           ASSERT (is_pow2 (lb0->lb_n_buckets));
377
378           /* Use flow hash to compute multipath adjacency. */
379           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
380           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
381             {
382               flow_hash_config0 = lb0->lb_hash_config;
383
384               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
385                 ip4_compute_flow_hash (ip0, flow_hash_config0);
386               dpo0 =
387                 load_balance_get_fwd_bucket (lb0,
388                                              (hash_c0 &
389                                               (lb0->lb_n_buckets_minus_1)));
390             }
391           else
392             {
393               dpo0 = load_balance_get_bucket_i (lb0, 0);
394             }
395
396           next0 = dpo0->dpoi_next_node;
397           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
398
399           vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
400                                            vlib_buffer_length_in_chain (vm,
401                                                                         p0));
402
403           from += 1;
404           to_next += 1;
405           n_left_to_next -= 1;
406           n_left_from -= 1;
407
              /*
               * The buffer was already written into the frame for `next`.
               * If it actually belongs to a different next node, give the
               * slot back, close that frame, switch `next` to next0 and
               * enqueue the buffer into the new frame instead.
               */
408           if (PREDICT_FALSE (next0 != next))
409             {
410               n_left_to_next += 1;
411               vlib_put_next_frame (vm, node, next, n_left_to_next);
412               next = next0;
413               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
414               to_next[0] = pi0;
415               to_next += 1;
416               n_left_to_next -= 1;
417             }
418         }
419
420       vlib_put_next_frame (vm, node, next, n_left_to_next);
421     }
422
      /* Tracing, when enabled on this node, is done once for the whole frame. */
423   if (node->flags & VLIB_NODE_FLAG_TRACE)
424     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
425
426   return frame->n_vectors;
427 }
428
429 /** @brief IPv4 lookup node.
430     @node ip4-lookup
431
432     This is the main IPv4 lookup dispatch node.
433
434     @param vm vlib_main_t corresponding to the current thread
435     @param node vlib_node_runtime_t
436     @param frame vlib_frame_t whose contents should be dispatched
437
438     @par Graph mechanics: buffer metadata, next index usage
439
440     @em Uses:
441     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
442         - Indicates the @c sw_if_index value of the interface that the
443           packet was received on.
444     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
445         - When the value is @c ~0 then the node performs a longest prefix
446           match (LPM) for the packet destination address in the FIB attached
447           to the receive interface.
448         - Otherwise perform LPM for the packet destination address in the
449           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
450           value (0, 1, ...) and not a VRF id.
451
452     @em Sets:
453     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
454         - The @c dpoi_index of the selected load-balance bucket (DPO).
455
456     <em>Next Index:</em>
457     - Dispatches the packet to the @c dpoi_next_node of the load-balance
458       bucket (DPO) selected for the lookup result; with more than one
459       bucket the choice is made by the packet's flow hash.
460 */
461 static uword
462 ip4_lookup (vlib_main_t * vm,
463             vlib_node_runtime_t * node, vlib_frame_t * frame)
464 {
      /* Passing 0 makes the inline worker perform the mtrie lookup itself. */
465   return ip4_lookup_inline (vm, node, frame,
466                             /* lookup_for_responses_to_locally_received_packets */
467                             0);
468
469 }
470
/*
 * Trace formatter used by the node registrations below; only declared
 * here, its definition is not in this part of the file.
 */
471 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
472
/*
 * Graph node registration for "ip4-lookup": dispatch function ip4_lookup,
 * u32 vector elements, IP_LOOKUP_N_NEXT next nodes taken from
 * IP4_LOOKUP_NEXT_NODES. The INDENT-OFF/INDENT-ON pair brackets the
 * initializer so the automatic indenter leaves its layout alone.
 */
473 /* *INDENT-OFF* */
474 VLIB_REGISTER_NODE (ip4_lookup_node) =
475 {
476   .function = ip4_lookup,
477   .name = "ip4-lookup",
478   .vector_size = sizeof (u32),
479   .format_trace = format_ip4_lookup_trace,
480   .n_next_nodes = IP_LOOKUP_N_NEXT,
481   .next_nodes = IP4_LOOKUP_NEXT_NODES,
482 };
483 /* *INDENT-ON* */
484
/* NOTE(review): presumably emits per-CPU-architecture variants of ip4_lookup - confirm against the macro definition. */
485 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
486
/**
 * @brief Node function for "ip4-load-balance".
 *
 * Each buffer arrives with a load-balance index in
 * <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>. One bucket (DPO) of
 * that load-balance is selected, its @c dpoi_index is written back to
 * <code>adj_index[VLIB_TX]</code>, the @c lbm_via_counters combined counter
 * for the load-balance index is incremented, and the buffer is enqueued to
 * the bucket's @c dpoi_next_node.
 *
 * With more than one bucket the bucket is chosen by flow hash: a non-zero
 * hash already stored in the buffer is shifted right by one bit and stored
 * back, otherwise a hash is computed from the IPv4 header using the
 * load-balance's hash config.
 *
 * @param vm    vlib_main_t passed through to the buffer/frame helpers
 * @param node  node runtime; supplies the cached next index and trace flag
 * @param frame frame holding the buffer indices to process
 *
 * @return @c frame->n_vectors
 */
487 always_inline uword
488 ip4_load_balance (vlib_main_t * vm,
489                   vlib_node_runtime_t * node, vlib_frame_t * frame)
490 {
      /* Counters below are indexed by (thread index, load-balance index). */
491   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
492   u32 n_left_from, n_left_to_next, *from, *to_next;
493   ip_lookup_next_t next;
494   u32 thread_index = vlib_get_thread_index ();
495
496   from = vlib_frame_vector_args (frame);
497   n_left_from = frame->n_vectors;
498   next = node->cached_next_index;
499
      /* Here the frame is traced up front, before any buffer is processed. */
500   if (node->flags & VLIB_NODE_FLAG_TRACE)
501     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
502
503   while (n_left_from > 0)
504     {
505       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
506
507
          /*
           * Dual loop: two buffers per pass. At least four must remain so
           * that from[2..3] can be prefetched for the following pass.
           */
508       while (n_left_from >= 4 && n_left_to_next >= 2)
509         {
510           ip_lookup_next_t next0, next1;
511           const load_balance_t *lb0, *lb1;
512           vlib_buffer_t *p0, *p1;
513           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
514           const ip4_header_t *ip0, *ip1;
515           const dpo_id_t *dpo0, *dpo1;
516
517           /* Prefetch next iteration. */
518           {
519             vlib_buffer_t *p2, *p3;
520
521             p2 = vlib_get_buffer (vm, from[2]);
522             p3 = vlib_get_buffer (vm, from[3]);
523
524             vlib_prefetch_buffer_header (p2, STORE);
525             vlib_prefetch_buffer_header (p3, STORE);
526
527             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
528             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
529           }
530
              /*
               * Copy both buffer indices into the current next frame up
               * front; vlib_validate_buffer_enqueue_x2 at the bottom of the
               * loop is handed next0/next1 to check that choice.
               */
531           pi0 = to_next[0] = from[0];
532           pi1 = to_next[1] = from[1];
533
534           from += 2;
535           n_left_from -= 2;
536           to_next += 2;
537           n_left_to_next -= 2;
538
539           p0 = vlib_get_buffer (vm, pi0);
540           p1 = vlib_get_buffer (vm, pi1);
541
542           ip0 = vlib_buffer_get_current (p0);
543           ip1 = vlib_buffer_get_current (p1);
              /* On entry adj_index[VLIB_TX] holds the load-balance index. */
544           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
545           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
546
547           lb0 = load_balance_get (lbi0);
548           lb1 = load_balance_get (lbi1);
549
550           /*
551            * This node is for via FIBs; we can re-use the hash value from
552            * the "to" node if present.
553            * We don't want to use the same hash value at each level in the
554            * recursion graph, as that would lead to polarisation.
555            */
556           hc0 = hc1 = 0;
557
558           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
559             {
                  /* Re-use a non-zero stored hash, shifted by one bit. */
560               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
561                 {
562                   hc0 = vnet_buffer (p0)->ip.flow_hash =
563                     vnet_buffer (p0)->ip.flow_hash >> 1;
564                 }
565               else
566                 {
567                   hc0 = vnet_buffer (p0)->ip.flow_hash =
568                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
569                 }
570               dpo0 = load_balance_get_fwd_bucket
571                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
572             }
573           else
574             {
575               dpo0 = load_balance_get_bucket_i (lb0, 0);
576             }
577           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
578             {
579               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
580                 {
581                   hc1 = vnet_buffer (p1)->ip.flow_hash =
582                     vnet_buffer (p1)->ip.flow_hash >> 1;
583                 }
584               else
585                 {
586                   hc1 = vnet_buffer (p1)->ip.flow_hash =
587                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
588                 }
589               dpo1 = load_balance_get_fwd_bucket
590                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
591             }
592           else
593             {
594               dpo1 = load_balance_get_bucket_i (lb1, 0);
595             }
596
597           next0 = dpo0->dpoi_next_node;
598           next1 = dpo1->dpoi_next_node;
599
              /* Replace the load-balance index with the chosen bucket's index. */
600           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
602
              /* Counted against the load-balance index read on entry. */
603           vlib_increment_combined_counter
604             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
605           vlib_increment_combined_counter
606             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
607
608           vlib_validate_buffer_enqueue_x2 (vm, node, next,
609                                            to_next, n_left_to_next,
610                                            pi0, pi1, next0, next1);
611         }
612
          /* Single loop: handle whatever is left one buffer at a time. */
613       while (n_left_from > 0 && n_left_to_next > 0)
614         {
615           ip_lookup_next_t next0;
616           const load_balance_t *lb0;
617           vlib_buffer_t *p0;
618           u32 pi0, lbi0, hc0;
619           const ip4_header_t *ip0;
620           const dpo_id_t *dpo0;
621
622           pi0 = from[0];
623           to_next[0] = pi0;
624           from += 1;
625           to_next += 1;
626           n_left_to_next -= 1;
627           n_left_from -= 1;
628
629           p0 = vlib_get_buffer (vm, pi0);
630
631           ip0 = vlib_buffer_get_current (p0);
632           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
633
634           lb0 = load_balance_get (lbi0);
635
636           hc0 = 0;
637           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
638             {
639               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
640                 {
641                   hc0 = vnet_buffer (p0)->ip.flow_hash =
642                     vnet_buffer (p0)->ip.flow_hash >> 1;
643                 }
644               else
645                 {
646                   hc0 = vnet_buffer (p0)->ip.flow_hash =
647                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
648                 }
649               dpo0 = load_balance_get_fwd_bucket
650                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
651             }
652           else
653             {
654               dpo0 = load_balance_get_bucket_i (lb0, 0);
655             }
656
657           next0 = dpo0->dpoi_next_node;
658           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
659
660           vlib_increment_combined_counter
661             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
662
663           vlib_validate_buffer_enqueue_x1 (vm, node, next,
664                                            to_next, n_left_to_next,
665                                            pi0, next0);
666         }
667
668       vlib_put_next_frame (vm, node, next, n_left_to_next);
669     }
670
671   return frame->n_vectors;
672 }
673
/*
 * Graph node registration for "ip4-load-balance": dispatch function
 * ip4_load_balance, u32 vector elements, same trace formatter as
 * ip4-lookup. No .next_nodes are listed; the node is declared a sibling of
 * "ip4-lookup".
 * NOTE(review): presumably .sibling_of makes it share ip4-lookup's next
 * nodes - confirm against the vlib node documentation.
 */
674 /* *INDENT-OFF* */
675 VLIB_REGISTER_NODE (ip4_load_balance_node) =
676 {
677   .function = ip4_load_balance,
678   .name = "ip4-load-balance",
679   .vector_size = sizeof (u32),
680   .sibling_of = "ip4-lookup",
681   .format_trace =
682   format_ip4_lookup_trace,
683 };
684 /* *INDENT-ON* */
685
686 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
687
688 /* Get the first IPv4 address configured on an interface. */
/*
 * Walks the interface addresses of sw_if_index (honoring unnumbered
 * interfaces) and stops at the first one.
 *
 * @param im          IPv4 main; its lookup_main holds the address records
 * @param sw_if_index interface to inspect
 * @param result_ia   optional out-parameter; when non-NULL it receives the
 *                    interface-address record of the returned address, or 0
 *                    when no address was found
 *
 * @return pointer to the first address, or 0 if the walk visits none
 */
689 ip4_address_t *
690 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
691                              ip_interface_address_t ** result_ia)
692 {
693   ip_lookup_main_t *lm = &im->lookup_main;
694   ip_interface_address_t *ia = 0;
695   ip4_address_t *result = 0;
696
697   /* *INDENT-OFF* */
698   foreach_ip_interface_address
699     (lm, ia, sw_if_index,
700      1 /* honor unnumbered */ ,
701      ({
702        ip4_address_t * a =
703          ip_interface_address_get_address (lm, ia);
704        result = a;
          /* Only the first address is wanted: leave the walk immediately. */
705        break;
706      }));
707   /* *INDENT-ON* */
      /* Report ia only when an address was actually found. */
708   if (result_ia)
709     *result_ia = result ? ia : 0;
710   return result;
711 }
712
/*
 * Install the FIB entries that go with interface address `a` on
 * sw_if_index, in the FIB identified by fib_index:
 *
 *  - prefix length <= 30: a connected + attached entry for the prefix,
 *    plus /32 drop entries for the network address and the broadcast
 *    address of the subnet (each skipped when it equals the interface
 *    address itself);
 *  - prefix length == 31: a /32 attached entry for the other end of the
 *    link (the address with its lowest bit flipped);
 *  - always: a /32 connected + local entry for the address itself,
 *    preceded by a /32 classify DPO entry when a classify table is
 *    configured for the interface.
 *
 * Every entry is added with source FIB_SOURCE_INTERFACE, except the
 * classify entry which uses FIB_SOURCE_CLASSIFY.
 */
713 static void
714 ip4_add_interface_routes (u32 sw_if_index,
715                           ip4_main_t * im, u32 fib_index,
716                           ip_interface_address_t * a)
717 {
718   ip_lookup_main_t *lm = &im->lookup_main;
719   ip4_address_t *address = ip_interface_address_get_address (lm, a);
720   fib_prefix_t pfx = {
721     .fp_len = a->address_length,
722     .fp_proto = FIB_PROTOCOL_IP4,
723     .fp_addr.ip4 = *address,
724   };
725
726   if (pfx.fp_len <= 30)
727     {
728       /* a /30 or shorter - add a glean for the network address */
729       fib_table_entry_update_one_path (fib_index, &pfx,
730                                        FIB_SOURCE_INTERFACE,
731                                        (FIB_ENTRY_FLAG_CONNECTED |
732                                         FIB_ENTRY_FLAG_ATTACHED),
733                                        DPO_PROTO_IP4,
734                                        /* No next-hop address */
735                                        NULL,
736                                        sw_if_index,
737                                        /* invalid FIB index */
738                                        ~0,
739                                        1,
740                                        /* no out-label stack */
741                                        NULL,
742                                        FIB_ROUTE_PATH_FLAG_NONE);
743
744       /* Add the two broadcast addresses as drop */
          /*
           * First the address with all host bits cleared ...
           * (assumes im->fib_masks[len] is the network mask of a /len
           * prefix - TODO confirm)
           */
745       fib_prefix_t net_pfx = {
746         .fp_len = 32,
747         .fp_proto = FIB_PROTOCOL_IP4,
748         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
749       };
          /* Skip it when it is the interface's own address. */
750       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
751         fib_table_entry_special_add(fib_index,
752                                     &net_pfx,
753                                     FIB_SOURCE_INTERFACE,
754                                     (FIB_ENTRY_FLAG_DROP |
755                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
          /* ... then the address with all host bits set, same rule. */
756       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
757       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
758         fib_table_entry_special_add(fib_index,
759                                     &net_pfx,
760                                     FIB_SOURCE_INTERFACE,
761                                     (FIB_ENTRY_FLAG_DROP |
762                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
763     }
764   else if (pfx.fp_len == 31)
765     {
          /*
           * XOR with 1 (converted to network byte order) flips the lowest
           * address bit, giving the peer address on the /31.
           */
766       u32 mask = clib_host_to_net_u32(1);
767       fib_prefix_t net_pfx = pfx;
768
769       net_pfx.fp_len = 32;
770       net_pfx.fp_addr.ip4.as_u32 ^= mask;
771
772       /* a /31 - add the other end as an attached host */
773       fib_table_entry_update_one_path (fib_index, &net_pfx,
774                                        FIB_SOURCE_INTERFACE,
775                                        (FIB_ENTRY_FLAG_ATTACHED),
776                                        DPO_PROTO_IP4,
777                                        &net_pfx.fp_addr,
778                                        sw_if_index,
779                                        /* invalid FIB index */
780                                        ~0,
781                                        1,
782                                        NULL,
783                                        FIB_ROUTE_PATH_FLAG_NONE);
784     }
      /* From here on pfx describes the interface address itself as a /32. */
785   pfx.fp_len = 32;
786
      /*
       * If a classify table is configured for this interface (index other
       * than ~0), add a classify DPO entry for the /32 as well.
       */
787   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
788     {
789       u32 classify_table_index =
790         lm->classify_table_index_by_sw_if_index[sw_if_index];
791       if (classify_table_index != (u32) ~ 0)
792         {
793           dpo_id_t dpo = DPO_INVALID;
794
795           dpo_set (&dpo,
796                    DPO_CLASSIFY,
797                    DPO_PROTO_IP4,
798                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
799
800           fib_table_entry_special_dpo_add (fib_index,
801                                            &pfx,
802                                            FIB_SOURCE_CLASSIFY,
803                                            FIB_ENTRY_FLAG_NONE, &dpo);
              /* The local dpo handle is released once the entry is added. */
804           dpo_reset (&dpo);
805         }
806     }
807
      /* The interface's own address: connected + local /32. */
808   fib_table_entry_update_one_path (fib_index, &pfx,
809                                    FIB_SOURCE_INTERFACE,
810                                    (FIB_ENTRY_FLAG_CONNECTED |
811                                     FIB_ENTRY_FLAG_LOCAL),
812                                    DPO_PROTO_IP4,
813                                    &pfx.fp_addr,
814                                    sw_if_index,
815                                    /* invalid FIB index */
816                                    ~0,
817                                    1, NULL,
818                                    FIB_ROUTE_PATH_FLAG_NONE);
819 }
820
821 static void
822 ip4_del_interface_routes (ip4_main_t * im,
823                           u32 fib_index,
824                           ip4_address_t * address, u32 address_length)
825 {
826   fib_prefix_t pfx = {
827     .fp_len = address_length,
828     .fp_proto = FIB_PROTOCOL_IP4,
829     .fp_addr.ip4 = *address,
830   };
831
832   if (pfx.fp_len <= 30)
833     {
834       fib_prefix_t net_pfx = {
835         .fp_len = 32,
836         .fp_proto = FIB_PROTOCOL_IP4,
837         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
838       };
839       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
840         fib_table_entry_special_remove(fib_index,
841                                        &net_pfx,
842                                        FIB_SOURCE_INTERFACE);
843       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
844       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
845         fib_table_entry_special_remove(fib_index,
846                                        &net_pfx,
847                                        FIB_SOURCE_INTERFACE);
848       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
849     }
850     else if (pfx.fp_len == 31)
851     {
852       u32 mask = clib_host_to_net_u32(1);
853       fib_prefix_t net_pfx = pfx;
854
855       net_pfx.fp_len = 32;
856       net_pfx.fp_addr.ip4.as_u32 ^= mask;
857
858       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
859     }
860
861   pfx.fp_len = 32;
862   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
863 }
864
865 void
866 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
867 {
868   ip4_main_t *im = &ip4_main;
869
870   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
871
872   /*
873    * enable/disable only on the 1<->0 transition
874    */
875   if (is_enable)
876     {
877       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
878         return;
879     }
880   else
881     {
882       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
883       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
884         return;
885     }
886   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
887                                !is_enable, 0, 0);
888
889
890   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
891                                sw_if_index, !is_enable, 0, 0);
892 }
893
894 static clib_error_t *
895 ip4_add_del_interface_address_internal (vlib_main_t * vm,
896                                         u32 sw_if_index,
897                                         ip4_address_t * address,
898                                         u32 address_length, u32 is_del)
899 {
900   vnet_main_t *vnm = vnet_get_main ();
901   ip4_main_t *im = &ip4_main;
902   ip_lookup_main_t *lm = &im->lookup_main;
903   clib_error_t *error = 0;
904   u32 if_address_index, elts_before;
905   ip4_address_fib_t ip4_af, *addr_fib = 0;
906
907   /* local0 interface doesn't support IP addressing  */
908   if (sw_if_index == 0)
909     {
910       return
911        clib_error_create ("local0 interface doesn't support IP addressing");
912     }
913
914   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
915   ip4_addr_fib_init (&ip4_af, address,
916                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
917   vec_add1 (addr_fib, ip4_af);
918
919   /* FIXME-LATER
920    * there is no support for adj-fib handling in the presence of overlapping
921    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
922    * most routers do.
923    */
924   /* *INDENT-OFF* */
925   if (!is_del)
926     {
927       /* When adding an address check that it does not conflict
928          with an existing address. */
929       ip_interface_address_t *ia;
930       foreach_ip_interface_address
931         (&im->lookup_main, ia, sw_if_index,
932          0 /* honor unnumbered */ ,
933          ({
934            ip4_address_t * x =
935              ip_interface_address_get_address
936              (&im->lookup_main, ia);
937            if (ip4_destination_matches_route
938                (im, address, x, ia->address_length) ||
939                ip4_destination_matches_route (im,
940                                               x,
941                                               address,
942                                               address_length))
943              return
944                clib_error_create
945                ("failed to add %U which conflicts with %U for interface %U",
946                 format_ip4_address_and_length, address,
947                 address_length,
948                 format_ip4_address_and_length, x,
949                 ia->address_length,
950                 format_vnet_sw_if_index_name, vnm,
951                 sw_if_index);
952          }));
953     }
954   /* *INDENT-ON* */
955
956   elts_before = pool_elts (lm->if_address_pool);
957
958   error = ip_interface_address_add_del
959     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
960   if (error)
961     goto done;
962
963   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
964
965   if (is_del)
966     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
967   else
968     ip4_add_interface_routes (sw_if_index,
969                               im, ip4_af.fib_index,
970                               pool_elt_at_index
971                               (lm->if_address_pool, if_address_index));
972
973   /* If pool did not grow/shrink: add duplicate address. */
974   if (elts_before != pool_elts (lm->if_address_pool))
975     {
976       ip4_add_del_interface_address_callback_t *cb;
977       vec_foreach (cb, im->add_del_interface_address_callbacks)
978         cb->function (im, cb->function_opaque, sw_if_index,
979                       address, address_length, if_address_index, is_del);
980     }
981
982 done:
983   vec_free (addr_fib);
984   return error;
985 }
986
987 clib_error_t *
988 ip4_add_del_interface_address (vlib_main_t * vm,
989                                u32 sw_if_index,
990                                ip4_address_t * address,
991                                u32 address_length, u32 is_del)
992 {
993   return ip4_add_del_interface_address_internal
994     (vm, sw_if_index, address, address_length, is_del);
995 }
996
997 /* Built-in ip4 unicast rx feature path definition */
998 /* *INDENT-OFF* */
999 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
1000 {
1001   .arc_name = "ip4-unicast",
1002   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1003   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
1004 };
1005
1006 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1007 {
1008   .arc_name = "ip4-unicast",
1009   .node_name = "ip4-flow-classify",
1010   .runs_before = VNET_FEATURES ("ip4-inacl"),
1011 };
1012
1013 VNET_FEATURE_INIT (ip4_inacl, static) =
1014 {
1015   .arc_name = "ip4-unicast",
1016   .node_name = "ip4-inacl",
1017   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1021 {
1022   .arc_name = "ip4-unicast",
1023   .node_name = "ip4-source-check-via-rx",
1024   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1025 };
1026
1027 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1028 {
1029   .arc_name = "ip4-unicast",
1030   .node_name = "ip4-source-check-via-any",
1031   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1032 };
1033
1034 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1035 {
1036   .arc_name = "ip4-unicast",
1037   .node_name = "ip4-source-and-port-range-check-rx",
1038   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1039 };
1040
1041 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1042 {
1043   .arc_name = "ip4-unicast",
1044   .node_name = "ip4-policer-classify",
1045   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1046 };
1047
1048 VNET_FEATURE_INIT (ip4_ipsec, static) =
1049 {
1050   .arc_name = "ip4-unicast",
1051   .node_name = "ipsec-input-ip4",
1052   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1053 };
1054
1055 VNET_FEATURE_INIT (ip4_vpath, static) =
1056 {
1057   .arc_name = "ip4-unicast",
1058   .node_name = "vpath-input-ip4",
1059   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1060 };
1061
1062 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1063 {
1064   .arc_name = "ip4-unicast",
1065   .node_name = "ip4-vxlan-bypass",
1066   .runs_before = VNET_FEATURES ("ip4-lookup"),
1067 };
1068
1069 VNET_FEATURE_INIT (ip4_drop, static) =
1070 {
1071   .arc_name = "ip4-unicast",
1072   .node_name = "ip4-drop",
1073   .runs_before = VNET_FEATURES ("ip4-lookup"),
1074 };
1075
1076 VNET_FEATURE_INIT (ip4_lookup, static) =
1077 {
1078   .arc_name = "ip4-unicast",
1079   .node_name = "ip4-lookup",
1080   .runs_before = 0,     /* not before any other features */
1081 };
1082
1083 /* Built-in ip4 multicast rx feature path definition */
1084 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1085 {
1086   .arc_name = "ip4-multicast",
1087   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1088   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1089 };
1090
1091 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1092 {
1093   .arc_name = "ip4-multicast",
1094   .node_name = "vpath-input-ip4",
1095   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1096 };
1097
1098 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1099 {
1100   .arc_name = "ip4-multicast",
1101   .node_name = "ip4-drop",
1102   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1103 };
1104
1105 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1106 {
1107   .arc_name = "ip4-multicast",
1108   .node_name = "ip4-mfib-forward-lookup",
1109   .runs_before = 0,     /* last feature */
1110 };
1111
1112 /* Source and port-range check ip4 tx feature path definition */
1113 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1114 {
1115   .arc_name = "ip4-output",
1116   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1117   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1118 };
1119
1120 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1121 {
1122   .arc_name = "ip4-output",
1123   .node_name = "ip4-source-and-port-range-check-tx",
1124   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1125 };
1126
1127 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1128 {
1129   .arc_name = "ip4-output",
1130   .node_name = "ipsec-output-ip4",
1131   .runs_before = VNET_FEATURES ("interface-output"),
1132 };
1133
1134 /* Built-in ip4 tx feature path definition */
1135 VNET_FEATURE_INIT (ip4_interface_output, static) =
1136 {
1137   .arc_name = "ip4-output",
1138   .node_name = "interface-output",
1139   .runs_before = 0,     /* not before any other features */
1140 };
1141 /* *INDENT-ON* */
1142
1143 static clib_error_t *
1144 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1145 {
1146   ip4_main_t *im = &ip4_main;
1147
1148   /* Fill in lookup tables with default table (0). */
1149   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1150   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1151
1152   if (!is_add)
1153     {
1154       ip4_main_t *im4 = &ip4_main;
1155       ip_lookup_main_t *lm4 = &im4->lookup_main;
1156       ip_interface_address_t *ia = 0;
1157       ip4_address_t *address;
1158       vlib_main_t *vm = vlib_get_main ();
1159
1160       /* *INDENT-OFF* */
1161       foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1162       ({
1163         address = ip_interface_address_get_address (lm4, ia);
1164         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1165       }));
1166       /* *INDENT-ON* */
1167     }
1168
1169   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1170                                is_add, 0, 0);
1171
1172   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1173                                is_add, 0, 0);
1174
1175   return /* no error */ 0;
1176 }
1177
1178 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1179
1180 /* Global IP4 main. */
1181 ip4_main_t ip4_main;
1182
1183 clib_error_t *
1184 ip4_lookup_init (vlib_main_t * vm)
1185 {
1186   ip4_main_t *im = &ip4_main;
1187   clib_error_t *error;
1188   uword i;
1189
1190   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1191     return error;
1192
1193   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1194     {
1195       u32 m;
1196
1197       if (i < 32)
1198         m = pow2_mask (i) << (32 - i);
1199       else
1200         m = ~0;
1201       im->fib_masks[i] = clib_host_to_net_u32 (m);
1202     }
1203
1204   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1205
1206   /* Create FIB with index 0 and table id of 0. */
1207   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1208                                      FIB_SOURCE_DEFAULT_ROUTE);
1209   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1210                                       MFIB_SOURCE_DEFAULT_ROUTE);
1211
1212   {
1213     pg_node_t *pn;
1214     pn = pg_get_node (ip4_lookup_node.index);
1215     pn->unformat_edit = unformat_pg_ip4_header;
1216   }
1217
1218   {
1219     ethernet_arp_header_t h;
1220
1221     memset (&h, 0, sizeof (h));
1222
1223     /* Set target ethernet address to all zeros. */
1224     memset (h.ip4_over_ethernet[1].ethernet, 0,
1225             sizeof (h.ip4_over_ethernet[1].ethernet));
1226
1227 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1228 #define _8(f,v) h.f = v;
1229     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1230     _16 (l3_type, ETHERNET_TYPE_IP4);
1231     _8 (n_l2_address_bytes, 6);
1232     _8 (n_l3_address_bytes, 4);
1233     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1234 #undef _16
1235 #undef _8
1236
1237     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1238                                /* data */ &h,
1239                                sizeof (h),
1240                                /* alloc chunk size */ 8,
1241                                "ip4 arp");
1242   }
1243
1244   return error;
1245 }
1246
1247 VLIB_INIT_FUNCTION (ip4_lookup_init);
1248
1249 typedef struct
1250 {
1251   /* Adjacency taken. */
1252   u32 dpo_index;
1253   u32 flow_hash;
1254   u32 fib_index;
1255
1256   /* Packet data, possibly *after* rewrite. */
1257   u8 packet_data[64 - 1 * sizeof (u32)];
1258 }
1259 ip4_forward_next_trace_t;
1260
1261 u8 *
1262 format_ip4_forward_next_trace (u8 * s, va_list * args)
1263 {
1264   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1265   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1266   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1267   u32 indent = format_get_indent (s);
1268   s = format (s, "%U%U",
1269               format_white_space, indent,
1270               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1271   return s;
1272 }
1273
1274 static u8 *
1275 format_ip4_lookup_trace (u8 * s, va_list * args)
1276 {
1277   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1278   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1279   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1280   u32 indent = format_get_indent (s);
1281
1282   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1283               t->fib_index, t->dpo_index, t->flow_hash);
1284   s = format (s, "\n%U%U",
1285               format_white_space, indent,
1286               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1287   return s;
1288 }
1289
1290 static u8 *
1291 format_ip4_rewrite_trace (u8 * s, va_list * args)
1292 {
1293   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1294   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1295   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1296   u32 indent = format_get_indent (s);
1297
1298   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1299               t->fib_index, t->dpo_index, format_ip_adjacency,
1300               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1301   s = format (s, "\n%U%U",
1302               format_white_space, indent,
1303               format_ip_adjacency_packet_data,
1304               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1305   return s;
1306 }
1307
1308 /* Common trace function for all ip4-forward next nodes. */
1309 void
1310 ip4_forward_next_trace (vlib_main_t * vm,
1311                         vlib_node_runtime_t * node,
1312                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1313 {
1314   u32 *from, n_left;
1315   ip4_main_t *im = &ip4_main;
1316
1317   n_left = frame->n_vectors;
1318   from = vlib_frame_vector_args (frame);
1319
1320   while (n_left >= 4)
1321     {
1322       u32 bi0, bi1;
1323       vlib_buffer_t *b0, *b1;
1324       ip4_forward_next_trace_t *t0, *t1;
1325
1326       /* Prefetch next iteration. */
1327       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1328       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1329
1330       bi0 = from[0];
1331       bi1 = from[1];
1332
1333       b0 = vlib_get_buffer (vm, bi0);
1334       b1 = vlib_get_buffer (vm, bi1);
1335
1336       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1337         {
1338           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1339           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1340           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1341           t0->fib_index =
1342             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1343              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1344             vec_elt (im->fib_index_by_sw_if_index,
1345                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1346
1347           clib_memcpy (t0->packet_data,
1348                        vlib_buffer_get_current (b0),
1349                        sizeof (t0->packet_data));
1350         }
1351       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1352         {
1353           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1354           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1355           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1356           t1->fib_index =
1357             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1358              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1359             vec_elt (im->fib_index_by_sw_if_index,
1360                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1361           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1362                        sizeof (t1->packet_data));
1363         }
1364       from += 2;
1365       n_left -= 2;
1366     }
1367
1368   while (n_left >= 1)
1369     {
1370       u32 bi0;
1371       vlib_buffer_t *b0;
1372       ip4_forward_next_trace_t *t0;
1373
1374       bi0 = from[0];
1375
1376       b0 = vlib_get_buffer (vm, bi0);
1377
1378       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1379         {
1380           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1381           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1382           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1383           t0->fib_index =
1384             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1385              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1386             vec_elt (im->fib_index_by_sw_if_index,
1387                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1388           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1389                        sizeof (t0->packet_data));
1390         }
1391       from += 1;
1392       n_left -= 1;
1393     }
1394 }
1395
1396 /* Compute TCP/UDP/ICMP4 checksum in software. */
1397 u16
1398 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1399                               ip4_header_t * ip0)
1400 {
1401   ip_csum_t sum0;
1402   u32 ip_header_length, payload_length_host_byte_order;
1403   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1404   u16 sum16;
1405   void *data_this_buffer;
1406
1407   /* Initialize checksum with ip header. */
1408   ip_header_length = ip4_header_bytes (ip0);
1409   payload_length_host_byte_order =
1410     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1411   sum0 =
1412     clib_host_to_net_u32 (payload_length_host_byte_order +
1413                           (ip0->protocol << 16));
1414
1415   if (BITS (uword) == 32)
1416     {
1417       sum0 =
1418         ip_csum_with_carry (sum0,
1419                             clib_mem_unaligned (&ip0->src_address, u32));
1420       sum0 =
1421         ip_csum_with_carry (sum0,
1422                             clib_mem_unaligned (&ip0->dst_address, u32));
1423     }
1424   else
1425     sum0 =
1426       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1427
1428   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1429   data_this_buffer = (void *) ip0 + ip_header_length;
1430   n_ip_bytes_this_buffer =
1431     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1432   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1433     {
1434       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1435         n_ip_bytes_this_buffer - ip_header_length : 0;
1436     }
1437   while (1)
1438     {
1439       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1440       n_bytes_left -= n_this_buffer;
1441       if (n_bytes_left == 0)
1442         break;
1443
1444       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1445       p0 = vlib_get_buffer (vm, p0->next_buffer);
1446       data_this_buffer = vlib_buffer_get_current (p0);
1447       n_this_buffer = p0->current_length;
1448     }
1449
1450   sum16 = ~ip_csum_fold (sum0);
1451
1452   return sum16;
1453 }
1454
1455 u32
1456 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1457 {
1458   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1459   udp_header_t *udp0;
1460   u16 sum16;
1461
1462   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1463           || ip0->protocol == IP_PROTOCOL_UDP);
1464
1465   udp0 = (void *) (ip0 + 1);
1466   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1467     {
1468       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1469                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1470       return p0->flags;
1471     }
1472
1473   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1474
1475   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1476                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1477
1478   return p0->flags;
1479 }
1480
1481 /* *INDENT-OFF* */
1482 VNET_FEATURE_ARC_INIT (ip4_local) =
1483 {
1484   .arc_name  = "ip4-local",
1485   .start_nodes = VNET_FEATURES ("ip4-local"),
1486 };
1487 /* *INDENT-ON* */
1488
1489 static inline void
1490 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1491                        u8 is_udp, u8 * error, u8 * good_tcp_udp)
1492 {
1493   u32 flags0;
1494   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1495   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1496   if (is_udp)
1497     {
1498       udp_header_t *udp;
1499       u32 ip_len, udp_len;
1500       i32 len_diff;
1501       udp = ip4_next_header (ip);
1502       /* Verify UDP length. */
1503       ip_len = clib_net_to_host_u16 (ip->length);
1504       udp_len = clib_net_to_host_u16 (udp->length);
1505
1506       len_diff = ip_len - udp_len;
1507       *good_tcp_udp &= len_diff >= 0;
1508       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1509     }
1510 }
1511
/*
 * True when a TCP/UDP packet still needs its L4 checksum validated in
 * software: the packet is TCP/UDP and none of the "checksum computed",
 * "offload TCP checksum" or "offload UDP checksum" flags is set.
 * Both arguments are parenthesized and flags is evaluated only once, so
 * compound expressions may be passed safely.
 */
#define ip4_local_do_l4_check(is_tcp_udp, flags)                        \
    ((is_tcp_udp) && !((flags) & (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED    \
                                  | VNET_BUFFER_F_OFFLOAD_TCP_CKSUM     \
                                  | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)))
1516
1517 static inline uword
1518 ip4_local_inline (vlib_main_t * vm,
1519                   vlib_node_runtime_t * node,
1520                   vlib_frame_t * frame, int head_of_feature_arc)
1521 {
1522   ip4_main_t *im = &ip4_main;
1523   ip_lookup_main_t *lm = &im->lookup_main;
1524   ip_local_next_t next_index;
1525   u32 *from, *to_next, n_left_from, n_left_to_next;
1526   vlib_node_runtime_t *error_node =
1527     vlib_node_get_runtime (vm, ip4_input_node.index);
1528   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1529
1530   from = vlib_frame_vector_args (frame);
1531   n_left_from = frame->n_vectors;
1532   next_index = node->cached_next_index;
1533
1534   if (node->flags & VLIB_NODE_FLAG_TRACE)
1535     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1536
1537   while (n_left_from > 0)
1538     {
1539       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1540
1541       while (n_left_from >= 4 && n_left_to_next >= 2)
1542         {
1543           vlib_buffer_t *p0, *p1;
1544           ip4_header_t *ip0, *ip1;
1545           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1546           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1547           const dpo_id_t *dpo0, *dpo1;
1548           const load_balance_t *lb0, *lb1;
1549           u32 pi0, next0, fib_index0, lbi0;
1550           u32 pi1, next1, fib_index1, lbi1;
1551           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1552           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1553           u32 sw_if_index0, sw_if_index1;
1554
1555           pi0 = to_next[0] = from[0];
1556           pi1 = to_next[1] = from[1];
1557           from += 2;
1558           n_left_from -= 2;
1559           to_next += 2;
1560           n_left_to_next -= 2;
1561
1562           next0 = next1 = IP_LOCAL_NEXT_DROP;
1563           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1564
1565           p0 = vlib_get_buffer (vm, pi0);
1566           p1 = vlib_get_buffer (vm, pi1);
1567
1568           ip0 = vlib_buffer_get_current (p0);
1569           ip1 = vlib_buffer_get_current (p1);
1570
1571           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1572           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1573
1574           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1575           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1576
1577           /* Treat IP frag packets as "experimental" protocol for now
1578              until support of IP frag reassembly is implemented */
1579           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1580           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1581
1582           if (head_of_feature_arc == 0)
1583             goto skip_checks;
1584
1585           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1586           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1587           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1588           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1589
1590           good_tcp_udp0 =
1591             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1592              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1593                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1594           good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1595                            || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1596                                || p1->flags &
1597                                VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1598
1599           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
1600                              || ip4_local_do_l4_check (is_tcp_udp1,
1601                                                        p1->flags)))
1602             {
1603               if (is_tcp_udp0)
1604                 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1605                                        &good_tcp_udp0);
1606               if (is_tcp_udp1)
1607                 ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
1608                                        &good_tcp_udp1);
1609             }
1610
1611           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1612           error0 = (is_tcp_udp0 && !good_tcp_udp0
1613                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1614           error1 = (is_tcp_udp1 && !good_tcp_udp1
1615                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1616
1617           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1618           fib_index0 =
1619             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1620              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1621
1622           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1623           fib_index1 =
1624             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1625              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1626
1627           /* TODO maybe move to lookup? */
1628           vnet_buffer (p0)->ip.fib_index = fib_index0;
1629           vnet_buffer (p1)->ip.fib_index = fib_index1;
1630
1631           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1632           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1633
1634           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1635           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1636           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1637                                              2);
1638           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1639                                              2);
1640           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1641                                              3);
1642           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1643                                              3);
1644
1645           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1646             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1647           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1648
1649           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1650             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1651           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1652
1653           lb0 = load_balance_get (lbi0);
1654           lb1 = load_balance_get (lbi1);
1655           dpo0 = load_balance_get_bucket_i (lb0, 0);
1656           dpo1 = load_balance_get_bucket_i (lb1, 0);
1657
1658           /*
1659            * Must have a route to source otherwise we drop the packet.
1660            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1661            *
1662            * The checks are:
1663            *  - the source is a recieve => it's from us => bogus, do this
1664            *    first since it sets a different error code.
1665            *  - uRPF check for any route to source - accept if passes.
1666            *  - allow packets destined to the broadcast address from unknown sources
1667            */
1668           if (p0->flags & VNET_BUFFER_F_IS_NATED)
1669             goto skip_check0;
1670
1671           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1672                      dpo0->dpoi_type == DPO_RECEIVE) ?
1673                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1674           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1675                      !fib_urpf_check_size (lb0->lb_urpf) &&
1676                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1677                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1678
1679         skip_check0:
1680           if (p1->flags & VNET_BUFFER_F_IS_NATED)
1681             goto skip_checks;
1682
1683           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1684                      dpo1->dpoi_type == DPO_RECEIVE) ?
1685                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1686           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1687                      !fib_urpf_check_size (lb1->lb_urpf) &&
1688                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1689                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1690
1691         skip_checks:
1692
1693           next0 = lm->local_next_by_ip_protocol[proto0];
1694           next1 = lm->local_next_by_ip_protocol[proto1];
1695
1696           next0 =
1697             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1698           next1 =
1699             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1700
1701           p0->error = error0 ? error_node->errors[error0] : 0;
1702           p1->error = error1 ? error_node->errors[error1] : 0;
1703
1704           if (head_of_feature_arc)
1705             {
1706               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1707                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1708               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1709                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1710             }
1711
1712           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1713                                            n_left_to_next, pi0, pi1,
1714                                            next0, next1);
1715         }
1716
1717       while (n_left_from > 0 && n_left_to_next > 0)
1718         {
1719           vlib_buffer_t *p0;
1720           ip4_header_t *ip0;
1721           ip4_fib_mtrie_t *mtrie0;
1722           ip4_fib_mtrie_leaf_t leaf0;
1723           u32 pi0, next0, fib_index0, lbi0;
1724           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1725           load_balance_t *lb0;
1726           const dpo_id_t *dpo0;
1727           u32 sw_if_index0;
1728
1729           pi0 = to_next[0] = from[0];
1730           from += 1;
1731           n_left_from -= 1;
1732           to_next += 1;
1733           n_left_to_next -= 1;
1734
1735           next0 = IP_LOCAL_NEXT_DROP;
1736           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1737
1738           p0 = vlib_get_buffer (vm, pi0);
1739           ip0 = vlib_buffer_get_current (p0);
1740           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1741           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1742
1743           /* Treat IP frag packets as "experimental" protocol for now
1744              until support of IP frag reassembly is implemented */
1745           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1746
1747           if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
1748             goto skip_check;
1749
1750           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1751           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1752
1753           good_tcp_udp0 =
1754             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1755              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1756                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1757
1758           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
1759             {
1760               ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1761                                      &good_tcp_udp0);
1762             }
1763
1764           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1765           error0 = (is_tcp_udp0 && !good_tcp_udp0
1766                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1767
1768           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1769           fib_index0 =
1770             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1771              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1772           vnet_buffer (p0)->ip.fib_index = fib_index0;
1773           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1774           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1775           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1776                                              2);
1777           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1778                                              3);
1779           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1780           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1781           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1782
1783           lb0 = load_balance_get (lbi0);
1784           dpo0 = load_balance_get_bucket_i (lb0, 0);
1785
1786           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1787                      dpo0->dpoi_type == DPO_RECEIVE) ?
1788                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1789           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1790                      !fib_urpf_check_size (lb0->lb_urpf) &&
1791                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1792                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1793
1794         skip_check:
1795           next0 = lm->local_next_by_ip_protocol[proto0];
1796           next0 =
1797             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1798
1799           p0->error = error0 ? error_node->errors[error0] : 0;
1800
1801           if (head_of_feature_arc)
1802             {
1803               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1804                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1805             }
1806
1807           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1808                                            n_left_to_next, pi0, next0);
1809         }
1810       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1811     }
1812
1813   return frame->n_vectors;
1814 }
1815
1816 static uword
1817 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1818 {
1819   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1820 }
1821
1822 /* *INDENT-OFF* */
1823 VLIB_REGISTER_NODE (ip4_local_node) =
1824 {
1825   .function = ip4_local,
1826   .name = "ip4-local",
1827   .vector_size = sizeof (u32),
1828   .format_trace = format_ip4_forward_next_trace,
1829   .n_next_nodes = IP_LOCAL_N_NEXT,
1830   .next_nodes =
1831   {
1832     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1833     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1834     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1835     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1836   },
1837 };
1838 /* *INDENT-ON* */
1839
1840 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1841
1842 static uword
1843 ip4_local_end_of_arc (vlib_main_t * vm,
1844                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1845 {
1846   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1847 }
1848
1849 /* *INDENT-OFF* */
1850 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1851   .function = ip4_local_end_of_arc,
1852   .name = "ip4-local-end-of-arc",
1853   .vector_size = sizeof (u32),
1854
1855   .format_trace = format_ip4_forward_next_trace,
1856   .sibling_of = "ip4-local",
1857 };
1858
1859 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1860
1861 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1862   .arc_name = "ip4-local",
1863   .node_name = "ip4-local-end-of-arc",
1864   .runs_before = 0, /* not before any other features */
1865 };
1866 /* *INDENT-ON* */
1867
1868 void
1869 ip4_register_protocol (u32 protocol, u32 node_index)
1870 {
1871   vlib_main_t *vm = vlib_get_main ();
1872   ip4_main_t *im = &ip4_main;
1873   ip_lookup_main_t *lm = &im->lookup_main;
1874
1875   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1876   lm->local_next_by_ip_protocol[protocol] =
1877     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1878 }
1879
1880 static clib_error_t *
1881 show_ip_local_command_fn (vlib_main_t * vm,
1882                           unformat_input_t * input, vlib_cli_command_t * cmd)
1883 {
1884   ip4_main_t *im = &ip4_main;
1885   ip_lookup_main_t *lm = &im->lookup_main;
1886   int i;
1887
1888   vlib_cli_output (vm, "Protocols handled by ip4_local");
1889   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1890     {
1891       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1892         {
1893           u32 node_index = vlib_get_node (vm,
1894                                           ip4_local_node.index)->
1895             next_nodes[lm->local_next_by_ip_protocol[i]];
1896           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1897                            node_index);
1898         }
1899     }
1900   return 0;
1901 }
1902
1903
1904
1905 /*?
1906  * Display the set of protocols handled by the local IPv4 stack.
1907  *
1908  * @cliexpar
1909  * Example of how to display local protocol table:
1910  * @cliexstart{show ip local}
1911  * Protocols handled by ip4_local
1912  * 1
1913  * 17
1914  * 47
1915  * @cliexend
1916 ?*/
1917 /* *INDENT-OFF* */
1918 VLIB_CLI_COMMAND (show_ip_local, static) =
1919 {
1920   .path = "show ip local",
1921   .function = show_ip_local_command_fn,
1922   .short_help = "show ip local",
1923 };
1924 /* *INDENT-ON* */
1925
1926 always_inline uword
1927 ip4_arp_inline (vlib_main_t * vm,
1928                 vlib_node_runtime_t * node,
1929                 vlib_frame_t * frame, int is_glean)
1930 {
1931   vnet_main_t *vnm = vnet_get_main ();
1932   ip4_main_t *im = &ip4_main;
1933   ip_lookup_main_t *lm = &im->lookup_main;
1934   u32 *from, *to_next_drop;
1935   uword n_left_from, n_left_to_next_drop, next_index;
1936   static f64 time_last_seed_change = -1e100;
1937   static u32 hash_seeds[3];
1938   static uword hash_bitmap[256 / BITS (uword)];
1939   f64 time_now;
1940
1941   if (node->flags & VLIB_NODE_FLAG_TRACE)
1942     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1943
1944   time_now = vlib_time_now (vm);
1945   if (time_now - time_last_seed_change > 1e-3)
1946     {
1947       uword i;
1948       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1949                                             sizeof (hash_seeds));
1950       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1951         hash_seeds[i] = r[i];
1952
1953       /* Mark all hash keys as been no-seen before. */
1954       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1955         hash_bitmap[i] = 0;
1956
1957       time_last_seed_change = time_now;
1958     }
1959
1960   from = vlib_frame_vector_args (frame);
1961   n_left_from = frame->n_vectors;
1962   next_index = node->cached_next_index;
1963   if (next_index == IP4_ARP_NEXT_DROP)
1964     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1965
1966   while (n_left_from > 0)
1967     {
1968       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1969                            to_next_drop, n_left_to_next_drop);
1970
1971       while (n_left_from > 0 && n_left_to_next_drop > 0)
1972         {
1973           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1974           ip_adjacency_t *adj0;
1975           vlib_buffer_t *p0;
1976           ip4_header_t *ip0;
1977           uword bm0;
1978
1979           pi0 = from[0];
1980
1981           p0 = vlib_get_buffer (vm, pi0);
1982
1983           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1984           adj0 = adj_get (adj_index0);
1985           ip0 = vlib_buffer_get_current (p0);
1986
1987           a0 = hash_seeds[0];
1988           b0 = hash_seeds[1];
1989           c0 = hash_seeds[2];
1990
1991           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1992           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1993
1994           if (is_glean)
1995             {
1996               /*
1997                * this is the Glean case, so we are ARPing for the
1998                * packet's destination
1999                */
2000               a0 ^= ip0->dst_address.data_u32;
2001             }
2002           else
2003             {
2004               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2005             }
2006           b0 ^= sw_if_index0;
2007
2008           hash_v3_mix32 (a0, b0, c0);
2009           hash_v3_finalize32 (a0, b0, c0);
2010
2011           c0 &= BITS (hash_bitmap) - 1;
2012           m0 = (uword) 1 << (c0 % BITS (uword));
2013           c0 = c0 / BITS (uword);
2014
2015           bm0 = hash_bitmap[c0];
2016           drop0 = (bm0 & m0) != 0;
2017
2018           /* Mark it as seen. */
2019           hash_bitmap[c0] = bm0 | m0;
2020
2021           from += 1;
2022           n_left_from -= 1;
2023           to_next_drop[0] = pi0;
2024           to_next_drop += 1;
2025           n_left_to_next_drop -= 1;
2026
2027           p0->error =
2028             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2029                          IP4_ARP_ERROR_REQUEST_SENT];
2030
2031           /*
2032            * the adj has been updated to a rewrite but the node the DPO that got
2033            * us here hasn't - yet. no big deal. we'll drop while we wait.
2034            */
2035           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2036             continue;
2037
2038           if (drop0)
2039             continue;
2040
2041           /*
2042            * Can happen if the control-plane is programming tables
2043            * with traffic flowing; at least that's today's lame excuse.
2044            */
2045           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2046               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2047             {
2048               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2049             }
2050           else
2051             /* Send ARP request. */
2052             {
2053               u32 bi0 = 0;
2054               vlib_buffer_t *b0;
2055               ethernet_arp_header_t *h0;
2056               vnet_hw_interface_t *hw_if0;
2057
2058               h0 =
2059                 vlib_packet_template_get_packet (vm,
2060                                                  &im->ip4_arp_request_packet_template,
2061                                                  &bi0);
2062
2063               /* Seems we're out of buffers */
2064               if (PREDICT_FALSE (!h0))
2065                 continue;
2066
2067               /* Add rewrite/encap string for ARP packet. */
2068               vnet_rewrite_one_header (adj0[0], h0,
2069                                        sizeof (ethernet_header_t));
2070
2071               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2072
2073               /* Src ethernet address in ARP header. */
2074               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2075                            hw_if0->hw_address,
2076                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2077
2078               if (is_glean)
2079                 {
2080                   /* The interface's source address is stashed in the Glean Adj */
2081                   h0->ip4_over_ethernet[0].ip4 =
2082                     adj0->sub_type.glean.receive_addr.ip4;
2083
2084                   /* Copy in destination address we are requesting. This is the
2085                    * glean case, so it's the packet's destination.*/
2086                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2087                     ip0->dst_address.data_u32;
2088                 }
2089               else
2090                 {
2091                   /* Src IP address in ARP header. */
2092                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2093                                                   &h0->
2094                                                   ip4_over_ethernet[0].ip4))
2095                     {
2096                       /* No source address available */
2097                       p0->error =
2098                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2099                       vlib_buffer_free (vm, &bi0, 1);
2100                       continue;
2101                     }
2102
2103                   /* Copy in destination address we are requesting from the
2104                      incomplete adj */
2105                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2106                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2107                 }
2108
2109               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2110               b0 = vlib_get_buffer (vm, bi0);
2111               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2112               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2113
2114               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2115
2116               vlib_set_next_frame_buffer (vm, node,
2117                                           adj0->rewrite_header.next_index,
2118                                           bi0);
2119             }
2120         }
2121
2122       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2123     }
2124
2125   return frame->n_vectors;
2126 }
2127
2128 static uword
2129 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2130 {
2131   return (ip4_arp_inline (vm, node, frame, 0));
2132 }
2133
2134 static uword
2135 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2136 {
2137   return (ip4_arp_inline (vm, node, frame, 1));
2138 }
2139
2140 static char *ip4_arp_error_strings[] = {
2141   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2142   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2143   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2144   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2145   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2146   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2147 };
2148
2149 /* *INDENT-OFF* */
2150 VLIB_REGISTER_NODE (ip4_arp_node) =
2151 {
2152   .function = ip4_arp,
2153   .name = "ip4-arp",
2154   .vector_size = sizeof (u32),
2155   .format_trace = format_ip4_forward_next_trace,
2156   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2157   .error_strings = ip4_arp_error_strings,
2158   .n_next_nodes = IP4_ARP_N_NEXT,
2159   .next_nodes =
2160   {
2161     [IP4_ARP_NEXT_DROP] = "error-drop",
2162   },
2163 };
2164
2165 VLIB_REGISTER_NODE (ip4_glean_node) =
2166 {
2167   .function = ip4_glean,
2168   .name = "ip4-glean",
2169   .vector_size = sizeof (u32),
2170   .format_trace = format_ip4_forward_next_trace,
2171   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2172   .error_strings = ip4_arp_error_strings,
2173   .n_next_nodes = IP4_ARP_N_NEXT,
2174   .next_nodes = {
2175   [IP4_ARP_NEXT_DROP] = "error-drop",
2176   },
2177 };
2178 /* *INDENT-ON* */
2179
2180 #define foreach_notrace_ip4_arp_error           \
2181 _(DROP)                                         \
2182 _(REQUEST_SENT)                                 \
2183 _(REPLICATE_DROP)                               \
2184 _(REPLICATE_FAIL)
2185
2186 clib_error_t *
2187 arp_notrace_init (vlib_main_t * vm)
2188 {
2189   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2190
2191   /* don't trace ARP request packets */
2192 #define _(a)                                    \
2193     vnet_pcap_drop_trace_filter_add_del         \
2194         (rt->errors[IP4_ARP_ERROR_##a],         \
2195          1 /* is_add */);
2196   foreach_notrace_ip4_arp_error;
2197 #undef _
2198   return 0;
2199 }
2200
2201 VLIB_INIT_FUNCTION (arp_notrace_init);
2202
2203
2204 /* Send an ARP request to see if given destination is reachable on given interface. */
2205 clib_error_t *
2206 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2207 {
2208   vnet_main_t *vnm = vnet_get_main ();
2209   ip4_main_t *im = &ip4_main;
2210   ethernet_arp_header_t *h;
2211   ip4_address_t *src;
2212   ip_interface_address_t *ia;
2213   ip_adjacency_t *adj;
2214   vnet_hw_interface_t *hi;
2215   vnet_sw_interface_t *si;
2216   vlib_buffer_t *b;
2217   adj_index_t ai;
2218   u32 bi = 0;
2219
2220   si = vnet_get_sw_interface (vnm, sw_if_index);
2221
2222   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2223     {
2224       return clib_error_return (0, "%U: interface %U down",
2225                                 format_ip4_address, dst,
2226                                 format_vnet_sw_if_index_name, vnm,
2227                                 sw_if_index);
2228     }
2229
2230   src =
2231     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2232   if (!src)
2233     {
2234       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2235       return clib_error_return
2236         (0,
2237          "no matching interface address for destination %U (interface %U)",
2238          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2239          sw_if_index);
2240     }
2241
2242   h = vlib_packet_template_get_packet (vm,
2243                                        &im->ip4_arp_request_packet_template,
2244                                        &bi);
2245
2246   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2247   if (PREDICT_FALSE (!hi->hw_address))
2248     {
2249       return clib_error_return (0, "%U: interface %U do not support ip probe",
2250                                 format_ip4_address, dst,
2251                                 format_vnet_sw_if_index_name, vnm,
2252                                 sw_if_index);
2253     }
2254
2255   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2256                sizeof (h->ip4_over_ethernet[0].ethernet));
2257
2258   h->ip4_over_ethernet[0].ip4 = src[0];
2259   h->ip4_over_ethernet[1].ip4 = dst[0];
2260
2261   b = vlib_get_buffer (vm, bi);
2262   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2263     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2264
2265   ip46_address_t nh = {
2266     .ip4 = *dst,
2267   };
2268
2269   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2270                             VNET_LINK_IP4, &nh, sw_if_index);
2271   adj = adj_get (ai);
2272
2273   /* Peer has been previously resolved, retrieve glean adj instead */
2274   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2275     {
2276       adj_unlock (ai);
2277       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
2278       adj = adj_get (ai);
2279     }
2280
2281   /* Add encapsulation string for software interface (e.g. ethernet header). */
2282   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2283   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2284
2285   {
2286     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2287     u32 *to_next = vlib_frame_vector_args (f);
2288     to_next[0] = bi;
2289     f->n_vectors = 1;
2290     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2291   }
2292
2293   adj_unlock (ai);
2294   return /* no error */ 0;
2295 }
2296
/* Next-node slots used by ip4_rewrite_inline(). */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
2302
2303 always_inline uword
2304 ip4_rewrite_inline (vlib_main_t * vm,
2305                     vlib_node_runtime_t * node,
2306                     vlib_frame_t * frame,
2307                     int do_counters, int is_midchain, int is_mcast)
2308 {
2309   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2310   u32 *from = vlib_frame_vector_args (frame);
2311   u32 n_left_from, n_left_to_next, *to_next, next_index;
2312   vlib_node_runtime_t *error_node =
2313     vlib_node_get_runtime (vm, ip4_input_node.index);
2314
2315   n_left_from = frame->n_vectors;
2316   next_index = node->cached_next_index;
2317   u32 thread_index = vlib_get_thread_index ();
2318
2319   while (n_left_from > 0)
2320     {
2321       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2322
2323       while (n_left_from >= 4 && n_left_to_next >= 2)
2324         {
2325           ip_adjacency_t *adj0, *adj1;
2326           vlib_buffer_t *p0, *p1;
2327           ip4_header_t *ip0, *ip1;
2328           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2329           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2330           u32 tx_sw_if_index0, tx_sw_if_index1;
2331
2332           /* Prefetch next iteration. */
2333           {
2334             vlib_buffer_t *p2, *p3;
2335
2336             p2 = vlib_get_buffer (vm, from[2]);
2337             p3 = vlib_get_buffer (vm, from[3]);
2338
2339             vlib_prefetch_buffer_header (p2, STORE);
2340             vlib_prefetch_buffer_header (p3, STORE);
2341
2342             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2343             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2344           }
2345
2346           pi0 = to_next[0] = from[0];
2347           pi1 = to_next[1] = from[1];
2348
2349           from += 2;
2350           n_left_from -= 2;
2351           to_next += 2;
2352           n_left_to_next -= 2;
2353
2354           p0 = vlib_get_buffer (vm, pi0);
2355           p1 = vlib_get_buffer (vm, pi1);
2356
2357           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2358           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2359
2360           /*
2361            * pre-fetch the per-adjacency counters
2362            */
2363           if (do_counters)
2364             {
2365               vlib_prefetch_combined_counter (&adjacency_counters,
2366                                               thread_index, adj_index0);
2367               vlib_prefetch_combined_counter (&adjacency_counters,
2368                                               thread_index, adj_index1);
2369             }
2370
2371           ip0 = vlib_buffer_get_current (p0);
2372           ip1 = vlib_buffer_get_current (p1);
2373
2374           error0 = error1 = IP4_ERROR_NONE;
2375           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2376
2377           /* Decrement TTL & update checksum.
2378              Works either endian, so no need for byte swap. */
2379           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2380             {
2381               i32 ttl0 = ip0->ttl;
2382
2383               /* Input node should have reject packets with ttl 0. */
2384               ASSERT (ip0->ttl > 0);
2385
2386               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2387               checksum0 += checksum0 >= 0xffff;
2388
2389               ip0->checksum = checksum0;
2390               ttl0 -= 1;
2391               ip0->ttl = ttl0;
2392
2393               /*
2394                * If the ttl drops below 1 when forwarding, generate
2395                * an ICMP response.
2396                */
2397               if (PREDICT_FALSE (ttl0 <= 0))
2398                 {
2399                   error0 = IP4_ERROR_TIME_EXPIRED;
2400                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2401                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2402                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2403                                                0);
2404                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2405                 }
2406
2407               /* Verify checksum. */
2408               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2409                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2410             }
2411           else
2412             {
2413               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2414             }
2415           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2416             {
2417               i32 ttl1 = ip1->ttl;
2418
2419               /* Input node should have reject packets with ttl 0. */
2420               ASSERT (ip1->ttl > 0);
2421
2422               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2423               checksum1 += checksum1 >= 0xffff;
2424
2425               ip1->checksum = checksum1;
2426               ttl1 -= 1;
2427               ip1->ttl = ttl1;
2428
2429               /*
2430                * If the ttl drops below 1 when forwarding, generate
2431                * an ICMP response.
2432                */
2433               if (PREDICT_FALSE (ttl1 <= 0))
2434                 {
2435                   error1 = IP4_ERROR_TIME_EXPIRED;
2436                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2437                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2438                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2439                                                0);
2440                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2441                 }
2442
2443               /* Verify checksum. */
2444               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2445                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2446             }
2447           else
2448             {
2449               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2450             }
2451
2452           /* Rewrite packet header and updates lengths. */
2453           adj0 = adj_get (adj_index0);
2454           adj1 = adj_get (adj_index1);
2455
2456           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2457           rw_len0 = adj0[0].rewrite_header.data_bytes;
2458           rw_len1 = adj1[0].rewrite_header.data_bytes;
2459           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2460           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2461
2462           /* Check MTU of outgoing interface. */
2463           error0 =
2464             (vlib_buffer_length_in_chain (vm, p0) >
2465              adj0[0].
2466              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2467              error0);
2468           error1 =
2469             (vlib_buffer_length_in_chain (vm, p1) >
2470              adj1[0].
2471              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2472              error1);
2473
2474           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2475            * to see the IP header */
2476           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2477             {
2478               next0 = adj0[0].rewrite_header.next_index;
2479               p0->current_data -= rw_len0;
2480               p0->current_length += rw_len0;
2481               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2482               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2483
2484               if (PREDICT_FALSE
2485                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2486                 vnet_feature_arc_start (lm->output_feature_arc_index,
2487                                         tx_sw_if_index0, &next0, p0);
2488             }
2489           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2490             {
2491               next1 = adj1[0].rewrite_header.next_index;
2492               p1->current_data -= rw_len1;
2493               p1->current_length += rw_len1;
2494
2495               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2496               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2497
2498               if (PREDICT_FALSE
2499                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2500                 vnet_feature_arc_start (lm->output_feature_arc_index,
2501                                         tx_sw_if_index1, &next1, p1);
2502             }
2503
2504           /* Guess we are only writing on simple Ethernet header. */
2505           vnet_rewrite_two_headers (adj0[0], adj1[0],
2506                                     ip0, ip1, sizeof (ethernet_header_t));
2507
2508           /*
2509            * Bump the per-adjacency counters
2510            */
2511           if (do_counters)
2512             {
2513               vlib_increment_combined_counter
2514                 (&adjacency_counters,
2515                  thread_index,
2516                  adj_index0, 1,
2517                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2518
2519               vlib_increment_combined_counter
2520                 (&adjacency_counters,
2521                  thread_index,
2522                  adj_index1, 1,
2523                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2524             }
2525
2526           if (is_midchain)
2527             {
2528               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2529               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2530             }
2531           if (is_mcast)
2532             {
2533               /*
2534                * copy bytes from the IP address into the MAC rewrite
2535                */
2536               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2537               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2538             }
2539
2540           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2541                                            to_next, n_left_to_next,
2542                                            pi0, pi1, next0, next1);
2543         }
2544
2545       while (n_left_from > 0 && n_left_to_next > 0)
2546         {
2547           ip_adjacency_t *adj0;
2548           vlib_buffer_t *p0;
2549           ip4_header_t *ip0;
2550           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2551           u32 tx_sw_if_index0;
2552
2553           pi0 = to_next[0] = from[0];
2554
2555           p0 = vlib_get_buffer (vm, pi0);
2556
2557           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2558
2559           adj0 = adj_get (adj_index0);
2560
2561           ip0 = vlib_buffer_get_current (p0);
2562
2563           error0 = IP4_ERROR_NONE;
2564           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2565
2566           /* Decrement TTL & update checksum. */
2567           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2568             {
2569               i32 ttl0 = ip0->ttl;
2570
2571               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2572
2573               checksum0 += checksum0 >= 0xffff;
2574
2575               ip0->checksum = checksum0;
2576
2577               ASSERT (ip0->ttl > 0);
2578
2579               ttl0 -= 1;
2580
2581               ip0->ttl = ttl0;
2582
2583               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2584                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2585
2586               if (PREDICT_FALSE (ttl0 <= 0))
2587                 {
2588                   /*
2589                    * If the ttl drops below 1 when forwarding, generate
2590                    * an ICMP response.
2591                    */
2592                   error0 = IP4_ERROR_TIME_EXPIRED;
2593                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2594                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2595                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2596                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2597                                                0);
2598                 }
2599             }
2600           else
2601             {
2602               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2603             }
2604
2605           if (do_counters)
2606             vlib_prefetch_combined_counter (&adjacency_counters,
2607                                             thread_index, adj_index0);
2608
2609           /* Guess we are only writing on simple Ethernet header. */
2610           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2611           if (is_mcast)
2612             {
2613               /*
2614                * copy bytes from the IP address into the MAC rewrite
2615                */
2616               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2617             }
2618
2619           /* Update packet buffer attributes/set output interface. */
2620           rw_len0 = adj0[0].rewrite_header.data_bytes;
2621           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2622
2623           if (do_counters)
2624             vlib_increment_combined_counter
2625               (&adjacency_counters,
2626                thread_index, adj_index0, 1,
2627                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2628
2629           /* Check MTU of outgoing interface. */
2630           error0 = (vlib_buffer_length_in_chain (vm, p0)
2631                     > adj0[0].rewrite_header.max_l3_packet_bytes
2632                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2633
2634           p0->error = error_node->errors[error0];
2635
2636           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2637            * to see the IP header */
2638           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2639             {
2640               p0->current_data -= rw_len0;
2641               p0->current_length += rw_len0;
2642               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2643
2644               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2645               next0 = adj0[0].rewrite_header.next_index;
2646
2647               if (is_midchain)
2648                 {
2649                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2650                 }
2651
2652               if (PREDICT_FALSE
2653                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2654                 vnet_feature_arc_start (lm->output_feature_arc_index,
2655                                         tx_sw_if_index0, &next0, p0);
2656
2657             }
2658
2659           from += 1;
2660           n_left_from -= 1;
2661           to_next += 1;
2662           n_left_to_next -= 1;
2663
2664           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2665                                            to_next, n_left_to_next,
2666                                            pi0, next0);
2667         }
2668
2669       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2670     }
2671
2672   /* Need to do trace after rewrites to pick up new packet data. */
2673   if (node->flags & VLIB_NODE_FLAG_TRACE)
2674     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2675
2676   return frame->n_vectors;
2677 }
2678
2679
2680 /** @brief IPv4 rewrite node.
2681     @node ip4-rewrite
2682
2683     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2684     header checksum, fetch the ip adjacency, check the outbound mtu,
2685     apply the adjacency rewrite, and send pkts to the adjacency
2686     rewrite header's rewrite_next_index.
2687
2688     @param vm vlib_main_t corresponding to the current thread
2689     @param node vlib_node_runtime_t
2690     @param frame vlib_frame_t whose contents should be dispatched
2691
2692     @par Graph mechanics: buffer metadata, next index usage
2693
2694     @em Uses:
2695     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2696         - the rewrite adjacency index
2697     - <code>adj->lookup_next_index</code>
2698         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2699           the packet will be dropped.
2700     - <code>adj->rewrite_header</code>
2701         - Rewrite string length, rewrite string, next_index
2702
2703     @em Sets:
2704     - <code>b->current_data, b->current_length</code>
2705         - Updated net of applying the rewrite string
2706
2707     <em>Next Indices:</em>
2708     - <code> adj->rewrite_header.next_index </code>
2709       or @c error-drop
2710 */
2711 static uword
2712 ip4_rewrite (vlib_main_t * vm,
2713              vlib_node_runtime_t * node, vlib_frame_t * frame)
2714 {
2715   if (adj_are_counters_enabled ())
2716     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2717   else
2718     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2719 }
2720
2721 static uword
2722 ip4_midchain (vlib_main_t * vm,
2723               vlib_node_runtime_t * node, vlib_frame_t * frame)
2724 {
2725   if (adj_are_counters_enabled ())
2726     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2727   else
2728     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2729 }
2730
2731 static uword
2732 ip4_rewrite_mcast (vlib_main_t * vm,
2733                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2734 {
2735   if (adj_are_counters_enabled ())
2736     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2737   else
2738     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2739 }
2740
2741 static uword
2742 ip4_mcast_midchain (vlib_main_t * vm,
2743                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2744 {
2745   if (adj_are_counters_enabled ())
2746     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2747   else
2748     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2749 }
2750
2751 /* *INDENT-OFF* */
2752 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2753   .function = ip4_rewrite,
2754   .name = "ip4-rewrite",
2755   .vector_size = sizeof (u32),
2756
2757   .format_trace = format_ip4_rewrite_trace,
2758
2759   .n_next_nodes = 2,
2760   .next_nodes = {
2761     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2762     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2763   },
2764 };
2765 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2766
2767 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2768   .function = ip4_rewrite_mcast,
2769   .name = "ip4-rewrite-mcast",
2770   .vector_size = sizeof (u32),
2771
2772   .format_trace = format_ip4_rewrite_trace,
2773   .sibling_of = "ip4-rewrite",
2774 };
2775 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2776
2777 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2778   .function = ip4_mcast_midchain,
2779   .name = "ip4-mcast-midchain",
2780   .vector_size = sizeof (u32),
2781
2782   .format_trace = format_ip4_rewrite_trace,
2783   .sibling_of = "ip4-rewrite",
2784 };
2785 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2786
2787 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2788   .function = ip4_midchain,
2789   .name = "ip4-midchain",
2790   .vector_size = sizeof (u32),
2791   .format_trace = format_ip4_forward_next_trace,
2792   .sibling_of =  "ip4-rewrite",
2793 };
2794 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2795 /* *INDENT-ON */
2796
2797 int
2798 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2799 {
2800   ip4_fib_mtrie_t *mtrie0;
2801   ip4_fib_mtrie_leaf_t leaf0;
2802   u32 lbi0;
2803
2804   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2805
2806   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2807   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2808   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2809
2810   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2811
2812   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2813 }
2814
2815 static clib_error_t *
2816 test_lookup_command_fn (vlib_main_t * vm,
2817                         unformat_input_t * input, vlib_cli_command_t * cmd)
2818 {
2819   ip4_fib_t *fib;
2820   u32 table_id = 0;
2821   f64 count = 1;
2822   u32 n;
2823   int i;
2824   ip4_address_t ip4_base_address;
2825   u64 errors = 0;
2826
2827   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2828     {
2829       if (unformat (input, "table %d", &table_id))
2830         {
2831           /* Make sure the entry exists. */
2832           fib = ip4_fib_get (table_id);
2833           if ((fib) && (fib->index != table_id))
2834             return clib_error_return (0, "<fib-index> %d does not exist",
2835                                       table_id);
2836         }
2837       else if (unformat (input, "count %f", &count))
2838         ;
2839
2840       else if (unformat (input, "%U",
2841                          unformat_ip4_address, &ip4_base_address))
2842         ;
2843       else
2844         return clib_error_return (0, "unknown input `%U'",
2845                                   format_unformat_error, input);
2846     }
2847
2848   n = count;
2849
2850   for (i = 0; i < n; i++)
2851     {
2852       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2853         errors++;
2854
2855       ip4_base_address.as_u32 =
2856         clib_host_to_net_u32 (1 +
2857                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2858     }
2859
2860   if (errors)
2861     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2862   else
2863     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2864
2865   return 0;
2866 }
2867
2868 /*?
2869  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2870  * given FIB table to determine if there is a conflict with the
2871  * adjacency table. The fib-id can be determined by using the
2872  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2873  * of 0 is used.
2874  *
2875  * @todo This command uses fib-id, other commands use table-id (not
2876  * just a name, they are different indexes). Would like to change this
2877  * to table-id for consistency.
2878  *
2879  * @cliexpar
2880  * Example of how to run the test lookup command:
2881  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2882  * No errors in 2 lookups
2883  * @cliexend
2884 ?*/
2885 /* *INDENT-OFF* */
2886 VLIB_CLI_COMMAND (lookup_test_command, static) =
2887 {
2888   .path = "test lookup",
2889   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2890   .function = test_lookup_command_fn,
2891 };
2892 /* *INDENT-ON* */
2893
2894 int
2895 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2896 {
2897   u32 fib_index;
2898
2899   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2900
2901   if (~0 == fib_index)
2902     return VNET_API_ERROR_NO_SUCH_FIB;
2903
2904   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2905                                   flow_hash_config);
2906
2907   return 0;
2908 }
2909
2910 static clib_error_t *
2911 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2912                              unformat_input_t * input,
2913                              vlib_cli_command_t * cmd)
2914 {
2915   int matched = 0;
2916   u32 table_id = 0;
2917   u32 flow_hash_config = 0;
2918   int rv;
2919
2920   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2921     {
2922       if (unformat (input, "table %d", &table_id))
2923         matched = 1;
2924 #define _(a,v) \
2925     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2926       foreach_flow_hash_bit
2927 #undef _
2928         else
2929         break;
2930     }
2931
2932   if (matched == 0)
2933     return clib_error_return (0, "unknown input `%U'",
2934                               format_unformat_error, input);
2935
2936   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2937   switch (rv)
2938     {
2939     case 0:
2940       break;
2941
2942     case VNET_API_ERROR_NO_SUCH_FIB:
2943       return clib_error_return (0, "no such FIB table %d", table_id);
2944
2945     default:
2946       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2947       break;
2948     }
2949
2950   return 0;
2951 }
2952
2953 /*?
2954  * Configure the set of IPv4 fields used by the flow hash.
2955  *
2956  * @cliexpar
2957  * Example of how to set the flow hash on a given table:
2958  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2959  * Example of how to display the configured flow hash:
2960  * @cliexstart{show ip fib}
2961  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2962  * 0.0.0.0/0
2963  *   unicast-ip4-chain
2964  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2965  *     [0] [@0]: dpo-drop ip6
2966  * 0.0.0.0/32
2967  *   unicast-ip4-chain
2968  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2969  *     [0] [@0]: dpo-drop ip6
2970  * 224.0.0.0/8
2971  *   unicast-ip4-chain
2972  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2973  *     [0] [@0]: dpo-drop ip6
2974  * 6.0.1.2/32
2975  *   unicast-ip4-chain
2976  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2977  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2978  * 7.0.0.1/32
2979  *   unicast-ip4-chain
2980  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2981  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2982  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2983  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2984  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2985  * 240.0.0.0/8
2986  *   unicast-ip4-chain
2987  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2988  *     [0] [@0]: dpo-drop ip6
2989  * 255.255.255.255/32
2990  *   unicast-ip4-chain
2991  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2992  *     [0] [@0]: dpo-drop ip6
2993  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2994  * 0.0.0.0/0
2995  *   unicast-ip4-chain
2996  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2997  *     [0] [@0]: dpo-drop ip6
2998  * 0.0.0.0/32
2999  *   unicast-ip4-chain
3000  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3001  *     [0] [@0]: dpo-drop ip6
3002  * 172.16.1.0/24
3003  *   unicast-ip4-chain
3004  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3005  *     [0] [@4]: ipv4-glean: af_packet0
3006  * 172.16.1.1/32
3007  *   unicast-ip4-chain
3008  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3009  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3010  * 172.16.1.2/32
3011  *   unicast-ip4-chain
3012  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3013  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3014  * 172.16.2.0/24
3015  *   unicast-ip4-chain
3016  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3017  *     [0] [@4]: ipv4-glean: af_packet1
3018  * 172.16.2.1/32
3019  *   unicast-ip4-chain
3020  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3021  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3022  * 224.0.0.0/8
3023  *   unicast-ip4-chain
3024  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3025  *     [0] [@0]: dpo-drop ip6
3026  * 240.0.0.0/8
3027  *   unicast-ip4-chain
3028  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3029  *     [0] [@0]: dpo-drop ip6
3030  * 255.255.255.255/32
3031  *   unicast-ip4-chain
3032  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3033  *     [0] [@0]: dpo-drop ip6
3034  * @cliexend
3035 ?*/
3036 /* *INDENT-OFF* */
3037 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3038 {
3039   .path = "set ip flow-hash",
3040   .short_help =
3041   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3042   .function = set_ip_flow_hash_command_fn,
3043 };
3044 /* *INDENT-ON* */
3045
3046 int
3047 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3048                              u32 table_index)
3049 {
3050   vnet_main_t *vnm = vnet_get_main ();
3051   vnet_interface_main_t *im = &vnm->interface_main;
3052   ip4_main_t *ipm = &ip4_main;
3053   ip_lookup_main_t *lm = &ipm->lookup_main;
3054   vnet_classify_main_t *cm = &vnet_classify_main;
3055   ip4_address_t *if_addr;
3056
3057   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3058     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3059
3060   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3061     return VNET_API_ERROR_NO_SUCH_ENTRY;
3062
3063   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3064   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3065
3066   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3067
3068   if (NULL != if_addr)
3069     {
3070       fib_prefix_t pfx = {
3071         .fp_len = 32,
3072         .fp_proto = FIB_PROTOCOL_IP4,
3073         .fp_addr.ip4 = *if_addr,
3074       };
3075       u32 fib_index;
3076
3077       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3078                                                        sw_if_index);
3079
3080
3081       if (table_index != (u32) ~ 0)
3082         {
3083           dpo_id_t dpo = DPO_INVALID;
3084
3085           dpo_set (&dpo,
3086                    DPO_CLASSIFY,
3087                    DPO_PROTO_IP4,
3088                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3089
3090           fib_table_entry_special_dpo_add (fib_index,
3091                                            &pfx,
3092                                            FIB_SOURCE_CLASSIFY,
3093                                            FIB_ENTRY_FLAG_NONE, &dpo);
3094           dpo_reset (&dpo);
3095         }
3096       else
3097         {
3098           fib_table_entry_special_remove (fib_index,
3099                                           &pfx, FIB_SOURCE_CLASSIFY);
3100         }
3101     }
3102
3103   return 0;
3104 }
3105
3106 static clib_error_t *
3107 set_ip_classify_command_fn (vlib_main_t * vm,
3108                             unformat_input_t * input,
3109                             vlib_cli_command_t * cmd)
3110 {
3111   u32 table_index = ~0;
3112   int table_index_set = 0;
3113   u32 sw_if_index = ~0;
3114   int rv;
3115
3116   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3117     {
3118       if (unformat (input, "table-index %d", &table_index))
3119         table_index_set = 1;
3120       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3121                          vnet_get_main (), &sw_if_index))
3122         ;
3123       else
3124         break;
3125     }
3126
3127   if (table_index_set == 0)
3128     return clib_error_return (0, "classify table-index must be specified");
3129
3130   if (sw_if_index == ~0)
3131     return clib_error_return (0, "interface / subif must be specified");
3132
3133   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3134
3135   switch (rv)
3136     {
3137     case 0:
3138       break;
3139
3140     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3141       return clib_error_return (0, "No such interface");
3142
3143     case VNET_API_ERROR_NO_SUCH_ENTRY:
3144       return clib_error_return (0, "No such classifier table");
3145     }
3146   return 0;
3147 }
3148
3149 /*?
3150  * Assign a classification table to an interface. The classification
3151  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3152  * commands. Once the table is created, use this command to filter packets
3153  * on an interface.
3154  *
3155  * @cliexpar
3156  * Example of how to assign a classification table to an interface:
3157  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3158 ?*/
3159 /* *INDENT-OFF* */
3160 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3161 {
3162     .path = "set ip classify",
3163     .short_help =
3164     "set ip classify intfc <interface> table-index <classify-idx>",
3165     .function = set_ip_classify_command_fn,
3166 };
3167 /* *INDENT-ON* */
3168
3169 /*
3170  * fd.io coding-style-patch-verification: ON
3171  *
3172  * Local Variables:
3173  * eval: (c-set-style "gnu")
3174  * End:
3175  */