classifier-based ACL: refactor + add output ACL
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 /**
57  * @file
58  * @brief IPv4 Forwarding.
59  *
60  * This file contains the source code for IPv4 forwarding.
61  */
62
/**
 * @brief Per-frame worker behind the IPv4 lookup node function(s).
 *
 * For every buffer in @c frame this picks a load-balance object, selects one
 * of its buckets (a DPO), stores that bucket's @c dpoi_index in
 * <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code> and enqueues the buffer
 * to the bucket's @c dpoi_next_node.
 *
 * The load-balance index is either the result of a destination-address
 * lookup in the mtrie of the selected FIB, or - when
 * @c lookup_for_responses_to_locally_received_packets is non-zero - taken
 * directly from <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>.
 *
 * @param vm    vlib_main_t corresponding to the current thread
 * @param node  vlib_node_runtime_t of the calling node
 * @param frame vlib_frame_t whose buffer indices are processed
 * @param lookup_for_responses_to_locally_received_packets
 *              non-zero to skip the mtrie walk and reuse the RX adj index
 * @return frame->n_vectors
 */
63 always_inline uword
64 ip4_lookup_inline (vlib_main_t * vm,
65                    vlib_node_runtime_t * node,
66                    vlib_frame_t * frame,
67                    int lookup_for_responses_to_locally_received_packets)
68 {
69   ip4_main_t *im = &ip4_main;
70   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
71   u32 n_left_from, n_left_to_next, *from, *to_next;
72   ip_lookup_next_t next;
73   u32 thread_index = vlib_get_thread_index ();
74
75   from = vlib_frame_vector_args (frame);
76   n_left_from = frame->n_vectors;
77   next = node->cached_next_index;
78
79   while (n_left_from > 0)
80     {
81       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
82
         /*
          * Quad loop: four buffers per iteration.  At least eight must
          * remain so that from[4]..from[7], which are prefetched for the
          * following iteration, are valid entries of the frame.
          */
83       while (n_left_from >= 8 && n_left_to_next >= 4)
84         {
85           vlib_buffer_t *p0, *p1, *p2, *p3;
86           ip4_header_t *ip0, *ip1, *ip2, *ip3;
87           ip_lookup_next_t next0, next1, next2, next3;
88           const load_balance_t *lb0, *lb1, *lb2, *lb3;
89           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
90           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
91           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
92           u32 pi0, fib_index0, lb_index0;
93           u32 pi1, fib_index1, lb_index1;
94           u32 pi2, fib_index2, lb_index2;
95           u32 pi3, fib_index3, lb_index3;
96           flow_hash_config_t flow_hash_config0, flow_hash_config1;
97           flow_hash_config_t flow_hash_config2, flow_hash_config3;
98           u32 hash_c0, hash_c1, hash_c2, hash_c3;
99           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
100
101           /* Prefetch next iteration. */
102           {
103             vlib_buffer_t *p4, *p5, *p6, *p7;
104
105             p4 = vlib_get_buffer (vm, from[4]);
106             p5 = vlib_get_buffer (vm, from[5]);
107             p6 = vlib_get_buffer (vm, from[6]);
108             p7 = vlib_get_buffer (vm, from[7]);
109
110             vlib_prefetch_buffer_header (p4, LOAD);
111             vlib_prefetch_buffer_header (p5, LOAD);
112             vlib_prefetch_buffer_header (p6, LOAD);
113             vlib_prefetch_buffer_header (p7, LOAD);
114
115             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
116             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
117             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
118             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
119           }
120
              /*
               * Copy the four buffer indices into the current next frame up
               * front; vlib_validate_buffer_enqueue_x4 at the end of the
               * iteration presumably corrects this for any buffer whose next
               * node differs from 'next' (see the explicit fix-up in the
               * single loop below).
               */
121           pi0 = to_next[0] = from[0];
122           pi1 = to_next[1] = from[1];
123           pi2 = to_next[2] = from[2];
124           pi3 = to_next[3] = from[3];
125
126           from += 4;
127           to_next += 4;
128           n_left_to_next -= 4;
129           n_left_from -= 4;
130
131           p0 = vlib_get_buffer (vm, pi0);
132           p1 = vlib_get_buffer (vm, pi1);
133           p2 = vlib_get_buffer (vm, pi2);
134           p3 = vlib_get_buffer (vm, pi3);
135
136           ip0 = vlib_buffer_get_current (p0);
137           ip1 = vlib_buffer_get_current (p1);
138           ip2 = vlib_buffer_get_current (p2);
139           ip3 = vlib_buffer_get_current (p3);
140
141           dst_addr0 = &ip0->dst_address;
142           dst_addr1 = &ip1->dst_address;
143           dst_addr2 = &ip2->dst_address;
144           dst_addr3 = &ip3->dst_address;
145
              /*
               * FIB selection: start with the FIB bound to the RX interface,
               * then override it when sw_if_index[VLIB_TX] is not ~0, in
               * which case that field holds a FIB index, not an interface.
               */
146           fib_index0 =
147             vec_elt (im->fib_index_by_sw_if_index,
148                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
149           fib_index1 =
150             vec_elt (im->fib_index_by_sw_if_index,
151                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
152           fib_index2 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
158           fib_index0 =
159             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
160              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
161           fib_index1 =
162             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
163              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
164           fib_index2 =
165             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
167           fib_index3 =
168             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
170
171
              /*
               * Destination lookup in the selected FIB's mtrie, done as
               * step_one followed by lookup_step 2 and 3, with the four
               * packets interleaved at each stage.  mtrieN/leafN are only
               * assigned (and only read) when the flag is zero.
               */
172           if (!lookup_for_responses_to_locally_received_packets)
173             {
174               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
175               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
176               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
177               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
178
179               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
180               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
181               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
182               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
183             }
184
185           if (!lookup_for_responses_to_locally_received_packets)
186             {
187               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
188               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
189               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
190               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
191             }
192
193           if (!lookup_for_responses_to_locally_received_packets)
194             {
195               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
196               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
197               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
198               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
199             }
200
              /*
               * Load-balance index: reuse the one already stored in
               * adj_index[VLIB_RX], or take it from the final mtrie leaf.
               */
201           if (lookup_for_responses_to_locally_received_packets)
202             {
203               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
204               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
205               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
206               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
207             }
208           else
209             {
210               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
211               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
212               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
213               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
214             }
215
              /* A zero load-balance index is treated as a bug here. */
216           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
217           lb0 = load_balance_get (lb_index0);
218           lb1 = load_balance_get (lb_index1);
219           lb2 = load_balance_get (lb_index2);
220           lb3 = load_balance_get (lb_index3);
221
              /* The bucket masking below relies on a power-of-2 bucket count. */
222           ASSERT (lb0->lb_n_buckets > 0);
223           ASSERT (is_pow2 (lb0->lb_n_buckets));
224           ASSERT (lb1->lb_n_buckets > 0);
225           ASSERT (is_pow2 (lb1->lb_n_buckets));
226           ASSERT (lb2->lb_n_buckets > 0);
227           ASSERT (is_pow2 (lb2->lb_n_buckets));
228           ASSERT (lb3->lb_n_buckets > 0);
229           ASSERT (is_pow2 (lb3->lb_n_buckets));
230
231           /* Use flow hash to compute multipath adjacency. */
              /*
               * ip.flow_hash is reset to 0 for every packet and only computed
               * when the load-balance object has more than one bucket;
               * otherwise bucket 0 is used directly.
               */
232           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
233           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
234           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
235           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
236           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
237             {
238               flow_hash_config0 = lb0->lb_hash_config;
239               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, flow_hash_config0);
241               dpo0 =
242                 load_balance_get_fwd_bucket (lb0,
243                                              (hash_c0 &
244                                               (lb0->lb_n_buckets_minus_1)));
245             }
246           else
247             {
248               dpo0 = load_balance_get_bucket_i (lb0, 0);
249             }
250           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
251             {
252               flow_hash_config1 = lb1->lb_hash_config;
253               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
254                 ip4_compute_flow_hash (ip1, flow_hash_config1);
255               dpo1 =
256                 load_balance_get_fwd_bucket (lb1,
257                                              (hash_c1 &
258                                               (lb1->lb_n_buckets_minus_1)));
259             }
260           else
261             {
262               dpo1 = load_balance_get_bucket_i (lb1, 0);
263             }
264           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
265             {
266               flow_hash_config2 = lb2->lb_hash_config;
267               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
268                 ip4_compute_flow_hash (ip2, flow_hash_config2);
269               dpo2 =
270                 load_balance_get_fwd_bucket (lb2,
271                                              (hash_c2 &
272                                               (lb2->lb_n_buckets_minus_1)));
273             }
274           else
275             {
276               dpo2 = load_balance_get_bucket_i (lb2, 0);
277             }
278           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
279             {
280               flow_hash_config3 = lb3->lb_hash_config;
281               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
282                 ip4_compute_flow_hash (ip3, flow_hash_config3);
283               dpo3 =
284                 load_balance_get_fwd_bucket (lb3,
285                                              (hash_c3 &
286                                               (lb3->lb_n_buckets_minus_1)));
287             }
288           else
289             {
290               dpo3 = load_balance_get_bucket_i (lb3, 0);
291             }
292
              /*
               * The chosen bucket (DPO) supplies both the next node and the
               * index stored in adj_index[VLIB_TX] for that node to use.
               */
293           next0 = dpo0->dpoi_next_node;
294           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
295           next1 = dpo1->dpoi_next_node;
296           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
297           next2 = dpo2->dpoi_next_node;
298           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
299           next3 = dpo3->dpoi_next_node;
300           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
301
              /*
               * Account each packet against its load-balance object in the
               * lbm_to_counters combined counter: one packet plus the length
               * of its buffer chain.
               */
302           vlib_increment_combined_counter
303             (cm, thread_index, lb_index0, 1,
304              vlib_buffer_length_in_chain (vm, p0));
305           vlib_increment_combined_counter
306             (cm, thread_index, lb_index1, 1,
307              vlib_buffer_length_in_chain (vm, p1));
308           vlib_increment_combined_counter
309             (cm, thread_index, lb_index2, 1,
310              vlib_buffer_length_in_chain (vm, p2));
311           vlib_increment_combined_counter
312             (cm, thread_index, lb_index3, 1,
313              vlib_buffer_length_in_chain (vm, p3));
314
315           vlib_validate_buffer_enqueue_x4 (vm, node, next,
316                                            to_next, n_left_to_next,
317                                            pi0, pi1, pi2, pi3,
318                                            next0, next1, next2, next3);
319         }
320
          /* Single loop: same processing, one buffer at a time. */
321       while (n_left_from > 0 && n_left_to_next > 0)
322         {
323           vlib_buffer_t *p0;
324           ip4_header_t *ip0;
325           ip_lookup_next_t next0;
326           const load_balance_t *lb0;
327           ip4_fib_mtrie_t *mtrie0;
328           ip4_fib_mtrie_leaf_t leaf0;
329           ip4_address_t *dst_addr0;
330           u32 pi0, fib_index0, lbi0;
331           flow_hash_config_t flow_hash_config0;
332           const dpo_id_t *dpo0;
333           u32 hash_c0;
334
335           pi0 = from[0];
336           to_next[0] = pi0;
337
338           p0 = vlib_get_buffer (vm, pi0);
339
340           ip0 = vlib_buffer_get_current (p0);
341
342           dst_addr0 = &ip0->dst_address;
343
              /* RX interface's FIB unless sw_if_index[VLIB_TX] names one. */
344           fib_index0 =
345             vec_elt (im->fib_index_by_sw_if_index,
346                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
347           fib_index0 =
348             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
349              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
350
351           if (!lookup_for_responses_to_locally_received_packets)
352             {
353               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
354
355               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
356             }
357
358           if (!lookup_for_responses_to_locally_received_packets)
359             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
360
361           if (!lookup_for_responses_to_locally_received_packets)
362             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
363
364           if (lookup_for_responses_to_locally_received_packets)
365             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
366           else
367             {
368               /* The final mtrie leaf carries the load-balance index. */
369               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
370             }
371
372           ASSERT (lbi0);
373           lb0 = load_balance_get (lbi0);
374
375           ASSERT (lb0->lb_n_buckets > 0);
376           ASSERT (is_pow2 (lb0->lb_n_buckets));
377
378           /* Use flow hash to compute multipath adjacency. */
379           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
380           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
381             {
382               flow_hash_config0 = lb0->lb_hash_config;
383
384               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
385                 ip4_compute_flow_hash (ip0, flow_hash_config0);
386               dpo0 =
387                 load_balance_get_fwd_bucket (lb0,
388                                              (hash_c0 &
389                                               (lb0->lb_n_buckets_minus_1)));
390             }
391           else
392             {
393               dpo0 = load_balance_get_bucket_i (lb0, 0);
394             }
395
396           next0 = dpo0->dpoi_next_node;
397           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
398
399           vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
400                                            vlib_buffer_length_in_chain (vm,
401                                                                         p0));
402
403           from += 1;
404           to_next += 1;
405           n_left_to_next -= 1;
406           n_left_from -= 1;
407
              /*
               * The buffer was written to the frame for 'next' above.  If it
               * actually needs a different next node, give that slot back,
               * close the current frame, switch 'next' to the new node and
               * enqueue the buffer there instead.
               */
408           if (PREDICT_FALSE (next0 != next))
409             {
410               n_left_to_next += 1;
411               vlib_put_next_frame (vm, node, next, n_left_to_next);
412               next = next0;
413               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
414               to_next[0] = pi0;
415               to_next += 1;
416               n_left_to_next -= 1;
417             }
418         }
419
420       vlib_put_next_frame (vm, node, next, n_left_to_next);
421     }
422
     /* Emit trace records only when tracing is enabled on this node. */
423   if (node->flags & VLIB_NODE_FLAG_TRACE)
424     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
425
426   return frame->n_vectors;
427 }
428
429 /** @brief IPv4 lookup node.
430     @node ip4-lookup
431
432     This is the main IPv4 lookup dispatch node.
433
434     @param vm vlib_main_t corresponding to the current thread
435     @param node vlib_node_runtime_t
436     @param frame vlib_frame_t whose contents should be dispatched
437
438     @par Graph mechanics: buffer metadata, next index usage
439
440     @em Uses:
441     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
442         - Indicates the @c sw_if_index value of the interface that the
443           packet was received on.
444     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
445         - When the value is @c ~0 then the node performs a longest prefix
446           match (LPM) for the packet destination address in the FIB attached
447           to the receive interface.
448         - Otherwise perform LPM for the packet destination address in the
449           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
450           value (0, 1, ...) and not a VRF id.
451
452     @em Sets:
453     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
454         - The @c dpoi_index of the load-balance bucket (DPO) selected by
            the lookup.
      - <code>vnet_buffer(b)->ip.flow_hash</code>
          - The flow hash used to select the bucket, or 0 when the
            load-balance object has a single bucket.
455
456     <em>Next Index:</em>
457     - Dispatches the packet to the node index found in
458       @c dpo->dpoi_next_node
459       (where @c dpo is the load-balance bucket selected by the lookup).

      @return the number of buffers in @c frame (frame->n_vectors).
460 */
461 static uword
462 ip4_lookup (vlib_main_t * vm,
463             vlib_node_runtime_t * node, vlib_frame_t * frame)
464 {
     /* Passing 0 requests a full mtrie lookup for every packet instead of
        reusing the index already stored in ip.adj_index[VLIB_RX]. */
465   return ip4_lookup_inline (vm, node, frame,
466                             /* lookup_for_responses_to_locally_received_packets */
467                             0);
468
469 }
470
/* Trace formatter used by the nodes registered in this file; only declared
   here, the (static) definition lives elsewhere in this file. */
471 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
472
/*
 * Registration of the "ip4-lookup" graph node: ip4_lookup is the dispatch
 * function, frame elements are u32 buffer indices, and the next nodes are
 * the IP_LOOKUP_N_NEXT entries listed by IP4_LOOKUP_NEXT_NODES.
 */
473 /* *INDENT-OFF* */
474 VLIB_REGISTER_NODE (ip4_lookup_node) =
475 {
476   .function = ip4_lookup,
477   .name = "ip4-lookup",
478   .vector_size = sizeof (u32),
479   .format_trace = format_ip4_lookup_trace,
480   .n_next_nodes = IP_LOOKUP_N_NEXT,
481   .next_nodes = IP4_LOOKUP_NEXT_NODES,
482 };
483 /* *INDENT-ON* */
484
/* NOTE(review): presumably generates CPU-architecture-specific variants of
   ip4_lookup for this node - confirm against the macro definition. */
485 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
486
/**
 * @brief Dispatch function of the "ip4-load-balance" node.
 *
 * On entry <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code> holds a
 * load-balance index.  For each buffer one bucket (a DPO) of that
 * load-balance object is selected, @c adj_index[VLIB_TX] is overwritten with
 * the bucket's @c dpoi_index and the buffer is enqueued to the bucket's
 * @c dpoi_next_node.  Packets are accounted in
 * @c load_balance_main.lbm_via_counters.
 *
 * @param vm    vlib_main_t corresponding to the current thread
 * @param node  vlib_node_runtime_t
 * @param frame vlib_frame_t whose contents should be dispatched
 * @return frame->n_vectors
 */
487 always_inline uword
488 ip4_load_balance (vlib_main_t * vm,
489                   vlib_node_runtime_t * node, vlib_frame_t * frame)
490 {
491   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
492   u32 n_left_from, n_left_to_next, *from, *to_next;
493   ip_lookup_next_t next;
494   u32 thread_index = vlib_get_thread_index ();
495
496   from = vlib_frame_vector_args (frame);
497   n_left_from = frame->n_vectors;
498   next = node->cached_next_index;
499
     /* Trace records are produced up front, before any buffer is enqueued. */
500   if (node->flags & VLIB_NODE_FLAG_TRACE)
501     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
502
503   while (n_left_from > 0)
504     {
505       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
506
507
          /*
           * Dual loop: two buffers per iteration.  At least four must remain
           * so that from[2] and from[3], prefetched for the following
           * iteration, are valid entries of the frame.
           */
508       while (n_left_from >= 4 && n_left_to_next >= 2)
509         {
510           ip_lookup_next_t next0, next1;
511           const load_balance_t *lb0, *lb1;
512           vlib_buffer_t *p0, *p1;
513           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
514           const ip4_header_t *ip0, *ip1;
515           const dpo_id_t *dpo0, *dpo1;
516
517           /* Prefetch next iteration. */
518           {
519             vlib_buffer_t *p2, *p3;
520
521             p2 = vlib_get_buffer (vm, from[2]);
522             p3 = vlib_get_buffer (vm, from[3]);
523
524             vlib_prefetch_buffer_header (p2, STORE);
525             vlib_prefetch_buffer_header (p3, STORE);
526
527             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
528             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
529           }
530
531           pi0 = to_next[0] = from[0];
532           pi1 = to_next[1] = from[1];
533
534           from += 2;
535           n_left_from -= 2;
536           to_next += 2;
537           n_left_to_next -= 2;
538
539           p0 = vlib_get_buffer (vm, pi0);
540           p1 = vlib_get_buffer (vm, pi1);
541
542           ip0 = vlib_buffer_get_current (p0);
543           ip1 = vlib_buffer_get_current (p1);
              /* Input: the load-balance index left in adj_index[VLIB_TX]. */
544           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
545           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
546
547           lb0 = load_balance_get (lbi0);
548           lb1 = load_balance_get (lbi1);
549
550           /*
551            * This node serves "via" FIB entries: when an earlier node already
552            * stored a flow hash in the buffer it is re-used, shifted right by one.
553            * We don't want to use the same hash value at each level in the recursion
554            * graph as that would lead to polarisation
555            */
556           hc0 = hc1 = 0;
557
558           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
559             {
560               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
561                 {
562                   hc0 = vnet_buffer (p0)->ip.flow_hash =
563                     vnet_buffer (p0)->ip.flow_hash >> 1;
564                 }
565               else
566                 {
                      /* No hash stored yet (value 0): compute one now. */
567                   hc0 = vnet_buffer (p0)->ip.flow_hash =
568                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
569                 }
570               dpo0 = load_balance_get_fwd_bucket
571                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
572             }
573           else
574             {
575               dpo0 = load_balance_get_bucket_i (lb0, 0);
576             }
577           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
578             {
579               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
580                 {
581                   hc1 = vnet_buffer (p1)->ip.flow_hash =
582                     vnet_buffer (p1)->ip.flow_hash >> 1;
583                 }
584               else
585                 {
586                   hc1 = vnet_buffer (p1)->ip.flow_hash =
587                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
588                 }
589               dpo1 = load_balance_get_fwd_bucket
590                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
591             }
592           else
593             {
594               dpo1 = load_balance_get_bucket_i (lb1, 0);
595             }
596
597           next0 = dpo0->dpoi_next_node;
598           next1 = dpo1->dpoi_next_node;
599
              /* Replace the load-balance index with the chosen bucket's index. */
600           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
602
              /* One packet plus its buffer-chain length, per load-balance. */
603           vlib_increment_combined_counter
604             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
605           vlib_increment_combined_counter
606             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
607
608           vlib_validate_buffer_enqueue_x2 (vm, node, next,
609                                            to_next, n_left_to_next,
610                                            pi0, pi1, next0, next1);
611         }
612
          /* Single loop: same processing, one buffer at a time. */
613       while (n_left_from > 0 && n_left_to_next > 0)
614         {
615           ip_lookup_next_t next0;
616           const load_balance_t *lb0;
617           vlib_buffer_t *p0;
618           u32 pi0, lbi0, hc0;
619           const ip4_header_t *ip0;
620           const dpo_id_t *dpo0;
621
622           pi0 = from[0];
623           to_next[0] = pi0;
624           from += 1;
625           to_next += 1;
626           n_left_to_next -= 1;
627           n_left_from -= 1;
628
629           p0 = vlib_get_buffer (vm, pi0);
630
631           ip0 = vlib_buffer_get_current (p0);
632           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
633
634           lb0 = load_balance_get (lbi0);
635
636           hc0 = 0;
637           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
638             {
639               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
640                 {
641                   hc0 = vnet_buffer (p0)->ip.flow_hash =
642                     vnet_buffer (p0)->ip.flow_hash >> 1;
643                 }
644               else
645                 {
646                   hc0 = vnet_buffer (p0)->ip.flow_hash =
647                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
648                 }
649               dpo0 = load_balance_get_fwd_bucket
650                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
651             }
652           else
653             {
654               dpo0 = load_balance_get_bucket_i (lb0, 0);
655             }
656
657           next0 = dpo0->dpoi_next_node;
658           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
659
660           vlib_increment_combined_counter
661             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
662
663           vlib_validate_buffer_enqueue_x1 (vm, node, next,
664                                            to_next, n_left_to_next,
665                                            pi0, next0);
666         }
667
668       vlib_put_next_frame (vm, node, next, n_left_to_next);
669     }
670
671   return frame->n_vectors;
672 }
673
/*
 * Registration of the "ip4-load-balance" graph node.  It declares no next
 * nodes of its own and is registered as a sibling of "ip4-lookup".
 * NOTE(review): presumably .sibling_of makes it share ip4-lookup's next-node
 * arcs, which is what lets dpoi_next_node be used as the next index in
 * ip4_load_balance - confirm against the vlib node documentation.
 */
674 /* *INDENT-OFF* */
675 VLIB_REGISTER_NODE (ip4_load_balance_node) =
676 {
677   .function = ip4_load_balance,
678   .name = "ip4-load-balance",
679   .vector_size = sizeof (u32),
680   .sibling_of = "ip4-lookup",
681   .format_trace =
682   format_ip4_lookup_trace,
683 };
684 /* *INDENT-ON* */
685
/* NOTE(review): presumably generates CPU-architecture-specific variants of
   ip4_load_balance for this node - confirm against the macro definition. */
686 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
687
688 /**
 * @brief Get the first IPv4 address found on an interface.
 *
 * Walks the interface's addresses with foreach_ip_interface_address
 * (honoring unnumbered interfaces) and stops at the first one.
 *
 * @param im           ip4 main; its lookup_main is used for the walk
 * @param sw_if_index  interface whose addresses are walked
 * @param result_ia    if non-NULL, receives the ip_interface_address_t the
 *                     returned address belongs to, or 0 when nothing is found
 * @return pointer to the first address, or 0 if the walk yields none
 */
689 ip4_address_t *
690 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
691                              ip_interface_address_t ** result_ia)
692 {
693   ip_lookup_main_t *lm = &im->lookup_main;
694   ip_interface_address_t *ia = 0;
695   ip4_address_t *result = 0;
696
697   /* *INDENT-OFF* */
698   foreach_ip_interface_address
699     (lm, ia, sw_if_index,
700      1 /* honor unnumbered */ ,
701      ({
702        ip4_address_t * a =
703          ip_interface_address_get_address (lm, ia);
704        result = a;
         /* Only the first address is wanted: leave the walk immediately. */
705        break;
706      }));
707   /* *INDENT-ON* */
     /* 'ia' is only meaningful when the walk actually produced an address. */
708   if (result_ia)
709     *result_ia = result ? ia : 0;
710   return result;
711 }
712
/**
 * @brief Install the FIB entries that go with an interface address.
 *
 * Depending on the prefix length of @c a this adds, in order:
 *  - length <= 30: a connected/attached entry for the prefix, plus /32 drop
 *    entries for the lowest and highest address of the prefix (each skipped
 *    when it equals the interface address itself);
 *  - length == 31: an attached /32 entry for the other address of the pair;
 *  - always: a /32 classify entry when a classify table is configured for
 *    the interface, followed by the connected/local /32 entry for the
 *    interface address.
 *
 * @param sw_if_index interface the address is configured on
 * @param im          ip4 main (fib_masks and lookup_main are used)
 * @param fib_index   FIB to which the entries are added
 * @param a           interface address (address and prefix length)
 */
713 static void
714 ip4_add_interface_routes (u32 sw_if_index,
715                           ip4_main_t * im, u32 fib_index,
716                           ip_interface_address_t * a)
717 {
718   ip_lookup_main_t *lm = &im->lookup_main;
719   ip4_address_t *address = ip_interface_address_get_address (lm, a);
720   fib_prefix_t pfx = {
721     .fp_len = a->address_length,
722     .fp_proto = FIB_PROTOCOL_IP4,
723     .fp_addr.ip4 = *address,
724   };
725
726   if (pfx.fp_len <= 30)
727     {
728       /* a /30 or shorter - add a glean for the network address */
729       fib_table_entry_update_one_path (fib_index, &pfx,
730                                        FIB_SOURCE_INTERFACE,
731                                        (FIB_ENTRY_FLAG_CONNECTED |
732                                         FIB_ENTRY_FLAG_ATTACHED),
733                                        DPO_PROTO_IP4,
734                                        /* No next-hop address */
735                                        NULL,
736                                        sw_if_index,
737                                        // invalid FIB index
738                                        ~0,
739                                        1,
740                                        // no out-label stack
741                                        NULL,
742                                        FIB_ROUTE_PATH_FLAG_NONE);
743
744       /* Add the two broadcast addresses as drop */
          /* First the address masked with fib_masks[len] (assuming that is
             the network mask for the prefix length, i.e. host bits zero). */
745       fib_prefix_t net_pfx = {
746         .fp_len = 32,
747         .fp_proto = FIB_PROTOCOL_IP4,
748         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
749       };
          /* Never install a drop for the interface's own address. */
750       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
751         fib_table_entry_special_add(fib_index,
752                                     &net_pfx,
753                                     FIB_SOURCE_INTERFACE,
754                                     (FIB_ENTRY_FLAG_DROP |
755                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
          /* Then the same prefix with all bits outside the mask set. */
756       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
757       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
758         fib_table_entry_special_add(fib_index,
759                                     &net_pfx,
760                                     FIB_SOURCE_INTERFACE,
761                                     (FIB_ENTRY_FLAG_DROP |
762                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
763     }
764   else if (pfx.fp_len == 31)
765     {
          /* XOR with a network-byte-order 1 flips the lowest address bit,
             giving the other address of the /31 pair. */
766       u32 mask = clib_host_to_net_u32(1);
767       fib_prefix_t net_pfx = pfx;
768
769       net_pfx.fp_len = 32;
770       net_pfx.fp_addr.ip4.as_u32 ^= mask;
771
772       /* a /31 - add the other end as an attached host */
773       fib_table_entry_update_one_path (fib_index, &net_pfx,
774                                        FIB_SOURCE_INTERFACE,
775                                        (FIB_ENTRY_FLAG_ATTACHED),
776                                        DPO_PROTO_IP4,
777                                        &net_pfx.fp_addr,
778                                        sw_if_index,
779                                        // invalid FIB index
780                                        ~0,
781                                        1,
782                                        NULL,
783                                        FIB_ROUTE_PATH_FLAG_NONE);
784     }
     /* Everything below concerns the /32 of the interface address itself. */
785   pfx.fp_len = 32;
786
     /* If a classify table is configured for this interface (index != ~0),
        also source the /32 from FIB_SOURCE_CLASSIFY with a classify DPO. */
787   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
788     {
789       u32 classify_table_index =
790         lm->classify_table_index_by_sw_if_index[sw_if_index];
791       if (classify_table_index != (u32) ~ 0)
792         {
793           dpo_id_t dpo = DPO_INVALID;
794
795           dpo_set (&dpo,
796                    DPO_CLASSIFY,
797                    DPO_PROTO_IP4,
798                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
799
800           fib_table_entry_special_dpo_add (fib_index,
801                                            &pfx,
802                                            FIB_SOURCE_CLASSIFY,
803                                            FIB_ENTRY_FLAG_NONE, &dpo);
              /* The local dpo handle is no longer needed after the add. */
804           dpo_reset (&dpo);
805         }
806     }
807
     /* The interface's own address: connected + local /32 entry. */
808   fib_table_entry_update_one_path (fib_index, &pfx,
809                                    FIB_SOURCE_INTERFACE,
810                                    (FIB_ENTRY_FLAG_CONNECTED |
811                                     FIB_ENTRY_FLAG_LOCAL),
812                                    DPO_PROTO_IP4,
813                                    &pfx.fp_addr,
814                                    sw_if_index,
815                                    // invalid FIB index
816                                    ~0,
817                                    1, NULL,
818                                    FIB_ROUTE_PATH_FLAG_NONE);
819 }
820
821 static void
822 ip4_del_interface_routes (ip4_main_t * im,
823                           u32 fib_index,
824                           ip4_address_t * address, u32 address_length)
825 {
826   fib_prefix_t pfx = {
827     .fp_len = address_length,
828     .fp_proto = FIB_PROTOCOL_IP4,
829     .fp_addr.ip4 = *address,
830   };
831
832   if (pfx.fp_len <= 30)
833     {
834       fib_prefix_t net_pfx = {
835         .fp_len = 32,
836         .fp_proto = FIB_PROTOCOL_IP4,
837         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
838       };
839       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
840         fib_table_entry_special_remove(fib_index,
841                                        &net_pfx,
842                                        FIB_SOURCE_INTERFACE);
843       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
844       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
845         fib_table_entry_special_remove(fib_index,
846                                        &net_pfx,
847                                        FIB_SOURCE_INTERFACE);
848       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
849     }
850     else if (pfx.fp_len == 31)
851     {
852       u32 mask = clib_host_to_net_u32(1);
853       fib_prefix_t net_pfx = pfx;
854
855       net_pfx.fp_len = 32;
856       net_pfx.fp_addr.ip4.as_u32 ^= mask;
857
858       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
859     }
860
861   pfx.fp_len = 32;
862   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
863 }
864
865 void
866 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
867 {
868   ip4_main_t *im = &ip4_main;
869
870   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
871
872   /*
873    * enable/disable only on the 1<->0 transition
874    */
875   if (is_enable)
876     {
877       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
878         return;
879     }
880   else
881     {
882       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
883       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
884         return;
885     }
886   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
887                                !is_enable, 0, 0);
888
889
890   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
891                                sw_if_index, !is_enable, 0, 0);
892 }
893
894 static clib_error_t *
895 ip4_add_del_interface_address_internal (vlib_main_t * vm,
896                                         u32 sw_if_index,
897                                         ip4_address_t * address,
898                                         u32 address_length, u32 is_del)
899 {
900   vnet_main_t *vnm = vnet_get_main ();
901   ip4_main_t *im = &ip4_main;
902   ip_lookup_main_t *lm = &im->lookup_main;
903   clib_error_t *error = 0;
904   u32 if_address_index, elts_before;
905   ip4_address_fib_t ip4_af, *addr_fib = 0;
906
907   /* local0 interface doesn't support IP addressing  */
908   if (sw_if_index == 0)
909     {
910       return
911        clib_error_create ("local0 interface doesn't support IP addressing");
912     }
913
914   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
915   ip4_addr_fib_init (&ip4_af, address,
916                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
917   vec_add1 (addr_fib, ip4_af);
918
919   /* FIXME-LATER
920    * there is no support for adj-fib handling in the presence of overlapping
921    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
922    * most routers do.
923    */
924   /* *INDENT-OFF* */
925   if (!is_del)
926     {
927       /* When adding an address check that it does not conflict
928          with an existing address. */
929       ip_interface_address_t *ia;
930       foreach_ip_interface_address
931         (&im->lookup_main, ia, sw_if_index,
932          0 /* honor unnumbered */ ,
933          ({
934            ip4_address_t * x =
935              ip_interface_address_get_address
936              (&im->lookup_main, ia);
937            if (ip4_destination_matches_route
938                (im, address, x, ia->address_length) ||
939                ip4_destination_matches_route (im,
940                                               x,
941                                               address,
942                                               address_length))
943              return
944                clib_error_create
945                ("failed to add %U which conflicts with %U for interface %U",
946                 format_ip4_address_and_length, address,
947                 address_length,
948                 format_ip4_address_and_length, x,
949                 ia->address_length,
950                 format_vnet_sw_if_index_name, vnm,
951                 sw_if_index);
952          }));
953     }
954   /* *INDENT-ON* */
955
956   elts_before = pool_elts (lm->if_address_pool);
957
958   error = ip_interface_address_add_del
959     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
960   if (error)
961     goto done;
962
963   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
964
965   if (is_del)
966     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
967   else
968     ip4_add_interface_routes (sw_if_index,
969                               im, ip4_af.fib_index,
970                               pool_elt_at_index
971                               (lm->if_address_pool, if_address_index));
972
973   /* If pool did not grow/shrink: add duplicate address. */
974   if (elts_before != pool_elts (lm->if_address_pool))
975     {
976       ip4_add_del_interface_address_callback_t *cb;
977       vec_foreach (cb, im->add_del_interface_address_callbacks)
978         cb->function (im, cb->function_opaque, sw_if_index,
979                       address, address_length, if_address_index, is_del);
980     }
981
982 done:
983   vec_free (addr_fib);
984   return error;
985 }
986
987 clib_error_t *
988 ip4_add_del_interface_address (vlib_main_t * vm,
989                                u32 sw_if_index,
990                                ip4_address_t * address,
991                                u32 address_length, u32 is_del)
992 {
993   return ip4_add_del_interface_address_internal
994     (vm, sw_if_index, address, address_length, is_del);
995 }
996
997 /* Built-in ip4 unicast rx feature path definition.
 *
 * The arc declared below is named "ip4-unicast", lists "ip4-input" and
 * "ip4-input-no-checksum" as its start nodes, and has arc_index_ptr set to
 * the address of ip4_main.lookup_main.ucast_feature_arc_index.
 * Each VNET_FEATURE_INIT entry that follows names one node on that arc
 * together with the node(s) listed in its runs_before field.
 */
998 /* *INDENT-OFF* */
999 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
1000 {
1001   .arc_name = "ip4-unicast",
1002   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1003   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
1004 };
1005
1006 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1007 {
1008   .arc_name = "ip4-unicast",
1009   .node_name = "ip4-flow-classify",
1010   .runs_before = VNET_FEATURES ("ip4-inacl"),
1011 };
1012
1013 VNET_FEATURE_INIT (ip4_inacl, static) =
1014 {
1015   .arc_name = "ip4-unicast",
1016   .node_name = "ip4-inacl",
1017   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1018 };
1019
1020 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1021 {
1022   .arc_name = "ip4-unicast",
1023   .node_name = "ip4-source-check-via-rx",
1024   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1025 };
1026
1027 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1028 {
1029   .arc_name = "ip4-unicast",
1030   .node_name = "ip4-source-check-via-any",
1031   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1032 };
1033
/* Lists the same runs_before target as "ip4-source-check-via-any" above;
   no ordering between those two nodes is declared here. */
1034 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1035 {
1036   .arc_name = "ip4-unicast",
1037   .node_name = "ip4-source-and-port-range-check-rx",
1038   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1039 };
1040
1041 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1042 {
1043   .arc_name = "ip4-unicast",
1044   .node_name = "ip4-policer-classify",
1045   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1046 };
1047
1048 VNET_FEATURE_INIT (ip4_ipsec, static) =
1049 {
1050   .arc_name = "ip4-unicast",
1051   .node_name = "ipsec-input-ip4",
1052   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1053 };
1054
1055 VNET_FEATURE_INIT (ip4_vpath, static) =
1056 {
1057   .arc_name = "ip4-unicast",
1058   .node_name = "vpath-input-ip4",
1059   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1060 };
1061
1062 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1063 {
1064   .arc_name = "ip4-unicast",
1065   .node_name = "ip4-vxlan-bypass",
1066   .runs_before = VNET_FEATURES ("ip4-lookup"),
1067 };
1068
/* "ip4-not-enabled" is the feature switched per interface by
   ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del(). */
1069 VNET_FEATURE_INIT (ip4_not_enabled, static) =
1070 {
1071   .arc_name = "ip4-unicast",
1072   .node_name = "ip4-not-enabled",
1073   .runs_before = VNET_FEATURES ("ip4-lookup"),
1074 };
1075
1076 VNET_FEATURE_INIT (ip4_lookup, static) =
1077 {
1078   .arc_name = "ip4-unicast",
1079   .node_name = "ip4-lookup",
1080   .runs_before = 0,     /* not before any other features */
1081 };
1082
1083 /* Built-in ip4 multicast rx feature path definition.
 *
 * The "ip4-multicast" arc lists the same start nodes as the unicast arc
 * ("ip4-input", "ip4-input-no-checksum"); arc_index_ptr is set to the
 * address of ip4_main.lookup_main.mcast_feature_arc_index.
 * "vpath-input-ip4" and "ip4-not-enabled" both list
 * "ip4-mfib-forward-lookup" in runs_before; that node lists nothing.
 */
1084 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1085 {
1086   .arc_name = "ip4-multicast",
1087   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1088   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1089 };
1090
1091 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1092 {
1093   .arc_name = "ip4-multicast",
1094   .node_name = "vpath-input-ip4",
1095   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1096 };
1097
1098 VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
1099 {
1100   .arc_name = "ip4-multicast",
1101   .node_name = "ip4-not-enabled",
1102   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1103 };
1104
1105 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1106 {
1107   .arc_name = "ip4-multicast",
1108   .node_name = "ip4-mfib-forward-lookup",
1109   .runs_before = 0,     /* last feature */
1110 };
1111
1112 /* Source and port-range check ip4 tx feature path definition.
 *
 * The "ip4-output" arc lists "ip4-rewrite", "ip4-midchain" and
 * "ip4-dvr-dpo" as start nodes; arc_index_ptr is set to the address of
 * ip4_main.lookup_main.output_feature_arc_index.
 * Declared runs_before chain:
 *   ip4-source-and-port-range-check-tx -> ip4-outacl
 *     -> ipsec-output-ip4 -> interface-output (lists nothing).
 */
1113 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1114 {
1115   .arc_name = "ip4-output",
1116   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1117   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1118 };
1119
1120 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1121 {
1122   .arc_name = "ip4-output",
1123   .node_name = "ip4-source-and-port-range-check-tx",
1124   .runs_before = VNET_FEATURES ("ip4-outacl"),
1125 };
1126
1127 VNET_FEATURE_INIT (ip4_outacl, static) =
1128 {
1129   .arc_name = "ip4-output",
1130   .node_name = "ip4-outacl",
1131   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1132 };
1133
1134 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1135 {
1136   .arc_name = "ip4-output",
1137   .node_name = "ipsec-output-ip4",
1138   .runs_before = VNET_FEATURES ("interface-output"),
1139 };
1140
1141 /* Built-in ip4 tx feature path definition */
1142 VNET_FEATURE_INIT (ip4_interface_output, static) =
1143 {
1144   .arc_name = "ip4-output",
1145   .node_name = "interface-output",
1146   .runs_before = 0,     /* not before any other features */
1147 };
1148 /* *INDENT-ON* */
1149
1150 static clib_error_t *
1151 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1152 {
1153   ip4_main_t *im = &ip4_main;
1154
1155   /* Fill in lookup tables with default table (0). */
1156   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1157   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1158
1159   if (!is_add)
1160     {
1161       ip4_main_t *im4 = &ip4_main;
1162       ip_lookup_main_t *lm4 = &im4->lookup_main;
1163       ip_interface_address_t *ia = 0;
1164       ip4_address_t *address;
1165       vlib_main_t *vm = vlib_get_main ();
1166
1167       /* *INDENT-OFF* */
1168       foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1169       ({
1170         address = ip_interface_address_get_address (lm4, ia);
1171         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1172       }));
1173       /* *INDENT-ON* */
1174     }
1175
1176   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1177                                is_add, 0, 0);
1178
1179   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1180                                sw_if_index, is_add, 0, 0);
1181
1182   return /* no error */ 0;
1183 }
1184
1185 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1186
1187 /* Global IP4 main.  Functions in this file reach it through &ip4_main. */
1188 ip4_main_t ip4_main;
1189
1190 clib_error_t *
1191 ip4_lookup_init (vlib_main_t * vm)
1192 {
1193   ip4_main_t *im = &ip4_main;
1194   clib_error_t *error;
1195   uword i;
1196
1197   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1198     return error;
1199   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1200     return (error);
1201   if ((error = vlib_call_init_function (vm, fib_module_init)))
1202     return error;
1203   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1204     return error;
1205
1206   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1207     {
1208       u32 m;
1209
1210       if (i < 32)
1211         m = pow2_mask (i) << (32 - i);
1212       else
1213         m = ~0;
1214       im->fib_masks[i] = clib_host_to_net_u32 (m);
1215     }
1216
1217   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1218
1219   /* Create FIB with index 0 and table id of 0. */
1220   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1221                                      FIB_SOURCE_DEFAULT_ROUTE);
1222   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1223                                       MFIB_SOURCE_DEFAULT_ROUTE);
1224
1225   {
1226     pg_node_t *pn;
1227     pn = pg_get_node (ip4_lookup_node.index);
1228     pn->unformat_edit = unformat_pg_ip4_header;
1229   }
1230
1231   {
1232     ethernet_arp_header_t h;
1233
1234     memset (&h, 0, sizeof (h));
1235
1236     /* Set target ethernet address to all zeros. */
1237     memset (h.ip4_over_ethernet[1].ethernet, 0,
1238             sizeof (h.ip4_over_ethernet[1].ethernet));
1239
1240 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1241 #define _8(f,v) h.f = v;
1242     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1243     _16 (l3_type, ETHERNET_TYPE_IP4);
1244     _8 (n_l2_address_bytes, 6);
1245     _8 (n_l3_address_bytes, 4);
1246     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1247 #undef _16
1248 #undef _8
1249
1250     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1251                                /* data */ &h,
1252                                sizeof (h),
1253                                /* alloc chunk size */ 8,
1254                                "ip4 arp");
1255   }
1256
1257   return error;
1258 }
1259
1260 VLIB_INIT_FUNCTION (ip4_lookup_init);
1261
/* Per-packet trace record written by ip4_forward_next_trace() and read by
   the format_ip4_*_trace() functions in this file. */
1262 typedef struct
1263 {
1264   /* Adjacency taken. */
       /* Copied from the buffer's ip.adj_index[] slot chosen by the caller. */
1265   u32 dpo_index;
       /* Copied from the buffer's ip.flow_hash. */
1266   u32 flow_hash;
       /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise
          the FIB index looked up for its RX interface. */
1267   u32 fib_index;
1268
1269   /* Packet data, possibly *after* rewrite. */
       /* NOTE(review): the size subtracts one u32 although three u32
          fields precede the array - confirm the intended record size. */
1270   u8 packet_data[64 - 1 * sizeof (u32)];
1271 }
1272 ip4_forward_next_trace_t;
1273
1274 u8 *
1275 format_ip4_forward_next_trace (u8 * s, va_list * args)
1276 {
1277   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1278   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1279   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1280   u32 indent = format_get_indent (s);
1281   s = format (s, "%U%U",
1282               format_white_space, indent,
1283               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1284   return s;
1285 }
1286
1287 static u8 *
1288 format_ip4_lookup_trace (u8 * s, va_list * args)
1289 {
1290   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1291   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1292   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1293   u32 indent = format_get_indent (s);
1294
1295   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1296               t->fib_index, t->dpo_index, t->flow_hash);
1297   s = format (s, "\n%U%U",
1298               format_white_space, indent,
1299               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1300   return s;
1301 }
1302
1303 static u8 *
1304 format_ip4_rewrite_trace (u8 * s, va_list * args)
1305 {
1306   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1307   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1308   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1309   u32 indent = format_get_indent (s);
1310
1311   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1312               t->fib_index, t->dpo_index, format_ip_adjacency,
1313               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1314   s = format (s, "\n%U%U",
1315               format_white_space, indent,
1316               format_ip_adjacency_packet_data,
1317               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1318   return s;
1319 }
1320
1321 /* Common trace function for all ip4-forward next nodes. */
1322 void
1323 ip4_forward_next_trace (vlib_main_t * vm,
1324                         vlib_node_runtime_t * node,
1325                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1326 {
1327   u32 *from, n_left;
1328   ip4_main_t *im = &ip4_main;
1329
1330   n_left = frame->n_vectors;
1331   from = vlib_frame_vector_args (frame);
1332
1333   while (n_left >= 4)
1334     {
1335       u32 bi0, bi1;
1336       vlib_buffer_t *b0, *b1;
1337       ip4_forward_next_trace_t *t0, *t1;
1338
1339       /* Prefetch next iteration. */
1340       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1341       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1342
1343       bi0 = from[0];
1344       bi1 = from[1];
1345
1346       b0 = vlib_get_buffer (vm, bi0);
1347       b1 = vlib_get_buffer (vm, bi1);
1348
1349       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1350         {
1351           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1352           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1353           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1354           t0->fib_index =
1355             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1356              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1357             vec_elt (im->fib_index_by_sw_if_index,
1358                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1359
1360           clib_memcpy (t0->packet_data,
1361                        vlib_buffer_get_current (b0),
1362                        sizeof (t0->packet_data));
1363         }
1364       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1365         {
1366           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1367           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1368           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1369           t1->fib_index =
1370             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1371              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1372             vec_elt (im->fib_index_by_sw_if_index,
1373                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1374           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1375                        sizeof (t1->packet_data));
1376         }
1377       from += 2;
1378       n_left -= 2;
1379     }
1380
1381   while (n_left >= 1)
1382     {
1383       u32 bi0;
1384       vlib_buffer_t *b0;
1385       ip4_forward_next_trace_t *t0;
1386
1387       bi0 = from[0];
1388
1389       b0 = vlib_get_buffer (vm, bi0);
1390
1391       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1392         {
1393           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1394           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1395           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1396           t0->fib_index =
1397             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1398              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1399             vec_elt (im->fib_index_by_sw_if_index,
1400                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1401           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1402                        sizeof (t0->packet_data));
1403         }
1404       from += 1;
1405       n_left -= 1;
1406     }
1407 }
1408
1409 /* Compute TCP/UDP/ICMP4 checksum in software. */
1410 u16
1411 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1412                               ip4_header_t * ip0)
1413 {
1414   ip_csum_t sum0;
1415   u32 ip_header_length, payload_length_host_byte_order;
1416   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1417   u16 sum16;
1418   void *data_this_buffer;
1419
1420   /* Initialize checksum with ip header. */
1421   ip_header_length = ip4_header_bytes (ip0);
1422   payload_length_host_byte_order =
1423     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1424   sum0 =
1425     clib_host_to_net_u32 (payload_length_host_byte_order +
1426                           (ip0->protocol << 16));
1427
1428   if (BITS (uword) == 32)
1429     {
1430       sum0 =
1431         ip_csum_with_carry (sum0,
1432                             clib_mem_unaligned (&ip0->src_address, u32));
1433       sum0 =
1434         ip_csum_with_carry (sum0,
1435                             clib_mem_unaligned (&ip0->dst_address, u32));
1436     }
1437   else
1438     sum0 =
1439       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1440
1441   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1442   data_this_buffer = (void *) ip0 + ip_header_length;
1443   n_ip_bytes_this_buffer =
1444     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1445   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1446     {
1447       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1448         n_ip_bytes_this_buffer - ip_header_length : 0;
1449     }
1450   while (1)
1451     {
1452       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1453       n_bytes_left -= n_this_buffer;
1454       if (n_bytes_left == 0)
1455         break;
1456
1457       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1458       p0 = vlib_get_buffer (vm, p0->next_buffer);
1459       data_this_buffer = vlib_buffer_get_current (p0);
1460       n_this_buffer = p0->current_length;
1461     }
1462
1463   sum16 = ~ip_csum_fold (sum0);
1464
1465   return sum16;
1466 }
1467
1468 u32
1469 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1470 {
1471   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1472   udp_header_t *udp0;
1473   u16 sum16;
1474
1475   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1476           || ip0->protocol == IP_PROTOCOL_UDP);
1477
1478   udp0 = (void *) (ip0 + 1);
1479   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1480     {
1481       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1482                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1483       return p0->flags;
1484     }
1485
1486   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1487
1488   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1489                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1490
1491   return p0->flags;
1492 }
1493
/* Declares the "ip4-local" feature arc; its only start node is
   "ip4-local".  No arc_index_ptr is given here. */
1494 /* *INDENT-OFF* */
1495 VNET_FEATURE_ARC_INIT (ip4_local) =
1496 {
1497   .arc_name  = "ip4-local",
1498   .start_nodes = VNET_FEATURES ("ip4-local"),
1499 };
1500 /* *INDENT-ON* */
1501
1502 static inline void
1503 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1504                        u8 is_udp, u8 * error, u8 * good_tcp_udp)
1505 {
1506   u32 flags0;
1507   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1508   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1509   if (is_udp)
1510     {
1511       udp_header_t *udp;
1512       u32 ip_len, udp_len;
1513       i32 len_diff;
1514       udp = ip4_next_header (ip);
1515       /* Verify UDP length. */
1516       ip_len = clib_net_to_host_u16 (ip->length);
1517       udp_len = clib_net_to_host_u16 (udp->length);
1518
1519       len_diff = ip_len - udp_len;
1520       *good_tcp_udp &= len_diff >= 0;
1521       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1522     }
1523 }
1524
/*
 * True when the L4 checksum of a packet still has to be checked in
 * software: the packet is TCP/UDP and its buffer flags contain none of
 * L4_CHECKSUM_COMPUTED, OFFLOAD_TCP_CKSUM or OFFLOAD_UDP_CKSUM.
 *
 * Both arguments are parenthesised and 'flags' is evaluated exactly once,
 * so compound expressions may be passed for either argument.
 */
#define ip4_local_do_l4_check(is_tcp_udp, flags)                        \
    ((is_tcp_udp) && !((flags) & (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED    \
                                  | VNET_BUFFER_F_OFFLOAD_TCP_CKSUM     \
                                  | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)))
1529
1530 static inline uword
1531 ip4_local_inline (vlib_main_t * vm,
1532                   vlib_node_runtime_t * node,
1533                   vlib_frame_t * frame, int head_of_feature_arc)
1534 {
1535   ip4_main_t *im = &ip4_main;
1536   ip_lookup_main_t *lm = &im->lookup_main;
1537   ip_local_next_t next_index;
1538   u32 *from, *to_next, n_left_from, n_left_to_next;
1539   vlib_node_runtime_t *error_node =
1540     vlib_node_get_runtime (vm, ip4_input_node.index);
1541   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1542
1543   from = vlib_frame_vector_args (frame);
1544   n_left_from = frame->n_vectors;
1545   next_index = node->cached_next_index;
1546
1547   if (node->flags & VLIB_NODE_FLAG_TRACE)
1548     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1549
1550   while (n_left_from > 0)
1551     {
1552       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1553
1554       while (n_left_from >= 4 && n_left_to_next >= 2)
1555         {
1556           vlib_buffer_t *p0, *p1;
1557           ip4_header_t *ip0, *ip1;
1558           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1559           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1560           const dpo_id_t *dpo0, *dpo1;
1561           const load_balance_t *lb0, *lb1;
1562           u32 pi0, next0, fib_index0, lbi0;
1563           u32 pi1, next1, fib_index1, lbi1;
1564           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1565           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1566           u32 sw_if_index0, sw_if_index1;
1567
1568           pi0 = to_next[0] = from[0];
1569           pi1 = to_next[1] = from[1];
1570           from += 2;
1571           n_left_from -= 2;
1572           to_next += 2;
1573           n_left_to_next -= 2;
1574
1575           next0 = next1 = IP_LOCAL_NEXT_DROP;
1576           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1577
1578           p0 = vlib_get_buffer (vm, pi0);
1579           p1 = vlib_get_buffer (vm, pi1);
1580
1581           ip0 = vlib_buffer_get_current (p0);
1582           ip1 = vlib_buffer_get_current (p1);
1583
1584           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1585           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1586
1587           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1588           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1589
1590           /* Treat IP frag packets as "experimental" protocol for now
1591              until support of IP frag reassembly is implemented */
1592           proto0 =
1593             ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
1594             ip0->protocol;
1595           proto1 =
1596             ip4_is_fragment (ip1) ? IP_PROTOCOL_VPP_FRAGMENTATION :
1597             ip1->protocol;
1598
1599           if (head_of_feature_arc == 0)
1600             goto skip_checks;
1601
1602           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1603           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1604           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1605           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1606
1607           good_tcp_udp0 =
1608             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1609              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1610                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1611           good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1612                            || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1613                                || p1->flags &
1614                                VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1615
1616           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
1617                              || ip4_local_do_l4_check (is_tcp_udp1,
1618                                                        p1->flags)))
1619             {
1620               if (is_tcp_udp0)
1621                 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1622                                        &good_tcp_udp0);
1623               if (is_tcp_udp1)
1624                 ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
1625                                        &good_tcp_udp1);
1626             }
1627
1628           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1629           error0 = (is_tcp_udp0 && !good_tcp_udp0
1630                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1631           error1 = (is_tcp_udp1 && !good_tcp_udp1
1632                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1633
1634           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1635           fib_index0 =
1636             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1637              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1638
1639           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1640           fib_index1 =
1641             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1642              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1643
1644           /* TODO maybe move to lookup? */
1645           vnet_buffer (p0)->ip.fib_index = fib_index0;
1646           vnet_buffer (p1)->ip.fib_index = fib_index1;
1647
1648           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1649           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1650
1651           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1652           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1653           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1654                                              2);
1655           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1656                                              2);
1657           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1658                                              3);
1659           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1660                                              3);
1661
1662           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1663             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1664           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1665
1666           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1667             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1668           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1669
1670           lb0 = load_balance_get (lbi0);
1671           lb1 = load_balance_get (lbi1);
1672           dpo0 = load_balance_get_bucket_i (lb0, 0);
1673           dpo1 = load_balance_get_bucket_i (lb1, 0);
1674
1675           /*
1676            * Must have a route to source otherwise we drop the packet.
1677            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1678            *
1679            * The checks are:
1680            *  - the source is a receive => it's from us => bogus, do this
1681            *    first since it sets a different error code.
1682            *  - uRPF check for any route to source - accept if passes.
1683            *  - allow packets destined to the broadcast address from unknown sources
1684            */
1685           if (p0->flags & VNET_BUFFER_F_IS_NATED)
1686             goto skip_check0;
1687
1688           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1689                      dpo0->dpoi_type == DPO_RECEIVE) ?
1690                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1691           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1692                      !fib_urpf_check_size (lb0->lb_urpf) &&
1693                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1694                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1695
1696         skip_check0:
1697           if (p1->flags & VNET_BUFFER_F_IS_NATED)
1698             goto skip_checks;
1699
1700           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1701                      dpo1->dpoi_type == DPO_RECEIVE) ?
1702                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1703           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1704                      !fib_urpf_check_size (lb1->lb_urpf) &&
1705                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1706                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1707
1708         skip_checks:
1709
1710           next0 = lm->local_next_by_ip_protocol[proto0];
1711           next1 = lm->local_next_by_ip_protocol[proto1];
1712
1713           next0 =
1714             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1715           next1 =
1716             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1717
1718           p0->error = error0 ? error_node->errors[error0] : 0;
1719           p1->error = error1 ? error_node->errors[error1] : 0;
1720
1721           if (head_of_feature_arc)
1722             {
1723               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1724                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1725               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1726                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1727             }
1728
1729           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1730                                            n_left_to_next, pi0, pi1,
1731                                            next0, next1);
1732         }
1733
1734       while (n_left_from > 0 && n_left_to_next > 0)
1735         {
1736           vlib_buffer_t *p0;
1737           ip4_header_t *ip0;
1738           ip4_fib_mtrie_t *mtrie0;
1739           ip4_fib_mtrie_leaf_t leaf0;
1740           u32 pi0, next0, fib_index0, lbi0;
1741           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1742           load_balance_t *lb0;
1743           const dpo_id_t *dpo0;
1744           u32 sw_if_index0;
1745
1746           pi0 = to_next[0] = from[0];
1747           from += 1;
1748           n_left_from -= 1;
1749           to_next += 1;
1750           n_left_to_next -= 1;
1751
1752           next0 = IP_LOCAL_NEXT_DROP;
1753           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1754
1755           p0 = vlib_get_buffer (vm, pi0);
1756           ip0 = vlib_buffer_get_current (p0);
1757           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1758           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1759
1760           /* Treat IP frag packets as "experimental" protocol for now
1761              until support of IP frag reassembly is implemented */
1762           proto0 =
1763             ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
1764             ip0->protocol;
1765
1766           if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
1767             goto skip_check;
1768
1769           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1770           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1771
1772           good_tcp_udp0 =
1773             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1774              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1775                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1776
1777           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
1778             {
1779               ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1780                                      &good_tcp_udp0);
1781             }
1782
1783           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1784           error0 = (is_tcp_udp0 && !good_tcp_udp0
1785                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1786
1787           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1788           fib_index0 =
1789             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1790              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1791           vnet_buffer (p0)->ip.fib_index = fib_index0;
1792           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1793           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1794           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1795                                              2);
1796           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1797                                              3);
1798           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1799           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1800           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1801
1802           lb0 = load_balance_get (lbi0);
1803           dpo0 = load_balance_get_bucket_i (lb0, 0);
1804
1805           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1806                      dpo0->dpoi_type == DPO_RECEIVE) ?
1807                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1808           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1809                      !fib_urpf_check_size (lb0->lb_urpf) &&
1810                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1811                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1812
1813         skip_check:
1814           next0 = lm->local_next_by_ip_protocol[proto0];
1815           next0 =
1816             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1817
1818           p0->error = error0 ? error_node->errors[error0] : 0;
1819
1820           if (head_of_feature_arc)
1821             {
1822               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1823                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1824             }
1825
1826           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1827                                            n_left_to_next, pi0, next0);
1828         }
1829       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1830     }
1831
1832   return frame->n_vectors;
1833 }
1834
1835 static uword
1836 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1837 {
1838   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1839 }
1840
1841 /* *INDENT-OFF* */
1842 VLIB_REGISTER_NODE (ip4_local_node) =
1843 {
1844   .function = ip4_local,
1845   .name = "ip4-local",
1846   .vector_size = sizeof (u32),
1847   .format_trace = format_ip4_forward_next_trace,
1848   .n_next_nodes = IP_LOCAL_N_NEXT,
1849   .next_nodes =
1850   {
1851     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1852     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1853     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1854     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1855     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1856   },
1857 };
1858 /* *INDENT-ON* */
1859
1860 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1861
1862 static uword
1863 ip4_local_end_of_arc (vlib_main_t * vm,
1864                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1865 {
1866   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1867 }
1868
1869 /* *INDENT-OFF* */
1870 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1871   .function = ip4_local_end_of_arc,
1872   .name = "ip4-local-end-of-arc",
1873   .vector_size = sizeof (u32),
1874
1875   .format_trace = format_ip4_forward_next_trace,
1876   .sibling_of = "ip4-local",
1877 };
1878
1879 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1880
1881 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1882   .arc_name = "ip4-local",
1883   .node_name = "ip4-local-end-of-arc",
1884   .runs_before = 0, /* not before any other features */
1885 };
1886 /* *INDENT-ON* */
1887
1888 void
1889 ip4_register_protocol (u32 protocol, u32 node_index)
1890 {
1891   vlib_main_t *vm = vlib_get_main ();
1892   ip4_main_t *im = &ip4_main;
1893   ip_lookup_main_t *lm = &im->lookup_main;
1894
1895   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1896   lm->local_next_by_ip_protocol[protocol] =
1897     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1898 }
1899
1900 static clib_error_t *
1901 show_ip_local_command_fn (vlib_main_t * vm,
1902                           unformat_input_t * input, vlib_cli_command_t * cmd)
1903 {
1904   ip4_main_t *im = &ip4_main;
1905   ip_lookup_main_t *lm = &im->lookup_main;
1906   int i;
1907
1908   vlib_cli_output (vm, "Protocols handled by ip4_local");
1909   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1910     {
1911       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1912         {
1913           u32 node_index = vlib_get_node (vm,
1914                                           ip4_local_node.index)->
1915             next_nodes[lm->local_next_by_ip_protocol[i]];
1916           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1917                            node_index);
1918         }
1919     }
1920   return 0;
1921 }
1922
1923
1924
1925 /*?
1926  * Display the set of protocols handled by the local IPv4 stack.
1927  *
1928  * @cliexpar
1929  * Example of how to display local protocol table:
1930  * @cliexstart{show ip local}
1931  * Protocols handled by ip4_local
1932  * 1
1933  * 17
1934  * 47
1935  * @cliexend
1936 ?*/
1937 /* *INDENT-OFF* */
1938 VLIB_CLI_COMMAND (show_ip_local, static) =
1939 {
1940   .path = "show ip local",
1941   .function = show_ip_local_command_fn,
1942   .short_help = "show ip local",
1943 };
1944 /* *INDENT-ON* */
1945
/*
 * Shared worker for the "ip4-arp" (is_glean == 0) and "ip4-glean"
 * (is_glean != 0) graph nodes.
 *
 * Every buffer of the frame is enqueued to the IP4_ARP_NEXT_DROP next with
 * its error set to IP4_ARP_ERROR_DROP or IP4_ARP_ERROR_REQUEST_SENT (or to
 * a more specific error further down).  In addition, for a buffer whose
 * (address, tx interface) hash has not been seen since the last reseed and
 * whose adjacency is still of the expected type, an ARP request is built
 * from im->ip4_arp_request_packet_template and handed to the adjacency's
 * rewrite next node.  The address hashed and requested is the packet's
 * destination in the glean case, the adjacency's next-hop otherwise.
 *
 * The hash seeds and the 256-bit "seen" bitmap are function-local statics;
 * they are refreshed once more than 1e-3 (in vlib_time_now units) has
 * elapsed since the previous refresh.
 * NOTE(review): being static, this state is shared by every invocation of
 * the function - confirm that is intended if these nodes can run on more
 * than one thread.
 *
 * Returns frame->n_vectors.
 */
1946 always_inline uword
1947 ip4_arp_inline (vlib_main_t * vm,
1948                 vlib_node_runtime_t * node,
1949                 vlib_frame_t * frame, int is_glean)
1950 {
1951   vnet_main_t *vnm = vnet_get_main ();
1952   ip4_main_t *im = &ip4_main;
1953   ip_lookup_main_t *lm = &im->lookup_main;
1954   u32 *from, *to_next_drop;
1955   uword n_left_from, n_left_to_next_drop, next_index;
1956   static f64 time_last_seed_change = -1e100;
1957   static u32 hash_seeds[3];
1958   static uword hash_bitmap[256 / BITS (uword)];
1959   f64 time_now;
1960
1961   if (node->flags & VLIB_NODE_FLAG_TRACE)
1962     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1963
       /* Refresh the seeds and forget all previously seen hashes once the
        * current seeds are older than 1e-3. */
1964   time_now = vlib_time_now (vm);
1965   if (time_now - time_last_seed_change > 1e-3)
1966     {
1967       uword i;
1968       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1969                                             sizeof (hash_seeds));
1970       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1971         hash_seeds[i] = r[i];
1972
1973       /* Mark all hash keys as not seen before. */
1974       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1975         hash_bitmap[i] = 0;
1976
1977       time_last_seed_change = time_now;
1978     }
1979
1980   from = vlib_frame_vector_args (frame);
1981   n_left_from = frame->n_vectors;
       /* NOTE(review): next_index is assigned here but is not referenced
        * again in this function. */
1982   next_index = node->cached_next_index;
1983   if (next_index == IP4_ARP_NEXT_DROP)
1984     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1985
1986   while (n_left_from > 0)
1987     {
1988       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1989                            to_next_drop, n_left_to_next_drop);
1990
1991       while (n_left_from > 0 && n_left_to_next_drop > 0)
1992         {
1993           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1994           ip_adjacency_t *adj0;
1995           vlib_buffer_t *p0;
1996           ip4_header_t *ip0;
1997           uword bm0;
1998
1999           pi0 = from[0];
2000
2001           p0 = vlib_get_buffer (vm, pi0);
2002
2003           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2004           adj0 = adj_get (adj_index0);
2005           ip0 = vlib_buffer_get_current (p0);
2006
2007           a0 = hash_seeds[0];
2008           b0 = hash_seeds[1];
2009           c0 = hash_seeds[2];
2010
2011           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2012           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2013
2014           if (is_glean)
2015             {
2016               /*
2017                * this is the Glean case, so we are ARPing for the
2018                * packet's destination
2019                */
2020               a0 ^= ip0->dst_address.data_u32;
2021             }
2022           else
2023             {
                   /* ARP case: hash the adjacency's next-hop address. */
2024               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2025             }
2026           b0 ^= sw_if_index0;
2027
2028           hash_v3_mix32 (a0, b0, c0);
2029           hash_v3_finalize32 (a0, b0, c0);
2030
               /* Reduce the hash to a bit position in hash_bitmap: m0 becomes
                * the bit mask within a word and c0 the word index. */
2031           c0 &= BITS (hash_bitmap) - 1;
2032           m0 = (uword) 1 << (c0 % BITS (uword));
2033           c0 = c0 / BITS (uword);
2034
2035           bm0 = hash_bitmap[c0];
2036           drop0 = (bm0 & m0) != 0;
2037
2038           /* Mark it as seen. */
2039           hash_bitmap[c0] = bm0 | m0;
2040
               /* The original buffer always goes onto the drop frame; every
                * 'continue' below therefore leaves it queued there. */
2041           from += 1;
2042           n_left_from -= 1;
2043           to_next_drop[0] = pi0;
2044           to_next_drop += 1;
2045           n_left_to_next_drop -= 1;
2046
2047           p0->error =
2048             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2049                          IP4_ARP_ERROR_REQUEST_SENT];
2050
2051           /*
2052            * The adj has been updated to a rewrite but the DPO that got
2053            * us here hasn't - yet. No big deal, we'll drop while we wait.
2054            */
2055           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2056             continue;
2057
               /* Hash already seen since the last reseed: no new request. */
2058           if (drop0)
2059             continue;
2060
2061           /*
2062            * Can happen if the control-plane is programming tables
2063            * with traffic flowing; at least that's today's lame excuse.
2064            */
2065           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2066               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2067             {
2068               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2069             }
2070           else
2071             /* Send ARP request. */
2072             {
2073               u32 bi0 = 0;
                   /* Note: this b0 shadows the u32 hash variable b0 declared
                    * at the top of the loop body. */
2074               vlib_buffer_t *b0;
2075               ethernet_arp_header_t *h0;
2076               vnet_hw_interface_t *hw_if0;
2077
2078               h0 =
2079                 vlib_packet_template_get_packet (vm,
2080                                                  &im->ip4_arp_request_packet_template,
2081                                                  &bi0);
2082
2083               /* Seems we're out of buffers */
2084               if (PREDICT_FALSE (!h0))
2085                 continue;
2086
2087               /* Add rewrite/encap string for ARP packet. */
2088               vnet_rewrite_one_header (adj0[0], h0,
2089                                        sizeof (ethernet_header_t));
2090
2091               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2092
2093               /* Src ethernet address in ARP header. */
2094               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2095                            hw_if0->hw_address,
2096                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2097
2098               if (is_glean)
2099                 {
2100                   /* The interface's source address is stashed in the Glean Adj */
2101                   h0->ip4_over_ethernet[0].ip4 =
2102                     adj0->sub_type.glean.receive_addr.ip4;
2103
2104                   /* Copy in destination address we are requesting. This is the
2105                    * glean case, so it's the packet's destination.*/
2106                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2107                     ip0->dst_address.data_u32;
2108                 }
2109               else
2110                 {
2111                   /* Src IP address in ARP header. */
2112                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2113                                                   &h0->
2114                                                   ip4_over_ethernet[0].ip4))
2115                     {
2116                       /* No source address available */
2117                       p0->error =
2118                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
                           /* Give back the request buffer taken from the
                            * template; it will not be sent. */
2119                       vlib_buffer_free (vm, &bi0, 1);
2120                       continue;
2121                     }
2122
2123                   /* Copy in destination address we are requesting from the
2124                      incomplete adj */
2125                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2126                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2127                 }
2128
2129               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2130               b0 = vlib_get_buffer (vm, bi0);
2131               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2132               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2133
                   /* Move the buffer start back over the rewrite bytes that
                    * were written in front of the ARP header. */
2134               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2135
                   /* Hand the request to the adjacency's rewrite next node. */
2136               vlib_set_next_frame_buffer (vm, node,
2137                                           adj0->rewrite_header.next_index,
2138                                           bi0);
2139             }
2140         }
2141
2142       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2143     }
2144
2145   return frame->n_vectors;
2146 }
2147
2148 static uword
2149 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2150 {
2151   return (ip4_arp_inline (vm, node, frame, 0));
2152 }
2153
2154 static uword
2155 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2156 {
2157   return (ip4_arp_inline (vm, node, frame, 1));
2158 }
2159
2160 static char *ip4_arp_error_strings[] = {
2161   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2162   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2163   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2164   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2165   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2166   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2167 };
2168
2169 /* *INDENT-OFF* */
2170 VLIB_REGISTER_NODE (ip4_arp_node) =
2171 {
2172   .function = ip4_arp,
2173   .name = "ip4-arp",
2174   .vector_size = sizeof (u32),
2175   .format_trace = format_ip4_forward_next_trace,
2176   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2177   .error_strings = ip4_arp_error_strings,
2178   .n_next_nodes = IP4_ARP_N_NEXT,
2179   .next_nodes =
2180   {
2181     [IP4_ARP_NEXT_DROP] = "error-drop",
2182   },
2183 };
2184
2185 VLIB_REGISTER_NODE (ip4_glean_node) =
2186 {
2187   .function = ip4_glean,
2188   .name = "ip4-glean",
2189   .vector_size = sizeof (u32),
2190   .format_trace = format_ip4_forward_next_trace,
2191   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2192   .error_strings = ip4_arp_error_strings,
2193   .n_next_nodes = IP4_ARP_N_NEXT,
2194   .next_nodes = {
2195   [IP4_ARP_NEXT_DROP] = "error-drop",
2196   },
2197 };
2198 /* *INDENT-ON* */
2199
2200 #define foreach_notrace_ip4_arp_error           \
2201 _(DROP)                                         \
2202 _(REQUEST_SENT)                                 \
2203 _(REPLICATE_DROP)                               \
2204 _(REPLICATE_FAIL)
2205
2206 clib_error_t *
2207 arp_notrace_init (vlib_main_t * vm)
2208 {
2209   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2210
2211   /* don't trace ARP request packets */
2212 #define _(a)                                    \
2213     vnet_pcap_drop_trace_filter_add_del         \
2214         (rt->errors[IP4_ARP_ERROR_##a],         \
2215          1 /* is_add */);
2216   foreach_notrace_ip4_arp_error;
2217 #undef _
2218   return 0;
2219 }
2220
2221 VLIB_INIT_FUNCTION (arp_notrace_init);
2222
2223
2224 /* Send an ARP request to see if given destination is reachable on given interface. */
2225 clib_error_t *
2226 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2227 {
2228   vnet_main_t *vnm = vnet_get_main ();
2229   ip4_main_t *im = &ip4_main;
2230   ethernet_arp_header_t *h;
2231   ip4_address_t *src;
2232   ip_interface_address_t *ia;
2233   ip_adjacency_t *adj;
2234   vnet_hw_interface_t *hi;
2235   vnet_sw_interface_t *si;
2236   vlib_buffer_t *b;
2237   adj_index_t ai;
2238   u32 bi = 0;
2239
2240   si = vnet_get_sw_interface (vnm, sw_if_index);
2241
2242   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2243     {
2244       return clib_error_return (0, "%U: interface %U down",
2245                                 format_ip4_address, dst,
2246                                 format_vnet_sw_if_index_name, vnm,
2247                                 sw_if_index);
2248     }
2249
2250   src =
2251     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2252   if (!src)
2253     {
2254       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2255       return clib_error_return
2256         (0,
2257          "no matching interface address for destination %U (interface %U)",
2258          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2259          sw_if_index);
2260     }
2261
2262   h = vlib_packet_template_get_packet (vm,
2263                                        &im->ip4_arp_request_packet_template,
2264                                        &bi);
2265
2266   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2267   if (PREDICT_FALSE (!hi->hw_address))
2268     {
2269       return clib_error_return (0, "%U: interface %U do not support ip probe",
2270                                 format_ip4_address, dst,
2271                                 format_vnet_sw_if_index_name, vnm,
2272                                 sw_if_index);
2273     }
2274
2275   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2276                sizeof (h->ip4_over_ethernet[0].ethernet));
2277
2278   h->ip4_over_ethernet[0].ip4 = src[0];
2279   h->ip4_over_ethernet[1].ip4 = dst[0];
2280
2281   b = vlib_get_buffer (vm, bi);
2282   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2283     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2284
2285   ip46_address_t nh = {
2286     .ip4 = *dst,
2287   };
2288
2289   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2290                             VNET_LINK_IP4, &nh, sw_if_index);
2291   adj = adj_get (ai);
2292
2293   /* Peer has been previously resolved, retrieve glean adj instead */
2294   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2295     {
2296       adj_unlock (ai);
2297       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
2298       adj = adj_get (ai);
2299     }
2300
2301   /* Add encapsulation string for software interface (e.g. ethernet header). */
2302   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2303   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2304
2305   {
2306     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2307     u32 *to_next = vlib_frame_vector_args (f);
2308     to_next[0] = bi;
2309     f->n_vectors = 1;
2310     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2311   }
2312
2313   adj_unlock (ai);
2314   return /* no error */ 0;
2315 }
2316
/*
 * Next-node slots assigned to next0/next1 by the ip4 rewrite code:
 * the default drop slot and the slot used when a TTL expiry raises an
 * ICMP error.
 */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
2322
2323 always_inline uword
2324 ip4_rewrite_inline (vlib_main_t * vm,
2325                     vlib_node_runtime_t * node,
2326                     vlib_frame_t * frame,
2327                     int do_counters, int is_midchain, int is_mcast)
2328 {
2329   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2330   u32 *from = vlib_frame_vector_args (frame);
2331   u32 n_left_from, n_left_to_next, *to_next, next_index;
2332   vlib_node_runtime_t *error_node =
2333     vlib_node_get_runtime (vm, ip4_input_node.index);
2334
2335   n_left_from = frame->n_vectors;
2336   next_index = node->cached_next_index;
2337   u32 thread_index = vlib_get_thread_index ();
2338
2339   while (n_left_from > 0)
2340     {
2341       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2342
2343       while (n_left_from >= 4 && n_left_to_next >= 2)
2344         {
2345           ip_adjacency_t *adj0, *adj1;
2346           vlib_buffer_t *p0, *p1;
2347           ip4_header_t *ip0, *ip1;
2348           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2349           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2350           u32 tx_sw_if_index0, tx_sw_if_index1;
2351
2352           /* Prefetch next iteration. */
2353           {
2354             vlib_buffer_t *p2, *p3;
2355
2356             p2 = vlib_get_buffer (vm, from[2]);
2357             p3 = vlib_get_buffer (vm, from[3]);
2358
2359             vlib_prefetch_buffer_header (p2, STORE);
2360             vlib_prefetch_buffer_header (p3, STORE);
2361
2362             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2363             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2364           }
2365
2366           pi0 = to_next[0] = from[0];
2367           pi1 = to_next[1] = from[1];
2368
2369           from += 2;
2370           n_left_from -= 2;
2371           to_next += 2;
2372           n_left_to_next -= 2;
2373
2374           p0 = vlib_get_buffer (vm, pi0);
2375           p1 = vlib_get_buffer (vm, pi1);
2376
2377           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2378           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2379
2380           /*
2381            * pre-fetch the per-adjacency counters
2382            */
2383           if (do_counters)
2384             {
2385               vlib_prefetch_combined_counter (&adjacency_counters,
2386                                               thread_index, adj_index0);
2387               vlib_prefetch_combined_counter (&adjacency_counters,
2388                                               thread_index, adj_index1);
2389             }
2390
2391           ip0 = vlib_buffer_get_current (p0);
2392           ip1 = vlib_buffer_get_current (p1);
2393
2394           error0 = error1 = IP4_ERROR_NONE;
2395           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2396
2397           /* Decrement TTL & update checksum.
2398              Works either endian, so no need for byte swap. */
2399           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2400             {
2401               i32 ttl0 = ip0->ttl;
2402
2403               /* Input node should have rejected packets with ttl 0. */
2404               ASSERT (ip0->ttl > 0);
2405
2406               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2407               checksum0 += checksum0 >= 0xffff;
2408
2409               ip0->checksum = checksum0;
2410               ttl0 -= 1;
2411               ip0->ttl = ttl0;
2412
2413               /*
2414                * If the ttl drops below 1 when forwarding, generate
2415                * an ICMP response.
2416                */
2417               if (PREDICT_FALSE (ttl0 <= 0))
2418                 {
2419                   error0 = IP4_ERROR_TIME_EXPIRED;
2420                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2421                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2422                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2423                                                0);
2424                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2425                 }
2426
2427               /* Verify checksum. */
2428               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2429                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2430             }
2431           else
2432             {
2433               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2434             }
2435           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2436             {
2437               i32 ttl1 = ip1->ttl;
2438
2439               /* Input node should have rejected packets with ttl 0. */
2440               ASSERT (ip1->ttl > 0);
2441
2442               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2443               checksum1 += checksum1 >= 0xffff;
2444
2445               ip1->checksum = checksum1;
2446               ttl1 -= 1;
2447               ip1->ttl = ttl1;
2448
2449               /*
2450                * If the ttl drops below 1 when forwarding, generate
2451                * an ICMP response.
2452                */
2453               if (PREDICT_FALSE (ttl1 <= 0))
2454                 {
2455                   error1 = IP4_ERROR_TIME_EXPIRED;
2456                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2457                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2458                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2459                                                0);
2460                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2461                 }
2462
2463               /* Verify checksum. */
2464               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2465                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2466             }
2467           else
2468             {
2469               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2470             }
2471
2472           /* Rewrite packet header and updates lengths. */
2473           adj0 = adj_get (adj_index0);
2474           adj1 = adj_get (adj_index1);
2475
2476           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2477           rw_len0 = adj0[0].rewrite_header.data_bytes;
2478           rw_len1 = adj1[0].rewrite_header.data_bytes;
2479           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2480           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2481
2482           /* Check MTU of outgoing interface. */
2483           error0 =
2484             (vlib_buffer_length_in_chain (vm, p0) >
2485              adj0[0].
2486              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2487              error0);
2488           error1 =
2489             (vlib_buffer_length_in_chain (vm, p1) >
2490              adj1[0].
2491              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2492              error1);
2493
2494           if (is_mcast)
2495             {
2496               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2497                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2498                         IP4_ERROR_SAME_INTERFACE : error0);
2499               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2500                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2501                         IP4_ERROR_SAME_INTERFACE : error1);
2502             }
2503
2504           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2505            * to see the IP header */
2506           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2507             {
2508               next0 = adj0[0].rewrite_header.next_index;
2509               p0->current_data -= rw_len0;
2510               p0->current_length += rw_len0;
2511               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2512               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2513
2514               if (PREDICT_FALSE
2515                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2516                 vnet_feature_arc_start (lm->output_feature_arc_index,
2517                                         tx_sw_if_index0, &next0, p0);
2518             }
2519           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2520             {
2521               next1 = adj1[0].rewrite_header.next_index;
2522               p1->current_data -= rw_len1;
2523               p1->current_length += rw_len1;
2524
2525               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2526               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2527
2528               if (PREDICT_FALSE
2529                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2530                 vnet_feature_arc_start (lm->output_feature_arc_index,
2531                                         tx_sw_if_index1, &next1, p1);
2532             }
2533
2534           /* Guess we are only writing on simple Ethernet header. */
2535           vnet_rewrite_two_headers (adj0[0], adj1[0],
2536                                     ip0, ip1, sizeof (ethernet_header_t));
2537
2538           /*
2539            * Bump the per-adjacency counters
2540            */
2541           if (do_counters)
2542             {
2543               vlib_increment_combined_counter
2544                 (&adjacency_counters,
2545                  thread_index,
2546                  adj_index0, 1,
2547                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2548
2549               vlib_increment_combined_counter
2550                 (&adjacency_counters,
2551                  thread_index,
2552                  adj_index1, 1,
2553                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2554             }
2555
2556           if (is_midchain)
2557             {
2558               adj0->sub_type.midchain.fixup_func
2559                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2560               adj1->sub_type.midchain.fixup_func
2561                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2562             }
2563           if (is_mcast)
2564             {
2565               /*
2566                * copy bytes from the IP address into the MAC rewrite
2567                */
2568               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2569               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2570             }
2571
2572           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2573                                            to_next, n_left_to_next,
2574                                            pi0, pi1, next0, next1);
2575         }
2576
2577       while (n_left_from > 0 && n_left_to_next > 0)
2578         {
2579           ip_adjacency_t *adj0;
2580           vlib_buffer_t *p0;
2581           ip4_header_t *ip0;
2582           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2583           u32 tx_sw_if_index0;
2584
2585           pi0 = to_next[0] = from[0];
2586
2587           p0 = vlib_get_buffer (vm, pi0);
2588
2589           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2590
2591           adj0 = adj_get (adj_index0);
2592
2593           ip0 = vlib_buffer_get_current (p0);
2594
2595           error0 = IP4_ERROR_NONE;
2596           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2597
2598           /* Decrement TTL & update checksum. */
2599           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2600             {
2601               i32 ttl0 = ip0->ttl;
2602
2603               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2604
2605               checksum0 += checksum0 >= 0xffff;
2606
2607               ip0->checksum = checksum0;
2608
2609               ASSERT (ip0->ttl > 0);
2610
2611               ttl0 -= 1;
2612
2613               ip0->ttl = ttl0;
2614
2615               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2616                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2617
2618               if (PREDICT_FALSE (ttl0 <= 0))
2619                 {
2620                   /*
2621                    * If the ttl drops below 1 when forwarding, generate
2622                    * an ICMP response.
2623                    */
2624                   error0 = IP4_ERROR_TIME_EXPIRED;
2625                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2626                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2627                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2628                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2629                                                0);
2630                 }
2631             }
2632           else
2633             {
2634               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2635             }
2636
2637           if (do_counters)
2638             vlib_prefetch_combined_counter (&adjacency_counters,
2639                                             thread_index, adj_index0);
2640
2641           /* Guess we are only writing on simple Ethernet header. */
2642           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2643           if (is_mcast)
2644             {
2645               /*
2646                * copy bytes from the IP address into the MAC rewrite
2647                */
2648               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2649             }
2650
2651           /* Update packet buffer attributes/set output interface. */
2652           rw_len0 = adj0[0].rewrite_header.data_bytes;
2653           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2654
2655           if (do_counters)
2656             vlib_increment_combined_counter
2657               (&adjacency_counters,
2658                thread_index, adj_index0, 1,
2659                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2660
2661           /* Check MTU of outgoing interface. */
2662           error0 = (vlib_buffer_length_in_chain (vm, p0)
2663                     > adj0[0].rewrite_header.max_l3_packet_bytes
2664                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2665           if (is_mcast)
2666             {
2667               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2668                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2669                         IP4_ERROR_SAME_INTERFACE : error0);
2670             }
2671           p0->error = error_node->errors[error0];
2672
2673           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2674            * to see the IP header */
2675           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2676             {
2677               p0->current_data -= rw_len0;
2678               p0->current_length += rw_len0;
2679               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2680
2681               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2682               next0 = adj0[0].rewrite_header.next_index;
2683
2684               if (is_midchain)
2685                 {
2686                   adj0->sub_type.midchain.fixup_func
2687                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2688                 }
2689
2690               if (PREDICT_FALSE
2691                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2692                 vnet_feature_arc_start (lm->output_feature_arc_index,
2693                                         tx_sw_if_index0, &next0, p0);
2694
2695             }
2696
2697           from += 1;
2698           n_left_from -= 1;
2699           to_next += 1;
2700           n_left_to_next -= 1;
2701
2702           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2703                                            to_next, n_left_to_next,
2704                                            pi0, next0);
2705         }
2706
2707       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2708     }
2709
2710   /* Need to do trace after rewrites to pick up new packet data. */
2711   if (node->flags & VLIB_NODE_FLAG_TRACE)
2712     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2713
2714   return frame->n_vectors;
2715 }
2716
2717
2718 /** @brief IPv4 rewrite node.
2719     @node ip4-rewrite
2720
2721     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2722     header checksum, fetch the ip adjacency, check the outbound mtu,
2723     apply the adjacency rewrite, and send pkts to the adjacency
2724     rewrite header's rewrite_next_index.
2725
2726     @param vm vlib_main_t corresponding to the current thread
2727     @param node vlib_node_runtime_t
2728     @param frame vlib_frame_t whose contents should be dispatched
2729
2730     @par Graph mechanics: buffer metadata, next index usage
2731
2732     @em Uses:
2733     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2734         - the rewrite adjacency index
2735     - <code>adj->lookup_next_index</code>
2736         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2737           the packet will be dropped.
2738     - <code>adj->rewrite_header</code>
2739         - Rewrite string length, rewrite string, next_index
2740
2741     @em Sets:
2742     - <code>b->current_data, b->current_length</code>
2743         - Updated net of applying the rewrite string
2744
2745     <em>Next Indices:</em>
2746     - <code> adj->rewrite_header.next_index </code>
2747       or @c ip4-drop
2748 */
2749 static uword
2750 ip4_rewrite (vlib_main_t * vm,
2751              vlib_node_runtime_t * node, vlib_frame_t * frame)
2752 {
2753   if (adj_are_counters_enabled ())
2754     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2755   else
2756     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2757 }
2758
2759 static uword
2760 ip4_midchain (vlib_main_t * vm,
2761               vlib_node_runtime_t * node, vlib_frame_t * frame)
2762 {
2763   if (adj_are_counters_enabled ())
2764     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2765   else
2766     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2767 }
2768
2769 static uword
2770 ip4_rewrite_mcast (vlib_main_t * vm,
2771                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2772 {
2773   if (adj_are_counters_enabled ())
2774     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2775   else
2776     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2777 }
2778
2779 static uword
2780 ip4_mcast_midchain (vlib_main_t * vm,
2781                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2782 {
2783   if (adj_are_counters_enabled ())
2784     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2785   else
2786     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2787 }
2788
2789 /* *INDENT-OFF* */
2790 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2791   .function = ip4_rewrite,
2792   .name = "ip4-rewrite",
2793   .vector_size = sizeof (u32),
2794
2795   .format_trace = format_ip4_rewrite_trace,
2796
2797   .n_next_nodes = 2,
2798   .next_nodes = {
2799     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2800     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2801   },
2802 };
2803 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2804
2805 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2806   .function = ip4_rewrite_mcast,
2807   .name = "ip4-rewrite-mcast",
2808   .vector_size = sizeof (u32),
2809
2810   .format_trace = format_ip4_rewrite_trace,
2811   .sibling_of = "ip4-rewrite",
2812 };
2813 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2814
2815 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2816   .function = ip4_mcast_midchain,
2817   .name = "ip4-mcast-midchain",
2818   .vector_size = sizeof (u32),
2819
2820   .format_trace = format_ip4_rewrite_trace,
2821   .sibling_of = "ip4-rewrite",
2822 };
2823 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2824
2825 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2826   .function = ip4_midchain,
2827   .name = "ip4-midchain",
2828   .vector_size = sizeof (u32),
2829   .format_trace = format_ip4_forward_next_trace,
2830   .sibling_of =  "ip4-rewrite",
2831 };
2832 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2833 /* *INDENT-ON */
2834
2835 int
2836 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2837 {
2838   ip4_fib_mtrie_t *mtrie0;
2839   ip4_fib_mtrie_leaf_t leaf0;
2840   u32 lbi0;
2841
2842   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2843
2844   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2845   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2846   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2847
2848   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2849
2850   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2851 }
2852
2853 static clib_error_t *
2854 test_lookup_command_fn (vlib_main_t * vm,
2855                         unformat_input_t * input, vlib_cli_command_t * cmd)
2856 {
2857   ip4_fib_t *fib;
2858   u32 table_id = 0;
2859   f64 count = 1;
2860   u32 n;
2861   int i;
2862   ip4_address_t ip4_base_address;
2863   u64 errors = 0;
2864
2865   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2866     {
2867       if (unformat (input, "table %d", &table_id))
2868         {
2869           /* Make sure the entry exists. */
2870           fib = ip4_fib_get (table_id);
2871           if ((fib) && (fib->index != table_id))
2872             return clib_error_return (0, "<fib-index> %d does not exist",
2873                                       table_id);
2874         }
2875       else if (unformat (input, "count %f", &count))
2876         ;
2877
2878       else if (unformat (input, "%U",
2879                          unformat_ip4_address, &ip4_base_address))
2880         ;
2881       else
2882         return clib_error_return (0, "unknown input `%U'",
2883                                   format_unformat_error, input);
2884     }
2885
2886   n = count;
2887
2888   for (i = 0; i < n; i++)
2889     {
2890       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2891         errors++;
2892
2893       ip4_base_address.as_u32 =
2894         clib_host_to_net_u32 (1 +
2895                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2896     }
2897
2898   if (errors)
2899     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2900   else
2901     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2902
2903   return 0;
2904 }
2905
2906 /*?
2907  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2908  * given FIB table to determine if there is a conflict with the
2909  * adjacency table. The fib-id can be determined by using the
2910  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2911  * of 0 is used.
2912  *
2913  * @todo This command uses fib-id, other commands use table-id (not
2914  * just a name, they are different indexes). Would like to change this
2915  * to table-id for consistency.
2916  *
2917  * @cliexpar
2918  * Example of how to run the test lookup command:
2919  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2920  * No errors in 2 lookups
2921  * @cliexend
2922 ?*/
2923 /* *INDENT-OFF* */
2924 VLIB_CLI_COMMAND (lookup_test_command, static) =
2925 {
2926   .path = "test lookup",
2927   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2928   .function = test_lookup_command_fn,
2929 };
2930 /* *INDENT-ON* */
2931
2932 int
2933 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2934 {
2935   u32 fib_index;
2936
2937   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2938
2939   if (~0 == fib_index)
2940     return VNET_API_ERROR_NO_SUCH_FIB;
2941
2942   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2943                                   flow_hash_config);
2944
2945   return 0;
2946 }
2947
2948 static clib_error_t *
2949 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2950                              unformat_input_t * input,
2951                              vlib_cli_command_t * cmd)
2952 {
2953   int matched = 0;
2954   u32 table_id = 0;
2955   u32 flow_hash_config = 0;
2956   int rv;
2957
2958   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2959     {
2960       if (unformat (input, "table %d", &table_id))
2961         matched = 1;
2962 #define _(a,v) \
2963     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2964       foreach_flow_hash_bit
2965 #undef _
2966         else
2967         break;
2968     }
2969
2970   if (matched == 0)
2971     return clib_error_return (0, "unknown input `%U'",
2972                               format_unformat_error, input);
2973
2974   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2975   switch (rv)
2976     {
2977     case 0:
2978       break;
2979
2980     case VNET_API_ERROR_NO_SUCH_FIB:
2981       return clib_error_return (0, "no such FIB table %d", table_id);
2982
2983     default:
2984       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2985       break;
2986     }
2987
2988   return 0;
2989 }
2990
2991 /*?
2992  * Configure the set of IPv4 fields used by the flow hash.
2993  *
2994  * @cliexpar
2995  * Example of how to set the flow hash on a given table:
2996  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2997  * Example of display the configured flow hash:
2998  * @cliexstart{show ip fib}
2999  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3000  * 0.0.0.0/0
3001  *   unicast-ip4-chain
3002  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3003  *     [0] [@0]: dpo-drop ip6
3004  * 0.0.0.0/32
3005  *   unicast-ip4-chain
3006  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3007  *     [0] [@0]: dpo-drop ip6
3008  * 224.0.0.0/8
3009  *   unicast-ip4-chain
3010  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3011  *     [0] [@0]: dpo-drop ip6
3012  * 6.0.1.2/32
3013  *   unicast-ip4-chain
3014  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3015  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3016  * 7.0.0.1/32
3017  *   unicast-ip4-chain
3018  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3019  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3020  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3021  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3022  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3023  * 240.0.0.0/8
3024  *   unicast-ip4-chain
3025  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3026  *     [0] [@0]: dpo-drop ip6
3027  * 255.255.255.255/32
3028  *   unicast-ip4-chain
3029  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3030  *     [0] [@0]: dpo-drop ip6
3031  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3032  * 0.0.0.0/0
3033  *   unicast-ip4-chain
3034  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3035  *     [0] [@0]: dpo-drop ip6
3036  * 0.0.0.0/32
3037  *   unicast-ip4-chain
3038  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3039  *     [0] [@0]: dpo-drop ip6
3040  * 172.16.1.0/24
3041  *   unicast-ip4-chain
3042  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3043  *     [0] [@4]: ipv4-glean: af_packet0
3044  * 172.16.1.1/32
3045  *   unicast-ip4-chain
3046  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3047  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3048  * 172.16.1.2/32
3049  *   unicast-ip4-chain
3050  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3051  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3052  * 172.16.2.0/24
3053  *   unicast-ip4-chain
3054  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3055  *     [0] [@4]: ipv4-glean: af_packet1
3056  * 172.16.2.1/32
3057  *   unicast-ip4-chain
3058  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3059  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3060  * 224.0.0.0/8
3061  *   unicast-ip4-chain
3062  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3063  *     [0] [@0]: dpo-drop ip6
3064  * 240.0.0.0/8
3065  *   unicast-ip4-chain
3066  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3067  *     [0] [@0]: dpo-drop ip6
3068  * 255.255.255.255/32
3069  *   unicast-ip4-chain
3070  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3071  *     [0] [@0]: dpo-drop ip6
3072  * @cliexend
3073 ?*/
3074 /* *INDENT-OFF* */
3075 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3076 {
3077   .path = "set ip flow-hash",
3078   .short_help =
3079   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3080   .function = set_ip_flow_hash_command_fn,
3081 };
3082 /* *INDENT-ON* */
3083
3084 int
3085 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3086                              u32 table_index)
3087 {
3088   vnet_main_t *vnm = vnet_get_main ();
3089   vnet_interface_main_t *im = &vnm->interface_main;
3090   ip4_main_t *ipm = &ip4_main;
3091   ip_lookup_main_t *lm = &ipm->lookup_main;
3092   vnet_classify_main_t *cm = &vnet_classify_main;
3093   ip4_address_t *if_addr;
3094
3095   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3096     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3097
3098   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3099     return VNET_API_ERROR_NO_SUCH_ENTRY;
3100
3101   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3102   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3103
3104   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3105
3106   if (NULL != if_addr)
3107     {
3108       fib_prefix_t pfx = {
3109         .fp_len = 32,
3110         .fp_proto = FIB_PROTOCOL_IP4,
3111         .fp_addr.ip4 = *if_addr,
3112       };
3113       u32 fib_index;
3114
3115       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3116                                                        sw_if_index);
3117
3118
3119       if (table_index != (u32) ~ 0)
3120         {
3121           dpo_id_t dpo = DPO_INVALID;
3122
3123           dpo_set (&dpo,
3124                    DPO_CLASSIFY,
3125                    DPO_PROTO_IP4,
3126                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3127
3128           fib_table_entry_special_dpo_add (fib_index,
3129                                            &pfx,
3130                                            FIB_SOURCE_CLASSIFY,
3131                                            FIB_ENTRY_FLAG_NONE, &dpo);
3132           dpo_reset (&dpo);
3133         }
3134       else
3135         {
3136           fib_table_entry_special_remove (fib_index,
3137                                           &pfx, FIB_SOURCE_CLASSIFY);
3138         }
3139     }
3140
3141   return 0;
3142 }
3143
3144 static clib_error_t *
3145 set_ip_classify_command_fn (vlib_main_t * vm,
3146                             unformat_input_t * input,
3147                             vlib_cli_command_t * cmd)
3148 {
3149   u32 table_index = ~0;
3150   int table_index_set = 0;
3151   u32 sw_if_index = ~0;
3152   int rv;
3153
3154   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3155     {
3156       if (unformat (input, "table-index %d", &table_index))
3157         table_index_set = 1;
3158       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3159                          vnet_get_main (), &sw_if_index))
3160         ;
3161       else
3162         break;
3163     }
3164
3165   if (table_index_set == 0)
3166     return clib_error_return (0, "classify table-index must be specified");
3167
3168   if (sw_if_index == ~0)
3169     return clib_error_return (0, "interface / subif must be specified");
3170
3171   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3172
3173   switch (rv)
3174     {
3175     case 0:
3176       break;
3177
3178     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3179       return clib_error_return (0, "No such interface");
3180
3181     case VNET_API_ERROR_NO_SUCH_ENTRY:
3182       return clib_error_return (0, "No such classifier table");
3183     }
3184   return 0;
3185 }
3186
3187 /*?
3188  * Assign a classification table to an interface. The classification
3189  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3190  * commands. Once the table is created, use this command to filter packets
3191  * on an interface.
3192  *
3193  * @cliexpar
3194  * Example of how to assign a classification table to an interface:
3195  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3196 ?*/
3197 /* *INDENT-OFF* */
3198 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3199 {
3200     .path = "set ip classify",
3201     .short_help =
3202     "set ip classify intfc <interface> table-index <classify-idx>",
3203     .function = set_ip_classify_command_fn,
3204 };
3205 /* *INDENT-ON* */
3206
3207 static clib_error_t *
3208 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3209 {
3210   ip4_main_t *im = &ip4_main;
3211   uword heapsize = 0;
3212
3213   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3214     {
3215       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3216         ;
3217       else
3218         return clib_error_return (0,
3219                                   "invalid heap-size parameter `%U'",
3220                                   format_unformat_error, input);
3221     }
3222
3223   im->mtrie_heap_size = heapsize;
3224
3225   return 0;
3226 }
3227
3228 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3229
3230 /*
3231  * fd.io coding-style-patch-verification: ON
3232  *
3233  * Local Variables:
3234  * eval: (c-set-style "gnu")
3235  * End:
3236  */