IPv4/6 reassembly
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 /**
57  * @file
58  * @brief IPv4 Forwarding.
59  *
60  * This file contains the source code for IPv4 forwarding.
61  */
62
63 always_inline uword
64 ip4_lookup_inline (vlib_main_t * vm,
65                    vlib_node_runtime_t * node,
66                    vlib_frame_t * frame,
67                    int lookup_for_responses_to_locally_received_packets)
68 {
69   ip4_main_t *im = &ip4_main;
70   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
71   u32 n_left_from, n_left_to_next, *from, *to_next;
72   ip_lookup_next_t next;
73   u32 thread_index = vlib_get_thread_index ();
74
75   from = vlib_frame_vector_args (frame);
76   n_left_from = frame->n_vectors;
77   next = node->cached_next_index;
78
79   while (n_left_from > 0)
80     {
81       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
82
83       while (n_left_from >= 8 && n_left_to_next >= 4)
84         {
85           vlib_buffer_t *p0, *p1, *p2, *p3;
86           ip4_header_t *ip0, *ip1, *ip2, *ip3;
87           ip_lookup_next_t next0, next1, next2, next3;
88           const load_balance_t *lb0, *lb1, *lb2, *lb3;
89           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
90           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
91           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
92           u32 pi0, fib_index0, lb_index0;
93           u32 pi1, fib_index1, lb_index1;
94           u32 pi2, fib_index2, lb_index2;
95           u32 pi3, fib_index3, lb_index3;
96           flow_hash_config_t flow_hash_config0, flow_hash_config1;
97           flow_hash_config_t flow_hash_config2, flow_hash_config3;
98           u32 hash_c0, hash_c1, hash_c2, hash_c3;
99           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
100
101           /* Prefetch next iteration. */
102           {
103             vlib_buffer_t *p4, *p5, *p6, *p7;
104
105             p4 = vlib_get_buffer (vm, from[4]);
106             p5 = vlib_get_buffer (vm, from[5]);
107             p6 = vlib_get_buffer (vm, from[6]);
108             p7 = vlib_get_buffer (vm, from[7]);
109
110             vlib_prefetch_buffer_header (p4, LOAD);
111             vlib_prefetch_buffer_header (p5, LOAD);
112             vlib_prefetch_buffer_header (p6, LOAD);
113             vlib_prefetch_buffer_header (p7, LOAD);
114
115             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
116             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
117             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
118             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
119           }
120
121           pi0 = to_next[0] = from[0];
122           pi1 = to_next[1] = from[1];
123           pi2 = to_next[2] = from[2];
124           pi3 = to_next[3] = from[3];
125
126           from += 4;
127           to_next += 4;
128           n_left_to_next -= 4;
129           n_left_from -= 4;
130
131           p0 = vlib_get_buffer (vm, pi0);
132           p1 = vlib_get_buffer (vm, pi1);
133           p2 = vlib_get_buffer (vm, pi2);
134           p3 = vlib_get_buffer (vm, pi3);
135
136           ip0 = vlib_buffer_get_current (p0);
137           ip1 = vlib_buffer_get_current (p1);
138           ip2 = vlib_buffer_get_current (p2);
139           ip3 = vlib_buffer_get_current (p3);
140
141           dst_addr0 = &ip0->dst_address;
142           dst_addr1 = &ip1->dst_address;
143           dst_addr2 = &ip2->dst_address;
144           dst_addr3 = &ip3->dst_address;
145
146           fib_index0 =
147             vec_elt (im->fib_index_by_sw_if_index,
148                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
149           fib_index1 =
150             vec_elt (im->fib_index_by_sw_if_index,
151                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
152           fib_index2 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
158           fib_index0 =
159             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
160              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
161           fib_index1 =
162             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
163              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
164           fib_index2 =
165             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
167           fib_index3 =
168             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
170
171
172           if (!lookup_for_responses_to_locally_received_packets)
173             {
174               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
175               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
176               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
177               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
178
179               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
180               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
181               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
182               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
183             }
184
185           if (!lookup_for_responses_to_locally_received_packets)
186             {
187               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
188               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
189               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
190               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
191             }
192
193           if (!lookup_for_responses_to_locally_received_packets)
194             {
195               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
196               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
197               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
198               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
199             }
200
201           if (lookup_for_responses_to_locally_received_packets)
202             {
203               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
204               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
205               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
206               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
207             }
208           else
209             {
210               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
211               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
212               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
213               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
214             }
215
216           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
217           lb0 = load_balance_get (lb_index0);
218           lb1 = load_balance_get (lb_index1);
219           lb2 = load_balance_get (lb_index2);
220           lb3 = load_balance_get (lb_index3);
221
222           ASSERT (lb0->lb_n_buckets > 0);
223           ASSERT (is_pow2 (lb0->lb_n_buckets));
224           ASSERT (lb1->lb_n_buckets > 0);
225           ASSERT (is_pow2 (lb1->lb_n_buckets));
226           ASSERT (lb2->lb_n_buckets > 0);
227           ASSERT (is_pow2 (lb2->lb_n_buckets));
228           ASSERT (lb3->lb_n_buckets > 0);
229           ASSERT (is_pow2 (lb3->lb_n_buckets));
230
231           /* Use flow hash to compute multipath adjacency. */
232           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
233           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
234           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
235           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
236           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
237             {
238               flow_hash_config0 = lb0->lb_hash_config;
239               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, flow_hash_config0);
241               dpo0 =
242                 load_balance_get_fwd_bucket (lb0,
243                                              (hash_c0 &
244                                               (lb0->lb_n_buckets_minus_1)));
245             }
246           else
247             {
248               dpo0 = load_balance_get_bucket_i (lb0, 0);
249             }
250           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
251             {
252               flow_hash_config1 = lb1->lb_hash_config;
253               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
254                 ip4_compute_flow_hash (ip1, flow_hash_config1);
255               dpo1 =
256                 load_balance_get_fwd_bucket (lb1,
257                                              (hash_c1 &
258                                               (lb1->lb_n_buckets_minus_1)));
259             }
260           else
261             {
262               dpo1 = load_balance_get_bucket_i (lb1, 0);
263             }
264           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
265             {
266               flow_hash_config2 = lb2->lb_hash_config;
267               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
268                 ip4_compute_flow_hash (ip2, flow_hash_config2);
269               dpo2 =
270                 load_balance_get_fwd_bucket (lb2,
271                                              (hash_c2 &
272                                               (lb2->lb_n_buckets_minus_1)));
273             }
274           else
275             {
276               dpo2 = load_balance_get_bucket_i (lb2, 0);
277             }
278           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
279             {
280               flow_hash_config3 = lb3->lb_hash_config;
281               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
282                 ip4_compute_flow_hash (ip3, flow_hash_config3);
283               dpo3 =
284                 load_balance_get_fwd_bucket (lb3,
285                                              (hash_c3 &
286                                               (lb3->lb_n_buckets_minus_1)));
287             }
288           else
289             {
290               dpo3 = load_balance_get_bucket_i (lb3, 0);
291             }
292
293           next0 = dpo0->dpoi_next_node;
294           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
295           next1 = dpo1->dpoi_next_node;
296           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
297           next2 = dpo2->dpoi_next_node;
298           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
299           next3 = dpo3->dpoi_next_node;
300           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
301
302           vlib_increment_combined_counter
303             (cm, thread_index, lb_index0, 1,
304              vlib_buffer_length_in_chain (vm, p0));
305           vlib_increment_combined_counter
306             (cm, thread_index, lb_index1, 1,
307              vlib_buffer_length_in_chain (vm, p1));
308           vlib_increment_combined_counter
309             (cm, thread_index, lb_index2, 1,
310              vlib_buffer_length_in_chain (vm, p2));
311           vlib_increment_combined_counter
312             (cm, thread_index, lb_index3, 1,
313              vlib_buffer_length_in_chain (vm, p3));
314
315           vlib_validate_buffer_enqueue_x4 (vm, node, next,
316                                            to_next, n_left_to_next,
317                                            pi0, pi1, pi2, pi3,
318                                            next0, next1, next2, next3);
319         }
320
321       while (n_left_from > 0 && n_left_to_next > 0)
322         {
323           vlib_buffer_t *p0;
324           ip4_header_t *ip0;
325           ip_lookup_next_t next0;
326           const load_balance_t *lb0;
327           ip4_fib_mtrie_t *mtrie0;
328           ip4_fib_mtrie_leaf_t leaf0;
329           ip4_address_t *dst_addr0;
330           u32 pi0, fib_index0, lbi0;
331           flow_hash_config_t flow_hash_config0;
332           const dpo_id_t *dpo0;
333           u32 hash_c0;
334
335           pi0 = from[0];
336           to_next[0] = pi0;
337
338           p0 = vlib_get_buffer (vm, pi0);
339
340           ip0 = vlib_buffer_get_current (p0);
341
342           dst_addr0 = &ip0->dst_address;
343
344           fib_index0 =
345             vec_elt (im->fib_index_by_sw_if_index,
346                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
347           fib_index0 =
348             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
349              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
350
351           if (!lookup_for_responses_to_locally_received_packets)
352             {
353               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
354
355               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
356             }
357
358           if (!lookup_for_responses_to_locally_received_packets)
359             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
360
361           if (!lookup_for_responses_to_locally_received_packets)
362             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
363
364           if (lookup_for_responses_to_locally_received_packets)
365             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
366           else
367             {
368               /* Handle default route. */
369               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
370             }
371
372           ASSERT (lbi0);
373           lb0 = load_balance_get (lbi0);
374
375           ASSERT (lb0->lb_n_buckets > 0);
376           ASSERT (is_pow2 (lb0->lb_n_buckets));
377
378           /* Use flow hash to compute multipath adjacency. */
379           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
380           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
381             {
382               flow_hash_config0 = lb0->lb_hash_config;
383
384               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
385                 ip4_compute_flow_hash (ip0, flow_hash_config0);
386               dpo0 =
387                 load_balance_get_fwd_bucket (lb0,
388                                              (hash_c0 &
389                                               (lb0->lb_n_buckets_minus_1)));
390             }
391           else
392             {
393               dpo0 = load_balance_get_bucket_i (lb0, 0);
394             }
395
396           next0 = dpo0->dpoi_next_node;
397           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
398
399           vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
400                                            vlib_buffer_length_in_chain (vm,
401                                                                         p0));
402
403           from += 1;
404           to_next += 1;
405           n_left_to_next -= 1;
406           n_left_from -= 1;
407
408           if (PREDICT_FALSE (next0 != next))
409             {
410               n_left_to_next += 1;
411               vlib_put_next_frame (vm, node, next, n_left_to_next);
412               next = next0;
413               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
414               to_next[0] = pi0;
415               to_next += 1;
416               n_left_to_next -= 1;
417             }
418         }
419
420       vlib_put_next_frame (vm, node, next, n_left_to_next);
421     }
422
423   if (node->flags & VLIB_NODE_FLAG_TRACE)
424     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
425
426   return frame->n_vectors;
427 }
428
429 /** @brief IPv4 lookup node.
430     @node ip4-lookup
431
432     This is the main IPv4 lookup dispatch node.
433
434     @param vm vlib_main_t corresponding to the current thread
435     @param node vlib_node_runtime_t
436     @param frame vlib_frame_t whose contents should be dispatched
437
438     @par Graph mechanics: buffer metadata, next index usage
439
440     @em Uses:
441     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
442         - Indicates the @c sw_if_index value of the interface that the
443           packet was received on.
444     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
445         - When the value is @c ~0 then the node performs a longest prefix
446           match (LPM) for the packet destination address in the FIB attached
447           to the receive interface.
448         - Otherwise perform LPM for the packet destination address in the
449           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
450           value (0, 1, ...) and not a VRF id.
451
452     @em Sets:
453     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
454         - The lookup result adjacency index.
455
456     <em>Next Index:</em>
457     - Dispatches the packet to the node index found in
458       ip_adjacency_t @c adj->lookup_next_index
459       (where @c adj is the lookup result adjacency).
460 */
461 static uword
462 ip4_lookup (vlib_main_t * vm,
463             vlib_node_runtime_t * node, vlib_frame_t * frame)
464 {
465   return ip4_lookup_inline (vm, node, frame,
466                             /* lookup_for_responses_to_locally_received_packets */
467                             0);
468
469 }
470
471 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
472
473 /* *INDENT-OFF* */
474 VLIB_REGISTER_NODE (ip4_lookup_node) =
475 {
476   .function = ip4_lookup,
477   .name = "ip4-lookup",
478   .vector_size = sizeof (u32),
479   .format_trace = format_ip4_lookup_trace,
480   .n_next_nodes = IP_LOOKUP_N_NEXT,
481   .next_nodes = IP4_LOOKUP_NEXT_NODES,
482 };
483 /* *INDENT-ON* */
484
485 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
486
487 always_inline uword
488 ip4_load_balance (vlib_main_t * vm,
489                   vlib_node_runtime_t * node, vlib_frame_t * frame)
490 {
491   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
492   u32 n_left_from, n_left_to_next, *from, *to_next;
493   ip_lookup_next_t next;
494   u32 thread_index = vlib_get_thread_index ();
495
496   from = vlib_frame_vector_args (frame);
497   n_left_from = frame->n_vectors;
498   next = node->cached_next_index;
499
500   if (node->flags & VLIB_NODE_FLAG_TRACE)
501     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
502
503   while (n_left_from > 0)
504     {
505       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
506
507
508       while (n_left_from >= 4 && n_left_to_next >= 2)
509         {
510           ip_lookup_next_t next0, next1;
511           const load_balance_t *lb0, *lb1;
512           vlib_buffer_t *p0, *p1;
513           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
514           const ip4_header_t *ip0, *ip1;
515           const dpo_id_t *dpo0, *dpo1;
516
517           /* Prefetch next iteration. */
518           {
519             vlib_buffer_t *p2, *p3;
520
521             p2 = vlib_get_buffer (vm, from[2]);
522             p3 = vlib_get_buffer (vm, from[3]);
523
524             vlib_prefetch_buffer_header (p2, STORE);
525             vlib_prefetch_buffer_header (p3, STORE);
526
527             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
528             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
529           }
530
531           pi0 = to_next[0] = from[0];
532           pi1 = to_next[1] = from[1];
533
534           from += 2;
535           n_left_from -= 2;
536           to_next += 2;
537           n_left_to_next -= 2;
538
539           p0 = vlib_get_buffer (vm, pi0);
540           p1 = vlib_get_buffer (vm, pi1);
541
542           ip0 = vlib_buffer_get_current (p0);
543           ip1 = vlib_buffer_get_current (p1);
544           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
545           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
546
547           lb0 = load_balance_get (lbi0);
548           lb1 = load_balance_get (lbi1);
549
550           /*
551            * this node is for via FIBs we can re-use the hash value from the
552            * to node if present.
553            * We don't want to use the same hash value at each level in the recursion
554            * graph as that would lead to polarisation
555            */
556           hc0 = hc1 = 0;
557
558           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
559             {
560               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
561                 {
562                   hc0 = vnet_buffer (p0)->ip.flow_hash =
563                     vnet_buffer (p0)->ip.flow_hash >> 1;
564                 }
565               else
566                 {
567                   hc0 = vnet_buffer (p0)->ip.flow_hash =
568                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
569                 }
570               dpo0 = load_balance_get_fwd_bucket
571                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
572             }
573           else
574             {
575               dpo0 = load_balance_get_bucket_i (lb0, 0);
576             }
577           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
578             {
579               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
580                 {
581                   hc1 = vnet_buffer (p1)->ip.flow_hash =
582                     vnet_buffer (p1)->ip.flow_hash >> 1;
583                 }
584               else
585                 {
586                   hc1 = vnet_buffer (p1)->ip.flow_hash =
587                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
588                 }
589               dpo1 = load_balance_get_fwd_bucket
590                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
591             }
592           else
593             {
594               dpo1 = load_balance_get_bucket_i (lb1, 0);
595             }
596
597           next0 = dpo0->dpoi_next_node;
598           next1 = dpo1->dpoi_next_node;
599
600           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
602
603           vlib_increment_combined_counter
604             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
605           vlib_increment_combined_counter
606             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
607
608           vlib_validate_buffer_enqueue_x2 (vm, node, next,
609                                            to_next, n_left_to_next,
610                                            pi0, pi1, next0, next1);
611         }
612
613       while (n_left_from > 0 && n_left_to_next > 0)
614         {
615           ip_lookup_next_t next0;
616           const load_balance_t *lb0;
617           vlib_buffer_t *p0;
618           u32 pi0, lbi0, hc0;
619           const ip4_header_t *ip0;
620           const dpo_id_t *dpo0;
621
622           pi0 = from[0];
623           to_next[0] = pi0;
624           from += 1;
625           to_next += 1;
626           n_left_to_next -= 1;
627           n_left_from -= 1;
628
629           p0 = vlib_get_buffer (vm, pi0);
630
631           ip0 = vlib_buffer_get_current (p0);
632           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
633
634           lb0 = load_balance_get (lbi0);
635
636           hc0 = 0;
637           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
638             {
639               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
640                 {
641                   hc0 = vnet_buffer (p0)->ip.flow_hash =
642                     vnet_buffer (p0)->ip.flow_hash >> 1;
643                 }
644               else
645                 {
646                   hc0 = vnet_buffer (p0)->ip.flow_hash =
647                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
648                 }
649               dpo0 = load_balance_get_fwd_bucket
650                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
651             }
652           else
653             {
654               dpo0 = load_balance_get_bucket_i (lb0, 0);
655             }
656
657           next0 = dpo0->dpoi_next_node;
658           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
659
660           vlib_increment_combined_counter
661             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
662
663           vlib_validate_buffer_enqueue_x1 (vm, node, next,
664                                            to_next, n_left_to_next,
665                                            pi0, next0);
666         }
667
668       vlib_put_next_frame (vm, node, next, n_left_to_next);
669     }
670
671   return frame->n_vectors;
672 }
673
674 /* *INDENT-OFF* */
675 VLIB_REGISTER_NODE (ip4_load_balance_node) =
676 {
677   .function = ip4_load_balance,
678   .name = "ip4-load-balance",
679   .vector_size = sizeof (u32),
680   .sibling_of = "ip4-lookup",
681   .format_trace =
682   format_ip4_lookup_trace,
683 };
684 /* *INDENT-ON* */
685
686 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
687
688 /* get first interface address */
689 ip4_address_t *
690 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
691                              ip_interface_address_t ** result_ia)
692 {
693   ip_lookup_main_t *lm = &im->lookup_main;
694   ip_interface_address_t *ia = 0;
695   ip4_address_t *result = 0;
696
697   /* *INDENT-OFF* */
698   foreach_ip_interface_address
699     (lm, ia, sw_if_index,
700      1 /* honor unnumbered */ ,
701      ({
702        ip4_address_t * a =
703          ip_interface_address_get_address (lm, ia);
704        result = a;
705        break;
706      }));
707   /* *INDENT-OFF* */
708   if (result_ia)
709     *result_ia = result ? ia : 0;
710   return result;
711 }
712
713 static void
714 ip4_add_interface_routes (u32 sw_if_index,
715                           ip4_main_t * im, u32 fib_index,
716                           ip_interface_address_t * a)
717 {
718   ip_lookup_main_t *lm = &im->lookup_main;
719   ip4_address_t *address = ip_interface_address_get_address (lm, a);
720   fib_prefix_t pfx = {
721     .fp_len = a->address_length,
722     .fp_proto = FIB_PROTOCOL_IP4,
723     .fp_addr.ip4 = *address,
724   };
725
726   if (pfx.fp_len <= 30)
727     {
728       /* a /30 or shorter - add a glean for the network address */
729       fib_table_entry_update_one_path (fib_index, &pfx,
730                                        FIB_SOURCE_INTERFACE,
731                                        (FIB_ENTRY_FLAG_CONNECTED |
732                                         FIB_ENTRY_FLAG_ATTACHED),
733                                        DPO_PROTO_IP4,
734                                        /* No next-hop address */
735                                        NULL,
736                                        sw_if_index,
737                                        // invalid FIB index
738                                        ~0,
739                                        1,
740                                        // no out-label stack
741                                        NULL,
742                                        FIB_ROUTE_PATH_FLAG_NONE);
743
744       /* Add the two broadcast addresses as drop */
745       fib_prefix_t net_pfx = {
746         .fp_len = 32,
747         .fp_proto = FIB_PROTOCOL_IP4,
748         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
749       };
750       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
751         fib_table_entry_special_add(fib_index,
752                                     &net_pfx,
753                                     FIB_SOURCE_INTERFACE,
754                                     (FIB_ENTRY_FLAG_DROP |
755                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
756       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
757       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
758         fib_table_entry_special_add(fib_index,
759                                     &net_pfx,
760                                     FIB_SOURCE_INTERFACE,
761                                     (FIB_ENTRY_FLAG_DROP |
762                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
763     }
764   else if (pfx.fp_len == 31)
765     {
766       u32 mask = clib_host_to_net_u32(1);
767       fib_prefix_t net_pfx = pfx;
768
769       net_pfx.fp_len = 32;
770       net_pfx.fp_addr.ip4.as_u32 ^= mask;
771
772       /* a /31 - add the other end as an attached host */
773       fib_table_entry_update_one_path (fib_index, &net_pfx,
774                                        FIB_SOURCE_INTERFACE,
775                                        (FIB_ENTRY_FLAG_ATTACHED),
776                                        DPO_PROTO_IP4,
777                                        &net_pfx.fp_addr,
778                                        sw_if_index,
779                                        // invalid FIB index
780                                        ~0,
781                                        1,
782                                        NULL,
783                                        FIB_ROUTE_PATH_FLAG_NONE);
784     }
785   pfx.fp_len = 32;
786
787   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
788     {
789       u32 classify_table_index =
790         lm->classify_table_index_by_sw_if_index[sw_if_index];
791       if (classify_table_index != (u32) ~ 0)
792         {
793           dpo_id_t dpo = DPO_INVALID;
794
795           dpo_set (&dpo,
796                    DPO_CLASSIFY,
797                    DPO_PROTO_IP4,
798                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
799
800           fib_table_entry_special_dpo_add (fib_index,
801                                            &pfx,
802                                            FIB_SOURCE_CLASSIFY,
803                                            FIB_ENTRY_FLAG_NONE, &dpo);
804           dpo_reset (&dpo);
805         }
806     }
807
808   fib_table_entry_update_one_path (fib_index, &pfx,
809                                    FIB_SOURCE_INTERFACE,
810                                    (FIB_ENTRY_FLAG_CONNECTED |
811                                     FIB_ENTRY_FLAG_LOCAL),
812                                    DPO_PROTO_IP4,
813                                    &pfx.fp_addr,
814                                    sw_if_index,
815                                    // invalid FIB index
816                                    ~0,
817                                    1, NULL,
818                                    FIB_ROUTE_PATH_FLAG_NONE);
819 }
820
821 static void
822 ip4_del_interface_routes (ip4_main_t * im,
823                           u32 fib_index,
824                           ip4_address_t * address, u32 address_length)
825 {
826   fib_prefix_t pfx = {
827     .fp_len = address_length,
828     .fp_proto = FIB_PROTOCOL_IP4,
829     .fp_addr.ip4 = *address,
830   };
831
832   if (pfx.fp_len <= 30)
833     {
834       fib_prefix_t net_pfx = {
835         .fp_len = 32,
836         .fp_proto = FIB_PROTOCOL_IP4,
837         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
838       };
839       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
840         fib_table_entry_special_remove(fib_index,
841                                        &net_pfx,
842                                        FIB_SOURCE_INTERFACE);
843       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
844       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
845         fib_table_entry_special_remove(fib_index,
846                                        &net_pfx,
847                                        FIB_SOURCE_INTERFACE);
848       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
849     }
850     else if (pfx.fp_len == 31)
851     {
852       u32 mask = clib_host_to_net_u32(1);
853       fib_prefix_t net_pfx = pfx;
854
855       net_pfx.fp_len = 32;
856       net_pfx.fp_addr.ip4.as_u32 ^= mask;
857
858       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
859     }
860
861   pfx.fp_len = 32;
862   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
863 }
864
865 void
866 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
867 {
868   ip4_main_t *im = &ip4_main;
869
870   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
871
872   /*
873    * enable/disable only on the 1<->0 transition
874    */
875   if (is_enable)
876     {
877       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
878         return;
879     }
880   else
881     {
882       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
883       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
884         return;
885     }
886   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
887                                !is_enable, 0, 0);
888
889
890   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
891                                sw_if_index, !is_enable, 0, 0);
892 }
893
894 static clib_error_t *
895 ip4_add_del_interface_address_internal (vlib_main_t * vm,
896                                         u32 sw_if_index,
897                                         ip4_address_t * address,
898                                         u32 address_length, u32 is_del)
899 {
900   vnet_main_t *vnm = vnet_get_main ();
901   ip4_main_t *im = &ip4_main;
902   ip_lookup_main_t *lm = &im->lookup_main;
903   clib_error_t *error = 0;
904   u32 if_address_index, elts_before;
905   ip4_address_fib_t ip4_af, *addr_fib = 0;
906
907   /* local0 interface doesn't support IP addressing  */
908   if (sw_if_index == 0)
909     {
910       return
911        clib_error_create ("local0 interface doesn't support IP addressing");
912     }
913
914   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
915   ip4_addr_fib_init (&ip4_af, address,
916                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
917   vec_add1 (addr_fib, ip4_af);
918
919   /* FIXME-LATER
920    * there is no support for adj-fib handling in the presence of overlapping
921    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
922    * most routers do.
923    */
924   /* *INDENT-OFF* */
925   if (!is_del)
926     {
927       /* When adding an address check that it does not conflict
928          with an existing address. */
929       ip_interface_address_t *ia;
930       foreach_ip_interface_address
931         (&im->lookup_main, ia, sw_if_index,
932          0 /* honor unnumbered */ ,
933          ({
934            ip4_address_t * x =
935              ip_interface_address_get_address
936              (&im->lookup_main, ia);
937            if (ip4_destination_matches_route
938                (im, address, x, ia->address_length) ||
939                ip4_destination_matches_route (im,
940                                               x,
941                                               address,
942                                               address_length))
943              return
944                clib_error_create
945                ("failed to add %U which conflicts with %U for interface %U",
946                 format_ip4_address_and_length, address,
947                 address_length,
948                 format_ip4_address_and_length, x,
949                 ia->address_length,
950                 format_vnet_sw_if_index_name, vnm,
951                 sw_if_index);
952          }));
953     }
954   /* *INDENT-ON* */
955
956   elts_before = pool_elts (lm->if_address_pool);
957
958   error = ip_interface_address_add_del
959     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
960   if (error)
961     goto done;
962
963   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
964
965   if (is_del)
966     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
967   else
968     ip4_add_interface_routes (sw_if_index,
969                               im, ip4_af.fib_index,
970                               pool_elt_at_index
971                               (lm->if_address_pool, if_address_index));
972
973   /* If pool did not grow/shrink: add duplicate address. */
974   if (elts_before != pool_elts (lm->if_address_pool))
975     {
976       ip4_add_del_interface_address_callback_t *cb;
977       vec_foreach (cb, im->add_del_interface_address_callbacks)
978         cb->function (im, cb->function_opaque, sw_if_index,
979                       address, address_length, if_address_index, is_del);
980     }
981
982 done:
983   vec_free (addr_fib);
984   return error;
985 }
986
987 clib_error_t *
988 ip4_add_del_interface_address (vlib_main_t * vm,
989                                u32 sw_if_index,
990                                ip4_address_t * address,
991                                u32 address_length, u32 is_del)
992 {
993   return ip4_add_del_interface_address_internal
994     (vm, sw_if_index, address, address_length, is_del);
995 }
996
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * Declares the "ip4-unicast" arc (entered from "ip4-input" and
 * "ip4-input-no-checksum") and registers the nodes that sit on it.
 * Each registration names the node(s) it must precede via .runs_before.
 * Read together, the constraints below form this chain:
 *
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
 *   vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
 *
 * "ip4-source-and-port-range-check-rx" is only constrained to precede
 * "ip4-policer-classify", and "ip4-not-enabled" only to precede
 * "ip4-lookup".
 *
 * NOTE(review): the ordering semantics are read off the field name
 * runs_before; confirm against the feature-arc infrastructure.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  /* the arc's index is made available through ip4_main.lookup_main */
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

/* No constraint relative to the two source-check nodes above. */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec-input-ip4",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/*
 * The feature toggled by ip4_sw_interface_enable_disable() and
 * ip4_sw_interface_add_del() in this file.
 */
VNET_FEATURE_INIT (ip4_not_enabled, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
1082
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * Declares the "ip4-multicast" arc, which shares its start nodes
 * ("ip4-input", "ip4-input-no-checksum") with the unicast arc.
 * "vpath-input-ip4" and "ip4-not-enabled" are each constrained to precede
 * "ip4-mfib-forward-lookup", which is registered as the last feature.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  /* the arc's index is made available through ip4_main.lookup_main */
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* Toggled together with its unicast counterpart by this file. */
VNET_FEATURE_INIT (ip4_mc_not_enabled, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-not-enabled",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1111
/*
 * ip4 tx feature path definition (source and port-range check, IPsec,
 * interface output).
 *
 * Declares the "ip4-output" arc, entered from "ip4-rewrite",
 * "ip4-midchain" and "ip4-dvr-dpo". The registrations below constrain the
 * order to:
 *
 *   ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
 *   interface-output
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
  /* the arc's index is made available through ip4_main.lookup_main */
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec-output-ip4",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1142
1143 static clib_error_t *
1144 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1145 {
1146   ip4_main_t *im = &ip4_main;
1147
1148   /* Fill in lookup tables with default table (0). */
1149   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1150   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1151
1152   if (!is_add)
1153     {
1154       ip4_main_t *im4 = &ip4_main;
1155       ip_lookup_main_t *lm4 = &im4->lookup_main;
1156       ip_interface_address_t *ia = 0;
1157       ip4_address_t *address;
1158       vlib_main_t *vm = vlib_get_main ();
1159
1160       /* *INDENT-OFF* */
1161       foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1162       ({
1163         address = ip_interface_address_get_address (lm4, ia);
1164         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1165       }));
1166       /* *INDENT-ON* */
1167     }
1168
1169   vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
1170                                is_add, 0, 0);
1171
1172   vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled",
1173                                sw_if_index, is_add, 0, 0);
1174
1175   return /* no error */ 0;
1176 }
1177
1178 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1179
/*
 * Global IP4 main: the single IPv4 state object. Functions in this file
 * reach it as &ip4_main (lookup_main, fib_masks, the per-interface FIB index
 * vectors, the ARP request packet template, ...).
 */
ip4_main_t ip4_main;
1182
1183 clib_error_t *
1184 ip4_lookup_init (vlib_main_t * vm)
1185 {
1186   ip4_main_t *im = &ip4_main;
1187   clib_error_t *error;
1188   uword i;
1189
1190   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1191     return error;
1192   if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1193     return (error);
1194   if ((error = vlib_call_init_function (vm, fib_module_init)))
1195     return error;
1196   if ((error = vlib_call_init_function (vm, mfib_module_init)))
1197     return error;
1198
1199   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1200     {
1201       u32 m;
1202
1203       if (i < 32)
1204         m = pow2_mask (i) << (32 - i);
1205       else
1206         m = ~0;
1207       im->fib_masks[i] = clib_host_to_net_u32 (m);
1208     }
1209
1210   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1211
1212   /* Create FIB with index 0 and table id of 0. */
1213   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1214                                      FIB_SOURCE_DEFAULT_ROUTE);
1215   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1216                                       MFIB_SOURCE_DEFAULT_ROUTE);
1217
1218   {
1219     pg_node_t *pn;
1220     pn = pg_get_node (ip4_lookup_node.index);
1221     pn->unformat_edit = unformat_pg_ip4_header;
1222   }
1223
1224   {
1225     ethernet_arp_header_t h;
1226
1227     memset (&h, 0, sizeof (h));
1228
1229     /* Set target ethernet address to all zeros. */
1230     memset (h.ip4_over_ethernet[1].ethernet, 0,
1231             sizeof (h.ip4_over_ethernet[1].ethernet));
1232
1233 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1234 #define _8(f,v) h.f = v;
1235     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1236     _16 (l3_type, ETHERNET_TYPE_IP4);
1237     _8 (n_l2_address_bytes, 6);
1238     _8 (n_l3_address_bytes, 4);
1239     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1240 #undef _16
1241 #undef _8
1242
1243     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1244                                /* data */ &h,
1245                                sizeof (h),
1246                                /* alloc chunk size */ 8,
1247                                "ip4 arp");
1248   }
1249
1250   return error;
1251 }
1252
1253 VLIB_INIT_FUNCTION (ip4_lookup_init);
1254
/*
 * Per-packet trace record written by ip4_forward_next_trace() and printed
 * by the format_ip4_*_trace functions in this file.
 */
typedef struct
{
  /* Adjacency taken: copied from the buffer's ip.adj_index[] slot selected
     by the tracing caller. */
  u32 dpo_index;
  /* Copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
     FIB index bound to the RX interface. format_ip4_rewrite_trace prints
     this field under the label "tx_sw_if_index". */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. The size expression subtracts
     a single u32 from 64 although three u32 fields precede the array. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1266
1267 u8 *
1268 format_ip4_forward_next_trace (u8 * s, va_list * args)
1269 {
1270   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1271   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1272   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1273   u32 indent = format_get_indent (s);
1274   s = format (s, "%U%U",
1275               format_white_space, indent,
1276               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1277   return s;
1278 }
1279
1280 static u8 *
1281 format_ip4_lookup_trace (u8 * s, va_list * args)
1282 {
1283   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1284   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1285   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1286   u32 indent = format_get_indent (s);
1287
1288   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1289               t->fib_index, t->dpo_index, t->flow_hash);
1290   s = format (s, "\n%U%U",
1291               format_white_space, indent,
1292               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1293   return s;
1294 }
1295
1296 static u8 *
1297 format_ip4_rewrite_trace (u8 * s, va_list * args)
1298 {
1299   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1300   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1301   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1302   u32 indent = format_get_indent (s);
1303
1304   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1305               t->fib_index, t->dpo_index, format_ip_adjacency,
1306               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1307   s = format (s, "\n%U%U",
1308               format_white_space, indent,
1309               format_ip_adjacency_packet_data,
1310               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1311   return s;
1312 }
1313
1314 /* Common trace function for all ip4-forward next nodes. */
1315 void
1316 ip4_forward_next_trace (vlib_main_t * vm,
1317                         vlib_node_runtime_t * node,
1318                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1319 {
1320   u32 *from, n_left;
1321   ip4_main_t *im = &ip4_main;
1322
1323   n_left = frame->n_vectors;
1324   from = vlib_frame_vector_args (frame);
1325
1326   while (n_left >= 4)
1327     {
1328       u32 bi0, bi1;
1329       vlib_buffer_t *b0, *b1;
1330       ip4_forward_next_trace_t *t0, *t1;
1331
1332       /* Prefetch next iteration. */
1333       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1334       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1335
1336       bi0 = from[0];
1337       bi1 = from[1];
1338
1339       b0 = vlib_get_buffer (vm, bi0);
1340       b1 = vlib_get_buffer (vm, bi1);
1341
1342       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1343         {
1344           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1345           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1346           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1347           t0->fib_index =
1348             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1349              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1350             vec_elt (im->fib_index_by_sw_if_index,
1351                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1352
1353           clib_memcpy (t0->packet_data,
1354                        vlib_buffer_get_current (b0),
1355                        sizeof (t0->packet_data));
1356         }
1357       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1358         {
1359           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1360           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1361           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1362           t1->fib_index =
1363             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1364              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1365             vec_elt (im->fib_index_by_sw_if_index,
1366                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1367           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1368                        sizeof (t1->packet_data));
1369         }
1370       from += 2;
1371       n_left -= 2;
1372     }
1373
1374   while (n_left >= 1)
1375     {
1376       u32 bi0;
1377       vlib_buffer_t *b0;
1378       ip4_forward_next_trace_t *t0;
1379
1380       bi0 = from[0];
1381
1382       b0 = vlib_get_buffer (vm, bi0);
1383
1384       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1385         {
1386           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1387           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1388           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1389           t0->fib_index =
1390             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1391              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1392             vec_elt (im->fib_index_by_sw_if_index,
1393                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1394           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1395                        sizeof (t0->packet_data));
1396         }
1397       from += 1;
1398       n_left -= 1;
1399     }
1400 }
1401
1402 /* Compute TCP/UDP/ICMP4 checksum in software. */
1403 u16
1404 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1405                               ip4_header_t * ip0)
1406 {
1407   ip_csum_t sum0;
1408   u32 ip_header_length, payload_length_host_byte_order;
1409   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1410   u16 sum16;
1411   void *data_this_buffer;
1412
1413   /* Initialize checksum with ip header. */
1414   ip_header_length = ip4_header_bytes (ip0);
1415   payload_length_host_byte_order =
1416     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1417   sum0 =
1418     clib_host_to_net_u32 (payload_length_host_byte_order +
1419                           (ip0->protocol << 16));
1420
1421   if (BITS (uword) == 32)
1422     {
1423       sum0 =
1424         ip_csum_with_carry (sum0,
1425                             clib_mem_unaligned (&ip0->src_address, u32));
1426       sum0 =
1427         ip_csum_with_carry (sum0,
1428                             clib_mem_unaligned (&ip0->dst_address, u32));
1429     }
1430   else
1431     sum0 =
1432       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1433
1434   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1435   data_this_buffer = (void *) ip0 + ip_header_length;
1436   n_ip_bytes_this_buffer =
1437     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1438   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1439     {
1440       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1441         n_ip_bytes_this_buffer - ip_header_length : 0;
1442     }
1443   while (1)
1444     {
1445       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1446       n_bytes_left -= n_this_buffer;
1447       if (n_bytes_left == 0)
1448         break;
1449
1450       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1451       p0 = vlib_get_buffer (vm, p0->next_buffer);
1452       data_this_buffer = vlib_buffer_get_current (p0);
1453       n_this_buffer = p0->current_length;
1454     }
1455
1456   sum16 = ~ip_csum_fold (sum0);
1457
1458   return sum16;
1459 }
1460
1461 u32
1462 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1463 {
1464   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1465   udp_header_t *udp0;
1466   u16 sum16;
1467
1468   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1469           || ip0->protocol == IP_PROTOCOL_UDP);
1470
1471   udp0 = (void *) (ip0 + 1);
1472   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1473     {
1474       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1475                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1476       return p0->flags;
1477     }
1478
1479   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1480
1481   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1482                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1483
1484   return p0->flags;
1485 }
1486
/*
 * Declares the "ip4-local" feature arc, whose only start node is
 * "ip4-local". Unlike the arcs above it is not declared static and has no
 * arc_index_ptr; ip4_local_inline() reads the index directly from
 * vnet_feat_arc_ip4_local.feature_arc_index.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
};
/* *INDENT-ON* */
1494
1495 static inline void
1496 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1497                        u8 is_udp, u8 * error, u8 * good_tcp_udp)
1498 {
1499   u32 flags0;
1500   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1501   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1502   if (is_udp)
1503     {
1504       udp_header_t *udp;
1505       u32 ip_len, udp_len;
1506       i32 len_diff;
1507       udp = ip4_next_header (ip);
1508       /* Verify UDP length. */
1509       ip_len = clib_net_to_host_u16 (ip->length);
1510       udp_len = clib_net_to_host_u16 (udp->length);
1511
1512       len_diff = ip_len - udp_len;
1513       *good_tcp_udp &= len_diff >= 0;
1514       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1515     }
1516 }
1517
/*
 * Non-zero when a software L4 checksum check is still required: the packet
 * is TCP or UDP (is_tcp_udp) and none of the "checksum already computed" /
 * "TCP checksum offload" / "UDP checksum offload" bits is set in flags.
 * Both arguments are parenthesized and evaluated at most once, so
 * arbitrary expressions may be passed.
 */
#define ip4_local_do_l4_check(is_tcp_udp, flags)                        \
    ((is_tcp_udp) && !((flags) & (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED    \
                                  | VNET_BUFFER_F_OFFLOAD_TCP_CKSUM     \
                                  | VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)))
1522
1523 static inline uword
1524 ip4_local_inline (vlib_main_t * vm,
1525                   vlib_node_runtime_t * node,
1526                   vlib_frame_t * frame, int head_of_feature_arc)
1527 {
1528   ip4_main_t *im = &ip4_main;
1529   ip_lookup_main_t *lm = &im->lookup_main;
1530   ip_local_next_t next_index;
1531   u32 *from, *to_next, n_left_from, n_left_to_next;
1532   vlib_node_runtime_t *error_node =
1533     vlib_node_get_runtime (vm, ip4_input_node.index);
1534   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1535
1536   from = vlib_frame_vector_args (frame);
1537   n_left_from = frame->n_vectors;
1538   next_index = node->cached_next_index;
1539
1540   if (node->flags & VLIB_NODE_FLAG_TRACE)
1541     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1542
1543   while (n_left_from > 0)
1544     {
1545       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1546
1547       while (n_left_from >= 4 && n_left_to_next >= 2)
1548         {
1549           vlib_buffer_t *p0, *p1;
1550           ip4_header_t *ip0, *ip1;
1551           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1552           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1553           const dpo_id_t *dpo0, *dpo1;
1554           const load_balance_t *lb0, *lb1;
1555           u32 pi0, next0, fib_index0, lbi0;
1556           u32 pi1, next1, fib_index1, lbi1;
1557           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1558           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1559           u32 sw_if_index0, sw_if_index1;
1560
1561           pi0 = to_next[0] = from[0];
1562           pi1 = to_next[1] = from[1];
1563           from += 2;
1564           n_left_from -= 2;
1565           to_next += 2;
1566           n_left_to_next -= 2;
1567
1568           next0 = next1 = IP_LOCAL_NEXT_DROP;
1569           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1570
1571           p0 = vlib_get_buffer (vm, pi0);
1572           p1 = vlib_get_buffer (vm, pi1);
1573
1574           ip0 = vlib_buffer_get_current (p0);
1575           ip1 = vlib_buffer_get_current (p1);
1576
1577           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1578           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1579
1580           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1581           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1582
1583           /* Treat IP frag packets as "experimental" protocol for now
1584              until support of IP frag reassembly is implemented */
1585           proto0 =
1586             ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
1587             ip0->protocol;
1588           proto1 =
1589             ip4_is_fragment (ip1) ? IP_PROTOCOL_VPP_FRAGMENTATION :
1590             ip1->protocol;
1591
1592           if (head_of_feature_arc == 0)
1593             goto skip_checks;
1594
1595           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1596           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1597           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1598           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1599
1600           good_tcp_udp0 =
1601             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1602              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1603                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1604           good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1605                            || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1606                                || p1->flags &
1607                                VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1608
1609           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
1610                              || ip4_local_do_l4_check (is_tcp_udp1,
1611                                                        p1->flags)))
1612             {
1613               if (is_tcp_udp0)
1614                 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1615                                        &good_tcp_udp0);
1616               if (is_tcp_udp1)
1617                 ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
1618                                        &good_tcp_udp1);
1619             }
1620
1621           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1622           error0 = (is_tcp_udp0 && !good_tcp_udp0
1623                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1624           error1 = (is_tcp_udp1 && !good_tcp_udp1
1625                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1626
1627           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1628           fib_index0 =
1629             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1630              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1631
1632           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1633           fib_index1 =
1634             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1635              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1636
1637           /* TODO maybe move to lookup? */
1638           vnet_buffer (p0)->ip.fib_index = fib_index0;
1639           vnet_buffer (p1)->ip.fib_index = fib_index1;
1640
1641           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1642           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1643
1644           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1645           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1646           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1647                                              2);
1648           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1649                                              2);
1650           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1651                                              3);
1652           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1653                                              3);
1654
1655           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1656             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1657           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1658
1659           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1660             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1661           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1662
1663           lb0 = load_balance_get (lbi0);
1664           lb1 = load_balance_get (lbi1);
1665           dpo0 = load_balance_get_bucket_i (lb0, 0);
1666           dpo1 = load_balance_get_bucket_i (lb1, 0);
1667
1668           /*
1669            * Must have a route to source otherwise we drop the packet.
1670            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1671            *
1672            * The checks are:
1673            *  - the source is a receive => it's from us => bogus, do this
1674            *    first since it sets a different error code.
1675            *  - uRPF check for any route to source - accept if passes.
1676            *  - allow packets destined to the broadcast address from unknown sources
1677            */
1678           if (p0->flags & VNET_BUFFER_F_IS_NATED)
1679             goto skip_check0;
1680
1681           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1682                      dpo0->dpoi_type == DPO_RECEIVE) ?
1683                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1684           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1685                      !fib_urpf_check_size (lb0->lb_urpf) &&
1686                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1687                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1688
1689         skip_check0:
1690           if (p1->flags & VNET_BUFFER_F_IS_NATED)
1691             goto skip_checks;
1692
1693           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1694                      dpo1->dpoi_type == DPO_RECEIVE) ?
1695                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1696           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1697                      !fib_urpf_check_size (lb1->lb_urpf) &&
1698                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1699                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1700
1701         skip_checks:
1702
1703           next0 = lm->local_next_by_ip_protocol[proto0];
1704           next1 = lm->local_next_by_ip_protocol[proto1];
1705
1706           next0 =
1707             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1708           next1 =
1709             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1710
1711           p0->error = error0 ? error_node->errors[error0] : 0;
1712           p1->error = error1 ? error_node->errors[error1] : 0;
1713
1714           if (head_of_feature_arc)
1715             {
1716               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1717                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1718               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1719                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1720             }
1721
1722           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1723                                            n_left_to_next, pi0, pi1,
1724                                            next0, next1);
1725         }
1726
1727       while (n_left_from > 0 && n_left_to_next > 0)
1728         {
1729           vlib_buffer_t *p0;
1730           ip4_header_t *ip0;
1731           ip4_fib_mtrie_t *mtrie0;
1732           ip4_fib_mtrie_leaf_t leaf0;
1733           u32 pi0, next0, fib_index0, lbi0;
1734           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1735           load_balance_t *lb0;
1736           const dpo_id_t *dpo0;
1737           u32 sw_if_index0;
1738
1739           pi0 = to_next[0] = from[0];
1740           from += 1;
1741           n_left_from -= 1;
1742           to_next += 1;
1743           n_left_to_next -= 1;
1744
1745           next0 = IP_LOCAL_NEXT_DROP;
1746           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1747
1748           p0 = vlib_get_buffer (vm, pi0);
1749           ip0 = vlib_buffer_get_current (p0);
1750           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1751           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1752
1753           /* Treat IP frag packets as "experimental" protocol for now
1754              until support of IP frag reassembly is implemented */
1755           proto0 =
1756             ip4_is_fragment (ip0) ? IP_PROTOCOL_VPP_FRAGMENTATION :
1757             ip0->protocol;
1758
1759           if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
1760             goto skip_check;
1761
1762           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1763           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1764
1765           good_tcp_udp0 =
1766             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1767              || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1768                  || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1769
1770           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
1771             {
1772               ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1773                                      &good_tcp_udp0);
1774             }
1775
1776           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1777           error0 = (is_tcp_udp0 && !good_tcp_udp0
1778                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1779
1780           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1781           fib_index0 =
1782             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1783              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1784           vnet_buffer (p0)->ip.fib_index = fib_index0;
1785           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1786           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1787           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1788                                              2);
1789           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1790                                              3);
1791           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1792           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1793           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1794
1795           lb0 = load_balance_get (lbi0);
1796           dpo0 = load_balance_get_bucket_i (lb0, 0);
1797
1798           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1799                      dpo0->dpoi_type == DPO_RECEIVE) ?
1800                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1801           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1802                      !fib_urpf_check_size (lb0->lb_urpf) &&
1803                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1804                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1805
1806         skip_check:
1807           next0 = lm->local_next_by_ip_protocol[proto0];
1808           next0 =
1809             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1810
1811           p0->error = error0 ? error_node->errors[error0] : 0;
1812
1813           if (head_of_feature_arc)
1814             {
1815               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1816                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1817             }
1818
1819           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1820                                            n_left_to_next, pi0, next0);
1821         }
1822       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1823     }
1824
1825   return frame->n_vectors;
1826 }
1827
1828 static uword
1829 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1830 {
1831   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1832 }
1833
1834 /* *INDENT-OFF* */
1835 VLIB_REGISTER_NODE (ip4_local_node) =
1836 {
1837   .function = ip4_local,
1838   .name = "ip4-local",
1839   .vector_size = sizeof (u32),
1840   .format_trace = format_ip4_forward_next_trace,
1841   .n_next_nodes = IP_LOCAL_N_NEXT,
1842   .next_nodes =
1843   {
1844     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1845     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1846     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1847     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1848     [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-reassembly",
1849   },
1850 };
1851 /* *INDENT-ON* */
1852
1853 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1854
1855 static uword
1856 ip4_local_end_of_arc (vlib_main_t * vm,
1857                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1858 {
1859   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1860 }
1861
1862 /* *INDENT-OFF* */
1863 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1864   .function = ip4_local_end_of_arc,
1865   .name = "ip4-local-end-of-arc",
1866   .vector_size = sizeof (u32),
1867
1868   .format_trace = format_ip4_forward_next_trace,
1869   .sibling_of = "ip4-local",
1870 };
1871
1872 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1873
1874 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1875   .arc_name = "ip4-local",
1876   .node_name = "ip4-local-end-of-arc",
1877   .runs_before = 0, /* not before any other features */
1878 };
1879 /* *INDENT-ON* */
1880
1881 void
1882 ip4_register_protocol (u32 protocol, u32 node_index)
1883 {
1884   vlib_main_t *vm = vlib_get_main ();
1885   ip4_main_t *im = &ip4_main;
1886   ip_lookup_main_t *lm = &im->lookup_main;
1887
1888   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1889   lm->local_next_by_ip_protocol[protocol] =
1890     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1891 }
1892
1893 static clib_error_t *
1894 show_ip_local_command_fn (vlib_main_t * vm,
1895                           unformat_input_t * input, vlib_cli_command_t * cmd)
1896 {
1897   ip4_main_t *im = &ip4_main;
1898   ip_lookup_main_t *lm = &im->lookup_main;
1899   int i;
1900
1901   vlib_cli_output (vm, "Protocols handled by ip4_local");
1902   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1903     {
1904       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1905         {
1906           u32 node_index = vlib_get_node (vm,
1907                                           ip4_local_node.index)->
1908             next_nodes[lm->local_next_by_ip_protocol[i]];
1909           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1910                            node_index);
1911         }
1912     }
1913   return 0;
1914 }
1915
1916
1917
1918 /*?
1919  * Display the set of protocols handled by the local IPv4 stack.
1920  *
1921  * @cliexpar
1922  * Example of how to display local protocol table:
1923  * @cliexstart{show ip local}
1924  * Protocols handled by ip4_local
1925  * 1
1926  * 17
1927  * 47
1928  * @cliexend
1929 ?*/
1930 /* *INDENT-OFF* */
1931 VLIB_CLI_COMMAND (show_ip_local, static) =
1932 {
1933   .path = "show ip local",
1934   .function = show_ip_local_command_fn,
1935   .short_help = "show ip local",
1936 };
1937 /* *INDENT-ON* */
1938
1939 always_inline uword
1940 ip4_arp_inline (vlib_main_t * vm,
1941                 vlib_node_runtime_t * node,
1942                 vlib_frame_t * frame, int is_glean)
1943 {
1944   vnet_main_t *vnm = vnet_get_main ();
1945   ip4_main_t *im = &ip4_main;
1946   ip_lookup_main_t *lm = &im->lookup_main;
1947   u32 *from, *to_next_drop;
1948   uword n_left_from, n_left_to_next_drop, next_index;
1949   static f64 time_last_seed_change = -1e100;
1950   static u32 hash_seeds[3];
1951   static uword hash_bitmap[256 / BITS (uword)];
1952   f64 time_now;
1953
1954   if (node->flags & VLIB_NODE_FLAG_TRACE)
1955     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1956
1957   time_now = vlib_time_now (vm);
1958   if (time_now - time_last_seed_change > 1e-3)
1959     {
1960       uword i;
1961       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1962                                             sizeof (hash_seeds));
1963       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1964         hash_seeds[i] = r[i];
1965
1966       /* Mark all hash keys as been no-seen before. */
1967       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1968         hash_bitmap[i] = 0;
1969
1970       time_last_seed_change = time_now;
1971     }
1972
1973   from = vlib_frame_vector_args (frame);
1974   n_left_from = frame->n_vectors;
1975   next_index = node->cached_next_index;
1976   if (next_index == IP4_ARP_NEXT_DROP)
1977     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1978
1979   while (n_left_from > 0)
1980     {
1981       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1982                            to_next_drop, n_left_to_next_drop);
1983
1984       while (n_left_from > 0 && n_left_to_next_drop > 0)
1985         {
1986           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1987           ip_adjacency_t *adj0;
1988           vlib_buffer_t *p0;
1989           ip4_header_t *ip0;
1990           uword bm0;
1991
1992           pi0 = from[0];
1993
1994           p0 = vlib_get_buffer (vm, pi0);
1995
1996           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1997           adj0 = adj_get (adj_index0);
1998           ip0 = vlib_buffer_get_current (p0);
1999
2000           a0 = hash_seeds[0];
2001           b0 = hash_seeds[1];
2002           c0 = hash_seeds[2];
2003
2004           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2005           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2006
2007           if (is_glean)
2008             {
2009               /*
2010                * this is the Glean case, so we are ARPing for the
2011                * packet's destination
2012                */
2013               a0 ^= ip0->dst_address.data_u32;
2014             }
2015           else
2016             {
2017               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2018             }
2019           b0 ^= sw_if_index0;
2020
2021           hash_v3_mix32 (a0, b0, c0);
2022           hash_v3_finalize32 (a0, b0, c0);
2023
2024           c0 &= BITS (hash_bitmap) - 1;
2025           m0 = (uword) 1 << (c0 % BITS (uword));
2026           c0 = c0 / BITS (uword);
2027
2028           bm0 = hash_bitmap[c0];
2029           drop0 = (bm0 & m0) != 0;
2030
2031           /* Mark it as seen. */
2032           hash_bitmap[c0] = bm0 | m0;
2033
2034           from += 1;
2035           n_left_from -= 1;
2036           to_next_drop[0] = pi0;
2037           to_next_drop += 1;
2038           n_left_to_next_drop -= 1;
2039
2040           p0->error =
2041             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2042                          IP4_ARP_ERROR_REQUEST_SENT];
2043
2044           /*
2045            * the adj has been updated to a rewrite but the node the DPO that got
2046            * us here hasn't - yet. no big deal. we'll drop while we wait.
2047            */
2048           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2049             continue;
2050
2051           if (drop0)
2052             continue;
2053
2054           /*
2055            * Can happen if the control-plane is programming tables
2056            * with traffic flowing; at least that's today's lame excuse.
2057            */
2058           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2059               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2060             {
2061               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2062             }
2063           else
2064             /* Send ARP request. */
2065             {
2066               u32 bi0 = 0;
2067               vlib_buffer_t *b0;
2068               ethernet_arp_header_t *h0;
2069               vnet_hw_interface_t *hw_if0;
2070
2071               h0 =
2072                 vlib_packet_template_get_packet (vm,
2073                                                  &im->ip4_arp_request_packet_template,
2074                                                  &bi0);
2075
2076               /* Seems we're out of buffers */
2077               if (PREDICT_FALSE (!h0))
2078                 continue;
2079
2080               /* Add rewrite/encap string for ARP packet. */
2081               vnet_rewrite_one_header (adj0[0], h0,
2082                                        sizeof (ethernet_header_t));
2083
2084               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2085
2086               /* Src ethernet address in ARP header. */
2087               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2088                            hw_if0->hw_address,
2089                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2090
2091               if (is_glean)
2092                 {
2093                   /* The interface's source address is stashed in the Glean Adj */
2094                   h0->ip4_over_ethernet[0].ip4 =
2095                     adj0->sub_type.glean.receive_addr.ip4;
2096
2097                   /* Copy in destination address we are requesting. This is the
2098                    * glean case, so it's the packet's destination.*/
2099                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2100                     ip0->dst_address.data_u32;
2101                 }
2102               else
2103                 {
2104                   /* Src IP address in ARP header. */
2105                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2106                                                   &h0->
2107                                                   ip4_over_ethernet[0].ip4))
2108                     {
2109                       /* No source address available */
2110                       p0->error =
2111                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2112                       vlib_buffer_free (vm, &bi0, 1);
2113                       continue;
2114                     }
2115
2116                   /* Copy in destination address we are requesting from the
2117                      incomplete adj */
2118                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2119                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2120                 }
2121
2122               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2123               b0 = vlib_get_buffer (vm, bi0);
2124               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2125               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2126
2127               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2128
2129               vlib_set_next_frame_buffer (vm, node,
2130                                           adj0->rewrite_header.next_index,
2131                                           bi0);
2132             }
2133         }
2134
2135       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2136     }
2137
2138   return frame->n_vectors;
2139 }
2140
2141 static uword
2142 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2143 {
2144   return (ip4_arp_inline (vm, node, frame, 0));
2145 }
2146
2147 static uword
2148 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2149 {
2150   return (ip4_arp_inline (vm, node, frame, 1));
2151 }
2152
2153 static char *ip4_arp_error_strings[] = {
2154   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2155   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2156   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2157   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2158   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2159   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2160 };
2161
2162 /* *INDENT-OFF* */
2163 VLIB_REGISTER_NODE (ip4_arp_node) =
2164 {
2165   .function = ip4_arp,
2166   .name = "ip4-arp",
2167   .vector_size = sizeof (u32),
2168   .format_trace = format_ip4_forward_next_trace,
2169   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2170   .error_strings = ip4_arp_error_strings,
2171   .n_next_nodes = IP4_ARP_N_NEXT,
2172   .next_nodes =
2173   {
2174     [IP4_ARP_NEXT_DROP] = "error-drop",
2175   },
2176 };
2177
2178 VLIB_REGISTER_NODE (ip4_glean_node) =
2179 {
2180   .function = ip4_glean,
2181   .name = "ip4-glean",
2182   .vector_size = sizeof (u32),
2183   .format_trace = format_ip4_forward_next_trace,
2184   .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2185   .error_strings = ip4_arp_error_strings,
2186   .n_next_nodes = IP4_ARP_N_NEXT,
2187   .next_nodes = {
2188   [IP4_ARP_NEXT_DROP] = "error-drop",
2189   },
2190 };
2191 /* *INDENT-ON* */
2192
2193 #define foreach_notrace_ip4_arp_error           \
2194 _(DROP)                                         \
2195 _(REQUEST_SENT)                                 \
2196 _(REPLICATE_DROP)                               \
2197 _(REPLICATE_FAIL)
2198
2199 clib_error_t *
2200 arp_notrace_init (vlib_main_t * vm)
2201 {
2202   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2203
2204   /* don't trace ARP request packets */
2205 #define _(a)                                    \
2206     vnet_pcap_drop_trace_filter_add_del         \
2207         (rt->errors[IP4_ARP_ERROR_##a],         \
2208          1 /* is_add */);
2209   foreach_notrace_ip4_arp_error;
2210 #undef _
2211   return 0;
2212 }
2213
2214 VLIB_INIT_FUNCTION (arp_notrace_init);
2215
2216
2217 /* Send an ARP request to see if given destination is reachable on given interface. */
2218 clib_error_t *
2219 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2220 {
2221   vnet_main_t *vnm = vnet_get_main ();
2222   ip4_main_t *im = &ip4_main;
2223   ethernet_arp_header_t *h;
2224   ip4_address_t *src;
2225   ip_interface_address_t *ia;
2226   ip_adjacency_t *adj;
2227   vnet_hw_interface_t *hi;
2228   vnet_sw_interface_t *si;
2229   vlib_buffer_t *b;
2230   adj_index_t ai;
2231   u32 bi = 0;
2232
2233   si = vnet_get_sw_interface (vnm, sw_if_index);
2234
2235   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2236     {
2237       return clib_error_return (0, "%U: interface %U down",
2238                                 format_ip4_address, dst,
2239                                 format_vnet_sw_if_index_name, vnm,
2240                                 sw_if_index);
2241     }
2242
2243   src =
2244     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2245   if (!src)
2246     {
2247       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2248       return clib_error_return
2249         (0,
2250          "no matching interface address for destination %U (interface %U)",
2251          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2252          sw_if_index);
2253     }
2254
2255   h = vlib_packet_template_get_packet (vm,
2256                                        &im->ip4_arp_request_packet_template,
2257                                        &bi);
2258
2259   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2260   if (PREDICT_FALSE (!hi->hw_address))
2261     {
2262       return clib_error_return (0, "%U: interface %U do not support ip probe",
2263                                 format_ip4_address, dst,
2264                                 format_vnet_sw_if_index_name, vnm,
2265                                 sw_if_index);
2266     }
2267
2268   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2269                sizeof (h->ip4_over_ethernet[0].ethernet));
2270
2271   h->ip4_over_ethernet[0].ip4 = src[0];
2272   h->ip4_over_ethernet[1].ip4 = dst[0];
2273
2274   b = vlib_get_buffer (vm, bi);
2275   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2276     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2277
2278   ip46_address_t nh = {
2279     .ip4 = *dst,
2280   };
2281
2282   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2283                             VNET_LINK_IP4, &nh, sw_if_index);
2284   adj = adj_get (ai);
2285
2286   /* Peer has been previously resolved, retrieve glean adj instead */
2287   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2288     {
2289       adj_unlock (ai);
2290       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
2291       adj = adj_get (ai);
2292     }
2293
2294   /* Add encapsulation string for software interface (e.g. ethernet header). */
2295   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2296   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2297
2298   {
2299     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2300     u32 *to_next = vlib_frame_vector_args (f);
2301     to_next[0] = bi;
2302     f->n_vectors = 1;
2303     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2304   }
2305
2306   adj_unlock (ai);
2307   return /* no error */ 0;
2308 }
2309
/* Next slots selected directly by the ip4 rewrite path. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,            /* initial next for every packet */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,      /* chosen when the TTL expires */
} ip4_rewrite_next_t;
2315
2316 always_inline uword
2317 ip4_rewrite_inline (vlib_main_t * vm,
2318                     vlib_node_runtime_t * node,
2319                     vlib_frame_t * frame,
2320                     int do_counters, int is_midchain, int is_mcast)
2321 {
2322   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2323   u32 *from = vlib_frame_vector_args (frame);
2324   u32 n_left_from, n_left_to_next, *to_next, next_index;
2325   vlib_node_runtime_t *error_node =
2326     vlib_node_get_runtime (vm, ip4_input_node.index);
2327
2328   n_left_from = frame->n_vectors;
2329   next_index = node->cached_next_index;
2330   u32 thread_index = vlib_get_thread_index ();
2331
2332   while (n_left_from > 0)
2333     {
2334       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2335
2336       while (n_left_from >= 4 && n_left_to_next >= 2)
2337         {
2338           ip_adjacency_t *adj0, *adj1;
2339           vlib_buffer_t *p0, *p1;
2340           ip4_header_t *ip0, *ip1;
2341           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2342           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2343           u32 tx_sw_if_index0, tx_sw_if_index1;
2344
2345           /* Prefetch next iteration. */
2346           {
2347             vlib_buffer_t *p2, *p3;
2348
2349             p2 = vlib_get_buffer (vm, from[2]);
2350             p3 = vlib_get_buffer (vm, from[3]);
2351
2352             vlib_prefetch_buffer_header (p2, STORE);
2353             vlib_prefetch_buffer_header (p3, STORE);
2354
2355             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2356             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2357           }
2358
2359           pi0 = to_next[0] = from[0];
2360           pi1 = to_next[1] = from[1];
2361
2362           from += 2;
2363           n_left_from -= 2;
2364           to_next += 2;
2365           n_left_to_next -= 2;
2366
2367           p0 = vlib_get_buffer (vm, pi0);
2368           p1 = vlib_get_buffer (vm, pi1);
2369
2370           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2371           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2372
2373           /*
2374            * pre-fetch the per-adjacency counters
2375            */
2376           if (do_counters)
2377             {
2378               vlib_prefetch_combined_counter (&adjacency_counters,
2379                                               thread_index, adj_index0);
2380               vlib_prefetch_combined_counter (&adjacency_counters,
2381                                               thread_index, adj_index1);
2382             }
2383
2384           ip0 = vlib_buffer_get_current (p0);
2385           ip1 = vlib_buffer_get_current (p1);
2386
2387           error0 = error1 = IP4_ERROR_NONE;
2388           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2389
2390           /* Decrement TTL & update checksum.
2391              Works either endian, so no need for byte swap. */
2392           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2393             {
2394               i32 ttl0 = ip0->ttl;
2395
2396               /* Input node should have rejected packets with ttl 0. */
2397               ASSERT (ip0->ttl > 0);
2398
2399               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2400               checksum0 += checksum0 >= 0xffff;
2401
2402               ip0->checksum = checksum0;
2403               ttl0 -= 1;
2404               ip0->ttl = ttl0;
2405
2406               /*
2407                * If the ttl drops below 1 when forwarding, generate
2408                * an ICMP response.
2409                */
2410               if (PREDICT_FALSE (ttl0 <= 0))
2411                 {
2412                   error0 = IP4_ERROR_TIME_EXPIRED;
2413                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2414                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2415                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2416                                                0);
2417                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2418                 }
2419
2420               /* Verify checksum. */
2421               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2422                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2423             }
2424           else
2425             {
2426               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2427             }
2428           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2429             {
2430               i32 ttl1 = ip1->ttl;
2431
2432               /* Input node should have rejected packets with ttl 0. */
2433               ASSERT (ip1->ttl > 0);
2434
2435               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2436               checksum1 += checksum1 >= 0xffff;
2437
2438               ip1->checksum = checksum1;
2439               ttl1 -= 1;
2440               ip1->ttl = ttl1;
2441
2442               /*
2443                * If the ttl drops below 1 when forwarding, generate
2444                * an ICMP response.
2445                */
2446               if (PREDICT_FALSE (ttl1 <= 0))
2447                 {
2448                   error1 = IP4_ERROR_TIME_EXPIRED;
2449                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2450                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2451                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2452                                                0);
2453                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2454                 }
2455
2456               /* Verify checksum. */
2457               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2458                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2459             }
2460           else
2461             {
2462               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2463             }
2464
2465           /* Rewrite packet header and updates lengths. */
2466           adj0 = adj_get (adj_index0);
2467           adj1 = adj_get (adj_index1);
2468
2469           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2470           rw_len0 = adj0[0].rewrite_header.data_bytes;
2471           rw_len1 = adj1[0].rewrite_header.data_bytes;
2472           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2473           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2474
2475           /* Check MTU of outgoing interface. */
2476           error0 =
2477             (vlib_buffer_length_in_chain (vm, p0) >
2478              adj0[0].
2479              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2480              error0);
2481           error1 =
2482             (vlib_buffer_length_in_chain (vm, p1) >
2483              adj1[0].
2484              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2485              error1);
2486
2487           if (is_mcast)
2488             {
2489               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2490                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2491                         IP4_ERROR_SAME_INTERFACE : error0);
2492               error1 = ((adj1[0].rewrite_header.sw_if_index ==
2493                          vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2494                         IP4_ERROR_SAME_INTERFACE : error1);
2495             }
2496
2497           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2498            * to see the IP header */
2499           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2500             {
2501               next0 = adj0[0].rewrite_header.next_index;
2502               p0->current_data -= rw_len0;
2503               p0->current_length += rw_len0;
2504               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2505               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2506
2507               if (PREDICT_FALSE
2508                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2509                 vnet_feature_arc_start (lm->output_feature_arc_index,
2510                                         tx_sw_if_index0, &next0, p0);
2511             }
2512           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2513             {
2514               next1 = adj1[0].rewrite_header.next_index;
2515               p1->current_data -= rw_len1;
2516               p1->current_length += rw_len1;
2517
2518               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2519               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2520
2521               if (PREDICT_FALSE
2522                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2523                 vnet_feature_arc_start (lm->output_feature_arc_index,
2524                                         tx_sw_if_index1, &next1, p1);
2525             }
2526
2527           /* Guess we are only writing on simple Ethernet header. */
2528           vnet_rewrite_two_headers (adj0[0], adj1[0],
2529                                     ip0, ip1, sizeof (ethernet_header_t));
2530
2531           /*
2532            * Bump the per-adjacency counters
2533            */
2534           if (do_counters)
2535             {
2536               vlib_increment_combined_counter
2537                 (&adjacency_counters,
2538                  thread_index,
2539                  adj_index0, 1,
2540                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2541
2542               vlib_increment_combined_counter
2543                 (&adjacency_counters,
2544                  thread_index,
2545                  adj_index1, 1,
2546                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2547             }
2548
2549           if (is_midchain)
2550             {
2551               adj0->sub_type.midchain.fixup_func
2552                 (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2553               adj1->sub_type.midchain.fixup_func
2554                 (vm, adj1, p1, adj0->sub_type.midchain.fixup_data);
2555             }
2556           if (is_mcast)
2557             {
2558               /*
2559                * copy bytes from the IP address into the MAC rewrite
2560                */
2561               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2562               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2563             }
2564
2565           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2566                                            to_next, n_left_to_next,
2567                                            pi0, pi1, next0, next1);
2568         }
2569
2570       while (n_left_from > 0 && n_left_to_next > 0)
2571         {
2572           ip_adjacency_t *adj0;
2573           vlib_buffer_t *p0;
2574           ip4_header_t *ip0;
2575           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2576           u32 tx_sw_if_index0;
2577
2578           pi0 = to_next[0] = from[0];
2579
2580           p0 = vlib_get_buffer (vm, pi0);
2581
2582           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2583
2584           adj0 = adj_get (adj_index0);
2585
2586           ip0 = vlib_buffer_get_current (p0);
2587
2588           error0 = IP4_ERROR_NONE;
2589           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2590
2591           /* Decrement TTL & update checksum. */
2592           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2593             {
2594               i32 ttl0 = ip0->ttl;
2595
2596               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2597
2598               checksum0 += checksum0 >= 0xffff;
2599
2600               ip0->checksum = checksum0;
2601
2602               ASSERT (ip0->ttl > 0);
2603
2604               ttl0 -= 1;
2605
2606               ip0->ttl = ttl0;
2607
2608               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2609                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2610
2611               if (PREDICT_FALSE (ttl0 <= 0))
2612                 {
2613                   /*
2614                    * If the ttl drops below 1 when forwarding, generate
2615                    * an ICMP response.
2616                    */
2617                   error0 = IP4_ERROR_TIME_EXPIRED;
2618                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2619                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2620                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2621                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2622                                                0);
2623                 }
2624             }
2625           else
2626             {
2627               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2628             }
2629
2630           if (do_counters)
2631             vlib_prefetch_combined_counter (&adjacency_counters,
2632                                             thread_index, adj_index0);
2633
2634           /* Guess we are only writing on simple Ethernet header. */
2635           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2636           if (is_mcast)
2637             {
2638               /*
2639                * copy bytes from the IP address into the MAC rewrite
2640                */
2641               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2642             }
2643
2644           /* Update packet buffer attributes/set output interface. */
2645           rw_len0 = adj0[0].rewrite_header.data_bytes;
2646           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2647
2648           if (do_counters)
2649             vlib_increment_combined_counter
2650               (&adjacency_counters,
2651                thread_index, adj_index0, 1,
2652                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2653
2654           /* Check MTU of outgoing interface. */
2655           error0 = (vlib_buffer_length_in_chain (vm, p0)
2656                     > adj0[0].rewrite_header.max_l3_packet_bytes
2657                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2658           if (is_mcast)
2659             {
2660               error0 = ((adj0[0].rewrite_header.sw_if_index ==
2661                          vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2662                         IP4_ERROR_SAME_INTERFACE : error0);
2663             }
2664           p0->error = error_node->errors[error0];
2665
2666           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2667            * to see the IP header */
2668           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2669             {
2670               p0->current_data -= rw_len0;
2671               p0->current_length += rw_len0;
2672               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2673
2674               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2675               next0 = adj0[0].rewrite_header.next_index;
2676
2677               if (is_midchain)
2678                 {
2679                   adj0->sub_type.midchain.fixup_func
2680                     (vm, adj0, p0, adj0->sub_type.midchain.fixup_data);
2681                 }
2682
2683               if (PREDICT_FALSE
2684                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2685                 vnet_feature_arc_start (lm->output_feature_arc_index,
2686                                         tx_sw_if_index0, &next0, p0);
2687
2688             }
2689
2690           from += 1;
2691           n_left_from -= 1;
2692           to_next += 1;
2693           n_left_to_next -= 1;
2694
2695           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2696                                            to_next, n_left_to_next,
2697                                            pi0, next0);
2698         }
2699
2700       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2701     }
2702
2703   /* Need to do trace after rewrites to pick up new packet data. */
2704   if (node->flags & VLIB_NODE_FLAG_TRACE)
2705     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2706
2707   return frame->n_vectors;
2708 }
2709
2710
2711 /** @brief IPv4 rewrite node.
2712     @node ip4-rewrite
2713
2714     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2715     header checksum, fetch the ip adjacency, check the outbound mtu,
2716     apply the adjacency rewrite, and send pkts to the adjacency
2717     rewrite header's rewrite_next_index.
2718
2719     @param vm vlib_main_t corresponding to the current thread
2720     @param node vlib_node_runtime_t
2721     @param frame vlib_frame_t whose contents should be dispatched
2722
2723     @par Graph mechanics: buffer metadata, next index usage
2724
2725     @em Uses:
2726     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2727         - the rewrite adjacency index
2728     - <code>adj->lookup_next_index</code>
2729         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2730           the packet will be dropped.
2731     - <code>adj->rewrite_header</code>
2732         - Rewrite string length, rewrite string, next_index
2733
2734     @em Sets:
2735     - <code>b->current_data, b->current_length</code>
2736         - Updated net of applying the rewrite string
2737
2738     <em>Next Indices:</em>
2739     - <code> adj->rewrite_header.next_index </code>
2740       or @c ip4-drop
2741 */
2742 static uword
2743 ip4_rewrite (vlib_main_t * vm,
2744              vlib_node_runtime_t * node, vlib_frame_t * frame)
2745 {
2746   if (adj_are_counters_enabled ())
2747     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2748   else
2749     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2750 }
2751
2752 static uword
2753 ip4_midchain (vlib_main_t * vm,
2754               vlib_node_runtime_t * node, vlib_frame_t * frame)
2755 {
2756   if (adj_are_counters_enabled ())
2757     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2758   else
2759     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2760 }
2761
2762 static uword
2763 ip4_rewrite_mcast (vlib_main_t * vm,
2764                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2765 {
2766   if (adj_are_counters_enabled ())
2767     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2768   else
2769     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2770 }
2771
2772 static uword
2773 ip4_mcast_midchain (vlib_main_t * vm,
2774                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2775 {
2776   if (adj_are_counters_enabled ())
2777     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2778   else
2779     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2780 }
2781
2782 /* *INDENT-OFF* */
2783 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2784   .function = ip4_rewrite,
2785   .name = "ip4-rewrite",
2786   .vector_size = sizeof (u32),
2787
2788   .format_trace = format_ip4_rewrite_trace,
2789
2790   .n_next_nodes = 2,
2791   .next_nodes = {
2792     [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2793     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2794   },
2795 };
2796 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2797
2798 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2799   .function = ip4_rewrite_mcast,
2800   .name = "ip4-rewrite-mcast",
2801   .vector_size = sizeof (u32),
2802
2803   .format_trace = format_ip4_rewrite_trace,
2804   .sibling_of = "ip4-rewrite",
2805 };
2806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2807
2808 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2809   .function = ip4_mcast_midchain,
2810   .name = "ip4-mcast-midchain",
2811   .vector_size = sizeof (u32),
2812
2813   .format_trace = format_ip4_rewrite_trace,
2814   .sibling_of = "ip4-rewrite",
2815 };
2816 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2817
2818 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2819   .function = ip4_midchain,
2820   .name = "ip4-midchain",
2821   .vector_size = sizeof (u32),
2822   .format_trace = format_ip4_forward_next_trace,
2823   .sibling_of =  "ip4-rewrite",
2824 };
2825 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2826 /* *INDENT-ON* */
2827
2828 int
2829 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2830 {
2831   ip4_fib_mtrie_t *mtrie0;
2832   ip4_fib_mtrie_leaf_t leaf0;
2833   u32 lbi0;
2834
2835   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2836
2837   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2838   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2839   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2840
2841   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2842
2843   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2844 }
2845
2846 static clib_error_t *
2847 test_lookup_command_fn (vlib_main_t * vm,
2848                         unformat_input_t * input, vlib_cli_command_t * cmd)
2849 {
2850   ip4_fib_t *fib;
2851   u32 table_id = 0;
2852   f64 count = 1;
2853   u32 n;
2854   int i;
2855   ip4_address_t ip4_base_address;
2856   u64 errors = 0;
2857
2858   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2859     {
2860       if (unformat (input, "table %d", &table_id))
2861         {
2862           /* Make sure the entry exists. */
2863           fib = ip4_fib_get (table_id);
2864           if ((fib) && (fib->index != table_id))
2865             return clib_error_return (0, "<fib-index> %d does not exist",
2866                                       table_id);
2867         }
2868       else if (unformat (input, "count %f", &count))
2869         ;
2870
2871       else if (unformat (input, "%U",
2872                          unformat_ip4_address, &ip4_base_address))
2873         ;
2874       else
2875         return clib_error_return (0, "unknown input `%U'",
2876                                   format_unformat_error, input);
2877     }
2878
2879   n = count;
2880
2881   for (i = 0; i < n; i++)
2882     {
2883       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2884         errors++;
2885
2886       ip4_base_address.as_u32 =
2887         clib_host_to_net_u32 (1 +
2888                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2889     }
2890
2891   if (errors)
2892     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2893   else
2894     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2895
2896   return 0;
2897 }
2898
2899 /*?
2900  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2901  * given FIB table to determine if there is a conflict with the
2902  * adjacency table. The fib-id can be determined by using the
2903  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2904  * of 0 is used.
2905  *
2906  * @todo This command uses fib-id, other commands use table-id (not
2907  * just a name, they are different indexes). Would like to change this
2908  * to table-id for consistency.
2909  *
2910  * @cliexpar
2911  * Example of how to run the test lookup command:
2912  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2913  * No errors in 2 lookups
2914  * @cliexend
2915 ?*/
2916 /* *INDENT-OFF* */
2917 VLIB_CLI_COMMAND (lookup_test_command, static) =
2918 {
2919   .path = "test lookup",
2920   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2921   .function = test_lookup_command_fn,
2922 };
2923 /* *INDENT-ON* */
2924
2925 int
2926 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2927 {
2928   u32 fib_index;
2929
2930   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2931
2932   if (~0 == fib_index)
2933     return VNET_API_ERROR_NO_SUCH_FIB;
2934
2935   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2936                                   flow_hash_config);
2937
2938   return 0;
2939 }
2940
2941 static clib_error_t *
2942 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2943                              unformat_input_t * input,
2944                              vlib_cli_command_t * cmd)
2945 {
2946   int matched = 0;
2947   u32 table_id = 0;
2948   u32 flow_hash_config = 0;
2949   int rv;
2950
2951   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2952     {
2953       if (unformat (input, "table %d", &table_id))
2954         matched = 1;
2955 #define _(a,v) \
2956     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2957       foreach_flow_hash_bit
2958 #undef _
2959         else
2960         break;
2961     }
2962
2963   if (matched == 0)
2964     return clib_error_return (0, "unknown input `%U'",
2965                               format_unformat_error, input);
2966
2967   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2968   switch (rv)
2969     {
2970     case 0:
2971       break;
2972
2973     case VNET_API_ERROR_NO_SUCH_FIB:
2974       return clib_error_return (0, "no such FIB table %d", table_id);
2975
2976     default:
2977       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2978       break;
2979     }
2980
2981   return 0;
2982 }
2983
2984 /*?
2985  * Configure the set of IPv4 fields used by the flow hash.
2986  *
2987  * @cliexpar
2988  * Example of how to set the flow hash on a given table:
2989  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2990  * Example of how to display the configured flow hash:
2991  * @cliexstart{show ip fib}
2992  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2993  * 0.0.0.0/0
2994  *   unicast-ip4-chain
2995  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2996  *     [0] [@0]: dpo-drop ip6
2997  * 0.0.0.0/32
2998  *   unicast-ip4-chain
2999  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3000  *     [0] [@0]: dpo-drop ip6
3001  * 224.0.0.0/8
3002  *   unicast-ip4-chain
3003  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3004  *     [0] [@0]: dpo-drop ip6
3005  * 6.0.1.2/32
3006  *   unicast-ip4-chain
3007  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3008  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3009  * 7.0.0.1/32
3010  *   unicast-ip4-chain
3011  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3012  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3013  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3014  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3015  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3016  * 240.0.0.0/8
3017  *   unicast-ip4-chain
3018  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3019  *     [0] [@0]: dpo-drop ip6
3020  * 255.255.255.255/32
3021  *   unicast-ip4-chain
3022  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3023  *     [0] [@0]: dpo-drop ip6
3024  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3025  * 0.0.0.0/0
3026  *   unicast-ip4-chain
3027  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3028  *     [0] [@0]: dpo-drop ip6
3029  * 0.0.0.0/32
3030  *   unicast-ip4-chain
3031  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3032  *     [0] [@0]: dpo-drop ip6
3033  * 172.16.1.0/24
3034  *   unicast-ip4-chain
3035  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3036  *     [0] [@4]: ipv4-glean: af_packet0
3037  * 172.16.1.1/32
3038  *   unicast-ip4-chain
3039  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3040  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3041  * 172.16.1.2/32
3042  *   unicast-ip4-chain
3043  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3044  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3045  * 172.16.2.0/24
3046  *   unicast-ip4-chain
3047  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3048  *     [0] [@4]: ipv4-glean: af_packet1
3049  * 172.16.2.1/32
3050  *   unicast-ip4-chain
3051  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3052  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3053  * 224.0.0.0/8
3054  *   unicast-ip4-chain
3055  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3056  *     [0] [@0]: dpo-drop ip6
3057  * 240.0.0.0/8
3058  *   unicast-ip4-chain
3059  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3060  *     [0] [@0]: dpo-drop ip6
3061  * 255.255.255.255/32
3062  *   unicast-ip4-chain
3063  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3064  *     [0] [@0]: dpo-drop ip6
3065  * @cliexend
3066 ?*/
3067 /* *INDENT-OFF* */
3068 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3069 {
3070   .path = "set ip flow-hash",
3071   .short_help =
3072   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3073   .function = set_ip_flow_hash_command_fn,
3074 };
3075 /* *INDENT-ON* */
3076
3077 int
3078 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3079                              u32 table_index)
3080 {
3081   vnet_main_t *vnm = vnet_get_main ();
3082   vnet_interface_main_t *im = &vnm->interface_main;
3083   ip4_main_t *ipm = &ip4_main;
3084   ip_lookup_main_t *lm = &ipm->lookup_main;
3085   vnet_classify_main_t *cm = &vnet_classify_main;
3086   ip4_address_t *if_addr;
3087
3088   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3089     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3090
3091   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3092     return VNET_API_ERROR_NO_SUCH_ENTRY;
3093
3094   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3095   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3096
3097   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3098
3099   if (NULL != if_addr)
3100     {
3101       fib_prefix_t pfx = {
3102         .fp_len = 32,
3103         .fp_proto = FIB_PROTOCOL_IP4,
3104         .fp_addr.ip4 = *if_addr,
3105       };
3106       u32 fib_index;
3107
3108       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3109                                                        sw_if_index);
3110
3111
3112       if (table_index != (u32) ~ 0)
3113         {
3114           dpo_id_t dpo = DPO_INVALID;
3115
3116           dpo_set (&dpo,
3117                    DPO_CLASSIFY,
3118                    DPO_PROTO_IP4,
3119                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3120
3121           fib_table_entry_special_dpo_add (fib_index,
3122                                            &pfx,
3123                                            FIB_SOURCE_CLASSIFY,
3124                                            FIB_ENTRY_FLAG_NONE, &dpo);
3125           dpo_reset (&dpo);
3126         }
3127       else
3128         {
3129           fib_table_entry_special_remove (fib_index,
3130                                           &pfx, FIB_SOURCE_CLASSIFY);
3131         }
3132     }
3133
3134   return 0;
3135 }
3136
3137 static clib_error_t *
3138 set_ip_classify_command_fn (vlib_main_t * vm,
3139                             unformat_input_t * input,
3140                             vlib_cli_command_t * cmd)
3141 {
3142   u32 table_index = ~0;
3143   int table_index_set = 0;
3144   u32 sw_if_index = ~0;
3145   int rv;
3146
3147   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3148     {
3149       if (unformat (input, "table-index %d", &table_index))
3150         table_index_set = 1;
3151       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3152                          vnet_get_main (), &sw_if_index))
3153         ;
3154       else
3155         break;
3156     }
3157
3158   if (table_index_set == 0)
3159     return clib_error_return (0, "classify table-index must be specified");
3160
3161   if (sw_if_index == ~0)
3162     return clib_error_return (0, "interface / subif must be specified");
3163
3164   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3165
3166   switch (rv)
3167     {
3168     case 0:
3169       break;
3170
3171     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3172       return clib_error_return (0, "No such interface");
3173
3174     case VNET_API_ERROR_NO_SUCH_ENTRY:
3175       return clib_error_return (0, "No such classifier table");
3176     }
3177   return 0;
3178 }
3179
3180 /*?
3181  * Assign a classification table to an interface. The classification
3182  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3183  * commands. Once the table is created, use this command to filter packets
3184  * on an interface.
3185  *
3186  * @cliexpar
3187  * Example of how to assign a classification table to an interface:
3188  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3189 ?*/
3190 /* *INDENT-OFF* */
3191 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3192 {
3193     .path = "set ip classify",
3194     .short_help =
3195     "set ip classify intfc <interface> table-index <classify-idx>",
3196     .function = set_ip_classify_command_fn,
3197 };
3198 /* *INDENT-ON* */
3199
3200 static clib_error_t *
3201 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3202 {
3203   ip4_main_t *im = &ip4_main;
3204   uword heapsize = 0;
3205
3206   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3207     {
3208       if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3209         ;
3210       else
3211         return clib_error_return (0,
3212                                   "invalid heap-size parameter `%U'",
3213                                   format_unformat_error, input);
3214     }
3215
3216   im->mtrie_heap_size = heapsize;
3217
3218   return 0;
3219 }
3220
3221 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3222
3223 /*
3224  * fd.io coding-style-patch-verification: ON
3225  *
3226  * Local Variables:
3227  * eval: (c-set-style "gnu")
3228  * End:
3229  */