daffae410cc397b3ade0855e2e1a5018d9679712
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
55
56 /**
57  * @file
58  * @brief IPv4 Forwarding.
59  *
60  * This file contains the source code for IPv4 forwarding.
61  */
62
63 always_inline uword
64 ip4_lookup_inline (vlib_main_t * vm,
65                    vlib_node_runtime_t * node,
66                    vlib_frame_t * frame,
67                    int lookup_for_responses_to_locally_received_packets)
68 {
69   ip4_main_t *im = &ip4_main;
70   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
71   u32 n_left_from, n_left_to_next, *from, *to_next;
72   ip_lookup_next_t next;
73   u32 thread_index = vlib_get_thread_index ();
74
75   from = vlib_frame_vector_args (frame);
76   n_left_from = frame->n_vectors;
77   next = node->cached_next_index;
78
79   while (n_left_from > 0)
80     {
81       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
82
83       while (n_left_from >= 8 && n_left_to_next >= 4)
84         {
85           vlib_buffer_t *p0, *p1, *p2, *p3;
86           ip4_header_t *ip0, *ip1, *ip2, *ip3;
87           ip_lookup_next_t next0, next1, next2, next3;
88           const load_balance_t *lb0, *lb1, *lb2, *lb3;
89           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
90           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
91           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
92           u32 pi0, fib_index0, lb_index0;
93           u32 pi1, fib_index1, lb_index1;
94           u32 pi2, fib_index2, lb_index2;
95           u32 pi3, fib_index3, lb_index3;
96           flow_hash_config_t flow_hash_config0, flow_hash_config1;
97           flow_hash_config_t flow_hash_config2, flow_hash_config3;
98           u32 hash_c0, hash_c1, hash_c2, hash_c3;
99           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
100
101           /* Prefetch next iteration. */
102           {
103             vlib_buffer_t *p4, *p5, *p6, *p7;
104
105             p4 = vlib_get_buffer (vm, from[4]);
106             p5 = vlib_get_buffer (vm, from[5]);
107             p6 = vlib_get_buffer (vm, from[6]);
108             p7 = vlib_get_buffer (vm, from[7]);
109
110             vlib_prefetch_buffer_header (p4, LOAD);
111             vlib_prefetch_buffer_header (p5, LOAD);
112             vlib_prefetch_buffer_header (p6, LOAD);
113             vlib_prefetch_buffer_header (p7, LOAD);
114
115             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
116             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
117             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
118             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
119           }
120
121           pi0 = to_next[0] = from[0];
122           pi1 = to_next[1] = from[1];
123           pi2 = to_next[2] = from[2];
124           pi3 = to_next[3] = from[3];
125
126           from += 4;
127           to_next += 4;
128           n_left_to_next -= 4;
129           n_left_from -= 4;
130
131           p0 = vlib_get_buffer (vm, pi0);
132           p1 = vlib_get_buffer (vm, pi1);
133           p2 = vlib_get_buffer (vm, pi2);
134           p3 = vlib_get_buffer (vm, pi3);
135
136           ip0 = vlib_buffer_get_current (p0);
137           ip1 = vlib_buffer_get_current (p1);
138           ip2 = vlib_buffer_get_current (p2);
139           ip3 = vlib_buffer_get_current (p3);
140
141           dst_addr0 = &ip0->dst_address;
142           dst_addr1 = &ip1->dst_address;
143           dst_addr2 = &ip2->dst_address;
144           dst_addr3 = &ip3->dst_address;
145
146           fib_index0 =
147             vec_elt (im->fib_index_by_sw_if_index,
148                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
149           fib_index1 =
150             vec_elt (im->fib_index_by_sw_if_index,
151                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
152           fib_index2 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155           fib_index3 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
158           fib_index0 =
159             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
160              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
161           fib_index1 =
162             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
163              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
164           fib_index2 =
165             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
167           fib_index3 =
168             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
170
171
172           if (!lookup_for_responses_to_locally_received_packets)
173             {
174               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
175               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
176               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
177               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
178
179               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
180               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
181               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
182               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
183             }
184
185           if (!lookup_for_responses_to_locally_received_packets)
186             {
187               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
188               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
189               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
190               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
191             }
192
193           if (!lookup_for_responses_to_locally_received_packets)
194             {
195               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
196               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
197               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
198               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
199             }
200
201           if (lookup_for_responses_to_locally_received_packets)
202             {
203               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
204               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
205               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
206               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
207             }
208           else
209             {
210               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
211               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
212               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
213               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
214             }
215
216           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
217           lb0 = load_balance_get (lb_index0);
218           lb1 = load_balance_get (lb_index1);
219           lb2 = load_balance_get (lb_index2);
220           lb3 = load_balance_get (lb_index3);
221
222           ASSERT (lb0->lb_n_buckets > 0);
223           ASSERT (is_pow2 (lb0->lb_n_buckets));
224           ASSERT (lb1->lb_n_buckets > 0);
225           ASSERT (is_pow2 (lb1->lb_n_buckets));
226           ASSERT (lb2->lb_n_buckets > 0);
227           ASSERT (is_pow2 (lb2->lb_n_buckets));
228           ASSERT (lb3->lb_n_buckets > 0);
229           ASSERT (is_pow2 (lb3->lb_n_buckets));
230
231           /* Use flow hash to compute multipath adjacency. */
232           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
233           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
234           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
235           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
236           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
237             {
238               flow_hash_config0 = lb0->lb_hash_config;
239               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
240                 ip4_compute_flow_hash (ip0, flow_hash_config0);
241               dpo0 =
242                 load_balance_get_fwd_bucket (lb0,
243                                              (hash_c0 &
244                                               (lb0->lb_n_buckets_minus_1)));
245             }
246           else
247             {
248               dpo0 = load_balance_get_bucket_i (lb0, 0);
249             }
250           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
251             {
252               flow_hash_config1 = lb1->lb_hash_config;
253               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
254                 ip4_compute_flow_hash (ip1, flow_hash_config1);
255               dpo1 =
256                 load_balance_get_fwd_bucket (lb1,
257                                              (hash_c1 &
258                                               (lb1->lb_n_buckets_minus_1)));
259             }
260           else
261             {
262               dpo1 = load_balance_get_bucket_i (lb1, 0);
263             }
264           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
265             {
266               flow_hash_config2 = lb2->lb_hash_config;
267               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
268                 ip4_compute_flow_hash (ip2, flow_hash_config2);
269               dpo2 =
270                 load_balance_get_fwd_bucket (lb2,
271                                              (hash_c2 &
272                                               (lb2->lb_n_buckets_minus_1)));
273             }
274           else
275             {
276               dpo2 = load_balance_get_bucket_i (lb2, 0);
277             }
278           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
279             {
280               flow_hash_config3 = lb3->lb_hash_config;
281               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
282                 ip4_compute_flow_hash (ip3, flow_hash_config3);
283               dpo3 =
284                 load_balance_get_fwd_bucket (lb3,
285                                              (hash_c3 &
286                                               (lb3->lb_n_buckets_minus_1)));
287             }
288           else
289             {
290               dpo3 = load_balance_get_bucket_i (lb3, 0);
291             }
292
293           next0 = dpo0->dpoi_next_node;
294           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
295           next1 = dpo1->dpoi_next_node;
296           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
297           next2 = dpo2->dpoi_next_node;
298           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
299           next3 = dpo3->dpoi_next_node;
300           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
301
302           vlib_increment_combined_counter
303             (cm, thread_index, lb_index0, 1,
304              vlib_buffer_length_in_chain (vm, p0));
305           vlib_increment_combined_counter
306             (cm, thread_index, lb_index1, 1,
307              vlib_buffer_length_in_chain (vm, p1));
308           vlib_increment_combined_counter
309             (cm, thread_index, lb_index2, 1,
310              vlib_buffer_length_in_chain (vm, p2));
311           vlib_increment_combined_counter
312             (cm, thread_index, lb_index3, 1,
313              vlib_buffer_length_in_chain (vm, p3));
314
315           vlib_validate_buffer_enqueue_x4 (vm, node, next,
316                                            to_next, n_left_to_next,
317                                            pi0, pi1, pi2, pi3,
318                                            next0, next1, next2, next3);
319         }
320
321       while (n_left_from > 0 && n_left_to_next > 0)
322         {
323           vlib_buffer_t *p0;
324           ip4_header_t *ip0;
325           ip_lookup_next_t next0;
326           const load_balance_t *lb0;
327           ip4_fib_mtrie_t *mtrie0;
328           ip4_fib_mtrie_leaf_t leaf0;
329           ip4_address_t *dst_addr0;
330           u32 pi0, fib_index0, lbi0;
331           flow_hash_config_t flow_hash_config0;
332           const dpo_id_t *dpo0;
333           u32 hash_c0;
334
335           pi0 = from[0];
336           to_next[0] = pi0;
337
338           p0 = vlib_get_buffer (vm, pi0);
339
340           ip0 = vlib_buffer_get_current (p0);
341
342           dst_addr0 = &ip0->dst_address;
343
344           fib_index0 =
345             vec_elt (im->fib_index_by_sw_if_index,
346                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
347           fib_index0 =
348             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
349              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
350
351           if (!lookup_for_responses_to_locally_received_packets)
352             {
353               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
354
355               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
356             }
357
358           if (!lookup_for_responses_to_locally_received_packets)
359             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
360
361           if (!lookup_for_responses_to_locally_received_packets)
362             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
363
364           if (lookup_for_responses_to_locally_received_packets)
365             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
366           else
367             {
368               /* Handle default route. */
369               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
370             }
371
372           ASSERT (lbi0);
373           lb0 = load_balance_get (lbi0);
374
375           ASSERT (lb0->lb_n_buckets > 0);
376           ASSERT (is_pow2 (lb0->lb_n_buckets));
377
378           /* Use flow hash to compute multipath adjacency. */
379           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
380           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
381             {
382               flow_hash_config0 = lb0->lb_hash_config;
383
384               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
385                 ip4_compute_flow_hash (ip0, flow_hash_config0);
386               dpo0 =
387                 load_balance_get_fwd_bucket (lb0,
388                                              (hash_c0 &
389                                               (lb0->lb_n_buckets_minus_1)));
390             }
391           else
392             {
393               dpo0 = load_balance_get_bucket_i (lb0, 0);
394             }
395
396           next0 = dpo0->dpoi_next_node;
397           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
398
399           vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
400                                            vlib_buffer_length_in_chain (vm,
401                                                                         p0));
402
403           from += 1;
404           to_next += 1;
405           n_left_to_next -= 1;
406           n_left_from -= 1;
407
408           if (PREDICT_FALSE (next0 != next))
409             {
410               n_left_to_next += 1;
411               vlib_put_next_frame (vm, node, next, n_left_to_next);
412               next = next0;
413               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
414               to_next[0] = pi0;
415               to_next += 1;
416               n_left_to_next -= 1;
417             }
418         }
419
420       vlib_put_next_frame (vm, node, next, n_left_to_next);
421     }
422
423   if (node->flags & VLIB_NODE_FLAG_TRACE)
424     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
425
426   return frame->n_vectors;
427 }
428
429 /** @brief IPv4 lookup node.
430     @node ip4-lookup
431
432     This is the main IPv4 lookup dispatch node.
433
434     @param vm vlib_main_t corresponding to the current thread
435     @param node vlib_node_runtime_t
436     @param frame vlib_frame_t whose contents should be dispatched
437
438     @par Graph mechanics: buffer metadata, next index usage
439
440     @em Uses:
441     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
442         - Indicates the @c sw_if_index value of the interface that the
443           packet was received on.
444     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
445         - When the value is @c ~0 then the node performs a longest prefix
446           match (LPM) for the packet destination address in the FIB attached
447           to the receive interface.
448         - Otherwise perform LPM for the packet destination address in the
449           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
450           value (0, 1, ...) and not a VRF id.
451
452     @em Sets:
453     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
454         - The lookup result adjacency index.
455
456     <em>Next Index:</em>
457     - Dispatches the packet to the node index found in
458       ip_adjacency_t @c adj->lookup_next_index
459       (where @c adj is the lookup result adjacency).
460 */
461 static uword
462 ip4_lookup (vlib_main_t * vm,
463             vlib_node_runtime_t * node, vlib_frame_t * frame)
464 {
465   return ip4_lookup_inline (vm, node, frame,
466                             /* lookup_for_responses_to_locally_received_packets */
467                             0);
468
469 }
470
471 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
472
473 VLIB_REGISTER_NODE (ip4_lookup_node) =
474 {
475 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
476     sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
477     IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
478
479 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
480
481 always_inline uword
482 ip4_load_balance (vlib_main_t * vm,
483                   vlib_node_runtime_t * node, vlib_frame_t * frame)
484 {
485   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
486   u32 n_left_from, n_left_to_next, *from, *to_next;
487   ip_lookup_next_t next;
488   u32 thread_index = vlib_get_thread_index ();
489
490   from = vlib_frame_vector_args (frame);
491   n_left_from = frame->n_vectors;
492   next = node->cached_next_index;
493
494   if (node->flags & VLIB_NODE_FLAG_TRACE)
495     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
496
497   while (n_left_from > 0)
498     {
499       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
500
501
502       while (n_left_from >= 4 && n_left_to_next >= 2)
503         {
504           ip_lookup_next_t next0, next1;
505           const load_balance_t *lb0, *lb1;
506           vlib_buffer_t *p0, *p1;
507           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
508           const ip4_header_t *ip0, *ip1;
509           const dpo_id_t *dpo0, *dpo1;
510
511           /* Prefetch next iteration. */
512           {
513             vlib_buffer_t *p2, *p3;
514
515             p2 = vlib_get_buffer (vm, from[2]);
516             p3 = vlib_get_buffer (vm, from[3]);
517
518             vlib_prefetch_buffer_header (p2, STORE);
519             vlib_prefetch_buffer_header (p3, STORE);
520
521             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
522             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
523           }
524
525           pi0 = to_next[0] = from[0];
526           pi1 = to_next[1] = from[1];
527
528           from += 2;
529           n_left_from -= 2;
530           to_next += 2;
531           n_left_to_next -= 2;
532
533           p0 = vlib_get_buffer (vm, pi0);
534           p1 = vlib_get_buffer (vm, pi1);
535
536           ip0 = vlib_buffer_get_current (p0);
537           ip1 = vlib_buffer_get_current (p1);
538           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
539           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
540
541           lb0 = load_balance_get (lbi0);
542           lb1 = load_balance_get (lbi1);
543
544           /*
545            * this node is for via FIBs we can re-use the hash value from the
546            * to node if present.
547            * We don't want to use the same hash value at each level in the recursion
548            * graph as that would lead to polarisation
549            */
550           hc0 = hc1 = 0;
551
552           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
553             {
554               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
555                 {
556                   hc0 = vnet_buffer (p0)->ip.flow_hash =
557                     vnet_buffer (p0)->ip.flow_hash >> 1;
558                 }
559               else
560                 {
561                   hc0 = vnet_buffer (p0)->ip.flow_hash =
562                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
563                 }
564               dpo0 = load_balance_get_fwd_bucket
565                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
566             }
567           else
568             {
569               dpo0 = load_balance_get_bucket_i (lb0, 0);
570             }
571           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
572             {
573               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
574                 {
575                   hc1 = vnet_buffer (p1)->ip.flow_hash =
576                     vnet_buffer (p1)->ip.flow_hash >> 1;
577                 }
578               else
579                 {
580                   hc1 = vnet_buffer (p1)->ip.flow_hash =
581                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
582                 }
583               dpo1 = load_balance_get_fwd_bucket
584                 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
585             }
586           else
587             {
588               dpo1 = load_balance_get_bucket_i (lb1, 0);
589             }
590
591           next0 = dpo0->dpoi_next_node;
592           next1 = dpo1->dpoi_next_node;
593
594           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
595           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
596
597           vlib_increment_combined_counter
598             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
599           vlib_increment_combined_counter
600             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
601
602           vlib_validate_buffer_enqueue_x2 (vm, node, next,
603                                            to_next, n_left_to_next,
604                                            pi0, pi1, next0, next1);
605         }
606
607       while (n_left_from > 0 && n_left_to_next > 0)
608         {
609           ip_lookup_next_t next0;
610           const load_balance_t *lb0;
611           vlib_buffer_t *p0;
612           u32 pi0, lbi0, hc0;
613           const ip4_header_t *ip0;
614           const dpo_id_t *dpo0;
615
616           pi0 = from[0];
617           to_next[0] = pi0;
618           from += 1;
619           to_next += 1;
620           n_left_to_next -= 1;
621           n_left_from -= 1;
622
623           p0 = vlib_get_buffer (vm, pi0);
624
625           ip0 = vlib_buffer_get_current (p0);
626           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
627
628           lb0 = load_balance_get (lbi0);
629
630           hc0 = 0;
631           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
632             {
633               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
634                 {
635                   hc0 = vnet_buffer (p0)->ip.flow_hash =
636                     vnet_buffer (p0)->ip.flow_hash >> 1;
637                 }
638               else
639                 {
640                   hc0 = vnet_buffer (p0)->ip.flow_hash =
641                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
642                 }
643               dpo0 = load_balance_get_fwd_bucket
644                 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
645             }
646           else
647             {
648               dpo0 = load_balance_get_bucket_i (lb0, 0);
649             }
650
651           next0 = dpo0->dpoi_next_node;
652           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
653
654           vlib_increment_combined_counter
655             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
656
657           vlib_validate_buffer_enqueue_x1 (vm, node, next,
658                                            to_next, n_left_to_next,
659                                            pi0, next0);
660         }
661
662       vlib_put_next_frame (vm, node, next, n_left_to_next);
663     }
664
665   return frame->n_vectors;
666 }
667
668 VLIB_REGISTER_NODE (ip4_load_balance_node) =
669 {
670 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
671     sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
672     format_ip4_lookup_trace,};
673
674 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
675
676 /* get first interface address */
677 ip4_address_t *
678 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
679                              ip_interface_address_t ** result_ia)
680 {
681   ip_lookup_main_t *lm = &im->lookup_main;
682   ip_interface_address_t *ia = 0;
683   ip4_address_t *result = 0;
684
685   /* *INDENT-OFF* */
686   foreach_ip_interface_address
687     (lm, ia, sw_if_index,
688      1 /* honor unnumbered */ ,
689      ({
690        ip4_address_t * a =
691          ip_interface_address_get_address (lm, ia);
692        result = a;
693        break;
694      }));
695   /* *INDENT-OFF* */
696   if (result_ia)
697     *result_ia = result ? ia : 0;
698   return result;
699 }
700
701 static void
702 ip4_add_interface_routes (u32 sw_if_index,
703                           ip4_main_t * im, u32 fib_index,
704                           ip_interface_address_t * a)
705 {
706   ip_lookup_main_t *lm = &im->lookup_main;
707   ip4_address_t *address = ip_interface_address_get_address (lm, a);
708   fib_prefix_t pfx = {
709     .fp_len = a->address_length,
710     .fp_proto = FIB_PROTOCOL_IP4,
711     .fp_addr.ip4 = *address,
712   };
713
714   if (pfx.fp_len <= 30)
715     {
716       /* a /30 or shorter - add a glean for the network address */
717       fib_table_entry_update_one_path (fib_index, &pfx,
718                                        FIB_SOURCE_INTERFACE,
719                                        (FIB_ENTRY_FLAG_CONNECTED |
720                                         FIB_ENTRY_FLAG_ATTACHED),
721                                        DPO_PROTO_IP4,
722                                        /* No next-hop address */
723                                        NULL,
724                                        sw_if_index,
725                                        // invalid FIB index
726                                        ~0,
727                                        1,
728                                        // no out-label stack
729                                        NULL,
730                                        FIB_ROUTE_PATH_FLAG_NONE);
731
732       /* Add the two broadcast addresses as drop */
733       fib_prefix_t net_pfx = {
734         .fp_len = 32,
735         .fp_proto = FIB_PROTOCOL_IP4,
736         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
737       };
738       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
739         fib_table_entry_special_add(fib_index,
740                                     &net_pfx,
741                                     FIB_SOURCE_INTERFACE,
742                                     (FIB_ENTRY_FLAG_DROP |
743                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
744       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
745       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
746         fib_table_entry_special_add(fib_index,
747                                     &net_pfx,
748                                     FIB_SOURCE_INTERFACE,
749                                     (FIB_ENTRY_FLAG_DROP |
750                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
751     }
752   else if (pfx.fp_len == 31)
753     {
754       u32 mask = clib_host_to_net_u32(1);
755       fib_prefix_t net_pfx = pfx;
756
757       net_pfx.fp_len = 32;
758       net_pfx.fp_addr.ip4.as_u32 ^= mask;
759
760       /* a /31 - add the other end as an attached host */
761       fib_table_entry_update_one_path (fib_index, &net_pfx,
762                                        FIB_SOURCE_INTERFACE,
763                                        (FIB_ENTRY_FLAG_ATTACHED),
764                                        DPO_PROTO_IP4,
765                                        &net_pfx.fp_addr,
766                                        sw_if_index,
767                                        // invalid FIB index
768                                        ~0,
769                                        1,
770                                        NULL,
771                                        FIB_ROUTE_PATH_FLAG_NONE);
772     }
773   pfx.fp_len = 32;
774
775   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
776     {
777       u32 classify_table_index =
778         lm->classify_table_index_by_sw_if_index[sw_if_index];
779       if (classify_table_index != (u32) ~ 0)
780         {
781           dpo_id_t dpo = DPO_INVALID;
782
783           dpo_set (&dpo,
784                    DPO_CLASSIFY,
785                    DPO_PROTO_IP4,
786                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
787
788           fib_table_entry_special_dpo_add (fib_index,
789                                            &pfx,
790                                            FIB_SOURCE_CLASSIFY,
791                                            FIB_ENTRY_FLAG_NONE, &dpo);
792           dpo_reset (&dpo);
793         }
794     }
795
796   fib_table_entry_update_one_path (fib_index, &pfx,
797                                    FIB_SOURCE_INTERFACE,
798                                    (FIB_ENTRY_FLAG_CONNECTED |
799                                     FIB_ENTRY_FLAG_LOCAL),
800                                    DPO_PROTO_IP4,
801                                    &pfx.fp_addr,
802                                    sw_if_index,
803                                    // invalid FIB index
804                                    ~0,
805                                    1, NULL,
806                                    FIB_ROUTE_PATH_FLAG_NONE);
807 }
808
809 static void
810 ip4_del_interface_routes (ip4_main_t * im,
811                           u32 fib_index,
812                           ip4_address_t * address, u32 address_length)
813 {
814   fib_prefix_t pfx = {
815     .fp_len = address_length,
816     .fp_proto = FIB_PROTOCOL_IP4,
817     .fp_addr.ip4 = *address,
818   };
819
820   if (pfx.fp_len <= 30)
821     {
822       fib_prefix_t net_pfx = {
823         .fp_len = 32,
824         .fp_proto = FIB_PROTOCOL_IP4,
825         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
826       };
827       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
828         fib_table_entry_special_remove(fib_index,
829                                        &net_pfx,
830                                        FIB_SOURCE_INTERFACE);
831       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
832       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
833         fib_table_entry_special_remove(fib_index,
834                                        &net_pfx,
835                                        FIB_SOURCE_INTERFACE);
836       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
837     }
838     else if (pfx.fp_len == 31)
839     {
840       u32 mask = clib_host_to_net_u32(1);
841       fib_prefix_t net_pfx = pfx;
842
843       net_pfx.fp_len = 32;
844       net_pfx.fp_addr.ip4.as_u32 ^= mask;
845
846       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
847     }
848
849   pfx.fp_len = 32;
850   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
851 }
852
853 void
854 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
855 {
856   ip4_main_t *im = &ip4_main;
857
858   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
859
860   /*
861    * enable/disable only on the 1<->0 transition
862    */
863   if (is_enable)
864     {
865       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
866         return;
867     }
868   else
869     {
870       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
871       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
872         return;
873     }
874   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
875                                !is_enable, 0, 0);
876
877
878   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
879                                sw_if_index, !is_enable, 0, 0);
880 }
881
882 static clib_error_t *
883 ip4_add_del_interface_address_internal (vlib_main_t * vm,
884                                         u32 sw_if_index,
885                                         ip4_address_t * address,
886                                         u32 address_length, u32 is_del)
887 {
888   vnet_main_t *vnm = vnet_get_main ();
889   ip4_main_t *im = &ip4_main;
890   ip_lookup_main_t *lm = &im->lookup_main;
891   clib_error_t *error = 0;
892   u32 if_address_index, elts_before;
893   ip4_address_fib_t ip4_af, *addr_fib = 0;
894
895   /* local0 interface doesn't support IP addressing  */
896   if (sw_if_index == 0)
897     {
898       return
899        clib_error_create ("local0 interface doesn't support IP addressing");
900     }
901
902   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
903   ip4_addr_fib_init (&ip4_af, address,
904                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
905   vec_add1 (addr_fib, ip4_af);
906
907   /* FIXME-LATER
908    * there is no support for adj-fib handling in the presence of overlapping
909    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
910    * most routers do.
911    */
912   /* *INDENT-OFF* */
913   if (!is_del)
914     {
915       /* When adding an address check that it does not conflict
916          with an existing address. */
917       ip_interface_address_t *ia;
918       foreach_ip_interface_address
919         (&im->lookup_main, ia, sw_if_index,
920          0 /* honor unnumbered */ ,
921          ({
922            ip4_address_t * x =
923              ip_interface_address_get_address
924              (&im->lookup_main, ia);
925            if (ip4_destination_matches_route
926                (im, address, x, ia->address_length) ||
927                ip4_destination_matches_route (im,
928                                               x,
929                                               address,
930                                               address_length))
931              return
932                clib_error_create
933                ("failed to add %U which conflicts with %U for interface %U",
934                 format_ip4_address_and_length, address,
935                 address_length,
936                 format_ip4_address_and_length, x,
937                 ia->address_length,
938                 format_vnet_sw_if_index_name, vnm,
939                 sw_if_index);
940          }));
941     }
942   /* *INDENT-ON* */
943
944   elts_before = pool_elts (lm->if_address_pool);
945
946   error = ip_interface_address_add_del
947     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
948   if (error)
949     goto done;
950
951   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
952
953   if (is_del)
954     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
955   else
956     ip4_add_interface_routes (sw_if_index,
957                               im, ip4_af.fib_index,
958                               pool_elt_at_index
959                               (lm->if_address_pool, if_address_index));
960
961   /* If pool did not grow/shrink: add duplicate address. */
962   if (elts_before != pool_elts (lm->if_address_pool))
963     {
964       ip4_add_del_interface_address_callback_t *cb;
965       vec_foreach (cb, im->add_del_interface_address_callbacks)
966         cb->function (im, cb->function_opaque, sw_if_index,
967                       address, address_length, if_address_index, is_del);
968     }
969
970 done:
971   vec_free (addr_fib);
972   return error;
973 }
974
975 clib_error_t *
976 ip4_add_del_interface_address (vlib_main_t * vm,
977                                u32 sw_if_index,
978                                ip4_address_t * address,
979                                u32 address_length, u32 is_del)
980 {
981   return ip4_add_del_interface_address_internal
982     (vm, sw_if_index, address, address_length, is_del);
983 }
984
/*
 * Built-in ip4 unicast rx feature path definition.
 *
 * The arc is named "ip4-unicast", starts at the "ip4-input" and
 * "ip4-input-no-checksum" nodes, and has its index stored in
 * ip4_main.lookup_main.ucast_feature_arc_index.  Each VNET_FEATURE_INIT
 * below names one node on the arc together with the node(s) it runs before.
 * NOTE(review): the final order is presumably derived from these
 * .runs_before constraints by the feature infrastructure - confirm there.
 */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

/* ip4-flow-classify: before ip4-inacl */
VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

/* ip4-inacl: before ip4-source-check-via-rx */
VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

/* ip4-source-check-via-rx: before ip4-source-check-via-any */
VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

/* ip4-source-check-via-any: before ip4-policer-classify */
VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

/* ip4-source-and-port-range-check-rx: also before ip4-policer-classify */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

/* ip4-policer-classify: before ipsec-input-ip4 */
VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
};

/* ipsec-input-ip4: before vpath-input-ip4 */
VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec-input-ip4",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

/* vpath-input-ip4: before ip4-vxlan-bypass */
VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

/* ip4-vxlan-bypass: before ip4-lookup */
VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* ip4-drop: before ip4-lookup; this is the feature toggled by
   ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del() */
VNET_FEATURE_INIT (ip4_drop, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-drop",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/* ip4-lookup: no ordering constraint of its own */
VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
1070
/*
 * Built-in ip4 multicast rx feature path definition.
 *
 * The arc is named "ip4-multicast", starts at the same two input nodes as
 * the unicast arc, and has its index stored in
 * ip4_main.lookup_main.mcast_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

/* vpath-input-ip4: before ip4-mfib-forward-lookup */
VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* ip4-drop: before ip4-mfib-forward-lookup; toggled together with the
   unicast ip4-drop feature */
VNET_FEATURE_INIT (ip4_mc_drop, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-drop",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/* ip4-mfib-forward-lookup: no ordering constraint of its own */
VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1099
/*
 * ip4 tx (output) feature path definition.
 *
 * The arc is named "ip4-output", starts at the "ip4-rewrite" and
 * "ip4-midchain" nodes, and has its index stored in
 * ip4_main.lookup_main.output_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

/* Source and port-range check on tx: before ipsec-output-ip4 */
VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
};

/* ipsec-output-ip4: before interface-output */
VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec-output-ip4",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1130
1131 static clib_error_t *
1132 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1133 {
1134   ip4_main_t *im = &ip4_main;
1135
1136   /* Fill in lookup tables with default table (0). */
1137   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1138   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1139
1140   if (!is_add)
1141     {
1142       ip4_main_t *im4 = &ip4_main;
1143       ip_lookup_main_t *lm4 = &im4->lookup_main;
1144       ip_interface_address_t *ia = 0;
1145       ip4_address_t *address;
1146       vlib_main_t *vm = vlib_get_main ();
1147
1148       /* *INDENT-OFF* */
1149       foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1150       ({
1151         address = ip_interface_address_get_address (lm4, ia);
1152         ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1153       }));
1154       /* *INDENT-ON* */
1155     }
1156
1157   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1158                                is_add, 0, 0);
1159
1160   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1161                                is_add, 0, 0);
1162
1163   return /* no error */ 0;
1164 }
1165
1166 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1167
/* Global IP4 main: the single IPv4 state object, reached throughout this
   file as &ip4_main. */
ip4_main_t ip4_main;
1170
1171 clib_error_t *
1172 ip4_lookup_init (vlib_main_t * vm)
1173 {
1174   ip4_main_t *im = &ip4_main;
1175   clib_error_t *error;
1176   uword i;
1177
1178   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1179     return error;
1180
1181   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1182     {
1183       u32 m;
1184
1185       if (i < 32)
1186         m = pow2_mask (i) << (32 - i);
1187       else
1188         m = ~0;
1189       im->fib_masks[i] = clib_host_to_net_u32 (m);
1190     }
1191
1192   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1193
1194   /* Create FIB with index 0 and table id of 0. */
1195   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1196                                      FIB_SOURCE_DEFAULT_ROUTE);
1197   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1198                                       MFIB_SOURCE_DEFAULT_ROUTE);
1199
1200   {
1201     pg_node_t *pn;
1202     pn = pg_get_node (ip4_lookup_node.index);
1203     pn->unformat_edit = unformat_pg_ip4_header;
1204   }
1205
1206   {
1207     ethernet_arp_header_t h;
1208
1209     memset (&h, 0, sizeof (h));
1210
1211     /* Set target ethernet address to all zeros. */
1212     memset (h.ip4_over_ethernet[1].ethernet, 0,
1213             sizeof (h.ip4_over_ethernet[1].ethernet));
1214
1215 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1216 #define _8(f,v) h.f = v;
1217     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1218     _16 (l3_type, ETHERNET_TYPE_IP4);
1219     _8 (n_l2_address_bytes, 6);
1220     _8 (n_l3_address_bytes, 4);
1221     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1222 #undef _16
1223 #undef _8
1224
1225     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1226                                /* data */ &h,
1227                                sizeof (h),
1228                                /* alloc chunk size */ 8,
1229                                "ip4 arp");
1230   }
1231
1232   return error;
1233 }
1234
1235 VLIB_INIT_FUNCTION (ip4_lookup_init);
1236
/* Per-packet trace record filled in by ip4_forward_next_trace() and printed
   by the format_ip4_*_trace functions. */
typedef struct
{
  /* Adjacency taken: the buffer's ip.adj_index[] entry for the rx/tx
     direction the tracing caller selected. */
  u32 dpo_index;
  /* Copy of the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
     FIB index bound to the buffer's RX interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1248
1249 u8 *
1250 format_ip4_forward_next_trace (u8 * s, va_list * args)
1251 {
1252   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1253   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1254   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1255   u32 indent = format_get_indent (s);
1256   s = format (s, "%U%U",
1257               format_white_space, indent,
1258               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1259   return s;
1260 }
1261
1262 static u8 *
1263 format_ip4_lookup_trace (u8 * s, va_list * args)
1264 {
1265   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1266   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1267   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1268   u32 indent = format_get_indent (s);
1269
1270   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1271               t->fib_index, t->dpo_index, t->flow_hash);
1272   s = format (s, "\n%U%U",
1273               format_white_space, indent,
1274               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1275   return s;
1276 }
1277
1278 static u8 *
1279 format_ip4_rewrite_trace (u8 * s, va_list * args)
1280 {
1281   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1282   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1283   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1284   u32 indent = format_get_indent (s);
1285
1286   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1287               t->fib_index, t->dpo_index, format_ip_adjacency,
1288               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1289   s = format (s, "\n%U%U",
1290               format_white_space, indent,
1291               format_ip_adjacency_packet_data,
1292               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1293   return s;
1294 }
1295
1296 /* Common trace function for all ip4-forward next nodes. */
1297 void
1298 ip4_forward_next_trace (vlib_main_t * vm,
1299                         vlib_node_runtime_t * node,
1300                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1301 {
1302   u32 *from, n_left;
1303   ip4_main_t *im = &ip4_main;
1304
1305   n_left = frame->n_vectors;
1306   from = vlib_frame_vector_args (frame);
1307
1308   while (n_left >= 4)
1309     {
1310       u32 bi0, bi1;
1311       vlib_buffer_t *b0, *b1;
1312       ip4_forward_next_trace_t *t0, *t1;
1313
1314       /* Prefetch next iteration. */
1315       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1316       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1317
1318       bi0 = from[0];
1319       bi1 = from[1];
1320
1321       b0 = vlib_get_buffer (vm, bi0);
1322       b1 = vlib_get_buffer (vm, bi1);
1323
1324       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1325         {
1326           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1327           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1328           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1329           t0->fib_index =
1330             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1331              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1332             vec_elt (im->fib_index_by_sw_if_index,
1333                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1334
1335           clib_memcpy (t0->packet_data,
1336                        vlib_buffer_get_current (b0),
1337                        sizeof (t0->packet_data));
1338         }
1339       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1340         {
1341           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1342           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1343           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1344           t1->fib_index =
1345             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1346              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1347             vec_elt (im->fib_index_by_sw_if_index,
1348                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1349           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1350                        sizeof (t1->packet_data));
1351         }
1352       from += 2;
1353       n_left -= 2;
1354     }
1355
1356   while (n_left >= 1)
1357     {
1358       u32 bi0;
1359       vlib_buffer_t *b0;
1360       ip4_forward_next_trace_t *t0;
1361
1362       bi0 = from[0];
1363
1364       b0 = vlib_get_buffer (vm, bi0);
1365
1366       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1367         {
1368           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1369           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1370           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1371           t0->fib_index =
1372             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1373              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1374             vec_elt (im->fib_index_by_sw_if_index,
1375                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1376           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1377                        sizeof (t0->packet_data));
1378         }
1379       from += 1;
1380       n_left -= 1;
1381     }
1382 }
1383
1384 /* Compute TCP/UDP/ICMP4 checksum in software. */
1385 u16
1386 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1387                               ip4_header_t * ip0)
1388 {
1389   ip_csum_t sum0;
1390   u32 ip_header_length, payload_length_host_byte_order;
1391   u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1392   u16 sum16;
1393   void *data_this_buffer;
1394
1395   /* Initialize checksum with ip header. */
1396   ip_header_length = ip4_header_bytes (ip0);
1397   payload_length_host_byte_order =
1398     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1399   sum0 =
1400     clib_host_to_net_u32 (payload_length_host_byte_order +
1401                           (ip0->protocol << 16));
1402
1403   if (BITS (uword) == 32)
1404     {
1405       sum0 =
1406         ip_csum_with_carry (sum0,
1407                             clib_mem_unaligned (&ip0->src_address, u32));
1408       sum0 =
1409         ip_csum_with_carry (sum0,
1410                             clib_mem_unaligned (&ip0->dst_address, u32));
1411     }
1412   else
1413     sum0 =
1414       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1415
1416   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1417   data_this_buffer = (void *) ip0 + ip_header_length;
1418   n_ip_bytes_this_buffer =
1419     p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1420   if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1421     {
1422       n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1423         n_ip_bytes_this_buffer - ip_header_length : 0;
1424     }
1425   while (1)
1426     {
1427       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1428       n_bytes_left -= n_this_buffer;
1429       if (n_bytes_left == 0)
1430         break;
1431
1432       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1433       p0 = vlib_get_buffer (vm, p0->next_buffer);
1434       data_this_buffer = vlib_buffer_get_current (p0);
1435       n_this_buffer = p0->current_length;
1436     }
1437
1438   sum16 = ~ip_csum_fold (sum0);
1439
1440   return sum16;
1441 }
1442
1443 u32
1444 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1445 {
1446   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1447   udp_header_t *udp0;
1448   u16 sum16;
1449
1450   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1451           || ip0->protocol == IP_PROTOCOL_UDP);
1452
1453   udp0 = (void *) (ip0 + 1);
1454   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1455     {
1456       p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1457                     | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1458       return p0->flags;
1459     }
1460
1461   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1462
1463   p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1464                 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
1465
1466   return p0->flags;
1467 }
1468
/* Feature arc "ip4-local", starting at the "ip4-local" node.  No
   arc_index_ptr is given here; ip4_local_inline() reads the arc index from
   vnet_feat_arc_ip4_local.feature_arc_index. */
/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
};
/* *INDENT-ON* */
1476
1477 static inline void
1478 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1479                        u8 is_udp, u8 * error, u8 * good_tcp_udp)
1480 {
1481   u32 flags0;
1482   flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1483   *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1484   if (is_udp)
1485     {
1486       udp_header_t *udp;
1487       u32 ip_len, udp_len;
1488       i32 len_diff;
1489       udp = ip4_next_header (ip);
1490       /* Verify UDP length. */
1491       ip_len = clib_net_to_host_u16 (ip->length);
1492       udp_len = clib_net_to_host_u16 (udp->length);
1493
1494       len_diff = ip_len - udp_len;
1495       *good_tcp_udp &= len_diff >= 0;
1496       *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
1497     }
1498 }
1499
/*
 * True when the L4 checksum of a packet still has to be validated in
 * software: the packet is TCP or UDP and its buffer flags do not yet carry
 * VNET_BUFFER_F_L4_CHECKSUM_COMPUTED.  Both arguments are parenthesized so
 * that compound expressions are evaluated as a unit.
 */
#define ip4_local_do_l4_check(is_tcp_udp, flags)                        \
    ((is_tcp_udp) && !((flags) & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED))
1502
1503 static inline uword
1504 ip4_local_inline (vlib_main_t * vm,
1505                   vlib_node_runtime_t * node,
1506                   vlib_frame_t * frame, int head_of_feature_arc)
1507 {
1508   ip4_main_t *im = &ip4_main;
1509   ip_lookup_main_t *lm = &im->lookup_main;
1510   ip_local_next_t next_index;
1511   u32 *from, *to_next, n_left_from, n_left_to_next;
1512   vlib_node_runtime_t *error_node =
1513     vlib_node_get_runtime (vm, ip4_input_node.index);
1514   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1515
1516   from = vlib_frame_vector_args (frame);
1517   n_left_from = frame->n_vectors;
1518   next_index = node->cached_next_index;
1519
1520   if (node->flags & VLIB_NODE_FLAG_TRACE)
1521     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1522
1523   while (n_left_from > 0)
1524     {
1525       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1526
1527       while (n_left_from >= 4 && n_left_to_next >= 2)
1528         {
1529           vlib_buffer_t *p0, *p1;
1530           ip4_header_t *ip0, *ip1;
1531           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1532           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1533           const dpo_id_t *dpo0, *dpo1;
1534           const load_balance_t *lb0, *lb1;
1535           u32 pi0, next0, fib_index0, lbi0;
1536           u32 pi1, next1, fib_index1, lbi1;
1537           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1538           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1539           u32 sw_if_index0, sw_if_index1;
1540
1541           pi0 = to_next[0] = from[0];
1542           pi1 = to_next[1] = from[1];
1543           from += 2;
1544           n_left_from -= 2;
1545           to_next += 2;
1546           n_left_to_next -= 2;
1547
1548           next0 = next1 = IP_LOCAL_NEXT_DROP;
1549           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1550
1551           p0 = vlib_get_buffer (vm, pi0);
1552           p1 = vlib_get_buffer (vm, pi1);
1553
1554           ip0 = vlib_buffer_get_current (p0);
1555           ip1 = vlib_buffer_get_current (p1);
1556
1557           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1558           vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1559
1560           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1561           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1562
1563           /* Treat IP frag packets as "experimental" protocol for now
1564              until support of IP frag reassembly is implemented */
1565           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1566           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1567
1568           if (head_of_feature_arc == 0)
1569             goto skip_checks;
1570
1571           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1572           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1573           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1574           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1575
1576           good_tcp_udp0 =
1577             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1578           good_tcp_udp1 =
1579             (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1580
1581           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
1582                              || ip4_local_do_l4_check (is_tcp_udp1,
1583                                                        p1->flags)))
1584             {
1585               if (is_tcp_udp0)
1586                 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1587                                        &good_tcp_udp0);
1588               if (is_tcp_udp1)
1589                 ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
1590                                        &good_tcp_udp1);
1591             }
1592
1593           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1594           error0 = (is_tcp_udp0 && !good_tcp_udp0
1595                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1596           error1 = (is_tcp_udp1 && !good_tcp_udp1
1597                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1598
1599           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1600           fib_index0 =
1601             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1602              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1603
1604           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1605           fib_index1 =
1606             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1607              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1608
1609           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1610           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1611
1612           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1613           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1614           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1615                                              2);
1616           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1617                                              2);
1618           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1619                                              3);
1620           leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1621                                              3);
1622
1623           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1624             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1625           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1626
1627           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1628             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1629           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1630
1631           lb0 = load_balance_get (lbi0);
1632           lb1 = load_balance_get (lbi1);
1633           dpo0 = load_balance_get_bucket_i (lb0, 0);
1634           dpo1 = load_balance_get_bucket_i (lb1, 0);
1635
1636           /*
1637            * Must have a route to source otherwise we drop the packet.
1638            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1639            *
1640            * The checks are:
1641            *  - the source is a recieve => it's from us => bogus, do this
1642            *    first since it sets a different error code.
1643            *  - uRPF check for any route to source - accept if passes.
1644            *  - allow packets destined to the broadcast address from unknown sources
1645            */
1646           if (p0->flags & VNET_BUFFER_F_IS_NATED)
1647             goto skip_check0;
1648
1649           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1650                      dpo0->dpoi_type == DPO_RECEIVE) ?
1651                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1652           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1653                      !fib_urpf_check_size (lb0->lb_urpf) &&
1654                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1655                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1656
1657         skip_check0:
1658           if (p1->flags & VNET_BUFFER_F_IS_NATED)
1659             goto skip_checks;
1660
1661           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1662                      dpo1->dpoi_type == DPO_RECEIVE) ?
1663                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1664           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1665                      !fib_urpf_check_size (lb1->lb_urpf) &&
1666                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1667                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1668
1669         skip_checks:
1670
1671           next0 = lm->local_next_by_ip_protocol[proto0];
1672           next1 = lm->local_next_by_ip_protocol[proto1];
1673
1674           next0 =
1675             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1676           next1 =
1677             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1678
1679           p0->error = error0 ? error_node->errors[error0] : 0;
1680           p1->error = error1 ? error_node->errors[error1] : 0;
1681
1682           if (head_of_feature_arc)
1683             {
1684               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1685                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1686               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1687                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1688             }
1689
1690           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1691                                            n_left_to_next, pi0, pi1,
1692                                            next0, next1);
1693         }
1694
1695       while (n_left_from > 0 && n_left_to_next > 0)
1696         {
1697           vlib_buffer_t *p0;
1698           ip4_header_t *ip0;
1699           ip4_fib_mtrie_t *mtrie0;
1700           ip4_fib_mtrie_leaf_t leaf0;
1701           u32 pi0, next0, fib_index0, lbi0;
1702           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1703           load_balance_t *lb0;
1704           const dpo_id_t *dpo0;
1705           u32 sw_if_index0;
1706
1707           pi0 = to_next[0] = from[0];
1708           from += 1;
1709           n_left_from -= 1;
1710           to_next += 1;
1711           n_left_to_next -= 1;
1712
1713           next0 = IP_LOCAL_NEXT_DROP;
1714           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1715
1716           p0 = vlib_get_buffer (vm, pi0);
1717           ip0 = vlib_buffer_get_current (p0);
1718           vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1719           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1720
1721           /* Treat IP frag packets as "experimental" protocol for now
1722              until support of IP frag reassembly is implemented */
1723           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1724
1725           if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
1726             goto skip_check;
1727
1728           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1729           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1730           good_tcp_udp0 =
1731             (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1732
1733           if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
1734             {
1735               ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1736                                      &good_tcp_udp0);
1737             }
1738
1739           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1740           error0 = (is_tcp_udp0 && !good_tcp_udp0
1741                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1742
1743           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1744           fib_index0 =
1745             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1746              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1747           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1748           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1749           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1750                                              2);
1751           leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1752                                              3);
1753           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1754           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1755           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1756
1757           lb0 = load_balance_get (lbi0);
1758           dpo0 = load_balance_get_bucket_i (lb0, 0);
1759
1760           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1761                      dpo0->dpoi_type == DPO_RECEIVE) ?
1762                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1763           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1764                      !fib_urpf_check_size (lb0->lb_urpf) &&
1765                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1766                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1767
1768         skip_check:
1769           next0 = lm->local_next_by_ip_protocol[proto0];
1770           next0 =
1771             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1772
1773           p0->error = error0 ? error_node->errors[error0] : 0;
1774
1775           if (head_of_feature_arc)
1776             {
1777               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1778                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1779             }
1780
1781           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1782                                            n_left_to_next, pi0, next0);
1783         }
1784       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1785     }
1786
1787   return frame->n_vectors;
1788 }
1789
1790 static uword
1791 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1792 {
1793   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1794 }
1795
1796 /* *INDENT-OFF* */
1797 VLIB_REGISTER_NODE (ip4_local_node) =
1798 {
1799   .function = ip4_local,
1800   .name = "ip4-local",
1801   .vector_size = sizeof (u32),
1802   .format_trace = format_ip4_forward_next_trace,
1803   .n_next_nodes = IP_LOCAL_N_NEXT,
1804   .next_nodes =
1805   {
1806     [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1807     [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1808     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1809     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1810   },
1811 };
1812 /* *INDENT-ON* */
1813
1814 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1815
1816 static uword
1817 ip4_local_end_of_arc (vlib_main_t * vm,
1818                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1819 {
1820   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1821 }
1822
1823 /* *INDENT-OFF* */
1824 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1825   .function = ip4_local_end_of_arc,
1826   .name = "ip4-local-end-of-arc",
1827   .vector_size = sizeof (u32),
1828
1829   .format_trace = format_ip4_forward_next_trace,
1830   .sibling_of = "ip4-local",
1831 };
1832
1833 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1834
1835 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1836   .arc_name = "ip4-local",
1837   .node_name = "ip4-local-end-of-arc",
1838   .runs_before = 0, /* not before any other features */
1839 };
1840 /* *INDENT-ON* */
1841
1842 void
1843 ip4_register_protocol (u32 protocol, u32 node_index)
1844 {
1845   vlib_main_t *vm = vlib_get_main ();
1846   ip4_main_t *im = &ip4_main;
1847   ip_lookup_main_t *lm = &im->lookup_main;
1848
1849   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1850   lm->local_next_by_ip_protocol[protocol] =
1851     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1852 }
1853
1854 static clib_error_t *
1855 show_ip_local_command_fn (vlib_main_t * vm,
1856                           unformat_input_t * input, vlib_cli_command_t * cmd)
1857 {
1858   ip4_main_t *im = &ip4_main;
1859   ip_lookup_main_t *lm = &im->lookup_main;
1860   int i;
1861
1862   vlib_cli_output (vm, "Protocols handled by ip4_local");
1863   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1864     {
1865       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1866         {
1867           u32 node_index = vlib_get_node (vm,
1868                                           ip4_local_node.index)->
1869             next_nodes[lm->local_next_by_ip_protocol[i]];
1870           vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1871                            node_index);
1872         }
1873     }
1874   return 0;
1875 }
1876
1877
1878
/*?
 * Display the set of IP protocols handled by the local IPv4 stack. Each
 * output line is a protocol number followed by the name of the graph node
 * registered for it; protocols that are punted are not listed.
 *
 * @cliexpar
 * Example of how to display the local protocol table:
 * @cliexstart{show ip local}
 * Protocols handled by ip4_local
 * 1: ip4-icmp-input
 * 17: ip4-udp-lookup
 * 47: gre4-input
 * @cliexend
?*/
1891 /* *INDENT-OFF* */
1892 VLIB_CLI_COMMAND (show_ip_local, static) =
1893 {
1894   .path = "show ip local",
1895   .function = show_ip_local_command_fn,
1896   .short_help = "show ip local",
1897 };
1898 /* *INDENT-ON* */
1899
1900 always_inline uword
1901 ip4_arp_inline (vlib_main_t * vm,
1902                 vlib_node_runtime_t * node,
1903                 vlib_frame_t * frame, int is_glean)
1904 {
1905   vnet_main_t *vnm = vnet_get_main ();
1906   ip4_main_t *im = &ip4_main;
1907   ip_lookup_main_t *lm = &im->lookup_main;
1908   u32 *from, *to_next_drop;
1909   uword n_left_from, n_left_to_next_drop, next_index;
1910   static f64 time_last_seed_change = -1e100;
1911   static u32 hash_seeds[3];
1912   static uword hash_bitmap[256 / BITS (uword)];
1913   f64 time_now;
1914
1915   if (node->flags & VLIB_NODE_FLAG_TRACE)
1916     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1917
1918   time_now = vlib_time_now (vm);
1919   if (time_now - time_last_seed_change > 1e-3)
1920     {
1921       uword i;
1922       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1923                                             sizeof (hash_seeds));
1924       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1925         hash_seeds[i] = r[i];
1926
1927       /* Mark all hash keys as been no-seen before. */
1928       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1929         hash_bitmap[i] = 0;
1930
1931       time_last_seed_change = time_now;
1932     }
1933
1934   from = vlib_frame_vector_args (frame);
1935   n_left_from = frame->n_vectors;
1936   next_index = node->cached_next_index;
1937   if (next_index == IP4_ARP_NEXT_DROP)
1938     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
1939
1940   while (n_left_from > 0)
1941     {
1942       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1943                            to_next_drop, n_left_to_next_drop);
1944
1945       while (n_left_from > 0 && n_left_to_next_drop > 0)
1946         {
1947           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1948           ip_adjacency_t *adj0;
1949           vlib_buffer_t *p0;
1950           ip4_header_t *ip0;
1951           uword bm0;
1952
1953           pi0 = from[0];
1954
1955           p0 = vlib_get_buffer (vm, pi0);
1956
1957           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1958           adj0 = adj_get (adj_index0);
1959           ip0 = vlib_buffer_get_current (p0);
1960
1961           a0 = hash_seeds[0];
1962           b0 = hash_seeds[1];
1963           c0 = hash_seeds[2];
1964
1965           sw_if_index0 = adj0->rewrite_header.sw_if_index;
1966           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1967
1968           if (is_glean)
1969             {
1970               /*
1971                * this is the Glean case, so we are ARPing for the
1972                * packet's destination
1973                */
1974               a0 ^= ip0->dst_address.data_u32;
1975             }
1976           else
1977             {
1978               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1979             }
1980           b0 ^= sw_if_index0;
1981
1982           hash_v3_mix32 (a0, b0, c0);
1983           hash_v3_finalize32 (a0, b0, c0);
1984
1985           c0 &= BITS (hash_bitmap) - 1;
1986           m0 = (uword) 1 << (c0 % BITS (uword));
1987           c0 = c0 / BITS (uword);
1988
1989           bm0 = hash_bitmap[c0];
1990           drop0 = (bm0 & m0) != 0;
1991
1992           /* Mark it as seen. */
1993           hash_bitmap[c0] = bm0 | m0;
1994
1995           from += 1;
1996           n_left_from -= 1;
1997           to_next_drop[0] = pi0;
1998           to_next_drop += 1;
1999           n_left_to_next_drop -= 1;
2000
2001           p0->error =
2002             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2003                          IP4_ARP_ERROR_REQUEST_SENT];
2004
2005           /*
2006            * the adj has been updated to a rewrite but the node the DPO that got
2007            * us here hasn't - yet. no big deal. we'll drop while we wait.
2008            */
2009           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2010             continue;
2011
2012           if (drop0)
2013             continue;
2014
2015           /*
2016            * Can happen if the control-plane is programming tables
2017            * with traffic flowing; at least that's today's lame excuse.
2018            */
2019           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2020               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2021             {
2022               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2023             }
2024           else
2025             /* Send ARP request. */
2026             {
2027               u32 bi0 = 0;
2028               vlib_buffer_t *b0;
2029               ethernet_arp_header_t *h0;
2030               vnet_hw_interface_t *hw_if0;
2031
2032               h0 =
2033                 vlib_packet_template_get_packet (vm,
2034                                                  &im->ip4_arp_request_packet_template,
2035                                                  &bi0);
2036
2037               /* Add rewrite/encap string for ARP packet. */
2038               vnet_rewrite_one_header (adj0[0], h0,
2039                                        sizeof (ethernet_header_t));
2040
2041               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2042
2043               /* Src ethernet address in ARP header. */
2044               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2045                            hw_if0->hw_address,
2046                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2047
2048               if (is_glean)
2049                 {
2050                   /* The interface's source address is stashed in the Glean Adj */
2051                   h0->ip4_over_ethernet[0].ip4 =
2052                     adj0->sub_type.glean.receive_addr.ip4;
2053
2054                   /* Copy in destination address we are requesting. This is the
2055                    * glean case, so it's the packet's destination.*/
2056                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2057                     ip0->dst_address.data_u32;
2058                 }
2059               else
2060                 {
2061                   /* Src IP address in ARP header. */
2062                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2063                                                   &h0->
2064                                                   ip4_over_ethernet[0].ip4))
2065                     {
2066                       /* No source address available */
2067                       p0->error =
2068                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2069                       vlib_buffer_free (vm, &bi0, 1);
2070                       continue;
2071                     }
2072
2073                   /* Copy in destination address we are requesting from the
2074                      incomplete adj */
2075                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2076                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2077                 }
2078
2079               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2080               b0 = vlib_get_buffer (vm, bi0);
2081               VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2082               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2083
2084               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2085
2086               vlib_set_next_frame_buffer (vm, node,
2087                                           adj0->rewrite_header.next_index,
2088                                           bi0);
2089             }
2090         }
2091
2092       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2093     }
2094
2095   return frame->n_vectors;
2096 }
2097
2098 static uword
2099 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2100 {
2101   return (ip4_arp_inline (vm, node, frame, 0));
2102 }
2103
2104 static uword
2105 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2106 {
2107   return (ip4_arp_inline (vm, node, frame, 1));
2108 }
2109
2110 static char *ip4_arp_error_strings[] = {
2111   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2112   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2113   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2114   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2115   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2116   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2117 };
2118
2119 VLIB_REGISTER_NODE (ip4_arp_node) =
2120 {
2121   .function = ip4_arp,.name = "ip4-arp",.vector_size =
2122     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2123     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2124     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2125   {
2126   [IP4_ARP_NEXT_DROP] = "error-drop",}
2127 ,};
2128
2129 VLIB_REGISTER_NODE (ip4_glean_node) =
2130 {
2131   .function = ip4_glean,.name = "ip4-glean",.vector_size =
2132     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2133     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2134     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2135   {
2136   [IP4_ARP_NEXT_DROP] = "error-drop",}
2137 ,};
2138
2139 #define foreach_notrace_ip4_arp_error           \
2140 _(DROP)                                         \
2141 _(REQUEST_SENT)                                 \
2142 _(REPLICATE_DROP)                               \
2143 _(REPLICATE_FAIL)
2144
2145 clib_error_t *
2146 arp_notrace_init (vlib_main_t * vm)
2147 {
2148   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2149
2150   /* don't trace ARP request packets */
2151 #define _(a)                                    \
2152     vnet_pcap_drop_trace_filter_add_del         \
2153         (rt->errors[IP4_ARP_ERROR_##a],         \
2154          1 /* is_add */);
2155   foreach_notrace_ip4_arp_error;
2156 #undef _
2157   return 0;
2158 }
2159
2160 VLIB_INIT_FUNCTION (arp_notrace_init);
2161
2162
2163 /* Send an ARP request to see if given destination is reachable on given interface. */
2164 clib_error_t *
2165 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2166 {
2167   vnet_main_t *vnm = vnet_get_main ();
2168   ip4_main_t *im = &ip4_main;
2169   ethernet_arp_header_t *h;
2170   ip4_address_t *src;
2171   ip_interface_address_t *ia;
2172   ip_adjacency_t *adj;
2173   vnet_hw_interface_t *hi;
2174   vnet_sw_interface_t *si;
2175   vlib_buffer_t *b;
2176   adj_index_t ai;
2177   u32 bi = 0;
2178
2179   si = vnet_get_sw_interface (vnm, sw_if_index);
2180
2181   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2182     {
2183       return clib_error_return (0, "%U: interface %U down",
2184                                 format_ip4_address, dst,
2185                                 format_vnet_sw_if_index_name, vnm,
2186                                 sw_if_index);
2187     }
2188
2189   src =
2190     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2191   if (!src)
2192     {
2193       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2194       return clib_error_return
2195         (0,
2196          "no matching interface address for destination %U (interface %U)",
2197          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2198          sw_if_index);
2199     }
2200
2201   h = vlib_packet_template_get_packet (vm,
2202                                        &im->ip4_arp_request_packet_template,
2203                                        &bi);
2204
2205   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2206   if (PREDICT_FALSE (!hi->hw_address))
2207     {
2208       return clib_error_return (0, "%U: interface %U do not support ip probe",
2209                                 format_ip4_address, dst,
2210                                 format_vnet_sw_if_index_name, vnm,
2211                                 sw_if_index);
2212     }
2213
2214   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2215                sizeof (h->ip4_over_ethernet[0].ethernet));
2216
2217   h->ip4_over_ethernet[0].ip4 = src[0];
2218   h->ip4_over_ethernet[1].ip4 = dst[0];
2219
2220   b = vlib_get_buffer (vm, bi);
2221   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2222     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2223
2224   ip46_address_t nh = {
2225     .ip4 = *dst,
2226   };
2227
2228   ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2229                             VNET_LINK_IP4, &nh, sw_if_index);
2230   adj = adj_get (ai);
2231
2232   /* Peer has been previously resolved, retrieve glean adj instead */
2233   if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2234     {
2235       adj_unlock (ai);
2236       ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
2237       adj = adj_get (ai);
2238     }
2239
2240   /* Add encapsulation string for software interface (e.g. ethernet header). */
2241   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2242   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2243
2244   {
2245     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2246     u32 *to_next = vlib_frame_vector_args (f);
2247     to_next[0] = bi;
2248     f->n_vectors = 1;
2249     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2250   }
2251
2252   adj_unlock (ai);
2253   return /* no error */ 0;
2254 }
2255
/** Next-node indices used by the ip4 rewrite path. */
typedef enum
{
  /** Initial next for every packet before the rewrite decides otherwise. */
  IP4_REWRITE_NEXT_DROP = 0,
  /** Chosen when the TTL expires, so an ICMP error can be generated. */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
2261
2262 always_inline uword
2263 ip4_rewrite_inline (vlib_main_t * vm,
2264                     vlib_node_runtime_t * node,
2265                     vlib_frame_t * frame,
2266                     int do_counters, int is_midchain, int is_mcast)
2267 {
2268   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2269   u32 *from = vlib_frame_vector_args (frame);
2270   u32 n_left_from, n_left_to_next, *to_next, next_index;
2271   vlib_node_runtime_t *error_node =
2272     vlib_node_get_runtime (vm, ip4_input_node.index);
2273
2274   n_left_from = frame->n_vectors;
2275   next_index = node->cached_next_index;
2276   u32 thread_index = vlib_get_thread_index ();
2277
2278   while (n_left_from > 0)
2279     {
2280       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2281
2282       while (n_left_from >= 4 && n_left_to_next >= 2)
2283         {
2284           ip_adjacency_t *adj0, *adj1;
2285           vlib_buffer_t *p0, *p1;
2286           ip4_header_t *ip0, *ip1;
2287           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2288           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2289           u32 tx_sw_if_index0, tx_sw_if_index1;
2290
2291           /* Prefetch next iteration. */
2292           {
2293             vlib_buffer_t *p2, *p3;
2294
2295             p2 = vlib_get_buffer (vm, from[2]);
2296             p3 = vlib_get_buffer (vm, from[3]);
2297
2298             vlib_prefetch_buffer_header (p2, STORE);
2299             vlib_prefetch_buffer_header (p3, STORE);
2300
2301             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2302             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2303           }
2304
2305           pi0 = to_next[0] = from[0];
2306           pi1 = to_next[1] = from[1];
2307
2308           from += 2;
2309           n_left_from -= 2;
2310           to_next += 2;
2311           n_left_to_next -= 2;
2312
2313           p0 = vlib_get_buffer (vm, pi0);
2314           p1 = vlib_get_buffer (vm, pi1);
2315
2316           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2317           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2318
2319           /*
2320            * pre-fetch the per-adjacency counters
2321            */
2322           if (do_counters)
2323             {
2324               vlib_prefetch_combined_counter (&adjacency_counters,
2325                                               thread_index, adj_index0);
2326               vlib_prefetch_combined_counter (&adjacency_counters,
2327                                               thread_index, adj_index1);
2328             }
2329
2330           ip0 = vlib_buffer_get_current (p0);
2331           ip1 = vlib_buffer_get_current (p1);
2332
2333           error0 = error1 = IP4_ERROR_NONE;
2334           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2335
2336           /* Decrement TTL & update checksum.
2337              Works either endian, so no need for byte swap. */
2338           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2339             {
2340               i32 ttl0 = ip0->ttl;
2341
2342               /* Input node should have reject packets with ttl 0. */
2343               ASSERT (ip0->ttl > 0);
2344
2345               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2346               checksum0 += checksum0 >= 0xffff;
2347
2348               ip0->checksum = checksum0;
2349               ttl0 -= 1;
2350               ip0->ttl = ttl0;
2351
2352               /*
2353                * If the ttl drops below 1 when forwarding, generate
2354                * an ICMP response.
2355                */
2356               if (PREDICT_FALSE (ttl0 <= 0))
2357                 {
2358                   error0 = IP4_ERROR_TIME_EXPIRED;
2359                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2360                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2361                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2362                                                0);
2363                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2364                 }
2365
2366               /* Verify checksum. */
2367               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2368                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2369             }
2370           else
2371             {
2372               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2373             }
2374           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2375             {
2376               i32 ttl1 = ip1->ttl;
2377
2378               /* Input node should have reject packets with ttl 0. */
2379               ASSERT (ip1->ttl > 0);
2380
2381               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2382               checksum1 += checksum1 >= 0xffff;
2383
2384               ip1->checksum = checksum1;
2385               ttl1 -= 1;
2386               ip1->ttl = ttl1;
2387
2388               /*
2389                * If the ttl drops below 1 when forwarding, generate
2390                * an ICMP response.
2391                */
2392               if (PREDICT_FALSE (ttl1 <= 0))
2393                 {
2394                   error1 = IP4_ERROR_TIME_EXPIRED;
2395                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2396                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2397                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2398                                                0);
2399                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2400                 }
2401
2402               /* Verify checksum. */
2403               ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2404                       (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2405             }
2406           else
2407             {
2408               p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2409             }
2410
2411           /* Rewrite packet header and updates lengths. */
2412           adj0 = adj_get (adj_index0);
2413           adj1 = adj_get (adj_index1);
2414
2415           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2416           rw_len0 = adj0[0].rewrite_header.data_bytes;
2417           rw_len1 = adj1[0].rewrite_header.data_bytes;
2418           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2419           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2420
2421           /* Check MTU of outgoing interface. */
2422           error0 =
2423             (vlib_buffer_length_in_chain (vm, p0) >
2424              adj0[0].
2425              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2426              error0);
2427           error1 =
2428             (vlib_buffer_length_in_chain (vm, p1) >
2429              adj1[0].
2430              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2431              error1);
2432
2433           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2434            * to see the IP headerr */
2435           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2436             {
2437               next0 = adj0[0].rewrite_header.next_index;
2438               p0->current_data -= rw_len0;
2439               p0->current_length += rw_len0;
2440               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2441               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2442
2443               if (PREDICT_FALSE
2444                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2445                 vnet_feature_arc_start (lm->output_feature_arc_index,
2446                                         tx_sw_if_index0, &next0, p0);
2447             }
2448           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2449             {
2450               next1 = adj1[0].rewrite_header.next_index;
2451               p1->current_data -= rw_len1;
2452               p1->current_length += rw_len1;
2453
2454               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2455               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2456
2457               if (PREDICT_FALSE
2458                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2459                 vnet_feature_arc_start (lm->output_feature_arc_index,
2460                                         tx_sw_if_index1, &next1, p1);
2461             }
2462
2463           /* Guess we are only writing on simple Ethernet header. */
2464           vnet_rewrite_two_headers (adj0[0], adj1[0],
2465                                     ip0, ip1, sizeof (ethernet_header_t));
2466
2467           /*
2468            * Bump the per-adjacency counters
2469            */
2470           if (do_counters)
2471             {
2472               vlib_increment_combined_counter
2473                 (&adjacency_counters,
2474                  thread_index,
2475                  adj_index0, 1,
2476                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2477
2478               vlib_increment_combined_counter
2479                 (&adjacency_counters,
2480                  thread_index,
2481                  adj_index1, 1,
2482                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2483             }
2484
2485           if (is_midchain)
2486             {
2487               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2488               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2489             }
2490           if (is_mcast)
2491             {
2492               /*
2493                * copy bytes from the IP address into the MAC rewrite
2494                */
2495               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2496               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2497             }
2498
2499           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2500                                            to_next, n_left_to_next,
2501                                            pi0, pi1, next0, next1);
2502         }
2503
2504       while (n_left_from > 0 && n_left_to_next > 0)
2505         {
2506           ip_adjacency_t *adj0;
2507           vlib_buffer_t *p0;
2508           ip4_header_t *ip0;
2509           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2510           u32 tx_sw_if_index0;
2511
2512           pi0 = to_next[0] = from[0];
2513
2514           p0 = vlib_get_buffer (vm, pi0);
2515
2516           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2517
2518           adj0 = adj_get (adj_index0);
2519
2520           ip0 = vlib_buffer_get_current (p0);
2521
2522           error0 = IP4_ERROR_NONE;
2523           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2524
2525           /* Decrement TTL & update checksum. */
2526           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2527             {
2528               i32 ttl0 = ip0->ttl;
2529
2530               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2531
2532               checksum0 += checksum0 >= 0xffff;
2533
2534               ip0->checksum = checksum0;
2535
2536               ASSERT (ip0->ttl > 0);
2537
2538               ttl0 -= 1;
2539
2540               ip0->ttl = ttl0;
2541
2542               ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2543                       (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2544
2545               if (PREDICT_FALSE (ttl0 <= 0))
2546                 {
2547                   /*
2548                    * If the ttl drops below 1 when forwarding, generate
2549                    * an ICMP response.
2550                    */
2551                   error0 = IP4_ERROR_TIME_EXPIRED;
2552                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2553                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2554                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2555                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2556                                                0);
2557                 }
2558             }
2559           else
2560             {
2561               p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2562             }
2563
2564           if (do_counters)
2565             vlib_prefetch_combined_counter (&adjacency_counters,
2566                                             thread_index, adj_index0);
2567
2568           /* Guess we are only writing on simple Ethernet header. */
2569           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2570           if (is_mcast)
2571             {
2572               /*
2573                * copy bytes from the IP address into the MAC rewrite
2574                */
2575               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2576             }
2577
2578           /* Update packet buffer attributes/set output interface. */
2579           rw_len0 = adj0[0].rewrite_header.data_bytes;
2580           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2581
2582           if (do_counters)
2583             vlib_increment_combined_counter
2584               (&adjacency_counters,
2585                thread_index, adj_index0, 1,
2586                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2587
2588           /* Check MTU of outgoing interface. */
2589           error0 = (vlib_buffer_length_in_chain (vm, p0)
2590                     > adj0[0].rewrite_header.max_l3_packet_bytes
2591                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2592
2593           p0->error = error_node->errors[error0];
2594
2595           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2596            * to see the IP headerr */
2597           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2598             {
2599               p0->current_data -= rw_len0;
2600               p0->current_length += rw_len0;
2601               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2602
2603               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2604               next0 = adj0[0].rewrite_header.next_index;
2605
2606               if (is_midchain)
2607                 {
2608                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2609                 }
2610
2611               if (PREDICT_FALSE
2612                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2613                 vnet_feature_arc_start (lm->output_feature_arc_index,
2614                                         tx_sw_if_index0, &next0, p0);
2615
2616             }
2617
2618           from += 1;
2619           n_left_from -= 1;
2620           to_next += 1;
2621           n_left_to_next -= 1;
2622
2623           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2624                                            to_next, n_left_to_next,
2625                                            pi0, next0);
2626         }
2627
2628       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2629     }
2630
2631   /* Need to do trace after rewrites to pick up new packet data. */
2632   if (node->flags & VLIB_NODE_FLAG_TRACE)
2633     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2634
2635   return frame->n_vectors;
2636 }
2637
2638
2639 /** @brief IPv4 rewrite node.
2640     @node ip4-rewrite
2641
2642     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2643     header checksum, fetch the ip adjacency, check the outbound mtu,
2644     apply the adjacency rewrite, and send pkts to the adjacency
2645     rewrite header's rewrite_next_index.
2646
2647     @param vm vlib_main_t corresponding to the current thread
2648     @param node vlib_node_runtime_t
2649     @param frame vlib_frame_t whose contents should be dispatched
2650
2651     @par Graph mechanics: buffer metadata, next index usage
2652
2653     @em Uses:
2654     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2655         - the rewrite adjacency index
2656     - <code>adj->lookup_next_index</code>
2657         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2658           the packet will be dropped.
2659     - <code>adj->rewrite_header</code>
2660         - Rewrite string length, rewrite string, next_index
2661
2662     @em Sets:
2663     - <code>b->current_data, b->current_length</code>
2664         - Updated net of applying the rewrite string
2665
2666     <em>Next Indices:</em>
2667     - <code> adj->rewrite_header.next_index </code>
2668       or @c error-drop
2669 */
2670 static uword
2671 ip4_rewrite (vlib_main_t * vm,
2672              vlib_node_runtime_t * node, vlib_frame_t * frame)
2673 {
2674   if (adj_are_counters_enabled ())
2675     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2676   else
2677     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2678 }
2679
2680 static uword
2681 ip4_midchain (vlib_main_t * vm,
2682               vlib_node_runtime_t * node, vlib_frame_t * frame)
2683 {
2684   if (adj_are_counters_enabled ())
2685     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2686   else
2687     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2688 }
2689
2690 static uword
2691 ip4_rewrite_mcast (vlib_main_t * vm,
2692                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2693 {
2694   if (adj_are_counters_enabled ())
2695     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2696   else
2697     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2698 }
2699
2700 static uword
2701 ip4_mcast_midchain (vlib_main_t * vm,
2702                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2703 {
2704   if (adj_are_counters_enabled ())
2705     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2706   else
2707     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2708 }
2709
2710 /* *INDENT-OFF* */
2711 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2712   .function = ip4_rewrite,
2713   .name = "ip4-rewrite",
2714   .vector_size = sizeof (u32),
2715
2716   .format_trace = format_ip4_rewrite_trace,
2717
2718   .n_next_nodes = 2,
2719   .next_nodes = {
2720     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2721     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2722   },
2723 };
2724 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2725
2726 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2727   .function = ip4_rewrite_mcast,
2728   .name = "ip4-rewrite-mcast",
2729   .vector_size = sizeof (u32),
2730
2731   .format_trace = format_ip4_rewrite_trace,
2732   .sibling_of = "ip4-rewrite",
2733 };
2734 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2735
2736 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2737   .function = ip4_mcast_midchain,
2738   .name = "ip4-mcast-midchain",
2739   .vector_size = sizeof (u32),
2740
2741   .format_trace = format_ip4_rewrite_trace,
2742   .sibling_of = "ip4-rewrite",
2743 };
2744 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2745
2746 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2747   .function = ip4_midchain,
2748   .name = "ip4-midchain",
2749   .vector_size = sizeof (u32),
2750   .format_trace = format_ip4_forward_next_trace,
2751   .sibling_of =  "ip4-rewrite",
2752 };
2753 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2754 /* *INDENT-ON* */
2755
2756 int
2757 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2758 {
2759   ip4_fib_mtrie_t *mtrie0;
2760   ip4_fib_mtrie_leaf_t leaf0;
2761   u32 lbi0;
2762
2763   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2764
2765   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2766   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2767   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2768
2769   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2770
2771   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2772 }
2773
2774 static clib_error_t *
2775 test_lookup_command_fn (vlib_main_t * vm,
2776                         unformat_input_t * input, vlib_cli_command_t * cmd)
2777 {
2778   ip4_fib_t *fib;
2779   u32 table_id = 0;
2780   f64 count = 1;
2781   u32 n;
2782   int i;
2783   ip4_address_t ip4_base_address;
2784   u64 errors = 0;
2785
2786   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2787     {
2788       if (unformat (input, "table %d", &table_id))
2789         {
2790           /* Make sure the entry exists. */
2791           fib = ip4_fib_get (table_id);
2792           if ((fib) && (fib->index != table_id))
2793             return clib_error_return (0, "<fib-index> %d does not exist",
2794                                       table_id);
2795         }
2796       else if (unformat (input, "count %f", &count))
2797         ;
2798
2799       else if (unformat (input, "%U",
2800                          unformat_ip4_address, &ip4_base_address))
2801         ;
2802       else
2803         return clib_error_return (0, "unknown input `%U'",
2804                                   format_unformat_error, input);
2805     }
2806
2807   n = count;
2808
2809   for (i = 0; i < n; i++)
2810     {
2811       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2812         errors++;
2813
2814       ip4_base_address.as_u32 =
2815         clib_host_to_net_u32 (1 +
2816                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2817     }
2818
2819   if (errors)
2820     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2821   else
2822     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2823
2824   return 0;
2825 }
2826
2827 /*?
2828  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2829  * given FIB table to determine if there is a conflict with the
2830  * adjacency table. The fib-id can be determined by using the
2831  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2832  * of 0 is used.
2833  *
2834  * @todo This command uses fib-id, other commands use table-id (not
2835  * just a name, they are different indexes). Would like to change this
2836  * to table-id for consistency.
2837  *
2838  * @cliexpar
2839  * Example of how to run the test lookup command:
2840  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2841  * No errors in 2 lookups
2842  * @cliexend
2843 ?*/
2844 /* *INDENT-OFF* */
2845 VLIB_CLI_COMMAND (lookup_test_command, static) =
2846 {
2847   .path = "test lookup",
2848   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2849   .function = test_lookup_command_fn,
2850 };
2851 /* *INDENT-ON* */
2852
2853 int
2854 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2855 {
2856   u32 fib_index;
2857
2858   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2859
2860   if (~0 == fib_index)
2861     return VNET_API_ERROR_NO_SUCH_FIB;
2862
2863   fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
2864                                   flow_hash_config);
2865
2866   return 0;
2867 }
2868
2869 static clib_error_t *
2870 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2871                              unformat_input_t * input,
2872                              vlib_cli_command_t * cmd)
2873 {
2874   int matched = 0;
2875   u32 table_id = 0;
2876   u32 flow_hash_config = 0;
2877   int rv;
2878
2879   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2880     {
2881       if (unformat (input, "table %d", &table_id))
2882         matched = 1;
2883 #define _(a,v) \
2884     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2885       foreach_flow_hash_bit
2886 #undef _
2887         else
2888         break;
2889     }
2890
2891   if (matched == 0)
2892     return clib_error_return (0, "unknown input `%U'",
2893                               format_unformat_error, input);
2894
2895   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2896   switch (rv)
2897     {
2898     case 0:
2899       break;
2900
2901     case VNET_API_ERROR_NO_SUCH_FIB:
2902       return clib_error_return (0, "no such FIB table %d", table_id);
2903
2904     default:
2905       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2906       break;
2907     }
2908
2909   return 0;
2910 }
2911
2912 /*?
2913  * Configure the set of IPv4 fields used by the flow hash.
2914  *
2915  * @cliexpar
2916  * Example of how to set the flow hash on a given table:
2917  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2918  * Example of how to display the configured flow hash:
2919  * @cliexstart{show ip fib}
2920  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2921  * 0.0.0.0/0
2922  *   unicast-ip4-chain
2923  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2924  *     [0] [@0]: dpo-drop ip6
2925  * 0.0.0.0/32
2926  *   unicast-ip4-chain
2927  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2928  *     [0] [@0]: dpo-drop ip6
2929  * 224.0.0.0/8
2930  *   unicast-ip4-chain
2931  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2932  *     [0] [@0]: dpo-drop ip6
2933  * 6.0.1.2/32
2934  *   unicast-ip4-chain
2935  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2936  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2937  * 7.0.0.1/32
2938  *   unicast-ip4-chain
2939  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
2940  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2941  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2942  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
2943  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
2944  * 240.0.0.0/8
2945  *   unicast-ip4-chain
2946  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
2947  *     [0] [@0]: dpo-drop ip6
2948  * 255.255.255.255/32
2949  *   unicast-ip4-chain
2950  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
2951  *     [0] [@0]: dpo-drop ip6
2952  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
2953  * 0.0.0.0/0
2954  *   unicast-ip4-chain
2955  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
2956  *     [0] [@0]: dpo-drop ip6
2957  * 0.0.0.0/32
2958  *   unicast-ip4-chain
2959  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
2960  *     [0] [@0]: dpo-drop ip6
2961  * 172.16.1.0/24
2962  *   unicast-ip4-chain
2963  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
2964  *     [0] [@4]: ipv4-glean: af_packet0
2965  * 172.16.1.1/32
2966  *   unicast-ip4-chain
2967  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
2968  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
2969  * 172.16.1.2/32
2970  *   unicast-ip4-chain
2971  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
2972  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
2973  * 172.16.2.0/24
2974  *   unicast-ip4-chain
2975  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
2976  *     [0] [@4]: ipv4-glean: af_packet1
2977  * 172.16.2.1/32
2978  *   unicast-ip4-chain
2979  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
2980  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
2981  * 224.0.0.0/8
2982  *   unicast-ip4-chain
2983  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
2984  *     [0] [@0]: dpo-drop ip6
2985  * 240.0.0.0/8
2986  *   unicast-ip4-chain
2987  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
2988  *     [0] [@0]: dpo-drop ip6
2989  * 255.255.255.255/32
2990  *   unicast-ip4-chain
2991  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
2992  *     [0] [@0]: dpo-drop ip6
2993  * @cliexend
2994 ?*/
2995 /* *INDENT-OFF* */
2996 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
2997 {
2998   .path = "set ip flow-hash",
2999   .short_help =
3000   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3001   .function = set_ip_flow_hash_command_fn,
3002 };
3003 /* *INDENT-ON* */
3004
3005 int
3006 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3007                              u32 table_index)
3008 {
3009   vnet_main_t *vnm = vnet_get_main ();
3010   vnet_interface_main_t *im = &vnm->interface_main;
3011   ip4_main_t *ipm = &ip4_main;
3012   ip_lookup_main_t *lm = &ipm->lookup_main;
3013   vnet_classify_main_t *cm = &vnet_classify_main;
3014   ip4_address_t *if_addr;
3015
3016   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3017     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3018
3019   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3020     return VNET_API_ERROR_NO_SUCH_ENTRY;
3021
3022   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3023   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3024
3025   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3026
3027   if (NULL != if_addr)
3028     {
3029       fib_prefix_t pfx = {
3030         .fp_len = 32,
3031         .fp_proto = FIB_PROTOCOL_IP4,
3032         .fp_addr.ip4 = *if_addr,
3033       };
3034       u32 fib_index;
3035
3036       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3037                                                        sw_if_index);
3038
3039
3040       if (table_index != (u32) ~ 0)
3041         {
3042           dpo_id_t dpo = DPO_INVALID;
3043
3044           dpo_set (&dpo,
3045                    DPO_CLASSIFY,
3046                    DPO_PROTO_IP4,
3047                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3048
3049           fib_table_entry_special_dpo_add (fib_index,
3050                                            &pfx,
3051                                            FIB_SOURCE_CLASSIFY,
3052                                            FIB_ENTRY_FLAG_NONE, &dpo);
3053           dpo_reset (&dpo);
3054         }
3055       else
3056         {
3057           fib_table_entry_special_remove (fib_index,
3058                                           &pfx, FIB_SOURCE_CLASSIFY);
3059         }
3060     }
3061
3062   return 0;
3063 }
3064
3065 static clib_error_t *
3066 set_ip_classify_command_fn (vlib_main_t * vm,
3067                             unformat_input_t * input,
3068                             vlib_cli_command_t * cmd)
3069 {
3070   u32 table_index = ~0;
3071   int table_index_set = 0;
3072   u32 sw_if_index = ~0;
3073   int rv;
3074
3075   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3076     {
3077       if (unformat (input, "table-index %d", &table_index))
3078         table_index_set = 1;
3079       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3080                          vnet_get_main (), &sw_if_index))
3081         ;
3082       else
3083         break;
3084     }
3085
3086   if (table_index_set == 0)
3087     return clib_error_return (0, "classify table-index must be specified");
3088
3089   if (sw_if_index == ~0)
3090     return clib_error_return (0, "interface / subif must be specified");
3091
3092   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3093
3094   switch (rv)
3095     {
3096     case 0:
3097       break;
3098
3099     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3100       return clib_error_return (0, "No such interface");
3101
3102     case VNET_API_ERROR_NO_SUCH_ENTRY:
3103       return clib_error_return (0, "No such classifier table");
3104     }
3105   return 0;
3106 }
3107
3108 /*?
3109  * Assign a classification table to an interface. The classification
3110  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3111  * commands. Once the table is created, use this command to filter packets
3112  * on an interface.
3113  *
3114  * @cliexpar
3115  * Example of how to assign a classification table to an interface:
3116  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3117 ?*/
3118 /* *INDENT-OFF* */
3119 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3120 {
3121     .path = "set ip classify",
3122     .short_help =
3123     "set ip classify intfc <interface> table-index <classify-idx>",
3124     .function = set_ip_classify_command_fn,
3125 };
3126 /* *INDENT-ON* */
3127
3128 /*
3129  * fd.io coding-style-patch-verification: ON
3130  *
3131  * Local Variables:
3132  * eval: (c-set-style "gnu")
3133  * End:
3134  */