Remove unused parameter from fib_table_entry_special_add() (only used in FIB tests...
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
54
55 /**
56  * @file
57  * @brief IPv4 Forwarding.
58  *
59  * This file contains the source code for IPv4 forwarding.
60  */
61
62 void
63 ip4_forward_next_trace (vlib_main_t * vm,
64                         vlib_node_runtime_t * node,
65                         vlib_frame_t * frame,
66                         vlib_rx_or_tx_t which_adj_index);
67
68 always_inline uword
69 ip4_lookup_inline (vlib_main_t * vm,
70                    vlib_node_runtime_t * node,
71                    vlib_frame_t * frame,
72                    int lookup_for_responses_to_locally_received_packets)
73 {
74   ip4_main_t *im = &ip4_main;
75   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76   u32 n_left_from, n_left_to_next, *from, *to_next;
77   ip_lookup_next_t next;
78   u32 thread_index = vlib_get_thread_index ();
79
80   from = vlib_frame_vector_args (frame);
81   n_left_from = frame->n_vectors;
82   next = node->cached_next_index;
83
84   while (n_left_from > 0)
85     {
86       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
87
88       while (n_left_from >= 8 && n_left_to_next >= 4)
89         {
90           vlib_buffer_t *p0, *p1, *p2, *p3;
91           ip4_header_t *ip0, *ip1, *ip2, *ip3;
92           __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t *lb0, *lb1, *lb2, *lb3;
95           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           u32 pi0, fib_index0, lb_index0;
99           u32 pi1, fib_index1, lb_index1;
100           u32 pi2, fib_index2, lb_index2;
101           u32 pi3, fib_index3, lb_index3;
102           flow_hash_config_t flow_hash_config0, flow_hash_config1;
103           flow_hash_config_t flow_hash_config2, flow_hash_config3;
104           u32 hash_c0, hash_c1, hash_c2, hash_c3;
105           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
106
107           /* Prefetch next iteration. */
108           {
109             vlib_buffer_t *p4, *p5, *p6, *p7;
110
111             p4 = vlib_get_buffer (vm, from[4]);
112             p5 = vlib_get_buffer (vm, from[5]);
113             p6 = vlib_get_buffer (vm, from[6]);
114             p7 = vlib_get_buffer (vm, from[7]);
115
116             vlib_prefetch_buffer_header (p4, LOAD);
117             vlib_prefetch_buffer_header (p5, LOAD);
118             vlib_prefetch_buffer_header (p6, LOAD);
119             vlib_prefetch_buffer_header (p7, LOAD);
120
121             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
125           }
126
127           pi0 = to_next[0] = from[0];
128           pi1 = to_next[1] = from[1];
129           pi2 = to_next[2] = from[2];
130           pi3 = to_next[3] = from[3];
131
132           from += 4;
133           to_next += 4;
134           n_left_to_next -= 4;
135           n_left_from -= 4;
136
137           p0 = vlib_get_buffer (vm, pi0);
138           p1 = vlib_get_buffer (vm, pi1);
139           p2 = vlib_get_buffer (vm, pi2);
140           p3 = vlib_get_buffer (vm, pi3);
141
142           ip0 = vlib_buffer_get_current (p0);
143           ip1 = vlib_buffer_get_current (p1);
144           ip2 = vlib_buffer_get_current (p2);
145           ip3 = vlib_buffer_get_current (p3);
146
147           dst_addr0 = &ip0->dst_address;
148           dst_addr1 = &ip1->dst_address;
149           dst_addr2 = &ip2->dst_address;
150           dst_addr3 = &ip3->dst_address;
151
152           fib_index0 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
155           fib_index1 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
158           fib_index2 =
159             vec_elt (im->fib_index_by_sw_if_index,
160                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
161           fib_index3 =
162             vec_elt (im->fib_index_by_sw_if_index,
163                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
164           fib_index0 =
165             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
167           fib_index1 =
168             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
170           fib_index2 =
171             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
172              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
173           fib_index3 =
174             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
175              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
176
177
178           if (!lookup_for_responses_to_locally_received_packets)
179             {
180               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
181               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
182               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
183               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
184
185               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
186               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
187               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
188               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
189             }
190
191           tcp0 = (void *) (ip0 + 1);
192           tcp1 = (void *) (ip1 + 1);
193           tcp2 = (void *) (ip2 + 1);
194           tcp3 = (void *) (ip3 + 1);
195
196           if (!lookup_for_responses_to_locally_received_packets)
197             {
198               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
199               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
200               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
201               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
202             }
203
204           if (!lookup_for_responses_to_locally_received_packets)
205             {
206               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
207               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
208               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
209               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
210             }
211
212           if (lookup_for_responses_to_locally_received_packets)
213             {
214               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
215               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
216               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
217               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
218             }
219           else
220             {
221               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
222               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
223               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
224               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
225             }
226
227           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
228           lb0 = load_balance_get (lb_index0);
229           lb1 = load_balance_get (lb_index1);
230           lb2 = load_balance_get (lb_index2);
231           lb3 = load_balance_get (lb_index3);
232
233           /* Use flow hash to compute multipath adjacency. */
234           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
235           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
236           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
237           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
238           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
239             {
240               flow_hash_config0 = lb0->lb_hash_config;
241               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
242                 ip4_compute_flow_hash (ip0, flow_hash_config0);
243             }
244           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
245             {
246               flow_hash_config1 = lb1->lb_hash_config;
247               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
248                 ip4_compute_flow_hash (ip1, flow_hash_config1);
249             }
250           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
251             {
252               flow_hash_config2 = lb2->lb_hash_config;
253               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
254                 ip4_compute_flow_hash (ip2, flow_hash_config2);
255             }
256           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
257             {
258               flow_hash_config3 = lb3->lb_hash_config;
259               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
260                 ip4_compute_flow_hash (ip3, flow_hash_config3);
261             }
262
263           ASSERT (lb0->lb_n_buckets > 0);
264           ASSERT (is_pow2 (lb0->lb_n_buckets));
265           ASSERT (lb1->lb_n_buckets > 0);
266           ASSERT (is_pow2 (lb1->lb_n_buckets));
267           ASSERT (lb2->lb_n_buckets > 0);
268           ASSERT (is_pow2 (lb2->lb_n_buckets));
269           ASSERT (lb3->lb_n_buckets > 0);
270           ASSERT (is_pow2 (lb3->lb_n_buckets));
271
272           dpo0 = load_balance_get_bucket_i (lb0,
273                                             (hash_c0 &
274                                              (lb0->lb_n_buckets_minus_1)));
275           dpo1 = load_balance_get_bucket_i (lb1,
276                                             (hash_c1 &
277                                              (lb1->lb_n_buckets_minus_1)));
278           dpo2 = load_balance_get_bucket_i (lb2,
279                                             (hash_c2 &
280                                              (lb2->lb_n_buckets_minus_1)));
281           dpo3 = load_balance_get_bucket_i (lb3,
282                                             (hash_c3 &
283                                              (lb3->lb_n_buckets_minus_1)));
284
285           next0 = dpo0->dpoi_next_node;
286           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
287           next1 = dpo1->dpoi_next_node;
288           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
289           next2 = dpo2->dpoi_next_node;
290           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
291           next3 = dpo3->dpoi_next_node;
292           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
293
294           vlib_increment_combined_counter
295             (cm, thread_index, lb_index0, 1,
296              vlib_buffer_length_in_chain (vm, p0)
297              + sizeof (ethernet_header_t));
298           vlib_increment_combined_counter
299             (cm, thread_index, lb_index1, 1,
300              vlib_buffer_length_in_chain (vm, p1)
301              + sizeof (ethernet_header_t));
302           vlib_increment_combined_counter
303             (cm, thread_index, lb_index2, 1,
304              vlib_buffer_length_in_chain (vm, p2)
305              + sizeof (ethernet_header_t));
306           vlib_increment_combined_counter
307             (cm, thread_index, lb_index3, 1,
308              vlib_buffer_length_in_chain (vm, p3)
309              + sizeof (ethernet_header_t));
310
311           vlib_validate_buffer_enqueue_x4 (vm, node, next,
312                                            to_next, n_left_to_next,
313                                            pi0, pi1, pi2, pi3,
314                                            next0, next1, next2, next3);
315         }
316
317       while (n_left_from > 0 && n_left_to_next > 0)
318         {
319           vlib_buffer_t *p0;
320           ip4_header_t *ip0;
321           __attribute__ ((unused)) tcp_header_t *tcp0;
322           ip_lookup_next_t next0;
323           const load_balance_t *lb0;
324           ip4_fib_mtrie_t *mtrie0;
325           ip4_fib_mtrie_leaf_t leaf0;
326           ip4_address_t *dst_addr0;
327           u32 pi0, fib_index0, lbi0;
328           flow_hash_config_t flow_hash_config0;
329           const dpo_id_t *dpo0;
330           u32 hash_c0;
331
332           pi0 = from[0];
333           to_next[0] = pi0;
334
335           p0 = vlib_get_buffer (vm, pi0);
336
337           ip0 = vlib_buffer_get_current (p0);
338
339           dst_addr0 = &ip0->dst_address;
340
341           fib_index0 =
342             vec_elt (im->fib_index_by_sw_if_index,
343                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
344           fib_index0 =
345             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
346              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
347
348           if (!lookup_for_responses_to_locally_received_packets)
349             {
350               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
351
352               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
353             }
354
355           tcp0 = (void *) (ip0 + 1);
356
357           if (!lookup_for_responses_to_locally_received_packets)
358             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
359
360           if (!lookup_for_responses_to_locally_received_packets)
361             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
362
363           if (lookup_for_responses_to_locally_received_packets)
364             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
365           else
366             {
367               /* Handle default route. */
368               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
369             }
370
371           ASSERT (lbi0);
372           lb0 = load_balance_get (lbi0);
373
374           /* Use flow hash to compute multipath adjacency. */
375           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
376           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
377             {
378               flow_hash_config0 = lb0->lb_hash_config;
379
380               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
381                 ip4_compute_flow_hash (ip0, flow_hash_config0);
382             }
383
384           ASSERT (lb0->lb_n_buckets > 0);
385           ASSERT (is_pow2 (lb0->lb_n_buckets));
386
387           dpo0 = load_balance_get_bucket_i (lb0,
388                                             (hash_c0 &
389                                              (lb0->lb_n_buckets_minus_1)));
390
391           next0 = dpo0->dpoi_next_node;
392           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
393
394           vlib_increment_combined_counter
395             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
396
397           from += 1;
398           to_next += 1;
399           n_left_to_next -= 1;
400           n_left_from -= 1;
401
402           if (PREDICT_FALSE (next0 != next))
403             {
404               n_left_to_next += 1;
405               vlib_put_next_frame (vm, node, next, n_left_to_next);
406               next = next0;
407               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
408               to_next[0] = pi0;
409               to_next += 1;
410               n_left_to_next -= 1;
411             }
412         }
413
414       vlib_put_next_frame (vm, node, next, n_left_to_next);
415     }
416
417   if (node->flags & VLIB_NODE_FLAG_TRACE)
418     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
419
420   return frame->n_vectors;
421 }
422
423 /** @brief IPv4 lookup node.
424     @node ip4-lookup
425
426     This is the main IPv4 lookup dispatch node.
427
428     @param vm vlib_main_t corresponding to the current thread
429     @param node vlib_node_runtime_t
430     @param frame vlib_frame_t whose contents should be dispatched
431
432     @par Graph mechanics: buffer metadata, next index usage
433
434     @em Uses:
435     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
436         - Indicates the @c sw_if_index value of the interface that the
437           packet was received on.
438     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
439         - When the value is @c ~0 then the node performs a longest prefix
440           match (LPM) for the packet destination address in the FIB attached
441           to the receive interface.
442         - Otherwise perform LPM for the packet destination address in the
443           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
444           value (0, 1, ...) and not a VRF id.
445
    @em Sets:
    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
        - The index of the DPO held in the selected load-balance bucket
          (@c dpo->dpoi_index).
    - <code>vnet_buffer(b)->ip.flow_hash</code>
        - Zero, or the computed flow hash when the load-balance object
          has more than one bucket.

    <em>Next Index:</em>
    - Dispatches the packet to the next node recorded in the DPO of the
      selected load-balance bucket (@c dpo->dpoi_next_node).
454 */
455 static uword
456 ip4_lookup (vlib_main_t * vm,
457             vlib_node_runtime_t * node, vlib_frame_t * frame)
458 {
459   return ip4_lookup_inline (vm, node, frame,
460                             /* lookup_for_responses_to_locally_received_packets */
461                             0);
462
463 }
464
465 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
466
467 VLIB_REGISTER_NODE (ip4_lookup_node) =
468 {
469 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
470     sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
471     IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
472
473 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
474
475 always_inline uword
476 ip4_load_balance (vlib_main_t * vm,
477                   vlib_node_runtime_t * node, vlib_frame_t * frame)
478 {
479   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
480   u32 n_left_from, n_left_to_next, *from, *to_next;
481   ip_lookup_next_t next;
482   u32 thread_index = vlib_get_thread_index ();
483
484   from = vlib_frame_vector_args (frame);
485   n_left_from = frame->n_vectors;
486   next = node->cached_next_index;
487
488   if (node->flags & VLIB_NODE_FLAG_TRACE)
489     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
490
491   while (n_left_from > 0)
492     {
493       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
494
495
496       while (n_left_from >= 4 && n_left_to_next >= 2)
497         {
498           ip_lookup_next_t next0, next1;
499           const load_balance_t *lb0, *lb1;
500           vlib_buffer_t *p0, *p1;
501           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
502           const ip4_header_t *ip0, *ip1;
503           const dpo_id_t *dpo0, *dpo1;
504
505           /* Prefetch next iteration. */
506           {
507             vlib_buffer_t *p2, *p3;
508
509             p2 = vlib_get_buffer (vm, from[2]);
510             p3 = vlib_get_buffer (vm, from[3]);
511
512             vlib_prefetch_buffer_header (p2, STORE);
513             vlib_prefetch_buffer_header (p3, STORE);
514
515             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
516             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
517           }
518
519           pi0 = to_next[0] = from[0];
520           pi1 = to_next[1] = from[1];
521
522           from += 2;
523           n_left_from -= 2;
524           to_next += 2;
525           n_left_to_next -= 2;
526
527           p0 = vlib_get_buffer (vm, pi0);
528           p1 = vlib_get_buffer (vm, pi1);
529
530           ip0 = vlib_buffer_get_current (p0);
531           ip1 = vlib_buffer_get_current (p1);
532           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
533           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
534
535           lb0 = load_balance_get (lbi0);
536           lb1 = load_balance_get (lbi1);
537
538           /*
539            * this node is for via FIBs we can re-use the hash value from the
540            * to node if present.
541            * We don't want to use the same hash value at each level in the recursion
542            * graph as that would lead to polarisation
543            */
544           hc0 = hc1 = 0;
545
546           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
547             {
548               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
549                 {
550                   hc0 = vnet_buffer (p0)->ip.flow_hash =
551                     vnet_buffer (p0)->ip.flow_hash >> 1;
552                 }
553               else
554                 {
555                   hc0 = vnet_buffer (p0)->ip.flow_hash =
556                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
557                 }
558             }
559           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
560             {
561               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
562                 {
563                   hc1 = vnet_buffer (p1)->ip.flow_hash =
564                     vnet_buffer (p1)->ip.flow_hash >> 1;
565                 }
566               else
567                 {
568                   hc1 = vnet_buffer (p1)->ip.flow_hash =
569                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
570                 }
571             }
572
573           dpo0 =
574             load_balance_get_bucket_i (lb0,
575                                        hc0 & (lb0->lb_n_buckets_minus_1));
576           dpo1 =
577             load_balance_get_bucket_i (lb1,
578                                        hc1 & (lb1->lb_n_buckets_minus_1));
579
580           next0 = dpo0->dpoi_next_node;
581           next1 = dpo1->dpoi_next_node;
582
583           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
584           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
585
586           vlib_increment_combined_counter
587             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
588           vlib_increment_combined_counter
589             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
590
591           vlib_validate_buffer_enqueue_x2 (vm, node, next,
592                                            to_next, n_left_to_next,
593                                            pi0, pi1, next0, next1);
594         }
595
596       while (n_left_from > 0 && n_left_to_next > 0)
597         {
598           ip_lookup_next_t next0;
599           const load_balance_t *lb0;
600           vlib_buffer_t *p0;
601           u32 pi0, lbi0, hc0;
602           const ip4_header_t *ip0;
603           const dpo_id_t *dpo0;
604
605           pi0 = from[0];
606           to_next[0] = pi0;
607           from += 1;
608           to_next += 1;
609           n_left_to_next -= 1;
610           n_left_from -= 1;
611
612           p0 = vlib_get_buffer (vm, pi0);
613
614           ip0 = vlib_buffer_get_current (p0);
615           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
616
617           lb0 = load_balance_get (lbi0);
618
619           hc0 = 0;
620           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
621             {
622               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
623                 {
624                   hc0 = vnet_buffer (p0)->ip.flow_hash =
625                     vnet_buffer (p0)->ip.flow_hash >> 1;
626                 }
627               else
628                 {
629                   hc0 = vnet_buffer (p0)->ip.flow_hash =
630                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
631                 }
632             }
633
634           dpo0 =
635             load_balance_get_bucket_i (lb0,
636                                        hc0 & (lb0->lb_n_buckets_minus_1));
637
638           next0 = dpo0->dpoi_next_node;
639           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
640
641           vlib_increment_combined_counter
642             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
643
644           vlib_validate_buffer_enqueue_x1 (vm, node, next,
645                                            to_next, n_left_to_next,
646                                            pi0, next0);
647         }
648
649       vlib_put_next_frame (vm, node, next, n_left_to_next);
650     }
651
652   return frame->n_vectors;
653 }
654
655 VLIB_REGISTER_NODE (ip4_load_balance_node) =
656 {
657 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
658     sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
659     format_ip4_lookup_trace,};
660
661 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
662
663 /* get first interface address */
664 ip4_address_t *
665 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
666                              ip_interface_address_t ** result_ia)
667 {
668   ip_lookup_main_t *lm = &im->lookup_main;
669   ip_interface_address_t *ia = 0;
670   ip4_address_t *result = 0;
671
672   /* *INDENT-OFF* */
673   foreach_ip_interface_address
674     (lm, ia, sw_if_index,
675      1 /* honor unnumbered */ ,
676      ({
677        ip4_address_t * a =
678          ip_interface_address_get_address (lm, ia);
679        result = a;
680        break;
681      }));
682   /* *INDENT-OFF* */
683   if (result_ia)
684     *result_ia = result ? ia : 0;
685   return result;
686 }
687
688 static void
689 ip4_add_interface_routes (u32 sw_if_index,
690                           ip4_main_t * im, u32 fib_index,
691                           ip_interface_address_t * a)
692 {
693   ip_lookup_main_t *lm = &im->lookup_main;
694   ip4_address_t *address = ip_interface_address_get_address (lm, a);
695   fib_prefix_t pfx = {
696     .fp_len = a->address_length,
697     .fp_proto = FIB_PROTOCOL_IP4,
698     .fp_addr.ip4 = *address,
699   };
700
701   a->neighbor_probe_adj_index = ~0;
702
703   if (pfx.fp_len <= 30)
704     {
705       /* a /30 or shorter - add a glean for the network address */
706       fib_node_index_t fei;
707
708       fei = fib_table_entry_update_one_path (fib_index, &pfx,
709                                              FIB_SOURCE_INTERFACE,
710                                              (FIB_ENTRY_FLAG_CONNECTED |
711                                               FIB_ENTRY_FLAG_ATTACHED),
712                                              FIB_PROTOCOL_IP4,
713                                              /* No next-hop address */
714                                              NULL,
715                                              sw_if_index,
716                                              // invalid FIB index
717                                              ~0,
718                                              1,
719                                              // no out-label stack
720                                              NULL,
721                                              FIB_ROUTE_PATH_FLAG_NONE);
722       a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
723
724       /* Add the two broadcast addresses as drop */
725       fib_prefix_t net_pfx = {
726         .fp_len = 32,
727         .fp_proto = FIB_PROTOCOL_IP4,
728         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
729       };
730       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
731         fib_table_entry_special_add(fib_index,
732                                     &net_pfx,
733                                     FIB_SOURCE_INTERFACE,
734                                     (FIB_ENTRY_FLAG_DROP |
735                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
736       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
737       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
738         fib_table_entry_special_add(fib_index,
739                                     &net_pfx,
740                                     FIB_SOURCE_INTERFACE,
741                                     (FIB_ENTRY_FLAG_DROP |
742                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
743     }
744   else if (pfx.fp_len == 31)
745     {
746       u32 mask = clib_host_to_net_u32(1);
747       fib_prefix_t net_pfx = pfx;
748
749       net_pfx.fp_len = 32;
750       net_pfx.fp_addr.ip4.as_u32 ^= mask;
751
752       /* a /31 - add the other end as an attached host */
753       fib_table_entry_update_one_path (fib_index, &net_pfx,
754                                        FIB_SOURCE_INTERFACE,
755                                        (FIB_ENTRY_FLAG_ATTACHED),
756                                        FIB_PROTOCOL_IP4,
757                                        &net_pfx.fp_addr,
758                                        sw_if_index,
759                                        // invalid FIB index
760                                        ~0,
761                                        1,
762                                        NULL,
763                                        FIB_ROUTE_PATH_FLAG_NONE);
764     }
765   pfx.fp_len = 32;
766
767   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
768     {
769       u32 classify_table_index =
770         lm->classify_table_index_by_sw_if_index[sw_if_index];
771       if (classify_table_index != (u32) ~ 0)
772         {
773           dpo_id_t dpo = DPO_INVALID;
774
775           dpo_set (&dpo,
776                    DPO_CLASSIFY,
777                    DPO_PROTO_IP4,
778                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
779
780           fib_table_entry_special_dpo_add (fib_index,
781                                            &pfx,
782                                            FIB_SOURCE_CLASSIFY,
783                                            FIB_ENTRY_FLAG_NONE, &dpo);
784           dpo_reset (&dpo);
785         }
786     }
787
788   fib_table_entry_update_one_path (fib_index, &pfx,
789                                    FIB_SOURCE_INTERFACE,
790                                    (FIB_ENTRY_FLAG_CONNECTED |
791                                     FIB_ENTRY_FLAG_LOCAL),
792                                    FIB_PROTOCOL_IP4,
793                                    &pfx.fp_addr,
794                                    sw_if_index,
795                                    // invalid FIB index
796                                    ~0,
797                                    1, NULL,
798                                    FIB_ROUTE_PATH_FLAG_NONE);
799 }
800
801 static void
802 ip4_del_interface_routes (ip4_main_t * im,
803                           u32 fib_index,
804                           ip4_address_t * address, u32 address_length)
805 {
806   fib_prefix_t pfx = {
807     .fp_len = address_length,
808     .fp_proto = FIB_PROTOCOL_IP4,
809     .fp_addr.ip4 = *address,
810   };
811
812   if (pfx.fp_len <= 30)
813     {
814       fib_prefix_t net_pfx = {
815         .fp_len = 32,
816         .fp_proto = FIB_PROTOCOL_IP4,
817         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
818       };
819       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
820         fib_table_entry_special_remove(fib_index,
821                                        &net_pfx,
822                                        FIB_SOURCE_INTERFACE);
823       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
824       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
825         fib_table_entry_special_remove(fib_index,
826                                        &net_pfx,
827                                        FIB_SOURCE_INTERFACE);
828       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
829     }
830     else if (pfx.fp_len == 31)
831     {
832       u32 mask = clib_host_to_net_u32(1);
833       fib_prefix_t net_pfx = pfx;
834
835       net_pfx.fp_len = 32;
836       net_pfx.fp_addr.ip4.as_u32 ^= mask;
837
838       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
839     }
840
841   pfx.fp_len = 32;
842   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
843 }
844
845 void
846 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
847 {
848   ip4_main_t *im = &ip4_main;
849
850   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
851
852   /*
853    * enable/disable only on the 1<->0 transition
854    */
855   if (is_enable)
856     {
857       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
858         return;
859     }
860   else
861     {
862       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
863       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
864         return;
865     }
866   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
867                                !is_enable, 0, 0);
868
869
870   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
871                                sw_if_index, !is_enable, 0, 0);
872 }
873
874 static clib_error_t *
875 ip4_add_del_interface_address_internal (vlib_main_t * vm,
876                                         u32 sw_if_index,
877                                         ip4_address_t * address,
878                                         u32 address_length, u32 is_del)
879 {
880   vnet_main_t *vnm = vnet_get_main ();
881   ip4_main_t *im = &ip4_main;
882   ip_lookup_main_t *lm = &im->lookup_main;
883   clib_error_t *error = 0;
884   u32 if_address_index, elts_before;
885   ip4_address_fib_t ip4_af, *addr_fib = 0;
886
887   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
888   ip4_addr_fib_init (&ip4_af, address,
889                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
890   vec_add1 (addr_fib, ip4_af);
891
892   /* FIXME-LATER
893    * there is no support for adj-fib handling in the presence of overlapping
894    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
895    * most routers do.
896    */
897   /* *INDENT-OFF* */
898   if (!is_del)
899     {
900       /* When adding an address check that it does not conflict
901          with an existing address. */
902       ip_interface_address_t *ia;
903       foreach_ip_interface_address
904         (&im->lookup_main, ia, sw_if_index,
905          0 /* honor unnumbered */ ,
906          ({
907            ip4_address_t * x =
908              ip_interface_address_get_address
909              (&im->lookup_main, ia);
910            if (ip4_destination_matches_route
911                (im, address, x, ia->address_length) ||
912                ip4_destination_matches_route (im,
913                                               x,
914                                               address,
915                                               address_length))
916              return
917                clib_error_create
918                ("failed to add %U which conflicts with %U for interface %U",
919                 format_ip4_address_and_length, address,
920                 address_length,
921                 format_ip4_address_and_length, x,
922                 ia->address_length,
923                 format_vnet_sw_if_index_name, vnm,
924                 sw_if_index);
925          }));
926     }
927   /* *INDENT-ON* */
928
929   elts_before = pool_elts (lm->if_address_pool);
930
931   error = ip_interface_address_add_del
932     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
933   if (error)
934     goto done;
935
936   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
937
938   if (is_del)
939     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
940   else
941     ip4_add_interface_routes (sw_if_index,
942                               im, ip4_af.fib_index,
943                               pool_elt_at_index
944                               (lm->if_address_pool, if_address_index));
945
946   /* If pool did not grow/shrink: add duplicate address. */
947   if (elts_before != pool_elts (lm->if_address_pool))
948     {
949       ip4_add_del_interface_address_callback_t *cb;
950       vec_foreach (cb, im->add_del_interface_address_callbacks)
951         cb->function (im, cb->function_opaque, sw_if_index,
952                       address, address_length, if_address_index, is_del);
953     }
954
955 done:
956   vec_free (addr_fib);
957   return error;
958 }
959
960 clib_error_t *
961 ip4_add_del_interface_address (vlib_main_t * vm,
962                                u32 sw_if_index,
963                                ip4_address_t * address,
964                                u32 address_length, u32 is_del)
965 {
966   return ip4_add_del_interface_address_internal
967     (vm, sw_if_index, address, address_length, is_del);
968 }
969
970 /* Built-in ip4 unicast rx feature path definition */
971 /* *INDENT-OFF* */
972 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
973 {
974   .arc_name = "ip4-unicast",
975   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
976   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
977 };
978
979 VNET_FEATURE_INIT (ip4_flow_classify, static) =
980 {
981   .arc_name = "ip4-unicast",
982   .node_name = "ip4-flow-classify",
983   .runs_before = VNET_FEATURES ("ip4-inacl"),
984 };
985
986 VNET_FEATURE_INIT (ip4_inacl, static) =
987 {
988   .arc_name = "ip4-unicast",
989   .node_name = "ip4-inacl",
990   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
991 };
992
993 VNET_FEATURE_INIT (ip4_source_check_1, static) =
994 {
995   .arc_name = "ip4-unicast",
996   .node_name = "ip4-source-check-via-rx",
997   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
998 };
999
1000 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1001 {
1002   .arc_name = "ip4-unicast",
1003   .node_name = "ip4-source-check-via-any",
1004   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1005 };
1006
1007 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1008 {
1009   .arc_name = "ip4-unicast",
1010   .node_name = "ip4-source-and-port-range-check-rx",
1011   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1012 };
1013
1014 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1015 {
1016   .arc_name = "ip4-unicast",
1017   .node_name = "ip4-policer-classify",
1018   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1019 };
1020
1021 VNET_FEATURE_INIT (ip4_ipsec, static) =
1022 {
1023   .arc_name = "ip4-unicast",
1024   .node_name = "ipsec-input-ip4",
1025   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1026 };
1027
1028 VNET_FEATURE_INIT (ip4_vpath, static) =
1029 {
1030   .arc_name = "ip4-unicast",
1031   .node_name = "vpath-input-ip4",
1032   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1033 };
1034
1035 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1036 {
1037   .arc_name = "ip4-unicast",
1038   .node_name = "ip4-vxlan-bypass",
1039   .runs_before = VNET_FEATURES ("ip4-lookup"),
1040 };
1041
1042 VNET_FEATURE_INIT (ip4_drop, static) =
1043 {
1044   .arc_name = "ip4-unicast",
1045   .node_name = "ip4-drop",
1046   .runs_before = VNET_FEATURES ("ip4-lookup"),
1047 };
1048
1049 VNET_FEATURE_INIT (ip4_lookup, static) =
1050 {
1051   .arc_name = "ip4-unicast",
1052   .node_name = "ip4-lookup",
1053   .runs_before = 0,     /* not before any other features */
1054 };
1055
1056 /* Built-in ip4 multicast rx feature path definition */
1057 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1058 {
1059   .arc_name = "ip4-multicast",
1060   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1061   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1062 };
1063
1064 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1065 {
1066   .arc_name = "ip4-multicast",
1067   .node_name = "vpath-input-ip4",
1068   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1069 };
1070
1071 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1072 {
1073   .arc_name = "ip4-multicast",
1074   .node_name = "ip4-drop",
1075   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1076 };
1077
1078 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1079 {
1080   .arc_name = "ip4-multicast",
1081   .node_name = "ip4-mfib-forward-lookup",
1082   .runs_before = 0,     /* last feature */
1083 };
1084
1085 /* Source and port-range check ip4 tx feature path definition */
1086 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1087 {
1088   .arc_name = "ip4-output",
1089   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1090   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1091 };
1092
1093 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1094 {
1095   .arc_name = "ip4-output",
1096   .node_name = "ip4-source-and-port-range-check-tx",
1097   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1098 };
1099
1100 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1101 {
1102   .arc_name = "ip4-output",
1103   .node_name = "ipsec-output-ip4",
1104   .runs_before = VNET_FEATURES ("interface-output"),
1105 };
1106
1107 /* Built-in ip4 tx feature path definition */
1108 VNET_FEATURE_INIT (ip4_interface_output, static) =
1109 {
1110   .arc_name = "ip4-output",
1111   .node_name = "interface-output",
1112   .runs_before = 0,     /* not before any other features */
1113 };
1114 /* *INDENT-ON* */
1115
1116 static clib_error_t *
1117 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1118 {
1119   ip4_main_t *im = &ip4_main;
1120
1121   /* Fill in lookup tables with default table (0). */
1122   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1123   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1124
1125   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1126                                is_add, 0, 0);
1127
1128   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1129                                is_add, 0, 0);
1130
1131   return /* no error */ 0;
1132 }
1133
1134 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1135
1136 /* Global IP4 main. */
1137 ip4_main_t ip4_main;
1138
1139 clib_error_t *
1140 ip4_lookup_init (vlib_main_t * vm)
1141 {
1142   ip4_main_t *im = &ip4_main;
1143   clib_error_t *error;
1144   uword i;
1145
1146   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1147     return error;
1148
1149   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1150     {
1151       u32 m;
1152
1153       if (i < 32)
1154         m = pow2_mask (i) << (32 - i);
1155       else
1156         m = ~0;
1157       im->fib_masks[i] = clib_host_to_net_u32 (m);
1158     }
1159
1160   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1161
1162   /* Create FIB with index 0 and table id of 0. */
1163   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1164   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1165
1166   {
1167     pg_node_t *pn;
1168     pn = pg_get_node (ip4_lookup_node.index);
1169     pn->unformat_edit = unformat_pg_ip4_header;
1170   }
1171
1172   {
1173     ethernet_arp_header_t h;
1174
1175     memset (&h, 0, sizeof (h));
1176
1177     /* Set target ethernet address to all zeros. */
1178     memset (h.ip4_over_ethernet[1].ethernet, 0,
1179             sizeof (h.ip4_over_ethernet[1].ethernet));
1180
1181 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1182 #define _8(f,v) h.f = v;
1183     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1184     _16 (l3_type, ETHERNET_TYPE_IP4);
1185     _8 (n_l2_address_bytes, 6);
1186     _8 (n_l3_address_bytes, 4);
1187     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1188 #undef _16
1189 #undef _8
1190
1191     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1192                                /* data */ &h,
1193                                sizeof (h),
1194                                /* alloc chunk size */ 8,
1195                                "ip4 arp");
1196   }
1197
1198   return error;
1199 }
1200
1201 VLIB_INIT_FUNCTION (ip4_lookup_init);
1202
1203 typedef struct
1204 {
1205   /* Adjacency taken. */
1206   u32 dpo_index;
1207   u32 flow_hash;
1208   u32 fib_index;
1209
1210   /* Packet data, possibly *after* rewrite. */
1211   u8 packet_data[64 - 1 * sizeof (u32)];
1212 }
1213 ip4_forward_next_trace_t;
1214
1215 u8 *
1216 format_ip4_forward_next_trace (u8 * s, va_list * args)
1217 {
1218   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1219   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1220   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1221   uword indent = format_get_indent (s);
1222   s = format (s, "%U%U",
1223               format_white_space, indent,
1224               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1225   return s;
1226 }
1227
1228 static u8 *
1229 format_ip4_lookup_trace (u8 * s, va_list * args)
1230 {
1231   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1232   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1233   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1234   uword indent = format_get_indent (s);
1235
1236   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1237               t->fib_index, t->dpo_index, t->flow_hash);
1238   s = format (s, "\n%U%U",
1239               format_white_space, indent,
1240               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1241   return s;
1242 }
1243
1244 static u8 *
1245 format_ip4_rewrite_trace (u8 * s, va_list * args)
1246 {
1247   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1248   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1249   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1250   uword indent = format_get_indent (s);
1251
1252   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1253               t->fib_index, t->dpo_index, format_ip_adjacency,
1254               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1255   s = format (s, "\n%U%U",
1256               format_white_space, indent,
1257               format_ip_adjacency_packet_data,
1258               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1259   return s;
1260 }
1261
1262 /* Common trace function for all ip4-forward next nodes. */
1263 void
1264 ip4_forward_next_trace (vlib_main_t * vm,
1265                         vlib_node_runtime_t * node,
1266                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1267 {
1268   u32 *from, n_left;
1269   ip4_main_t *im = &ip4_main;
1270
1271   n_left = frame->n_vectors;
1272   from = vlib_frame_vector_args (frame);
1273
1274   while (n_left >= 4)
1275     {
1276       u32 bi0, bi1;
1277       vlib_buffer_t *b0, *b1;
1278       ip4_forward_next_trace_t *t0, *t1;
1279
1280       /* Prefetch next iteration. */
1281       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1282       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1283
1284       bi0 = from[0];
1285       bi1 = from[1];
1286
1287       b0 = vlib_get_buffer (vm, bi0);
1288       b1 = vlib_get_buffer (vm, bi1);
1289
1290       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1291         {
1292           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1293           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1294           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1295           t0->fib_index =
1296             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1297              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1298             vec_elt (im->fib_index_by_sw_if_index,
1299                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1300
1301           clib_memcpy (t0->packet_data,
1302                        vlib_buffer_get_current (b0),
1303                        sizeof (t0->packet_data));
1304         }
1305       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1306         {
1307           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1308           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1309           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1310           t1->fib_index =
1311             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1312              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1313             vec_elt (im->fib_index_by_sw_if_index,
1314                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1315           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1316                        sizeof (t1->packet_data));
1317         }
1318       from += 2;
1319       n_left -= 2;
1320     }
1321
1322   while (n_left >= 1)
1323     {
1324       u32 bi0;
1325       vlib_buffer_t *b0;
1326       ip4_forward_next_trace_t *t0;
1327
1328       bi0 = from[0];
1329
1330       b0 = vlib_get_buffer (vm, bi0);
1331
1332       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1333         {
1334           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1335           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1336           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1337           t0->fib_index =
1338             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1339              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1340             vec_elt (im->fib_index_by_sw_if_index,
1341                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1342           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1343                        sizeof (t0->packet_data));
1344         }
1345       from += 1;
1346       n_left -= 1;
1347     }
1348 }
1349
1350 static uword
1351 ip4_drop_or_punt (vlib_main_t * vm,
1352                   vlib_node_runtime_t * node,
1353                   vlib_frame_t * frame, ip4_error_t error_code)
1354 {
1355   u32 *buffers = vlib_frame_vector_args (frame);
1356   uword n_packets = frame->n_vectors;
1357
1358   vlib_error_drop_buffers (vm, node, buffers,
1359                            /* stride */ 1,
1360                            n_packets,
1361                            /* next */ 0,
1362                            ip4_input_node.index, error_code);
1363
1364   if (node->flags & VLIB_NODE_FLAG_TRACE)
1365     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1366
1367   return n_packets;
1368 }
1369
1370 static uword
1371 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1372 {
1373   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
1374 }
1375
1376 static uword
1377 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1378 {
1379   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
1380 }
1381
1382 /* *INDENT-OFF* */
1383 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1384 {
1385   .function = ip4_drop,.
1386   name = "ip4-drop",
1387   .vector_size = sizeof (u32),
1388   .format_trace = format_ip4_forward_next_trace,
1389   .n_next_nodes = 1,
1390   .next_nodes = {
1391     [0] = "error-drop",
1392   },
1393 };
1394
1395 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1396
1397 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1398 {
1399   .function = ip4_punt,
1400   .name = "ip4-punt",
1401   .vector_size = sizeof (u32),
1402   .format_trace = format_ip4_forward_next_trace,
1403   .n_next_nodes = 1,
1404   .next_nodes = {
1405     [0] = "error-punt",
1406   },
1407 };
1408
1409 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
/* *INDENT-ON* */
1411
1412 /* Compute TCP/UDP/ICMP4 checksum in software. */
1413 u16
1414 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1415                               ip4_header_t * ip0)
1416 {
1417   ip_csum_t sum0;
1418   u32 ip_header_length, payload_length_host_byte_order;
1419   u32 n_this_buffer, n_bytes_left;
1420   u16 sum16;
1421   void *data_this_buffer;
1422
1423   /* Initialize checksum with ip header. */
1424   ip_header_length = ip4_header_bytes (ip0);
1425   payload_length_host_byte_order =
1426     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1427   sum0 =
1428     clib_host_to_net_u32 (payload_length_host_byte_order +
1429                           (ip0->protocol << 16));
1430
1431   if (BITS (uword) == 32)
1432     {
1433       sum0 =
1434         ip_csum_with_carry (sum0,
1435                             clib_mem_unaligned (&ip0->src_address, u32));
1436       sum0 =
1437         ip_csum_with_carry (sum0,
1438                             clib_mem_unaligned (&ip0->dst_address, u32));
1439     }
1440   else
1441     sum0 =
1442       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1443
1444   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1445   data_this_buffer = (void *) ip0 + ip_header_length;
1446   if (n_this_buffer + ip_header_length > p0->current_length)
1447     n_this_buffer =
1448       p0->current_length >
1449       ip_header_length ? p0->current_length - ip_header_length : 0;
1450   while (1)
1451     {
1452       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1453       n_bytes_left -= n_this_buffer;
1454       if (n_bytes_left == 0)
1455         break;
1456
1457       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1458       p0 = vlib_get_buffer (vm, p0->next_buffer);
1459       data_this_buffer = vlib_buffer_get_current (p0);
1460       n_this_buffer = p0->current_length;
1461     }
1462
1463   sum16 = ~ip_csum_fold (sum0);
1464
1465   return sum16;
1466 }
1467
1468 u32
1469 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1470 {
1471   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1472   udp_header_t *udp0;
1473   u16 sum16;
1474
1475   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1476           || ip0->protocol == IP_PROTOCOL_UDP);
1477
1478   udp0 = (void *) (ip0 + 1);
1479   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1480     {
1481       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1482                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1483       return p0->flags;
1484     }
1485
1486   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1487
1488   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1489                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1490
1491   return p0->flags;
1492 }
1493
1494 /* *INDENT-OFF* */
1495 VNET_FEATURE_ARC_INIT (ip4_local) =
1496 {
1497   .arc_name  = "ip4-local",
1498   .start_nodes = VNET_FEATURES ("ip4-local"),
1499 };
1500 /* *INDENT-ON* */
1501
1502 static inline uword
1503 ip4_local_inline (vlib_main_t * vm,
1504                   vlib_node_runtime_t * node,
1505                   vlib_frame_t * frame, int head_of_feature_arc)
1506 {
1507   ip4_main_t *im = &ip4_main;
1508   ip_lookup_main_t *lm = &im->lookup_main;
1509   ip_local_next_t next_index;
1510   u32 *from, *to_next, n_left_from, n_left_to_next;
1511   vlib_node_runtime_t *error_node =
1512     vlib_node_get_runtime (vm, ip4_input_node.index);
1513   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1514
1515   from = vlib_frame_vector_args (frame);
1516   n_left_from = frame->n_vectors;
1517   next_index = node->cached_next_index;
1518
1519   if (node->flags & VLIB_NODE_FLAG_TRACE)
1520     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1521
1522   while (n_left_from > 0)
1523     {
1524       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1525
1526       while (n_left_from >= 4 && n_left_to_next >= 2)
1527         {
1528           vlib_buffer_t *p0, *p1;
1529           ip4_header_t *ip0, *ip1;
1530           udp_header_t *udp0, *udp1;
1531           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1532           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1533           const dpo_id_t *dpo0, *dpo1;
1534           const load_balance_t *lb0, *lb1;
1535           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1536           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1537           i32 len_diff0, len_diff1;
1538           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1539           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1540           u32 sw_if_index0, sw_if_index1;
1541
1542           pi0 = to_next[0] = from[0];
1543           pi1 = to_next[1] = from[1];
1544           from += 2;
1545           n_left_from -= 2;
1546           to_next += 2;
1547           n_left_to_next -= 2;
1548
1549           next0 = next1 = IP_LOCAL_NEXT_DROP;
1550
1551           p0 = vlib_get_buffer (vm, pi0);
1552           p1 = vlib_get_buffer (vm, pi1);
1553
1554           ip0 = vlib_buffer_get_current (p0);
1555           ip1 = vlib_buffer_get_current (p1);
1556
1557           vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1558           vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1559
1560           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1561           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1562
1563           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1564           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1565
1566           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1567           fib_index0 =
1568             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1569              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1570
1571           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1572           fib_index1 =
1573             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1574              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1575
1576           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1577           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1578
1579           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1580           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1581
1582           /* Treat IP frag packets as "experimental" protocol for now
1583              until support of IP frag reassembly is implemented */
1584           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1585           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1586
1587           if (head_of_feature_arc == 0)
1588             {
1589               error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1590               goto skip_checks;
1591             }
1592
1593           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1594           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1595           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1596           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1597
1598           flags0 = p0->flags;
1599           flags1 = p1->flags;
1600
1601           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1602           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1603
1604           udp0 = ip4_next_header (ip0);
1605           udp1 = ip4_next_header (ip1);
1606
1607           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1608           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1609           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1610
1611           /* Verify UDP length. */
1612           ip_len0 = clib_net_to_host_u16 (ip0->length);
1613           ip_len1 = clib_net_to_host_u16 (ip1->length);
1614           udp_len0 = clib_net_to_host_u16 (udp0->length);
1615           udp_len1 = clib_net_to_host_u16 (udp1->length);
1616
1617           len_diff0 = ip_len0 - udp_len0;
1618           len_diff1 = ip_len1 - udp_len1;
1619
1620           len_diff0 = is_udp0 ? len_diff0 : 0;
1621           len_diff1 = is_udp1 ? len_diff1 : 0;
1622
1623           if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1624                                & good_tcp_udp0 & good_tcp_udp1)))
1625             {
1626               if (is_tcp_udp0)
1627                 {
1628                   if (is_tcp_udp0
1629                       && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1630                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1631                   good_tcp_udp0 =
1632                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1633                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1634                 }
1635               if (is_tcp_udp1)
1636                 {
1637                   if (is_tcp_udp1
1638                       && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1639                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1640                   good_tcp_udp1 =
1641                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1642                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1643                 }
1644             }
1645
1646           good_tcp_udp0 &= len_diff0 >= 0;
1647           good_tcp_udp1 &= len_diff1 >= 0;
1648
1649           leaf0 =
1650             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1651           leaf1 =
1652             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1653
1654           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1655
1656           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1657           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1658
1659           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1660           error0 = (is_tcp_udp0 && !good_tcp_udp0
1661                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1662           error1 = (is_tcp_udp1 && !good_tcp_udp1
1663                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1664
1665           leaf0 =
1666             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1667           leaf1 =
1668             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1669
1670           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1671             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1672           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1673
1674           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1675             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1676           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1677
1678           lb0 = load_balance_get (lbi0);
1679           lb1 = load_balance_get (lbi1);
1680           dpo0 = load_balance_get_bucket_i (lb0, 0);
1681           dpo1 = load_balance_get_bucket_i (lb1, 0);
1682
1683           /*
1684            * Must have a route to source otherwise we drop the packet.
1685            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1686            *
1687            * The checks are:
1688            *  - the source is a recieve => it's from us => bogus, do this
1689            *    first since it sets a different error code.
1690            *  - uRPF check for any route to source - accept if passes.
1691            *  - allow packets destined to the broadcast address from unknown sources
1692            */
1693           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1694                      dpo0->dpoi_type == DPO_RECEIVE) ?
1695                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1696           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1697                      !fib_urpf_check_size (lb0->lb_urpf) &&
1698                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1699                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1700           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1701                      dpo1->dpoi_type == DPO_RECEIVE) ?
1702                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1703           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1704                      !fib_urpf_check_size (lb1->lb_urpf) &&
1705                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1706                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1707
1708         skip_checks:
1709
1710           next0 = lm->local_next_by_ip_protocol[proto0];
1711           next1 = lm->local_next_by_ip_protocol[proto1];
1712
1713           next0 =
1714             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1715           next1 =
1716             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1717
1718           p0->error = error0 ? error_node->errors[error0] : 0;
1719           p1->error = error1 ? error_node->errors[error1] : 0;
1720
1721           if (head_of_feature_arc)
1722             {
1723               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1724                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1725               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1726                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1727             }
1728
1729           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1730                                            n_left_to_next, pi0, pi1,
1731                                            next0, next1);
1732         }
1733
1734       while (n_left_from > 0 && n_left_to_next > 0)
1735         {
1736           vlib_buffer_t *p0;
1737           ip4_header_t *ip0;
1738           udp_header_t *udp0;
1739           ip4_fib_mtrie_t *mtrie0;
1740           ip4_fib_mtrie_leaf_t leaf0;
1741           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1742           i32 len_diff0;
1743           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1744           load_balance_t *lb0;
1745           const dpo_id_t *dpo0;
1746           u32 sw_if_index0;
1747
1748           pi0 = to_next[0] = from[0];
1749           from += 1;
1750           n_left_from -= 1;
1751           to_next += 1;
1752           n_left_to_next -= 1;
1753
1754           next0 = IP_LOCAL_NEXT_DROP;
1755
1756           p0 = vlib_get_buffer (vm, pi0);
1757
1758           ip0 = vlib_buffer_get_current (p0);
1759
1760           vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1761
1762           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1763
1764           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1765
1766           fib_index0 =
1767             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1768              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1769
1770           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1771
1772           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1773
1774           /* Treat IP frag packets as "experimental" protocol for now
1775              until support of IP frag reassembly is implemented */
1776           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1777
1778           if (head_of_feature_arc == 0)
1779             {
1780               error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1781               goto skip_check;
1782             }
1783
1784           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1785           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1786
1787           flags0 = p0->flags;
1788
1789           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1790
1791           udp0 = ip4_next_header (ip0);
1792
1793           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1794           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1795
1796           /* Verify UDP length. */
1797           ip_len0 = clib_net_to_host_u16 (ip0->length);
1798           udp_len0 = clib_net_to_host_u16 (udp0->length);
1799
1800           len_diff0 = ip_len0 - udp_len0;
1801
1802           len_diff0 = is_udp0 ? len_diff0 : 0;
1803
1804           if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1805             {
1806               if (is_tcp_udp0)
1807                 {
1808                   if (is_tcp_udp0
1809                       && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1810                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1811                   good_tcp_udp0 =
1812                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1813                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1814                 }
1815             }
1816
1817           good_tcp_udp0 &= len_diff0 >= 0;
1818
1819           leaf0 =
1820             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1821
1822           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1823
1824           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1825
1826           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1827           error0 = (is_tcp_udp0 && !good_tcp_udp0
1828                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1829
1830           leaf0 =
1831             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1832
1833           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1834           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1835
1836           lb0 = load_balance_get (lbi0);
1837           dpo0 = load_balance_get_bucket_i (lb0, 0);
1838
1839           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1840             vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1841
1842           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1843                      dpo0->dpoi_type == DPO_RECEIVE) ?
1844                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1845           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1846                      !fib_urpf_check_size (lb0->lb_urpf) &&
1847                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1848                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1849
1850         skip_check:
1851
1852           next0 = lm->local_next_by_ip_protocol[proto0];
1853
1854           next0 =
1855             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1856
1857           p0->error = error0 ? error_node->errors[error0] : 0;
1858
1859           if (head_of_feature_arc)
1860             {
1861               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1862                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1863             }
1864
1865           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1866                                            n_left_to_next, pi0, next0);
1867
1868         }
1869
1870       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1871     }
1872
1873   return frame->n_vectors;
1874 }
1875
1876 static uword
1877 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1878 {
1879   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1880 }
1881
1882 /* *INDENT-OFF* */
1883 VLIB_REGISTER_NODE (ip4_local_node) =
1884 {
1885   .function = ip4_local,
1886   .name = "ip4-local",
1887   .vector_size = sizeof (u32),
1888   .format_trace = format_ip4_forward_next_trace,
1889   .n_next_nodes = IP_LOCAL_N_NEXT,
1890   .next_nodes =
1891   {
1892     [IP_LOCAL_NEXT_DROP] = "error-drop",
1893     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1894     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1895     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1896 };
1897 /* *INDENT-ON* */
1898
1899 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1900
1901 static uword
1902 ip4_local_end_of_arc (vlib_main_t * vm,
1903                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1904 {
1905   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1906 }
1907
1908 /* *INDENT-OFF* */
1909 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1910   .function = ip4_local_end_of_arc,
1911   .name = "ip4-local-end-of-arc",
1912   .vector_size = sizeof (u32),
1913
1914   .format_trace = format_ip4_forward_next_trace,
1915   .sibling_of = "ip4-local",
1916 };
1917
1918 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1919
1920 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1921   .arc_name = "ip4-local",
1922   .node_name = "ip4-local-end-of-arc",
1923   .runs_before = 0, /* not before any other features */
1924 };
1925 /* *INDENT-ON* */
1926
1927 void
1928 ip4_register_protocol (u32 protocol, u32 node_index)
1929 {
1930   vlib_main_t *vm = vlib_get_main ();
1931   ip4_main_t *im = &ip4_main;
1932   ip_lookup_main_t *lm = &im->lookup_main;
1933
1934   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1935   lm->local_next_by_ip_protocol[protocol] =
1936     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1937 }
1938
1939 static clib_error_t *
1940 show_ip_local_command_fn (vlib_main_t * vm,
1941                           unformat_input_t * input, vlib_cli_command_t * cmd)
1942 {
1943   ip4_main_t *im = &ip4_main;
1944   ip_lookup_main_t *lm = &im->lookup_main;
1945   int i;
1946
1947   vlib_cli_output (vm, "Protocols handled by ip4_local");
1948   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1949     {
1950       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1951         vlib_cli_output (vm, "%d", i);
1952     }
1953   return 0;
1954 }
1955
1956
1957
1958 /*?
1959  * Display the set of protocols handled by the local IPv4 stack.
1960  *
1961  * @cliexpar
1962  * Example of how to display local protocol table:
1963  * @cliexstart{show ip local}
1964  * Protocols handled by ip4_local
1965  * 1
1966  * 17
1967  * 47
1968  * @cliexend
1969 ?*/
1970 /* *INDENT-OFF* */
1971 VLIB_CLI_COMMAND (show_ip_local, static) =
1972 {
1973   .path = "show ip local",
1974   .function = show_ip_local_command_fn,
1975   .short_help = "show ip local",
1976 };
1977 /* *INDENT-ON* */
1978
1979 always_inline uword
1980 ip4_arp_inline (vlib_main_t * vm,
1981                 vlib_node_runtime_t * node,
1982                 vlib_frame_t * frame, int is_glean)
1983 {
1984   vnet_main_t *vnm = vnet_get_main ();
1985   ip4_main_t *im = &ip4_main;
1986   ip_lookup_main_t *lm = &im->lookup_main;
1987   u32 *from, *to_next_drop;
1988   uword n_left_from, n_left_to_next_drop, next_index;
1989   static f64 time_last_seed_change = -1e100;
1990   static u32 hash_seeds[3];
1991   static uword hash_bitmap[256 / BITS (uword)];
1992   f64 time_now;
1993
1994   if (node->flags & VLIB_NODE_FLAG_TRACE)
1995     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1996
1997   time_now = vlib_time_now (vm);
1998   if (time_now - time_last_seed_change > 1e-3)
1999     {
2000       uword i;
2001       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2002                                             sizeof (hash_seeds));
2003       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2004         hash_seeds[i] = r[i];
2005
2006       /* Mark all hash keys as been no-seen before. */
2007       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2008         hash_bitmap[i] = 0;
2009
2010       time_last_seed_change = time_now;
2011     }
2012
2013   from = vlib_frame_vector_args (frame);
2014   n_left_from = frame->n_vectors;
2015   next_index = node->cached_next_index;
2016   if (next_index == IP4_ARP_NEXT_DROP)
2017     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
2018
2019   while (n_left_from > 0)
2020     {
2021       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2022                            to_next_drop, n_left_to_next_drop);
2023
2024       while (n_left_from > 0 && n_left_to_next_drop > 0)
2025         {
2026           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2027           ip_adjacency_t *adj0;
2028           vlib_buffer_t *p0;
2029           ip4_header_t *ip0;
2030           uword bm0;
2031
2032           pi0 = from[0];
2033
2034           p0 = vlib_get_buffer (vm, pi0);
2035
2036           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2037           adj0 = adj_get (adj_index0);
2038           ip0 = vlib_buffer_get_current (p0);
2039
2040           a0 = hash_seeds[0];
2041           b0 = hash_seeds[1];
2042           c0 = hash_seeds[2];
2043
2044           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2045           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2046
2047           if (is_glean)
2048             {
2049               /*
2050                * this is the Glean case, so we are ARPing for the
2051                * packet's destination
2052                */
2053               a0 ^= ip0->dst_address.data_u32;
2054             }
2055           else
2056             {
2057               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2058             }
2059           b0 ^= sw_if_index0;
2060
2061           hash_v3_finalize32 (a0, b0, c0);
2062
2063           c0 &= BITS (hash_bitmap) - 1;
2064           c0 = c0 / BITS (uword);
2065           m0 = (uword) 1 << (c0 % BITS (uword));
2066
2067           bm0 = hash_bitmap[c0];
2068           drop0 = (bm0 & m0) != 0;
2069
2070           /* Mark it as seen. */
2071           hash_bitmap[c0] = bm0 | m0;
2072
2073           from += 1;
2074           n_left_from -= 1;
2075           to_next_drop[0] = pi0;
2076           to_next_drop += 1;
2077           n_left_to_next_drop -= 1;
2078
2079           p0->error =
2080             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2081                          IP4_ARP_ERROR_REQUEST_SENT];
2082
2083           /*
2084            * the adj has been updated to a rewrite but the node the DPO that got
2085            * us here hasn't - yet. no big deal. we'll drop while we wait.
2086            */
2087           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2088             continue;
2089
2090           if (drop0)
2091             continue;
2092
2093           /*
2094            * Can happen if the control-plane is programming tables
2095            * with traffic flowing; at least that's today's lame excuse.
2096            */
2097           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2098               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2099             {
2100               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2101             }
2102           else
2103             /* Send ARP request. */
2104             {
2105               u32 bi0 = 0;
2106               vlib_buffer_t *b0;
2107               ethernet_arp_header_t *h0;
2108               vnet_hw_interface_t *hw_if0;
2109
2110               h0 =
2111                 vlib_packet_template_get_packet (vm,
2112                                                  &im->ip4_arp_request_packet_template,
2113                                                  &bi0);
2114
2115               /* Add rewrite/encap string for ARP packet. */
2116               vnet_rewrite_one_header (adj0[0], h0,
2117                                        sizeof (ethernet_header_t));
2118
2119               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2120
2121               /* Src ethernet address in ARP header. */
2122               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2123                            hw_if0->hw_address,
2124                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2125
2126               if (is_glean)
2127                 {
2128                   /* The interface's source address is stashed in the Glean Adj */
2129                   h0->ip4_over_ethernet[0].ip4 =
2130                     adj0->sub_type.glean.receive_addr.ip4;
2131
2132                   /* Copy in destination address we are requesting. This is the
2133                    * glean case, so it's the packet's destination.*/
2134                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2135                     ip0->dst_address.data_u32;
2136                 }
2137               else
2138                 {
2139                   /* Src IP address in ARP header. */
2140                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2141                                                   &h0->
2142                                                   ip4_over_ethernet[0].ip4))
2143                     {
2144                       /* No source address available */
2145                       p0->error =
2146                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2147                       vlib_buffer_free (vm, &bi0, 1);
2148                       continue;
2149                     }
2150
2151                   /* Copy in destination address we are requesting from the
2152                      incomplete adj */
2153                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2154                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2155                 }
2156
2157               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2158               b0 = vlib_get_buffer (vm, bi0);
2159               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2160
2161               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2162
2163               vlib_set_next_frame_buffer (vm, node,
2164                                           adj0->rewrite_header.next_index,
2165                                           bi0);
2166             }
2167         }
2168
2169       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2170     }
2171
2172   return frame->n_vectors;
2173 }
2174
2175 static uword
2176 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2177 {
2178   return (ip4_arp_inline (vm, node, frame, 0));
2179 }
2180
2181 static uword
2182 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2183 {
2184   return (ip4_arp_inline (vm, node, frame, 1));
2185 }
2186
2187 static char *ip4_arp_error_strings[] = {
2188   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2189   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2190   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2191   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2192   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2193   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2194 };
2195
2196 VLIB_REGISTER_NODE (ip4_arp_node) =
2197 {
2198   .function = ip4_arp,.name = "ip4-arp",.vector_size =
2199     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2200     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2201     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2202   {
2203   [IP4_ARP_NEXT_DROP] = "error-drop",}
2204 ,};
2205
2206 VLIB_REGISTER_NODE (ip4_glean_node) =
2207 {
2208   .function = ip4_glean,.name = "ip4-glean",.vector_size =
2209     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2210     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2211     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2212   {
2213   [IP4_ARP_NEXT_DROP] = "error-drop",}
2214 ,};
2215
2216 #define foreach_notrace_ip4_arp_error           \
2217 _(DROP)                                         \
2218 _(REQUEST_SENT)                                 \
2219 _(REPLICATE_DROP)                               \
2220 _(REPLICATE_FAIL)
2221
2222 clib_error_t *
2223 arp_notrace_init (vlib_main_t * vm)
2224 {
2225   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2226
2227   /* don't trace ARP request packets */
2228 #define _(a)                                    \
2229     vnet_pcap_drop_trace_filter_add_del         \
2230         (rt->errors[IP4_ARP_ERROR_##a],         \
2231          1 /* is_add */);
2232   foreach_notrace_ip4_arp_error;
2233 #undef _
2234   return 0;
2235 }
2236
2237 VLIB_INIT_FUNCTION (arp_notrace_init);
2238
2239
2240 /* Send an ARP request to see if given destination is reachable on given interface. */
2241 clib_error_t *
2242 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2243 {
2244   vnet_main_t *vnm = vnet_get_main ();
2245   ip4_main_t *im = &ip4_main;
2246   ethernet_arp_header_t *h;
2247   ip4_address_t *src;
2248   ip_interface_address_t *ia;
2249   ip_adjacency_t *adj;
2250   vnet_hw_interface_t *hi;
2251   vnet_sw_interface_t *si;
2252   vlib_buffer_t *b;
2253   u32 bi = 0;
2254
2255   si = vnet_get_sw_interface (vnm, sw_if_index);
2256
2257   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2258     {
2259       return clib_error_return (0, "%U: interface %U down",
2260                                 format_ip4_address, dst,
2261                                 format_vnet_sw_if_index_name, vnm,
2262                                 sw_if_index);
2263     }
2264
2265   src =
2266     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2267   if (!src)
2268     {
2269       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2270       return clib_error_return
2271         (0,
2272          "no matching interface address for destination %U (interface %U)",
2273          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2274          sw_if_index);
2275     }
2276
2277   adj = adj_get (ia->neighbor_probe_adj_index);
2278
2279   h =
2280     vlib_packet_template_get_packet (vm,
2281                                      &im->ip4_arp_request_packet_template,
2282                                      &bi);
2283
2284   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2285
2286   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2287                sizeof (h->ip4_over_ethernet[0].ethernet));
2288
2289   h->ip4_over_ethernet[0].ip4 = src[0];
2290   h->ip4_over_ethernet[1].ip4 = dst[0];
2291
2292   b = vlib_get_buffer (vm, bi);
2293   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2294     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2295
2296   /* Add encapsulation string for software interface (e.g. ethernet header). */
2297   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2298   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2299
2300   {
2301     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2302     u32 *to_next = vlib_frame_vector_args (f);
2303     to_next[0] = bi;
2304     f->n_vectors = 1;
2305     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2306   }
2307
2308   return /* no error */ 0;
2309 }
2310
/* Next-node indices referenced directly by ip4_rewrite_inline(). */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,            /* default next for each packet */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,      /* TTL expired while forwarding */
} ip4_rewrite_next_t;
2316
2317 always_inline uword
2318 ip4_rewrite_inline (vlib_main_t * vm,
2319                     vlib_node_runtime_t * node,
2320                     vlib_frame_t * frame,
2321                     int do_counters, int is_midchain, int is_mcast)
2322 {
2323   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2324   u32 *from = vlib_frame_vector_args (frame);
2325   u32 n_left_from, n_left_to_next, *to_next, next_index;
2326   vlib_node_runtime_t *error_node =
2327     vlib_node_get_runtime (vm, ip4_input_node.index);
2328
2329   n_left_from = frame->n_vectors;
2330   next_index = node->cached_next_index;
2331   u32 thread_index = vlib_get_thread_index ();
2332
2333   while (n_left_from > 0)
2334     {
2335       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2336
2337       while (n_left_from >= 4 && n_left_to_next >= 2)
2338         {
2339           ip_adjacency_t *adj0, *adj1;
2340           vlib_buffer_t *p0, *p1;
2341           ip4_header_t *ip0, *ip1;
2342           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2343           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2344           u32 tx_sw_if_index0, tx_sw_if_index1;
2345
2346           /* Prefetch next iteration. */
2347           {
2348             vlib_buffer_t *p2, *p3;
2349
2350             p2 = vlib_get_buffer (vm, from[2]);
2351             p3 = vlib_get_buffer (vm, from[3]);
2352
2353             vlib_prefetch_buffer_header (p2, STORE);
2354             vlib_prefetch_buffer_header (p3, STORE);
2355
2356             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2357             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2358           }
2359
2360           pi0 = to_next[0] = from[0];
2361           pi1 = to_next[1] = from[1];
2362
2363           from += 2;
2364           n_left_from -= 2;
2365           to_next += 2;
2366           n_left_to_next -= 2;
2367
2368           p0 = vlib_get_buffer (vm, pi0);
2369           p1 = vlib_get_buffer (vm, pi1);
2370
2371           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2372           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2373
2374           /*
2375            * pre-fetch the per-adjacency counters
2376            */
2377           if (do_counters)
2378             {
2379               vlib_prefetch_combined_counter (&adjacency_counters,
2380                                               thread_index, adj_index0);
2381               vlib_prefetch_combined_counter (&adjacency_counters,
2382                                               thread_index, adj_index1);
2383             }
2384
2385           ip0 = vlib_buffer_get_current (p0);
2386           ip1 = vlib_buffer_get_current (p1);
2387
2388           error0 = error1 = IP4_ERROR_NONE;
2389           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2390
2391           /* Decrement TTL & update checksum.
2392              Works either endian, so no need for byte swap. */
2393           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2394             {
2395               i32 ttl0 = ip0->ttl;
2396
2397               /* Input node should have reject packets with ttl 0. */
2398               ASSERT (ip0->ttl > 0);
2399
2400               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2401               checksum0 += checksum0 >= 0xffff;
2402
2403               ip0->checksum = checksum0;
2404               ttl0 -= 1;
2405               ip0->ttl = ttl0;
2406
2407               /*
2408                * If the ttl drops below 1 when forwarding, generate
2409                * an ICMP response.
2410                */
2411               if (PREDICT_FALSE (ttl0 <= 0))
2412                 {
2413                   error0 = IP4_ERROR_TIME_EXPIRED;
2414                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2415                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2416                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2417                                                0);
2418                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2419                 }
2420
2421               /* Verify checksum. */
2422               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2423             }
2424           else
2425             {
2426               p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2427             }
2428           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2429             {
2430               i32 ttl1 = ip1->ttl;
2431
2432               /* Input node should have reject packets with ttl 0. */
2433               ASSERT (ip1->ttl > 0);
2434
2435               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2436               checksum1 += checksum1 >= 0xffff;
2437
2438               ip1->checksum = checksum1;
2439               ttl1 -= 1;
2440               ip1->ttl = ttl1;
2441
2442               /*
2443                * If the ttl drops below 1 when forwarding, generate
2444                * an ICMP response.
2445                */
2446               if (PREDICT_FALSE (ttl1 <= 0))
2447                 {
2448                   error1 = IP4_ERROR_TIME_EXPIRED;
2449                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2450                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2451                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2452                                                0);
2453                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2454                 }
2455
2456               /* Verify checksum. */
2457               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2458               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2459             }
2460           else
2461             {
2462               p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2463             }
2464
2465           /* Rewrite packet header and updates lengths. */
2466           adj0 = adj_get (adj_index0);
2467           adj1 = adj_get (adj_index1);
2468
2469           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2470           rw_len0 = adj0[0].rewrite_header.data_bytes;
2471           rw_len1 = adj1[0].rewrite_header.data_bytes;
2472           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2473           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2474
2475           /* Check MTU of outgoing interface. */
2476           error0 =
2477             (vlib_buffer_length_in_chain (vm, p0) >
2478              adj0[0].
2479              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2480              error0);
2481           error1 =
2482             (vlib_buffer_length_in_chain (vm, p1) >
2483              adj1[0].
2484              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2485              error1);
2486
2487           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2488            * to see the IP header */
2489           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2490             {
2491               next0 = adj0[0].rewrite_header.next_index;
2492               p0->current_data -= rw_len0;
2493               p0->current_length += rw_len0;
2494               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2495               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2496
2497               if (PREDICT_FALSE
2498                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2499                 vnet_feature_arc_start (lm->output_feature_arc_index,
2500                                         tx_sw_if_index0, &next0, p0);
2501             }
2502           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2503             {
2504               next1 = adj1[0].rewrite_header.next_index;
2505               p1->current_data -= rw_len1;
2506               p1->current_length += rw_len1;
2507
2508               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2509               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2510
2511               if (PREDICT_FALSE
2512                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2513                 vnet_feature_arc_start (lm->output_feature_arc_index,
2514                                         tx_sw_if_index1, &next1, p1);
2515             }
2516
2517           /* Guess we are only writing on simple Ethernet header. */
2518           vnet_rewrite_two_headers (adj0[0], adj1[0],
2519                                     ip0, ip1, sizeof (ethernet_header_t));
2520
2521           /*
2522            * Bump the per-adjacency counters
2523            */
2524           if (do_counters)
2525             {
2526               vlib_increment_combined_counter
2527                 (&adjacency_counters,
2528                  thread_index,
2529                  adj_index0, 1,
2530                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2531
2532               vlib_increment_combined_counter
2533                 (&adjacency_counters,
2534                  thread_index,
2535                  adj_index1, 1,
2536                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2537             }
2538
2539           if (is_midchain)
2540             {
2541               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2542               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2543             }
2544           if (is_mcast)
2545             {
2546               /*
2547                * copy bytes from the IP address into the MAC rewrite
2548                */
2549               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2550               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2551             }
2552
2553           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2554                                            to_next, n_left_to_next,
2555                                            pi0, pi1, next0, next1);
2556         }
2557
2558       while (n_left_from > 0 && n_left_to_next > 0)
2559         {
2560           ip_adjacency_t *adj0;
2561           vlib_buffer_t *p0;
2562           ip4_header_t *ip0;
2563           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2564           u32 tx_sw_if_index0;
2565
2566           pi0 = to_next[0] = from[0];
2567
2568           p0 = vlib_get_buffer (vm, pi0);
2569
2570           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2571
2572           adj0 = adj_get (adj_index0);
2573
2574           ip0 = vlib_buffer_get_current (p0);
2575
2576           error0 = IP4_ERROR_NONE;
2577           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2578
2579           /* Decrement TTL & update checksum. */
2580           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2581             {
2582               i32 ttl0 = ip0->ttl;
2583
2584               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2585
2586               checksum0 += checksum0 >= 0xffff;
2587
2588               ip0->checksum = checksum0;
2589
2590               ASSERT (ip0->ttl > 0);
2591
2592               ttl0 -= 1;
2593
2594               ip0->ttl = ttl0;
2595
2596               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2597
2598               if (PREDICT_FALSE (ttl0 <= 0))
2599                 {
2600                   /*
2601                    * If the ttl drops below 1 when forwarding, generate
2602                    * an ICMP response.
2603                    */
2604                   error0 = IP4_ERROR_TIME_EXPIRED;
2605                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2606                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2607                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2608                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2609                                                0);
2610                 }
2611             }
2612           else
2613             {
2614               p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2615             }
2616
2617           if (do_counters)
2618             vlib_prefetch_combined_counter (&adjacency_counters,
2619                                             thread_index, adj_index0);
2620
2621           /* Guess we are only writing on simple Ethernet header. */
2622           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2623           if (is_mcast)
2624             {
2625               /*
2626                * copy bytes from the IP address into the MAC rewrite
2627                */
2628               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2629             }
2630
2631           /* Update packet buffer attributes/set output interface. */
2632           rw_len0 = adj0[0].rewrite_header.data_bytes;
2633           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2634
2635           if (do_counters)
2636             vlib_increment_combined_counter
2637               (&adjacency_counters,
2638                thread_index, adj_index0, 1,
2639                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2640
2641           /* Check MTU of outgoing interface. */
2642           error0 = (vlib_buffer_length_in_chain (vm, p0)
2643                     > adj0[0].rewrite_header.max_l3_packet_bytes
2644                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2645
2646           p0->error = error_node->errors[error0];
2647
2648           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2649            * to see the IP header */
2650           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2651             {
2652               p0->current_data -= rw_len0;
2653               p0->current_length += rw_len0;
2654               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2655
2656               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2657               next0 = adj0[0].rewrite_header.next_index;
2658
2659               if (is_midchain)
2660                 {
2661                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2662                 }
2663
2664               if (PREDICT_FALSE
2665                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2666                 vnet_feature_arc_start (lm->output_feature_arc_index,
2667                                         tx_sw_if_index0, &next0, p0);
2668
2669             }
2670
2671           from += 1;
2672           n_left_from -= 1;
2673           to_next += 1;
2674           n_left_to_next -= 1;
2675
2676           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2677                                            to_next, n_left_to_next,
2678                                            pi0, next0);
2679         }
2680
2681       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2682     }
2683
2684   /* Need to do trace after rewrites to pick up new packet data. */
2685   if (node->flags & VLIB_NODE_FLAG_TRACE)
2686     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2687
2688   return frame->n_vectors;
2689 }
2690
2691
2692 /** @brief IPv4 rewrite node.
2693     @node ip4-rewrite
2694
2695     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2696     header checksum, fetch the ip adjacency, check the outbound mtu,
2697     apply the adjacency rewrite, and send pkts to the adjacency
2698     rewrite header's rewrite_next_index.
2699
2700     @param vm vlib_main_t corresponding to the current thread
2701     @param node vlib_node_runtime_t
2702     @param frame vlib_frame_t whose contents should be dispatched
2703
2704     @par Graph mechanics: buffer metadata, next index usage
2705
2706     @em Uses:
2707     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2708         - the rewrite adjacency index
2709     - <code>adj->lookup_next_index</code>
2710         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2711           the packet will be dropped.
2712     - <code>adj->rewrite_header</code>
2713         - Rewrite string length, rewrite string, next_index
2714
2715     @em Sets:
2716     - <code>b->current_data, b->current_length</code>
2717         - Updated net of applying the rewrite string
2718
2719     <em>Next Indices:</em>
2720     - <code> adj->rewrite_header.next_index </code>
2721       or @c error-drop
2722 */
2723 static uword
2724 ip4_rewrite (vlib_main_t * vm,
2725              vlib_node_runtime_t * node, vlib_frame_t * frame)
2726 {
2727   if (adj_are_counters_enabled ())
2728     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2729   else
2730     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2731 }
2732
2733 static uword
2734 ip4_midchain (vlib_main_t * vm,
2735               vlib_node_runtime_t * node, vlib_frame_t * frame)
2736 {
2737   if (adj_are_counters_enabled ())
2738     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2739   else
2740     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2741 }
2742
2743 static uword
2744 ip4_rewrite_mcast (vlib_main_t * vm,
2745                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2746 {
2747   if (adj_are_counters_enabled ())
2748     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2749   else
2750     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2751 }
2752
2753 static uword
2754 ip4_mcast_midchain (vlib_main_t * vm,
2755                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2756 {
2757   if (adj_are_counters_enabled ())
2758     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2759   else
2760     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2761 }
2762
2763 /* *INDENT-OFF* */
2764 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2765   .function = ip4_rewrite,
2766   .name = "ip4-rewrite",
2767   .vector_size = sizeof (u32),
2768
2769   .format_trace = format_ip4_rewrite_trace,
2770
2771   .n_next_nodes = 2,
2772   .next_nodes = {
2773     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2774     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2775   },
2776 };
2777 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2778
2779 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2780   .function = ip4_rewrite_mcast,
2781   .name = "ip4-rewrite-mcast",
2782   .vector_size = sizeof (u32),
2783
2784   .format_trace = format_ip4_rewrite_trace,
2785   .sibling_of = "ip4-rewrite",
2786 };
2787 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2788
2789 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2790   .function = ip4_mcast_midchain,
2791   .name = "ip4-mcast-midchain",
2792   .vector_size = sizeof (u32),
2793
2794   .format_trace = format_ip4_rewrite_trace,
2795   .sibling_of = "ip4-rewrite",
2796 };
2797 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2798
2799 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2800   .function = ip4_midchain,
2801   .name = "ip4-midchain",
2802   .vector_size = sizeof (u32),
2803   .format_trace = format_ip4_forward_next_trace,
2804   .sibling_of =  "ip4-rewrite",
2805 };
2806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2807 /* *INDENT-ON* */
2808
2809 static clib_error_t *
2810 add_del_interface_table (vlib_main_t * vm,
2811                          unformat_input_t * input, vlib_cli_command_t * cmd)
2812 {
2813   vnet_main_t *vnm = vnet_get_main ();
2814   ip_interface_address_t *ia;
2815   clib_error_t *error = 0;
2816   u32 sw_if_index, table_id;
2817
2818   sw_if_index = ~0;
2819
2820   if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2821     {
2822       error = clib_error_return (0, "unknown interface `%U'",
2823                                  format_unformat_error, input);
2824       goto done;
2825     }
2826
2827   if (unformat (input, "%d", &table_id))
2828     ;
2829   else
2830     {
2831       error = clib_error_return (0, "expected table id `%U'",
2832                                  format_unformat_error, input);
2833       goto done;
2834     }
2835
2836   /*
2837    * If the interface already has in IP address, then a change int
2838    * VRF is not allowed. The IP address applied must first be removed.
2839    * We do not do that automatically here, since VPP has no knowledge
2840    * of whether thoses subnets are valid in the destination VRF.
2841    */
2842   /* *INDENT-OFF* */
2843   foreach_ip_interface_address (&ip4_main.lookup_main,
2844                                 ia, sw_if_index,
2845                                 1 /* honor unnumbered */,
2846   ({
2847       ip4_address_t * a;
2848
2849       a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2850       error = clib_error_return (0, "interface %U has address %U",
2851                                  format_vnet_sw_if_index_name, vnm,
2852                                  sw_if_index,
2853                                  format_ip4_address, a);
2854       goto done;
2855    }));
2856    /* *INDENT-ON* */
2857
2858 {
2859   ip4_main_t *im = &ip4_main;
2860   u32 fib_index;
2861
2862   fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2863
2864   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2865   im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2866
2867   fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2868   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2869   im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2870 }
2871
2872 done:
2873 return error;
2874 }
2875
2876 /*?
2877  * Place the indicated interface into the supplied IPv4 FIB table (also known
2878  * as a VRF). If the FIB table does not exist, this command creates it. To
2879  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2880  * FIB table will only be displayed if a route has been added to the table, or
2881  * an IP Address is assigned to an interface in the table (which adds a route
2882  * automatically).
2883  *
2884  * @note IP addresses added after setting the interface IP table are added to
2885  * the indicated FIB table. If an IP address is added prior to changing the
2886  * table then this is an error. The control plane must remove these addresses
2887  * first and then change the table. VPP will not automatically move the
2888  * addresses from the old to the new table as it does not know the validity
2889  * of such a change.
2890  *
2891  * @cliexpar
2892  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2893  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2894  ?*/
2895 /* *INDENT-OFF* */
2896 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2897 {
2898   .path = "set interface ip table",
2899   .function = add_del_interface_table,
2900   .short_help = "set interface ip table <interface> <table-id>",
2901 };
2902 /* *INDENT-ON* */
2903
2904 int
2905 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2906 {
2907   ip4_fib_mtrie_t *mtrie0;
2908   ip4_fib_mtrie_leaf_t leaf0;
2909   u32 lbi0;
2910
2911   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2912
2913   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2914   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2915   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2916
2917   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2918
2919   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2920 }
2921
2922 static clib_error_t *
2923 test_lookup_command_fn (vlib_main_t * vm,
2924                         unformat_input_t * input, vlib_cli_command_t * cmd)
2925 {
2926   ip4_fib_t *fib;
2927   u32 table_id = 0;
2928   f64 count = 1;
2929   u32 n;
2930   int i;
2931   ip4_address_t ip4_base_address;
2932   u64 errors = 0;
2933
2934   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2935     {
2936       if (unformat (input, "table %d", &table_id))
2937         {
2938           /* Make sure the entry exists. */
2939           fib = ip4_fib_get (table_id);
2940           if ((fib) && (fib->index != table_id))
2941             return clib_error_return (0, "<fib-index> %d does not exist",
2942                                       table_id);
2943         }
2944       else if (unformat (input, "count %f", &count))
2945         ;
2946
2947       else if (unformat (input, "%U",
2948                          unformat_ip4_address, &ip4_base_address))
2949         ;
2950       else
2951         return clib_error_return (0, "unknown input `%U'",
2952                                   format_unformat_error, input);
2953     }
2954
2955   n = count;
2956
2957   for (i = 0; i < n; i++)
2958     {
2959       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2960         errors++;
2961
2962       ip4_base_address.as_u32 =
2963         clib_host_to_net_u32 (1 +
2964                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2965     }
2966
2967   if (errors)
2968     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2969   else
2970     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2971
2972   return 0;
2973 }
2974
2975 /*?
2976  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2977  * given FIB table to determine if there is a conflict with the
2978  * adjacency table. The fib-id can be determined by using the
2979  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2980  * of 0 is used.
2981  *
2982  * @todo This command uses fib-id, other commands use table-id (not
2983  * just a name, they are different indexes). Would like to change this
2984  * to table-id for consistency.
2985  *
2986  * @cliexpar
2987  * Example of how to run the test lookup command:
2988  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2989  * No errors in 2 lookups
2990  * @cliexend
2991 ?*/
2992 /* *INDENT-OFF* */
2993 VLIB_CLI_COMMAND (lookup_test_command, static) =
2994 {
2995   .path = "test lookup",
2996   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2997   .function = test_lookup_command_fn,
2998 };
2999 /* *INDENT-ON* */
3000
3001 int
3002 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3003 {
3004   ip4_fib_t *fib;
3005   u32 fib_index;
3006
3007   fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
3008
3009   if (~0 == fib_index)
3010     return VNET_API_ERROR_NO_SUCH_FIB;
3011
3012   fib = ip4_fib_get (fib_index);
3013
3014   fib->flow_hash_config = flow_hash_config;
3015   return 0;
3016 }
3017
3018 static clib_error_t *
3019 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3020                              unformat_input_t * input,
3021                              vlib_cli_command_t * cmd)
3022 {
3023   int matched = 0;
3024   u32 table_id = 0;
3025   u32 flow_hash_config = 0;
3026   int rv;
3027
3028   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3029     {
3030       if (unformat (input, "table %d", &table_id))
3031         matched = 1;
3032 #define _(a,v) \
3033     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3034       foreach_flow_hash_bit
3035 #undef _
3036         else
3037         break;
3038     }
3039
3040   if (matched == 0)
3041     return clib_error_return (0, "unknown input `%U'",
3042                               format_unformat_error, input);
3043
3044   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3045   switch (rv)
3046     {
3047     case 0:
3048       break;
3049
3050     case VNET_API_ERROR_NO_SUCH_FIB:
3051       return clib_error_return (0, "no such FIB table %d", table_id);
3052
3053     default:
3054       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3055       break;
3056     }
3057
3058   return 0;
3059 }
3060
3061 /*?
3062  * Configure the set of IPv4 fields used by the flow hash.
3063  *
3064  * @cliexpar
3065  * Example of how to set the flow hash on a given table:
3066  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3067  * Example of how to display the configured flow hash:
3068  * @cliexstart{show ip fib}
3069  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3070  * 0.0.0.0/0
3071  *   unicast-ip4-chain
3072  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3073  *     [0] [@0]: dpo-drop ip6
3074  * 0.0.0.0/32
3075  *   unicast-ip4-chain
3076  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3077  *     [0] [@0]: dpo-drop ip6
3078  * 224.0.0.0/8
3079  *   unicast-ip4-chain
3080  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3081  *     [0] [@0]: dpo-drop ip6
3082  * 6.0.1.2/32
3083  *   unicast-ip4-chain
3084  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3085  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3086  * 7.0.0.1/32
3087  *   unicast-ip4-chain
3088  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3089  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3090  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3091  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3092  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3093  * 240.0.0.0/8
3094  *   unicast-ip4-chain
3095  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3096  *     [0] [@0]: dpo-drop ip6
3097  * 255.255.255.255/32
3098  *   unicast-ip4-chain
3099  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3100  *     [0] [@0]: dpo-drop ip6
3101  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3102  * 0.0.0.0/0
3103  *   unicast-ip4-chain
3104  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3105  *     [0] [@0]: dpo-drop ip6
3106  * 0.0.0.0/32
3107  *   unicast-ip4-chain
3108  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3109  *     [0] [@0]: dpo-drop ip6
3110  * 172.16.1.0/24
3111  *   unicast-ip4-chain
3112  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3113  *     [0] [@4]: ipv4-glean: af_packet0
3114  * 172.16.1.1/32
3115  *   unicast-ip4-chain
3116  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3117  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3118  * 172.16.1.2/32
3119  *   unicast-ip4-chain
3120  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3121  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3122  * 172.16.2.0/24
3123  *   unicast-ip4-chain
3124  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3125  *     [0] [@4]: ipv4-glean: af_packet1
3126  * 172.16.2.1/32
3127  *   unicast-ip4-chain
3128  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3129  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3130  * 224.0.0.0/8
3131  *   unicast-ip4-chain
3132  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3133  *     [0] [@0]: dpo-drop ip6
3134  * 240.0.0.0/8
3135  *   unicast-ip4-chain
3136  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3137  *     [0] [@0]: dpo-drop ip6
3138  * 255.255.255.255/32
3139  *   unicast-ip4-chain
3140  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3141  *     [0] [@0]: dpo-drop ip6
3142  * @cliexend
3143 ?*/
3144 /* *INDENT-OFF* */
3145 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3146 {
3147   .path = "set ip flow-hash",
3148   .short_help =
3149   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3150   .function = set_ip_flow_hash_command_fn,
3151 };
3152 /* *INDENT-ON* */
3153
3154 int
3155 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3156                              u32 table_index)
3157 {
3158   vnet_main_t *vnm = vnet_get_main ();
3159   vnet_interface_main_t *im = &vnm->interface_main;
3160   ip4_main_t *ipm = &ip4_main;
3161   ip_lookup_main_t *lm = &ipm->lookup_main;
3162   vnet_classify_main_t *cm = &vnet_classify_main;
3163   ip4_address_t *if_addr;
3164
3165   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3166     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3167
3168   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3169     return VNET_API_ERROR_NO_SUCH_ENTRY;
3170
3171   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3172   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3173
3174   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3175
3176   if (NULL != if_addr)
3177     {
3178       fib_prefix_t pfx = {
3179         .fp_len = 32,
3180         .fp_proto = FIB_PROTOCOL_IP4,
3181         .fp_addr.ip4 = *if_addr,
3182       };
3183       u32 fib_index;
3184
3185       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3186                                                        sw_if_index);
3187
3188
3189       if (table_index != (u32) ~ 0)
3190         {
3191           dpo_id_t dpo = DPO_INVALID;
3192
3193           dpo_set (&dpo,
3194                    DPO_CLASSIFY,
3195                    DPO_PROTO_IP4,
3196                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3197
3198           fib_table_entry_special_dpo_add (fib_index,
3199                                            &pfx,
3200                                            FIB_SOURCE_CLASSIFY,
3201                                            FIB_ENTRY_FLAG_NONE, &dpo);
3202           dpo_reset (&dpo);
3203         }
3204       else
3205         {
3206           fib_table_entry_special_remove (fib_index,
3207                                           &pfx, FIB_SOURCE_CLASSIFY);
3208         }
3209     }
3210
3211   return 0;
3212 }
3213
3214 static clib_error_t *
3215 set_ip_classify_command_fn (vlib_main_t * vm,
3216                             unformat_input_t * input,
3217                             vlib_cli_command_t * cmd)
3218 {
3219   u32 table_index = ~0;
3220   int table_index_set = 0;
3221   u32 sw_if_index = ~0;
3222   int rv;
3223
3224   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3225     {
3226       if (unformat (input, "table-index %d", &table_index))
3227         table_index_set = 1;
3228       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3229                          vnet_get_main (), &sw_if_index))
3230         ;
3231       else
3232         break;
3233     }
3234
3235   if (table_index_set == 0)
3236     return clib_error_return (0, "classify table-index must be specified");
3237
3238   if (sw_if_index == ~0)
3239     return clib_error_return (0, "interface / subif must be specified");
3240
3241   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3242
3243   switch (rv)
3244     {
3245     case 0:
3246       break;
3247
3248     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3249       return clib_error_return (0, "No such interface");
3250
3251     case VNET_API_ERROR_NO_SUCH_ENTRY:
3252       return clib_error_return (0, "No such classifier table");
3253     }
3254   return 0;
3255 }
3256
3257 /*?
3258  * Assign a classification table to an interface. The classification
3259  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3260  * commands. Once the table is created, use this command to filter packets
3261  * on an interface.
3262  *
3263  * @cliexpar
3264  * Example of how to assign a classification table to an interface:
3265  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3266 ?*/
3267 /* *INDENT-OFF* */
3268 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3269 {
3270     .path = "set ip classify",
3271     .short_help =
3272     "set ip classify intfc <interface> table-index <classify-idx>",
3273     .function = set_ip_classify_command_fn,
3274 };
3275 /* *INDENT-ON* */
3276
3277 /*
3278  * fd.io coding-style-patch-verification: ON
3279  *
3280  * Local Variables:
3281  * eval: (c-set-style "gnu")
3282  * End:
3283  */