MPLS Mcast
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
54
55 /**
56  * @file
57  * @brief IPv4 Forwarding.
58  *
59  * This file contains the source code for IPv4 forwarding.
60  */
61
62 void
63 ip4_forward_next_trace (vlib_main_t * vm,
64                         vlib_node_runtime_t * node,
65                         vlib_frame_t * frame,
66                         vlib_rx_or_tx_t which_adj_index);
67
68 always_inline uword
69 ip4_lookup_inline (vlib_main_t * vm,
70                    vlib_node_runtime_t * node,
71                    vlib_frame_t * frame,
72                    int lookup_for_responses_to_locally_received_packets)
73 {
74   ip4_main_t *im = &ip4_main;
75   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76   u32 n_left_from, n_left_to_next, *from, *to_next;
77   ip_lookup_next_t next;
78   u32 thread_index = vlib_get_thread_index ();
79
80   from = vlib_frame_vector_args (frame);
81   n_left_from = frame->n_vectors;
82   next = node->cached_next_index;
83
84   while (n_left_from > 0)
85     {
86       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
87
88       while (n_left_from >= 8 && n_left_to_next >= 4)
89         {
90           vlib_buffer_t *p0, *p1, *p2, *p3;
91           ip4_header_t *ip0, *ip1, *ip2, *ip3;
92           __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t *lb0, *lb1, *lb2, *lb3;
95           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           u32 pi0, fib_index0, lb_index0;
99           u32 pi1, fib_index1, lb_index1;
100           u32 pi2, fib_index2, lb_index2;
101           u32 pi3, fib_index3, lb_index3;
102           flow_hash_config_t flow_hash_config0, flow_hash_config1;
103           flow_hash_config_t flow_hash_config2, flow_hash_config3;
104           u32 hash_c0, hash_c1, hash_c2, hash_c3;
105           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
106
107           /* Prefetch next iteration. */
108           {
109             vlib_buffer_t *p4, *p5, *p6, *p7;
110
111             p4 = vlib_get_buffer (vm, from[4]);
112             p5 = vlib_get_buffer (vm, from[5]);
113             p6 = vlib_get_buffer (vm, from[6]);
114             p7 = vlib_get_buffer (vm, from[7]);
115
116             vlib_prefetch_buffer_header (p4, LOAD);
117             vlib_prefetch_buffer_header (p5, LOAD);
118             vlib_prefetch_buffer_header (p6, LOAD);
119             vlib_prefetch_buffer_header (p7, LOAD);
120
121             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
125           }
126
127           pi0 = to_next[0] = from[0];
128           pi1 = to_next[1] = from[1];
129           pi2 = to_next[2] = from[2];
130           pi3 = to_next[3] = from[3];
131
132           from += 4;
133           to_next += 4;
134           n_left_to_next -= 4;
135           n_left_from -= 4;
136
137           p0 = vlib_get_buffer (vm, pi0);
138           p1 = vlib_get_buffer (vm, pi1);
139           p2 = vlib_get_buffer (vm, pi2);
140           p3 = vlib_get_buffer (vm, pi3);
141
142           ip0 = vlib_buffer_get_current (p0);
143           ip1 = vlib_buffer_get_current (p1);
144           ip2 = vlib_buffer_get_current (p2);
145           ip3 = vlib_buffer_get_current (p3);
146
147           dst_addr0 = &ip0->dst_address;
148           dst_addr1 = &ip1->dst_address;
149           dst_addr2 = &ip2->dst_address;
150           dst_addr3 = &ip3->dst_address;
151
152           fib_index0 =
153             vec_elt (im->fib_index_by_sw_if_index,
154                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
155           fib_index1 =
156             vec_elt (im->fib_index_by_sw_if_index,
157                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
158           fib_index2 =
159             vec_elt (im->fib_index_by_sw_if_index,
160                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
161           fib_index3 =
162             vec_elt (im->fib_index_by_sw_if_index,
163                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
164           fib_index0 =
165             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
166              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
167           fib_index1 =
168             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
169              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
170           fib_index2 =
171             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
172              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
173           fib_index3 =
174             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
175              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
176
177
178           if (!lookup_for_responses_to_locally_received_packets)
179             {
180               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
181               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
182               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
183               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
184
185               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
186               leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
187               leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
188               leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
189             }
190
191           tcp0 = (void *) (ip0 + 1);
192           tcp1 = (void *) (ip1 + 1);
193           tcp2 = (void *) (ip2 + 1);
194           tcp3 = (void *) (ip3 + 1);
195
196           if (!lookup_for_responses_to_locally_received_packets)
197             {
198               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
199               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
200               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
201               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
202             }
203
204           if (!lookup_for_responses_to_locally_received_packets)
205             {
206               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
207               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
208               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
209               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
210             }
211
212           if (lookup_for_responses_to_locally_received_packets)
213             {
214               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
215               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
216               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
217               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
218             }
219           else
220             {
221               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
222               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
223               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
224               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
225             }
226
227           ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
228           lb0 = load_balance_get (lb_index0);
229           lb1 = load_balance_get (lb_index1);
230           lb2 = load_balance_get (lb_index2);
231           lb3 = load_balance_get (lb_index3);
232
233           /* Use flow hash to compute multipath adjacency. */
234           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
235           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
236           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
237           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
238           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
239             {
240               flow_hash_config0 = lb0->lb_hash_config;
241               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
242                 ip4_compute_flow_hash (ip0, flow_hash_config0);
243             }
244           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
245             {
246               flow_hash_config1 = lb1->lb_hash_config;
247               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
248                 ip4_compute_flow_hash (ip1, flow_hash_config1);
249             }
250           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
251             {
252               flow_hash_config2 = lb2->lb_hash_config;
253               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
254                 ip4_compute_flow_hash (ip2, flow_hash_config2);
255             }
256           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
257             {
258               flow_hash_config3 = lb3->lb_hash_config;
259               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
260                 ip4_compute_flow_hash (ip3, flow_hash_config3);
261             }
262
263           ASSERT (lb0->lb_n_buckets > 0);
264           ASSERT (is_pow2 (lb0->lb_n_buckets));
265           ASSERT (lb1->lb_n_buckets > 0);
266           ASSERT (is_pow2 (lb1->lb_n_buckets));
267           ASSERT (lb2->lb_n_buckets > 0);
268           ASSERT (is_pow2 (lb2->lb_n_buckets));
269           ASSERT (lb3->lb_n_buckets > 0);
270           ASSERT (is_pow2 (lb3->lb_n_buckets));
271
272           dpo0 = load_balance_get_bucket_i (lb0,
273                                             (hash_c0 &
274                                              (lb0->lb_n_buckets_minus_1)));
275           dpo1 = load_balance_get_bucket_i (lb1,
276                                             (hash_c1 &
277                                              (lb1->lb_n_buckets_minus_1)));
278           dpo2 = load_balance_get_bucket_i (lb2,
279                                             (hash_c2 &
280                                              (lb2->lb_n_buckets_minus_1)));
281           dpo3 = load_balance_get_bucket_i (lb3,
282                                             (hash_c3 &
283                                              (lb3->lb_n_buckets_minus_1)));
284
285           next0 = dpo0->dpoi_next_node;
286           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
287           next1 = dpo1->dpoi_next_node;
288           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
289           next2 = dpo2->dpoi_next_node;
290           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
291           next3 = dpo3->dpoi_next_node;
292           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
293
294           vlib_increment_combined_counter
295             (cm, thread_index, lb_index0, 1,
296              vlib_buffer_length_in_chain (vm, p0)
297              + sizeof (ethernet_header_t));
298           vlib_increment_combined_counter
299             (cm, thread_index, lb_index1, 1,
300              vlib_buffer_length_in_chain (vm, p1)
301              + sizeof (ethernet_header_t));
302           vlib_increment_combined_counter
303             (cm, thread_index, lb_index2, 1,
304              vlib_buffer_length_in_chain (vm, p2)
305              + sizeof (ethernet_header_t));
306           vlib_increment_combined_counter
307             (cm, thread_index, lb_index3, 1,
308              vlib_buffer_length_in_chain (vm, p3)
309              + sizeof (ethernet_header_t));
310
311           vlib_validate_buffer_enqueue_x4 (vm, node, next,
312                                            to_next, n_left_to_next,
313                                            pi0, pi1, pi2, pi3,
314                                            next0, next1, next2, next3);
315         }
316
317       while (n_left_from > 0 && n_left_to_next > 0)
318         {
319           vlib_buffer_t *p0;
320           ip4_header_t *ip0;
321           __attribute__ ((unused)) tcp_header_t *tcp0;
322           ip_lookup_next_t next0;
323           const load_balance_t *lb0;
324           ip4_fib_mtrie_t *mtrie0;
325           ip4_fib_mtrie_leaf_t leaf0;
326           ip4_address_t *dst_addr0;
327           u32 pi0, fib_index0, lbi0;
328           flow_hash_config_t flow_hash_config0;
329           const dpo_id_t *dpo0;
330           u32 hash_c0;
331
332           pi0 = from[0];
333           to_next[0] = pi0;
334
335           p0 = vlib_get_buffer (vm, pi0);
336
337           ip0 = vlib_buffer_get_current (p0);
338
339           dst_addr0 = &ip0->dst_address;
340
341           fib_index0 =
342             vec_elt (im->fib_index_by_sw_if_index,
343                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
344           fib_index0 =
345             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
346              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
347
348           if (!lookup_for_responses_to_locally_received_packets)
349             {
350               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
351
352               leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
353             }
354
355           tcp0 = (void *) (ip0 + 1);
356
357           if (!lookup_for_responses_to_locally_received_packets)
358             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
359
360           if (!lookup_for_responses_to_locally_received_packets)
361             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
362
363           if (lookup_for_responses_to_locally_received_packets)
364             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
365           else
366             {
367               /* Handle default route. */
368               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
369             }
370
371           ASSERT (lbi0);
372           lb0 = load_balance_get (lbi0);
373
374           /* Use flow hash to compute multipath adjacency. */
375           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
376           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
377             {
378               flow_hash_config0 = lb0->lb_hash_config;
379
380               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
381                 ip4_compute_flow_hash (ip0, flow_hash_config0);
382             }
383
384           ASSERT (lb0->lb_n_buckets > 0);
385           ASSERT (is_pow2 (lb0->lb_n_buckets));
386
387           dpo0 = load_balance_get_bucket_i (lb0,
388                                             (hash_c0 &
389                                              (lb0->lb_n_buckets_minus_1)));
390
391           next0 = dpo0->dpoi_next_node;
392           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
393
394           vlib_increment_combined_counter
395             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
396
397           from += 1;
398           to_next += 1;
399           n_left_to_next -= 1;
400           n_left_from -= 1;
401
402           if (PREDICT_FALSE (next0 != next))
403             {
404               n_left_to_next += 1;
405               vlib_put_next_frame (vm, node, next, n_left_to_next);
406               next = next0;
407               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
408               to_next[0] = pi0;
409               to_next += 1;
410               n_left_to_next -= 1;
411             }
412         }
413
414       vlib_put_next_frame (vm, node, next, n_left_to_next);
415     }
416
417   if (node->flags & VLIB_NODE_FLAG_TRACE)
418     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
419
420   return frame->n_vectors;
421 }
422
423 /** @brief IPv4 lookup node.
424     @node ip4-lookup
425
426     This is the main IPv4 lookup dispatch node.
427
428     @param vm vlib_main_t corresponding to the current thread
429     @param node vlib_node_runtime_t
430     @param frame vlib_frame_t whose contents should be dispatched
431
432     @par Graph mechanics: buffer metadata, next index usage
433
434     @em Uses:
435     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
436         - Indicates the @c sw_if_index value of the interface that the
437           packet was received on.
438     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
439         - When the value is @c ~0 then the node performs a longest prefix
440           match (LPM) for the packet destination address in the FIB attached
441           to the receive interface.
442         - Otherwise perform LPM for the packet destination address in the
443           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
444           value (0, 1, ...) and not a VRF id.
445
446     @em Sets:
447     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
448         - The lookup result adjacency index.
449
450     <em>Next Index:</em>
451     - Dispatches the packet to the node index found in
452       ip_adjacency_t @c adj->lookup_next_index
453       (where @c adj is the lookup result adjacency).
454 */
455 static uword
456 ip4_lookup (vlib_main_t * vm,
457             vlib_node_runtime_t * node, vlib_frame_t * frame)
458 {
459   return ip4_lookup_inline (vm, node, frame,
460                             /* lookup_for_responses_to_locally_received_packets */
461                             0);
462
463 }
464
465 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
466
467 VLIB_REGISTER_NODE (ip4_lookup_node) =
468 {
469 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
470     sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
471     IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
472
473 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
474
475 always_inline uword
476 ip4_load_balance (vlib_main_t * vm,
477                   vlib_node_runtime_t * node, vlib_frame_t * frame)
478 {
479   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
480   u32 n_left_from, n_left_to_next, *from, *to_next;
481   ip_lookup_next_t next;
482   u32 thread_index = vlib_get_thread_index ();
483
484   from = vlib_frame_vector_args (frame);
485   n_left_from = frame->n_vectors;
486   next = node->cached_next_index;
487
488   if (node->flags & VLIB_NODE_FLAG_TRACE)
489     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
490
491   while (n_left_from > 0)
492     {
493       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
494
495
496       while (n_left_from >= 4 && n_left_to_next >= 2)
497         {
498           ip_lookup_next_t next0, next1;
499           const load_balance_t *lb0, *lb1;
500           vlib_buffer_t *p0, *p1;
501           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
502           const ip4_header_t *ip0, *ip1;
503           const dpo_id_t *dpo0, *dpo1;
504
505           /* Prefetch next iteration. */
506           {
507             vlib_buffer_t *p2, *p3;
508
509             p2 = vlib_get_buffer (vm, from[2]);
510             p3 = vlib_get_buffer (vm, from[3]);
511
512             vlib_prefetch_buffer_header (p2, STORE);
513             vlib_prefetch_buffer_header (p3, STORE);
514
515             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
516             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
517           }
518
519           pi0 = to_next[0] = from[0];
520           pi1 = to_next[1] = from[1];
521
522           from += 2;
523           n_left_from -= 2;
524           to_next += 2;
525           n_left_to_next -= 2;
526
527           p0 = vlib_get_buffer (vm, pi0);
528           p1 = vlib_get_buffer (vm, pi1);
529
530           ip0 = vlib_buffer_get_current (p0);
531           ip1 = vlib_buffer_get_current (p1);
532           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
533           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
534
535           lb0 = load_balance_get (lbi0);
536           lb1 = load_balance_get (lbi1);
537
538           /*
539            * this node is for via FIBs we can re-use the hash value from the
540            * to node if present.
541            * We don't want to use the same hash value at each level in the recursion
542            * graph as that would lead to polarisation
543            */
544           hc0 = hc1 = 0;
545
546           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
547             {
548               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
549                 {
550                   hc0 = vnet_buffer (p0)->ip.flow_hash =
551                     vnet_buffer (p0)->ip.flow_hash >> 1;
552                 }
553               else
554                 {
555                   hc0 = vnet_buffer (p0)->ip.flow_hash =
556                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
557                 }
558             }
559           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
560             {
561               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
562                 {
563                   hc1 = vnet_buffer (p1)->ip.flow_hash =
564                     vnet_buffer (p1)->ip.flow_hash >> 1;
565                 }
566               else
567                 {
568                   hc1 = vnet_buffer (p1)->ip.flow_hash =
569                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
570                 }
571             }
572
573           dpo0 =
574             load_balance_get_bucket_i (lb0,
575                                        hc0 & (lb0->lb_n_buckets_minus_1));
576           dpo1 =
577             load_balance_get_bucket_i (lb1,
578                                        hc1 & (lb1->lb_n_buckets_minus_1));
579
580           next0 = dpo0->dpoi_next_node;
581           next1 = dpo1->dpoi_next_node;
582
583           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
584           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
585
586           vlib_increment_combined_counter
587             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
588           vlib_increment_combined_counter
589             (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
590
591           vlib_validate_buffer_enqueue_x2 (vm, node, next,
592                                            to_next, n_left_to_next,
593                                            pi0, pi1, next0, next1);
594         }
595
596       while (n_left_from > 0 && n_left_to_next > 0)
597         {
598           ip_lookup_next_t next0;
599           const load_balance_t *lb0;
600           vlib_buffer_t *p0;
601           u32 pi0, lbi0, hc0;
602           const ip4_header_t *ip0;
603           const dpo_id_t *dpo0;
604
605           pi0 = from[0];
606           to_next[0] = pi0;
607           from += 1;
608           to_next += 1;
609           n_left_to_next -= 1;
610           n_left_from -= 1;
611
612           p0 = vlib_get_buffer (vm, pi0);
613
614           ip0 = vlib_buffer_get_current (p0);
615           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
616
617           lb0 = load_balance_get (lbi0);
618
619           hc0 = 0;
620           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
621             {
622               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
623                 {
624                   hc0 = vnet_buffer (p0)->ip.flow_hash =
625                     vnet_buffer (p0)->ip.flow_hash >> 1;
626                 }
627               else
628                 {
629                   hc0 = vnet_buffer (p0)->ip.flow_hash =
630                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
631                 }
632             }
633
634           dpo0 =
635             load_balance_get_bucket_i (lb0,
636                                        hc0 & (lb0->lb_n_buckets_minus_1));
637
638           next0 = dpo0->dpoi_next_node;
639           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
640
641           vlib_increment_combined_counter
642             (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
643
644           vlib_validate_buffer_enqueue_x1 (vm, node, next,
645                                            to_next, n_left_to_next,
646                                            pi0, next0);
647         }
648
649       vlib_put_next_frame (vm, node, next, n_left_to_next);
650     }
651
652   return frame->n_vectors;
653 }
654
655 VLIB_REGISTER_NODE (ip4_load_balance_node) =
656 {
657 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
658     sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
659     format_ip4_lookup_trace,};
660
661 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
662
663 /* get first interface address */
664 ip4_address_t *
665 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
666                              ip_interface_address_t ** result_ia)
667 {
668   ip_lookup_main_t *lm = &im->lookup_main;
669   ip_interface_address_t *ia = 0;
670   ip4_address_t *result = 0;
671
672   /* *INDENT-OFF* */
673   foreach_ip_interface_address
674     (lm, ia, sw_if_index,
675      1 /* honor unnumbered */ ,
676      ({
677        ip4_address_t * a =
678          ip_interface_address_get_address (lm, ia);
679        result = a;
680        break;
681      }));
682   /* *INDENT-OFF* */
683   if (result_ia)
684     *result_ia = result ? ia : 0;
685   return result;
686 }
687
688 static void
689 ip4_add_interface_routes (u32 sw_if_index,
690                           ip4_main_t * im, u32 fib_index,
691                           ip_interface_address_t * a)
692 {
693   ip_lookup_main_t *lm = &im->lookup_main;
694   ip4_address_t *address = ip_interface_address_get_address (lm, a);
695   fib_prefix_t pfx = {
696     .fp_len = a->address_length,
697     .fp_proto = FIB_PROTOCOL_IP4,
698     .fp_addr.ip4 = *address,
699   };
700
701   a->neighbor_probe_adj_index = ~0;
702
703   if (pfx.fp_len <= 30)
704     {
705       /* a /30 or shorter - add a glean for the network address */
706       fib_node_index_t fei;
707
708       fei = fib_table_entry_update_one_path (fib_index, &pfx,
709                                              FIB_SOURCE_INTERFACE,
710                                              (FIB_ENTRY_FLAG_CONNECTED |
711                                               FIB_ENTRY_FLAG_ATTACHED),
712                                              FIB_PROTOCOL_IP4,
713                                              /* No next-hop address */
714                                              NULL,
715                                              sw_if_index,
716                                              // invalid FIB index
717                                              ~0,
718                                              1,
719                                              // no out-label stack
720                                              NULL,
721                                              FIB_ROUTE_PATH_FLAG_NONE);
722       a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
723
724       /* Add the two broadcast addresses as drop */
725       fib_prefix_t net_pfx = {
726         .fp_len = 32,
727         .fp_proto = FIB_PROTOCOL_IP4,
728         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
729       };
730       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
731         fib_table_entry_special_add(fib_index,
732                                     &net_pfx,
733                                     FIB_SOURCE_INTERFACE,
734                                     (FIB_ENTRY_FLAG_DROP |
735                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
736                                     ADJ_INDEX_INVALID);
737       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
738       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
739         fib_table_entry_special_add(fib_index,
740                                     &net_pfx,
741                                     FIB_SOURCE_INTERFACE,
742                                     (FIB_ENTRY_FLAG_DROP |
743                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
744                                     ADJ_INDEX_INVALID);
745     }
746   else if (pfx.fp_len == 31)
747     {
748       u32 mask = clib_host_to_net_u32(1);
749       fib_prefix_t net_pfx = pfx;
750
751       net_pfx.fp_len = 32;
752       net_pfx.fp_addr.ip4.as_u32 ^= mask;
753
754       /* a /31 - add the other end as an attached host */
755       fib_table_entry_update_one_path (fib_index, &net_pfx,
756                                        FIB_SOURCE_INTERFACE,
757                                        (FIB_ENTRY_FLAG_ATTACHED),
758                                        FIB_PROTOCOL_IP4,
759                                        &net_pfx.fp_addr,
760                                        sw_if_index,
761                                        // invalid FIB index
762                                        ~0,
763                                        1,
764                                        NULL,
765                                        FIB_ROUTE_PATH_FLAG_NONE);
766     }
767   pfx.fp_len = 32;
768
769   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
770     {
771       u32 classify_table_index =
772         lm->classify_table_index_by_sw_if_index[sw_if_index];
773       if (classify_table_index != (u32) ~ 0)
774         {
775           dpo_id_t dpo = DPO_INVALID;
776
777           dpo_set (&dpo,
778                    DPO_CLASSIFY,
779                    DPO_PROTO_IP4,
780                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
781
782           fib_table_entry_special_dpo_add (fib_index,
783                                            &pfx,
784                                            FIB_SOURCE_CLASSIFY,
785                                            FIB_ENTRY_FLAG_NONE, &dpo);
786           dpo_reset (&dpo);
787         }
788     }
789
790   fib_table_entry_update_one_path (fib_index, &pfx,
791                                    FIB_SOURCE_INTERFACE,
792                                    (FIB_ENTRY_FLAG_CONNECTED |
793                                     FIB_ENTRY_FLAG_LOCAL),
794                                    FIB_PROTOCOL_IP4,
795                                    &pfx.fp_addr,
796                                    sw_if_index,
797                                    // invalid FIB index
798                                    ~0,
799                                    1, NULL,
800                                    FIB_ROUTE_PATH_FLAG_NONE);
801 }
802
803 static void
804 ip4_del_interface_routes (ip4_main_t * im,
805                           u32 fib_index,
806                           ip4_address_t * address, u32 address_length)
807 {
808   fib_prefix_t pfx = {
809     .fp_len = address_length,
810     .fp_proto = FIB_PROTOCOL_IP4,
811     .fp_addr.ip4 = *address,
812   };
813
814   if (pfx.fp_len <= 30)
815     {
816       fib_prefix_t net_pfx = {
817         .fp_len = 32,
818         .fp_proto = FIB_PROTOCOL_IP4,
819         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
820       };
821       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
822         fib_table_entry_special_remove(fib_index,
823                                        &net_pfx,
824                                        FIB_SOURCE_INTERFACE);
825       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
826       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
827         fib_table_entry_special_remove(fib_index,
828                                        &net_pfx,
829                                        FIB_SOURCE_INTERFACE);
830       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
831     }
832     else if (pfx.fp_len == 31)
833     {
834       u32 mask = clib_host_to_net_u32(1);
835       fib_prefix_t net_pfx = pfx;
836
837       net_pfx.fp_len = 32;
838       net_pfx.fp_addr.ip4.as_u32 ^= mask;
839
840       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
841     }
842
843   pfx.fp_len = 32;
844   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
845 }
846
847 void
848 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
849 {
850   ip4_main_t *im = &ip4_main;
851
852   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
853
854   /*
855    * enable/disable only on the 1<->0 transition
856    */
857   if (is_enable)
858     {
859       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
860         return;
861     }
862   else
863     {
864       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
865       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
866         return;
867     }
868   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
869                                !is_enable, 0, 0);
870
871
872   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
873                                sw_if_index, !is_enable, 0, 0);
874 }
875
876 static clib_error_t *
877 ip4_add_del_interface_address_internal (vlib_main_t * vm,
878                                         u32 sw_if_index,
879                                         ip4_address_t * address,
880                                         u32 address_length, u32 is_del)
881 {
882   vnet_main_t *vnm = vnet_get_main ();
883   ip4_main_t *im = &ip4_main;
884   ip_lookup_main_t *lm = &im->lookup_main;
885   clib_error_t *error = 0;
886   u32 if_address_index, elts_before;
887   ip4_address_fib_t ip4_af, *addr_fib = 0;
888
889   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
890   ip4_addr_fib_init (&ip4_af, address,
891                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
892   vec_add1 (addr_fib, ip4_af);
893
894   /* FIXME-LATER
895    * there is no support for adj-fib handling in the presence of overlapping
896    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
897    * most routers do.
898    */
899   /* *INDENT-OFF* */
900   if (!is_del)
901     {
902       /* When adding an address check that it does not conflict
903          with an existing address. */
904       ip_interface_address_t *ia;
905       foreach_ip_interface_address
906         (&im->lookup_main, ia, sw_if_index,
907          0 /* honor unnumbered */ ,
908          ({
909            ip4_address_t * x =
910              ip_interface_address_get_address
911              (&im->lookup_main, ia);
912            if (ip4_destination_matches_route
913                (im, address, x, ia->address_length) ||
914                ip4_destination_matches_route (im,
915                                               x,
916                                               address,
917                                               address_length))
918              return
919                clib_error_create
920                ("failed to add %U which conflicts with %U for interface %U",
921                 format_ip4_address_and_length, address,
922                 address_length,
923                 format_ip4_address_and_length, x,
924                 ia->address_length,
925                 format_vnet_sw_if_index_name, vnm,
926                 sw_if_index);
927          }));
928     }
929   /* *INDENT-ON* */
930
931   elts_before = pool_elts (lm->if_address_pool);
932
933   error = ip_interface_address_add_del
934     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
935   if (error)
936     goto done;
937
938   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
939
940   if (is_del)
941     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
942   else
943     ip4_add_interface_routes (sw_if_index,
944                               im, ip4_af.fib_index,
945                               pool_elt_at_index
946                               (lm->if_address_pool, if_address_index));
947
948   /* If pool did not grow/shrink: add duplicate address. */
949   if (elts_before != pool_elts (lm->if_address_pool))
950     {
951       ip4_add_del_interface_address_callback_t *cb;
952       vec_foreach (cb, im->add_del_interface_address_callbacks)
953         cb->function (im, cb->function_opaque, sw_if_index,
954                       address, address_length, if_address_index, is_del);
955     }
956
957 done:
958   vec_free (addr_fib);
959   return error;
960 }
961
962 clib_error_t *
963 ip4_add_del_interface_address (vlib_main_t * vm,
964                                u32 sw_if_index,
965                                ip4_address_t * address,
966                                u32 address_length, u32 is_del)
967 {
968   return ip4_add_del_interface_address_internal
969     (vm, sw_if_index, address, address_length, is_del);
970 }
971
/* Built-in ip4 unicast rx feature path definition */
/* *INDENT-OFF* */
/*
 * The "ip4-unicast" arc.  It is entered from the two ip4 input nodes and
 * its arc index is stored in ip4_main.lookup_main.ucast_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
  .arc_name = "ip4-unicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
};

/*
 * Features registered on the "ip4-unicast" arc.  Each entry names its
 * graph node and, through .runs_before, the feature(s) it is ordered
 * ahead of; "ip4-lookup" lists none and therefore terminates the chain.
 */
VNET_FEATURE_INIT (ip4_flow_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-flow-classify",
  .runs_before = VNET_FEATURES ("ip4-inacl"),
};

VNET_FEATURE_INIT (ip4_inacl, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-inacl",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
};

VNET_FEATURE_INIT (ip4_source_check_1, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-rx",
  .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
};

VNET_FEATURE_INIT (ip4_source_check_2, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-check-via-any",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-source-and-port-range-check-rx",
  .runs_before = VNET_FEATURES ("ip4-policer-classify"),
};

VNET_FEATURE_INIT (ip4_policer_classify, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-policer-classify",
  .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ipsec-input-ip4",
  .runs_before = VNET_FEATURES ("vpath-input-ip4"),
};

VNET_FEATURE_INIT (ip4_vpath, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
};

VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-vxlan-bypass",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

/*
 * "ip4-drop" on this arc is the feature toggled at run time by
 * ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del().
 */
VNET_FEATURE_INIT (ip4_drop, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-drop",
  .runs_before = VNET_FEATURES ("ip4-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup, static) =
{
  .arc_name = "ip4-unicast",
  .node_name = "ip4-lookup",
  .runs_before = 0,     /* not before any other features */
};
1057
/* Built-in ip4 multicast rx feature path definition */
/*
 * The "ip4-multicast" arc.  It shares its start nodes with the unicast
 * arc; its arc index is stored in
 * ip4_main.lookup_main.mcast_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
{
  .arc_name = "ip4-multicast",
  .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
  .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_vpath_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "vpath-input-ip4",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

/*
 * "ip4-drop" on this arc is the feature toggled at run time by
 * ip4_sw_interface_enable_disable() and ip4_sw_interface_add_del().
 */
VNET_FEATURE_INIT (ip4_mc_drop, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-drop",
  .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
};

VNET_FEATURE_INIT (ip4_lookup_mc, static) =
{
  .arc_name = "ip4-multicast",
  .node_name = "ip4-mfib-forward-lookup",
  .runs_before = 0,     /* last feature */
};
1086
/* Source and port-range check ip4 tx feature path definition */
/*
 * The "ip4-output" arc.  It is entered from "ip4-rewrite" and
 * "ip4-midchain"; its arc index is stored in
 * ip4_main.lookup_main.output_feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_output, static) =
{
  .arc_name = "ip4-output",
  .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
  .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
};

VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ip4-source-and-port-range-check-tx",
  .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
};

VNET_FEATURE_INIT (ip4_ipsec_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "ipsec-output-ip4",
  .runs_before = VNET_FEATURES ("interface-output"),
};

/* Built-in ip4 tx feature path definition */
VNET_FEATURE_INIT (ip4_interface_output, static) =
{
  .arc_name = "ip4-output",
  .node_name = "interface-output",
  .runs_before = 0,     /* not before any other features */
};
/* *INDENT-ON* */
1117
1118 static clib_error_t *
1119 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1120 {
1121   ip4_main_t *im = &ip4_main;
1122
1123   /* Fill in lookup tables with default table (0). */
1124   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1125   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1126
1127   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1128                                is_add, 0, 0);
1129
1130   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1131                                is_add, 0, 0);
1132
1133   return /* no error */ 0;
1134 }
1135
1136 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1137
/*
 * Global IP4 main.
 * The single ip4_main_t instance used throughout this file; functions
 * here reach it through &ip4_main.
 */
ip4_main_t ip4_main;
1140
1141 clib_error_t *
1142 ip4_lookup_init (vlib_main_t * vm)
1143 {
1144   ip4_main_t *im = &ip4_main;
1145   clib_error_t *error;
1146   uword i;
1147
1148   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1149     return error;
1150
1151   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1152     {
1153       u32 m;
1154
1155       if (i < 32)
1156         m = pow2_mask (i) << (32 - i);
1157       else
1158         m = ~0;
1159       im->fib_masks[i] = clib_host_to_net_u32 (m);
1160     }
1161
1162   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1163
1164   /* Create FIB with index 0 and table id of 0. */
1165   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1166   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1167
1168   {
1169     pg_node_t *pn;
1170     pn = pg_get_node (ip4_lookup_node.index);
1171     pn->unformat_edit = unformat_pg_ip4_header;
1172   }
1173
1174   {
1175     ethernet_arp_header_t h;
1176
1177     memset (&h, 0, sizeof (h));
1178
1179     /* Set target ethernet address to all zeros. */
1180     memset (h.ip4_over_ethernet[1].ethernet, 0,
1181             sizeof (h.ip4_over_ethernet[1].ethernet));
1182
1183 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1184 #define _8(f,v) h.f = v;
1185     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1186     _16 (l3_type, ETHERNET_TYPE_IP4);
1187     _8 (n_l2_address_bytes, 6);
1188     _8 (n_l3_address_bytes, 4);
1189     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1190 #undef _16
1191 #undef _8
1192
1193     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1194                                /* data */ &h,
1195                                sizeof (h),
1196                                /* alloc chunk size */ 8,
1197                                "ip4 arp");
1198   }
1199
1200   return error;
1201 }
1202
1203 VLIB_INIT_FUNCTION (ip4_lookup_init);
1204
/*
 * Trace record written by ip4_forward_next_trace() and decoded by the
 * format_ip4_*_trace functions in this file.
 */
typedef struct
{
  /* Adjacency taken. */
  /* Copied from the buffer's ip.adj_index[] slot selected by the caller. */
  u32 dpo_index;
  /* Copied from the buffer's ip.flow_hash. */
  u32 flow_hash;
  /* The buffer's sw_if_index[VLIB_TX] when that is not ~0, otherwise the
     FIB index bound to the receive interface. */
  u32 fib_index;

  /* Packet data, possibly *after* rewrite. */
  u8 packet_data[64 - 1 * sizeof (u32)];
}
ip4_forward_next_trace_t;
1216
1217 u8 *
1218 format_ip4_forward_next_trace (u8 * s, va_list * args)
1219 {
1220   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1221   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1222   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1223   uword indent = format_get_indent (s);
1224   s = format (s, "%U%U",
1225               format_white_space, indent,
1226               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1227   return s;
1228 }
1229
1230 static u8 *
1231 format_ip4_lookup_trace (u8 * s, va_list * args)
1232 {
1233   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1234   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1235   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1236   uword indent = format_get_indent (s);
1237
1238   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1239               t->fib_index, t->dpo_index, t->flow_hash);
1240   s = format (s, "\n%U%U",
1241               format_white_space, indent,
1242               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1243   return s;
1244 }
1245
1246 static u8 *
1247 format_ip4_rewrite_trace (u8 * s, va_list * args)
1248 {
1249   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1250   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1251   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1252   uword indent = format_get_indent (s);
1253
1254   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1255               t->fib_index, t->dpo_index, format_ip_adjacency,
1256               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1257   s = format (s, "\n%U%U",
1258               format_white_space, indent,
1259               format_ip_adjacency_packet_data,
1260               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1261   return s;
1262 }
1263
1264 /* Common trace function for all ip4-forward next nodes. */
1265 void
1266 ip4_forward_next_trace (vlib_main_t * vm,
1267                         vlib_node_runtime_t * node,
1268                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1269 {
1270   u32 *from, n_left;
1271   ip4_main_t *im = &ip4_main;
1272
1273   n_left = frame->n_vectors;
1274   from = vlib_frame_vector_args (frame);
1275
1276   while (n_left >= 4)
1277     {
1278       u32 bi0, bi1;
1279       vlib_buffer_t *b0, *b1;
1280       ip4_forward_next_trace_t *t0, *t1;
1281
1282       /* Prefetch next iteration. */
1283       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1284       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1285
1286       bi0 = from[0];
1287       bi1 = from[1];
1288
1289       b0 = vlib_get_buffer (vm, bi0);
1290       b1 = vlib_get_buffer (vm, bi1);
1291
1292       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1293         {
1294           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1295           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1296           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1297           t0->fib_index =
1298             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1299              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1300             vec_elt (im->fib_index_by_sw_if_index,
1301                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1302
1303           clib_memcpy (t0->packet_data,
1304                        vlib_buffer_get_current (b0),
1305                        sizeof (t0->packet_data));
1306         }
1307       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1308         {
1309           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1310           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1311           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1312           t1->fib_index =
1313             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1314              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1315             vec_elt (im->fib_index_by_sw_if_index,
1316                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1317           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1318                        sizeof (t1->packet_data));
1319         }
1320       from += 2;
1321       n_left -= 2;
1322     }
1323
1324   while (n_left >= 1)
1325     {
1326       u32 bi0;
1327       vlib_buffer_t *b0;
1328       ip4_forward_next_trace_t *t0;
1329
1330       bi0 = from[0];
1331
1332       b0 = vlib_get_buffer (vm, bi0);
1333
1334       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1335         {
1336           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1337           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1338           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1339           t0->fib_index =
1340             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1341              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1342             vec_elt (im->fib_index_by_sw_if_index,
1343                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1344           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1345                        sizeof (t0->packet_data));
1346         }
1347       from += 1;
1348       n_left -= 1;
1349     }
1350 }
1351
1352 static uword
1353 ip4_drop_or_punt (vlib_main_t * vm,
1354                   vlib_node_runtime_t * node,
1355                   vlib_frame_t * frame, ip4_error_t error_code)
1356 {
1357   u32 *buffers = vlib_frame_vector_args (frame);
1358   uword n_packets = frame->n_vectors;
1359
1360   vlib_error_drop_buffers (vm, node, buffers,
1361                            /* stride */ 1,
1362                            n_packets,
1363                            /* next */ 0,
1364                            ip4_input_node.index, error_code);
1365
1366   if (node->flags & VLIB_NODE_FLAG_TRACE)
1367     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1368
1369   return n_packets;
1370 }
1371
1372 static uword
1373 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1374 {
1375   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
1376 }
1377
1378 static uword
1379 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1380 {
1381   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
1382 }
1383
1384 /* *INDENT-OFF* */
1385 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1386 {
1387   .function = ip4_drop,.
1388   name = "ip4-drop",
1389   .vector_size = sizeof (u32),
1390   .format_trace = format_ip4_forward_next_trace,
1391   .n_next_nodes = 1,
1392   .next_nodes = {
1393     [0] = "error-drop",
1394   },
1395 };
1396
1397 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1398
/*
 * Graph node "ip4-punt": runs ip4_punt() on vectors of u32 buffer
 * indices and has a single next node, "error-punt".
 */
VLIB_REGISTER_NODE (ip4_punt_node, static) =
{
  .function = ip4_punt,
  .name = "ip4-punt",
  .vector_size = sizeof (u32),
  .format_trace = format_ip4_forward_next_trace,
  .n_next_nodes = 1,
  .next_nodes = {
    [0] = "error-punt",
  },
};

VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
/* *INDENT-ON* */
1413
1414 /* Compute TCP/UDP/ICMP4 checksum in software. */
1415 u16
1416 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1417                               ip4_header_t * ip0)
1418 {
1419   ip_csum_t sum0;
1420   u32 ip_header_length, payload_length_host_byte_order;
1421   u32 n_this_buffer, n_bytes_left;
1422   u16 sum16;
1423   void *data_this_buffer;
1424
1425   /* Initialize checksum with ip header. */
1426   ip_header_length = ip4_header_bytes (ip0);
1427   payload_length_host_byte_order =
1428     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1429   sum0 =
1430     clib_host_to_net_u32 (payload_length_host_byte_order +
1431                           (ip0->protocol << 16));
1432
1433   if (BITS (uword) == 32)
1434     {
1435       sum0 =
1436         ip_csum_with_carry (sum0,
1437                             clib_mem_unaligned (&ip0->src_address, u32));
1438       sum0 =
1439         ip_csum_with_carry (sum0,
1440                             clib_mem_unaligned (&ip0->dst_address, u32));
1441     }
1442   else
1443     sum0 =
1444       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1445
1446   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1447   data_this_buffer = (void *) ip0 + ip_header_length;
1448   if (n_this_buffer + ip_header_length > p0->current_length)
1449     n_this_buffer =
1450       p0->current_length >
1451       ip_header_length ? p0->current_length - ip_header_length : 0;
1452   while (1)
1453     {
1454       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1455       n_bytes_left -= n_this_buffer;
1456       if (n_bytes_left == 0)
1457         break;
1458
1459       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1460       p0 = vlib_get_buffer (vm, p0->next_buffer);
1461       data_this_buffer = vlib_buffer_get_current (p0);
1462       n_this_buffer = p0->current_length;
1463     }
1464
1465   sum16 = ~ip_csum_fold (sum0);
1466
1467   return sum16;
1468 }
1469
1470 u32
1471 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1472 {
1473   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1474   udp_header_t *udp0;
1475   u16 sum16;
1476
1477   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1478           || ip0->protocol == IP_PROTOCOL_UDP);
1479
1480   udp0 = (void *) (ip0 + 1);
1481   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1482     {
1483       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1484                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1485       return p0->flags;
1486     }
1487
1488   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1489
1490   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1491                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1492
1493   return p0->flags;
1494 }
1495
/* *INDENT-OFF* */
/*
 * The "ip4-local" feature arc, entered from the "ip4-local" node.
 * ip4_local_inline() reads its index from
 * vnet_feat_arc_ip4_local.feature_arc_index.
 */
VNET_FEATURE_ARC_INIT (ip4_local) =
{
  .arc_name  = "ip4-local",
  .start_nodes = VNET_FEATURES ("ip4-local"),
};
/* *INDENT-ON* */
1503
1504 static inline uword
1505 ip4_local_inline (vlib_main_t * vm,
1506                   vlib_node_runtime_t * node,
1507                   vlib_frame_t * frame, int head_of_feature_arc)
1508 {
1509   ip4_main_t *im = &ip4_main;
1510   ip_lookup_main_t *lm = &im->lookup_main;
1511   ip_local_next_t next_index;
1512   u32 *from, *to_next, n_left_from, n_left_to_next;
1513   vlib_node_runtime_t *error_node =
1514     vlib_node_get_runtime (vm, ip4_input_node.index);
1515   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1516
1517   from = vlib_frame_vector_args (frame);
1518   n_left_from = frame->n_vectors;
1519   next_index = node->cached_next_index;
1520
1521   if (node->flags & VLIB_NODE_FLAG_TRACE)
1522     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1523
1524   while (n_left_from > 0)
1525     {
1526       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1527
1528       while (n_left_from >= 4 && n_left_to_next >= 2)
1529         {
1530           vlib_buffer_t *p0, *p1;
1531           ip4_header_t *ip0, *ip1;
1532           udp_header_t *udp0, *udp1;
1533           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1534           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1535           const dpo_id_t *dpo0, *dpo1;
1536           const load_balance_t *lb0, *lb1;
1537           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1538           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1539           i32 len_diff0, len_diff1;
1540           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1541           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1542           u32 sw_if_index0, sw_if_index1;
1543
1544           pi0 = to_next[0] = from[0];
1545           pi1 = to_next[1] = from[1];
1546           from += 2;
1547           n_left_from -= 2;
1548           to_next += 2;
1549           n_left_to_next -= 2;
1550
1551           next0 = next1 = IP_LOCAL_NEXT_DROP;
1552
1553           p0 = vlib_get_buffer (vm, pi0);
1554           p1 = vlib_get_buffer (vm, pi1);
1555
1556           ip0 = vlib_buffer_get_current (p0);
1557           ip1 = vlib_buffer_get_current (p1);
1558
1559           vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1560           vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1561
1562           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1563           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1564
1565           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1566           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1567
1568           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1569           fib_index0 =
1570             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1571              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1572
1573           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1574           fib_index1 =
1575             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1576              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1577
1578           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1579           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1580
1581           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1582           leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1583
1584           /* Treat IP frag packets as "experimental" protocol for now
1585              until support of IP frag reassembly is implemented */
1586           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1587           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1588
1589           if (head_of_feature_arc == 0)
1590             {
1591               error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1592               goto skip_checks;
1593             }
1594
1595           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1596           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1597           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1598           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1599
1600           flags0 = p0->flags;
1601           flags1 = p1->flags;
1602
1603           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1604           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1605
1606           udp0 = ip4_next_header (ip0);
1607           udp1 = ip4_next_header (ip1);
1608
1609           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1610           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1611           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1612
1613           /* Verify UDP length. */
1614           ip_len0 = clib_net_to_host_u16 (ip0->length);
1615           ip_len1 = clib_net_to_host_u16 (ip1->length);
1616           udp_len0 = clib_net_to_host_u16 (udp0->length);
1617           udp_len1 = clib_net_to_host_u16 (udp1->length);
1618
1619           len_diff0 = ip_len0 - udp_len0;
1620           len_diff1 = ip_len1 - udp_len1;
1621
1622           len_diff0 = is_udp0 ? len_diff0 : 0;
1623           len_diff1 = is_udp1 ? len_diff1 : 0;
1624
1625           if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1626                                & good_tcp_udp0 & good_tcp_udp1)))
1627             {
1628               if (is_tcp_udp0)
1629                 {
1630                   if (is_tcp_udp0
1631                       && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1632                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1633                   good_tcp_udp0 =
1634                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1635                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1636                 }
1637               if (is_tcp_udp1)
1638                 {
1639                   if (is_tcp_udp1
1640                       && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1641                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1642                   good_tcp_udp1 =
1643                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1644                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1645                 }
1646             }
1647
1648           good_tcp_udp0 &= len_diff0 >= 0;
1649           good_tcp_udp1 &= len_diff1 >= 0;
1650
1651           leaf0 =
1652             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1653           leaf1 =
1654             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1655
1656           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1657
1658           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1659           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1660
1661           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1662           error0 = (is_tcp_udp0 && !good_tcp_udp0
1663                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1664           error1 = (is_tcp_udp1 && !good_tcp_udp1
1665                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1666
1667           leaf0 =
1668             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1669           leaf1 =
1670             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1671
1672           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1673             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1674           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1675
1676           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1677             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1678           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1679
1680           lb0 = load_balance_get (lbi0);
1681           lb1 = load_balance_get (lbi1);
1682           dpo0 = load_balance_get_bucket_i (lb0, 0);
1683           dpo1 = load_balance_get_bucket_i (lb1, 0);
1684
1685           /*
1686            * Must have a route to source otherwise we drop the packet.
1687            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1688            *
1689            * The checks are:
1690            *  - the source is a recieve => it's from us => bogus, do this
1691            *    first since it sets a different error code.
1692            *  - uRPF check for any route to source - accept if passes.
1693            *  - allow packets destined to the broadcast address from unknown sources
1694            */
1695           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1696                      dpo0->dpoi_type == DPO_RECEIVE) ?
1697                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1698           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1699                      !fib_urpf_check_size (lb0->lb_urpf) &&
1700                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1701                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1702           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1703                      dpo1->dpoi_type == DPO_RECEIVE) ?
1704                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1705           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1706                      !fib_urpf_check_size (lb1->lb_urpf) &&
1707                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1708                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1709
1710         skip_checks:
1711
1712           next0 = lm->local_next_by_ip_protocol[proto0];
1713           next1 = lm->local_next_by_ip_protocol[proto1];
1714
1715           next0 =
1716             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1717           next1 =
1718             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1719
1720           p0->error = error0 ? error_node->errors[error0] : 0;
1721           p1->error = error1 ? error_node->errors[error1] : 0;
1722
1723           if (head_of_feature_arc)
1724             {
1725               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1726                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1727               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1728                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1729             }
1730
1731           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1732                                            n_left_to_next, pi0, pi1,
1733                                            next0, next1);
1734         }
1735
1736       while (n_left_from > 0 && n_left_to_next > 0)
1737         {
1738           vlib_buffer_t *p0;
1739           ip4_header_t *ip0;
1740           udp_header_t *udp0;
1741           ip4_fib_mtrie_t *mtrie0;
1742           ip4_fib_mtrie_leaf_t leaf0;
1743           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1744           i32 len_diff0;
1745           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1746           load_balance_t *lb0;
1747           const dpo_id_t *dpo0;
1748           u32 sw_if_index0;
1749
1750           pi0 = to_next[0] = from[0];
1751           from += 1;
1752           n_left_from -= 1;
1753           to_next += 1;
1754           n_left_to_next -= 1;
1755
1756           next0 = IP_LOCAL_NEXT_DROP;
1757
1758           p0 = vlib_get_buffer (vm, pi0);
1759
1760           ip0 = vlib_buffer_get_current (p0);
1761
1762           vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1763
1764           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1765
1766           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1767
1768           fib_index0 =
1769             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1770              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1771
1772           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1773
1774           leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1775
1776           /* Treat IP frag packets as "experimental" protocol for now
1777              until support of IP frag reassembly is implemented */
1778           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1779
1780           if (head_of_feature_arc == 0)
1781             {
1782               error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1783               goto skip_check;
1784             }
1785
1786           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1787           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1788
1789           flags0 = p0->flags;
1790
1791           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1792
1793           udp0 = ip4_next_header (ip0);
1794
1795           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1796           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1797
1798           /* Verify UDP length. */
1799           ip_len0 = clib_net_to_host_u16 (ip0->length);
1800           udp_len0 = clib_net_to_host_u16 (udp0->length);
1801
1802           len_diff0 = ip_len0 - udp_len0;
1803
1804           len_diff0 = is_udp0 ? len_diff0 : 0;
1805
1806           if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1807             {
1808               if (is_tcp_udp0)
1809                 {
1810                   if (is_tcp_udp0
1811                       && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1812                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1813                   good_tcp_udp0 =
1814                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1815                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1816                 }
1817             }
1818
1819           good_tcp_udp0 &= len_diff0 >= 0;
1820
1821           leaf0 =
1822             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1823
1824           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1825
1826           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1827
1828           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1829           error0 = (is_tcp_udp0 && !good_tcp_udp0
1830                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1831
1832           leaf0 =
1833             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1834
1835           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1836           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1837
1838           lb0 = load_balance_get (lbi0);
1839           dpo0 = load_balance_get_bucket_i (lb0, 0);
1840
1841           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1842             vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1843
1844           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1845                      dpo0->dpoi_type == DPO_RECEIVE) ?
1846                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1847           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1848                      !fib_urpf_check_size (lb0->lb_urpf) &&
1849                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1850                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1851
1852         skip_check:
1853
1854           next0 = lm->local_next_by_ip_protocol[proto0];
1855
1856           next0 =
1857             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1858
1859           p0->error = error0 ? error_node->errors[error0] : 0;
1860
1861           if (head_of_feature_arc)
1862             {
1863               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1864                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1865             }
1866
1867           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1868                                            n_left_to_next, pi0, next0);
1869
1870         }
1871
1872       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1873     }
1874
1875   return frame->n_vectors;
1876 }
1877
1878 static uword
1879 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1880 {
1881   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1882 }
1883
1884 /* *INDENT-OFF* */
1885 VLIB_REGISTER_NODE (ip4_local_node) =
1886 {
1887   .function = ip4_local,
1888   .name = "ip4-local",
1889   .vector_size = sizeof (u32),
1890   .format_trace = format_ip4_forward_next_trace,
1891   .n_next_nodes = IP_LOCAL_N_NEXT,
1892   .next_nodes =
1893   {
1894     [IP_LOCAL_NEXT_DROP] = "error-drop",
1895     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1896     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1897     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1898 };
1899 /* *INDENT-ON* */
1900
1901 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1902
1903 static uword
1904 ip4_local_end_of_arc (vlib_main_t * vm,
1905                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1906 {
1907   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1908 }
1909
1910 /* *INDENT-OFF* */
1911 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1912   .function = ip4_local_end_of_arc,
1913   .name = "ip4-local-end-of-arc",
1914   .vector_size = sizeof (u32),
1915
1916   .format_trace = format_ip4_forward_next_trace,
1917   .sibling_of = "ip4-local",
1918 };
1919
1920 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1921
1922 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1923   .arc_name = "ip4-local",
1924   .node_name = "ip4-local-end-of-arc",
1925   .runs_before = 0, /* not before any other features */
1926 };
1927 /* *INDENT-ON* */
1928
1929 void
1930 ip4_register_protocol (u32 protocol, u32 node_index)
1931 {
1932   vlib_main_t *vm = vlib_get_main ();
1933   ip4_main_t *im = &ip4_main;
1934   ip_lookup_main_t *lm = &im->lookup_main;
1935
1936   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1937   lm->local_next_by_ip_protocol[protocol] =
1938     vlib_node_add_next (vm, ip4_local_node.index, node_index);
1939 }
1940
1941 static clib_error_t *
1942 show_ip_local_command_fn (vlib_main_t * vm,
1943                           unformat_input_t * input, vlib_cli_command_t * cmd)
1944 {
1945   ip4_main_t *im = &ip4_main;
1946   ip_lookup_main_t *lm = &im->lookup_main;
1947   int i;
1948
1949   vlib_cli_output (vm, "Protocols handled by ip4_local");
1950   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1951     {
1952       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1953         vlib_cli_output (vm, "%d", i);
1954     }
1955   return 0;
1956 }
1957
1958
1959
1960 /*?
1961  * Display the set of protocols handled by the local IPv4 stack.
1962  *
1963  * @cliexpar
1964  * Example of how to display local protocol table:
1965  * @cliexstart{show ip local}
1966  * Protocols handled by ip4_local
1967  * 1
1968  * 17
1969  * 47
1970  * @cliexend
1971 ?*/
1972 /* *INDENT-OFF* */
1973 VLIB_CLI_COMMAND (show_ip_local, static) =
1974 {
1975   .path = "show ip local",
1976   .function = show_ip_local_command_fn,
1977   .short_help = "show ip local",
1978 };
1979 /* *INDENT-ON* */
1980
1981 always_inline uword
1982 ip4_arp_inline (vlib_main_t * vm,
1983                 vlib_node_runtime_t * node,
1984                 vlib_frame_t * frame, int is_glean)
1985 {
1986   vnet_main_t *vnm = vnet_get_main ();
1987   ip4_main_t *im = &ip4_main;
1988   ip_lookup_main_t *lm = &im->lookup_main;
1989   u32 *from, *to_next_drop;
1990   uword n_left_from, n_left_to_next_drop, next_index;
1991   static f64 time_last_seed_change = -1e100;
1992   static u32 hash_seeds[3];
1993   static uword hash_bitmap[256 / BITS (uword)];
1994   f64 time_now;
1995
1996   if (node->flags & VLIB_NODE_FLAG_TRACE)
1997     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1998
1999   time_now = vlib_time_now (vm);
2000   if (time_now - time_last_seed_change > 1e-3)
2001     {
2002       uword i;
2003       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2004                                             sizeof (hash_seeds));
2005       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2006         hash_seeds[i] = r[i];
2007
2008       /* Mark all hash keys as been no-seen before. */
2009       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2010         hash_bitmap[i] = 0;
2011
2012       time_last_seed_change = time_now;
2013     }
2014
2015   from = vlib_frame_vector_args (frame);
2016   n_left_from = frame->n_vectors;
2017   next_index = node->cached_next_index;
2018   if (next_index == IP4_ARP_NEXT_DROP)
2019     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
2020
2021   while (n_left_from > 0)
2022     {
2023       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2024                            to_next_drop, n_left_to_next_drop);
2025
2026       while (n_left_from > 0 && n_left_to_next_drop > 0)
2027         {
2028           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2029           ip_adjacency_t *adj0;
2030           vlib_buffer_t *p0;
2031           ip4_header_t *ip0;
2032           uword bm0;
2033
2034           pi0 = from[0];
2035
2036           p0 = vlib_get_buffer (vm, pi0);
2037
2038           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2039           adj0 = ip_get_adjacency (lm, adj_index0);
2040           ip0 = vlib_buffer_get_current (p0);
2041
2042           a0 = hash_seeds[0];
2043           b0 = hash_seeds[1];
2044           c0 = hash_seeds[2];
2045
2046           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2047           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2048
2049           if (is_glean)
2050             {
2051               /*
2052                * this is the Glean case, so we are ARPing for the
2053                * packet's destination
2054                */
2055               a0 ^= ip0->dst_address.data_u32;
2056             }
2057           else
2058             {
2059               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2060             }
2061           b0 ^= sw_if_index0;
2062
2063           hash_v3_finalize32 (a0, b0, c0);
2064
2065           c0 &= BITS (hash_bitmap) - 1;
2066           c0 = c0 / BITS (uword);
2067           m0 = (uword) 1 << (c0 % BITS (uword));
2068
2069           bm0 = hash_bitmap[c0];
2070           drop0 = (bm0 & m0) != 0;
2071
2072           /* Mark it as seen. */
2073           hash_bitmap[c0] = bm0 | m0;
2074
2075           from += 1;
2076           n_left_from -= 1;
2077           to_next_drop[0] = pi0;
2078           to_next_drop += 1;
2079           n_left_to_next_drop -= 1;
2080
2081           p0->error =
2082             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2083                          IP4_ARP_ERROR_REQUEST_SENT];
2084
2085           /*
2086            * the adj has been updated to a rewrite but the node the DPO that got
2087            * us here hasn't - yet. no big deal. we'll drop while we wait.
2088            */
2089           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2090             continue;
2091
2092           if (drop0)
2093             continue;
2094
2095           /*
2096            * Can happen if the control-plane is programming tables
2097            * with traffic flowing; at least that's today's lame excuse.
2098            */
2099           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2100               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2101             {
2102               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2103             }
2104           else
2105             /* Send ARP request. */
2106             {
2107               u32 bi0 = 0;
2108               vlib_buffer_t *b0;
2109               ethernet_arp_header_t *h0;
2110               vnet_hw_interface_t *hw_if0;
2111
2112               h0 =
2113                 vlib_packet_template_get_packet (vm,
2114                                                  &im->ip4_arp_request_packet_template,
2115                                                  &bi0);
2116
2117               /* Add rewrite/encap string for ARP packet. */
2118               vnet_rewrite_one_header (adj0[0], h0,
2119                                        sizeof (ethernet_header_t));
2120
2121               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2122
2123               /* Src ethernet address in ARP header. */
2124               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2125                            hw_if0->hw_address,
2126                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2127
2128               if (is_glean)
2129                 {
2130                   /* The interface's source address is stashed in the Glean Adj */
2131                   h0->ip4_over_ethernet[0].ip4 =
2132                     adj0->sub_type.glean.receive_addr.ip4;
2133
2134                   /* Copy in destination address we are requesting. This is the
2135                    * glean case, so it's the packet's destination.*/
2136                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2137                     ip0->dst_address.data_u32;
2138                 }
2139               else
2140                 {
2141                   /* Src IP address in ARP header. */
2142                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2143                                                   &h0->
2144                                                   ip4_over_ethernet[0].ip4))
2145                     {
2146                       /* No source address available */
2147                       p0->error =
2148                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2149                       vlib_buffer_free (vm, &bi0, 1);
2150                       continue;
2151                     }
2152
2153                   /* Copy in destination address we are requesting from the
2154                      incomplete adj */
2155                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2156                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2157                 }
2158
2159               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2160               b0 = vlib_get_buffer (vm, bi0);
2161               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2162
2163               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2164
2165               vlib_set_next_frame_buffer (vm, node,
2166                                           adj0->rewrite_header.next_index,
2167                                           bi0);
2168             }
2169         }
2170
2171       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2172     }
2173
2174   return frame->n_vectors;
2175 }
2176
2177 static uword
2178 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2179 {
2180   return (ip4_arp_inline (vm, node, frame, 0));
2181 }
2182
2183 static uword
2184 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2185 {
2186   return (ip4_arp_inline (vm, node, frame, 1));
2187 }
2188
2189 static char *ip4_arp_error_strings[] = {
2190   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2191   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2192   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2193   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2194   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2195   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2196 };
2197
2198 VLIB_REGISTER_NODE (ip4_arp_node) =
2199 {
2200   .function = ip4_arp,.name = "ip4-arp",.vector_size =
2201     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2202     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2203     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2204   {
2205   [IP4_ARP_NEXT_DROP] = "error-drop",}
2206 ,};
2207
2208 VLIB_REGISTER_NODE (ip4_glean_node) =
2209 {
2210   .function = ip4_glean,.name = "ip4-glean",.vector_size =
2211     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2212     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2213     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2214   {
2215   [IP4_ARP_NEXT_DROP] = "error-drop",}
2216 ,};
2217
2218 #define foreach_notrace_ip4_arp_error           \
2219 _(DROP)                                         \
2220 _(REQUEST_SENT)                                 \
2221 _(REPLICATE_DROP)                               \
2222 _(REPLICATE_FAIL)
2223
2224 clib_error_t *
2225 arp_notrace_init (vlib_main_t * vm)
2226 {
2227   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2228
2229   /* don't trace ARP request packets */
2230 #define _(a)                                    \
2231     vnet_pcap_drop_trace_filter_add_del         \
2232         (rt->errors[IP4_ARP_ERROR_##a],         \
2233          1 /* is_add */);
2234   foreach_notrace_ip4_arp_error;
2235 #undef _
2236   return 0;
2237 }
2238
2239 VLIB_INIT_FUNCTION (arp_notrace_init);
2240
2241
2242 /* Send an ARP request to see if given destination is reachable on given interface. */
2243 clib_error_t *
2244 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2245 {
2246   vnet_main_t *vnm = vnet_get_main ();
2247   ip4_main_t *im = &ip4_main;
2248   ethernet_arp_header_t *h;
2249   ip4_address_t *src;
2250   ip_interface_address_t *ia;
2251   ip_adjacency_t *adj;
2252   vnet_hw_interface_t *hi;
2253   vnet_sw_interface_t *si;
2254   vlib_buffer_t *b;
2255   u32 bi = 0;
2256
2257   si = vnet_get_sw_interface (vnm, sw_if_index);
2258
2259   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2260     {
2261       return clib_error_return (0, "%U: interface %U down",
2262                                 format_ip4_address, dst,
2263                                 format_vnet_sw_if_index_name, vnm,
2264                                 sw_if_index);
2265     }
2266
2267   src =
2268     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2269   if (!src)
2270     {
2271       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2272       return clib_error_return
2273         (0,
2274          "no matching interface address for destination %U (interface %U)",
2275          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2276          sw_if_index);
2277     }
2278
2279   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2280
2281   h =
2282     vlib_packet_template_get_packet (vm,
2283                                      &im->ip4_arp_request_packet_template,
2284                                      &bi);
2285
2286   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2287
2288   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2289                sizeof (h->ip4_over_ethernet[0].ethernet));
2290
2291   h->ip4_over_ethernet[0].ip4 = src[0];
2292   h->ip4_over_ethernet[1].ip4 = dst[0];
2293
2294   b = vlib_get_buffer (vm, bi);
2295   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2296     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2297
2298   /* Add encapsulation string for software interface (e.g. ethernet header). */
2299   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2300   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2301
2302   {
2303     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2304     u32 *to_next = vlib_frame_vector_args (f);
2305     to_next[0] = bi;
2306     f->n_vectors = 1;
2307     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2308   }
2309
2310   return /* no error */ 0;
2311 }
2312
/* Next-node indices used by ip4_rewrite_inline. */
typedef enum
{
  /* Initial next assigned to every packet before rewrite succeeds. */
  IP4_REWRITE_NEXT_DROP = 0,
  /* Selected when the TTL expires while forwarding. */
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
2318
2319 always_inline uword
2320 ip4_rewrite_inline (vlib_main_t * vm,
2321                     vlib_node_runtime_t * node,
2322                     vlib_frame_t * frame,
2323                     int do_counters, int is_midchain, int is_mcast)
2324 {
2325   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2326   u32 *from = vlib_frame_vector_args (frame);
2327   u32 n_left_from, n_left_to_next, *to_next, next_index;
2328   vlib_node_runtime_t *error_node =
2329     vlib_node_get_runtime (vm, ip4_input_node.index);
2330
2331   n_left_from = frame->n_vectors;
2332   next_index = node->cached_next_index;
2333   u32 thread_index = vlib_get_thread_index ();
2334
2335   while (n_left_from > 0)
2336     {
2337       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2338
2339       while (n_left_from >= 4 && n_left_to_next >= 2)
2340         {
2341           ip_adjacency_t *adj0, *adj1;
2342           vlib_buffer_t *p0, *p1;
2343           ip4_header_t *ip0, *ip1;
2344           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2345           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2346           u32 tx_sw_if_index0, tx_sw_if_index1;
2347
2348           /* Prefetch next iteration. */
2349           {
2350             vlib_buffer_t *p2, *p3;
2351
2352             p2 = vlib_get_buffer (vm, from[2]);
2353             p3 = vlib_get_buffer (vm, from[3]);
2354
2355             vlib_prefetch_buffer_header (p2, STORE);
2356             vlib_prefetch_buffer_header (p3, STORE);
2357
2358             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2359             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2360           }
2361
2362           pi0 = to_next[0] = from[0];
2363           pi1 = to_next[1] = from[1];
2364
2365           from += 2;
2366           n_left_from -= 2;
2367           to_next += 2;
2368           n_left_to_next -= 2;
2369
2370           p0 = vlib_get_buffer (vm, pi0);
2371           p1 = vlib_get_buffer (vm, pi1);
2372
2373           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2374           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2375
2376           /*
2377            * pre-fetch the per-adjacency counters
2378            */
2379           if (do_counters)
2380             {
2381               vlib_prefetch_combined_counter (&adjacency_counters,
2382                                               thread_index, adj_index0);
2383               vlib_prefetch_combined_counter (&adjacency_counters,
2384                                               thread_index, adj_index1);
2385             }
2386
2387           ip0 = vlib_buffer_get_current (p0);
2388           ip1 = vlib_buffer_get_current (p1);
2389
2390           error0 = error1 = IP4_ERROR_NONE;
2391           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2392
2393           /* Decrement TTL & update checksum.
2394              Works either endian, so no need for byte swap. */
2395           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2396             {
2397               i32 ttl0 = ip0->ttl;
2398
2399               /* Input node should have reject packets with ttl 0. */
2400               ASSERT (ip0->ttl > 0);
2401
2402               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2403               checksum0 += checksum0 >= 0xffff;
2404
2405               ip0->checksum = checksum0;
2406               ttl0 -= 1;
2407               ip0->ttl = ttl0;
2408
2409               /*
2410                * If the ttl drops below 1 when forwarding, generate
2411                * an ICMP response.
2412                */
2413               if (PREDICT_FALSE (ttl0 <= 0))
2414                 {
2415                   error0 = IP4_ERROR_TIME_EXPIRED;
2416                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2417                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2418                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2419                                                0);
2420                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2421                 }
2422
2423               /* Verify checksum. */
2424               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2425             }
2426           else
2427             {
2428               p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2429             }
2430           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2431             {
2432               i32 ttl1 = ip1->ttl;
2433
2434               /* Input node should have reject packets with ttl 0. */
2435               ASSERT (ip1->ttl > 0);
2436
2437               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2438               checksum1 += checksum1 >= 0xffff;
2439
2440               ip1->checksum = checksum1;
2441               ttl1 -= 1;
2442               ip1->ttl = ttl1;
2443
2444               /*
2445                * If the ttl drops below 1 when forwarding, generate
2446                * an ICMP response.
2447                */
2448               if (PREDICT_FALSE (ttl1 <= 0))
2449                 {
2450                   error1 = IP4_ERROR_TIME_EXPIRED;
2451                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2452                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2453                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2454                                                0);
2455                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2456                 }
2457
2458               /* Verify checksum. */
2459               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2460               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2461             }
2462           else
2463             {
2464               p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2465             }
2466
2467           /* Rewrite packet header and updates lengths. */
2468           adj0 = ip_get_adjacency (lm, adj_index0);
2469           adj1 = ip_get_adjacency (lm, adj_index1);
2470
2471           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2472           rw_len0 = adj0[0].rewrite_header.data_bytes;
2473           rw_len1 = adj1[0].rewrite_header.data_bytes;
2474           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2475           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2476
2477           /* Check MTU of outgoing interface. */
2478           error0 =
2479             (vlib_buffer_length_in_chain (vm, p0) >
2480              adj0[0].
2481              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2482              error0);
2483           error1 =
2484             (vlib_buffer_length_in_chain (vm, p1) >
2485              adj1[0].
2486              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2487              error1);
2488
2489           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2490            * to see the IP headerr */
2491           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2492             {
2493               next0 = adj0[0].rewrite_header.next_index;
2494               p0->current_data -= rw_len0;
2495               p0->current_length += rw_len0;
2496               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2497               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2498
2499               if (PREDICT_FALSE
2500                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2501                 vnet_feature_arc_start (lm->output_feature_arc_index,
2502                                         tx_sw_if_index0, &next0, p0);
2503             }
2504           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2505             {
2506               next1 = adj1[0].rewrite_header.next_index;
2507               p1->current_data -= rw_len1;
2508               p1->current_length += rw_len1;
2509
2510               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2511               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2512
2513               if (PREDICT_FALSE
2514                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2515                 vnet_feature_arc_start (lm->output_feature_arc_index,
2516                                         tx_sw_if_index1, &next1, p1);
2517             }
2518
2519           /* Guess we are only writing on simple Ethernet header. */
2520           vnet_rewrite_two_headers (adj0[0], adj1[0],
2521                                     ip0, ip1, sizeof (ethernet_header_t));
2522
2523           /*
2524            * Bump the per-adjacency counters
2525            */
2526           if (do_counters)
2527             {
2528               vlib_increment_combined_counter
2529                 (&adjacency_counters,
2530                  thread_index,
2531                  adj_index0, 1,
2532                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2533
2534               vlib_increment_combined_counter
2535                 (&adjacency_counters,
2536                  thread_index,
2537                  adj_index1, 1,
2538                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2539             }
2540
2541           if (is_midchain)
2542             {
2543               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2544               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2545             }
2546           if (is_mcast)
2547             {
2548               /*
2549                * copy bytes from the IP address into the MAC rewrite
2550                */
2551               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2552               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2553             }
2554
2555           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2556                                            to_next, n_left_to_next,
2557                                            pi0, pi1, next0, next1);
2558         }
2559
2560       while (n_left_from > 0 && n_left_to_next > 0)
2561         {
2562           ip_adjacency_t *adj0;
2563           vlib_buffer_t *p0;
2564           ip4_header_t *ip0;
2565           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2566           u32 tx_sw_if_index0;
2567
2568           pi0 = to_next[0] = from[0];
2569
2570           p0 = vlib_get_buffer (vm, pi0);
2571
2572           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2573
2574           adj0 = ip_get_adjacency (lm, adj_index0);
2575
2576           ip0 = vlib_buffer_get_current (p0);
2577
2578           error0 = IP4_ERROR_NONE;
2579           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2580
2581           /* Decrement TTL & update checksum. */
2582           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2583             {
2584               i32 ttl0 = ip0->ttl;
2585
2586               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2587
2588               checksum0 += checksum0 >= 0xffff;
2589
2590               ip0->checksum = checksum0;
2591
2592               ASSERT (ip0->ttl > 0);
2593
2594               ttl0 -= 1;
2595
2596               ip0->ttl = ttl0;
2597
2598               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2599
2600               if (PREDICT_FALSE (ttl0 <= 0))
2601                 {
2602                   /*
2603                    * If the ttl drops below 1 when forwarding, generate
2604                    * an ICMP response.
2605                    */
2606                   error0 = IP4_ERROR_TIME_EXPIRED;
2607                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2608                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2609                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2610                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2611                                                0);
2612                 }
2613             }
2614           else
2615             {
2616               p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2617             }
2618
2619           if (do_counters)
2620             vlib_prefetch_combined_counter (&adjacency_counters,
2621                                             thread_index, adj_index0);
2622
2623           /* Guess we are only writing on simple Ethernet header. */
2624           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2625           if (is_mcast)
2626             {
2627               /*
2628                * copy bytes from the IP address into the MAC rewrite
2629                */
2630               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2631             }
2632
2633           /* Update packet buffer attributes/set output interface. */
2634           rw_len0 = adj0[0].rewrite_header.data_bytes;
2635           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2636
2637           if (do_counters)
2638             vlib_increment_combined_counter
2639               (&adjacency_counters,
2640                thread_index, adj_index0, 1,
2641                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2642
2643           /* Check MTU of outgoing interface. */
2644           error0 = (vlib_buffer_length_in_chain (vm, p0)
2645                     > adj0[0].rewrite_header.max_l3_packet_bytes
2646                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2647
2648           p0->error = error_node->errors[error0];
2649
2650           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2651            * to see the IP headerr */
2652           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2653             {
2654               p0->current_data -= rw_len0;
2655               p0->current_length += rw_len0;
2656               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2657
2658               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2659               next0 = adj0[0].rewrite_header.next_index;
2660
2661               if (is_midchain)
2662                 {
2663                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2664                 }
2665
2666               if (PREDICT_FALSE
2667                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2668                 vnet_feature_arc_start (lm->output_feature_arc_index,
2669                                         tx_sw_if_index0, &next0, p0);
2670
2671             }
2672
2673           from += 1;
2674           n_left_from -= 1;
2675           to_next += 1;
2676           n_left_to_next -= 1;
2677
2678           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2679                                            to_next, n_left_to_next,
2680                                            pi0, next0);
2681         }
2682
2683       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2684     }
2685
2686   /* Need to do trace after rewrites to pick up new packet data. */
2687   if (node->flags & VLIB_NODE_FLAG_TRACE)
2688     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2689
2690   return frame->n_vectors;
2691 }
2692
2693
2694 /** @brief IPv4 rewrite node.
2695     @node ip4-rewrite
2696
2697     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2698     header checksum, fetch the ip adjacency, check the outbound mtu,
2699     apply the adjacency rewrite, and send pkts to the adjacency
2700     rewrite header's rewrite_next_index.
2701
2702     @param vm vlib_main_t corresponding to the current thread
2703     @param node vlib_node_runtime_t
2704     @param frame vlib_frame_t whose contents should be dispatched
2705
2706     @par Graph mechanics: buffer metadata, next index usage
2707
2708     @em Uses:
2709     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2710         - the rewrite adjacency index
2711     - <code>adj->lookup_next_index</code>
2712         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2713           the packet will be dropped.
2714     - <code>adj->rewrite_header</code>
2715         - Rewrite string length, rewrite string, next_index
2716
2717     @em Sets:
2718     - <code>b->current_data, b->current_length</code>
2719         - Updated net of applying the rewrite string
2720
2721     <em>Next Indices:</em>
2722     - <code> adj->rewrite_header.next_index </code>
2723       or @c error-drop
2724 */
2725 static uword
2726 ip4_rewrite (vlib_main_t * vm,
2727              vlib_node_runtime_t * node, vlib_frame_t * frame)
2728 {
2729   if (adj_are_counters_enabled ())
2730     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2731   else
2732     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2733 }
2734
2735 static uword
2736 ip4_midchain (vlib_main_t * vm,
2737               vlib_node_runtime_t * node, vlib_frame_t * frame)
2738 {
2739   if (adj_are_counters_enabled ())
2740     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2741   else
2742     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2743 }
2744
2745 static uword
2746 ip4_rewrite_mcast (vlib_main_t * vm,
2747                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2748 {
2749   if (adj_are_counters_enabled ())
2750     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2751   else
2752     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2753 }
2754
2755 static uword
2756 ip4_mcast_midchain (vlib_main_t * vm,
2757                     vlib_node_runtime_t * node, vlib_frame_t * frame)
2758 {
2759   if (adj_are_counters_enabled ())
2760     return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2761   else
2762     return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
2763 }
2764
2765 /* *INDENT-OFF* */
2766 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2767   .function = ip4_rewrite,
2768   .name = "ip4-rewrite",
2769   .vector_size = sizeof (u32),
2770
2771   .format_trace = format_ip4_rewrite_trace,
2772
2773   .n_next_nodes = 2,
2774   .next_nodes = {
2775     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2776     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2777   },
2778 };
2779 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2780
2781 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2782   .function = ip4_rewrite_mcast,
2783   .name = "ip4-rewrite-mcast",
2784   .vector_size = sizeof (u32),
2785
2786   .format_trace = format_ip4_rewrite_trace,
2787   .sibling_of = "ip4-rewrite",
2788 };
2789 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2790
2791 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2792   .function = ip4_mcast_midchain,
2793   .name = "ip4-mcast-midchain",
2794   .vector_size = sizeof (u32),
2795
2796   .format_trace = format_ip4_rewrite_trace,
2797   .sibling_of = "ip4-rewrite",
2798 };
2799 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
2800
2801 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2802   .function = ip4_midchain,
2803   .name = "ip4-midchain",
2804   .vector_size = sizeof (u32),
2805   .format_trace = format_ip4_forward_next_trace,
2806   .sibling_of =  "ip4-rewrite",
2807 };
2808 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2809 /* *INDENT-ON */
2810
2811 static clib_error_t *
2812 add_del_interface_table (vlib_main_t * vm,
2813                          unformat_input_t * input, vlib_cli_command_t * cmd)
2814 {
2815   vnet_main_t *vnm = vnet_get_main ();
2816   ip_interface_address_t *ia;
2817   clib_error_t *error = 0;
2818   u32 sw_if_index, table_id;
2819
2820   sw_if_index = ~0;
2821
2822   if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2823     {
2824       error = clib_error_return (0, "unknown interface `%U'",
2825                                  format_unformat_error, input);
2826       goto done;
2827     }
2828
2829   if (unformat (input, "%d", &table_id))
2830     ;
2831   else
2832     {
2833       error = clib_error_return (0, "expected table id `%U'",
2834                                  format_unformat_error, input);
2835       goto done;
2836     }
2837
2838   /*
2839    * If the interface already has in IP address, then a change int
2840    * VRF is not allowed. The IP address applied must first be removed.
2841    * We do not do that automatically here, since VPP has no knowledge
2842    * of whether thoses subnets are valid in the destination VRF.
2843    */
2844   /* *INDENT-OFF* */
2845   foreach_ip_interface_address (&ip4_main.lookup_main,
2846                                 ia, sw_if_index,
2847                                 1 /* honor unnumbered */,
2848   ({
2849       ip4_address_t * a;
2850
2851       a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2852       error = clib_error_return (0, "interface %U has address %U",
2853                                  format_vnet_sw_if_index_name, vnm,
2854                                  sw_if_index,
2855                                  format_ip4_address, a);
2856       goto done;
2857    }));
2858    /* *INDENT-ON* */
2859
2860 {
2861   ip4_main_t *im = &ip4_main;
2862   u32 fib_index;
2863
2864   fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2865
2866   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2867   im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2868
2869   fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2870   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2871   im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2872 }
2873
2874 done:
2875 return error;
2876 }
2877
2878 /*?
2879  * Place the indicated interface into the supplied IPv4 FIB table (also known
2880  * as a VRF). If the FIB table does not exist, this command creates it. To
2881  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2882  * FIB table will only be displayed if a route has been added to the table, or
2883  * an IP Address is assigned to an interface in the table (which adds a route
2884  * automatically).
2885  *
2886  * @note IP addresses added after setting the interface IP table are added to
2887  * the indicated FIB table. If an IP address is added prior to changing the
2888  * table then this is an error. The control plane must remove these addresses
2889  * first and then change the table. VPP will not automatically move the
2890  * addresses from the old to the new table as it does not know the validity
2891  * of such a change.
2892  *
2893  * @cliexpar
2894  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2895  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2896  ?*/
2897 /* *INDENT-OFF* */
2898 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2899 {
2900   .path = "set interface ip table",
2901   .function = add_del_interface_table,
2902   .short_help = "set interface ip table <interface> <table-id>",
2903 };
2904 /* *INDENT-ON* */
2905
2906 int
2907 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2908 {
2909   ip4_fib_mtrie_t *mtrie0;
2910   ip4_fib_mtrie_leaf_t leaf0;
2911   u32 lbi0;
2912
2913   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2914
2915   leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2916   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2917   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2918
2919   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2920
2921   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2922 }
2923
2924 static clib_error_t *
2925 test_lookup_command_fn (vlib_main_t * vm,
2926                         unformat_input_t * input, vlib_cli_command_t * cmd)
2927 {
2928   ip4_fib_t *fib;
2929   u32 table_id = 0;
2930   f64 count = 1;
2931   u32 n;
2932   int i;
2933   ip4_address_t ip4_base_address;
2934   u64 errors = 0;
2935
2936   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2937     {
2938       if (unformat (input, "table %d", &table_id))
2939         {
2940           /* Make sure the entry exists. */
2941           fib = ip4_fib_get (table_id);
2942           if ((fib) && (fib->index != table_id))
2943             return clib_error_return (0, "<fib-index> %d does not exist",
2944                                       table_id);
2945         }
2946       else if (unformat (input, "count %f", &count))
2947         ;
2948
2949       else if (unformat (input, "%U",
2950                          unformat_ip4_address, &ip4_base_address))
2951         ;
2952       else
2953         return clib_error_return (0, "unknown input `%U'",
2954                                   format_unformat_error, input);
2955     }
2956
2957   n = count;
2958
2959   for (i = 0; i < n; i++)
2960     {
2961       if (!ip4_lookup_validate (&ip4_base_address, table_id))
2962         errors++;
2963
2964       ip4_base_address.as_u32 =
2965         clib_host_to_net_u32 (1 +
2966                               clib_net_to_host_u32 (ip4_base_address.as_u32));
2967     }
2968
2969   if (errors)
2970     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2971   else
2972     vlib_cli_output (vm, "No errors in %d lookups\n", n);
2973
2974   return 0;
2975 }
2976
2977 /*?
2978  * Perform a lookup of an IPv4 Address (or range of addresses) in the
2979  * given FIB table to determine if there is a conflict with the
2980  * adjacency table. The fib-id can be determined by using the
2981  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2982  * of 0 is used.
2983  *
2984  * @todo This command uses fib-id, other commands use table-id (not
2985  * just a name, they are different indexes). Would like to change this
2986  * to table-id for consistency.
2987  *
2988  * @cliexpar
2989  * Example of how to run the test lookup command:
2990  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2991  * No errors in 2 lookups
2992  * @cliexend
2993 ?*/
2994 /* *INDENT-OFF* */
2995 VLIB_CLI_COMMAND (lookup_test_command, static) =
2996 {
2997   .path = "test lookup",
2998   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2999   .function = test_lookup_command_fn,
3000 };
3001 /* *INDENT-ON* */
3002
3003 int
3004 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3005 {
3006   ip4_main_t *im4 = &ip4_main;
3007   ip4_fib_t *fib;
3008   uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3009
3010   if (p == 0)
3011     return VNET_API_ERROR_NO_SUCH_FIB;
3012
3013   fib = ip4_fib_get (p[0]);
3014
3015   fib->flow_hash_config = flow_hash_config;
3016   return 0;
3017 }
3018
3019 static clib_error_t *
3020 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3021                              unformat_input_t * input,
3022                              vlib_cli_command_t * cmd)
3023 {
3024   int matched = 0;
3025   u32 table_id = 0;
3026   u32 flow_hash_config = 0;
3027   int rv;
3028
3029   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3030     {
3031       if (unformat (input, "table %d", &table_id))
3032         matched = 1;
3033 #define _(a,v) \
3034     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3035       foreach_flow_hash_bit
3036 #undef _
3037         else
3038         break;
3039     }
3040
3041   if (matched == 0)
3042     return clib_error_return (0, "unknown input `%U'",
3043                               format_unformat_error, input);
3044
3045   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3046   switch (rv)
3047     {
3048     case 0:
3049       break;
3050
3051     case VNET_API_ERROR_NO_SUCH_FIB:
3052       return clib_error_return (0, "no such FIB table %d", table_id);
3053
3054     default:
3055       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3056       break;
3057     }
3058
3059   return 0;
3060 }
3061
3062 /*?
3063  * Configure the set of IPv4 fields used by the flow hash.
3064  *
3065  * @cliexpar
3066  * Example of how to set the flow hash on a given table:
3067  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3068  * Example of how to display the configured flow hash:
3069  * @cliexstart{show ip fib}
3070  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3071  * 0.0.0.0/0
3072  *   unicast-ip4-chain
3073  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3074  *     [0] [@0]: dpo-drop ip6
3075  * 0.0.0.0/32
3076  *   unicast-ip4-chain
3077  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3078  *     [0] [@0]: dpo-drop ip6
3079  * 224.0.0.0/8
3080  *   unicast-ip4-chain
3081  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3082  *     [0] [@0]: dpo-drop ip6
3083  * 6.0.1.2/32
3084  *   unicast-ip4-chain
3085  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3086  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3087  * 7.0.0.1/32
3088  *   unicast-ip4-chain
3089  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3090  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3091  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3092  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3093  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3094  * 240.0.0.0/8
3095  *   unicast-ip4-chain
3096  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3097  *     [0] [@0]: dpo-drop ip6
3098  * 255.255.255.255/32
3099  *   unicast-ip4-chain
3100  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3101  *     [0] [@0]: dpo-drop ip6
3102  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3103  * 0.0.0.0/0
3104  *   unicast-ip4-chain
3105  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3106  *     [0] [@0]: dpo-drop ip6
3107  * 0.0.0.0/32
3108  *   unicast-ip4-chain
3109  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3110  *     [0] [@0]: dpo-drop ip6
3111  * 172.16.1.0/24
3112  *   unicast-ip4-chain
3113  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3114  *     [0] [@4]: ipv4-glean: af_packet0
3115  * 172.16.1.1/32
3116  *   unicast-ip4-chain
3117  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3118  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3119  * 172.16.1.2/32
3120  *   unicast-ip4-chain
3121  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3122  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3123  * 172.16.2.0/24
3124  *   unicast-ip4-chain
3125  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3126  *     [0] [@4]: ipv4-glean: af_packet1
3127  * 172.16.2.1/32
3128  *   unicast-ip4-chain
3129  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3130  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3131  * 224.0.0.0/8
3132  *   unicast-ip4-chain
3133  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3134  *     [0] [@0]: dpo-drop ip6
3135  * 240.0.0.0/8
3136  *   unicast-ip4-chain
3137  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3138  *     [0] [@0]: dpo-drop ip6
3139  * 255.255.255.255/32
3140  *   unicast-ip4-chain
3141  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3142  *     [0] [@0]: dpo-drop ip6
3143  * @cliexend
3144 ?*/
3145 /* *INDENT-OFF* */
3146 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3147 {
3148   .path = "set ip flow-hash",
3149   .short_help =
3150   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3151   .function = set_ip_flow_hash_command_fn,
3152 };
3153 /* *INDENT-ON* */
3154
3155 int
3156 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3157                              u32 table_index)
3158 {
3159   vnet_main_t *vnm = vnet_get_main ();
3160   vnet_interface_main_t *im = &vnm->interface_main;
3161   ip4_main_t *ipm = &ip4_main;
3162   ip_lookup_main_t *lm = &ipm->lookup_main;
3163   vnet_classify_main_t *cm = &vnet_classify_main;
3164   ip4_address_t *if_addr;
3165
3166   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3167     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3168
3169   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3170     return VNET_API_ERROR_NO_SUCH_ENTRY;
3171
3172   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3173   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3174
3175   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3176
3177   if (NULL != if_addr)
3178     {
3179       fib_prefix_t pfx = {
3180         .fp_len = 32,
3181         .fp_proto = FIB_PROTOCOL_IP4,
3182         .fp_addr.ip4 = *if_addr,
3183       };
3184       u32 fib_index;
3185
3186       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3187                                                        sw_if_index);
3188
3189
3190       if (table_index != (u32) ~ 0)
3191         {
3192           dpo_id_t dpo = DPO_INVALID;
3193
3194           dpo_set (&dpo,
3195                    DPO_CLASSIFY,
3196                    DPO_PROTO_IP4,
3197                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3198
3199           fib_table_entry_special_dpo_add (fib_index,
3200                                            &pfx,
3201                                            FIB_SOURCE_CLASSIFY,
3202                                            FIB_ENTRY_FLAG_NONE, &dpo);
3203           dpo_reset (&dpo);
3204         }
3205       else
3206         {
3207           fib_table_entry_special_remove (fib_index,
3208                                           &pfx, FIB_SOURCE_CLASSIFY);
3209         }
3210     }
3211
3212   return 0;
3213 }
3214
3215 static clib_error_t *
3216 set_ip_classify_command_fn (vlib_main_t * vm,
3217                             unformat_input_t * input,
3218                             vlib_cli_command_t * cmd)
3219 {
3220   u32 table_index = ~0;
3221   int table_index_set = 0;
3222   u32 sw_if_index = ~0;
3223   int rv;
3224
3225   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3226     {
3227       if (unformat (input, "table-index %d", &table_index))
3228         table_index_set = 1;
3229       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3230                          vnet_get_main (), &sw_if_index))
3231         ;
3232       else
3233         break;
3234     }
3235
3236   if (table_index_set == 0)
3237     return clib_error_return (0, "classify table-index must be specified");
3238
3239   if (sw_if_index == ~0)
3240     return clib_error_return (0, "interface / subif must be specified");
3241
3242   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3243
3244   switch (rv)
3245     {
3246     case 0:
3247       break;
3248
3249     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3250       return clib_error_return (0, "No such interface");
3251
3252     case VNET_API_ERROR_NO_SUCH_ENTRY:
3253       return clib_error_return (0, "No such classifier table");
3254     }
3255   return 0;
3256 }
3257
3258 /*?
3259  * Assign a classification table to an interface. The classification
3260  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3261  * commands. Once the table is created, use this command to filter packets
3262  * on an interface.
3263  *
3264  * @cliexpar
3265  * Example of how to assign a classification table to an interface:
3266  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3267 ?*/
3268 /* *INDENT-OFF* */
3269 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3270 {
3271     .path = "set ip classify",
3272     .short_help =
3273     "set ip classify intfc <interface> table-index <classify-idx>",
3274     .function = set_ip_classify_command_fn,
3275 };
3276 /* *INDENT-ON* */
3277
3278 /*
3279  * fd.io coding-style-patch-verification: ON
3280  *
3281  * Local Variables:
3282  * eval: (c-set-style "gnu")
3283  * End:
3284  */