Sub-net broadcast addresses for IPv4
[vpp.git] / src / vnet / ip / ip4_forward.c
1 /*
2  * Copyright (c) 2015 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 /*
16  * ip/ip4_forward.c: IP v4 forwarding
17  *
18  * Copyright (c) 2008 Eliot Dresselhaus
19  *
20  * Permission is hereby granted, free of charge, to any person obtaining
21  * a copy of this software and associated documentation files (the
22  * "Software"), to deal in the Software without restriction, including
23  * without limitation the rights to use, copy, modify, merge, publish,
24  * distribute, sublicense, and/or sell copies of the Software, and to
25  * permit persons to whom the Software is furnished to do so, subject to
26  * the following conditions:
27  *
28  * The above copyright notice and this permission notice shall be
29  * included in all copies or substantial portions of the Software.
30  *
31  *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  *  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  *  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  *  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35  *  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36  *  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37  *  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
38  */
39
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h>     /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h>   /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h>       /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h>     /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h>     /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h>       /* for mFIB table and entry creation */
54
55 /**
56  * @file
57  * @brief IPv4 Forwarding.
58  *
59  * This file contains the source code for IPv4 forwarding.
60  */
61
62 void
63 ip4_forward_next_trace (vlib_main_t * vm,
64                         vlib_node_runtime_t * node,
65                         vlib_frame_t * frame,
66                         vlib_rx_or_tx_t which_adj_index);
67
68 always_inline uword
69 ip4_lookup_inline (vlib_main_t * vm,
70                    vlib_node_runtime_t * node,
71                    vlib_frame_t * frame,
72                    int lookup_for_responses_to_locally_received_packets)
73 {
74   ip4_main_t *im = &ip4_main;
75   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76   u32 n_left_from, n_left_to_next, *from, *to_next;
77   ip_lookup_next_t next;
78   u32 cpu_index = os_get_cpu_number ();
79
80   from = vlib_frame_vector_args (frame);
81   n_left_from = frame->n_vectors;
82   next = node->cached_next_index;
83
84   while (n_left_from > 0)
85     {
86       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
87
88       while (n_left_from >= 8 && n_left_to_next >= 4)
89         {
90           vlib_buffer_t *p0, *p1, *p2, *p3;
91           ip4_header_t *ip0, *ip1, *ip2, *ip3;
92           __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93           ip_lookup_next_t next0, next1, next2, next3;
94           const load_balance_t *lb0, *lb1, *lb2, *lb3;
95           ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96           ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97           ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98           __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
99             is_tcp_udp0;
100           __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
101             is_tcp_udp1;
102           __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
103             is_tcp_udp2;
104           __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
105             is_tcp_udp3;
106           flow_hash_config_t flow_hash_config0, flow_hash_config1;
107           flow_hash_config_t flow_hash_config2, flow_hash_config3;
108           u32 hash_c0, hash_c1, hash_c2, hash_c3;
109           const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
110
111           /* Prefetch next iteration. */
112           {
113             vlib_buffer_t *p4, *p5, *p6, *p7;
114
115             p4 = vlib_get_buffer (vm, from[4]);
116             p5 = vlib_get_buffer (vm, from[5]);
117             p6 = vlib_get_buffer (vm, from[6]);
118             p7 = vlib_get_buffer (vm, from[7]);
119
120             vlib_prefetch_buffer_header (p4, LOAD);
121             vlib_prefetch_buffer_header (p5, LOAD);
122             vlib_prefetch_buffer_header (p6, LOAD);
123             vlib_prefetch_buffer_header (p7, LOAD);
124
125             CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
126             CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
127             CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
128             CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
129           }
130
131           pi0 = to_next[0] = from[0];
132           pi1 = to_next[1] = from[1];
133           pi2 = to_next[2] = from[2];
134           pi3 = to_next[3] = from[3];
135
136           from += 4;
137           to_next += 4;
138           n_left_to_next -= 4;
139           n_left_from -= 4;
140
141           p0 = vlib_get_buffer (vm, pi0);
142           p1 = vlib_get_buffer (vm, pi1);
143           p2 = vlib_get_buffer (vm, pi2);
144           p3 = vlib_get_buffer (vm, pi3);
145
146           ip0 = vlib_buffer_get_current (p0);
147           ip1 = vlib_buffer_get_current (p1);
148           ip2 = vlib_buffer_get_current (p2);
149           ip3 = vlib_buffer_get_current (p3);
150
151           dst_addr0 = &ip0->dst_address;
152           dst_addr1 = &ip1->dst_address;
153           dst_addr2 = &ip2->dst_address;
154           dst_addr3 = &ip3->dst_address;
155
156           fib_index0 =
157             vec_elt (im->fib_index_by_sw_if_index,
158                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
159           fib_index1 =
160             vec_elt (im->fib_index_by_sw_if_index,
161                      vnet_buffer (p1)->sw_if_index[VLIB_RX]);
162           fib_index2 =
163             vec_elt (im->fib_index_by_sw_if_index,
164                      vnet_buffer (p2)->sw_if_index[VLIB_RX]);
165           fib_index3 =
166             vec_elt (im->fib_index_by_sw_if_index,
167                      vnet_buffer (p3)->sw_if_index[VLIB_RX]);
168           fib_index0 =
169             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
170              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
171           fib_index1 =
172             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
173              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
174           fib_index2 =
175             (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
176              (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
177           fib_index3 =
178             (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
179              (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
180
181
182           if (!lookup_for_responses_to_locally_received_packets)
183             {
184               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
185               mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
186               mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
187               mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
188
189               leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
190
191               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
192               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
193               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
194               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
195             }
196
197           tcp0 = (void *) (ip0 + 1);
198           tcp1 = (void *) (ip1 + 1);
199           tcp2 = (void *) (ip2 + 1);
200           tcp3 = (void *) (ip3 + 1);
201
202           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
203                          || ip0->protocol == IP_PROTOCOL_UDP);
204           is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
205                          || ip1->protocol == IP_PROTOCOL_UDP);
206           is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
207                          || ip2->protocol == IP_PROTOCOL_UDP);
208           is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
209                          || ip1->protocol == IP_PROTOCOL_UDP);
210
211           if (!lookup_for_responses_to_locally_received_packets)
212             {
213               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
214               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
215               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
216               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
217             }
218
219           if (!lookup_for_responses_to_locally_received_packets)
220             {
221               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
222               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
223               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
224               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
225             }
226
227           if (!lookup_for_responses_to_locally_received_packets)
228             {
229               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
230               leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
231               leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
232               leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
233             }
234
235           if (lookup_for_responses_to_locally_received_packets)
236             {
237               lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
238               lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
239               lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
240               lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
241             }
242           else
243             {
244               /* Handle default route. */
245               leaf0 =
246                 (leaf0 ==
247                  IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
248               leaf1 =
249                 (leaf1 ==
250                  IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
251               leaf2 =
252                 (leaf2 ==
253                  IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
254               leaf3 =
255                 (leaf3 ==
256                  IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
257               lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
258               lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
259               lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
260               lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
261             }
262
263           lb0 = load_balance_get (lb_index0);
264           lb1 = load_balance_get (lb_index1);
265           lb2 = load_balance_get (lb_index2);
266           lb3 = load_balance_get (lb_index3);
267
268           /* Use flow hash to compute multipath adjacency. */
269           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
270           hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
271           hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
272           hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
273           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
274             {
275               flow_hash_config0 = lb0->lb_hash_config;
276               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
277                 ip4_compute_flow_hash (ip0, flow_hash_config0);
278             }
279           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
280             {
281               flow_hash_config1 = lb1->lb_hash_config;
282               hash_c1 = vnet_buffer (p1)->ip.flow_hash =
283                 ip4_compute_flow_hash (ip1, flow_hash_config1);
284             }
285           if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
286             {
287               flow_hash_config2 = lb2->lb_hash_config;
288               hash_c2 = vnet_buffer (p2)->ip.flow_hash =
289                 ip4_compute_flow_hash (ip2, flow_hash_config2);
290             }
291           if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
292             {
293               flow_hash_config3 = lb3->lb_hash_config;
294               hash_c3 = vnet_buffer (p3)->ip.flow_hash =
295                 ip4_compute_flow_hash (ip3, flow_hash_config3);
296             }
297
298           ASSERT (lb0->lb_n_buckets > 0);
299           ASSERT (is_pow2 (lb0->lb_n_buckets));
300           ASSERT (lb1->lb_n_buckets > 0);
301           ASSERT (is_pow2 (lb1->lb_n_buckets));
302           ASSERT (lb2->lb_n_buckets > 0);
303           ASSERT (is_pow2 (lb2->lb_n_buckets));
304           ASSERT (lb3->lb_n_buckets > 0);
305           ASSERT (is_pow2 (lb3->lb_n_buckets));
306
307           dpo0 = load_balance_get_bucket_i (lb0,
308                                             (hash_c0 &
309                                              (lb0->lb_n_buckets_minus_1)));
310           dpo1 = load_balance_get_bucket_i (lb1,
311                                             (hash_c1 &
312                                              (lb1->lb_n_buckets_minus_1)));
313           dpo2 = load_balance_get_bucket_i (lb2,
314                                             (hash_c2 &
315                                              (lb2->lb_n_buckets_minus_1)));
316           dpo3 = load_balance_get_bucket_i (lb3,
317                                             (hash_c3 &
318                                              (lb3->lb_n_buckets_minus_1)));
319
320           next0 = dpo0->dpoi_next_node;
321           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
322           next1 = dpo1->dpoi_next_node;
323           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
324           next2 = dpo2->dpoi_next_node;
325           vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
326           next3 = dpo3->dpoi_next_node;
327           vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
328
329           vlib_increment_combined_counter
330             (cm, cpu_index, lb_index0, 1,
331              vlib_buffer_length_in_chain (vm, p0)
332              + sizeof (ethernet_header_t));
333           vlib_increment_combined_counter
334             (cm, cpu_index, lb_index1, 1,
335              vlib_buffer_length_in_chain (vm, p1)
336              + sizeof (ethernet_header_t));
337           vlib_increment_combined_counter
338             (cm, cpu_index, lb_index2, 1,
339              vlib_buffer_length_in_chain (vm, p2)
340              + sizeof (ethernet_header_t));
341           vlib_increment_combined_counter
342             (cm, cpu_index, lb_index3, 1,
343              vlib_buffer_length_in_chain (vm, p3)
344              + sizeof (ethernet_header_t));
345
346           vlib_validate_buffer_enqueue_x4 (vm, node, next,
347                                            to_next, n_left_to_next,
348                                            pi0, pi1, pi2, pi3,
349                                            next0, next1, next2, next3);
350         }
351
352       while (n_left_from > 0 && n_left_to_next > 0)
353         {
354           vlib_buffer_t *p0;
355           ip4_header_t *ip0;
356           __attribute__ ((unused)) tcp_header_t *tcp0;
357           ip_lookup_next_t next0;
358           const load_balance_t *lb0;
359           ip4_fib_mtrie_t *mtrie0;
360           ip4_fib_mtrie_leaf_t leaf0;
361           ip4_address_t *dst_addr0;
362           __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
363           flow_hash_config_t flow_hash_config0;
364           const dpo_id_t *dpo0;
365           u32 hash_c0;
366
367           pi0 = from[0];
368           to_next[0] = pi0;
369
370           p0 = vlib_get_buffer (vm, pi0);
371
372           ip0 = vlib_buffer_get_current (p0);
373
374           dst_addr0 = &ip0->dst_address;
375
376           fib_index0 =
377             vec_elt (im->fib_index_by_sw_if_index,
378                      vnet_buffer (p0)->sw_if_index[VLIB_RX]);
379           fib_index0 =
380             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
381              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
382
383           if (!lookup_for_responses_to_locally_received_packets)
384             {
385               mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
386
387               leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
388
389               leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
390             }
391
392           tcp0 = (void *) (ip0 + 1);
393
394           is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
395                          || ip0->protocol == IP_PROTOCOL_UDP);
396
397           if (!lookup_for_responses_to_locally_received_packets)
398             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
399
400           if (!lookup_for_responses_to_locally_received_packets)
401             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
402
403           if (!lookup_for_responses_to_locally_received_packets)
404             leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
405
406           if (lookup_for_responses_to_locally_received_packets)
407             lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
408           else
409             {
410               /* Handle default route. */
411               leaf0 =
412                 (leaf0 ==
413                  IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
414               lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
415             }
416
417           lb0 = load_balance_get (lbi0);
418
419           /* Use flow hash to compute multipath adjacency. */
420           hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
421           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
422             {
423               flow_hash_config0 = lb0->lb_hash_config;
424
425               hash_c0 = vnet_buffer (p0)->ip.flow_hash =
426                 ip4_compute_flow_hash (ip0, flow_hash_config0);
427             }
428
429           ASSERT (lb0->lb_n_buckets > 0);
430           ASSERT (is_pow2 (lb0->lb_n_buckets));
431
432           dpo0 = load_balance_get_bucket_i (lb0,
433                                             (hash_c0 &
434                                              (lb0->lb_n_buckets_minus_1)));
435
436           next0 = dpo0->dpoi_next_node;
437           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
438
439           vlib_increment_combined_counter
440             (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
441
442           from += 1;
443           to_next += 1;
444           n_left_to_next -= 1;
445           n_left_from -= 1;
446
447           if (PREDICT_FALSE (next0 != next))
448             {
449               n_left_to_next += 1;
450               vlib_put_next_frame (vm, node, next, n_left_to_next);
451               next = next0;
452               vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
453               to_next[0] = pi0;
454               to_next += 1;
455               n_left_to_next -= 1;
456             }
457         }
458
459       vlib_put_next_frame (vm, node, next, n_left_to_next);
460     }
461
462   if (node->flags & VLIB_NODE_FLAG_TRACE)
463     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
464
465   return frame->n_vectors;
466 }
467
468 /** @brief IPv4 lookup node.
469     @node ip4-lookup
470
471     This is the main IPv4 lookup dispatch node.
472
473     @param vm vlib_main_t corresponding to the current thread
474     @param node vlib_node_runtime_t
475     @param frame vlib_frame_t whose contents should be dispatched
476
477     @par Graph mechanics: buffer metadata, next index usage
478
479     @em Uses:
480     - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
481         - Indicates the @c sw_if_index value of the interface that the
482           packet was received on.
483     - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
484         - When the value is @c ~0 then the node performs a longest prefix
485           match (LPM) for the packet destination address in the FIB attached
486           to the receive interface.
487         - Otherwise perform LPM for the packet destination address in the
488           indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
489           value (0, 1, ...) and not a VRF id.
490
491     @em Sets:
492     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
493         - The index (@c dpoi_index) of the selected load-balance bucket.
494
495     <em>Next Index:</em>
496     - Dispatches the packet to the node index found in
497       @c dpo->dpoi_next_node
498       (where @c dpo is the load-balance bucket chosen for the lookup result).
499 */
500 static uword
501 ip4_lookup (vlib_main_t * vm,
502             vlib_node_runtime_t * node, vlib_frame_t * frame)
503 {
504   return ip4_lookup_inline (vm, node, frame,
505                             /* lookup_for_responses_to_locally_received_packets */
506                             0);
507
508 }
509
510 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
511
512 VLIB_REGISTER_NODE (ip4_lookup_node) =
513 {
514 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
515     sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
516     IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
517
518 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
519
520 always_inline uword
521 ip4_load_balance (vlib_main_t * vm,
522                   vlib_node_runtime_t * node, vlib_frame_t * frame)
523 {
524   vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
525   u32 n_left_from, n_left_to_next, *from, *to_next;
526   ip_lookup_next_t next;
527   u32 cpu_index = os_get_cpu_number ();
528
529   from = vlib_frame_vector_args (frame);
530   n_left_from = frame->n_vectors;
531   next = node->cached_next_index;
532
533   if (node->flags & VLIB_NODE_FLAG_TRACE)
534     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
535
536   while (n_left_from > 0)
537     {
538       vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
539
540
541       while (n_left_from >= 4 && n_left_to_next >= 2)
542         {
543           ip_lookup_next_t next0, next1;
544           const load_balance_t *lb0, *lb1;
545           vlib_buffer_t *p0, *p1;
546           u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
547           const ip4_header_t *ip0, *ip1;
548           const dpo_id_t *dpo0, *dpo1;
549
550           /* Prefetch next iteration. */
551           {
552             vlib_buffer_t *p2, *p3;
553
554             p2 = vlib_get_buffer (vm, from[2]);
555             p3 = vlib_get_buffer (vm, from[3]);
556
557             vlib_prefetch_buffer_header (p2, STORE);
558             vlib_prefetch_buffer_header (p3, STORE);
559
560             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
561             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
562           }
563
564           pi0 = to_next[0] = from[0];
565           pi1 = to_next[1] = from[1];
566
567           from += 2;
568           n_left_from -= 2;
569           to_next += 2;
570           n_left_to_next -= 2;
571
572           p0 = vlib_get_buffer (vm, pi0);
573           p1 = vlib_get_buffer (vm, pi1);
574
575           ip0 = vlib_buffer_get_current (p0);
576           ip1 = vlib_buffer_get_current (p1);
577           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
578           lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
579
580           lb0 = load_balance_get (lbi0);
581           lb1 = load_balance_get (lbi1);
582
583           /*
584            * this node is for via FIBs we can re-use the hash value from the
585            * to node if present.
586            * We don't want to use the same hash value at each level in the recursion
587            * graph as that would lead to polarisation
588            */
589           hc0 = hc1 = 0;
590
591           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
592             {
593               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
594                 {
595                   hc0 = vnet_buffer (p0)->ip.flow_hash =
596                     vnet_buffer (p0)->ip.flow_hash >> 1;
597                 }
598               else
599                 {
600                   hc0 = vnet_buffer (p0)->ip.flow_hash =
601                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
602                 }
603             }
604           if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
605             {
606               if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
607                 {
608                   hc1 = vnet_buffer (p1)->ip.flow_hash =
609                     vnet_buffer (p1)->ip.flow_hash >> 1;
610                 }
611               else
612                 {
613                   hc1 = vnet_buffer (p1)->ip.flow_hash =
614                     ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
615                 }
616             }
617
618           dpo0 =
619             load_balance_get_bucket_i (lb0,
620                                        hc0 & (lb0->lb_n_buckets_minus_1));
621           dpo1 =
622             load_balance_get_bucket_i (lb1,
623                                        hc1 & (lb1->lb_n_buckets_minus_1));
624
625           next0 = dpo0->dpoi_next_node;
626           next1 = dpo1->dpoi_next_node;
627
628           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
630
631           vlib_increment_combined_counter
632             (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633           vlib_increment_combined_counter
634             (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
635
636           vlib_validate_buffer_enqueue_x2 (vm, node, next,
637                                            to_next, n_left_to_next,
638                                            pi0, pi1, next0, next1);
639         }
640
641       while (n_left_from > 0 && n_left_to_next > 0)
642         {
643           ip_lookup_next_t next0;
644           const load_balance_t *lb0;
645           vlib_buffer_t *p0;
646           u32 pi0, lbi0, hc0;
647           const ip4_header_t *ip0;
648           const dpo_id_t *dpo0;
649
650           pi0 = from[0];
651           to_next[0] = pi0;
652           from += 1;
653           to_next += 1;
654           n_left_to_next -= 1;
655           n_left_from -= 1;
656
657           p0 = vlib_get_buffer (vm, pi0);
658
659           ip0 = vlib_buffer_get_current (p0);
660           lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
661
662           lb0 = load_balance_get (lbi0);
663
664           hc0 = 0;
665           if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
666             {
667               if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
668                 {
669                   hc0 = vnet_buffer (p0)->ip.flow_hash =
670                     vnet_buffer (p0)->ip.flow_hash >> 1;
671                 }
672               else
673                 {
674                   hc0 = vnet_buffer (p0)->ip.flow_hash =
675                     ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
676                 }
677             }
678
679           dpo0 =
680             load_balance_get_bucket_i (lb0,
681                                        hc0 & (lb0->lb_n_buckets_minus_1));
682
683           next0 = dpo0->dpoi_next_node;
684           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
685
686           vlib_increment_combined_counter
687             (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
688
689           vlib_validate_buffer_enqueue_x1 (vm, node, next,
690                                            to_next, n_left_to_next,
691                                            pi0, next0);
692         }
693
694       vlib_put_next_frame (vm, node, next, n_left_to_next);
695     }
696
697   return frame->n_vectors;
698 }
699
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
701 {
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703     sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704     format_ip4_lookup_trace,};
705
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
707
708 /* get first interface address */
709 ip4_address_t *
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711                              ip_interface_address_t ** result_ia)
712 {
713   ip_lookup_main_t *lm = &im->lookup_main;
714   ip_interface_address_t *ia = 0;
715   ip4_address_t *result = 0;
716
717   /* *INDENT-OFF* */
718   foreach_ip_interface_address
719     (lm, ia, sw_if_index,
720      1 /* honor unnumbered */ ,
721      ({
722        ip4_address_t * a =
723          ip_interface_address_get_address (lm, ia);
724        result = a;
725        break;
726      }));
727   /* *INDENT-OFF* */
728   if (result_ia)
729     *result_ia = result ? ia : 0;
730   return result;
731 }
732
733 static void
734 ip4_add_interface_routes (u32 sw_if_index,
735                           ip4_main_t * im, u32 fib_index,
736                           ip_interface_address_t * a)
737 {
738   ip_lookup_main_t *lm = &im->lookup_main;
739   ip4_address_t *address = ip_interface_address_get_address (lm, a);
740   fib_prefix_t pfx = {
741     .fp_len = a->address_length,
742     .fp_proto = FIB_PROTOCOL_IP4,
743     .fp_addr.ip4 = *address,
744   };
745
746   a->neighbor_probe_adj_index = ~0;
747
748   if (pfx.fp_len <= 30)
749     {
750       /* a /30 or shorter - add a glean for the network address */
751       fib_node_index_t fei;
752
753       fei = fib_table_entry_update_one_path (fib_index, &pfx,
754                                              FIB_SOURCE_INTERFACE,
755                                              (FIB_ENTRY_FLAG_CONNECTED |
756                                               FIB_ENTRY_FLAG_ATTACHED),
757                                              FIB_PROTOCOL_IP4,
758                                              /* No next-hop address */
759                                              NULL,
760                                              sw_if_index,
761                                              // invalid FIB index
762                                              ~0,
763                                              1,
764                                              // no out-label stack
765                                              NULL,
766                                              FIB_ROUTE_PATH_FLAG_NONE);
767       a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
768
769       /* Add the two broadcast addresses as drop */
770       fib_prefix_t net_pfx = {
771         .fp_len = 32,
772         .fp_proto = FIB_PROTOCOL_IP4,
773         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
774       };
775       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
776         fib_table_entry_special_add(fib_index,
777                                     &net_pfx,
778                                     FIB_SOURCE_INTERFACE,
779                                     (FIB_ENTRY_FLAG_DROP |
780                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
781                                     ADJ_INDEX_INVALID);
782       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
783       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
784         fib_table_entry_special_add(fib_index,
785                                     &net_pfx,
786                                     FIB_SOURCE_INTERFACE,
787                                     (FIB_ENTRY_FLAG_DROP |
788                                      FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
789                                     ADJ_INDEX_INVALID);
790     }
791   else if (pfx.fp_len == 31)
792     {
793       u32 mask = clib_host_to_net_u32(1);
794       fib_prefix_t net_pfx = pfx;
795
796       net_pfx.fp_len = 32;
797       net_pfx.fp_addr.ip4.as_u32 ^= mask;
798
799       /* a /31 - add the other end as an attached host */
800       fib_table_entry_update_one_path (fib_index, &net_pfx,
801                                        FIB_SOURCE_INTERFACE,
802                                        (FIB_ENTRY_FLAG_ATTACHED),
803                                        FIB_PROTOCOL_IP4,
804                                        &net_pfx.fp_addr,
805                                        sw_if_index,
806                                        // invalid FIB index
807                                        ~0,
808                                        1,
809                                        NULL,
810                                        FIB_ROUTE_PATH_FLAG_NONE);
811     }
812   pfx.fp_len = 32;
813
814   if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
815     {
816       u32 classify_table_index =
817         lm->classify_table_index_by_sw_if_index[sw_if_index];
818       if (classify_table_index != (u32) ~ 0)
819         {
820           dpo_id_t dpo = DPO_INVALID;
821
822           dpo_set (&dpo,
823                    DPO_CLASSIFY,
824                    DPO_PROTO_IP4,
825                    classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
826
827           fib_table_entry_special_dpo_add (fib_index,
828                                            &pfx,
829                                            FIB_SOURCE_CLASSIFY,
830                                            FIB_ENTRY_FLAG_NONE, &dpo);
831           dpo_reset (&dpo);
832         }
833     }
834
835   fib_table_entry_update_one_path (fib_index, &pfx,
836                                    FIB_SOURCE_INTERFACE,
837                                    (FIB_ENTRY_FLAG_CONNECTED |
838                                     FIB_ENTRY_FLAG_LOCAL),
839                                    FIB_PROTOCOL_IP4,
840                                    &pfx.fp_addr,
841                                    sw_if_index,
842                                    // invalid FIB index
843                                    ~0,
844                                    1, NULL,
845                                    FIB_ROUTE_PATH_FLAG_NONE);
846 }
847
848 static void
849 ip4_del_interface_routes (ip4_main_t * im,
850                           u32 fib_index,
851                           ip4_address_t * address, u32 address_length)
852 {
853   fib_prefix_t pfx = {
854     .fp_len = address_length,
855     .fp_proto = FIB_PROTOCOL_IP4,
856     .fp_addr.ip4 = *address,
857   };
858
859   if (pfx.fp_len <= 30)
860     {
861       fib_prefix_t net_pfx = {
862         .fp_len = 32,
863         .fp_proto = FIB_PROTOCOL_IP4,
864         .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
865       };
866       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
867         fib_table_entry_special_remove(fib_index,
868                                        &net_pfx,
869                                        FIB_SOURCE_INTERFACE);
870       net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
871       if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
872         fib_table_entry_special_remove(fib_index,
873                                        &net_pfx,
874                                        FIB_SOURCE_INTERFACE);
875       fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
876     }
877     else if (pfx.fp_len == 31)
878     {
879       u32 mask = clib_host_to_net_u32(1);
880       fib_prefix_t net_pfx = pfx;
881
882       net_pfx.fp_len = 32;
883       net_pfx.fp_addr.ip4.as_u32 ^= mask;
884
885       fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
886     }
887
888   pfx.fp_len = 32;
889   fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
890 }
891
892 void
893 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
894 {
895   ip4_main_t *im = &ip4_main;
896
897   vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
898
899   /*
900    * enable/disable only on the 1<->0 transition
901    */
902   if (is_enable)
903     {
904       if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
905         return;
906     }
907   else
908     {
909       ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
910       if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
911         return;
912     }
913   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
914                                !is_enable, 0, 0);
915
916
917   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
918                                sw_if_index, !is_enable, 0, 0);
919 }
920
921 static clib_error_t *
922 ip4_add_del_interface_address_internal (vlib_main_t * vm,
923                                         u32 sw_if_index,
924                                         ip4_address_t * address,
925                                         u32 address_length, u32 is_del)
926 {
927   vnet_main_t *vnm = vnet_get_main ();
928   ip4_main_t *im = &ip4_main;
929   ip_lookup_main_t *lm = &im->lookup_main;
930   clib_error_t *error = 0;
931   u32 if_address_index, elts_before;
932   ip4_address_fib_t ip4_af, *addr_fib = 0;
933
934   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
935   ip4_addr_fib_init (&ip4_af, address,
936                      vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
937   vec_add1 (addr_fib, ip4_af);
938
939   /* FIXME-LATER
940    * there is no support for adj-fib handling in the presence of overlapping
941    * subnets on interfaces. Easy fix - disallow overlapping subnets, like
942    * most routers do.
943    */
944   /* *INDENT-OFF* */
945   if (!is_del)
946     {
947       /* When adding an address check that it does not conflict
948          with an existing address. */
949       ip_interface_address_t *ia;
950       foreach_ip_interface_address
951         (&im->lookup_main, ia, sw_if_index,
952          0 /* honor unnumbered */ ,
953          ({
954            ip4_address_t * x =
955              ip_interface_address_get_address
956              (&im->lookup_main, ia);
957            if (ip4_destination_matches_route
958                (im, address, x, ia->address_length) ||
959                ip4_destination_matches_route (im,
960                                               x,
961                                               address,
962                                               address_length))
963              return
964                clib_error_create
965                ("failed to add %U which conflicts with %U for interface %U",
966                 format_ip4_address_and_length, address,
967                 address_length,
968                 format_ip4_address_and_length, x,
969                 ia->address_length,
970                 format_vnet_sw_if_index_name, vnm,
971                 sw_if_index);
972          }));
973     }
974   /* *INDENT-ON* */
975
976   elts_before = pool_elts (lm->if_address_pool);
977
978   error = ip_interface_address_add_del
979     (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
980   if (error)
981     goto done;
982
983   ip4_sw_interface_enable_disable (sw_if_index, !is_del);
984
985   if (is_del)
986     ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
987   else
988     ip4_add_interface_routes (sw_if_index,
989                               im, ip4_af.fib_index,
990                               pool_elt_at_index
991                               (lm->if_address_pool, if_address_index));
992
993   /* If pool did not grow/shrink: add duplicate address. */
994   if (elts_before != pool_elts (lm->if_address_pool))
995     {
996       ip4_add_del_interface_address_callback_t *cb;
997       vec_foreach (cb, im->add_del_interface_address_callbacks)
998         cb->function (im, cb->function_opaque, sw_if_index,
999                       address, address_length, if_address_index, is_del);
1000     }
1001
1002 done:
1003   vec_free (addr_fib);
1004   return error;
1005 }
1006
1007 clib_error_t *
1008 ip4_add_del_interface_address (vlib_main_t * vm,
1009                                u32 sw_if_index,
1010                                ip4_address_t * address,
1011                                u32 address_length, u32 is_del)
1012 {
1013   return ip4_add_del_interface_address_internal
1014     (vm, sw_if_index, address, address_length, is_del);
1015 }
1016
1017 /* Built-in ip4 unicast rx feature path definition */
1018 /* *INDENT-OFF* */
1019 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
1020 {
1021   .arc_name = "ip4-unicast",
1022   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1023   .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
1024 };
1025
1026 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1027 {
1028   .arc_name = "ip4-unicast",
1029   .node_name = "ip4-flow-classify",
1030   .runs_before = VNET_FEATURES ("ip4-inacl"),
1031 };
1032
1033 VNET_FEATURE_INIT (ip4_inacl, static) =
1034 {
1035   .arc_name = "ip4-unicast",
1036   .node_name = "ip4-inacl",
1037   .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1038 };
1039
1040 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1041 {
1042   .arc_name = "ip4-unicast",
1043   .node_name = "ip4-source-check-via-rx",
1044   .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1045 };
1046
1047 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1048 {
1049   .arc_name = "ip4-unicast",
1050   .node_name = "ip4-source-check-via-any",
1051   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1052 };
1053
1054 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1055 {
1056   .arc_name = "ip4-unicast",
1057   .node_name = "ip4-source-and-port-range-check-rx",
1058   .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1059 };
1060
1061 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1062 {
1063   .arc_name = "ip4-unicast",
1064   .node_name = "ip4-policer-classify",
1065   .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1066 };
1067
1068 VNET_FEATURE_INIT (ip4_ipsec, static) =
1069 {
1070   .arc_name = "ip4-unicast",
1071   .node_name = "ipsec-input-ip4",
1072   .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1073 };
1074
1075 VNET_FEATURE_INIT (ip4_vpath, static) =
1076 {
1077   .arc_name = "ip4-unicast",
1078   .node_name = "vpath-input-ip4",
1079   .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1080 };
1081
1082 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1083 {
1084   .arc_name = "ip4-unicast",
1085   .node_name = "ip4-vxlan-bypass",
1086   .runs_before = VNET_FEATURES ("ip4-lookup"),
1087 };
1088
1089 VNET_FEATURE_INIT (ip4_drop, static) =
1090 {
1091   .arc_name = "ip4-unicast",
1092   .node_name = "ip4-drop",
1093   .runs_before = VNET_FEATURES ("ip4-lookup"),
1094 };
1095
1096 VNET_FEATURE_INIT (ip4_lookup, static) =
1097 {
1098   .arc_name = "ip4-unicast",
1099   .node_name = "ip4-lookup",
1100   .runs_before = 0,     /* not before any other features */
1101 };
1102
1103 /* Built-in ip4 multicast rx feature path definition */
1104 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1105 {
1106   .arc_name = "ip4-multicast",
1107   .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1108   .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1109 };
1110
1111 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1112 {
1113   .arc_name = "ip4-multicast",
1114   .node_name = "vpath-input-ip4",
1115   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1116 };
1117
1118 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1119 {
1120   .arc_name = "ip4-multicast",
1121   .node_name = "ip4-drop",
1122   .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1123 };
1124
1125 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1126 {
1127   .arc_name = "ip4-multicast",
1128   .node_name = "ip4-mfib-forward-lookup",
1129   .runs_before = 0,     /* last feature */
1130 };
1131
1132 /* Source and port-range check ip4 tx feature path definition */
1133 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1134 {
1135   .arc_name = "ip4-output",
1136   .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1137   .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1138 };
1139
1140 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1141 {
1142   .arc_name = "ip4-output",
1143   .node_name = "ip4-source-and-port-range-check-tx",
1144   .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1145 };
1146
1147 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1148 {
1149   .arc_name = "ip4-output",
1150   .node_name = "ipsec-output-ip4",
1151   .runs_before = VNET_FEATURES ("interface-output"),
1152 };
1153
1154 /* Built-in ip4 tx feature path definition */
1155 VNET_FEATURE_INIT (ip4_interface_output, static) =
1156 {
1157   .arc_name = "ip4-output",
1158   .node_name = "interface-output",
1159   .runs_before = 0,     /* not before any other features */
1160 };
1161 /* *INDENT-ON* */
1162
1163 static clib_error_t *
1164 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1165 {
1166   ip4_main_t *im = &ip4_main;
1167
1168   /* Fill in lookup tables with default table (0). */
1169   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1170   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1171
1172   vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1173                                is_add, 0, 0);
1174
1175   vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1176                                is_add, 0, 0);
1177
1178   return /* no error */ 0;
1179 }
1180
1181 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1182
1183 /* Global IP4 main. */
1184 ip4_main_t ip4_main;
1185
1186 clib_error_t *
1187 ip4_lookup_init (vlib_main_t * vm)
1188 {
1189   ip4_main_t *im = &ip4_main;
1190   clib_error_t *error;
1191   uword i;
1192
1193   if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1194     return error;
1195
1196   for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1197     {
1198       u32 m;
1199
1200       if (i < 32)
1201         m = pow2_mask (i) << (32 - i);
1202       else
1203         m = ~0;
1204       im->fib_masks[i] = clib_host_to_net_u32 (m);
1205     }
1206
1207   ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1208
1209   /* Create FIB with index 0 and table id of 0. */
1210   fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1211   mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1212
1213   {
1214     pg_node_t *pn;
1215     pn = pg_get_node (ip4_lookup_node.index);
1216     pn->unformat_edit = unformat_pg_ip4_header;
1217   }
1218
1219   {
1220     ethernet_arp_header_t h;
1221
1222     memset (&h, 0, sizeof (h));
1223
1224     /* Set target ethernet address to all zeros. */
1225     memset (h.ip4_over_ethernet[1].ethernet, 0,
1226             sizeof (h.ip4_over_ethernet[1].ethernet));
1227
1228 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1229 #define _8(f,v) h.f = v;
1230     _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1231     _16 (l3_type, ETHERNET_TYPE_IP4);
1232     _8 (n_l2_address_bytes, 6);
1233     _8 (n_l3_address_bytes, 4);
1234     _16 (opcode, ETHERNET_ARP_OPCODE_request);
1235 #undef _16
1236 #undef _8
1237
1238     vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1239                                /* data */ &h,
1240                                sizeof (h),
1241                                /* alloc chunk size */ 8,
1242                                "ip4 arp");
1243   }
1244
1245   return error;
1246 }
1247
1248 VLIB_INIT_FUNCTION (ip4_lookup_init);
1249
1250 typedef struct
1251 {
1252   /* Adjacency taken. */
1253   u32 dpo_index;
1254   u32 flow_hash;
1255   u32 fib_index;
1256
1257   /* Packet data, possibly *after* rewrite. */
1258   u8 packet_data[64 - 1 * sizeof (u32)];
1259 }
1260 ip4_forward_next_trace_t;
1261
1262 u8 *
1263 format_ip4_forward_next_trace (u8 * s, va_list * args)
1264 {
1265   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1266   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1267   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1268   uword indent = format_get_indent (s);
1269   s = format (s, "%U%U",
1270               format_white_space, indent,
1271               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1272   return s;
1273 }
1274
1275 static u8 *
1276 format_ip4_lookup_trace (u8 * s, va_list * args)
1277 {
1278   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1279   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1280   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1281   uword indent = format_get_indent (s);
1282
1283   s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1284               t->fib_index, t->dpo_index, t->flow_hash);
1285   s = format (s, "\n%U%U",
1286               format_white_space, indent,
1287               format_ip4_header, t->packet_data, sizeof (t->packet_data));
1288   return s;
1289 }
1290
1291 static u8 *
1292 format_ip4_rewrite_trace (u8 * s, va_list * args)
1293 {
1294   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1295   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1296   ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1297   uword indent = format_get_indent (s);
1298
1299   s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1300               t->fib_index, t->dpo_index, format_ip_adjacency,
1301               t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1302   s = format (s, "\n%U%U",
1303               format_white_space, indent,
1304               format_ip_adjacency_packet_data,
1305               t->dpo_index, t->packet_data, sizeof (t->packet_data));
1306   return s;
1307 }
1308
1309 /* Common trace function for all ip4-forward next nodes. */
1310 void
1311 ip4_forward_next_trace (vlib_main_t * vm,
1312                         vlib_node_runtime_t * node,
1313                         vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1314 {
1315   u32 *from, n_left;
1316   ip4_main_t *im = &ip4_main;
1317
1318   n_left = frame->n_vectors;
1319   from = vlib_frame_vector_args (frame);
1320
1321   while (n_left >= 4)
1322     {
1323       u32 bi0, bi1;
1324       vlib_buffer_t *b0, *b1;
1325       ip4_forward_next_trace_t *t0, *t1;
1326
1327       /* Prefetch next iteration. */
1328       vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1329       vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1330
1331       bi0 = from[0];
1332       bi1 = from[1];
1333
1334       b0 = vlib_get_buffer (vm, bi0);
1335       b1 = vlib_get_buffer (vm, bi1);
1336
1337       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1338         {
1339           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1340           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1341           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1342           t0->fib_index =
1343             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1344              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1345             vec_elt (im->fib_index_by_sw_if_index,
1346                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1347
1348           clib_memcpy (t0->packet_data,
1349                        vlib_buffer_get_current (b0),
1350                        sizeof (t0->packet_data));
1351         }
1352       if (b1->flags & VLIB_BUFFER_IS_TRACED)
1353         {
1354           t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1355           t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1356           t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1357           t1->fib_index =
1358             (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1359              (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1360             vec_elt (im->fib_index_by_sw_if_index,
1361                      vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1362           clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1363                        sizeof (t1->packet_data));
1364         }
1365       from += 2;
1366       n_left -= 2;
1367     }
1368
1369   while (n_left >= 1)
1370     {
1371       u32 bi0;
1372       vlib_buffer_t *b0;
1373       ip4_forward_next_trace_t *t0;
1374
1375       bi0 = from[0];
1376
1377       b0 = vlib_get_buffer (vm, bi0);
1378
1379       if (b0->flags & VLIB_BUFFER_IS_TRACED)
1380         {
1381           t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1382           t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1383           t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1384           t0->fib_index =
1385             (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1386              (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1387             vec_elt (im->fib_index_by_sw_if_index,
1388                      vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1389           clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1390                        sizeof (t0->packet_data));
1391         }
1392       from += 1;
1393       n_left -= 1;
1394     }
1395 }
1396
1397 static uword
1398 ip4_drop_or_punt (vlib_main_t * vm,
1399                   vlib_node_runtime_t * node,
1400                   vlib_frame_t * frame, ip4_error_t error_code)
1401 {
1402   u32 *buffers = vlib_frame_vector_args (frame);
1403   uword n_packets = frame->n_vectors;
1404
1405   vlib_error_drop_buffers (vm, node, buffers,
1406                            /* stride */ 1,
1407                            n_packets,
1408                            /* next */ 0,
1409                            ip4_input_node.index, error_code);
1410
1411   if (node->flags & VLIB_NODE_FLAG_TRACE)
1412     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1413
1414   return n_packets;
1415 }
1416
1417 static uword
1418 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1419 {
1420   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
1421 }
1422
1423 static uword
1424 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1425 {
1426   return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
1427 }
1428
1429 /* *INDENT-OFF* */
1430 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1431 {
1432   .function = ip4_drop,.
1433   name = "ip4-drop",
1434   .vector_size = sizeof (u32),
1435   .format_trace = format_ip4_forward_next_trace,
1436   .n_next_nodes = 1,
1437   .next_nodes = {
1438     [0] = "error-drop",
1439   },
1440 };
1441
1442 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1443
1444 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1445 {
1446   .function = ip4_punt,
1447   .name = "ip4-punt",
1448   .vector_size = sizeof (u32),
1449   .format_trace = format_ip4_forward_next_trace,
1450   .n_next_nodes = 1,
1451   .next_nodes = {
1452     [0] = "error-punt",
1453   },
1454 };
1455
1456 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1457 /* *INDENT-ON */
1458
1459 /* Compute TCP/UDP/ICMP4 checksum in software. */
1460 u16
1461 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1462                               ip4_header_t * ip0)
1463 {
1464   ip_csum_t sum0;
1465   u32 ip_header_length, payload_length_host_byte_order;
1466   u32 n_this_buffer, n_bytes_left;
1467   u16 sum16;
1468   void *data_this_buffer;
1469
1470   /* Initialize checksum with ip header. */
1471   ip_header_length = ip4_header_bytes (ip0);
1472   payload_length_host_byte_order =
1473     clib_net_to_host_u16 (ip0->length) - ip_header_length;
1474   sum0 =
1475     clib_host_to_net_u32 (payload_length_host_byte_order +
1476                           (ip0->protocol << 16));
1477
1478   if (BITS (uword) == 32)
1479     {
1480       sum0 =
1481         ip_csum_with_carry (sum0,
1482                             clib_mem_unaligned (&ip0->src_address, u32));
1483       sum0 =
1484         ip_csum_with_carry (sum0,
1485                             clib_mem_unaligned (&ip0->dst_address, u32));
1486     }
1487   else
1488     sum0 =
1489       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1490
1491   n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1492   data_this_buffer = (void *) ip0 + ip_header_length;
1493   if (n_this_buffer + ip_header_length > p0->current_length)
1494     n_this_buffer =
1495       p0->current_length >
1496       ip_header_length ? p0->current_length - ip_header_length : 0;
1497   while (1)
1498     {
1499       sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1500       n_bytes_left -= n_this_buffer;
1501       if (n_bytes_left == 0)
1502         break;
1503
1504       ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1505       p0 = vlib_get_buffer (vm, p0->next_buffer);
1506       data_this_buffer = vlib_buffer_get_current (p0);
1507       n_this_buffer = p0->current_length;
1508     }
1509
1510   sum16 = ~ip_csum_fold (sum0);
1511
1512   return sum16;
1513 }
1514
1515 u32
1516 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1517 {
1518   ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1519   udp_header_t *udp0;
1520   u16 sum16;
1521
1522   ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1523           || ip0->protocol == IP_PROTOCOL_UDP);
1524
1525   udp0 = (void *) (ip0 + 1);
1526   if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1527     {
1528       p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1529                     | IP_BUFFER_L4_CHECKSUM_CORRECT);
1530       return p0->flags;
1531     }
1532
1533   sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1534
1535   p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1536                 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1537
1538   return p0->flags;
1539 }
1540
1541 /* *INDENT-OFF* */
1542 VNET_FEATURE_ARC_INIT (ip4_local) =
1543 {
1544   .arc_name  = "ip4-local",
1545   .start_nodes = VNET_FEATURES ("ip4-local"),
1546 };
1547 /* *INDENT-ON* */
1548
1549 static inline uword
1550 ip4_local_inline (vlib_main_t * vm,
1551                   vlib_node_runtime_t * node,
1552                   vlib_frame_t * frame, int head_of_feature_arc)
1553 {
1554   ip4_main_t *im = &ip4_main;
1555   ip_lookup_main_t *lm = &im->lookup_main;
1556   ip_local_next_t next_index;
1557   u32 *from, *to_next, n_left_from, n_left_to_next;
1558   vlib_node_runtime_t *error_node =
1559     vlib_node_get_runtime (vm, ip4_input_node.index);
1560   u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1561
1562   from = vlib_frame_vector_args (frame);
1563   n_left_from = frame->n_vectors;
1564   next_index = node->cached_next_index;
1565
1566   if (node->flags & VLIB_NODE_FLAG_TRACE)
1567     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1568
1569   while (n_left_from > 0)
1570     {
1571       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1572
1573       while (n_left_from >= 4 && n_left_to_next >= 2)
1574         {
1575           vlib_buffer_t *p0, *p1;
1576           ip4_header_t *ip0, *ip1;
1577           udp_header_t *udp0, *udp1;
1578           ip4_fib_mtrie_t *mtrie0, *mtrie1;
1579           ip4_fib_mtrie_leaf_t leaf0, leaf1;
1580           const dpo_id_t *dpo0, *dpo1;
1581           const load_balance_t *lb0, *lb1;
1582           u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1583           u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1584           i32 len_diff0, len_diff1;
1585           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1586           u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1587           u32 sw_if_index0, sw_if_index1;
1588
1589           pi0 = to_next[0] = from[0];
1590           pi1 = to_next[1] = from[1];
1591           from += 2;
1592           n_left_from -= 2;
1593           to_next += 2;
1594           n_left_to_next -= 2;
1595
1596           next0 = next1 = IP_LOCAL_NEXT_DROP;
1597
1598           p0 = vlib_get_buffer (vm, pi0);
1599           p1 = vlib_get_buffer (vm, pi1);
1600
1601           ip0 = vlib_buffer_get_current (p0);
1602           ip1 = vlib_buffer_get_current (p1);
1603
1604           vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1605           vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1606
1607           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1608           sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1609
1610           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1611           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1612
1613           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1614           fib_index0 =
1615             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1616              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1617
1618           fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1619           fib_index1 =
1620             (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1621              (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1622
1623           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1624           mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1625
1626           leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1627
1628           leaf0 =
1629             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1630           leaf1 =
1631             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1632
1633           /* Treat IP frag packets as "experimental" protocol for now
1634              until support of IP frag reassembly is implemented */
1635           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1636           proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1637
1638           if (head_of_feature_arc == 0)
1639             {
1640               error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1641               goto skip_checks;
1642             }
1643
1644           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1645           is_udp1 = proto1 == IP_PROTOCOL_UDP;
1646           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1647           is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1648
1649           flags0 = p0->flags;
1650           flags1 = p1->flags;
1651
1652           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1653           good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1654
1655           udp0 = ip4_next_header (ip0);
1656           udp1 = ip4_next_header (ip1);
1657
1658           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1659           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1660           good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1661
1662           leaf0 =
1663             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1664           leaf1 =
1665             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1666
1667           /* Verify UDP length. */
1668           ip_len0 = clib_net_to_host_u16 (ip0->length);
1669           ip_len1 = clib_net_to_host_u16 (ip1->length);
1670           udp_len0 = clib_net_to_host_u16 (udp0->length);
1671           udp_len1 = clib_net_to_host_u16 (udp1->length);
1672
1673           len_diff0 = ip_len0 - udp_len0;
1674           len_diff1 = ip_len1 - udp_len1;
1675
1676           len_diff0 = is_udp0 ? len_diff0 : 0;
1677           len_diff1 = is_udp1 ? len_diff1 : 0;
1678
1679           if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1680                                & good_tcp_udp0 & good_tcp_udp1)))
1681             {
1682               if (is_tcp_udp0)
1683                 {
1684                   if (is_tcp_udp0
1685                       && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1686                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1687                   good_tcp_udp0 =
1688                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1689                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1690                 }
1691               if (is_tcp_udp1)
1692                 {
1693                   if (is_tcp_udp1
1694                       && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1695                     flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1696                   good_tcp_udp1 =
1697                     (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1698                   good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1699                 }
1700             }
1701
1702           good_tcp_udp0 &= len_diff0 >= 0;
1703           good_tcp_udp1 &= len_diff1 >= 0;
1704
1705           leaf0 =
1706             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1707           leaf1 =
1708             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1709
1710           error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1711
1712           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1713           error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1714
1715           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1716           error0 = (is_tcp_udp0 && !good_tcp_udp0
1717                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1718           error1 = (is_tcp_udp1 && !good_tcp_udp1
1719                     ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1720
1721           leaf0 =
1722             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1723           leaf1 =
1724             ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1725           leaf0 =
1726             (leaf0 ==
1727              IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1728           leaf1 =
1729             (leaf1 ==
1730              IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1731
1732           vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1733             ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1734           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1735
1736           vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1737             ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1738           vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1739
1740           lb0 = load_balance_get (lbi0);
1741           lb1 = load_balance_get (lbi1);
1742           dpo0 = load_balance_get_bucket_i (lb0, 0);
1743           dpo1 = load_balance_get_bucket_i (lb1, 0);
1744
1745           /*
1746            * Must have a route to source otherwise we drop the packet.
1747            * ip4 broadcasts are accepted, e.g. to make dhcp client work
1748            *
1749            * The checks are:
1750            *  - the source is a receive => it's from us => bogus, do this
1751            *    first since it sets a different error code.
1752            *  - uRPF check for any route to source - accept if passes.
1753            *  - allow packets destined to the broadcast address from unknown sources
1754            */
1755           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1756                      dpo0->dpoi_type == DPO_RECEIVE) ?
1757                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1758           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1759                      !fib_urpf_check_size (lb0->lb_urpf) &&
1760                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1761                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1762           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1763                      dpo1->dpoi_type == DPO_RECEIVE) ?
1764                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1765           error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1766                      !fib_urpf_check_size (lb1->lb_urpf) &&
1767                      ip1->dst_address.as_u32 != 0xFFFFFFFF)
1768                     ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1769
1770         skip_checks:
1771
1772           next0 = lm->local_next_by_ip_protocol[proto0];
1773           next1 = lm->local_next_by_ip_protocol[proto1];
1774
1775           next0 =
1776             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1777           next1 =
1778             error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1779
1780           p0->error = error0 ? error_node->errors[error0] : 0;
1781           p1->error = error1 ? error_node->errors[error1] : 0;
1782
1783           if (head_of_feature_arc)
1784             {
1785               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1786                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1787               if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1788                 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1789             }
1790
1791           vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1792                                            n_left_to_next, pi0, pi1,
1793                                            next0, next1);
1794         }
1795
1796       while (n_left_from > 0 && n_left_to_next > 0)
1797         {
1798           vlib_buffer_t *p0;
1799           ip4_header_t *ip0;
1800           udp_header_t *udp0;
1801           ip4_fib_mtrie_t *mtrie0;
1802           ip4_fib_mtrie_leaf_t leaf0;
1803           u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1804           i32 len_diff0;
1805           u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1806           load_balance_t *lb0;
1807           const dpo_id_t *dpo0;
1808           u32 sw_if_index0;
1809
1810           pi0 = to_next[0] = from[0];
1811           from += 1;
1812           n_left_from -= 1;
1813           to_next += 1;
1814           n_left_to_next -= 1;
1815
1816           next0 = IP_LOCAL_NEXT_DROP;
1817
1818           p0 = vlib_get_buffer (vm, pi0);
1819
1820           ip0 = vlib_buffer_get_current (p0);
1821
1822           vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1823
1824           sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1825
1826           fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1827
1828           fib_index0 =
1829             (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1830              (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1831
1832           mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1833
1834           leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1835
1836           leaf0 =
1837             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1838
1839           /* Treat IP frag packets as "experimental" protocol for now
1840              until support of IP frag reassembly is implemented */
1841           proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1842
1843           if (head_of_feature_arc == 0)
1844             {
1845               error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1846               goto skip_check;
1847             }
1848
1849           is_udp0 = proto0 == IP_PROTOCOL_UDP;
1850           is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1851
1852           flags0 = p0->flags;
1853
1854           good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1855
1856           udp0 = ip4_next_header (ip0);
1857
1858           /* Don't verify UDP checksum for packets with explicit zero checksum. */
1859           good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1860
1861           leaf0 =
1862             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1863
1864           /* Verify UDP length. */
1865           ip_len0 = clib_net_to_host_u16 (ip0->length);
1866           udp_len0 = clib_net_to_host_u16 (udp0->length);
1867
1868           len_diff0 = ip_len0 - udp_len0;
1869
1870           len_diff0 = is_udp0 ? len_diff0 : 0;
1871
1872           if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1873             {
1874               if (is_tcp_udp0)
1875                 {
1876                   if (is_tcp_udp0
1877                       && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1878                     flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1879                   good_tcp_udp0 =
1880                     (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1881                   good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1882                 }
1883             }
1884
1885           good_tcp_udp0 &= len_diff0 >= 0;
1886
1887           leaf0 =
1888             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1889
1890           error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1891
1892           error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1893
1894           ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1895           error0 = (is_tcp_udp0 && !good_tcp_udp0
1896                     ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1897
1898           leaf0 =
1899             ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1900           leaf0 =
1901             (leaf0 ==
1902              IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1903
1904           lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1905           vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1906
1907           lb0 = load_balance_get (lbi0);
1908           dpo0 = load_balance_get_bucket_i (lb0, 0);
1909
1910           vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1911             vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1912
1913           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1914                      dpo0->dpoi_type == DPO_RECEIVE) ?
1915                     IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1916           error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1917                      !fib_urpf_check_size (lb0->lb_urpf) &&
1918                      ip0->dst_address.as_u32 != 0xFFFFFFFF)
1919                     ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1920
1921         skip_check:
1922
1923           next0 = lm->local_next_by_ip_protocol[proto0];
1924
1925           next0 =
1926             error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1927
1928           p0->error = error0 ? error_node->errors[error0] : 0;
1929
1930           if (head_of_feature_arc)
1931             {
1932               if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1933                 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1934             }
1935
1936           vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1937                                            n_left_to_next, pi0, next0);
1938
1939         }
1940
1941       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1942     }
1943
1944   return frame->n_vectors;
1945 }
1946
1947 static uword
1948 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1949 {
1950   return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
1951 }
1952
1953 /* *INDENT-OFF* */
1954 VLIB_REGISTER_NODE (ip4_local_node) =
1955 {
1956   .function = ip4_local,
1957   .name = "ip4-local",
1958   .vector_size = sizeof (u32),
1959   .format_trace = format_ip4_forward_next_trace,
1960   .n_next_nodes = IP_LOCAL_N_NEXT,
1961   .next_nodes =
1962   {
1963     [IP_LOCAL_NEXT_DROP] = "error-drop",
1964     [IP_LOCAL_NEXT_PUNT] = "error-punt",
1965     [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1966     [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1967 };
1968 /* *INDENT-ON* */
1969
1970 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1971
1972 static uword
1973 ip4_local_end_of_arc (vlib_main_t * vm,
1974                       vlib_node_runtime_t * node, vlib_frame_t * frame)
1975 {
1976   return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
1977 }
1978
1979 /* *INDENT-OFF* */
1980 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1981   .function = ip4_local_end_of_arc,
1982   .name = "ip4-local-end-of-arc",
1983   .vector_size = sizeof (u32),
1984
1985   .format_trace = format_ip4_forward_next_trace,
1986   .sibling_of = "ip4-local",
1987 };
1988
1989 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
1990
1991 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1992   .arc_name = "ip4-local",
1993   .node_name = "ip4-local-end-of-arc",
1994   .runs_before = 0, /* not before any other features */
1995 };
1996 /* *INDENT-ON* */
1997
1998 void
1999 ip4_register_protocol (u32 protocol, u32 node_index)
2000 {
2001   vlib_main_t *vm = vlib_get_main ();
2002   ip4_main_t *im = &ip4_main;
2003   ip_lookup_main_t *lm = &im->lookup_main;
2004
2005   ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2006   lm->local_next_by_ip_protocol[protocol] =
2007     vlib_node_add_next (vm, ip4_local_node.index, node_index);
2008 }
2009
2010 static clib_error_t *
2011 show_ip_local_command_fn (vlib_main_t * vm,
2012                           unformat_input_t * input, vlib_cli_command_t * cmd)
2013 {
2014   ip4_main_t *im = &ip4_main;
2015   ip_lookup_main_t *lm = &im->lookup_main;
2016   int i;
2017
2018   vlib_cli_output (vm, "Protocols handled by ip4_local");
2019   for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2020     {
2021       if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2022         vlib_cli_output (vm, "%d", i);
2023     }
2024   return 0;
2025 }
2026
2027
2028
2029 /*?
2030  * Display the set of protocols handled by the local IPv4 stack.
2031  *
2032  * @cliexpar
2033  * Example of how to display local protocol table:
2034  * @cliexstart{show ip local}
2035  * Protocols handled by ip4_local
2036  * 1
2037  * 17
2038  * 47
2039  * @cliexend
2040 ?*/
2041 /* *INDENT-OFF* */
2042 VLIB_CLI_COMMAND (show_ip_local, static) =
2043 {
2044   .path = "show ip local",
2045   .function = show_ip_local_command_fn,
2046   .short_help = "show ip local",
2047 };
2048 /* *INDENT-ON* */
2049
2050 always_inline uword
2051 ip4_arp_inline (vlib_main_t * vm,
2052                 vlib_node_runtime_t * node,
2053                 vlib_frame_t * frame, int is_glean)
2054 {
2055   vnet_main_t *vnm = vnet_get_main ();
2056   ip4_main_t *im = &ip4_main;
2057   ip_lookup_main_t *lm = &im->lookup_main;
2058   u32 *from, *to_next_drop;
2059   uword n_left_from, n_left_to_next_drop, next_index;
2060   static f64 time_last_seed_change = -1e100;
2061   static u32 hash_seeds[3];
2062   static uword hash_bitmap[256 / BITS (uword)];
2063   f64 time_now;
2064
2065   if (node->flags & VLIB_NODE_FLAG_TRACE)
2066     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2067
2068   time_now = vlib_time_now (vm);
2069   if (time_now - time_last_seed_change > 1e-3)
2070     {
2071       uword i;
2072       u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2073                                             sizeof (hash_seeds));
2074       for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2075         hash_seeds[i] = r[i];
2076
2077       /* Mark all hash keys as been no-seen before. */
2078       for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2079         hash_bitmap[i] = 0;
2080
2081       time_last_seed_change = time_now;
2082     }
2083
2084   from = vlib_frame_vector_args (frame);
2085   n_left_from = frame->n_vectors;
2086   next_index = node->cached_next_index;
2087   if (next_index == IP4_ARP_NEXT_DROP)
2088     next_index = IP4_ARP_N_NEXT;        /* point to first interface */
2089
2090   while (n_left_from > 0)
2091     {
2092       vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2093                            to_next_drop, n_left_to_next_drop);
2094
2095       while (n_left_from > 0 && n_left_to_next_drop > 0)
2096         {
2097           u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2098           ip_adjacency_t *adj0;
2099           vlib_buffer_t *p0;
2100           ip4_header_t *ip0;
2101           uword bm0;
2102
2103           pi0 = from[0];
2104
2105           p0 = vlib_get_buffer (vm, pi0);
2106
2107           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2108           adj0 = ip_get_adjacency (lm, adj_index0);
2109           ip0 = vlib_buffer_get_current (p0);
2110
2111           a0 = hash_seeds[0];
2112           b0 = hash_seeds[1];
2113           c0 = hash_seeds[2];
2114
2115           sw_if_index0 = adj0->rewrite_header.sw_if_index;
2116           vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2117
2118           if (is_glean)
2119             {
2120               /*
2121                * this is the Glean case, so we are ARPing for the
2122                * packet's destination
2123                */
2124               a0 ^= ip0->dst_address.data_u32;
2125             }
2126           else
2127             {
2128               a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2129             }
2130           b0 ^= sw_if_index0;
2131
2132           hash_v3_finalize32 (a0, b0, c0);
2133
2134           c0 &= BITS (hash_bitmap) - 1;
2135           c0 = c0 / BITS (uword);
2136           m0 = (uword) 1 << (c0 % BITS (uword));
2137
2138           bm0 = hash_bitmap[c0];
2139           drop0 = (bm0 & m0) != 0;
2140
2141           /* Mark it as seen. */
2142           hash_bitmap[c0] = bm0 | m0;
2143
2144           from += 1;
2145           n_left_from -= 1;
2146           to_next_drop[0] = pi0;
2147           to_next_drop += 1;
2148           n_left_to_next_drop -= 1;
2149
2150           p0->error =
2151             node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2152                          IP4_ARP_ERROR_REQUEST_SENT];
2153
2154           /*
2155            * the adj has been updated to a rewrite but the node the DPO that got
2156            * us here hasn't - yet. no big deal. we'll drop while we wait.
2157            */
2158           if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2159             continue;
2160
2161           if (drop0)
2162             continue;
2163
2164           /*
2165            * Can happen if the control-plane is programming tables
2166            * with traffic flowing; at least that's today's lame excuse.
2167            */
2168           if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2169               || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2170             {
2171               p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2172             }
2173           else
2174             /* Send ARP request. */
2175             {
2176               u32 bi0 = 0;
2177               vlib_buffer_t *b0;
2178               ethernet_arp_header_t *h0;
2179               vnet_hw_interface_t *hw_if0;
2180
2181               h0 =
2182                 vlib_packet_template_get_packet (vm,
2183                                                  &im->ip4_arp_request_packet_template,
2184                                                  &bi0);
2185
2186               /* Add rewrite/encap string for ARP packet. */
2187               vnet_rewrite_one_header (adj0[0], h0,
2188                                        sizeof (ethernet_header_t));
2189
2190               hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2191
2192               /* Src ethernet address in ARP header. */
2193               clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2194                            hw_if0->hw_address,
2195                            sizeof (h0->ip4_over_ethernet[0].ethernet));
2196
2197               if (is_glean)
2198                 {
2199                   /* The interface's source address is stashed in the Glean Adj */
2200                   h0->ip4_over_ethernet[0].ip4 =
2201                     adj0->sub_type.glean.receive_addr.ip4;
2202
2203                   /* Copy in destination address we are requesting. This is the
2204                    * glean case, so it's the packet's destination.*/
2205                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2206                     ip0->dst_address.data_u32;
2207                 }
2208               else
2209                 {
2210                   /* Src IP address in ARP header. */
2211                   if (ip4_src_address_for_packet (lm, sw_if_index0,
2212                                                   &h0->
2213                                                   ip4_over_ethernet[0].ip4))
2214                     {
2215                       /* No source address available */
2216                       p0->error =
2217                         node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2218                       vlib_buffer_free (vm, &bi0, 1);
2219                       continue;
2220                     }
2221
2222                   /* Copy in destination address we are requesting from the
2223                      incomplete adj */
2224                   h0->ip4_over_ethernet[1].ip4.data_u32 =
2225                     adj0->sub_type.nbr.next_hop.ip4.as_u32;
2226                 }
2227
2228               vlib_buffer_copy_trace_flag (vm, p0, bi0);
2229               b0 = vlib_get_buffer (vm, bi0);
2230               vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2231
2232               vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2233
2234               vlib_set_next_frame_buffer (vm, node,
2235                                           adj0->rewrite_header.next_index,
2236                                           bi0);
2237             }
2238         }
2239
2240       vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2241     }
2242
2243   return frame->n_vectors;
2244 }
2245
2246 static uword
2247 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2248 {
2249   return (ip4_arp_inline (vm, node, frame, 0));
2250 }
2251
2252 static uword
2253 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2254 {
2255   return (ip4_arp_inline (vm, node, frame, 1));
2256 }
2257
2258 static char *ip4_arp_error_strings[] = {
2259   [IP4_ARP_ERROR_DROP] = "address overflow drops",
2260   [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2261   [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2262   [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2263   [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2264   [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2265 };
2266
2267 VLIB_REGISTER_NODE (ip4_arp_node) =
2268 {
2269   .function = ip4_arp,.name = "ip4-arp",.vector_size =
2270     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2271     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2272     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2273   {
2274   [IP4_ARP_NEXT_DROP] = "error-drop",}
2275 ,};
2276
2277 VLIB_REGISTER_NODE (ip4_glean_node) =
2278 {
2279   .function = ip4_glean,.name = "ip4-glean",.vector_size =
2280     sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2281     ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2282     ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2283   {
2284   [IP4_ARP_NEXT_DROP] = "error-drop",}
2285 ,};
2286
2287 #define foreach_notrace_ip4_arp_error           \
2288 _(DROP)                                         \
2289 _(REQUEST_SENT)                                 \
2290 _(REPLICATE_DROP)                               \
2291 _(REPLICATE_FAIL)
2292
2293 clib_error_t *
2294 arp_notrace_init (vlib_main_t * vm)
2295 {
2296   vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2297
2298   /* don't trace ARP request packets */
2299 #define _(a)                                    \
2300     vnet_pcap_drop_trace_filter_add_del         \
2301         (rt->errors[IP4_ARP_ERROR_##a],         \
2302          1 /* is_add */);
2303   foreach_notrace_ip4_arp_error;
2304 #undef _
2305   return 0;
2306 }
2307
2308 VLIB_INIT_FUNCTION (arp_notrace_init);
2309
2310
2311 /* Send an ARP request to see if given destination is reachable on given interface. */
2312 clib_error_t *
2313 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2314 {
2315   vnet_main_t *vnm = vnet_get_main ();
2316   ip4_main_t *im = &ip4_main;
2317   ethernet_arp_header_t *h;
2318   ip4_address_t *src;
2319   ip_interface_address_t *ia;
2320   ip_adjacency_t *adj;
2321   vnet_hw_interface_t *hi;
2322   vnet_sw_interface_t *si;
2323   vlib_buffer_t *b;
2324   u32 bi = 0;
2325
2326   si = vnet_get_sw_interface (vnm, sw_if_index);
2327
2328   if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2329     {
2330       return clib_error_return (0, "%U: interface %U down",
2331                                 format_ip4_address, dst,
2332                                 format_vnet_sw_if_index_name, vnm,
2333                                 sw_if_index);
2334     }
2335
2336   src =
2337     ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2338   if (!src)
2339     {
2340       vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2341       return clib_error_return
2342         (0,
2343          "no matching interface address for destination %U (interface %U)",
2344          format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2345          sw_if_index);
2346     }
2347
2348   adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2349
2350   h =
2351     vlib_packet_template_get_packet (vm,
2352                                      &im->ip4_arp_request_packet_template,
2353                                      &bi);
2354
2355   hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2356
2357   clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2358                sizeof (h->ip4_over_ethernet[0].ethernet));
2359
2360   h->ip4_over_ethernet[0].ip4 = src[0];
2361   h->ip4_over_ethernet[1].ip4 = dst[0];
2362
2363   b = vlib_get_buffer (vm, bi);
2364   vnet_buffer (b)->sw_if_index[VLIB_RX] =
2365     vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2366
2367   /* Add encapsulation string for software interface (e.g. ethernet header). */
2368   vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2369   vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2370
2371   {
2372     vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2373     u32 *to_next = vlib_frame_vector_args (f);
2374     to_next[0] = bi;
2375     f->n_vectors = 1;
2376     vlib_put_frame_to_node (vm, hi->output_node_index, f);
2377   }
2378
2379   return /* no error */ 0;
2380 }
2381
/* Next-node indices used by the ip4 rewrite path in this file. */
typedef enum
{
  IP4_REWRITE_NEXT_DROP = 0,
  IP4_REWRITE_NEXT_ICMP_ERROR = 1,
} ip4_rewrite_next_t;
2387
2388 always_inline uword
2389 ip4_rewrite_inline (vlib_main_t * vm,
2390                     vlib_node_runtime_t * node,
2391                     vlib_frame_t * frame,
2392                     int do_counters, int is_midchain, int is_mcast)
2393 {
2394   ip_lookup_main_t *lm = &ip4_main.lookup_main;
2395   u32 *from = vlib_frame_vector_args (frame);
2396   u32 n_left_from, n_left_to_next, *to_next, next_index;
2397   vlib_node_runtime_t *error_node =
2398     vlib_node_get_runtime (vm, ip4_input_node.index);
2399
2400   n_left_from = frame->n_vectors;
2401   next_index = node->cached_next_index;
2402   u32 cpu_index = os_get_cpu_number ();
2403
2404   while (n_left_from > 0)
2405     {
2406       vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2407
2408       while (n_left_from >= 4 && n_left_to_next >= 2)
2409         {
2410           ip_adjacency_t *adj0, *adj1;
2411           vlib_buffer_t *p0, *p1;
2412           ip4_header_t *ip0, *ip1;
2413           u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2414           u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2415           u32 tx_sw_if_index0, tx_sw_if_index1;
2416
2417           /* Prefetch next iteration. */
2418           {
2419             vlib_buffer_t *p2, *p3;
2420
2421             p2 = vlib_get_buffer (vm, from[2]);
2422             p3 = vlib_get_buffer (vm, from[3]);
2423
2424             vlib_prefetch_buffer_header (p2, STORE);
2425             vlib_prefetch_buffer_header (p3, STORE);
2426
2427             CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2428             CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2429           }
2430
2431           pi0 = to_next[0] = from[0];
2432           pi1 = to_next[1] = from[1];
2433
2434           from += 2;
2435           n_left_from -= 2;
2436           to_next += 2;
2437           n_left_to_next -= 2;
2438
2439           p0 = vlib_get_buffer (vm, pi0);
2440           p1 = vlib_get_buffer (vm, pi1);
2441
2442           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2443           adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2444
2445           /*
2446            * pre-fetch the per-adjacency counters
2447            */
2448           if (do_counters)
2449             {
2450               vlib_prefetch_combined_counter (&adjacency_counters,
2451                                               cpu_index, adj_index0);
2452               vlib_prefetch_combined_counter (&adjacency_counters,
2453                                               cpu_index, adj_index1);
2454             }
2455
2456           /* We should never rewrite a pkt using the MISS adjacency */
2457           ASSERT (adj_index0 && adj_index1);
2458
2459           ip0 = vlib_buffer_get_current (p0);
2460           ip1 = vlib_buffer_get_current (p1);
2461
2462           error0 = error1 = IP4_ERROR_NONE;
2463           next0 = next1 = IP4_REWRITE_NEXT_DROP;
2464
2465           /* Decrement TTL & update checksum.
2466              Works either endian, so no need for byte swap. */
2467           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2468             {
2469               i32 ttl0 = ip0->ttl;
2470
2471               /* Input node should have rejected packets with ttl 0. */
2472               ASSERT (ip0->ttl > 0);
2473
2474               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2475               checksum0 += checksum0 >= 0xffff;
2476
2477               ip0->checksum = checksum0;
2478               ttl0 -= 1;
2479               ip0->ttl = ttl0;
2480
2481               /*
2482                * If the ttl drops below 1 when forwarding, generate
2483                * an ICMP response.
2484                */
2485               if (PREDICT_FALSE (ttl0 <= 0))
2486                 {
2487                   error0 = IP4_ERROR_TIME_EXPIRED;
2488                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2489                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2490                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2491                                                0);
2492                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2493                 }
2494
2495               /* Verify checksum. */
2496               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2497             }
2498           else
2499             {
2500               p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2501             }
2502           if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2503             {
2504               i32 ttl1 = ip1->ttl;
2505
2506               /* Input node should have reject packets with ttl 0. */
2507               ASSERT (ip1->ttl > 0);
2508
2509               checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2510               checksum1 += checksum1 >= 0xffff;
2511
2512               ip1->checksum = checksum1;
2513               ttl1 -= 1;
2514               ip1->ttl = ttl1;
2515
2516               /*
2517                * If the ttl drops below 1 when forwarding, generate
2518                * an ICMP response.
2519                */
2520               if (PREDICT_FALSE (ttl1 <= 0))
2521                 {
2522                   error1 = IP4_ERROR_TIME_EXPIRED;
2523                   vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2524                   icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2525                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2526                                                0);
2527                   next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2528                 }
2529
2530               /* Verify checksum. */
2531               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2532               ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2533             }
2534           else
2535             {
2536               p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2537             }
2538
2539           /* Rewrite packet header and updates lengths. */
2540           adj0 = ip_get_adjacency (lm, adj_index0);
2541           adj1 = ip_get_adjacency (lm, adj_index1);
2542
2543           /* Worth pipelining. No guarantee that adj0,1 are hot... */
2544           rw_len0 = adj0[0].rewrite_header.data_bytes;
2545           rw_len1 = adj1[0].rewrite_header.data_bytes;
2546           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2547           vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2548
2549           /* Check MTU of outgoing interface. */
2550           error0 =
2551             (vlib_buffer_length_in_chain (vm, p0) >
2552              adj0[0].
2553              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2554              error0);
2555           error1 =
2556             (vlib_buffer_length_in_chain (vm, p1) >
2557              adj1[0].
2558              rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2559              error1);
2560
2561           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2562            * to see the IP headerr */
2563           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2564             {
2565               next0 = adj0[0].rewrite_header.next_index;
2566               p0->current_data -= rw_len0;
2567               p0->current_length += rw_len0;
2568               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2569               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2570
2571               if (PREDICT_FALSE
2572                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2573                 vnet_feature_arc_start (lm->output_feature_arc_index,
2574                                         tx_sw_if_index0, &next0, p0);
2575             }
2576           if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2577             {
2578               next1 = adj1[0].rewrite_header.next_index;
2579               p1->current_data -= rw_len1;
2580               p1->current_length += rw_len1;
2581
2582               tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2583               vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2584
2585               if (PREDICT_FALSE
2586                   (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2587                 vnet_feature_arc_start (lm->output_feature_arc_index,
2588                                         tx_sw_if_index1, &next1, p1);
2589             }
2590
2591           /* Guess we are only writing on simple Ethernet header. */
2592           vnet_rewrite_two_headers (adj0[0], adj1[0],
2593                                     ip0, ip1, sizeof (ethernet_header_t));
2594
2595           /*
2596            * Bump the per-adjacency counters
2597            */
2598           if (do_counters)
2599             {
2600               vlib_increment_combined_counter
2601                 (&adjacency_counters,
2602                  cpu_index,
2603                  adj_index0, 1,
2604                  vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2605
2606               vlib_increment_combined_counter
2607                 (&adjacency_counters,
2608                  cpu_index,
2609                  adj_index1, 1,
2610                  vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2611             }
2612
2613           if (is_midchain)
2614             {
2615               adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2616               adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2617             }
2618           if (is_mcast)
2619             {
2620               /*
2621                * copy bytes from the IP address into the MAC rewrite
2622                */
2623               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2624               vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2625             }
2626
2627           vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2628                                            to_next, n_left_to_next,
2629                                            pi0, pi1, next0, next1);
2630         }
2631
2632       while (n_left_from > 0 && n_left_to_next > 0)
2633         {
2634           ip_adjacency_t *adj0;
2635           vlib_buffer_t *p0;
2636           ip4_header_t *ip0;
2637           u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2638           u32 tx_sw_if_index0;
2639
2640           pi0 = to_next[0] = from[0];
2641
2642           p0 = vlib_get_buffer (vm, pi0);
2643
2644           adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2645
2646           /* We should never rewrite a pkt using the MISS adjacency */
2647           ASSERT (adj_index0);
2648
2649           adj0 = ip_get_adjacency (lm, adj_index0);
2650
2651           ip0 = vlib_buffer_get_current (p0);
2652
2653           error0 = IP4_ERROR_NONE;
2654           next0 = IP4_REWRITE_NEXT_DROP;        /* drop on error */
2655
2656           /* Decrement TTL & update checksum. */
2657           if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2658             {
2659               i32 ttl0 = ip0->ttl;
2660
2661               checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2662
2663               checksum0 += checksum0 >= 0xffff;
2664
2665               ip0->checksum = checksum0;
2666
2667               ASSERT (ip0->ttl > 0);
2668
2669               ttl0 -= 1;
2670
2671               ip0->ttl = ttl0;
2672
2673               ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2674
2675               if (PREDICT_FALSE (ttl0 <= 0))
2676                 {
2677                   /*
2678                    * If the ttl drops below 1 when forwarding, generate
2679                    * an ICMP response.
2680                    */
2681                   error0 = IP4_ERROR_TIME_EXPIRED;
2682                   next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2683                   vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2684                   icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2685                                                ICMP4_time_exceeded_ttl_exceeded_in_transit,
2686                                                0);
2687                 }
2688             }
2689           else
2690             {
2691               p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2692             }
2693
2694           if (do_counters)
2695             vlib_prefetch_combined_counter (&adjacency_counters,
2696                                             cpu_index, adj_index0);
2697
2698           /* Guess we are only writing on simple Ethernet header. */
2699           vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2700           if (is_mcast)
2701             {
2702               /*
2703                * copy bytes from the IP address into the MAC rewrite
2704                */
2705               vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2706             }
2707
2708           /* Update packet buffer attributes/set output interface. */
2709           rw_len0 = adj0[0].rewrite_header.data_bytes;
2710           vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2711
2712           if (do_counters)
2713             vlib_increment_combined_counter
2714               (&adjacency_counters,
2715                cpu_index, adj_index0, 1,
2716                vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2717
2718           /* Check MTU of outgoing interface. */
2719           error0 = (vlib_buffer_length_in_chain (vm, p0)
2720                     > adj0[0].rewrite_header.max_l3_packet_bytes
2721                     ? IP4_ERROR_MTU_EXCEEDED : error0);
2722
2723           p0->error = error_node->errors[error0];
2724
2725           /* Don't adjust the buffer for ttl issue; icmp-error node wants
2726            * to see the IP headerr */
2727           if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2728             {
2729               p0->current_data -= rw_len0;
2730               p0->current_length += rw_len0;
2731               tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2732
2733               vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2734               next0 = adj0[0].rewrite_header.next_index;
2735
2736               if (is_midchain)
2737                 {
2738                   adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2739                 }
2740
2741               if (PREDICT_FALSE
2742                   (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2743                 vnet_feature_arc_start (lm->output_feature_arc_index,
2744                                         tx_sw_if_index0, &next0, p0);
2745
2746             }
2747
2748           from += 1;
2749           n_left_from -= 1;
2750           to_next += 1;
2751           n_left_to_next -= 1;
2752
2753           vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2754                                            to_next, n_left_to_next,
2755                                            pi0, next0);
2756         }
2757
2758       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2759     }
2760
2761   /* Need to do trace after rewrites to pick up new packet data. */
2762   if (node->flags & VLIB_NODE_FLAG_TRACE)
2763     ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2764
2765   return frame->n_vectors;
2766 }
2767
2768
2769 /** @brief IPv4 rewrite node.
2770     @node ip4-rewrite
2771
2772     This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2773     header checksum, fetch the ip adjacency, check the outbound mtu,
2774     apply the adjacency rewrite, and send pkts to the adjacency
2775     rewrite header's rewrite_next_index.
2776
2777     @param vm vlib_main_t corresponding to the current thread
2778     @param node vlib_node_runtime_t
2779     @param frame vlib_frame_t whose contents should be dispatched
2780
2781     @par Graph mechanics: buffer metadata, next index usage
2782
2783     @em Uses:
2784     - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2785         - the rewrite adjacency index
2786     - <code>adj->lookup_next_index</code>
2787         - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2788           the packet will be dropped.
2789     - <code>adj->rewrite_header</code>
2790         - Rewrite string length, rewrite string, next_index
2791
2792     @em Sets:
2793     - <code>b->current_data, b->current_length</code>
2794         - Updated net of applying the rewrite string
2795
2796     <em>Next Indices:</em>
2797     - <code> adj->rewrite_header.next_index </code>
2798       or @c error-drop
2799 */
2800 static uword
2801 ip4_rewrite (vlib_main_t * vm,
2802              vlib_node_runtime_t * node, vlib_frame_t * frame)
2803 {
2804   if (adj_are_counters_enabled ())
2805     return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2806   else
2807     return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2808 }
2809
2810 static uword
2811 ip4_midchain (vlib_main_t * vm,
2812               vlib_node_runtime_t * node, vlib_frame_t * frame)
2813 {
2814   if (adj_are_counters_enabled ())
2815     return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2816   else
2817     return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2818 }
2819
2820 static uword
2821 ip4_rewrite_mcast (vlib_main_t * vm,
2822                    vlib_node_runtime_t * node, vlib_frame_t * frame)
2823 {
2824   if (adj_are_counters_enabled ())
2825     return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2826   else
2827     return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2828 }
2829
2830 /* *INDENT-OFF* */
2831 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2832   .function = ip4_rewrite,
2833   .name = "ip4-rewrite",
2834   .vector_size = sizeof (u32),
2835
2836   .format_trace = format_ip4_rewrite_trace,
2837
2838   .n_next_nodes = 2,
2839   .next_nodes = {
2840     [IP4_REWRITE_NEXT_DROP] = "error-drop",
2841     [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2842   },
2843 };
2844 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2845
2846 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2847   .function = ip4_rewrite_mcast,
2848   .name = "ip4-rewrite-mcast",
2849   .vector_size = sizeof (u32),
2850
2851   .format_trace = format_ip4_rewrite_trace,
2852   .sibling_of = "ip4-rewrite",
2853 };
2854 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2855
2856 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2857   .function = ip4_midchain,
2858   .name = "ip4-midchain",
2859   .vector_size = sizeof (u32),
2860   .format_trace = format_ip4_forward_next_trace,
2861   .sibling_of =  "ip4-rewrite",
2862 };
2863 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2864 /* *INDENT-ON */
2865
2866 static clib_error_t *
2867 add_del_interface_table (vlib_main_t * vm,
2868                          unformat_input_t * input, vlib_cli_command_t * cmd)
2869 {
2870   vnet_main_t *vnm = vnet_get_main ();
2871   ip_interface_address_t *ia;
2872   clib_error_t *error = 0;
2873   u32 sw_if_index, table_id;
2874
2875   sw_if_index = ~0;
2876
2877   if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2878     {
2879       error = clib_error_return (0, "unknown interface `%U'",
2880                                  format_unformat_error, input);
2881       goto done;
2882     }
2883
2884   if (unformat (input, "%d", &table_id))
2885     ;
2886   else
2887     {
2888       error = clib_error_return (0, "expected table id `%U'",
2889                                  format_unformat_error, input);
2890       goto done;
2891     }
2892
2893   /*
2894    * If the interface already has in IP address, then a change int
2895    * VRF is not allowed. The IP address applied must first be removed.
2896    * We do not do that automatically here, since VPP has no knowledge
2897    * of whether thoses subnets are valid in the destination VRF.
2898    */
2899   /* *INDENT-OFF* */
2900   foreach_ip_interface_address (&ip4_main.lookup_main,
2901                                 ia, sw_if_index,
2902                                 1 /* honor unnumbered */,
2903   ({
2904       ip4_address_t * a;
2905
2906       a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2907       error = clib_error_return (0, "interface %U has address %U",
2908                                  format_vnet_sw_if_index_name, vnm,
2909                                  sw_if_index,
2910                                  format_ip4_address, a);
2911       goto done;
2912    }));
2913    /* *INDENT-ON* */
2914
2915 {
2916   ip4_main_t *im = &ip4_main;
2917   u32 fib_index;
2918
2919   fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2920
2921   vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2922   im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2923
2924   fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2925   vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2926   im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2927 }
2928
2929 done:
2930 return error;
2931 }
2932
2933 /*?
2934  * Place the indicated interface into the supplied IPv4 FIB table (also known
2935  * as a VRF). If the FIB table does not exist, this command creates it. To
2936  * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2937  * FIB table will only be displayed if a route has been added to the table, or
2938  * an IP Address is assigned to an interface in the table (which adds a route
2939  * automatically).
2940  *
2941  * @note IP addresses added after setting the interface IP table are added to
2942  * the indicated FIB table. If an IP address is added prior to changing the
2943  * table then this is an error. The control plane must remove these addresses
2944  * first and then change the table. VPP will not automatically move the
2945  * addresses from the old to the new table as it does not know the validity
2946  * of such a change.
2947  *
2948  * @cliexpar
2949  * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2950  * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2951  ?*/
2952 /* *INDENT-OFF* */
2953 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2954 {
2955   .path = "set interface ip table",
2956   .function = add_del_interface_table,
2957   .short_help = "set interface ip table <interface> <table-id>",
2958 };
2959 /* *INDENT-ON* */
2960
2961 int
2962 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2963 {
2964   ip4_fib_mtrie_t *mtrie0;
2965   ip4_fib_mtrie_leaf_t leaf0;
2966   u32 lbi0;
2967
2968   mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2969
2970   leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2971   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2972   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2973   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2974   leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2975
2976   /* Handle default route. */
2977   leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2978
2979   lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2980
2981   return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2982 }
2983
2984 static clib_error_t *
2985 test_lookup_command_fn (vlib_main_t * vm,
2986                         unformat_input_t * input, vlib_cli_command_t * cmd)
2987 {
2988   ip4_fib_t *fib;
2989   u32 table_id = 0;
2990   f64 count = 1;
2991   u32 n;
2992   int i;
2993   ip4_address_t ip4_base_address;
2994   u64 errors = 0;
2995
2996   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2997     {
2998       if (unformat (input, "table %d", &table_id))
2999         {
3000           /* Make sure the entry exists. */
3001           fib = ip4_fib_get (table_id);
3002           if ((fib) && (fib->index != table_id))
3003             return clib_error_return (0, "<fib-index> %d does not exist",
3004                                       table_id);
3005         }
3006       else if (unformat (input, "count %f", &count))
3007         ;
3008
3009       else if (unformat (input, "%U",
3010                          unformat_ip4_address, &ip4_base_address))
3011         ;
3012       else
3013         return clib_error_return (0, "unknown input `%U'",
3014                                   format_unformat_error, input);
3015     }
3016
3017   n = count;
3018
3019   for (i = 0; i < n; i++)
3020     {
3021       if (!ip4_lookup_validate (&ip4_base_address, table_id))
3022         errors++;
3023
3024       ip4_base_address.as_u32 =
3025         clib_host_to_net_u32 (1 +
3026                               clib_net_to_host_u32 (ip4_base_address.as_u32));
3027     }
3028
3029   if (errors)
3030     vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3031   else
3032     vlib_cli_output (vm, "No errors in %d lookups\n", n);
3033
3034   return 0;
3035 }
3036
3037 /*?
3038  * Perform a lookup of an IPv4 Address (or range of addresses) in the
3039  * given FIB table to determine if there is a conflict with the
3040  * adjacency table. The fib-id can be determined by using the
3041  * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3042  * of 0 is used.
3043  *
3044  * @todo This command uses fib-id, other commands use table-id (not
3045  * just a name, they are different indexes). Would like to change this
3046  * to table-id for consistency.
3047  *
3048  * @cliexpar
3049  * Example of how to run the test lookup command:
3050  * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3051  * No errors in 2 lookups
3052  * @cliexend
3053 ?*/
3054 /* *INDENT-OFF* */
3055 VLIB_CLI_COMMAND (lookup_test_command, static) =
3056 {
3057   .path = "test lookup",
3058   .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3059   .function = test_lookup_command_fn,
3060 };
3061 /* *INDENT-ON* */
3062
3063 int
3064 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3065 {
3066   ip4_main_t *im4 = &ip4_main;
3067   ip4_fib_t *fib;
3068   uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3069
3070   if (p == 0)
3071     return VNET_API_ERROR_NO_SUCH_FIB;
3072
3073   fib = ip4_fib_get (p[0]);
3074
3075   fib->flow_hash_config = flow_hash_config;
3076   return 0;
3077 }
3078
3079 static clib_error_t *
3080 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3081                              unformat_input_t * input,
3082                              vlib_cli_command_t * cmd)
3083 {
3084   int matched = 0;
3085   u32 table_id = 0;
3086   u32 flow_hash_config = 0;
3087   int rv;
3088
3089   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3090     {
3091       if (unformat (input, "table %d", &table_id))
3092         matched = 1;
3093 #define _(a,v) \
3094     else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3095       foreach_flow_hash_bit
3096 #undef _
3097         else
3098         break;
3099     }
3100
3101   if (matched == 0)
3102     return clib_error_return (0, "unknown input `%U'",
3103                               format_unformat_error, input);
3104
3105   rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3106   switch (rv)
3107     {
3108     case 0:
3109       break;
3110
3111     case VNET_API_ERROR_NO_SUCH_FIB:
3112       return clib_error_return (0, "no such FIB table %d", table_id);
3113
3114     default:
3115       clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3116       break;
3117     }
3118
3119   return 0;
3120 }
3121
3122 /*?
3123  * Configure the set of IPv4 fields used by the flow hash.
3124  *
3125  * @cliexpar
3126  * Example of how to set the flow hash on a given table:
3127  * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3128  * Example of how to display the configured flow hash:
3129  * @cliexstart{show ip fib}
3130  * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3131  * 0.0.0.0/0
3132  *   unicast-ip4-chain
3133  *   [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3134  *     [0] [@0]: dpo-drop ip6
3135  * 0.0.0.0/32
3136  *   unicast-ip4-chain
3137  *   [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3138  *     [0] [@0]: dpo-drop ip6
3139  * 224.0.0.0/8
3140  *   unicast-ip4-chain
3141  *   [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3142  *     [0] [@0]: dpo-drop ip6
3143  * 6.0.1.2/32
3144  *   unicast-ip4-chain
3145  *   [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3146  *     [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3147  * 7.0.0.1/32
3148  *   unicast-ip4-chain
3149  *   [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3150  *     [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3151  *     [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3152  *     [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3153  *     [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3154  * 240.0.0.0/8
3155  *   unicast-ip4-chain
3156  *   [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3157  *     [0] [@0]: dpo-drop ip6
3158  * 255.255.255.255/32
3159  *   unicast-ip4-chain
3160  *   [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3161  *     [0] [@0]: dpo-drop ip6
3162  * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3163  * 0.0.0.0/0
3164  *   unicast-ip4-chain
3165  *   [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3166  *     [0] [@0]: dpo-drop ip6
3167  * 0.0.0.0/32
3168  *   unicast-ip4-chain
3169  *   [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3170  *     [0] [@0]: dpo-drop ip6
3171  * 172.16.1.0/24
3172  *   unicast-ip4-chain
3173  *   [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3174  *     [0] [@4]: ipv4-glean: af_packet0
3175  * 172.16.1.1/32
3176  *   unicast-ip4-chain
3177  *   [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3178  *     [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3179  * 172.16.1.2/32
3180  *   unicast-ip4-chain
3181  *   [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3182  *     [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3183  * 172.16.2.0/24
3184  *   unicast-ip4-chain
3185  *   [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3186  *     [0] [@4]: ipv4-glean: af_packet1
3187  * 172.16.2.1/32
3188  *   unicast-ip4-chain
3189  *   [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3190  *     [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3191  * 224.0.0.0/8
3192  *   unicast-ip4-chain
3193  *   [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3194  *     [0] [@0]: dpo-drop ip6
3195  * 240.0.0.0/8
3196  *   unicast-ip4-chain
3197  *   [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3198  *     [0] [@0]: dpo-drop ip6
3199  * 255.255.255.255/32
3200  *   unicast-ip4-chain
3201  *   [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3202  *     [0] [@0]: dpo-drop ip6
3203  * @cliexend
3204 ?*/
3205 /* *INDENT-OFF* */
3206 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3207 {
3208   .path = "set ip flow-hash",
3209   .short_help =
3210   "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3211   .function = set_ip_flow_hash_command_fn,
3212 };
3213 /* *INDENT-ON* */
3214
3215 int
3216 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3217                              u32 table_index)
3218 {
3219   vnet_main_t *vnm = vnet_get_main ();
3220   vnet_interface_main_t *im = &vnm->interface_main;
3221   ip4_main_t *ipm = &ip4_main;
3222   ip_lookup_main_t *lm = &ipm->lookup_main;
3223   vnet_classify_main_t *cm = &vnet_classify_main;
3224   ip4_address_t *if_addr;
3225
3226   if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3227     return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3228
3229   if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3230     return VNET_API_ERROR_NO_SUCH_ENTRY;
3231
3232   vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3233   lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3234
3235   if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3236
3237   if (NULL != if_addr)
3238     {
3239       fib_prefix_t pfx = {
3240         .fp_len = 32,
3241         .fp_proto = FIB_PROTOCOL_IP4,
3242         .fp_addr.ip4 = *if_addr,
3243       };
3244       u32 fib_index;
3245
3246       fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3247                                                        sw_if_index);
3248
3249
3250       if (table_index != (u32) ~ 0)
3251         {
3252           dpo_id_t dpo = DPO_INVALID;
3253
3254           dpo_set (&dpo,
3255                    DPO_CLASSIFY,
3256                    DPO_PROTO_IP4,
3257                    classify_dpo_create (DPO_PROTO_IP4, table_index));
3258
3259           fib_table_entry_special_dpo_add (fib_index,
3260                                            &pfx,
3261                                            FIB_SOURCE_CLASSIFY,
3262                                            FIB_ENTRY_FLAG_NONE, &dpo);
3263           dpo_reset (&dpo);
3264         }
3265       else
3266         {
3267           fib_table_entry_special_remove (fib_index,
3268                                           &pfx, FIB_SOURCE_CLASSIFY);
3269         }
3270     }
3271
3272   return 0;
3273 }
3274
3275 static clib_error_t *
3276 set_ip_classify_command_fn (vlib_main_t * vm,
3277                             unformat_input_t * input,
3278                             vlib_cli_command_t * cmd)
3279 {
3280   u32 table_index = ~0;
3281   int table_index_set = 0;
3282   u32 sw_if_index = ~0;
3283   int rv;
3284
3285   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3286     {
3287       if (unformat (input, "table-index %d", &table_index))
3288         table_index_set = 1;
3289       else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3290                          vnet_get_main (), &sw_if_index))
3291         ;
3292       else
3293         break;
3294     }
3295
3296   if (table_index_set == 0)
3297     return clib_error_return (0, "classify table-index must be specified");
3298
3299   if (sw_if_index == ~0)
3300     return clib_error_return (0, "interface / subif must be specified");
3301
3302   rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3303
3304   switch (rv)
3305     {
3306     case 0:
3307       break;
3308
3309     case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3310       return clib_error_return (0, "No such interface");
3311
3312     case VNET_API_ERROR_NO_SUCH_ENTRY:
3313       return clib_error_return (0, "No such classifier table");
3314     }
3315   return 0;
3316 }
3317
3318 /*?
3319  * Assign a classification table to an interface. The classification
3320  * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3321  * commands. Once the table is created, use this command to filter packets
3322  * on an interface.
3323  *
3324  * @cliexpar
3325  * Example of how to assign a classification table to an interface:
3326  * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3327 ?*/
3328 /* *INDENT-OFF* */
3329 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3330 {
3331     .path = "set ip classify",
3332     .short_help =
3333     "set ip classify intfc <interface> table-index <classify-idx>",
3334     .function = set_ip_classify_command_fn,
3335 };
3336 /* *INDENT-ON* */
3337
3338 /*
3339  * fd.io coding-style-patch-verification: ON
3340  *
3341  * Local Variables:
3342  * eval: (c-set-style "gnu")
3343  * End:
3344  */