2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
56 * @brief IPv4 Forwarding.
58 * This file contains the source code for IPv4 forwarding.
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t *im = &ip4_main;
74 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, *from, *to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number ();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
87 while (n_left_from >= 8 && n_left_to_next >= 4)
89 vlib_buffer_t *p0, *p1, *p2, *p3;
90 ip4_header_t *ip0, *ip1, *ip2, *ip3;
91 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
92 ip_lookup_next_t next0, next1, next2, next3;
93 const load_balance_t *lb0, *lb1, *lb2, *lb3;
94 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
95 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
96 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
97 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
99 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
101 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
103 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
105 flow_hash_config_t flow_hash_config0, flow_hash_config1;
106 flow_hash_config_t flow_hash_config2, flow_hash_config3;
107 u32 hash_c0, hash_c1, hash_c2, hash_c3;
108 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
110 /* Prefetch next iteration. */
112 vlib_buffer_t *p4, *p5, *p6, *p7;
114 p4 = vlib_get_buffer (vm, from[4]);
115 p5 = vlib_get_buffer (vm, from[5]);
116 p6 = vlib_get_buffer (vm, from[6]);
117 p7 = vlib_get_buffer (vm, from[7]);
119 vlib_prefetch_buffer_header (p4, LOAD);
120 vlib_prefetch_buffer_header (p5, LOAD);
121 vlib_prefetch_buffer_header (p6, LOAD);
122 vlib_prefetch_buffer_header (p7, LOAD);
124 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
125 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
130 pi0 = to_next[0] = from[0];
131 pi1 = to_next[1] = from[1];
132 pi2 = to_next[2] = from[2];
133 pi3 = to_next[3] = from[3];
140 p0 = vlib_get_buffer (vm, pi0);
141 p1 = vlib_get_buffer (vm, pi1);
142 p2 = vlib_get_buffer (vm, pi2);
143 p3 = vlib_get_buffer (vm, pi3);
145 ip0 = vlib_buffer_get_current (p0);
146 ip1 = vlib_buffer_get_current (p1);
147 ip2 = vlib_buffer_get_current (p2);
148 ip3 = vlib_buffer_get_current (p3);
150 dst_addr0 = &ip0->dst_address;
151 dst_addr1 = &ip1->dst_address;
152 dst_addr2 = &ip2->dst_address;
153 dst_addr3 = &ip3->dst_address;
156 vec_elt (im->fib_index_by_sw_if_index,
157 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
159 vec_elt (im->fib_index_by_sw_if_index,
160 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
162 vec_elt (im->fib_index_by_sw_if_index,
163 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
165 vec_elt (im->fib_index_by_sw_if_index,
166 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
168 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
169 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
171 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
172 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
174 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
175 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
177 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
178 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
181 if (!lookup_for_responses_to_locally_received_packets)
183 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
184 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
185 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
186 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
188 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
190 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
191 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
192 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
193 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
196 tcp0 = (void *) (ip0 + 1);
197 tcp1 = (void *) (ip1 + 1);
198 tcp2 = (void *) (ip2 + 1);
199 tcp3 = (void *) (ip3 + 1);
201 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
202 || ip0->protocol == IP_PROTOCOL_UDP);
203 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
204 || ip1->protocol == IP_PROTOCOL_UDP);
205 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
206 || ip2->protocol == IP_PROTOCOL_UDP);
207 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
208 || ip1->protocol == IP_PROTOCOL_UDP);
210 if (!lookup_for_responses_to_locally_received_packets)
212 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
213 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
214 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
215 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
218 if (!lookup_for_responses_to_locally_received_packets)
220 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
221 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
222 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
223 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
226 if (!lookup_for_responses_to_locally_received_packets)
228 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
229 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
230 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
231 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
234 if (lookup_for_responses_to_locally_received_packets)
236 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
237 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
238 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
239 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
243 /* Handle default route. */
246 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
249 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
252 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
255 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
256 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
257 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
258 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
259 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
262 lb0 = load_balance_get (lb_index0);
263 lb1 = load_balance_get (lb_index1);
264 lb2 = load_balance_get (lb_index2);
265 lb3 = load_balance_get (lb_index3);
267 /* Use flow hash to compute multipath adjacency. */
268 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
269 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
270 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
271 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
272 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
274 flow_hash_config0 = lb0->lb_hash_config;
275 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
276 ip4_compute_flow_hash (ip0, flow_hash_config0);
278 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
280 flow_hash_config1 = lb1->lb_hash_config;
281 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
282 ip4_compute_flow_hash (ip1, flow_hash_config1);
284 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
286 flow_hash_config2 = lb2->lb_hash_config;
287 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
288 ip4_compute_flow_hash (ip2, flow_hash_config2);
290 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
292 flow_hash_config3 = lb3->lb_hash_config;
293 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
294 ip4_compute_flow_hash (ip3, flow_hash_config3);
297 ASSERT (lb0->lb_n_buckets > 0);
298 ASSERT (is_pow2 (lb0->lb_n_buckets));
299 ASSERT (lb1->lb_n_buckets > 0);
300 ASSERT (is_pow2 (lb1->lb_n_buckets));
301 ASSERT (lb2->lb_n_buckets > 0);
302 ASSERT (is_pow2 (lb2->lb_n_buckets));
303 ASSERT (lb3->lb_n_buckets > 0);
304 ASSERT (is_pow2 (lb3->lb_n_buckets));
306 dpo0 = load_balance_get_bucket_i (lb0,
308 (lb0->lb_n_buckets_minus_1)));
309 dpo1 = load_balance_get_bucket_i (lb1,
311 (lb1->lb_n_buckets_minus_1)));
312 dpo2 = load_balance_get_bucket_i (lb2,
314 (lb2->lb_n_buckets_minus_1)));
315 dpo3 = load_balance_get_bucket_i (lb3,
317 (lb3->lb_n_buckets_minus_1)));
319 next0 = dpo0->dpoi_next_node;
320 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
321 next1 = dpo1->dpoi_next_node;
322 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
323 next2 = dpo2->dpoi_next_node;
324 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
325 next3 = dpo3->dpoi_next_node;
326 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
328 vlib_increment_combined_counter
329 (cm, cpu_index, lb_index0, 1,
330 vlib_buffer_length_in_chain (vm, p0)
331 + sizeof (ethernet_header_t));
332 vlib_increment_combined_counter
333 (cm, cpu_index, lb_index1, 1,
334 vlib_buffer_length_in_chain (vm, p1)
335 + sizeof (ethernet_header_t));
336 vlib_increment_combined_counter
337 (cm, cpu_index, lb_index2, 1,
338 vlib_buffer_length_in_chain (vm, p2)
339 + sizeof (ethernet_header_t));
340 vlib_increment_combined_counter
341 (cm, cpu_index, lb_index3, 1,
342 vlib_buffer_length_in_chain (vm, p3)
343 + sizeof (ethernet_header_t));
345 vlib_validate_buffer_enqueue_x4 (vm, node, next,
346 to_next, n_left_to_next,
348 next0, next1, next2, next3);
351 while (n_left_from > 0 && n_left_to_next > 0)
355 __attribute__ ((unused)) tcp_header_t *tcp0;
356 ip_lookup_next_t next0;
357 const load_balance_t *lb0;
358 ip4_fib_mtrie_t *mtrie0;
359 ip4_fib_mtrie_leaf_t leaf0;
360 ip4_address_t *dst_addr0;
361 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
362 flow_hash_config_t flow_hash_config0;
363 const dpo_id_t *dpo0;
369 p0 = vlib_get_buffer (vm, pi0);
371 ip0 = vlib_buffer_get_current (p0);
373 dst_addr0 = &ip0->dst_address;
376 vec_elt (im->fib_index_by_sw_if_index,
377 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
379 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
380 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
382 if (!lookup_for_responses_to_locally_received_packets)
384 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
386 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
388 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
391 tcp0 = (void *) (ip0 + 1);
393 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
394 || ip0->protocol == IP_PROTOCOL_UDP);
396 if (!lookup_for_responses_to_locally_received_packets)
397 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
399 if (!lookup_for_responses_to_locally_received_packets)
400 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
402 if (!lookup_for_responses_to_locally_received_packets)
403 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
405 if (lookup_for_responses_to_locally_received_packets)
406 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
409 /* Handle default route. */
412 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
413 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
416 lb0 = load_balance_get (lbi0);
418 /* Use flow hash to compute multipath adjacency. */
419 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
420 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
422 flow_hash_config0 = lb0->lb_hash_config;
424 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
425 ip4_compute_flow_hash (ip0, flow_hash_config0);
428 ASSERT (lb0->lb_n_buckets > 0);
429 ASSERT (is_pow2 (lb0->lb_n_buckets));
431 dpo0 = load_balance_get_bucket_i (lb0,
433 (lb0->lb_n_buckets_minus_1)));
435 next0 = dpo0->dpoi_next_node;
436 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
438 vlib_increment_combined_counter
439 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
446 if (PREDICT_FALSE (next0 != next))
449 vlib_put_next_frame (vm, node, next, n_left_to_next);
451 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
458 vlib_put_next_frame (vm, node, next, n_left_to_next);
461 if (node->flags & VLIB_NODE_FLAG_TRACE)
462 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
464 return frame->n_vectors;
467 /** @brief IPv4 lookup node.
470 This is the main IPv4 lookup dispatch node.
472 @param vm vlib_main_t corresponding to the current thread
473 @param node vlib_node_runtime_t
474 @param frame vlib_frame_t whose contents should be dispatched
476 @par Graph mechanics: buffer metadata, next index usage
479 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
480 - Indicates the @c sw_if_index value of the interface that the
481 packet was received on.
482 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
483 - When the value is @c ~0 then the node performs a longest prefix
484 match (LPM) for the packet destination address in the FIB attached
485 to the receive interface.
486 - Otherwise perform LPM for the packet destination address in the
487 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
488 value (0, 1, ...) and not a VRF id.
491 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
492 - The index of the DPO selected by the lookup (@c dpo->dpoi_index).
495 - Dispatches the packet to the next node recorded in the selected
496 load-balance bucket, @c dpo->dpoi_next_node
497 (where @c dpo is the bucket chosen by the packet's flow hash).
500 ip4_lookup (vlib_main_t * vm,
501 vlib_node_runtime_t * node, vlib_frame_t * frame)
503 return ip4_lookup_inline (vm, node, frame,
504 /* lookup_for_responses_to_locally_received_packets */
509 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
511 VLIB_REGISTER_NODE (ip4_lookup_node) =
513 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
514 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
515 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
517 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
520 ip4_load_balance (vlib_main_t * vm,
521 vlib_node_runtime_t * node, vlib_frame_t * frame)
523 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
524 u32 n_left_from, n_left_to_next, *from, *to_next;
525 ip_lookup_next_t next;
526 u32 cpu_index = os_get_cpu_number ();
528 from = vlib_frame_vector_args (frame);
529 n_left_from = frame->n_vectors;
530 next = node->cached_next_index;
532 if (node->flags & VLIB_NODE_FLAG_TRACE)
533 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
535 while (n_left_from > 0)
537 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
540 while (n_left_from >= 4 && n_left_to_next >= 2)
542 ip_lookup_next_t next0, next1;
543 const load_balance_t *lb0, *lb1;
544 vlib_buffer_t *p0, *p1;
545 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
546 const ip4_header_t *ip0, *ip1;
547 const dpo_id_t *dpo0, *dpo1;
549 /* Prefetch next iteration. */
551 vlib_buffer_t *p2, *p3;
553 p2 = vlib_get_buffer (vm, from[2]);
554 p3 = vlib_get_buffer (vm, from[3]);
556 vlib_prefetch_buffer_header (p2, STORE);
557 vlib_prefetch_buffer_header (p3, STORE);
559 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
560 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
563 pi0 = to_next[0] = from[0];
564 pi1 = to_next[1] = from[1];
571 p0 = vlib_get_buffer (vm, pi0);
572 p1 = vlib_get_buffer (vm, pi1);
574 ip0 = vlib_buffer_get_current (p0);
575 ip1 = vlib_buffer_get_current (p1);
576 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
577 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
579 lb0 = load_balance_get (lbi0);
580 lb1 = load_balance_get (lbi1);
583 * this node is for via FIBs we can re-use the hash value from the
584 * to node if present.
585 * We don't want to use the same hash value at each level in the recursion
586 * graph as that would lead to polarisation
588 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
589 hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
591 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
593 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
595 hc0 = vnet_buffer (p0)->ip.flow_hash =
596 vnet_buffer (p0)->ip.flow_hash >> 1;
600 hc0 = vnet_buffer (p0)->ip.flow_hash =
601 ip4_compute_flow_hash (ip0, hc0);
604 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
606 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
608 hc1 = vnet_buffer (p1)->ip.flow_hash =
609 vnet_buffer (p1)->ip.flow_hash >> 1;
613 hc1 = vnet_buffer (p1)->ip.flow_hash =
614 ip4_compute_flow_hash (ip1, hc1);
619 load_balance_get_bucket_i (lb0,
620 hc0 & (lb0->lb_n_buckets_minus_1));
622 load_balance_get_bucket_i (lb1,
623 hc1 & (lb1->lb_n_buckets_minus_1));
625 next0 = dpo0->dpoi_next_node;
626 next1 = dpo1->dpoi_next_node;
628 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
631 vlib_increment_combined_counter
632 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633 vlib_increment_combined_counter
634 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
636 vlib_validate_buffer_enqueue_x2 (vm, node, next,
637 to_next, n_left_to_next,
638 pi0, pi1, next0, next1);
641 while (n_left_from > 0 && n_left_to_next > 0)
643 ip_lookup_next_t next0;
644 const load_balance_t *lb0;
647 const ip4_header_t *ip0;
648 const dpo_id_t *dpo0;
657 p0 = vlib_get_buffer (vm, pi0);
659 ip0 = vlib_buffer_get_current (p0);
660 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
662 lb0 = load_balance_get (lbi0);
664 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
665 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
667 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
669 hc0 = vnet_buffer (p0)->ip.flow_hash =
670 vnet_buffer (p0)->ip.flow_hash >> 1;
674 hc0 = vnet_buffer (p0)->ip.flow_hash =
675 ip4_compute_flow_hash (ip0, hc0);
680 load_balance_get_bucket_i (lb0,
681 hc0 & (lb0->lb_n_buckets_minus_1));
683 next0 = dpo0->dpoi_next_node;
684 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
686 vlib_increment_combined_counter
687 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
689 vlib_validate_buffer_enqueue_x1 (vm, node, next,
690 to_next, n_left_to_next,
694 vlib_put_next_frame (vm, node, next, n_left_to_next);
697 return frame->n_vectors;
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704 format_ip4_lookup_trace,};
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
708 /* get first interface address */
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711 ip_interface_address_t ** result_ia)
713 ip_lookup_main_t *lm = &im->lookup_main;
714 ip_interface_address_t *ia = 0;
715 ip4_address_t *result = 0;
717 foreach_ip_interface_address (lm, ia, sw_if_index,
718 1 /* honor unnumbered */ ,
722 ip_interface_address_get_address (lm, ia);
728 *result_ia = result ? ia : 0;
733 ip4_add_interface_routes (u32 sw_if_index,
734 ip4_main_t * im, u32 fib_index,
735 ip_interface_address_t * a)
737 ip_lookup_main_t *lm = &im->lookup_main;
738 ip4_address_t *address = ip_interface_address_get_address (lm, a);
740 .fp_len = a->address_length,
741 .fp_proto = FIB_PROTOCOL_IP4,
742 .fp_addr.ip4 = *address,
745 a->neighbor_probe_adj_index = ~0;
749 fib_node_index_t fei;
751 fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP4, NULL, /* No next-hop address */
752 sw_if_index, ~0, // invalid FIB index
753 1, NULL, // no out-label stack
754 FIB_ROUTE_PATH_FLAG_NONE);
755 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
760 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
762 u32 classify_table_index =
763 lm->classify_table_index_by_sw_if_index[sw_if_index];
764 if (classify_table_index != (u32) ~ 0)
766 dpo_id_t dpo = DPO_INVALID;
771 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
773 fib_table_entry_special_dpo_add (fib_index,
776 FIB_ENTRY_FLAG_NONE, &dpo);
781 fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP4, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index
782 1, NULL, // no out-label stack
783 FIB_ROUTE_PATH_FLAG_NONE);
787 ip4_del_interface_routes (ip4_main_t * im,
789 ip4_address_t * address, u32 address_length)
792 .fp_len = address_length,
793 .fp_proto = FIB_PROTOCOL_IP4,
794 .fp_addr.ip4 = *address,
799 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
803 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
807 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
809 ip4_main_t *im = &ip4_main;
811 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
814 * enable/disable only on the 1<->0 transition
818 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
823 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
824 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
827 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
830 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
835 static clib_error_t *
836 ip4_add_del_interface_address_internal (vlib_main_t * vm,
838 ip4_address_t * address,
839 u32 address_length, u32 is_del)
841 vnet_main_t *vnm = vnet_get_main ();
842 ip4_main_t *im = &ip4_main;
843 ip_lookup_main_t *lm = &im->lookup_main;
844 clib_error_t *error = 0;
845 u32 if_address_index, elts_before;
846 ip4_address_fib_t ip4_af, *addr_fib = 0;
848 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
849 ip4_addr_fib_init (&ip4_af, address,
850 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
851 vec_add1 (addr_fib, ip4_af);
854 * there is no support for adj-fib handling in the presence of overlapping
855 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
860 /* When adding an address check that it does not conflict
861 with an existing address. */
862 ip_interface_address_t *ia;
863 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
864 0 /* honor unnumbered */ ,
868 ip_interface_address_get_address
869 (&im->lookup_main, ia);
870 if (ip4_destination_matches_route
871 (im, address, x, ia->address_length)
873 ip4_destination_matches_route (im,
879 ("failed to add %U which conflicts with %U for interface %U",
880 format_ip4_address_and_length, address,
882 format_ip4_address_and_length, x,
884 format_vnet_sw_if_index_name, vnm,
889 elts_before = pool_elts (lm->if_address_pool);
891 error = ip_interface_address_add_del
892 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
896 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
899 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
901 ip4_add_interface_routes (sw_if_index,
902 im, ip4_af.fib_index,
904 (lm->if_address_pool, if_address_index));
906 /* Run the callbacks only if the pool grew/shrank; an unchanged pool means a duplicate address. */
907 if (elts_before != pool_elts (lm->if_address_pool))
909 ip4_add_del_interface_address_callback_t *cb;
910 vec_foreach (cb, im->add_del_interface_address_callbacks)
911 cb->function (im, cb->function_opaque, sw_if_index,
912 address, address_length, if_address_index, is_del);
921 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
922 ip4_address_t * address, u32 address_length,
925 return ip4_add_del_interface_address_internal
926 (vm, sw_if_index, address, address_length, is_del);
929 /* Built-in ip4 unicast rx feature path definition */
931 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
933 .arc_name = "ip4-unicast",
934 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
935 .end_node = "ip4-lookup",
936 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
939 VNET_FEATURE_INIT (ip4_flow_classify, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "ip4-flow-classify",
943 .runs_before = VNET_FEATURES ("ip4-inacl"),
946 VNET_FEATURE_INIT (ip4_inacl, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-inacl",
950 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
953 VNET_FEATURE_INIT (ip4_source_check_1, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-source-check-via-rx",
957 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
960 VNET_FEATURE_INIT (ip4_source_check_2, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-source-check-via-any",
964 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
967 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
969 .arc_name = "ip4-unicast",
970 .node_name = "ip4-source-and-port-range-check-rx",
971 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
974 VNET_FEATURE_INIT (ip4_policer_classify, static) =
976 .arc_name = "ip4-unicast",
977 .node_name = "ip4-policer-classify",
978 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
981 VNET_FEATURE_INIT (ip4_ipsec, static) =
983 .arc_name = "ip4-unicast",
984 .node_name = "ipsec-input-ip4",
985 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
988 VNET_FEATURE_INIT (ip4_vpath, static) =
990 .arc_name = "ip4-unicast",
991 .node_name = "vpath-input-ip4",
992 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
995 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
997 .arc_name = "ip4-unicast",
998 .node_name = "ip4-vxlan-bypass",
999 .runs_before = VNET_FEATURES ("ip4-lookup"),
1002 VNET_FEATURE_INIT (ip4_lookup, static) =
1004 .arc_name = "ip4-unicast",
1005 .node_name = "ip4-lookup",
1006 .runs_before = VNET_FEATURES ("ip4-drop"),
1009 VNET_FEATURE_INIT (ip4_drop, static) =
1011 .arc_name = "ip4-unicast",
1012 .node_name = "ip4-drop",
1013 .runs_before = 0, /* not before any other features */
1017 /* Built-in ip4 multicast rx feature path definition */
1018 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1020 .arc_name = "ip4-multicast",
1021 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1022 .end_node = "ip4-lookup-multicast",
1023 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1026 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1028 .arc_name = "ip4-multicast",
1029 .node_name = "vpath-input-ip4",
1030 .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
1033 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1035 .arc_name = "ip4-multicast",
1036 .node_name = "ip4-lookup-multicast",
1037 .runs_before = VNET_FEATURES ("ip4-drop"),
1040 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1042 .arc_name = "ip4-multicast",
1043 .node_name = "ip4-drop",
1044 .runs_before = 0, /* last feature */
1047 /* Source and port-range check ip4 tx feature path definition */
1048 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1050 .arc_name = "ip4-output",
1051 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1052 .end_node = "interface-output",
1053 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1056 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1058 .arc_name = "ip4-output",
1059 .node_name = "ip4-source-and-port-range-check-tx",
1060 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1063 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1065 .arc_name = "ip4-output",
1066 .node_name = "ipsec-output-ip4",
1067 .runs_before = VNET_FEATURES ("interface-output"),
1070 /* Built-in ip4 tx feature path definition */
1071 VNET_FEATURE_INIT (ip4_interface_output, static) =
1073 .arc_name = "ip4-output",
1074 .node_name = "interface-output",
1075 .runs_before = 0, /* not before any other features */
1079 static clib_error_t *
1080 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1082 ip4_main_t *im = &ip4_main;
1084 /* Fill in lookup tables with default table (0). */
1085 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1087 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1090 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1093 return /* no error */ 0;
1096 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1098 /* Global IP4 main. */
1099 ip4_main_t ip4_main;
1102 ip4_lookup_init (vlib_main_t * vm)
1104 ip4_main_t *im = &ip4_main;
1105 clib_error_t *error;
1108 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1111 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1116 m = pow2_mask (i) << (32 - i);
1119 im->fib_masks[i] = clib_host_to_net_u32 (m);
1122 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1124 /* Create FIB with index 0 and table id of 0. */
1125 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1129 pn = pg_get_node (ip4_lookup_node.index);
1130 pn->unformat_edit = unformat_pg_ip4_header;
1134 ethernet_arp_header_t h;
1136 memset (&h, 0, sizeof (h));
1138 /* Set target ethernet address to all zeros. */
1139 memset (h.ip4_over_ethernet[1].ethernet, 0,
1140 sizeof (h.ip4_over_ethernet[1].ethernet));
1142 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1143 #define _8(f,v) h.f = v;
1144 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1145 _16 (l3_type, ETHERNET_TYPE_IP4);
1146 _8 (n_l2_address_bytes, 6);
1147 _8 (n_l3_address_bytes, 4);
1148 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1152 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1155 /* alloc chunk size */ 8,
1162 VLIB_INIT_FUNCTION (ip4_lookup_init);
1166 /* Adjacency taken. */
1171 /* Packet data, possibly *after* rewrite. */
1172 u8 packet_data[64 - 1 * sizeof (u32)];
1174 ip4_forward_next_trace_t;
/*
 * Trace formatter: appends the captured packet bytes, rendered with
 * format_ip4_header, to s.  args supplies, in order, a vlib_main_t *, a
 * vlib_node_t * (both consumed but unused) and the
 * ip4_forward_next_trace_t * to print.
 */
1177 format_ip4_forward_next_trace (u8 * s, va_list * args)
1179 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1180 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1181 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1182 uword indent = format_get_indent (s);
1183 s = format (s, "%U%U",
1184 format_white_space, indent,
1185 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup results: prints the FIB index, DPO index and
 * flow hash on one line, then the captured packet bytes (via
 * format_ip4_header) on the next line at the original indent.  args is
 * consumed in the same order as in format_ip4_forward_next_trace.
 */
1190 format_ip4_lookup_trace (u8 * s, va_list * args)
1192 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1193 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1194 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1195 uword indent = format_get_indent (s);
1197 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1198 t->fib_index, t->dpo_index, t->flow_hash);
1199 s = format (s, "\n%U%U",
1200 format_white_space, indent,
1201 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite nodes: prints an interface/FIB value, the DPO
 * index, the adjacency (format_ip_adjacency) and the flow hash, followed on
 * the next line by the packet bytes rendered through
 * format_ip_adjacency_packet_data.
 *
 * NOTE(review): the value printed under the "tx_sw_if_index" label is
 * t->fib_index.  Presumably the trace recorder stores the TX sw_if_index in
 * that field when one is set - confirm the label matches what is recorded.
 */
1206 format_ip4_rewrite_trace (u8 * s, va_list * args)
1208 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1209 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1210 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1211 vnet_main_t *vnm = vnet_get_main ();
1212 uword indent = format_get_indent (s);
1214 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1215 t->fib_index, t->dpo_index, format_ip_adjacency,
1216 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1217 s = format (s, "\n%U%U",
1218 format_white_space, indent,
1219 format_ip_adjacency_packet_data,
1220 vnm, t->dpo_index, t->packet_data, sizeof (t->packet_data));
1224 /* Common trace function for all ip4-forward next nodes. */
/*
 * Walks the buffer indices held in `frame`.  For every buffer flagged
 * VLIB_BUFFER_IS_TRACED it adds an ip4_forward_next_trace_t record holding
 * ip.adj_index[which_adj_index], ip.flow_hash and a copy of the bytes at the
 * buffer's current position.  Buffers are handled in pairs first, then
 * singly.
 */
1226 ip4_forward_next_trace (vlib_main_t * vm,
1227 vlib_node_runtime_t * node,
1228 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1231 ip4_main_t *im = &ip4_main;
1233 n_left = frame->n_vectors;
1234 from = vlib_frame_vector_args (frame);
1239 vlib_buffer_t *b0, *b1;
1240 ip4_forward_next_trace_t *t0, *t1;
1242 /* Prefetch next iteration. */
1243 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1244 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1249 b0 = vlib_get_buffer (vm, bi0);
1250 b1 = vlib_get_buffer (vm, bi1);
1252 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1254 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1255 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1256 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* The buffer's TX sw_if_index when it is set (not ~0), otherwise the FIB
   index bound to the RX interface. */
1258 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
/* Always copies sizeof (packet_data) bytes from the current position. */
1263 clib_memcpy (t0->packet_data,
1264 vlib_buffer_get_current (b0),
1265 sizeof (t0->packet_data));
/* Same record for the second buffer of the pair. */
1267 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1269 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1270 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1271 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1273 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1274 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1275 vec_elt (im->fib_index_by_sw_if_index,
1276 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1277 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1278 sizeof (t1->packet_data));
/* Single-buffer path: identical record, one buffer at a time. */
1288 ip4_forward_next_trace_t *t0;
1292 b0 = vlib_get_buffer (vm, bi0);
1294 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1296 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1297 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1298 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1300 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1301 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1302 vec_elt (im->fib_index_by_sw_if_index,
1303 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1304 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1305 sizeof (t0->packet_data));
/*
 * Shared body of the ip4-drop and ip4-punt node functions.  Hands the
 * frame's buffers to vlib_error_drop_buffers together with
 * ip4_input_node.index and error_code, then records traces (using the TX
 * adjacency index) when tracing is enabled on this node.
 */
1313 ip4_drop_or_punt (vlib_main_t * vm,
1314 vlib_node_runtime_t * node,
1315 vlib_frame_t * frame, ip4_error_t error_code)
1317 u32 *buffers = vlib_frame_vector_args (frame);
1318 uword n_packets = frame->n_vectors;
1320 vlib_error_drop_buffers (vm, node, buffers,
1324 ip4_input_node.index, error_code);
1326 if (node->flags & VLIB_NODE_FLAG_TRACE)
1327 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function: disposes of the whole frame with IP4_ERROR_ADJACENCY_DROP. */
1333 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1335 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function: disposes of the whole frame with IP4_ERROR_ADJACENCY_PUNT. */
1339 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1341 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph node "ip4-drop": runs ip4_drop; next node 0 is "error-drop". */
1344 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1346 .function = ip4_drop,.name = "ip4-drop",.vector_size =
1347 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1350 [0] = "error-drop",}
1353 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph node "ip4-punt": runs ip4_punt; next node 0 is "error-punt". */
1355 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1357 .function = ip4_punt,.name = "ip4-punt",.vector_size =
1358 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1361 [0] = "error-punt",}
1364 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1366 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded from the payload length, protocol and the source and
 * destination addresses, then extended over the L4 payload.  The payload
 * may span several chained buffers starting at p0; ip0 is the IPv4 header
 * inside p0.  The final value is the complement of the folded sum.
 */
1368 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1372 u32 ip_header_length, payload_length_host_byte_order;
1373 u32 n_this_buffer, n_bytes_left;
1375 void *data_this_buffer;
1377 /* Initialize checksum with ip header. */
1378 ip_header_length = ip4_header_bytes (ip0);
/* L4 length = total IP length minus the (possibly option-bearing) header. */
1379 payload_length_host_byte_order =
1380 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1382 clib_host_to_net_u32 (payload_length_host_byte_order +
1383 (ip0->protocol << 16));
/* Add the addresses: two 32-bit loads on 32-bit hosts, otherwise one 64-bit
   load starting at src_address (presumably covering the adjacent
   dst_address as well - confirm against the header layout). */
1385 if (BITS (uword) == 32)
1388 ip_csum_with_carry (sum0,
1389 clib_mem_unaligned (&ip0->src_address, u32));
1391 ip_csum_with_carry (sum0,
1392 clib_mem_unaligned (&ip0->dst_address, u32));
1396 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1398 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1399 data_this_buffer = (void *) ip0 + ip_header_length;
/* Clamp the first chunk to the bytes actually present in the first buffer. */
1400 if (n_this_buffer + ip_header_length > p0->current_length)
1402 p0->current_length >
1403 ip_header_length ? p0->current_length - ip_header_length : 0;
/* Sum this buffer's share, then move along the chain until nothing is left. */
1406 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1407 n_bytes_left -= n_this_buffer;
1408 if (n_bytes_left == 0)
1411 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1412 p0 = vlib_get_buffer (vm, p0->next_buffer);
1413 data_this_buffer = vlib_buffer_get_current (p0);
1414 n_this_buffer = p0->current_length;
1417 sum16 = ~ip_csum_fold (sum0);
/*
 * Validate the TCP/UDP checksum of the IPv4 packet at p0's current position
 * and record the verdict in p0->flags.  IP_BUFFER_L4_CHECKSUM_COMPUTED is
 * always set; IP_BUFFER_L4_CHECKSUM_CORRECT is set when the computed sum is
 * zero, or when the packet is UDP with an explicit zero checksum (in which
 * case no sum is computed).  The protocol must be TCP or UDP.
 */
1423 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1425 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1429 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1430 || ip0->protocol == IP_PROTOCOL_UDP);
/* Locate the L4 header with ip4_next_header, as ip4_local does, so that
   IPv4 options (header longer than sizeof (ip4_header_t)) are skipped
   instead of being misread as the UDP header. */
1432 udp0 = ip4_next_header (ip0);
1433 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1435 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1436 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1440 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* A correct packet sums to zero; shift the boolean into the CORRECT bit. */
1442 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1443 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * Node function for "ip4-local".  For each packet it:
 *  - looks the source address up in the mtrie of the packet's FIB,
 *  - checks the TCP/UDP checksum and the UDP length against the IP length,
 *  - flags packets whose source resolves to a DPO_RECEIVE as spoofed, and
 *    packets failing the uRPF size check as a source lookup miss (unless
 *    the destination is 255.255.255.255),
 *  - picks the next node from lm->local_next_by_ip_protocol (fragments are
 *    looked up as protocol 0xfe), or IP_LOCAL_NEXT_DROP on any error.
 * Error counters are taken from the ip4-input node's runtime.
 */
1449 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1451 ip4_main_t *im = &ip4_main;
1452 ip_lookup_main_t *lm = &im->lookup_main;
1453 ip_local_next_t next_index;
1454 u32 *from, *to_next, n_left_from, n_left_to_next;
/* p->error values below index into ip4-input's error table. */
1455 vlib_node_runtime_t *error_node =
1456 vlib_node_get_runtime (vm, ip4_input_node.index);
1458 from = vlib_frame_vector_args (frame);
1459 n_left_from = frame->n_vectors;
1460 next_index = node->cached_next_index;
/* Traces are recorded up front, before any packet is processed. */
1462 if (node->flags & VLIB_NODE_FLAG_TRACE)
1463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1465 while (n_left_from > 0)
1467 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1469 while (n_left_from >= 4 && n_left_to_next >= 2)
1471 vlib_buffer_t *p0, *p1;
1472 ip4_header_t *ip0, *ip1;
1473 udp_header_t *udp0, *udp1;
1474 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1475 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1476 const dpo_id_t *dpo0, *dpo1;
1477 const load_balance_t *lb0, *lb1;
1478 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1479 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1480 i32 len_diff0, len_diff1;
1481 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1482 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1485 pi0 = to_next[0] = from[0];
1486 pi1 = to_next[1] = from[1];
1490 n_left_to_next -= 2;
1492 p0 = vlib_get_buffer (vm, pi0);
1493 p1 = vlib_get_buffer (vm, pi1);
1495 ip0 = vlib_buffer_get_current (p0);
1496 ip1 = vlib_buffer_get_current (p1);
1498 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1499 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1501 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1502 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1503 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1504 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1506 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1507 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
1508 fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1509 fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1511 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1512 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1514 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1517 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1519 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1521 /* Treat IP frag packets as "experimental" protocol for now
1522 until support of IP frag reassembly is implemented */
1523 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1524 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1525 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1526 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1527 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1528 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1533 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1534 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1536 udp0 = ip4_next_header (ip0);
1537 udp1 = ip4_next_header (ip1);
1539 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1540 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1541 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1544 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1546 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1548 /* Verify UDP length. */
1549 ip_len0 = clib_net_to_host_u16 (ip0->length);
1550 ip_len1 = clib_net_to_host_u16 (ip1->length);
1551 udp_len0 = clib_net_to_host_u16 (udp0->length);
1552 udp_len1 = clib_net_to_host_u16 (udp1->length);
1554 len_diff0 = ip_len0 - udp_len0;
1555 len_diff1 = ip_len1 - udp_len1;
1557 len_diff0 = is_udp0 ? len_diff0 : 0;
1558 len_diff1 = is_udp1 ? len_diff1 : 0;
1560 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1561 & good_tcp_udp0 & good_tcp_udp1)))
1566 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1567 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1569 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1570 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1575 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1576 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1578 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1579 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1583 good_tcp_udp0 &= len_diff0 >= 0;
1584 good_tcp_udp1 &= len_diff1 >= 0;
1587 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1589 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1591 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1593 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1594 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1596 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1597 error0 = (is_tcp_udp0 && !good_tcp_udp0
1598 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1599 error1 = (is_tcp_udp1 && !good_tcp_udp1
1600 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1603 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1605 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1608 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1611 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1613 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1614 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1615 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1617 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1618 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1619 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1621 lb0 = load_balance_get (lbi0);
1622 lb1 = load_balance_get (lbi1);
1623 dpo0 = load_balance_get_bucket_i (lb0, 0);
1624 dpo1 = load_balance_get_bucket_i (lb1, 0);
1627 * Must have a route to source otherwise we drop the packet.
1628 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1631 * - the source is a receive => it's from us => bogus, do this
1632 * first since it sets a different error code.
1633 * - uRPF check for any route to source - accept if passes.
1634 * - allow packets destined to the broadcast address from unknown sources
1636 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1637 dpo0->dpoi_type == DPO_RECEIVE) ?
1638 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1639 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1640 !fib_urpf_check_size (lb0->lb_urpf) &&
1641 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1642 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1643 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1644 dpo1->dpoi_type == DPO_RECEIVE) ?
1645 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1646 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1647 !fib_urpf_check_size (lb1->lb_urpf) &&
1648 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1649 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1651 next0 = lm->local_next_by_ip_protocol[proto0];
1652 next1 = lm->local_next_by_ip_protocol[proto1];
1655 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1657 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1659 p0->error = error0 ? error_node->errors[error0] : 0;
1660 p1->error = error1 ? error_node->errors[error1] : 0;
1662 enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
1664 if (PREDICT_FALSE (enqueue_code != 0))
1666 switch (enqueue_code)
1672 n_left_to_next += 1;
1673 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1679 n_left_to_next += 1;
1680 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1684 /* A B B or A B C */
1686 n_left_to_next += 2;
1687 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1688 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1691 vlib_put_next_frame (vm, node, next_index,
1694 vlib_get_next_frame (vm, node, next_index, to_next,
1702 while (n_left_from > 0 && n_left_to_next > 0)
1707 ip4_fib_mtrie_t *mtrie0;
1708 ip4_fib_mtrie_leaf_t leaf0;
1709 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1711 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1712 load_balance_t *lb0;
1713 const dpo_id_t *dpo0;
1715 pi0 = to_next[0] = from[0];
1719 n_left_to_next -= 1;
1721 p0 = vlib_get_buffer (vm, pi0);
1723 ip0 = vlib_buffer_get_current (p0);
1725 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1727 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1728 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1729 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1730 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1732 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1734 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1737 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1739 /* Treat IP frag packets as "experimental" protocol for now
1740 until support of IP frag reassembly is implemented */
1741 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1742 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1743 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1747 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1749 udp0 = ip4_next_header (ip0);
1751 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1752 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1755 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1757 /* Verify UDP length. */
1758 ip_len0 = clib_net_to_host_u16 (ip0->length);
1759 udp_len0 = clib_net_to_host_u16 (udp0->length);
1761 len_diff0 = ip_len0 - udp_len0;
1763 len_diff0 = is_udp0 ? len_diff0 : 0;
1765 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1770 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1771 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1773 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1774 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1778 good_tcp_udp0 &= len_diff0 >= 0;
1781 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1783 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1785 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1787 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1788 error0 = (is_tcp_udp0 && !good_tcp_udp0
1789 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1792 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1795 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1797 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1798 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1800 lb0 = load_balance_get (lbi0);
1801 dpo0 = load_balance_get_bucket_i (lb0, 0);
1803 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1804 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1806 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1807 dpo0->dpoi_type == DPO_RECEIVE) ?
1808 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1809 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1810 !fib_urpf_check_size (lb0->lb_urpf) &&
1811 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1812 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1814 next0 = lm->local_next_by_ip_protocol[proto0];
1817 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1819 p0->error = error0 ? error_node->errors[error0] : 0;
1821 if (PREDICT_FALSE (next0 != next_index))
1823 n_left_to_next += 1;
1824 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1827 vlib_get_next_frame (vm, node, next_index, to_next,
1831 n_left_to_next -= 1;
1835 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1838 return frame->n_vectors;
/* Graph node "ip4-local": runs ip4_local and starts with the four fixed
   next nodes listed below. */
1841 VLIB_REGISTER_NODE (ip4_local_node, static) =
1843 .function = ip4_local,.name = "ip4-local",.vector_size =
1844 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1845 IP_LOCAL_N_NEXT,.next_nodes =
1847 [IP_LOCAL_NEXT_DROP] = "error-drop",
1848 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1849 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1850 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}
1853 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/*
 * Register node_index as the handler for locally delivered packets of the
 * given IP protocol.  The node is added as a next node of ip4-local and the
 * resulting next index is stored in lm->local_next_by_ip_protocol[protocol].
 * protocol must be a valid index into that table (checked with ASSERT).
 */
1856 ip4_register_protocol (u32 protocol, u32 node_index)
1858 vlib_main_t *vm = vlib_get_main ();
1859 ip4_main_t *im = &ip4_main;
1860 ip_lookup_main_t *lm = &im->lookup_main;
1862 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1863 lm->local_next_by_ip_protocol[protocol] =
1864 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler: prints a heading, then the number of every IP protocol whose
 * entry in lm->local_next_by_ip_protocol is something other than
 * IP_LOCAL_NEXT_PUNT.
 */
1867 static clib_error_t *
1868 show_ip_local_command_fn (vlib_main_t * vm,
1869 unformat_input_t * input, vlib_cli_command_t * cmd)
1871 ip4_main_t *im = &ip4_main;
1872 ip_lookup_main_t *lm = &im->lookup_main;
1875 vlib_cli_output (vm, "Protocols handled by ip4_local");
1876 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
/* An entry still pointing at PUNT is treated as "not handled" and skipped. */
1878 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1879 vlib_cli_output (vm, "%d", i);
1887 * Display the set of protocols handled by the local IPv4 stack.
1890 * Example of how to display local protocol table:
1891 * @cliexstart{show ip local}
1892 * Protocols handled by ip4_local
1899 VLIB_CLI_COMMAND (show_ip_local, static) =
1901 .path = "show ip local",
1902 .function = show_ip_local_command_fn,
1903 .short_help = "show ip local",
/*
 * Shared body of the ip4-arp and ip4-glean node functions.  Incoming
 * packets are placed on the IP4_ARP_NEXT_DROP frame; for packets that pass
 * the hash-bitmap throttle an ARP request is built from
 * im->ip4_arp_request_packet_template and sent towards the adjacency's
 * interface.  is_glean selects the glean variant, which ARPs for the
 * packet's destination address.
 *
 * The throttle state (seed timestamp, hash seeds, seen bitmap) lives in
 * function-static variables.
 */
1908 ip4_arp_inline (vlib_main_t * vm,
1909 vlib_node_runtime_t * node,
1910 vlib_frame_t * frame, int is_glean)
1912 vnet_main_t *vnm = vnet_get_main ();
1913 ip4_main_t *im = &ip4_main;
1914 ip_lookup_main_t *lm = &im->lookup_main;
1915 u32 *from, *to_next_drop;
1916 uword n_left_from, n_left_to_next_drop, next_index;
/* Initialised far in the past so the first call always reseeds. */
1917 static f64 time_last_seed_change = -1e100;
1918 static u32 hash_seeds[3];
/* 256-bit "already requested" bitmap, stored as machine words. */
1919 static uword hash_bitmap[256 / BITS (uword)];
1922 if (node->flags & VLIB_NODE_FLAG_TRACE)
1923 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Reseed and clear the bitmap once more than 1e-3 time units (presumably
   seconds - confirm vlib_time_now's unit) have passed since the last
   reseed. */
1925 time_now = vlib_time_now (vm);
1926 if (time_now - time_last_seed_change > 1e-3)
1929 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1930 sizeof (hash_seeds));
1931 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1932 hash_seeds[i] = r[i];
1934 /* Mark all hash keys as not seen before. */
1935 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1938 time_last_seed_change = time_now;
1941 from = vlib_frame_vector_args (frame);
1942 n_left_from = frame->n_vectors;
1943 next_index = node->cached_next_index;
1944 if (next_index == IP4_ARP_NEXT_DROP)
1945 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1947 while (n_left_from > 0)
1949 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1950 to_next_drop, n_left_to_next_drop);
1952 while (n_left_from > 0 && n_left_to_next_drop > 0)
1954 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1955 ip_adjacency_t *adj0;
1962 p0 = vlib_get_buffer (vm, pi0);
1964 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1965 adj0 = ip_get_adjacency (lm, adj_index0);
1966 ip0 = vlib_buffer_get_current (p0);
1972 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1973 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1978 * this is the Glean case, so we are ARPing for the
1979 * packet's destination
1981 a0 ^= ip0->dst_address.data_u32;
1985 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1989 hash_v3_finalize32 (a0, b0, c0);
/*
 * Throttle: map the hash onto one bit of hash_bitmap.  drop0 is set when
 * that bit was already set, i.e. a request with the same hash was seen
 * since the bitmap was last cleared.
 *
 * The bit position has to be taken from the bit index BEFORE c0 is reduced
 * to a word index; deriving it afterwards would only ever select bits
 * 0 .. (number of words - 1).  m0 holds the bit position rather than the
 * mask, so the value fits m0's 32-bit type even when uword is 64 bits wide.
 * Assumes bm0 has hash_bitmap's element type (uword) - TODO confirm at its
 * declaration.
 */
1991 c0 &= BITS (hash_bitmap) - 1;
1992 m0 = c0 % BITS (uword);
1993 c0 = c0 / BITS (uword);
1995 bm0 = hash_bitmap[c0];
1996 drop0 = (bm0 >> m0) & 1;
1998 /* Mark it as seen. */
1999 hash_bitmap[c0] = bm0 | ((uword) 1 << m0);
2003 to_next_drop[0] = pi0;
2005 n_left_to_next_drop -= 1;
2008 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2009 IP4_ARP_ERROR_REQUEST_SENT];
2012 * the adj has been updated to a rewrite but the node the DPO that got
2013 * us here hasn't - yet. no big deal. we'll drop while we wait.
2015 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2022 * Can happen if the control-plane is programming tables
2023 * with traffic flowing; at least that's today's lame excuse.
2025 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
2026 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2028 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2031 /* Send ARP request. */
2035 ethernet_arp_header_t *h0;
2036 vnet_hw_interface_t *hw_if0;
2039 vlib_packet_template_get_packet (vm,
2040 &im->ip4_arp_request_packet_template,
2043 /* Add rewrite/encap string for ARP packet. */
2044 vnet_rewrite_one_header (adj0[0], h0,
2045 sizeof (ethernet_header_t));
2047 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2049 /* Src ethernet address in ARP header. */
2050 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2052 sizeof (h0->ip4_over_ethernet[0].ethernet));
2056 /* The interface's source address is stashed in the Glean Adj */
2057 h0->ip4_over_ethernet[0].ip4 =
2058 adj0->sub_type.glean.receive_addr.ip4;
2060 /* Copy in destination address we are requesting. This is the
2061 * glean case, so it's the packet's destination.*/
2062 h0->ip4_over_ethernet[1].ip4.data_u32 =
2063 ip0->dst_address.data_u32;
2067 /* Src IP address in ARP header. */
2068 if (ip4_src_address_for_packet (lm, sw_if_index0,
2070 ip4_over_ethernet[0].ip4))
2072 /* No source address available */
2074 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2075 vlib_buffer_free (vm, &bi0, 1);
2079 /* Copy in destination address we are requesting from the
2081 h0->ip4_over_ethernet[1].ip4.data_u32 =
2082 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2085 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2086 b0 = vlib_get_buffer (vm, bi0);
2087 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2089 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2091 vlib_set_next_frame_buffer (vm, node,
2092 adj0->rewrite_header.next_index,
2097 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2100 return frame->n_vectors;
/* Node function for "ip4-arp": the non-glean variant of ip4_arp_inline. */
2104 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2106 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function for "ip4-glean": the glean variant of ip4_arp_inline, which
   ARPs for the packet's own destination address. */
2110 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2112 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the ip4-arp and ip4-glean nodes, indexed by
   IP4_ARP_ERROR_* value. */
2115 static char *ip4_arp_error_strings[] = {
2116 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2117 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2118 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2119 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2120 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2121 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node "ip4-arp": runs ip4_arp, uses ip4_arp_error_strings for its
   counters and has "error-drop" as its only fixed next node. */
2124 VLIB_REGISTER_NODE (ip4_arp_node) =
2126 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2127 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2128 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2129 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2131 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph node "ip4-glean": runs ip4_glean and shares the error strings and
   next-node layout of ip4-arp. */
2134 VLIB_REGISTER_NODE (ip4_glean_node) =
2136 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2137 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2138 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2139 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2141 [IP4_ARP_NEXT_DROP] = "error-drop",}
2144 #define foreach_notrace_ip4_arp_error \
2151 arp_notrace_init (vlib_main_t * vm)
2153 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2155 /* don't trace ARP request packets */
2157 vnet_pcap_drop_trace_filter_add_del \
2158 (rt->errors[IP4_ARP_ERROR_##a], \
2160 foreach_notrace_ip4_arp_error;
2165 VLIB_INIT_FUNCTION (arp_notrace_init);
2168 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * Fails with a clib error when the interface is not admin-up, or when no
 * interface address matches dst (vnm->api_errno is then set to
 * VNET_API_ERROR_NO_MATCHING_INTERFACE).  Otherwise an ARP request is built
 * from the shared template, encapsulated with the matching address's
 * neighbor-probe adjacency and handed to the hardware interface's output
 * node.
 */
2170 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2172 vnet_main_t *vnm = vnet_get_main ();
2173 ip4_main_t *im = &ip4_main;
2174 ethernet_arp_header_t *h;
2176 ip_interface_address_t *ia;
2177 ip_adjacency_t *adj;
2178 vnet_hw_interface_t *hi;
2179 vnet_sw_interface_t *si;
2183 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Nothing can be sent on an interface that is administratively down. */
2185 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2187 return clib_error_return (0, "%U: interface %U down",
2188 format_ip4_address, dst,
2189 format_vnet_sw_if_index_name, vnm,
/* Find the interface address matching dst; ia receives that address's
   record. */
2194 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2197 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2198 return clib_error_return
2199 (0, "no matching interface address for destination %U (interface %U)",
2200 format_ip4_address, dst,
2201 format_vnet_sw_if_index_name, vnm, sw_if_index);
2204 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2207 vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
2210 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address = the hardware interface's address. */
2212 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2213 sizeof (h->ip4_over_ethernet[0].ethernet));
/* Sender IP = matching interface address, target IP = dst. */
2215 h->ip4_over_ethernet[0].ip4 = src[0];
2216 h->ip4_over_ethernet[1].ip4 = dst[0];
2218 b = vlib_get_buffer (vm, bi);
2219 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2220 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2222 /* Add encapsulation string for software interface (e.g. ethernet header). */
2223 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back so it covers the rewrite just written. */
2224 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet to the interface's output node in a frame of its own. */
2227 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2228 u32 *to_next = vlib_frame_vector_args (f);
2231 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2234 return /* no error */ 0;
2239 IP4_REWRITE_NEXT_DROP,
2240 IP4_REWRITE_NEXT_ICMP_ERROR,
2241 } ip4_rewrite_next_t;
2244 ip4_rewrite_inline (vlib_main_t * vm,
2245 vlib_node_runtime_t * node,
2246 vlib_frame_t * frame, int is_midchain)
2248 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2249 u32 *from = vlib_frame_vector_args (frame);
2250 u32 n_left_from, n_left_to_next, *to_next, next_index;
2251 vlib_node_runtime_t *error_node =
2252 vlib_node_get_runtime (vm, ip4_input_node.index);
2254 n_left_from = frame->n_vectors;
2255 next_index = node->cached_next_index;
2256 u32 cpu_index = os_get_cpu_number ();
2258 while (n_left_from > 0)
2260 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2262 while (n_left_from >= 4 && n_left_to_next >= 2)
2264 ip_adjacency_t *adj0, *adj1;
2265 vlib_buffer_t *p0, *p1;
2266 ip4_header_t *ip0, *ip1;
2267 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2268 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2269 u32 tx_sw_if_index0, tx_sw_if_index1;
2271 /* Prefetch next iteration. */
2273 vlib_buffer_t *p2, *p3;
2275 p2 = vlib_get_buffer (vm, from[2]);
2276 p3 = vlib_get_buffer (vm, from[3]);
2278 vlib_prefetch_buffer_header (p2, STORE);
2279 vlib_prefetch_buffer_header (p3, STORE);
2281 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2282 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2285 pi0 = to_next[0] = from[0];
2286 pi1 = to_next[1] = from[1];
2291 n_left_to_next -= 2;
2293 p0 = vlib_get_buffer (vm, pi0);
2294 p1 = vlib_get_buffer (vm, pi1);
2296 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2297 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2299 /* We should never rewrite a pkt using the MISS adjacency */
2300 ASSERT (adj_index0 && adj_index1);
2302 ip0 = vlib_buffer_get_current (p0);
2303 ip1 = vlib_buffer_get_current (p1);
2305 error0 = error1 = IP4_ERROR_NONE;
2306 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2308 /* Decrement TTL & update checksum.
2309 Works either endian, so no need for byte swap. */
2310 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2312 i32 ttl0 = ip0->ttl;
2314 /* Input node should have rejected packets with ttl 0. */
2315 ASSERT (ip0->ttl > 0);
2317 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2318 checksum0 += checksum0 >= 0xffff;
2320 ip0->checksum = checksum0;
2325 * If the ttl drops below 1 when forwarding, generate
2328 if (PREDICT_FALSE (ttl0 <= 0))
2330 error0 = IP4_ERROR_TIME_EXPIRED;
2331 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2332 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2333 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2335 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2338 /* Verify checksum. */
2339 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2343 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2345 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2347 i32 ttl1 = ip1->ttl;
2349 /* Input node should have rejected packets with ttl 0. */
2350 ASSERT (ip1->ttl > 0);
2352 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2353 checksum1 += checksum1 >= 0xffff;
2355 ip1->checksum = checksum1;
2360 * If the ttl drops below 1 when forwarding, generate
2363 if (PREDICT_FALSE (ttl1 <= 0))
2365 error1 = IP4_ERROR_TIME_EXPIRED;
2366 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2367 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2368 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2370 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2373 /* Verify checksum. */
2374 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2375 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2379 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2382 /* Rewrite packet header and updates lengths. */
2383 adj0 = ip_get_adjacency (lm, adj_index0);
2384 adj1 = ip_get_adjacency (lm, adj_index1);
2386 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2387 rw_len0 = adj0[0].rewrite_header.data_bytes;
2388 rw_len1 = adj1[0].rewrite_header.data_bytes;
2389 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2390 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2392 /* Check MTU of outgoing interface. */
2394 (vlib_buffer_length_in_chain (vm, p0) >
2396 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2399 (vlib_buffer_length_in_chain (vm, p1) >
2401 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2405 * pre-fetch the per-adjacency counters
2407 vlib_prefetch_combined_counter (&adjacency_counters,
2408 cpu_index, adj_index0);
2409 vlib_prefetch_combined_counter (&adjacency_counters,
2410 cpu_index, adj_index1);
2412 /* Don't adjust the buffer for a ttl issue; the icmp-error node wants
2413 * to see the IP header */
2414 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2416 next0 = adj0[0].rewrite_header.next_index;
2417 p0->current_data -= rw_len0;
2418 p0->current_length += rw_len0;
2419 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2420 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2422 vnet_feature_arc_start (lm->output_feature_arc_index,
2423 tx_sw_if_index0, &next0, p0);
2425 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2427 next1 = adj1[0].rewrite_header.next_index;
2428 p1->current_data -= rw_len1;
2429 p1->current_length += rw_len1;
2431 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2432 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2434 vnet_feature_arc_start (lm->output_feature_arc_index,
2435 tx_sw_if_index1, &next1, p1);
2438 /* Guess we are only writing on simple Ethernet header. */
2439 vnet_rewrite_two_headers (adj0[0], adj1[0],
2440 ip0, ip1, sizeof (ethernet_header_t));
2443 * Bump the per-adjacency counters
2445 vlib_increment_combined_counter
2446 (&adjacency_counters,
2448 adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2450 vlib_increment_combined_counter
2451 (&adjacency_counters,
2453 adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2457 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2458 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2461 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2462 to_next, n_left_to_next,
2463 pi0, pi1, next0, next1);
2466 while (n_left_from > 0 && n_left_to_next > 0)
2468 ip_adjacency_t *adj0;
2471 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2472 u32 tx_sw_if_index0;
2474 pi0 = to_next[0] = from[0];
2476 p0 = vlib_get_buffer (vm, pi0);
2478 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2480 /* We should never rewrite a pkt using the MISS adjacency */
2481 ASSERT (adj_index0);
2483 adj0 = ip_get_adjacency (lm, adj_index0);
2485 ip0 = vlib_buffer_get_current (p0);
2487 error0 = IP4_ERROR_NONE;
2488 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2490 /* Decrement TTL & update checksum. */
2491 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2493 i32 ttl0 = ip0->ttl;
2495 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2497 checksum0 += checksum0 >= 0xffff;
2499 ip0->checksum = checksum0;
2501 ASSERT (ip0->ttl > 0);
2507 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2509 if (PREDICT_FALSE (ttl0 <= 0))
2512 * If the ttl drops below 1 when forwarding, generate
2515 error0 = IP4_ERROR_TIME_EXPIRED;
2516 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2517 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2518 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2519 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2525 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2528 vlib_prefetch_combined_counter (&adjacency_counters,
2529 cpu_index, adj_index0);
2531 /* Guess we are only writing on simple Ethernet header. */
2532 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2534 /* Update packet buffer attributes/set output interface. */
2535 rw_len0 = adj0[0].rewrite_header.data_bytes;
2536 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2538 vlib_increment_combined_counter
2539 (&adjacency_counters,
2541 adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2543 /* Check MTU of outgoing interface. */
2544 error0 = (vlib_buffer_length_in_chain (vm, p0)
2545 > adj0[0].rewrite_header.max_l3_packet_bytes
2546 ? IP4_ERROR_MTU_EXCEEDED : error0);
2548 p0->error = error_node->errors[error0];
2550 /* Don't adjust the buffer for a ttl issue; the icmp-error node wants
2551 * to see the IP header */
2552 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2554 p0->current_data -= rw_len0;
2555 p0->current_length += rw_len0;
2556 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2558 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2559 next0 = adj0[0].rewrite_header.next_index;
2563 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2566 vnet_feature_arc_start (lm->output_feature_arc_index,
2567 tx_sw_if_index0, &next0, p0);
2574 n_left_to_next -= 1;
2576 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2577 to_next, n_left_to_next,
2581 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2584 /* Need to do trace after rewrites to pick up new packet data. */
2585 if (node->flags & VLIB_NODE_FLAG_TRACE)
2586 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2588 return frame->n_vectors;
2592 /** @brief IPv4 rewrite node.
2595 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2596 header checksum, fetch the ip adjacency, check the outbound mtu,
2597 apply the adjacency rewrite, and send pkts to the adjacency
2598 rewrite header's rewrite_next_index.
2600 @param vm vlib_main_t corresponding to the current thread
2601 @param node vlib_node_runtime_t
2602 @param frame vlib_frame_t whose contents should be dispatched
2604 @par Graph mechanics: buffer metadata, next index usage
2607 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2608 - the rewrite adjacency index
2609 - <code>adj->lookup_next_index</code>
2610 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2611 the packet will be dropped.
2612 - <code>adj->rewrite_header</code>
2613 - Rewrite string length, rewrite string, next_index
2616 - <code>b->current_data, b->current_length</code>
2617 - Updated net of applying the rewrite string
2619 <em>Next Indices:</em>
2620 - <code> adj->rewrite_header.next_index </code>
/* Node function registered as "ip4-rewrite": a thin wrapper that hands the
   frame to ip4_rewrite_inline with a final argument of 0 (ip4_midchain
   passes 1) and returns whatever the inline worker returns. */
2624 ip4_rewrite (vlib_main_t * vm,
2625 vlib_node_runtime_t * node, vlib_frame_t * frame)
2627 return ip4_rewrite_inline (vm, node, frame, 0);
/* Node function registered as "ip4-midchain": same worker as ip4_rewrite,
   but calls ip4_rewrite_inline with a final argument of 1.
   NOTE(review): presumably that flag enables the midchain fixup_func calls
   seen in the worker -- confirm against the worker's signature. */
2631 ip4_midchain (vlib_main_t * vm,
2632 vlib_node_runtime_t * node, vlib_frame_t * frame)
2634 return ip4_rewrite_inline (vm, node, frame, 1);
/* Registers the "ip4-rewrite" graph node with ip4_rewrite as its function
   and format_ip4_rewrite_trace as its trace formatter.  The next-node table
   maps IP4_REWRITE_NEXT_DROP to "error-drop" and
   IP4_REWRITE_NEXT_ICMP_ERROR to "ip4-icmp-error". */
2638 VLIB_REGISTER_NODE (ip4_rewrite_node) =
2640 .function = ip4_rewrite,.name = "ip4-rewrite",.vector_size =
2641 sizeof (u32),.format_trace = format_ip4_rewrite_trace,.n_next_nodes =
2644 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2645 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",}
2648 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite);
/* Registers the "ip4-midchain" graph node with ip4_midchain as its function
   and format_ip4_forward_next_trace as its trace formatter.
   NOTE(review): the value assigned to .sibling_of is not visible in this
   listing -- confirm which node it shares next-nodes with. */
2650 VLIB_REGISTER_NODE (ip4_midchain_node) =
2652 .function = ip4_midchain,.name = "ip4-midchain",.vector_size =
2653 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.sibling_of =
2656 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* CLI handler for "set interface ip table <interface> <table-id>".
   Parses an interface and a numeric table id from the input, finds or
   creates (and locks) the IPv4 FIB table for that id, and records the
   resulting fib index for the interface in
   ip4_main.fib_index_by_sw_if_index.
   Builds a clib error when the interface or the table id cannot be parsed.
   NOTE(review): the control flow after each error assignment and the final
   return are not visible in this listing -- confirm against the full
   source. */
2658 static clib_error_t *
2659 add_del_interface_table (vlib_main_t * vm,
2660 unformat_input_t * input, vlib_cli_command_t * cmd)
2662 vnet_main_t *vnm = vnet_get_main ();
2663 clib_error_t *error = 0;
2664 u32 sw_if_index, table_id;
/* First token: the interface, resolved to a sw_if_index. */
2668 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2670 error = clib_error_return (0, "unknown interface `%U'",
2671 format_unformat_error, input);
/* Second token: the numeric table id. */
2675 if (unformat (input, "%d", &table_id))
2679 error = clib_error_return (0, "expected table id `%U'",
2680 format_unformat_error, input);
2685 ip4_main_t *im = &ip4_main;
/* Look up the FIB for table_id, creating it if needed; the call also takes
   a lock on the table (per the function name). */
2688 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2693 // changing an interface's table has consequences for any connecteds
2694 // and adj-fibs already installed.
/* Grow the per-interface vector if needed, then bind interface -> FIB. */
2696 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2697 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2705 * Place the indicated interface into the supplied IPv4 FIB table (also known
2706 * as a VRF). If the FIB table does not exist, this command creates it. To
2707 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2708 * FIB table will only be displayed if a route has been added to the table, or
2709 * an IP Address is assigned to an interface in the table (which adds a route
2712 * @note IP addresses added after setting the interface IP table end up in
2713 * the indicated FIB table. If the IP address is added prior to adding the
2714 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2715 * but potentially counter-intuitive results occur if you provision interface
2716 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2717 * IP table ID provisioned. It might be marginally useful to evade source RPF
2718 * drops to put an interface address into multiple FIBs.
2721 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2722 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Binds the CLI path "set interface ip table" to add_del_interface_table. */
2725 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2727 .path = "set interface ip table",
2728 .function = add_del_interface_table,
2729 .short_help = "set interface ip table <interface> <table-id>",
/* Node function for "ip4-lookup-multicast" (see ip4_lookup_multicast_node).
   For every buffer in the frame it:
     - picks a FIB index: sw_if_index[VLIB_TX] when that is not ~0,
       otherwise the index mapped from sw_if_index[VLIB_RX];
     - obtains a load-balance index from ip4_fib_table_lookup_lb and the
       matching load_balance_t;
     - computes the flow hash with the load-balance's hash config and picks
       the bucket at (flow_hash & lb_n_buckets_minus_1);
     - stores the bucket's dpoi_index in ip.adj_index[VLIB_TX] and uses its
       dpoi_next_node as the buffer's next node;
     - bumps the per-load-balance combined counter by one packet and the
       buffer chain length.
   Buffers are handled two at a time while at least four remain, then one
   at a time.  Traces are recorded when the node has tracing enabled.
   Returns frame->n_vectors.
   NOTE(review): this listing omits some short lines (braces, pointer
   advances, the lookup key argument, switch cases) -- confirm details
   against the full source. */
2735 ip4_lookup_multicast (vlib_main_t * vm,
2736 vlib_node_runtime_t * node, vlib_frame_t * frame)
2738 ip4_main_t *im = &ip4_main;
2739 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
2740 u32 n_left_from, n_left_to_next, *from, *to_next;
2741 ip_lookup_next_t next;
2742 u32 cpu_index = os_get_cpu_number ();
2744 from = vlib_frame_vector_args (frame);
2745 n_left_from = frame->n_vectors;
/* Start by speculating that packets go to the node's cached next index. */
2746 next = node->cached_next_index;
2748 while (n_left_from > 0)
2750 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: needs four buffers left because from[2] and from[3] are
   prefetched while from[0] and from[1] are processed. */
2752 while (n_left_from >= 4 && n_left_to_next >= 2)
2754 vlib_buffer_t *p0, *p1;
2755 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2756 ip_lookup_next_t next0, next1;
2757 ip4_header_t *ip0, *ip1;
2758 u32 fib_index0, fib_index1;
2759 const dpo_id_t *dpo0, *dpo1;
2760 const load_balance_t *lb0, *lb1;
2762 /* Prefetch next iteration. */
2764 vlib_buffer_t *p2, *p3;
2766 p2 = vlib_get_buffer (vm, from[2]);
2767 p3 = vlib_get_buffer (vm, from[3]);
2769 vlib_prefetch_buffer_header (p2, LOAD);
2770 vlib_prefetch_buffer_header (p3, LOAD);
2772 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2773 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
2776 pi0 = to_next[0] = from[0];
2777 pi1 = to_next[1] = from[1];
2779 p0 = vlib_get_buffer (vm, pi0);
2780 p1 = vlib_get_buffer (vm, pi1);
2782 ip0 = vlib_buffer_get_current (p0);
2783 ip1 = vlib_buffer_get_current (p1);
/* FIB selection: RX-interface mapping, overridden by sw_if_index[VLIB_TX]
   when that field is not ~0 (the assignment targets are on lines not
   visible here). */
2786 vec_elt (im->fib_index_by_sw_if_index,
2787 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2789 vec_elt (im->fib_index_by_sw_if_index,
2790 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2792 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
2793 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2795 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
2796 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
2798 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2800 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index1),
2803 lb0 = load_balance_get (lb_index0);
2804 lb1 = load_balance_get (lb_index1);
/* Bucket selection below masks with lb_n_buckets_minus_1, which is only a
   valid modulo when the bucket count is a non-zero power of two. */
2806 ASSERT (lb0->lb_n_buckets > 0);
2807 ASSERT (is_pow2 (lb0->lb_n_buckets));
2808 ASSERT (lb1->lb_n_buckets > 0);
2809 ASSERT (is_pow2 (lb1->lb_n_buckets));
2811 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2812 (ip0, lb0->lb_hash_config);
2814 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2815 (ip1, lb1->lb_hash_config);
2817 dpo0 = load_balance_get_bucket_i (lb0,
2818 (vnet_buffer (p0)->ip.flow_hash &
2819 (lb0->lb_n_buckets_minus_1)));
2820 dpo1 = load_balance_get_bucket_i (lb1,
2821 (vnet_buffer (p1)->ip.flow_hash &
2822 (lb1->lb_n_buckets_minus_1)));
2824 next0 = dpo0->dpoi_next_node;
2825 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2826 next1 = dpo1->dpoi_next_node;
2827 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2829 if (1) /* $$$$$$ HACK FIXME */
2830 vlib_increment_combined_counter
2831 (cm, cpu_index, lb_index0, 1,
2832 vlib_buffer_length_in_chain (vm, p0));
2833 if (1) /* $$$$$$ HACK FIXME */
2834 vlib_increment_combined_counter
2835 (cm, cpu_index, lb_index1, 1,
2836 vlib_buffer_length_in_chain (vm, p1));
2840 n_left_to_next -= 2;
/* wrong_next: bit 0 set when p0 mispredicted, bit 1 set when p1 did.
   Mispredicted buffers are handed to their real next via
   vlib_set_next_frame_buffer and their slot count is given back. */
2843 wrong_next = (next0 != next) + 2 * (next1 != next);
2844 if (PREDICT_FALSE (wrong_next != 0))
2852 n_left_to_next += 1;
2853 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2859 n_left_to_next += 1;
2860 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2866 n_left_to_next += 2;
2867 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2868 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2872 vlib_put_next_frame (vm, node, next, n_left_to_next);
2874 vlib_get_next_frame (vm, node, next, to_next,
/* Single loop: same per-buffer steps as above for the remaining buffers. */
2881 while (n_left_from > 0 && n_left_to_next > 0)
2886 ip_lookup_next_t next0;
2888 const dpo_id_t *dpo0;
2889 const load_balance_t *lb0;
2894 p0 = vlib_get_buffer (vm, pi0);
2896 ip0 = vlib_buffer_get_current (p0);
2898 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2899 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2900 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
2901 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2903 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2906 lb0 = load_balance_get (lb_index0);
2908 ASSERT (lb0->lb_n_buckets > 0);
2909 ASSERT (is_pow2 (lb0->lb_n_buckets));
2911 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2912 (ip0, lb0->lb_hash_config);
2914 dpo0 = load_balance_get_bucket_i (lb0,
2915 (vnet_buffer (p0)->ip.flow_hash &
2916 (lb0->lb_n_buckets_minus_1)));
2918 next0 = dpo0->dpoi_next_node;
2919 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2921 if (1) /* $$$$$$ HACK FIXME */
2922 vlib_increment_combined_counter
2923 (cm, cpu_index, lb_index0, 1,
2924 vlib_buffer_length_in_chain (vm, p0));
2928 n_left_to_next -= 1;
/* On a misprediction, close the current next frame and open another one
   before re-enqueueing (the intermediate lines are not visible here). */
2931 if (PREDICT_FALSE (next0 != next))
2933 n_left_to_next += 1;
2934 vlib_put_next_frame (vm, node, next, n_left_to_next);
2936 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2939 n_left_to_next -= 1;
2943 vlib_put_next_frame (vm, node, next, n_left_to_next);
2946 if (node->flags & VLIB_NODE_FLAG_TRACE)
2947 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2949 return frame->n_vectors;
/* Registers the "ip4-lookup-multicast" graph node.  It declares no next
   nodes of its own (.n_next_nodes = 0) and is a sibling of "ip4-lookup";
   traces are formatted with format_ip4_lookup_trace. */
2952 VLIB_REGISTER_NODE (ip4_lookup_multicast_node, static) =
2954 .function = ip4_lookup_multicast,.name =
2955 "ip4-lookup-multicast",.vector_size = sizeof (u32),.sibling_of =
2956 "ip4-lookup",.format_trace = format_ip4_lookup_trace,.n_next_nodes = 0,};
2958 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node,
2959 ip4_lookup_multicast);
/* Registers the "ip4-multicast" graph node.  Its node function is ip4_drop
   and next index 0 is "error-drop". */
2961 VLIB_REGISTER_NODE (ip4_multicast_node, static) =
2963 .function = ip4_drop,.name = "ip4-multicast",.vector_size =
2964 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
2967 [0] = "error-drop",}
/* Cross-checks two lookups of address `a` in the FIB at index fib_index0.
   It walks the FIB's mtrie by hand in four steps (indices 0..3), falls
   back to the mtrie's default leaf when the result is empty, and compares
   the resulting index with what ip4_fib_table_lookup_lb returns.
   Returns non-zero when both lookups agree, zero otherwise. */
2971 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2973 ip4_fib_mtrie_t *mtrie0;
2974 ip4_fib_mtrie_leaf_t leaf0;
2977 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* One lookup step per index 0..3, starting from the root leaf. */
2979 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2980 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2981 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2982 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2983 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2985 /* Handle default route. */
2986 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2988 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2990 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/* CLI handler for "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]".
   Parses an optional table value, an optional count and a base IPv4
   address, then calls ip4_lookup_validate n times, adding one to the
   address (in host byte order) between iterations, and prints how many
   lookups failed validation.
   Returns a clib error on unknown input or when the table check fails.
   NOTE(review): the parsed "table" value is passed straight to ip4_fib_get
   and to ip4_lookup_validate, i.e. it is used as a fib index rather than
   translated from a table id -- confirm this is intended.
   NOTE(review): the declarations, the derivation of n from count and the
   error counter update are not visible in this listing. */
2993 static clib_error_t *
2994 test_lookup_command_fn (vlib_main_t * vm,
2995 unformat_input_t * input, vlib_cli_command_t * cmd)
3002 ip4_address_t ip4_base_address;
3005 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3007 if (unformat (input, "table %d", &table_id))
3009 /* Make sure the entry exists. */
3010 fib = ip4_fib_get (table_id);
3011 if ((fib) && (fib->index != table_id))
3012 return clib_error_return (0, "<fib-index> %d does not exist",
/* count is parsed with %f, i.e. as a floating-point value. */
3015 else if (unformat (input, "count %f", &count))
3018 else if (unformat (input, "%U",
3019 unformat_ip4_address, &ip4_base_address))
3022 return clib_error_return (0, "unknown input `%U'",
3023 format_unformat_error, input);
3028 for (i = 0; i < n; i++)
3030 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Advance to the next address: convert to host order, add one, convert
   back to network order. */
3033 ip4_base_address.as_u32 =
3034 clib_host_to_net_u32 (1 +
3035 clib_net_to_host_u32 (ip4_base_address.as_u32));
3039 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3041 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3047 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3048 * given FIB table to determine if there is a conflict with the
3049 * adjacency table. The fib-id can be determined by using the
3050 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3053 * @todo This command uses fib-id, other commands use table-id (not
3054 * just a name, they are different indexes). Would like to change this
3055 * to table-id for consistency.
3058 * Example of how to run the test lookup command:
3059 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3060 * No errors in 2 lookups
/* Binds the CLI path "test lookup" to test_lookup_command_fn. */
3064 VLIB_CLI_COMMAND (lookup_test_command, static) =
3066 .path = "test lookup",
3067 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3068 .function = test_lookup_command_fn,
/* Sets the flow hash configuration of the IPv4 FIB identified by table_id.
   The table id is translated to a fib index through
   ip4_main.fib_index_by_table_id, and the FIB's flow_hash_config field is
   overwritten with flow_hash_config.
   Can return VNET_API_ERROR_NO_SUCH_FIB.
   NOTE(review): the condition guarding that return is not visible in this
   listing; presumably it fires when the hash lookup finds nothing --
   confirm.  The success return value is likewise not visible. */
3073 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3075 ip4_main_t *im4 = &ip4_main;
3077 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3080 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] holds the fib index stored for this table id. */
3082 fib = ip4_fib_get (p[0]);
3084 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash table <table-id> [src] [dst] ...".
   Parses "table <id>" plus any of the keywords generated from
   foreach_flow_hash_bit, OR-ing each keyword's bit into flow_hash_config,
   then applies the result with vnet_set_ip4_flow_hash.
   Returns a clib error for unknown input or when the FIB table does not
   exist; any other unexpected result is logged with clib_warning.
   NOTE(review): the macro definition line, the switch statement and the
   final return are not visible in this listing. */
3088 static clib_error_t *
3089 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3090 unformat_input_t * input,
3091 vlib_cli_command_t * cmd)
3095 u32 flow_hash_config = 0;
3098 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3100 if (unformat (input, "table %d", &table_id))
3103 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3104 foreach_flow_hash_bit
3111 return clib_error_return (0, "unknown input `%U'",
3112 format_unformat_error, input);
3114 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3120 case VNET_API_ERROR_NO_SUCH_FIB:
3121 return clib_error_return (0, "no such FIB table %d", table_id);
3124 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3132 * Configure the set of IPv4 fields used by the flow hash.
3135 * Example of how to set the flow hash on a given table:
3136 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3137 * Example of how to display the configured flow hash:
3138 * @cliexstart{show ip fib}
3139 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3142 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3143 * [0] [@0]: dpo-drop ip6
3146 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3147 * [0] [@0]: dpo-drop ip6
3150 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3151 * [0] [@0]: dpo-drop ip6
3154 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3155 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3158 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3159 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3160 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3161 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3162 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3165 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3166 * [0] [@0]: dpo-drop ip6
3167 * 255.255.255.255/32
3169 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3170 * [0] [@0]: dpo-drop ip6
3171 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3174 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3175 * [0] [@0]: dpo-drop ip6
3178 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3179 * [0] [@0]: dpo-drop ip6
3182 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3183 * [0] [@4]: ipv4-glean: af_packet0
3186 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3187 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3190 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3191 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3194 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3195 * [0] [@4]: ipv4-glean: af_packet1
3198 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3199 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3202 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3203 * [0] [@0]: dpo-drop ip6
3206 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3207 * [0] [@0]: dpo-drop ip6
3208 * 255.255.255.255/32
3210 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3211 * [0] [@0]: dpo-drop ip6
/* Binds the CLI path "set ip flow-hash" to set_ip_flow_hash_command_fn. */
3215 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3217 .path = "set ip flow-hash",
3219 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3220 .function = set_ip_flow_hash_command_fn,
/* Associates a classifier table with an interface for IPv4.
   Validates sw_if_index against the software interface pool and, unless
   table_index is ~0, validates table_index against the classifier table
   pool.  Records table_index in
   lookup_main.classify_table_index_by_sw_if_index[sw_if_index].  If the
   interface has a first IPv4 address, a FIB prefix is built from it and a
   FIB_SOURCE_CLASSIFY entry is either added with a classify DPO
   (table_index != ~0) or removed.
   Returns VNET_API_ERROR_NO_MATCHING_INTERFACE for a free interface index
   and VNET_API_ERROR_NO_SUCH_ENTRY for a free classifier table index.
   NOTE(review): the remaining prefix fields, the else keyword before the
   remove call and the success return are not visible in this listing. */
3225 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3228 vnet_main_t *vnm = vnet_get_main ();
3229 vnet_interface_main_t *im = &vnm->interface_main;
3230 ip4_main_t *ipm = &ip4_main;
3231 ip_lookup_main_t *lm = &ipm->lookup_main;
3232 vnet_classify_main_t *cm = &vnet_classify_main;
3233 ip4_address_t *if_addr;
3235 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3236 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* table_index == ~0 skips the pool check; further down the same value
   selects removal of the classify entry instead of addition. */
3238 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3239 return VNET_API_ERROR_NO_SUCH_ENTRY;
3241 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3242 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3244 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* Only touch the FIB when the interface actually has an address. */
3246 if (NULL != if_addr)
3248 fib_prefix_t pfx = {
3250 .fp_proto = FIB_PROTOCOL_IP4,
3251 .fp_addr.ip4 = *if_addr,
3255 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3259 if (table_index != (u32) ~ 0)
3261 dpo_id_t dpo = DPO_INVALID;
3266 classify_dpo_create (DPO_PROTO_IP4, table_index));
3268 fib_table_entry_special_dpo_add (fib_index,
3270 FIB_SOURCE_CLASSIFY,
3271 FIB_ENTRY_FLAG_NONE, &dpo);
3276 fib_table_entry_special_remove (fib_index,
3277 &pfx, FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify intfc <interface> table-index <idx>".
   Parses "table-index <n>" and "intfc <interface>" in any order, requires
   both to be present, then calls vnet_set_ip4_classify_intfc.
   Returns a clib error when an argument is missing, the interface does not
   exist or the classifier table does not exist.
   NOTE(review): the handling of unrecognised input, the switch statement
   and the final return are not visible in this listing. */
3284 static clib_error_t *
3285 set_ip_classify_command_fn (vlib_main_t * vm,
3286 unformat_input_t * input,
3287 vlib_cli_command_t * cmd)
3289 u32 table_index = ~0;
/* Tracked separately because ~0 is itself accepted as a table_index by
   vnet_set_ip4_classify_intfc. */
3290 int table_index_set = 0;
3291 u32 sw_if_index = ~0;
3294 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3296 if (unformat (input, "table-index %d", &table_index))
3297 table_index_set = 1;
3298 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3299 vnet_get_main (), &sw_if_index))
3305 if (table_index_set == 0)
3306 return clib_error_return (0, "classify table-index must be specified");
3308 if (sw_if_index == ~0)
3309 return clib_error_return (0, "interface / subif must be specified");
3311 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3318 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3319 return clib_error_return (0, "No such interface");
3321 case VNET_API_ERROR_NO_SUCH_ENTRY:
3322 return clib_error_return (0, "No such classifier table");
3328 * Assign a classification table to an interface. The classification
3329 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3330 * commands. Once the table is created, use this command to filter packets
3334 * Example of how to assign a classification table to an interface:
3335 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Binds the CLI path "set ip classify" to set_ip_classify_command_fn. */
3338 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3340 .path = "set ip classify",
3342 "set ip classify intfc <interface> table-index <classify-idx>",
3343 .function = set_ip_classify_command_fn,
3348 * fd.io coding-style-patch-verification: ON
3351 * eval: (c-set-style "gnu")