2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
56 * @brief IPv4 Forwarding.
58 * This file contains the source code for IPv4 forwarding.
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t *im = &ip4_main;
74 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, *from, *to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number ();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
87 while (n_left_from >= 8 && n_left_to_next >= 4)
89 vlib_buffer_t *p0, *p1, *p2, *p3;
90 ip4_header_t *ip0, *ip1, *ip2, *ip3;
91 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
92 ip_lookup_next_t next0, next1, next2, next3;
93 const load_balance_t *lb0, *lb1, *lb2, *lb3;
94 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
95 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
96 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
97 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
99 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
101 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
103 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
105 flow_hash_config_t flow_hash_config0, flow_hash_config1;
106 flow_hash_config_t flow_hash_config2, flow_hash_config3;
107 u32 hash_c0, hash_c1, hash_c2, hash_c3;
108 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
110 /* Prefetch next iteration. */
112 vlib_buffer_t *p4, *p5, *p6, *p7;
114 p4 = vlib_get_buffer (vm, from[4]);
115 p5 = vlib_get_buffer (vm, from[5]);
116 p6 = vlib_get_buffer (vm, from[6]);
117 p7 = vlib_get_buffer (vm, from[7]);
119 vlib_prefetch_buffer_header (p4, LOAD);
120 vlib_prefetch_buffer_header (p5, LOAD);
121 vlib_prefetch_buffer_header (p6, LOAD);
122 vlib_prefetch_buffer_header (p7, LOAD);
124 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
125 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
130 pi0 = to_next[0] = from[0];
131 pi1 = to_next[1] = from[1];
132 pi2 = to_next[2] = from[2];
133 pi3 = to_next[3] = from[3];
140 p0 = vlib_get_buffer (vm, pi0);
141 p1 = vlib_get_buffer (vm, pi1);
142 p2 = vlib_get_buffer (vm, pi2);
143 p3 = vlib_get_buffer (vm, pi3);
145 ip0 = vlib_buffer_get_current (p0);
146 ip1 = vlib_buffer_get_current (p1);
147 ip2 = vlib_buffer_get_current (p2);
148 ip3 = vlib_buffer_get_current (p3);
150 dst_addr0 = &ip0->dst_address;
151 dst_addr1 = &ip1->dst_address;
152 dst_addr2 = &ip2->dst_address;
153 dst_addr3 = &ip3->dst_address;
156 vec_elt (im->fib_index_by_sw_if_index,
157 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
159 vec_elt (im->fib_index_by_sw_if_index,
160 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
162 vec_elt (im->fib_index_by_sw_if_index,
163 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
165 vec_elt (im->fib_index_by_sw_if_index,
166 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
168 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
169 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
171 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
172 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
174 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
175 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
177 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
178 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
181 if (!lookup_for_responses_to_locally_received_packets)
183 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
184 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
185 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
186 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
188 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
190 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
191 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
192 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
193 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
196 tcp0 = (void *) (ip0 + 1);
197 tcp1 = (void *) (ip1 + 1);
198 tcp2 = (void *) (ip2 + 1);
199 tcp3 = (void *) (ip3 + 1);
201 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
202 || ip0->protocol == IP_PROTOCOL_UDP);
203 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
204 || ip1->protocol == IP_PROTOCOL_UDP);
205 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
206 || ip2->protocol == IP_PROTOCOL_UDP);
207 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
208 || ip1->protocol == IP_PROTOCOL_UDP);
210 if (!lookup_for_responses_to_locally_received_packets)
212 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
213 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
214 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
215 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
218 if (!lookup_for_responses_to_locally_received_packets)
220 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
221 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
222 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
223 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
226 if (!lookup_for_responses_to_locally_received_packets)
228 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
229 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
230 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
231 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
234 if (lookup_for_responses_to_locally_received_packets)
236 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
237 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
238 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
239 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
243 /* Handle default route. */
246 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
249 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
252 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
255 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
256 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
257 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
258 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
259 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
262 lb0 = load_balance_get (lb_index0);
263 lb1 = load_balance_get (lb_index1);
264 lb2 = load_balance_get (lb_index2);
265 lb3 = load_balance_get (lb_index3);
267 /* Use flow hash to compute multipath adjacency. */
268 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
269 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
270 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
271 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
272 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
274 flow_hash_config0 = lb0->lb_hash_config;
275 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
276 ip4_compute_flow_hash (ip0, flow_hash_config0);
278 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
280 flow_hash_config1 = lb1->lb_hash_config;
281 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
282 ip4_compute_flow_hash (ip1, flow_hash_config1);
284 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
286 flow_hash_config2 = lb2->lb_hash_config;
287 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
288 ip4_compute_flow_hash (ip2, flow_hash_config2);
290 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
292 flow_hash_config3 = lb3->lb_hash_config;
293 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
294 ip4_compute_flow_hash (ip3, flow_hash_config3);
297 ASSERT (lb0->lb_n_buckets > 0);
298 ASSERT (is_pow2 (lb0->lb_n_buckets));
299 ASSERT (lb1->lb_n_buckets > 0);
300 ASSERT (is_pow2 (lb1->lb_n_buckets));
301 ASSERT (lb2->lb_n_buckets > 0);
302 ASSERT (is_pow2 (lb2->lb_n_buckets));
303 ASSERT (lb3->lb_n_buckets > 0);
304 ASSERT (is_pow2 (lb3->lb_n_buckets));
306 dpo0 = load_balance_get_bucket_i (lb0,
308 (lb0->lb_n_buckets_minus_1)));
309 dpo1 = load_balance_get_bucket_i (lb1,
311 (lb1->lb_n_buckets_minus_1)));
312 dpo2 = load_balance_get_bucket_i (lb2,
314 (lb2->lb_n_buckets_minus_1)));
315 dpo3 = load_balance_get_bucket_i (lb3,
317 (lb3->lb_n_buckets_minus_1)));
319 next0 = dpo0->dpoi_next_node;
320 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
321 next1 = dpo1->dpoi_next_node;
322 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
323 next2 = dpo2->dpoi_next_node;
324 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
325 next3 = dpo3->dpoi_next_node;
326 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
328 vlib_increment_combined_counter
329 (cm, cpu_index, lb_index0, 1,
330 vlib_buffer_length_in_chain (vm, p0)
331 + sizeof (ethernet_header_t));
332 vlib_increment_combined_counter
333 (cm, cpu_index, lb_index1, 1,
334 vlib_buffer_length_in_chain (vm, p1)
335 + sizeof (ethernet_header_t));
336 vlib_increment_combined_counter
337 (cm, cpu_index, lb_index2, 1,
338 vlib_buffer_length_in_chain (vm, p2)
339 + sizeof (ethernet_header_t));
340 vlib_increment_combined_counter
341 (cm, cpu_index, lb_index3, 1,
342 vlib_buffer_length_in_chain (vm, p3)
343 + sizeof (ethernet_header_t));
345 vlib_validate_buffer_enqueue_x4 (vm, node, next,
346 to_next, n_left_to_next,
348 next0, next1, next2, next3);
351 while (n_left_from > 0 && n_left_to_next > 0)
355 __attribute__ ((unused)) tcp_header_t *tcp0;
356 ip_lookup_next_t next0;
357 const load_balance_t *lb0;
358 ip4_fib_mtrie_t *mtrie0;
359 ip4_fib_mtrie_leaf_t leaf0;
360 ip4_address_t *dst_addr0;
361 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
362 flow_hash_config_t flow_hash_config0;
363 const dpo_id_t *dpo0;
369 p0 = vlib_get_buffer (vm, pi0);
371 ip0 = vlib_buffer_get_current (p0);
373 dst_addr0 = &ip0->dst_address;
376 vec_elt (im->fib_index_by_sw_if_index,
377 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
379 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
380 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
382 if (!lookup_for_responses_to_locally_received_packets)
384 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
386 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
388 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
391 tcp0 = (void *) (ip0 + 1);
393 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
394 || ip0->protocol == IP_PROTOCOL_UDP);
396 if (!lookup_for_responses_to_locally_received_packets)
397 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
399 if (!lookup_for_responses_to_locally_received_packets)
400 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
402 if (!lookup_for_responses_to_locally_received_packets)
403 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
405 if (lookup_for_responses_to_locally_received_packets)
406 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
409 /* Handle default route. */
412 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
413 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
416 lb0 = load_balance_get (lbi0);
418 /* Use flow hash to compute multipath adjacency. */
419 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
420 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
422 flow_hash_config0 = lb0->lb_hash_config;
424 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
425 ip4_compute_flow_hash (ip0, flow_hash_config0);
428 ASSERT (lb0->lb_n_buckets > 0);
429 ASSERT (is_pow2 (lb0->lb_n_buckets));
431 dpo0 = load_balance_get_bucket_i (lb0,
433 (lb0->lb_n_buckets_minus_1)));
435 next0 = dpo0->dpoi_next_node;
436 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
438 vlib_increment_combined_counter
439 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
446 if (PREDICT_FALSE (next0 != next))
449 vlib_put_next_frame (vm, node, next, n_left_to_next);
451 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
458 vlib_put_next_frame (vm, node, next, n_left_to_next);
461 if (node->flags & VLIB_NODE_FLAG_TRACE)
462 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
464 return frame->n_vectors;
467 /** @brief IPv4 lookup node.
470 This is the main IPv4 lookup dispatch node.
472 @param vm vlib_main_t corresponding to the current thread
473 @param node vlib_node_runtime_t
474 @param frame vlib_frame_t whose contents should be dispatched
476 @par Graph mechanics: buffer metadata, next index usage
479 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
480 - Indicates the @c sw_if_index value of the interface that the
481 packet was received on.
482 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
483 - When the value is @c ~0 then the node performs a longest prefix
484 match (LPM) for the packet destination address in the FIB attached
485 to the receive interface.
486 - Otherwise perform LPM for the packet destination address in the
487 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
488 value (0, 1, ...) and not a VRF id.
491 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
492 - The lookup result adjacency index.
495 - Dispatches the packet to the node index found in
496 ip_adjacency_t @c adj->lookup_next_index
497 (where @c adj is the lookup result adjacency).
500 ip4_lookup (vlib_main_t * vm,
501 vlib_node_runtime_t * node, vlib_frame_t * frame)
503 return ip4_lookup_inline (vm, node, frame,
504 /* lookup_for_responses_to_locally_received_packets */
509 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
511 VLIB_REGISTER_NODE (ip4_lookup_node) =
513 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
514 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
515 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
517 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
520 ip4_load_balance (vlib_main_t * vm,
521 vlib_node_runtime_t * node, vlib_frame_t * frame)
523 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
524 u32 n_left_from, n_left_to_next, *from, *to_next;
525 ip_lookup_next_t next;
526 u32 cpu_index = os_get_cpu_number ();
528 from = vlib_frame_vector_args (frame);
529 n_left_from = frame->n_vectors;
530 next = node->cached_next_index;
532 if (node->flags & VLIB_NODE_FLAG_TRACE)
533 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
535 while (n_left_from > 0)
537 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
540 while (n_left_from >= 4 && n_left_to_next >= 2)
542 ip_lookup_next_t next0, next1;
543 const load_balance_t *lb0, *lb1;
544 vlib_buffer_t *p0, *p1;
545 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
546 const ip4_header_t *ip0, *ip1;
547 const dpo_id_t *dpo0, *dpo1;
549 /* Prefetch next iteration. */
551 vlib_buffer_t *p2, *p3;
553 p2 = vlib_get_buffer (vm, from[2]);
554 p3 = vlib_get_buffer (vm, from[3]);
556 vlib_prefetch_buffer_header (p2, STORE);
557 vlib_prefetch_buffer_header (p3, STORE);
559 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
560 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
563 pi0 = to_next[0] = from[0];
564 pi1 = to_next[1] = from[1];
571 p0 = vlib_get_buffer (vm, pi0);
572 p1 = vlib_get_buffer (vm, pi1);
574 ip0 = vlib_buffer_get_current (p0);
575 ip1 = vlib_buffer_get_current (p1);
576 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
577 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
579 lb0 = load_balance_get (lbi0);
580 lb1 = load_balance_get (lbi1);
583 * this node is for via FIBs we can re-use the hash value from the
584 * to node if present.
585 * We don't want to use the same hash value at each level in the recursion
586 * graph as that would lead to polarisation
588 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
589 hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
591 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
593 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
595 hc0 = vnet_buffer (p0)->ip.flow_hash =
596 vnet_buffer (p0)->ip.flow_hash >> 1;
600 hc0 = vnet_buffer (p0)->ip.flow_hash =
601 ip4_compute_flow_hash (ip0, hc0);
604 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
606 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
608 hc1 = vnet_buffer (p1)->ip.flow_hash =
609 vnet_buffer (p1)->ip.flow_hash >> 1;
613 hc1 = vnet_buffer (p1)->ip.flow_hash =
614 ip4_compute_flow_hash (ip1, hc1);
619 load_balance_get_bucket_i (lb0,
620 hc0 & (lb0->lb_n_buckets_minus_1));
622 load_balance_get_bucket_i (lb1,
623 hc1 & (lb1->lb_n_buckets_minus_1));
625 next0 = dpo0->dpoi_next_node;
626 next1 = dpo1->dpoi_next_node;
628 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
631 vlib_increment_combined_counter
632 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633 vlib_increment_combined_counter
634 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
636 vlib_validate_buffer_enqueue_x2 (vm, node, next,
637 to_next, n_left_to_next,
638 pi0, pi1, next0, next1);
641 while (n_left_from > 0 && n_left_to_next > 0)
643 ip_lookup_next_t next0;
644 const load_balance_t *lb0;
647 const ip4_header_t *ip0;
648 const dpo_id_t *dpo0;
657 p0 = vlib_get_buffer (vm, pi0);
659 ip0 = vlib_buffer_get_current (p0);
660 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
662 lb0 = load_balance_get (lbi0);
664 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
665 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
667 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
669 hc0 = vnet_buffer (p0)->ip.flow_hash =
670 vnet_buffer (p0)->ip.flow_hash >> 1;
674 hc0 = vnet_buffer (p0)->ip.flow_hash =
675 ip4_compute_flow_hash (ip0, hc0);
680 load_balance_get_bucket_i (lb0,
681 hc0 & (lb0->lb_n_buckets_minus_1));
683 next0 = dpo0->dpoi_next_node;
684 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
686 vlib_increment_combined_counter
687 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
689 vlib_validate_buffer_enqueue_x1 (vm, node, next,
690 to_next, n_left_to_next,
694 vlib_put_next_frame (vm, node, next, n_left_to_next);
697 return frame->n_vectors;
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704 format_ip4_lookup_trace,};
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
708 /* get first interface address */
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711 ip_interface_address_t ** result_ia)
713 ip_lookup_main_t *lm = &im->lookup_main;
714 ip_interface_address_t *ia = 0;
715 ip4_address_t *result = 0;
717 foreach_ip_interface_address (lm, ia, sw_if_index,
718 1 /* honor unnumbered */ ,
722 ip_interface_address_get_address (lm, ia);
728 *result_ia = result ? ia : 0;
733 ip4_add_interface_routes (u32 sw_if_index,
734 ip4_main_t * im, u32 fib_index,
735 ip_interface_address_t * a)
737 ip_lookup_main_t *lm = &im->lookup_main;
738 ip4_address_t *address = ip_interface_address_get_address (lm, a);
740 .fp_len = a->address_length,
741 .fp_proto = FIB_PROTOCOL_IP4,
742 .fp_addr.ip4 = *address,
745 a->neighbor_probe_adj_index = ~0;
749 fib_node_index_t fei;
751 fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP4, NULL, /* No next-hop address */
752 sw_if_index, ~0, // invalid FIB index
753 1, NULL, // no out-label stack
754 FIB_ROUTE_PATH_FLAG_NONE);
755 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
760 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
762 u32 classify_table_index =
763 lm->classify_table_index_by_sw_if_index[sw_if_index];
764 if (classify_table_index != (u32) ~ 0)
766 dpo_id_t dpo = DPO_INVALID;
771 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
773 fib_table_entry_special_dpo_add (fib_index,
776 FIB_ENTRY_FLAG_NONE, &dpo);
781 fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP4, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index
782 1, NULL, // no out-label stack
783 FIB_ROUTE_PATH_FLAG_NONE);
787 ip4_del_interface_routes (ip4_main_t * im,
789 ip4_address_t * address, u32 address_length)
792 .fp_len = address_length,
793 .fp_proto = FIB_PROTOCOL_IP4,
794 .fp_addr.ip4 = *address,
799 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
803 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
807 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
809 ip4_main_t *im = &ip4_main;
811 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
814 * enable/disable only on the 1<->0 transition
818 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
823 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
824 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
827 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
830 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
835 static clib_error_t *
836 ip4_add_del_interface_address_internal (vlib_main_t * vm,
838 ip4_address_t * address,
839 u32 address_length, u32 is_del)
841 vnet_main_t *vnm = vnet_get_main ();
842 ip4_main_t *im = &ip4_main;
843 ip_lookup_main_t *lm = &im->lookup_main;
844 clib_error_t *error = 0;
845 u32 if_address_index, elts_before;
846 ip4_address_fib_t ip4_af, *addr_fib = 0;
848 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
849 ip4_addr_fib_init (&ip4_af, address,
850 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
851 vec_add1 (addr_fib, ip4_af);
854 * there is no support for adj-fib handling in the presence of overlapping
855 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
860 /* When adding an address check that it does not conflict
861 with an existing address. */
862 ip_interface_address_t *ia;
863 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
864 0 /* honor unnumbered */ ,
868 ip_interface_address_get_address
869 (&im->lookup_main, ia);
870 if (ip4_destination_matches_route
871 (im, address, x, ia->address_length)
873 ip4_destination_matches_route (im,
879 ("failed to add %U which conflicts with %U for interface %U",
880 format_ip4_address_and_length, address,
882 format_ip4_address_and_length, x,
884 format_vnet_sw_if_index_name, vnm,
889 elts_before = pool_elts (lm->if_address_pool);
891 error = ip_interface_address_add_del
892 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
896 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
899 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
901 ip4_add_interface_routes (sw_if_index,
902 im, ip4_af.fib_index,
904 (lm->if_address_pool, if_address_index));
906 /* If pool did not grow/shrink: add duplicate address. */
907 if (elts_before != pool_elts (lm->if_address_pool))
909 ip4_add_del_interface_address_callback_t *cb;
910 vec_foreach (cb, im->add_del_interface_address_callbacks)
911 cb->function (im, cb->function_opaque, sw_if_index,
912 address, address_length, if_address_index, is_del);
921 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
922 ip4_address_t * address, u32 address_length,
925 return ip4_add_del_interface_address_internal
926 (vm, sw_if_index, address, address_length, is_del);
929 /* Built-in ip4 unicast rx feature path definition */
931 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
933 .arc_name = "ip4-unicast",
934 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
935 .end_node = "ip4-lookup",
936 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
939 VNET_FEATURE_INIT (ip4_flow_classify, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "ip4-flow-classify",
943 .runs_before = VNET_FEATURES ("ip4-inacl"),
946 VNET_FEATURE_INIT (ip4_inacl, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-inacl",
950 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
953 VNET_FEATURE_INIT (ip4_source_check_1, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-source-check-via-rx",
957 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
960 VNET_FEATURE_INIT (ip4_source_check_2, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-source-check-via-any",
964 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
967 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
969 .arc_name = "ip4-unicast",
970 .node_name = "ip4-source-and-port-range-check-rx",
971 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
974 VNET_FEATURE_INIT (ip4_policer_classify, static) =
976 .arc_name = "ip4-unicast",
977 .node_name = "ip4-policer-classify",
978 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
981 VNET_FEATURE_INIT (ip4_ipsec, static) =
983 .arc_name = "ip4-unicast",
984 .node_name = "ipsec-input-ip4",
985 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
988 VNET_FEATURE_INIT (ip4_vpath, static) =
990 .arc_name = "ip4-unicast",
991 .node_name = "vpath-input-ip4",
992 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
995 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
997 .arc_name = "ip4-unicast",
998 .node_name = "ip4-vxlan-bypass",
999 .runs_before = VNET_FEATURES ("ip4-lookup"),
1002 VNET_FEATURE_INIT (ip4_lookup, static) =
1004 .arc_name = "ip4-unicast",
1005 .node_name = "ip4-lookup",
1006 .runs_before = VNET_FEATURES ("ip4-drop"),
1009 VNET_FEATURE_INIT (ip4_drop, static) =
1011 .arc_name = "ip4-unicast",
1012 .node_name = "ip4-drop",
1013 .runs_before = 0, /* not before any other features */
1017 /* Built-in ip4 multicast rx feature path definition */
1018 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1020 .arc_name = "ip4-multicast",
1021 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1022 .end_node = "ip4-lookup-multicast",
1023 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1026 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1028 .arc_name = "ip4-multicast",
1029 .node_name = "vpath-input-ip4",
1030 .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
1033 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1035 .arc_name = "ip4-multicast",
1036 .node_name = "ip4-lookup-multicast",
1037 .runs_before = VNET_FEATURES ("ip4-drop"),
1040 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1042 .arc_name = "ip4-multicast",
1043 .node_name = "ip4-drop",
1044 .runs_before = 0, /* last feature */
1047 /* Source and port-range check ip4 tx feature path definition */
1048 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1050 .arc_name = "ip4-output",
1051 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1052 .end_node = "interface-output",
1053 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1056 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1058 .arc_name = "ip4-output",
1059 .node_name = "ip4-source-and-port-range-check-tx",
1060 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1063 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1065 .arc_name = "ip4-output",
1066 .node_name = "ipsec-output-ip4",
1067 .runs_before = VNET_FEATURES ("interface-output"),
1070 /* Built-in ip4 tx feature path definition */
1071 VNET_FEATURE_INIT (ip4_interface_output, static) =
1073 .arc_name = "ip4-output",
1074 .node_name = "interface-output",
1075 .runs_before = 0, /* not before any other features */
1079 static clib_error_t *
1080 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1082 ip4_main_t *im = &ip4_main;
1084 /* Fill in lookup tables with default table (0). */
1085 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1087 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1090 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1093 return /* no error */ 0;
1096 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1098 /* Global IP4 main. */
1099 ip4_main_t ip4_main;
1102 ip4_lookup_init (vlib_main_t * vm)
1104 ip4_main_t *im = &ip4_main;
1105 clib_error_t *error;
1108 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1111 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1116 m = pow2_mask (i) << (32 - i);
1119 im->fib_masks[i] = clib_host_to_net_u32 (m);
1122 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1124 /* Create FIB with index 0 and table id of 0. */
1125 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1129 pn = pg_get_node (ip4_lookup_node.index);
1130 pn->unformat_edit = unformat_pg_ip4_header;
1134 ethernet_arp_header_t h;
1136 memset (&h, 0, sizeof (h));
1138 /* Set target ethernet address to all zeros. */
1139 memset (h.ip4_over_ethernet[1].ethernet, 0,
1140 sizeof (h.ip4_over_ethernet[1].ethernet));
1142 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1143 #define _8(f,v) h.f = v;
1144 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1145 _16 (l3_type, ETHERNET_TYPE_IP4);
1146 _8 (n_l2_address_bytes, 6);
1147 _8 (n_l3_address_bytes, 4);
1148 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1152 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1155 /* alloc chunk size */ 8,
1162 VLIB_INIT_FUNCTION (ip4_lookup_init);
1166 /* Adjacency taken. */
1171 /* Packet data, possibly *after* rewrite. */
1172 u8 packet_data[64 - 1 * sizeof (u32)];
1174 ip4_forward_next_trace_t;
1177 format_ip4_forward_next_trace (u8 * s, va_list * args)
1179 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1180 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1181 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1182 uword indent = format_get_indent (s);
1183 s = format (s, "%U%U",
1184 format_white_space, indent,
1185 format_ip4_header, t->packet_data, sizeof (t->packet_data));
1190 format_ip4_lookup_trace (u8 * s, va_list * args)
1192 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1193 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1194 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1195 uword indent = format_get_indent (s);
1197 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1198 t->fib_index, t->dpo_index, t->flow_hash);
1199 s = format (s, "\n%U%U",
1200 format_white_space, indent,
1201 format_ip4_header, t->packet_data, sizeof (t->packet_data));
1206 format_ip4_rewrite_trace (u8 * s, va_list * args)
1208 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1209 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1210 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1211 vnet_main_t *vnm = vnet_get_main ();
1212 uword indent = format_get_indent (s);
1214 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1215 t->fib_index, t->dpo_index, format_ip_adjacency,
1216 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1217 s = format (s, "\n%U%U",
1218 format_white_space, indent,
1219 format_ip_adjacency_packet_data,
1220 vnm, t->dpo_index, t->packet_data, sizeof (t->packet_data));
1224 /* Common trace function for all ip4-forward next nodes. */
1226 ip4_forward_next_trace (vlib_main_t * vm,
1227 vlib_node_runtime_t * node,
1228 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1231 ip4_main_t *im = &ip4_main;
1233 n_left = frame->n_vectors;
1234 from = vlib_frame_vector_args (frame);
1239 vlib_buffer_t *b0, *b1;
1240 ip4_forward_next_trace_t *t0, *t1;
1242 /* Prefetch next iteration. */
1243 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1244 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1249 b0 = vlib_get_buffer (vm, bi0);
1250 b1 = vlib_get_buffer (vm, bi1);
1252 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1254 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1255 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1256 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1258 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1263 clib_memcpy (t0->packet_data,
1264 vlib_buffer_get_current (b0),
1265 sizeof (t0->packet_data));
1267 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1269 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1270 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1271 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1273 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1274 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1275 vec_elt (im->fib_index_by_sw_if_index,
1276 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1277 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1278 sizeof (t1->packet_data));
1288 ip4_forward_next_trace_t *t0;
1292 b0 = vlib_get_buffer (vm, bi0);
1294 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1296 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1297 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1298 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1300 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1301 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1302 vec_elt (im->fib_index_by_sw_if_index,
1303 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1304 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1305 sizeof (t0->packet_data));
1313 ip4_drop_or_punt (vlib_main_t * vm,
1314 vlib_node_runtime_t * node,
1315 vlib_frame_t * frame, ip4_error_t error_code)
1317 u32 *buffers = vlib_frame_vector_args (frame);
1318 uword n_packets = frame->n_vectors;
1320 vlib_error_drop_buffers (vm, node, buffers,
1324 ip4_input_node.index, error_code);
1326 if (node->flags & VLIB_NODE_FLAG_TRACE)
1327 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1333 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1335 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
1339 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1341 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
1344 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1346 .function = ip4_drop,.name = "ip4-drop",.vector_size =
1347 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1350 [0] = "error-drop",}
1353 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
1355 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1357 .function = ip4_punt,.name = "ip4-punt",.vector_size =
1358 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1361 [0] = "error-punt",}
1364 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1366 /* Compute TCP/UDP/ICMP4 checksum in software. */
1368 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1372 u32 ip_header_length, payload_length_host_byte_order;
1373 u32 n_this_buffer, n_bytes_left;
1375 void *data_this_buffer;
1377 /* Initialize checksum with ip header. */
1378 ip_header_length = ip4_header_bytes (ip0);
1379 payload_length_host_byte_order =
1380 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1382 clib_host_to_net_u32 (payload_length_host_byte_order +
1383 (ip0->protocol << 16));
1385 if (BITS (uword) == 32)
1388 ip_csum_with_carry (sum0,
1389 clib_mem_unaligned (&ip0->src_address, u32));
1391 ip_csum_with_carry (sum0,
1392 clib_mem_unaligned (&ip0->dst_address, u32));
1396 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1398 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1399 data_this_buffer = (void *) ip0 + ip_header_length;
1400 if (n_this_buffer + ip_header_length > p0->current_length)
1402 p0->current_length >
1403 ip_header_length ? p0->current_length - ip_header_length : 0;
1406 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1407 n_bytes_left -= n_this_buffer;
1408 if (n_bytes_left == 0)
1411 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1412 p0 = vlib_get_buffer (vm, p0->next_buffer);
1413 data_this_buffer = vlib_buffer_get_current (p0);
1414 n_this_buffer = p0->current_length;
1417 sum16 = ~ip_csum_fold (sum0);
1423 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1425 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1429 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1430 || ip0->protocol == IP_PROTOCOL_UDP);
1432 udp0 = (void *) (ip0 + 1);
1433 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1435 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1436 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1440 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1442 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1443 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1449 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1451 ip4_main_t *im = &ip4_main;
1452 ip_lookup_main_t *lm = &im->lookup_main;
1453 ip_local_next_t next_index;
1454 u32 *from, *to_next, n_left_from, n_left_to_next;
1455 vlib_node_runtime_t *error_node =
1456 vlib_node_get_runtime (vm, ip4_input_node.index);
1458 from = vlib_frame_vector_args (frame);
1459 n_left_from = frame->n_vectors;
1460 next_index = node->cached_next_index;
1462 if (node->flags & VLIB_NODE_FLAG_TRACE)
1463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1465 while (n_left_from > 0)
1467 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1469 while (n_left_from >= 4 && n_left_to_next >= 2)
1471 vlib_buffer_t *p0, *p1;
1472 ip4_header_t *ip0, *ip1;
1473 udp_header_t *udp0, *udp1;
1474 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1475 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1476 const dpo_id_t *dpo0, *dpo1;
1477 const load_balance_t *lb0, *lb1;
1478 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1479 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1480 i32 len_diff0, len_diff1;
1481 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1482 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1485 pi0 = to_next[0] = from[0];
1486 pi1 = to_next[1] = from[1];
1490 n_left_to_next -= 2;
1492 p0 = vlib_get_buffer (vm, pi0);
1493 p1 = vlib_get_buffer (vm, pi1);
1495 ip0 = vlib_buffer_get_current (p0);
1496 ip1 = vlib_buffer_get_current (p1);
1498 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1499 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1501 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1502 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1503 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1504 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1506 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1507 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
1508 fib_index1 = (vnet_buffer (p1)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1509 fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1511 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1512 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1514 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1517 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1519 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1521 /* Treat IP frag packets as "experimental" protocol for now
1522 until support of IP frag reassembly is implemented */
1523 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1524 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1525 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1526 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1527 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1528 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1533 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1534 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1536 udp0 = ip4_next_header (ip0);
1537 udp1 = ip4_next_header (ip1);
1539 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1540 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1541 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1544 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1546 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1548 /* Verify UDP length. */
1549 ip_len0 = clib_net_to_host_u16 (ip0->length);
1550 ip_len1 = clib_net_to_host_u16 (ip1->length);
1551 udp_len0 = clib_net_to_host_u16 (udp0->length);
1552 udp_len1 = clib_net_to_host_u16 (udp1->length);
1554 len_diff0 = ip_len0 - udp_len0;
1555 len_diff1 = ip_len1 - udp_len1;
1557 len_diff0 = is_udp0 ? len_diff0 : 0;
1558 len_diff1 = is_udp1 ? len_diff1 : 0;
1560 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1561 & good_tcp_udp0 & good_tcp_udp1)))
1566 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1567 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1569 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1570 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1575 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1576 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1578 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1579 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1583 good_tcp_udp0 &= len_diff0 >= 0;
1584 good_tcp_udp1 &= len_diff1 >= 0;
1587 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1589 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1591 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1593 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1594 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1596 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1597 error0 = (is_tcp_udp0 && !good_tcp_udp0
1598 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1599 error1 = (is_tcp_udp1 && !good_tcp_udp1
1600 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1603 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1605 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1608 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1611 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1613 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1614 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1615 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1617 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1618 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1619 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1621 lb0 = load_balance_get (lbi0);
1622 lb1 = load_balance_get (lbi1);
1623 dpo0 = load_balance_get_bucket_i (lb0, 0);
1624 dpo1 = load_balance_get_bucket_i (lb1, 0);
1627 * Must have a route to source otherwise we drop the packet.
1628 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1631 * - the source is a recieve => it's from us => bogus, do this
1632 * first since it sets a different error code.
1633 * - uRPF check for any route to source - accept if passes.
1634 * - allow packets destined to the broadcast address from unknown sources
1636 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1637 dpo0->dpoi_type == DPO_RECEIVE) ?
1638 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1639 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1640 !fib_urpf_check_size (lb0->lb_urpf) &&
1641 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1642 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1643 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1644 dpo1->dpoi_type == DPO_RECEIVE) ?
1645 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1646 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1647 !fib_urpf_check_size (lb1->lb_urpf) &&
1648 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1649 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1651 next0 = lm->local_next_by_ip_protocol[proto0];
1652 next1 = lm->local_next_by_ip_protocol[proto1];
1655 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1657 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1659 p0->error = error0 ? error_node->errors[error0] : 0;
1660 p1->error = error1 ? error_node->errors[error1] : 0;
1662 enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
1664 if (PREDICT_FALSE (enqueue_code != 0))
1666 switch (enqueue_code)
1672 n_left_to_next += 1;
1673 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1679 n_left_to_next += 1;
1680 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1684 /* A B B or A B C */
1686 n_left_to_next += 2;
1687 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1688 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1691 vlib_put_next_frame (vm, node, next_index,
1694 vlib_get_next_frame (vm, node, next_index, to_next,
1702 while (n_left_from > 0 && n_left_to_next > 0)
1707 ip4_fib_mtrie_t *mtrie0;
1708 ip4_fib_mtrie_leaf_t leaf0;
1709 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1711 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1712 load_balance_t *lb0;
1713 const dpo_id_t *dpo0;
1715 pi0 = to_next[0] = from[0];
1719 n_left_to_next -= 1;
1721 p0 = vlib_get_buffer (vm, pi0);
1723 ip0 = vlib_buffer_get_current (p0);
1725 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1727 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1728 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1729 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
1730 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1732 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1734 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1737 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1739 /* Treat IP frag packets as "experimental" protocol for now
1740 until support of IP frag reassembly is implemented */
1741 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1742 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1743 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1747 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1749 udp0 = ip4_next_header (ip0);
1751 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1752 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1755 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1757 /* Verify UDP length. */
1758 ip_len0 = clib_net_to_host_u16 (ip0->length);
1759 udp_len0 = clib_net_to_host_u16 (udp0->length);
1761 len_diff0 = ip_len0 - udp_len0;
1763 len_diff0 = is_udp0 ? len_diff0 : 0;
1765 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1770 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1771 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1773 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1774 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1778 good_tcp_udp0 &= len_diff0 >= 0;
1781 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1783 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1785 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1787 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1788 error0 = (is_tcp_udp0 && !good_tcp_udp0
1789 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1792 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1795 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1797 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1798 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1800 lb0 = load_balance_get (lbi0);
1801 dpo0 = load_balance_get_bucket_i (lb0, 0);
1803 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1804 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1806 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1807 dpo0->dpoi_type == DPO_RECEIVE) ?
1808 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1809 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1810 !fib_urpf_check_size (lb0->lb_urpf) &&
1811 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1812 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1814 next0 = lm->local_next_by_ip_protocol[proto0];
1817 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1819 p0->error = error0 ? error_node->errors[error0] : 0;
1821 if (PREDICT_FALSE (next0 != next_index))
1823 n_left_to_next += 1;
1824 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1827 vlib_get_next_frame (vm, node, next_index, to_next,
1831 n_left_to_next -= 1;
1835 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1838 return frame->n_vectors;
1841 VLIB_REGISTER_NODE (ip4_local_node, static) =
1843 .function = ip4_local,.name = "ip4-local",.vector_size =
1844 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1845 IP_LOCAL_N_NEXT,.next_nodes =
1847 [IP_LOCAL_NEXT_DROP] = "error-drop",
1848 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1849 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1850 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}
1853 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
1856 ip4_register_protocol (u32 protocol, u32 node_index)
1858 vlib_main_t *vm = vlib_get_main ();
1859 ip4_main_t *im = &ip4_main;
1860 ip_lookup_main_t *lm = &im->lookup_main;
1862 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1863 lm->local_next_by_ip_protocol[protocol] =
1864 vlib_node_add_next (vm, ip4_local_node.index, node_index);
1867 static clib_error_t *
1868 show_ip_local_command_fn (vlib_main_t * vm,
1869 unformat_input_t * input, vlib_cli_command_t * cmd)
1871 ip4_main_t *im = &ip4_main;
1872 ip_lookup_main_t *lm = &im->lookup_main;
1875 vlib_cli_output (vm, "Protocols handled by ip4_local");
1876 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1878 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1879 vlib_cli_output (vm, "%d", i);
1887 * Display the set of protocols handled by the local IPv4 stack.
1890 * Example of how to display local protocol table:
1891 * @cliexstart{show ip local}
1892 * Protocols handled by ip4_local
1899 VLIB_CLI_COMMAND (show_ip_local, static) =
1901 .path = "show ip local",
1902 .function = show_ip_local_command_fn,
1903 .short_help = "show ip local",
1908 ip4_arp_inline (vlib_main_t * vm,
1909 vlib_node_runtime_t * node,
1910 vlib_frame_t * frame, int is_glean)
1912 vnet_main_t *vnm = vnet_get_main ();
1913 ip4_main_t *im = &ip4_main;
1914 ip_lookup_main_t *lm = &im->lookup_main;
1915 u32 *from, *to_next_drop;
1916 uword n_left_from, n_left_to_next_drop, next_index;
1917 static f64 time_last_seed_change = -1e100;
1918 static u32 hash_seeds[3];
1919 static uword hash_bitmap[256 / BITS (uword)];
1922 if (node->flags & VLIB_NODE_FLAG_TRACE)
1923 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1925 time_now = vlib_time_now (vm);
1926 if (time_now - time_last_seed_change > 1e-3)
1929 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1930 sizeof (hash_seeds));
1931 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1932 hash_seeds[i] = r[i];
1934 /* Mark all hash keys as been no-seen before. */
1935 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1938 time_last_seed_change = time_now;
1941 from = vlib_frame_vector_args (frame);
1942 n_left_from = frame->n_vectors;
1943 next_index = node->cached_next_index;
1944 if (next_index == IP4_ARP_NEXT_DROP)
1945 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1947 while (n_left_from > 0)
1949 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1950 to_next_drop, n_left_to_next_drop);
1952 while (n_left_from > 0 && n_left_to_next_drop > 0)
1954 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1955 ip_adjacency_t *adj0;
1962 p0 = vlib_get_buffer (vm, pi0);
1964 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1965 adj0 = ip_get_adjacency (lm, adj_index0);
1966 ip0 = vlib_buffer_get_current (p0);
1972 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1973 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1978 * this is the Glean case, so we are ARPing for the
1979 * packet's destination
1981 a0 ^= ip0->dst_address.data_u32;
1985 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1989 hash_v3_finalize32 (a0, b0, c0);
1991 c0 &= BITS (hash_bitmap) - 1;
1992 c0 = c0 / BITS (uword);
1993 m0 = (uword) 1 << (c0 % BITS (uword));
1995 bm0 = hash_bitmap[c0];
1996 drop0 = (bm0 & m0) != 0;
1998 /* Mark it as seen. */
1999 hash_bitmap[c0] = bm0 | m0;
2003 to_next_drop[0] = pi0;
2005 n_left_to_next_drop -= 1;
2008 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2009 IP4_ARP_ERROR_REQUEST_SENT];
2012 * the adj has been updated to a rewrite but the node the DPO that got
2013 * us here hasn't - yet. no big deal. we'll drop while we wait.
2015 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2022 * Can happen if the control-plane is programming tables
2023 * with traffic flowing; at least that's today's lame excuse.
2025 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
2026 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2028 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2031 /* Send ARP request. */
2035 ethernet_arp_header_t *h0;
2036 vnet_hw_interface_t *hw_if0;
2039 vlib_packet_template_get_packet (vm,
2040 &im->ip4_arp_request_packet_template,
2043 /* Add rewrite/encap string for ARP packet. */
2044 vnet_rewrite_one_header (adj0[0], h0,
2045 sizeof (ethernet_header_t));
2047 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2049 /* Src ethernet address in ARP header. */
2050 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2052 sizeof (h0->ip4_over_ethernet[0].ethernet));
2056 /* The interface's source address is stashed in the Glean Adj */
2057 h0->ip4_over_ethernet[0].ip4 =
2058 adj0->sub_type.glean.receive_addr.ip4;
2060 /* Copy in destination address we are requesting. This is the
2061 * glean case, so it's the packet's destination.*/
2062 h0->ip4_over_ethernet[1].ip4.data_u32 =
2063 ip0->dst_address.data_u32;
2067 /* Src IP address in ARP header. */
2068 if (ip4_src_address_for_packet (lm, sw_if_index0,
2070 ip4_over_ethernet[0].ip4))
2072 /* No source address available */
2074 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2075 vlib_buffer_free (vm, &bi0, 1);
2079 /* Copy in destination address we are requesting from the
2081 h0->ip4_over_ethernet[1].ip4.data_u32 =
2082 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2085 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2086 b0 = vlib_get_buffer (vm, bi0);
2087 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2089 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2091 vlib_set_next_frame_buffer (vm, node,
2092 adj0->rewrite_header.next_index,
2097 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2100 return frame->n_vectors;
2104 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2106 return (ip4_arp_inline (vm, node, frame, 0));
2110 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2112 return (ip4_arp_inline (vm, node, frame, 1));
2115 static char *ip4_arp_error_strings[] = {
2116 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2117 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2118 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2119 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2120 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2121 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
2124 VLIB_REGISTER_NODE (ip4_arp_node) =
2126 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2127 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2128 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2129 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2131 [IP4_ARP_NEXT_DROP] = "error-drop",}
2134 VLIB_REGISTER_NODE (ip4_glean_node) =
2136 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2137 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2138 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2139 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2141 [IP4_ARP_NEXT_DROP] = "error-drop",}
2144 #define foreach_notrace_ip4_arp_error \
2151 arp_notrace_init (vlib_main_t * vm)
2153 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2155 /* don't trace ARP request packets */
2157 vnet_pcap_drop_trace_filter_add_del \
2158 (rt->errors[IP4_ARP_ERROR_##a], \
2160 foreach_notrace_ip4_arp_error;
2165 VLIB_INIT_FUNCTION (arp_notrace_init);
2168 /* Send an ARP request to see if given destination is reachable on given interface. */
2170 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2172 vnet_main_t *vnm = vnet_get_main ();
2173 ip4_main_t *im = &ip4_main;
2174 ethernet_arp_header_t *h;
2176 ip_interface_address_t *ia;
2177 ip_adjacency_t *adj;
2178 vnet_hw_interface_t *hi;
2179 vnet_sw_interface_t *si;
2183 si = vnet_get_sw_interface (vnm, sw_if_index);
2185 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2187 return clib_error_return (0, "%U: interface %U down",
2188 format_ip4_address, dst,
2189 format_vnet_sw_if_index_name, vnm,
2194 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2197 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2198 return clib_error_return
2199 (0, "no matching interface address for destination %U (interface %U)",
2200 format_ip4_address, dst,
2201 format_vnet_sw_if_index_name, vnm, sw_if_index);
2204 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2207 vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
2210 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2212 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2213 sizeof (h->ip4_over_ethernet[0].ethernet));
2215 h->ip4_over_ethernet[0].ip4 = src[0];
2216 h->ip4_over_ethernet[1].ip4 = dst[0];
2218 b = vlib_get_buffer (vm, bi);
2219 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2220 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2222 /* Add encapsulation string for software interface (e.g. ethernet header). */
2223 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2224 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2227 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2228 u32 *to_next = vlib_frame_vector_args (f);
2231 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2234 return /* no error */ 0;
2239 IP4_REWRITE_NEXT_DROP,
2240 IP4_REWRITE_NEXT_ICMP_ERROR,
2241 } ip4_rewrite_next_t;
2244 ip4_rewrite_inline (vlib_main_t * vm,
2245 vlib_node_runtime_t * node,
2246 vlib_frame_t * frame, int is_midchain)
2248 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2249 u32 *from = vlib_frame_vector_args (frame);
2250 u32 n_left_from, n_left_to_next, *to_next, next_index;
2251 vlib_node_runtime_t *error_node =
2252 vlib_node_get_runtime (vm, ip4_input_node.index);
2254 n_left_from = frame->n_vectors;
2255 next_index = node->cached_next_index;
2256 u32 cpu_index = os_get_cpu_number ();
2258 while (n_left_from > 0)
2260 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2262 while (n_left_from >= 4 && n_left_to_next >= 2)
2264 ip_adjacency_t *adj0, *adj1;
2265 vlib_buffer_t *p0, *p1;
2266 ip4_header_t *ip0, *ip1;
2267 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2268 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2269 u32 tx_sw_if_index0, tx_sw_if_index1;
2271 /* Prefetch next iteration. */
2273 vlib_buffer_t *p2, *p3;
2275 p2 = vlib_get_buffer (vm, from[2]);
2276 p3 = vlib_get_buffer (vm, from[3]);
2278 vlib_prefetch_buffer_header (p2, STORE);
2279 vlib_prefetch_buffer_header (p3, STORE);
2281 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2282 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2285 pi0 = to_next[0] = from[0];
2286 pi1 = to_next[1] = from[1];
2291 n_left_to_next -= 2;
2293 p0 = vlib_get_buffer (vm, pi0);
2294 p1 = vlib_get_buffer (vm, pi1);
2296 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2297 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2299 /* We should never rewrite a pkt using the MISS adjacency */
2300 ASSERT (adj_index0 && adj_index1);
2302 ip0 = vlib_buffer_get_current (p0);
2303 ip1 = vlib_buffer_get_current (p1);
2305 error0 = error1 = IP4_ERROR_NONE;
2306 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2308 /* Decrement TTL & update checksum.
2309 Works either endian, so no need for byte swap. */
2310 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2312 i32 ttl0 = ip0->ttl;
2314 /* Input node should have reject packets with ttl 0. */
2315 ASSERT (ip0->ttl > 0);
2317 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2318 checksum0 += checksum0 >= 0xffff;
2320 ip0->checksum = checksum0;
2325 * If the ttl drops below 1 when forwarding, generate
2328 if (PREDICT_FALSE (ttl0 <= 0))
2330 error0 = IP4_ERROR_TIME_EXPIRED;
2331 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2332 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2333 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2335 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2338 /* Verify checksum. */
2339 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2343 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2345 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2347 i32 ttl1 = ip1->ttl;
2349 /* Input node should have reject packets with ttl 0. */
2350 ASSERT (ip1->ttl > 0);
2352 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2353 checksum1 += checksum1 >= 0xffff;
2355 ip1->checksum = checksum1;
2360 * If the ttl drops below 1 when forwarding, generate
2363 if (PREDICT_FALSE (ttl1 <= 0))
2365 error1 = IP4_ERROR_TIME_EXPIRED;
2366 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2367 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2368 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2370 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2373 /* Verify checksum. */
2374 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2375 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2379 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2382 /* Rewrite packet header and updates lengths. */
2383 adj0 = ip_get_adjacency (lm, adj_index0);
2384 adj1 = ip_get_adjacency (lm, adj_index1);
2386 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2387 rw_len0 = adj0[0].rewrite_header.data_bytes;
2388 rw_len1 = adj1[0].rewrite_header.data_bytes;
2389 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2390 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2392 /* Check MTU of outgoing interface. */
2394 (vlib_buffer_length_in_chain (vm, p0) >
2396 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2399 (vlib_buffer_length_in_chain (vm, p1) >
2401 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2405 * We've already accounted for an ethernet_header_t elsewhere
2407 if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
2408 vlib_increment_combined_counter
2409 (&adjacency_counters, cpu_index, adj_index0,
2410 /* packet increment */ 0,
2411 /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
2413 if (PREDICT_FALSE (rw_len1 > sizeof (ethernet_header_t)))
2414 vlib_increment_combined_counter
2415 (&adjacency_counters, cpu_index, adj_index1,
2416 /* packet increment */ 0,
2417 /* byte increment */ rw_len1 - sizeof (ethernet_header_t));
2419 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2420 * to see the IP headerr */
2421 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2423 next0 = adj0[0].rewrite_header.next_index;
2424 p0->current_data -= rw_len0;
2425 p0->current_length += rw_len0;
2426 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2427 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2429 vnet_feature_arc_start (lm->output_feature_arc_index,
2430 tx_sw_if_index0, &next0, p0);
2432 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2434 next1 = adj1[0].rewrite_header.next_index;
2435 p1->current_data -= rw_len1;
2436 p1->current_length += rw_len1;
2438 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2439 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2441 vnet_feature_arc_start (lm->output_feature_arc_index,
2442 tx_sw_if_index1, &next1, p1);
2445 /* Guess we are only writing on simple Ethernet header. */
2446 vnet_rewrite_two_headers (adj0[0], adj1[0],
2447 ip0, ip1, sizeof (ethernet_header_t));
2451 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2452 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2455 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2456 to_next, n_left_to_next,
2457 pi0, pi1, next0, next1);
2460 while (n_left_from > 0 && n_left_to_next > 0)
2462 ip_adjacency_t *adj0;
2465 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2466 u32 tx_sw_if_index0;
2468 pi0 = to_next[0] = from[0];
2470 p0 = vlib_get_buffer (vm, pi0);
2472 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2474 /* We should never rewrite a pkt using the MISS adjacency */
2475 ASSERT (adj_index0);
2477 adj0 = ip_get_adjacency (lm, adj_index0);
2479 ip0 = vlib_buffer_get_current (p0);
2481 error0 = IP4_ERROR_NONE;
2482 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2484 /* Decrement TTL & update checksum. */
2485 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2487 i32 ttl0 = ip0->ttl;
2489 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2491 checksum0 += checksum0 >= 0xffff;
2493 ip0->checksum = checksum0;
2495 ASSERT (ip0->ttl > 0);
2501 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2503 if (PREDICT_FALSE (ttl0 <= 0))
2506 * If the ttl drops below 1 when forwarding, generate
2509 error0 = IP4_ERROR_TIME_EXPIRED;
2510 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2511 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2512 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2513 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2519 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2522 /* Guess we are only writing on simple Ethernet header. */
2523 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2525 /* Update packet buffer attributes/set output interface. */
2526 rw_len0 = adj0[0].rewrite_header.data_bytes;
2527 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2529 if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
2530 vlib_increment_combined_counter
2531 (&adjacency_counters, cpu_index, adj_index0,
2532 /* packet increment */ 0,
2533 /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
2535 /* Check MTU of outgoing interface. */
2536 error0 = (vlib_buffer_length_in_chain (vm, p0)
2537 > adj0[0].rewrite_header.max_l3_packet_bytes
2538 ? IP4_ERROR_MTU_EXCEEDED : error0);
2540 p0->error = error_node->errors[error0];
2542 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2543 * to see the IP headerr */
2544 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2546 p0->current_data -= rw_len0;
2547 p0->current_length += rw_len0;
2548 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2550 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2551 next0 = adj0[0].rewrite_header.next_index;
2555 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2558 vnet_feature_arc_start (lm->output_feature_arc_index,
2559 tx_sw_if_index0, &next0, p0);
2566 n_left_to_next -= 1;
2568 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2569 to_next, n_left_to_next,
2573 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2576 /* Need to do trace after rewrites to pick up new packet data. */
2577 if (node->flags & VLIB_NODE_FLAG_TRACE)
2578 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2580 return frame->n_vectors;
2584 /** @brief IPv4 rewrite node.
2587 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2588 header checksum, fetch the ip adjacency, check the outbound mtu,
2589 apply the adjacency rewrite, and send pkts to the adjacency
2590 rewrite header's rewrite_next_index.
2592 @param vm vlib_main_t corresponding to the current thread
2593 @param node vlib_node_runtime_t
2594 @param frame vlib_frame_t whose contents should be dispatched
2596 @par Graph mechanics: buffer metadata, next index usage
2599 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2600 - the rewrite adjacency index
2601 - <code>adj->lookup_next_index</code>
2602 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2603 the packet will be dropped.
2604 - <code>adj->rewrite_header</code>
2605 - Rewrite string length, rewrite string, next_index
2608 - <code>b->current_data, b->current_length</code>
2609 - Updated net of applying the rewrite string
2611 <em>Next Indices:</em>
2612 - <code> adj->rewrite_header.next_index </code>
2616 ip4_rewrite (vlib_main_t * vm,
2617 vlib_node_runtime_t * node, vlib_frame_t * frame)
2619 return ip4_rewrite_inline (vm, node, frame, 0);
2623 ip4_midchain (vlib_main_t * vm,
2624 vlib_node_runtime_t * node, vlib_frame_t * frame)
2626 return ip4_rewrite_inline (vm, node, frame, 1);
2630 VLIB_REGISTER_NODE (ip4_rewrite_node) =
2632 .function = ip4_rewrite,.name = "ip4-rewrite",.vector_size =
2633 sizeof (u32),.format_trace = format_ip4_rewrite_trace,.n_next_nodes =
2636 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2637 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",}
2640 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite);
2642 VLIB_REGISTER_NODE (ip4_midchain_node) =
2644 .function = ip4_midchain,.name = "ip4-midchain",.vector_size =
2645 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.sibling_of =
2648 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2650 static clib_error_t *
2651 add_del_interface_table (vlib_main_t * vm,
2652 unformat_input_t * input, vlib_cli_command_t * cmd)
2654 vnet_main_t *vnm = vnet_get_main ();
2655 clib_error_t *error = 0;
2656 u32 sw_if_index, table_id;
2660 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2662 error = clib_error_return (0, "unknown interface `%U'",
2663 format_unformat_error, input);
2667 if (unformat (input, "%d", &table_id))
2671 error = clib_error_return (0, "expected table id `%U'",
2672 format_unformat_error, input);
2677 ip4_main_t *im = &ip4_main;
2680 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2685 // changing an interface's table has consequences for any connecteds
2686 // and adj-fibs already installed.
2688 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2689 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2697 * Place the indicated interface into the supplied IPv4 FIB table (also known
2698 * as a VRF). If the FIB table does not exist, this command creates it. To
2699 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2700 * FIB table will only be displayed if a route has been added to the table, or
2701 * an IP Address is assigned to an interface in the table (which adds a route
2704 * @note IP addresses added after setting the interface IP table end up in
2705 * the indicated FIB table. If the IP address is added prior to adding the
2706 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2707 * but potentially counter-intuitive results occur if you provision interface
2708 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2709 * IP table ID provisioned. It might be marginally useful to evade source RPF
2710 * drops to put an interface address into multiple FIBs.
2713 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2714 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2717 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2719 .path = "set interface ip table",
2720 .function = add_del_interface_table,
2721 .short_help = "set interface ip table <interface> <table-id>",
2727 ip4_lookup_multicast (vlib_main_t * vm,
2728 vlib_node_runtime_t * node, vlib_frame_t * frame)
2730 ip4_main_t *im = &ip4_main;
2731 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
2732 u32 n_left_from, n_left_to_next, *from, *to_next;
2733 ip_lookup_next_t next;
2734 u32 cpu_index = os_get_cpu_number ();
2736 from = vlib_frame_vector_args (frame);
2737 n_left_from = frame->n_vectors;
2738 next = node->cached_next_index;
2740 while (n_left_from > 0)
2742 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2744 while (n_left_from >= 4 && n_left_to_next >= 2)
2746 vlib_buffer_t *p0, *p1;
2747 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2748 ip_lookup_next_t next0, next1;
2749 ip4_header_t *ip0, *ip1;
2750 u32 fib_index0, fib_index1;
2751 const dpo_id_t *dpo0, *dpo1;
2752 const load_balance_t *lb0, *lb1;
2754 /* Prefetch next iteration. */
2756 vlib_buffer_t *p2, *p3;
2758 p2 = vlib_get_buffer (vm, from[2]);
2759 p3 = vlib_get_buffer (vm, from[3]);
2761 vlib_prefetch_buffer_header (p2, LOAD);
2762 vlib_prefetch_buffer_header (p3, LOAD);
2764 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2765 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2768 pi0 = to_next[0] = from[0];
2769 pi1 = to_next[1] = from[1];
2771 p0 = vlib_get_buffer (vm, pi0);
2772 p1 = vlib_get_buffer (vm, pi1);
2774 ip0 = vlib_buffer_get_current (p0);
2775 ip1 = vlib_buffer_get_current (p1);
2778 vec_elt (im->fib_index_by_sw_if_index,
2779 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2781 vec_elt (im->fib_index_by_sw_if_index,
2782 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2784 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
2785 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2787 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
2788 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
2790 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2792 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index1),
2795 lb0 = load_balance_get (lb_index0);
2796 lb1 = load_balance_get (lb_index1);
2798 ASSERT (lb0->lb_n_buckets > 0);
2799 ASSERT (is_pow2 (lb0->lb_n_buckets));
2800 ASSERT (lb1->lb_n_buckets > 0);
2801 ASSERT (is_pow2 (lb1->lb_n_buckets));
2803 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2804 (ip0, lb0->lb_hash_config);
2806 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2807 (ip1, lb1->lb_hash_config);
2809 dpo0 = load_balance_get_bucket_i (lb0,
2810 (vnet_buffer (p0)->ip.flow_hash &
2811 (lb0->lb_n_buckets_minus_1)));
2812 dpo1 = load_balance_get_bucket_i (lb1,
2813 (vnet_buffer (p1)->ip.flow_hash &
2814 (lb1->lb_n_buckets_minus_1)));
2816 next0 = dpo0->dpoi_next_node;
2817 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2818 next1 = dpo1->dpoi_next_node;
2819 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2821 if (1) /* $$$$$$ HACK FIXME */
2822 vlib_increment_combined_counter
2823 (cm, cpu_index, lb_index0, 1,
2824 vlib_buffer_length_in_chain (vm, p0));
2825 if (1) /* $$$$$$ HACK FIXME */
2826 vlib_increment_combined_counter
2827 (cm, cpu_index, lb_index1, 1,
2828 vlib_buffer_length_in_chain (vm, p1));
2832 n_left_to_next -= 2;
2835 wrong_next = (next0 != next) + 2 * (next1 != next);
2836 if (PREDICT_FALSE (wrong_next != 0))
2844 n_left_to_next += 1;
2845 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2851 n_left_to_next += 1;
2852 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2858 n_left_to_next += 2;
2859 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2860 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2864 vlib_put_next_frame (vm, node, next, n_left_to_next);
2866 vlib_get_next_frame (vm, node, next, to_next,
2873 while (n_left_from > 0 && n_left_to_next > 0)
2878 ip_lookup_next_t next0;
2880 const dpo_id_t *dpo0;
2881 const load_balance_t *lb0;
2886 p0 = vlib_get_buffer (vm, pi0);
2888 ip0 = vlib_buffer_get_current (p0);
2890 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2891 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2892 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
2893 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2895 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2898 lb0 = load_balance_get (lb_index0);
2900 ASSERT (lb0->lb_n_buckets > 0);
2901 ASSERT (is_pow2 (lb0->lb_n_buckets));
2903 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2904 (ip0, lb0->lb_hash_config);
2906 dpo0 = load_balance_get_bucket_i (lb0,
2907 (vnet_buffer (p0)->ip.flow_hash &
2908 (lb0->lb_n_buckets_minus_1)));
2910 next0 = dpo0->dpoi_next_node;
2911 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2913 if (1) /* $$$$$$ HACK FIXME */
2914 vlib_increment_combined_counter
2915 (cm, cpu_index, lb_index0, 1,
2916 vlib_buffer_length_in_chain (vm, p0));
2920 n_left_to_next -= 1;
2923 if (PREDICT_FALSE (next0 != next))
2925 n_left_to_next += 1;
2926 vlib_put_next_frame (vm, node, next, n_left_to_next);
2928 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2931 n_left_to_next -= 1;
2935 vlib_put_next_frame (vm, node, next, n_left_to_next);
2938 if (node->flags & VLIB_NODE_FLAG_TRACE)
2939 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2941 return frame->n_vectors;
2944 VLIB_REGISTER_NODE (ip4_lookup_multicast_node, static) =
2946 .function = ip4_lookup_multicast,.name =
2947 "ip4-lookup-multicast",.vector_size = sizeof (u32),.sibling_of =
2948 "ip4-lookup",.format_trace = format_ip4_lookup_trace,.n_next_nodes = 0,};
2950 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node,
2951 ip4_lookup_multicast);
2953 VLIB_REGISTER_NODE (ip4_multicast_node, static) =
2955 .function = ip4_drop,.name = "ip4-multicast",.vector_size =
2956 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
2959 [0] = "error-drop",}
2963 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2965 ip4_fib_mtrie_t *mtrie0;
2966 ip4_fib_mtrie_leaf_t leaf0;
2969 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2971 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2972 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2973 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2974 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2975 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2977 /* Handle default route. */
2978 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2980 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2982 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2985 static clib_error_t *
2986 test_lookup_command_fn (vlib_main_t * vm,
2987 unformat_input_t * input, vlib_cli_command_t * cmd)
2994 ip4_address_t ip4_base_address;
2997 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2999 if (unformat (input, "table %d", &table_id))
3001 /* Make sure the entry exists. */
3002 fib = ip4_fib_get (table_id);
3003 if ((fib) && (fib->index != table_id))
3004 return clib_error_return (0, "<fib-index> %d does not exist",
3007 else if (unformat (input, "count %f", &count))
3010 else if (unformat (input, "%U",
3011 unformat_ip4_address, &ip4_base_address))
3014 return clib_error_return (0, "unknown input `%U'",
3015 format_unformat_error, input);
3020 for (i = 0; i < n; i++)
3022 if (!ip4_lookup_validate (&ip4_base_address, table_id))
3025 ip4_base_address.as_u32 =
3026 clib_host_to_net_u32 (1 +
3027 clib_net_to_host_u32 (ip4_base_address.as_u32));
3031 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3033 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3039 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3040 * given FIB table to determine if there is a conflict with the
3041 * adjacency table. The fib-id can be determined by using the
3042 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3045 * @todo This command uses fib-id, other commands use table-id (not
3046 * just a name, they are different indexes). Would like to change this
3047 * to table-id for consistency.
3050 * Example of how to run the test lookup command:
3051 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3052 * No errors in 2 lookups
3056 VLIB_CLI_COMMAND (lookup_test_command, static) =
3058 .path = "test lookup",
3059 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3060 .function = test_lookup_command_fn,
3065 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3067 ip4_main_t *im4 = &ip4_main;
3069 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3072 return VNET_API_ERROR_NO_SUCH_FIB;
3074 fib = ip4_fib_get (p[0]);
3076 fib->flow_hash_config = flow_hash_config;
3080 static clib_error_t *
3081 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3082 unformat_input_t * input,
3083 vlib_cli_command_t * cmd)
3087 u32 flow_hash_config = 0;
3090 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3092 if (unformat (input, "table %d", &table_id))
3095 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3096 foreach_flow_hash_bit
3103 return clib_error_return (0, "unknown input `%U'",
3104 format_unformat_error, input);
3106 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3112 case VNET_API_ERROR_NO_SUCH_FIB:
3113 return clib_error_return (0, "no such FIB table %d", table_id);
3116 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3124 * Configure the set of IPv4 fields used by the flow hash.
3127 * Example of how to set the flow hash on a given table:
3128 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3129 * Example of display the configured flow hash:
3130 * @cliexstart{show ip fib}
3131 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3134 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3135 * [0] [@0]: dpo-drop ip6
3138 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3139 * [0] [@0]: dpo-drop ip6
3142 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3143 * [0] [@0]: dpo-drop ip6
3146 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3147 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3150 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3151 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3152 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3153 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3154 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3157 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3158 * [0] [@0]: dpo-drop ip6
3159 * 255.255.255.255/32
3161 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3162 * [0] [@0]: dpo-drop ip6
3163 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3166 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3167 * [0] [@0]: dpo-drop ip6
3170 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3171 * [0] [@0]: dpo-drop ip6
3174 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3175 * [0] [@4]: ipv4-glean: af_packet0
3178 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3179 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3182 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3183 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3186 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3187 * [0] [@4]: ipv4-glean: af_packet1
3190 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3191 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3194 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3195 * [0] [@0]: dpo-drop ip6
3198 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3199 * [0] [@0]: dpo-drop ip6
3200 * 255.255.255.255/32
3202 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3203 * [0] [@0]: dpo-drop ip6
3207 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3209 .path = "set ip flow-hash",
3211 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3212 .function = set_ip_flow_hash_command_fn,
3217 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3220 vnet_main_t *vnm = vnet_get_main ();
3221 vnet_interface_main_t *im = &vnm->interface_main;
3222 ip4_main_t *ipm = &ip4_main;
3223 ip_lookup_main_t *lm = &ipm->lookup_main;
3224 vnet_classify_main_t *cm = &vnet_classify_main;
3225 ip4_address_t *if_addr;
3227 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3228 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3230 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3231 return VNET_API_ERROR_NO_SUCH_ENTRY;
3233 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3234 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3236 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3238 if (NULL != if_addr)
3240 fib_prefix_t pfx = {
3242 .fp_proto = FIB_PROTOCOL_IP4,
3243 .fp_addr.ip4 = *if_addr,
3247 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3251 if (table_index != (u32) ~ 0)
3253 dpo_id_t dpo = DPO_INVALID;
3258 classify_dpo_create (DPO_PROTO_IP4, table_index));
3260 fib_table_entry_special_dpo_add (fib_index,
3262 FIB_SOURCE_CLASSIFY,
3263 FIB_ENTRY_FLAG_NONE, &dpo);
3268 fib_table_entry_special_remove (fib_index,
3269 &pfx, FIB_SOURCE_CLASSIFY);
3276 static clib_error_t *
3277 set_ip_classify_command_fn (vlib_main_t * vm,
3278 unformat_input_t * input,
3279 vlib_cli_command_t * cmd)
3281 u32 table_index = ~0;
3282 int table_index_set = 0;
3283 u32 sw_if_index = ~0;
3286 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3288 if (unformat (input, "table-index %d", &table_index))
3289 table_index_set = 1;
3290 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3291 vnet_get_main (), &sw_if_index))
3297 if (table_index_set == 0)
3298 return clib_error_return (0, "classify table-index must be specified");
3300 if (sw_if_index == ~0)
3301 return clib_error_return (0, "interface / subif must be specified");
3303 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3310 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3311 return clib_error_return (0, "No such interface");
3313 case VNET_API_ERROR_NO_SUCH_ENTRY:
3314 return clib_error_return (0, "No such classifier table");
3320 * Assign a classification table to an interface. The classification
3321 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3322 * commands. Once the table is create, use this command to filter packets
3326 * Example of how to assign a classification table to an interface:
3327 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3330 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3332 .path = "set ip classify",
3334 "set ip classify intfc <interface> table-index <classify-idx>",
3335 .function = set_ip_classify_command_fn,
3340 * fd.io coding-style-patch-verification: ON
3343 * eval: (c-set-style "gnu")