2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
/**
 * @file
 * @brief IPv4 Forwarding.
 *
 * This file contains the source code for IPv4 forwarding.
 */
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t *im = &ip4_main;
74 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, *from, *to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number ();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
87 while (n_left_from >= 8 && n_left_to_next >= 4)
89 vlib_buffer_t *p0, *p1, *p2, *p3;
90 ip4_header_t *ip0, *ip1, *ip2, *ip3;
91 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
92 ip_lookup_next_t next0, next1, next2, next3;
93 const load_balance_t *lb0, *lb1, *lb2, *lb3;
94 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
95 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
96 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
97 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
99 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
101 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
103 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
105 flow_hash_config_t flow_hash_config0, flow_hash_config1;
106 flow_hash_config_t flow_hash_config2, flow_hash_config3;
107 u32 hash_c0, hash_c1, hash_c2, hash_c3;
108 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
110 /* Prefetch next iteration. */
112 vlib_buffer_t *p4, *p5, *p6, *p7;
114 p4 = vlib_get_buffer (vm, from[4]);
115 p5 = vlib_get_buffer (vm, from[5]);
116 p6 = vlib_get_buffer (vm, from[6]);
117 p7 = vlib_get_buffer (vm, from[7]);
119 vlib_prefetch_buffer_header (p4, LOAD);
120 vlib_prefetch_buffer_header (p5, LOAD);
121 vlib_prefetch_buffer_header (p6, LOAD);
122 vlib_prefetch_buffer_header (p7, LOAD);
124 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
125 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
130 pi0 = to_next[0] = from[0];
131 pi1 = to_next[1] = from[1];
132 pi2 = to_next[2] = from[2];
133 pi3 = to_next[3] = from[3];
140 p0 = vlib_get_buffer (vm, pi0);
141 p1 = vlib_get_buffer (vm, pi1);
142 p2 = vlib_get_buffer (vm, pi2);
143 p3 = vlib_get_buffer (vm, pi3);
145 ip0 = vlib_buffer_get_current (p0);
146 ip1 = vlib_buffer_get_current (p1);
147 ip2 = vlib_buffer_get_current (p2);
148 ip3 = vlib_buffer_get_current (p3);
150 dst_addr0 = &ip0->dst_address;
151 dst_addr1 = &ip1->dst_address;
152 dst_addr2 = &ip2->dst_address;
153 dst_addr3 = &ip3->dst_address;
156 vec_elt (im->fib_index_by_sw_if_index,
157 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
159 vec_elt (im->fib_index_by_sw_if_index,
160 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
162 vec_elt (im->fib_index_by_sw_if_index,
163 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
165 vec_elt (im->fib_index_by_sw_if_index,
166 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
168 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
169 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
171 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
172 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
174 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
175 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
177 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
178 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
181 if (!lookup_for_responses_to_locally_received_packets)
183 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
184 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
185 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
186 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
188 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
190 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
191 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
192 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
193 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
196 tcp0 = (void *) (ip0 + 1);
197 tcp1 = (void *) (ip1 + 1);
198 tcp2 = (void *) (ip2 + 1);
199 tcp3 = (void *) (ip3 + 1);
201 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
202 || ip0->protocol == IP_PROTOCOL_UDP);
203 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
204 || ip1->protocol == IP_PROTOCOL_UDP);
205 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
206 || ip2->protocol == IP_PROTOCOL_UDP);
207 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
208 || ip1->protocol == IP_PROTOCOL_UDP);
210 if (!lookup_for_responses_to_locally_received_packets)
212 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
213 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
214 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
215 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
218 if (!lookup_for_responses_to_locally_received_packets)
220 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
221 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
222 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
223 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
226 if (!lookup_for_responses_to_locally_received_packets)
228 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
229 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
230 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
231 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
234 if (lookup_for_responses_to_locally_received_packets)
236 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
237 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
238 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
239 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
243 /* Handle default route. */
246 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
249 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
252 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
255 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
256 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
257 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
258 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
259 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
262 lb0 = load_balance_get (lb_index0);
263 lb1 = load_balance_get (lb_index1);
264 lb2 = load_balance_get (lb_index2);
265 lb3 = load_balance_get (lb_index3);
267 /* Use flow hash to compute multipath adjacency. */
268 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
269 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
270 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
271 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
272 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
274 flow_hash_config0 = lb0->lb_hash_config;
275 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
276 ip4_compute_flow_hash (ip0, flow_hash_config0);
278 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
280 flow_hash_config1 = lb1->lb_hash_config;
281 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
282 ip4_compute_flow_hash (ip1, flow_hash_config1);
284 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
286 flow_hash_config2 = lb2->lb_hash_config;
287 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
288 ip4_compute_flow_hash (ip2, flow_hash_config2);
290 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
292 flow_hash_config3 = lb3->lb_hash_config;
293 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
294 ip4_compute_flow_hash (ip3, flow_hash_config3);
297 ASSERT (lb0->lb_n_buckets > 0);
298 ASSERT (is_pow2 (lb0->lb_n_buckets));
299 ASSERT (lb1->lb_n_buckets > 0);
300 ASSERT (is_pow2 (lb1->lb_n_buckets));
301 ASSERT (lb2->lb_n_buckets > 0);
302 ASSERT (is_pow2 (lb2->lb_n_buckets));
303 ASSERT (lb3->lb_n_buckets > 0);
304 ASSERT (is_pow2 (lb3->lb_n_buckets));
306 dpo0 = load_balance_get_bucket_i (lb0,
308 (lb0->lb_n_buckets_minus_1)));
309 dpo1 = load_balance_get_bucket_i (lb1,
311 (lb1->lb_n_buckets_minus_1)));
312 dpo2 = load_balance_get_bucket_i (lb2,
314 (lb2->lb_n_buckets_minus_1)));
315 dpo3 = load_balance_get_bucket_i (lb3,
317 (lb3->lb_n_buckets_minus_1)));
319 next0 = dpo0->dpoi_next_node;
320 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
321 next1 = dpo1->dpoi_next_node;
322 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
323 next2 = dpo2->dpoi_next_node;
324 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
325 next3 = dpo3->dpoi_next_node;
326 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
328 vlib_increment_combined_counter
329 (cm, cpu_index, lb_index0, 1,
330 vlib_buffer_length_in_chain (vm, p0)
331 + sizeof (ethernet_header_t));
332 vlib_increment_combined_counter
333 (cm, cpu_index, lb_index1, 1,
334 vlib_buffer_length_in_chain (vm, p1)
335 + sizeof (ethernet_header_t));
336 vlib_increment_combined_counter
337 (cm, cpu_index, lb_index2, 1,
338 vlib_buffer_length_in_chain (vm, p2)
339 + sizeof (ethernet_header_t));
340 vlib_increment_combined_counter
341 (cm, cpu_index, lb_index3, 1,
342 vlib_buffer_length_in_chain (vm, p3)
343 + sizeof (ethernet_header_t));
345 vlib_validate_buffer_enqueue_x4 (vm, node, next,
346 to_next, n_left_to_next,
348 next0, next1, next2, next3);
351 while (n_left_from > 0 && n_left_to_next > 0)
355 __attribute__ ((unused)) tcp_header_t *tcp0;
356 ip_lookup_next_t next0;
357 const load_balance_t *lb0;
358 ip4_fib_mtrie_t *mtrie0;
359 ip4_fib_mtrie_leaf_t leaf0;
360 ip4_address_t *dst_addr0;
361 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
362 flow_hash_config_t flow_hash_config0;
363 const dpo_id_t *dpo0;
369 p0 = vlib_get_buffer (vm, pi0);
371 ip0 = vlib_buffer_get_current (p0);
373 dst_addr0 = &ip0->dst_address;
376 vec_elt (im->fib_index_by_sw_if_index,
377 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
379 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
380 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
382 if (!lookup_for_responses_to_locally_received_packets)
384 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
386 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
388 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
391 tcp0 = (void *) (ip0 + 1);
393 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
394 || ip0->protocol == IP_PROTOCOL_UDP);
396 if (!lookup_for_responses_to_locally_received_packets)
397 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
399 if (!lookup_for_responses_to_locally_received_packets)
400 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
402 if (!lookup_for_responses_to_locally_received_packets)
403 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
405 if (lookup_for_responses_to_locally_received_packets)
406 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
409 /* Handle default route. */
412 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
413 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
416 lb0 = load_balance_get (lbi0);
418 /* Use flow hash to compute multipath adjacency. */
419 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
420 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
422 flow_hash_config0 = lb0->lb_hash_config;
424 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
425 ip4_compute_flow_hash (ip0, flow_hash_config0);
428 ASSERT (lb0->lb_n_buckets > 0);
429 ASSERT (is_pow2 (lb0->lb_n_buckets));
431 dpo0 = load_balance_get_bucket_i (lb0,
433 (lb0->lb_n_buckets_minus_1)));
435 next0 = dpo0->dpoi_next_node;
436 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
438 vlib_increment_combined_counter
439 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
446 if (PREDICT_FALSE (next0 != next))
449 vlib_put_next_frame (vm, node, next, n_left_to_next);
451 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
458 vlib_put_next_frame (vm, node, next, n_left_to_next);
461 if (node->flags & VLIB_NODE_FLAG_TRACE)
462 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
464 return frame->n_vectors;
467 /** @brief IPv4 lookup node.
470 This is the main IPv4 lookup dispatch node.
472 @param vm vlib_main_t corresponding to the current thread
473 @param node vlib_node_runtime_t
474 @param frame vlib_frame_t whose contents should be dispatched
476 @par Graph mechanics: buffer metadata, next index usage
479 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
480 - Indicates the @c sw_if_index value of the interface that the
481 packet was received on.
482 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
483 - When the value is @c ~0 then the node performs a longest prefix
484 match (LPM) for the packet destination address in the FIB attached
485 to the receive interface.
486 - Otherwise perform LPM for the packet destination address in the
487 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
488 value (0, 1, ...) and not a VRF id.
491 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
492 - The lookup result adjacency index.
495 - Dispatches the packet to the node index found in
496 ip_adjacency_t @c adj->lookup_next_index
497 (where @c adj is the lookup result adjacency).
500 ip4_lookup (vlib_main_t * vm,
501 vlib_node_runtime_t * node, vlib_frame_t * frame)
503 return ip4_lookup_inline (vm, node, frame,
504 /* lookup_for_responses_to_locally_received_packets */
509 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
511 VLIB_REGISTER_NODE (ip4_lookup_node) =
513 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
514 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
515 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
517 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
520 ip4_load_balance (vlib_main_t * vm,
521 vlib_node_runtime_t * node, vlib_frame_t * frame)
523 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
524 u32 n_left_from, n_left_to_next, *from, *to_next;
525 ip_lookup_next_t next;
526 u32 cpu_index = os_get_cpu_number ();
528 from = vlib_frame_vector_args (frame);
529 n_left_from = frame->n_vectors;
530 next = node->cached_next_index;
532 if (node->flags & VLIB_NODE_FLAG_TRACE)
533 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
535 while (n_left_from > 0)
537 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
540 while (n_left_from >= 4 && n_left_to_next >= 2)
542 ip_lookup_next_t next0, next1;
543 const load_balance_t *lb0, *lb1;
544 vlib_buffer_t *p0, *p1;
545 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
546 const ip4_header_t *ip0, *ip1;
547 const dpo_id_t *dpo0, *dpo1;
549 /* Prefetch next iteration. */
551 vlib_buffer_t *p2, *p3;
553 p2 = vlib_get_buffer (vm, from[2]);
554 p3 = vlib_get_buffer (vm, from[3]);
556 vlib_prefetch_buffer_header (p2, STORE);
557 vlib_prefetch_buffer_header (p3, STORE);
559 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
560 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
563 pi0 = to_next[0] = from[0];
564 pi1 = to_next[1] = from[1];
571 p0 = vlib_get_buffer (vm, pi0);
572 p1 = vlib_get_buffer (vm, pi1);
574 ip0 = vlib_buffer_get_current (p0);
575 ip1 = vlib_buffer_get_current (p1);
576 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
577 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
579 lb0 = load_balance_get (lbi0);
580 lb1 = load_balance_get (lbi1);
583 * this node is for via FIBs we can re-use the hash value from the
584 * to node if present.
585 * We don't want to use the same hash value at each level in the recursion
586 * graph as that would lead to polarisation
588 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
589 hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
591 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
593 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
595 hc0 = vnet_buffer (p0)->ip.flow_hash =
596 vnet_buffer (p0)->ip.flow_hash >> 1;
600 hc0 = vnet_buffer (p0)->ip.flow_hash =
601 ip4_compute_flow_hash (ip0, hc0);
604 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
606 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
608 hc1 = vnet_buffer (p1)->ip.flow_hash =
609 vnet_buffer (p1)->ip.flow_hash >> 1;
613 hc1 = vnet_buffer (p1)->ip.flow_hash =
614 ip4_compute_flow_hash (ip1, hc1);
619 load_balance_get_bucket_i (lb0,
620 hc0 & (lb0->lb_n_buckets_minus_1));
622 load_balance_get_bucket_i (lb1,
623 hc1 & (lb1->lb_n_buckets_minus_1));
625 next0 = dpo0->dpoi_next_node;
626 next1 = dpo1->dpoi_next_node;
628 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
631 vlib_increment_combined_counter
632 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633 vlib_increment_combined_counter
634 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
636 vlib_validate_buffer_enqueue_x2 (vm, node, next,
637 to_next, n_left_to_next,
638 pi0, pi1, next0, next1);
641 while (n_left_from > 0 && n_left_to_next > 0)
643 ip_lookup_next_t next0;
644 const load_balance_t *lb0;
647 const ip4_header_t *ip0;
648 const dpo_id_t *dpo0;
657 p0 = vlib_get_buffer (vm, pi0);
659 ip0 = vlib_buffer_get_current (p0);
660 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
662 lb0 = load_balance_get (lbi0);
664 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
665 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
667 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
669 hc0 = vnet_buffer (p0)->ip.flow_hash =
670 vnet_buffer (p0)->ip.flow_hash >> 1;
674 hc0 = vnet_buffer (p0)->ip.flow_hash =
675 ip4_compute_flow_hash (ip0, hc0);
680 load_balance_get_bucket_i (lb0,
681 hc0 & (lb0->lb_n_buckets_minus_1));
683 next0 = dpo0->dpoi_next_node;
684 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
686 vlib_increment_combined_counter
687 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
689 vlib_validate_buffer_enqueue_x1 (vm, node, next,
690 to_next, n_left_to_next,
694 vlib_put_next_frame (vm, node, next, n_left_to_next);
697 return frame->n_vectors;
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704 format_ip4_lookup_trace,};
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
708 /* get first interface address */
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711 ip_interface_address_t ** result_ia)
713 ip_lookup_main_t *lm = &im->lookup_main;
714 ip_interface_address_t *ia = 0;
715 ip4_address_t *result = 0;
717 foreach_ip_interface_address (lm, ia, sw_if_index,
718 1 /* honor unnumbered */ ,
722 ip_interface_address_get_address (lm, ia);
728 *result_ia = result ? ia : 0;
733 ip4_add_interface_routes (u32 sw_if_index,
734 ip4_main_t * im, u32 fib_index,
735 ip_interface_address_t * a)
737 ip_lookup_main_t *lm = &im->lookup_main;
738 ip4_address_t *address = ip_interface_address_get_address (lm, a);
740 .fp_len = a->address_length,
741 .fp_proto = FIB_PROTOCOL_IP4,
742 .fp_addr.ip4 = *address,
745 a->neighbor_probe_adj_index = ~0;
749 fib_node_index_t fei;
751 fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP4, NULL, /* No next-hop address */
752 sw_if_index, ~0, // invalid FIB index
753 1, NULL, // no out-label stack
754 FIB_ROUTE_PATH_FLAG_NONE);
755 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
760 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
762 u32 classify_table_index =
763 lm->classify_table_index_by_sw_if_index[sw_if_index];
764 if (classify_table_index != (u32) ~ 0)
766 dpo_id_t dpo = DPO_INVALID;
771 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
773 fib_table_entry_special_dpo_add (fib_index,
776 FIB_ENTRY_FLAG_NONE, &dpo);
781 fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP4, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index
782 1, NULL, // no out-label stack
783 FIB_ROUTE_PATH_FLAG_NONE);
787 ip4_del_interface_routes (ip4_main_t * im,
789 ip4_address_t * address, u32 address_length)
792 .fp_len = address_length,
793 .fp_proto = FIB_PROTOCOL_IP4,
794 .fp_addr.ip4 = *address,
799 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
803 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
807 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
809 ip4_main_t *im = &ip4_main;
811 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
814 * enable/disable only on the 1<->0 transition
818 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
823 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
824 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
827 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
830 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
835 static clib_error_t *
836 ip4_add_del_interface_address_internal (vlib_main_t * vm,
838 ip4_address_t * address,
839 u32 address_length, u32 is_del)
841 vnet_main_t *vnm = vnet_get_main ();
842 ip4_main_t *im = &ip4_main;
843 ip_lookup_main_t *lm = &im->lookup_main;
844 clib_error_t *error = 0;
845 u32 if_address_index, elts_before;
846 ip4_address_fib_t ip4_af, *addr_fib = 0;
848 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
849 ip4_addr_fib_init (&ip4_af, address,
850 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
851 vec_add1 (addr_fib, ip4_af);
854 * there is no support for adj-fib handling in the presence of overlapping
855 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
860 /* When adding an address check that it does not conflict
861 with an existing address. */
862 ip_interface_address_t *ia;
863 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
864 0 /* honor unnumbered */ ,
868 ip_interface_address_get_address
869 (&im->lookup_main, ia);
870 if (ip4_destination_matches_route
871 (im, address, x, ia->address_length)
873 ip4_destination_matches_route (im,
879 ("failed to add %U which conflicts with %U for interface %U",
880 format_ip4_address_and_length, address,
882 format_ip4_address_and_length, x,
884 format_vnet_sw_if_index_name, vnm,
889 elts_before = pool_elts (lm->if_address_pool);
891 error = ip_interface_address_add_del
892 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
896 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
899 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
901 ip4_add_interface_routes (sw_if_index,
902 im, ip4_af.fib_index,
904 (lm->if_address_pool, if_address_index));
906 /* If pool did not grow/shrink: add duplicate address. */
907 if (elts_before != pool_elts (lm->if_address_pool))
909 ip4_add_del_interface_address_callback_t *cb;
910 vec_foreach (cb, im->add_del_interface_address_callbacks)
911 cb->function (im, cb->function_opaque, sw_if_index,
912 address, address_length, if_address_index, is_del);
921 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
922 ip4_address_t * address, u32 address_length,
925 return ip4_add_del_interface_address_internal
926 (vm, sw_if_index, address, address_length, is_del);
929 /* Built-in ip4 unicast rx feature path definition */
931 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
933 .arc_name = "ip4-unicast",
934 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
935 .end_node = "ip4-lookup",
936 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
939 VNET_FEATURE_INIT (ip4_flow_classify, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "ip4-flow-classify",
943 .runs_before = VNET_FEATURES ("ip4-inacl"),
946 VNET_FEATURE_INIT (ip4_inacl, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-inacl",
950 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
953 VNET_FEATURE_INIT (ip4_source_check_1, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-source-check-via-rx",
957 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
960 VNET_FEATURE_INIT (ip4_source_check_2, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-source-check-via-any",
964 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
967 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
969 .arc_name = "ip4-unicast",
970 .node_name = "ip4-source-and-port-range-check-rx",
971 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
974 VNET_FEATURE_INIT (ip4_policer_classify, static) =
976 .arc_name = "ip4-unicast",
977 .node_name = "ip4-policer-classify",
978 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
981 VNET_FEATURE_INIT (ip4_ipsec, static) =
983 .arc_name = "ip4-unicast",
984 .node_name = "ipsec-input-ip4",
985 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
988 VNET_FEATURE_INIT (ip4_vpath, static) =
990 .arc_name = "ip4-unicast",
991 .node_name = "vpath-input-ip4",
992 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
995 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
997 .arc_name = "ip4-unicast",
998 .node_name = "ip4-vxlan-bypass",
999 .runs_before = VNET_FEATURES ("ip4-lookup"),
1002 VNET_FEATURE_INIT (ip4_lookup, static) =
1004 .arc_name = "ip4-unicast",
1005 .node_name = "ip4-lookup",
1006 .runs_before = VNET_FEATURES ("ip4-drop"),
1009 VNET_FEATURE_INIT (ip4_drop, static) =
1011 .arc_name = "ip4-unicast",
1012 .node_name = "ip4-drop",
1013 .runs_before = 0, /* not before any other features */
1017 /* Built-in ip4 multicast rx feature path definition */
1018 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1020 .arc_name = "ip4-multicast",
1021 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1022 .end_node = "ip4-lookup-multicast",
1023 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1026 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1028 .arc_name = "ip4-multicast",
1029 .node_name = "vpath-input-ip4",
1030 .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
1033 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1035 .arc_name = "ip4-multicast",
1036 .node_name = "ip4-lookup-multicast",
1037 .runs_before = VNET_FEATURES ("ip4-drop"),
1040 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1042 .arc_name = "ip4-multicast",
1043 .node_name = "ip4-drop",
1044 .runs_before = 0, /* last feature */
1047 /* Source and port-range check ip4 tx feature path definition */
1048 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1050 .arc_name = "ip4-output",
1051 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1052 .end_node = "interface-output",
1053 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1056 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1058 .arc_name = "ip4-output",
1059 .node_name = "ip4-source-and-port-range-check-tx",
1060 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1063 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1065 .arc_name = "ip4-output",
1066 .node_name = "ipsec-output-ip4",
1067 .runs_before = VNET_FEATURES ("interface-output"),
1070 /* Built-in ip4 tx feature path definition */
1071 VNET_FEATURE_INIT (ip4_interface_output, static) =
1073 .arc_name = "ip4-output",
1074 .node_name = "interface-output",
1075 .runs_before = 0, /* not before any other features */
1079 static clib_error_t *
1080 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1082 ip4_main_t *im = &ip4_main;
1084 /* Fill in lookup tables with default table (0). */
1085 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1087 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1090 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1093 return /* no error */ 0;
1096 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1098 /* Global IP4 main. */
1099 ip4_main_t ip4_main;
1102 ip4_lookup_init (vlib_main_t * vm)
1104 ip4_main_t *im = &ip4_main;
1105 clib_error_t *error;
1108 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1111 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1116 m = pow2_mask (i) << (32 - i);
1119 im->fib_masks[i] = clib_host_to_net_u32 (m);
1122 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1124 /* Create FIB with index 0 and table id of 0. */
1125 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1129 pn = pg_get_node (ip4_lookup_node.index);
1130 pn->unformat_edit = unformat_pg_ip4_header;
1134 ethernet_arp_header_t h;
1136 memset (&h, 0, sizeof (h));
1138 /* Set target ethernet address to all zeros. */
1139 memset (h.ip4_over_ethernet[1].ethernet, 0,
1140 sizeof (h.ip4_over_ethernet[1].ethernet));
1142 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1143 #define _8(f,v) h.f = v;
1144 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1145 _16 (l3_type, ETHERNET_TYPE_IP4);
1146 _8 (n_l2_address_bytes, 6);
1147 _8 (n_l3_address_bytes, 4);
1148 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1152 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1155 /* alloc chunk size */ 8,
1162 VLIB_INIT_FUNCTION (ip4_lookup_init);
1166 /* Adjacency taken. */
1171 /* Packet data, possibly *after* rewrite. */
1172 u8 packet_data[64 - 1 * sizeof (u32)];
1174 ip4_forward_next_trace_t;
/* Trace formatter: appends the captured packet bytes, formatted as an
   IPv4 header, to s at the current indent of s.
   Varargs: vlib_main_t * (unused), vlib_node_t * (unused),
   ip4_forward_next_trace_t * (the trace record). */
1177 format_ip4_forward_next_trace (u8 * s, va_list * args)
1179 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1180 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1181 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1182 uword indent = format_get_indent (s);
1183 s = format (s, "%U%U",
1184 format_white_space, indent,
1185 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for lookup: prints the FIB index, DPO index and flow
   hash on one line, then the captured packet bytes as an IPv4 header on
   the next line at the original indent.
   Varargs: vlib_main_t * (unused), vlib_node_t * (unused),
   ip4_forward_next_trace_t * (the trace record). */
1190 format_ip4_lookup_trace (u8 * s, va_list * args)
1192 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1193 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1194 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1195 uword indent = format_get_indent (s);
1197 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1198 t->fib_index, t->dpo_index, t->flow_hash);
1199 s = format (s, "\n%U%U",
1200 format_white_space, indent,
1201 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite: prints the TX interface index, the DPO
   index with its adjacency, and the flow hash, followed on the next line
   by the captured packet bytes formatted relative to that adjacency.
   Varargs: vlib_main_t * (unused), vlib_node_t * (unused),
   ip4_forward_next_trace_t * (the trace record). */
1206 format_ip4_rewrite_trace (u8 * s, va_list * args)
1208 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1209 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1210 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1211 vnet_main_t *vnm = vnet_get_main ();
1212 uword indent = format_get_indent (s);
/* NOTE(review): the value printed under "tx_sw_if_index" is t->fib_index.
   Presumably the capture code stores sw_if_index[VLIB_TX] in that field
   when it is set -- confirm against ip4_forward_next_trace. */
1214 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1215 t->fib_index, t->dpo_index, format_ip_adjacency,
1216 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1217 s = format (s, "\n%U%U",
1218 format_white_space, indent,
1219 format_ip_adjacency_packet_data,
1220 vnm, t->dpo_index, t->packet_data, sizeof (t->packet_data));
1224 /* Common trace function for all ip4-forward next nodes.
   For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds
   a trace record holding the adjacency index selected by which_adj_index,
   the flow hash and the first sizeof (packet_data) bytes at the buffer's
   current position.  Buffers are handled two at a time, then singly. */
1226 ip4_forward_next_trace (vlib_main_t * vm,
1227 vlib_node_runtime_t * node,
1228 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1231 ip4_main_t *im = &ip4_main;
1233 n_left = frame->n_vectors;
1234 from = vlib_frame_vector_args (frame);
/* Dual-buffer pass. */
1239 vlib_buffer_t *b0, *b1;
1240 ip4_forward_next_trace_t *t0, *t1;
1242 /* Prefetch next iteration. */
1243 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1244 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1249 b0 = vlib_get_buffer (vm, bi0);
1250 b1 = vlib_get_buffer (vm, bi1);
1252 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1254 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1255 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1256 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Use the TX sw_if_index when one is set (not ~0), otherwise the FIB
   index of the RX interface. */
1258 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1259 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1260 vec_elt (im->fib_index_by_sw_if_index,
1261 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1263 clib_memcpy (t0->packet_data,
1264 vlib_buffer_get_current (b0),
1265 sizeof (t0->packet_data));
1267 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1269 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1270 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1271 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1273 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1274 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1275 vec_elt (im->fib_index_by_sw_if_index,
1276 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1277 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1278 sizeof (t1->packet_data));
/* Single-buffer pass for the remainder; same capture as above. */
1288 ip4_forward_next_trace_t *t0;
1292 b0 = vlib_get_buffer (vm, bi0);
1294 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1296 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1297 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1298 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1300 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1301 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1302 vec_elt (im->fib_index_by_sw_if_index,
1303 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1304 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1305 sizeof (t0->packet_data));
/* Shared body of ip4-drop and ip4-punt: hands every buffer in the frame
   to vlib_error_drop_buffers with error_code attributed to the ip4-input
   node, then records traces if tracing is enabled on this node. */
1313 ip4_drop_or_punt (vlib_main_t * vm,
1314 vlib_node_runtime_t * node,
1315 vlib_frame_t * frame, ip4_error_t error_code)
1317 u32 *buffers = vlib_frame_vector_args (frame);
1318 uword n_packets = frame->n_vectors;
1320 vlib_error_drop_buffers (vm, node, buffers,
1324 ip4_input_node.index, error_code);
1326 if (node->flags & VLIB_NODE_FLAG_TRACE)
1327 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* ip4-drop node function: drops the frame with IP4_ERROR_ADJACENCY_DROP. */
1333 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1335 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* ip4-punt node function: same path as ip4_drop but with
   IP4_ERROR_ADJACENCY_PUNT as the error code. */
1339 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1341 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph node "ip4-drop": runs ip4_drop, traces with
   format_ip4_forward_next_trace, next node 0 is "error-drop". */
1344 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1346 .function = ip4_drop,.name = "ip4-drop",.vector_size =
1347 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1350 [0] = "error-drop",}
1353 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph node "ip4-punt": runs ip4_punt, traces with
   format_ip4_forward_next_trace, next node 0 is "error-punt". */
1355 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1357 .function = ip4_punt,.name = "ip4-punt",.vector_size =
1358 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1361 [0] = "error-punt",}
1364 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1366 /* Compute TCP/UDP/ICMP4 checksum in software.
   The sum is seeded with the L4 payload length and protocol plus the
   source and destination addresses, then accumulated over the payload,
   following the buffer chain when the payload spans several buffers.
   ip0 is the IPv4 header, located in p0. */
1368 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1372 u32 ip_header_length, payload_length_host_byte_order;
1373 u32 n_this_buffer, n_bytes_left;
1375 void *data_this_buffer;
1377 /* Initialize checksum with ip header. */
1378 ip_header_length = ip4_header_bytes (ip0);
1379 payload_length_host_byte_order =
1380 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1382 clib_host_to_net_u32 (payload_length_host_byte_order +
1383 (ip0->protocol << 16));
/* 32-bit targets add the two addresses separately; otherwise a single
   64-bit unaligned load starting at src_address is added. */
1385 if (BITS (uword) == 32)
1388 ip_csum_with_carry (sum0,
1389 clib_mem_unaligned (&ip0->src_address, u32));
1391 ip_csum_with_carry (sum0,
1392 clib_mem_unaligned (&ip0->dst_address, u32));
1396 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Start with the payload right after the IP header.  If it does not fit
   in this buffer, limit the first pass to what the buffer holds beyond
   the IP header (or 0). */
1398 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1399 data_this_buffer = (void *) ip0 + ip_header_length;
1400 if (n_this_buffer + ip_header_length > p0->current_length)
1402 p0->current_length >
1403 ip_header_length ? p0->current_length - ip_header_length : 0;
/* Accumulate this buffer's share; when bytes remain, move on to the next
   buffer in the chain (asserted to exist). */
1406 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1407 n_bytes_left -= n_this_buffer;
1408 if (n_bytes_left == 0)
1411 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1412 p0 = vlib_get_buffer (vm, p0->next_buffer);
1413 data_this_buffer = vlib_buffer_get_current (p0);
1414 n_this_buffer = p0->current_length;
/* Fold to 16 bits and complement. */
1417 sum16 = ~ip_csum_fold (sum0);
/* Validate the TCP/UDP checksum of the packet at p0's current position
   and record the outcome in p0->flags: IP_BUFFER_L4_CHECKSUM_COMPUTED is
   always set, IP_BUFFER_L4_CHECKSUM_CORRECT when the checksum is good.
   A UDP packet with a zero checksum field is marked correct without
   computing anything.  Callers in this file use the return value as the
   updated buffer flags. */
1423 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1425 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1429 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1430 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): (ip0 + 1) assumes the L4 header directly follows a
   fixed-size ip4_header_t, i.e. no IP options -- confirm. */
1432 udp0 = (void *) (ip0 + 1);
1433 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1435 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1436 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1440 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* A computed value of 0 means the checksum verified. */
1442 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1443 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* ip4-local node function.  For each buffer in the frame:
     - looks up the packet's source address in the mtrie of the FIB bound
       to the RX interface (four lookup steps, interleaved with the other
       checks);
     - for TCP/UDP, checks the L4 checksum (computing it in software when
       the buffer flags do not already say it was computed) and the UDP
       length;
     - applies the source checks described further down;
     - picks the next node from lm->local_next_by_ip_protocol, or
       IP_LOCAL_NEXT_DROP when any error was recorded.
   Fragments are dispatched as protocol 0xfe.  Errors are attributed to
   the ip4-input node.  Returns frame->n_vectors. */
1449 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1451 ip4_main_t *im = &ip4_main;
1452 ip_lookup_main_t *lm = &im->lookup_main;
1453 ip_local_next_t next_index;
1454 u32 *from, *to_next, n_left_from, n_left_to_next;
1455 vlib_node_runtime_t *error_node =
1456 vlib_node_get_runtime (vm, ip4_input_node.index);
1458 from = vlib_frame_vector_args (frame);
1459 n_left_from = frame->n_vectors;
1460 next_index = node->cached_next_index;
1462 if (node->flags & VLIB_NODE_FLAG_TRACE)
1463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1465 while (n_left_from > 0)
1467 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-buffer loop. */
1469 while (n_left_from >= 4 && n_left_to_next >= 2)
1471 vlib_buffer_t *p0, *p1;
1472 ip4_header_t *ip0, *ip1;
1473 udp_header_t *udp0, *udp1;
1474 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1475 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1476 const dpo_id_t *dpo0, *dpo1;
1477 const load_balance_t *lb0, *lb1;
1478 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1479 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1480 i32 len_diff0, len_diff1;
1481 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1482 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
/* Speculatively enqueue both buffers to the current next frame. */
1485 pi0 = to_next[0] = from[0];
1486 pi1 = to_next[1] = from[1];
1490 n_left_to_next -= 2;
1492 p0 = vlib_get_buffer (vm, pi0);
1493 p1 = vlib_get_buffer (vm, pi1);
1495 ip0 = vlib_buffer_get_current (p0);
1496 ip1 = vlib_buffer_get_current (p1);
/* Remember where the IP header starts in each buffer. */
1498 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1499 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1501 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1502 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1503 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1504 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
1506 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1507 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
/* Source-address lookup, step 0 of 4. */
1509 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1512 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1514 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1516 /* Treat IP frag packets as "experimental" protocol for now
1517 until support of IP frag reassembly is implemented */
1518 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1519 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1520 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1521 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1522 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1523 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1528 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1529 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1531 udp0 = ip4_next_header (ip0);
1532 udp1 = ip4_next_header (ip1);
1534 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1535 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1536 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
/* Source-address lookup, step 1 of 4. */
1539 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1541 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1543 /* Verify UDP length. */
1544 ip_len0 = clib_net_to_host_u16 (ip0->length);
1545 ip_len1 = clib_net_to_host_u16 (ip1->length);
1546 udp_len0 = clib_net_to_host_u16 (udp0->length);
1547 udp_len1 = clib_net_to_host_u16 (udp1->length);
/* NOTE(review): ip_len is the IP total-length field as read above, so as
   written the check only fails when the UDP length exceeds the whole IP
   datagram length (IP header included) -- confirm this is intended. */
1549 len_diff0 = ip_len0 - udp_len0;
1550 len_diff1 = ip_len1 - udp_len1;
1552 len_diff0 = is_udp0 ? len_diff0 : 0;
1553 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not TCP/UDP or is not yet known to
   have a good checksum. */
1555 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1556 & good_tcp_udp0 & good_tcp_udp1)))
1561 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1562 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1564 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1565 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1570 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1571 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1573 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1574 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1578 good_tcp_udp0 &= len_diff0 >= 0;
1579 good_tcp_udp1 &= len_diff1 >= 0;
/* Source-address lookup, step 2 of 4. */
1582 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1584 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value in
   the checks below. */
1586 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1588 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1589 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* The UDP checksum error is selected as TCP error + is_udp, hence the
   adjacency requirement asserted here. */
1591 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1592 error0 = (is_tcp_udp0 && !good_tcp_udp0
1593 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1594 error1 = (is_tcp_udp1 && !good_tcp_udp1
1595 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
/* Source-address lookup, step 3 of 4; an empty leaf falls back to the
   mtrie's default leaf. */
1598 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1600 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1603 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1606 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* Store the looked-up index in both the RX and TX adjacency slots. */
1608 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1609 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1610 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1612 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1613 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1614 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1616 lb0 = load_balance_get (lbi0);
1617 lb1 = load_balance_get (lbi1);
1618 dpo0 = load_balance_get_bucket_i (lb0, 0);
1619 dpo1 = load_balance_get_bucket_i (lb1, 0);
1622 * Must have a route to source otherwise we drop the packet.
1623 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1626 * - the source is a recieve => it's from us => bogus, do this
1627 * first since it sets a different error code.
1628 * - uRPF check for any route to source - accept if passes.
1629 * - allow packets destined to the broadcast address from unknown sources
1631 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1632 dpo0->dpoi_type == DPO_RECEIVE) ?
1633 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1634 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1635 !fib_urpf_check_size (lb0->lb_urpf) &&
1636 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1637 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1638 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1639 dpo1->dpoi_type == DPO_RECEIVE) ?
1640 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1641 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1642 !fib_urpf_check_size (lb1->lb_urpf) &&
1643 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1644 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
/* Dispatch by protocol unless an error was recorded, in which case drop. */
1646 next0 = lm->local_next_by_ip_protocol[proto0];
1647 next1 = lm->local_next_by_ip_protocol[proto1];
1650 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1652 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1654 p0->error = error0 ? error_node->errors[error0] : 0;
1655 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0: p0 goes to a different next than the speculated one; bit 1:
   same for p1. */
1657 enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
1659 if (PREDICT_FALSE (enqueue_code != 0))
1661 switch (enqueue_code)
1667 n_left_to_next += 1;
1668 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1674 n_left_to_next += 1;
1675 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1679 /* A B B or A B C */
1681 n_left_to_next += 2;
1682 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1683 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1686 vlib_put_next_frame (vm, node, next_index,
1689 vlib_get_next_frame (vm, node, next_index, to_next,
/* Single-buffer loop: same processing as above for one packet. */
1697 while (n_left_from > 0 && n_left_to_next > 0)
1702 ip4_fib_mtrie_t *mtrie0;
1703 ip4_fib_mtrie_leaf_t leaf0;
1704 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1706 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1707 load_balance_t *lb0;
1708 const dpo_id_t *dpo0;
1710 pi0 = to_next[0] = from[0];
1714 n_left_to_next -= 1;
1716 p0 = vlib_get_buffer (vm, pi0);
1718 ip0 = vlib_buffer_get_current (p0);
1720 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1722 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1723 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1725 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1727 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1730 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1732 /* Treat IP frag packets as "experimental" protocol for now
1733 until support of IP frag reassembly is implemented */
1734 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1735 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1736 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1740 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1742 udp0 = ip4_next_header (ip0);
1744 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1745 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1748 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1750 /* Verify UDP length. */
1751 ip_len0 = clib_net_to_host_u16 (ip0->length);
1752 udp_len0 = clib_net_to_host_u16 (udp0->length);
1754 len_diff0 = ip_len0 - udp_len0;
1756 len_diff0 = is_udp0 ? len_diff0 : 0;
1758 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1763 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1764 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1766 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1767 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1771 good_tcp_udp0 &= len_diff0 >= 0;
1774 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1776 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1778 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1780 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1781 error0 = (is_tcp_udp0 && !good_tcp_udp0
1782 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1785 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1788 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1790 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1791 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1793 lb0 = load_balance_get (lbi0);
1794 dpo0 = load_balance_get_bucket_i (lb0, 0);
1796 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1797 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
/* Same source checks as in the dual loop: a source that resolves to a
   receive DPO is reported as spoofed; otherwise a failed uRPF size check
   is a source lookup miss unless the destination is 255.255.255.255. */
1799 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1800 dpo0->dpoi_type == DPO_RECEIVE) ?
1801 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1802 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1803 !fib_urpf_check_size (lb0->lb_urpf) &&
1804 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1805 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1807 next0 = lm->local_next_by_ip_protocol[proto0];
1810 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1812 p0->error = error0 ? error_node->errors[error0] : 0;
/* Mis-speculated next: give the slot back and flush the current frame. */
1814 if (PREDICT_FALSE (next0 != next_index))
1816 n_left_to_next += 1;
1817 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1820 vlib_get_next_frame (vm, node, next_index, to_next,
1824 n_left_to_next -= 1;
1828 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1831 return frame->n_vectors;
/* Graph node "ip4-local": runs ip4_local.  Static next nodes are
   error-drop, error-punt, ip4-udp-lookup and ip4-icmp-input; further next
   nodes are added at run time by ip4_register_protocol. */
1834 VLIB_REGISTER_NODE (ip4_local_node, static) =
1836 .function = ip4_local,.name = "ip4-local",.vector_size =
1837 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1838 IP_LOCAL_N_NEXT,.next_nodes =
1840 [IP_LOCAL_NEXT_DROP] = "error-drop",
1841 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1842 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1843 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}
1846 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Register node_index as the handler for IP protocol number `protocol`:
   adds it as a next node of ip4-local and stores the resulting next
   index in lm->local_next_by_ip_protocol[protocol].  protocol must be a
   valid index into that table (asserted). */
1849 ip4_register_protocol (u32 protocol, u32 node_index)
1851 vlib_main_t *vm = vlib_get_main ();
1852 ip4_main_t *im = &ip4_main;
1853 ip_lookup_main_t *lm = &im->lookup_main;
1855 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1856 lm->local_next_by_ip_protocol[protocol] =
1857 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler for "show ip local": prints every protocol number whose
   entry in lm->local_next_by_ip_protocol is not IP_LOCAL_NEXT_PUNT. */
1860 static clib_error_t *
1861 show_ip_local_command_fn (vlib_main_t * vm,
1862 unformat_input_t * input, vlib_cli_command_t * cmd)
1864 ip4_main_t *im = &ip4_main;
1865 ip_lookup_main_t *lm = &im->lookup_main;
1868 vlib_cli_output (vm, "Protocols handled by ip4_local");
1869 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1871 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1872 vlib_cli_output (vm, "%d", i);
1880 * Display the set of protocols handled by the local IPv4 stack.
1883 * Example of how to display local protocol table:
1884 * @cliexstart{show ip local}
1885 * Protocols handled by ip4_local
/* CLI registration: binds the "show ip local" path to
   show_ip_local_command_fn. */
1892 VLIB_CLI_COMMAND (show_ip_local, static) =
1894 .path = "show ip local",
1895 .function = show_ip_local_command_fn,
1896 .short_help = "show ip local",
/* Shared body of ip4-arp (is_glean == 0) and ip4-glean (is_glean == 1).
   Every incoming buffer is sent to the drop next with an ARP error code.
   For addresses not recently seen, an ARP request is additionally built
   from the packet template and sent out of the adjacency's interface.
   The target is the packet's destination (glean) or the adjacency's next
   hop (arp).  A small hash bitmap, reseeded and cleared when more than
   1e-3 s have passed since the last reseed, limits duplicate requests.
   Returns frame->n_vectors.
   NOTE(review): the seeds, bitmap and timestamp are function-local
   statics, i.e. one copy shared by every caller of this function. */
1901 ip4_arp_inline (vlib_main_t * vm,
1902 vlib_node_runtime_t * node,
1903 vlib_frame_t * frame, int is_glean)
1905 vnet_main_t *vnm = vnet_get_main ();
1906 ip4_main_t *im = &ip4_main;
1907 ip_lookup_main_t *lm = &im->lookup_main;
1908 u32 *from, *to_next_drop;
1909 uword n_left_from, n_left_to_next_drop, next_index;
1910 static f64 time_last_seed_change = -1e100;
1911 static u32 hash_seeds[3];
1912 static uword hash_bitmap[256 / BITS (uword)];
1915 if (node->flags & VLIB_NODE_FLAG_TRACE)
1916 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Reseed the hash and forget previously seen keys once the seed is older
   than 1e-3 seconds. */
1918 time_now = vlib_time_now (vm);
1919 if (time_now - time_last_seed_change > 1e-3)
1922 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1923 sizeof (hash_seeds));
1924 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1925 hash_seeds[i] = r[i];
1927 /* Mark all hash keys as not seen before. */
1928 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1931 time_last_seed_change = time_now;
1934 from = vlib_frame_vector_args (frame);
1935 n_left_from = frame->n_vectors;
1936 next_index = node->cached_next_index;
1937 if (next_index == IP4_ARP_NEXT_DROP)
1938 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1940 while (n_left_from > 0)
1942 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1943 to_next_drop, n_left_to_next_drop);
1945 while (n_left_from > 0 && n_left_to_next_drop > 0)
1947 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1948 ip_adjacency_t *adj0;
1955 p0 = vlib_get_buffer (vm, pi0);
1957 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1958 adj0 = ip_get_adjacency (lm, adj_index0);
1959 ip0 = vlib_buffer_get_current (p0);
1965 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1966 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1971 * this is the Glean case, so we are ARPing for the
1972 * packet's destination
1974 a0 ^= ip0->dst_address.data_u32;
1978 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1982 hash_v3_finalize32 (a0, b0, c0);
/* Map the hash to one bit of hash_bitmap.
   NOTE(review): c0 is divided down to a word index BEFORE m0 is derived
   from it, so the bit chosen within a word depends only on the word index
   and very few distinct bits are ever used.  Computing m0 from the masked
   c0 before the division looks like the intent -- confirm and fix.
   m0 is also declared u32 while the shifted value is a uword. */
1984 c0 &= BITS (hash_bitmap) - 1;
1985 c0 = c0 / BITS (uword);
1986 m0 = (uword) 1 << (c0 % BITS (uword));
1988 bm0 = hash_bitmap[c0];
1989 drop0 = (bm0 & m0) != 0;
1991 /* Mark it as seen. */
1992 hash_bitmap[c0] = bm0 | m0;
/* The original packet always goes to the drop next; only the error
   counter differs between "dropped" and "request sent". */
1996 to_next_drop[0] = pi0;
1998 n_left_to_next_drop -= 1;
2001 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2002 IP4_ARP_ERROR_REQUEST_SENT];
2005 * the adj has been updated to a rewrite but the node the DPO that got
2006 * us here hasn't - yet. no big deal. we'll drop while we wait.
2008 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2015 * Can happen if the control-plane is programming tables
2016 * with traffic flowing; at least that's today's lame excuse.
2018 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
2019 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2021 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2024 /* Send ARP request. */
2028 ethernet_arp_header_t *h0;
2029 vnet_hw_interface_t *hw_if0;
2032 vlib_packet_template_get_packet (vm,
2033 &im->ip4_arp_request_packet_template,
2036 /* Add rewrite/encap string for ARP packet. */
2037 vnet_rewrite_one_header (adj0[0], h0,
2038 sizeof (ethernet_header_t));
2040 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2042 /* Src ethernet address in ARP header. */
2043 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2045 sizeof (h0->ip4_over_ethernet[0].ethernet));
2049 /* The interface's source address is stashed in the Glean Adj */
2050 h0->ip4_over_ethernet[0].ip4 =
2051 adj0->sub_type.glean.receive_addr.ip4;
2053 /* Copy in destination address we are requesting. This is the
2054 * glean case, so it's the packet's destination.*/
2055 h0->ip4_over_ethernet[1].ip4.data_u32 =
2056 ip0->dst_address.data_u32;
2060 /* Src IP address in ARP header. */
2061 if (ip4_src_address_for_packet (lm, sw_if_index0,
2063 ip4_over_ethernet[0].ip4))
2065 /* No source address available */
2067 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2068 vlib_buffer_free (vm, &bi0, 1);
2072 /* Copy in destination address we are requesting from the
2074 h0->ip4_over_ethernet[1].ip4.data_u32 =
2075 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2078 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2079 b0 = vlib_get_buffer (vm, bi0);
2080 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2082 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2084 vlib_set_next_frame_buffer (vm, node,
2085 adj0->rewrite_header.next_index,
2090 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2093 return frame->n_vectors;
/* ip4-arp node function: ip4_arp_inline with is_glean = 0 (ARP for the
   adjacency's next hop). */
2097 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2099 return (ip4_arp_inline (vm, node, frame, 0));
/* ip4-glean node function: ip4_arp_inline with is_glean = 1 (ARP for the
   packet's destination address). */
2103 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2105 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the ip4-arp and ip4-glean nodes, indexed by
   the IP4_ARP_ERROR_* values. */
2108 static char *ip4_arp_error_strings[] = {
2109 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2110 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2111 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2112 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2113 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2114 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node "ip4-arp": runs ip4_arp, uses the shared ARP error strings;
   its only static next node is "error-drop". */
2117 VLIB_REGISTER_NODE (ip4_arp_node) =
2119 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2120 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2121 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2122 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2124 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph node "ip4-glean": runs ip4_glean, uses the shared ARP error
   strings; its only static next node is "error-drop". */
2127 VLIB_REGISTER_NODE (ip4_glean_node) =
2129 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2130 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2131 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2132 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2134 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* foreach_notrace_ip4_arp_error lists ARP error codes; arp_notrace_init
   (an init function) expands it to pass each listed ip4-arp error counter
   to vnet_pcap_drop_trace_filter_add_del.  Per the comment below, the
   purpose is to keep ARP request packets out of the drop trace. */
2137 #define foreach_notrace_ip4_arp_error \
2144 arp_notrace_init (vlib_main_t * vm)
2146 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2148 /* don't trace ARP request packets */
2150 vnet_pcap_drop_trace_filter_add_del \
2151 (rt->errors[IP4_ARP_ERROR_##a], \
2153 foreach_notrace_ip4_arp_error;
2158 VLIB_INIT_FUNCTION (arp_notrace_init);
2161 /* Send an ARP request to see if given destination is reachable on given interface.
   Returns an error if the interface is not admin-up, or if no interface
   address matches dst (in which case vnm->api_errno is also set to
   VNET_API_ERROR_NO_MATCHING_INTERFACE).  Otherwise builds the request
   from the ARP packet template, applies the probe adjacency's rewrite,
   hands it to the hardware interface's output node and returns 0. */
2163 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2165 vnet_main_t *vnm = vnet_get_main ();
2166 ip4_main_t *im = &ip4_main;
2167 ethernet_arp_header_t *h;
2169 ip_interface_address_t *ia;
2170 ip_adjacency_t *adj;
2171 vnet_hw_interface_t *hi;
2172 vnet_sw_interface_t *si;
2176 si = vnet_get_sw_interface (vnm, sw_if_index);
2178 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2180 return clib_error_return (0, "%U: interface %U down",
2181 format_ip4_address, dst,
2182 format_vnet_sw_if_index_name, vnm,
/* Find the interface address to use as ARP source for dst. */
2187 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2190 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2191 return clib_error_return
2192 (0, "no matching interface address for destination %U (interface %U)",
2193 format_ip4_address, dst,
2194 format_vnet_sw_if_index_name, vnm, sw_if_index);
2197 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2200 vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
2203 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address, then sender and target IPv4 addresses. */
2205 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2206 sizeof (h->ip4_over_ethernet[0].ethernet));
2208 h->ip4_over_ethernet[0].ip4 = src[0];
2209 h->ip4_over_ethernet[1].ip4 = dst[0];
2211 b = vlib_get_buffer (vm, bi);
2212 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2213 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2215 /* Add encapsulation string for software interface (e.g. ethernet header). */
2216 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2217 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Enqueue the packet directly to the interface's output node. */
2220 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2221 u32 *to_next = vlib_frame_vector_args (f);
2224 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2227 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: DROP is the initial
   value of next0/next1, ICMP_ERROR is chosen when the TTL expires. */
2232 IP4_REWRITE_NEXT_DROP,
2233 IP4_REWRITE_NEXT_ICMP_ERROR,
2234 } ip4_rewrite_next_t;
2237 ip4_rewrite_inline (vlib_main_t * vm,
2238 vlib_node_runtime_t * node,
2239 vlib_frame_t * frame, int is_midchain)
2241 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2242 u32 *from = vlib_frame_vector_args (frame);
2243 u32 n_left_from, n_left_to_next, *to_next, next_index;
2244 vlib_node_runtime_t *error_node =
2245 vlib_node_get_runtime (vm, ip4_input_node.index);
2247 n_left_from = frame->n_vectors;
2248 next_index = node->cached_next_index;
2249 u32 cpu_index = os_get_cpu_number ();
2251 while (n_left_from > 0)
2253 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2255 while (n_left_from >= 4 && n_left_to_next >= 2)
2257 ip_adjacency_t *adj0, *adj1;
2258 vlib_buffer_t *p0, *p1;
2259 ip4_header_t *ip0, *ip1;
2260 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2261 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2262 u32 tx_sw_if_index0, tx_sw_if_index1;
2264 /* Prefetch next iteration. */
2266 vlib_buffer_t *p2, *p3;
2268 p2 = vlib_get_buffer (vm, from[2]);
2269 p3 = vlib_get_buffer (vm, from[3]);
2271 vlib_prefetch_buffer_header (p2, STORE);
2272 vlib_prefetch_buffer_header (p3, STORE);
2274 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2275 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2278 pi0 = to_next[0] = from[0];
2279 pi1 = to_next[1] = from[1];
2284 n_left_to_next -= 2;
2286 p0 = vlib_get_buffer (vm, pi0);
2287 p1 = vlib_get_buffer (vm, pi1);
2289 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2290 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2292 /* We should never rewrite a pkt using the MISS adjacency */
2293 ASSERT (adj_index0 && adj_index1);
2295 ip0 = vlib_buffer_get_current (p0);
2296 ip1 = vlib_buffer_get_current (p1);
2298 error0 = error1 = IP4_ERROR_NONE;
2299 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2301 /* Decrement TTL & update checksum.
2302 Works either endian, so no need for byte swap. */
2303 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2305 i32 ttl0 = ip0->ttl;
2307 /* Input node should have rejected packets with ttl 0. */
2308 ASSERT (ip0->ttl > 0);
2310 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2311 checksum0 += checksum0 >= 0xffff;
2313 ip0->checksum = checksum0;
2318 * If the ttl drops below 1 when forwarding, generate
2321 if (PREDICT_FALSE (ttl0 <= 0))
2323 error0 = IP4_ERROR_TIME_EXPIRED;
2324 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2325 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2326 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2328 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2331 /* Verify checksum. */
2332 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2336 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2338 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2340 i32 ttl1 = ip1->ttl;
2342 /* Input node should have rejected packets with ttl 0. */
2343 ASSERT (ip1->ttl > 0);
2345 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2346 checksum1 += checksum1 >= 0xffff;
2348 ip1->checksum = checksum1;
2353 * If the ttl drops below 1 when forwarding, generate
2356 if (PREDICT_FALSE (ttl1 <= 0))
2358 error1 = IP4_ERROR_TIME_EXPIRED;
2359 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2360 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2361 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2363 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2366 /* Verify checksum. */
2367 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2368 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2372 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2375 /* Rewrite packet header and updates lengths. */
2376 adj0 = ip_get_adjacency (lm, adj_index0);
2377 adj1 = ip_get_adjacency (lm, adj_index1);
2379 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2380 rw_len0 = adj0[0].rewrite_header.data_bytes;
2381 rw_len1 = adj1[0].rewrite_header.data_bytes;
2382 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2383 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2385 /* Check MTU of outgoing interface. */
2387 (vlib_buffer_length_in_chain (vm, p0) >
2389 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2392 (vlib_buffer_length_in_chain (vm, p1) >
2394 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2398 * We've already accounted for an ethernet_header_t elsewhere
2400 if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
2401 vlib_increment_combined_counter
2402 (&adjacency_counters, cpu_index, adj_index0,
2403 /* packet increment */ 0,
2404 /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
2406 if (PREDICT_FALSE (rw_len1 > sizeof (ethernet_header_t)))
2407 vlib_increment_combined_counter
2408 (&adjacency_counters, cpu_index, adj_index1,
2409 /* packet increment */ 0,
2410 /* byte increment */ rw_len1 - sizeof (ethernet_header_t));
2412 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2413 * to see the IP header */
2414 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2416 next0 = adj0[0].rewrite_header.next_index;
2417 p0->current_data -= rw_len0;
2418 p0->current_length += rw_len0;
2419 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2420 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2422 vnet_feature_arc_start (lm->output_feature_arc_index,
2423 tx_sw_if_index0, &next0, p0);
2425 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2427 next1 = adj1[0].rewrite_header.next_index;
2428 p1->current_data -= rw_len1;
2429 p1->current_length += rw_len1;
2431 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2432 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2434 vnet_feature_arc_start (lm->output_feature_arc_index,
2435 tx_sw_if_index1, &next1, p1);
2438 /* Guess we are only writing on simple Ethernet header. */
2439 vnet_rewrite_two_headers (adj0[0], adj1[0],
2440 ip0, ip1, sizeof (ethernet_header_t));
2444 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2445 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2448 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2449 to_next, n_left_to_next,
2450 pi0, pi1, next0, next1);
2453 while (n_left_from > 0 && n_left_to_next > 0)
2455 ip_adjacency_t *adj0;
2458 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2459 u32 tx_sw_if_index0;
2461 pi0 = to_next[0] = from[0];
2463 p0 = vlib_get_buffer (vm, pi0);
2465 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2467 /* We should never rewrite a pkt using the MISS adjacency */
2468 ASSERT (adj_index0);
2470 adj0 = ip_get_adjacency (lm, adj_index0);
2472 ip0 = vlib_buffer_get_current (p0);
2474 error0 = IP4_ERROR_NONE;
2475 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2477 /* Decrement TTL & update checksum. */
2478 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2480 i32 ttl0 = ip0->ttl;
2482 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2484 checksum0 += checksum0 >= 0xffff;
2486 ip0->checksum = checksum0;
2488 ASSERT (ip0->ttl > 0);
2494 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2496 if (PREDICT_FALSE (ttl0 <= 0))
2499 * If the ttl drops below 1 when forwarding, generate
2502 error0 = IP4_ERROR_TIME_EXPIRED;
2503 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2504 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2505 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2506 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2512 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2515 /* Guess we are only writing on simple Ethernet header. */
2516 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2518 /* Update packet buffer attributes/set output interface. */
2519 rw_len0 = adj0[0].rewrite_header.data_bytes;
2520 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2522 if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
2523 vlib_increment_combined_counter
2524 (&adjacency_counters, cpu_index, adj_index0,
2525 /* packet increment */ 0,
2526 /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
2528 /* Check MTU of outgoing interface. */
2529 error0 = (vlib_buffer_length_in_chain (vm, p0)
2530 > adj0[0].rewrite_header.max_l3_packet_bytes
2531 ? IP4_ERROR_MTU_EXCEEDED : error0);
2533 p0->error = error_node->errors[error0];
2535 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2536 * to see the IP header */
2537 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2539 p0->current_data -= rw_len0;
2540 p0->current_length += rw_len0;
2541 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2543 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2544 next0 = adj0[0].rewrite_header.next_index;
2548 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2551 vnet_feature_arc_start (lm->output_feature_arc_index,
2552 tx_sw_if_index0, &next0, p0);
2559 n_left_to_next -= 1;
2561 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2562 to_next, n_left_to_next,
2566 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2569 /* Need to do trace after rewrites to pick up new packet data. */
2570 if (node->flags & VLIB_NODE_FLAG_TRACE)
2571 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2573 return frame->n_vectors;
2577 /** @brief IPv4 rewrite node.
2580 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2581 header checksum, fetch the ip adjacency, check the outbound mtu,
2582 apply the adjacency rewrite, and send pkts to the adjacency
2583 rewrite header's rewrite_next_index.
2585 @param vm vlib_main_t corresponding to the current thread
2586 @param node vlib_node_runtime_t
2587 @param frame vlib_frame_t whose contents should be dispatched
2589 @par Graph mechanics: buffer metadata, next index usage
2592 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2593 - the rewrite adjacency index
2594 - <code>adj->lookup_next_index</code>
2595 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2596 the packet will be dropped.
2597 - <code>adj->rewrite_header</code>
2598 - Rewrite string length, rewrite string, next_index
2601 - <code>b->current_data, b->current_length</code>
2602 - Updated net of applying the rewrite string
2604 <em>Next Indices:</em>
2605 - <code> adj->rewrite_header.next_index </code>
/* Dispatch function used by the "ip4-rewrite" graph node (see the
 * ip4_rewrite_node registration).  It forwards the whole frame to
 * ip4_rewrite_inline with a final argument of 0; ip4_midchain makes the
 * same call with 1.
 * NOTE(review): the meaning of that last argument is defined by
 * ip4_rewrite_inline - presumably 0 skips the midchain fixup_func
 * calls; confirm against its signature. */
2609 ip4_rewrite (vlib_main_t * vm,
2610 vlib_node_runtime_t * node, vlib_frame_t * frame)
2612 return ip4_rewrite_inline (vm, node, frame, 0);
/* Dispatch function used by the "ip4-midchain" graph node (see the
 * ip4_midchain_node registration).  Identical to ip4_rewrite except that
 * the final argument handed to ip4_rewrite_inline is 1 instead of 0.
 * NOTE(review): presumably 1 enables the adjacency's
 * sub_type.midchain.fixup_func call after the rewrite; confirm against
 * ip4_rewrite_inline. */
2616 ip4_midchain (vlib_main_t * vm,
2617 vlib_node_runtime_t * node, vlib_frame_t * frame)
2619 return ip4_rewrite_inline (vm, node, frame, 1);
/* Graph-node registration for "ip4-rewrite".
 *  - .function     : ip4_rewrite
 *  - .vector_size  : sizeof (u32)
 *  - .format_trace : format_ip4_rewrite_trace
 *  - next nodes    : IP4_REWRITE_NEXT_DROP       -> "error-drop"
 *                    IP4_REWRITE_NEXT_ICMP_ERROR -> "ip4-icmp-error"
 */
2623 VLIB_REGISTER_NODE (ip4_rewrite_node) =
2625 .function = ip4_rewrite,.name = "ip4-rewrite",.vector_size =
2626 sizeof (u32),.format_trace = format_ip4_rewrite_trace,.n_next_nodes =
2629 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2630 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",}
/* NOTE(review): presumably generates per-CPU-architecture variants of
 * ip4_rewrite for this node; the macro is defined outside this file. */
2633 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite);
/* Graph-node registration for "ip4-midchain".
 *  - .function     : ip4_midchain
 *  - .vector_size  : sizeof (u32)
 *  - .format_trace : format_ip4_forward_next_trace
 *  - .sibling_of   : set, so this node declares no next-node table of its
 *                    own here (presumably it shares its sibling's next
 *                    nodes - confirm).
 */
2635 VLIB_REGISTER_NODE (ip4_midchain_node) =
2637 .function = ip4_midchain,.name = "ip4-midchain",.vector_size =
2638 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.sibling_of =
2641 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/*
 * CLI handler for "set interface ip table <interface> <table-id>".
 *
 * Parses a software interface followed by a decimal table id, obtains the
 * IPv4 FIB index for that table id from
 * fib_table_find_or_create_and_lock (), and stores it in
 * ip4_main.fib_index_by_sw_if_index[sw_if_index].
 *
 * @param vm    vlib main of the calling thread
 * @param input unformat stream holding the command arguments
 * @param cmd   the CLI command descriptor
 * @return `error`, which starts as 0 and is set to a clib error when the
 *         interface or the table id cannot be parsed (presumably returned
 *         as-is to the CLI - confirm)
 */
2643 static clib_error_t *
2644 add_del_interface_table (vlib_main_t * vm,
2645 unformat_input_t * input, vlib_cli_command_t * cmd)
2647 vnet_main_t *vnm = vnet_get_main ();
2648 clib_error_t *error = 0;
2649 u32 sw_if_index, table_id;
/* First argument: must parse as a software interface. */
2653 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2655 error = clib_error_return (0, "unknown interface `%U'",
2656 format_unformat_error, input);
/* Second argument: the numeric table id. */
2660 if (unformat (input, "%d", &table_id))
2664 error = clib_error_return (0, "expected table id `%U'",
2665 format_unformat_error, input);
2670 ip4_main_t *im = &ip4_main;
/* Going by the helper's name, the table is created when it does not exist
 * yet and a lock is taken on it.
 * NOTE(review): nothing visible in this function releases that lock. */
2673 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2678 // changing an interface's table has consequences for any connecteds
2679 // and adj-fibs already installed.
/* Make sure the per-interface vector has a slot for sw_if_index, then
 * bind the interface to the FIB. */
2681 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2682 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2690 * Place the indicated interface into the supplied IPv4 FIB table (also known
2691 * as a VRF). If the FIB table does not exist, this command creates it. To
2692 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2693 * A FIB table will only be displayed if a route has been added to the table, or
2694 * an IP Address is assigned to an interface in the table (which adds a route
2697 * @note IP addresses added after setting the interface IP table end up in
2698 * the indicated FIB table. If the IP address is added prior to adding the
2699 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2700 * but potentially counter-intuitive results occur if you provision interface
2701 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2702 * IP table ID provisioned. It might be marginally useful to evade source RPF
2703 * drops to put an interface address into multiple FIBs.
2706 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2707 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Registers the "set interface ip table" CLI command; the arguments are
 * parsed and applied by add_del_interface_table. */
2710 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2712 .path = "set interface ip table",
2713 .function = add_del_interface_table,
2714 .short_help = "set interface ip table <interface> <table-id>",
/*
 * Dispatch function of the "ip4-lookup-multicast" graph node (see the
 * ip4_lookup_multicast_node registration).
 *
 * For every buffer in the frame:
 *  - choose a FIB index: the one bound to the RX interface in
 *    ip4_main.fib_index_by_sw_if_index, unless
 *    vnet_buffer (b)->sw_if_index[VLIB_TX] is something other than ~0, in
 *    which case that value is used as the FIB index;
 *  - look up a load-balance index with ip4_fib_table_lookup_lb ();
 *  - compute the flow hash from the IP header and the load-balance's hash
 *    config and store it in vnet_buffer (b)->ip.flow_hash;
 *  - pick the bucket (flow_hash & lb_n_buckets_minus_1); its dpoi_index is
 *    written to vnet_buffer (b)->ip.adj_index[VLIB_TX] and its
 *    dpoi_next_node selects the next node;
 *  - add one packet and the buffer-chain length to the load-balance
 *    "to" counters.
 *
 * @param vm    vlib main of the calling thread
 * @param node  runtime data of this node (cached next index, trace flag)
 * @param frame frame of buffer indices to process
 * @return frame->n_vectors
 */
2720 ip4_lookup_multicast (vlib_main_t * vm,
2721 vlib_node_runtime_t * node, vlib_frame_t * frame)
2723 ip4_main_t *im = &ip4_main;
2724 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
2725 u32 n_left_from, n_left_to_next, *from, *to_next;
2726 ip_lookup_next_t next;
2727 u32 cpu_index = os_get_cpu_number ();
2729 from = vlib_frame_vector_args (frame);
2730 n_left_from = frame->n_vectors;
/* Start by speculating that packets go where the previous ones went. */
2731 next = node->cached_next_index;
2733 while (n_left_from > 0)
2735 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: two packets per iteration.  Four input buffers are required
 * because from[2] and from[3] are prefetched for the following
 * iteration. */
2737 while (n_left_from >= 4 && n_left_to_next >= 2)
2739 vlib_buffer_t *p0, *p1;
2740 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2741 ip_lookup_next_t next0, next1;
2742 ip4_header_t *ip0, *ip1;
2743 u32 fib_index0, fib_index1;
2744 const dpo_id_t *dpo0, *dpo1;
2745 const load_balance_t *lb0, *lb1;
2747 /* Prefetch next iteration. */
2749 vlib_buffer_t *p2, *p3;
2751 p2 = vlib_get_buffer (vm, from[2]);
2752 p3 = vlib_get_buffer (vm, from[3]);
2754 vlib_prefetch_buffer_header (p2, LOAD);
2755 vlib_prefetch_buffer_header (p3, LOAD);
2757 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2758 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
2761 pi0 = to_next[0] = from[0];
2762 pi1 = to_next[1] = from[1];
2764 p0 = vlib_get_buffer (vm, pi0);
2765 p1 = vlib_get_buffer (vm, pi1);
2767 ip0 = vlib_buffer_get_current (p0);
2768 ip1 = vlib_buffer_get_current (p1);
2771 vec_elt (im->fib_index_by_sw_if_index,
2772 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2774 vec_elt (im->fib_index_by_sw_if_index,
2775 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2777 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
2778 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2780 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
2781 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
/* fib_index0/1 now hold the RX interface's FIB index, or the TX
 * sw_if_index value when that field is not ~0 (same selection as in the
 * single-packet loop below). */
2783 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2785 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index1),
2788 lb0 = load_balance_get (lb_index0);
2789 lb1 = load_balance_get (lb_index1);
/* The bucket is chosen by masking with lb_n_buckets_minus_1, so the bucket
 * count has to be a non-zero power of two. */
2791 ASSERT (lb0->lb_n_buckets > 0);
2792 ASSERT (is_pow2 (lb0->lb_n_buckets));
2793 ASSERT (lb1->lb_n_buckets > 0);
2794 ASSERT (is_pow2 (lb1->lb_n_buckets));
2796 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2797 (ip0, lb0->lb_hash_config);
2799 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2800 (ip1, lb1->lb_hash_config);
2802 dpo0 = load_balance_get_bucket_i (lb0,
2803 (vnet_buffer (p0)->ip.flow_hash &
2804 (lb0->lb_n_buckets_minus_1)));
2805 dpo1 = load_balance_get_bucket_i (lb1,
2806 (vnet_buffer (p1)->ip.flow_hash &
2807 (lb1->lb_n_buckets_minus_1)));
/* The selected bucket supplies both the next node and the index stored
 * for the downstream node. */
2809 next0 = dpo0->dpoi_next_node;
2810 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2811 next1 = dpo1->dpoi_next_node;
2812 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are updated unconditionally; the `if (1)` is flagged as a hack
 * by the original author. */
2814 if (1) /* $$$$$$ HACK FIXME */
2815 vlib_increment_combined_counter
2816 (cm, cpu_index, lb_index0, 1,
2817 vlib_buffer_length_in_chain (vm, p0));
2818 if (1) /* $$$$$$ HACK FIXME */
2819 vlib_increment_combined_counter
2820 (cm, cpu_index, lb_index1, 1,
2821 vlib_buffer_length_in_chain (vm, p1));
2825 n_left_to_next -= 2;
/* wrong_next is a 2-bit mask: bit 0 is set when packet 0 does not go to
 * the speculated next node, bit 1 likewise for packet 1. */
2828 wrong_next = (next0 != next) + 2 * (next1 != next);
/* Mispredicted packets give their slot back (n_left_to_next is
 * re-incremented) and are handed to their own next node with
 * vlib_set_next_frame_buffer (). */
2829 if (PREDICT_FALSE (wrong_next != 0))
2837 n_left_to_next += 1;
2838 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2844 n_left_to_next += 1;
2845 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2851 n_left_to_next += 2;
2852 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2853 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2857 vlib_put_next_frame (vm, node, next, n_left_to_next);
2859 vlib_get_next_frame (vm, node, next, to_next,
/* Single loop: handles the remaining packets one at a time, with the same
 * steps as the dual loop. */
2866 while (n_left_from > 0 && n_left_to_next > 0)
2871 ip_lookup_next_t next0;
2873 const dpo_id_t *dpo0;
2874 const load_balance_t *lb0;
2879 p0 = vlib_get_buffer (vm, pi0);
2881 ip0 = vlib_buffer_get_current (p0);
/* FIB index: from the RX interface, overridden by the TX sw_if_index when
 * that is not ~0. */
2883 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2884 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2885 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
2886 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2888 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2891 lb0 = load_balance_get (lb_index0);
2893 ASSERT (lb0->lb_n_buckets > 0);
2894 ASSERT (is_pow2 (lb0->lb_n_buckets));
2896 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2897 (ip0, lb0->lb_hash_config);
2899 dpo0 = load_balance_get_bucket_i (lb0,
2900 (vnet_buffer (p0)->ip.flow_hash &
2901 (lb0->lb_n_buckets_minus_1)));
2903 next0 = dpo0->dpoi_next_node;
2904 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2906 if (1) /* $$$$$$ HACK FIXME */
2907 vlib_increment_combined_counter
2908 (cm, cpu_index, lb_index0, 1,
2909 vlib_buffer_length_in_chain (vm, p0));
2913 n_left_to_next -= 1;
/* Misprediction: return the slot, close the current next frame and open
 * another one before consuming a slot in it. */
2916 if (PREDICT_FALSE (next0 != next))
2918 n_left_to_next += 1;
2919 vlib_put_next_frame (vm, node, next, n_left_to_next);
2921 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2924 n_left_to_next -= 1;
2928 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Record packet traces only when tracing is enabled on this node. */
2931 if (node->flags & VLIB_NODE_FLAG_TRACE)
2932 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2934 return frame->n_vectors;
/* Graph-node registration for "ip4-lookup-multicast".
 *  - .function     : ip4_lookup_multicast
 *  - .vector_size  : sizeof (u32)
 *  - .format_trace : format_ip4_lookup_trace
 *  - .sibling_of   : "ip4-lookup", with .n_next_nodes = 0, i.e. no
 *                    next-node table of its own is declared here
 *                    (presumably the sibling's next nodes are shared -
 *                    confirm).
 */
2937 VLIB_REGISTER_NODE (ip4_lookup_multicast_node, static) =
2939 .function = ip4_lookup_multicast,.name =
2940 "ip4-lookup-multicast",.vector_size = sizeof (u32),.sibling_of =
2941 "ip4-lookup",.format_trace = format_ip4_lookup_trace,.n_next_nodes = 0,};
2943 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node,
2944 ip4_lookup_multicast);
/* Graph-node registration for "ip4-multicast".  The node's dispatch
 * function is ip4_drop and next index 0 is "error-drop"; traces are
 * formatted with format_ip4_forward_next_trace. */
2946 VLIB_REGISTER_NODE (ip4_multicast_node, static) =
2948 .function = ip4_drop,.name = "ip4-multicast",.vector_size =
2949 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
2952 [0] = "error-drop",}
/*
 * Cross-check two lookups of the same address in the same FIB.
 *
 * Walks the FIB's mtrie by hand (lookup steps 0 through 3, starting at the
 * root leaf), falls back to the mtrie's default leaf when the walk ends on
 * an empty leaf, and compares the index taken from the final leaf with the
 * result of ip4_fib_table_lookup_lb () for the same FIB and address.
 *
 * @param a          address to look up
 * @param fib_index0 FIB index handed to ip4_fib_get ()
 * @return non-zero when both lookups agree, 0 otherwise
 */
2956 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2958 ip4_fib_mtrie_t *mtrie0;
2959 ip4_fib_mtrie_leaf_t leaf0;
2962 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* NOTE(review): the last argument (0..3) presumably selects the address
 * byte consumed by each step - confirm in the mtrie implementation. */
2964 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2965 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2966 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2967 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2968 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2970 /* Handle default route. */
2971 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2973 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Second opinion: the regular lookup helper on the same FIB. */
2975 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler for "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]".
 *
 * Runs ip4_lookup_validate () on n consecutive addresses, starting at the
 * parsed base address and adding one (in host byte order) after every
 * lookup, then prints either the number of errors or a "No errors"
 * message.
 *
 * @param vm    vlib main, used for CLI output
 * @param input unformat stream holding the command arguments
 * @param cmd   the CLI command descriptor
 * @return a clib error for an unknown FIB index or unparsable input
 *
 * NOTE(review): the value parsed after "table" is passed straight to
 * ip4_fib_get () and ip4_lookup_validate (), i.e. it is treated as a FIB
 * index rather than a table id.
 */
2978 static clib_error_t *
2979 test_lookup_command_fn (vlib_main_t * vm,
2980 unformat_input_t * input, vlib_cli_command_t * cmd)
/* NOTE(review): declared without an initializer and only written by the
 * address branch of the parse loop; if the command is given no address
 * the lookup loop appears to read it uninitialized - verify. */
2987 ip4_address_t ip4_base_address;
/* Arguments may appear in any order and are all optional to the parser. */
2990 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2992 if (unformat (input, "table %d", &table_id))
2994 /* Make sure the entry exists. */
2995 fib = ip4_fib_get (table_id);
2996 if ((fib) && (fib->index != table_id))
2997 return clib_error_return (0, "<fib-index> %d does not exist",
/* The count is parsed with %f, i.e. as a floating-point value. */
3000 else if (unformat (input, "count %f", &count))
3003 else if (unformat (input, "%U",
3004 unformat_ip4_address, &ip4_base_address))
3007 return clib_error_return (0, "unknown input `%U'",
3008 format_unformat_error, input);
/* NOTE(review): n is presumably derived from count - confirm. */
3013 for (i = 0; i < n; i++)
3015 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Advance to the next address: convert to host order, add one, convert
 * back to network order. */
3018 ip4_base_address.as_u32 =
3019 clib_host_to_net_u32 (1 +
3020 clib_net_to_host_u32 (ip4_base_address.as_u32));
3024 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3026 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3032 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3033 * given FIB table to determine if there is a conflict with the
3034 * adjacency table. The fib-id can be determined by using the
3035 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3038 * @todo This command uses fib-id, other commands use table-id (not
3039 * just a name, they are different indexes). Would like to change this
3040 * to table-id for consistency.
3043 * Example of how to run the test lookup command:
3044 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3045 * No errors in 2 lookups
/* Registers the "test lookup" CLI command; the work is done by
 * test_lookup_command_fn. */
3049 VLIB_CLI_COMMAND (lookup_test_command, static) =
3051 .path = "test lookup",
3052 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3053 .function = test_lookup_command_fn,
/*
 * Set the flow-hash configuration of the IPv4 FIB that belongs to a
 * table id.
 *
 * @param table_id         table id, resolved to a FIB index through
 *                         ip4_main.fib_index_by_table_id
 * @param flow_hash_config value stored in the FIB's flow_hash_config
 * @return VNET_API_ERROR_NO_SUCH_FIB on the failure path (presumably when
 *         the table id is not in the hash); presumably 0 on success -
 *         confirm
 */
3058 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3060 ip4_main_t *im4 = &ip4_main;
/* hash_get () yields a pointer to the stored value; p[0] is the FIB
 * index. */
3062 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3065 return VNET_API_ERROR_NO_SUCH_FIB;
3067 fib = ip4_fib_get (p[0]);
3069 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler for "set ip flow-hash table <table-id> [src] [dst] ...".
 *
 * Accepts "table <id>" plus any of the keywords generated from
 * foreach_flow_hash_bit; each keyword ORs its bit into flow_hash_config.
 * The result is applied with vnet_set_ip4_flow_hash ().
 *
 * @param vm    vlib main of the calling thread
 * @param input unformat stream holding the command arguments
 * @param cmd   the CLI command descriptor
 * @return a clib error for unknown input or an unknown FIB table; any other
 *         unexpected result from vnet_set_ip4_flow_hash () is only logged
 *         with clib_warning ()
 */
3073 static clib_error_t *
3074 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3075 unformat_input_t * input,
3076 vlib_cli_command_t * cmd)
/* Starts empty: only the keywords given on the command line end up set. */
3080 u32 flow_hash_config = 0;
/* The keyword branches of this if/else chain are produced by expanding
 * foreach_flow_hash_bit with a local helper macro: one
 * `else if (unformat (input, "<name>"))` branch per flow-hash bit. */
3083 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3085 if (unformat (input, "table %d", &table_id))
3088 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3089 foreach_flow_hash_bit
3096 return clib_error_return (0, "unknown input `%U'",
3097 format_unformat_error, input);
3099 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3105 case VNET_API_ERROR_NO_SUCH_FIB:
3106 return clib_error_return (0, "no such FIB table %d", table_id);
3109 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3117 * Configure the set of IPv4 fields used by the flow hash.
3120 * Example of how to set the flow hash on a given table:
3121 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3122 * Example of how to display the configured flow hash:
3123 * @cliexstart{show ip fib}
3124 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3127 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3128 * [0] [@0]: dpo-drop ip6
3131 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3132 * [0] [@0]: dpo-drop ip6
3135 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3136 * [0] [@0]: dpo-drop ip6
3139 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3140 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3143 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3144 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3145 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3146 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3147 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3150 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3151 * [0] [@0]: dpo-drop ip6
3152 * 255.255.255.255/32
3154 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3155 * [0] [@0]: dpo-drop ip6
3156 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3159 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3160 * [0] [@0]: dpo-drop ip6
3163 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3164 * [0] [@0]: dpo-drop ip6
3167 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3168 * [0] [@4]: ipv4-glean: af_packet0
3171 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3172 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3175 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3176 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3179 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3180 * [0] [@4]: ipv4-glean: af_packet1
3183 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3184 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3187 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3188 * [0] [@0]: dpo-drop ip6
3191 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3192 * [0] [@0]: dpo-drop ip6
3193 * 255.255.255.255/32
3195 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3196 * [0] [@0]: dpo-drop ip6
/* Registers the "set ip flow-hash" CLI command; the arguments are parsed
 * and applied by set_ip_flow_hash_command_fn. */
3200 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3202 .path = "set ip flow-hash",
3204 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3205 .function = set_ip_flow_hash_command_fn,
/*
 * Attach a classifier table to an interface, or detach it.
 *
 * Records table_index for the interface in
 * lookup_main.classify_table_index_by_sw_if_index.  When the interface has
 * a first IPv4 address, a FIB entry for that address, sourced by
 * FIB_SOURCE_CLASSIFY, is added with a classify DPO (table_index != ~0) or
 * removed (table_index == ~0) in the interface's FIB.
 *
 * @param vm          vlib main of the calling thread
 * @param sw_if_index software interface to configure
 * @param table_index classifier table index, or ~0 to detach
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 *         pool index, VNET_API_ERROR_NO_SUCH_ENTRY when table_index is not
 *         ~0 and is a free index in the classifier's table pool;
 *         presumably 0 otherwise - confirm
 */
3210 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3213 vnet_main_t *vnm = vnet_get_main ();
3214 vnet_interface_main_t *im = &vnm->interface_main;
3215 ip4_main_t *ipm = &ip4_main;
3216 ip_lookup_main_t *lm = &ipm->lookup_main;
3217 vnet_classify_main_t *cm = &vnet_classify_main;
3218 ip4_address_t *if_addr;
/* Reject interfaces that do not exist. */
3220 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3221 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 means "no table" and is always accepted; anything else must be an
 * allocated classifier table. */
3223 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3224 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Remember the table for this interface, growing the vector if needed. */
3226 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3227 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
/* The FIB entry is only touched when the interface has an address. */
3229 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3231 if (NULL != if_addr)
3233 fib_prefix_t pfx = {
3235 .fp_proto = FIB_PROTOCOL_IP4,
3236 .fp_addr.ip4 = *if_addr,
3240 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
/* Attach: install a classify DPO for the interface address. */
3244 if (table_index != (u32) ~ 0)
3246 dpo_id_t dpo = DPO_INVALID;
3251 classify_dpo_create (DPO_PROTO_IP4, table_index));
3253 fib_table_entry_special_dpo_add (fib_index,
3255 FIB_SOURCE_CLASSIFY,
3256 FIB_ENTRY_FLAG_NONE, &dpo);
/* Detach: drop the FIB_SOURCE_CLASSIFY entry for the interface address. */
3261 fib_table_entry_special_remove (fib_index,
3262 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler for
 * "set ip classify intfc <interface> table-index <classify-idx>".
 *
 * Both arguments are mandatory and may appear in either order; they are
 * then handed to vnet_set_ip4_classify_intfc () and its error codes are
 * translated into CLI error messages.
 *
 * @param vm    vlib main of the calling thread
 * @param input unformat stream holding the command arguments
 * @param cmd   the CLI command descriptor
 * @return a clib error when an argument is missing, the interface does not
 *         exist, or the classifier table does not exist
 */
3269 static clib_error_t *
3270 set_ip_classify_command_fn (vlib_main_t * vm,
3271 unformat_input_t * input,
3272 vlib_cli_command_t * cmd)
/* A separate flag tracks whether table-index was given, because ~0 is
 * itself a value vnet_set_ip4_classify_intfc () accepts. */
3274 u32 table_index = ~0;
3275 int table_index_set = 0;
3276 u32 sw_if_index = ~0;
3279 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3281 if (unformat (input, "table-index %d", &table_index))
3282 table_index_set = 1;
3283 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3284 vnet_get_main (), &sw_if_index))
/* Both arguments are required. */
3290 if (table_index_set == 0)
3291 return clib_error_return (0, "classify table-index must be specified");
3293 if (sw_if_index == ~0)
3294 return clib_error_return (0, "interface / subif must be specified");
3296 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3303 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3304 return clib_error_return (0, "No such interface");
3306 case VNET_API_ERROR_NO_SUCH_ENTRY:
3307 return clib_error_return (0, "No such classifier table");
3313 * Assign a classification table to an interface. The classification
3314 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3315 * commands. Once the table is created, use this command to filter packets
3319 * Example of how to assign a classification table to an interface:
3320 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the "set ip classify" CLI command; the arguments are parsed
 * and applied by set_ip_classify_command_fn. */
3323 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3325 .path = "set ip classify",
3327 "set ip classify intfc <interface> table-index <classify-idx>",
3328 .function = set_ip_classify_command_fn,
3333 * fd.io coding-style-patch-verification: ON
3336 * eval: (c-set-style "gnu")