2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
56 * @brief IPv4 Forwarding.
58 * This file contains the source code for IPv4 forwarding.
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t *im = &ip4_main;
74 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, *from, *to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number ();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
87 while (n_left_from >= 8 && n_left_to_next >= 4)
89 vlib_buffer_t *p0, *p1, *p2, *p3;
90 ip4_header_t *ip0, *ip1, *ip2, *ip3;
91 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
92 ip_lookup_next_t next0, next1, next2, next3;
93 const load_balance_t *lb0, *lb1, *lb2, *lb3;
94 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
95 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
96 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
97 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
99 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
101 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
103 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
105 flow_hash_config_t flow_hash_config0, flow_hash_config1;
106 flow_hash_config_t flow_hash_config2, flow_hash_config3;
107 u32 hash_c0, hash_c1, hash_c2, hash_c3;
108 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
110 /* Prefetch next iteration. */
112 vlib_buffer_t *p4, *p5, *p6, *p7;
114 p4 = vlib_get_buffer (vm, from[4]);
115 p5 = vlib_get_buffer (vm, from[5]);
116 p6 = vlib_get_buffer (vm, from[6]);
117 p7 = vlib_get_buffer (vm, from[7]);
119 vlib_prefetch_buffer_header (p4, LOAD);
120 vlib_prefetch_buffer_header (p5, LOAD);
121 vlib_prefetch_buffer_header (p6, LOAD);
122 vlib_prefetch_buffer_header (p7, LOAD);
124 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
125 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
130 pi0 = to_next[0] = from[0];
131 pi1 = to_next[1] = from[1];
132 pi2 = to_next[2] = from[2];
133 pi3 = to_next[3] = from[3];
140 p0 = vlib_get_buffer (vm, pi0);
141 p1 = vlib_get_buffer (vm, pi1);
142 p2 = vlib_get_buffer (vm, pi2);
143 p3 = vlib_get_buffer (vm, pi3);
145 ip0 = vlib_buffer_get_current (p0);
146 ip1 = vlib_buffer_get_current (p1);
147 ip2 = vlib_buffer_get_current (p2);
148 ip3 = vlib_buffer_get_current (p3);
150 dst_addr0 = &ip0->dst_address;
151 dst_addr1 = &ip1->dst_address;
152 dst_addr2 = &ip2->dst_address;
153 dst_addr3 = &ip3->dst_address;
156 vec_elt (im->fib_index_by_sw_if_index,
157 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
159 vec_elt (im->fib_index_by_sw_if_index,
160 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
162 vec_elt (im->fib_index_by_sw_if_index,
163 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
165 vec_elt (im->fib_index_by_sw_if_index,
166 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
168 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
169 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
171 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
172 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
174 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
175 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
177 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
178 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
181 if (!lookup_for_responses_to_locally_received_packets)
183 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
184 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
185 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
186 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
188 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
190 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
191 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
192 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
193 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
196 tcp0 = (void *) (ip0 + 1);
197 tcp1 = (void *) (ip1 + 1);
198 tcp2 = (void *) (ip2 + 1);
199 tcp3 = (void *) (ip3 + 1);
201 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
202 || ip0->protocol == IP_PROTOCOL_UDP);
203 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
204 || ip1->protocol == IP_PROTOCOL_UDP);
205 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
206 || ip2->protocol == IP_PROTOCOL_UDP);
207 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
208 || ip1->protocol == IP_PROTOCOL_UDP);
210 if (!lookup_for_responses_to_locally_received_packets)
212 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
213 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
214 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
215 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
218 if (!lookup_for_responses_to_locally_received_packets)
220 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
221 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
222 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
223 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
226 if (!lookup_for_responses_to_locally_received_packets)
228 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
229 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
230 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
231 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
234 if (lookup_for_responses_to_locally_received_packets)
236 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
237 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
238 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
239 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
243 /* Handle default route. */
246 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
249 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
252 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
255 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
256 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
257 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
258 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
259 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
262 lb0 = load_balance_get (lb_index0);
263 lb1 = load_balance_get (lb_index1);
264 lb2 = load_balance_get (lb_index2);
265 lb3 = load_balance_get (lb_index3);
267 /* Use flow hash to compute multipath adjacency. */
268 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
269 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
270 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
271 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
272 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
274 flow_hash_config0 = lb0->lb_hash_config;
275 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
276 ip4_compute_flow_hash (ip0, flow_hash_config0);
278 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
280 flow_hash_config1 = lb1->lb_hash_config;
281 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
282 ip4_compute_flow_hash (ip1, flow_hash_config1);
284 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
286 flow_hash_config2 = lb2->lb_hash_config;
287 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
288 ip4_compute_flow_hash (ip2, flow_hash_config2);
290 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
292 flow_hash_config3 = lb3->lb_hash_config;
293 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
294 ip4_compute_flow_hash (ip3, flow_hash_config3);
297 ASSERT (lb0->lb_n_buckets > 0);
298 ASSERT (is_pow2 (lb0->lb_n_buckets));
299 ASSERT (lb1->lb_n_buckets > 0);
300 ASSERT (is_pow2 (lb1->lb_n_buckets));
301 ASSERT (lb2->lb_n_buckets > 0);
302 ASSERT (is_pow2 (lb2->lb_n_buckets));
303 ASSERT (lb3->lb_n_buckets > 0);
304 ASSERT (is_pow2 (lb3->lb_n_buckets));
306 dpo0 = load_balance_get_bucket_i (lb0,
308 (lb0->lb_n_buckets_minus_1)));
309 dpo1 = load_balance_get_bucket_i (lb1,
311 (lb1->lb_n_buckets_minus_1)));
312 dpo2 = load_balance_get_bucket_i (lb2,
314 (lb2->lb_n_buckets_minus_1)));
315 dpo3 = load_balance_get_bucket_i (lb3,
317 (lb3->lb_n_buckets_minus_1)));
319 next0 = dpo0->dpoi_next_node;
320 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
321 next1 = dpo1->dpoi_next_node;
322 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
323 next2 = dpo2->dpoi_next_node;
324 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
325 next3 = dpo3->dpoi_next_node;
326 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
328 vlib_increment_combined_counter
329 (cm, cpu_index, lb_index0, 1,
330 vlib_buffer_length_in_chain (vm, p0)
331 + sizeof (ethernet_header_t));
332 vlib_increment_combined_counter
333 (cm, cpu_index, lb_index1, 1,
334 vlib_buffer_length_in_chain (vm, p1)
335 + sizeof (ethernet_header_t));
336 vlib_increment_combined_counter
337 (cm, cpu_index, lb_index2, 1,
338 vlib_buffer_length_in_chain (vm, p2)
339 + sizeof (ethernet_header_t));
340 vlib_increment_combined_counter
341 (cm, cpu_index, lb_index3, 1,
342 vlib_buffer_length_in_chain (vm, p3)
343 + sizeof (ethernet_header_t));
345 vlib_validate_buffer_enqueue_x4 (vm, node, next,
346 to_next, n_left_to_next,
348 next0, next1, next2, next3);
351 while (n_left_from > 0 && n_left_to_next > 0)
355 __attribute__ ((unused)) tcp_header_t *tcp0;
356 ip_lookup_next_t next0;
357 const load_balance_t *lb0;
358 ip4_fib_mtrie_t *mtrie0;
359 ip4_fib_mtrie_leaf_t leaf0;
360 ip4_address_t *dst_addr0;
361 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
362 flow_hash_config_t flow_hash_config0;
363 const dpo_id_t *dpo0;
369 p0 = vlib_get_buffer (vm, pi0);
371 ip0 = vlib_buffer_get_current (p0);
373 dst_addr0 = &ip0->dst_address;
376 vec_elt (im->fib_index_by_sw_if_index,
377 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
379 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
380 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
382 if (!lookup_for_responses_to_locally_received_packets)
384 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
386 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
388 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
391 tcp0 = (void *) (ip0 + 1);
393 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
394 || ip0->protocol == IP_PROTOCOL_UDP);
396 if (!lookup_for_responses_to_locally_received_packets)
397 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
399 if (!lookup_for_responses_to_locally_received_packets)
400 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
402 if (!lookup_for_responses_to_locally_received_packets)
403 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
405 if (lookup_for_responses_to_locally_received_packets)
406 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
409 /* Handle default route. */
412 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
413 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
416 lb0 = load_balance_get (lbi0);
418 /* Use flow hash to compute multipath adjacency. */
419 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
420 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
422 flow_hash_config0 = lb0->lb_hash_config;
424 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
425 ip4_compute_flow_hash (ip0, flow_hash_config0);
428 ASSERT (lb0->lb_n_buckets > 0);
429 ASSERT (is_pow2 (lb0->lb_n_buckets));
431 dpo0 = load_balance_get_bucket_i (lb0,
433 (lb0->lb_n_buckets_minus_1)));
435 next0 = dpo0->dpoi_next_node;
436 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
438 vlib_increment_combined_counter
439 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
446 if (PREDICT_FALSE (next0 != next))
449 vlib_put_next_frame (vm, node, next, n_left_to_next);
451 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
458 vlib_put_next_frame (vm, node, next, n_left_to_next);
461 if (node->flags & VLIB_NODE_FLAG_TRACE)
462 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
464 return frame->n_vectors;
/** @brief IPv4 lookup node.
    @node ip4-lookup

    This is the main IPv4 lookup dispatch node.

    @param vm vlib_main_t corresponding to the current thread
    @param node vlib_node_runtime_t
    @param frame vlib_frame_t whose contents should be dispatched

    @par Graph mechanics: buffer metadata, next index usage

    @em Uses:
    - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
        - Indicates the @c sw_if_index value of the interface that the
          packet was received on.
    - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
        - When the value is @c ~0 then the node performs a longest prefix
          match (LPM) for the packet destination address in the FIB attached
          to the receive interface.
        - Otherwise perform LPM for the packet destination address in the
          indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
          value (0, 1, ...) and not a VRF id.

    @em Sets:
    - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
        - The @c dpoi_index of the DPO chosen from the load-balance object
          that the lookup resolved to.
    - <code>vnet_buffer(b)->ip.flow_hash</code>
        - The flow hash used to choose the bucket, or 0 when the
          load-balance object has a single bucket.

    <em>Next Index:</em>
    - Dispatches the packet to the node index found in
      dpo_id_t @c dpo->dpoi_next_node
      (where @c dpo is the chosen load-balance bucket).
*/
500 ip4_lookup (vlib_main_t * vm,
501 vlib_node_runtime_t * node, vlib_frame_t * frame)
503 return ip4_lookup_inline (vm, node, frame,
504 /* lookup_for_responses_to_locally_received_packets */
509 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
511 VLIB_REGISTER_NODE (ip4_lookup_node) =
513 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
514 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
515 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
517 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
520 ip4_load_balance (vlib_main_t * vm,
521 vlib_node_runtime_t * node, vlib_frame_t * frame)
523 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
524 u32 n_left_from, n_left_to_next, *from, *to_next;
525 ip_lookup_next_t next;
526 u32 cpu_index = os_get_cpu_number ();
528 from = vlib_frame_vector_args (frame);
529 n_left_from = frame->n_vectors;
530 next = node->cached_next_index;
532 if (node->flags & VLIB_NODE_FLAG_TRACE)
533 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
535 while (n_left_from > 0)
537 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
540 while (n_left_from >= 4 && n_left_to_next >= 2)
542 ip_lookup_next_t next0, next1;
543 const load_balance_t *lb0, *lb1;
544 vlib_buffer_t *p0, *p1;
545 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
546 const ip4_header_t *ip0, *ip1;
547 const dpo_id_t *dpo0, *dpo1;
549 /* Prefetch next iteration. */
551 vlib_buffer_t *p2, *p3;
553 p2 = vlib_get_buffer (vm, from[2]);
554 p3 = vlib_get_buffer (vm, from[3]);
556 vlib_prefetch_buffer_header (p2, STORE);
557 vlib_prefetch_buffer_header (p3, STORE);
559 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
560 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
563 pi0 = to_next[0] = from[0];
564 pi1 = to_next[1] = from[1];
571 p0 = vlib_get_buffer (vm, pi0);
572 p1 = vlib_get_buffer (vm, pi1);
574 ip0 = vlib_buffer_get_current (p0);
575 ip1 = vlib_buffer_get_current (p1);
576 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
577 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
579 lb0 = load_balance_get (lbi0);
580 lb1 = load_balance_get (lbi1);
583 * this node is for via FIBs we can re-use the hash value from the
584 * to node if present.
585 * We don't want to use the same hash value at each level in the recursion
586 * graph as that would lead to polarisation
588 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
589 hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
591 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
593 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
595 hc0 = vnet_buffer (p0)->ip.flow_hash =
596 vnet_buffer (p0)->ip.flow_hash >> 1;
600 hc0 = vnet_buffer (p0)->ip.flow_hash =
601 ip4_compute_flow_hash (ip0, hc0);
604 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
606 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
608 hc1 = vnet_buffer (p1)->ip.flow_hash =
609 vnet_buffer (p1)->ip.flow_hash >> 1;
613 hc1 = vnet_buffer (p1)->ip.flow_hash =
614 ip4_compute_flow_hash (ip1, hc1);
619 load_balance_get_bucket_i (lb0,
620 hc0 & (lb0->lb_n_buckets_minus_1));
622 load_balance_get_bucket_i (lb1,
623 hc1 & (lb1->lb_n_buckets_minus_1));
625 next0 = dpo0->dpoi_next_node;
626 next1 = dpo1->dpoi_next_node;
628 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
631 vlib_increment_combined_counter
632 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633 vlib_increment_combined_counter
634 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
636 vlib_validate_buffer_enqueue_x2 (vm, node, next,
637 to_next, n_left_to_next,
638 pi0, pi1, next0, next1);
641 while (n_left_from > 0 && n_left_to_next > 0)
643 ip_lookup_next_t next0;
644 const load_balance_t *lb0;
647 const ip4_header_t *ip0;
648 const dpo_id_t *dpo0;
657 p0 = vlib_get_buffer (vm, pi0);
659 ip0 = vlib_buffer_get_current (p0);
660 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
662 lb0 = load_balance_get (lbi0);
664 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
665 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
667 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
669 hc0 = vnet_buffer (p0)->ip.flow_hash =
670 vnet_buffer (p0)->ip.flow_hash >> 1;
674 hc0 = vnet_buffer (p0)->ip.flow_hash =
675 ip4_compute_flow_hash (ip0, hc0);
680 load_balance_get_bucket_i (lb0,
681 hc0 & (lb0->lb_n_buckets_minus_1));
683 next0 = dpo0->dpoi_next_node;
684 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
686 vlib_increment_combined_counter
687 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
689 vlib_validate_buffer_enqueue_x1 (vm, node, next,
690 to_next, n_left_to_next,
694 vlib_put_next_frame (vm, node, next, n_left_to_next);
697 return frame->n_vectors;
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704 format_ip4_lookup_trace,};
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
708 /* get first interface address */
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711 ip_interface_address_t ** result_ia)
713 ip_lookup_main_t *lm = &im->lookup_main;
714 ip_interface_address_t *ia = 0;
715 ip4_address_t *result = 0;
717 foreach_ip_interface_address (lm, ia, sw_if_index,
718 1 /* honor unnumbered */ ,
722 ip_interface_address_get_address (lm, ia);
728 *result_ia = result ? ia : 0;
733 ip4_add_interface_routes (u32 sw_if_index,
734 ip4_main_t * im, u32 fib_index,
735 ip_interface_address_t * a)
737 ip_lookup_main_t *lm = &im->lookup_main;
738 ip4_address_t *address = ip_interface_address_get_address (lm, a);
740 .fp_len = a->address_length,
741 .fp_proto = FIB_PROTOCOL_IP4,
742 .fp_addr.ip4 = *address,
745 a->neighbor_probe_adj_index = ~0;
749 fib_node_index_t fei;
751 fei = fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), FIB_PROTOCOL_IP4, NULL, /* No next-hop address */
752 sw_if_index, ~0, // invalid FIB index
753 1, NULL, // no out-label stack
754 FIB_ROUTE_PATH_FLAG_NONE);
755 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
760 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
762 u32 classify_table_index =
763 lm->classify_table_index_by_sw_if_index[sw_if_index];
764 if (classify_table_index != (u32) ~ 0)
766 dpo_id_t dpo = DPO_INVALID;
771 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
773 fib_table_entry_special_dpo_add (fib_index,
776 FIB_ENTRY_FLAG_NONE, &dpo);
781 fib_table_entry_update_one_path (fib_index, &pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), FIB_PROTOCOL_IP4, &pfx.fp_addr, sw_if_index, ~0, // invalid FIB index
782 1, NULL, // no out-label stack
783 FIB_ROUTE_PATH_FLAG_NONE);
787 ip4_del_interface_routes (ip4_main_t * im,
789 ip4_address_t * address, u32 address_length)
792 .fp_len = address_length,
793 .fp_proto = FIB_PROTOCOL_IP4,
794 .fp_addr.ip4 = *address,
799 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
803 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
807 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
809 ip4_main_t *im = &ip4_main;
811 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
814 * enable/disable only on the 1<->0 transition
818 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
823 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
824 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
827 vnet_feature_enable_disable ("ip4-unicast", "ip4-lookup", sw_if_index,
830 vnet_feature_enable_disable ("ip4-multicast", "ip4-lookup-multicast",
831 sw_if_index, is_enable, 0, 0);
835 static clib_error_t *
836 ip4_add_del_interface_address_internal (vlib_main_t * vm,
838 ip4_address_t * address,
839 u32 address_length, u32 is_del)
841 vnet_main_t *vnm = vnet_get_main ();
842 ip4_main_t *im = &ip4_main;
843 ip_lookup_main_t *lm = &im->lookup_main;
844 clib_error_t *error = 0;
845 u32 if_address_index, elts_before;
846 ip4_address_fib_t ip4_af, *addr_fib = 0;
848 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
849 ip4_addr_fib_init (&ip4_af, address,
850 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
851 vec_add1 (addr_fib, ip4_af);
854 * there is no support for adj-fib handling in the presence of overlapping
855 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
860 /* When adding an address check that it does not conflict
861 with an existing address. */
862 ip_interface_address_t *ia;
863 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
864 0 /* honor unnumbered */ ,
868 ip_interface_address_get_address
869 (&im->lookup_main, ia);
870 if (ip4_destination_matches_route
871 (im, address, x, ia->address_length)
873 ip4_destination_matches_route (im,
879 ("failed to add %U which conflicts with %U for interface %U",
880 format_ip4_address_and_length, address,
882 format_ip4_address_and_length, x,
884 format_vnet_sw_if_index_name, vnm,
889 elts_before = pool_elts (lm->if_address_pool);
891 error = ip_interface_address_add_del
892 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
896 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
899 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
901 ip4_add_interface_routes (sw_if_index,
902 im, ip4_af.fib_index,
904 (lm->if_address_pool, if_address_index));
906 /* If pool did not grow/shrink: add duplicate address. */
907 if (elts_before != pool_elts (lm->if_address_pool))
909 ip4_add_del_interface_address_callback_t *cb;
910 vec_foreach (cb, im->add_del_interface_address_callbacks)
911 cb->function (im, cb->function_opaque, sw_if_index,
912 address, address_length, if_address_index, is_del);
921 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
922 ip4_address_t * address, u32 address_length,
925 return ip4_add_del_interface_address_internal
926 (vm, sw_if_index, address, address_length, is_del);
929 /* Built-in ip4 unicast rx feature path definition */
931 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
933 .arc_name = "ip4-unicast",
935 VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
937 &ip4_main.lookup_main.ucast_feature_arc_index,};
939 VNET_FEATURE_INIT (ip4_flow_classify, static) =
941 .arc_name = "ip4-unicast",
942 .node_name = "ip4-flow-classify",
943 .runs_before = VNET_FEATURES ("ip4-inacl"),
946 VNET_FEATURE_INIT (ip4_inacl, static) =
948 .arc_name = "ip4-unicast",
949 .node_name = "ip4-inacl",
950 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
953 VNET_FEATURE_INIT (ip4_source_check_1, static) =
955 .arc_name = "ip4-unicast",
956 .node_name = "ip4-source-check-via-rx",
957 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
960 VNET_FEATURE_INIT (ip4_source_check_2, static) =
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-source-check-via-any",
964 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
967 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
969 .arc_name = "ip4-unicast",
970 .node_name = "ip4-source-and-port-range-check-rx",
971 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
974 VNET_FEATURE_INIT (ip4_policer_classify, static) =
976 .arc_name = "ip4-unicast",
977 .node_name = "ip4-policer-classify",
978 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
981 VNET_FEATURE_INIT (ip4_ipsec, static) =
983 .arc_name = "ip4-unicast",
984 .node_name = "ipsec-input-ip4",
985 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
988 VNET_FEATURE_INIT (ip4_vpath, static) =
990 .arc_name = "ip4-unicast",
991 .node_name = "vpath-input-ip4",
992 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
995 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
997 .arc_name = "ip4-unicast",
998 .node_name = "ip4-vxlan-bypass",
999 .runs_before = VNET_FEATURES ("ip4-lookup"),
1002 VNET_FEATURE_INIT (ip4_lookup, static) =
1004 .arc_name = "ip4-unicast",
1005 .node_name = "ip4-lookup",
1006 .runs_before = VNET_FEATURES ("ip4-drop"),
1009 VNET_FEATURE_INIT (ip4_drop, static) =
1011 .arc_name = "ip4-unicast",
1012 .node_name = "ip4-drop",
1013 .runs_before = 0, /* not before any other features */
1017 /* Built-in ip4 multicast rx feature path definition */
1018 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1020 .arc_name = "ip4-multicast",
1021 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1022 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1025 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1027 .arc_name = "ip4-multicast",
1028 .node_name = "vpath-input-ip4",
1029 .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
1032 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1034 .arc_name = "ip4-multicast",
1035 .node_name = "ip4-lookup-multicast",
1036 .runs_before = VNET_FEATURES ("ip4-drop"),
1039 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1041 .arc_name = "ip4-multicast",
1042 .node_name = "ip4-drop",
1043 .runs_before = 0, /* last feature */
1046 /* Source and port-range check ip4 tx feature path definition */
1047 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1049 .arc_name = "ip4-output",
1050 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1051 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1054 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1056 .arc_name = "ip4-output",
1057 .node_name = "ip4-source-and-port-range-check-tx",
1058 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1061 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1063 .arc_name = "ip4-output",
1064 .node_name = "ipsec-output-ip4",
1065 .runs_before = VNET_FEATURES ("interface-output"),
1068 /* Built-in ip4 tx feature path definition */
1069 VNET_FEATURE_INIT (ip4_interface_output, static) =
1071 .arc_name = "ip4-output",
1072 .node_name = "interface-output",
1073 .runs_before = 0, /* not before any other features */
1077 static clib_error_t *
1078 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1080 ip4_main_t *im = &ip4_main;
1082 /* Fill in lookup tables with default table (0). */
1083 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1085 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1088 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1091 vnet_feature_enable_disable ("ip4-output", "interface-output", sw_if_index,
1094 return /* no error */ 0;
1097 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1099 /* Global IP4 main. */
1100 ip4_main_t ip4_main;
1103 ip4_lookup_init (vlib_main_t * vm)
1105 ip4_main_t *im = &ip4_main;
1106 clib_error_t *error;
1109 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1112 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1117 m = pow2_mask (i) << (32 - i);
1120 im->fib_masks[i] = clib_host_to_net_u32 (m);
1123 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1125 /* Create FIB with index 0 and table id of 0. */
1126 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1130 pn = pg_get_node (ip4_lookup_node.index);
1131 pn->unformat_edit = unformat_pg_ip4_header;
1135 ethernet_arp_header_t h;
1137 memset (&h, 0, sizeof (h));
1139 /* Set target ethernet address to all zeros. */
1140 memset (h.ip4_over_ethernet[1].ethernet, 0,
1141 sizeof (h.ip4_over_ethernet[1].ethernet));
1143 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1144 #define _8(f,v) h.f = v;
1145 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1146 _16 (l3_type, ETHERNET_TYPE_IP4);
1147 _8 (n_l2_address_bytes, 6);
1148 _8 (n_l3_address_bytes, 4);
1149 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1153 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1156 /* alloc chunk size */ 8,
1163 VLIB_INIT_FUNCTION (ip4_lookup_init);
1167 /* Adjacency taken. */
1172 /* Packet data, possibly *after* rewrite. */
1173 u8 packet_data[64 - 1 * sizeof (u32)];
1175 ip4_forward_next_trace_t;
/* Trace formatter: prints the captured packet bytes with format_ip4_header,
   preceded by white space up to the current indent of `s`.
   The va_list carries, in order: vlib_main_t * (unused), vlib_node_t *
   (unused) and the ip4_forward_next_trace_t * record to print. */
1178 format_ip4_forward_next_trace (u8 * s, va_list * args)
1180 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1181 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1182 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1183 uword indent = format_get_indent (s);
1184 s = format (s, "%U%U",
1185 format_white_space, indent,
1186 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for lookup traces: first a line with the FIB index, DPO
   index and flow hash of the record, then, on a new indented line, the
   captured packet bytes printed with format_ip4_header.
   The va_list carries, in order: vlib_main_t * (unused), vlib_node_t *
   (unused) and the ip4_forward_next_trace_t * record to print. */
1191 format_ip4_lookup_trace (u8 * s, va_list * args)
1193 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1194 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1195 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1196 uword indent = format_get_indent (s);
1198 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1199 t->fib_index, t->dpo_index, t->flow_hash);
1200 s = format (s, "\n%U%U",
1201 format_white_space, indent,
1202 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite traces: first a summary line (interface/FIB
   value, DPO index, the adjacency printed with format_ip_adjacency, flow
   hash), then, on a new indented line, the captured packet bytes printed
   with format_ip_adjacency_packet_data for that adjacency.
   The va_list carries, in order: vlib_main_t * (unused), vlib_node_t *
   (unused) and the ip4_forward_next_trace_t * record to print. */
1207 format_ip4_rewrite_trace (u8 * s, va_list * args)
1209 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1210 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1211 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1212 vnet_main_t *vnm = vnet_get_main ();
1213 uword indent = format_get_indent (s);
/* NOTE(review): the value labelled "tx_sw_if_index" is t->fib_index; the
   capture code in ip4_forward_next_trace () appears to store the TX
   sw_if_index there when one is set -- confirm this is intentional. */
1215 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1216 t->fib_index, t->dpo_index, format_ip_adjacency,
1217 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1218 s = format (s, "\n%U%U",
1219 format_white_space, indent,
1220 format_ip_adjacency_packet_data,
1221 vnm, t->dpo_index, t->packet_data, sizeof (t->packet_data));
1225 /* Common trace function for all ip4-forward next nodes.
   Walks the buffer indices of `frame`; for each buffer that has
   VLIB_BUFFER_IS_TRACED set it adds an ip4_forward_next_trace_t record
   holding the adjacency/DPO index selected by which_adj_index, the flow
   hash and a copy of the leading packet bytes.  Buffers are handled two
   at a time while prefetching the following two, then one at a time. */
1227 ip4_forward_next_trace (vlib_main_t * vm,
1228 vlib_node_runtime_t * node,
1229 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1232 ip4_main_t *im = &ip4_main;
1234 n_left = frame->n_vectors;
1235 from = vlib_frame_vector_args (frame);
/* Two buffers per iteration. */
1240 vlib_buffer_t *b0, *b1;
1241 ip4_forward_next_trace_t *t0, *t1;
1243 /* Prefetch next iteration. */
1244 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1245 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1250 b0 = vlib_get_buffer (vm, bi0);
1251 b1 = vlib_get_buffer (vm, bi1);
1253 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1255 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1256 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1257 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1259 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1260 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1261 vec_elt (im->fib_index_by_sw_if_index,
1262 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
/* Always copies sizeof (packet_data) bytes starting at the buffer's
   current data pointer. */
1264 clib_memcpy (t0->packet_data,
1265 vlib_buffer_get_current (b0),
1266 sizeof (t0->packet_data));
1268 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1270 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1271 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1272 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1274 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1275 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1276 vec_elt (im->fib_index_by_sw_if_index,
1277 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1278 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1279 sizeof (t1->packet_data));
/* One buffer per iteration; same capture as above. */
1289 ip4_forward_next_trace_t *t0;
1293 b0 = vlib_get_buffer (vm, bi0);
1295 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1297 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1298 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1299 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1301 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1302 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1303 vec_elt (im->fib_index_by_sw_if_index,
1304 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1305 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1306 sizeof (t0->packet_data));
/* Shared body of ip4_drop () and ip4_punt (): hands every buffer of `frame`
   to vlib_error_drop_buffers () with `error_code` attributed to
   ip4_input_node, then records traces if tracing is enabled on `node`. */
1314 ip4_drop_or_punt (vlib_main_t * vm,
1315 vlib_node_runtime_t * node,
1316 vlib_frame_t * frame, ip4_error_t error_code)
1318 u32 *buffers = vlib_frame_vector_args (frame);
1319 uword n_packets = frame->n_vectors;
1321 vlib_error_drop_buffers (vm, node, buffers,
1325 ip4_input_node.index, error_code);
1327 if (node->flags & VLIB_NODE_FLAG_TRACE)
1328 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function of ip4-drop: ip4_drop_or_punt () with
   IP4_ERROR_ADJACENCY_DROP as the error code. */
1334 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1336 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function of ip4-punt: ip4_drop_or_punt () with
   IP4_ERROR_ADJACENCY_PUNT as the error code. */
1340 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1342 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Registration of the "ip4-drop" graph node: runs ip4_drop (), traces are
   printed by format_ip4_forward_next_trace, next node 0 is "error-drop". */
1345 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1347 .function = ip4_drop,.name = "ip4-drop",.vector_size =
1348 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1351 [0] = "error-drop",}
1354 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Registration of the "ip4-punt" graph node: runs ip4_punt (), traces are
   printed by format_ip4_forward_next_trace, next node 0 is "error-punt". */
1356 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1358 .function = ip4_punt,.name = "ip4-punt",.vector_size =
1359 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1362 [0] = "error-punt",}
1365 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1367 /* Compute TCP/UDP/ICMP4 checksum in software.
   The sum is seeded with the payload length and protocol, the source and
   destination addresses are added, and then the L4 payload is summed,
   following the buffer chain of p0 when the payload spans several
   buffers.  The folded, complemented result ends up in sum16. */
1369 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1373 u32 ip_header_length, payload_length_host_byte_order;
1374 u32 n_this_buffer, n_bytes_left;
1376 void *data_this_buffer;
1378 /* Initialize checksum with ip header. */
1379 ip_header_length = ip4_header_bytes (ip0);
/* L4 payload length = IP total length minus IP header length. */
1380 payload_length_host_byte_order =
1381 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1383 clib_host_to_net_u32 (payload_length_host_byte_order +
1384 (ip0->protocol << 16));
/* With a 32-bit uword the two addresses are added as two u32 values;
   otherwise a single u64 read starting at src_address is added
   (presumably covering src and dst, which assumes they are adjacent in
   ip4_header_t -- TODO confirm). */
1386 if (BITS (uword) == 32)
1389 ip_csum_with_carry (sum0,
1390 clib_mem_unaligned (&ip0->src_address, u32));
1392 ip_csum_with_carry (sum0,
1393 clib_mem_unaligned (&ip0->dst_address, u32));
1397 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1399 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1400 data_this_buffer = (void *) ip0 + ip_header_length;
/* If header + payload exceed what the first buffer holds, only sum the
   payload bytes actually present in it (none when the buffer is not even
   longer than the IP header). */
1401 if (n_this_buffer + ip_header_length > p0->current_length)
1403 p0->current_length >
1404 ip_header_length ? p0->current_length - ip_header_length : 0;
1407 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1408 n_bytes_left -= n_this_buffer;
1409 if (n_bytes_left == 0)
/* More payload remains: move on to the next buffer of the chain. */
1412 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1413 p0 = vlib_get_buffer (vm, p0->next_buffer);
1414 data_this_buffer = vlib_buffer_get_current (p0);
1415 n_this_buffer = p0->current_length;
1418 sum16 = ~ip_csum_fold (sum0);
/* Validate the TCP/UDP checksum of the packet in p0 and record the outcome
   in p0->flags: IP_BUFFER_L4_CHECKSUM_COMPUTED is always set, and
   IP_BUFFER_L4_CHECKSUM_CORRECT is set when the checksum verifies or when
   a UDP packet carries a zero checksum.  Callers in ip4_local () assign
   the result to their flags variable.  The current data pointer of p0 is
   taken to be the IPv4 header. */
1424 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1426 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1430 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1431 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): ip0 + 1 skips exactly one ip4_header_t, whereas
   ip4_local () locates the L4 header with ip4_next_header () -- confirm
   the behaviour wanted for headers carrying options. */
1433 udp0 = (void *) (ip0 + 1);
/* A zero UDP checksum is accepted without computing anything. */
1434 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1436 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1437 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1441 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* sum16 == 0 means the checksum verified; that boolean is shifted into
   the IP_BUFFER_L4_CHECKSUM_CORRECT bit position. */
1443 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1444 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Node function of ip4-local.  For every packet of `frame` it
     - determines whether the L4 checksum is good, validating it in software
       when the buffer flags do not already say it was computed;
     - checks the UDP length against the IP length;
     - looks up the packet's SOURCE address in the mtrie of the FIB bound to
       the RX interface and applies the source checks described in the
       comment further down;
     - picks the next node from lm->local_next_by_ip_protocol[], or
       IP_LOCAL_NEXT_DROP when any check failed.
   Packets are processed two at a time, then one at a time.
   Returns frame->n_vectors. */
1450 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1452 ip4_main_t *im = &ip4_main;
1453 ip_lookup_main_t *lm = &im->lookup_main;
1454 ip_local_next_t next_index;
1455 u32 *from, *to_next, n_left_from, n_left_to_next;
/* Error values written to p->error below index the errors[] of the
   ip4_input_node runtime, not of this node. */
1456 vlib_node_runtime_t *error_node =
1457 vlib_node_get_runtime (vm, ip4_input_node.index);
1459 from = vlib_frame_vector_args (frame);
1460 n_left_from = frame->n_vectors;
1461 next_index = node->cached_next_index;
1463 if (node->flags & VLIB_NODE_FLAG_TRACE)
1464 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1466 while (n_left_from > 0)
1468 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
1470 while (n_left_from >= 4 && n_left_to_next >= 2)
1472 vlib_buffer_t *p0, *p1;
1473 ip4_header_t *ip0, *ip1;
1474 udp_header_t *udp0, *udp1;
1475 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1476 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1477 const dpo_id_t *dpo0, *dpo1;
1478 const load_balance_t *lb0, *lb1;
1479 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1480 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1481 i32 len_diff0, len_diff1;
1482 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1483 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
/* Speculatively enqueue both packets to the current next frame. */
1486 pi0 = to_next[0] = from[0];
1487 pi1 = to_next[1] = from[1];
1491 n_left_to_next -= 2;
1493 p0 = vlib_get_buffer (vm, pi0);
1494 p1 = vlib_get_buffer (vm, pi1);
1496 ip0 = vlib_buffer_get_current (p0);
1497 ip1 = vlib_buffer_get_current (p1);
/* Remember where the IP header starts in each buffer. */
1499 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1500 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
/* The FIB is chosen by the RX interface. */
1502 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1503 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1504 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1505 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
1507 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1508 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
/* Source-address mtrie lookup: four steps (index 0..3), interleaved
   with the checksum and length checks below. */
1510 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1513 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1515 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1517 /* Treat IP frag packets as "experimental" protocol for now
1518 until support of IP frag reassembly is implemented */
1519 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1520 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1521 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1522 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1523 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1524 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
/* Start from what the flags already say about the L4 checksum. */
1529 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1530 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1532 udp0 = ip4_next_header (ip0);
1533 udp1 = ip4_next_header (ip1);
1535 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1536 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1537 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1540 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1542 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1544 /* Verify UDP length. */
1545 ip_len0 = clib_net_to_host_u16 (ip0->length);
1546 ip_len1 = clib_net_to_host_u16 (ip1->length);
1547 udp_len0 = clib_net_to_host_u16 (udp0->length);
1548 udp_len1 = clib_net_to_host_u16 (udp1->length);
/* A negative difference means the UDP length exceeds the IP length. */
1550 len_diff0 = ip_len0 - udp_len0;
1551 len_diff1 = ip_len1 - udp_len1;
/* The length check only applies to UDP. */
1553 len_diff0 = is_udp0 ? len_diff0 : 0;
1554 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Uncommon path: at least one packet is not TCP/UDP or is not yet known
   to have a good checksum; validate in software where the checksum has
   not been computed yet. */
1556 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1557 & good_tcp_udp0 & good_tcp_udp1)))
1562 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1563 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1565 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1566 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1571 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1572 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1574 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1575 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1579 good_tcp_udp0 &= len_diff0 >= 0;
1580 good_tcp_udp1 &= len_diff1 >= 0;
1583 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1585 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error found so far"
   value: the checks below only ever replace it, and a packet still
   carrying it at the end is dispatched by protocol instead of dropped. */
1587 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1589 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1590 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* The UDP checksum error code must directly follow the TCP one so that
   adding is_udp selects the right code. */
1592 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1593 error0 = (is_tcp_udp0 && !good_tcp_udp0
1594 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1595 error1 = (is_tcp_udp1 && !good_tcp_udp1
1596 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1599 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1601 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1604 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1607 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* The result of the source lookup is stored in both adj_index slots. */
1609 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1610 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1611 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1613 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1614 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1615 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1617 lb0 = load_balance_get (lbi0);
1618 lb1 = load_balance_get (lbi1);
1619 dpo0 = load_balance_get_bucket_i (lb0, 0);
1620 dpo1 = load_balance_get_bucket_i (lb1, 0);
1623 * Must have a route to source otherwise we drop the packet.
1624 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1627 * - the source is a recieve => it's from us => bogus, do this
1628 * first since it sets a different error code.
1629 * - uRPF check for any route to source - accept if passes.
1630 * - allow packets destined to the broadcast address from unknown sources
1632 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1633 dpo0->dpoi_type == DPO_RECEIVE) ?
1634 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1635 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1636 !fib_urpf_check_size (lb0->lb_urpf) &&
1637 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1638 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1639 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1640 dpo1->dpoi_type == DPO_RECEIVE) ?
1641 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1642 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1643 !fib_urpf_check_size (lb1->lb_urpf) &&
1644 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1645 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
/* Dispatch by protocol (fragments use slot 0xfe), unless an error was
   recorded, in which case the packet goes to IP_LOCAL_NEXT_DROP. */
1647 next0 = lm->local_next_by_ip_protocol[proto0];
1648 next1 = lm->local_next_by_ip_protocol[proto1];
1651 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1653 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1655 p0->error = error0 ? error_node->errors[error0] : 0;
1656 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0: packet 0 goes to a next other than next_index;
   bit 1: packet 1 does. */
1658 enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
1660 if (PREDICT_FALSE (enqueue_code != 0))
1662 switch (enqueue_code)
1668 n_left_to_next += 1;
1669 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1675 n_left_to_next += 1;
1676 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1680 /* A B B or A B C */
1682 n_left_to_next += 2;
1683 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1684 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1687 vlib_put_next_frame (vm, node, next_index,
1690 vlib_get_next_frame (vm, node, next_index, to_next,
/* Single loop: same processing as above, one packet per iteration. */
1698 while (n_left_from > 0 && n_left_to_next > 0)
1703 ip4_fib_mtrie_t *mtrie0;
1704 ip4_fib_mtrie_leaf_t leaf0;
1705 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1707 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1708 load_balance_t *lb0;
1709 const dpo_id_t *dpo0;
1711 pi0 = to_next[0] = from[0];
1715 n_left_to_next -= 1;
1717 p0 = vlib_get_buffer (vm, pi0);
1719 ip0 = vlib_buffer_get_current (p0);
1721 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1723 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1724 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1726 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1728 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1731 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1733 /* Treat IP frag packets as "experimental" protocol for now
1734 until support of IP frag reassembly is implemented */
1735 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1736 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1737 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1741 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1743 udp0 = ip4_next_header (ip0);
1745 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1746 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1749 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1751 /* Verify UDP length. */
1752 ip_len0 = clib_net_to_host_u16 (ip0->length);
1753 udp_len0 = clib_net_to_host_u16 (udp0->length);
1755 len_diff0 = ip_len0 - udp_len0;
1757 len_diff0 = is_udp0 ? len_diff0 : 0;
1759 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1764 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1765 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1767 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1768 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1772 good_tcp_udp0 &= len_diff0 >= 0;
1775 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
/* As in the dual loop, UNKNOWN_PROTOCOL means "no error found so far". */
1777 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1779 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1781 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1782 error0 = (is_tcp_udp0 && !good_tcp_udp0
1783 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1786 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1789 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1791 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1792 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1794 lb0 = load_balance_get (lbi0);
1795 dpo0 = load_balance_get_bucket_i (lb0, 0);
1797 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1798 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
/* Source checks, same order as in the dual loop: a source that resolves
   to a DPO_RECEIVE first, then the uRPF check with the broadcast
   destination exemption. */
1800 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1801 dpo0->dpoi_type == DPO_RECEIVE) ?
1802 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1803 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1804 !fib_urpf_check_size (lb0->lb_urpf) &&
1805 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1806 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1808 next0 = lm->local_next_by_ip_protocol[proto0];
1811 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1813 p0->error = error0 ? error_node->errors[error0] : 0;
/* Speculation failed: take the packet back out of the current next frame
   and switch to a frame for next0. */
1815 if (PREDICT_FALSE (next0 != next_index))
1817 n_left_to_next += 1;
1818 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1821 vlib_get_next_frame (vm, node, next_index, to_next,
1825 n_left_to_next -= 1;
1829 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1832 return frame->n_vectors;
/* Registration of the "ip4-local" graph node: runs ip4_local (), traces are
   printed by format_ip4_forward_next_trace, and the IP_LOCAL_N_NEXT static
   next nodes are listed below.  Further next nodes are added at run time
   by ip4_register_protocol (). */
1835 VLIB_REGISTER_NODE (ip4_local_node, static) =
1837 .function = ip4_local,.name = "ip4-local",.vector_size =
1838 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
1839 IP_LOCAL_N_NEXT,.next_nodes =
1841 [IP_LOCAL_NEXT_DROP] = "error-drop",
1842 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1843 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1844 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}
1847 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Register the graph node `node_index` as the handler for IP protocol
   number `protocol`: the node is added as a next node of ip4-local and the
   resulting next index is stored in lm->local_next_by_ip_protocol[protocol],
   the table ip4_local () dispatches on.  `protocol` must be a valid index
   into that table (checked with ASSERT only). */
1850 ip4_register_protocol (u32 protocol, u32 node_index)
1852 vlib_main_t *vm = vlib_get_main ();
1853 ip4_main_t *im = &ip4_main;
1854 ip_lookup_main_t *lm = &im->lookup_main;
1856 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1857 lm->local_next_by_ip_protocol[protocol] =
1858 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler for "show ip local": prints a heading followed by the number
   of every IP protocol whose entry in lm->local_next_by_ip_protocol[] is
   something other than IP_LOCAL_NEXT_PUNT. */
1861 static clib_error_t *
1862 show_ip_local_command_fn (vlib_main_t * vm,
1863 unformat_input_t * input, vlib_cli_command_t * cmd)
1865 ip4_main_t *im = &ip4_main;
1866 ip_lookup_main_t *lm = &im->lookup_main;
1869 vlib_cli_output (vm, "Protocols handled by ip4_local");
1870 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1872 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1873 vlib_cli_output (vm, "%d", i);
1881 * Display the set of protocols handled by the local IPv4 stack.
1884 * Example of how to display local protocol table:
1885 * @cliexstart{show ip local}
1886 * Protocols handled by ip4_local
/* Binds the CLI path "show ip local" to show_ip_local_command_fn (). */
1893 VLIB_CLI_COMMAND (show_ip_local, static) =
1895 .path = "show ip local",
1896 .function = show_ip_local_command_fn,
1897 .short_help = "show ip local",
/* Shared body of the ip4-arp and ip4-glean node functions; `is_glean`
   selects the variant (0 from ip4_arp (), 1 from ip4_glean ()).
   Every input buffer is placed on the IP4_ARP_NEXT_DROP frame with an
   error code saying whether an ARP request was sent or suppressed.  A
   hash of the address being resolved is looked up in a small static
   bitmap, so that an address already seen since the last reseed is
   flagged (drop0) instead of triggering another request; seeds and bitmap
   are renewed whenever more than 1e-3 has elapsed on the vlib clock
   (presumably seconds -- TODO confirm).  The ARP request itself is built
   from im->ip4_arp_request_packet_template and sent to the adjacency's
   rewrite next node.  Returns frame->n_vectors. */
1902 ip4_arp_inline (vlib_main_t * vm,
1903 vlib_node_runtime_t * node,
1904 vlib_frame_t * frame, int is_glean)
1906 vnet_main_t *vnm = vnet_get_main ();
1907 ip4_main_t *im = &ip4_main;
1908 ip_lookup_main_t *lm = &im->lookup_main;
1909 u32 *from, *to_next_drop;
1910 uword n_left_from, n_left_to_next_drop, next_index;
/* Function-static state, i.e. shared by every caller of this function. */
1911 static f64 time_last_seed_change = -1e100;
1912 static u32 hash_seeds[3];
/* 256 bits of "address hash already seen" state. */
1913 static uword hash_bitmap[256 / BITS (uword)];
1916 if (node->flags & VLIB_NODE_FLAG_TRACE)
1917 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1919 time_now = vlib_time_now (vm);
1920 if (time_now - time_last_seed_change > 1e-3)
/* Draw fresh hash seeds from the per-vm random buffer. */
1923 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1924 sizeof (hash_seeds));
1925 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1926 hash_seeds[i] = r[i];
1928 /* Mark all hash keys as not seen before. */
1929 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1932 time_last_seed_change = time_now;
1935 from = vlib_frame_vector_args (frame);
1936 n_left_from = frame->n_vectors;
1937 next_index = node->cached_next_index;
1938 if (next_index == IP4_ARP_NEXT_DROP)
1939 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1941 while (n_left_from > 0)
1943 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1944 to_next_drop, n_left_to_next_drop);
1946 while (n_left_from > 0 && n_left_to_next_drop > 0)
1948 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1949 ip_adjacency_t *adj0;
1956 p0 = vlib_get_buffer (vm, pi0);
1958 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1959 adj0 = ip_get_adjacency (lm, adj_index0);
1960 ip0 = vlib_buffer_get_current (p0);
1966 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1967 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1972 * this is the Glean case, so we are ARPing for the
1973 * packet's destination
1975 a0 ^= ip0->dst_address.data_u32;
1979 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1983 hash_v3_finalize32 (a0, b0, c0);
/* Reduce the hash to a bit position within hash_bitmap.
   NOTE(review): c0 is replaced by the word index (c0 / BITS (uword))
   before the bit within the word is derived from c0 % BITS (uword), so
   the bit number is computed from the word index rather than from the
   hash itself; in addition m0 is declared u32 while the shift is done in
   uword.  Confirm whether the mask should be computed before the
   division. */
1985 c0 &= BITS (hash_bitmap) - 1;
1986 c0 = c0 / BITS (uword);
1987 m0 = (uword) 1 << (c0 % BITS (uword));
1989 bm0 = hash_bitmap[c0];
1990 drop0 = (bm0 & m0) != 0;
1992 /* Mark it as seen. */
1993 hash_bitmap[c0] = bm0 | m0;
/* The original packet always goes to the drop frame. */
1997 to_next_drop[0] = pi0;
1999 n_left_to_next_drop -= 1;
2002 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2003 IP4_ARP_ERROR_REQUEST_SENT];
2006 * the adj has been updated to a rewrite but the node the DPO that got
2007 * us here hasn't - yet. no big deal. we'll drop while we wait.
2009 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2016 * Can happen if the control-plane is programming tables
2017 * with traffic flowing; at least that's today's lame excuse.
2019 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
2020 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2022 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2025 /* Send ARP request. */
2029 ethernet_arp_header_t *h0;
2030 vnet_hw_interface_t *hw_if0;
2033 vlib_packet_template_get_packet (vm,
2034 &im->ip4_arp_request_packet_template,
2037 /* Add rewrite/encap string for ARP packet. */
2038 vnet_rewrite_one_header (adj0[0], h0,
2039 sizeof (ethernet_header_t));
2041 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2043 /* Src ethernet address in ARP header. */
2044 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2046 sizeof (h0->ip4_over_ethernet[0].ethernet));
2050 /* The interface's source address is stashed in the Glean Adj */
2051 h0->ip4_over_ethernet[0].ip4 =
2052 adj0->sub_type.glean.receive_addr.ip4;
2054 /* Copy in destination address we are requesting. This is the
2055 * glean case, so it's the packet's destination.*/
2056 h0->ip4_over_ethernet[1].ip4.data_u32 =
2057 ip0->dst_address.data_u32;
2061 /* Src IP address in ARP header. */
2062 if (ip4_src_address_for_packet (lm, sw_if_index0,
2064 ip4_over_ethernet[0].ip4))
2066 /* No source address available */
2068 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2069 vlib_buffer_free (vm, &bi0, 1);
2073 /* Copy in destination address we are requesting from the
2075 h0->ip4_over_ethernet[1].ip4.data_u32 =
2076 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2079 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2080 b0 = vlib_get_buffer (vm, bi0);
2081 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2083 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2085 vlib_set_next_frame_buffer (vm, node,
2086 adj0->rewrite_header.next_index,
2091 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2094 return frame->n_vectors;
/* Node function of ip4-arp: ip4_arp_inline () with is_glean = 0. */
2098 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2100 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of ip4-glean: ip4_arp_inline () with is_glean = 1. */
2104 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2106 return (ip4_arp_inline (vm, node, frame, 1));
/* Human-readable counter names, indexed by IP4_ARP_ERROR_* value; used as
   the error_strings of both the ip4-arp and the ip4-glean node. */
2109 static char *ip4_arp_error_strings[] = {
2110 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2111 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2112 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2113 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2114 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2115 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Registration of the "ip4-arp" graph node: runs ip4_arp (), uses the
   ip4_arp_error_strings counters, and statically knows only the
   "error-drop" next (IP4_ARP_NEXT_DROP). */
2118 VLIB_REGISTER_NODE (ip4_arp_node) =
2120 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2121 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2122 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2123 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2125 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Registration of the "ip4-glean" graph node: runs ip4_glean (), shares
   the ip4_arp_error_strings counters with ip4-arp, and statically knows
   only the "error-drop" next (IP4_ARP_NEXT_DROP). */
2128 VLIB_REGISTER_NODE (ip4_glean_node) =
2130 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2131 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2132 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2133 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2135 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Init function (registered through VLIB_INIT_FUNCTION below) that walks the
   error codes listed by foreach_notrace_ip4_arp_error and, for each one,
   calls vnet_pcap_drop_trace_filter_add_del () on the matching entry of the
   ip4-arp node's runtime errors[], so that these drops are left out of the
   pcap drop trace. */
2138 #define foreach_notrace_ip4_arp_error \
2145 arp_notrace_init (vlib_main_t * vm)
2147 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2149 /* don't trace ARP request packets */
2151 vnet_pcap_drop_trace_filter_add_del \
2152 (rt->errors[IP4_ARP_ERROR_##a], \
2154 foreach_notrace_ip4_arp_error;
2159 VLIB_INIT_FUNCTION (arp_notrace_init);
2162 /* Send an ARP request to see if given destination is reachable on given interface.
   Fails with a clib error when the interface is administratively down, or
   when no address configured on the interface matches `dst` (in which
   case vnm->api_errno is also set to VNET_API_ERROR_NO_MATCHING_INTERFACE).
   Otherwise an ARP request is built from the packet template, with the
   interface hardware address and the matching interface address as
   sender and `dst` as target, and is handed directly to the output node
   of the hardware interface.  Returns 0 on success. */
2164 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2166 vnet_main_t *vnm = vnet_get_main ();
2167 ip4_main_t *im = &ip4_main;
2168 ethernet_arp_header_t *h;
2170 ip_interface_address_t *ia;
2171 ip_adjacency_t *adj;
2172 vnet_hw_interface_t *hi;
2173 vnet_sw_interface_t *si;
2177 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is not admin-up. */
2179 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2181 return clib_error_return (0, "%U: interface %U down",
2182 format_ip4_address, dst,
2183 format_vnet_sw_if_index_name, vnm,
/* Look for an address on this interface that matches dst; `ia` receives
   the corresponding interface-address entry. */
2188 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2191 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2192 return clib_error_return
2193 (0, "no matching interface address for destination %U (interface %U)",
2194 format_ip4_address, dst,
2195 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* The probe adjacency attached to the interface address supplies the
   rewrite (encapsulation) applied further down. */
2198 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2201 vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template,
2204 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address = address of the supporting hw interface. */
2206 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2207 sizeof (h->ip4_over_ethernet[0].ethernet));
/* Sender IP = matching interface address, target IP = dst. */
2209 h->ip4_over_ethernet[0].ip4 = src[0];
2210 h->ip4_over_ethernet[1].ip4 = dst[0];
2212 b = vlib_get_buffer (vm, bi);
2213 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2214 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2216 /* Add encapsulation string for software interface (e.g. ethernet header). */
2217 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back so that it covers the rewrite just added. */
2218 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet straight to the hardware interface's output node. */
2221 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2222 u32 *to_next = vlib_frame_vector_args (f);
2225 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2228 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline (): packets start out with
   IP4_REWRITE_NEXT_DROP and are switched to IP4_REWRITE_NEXT_ICMP_ERROR
   when their TTL expires. */
2233 IP4_REWRITE_NEXT_DROP,
2234 IP4_REWRITE_NEXT_ICMP_ERROR,
2235 } ip4_rewrite_next_t;
2238 ip4_rewrite_inline (vlib_main_t * vm,
2239 vlib_node_runtime_t * node,
2240 vlib_frame_t * frame, int is_midchain)
2242 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2243 u32 *from = vlib_frame_vector_args (frame);
2244 u32 n_left_from, n_left_to_next, *to_next, next_index;
2245 vlib_node_runtime_t *error_node =
2246 vlib_node_get_runtime (vm, ip4_input_node.index);
2248 n_left_from = frame->n_vectors;
2249 next_index = node->cached_next_index;
2250 u32 cpu_index = os_get_cpu_number ();
2252 while (n_left_from > 0)
2254 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2256 while (n_left_from >= 4 && n_left_to_next >= 2)
2258 ip_adjacency_t *adj0, *adj1;
2259 vlib_buffer_t *p0, *p1;
2260 ip4_header_t *ip0, *ip1;
2261 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2262 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2263 u32 tx_sw_if_index0, tx_sw_if_index1;
2265 /* Prefetch next iteration. */
2267 vlib_buffer_t *p2, *p3;
2269 p2 = vlib_get_buffer (vm, from[2]);
2270 p3 = vlib_get_buffer (vm, from[3]);
2272 vlib_prefetch_buffer_header (p2, STORE);
2273 vlib_prefetch_buffer_header (p3, STORE);
2275 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2276 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2279 pi0 = to_next[0] = from[0];
2280 pi1 = to_next[1] = from[1];
2285 n_left_to_next -= 2;
2287 p0 = vlib_get_buffer (vm, pi0);
2288 p1 = vlib_get_buffer (vm, pi1);
2290 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2291 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2293 /* We should never rewrite a pkt using the MISS adjacency */
2294 ASSERT (adj_index0 && adj_index1);
2296 ip0 = vlib_buffer_get_current (p0);
2297 ip1 = vlib_buffer_get_current (p1);
2299 error0 = error1 = IP4_ERROR_NONE;
2300 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2302 /* Decrement TTL & update checksum.
2303 Works either endian, so no need for byte swap. */
2304 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2306 i32 ttl0 = ip0->ttl;
2308 /* Input node should have rejected packets with ttl 0. */
2309 ASSERT (ip0->ttl > 0);
2311 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2312 checksum0 += checksum0 >= 0xffff;
2314 ip0->checksum = checksum0;
2319 * If the ttl drops below 1 when forwarding, generate
2322 if (PREDICT_FALSE (ttl0 <= 0))
2324 error0 = IP4_ERROR_TIME_EXPIRED;
2325 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2326 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2327 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2329 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2332 /* Verify checksum. */
2333 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2337 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2339 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2341 i32 ttl1 = ip1->ttl;
2343 /* Input node should have rejected packets with ttl 0. */
2344 ASSERT (ip1->ttl > 0);
2346 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2347 checksum1 += checksum1 >= 0xffff;
2349 ip1->checksum = checksum1;
2354 * If the ttl drops below 1 when forwarding, generate
2357 if (PREDICT_FALSE (ttl1 <= 0))
2359 error1 = IP4_ERROR_TIME_EXPIRED;
2360 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2361 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2362 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2364 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2367 /* Verify checksum. */
2368 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2369 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2373 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2376 /* Rewrite packet header and updates lengths. */
2377 adj0 = ip_get_adjacency (lm, adj_index0);
2378 adj1 = ip_get_adjacency (lm, adj_index1);
2380 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2381 rw_len0 = adj0[0].rewrite_header.data_bytes;
2382 rw_len1 = adj1[0].rewrite_header.data_bytes;
2383 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2384 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2386 /* Check MTU of outgoing interface. */
2388 (vlib_buffer_length_in_chain (vm, p0) >
2390 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2393 (vlib_buffer_length_in_chain (vm, p1) >
2395 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2398 next0 = (error0 == IP4_ERROR_NONE)
2399 ? adj0[0].rewrite_header.next_index : next0;
2401 next1 = (error1 == IP4_ERROR_NONE)
2402 ? adj1[0].rewrite_header.next_index : next1;
2405 * We've already accounted for an ethernet_header_t elsewhere
2407 if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
2408 vlib_increment_combined_counter
2409 (&adjacency_counters, cpu_index, adj_index0,
2410 /* packet increment */ 0,
2411 /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
2413 if (PREDICT_FALSE (rw_len1 > sizeof (ethernet_header_t)))
2414 vlib_increment_combined_counter
2415 (&adjacency_counters, cpu_index, adj_index1,
2416 /* packet increment */ 0,
2417 /* byte increment */ rw_len1 - sizeof (ethernet_header_t));
2419 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2420 * to see the IP header */
2421 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2423 p0->current_data -= rw_len0;
2424 p0->current_length += rw_len0;
2425 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2426 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2428 vnet_feature_arc_start (lm->output_feature_arc_index,
2429 tx_sw_if_index0, &next0, p0);
2431 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2433 p1->current_data -= rw_len1;
2434 p1->current_length += rw_len1;
2436 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2437 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2439 vnet_feature_arc_start (lm->output_feature_arc_index,
2440 tx_sw_if_index1, &next1, p1);
2443 /* Guess we are only writing on simple Ethernet header. */
2444 vnet_rewrite_two_headers (adj0[0], adj1[0],
2445 ip0, ip1, sizeof (ethernet_header_t));
2449 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2450 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2453 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2454 to_next, n_left_to_next,
2455 pi0, pi1, next0, next1);
2458 while (n_left_from > 0 && n_left_to_next > 0)
2460 ip_adjacency_t *adj0;
2463 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2464 u32 tx_sw_if_index0;
2466 pi0 = to_next[0] = from[0];
2468 p0 = vlib_get_buffer (vm, pi0);
2470 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2472 /* We should never rewrite a pkt using the MISS adjacency */
2473 ASSERT (adj_index0);
2475 adj0 = ip_get_adjacency (lm, adj_index0);
2477 ip0 = vlib_buffer_get_current (p0);
2479 error0 = IP4_ERROR_NONE;
2480 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2482 /* Decrement TTL & update checksum. */
2483 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2485 i32 ttl0 = ip0->ttl;
2487 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2489 checksum0 += checksum0 >= 0xffff;
2491 ip0->checksum = checksum0;
2493 ASSERT (ip0->ttl > 0);
2499 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2501 if (PREDICT_FALSE (ttl0 <= 0))
2504 * If the ttl drops below 1 when forwarding, generate
2507 error0 = IP4_ERROR_TIME_EXPIRED;
2508 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2509 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2510 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2511 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2517 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2520 /* Guess we are only writing on simple Ethernet header. */
2521 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2523 /* Update packet buffer attributes/set output interface. */
2524 rw_len0 = adj0[0].rewrite_header.data_bytes;
2525 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2527 if (PREDICT_FALSE (rw_len0 > sizeof (ethernet_header_t)))
2528 vlib_increment_combined_counter
2529 (&adjacency_counters, cpu_index, adj_index0,
2530 /* packet increment */ 0,
2531 /* byte increment */ rw_len0 - sizeof (ethernet_header_t));
2533 /* Check MTU of outgoing interface. */
2534 error0 = (vlib_buffer_length_in_chain (vm, p0)
2535 > adj0[0].rewrite_header.max_l3_packet_bytes
2536 ? IP4_ERROR_MTU_EXCEEDED : error0);
2538 p0->error = error_node->errors[error0];
2540 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2541 * to see the IP header */
2542 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2544 p0->current_data -= rw_len0;
2545 p0->current_length += rw_len0;
2546 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2548 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2549 next0 = adj0[0].rewrite_header.next_index;
2553 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2556 vnet_feature_arc_start (lm->output_feature_arc_index,
2557 tx_sw_if_index0, &next0, p0);
2564 n_left_to_next -= 1;
2566 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2567 to_next, n_left_to_next,
2571 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2574 /* Need to do trace after rewrites to pick up new packet data. */
2575 if (node->flags & VLIB_NODE_FLAG_TRACE)
2576 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2578 return frame->n_vectors;
2582 /** @brief IPv4 rewrite node.
2585 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2586 header checksum, fetch the ip adjacency, check the outbound mtu,
2587 apply the adjacency rewrite, and send pkts to the adjacency
2588 rewrite header's rewrite_next_index.
2590 @param vm vlib_main_t corresponding to the current thread
2591 @param node vlib_node_runtime_t
2592 @param frame vlib_frame_t whose contents should be dispatched
2594 @par Graph mechanics: buffer metadata, next index usage
2597 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2598 - the rewrite adjacency index
2599 - <code>adj->lookup_next_index</code>
2600 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2601 the packet will be dropped.
2602 - <code>adj->rewrite_header</code>
2603 - Rewrite string length, rewrite string, next_index
2606 - <code>b->current_data, b->current_length</code>
2607 - Updated net of applying the rewrite string
2609 <em>Next Indices:</em>
2610 - <code> adj->rewrite_header.next_index </code>
/*
 * Dispatch function of the ip4-rewrite node.  All work is delegated to
 * ip4_rewrite_inline (), called with 0 as its last argument, and the
 * value it returns is passed straight back to the caller.
 * NOTE(review): the last argument presumably selects midchain handling
 * (ip4_midchain passes 1) -- confirm against ip4_rewrite_inline.
 */
2614 ip4_rewrite (vlib_main_t * vm,
2615 vlib_node_runtime_t * node, vlib_frame_t * frame)
2617 return ip4_rewrite_inline (vm, node, frame, 0);
/*
 * Dispatch function of the ip4-midchain node.  Identical to ip4_rewrite
 * except that ip4_rewrite_inline () is called with 1 as its last
 * argument; the value it returns is passed straight back to the caller.
 * NOTE(review): the 1 presumably enables the midchain fixup_func calls
 * seen in ip4_rewrite_inline -- confirm against its parameter list.
 */
2621 ip4_midchain (vlib_main_t * vm,
2622 vlib_node_runtime_t * node, vlib_frame_t * frame)
2624 return ip4_rewrite_inline (vm, node, frame, 1);
/*
 * Graph-node registration for ip4-rewrite: dispatch function ip4_rewrite,
 * u32-sized vector elements, traces rendered by format_ip4_rewrite_trace.
 */
2628 VLIB_REGISTER_NODE (ip4_rewrite_node) =
2630 .function = ip4_rewrite,.name = "ip4-rewrite",.vector_size =
2631 sizeof (u32),.format_trace = format_ip4_rewrite_trace,.n_next_nodes =
/* Next-node names, indexed by the IP4_REWRITE_NEXT_* values. */
2634 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2635 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",}
/* NOTE(review): presumably emits per-CPU-architecture variants of the
 * node function -- confirm against the macro definition. */
2638 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite);
/*
 * Graph-node registration for ip4-midchain: dispatch function
 * ip4_midchain, u32-sized vector elements, traces rendered by
 * format_ip4_forward_next_trace.  The node is registered through
 * .sibling_of rather than with its own next-node table.
 */
2640 VLIB_REGISTER_NODE (ip4_midchain_node) =
2642 .function = ip4_midchain,.name = "ip4-midchain",.vector_size =
2643 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.sibling_of =
/* NOTE(review): presumably emits per-CPU-architecture variants of the
 * node function -- confirm against the macro definition. */
2646 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/*
 * CLI handler registered for the set interface ip table command.
 *
 * Parses an interface followed by a numeric table id, finds or creates
 * (and locks) the IPv4 FIB table for that id, and records the resulting
 * FIB index for the interface in ip4_main.fib_index_by_sw_if_index.
 *
 * @param vm    vlib main (not referenced by the visible code)
 * @param input unformat stream holding the command arguments
 * @param cmd   CLI command descriptor (not referenced by the visible code)
 * @return a clib error describing the parse failure (unknown interface
 *         or missing table id); error starts out as 0.
 */
2648 static clib_error_t *
2649 add_del_interface_table (vlib_main_t * vm,
2650 unformat_input_t * input, vlib_cli_command_t * cmd)
2652 vnet_main_t *vnm = vnet_get_main ();
2653 clib_error_t *error = 0;
2654 u32 sw_if_index, table_id;
/* First argument: the interface. */
2658 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2660 error = clib_error_return (0, "unknown interface `%U'",
2661 format_unformat_error, input);
/* Second argument: the table id, parsed with %d. */
2665 if (unformat (input, "%d", &table_id))
2669 error = clib_error_return (0, "expected table id `%U'",
2670 format_unformat_error, input);
2675 ip4_main_t *im = &ip4_main;
/* Resolve the table id to a FIB index, creating the table if needed. */
2678 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2683 // changing an interface's table has consequences for any connecteds
2684 // and adj-fibs already installed.
/* Grow the per-interface vector if required, then bind the interface. */
2686 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2687 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2695 * Place the indicated interface into the supplied IPv4 FIB table (also known
2696 * as a VRF). If the FIB table does not exist, this command creates it. To
2697 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2698 * FIB table will only be displayed if a route has been added to the table, or
2699 * an IP Address is assigned to an interface in the table (which adds a route
2702 * @note IP addresses added after setting the interface IP table end up in
2703 * the indicated FIB table. If the IP address is added prior to adding the
2704 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2705 * but potentially counter-intuitive results occur if you provision interface
2706 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2707 * IP table ID provisioned. It might be marginally useful to evade source RPF
2708 * drops to put an interface address into multiple FIBs.
2711 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2712 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* CLI registration: binds the set interface ip table path to
 * add_del_interface_table. */
2715 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2717 .path = "set interface ip table",
2718 .function = add_del_interface_table,
2719 .short_help = "set interface ip table <interface> <table-id>",
/*
 * ip4_lookup_multicast: node function that resolves every buffer of the
 * frame to a load-balance object and a next node.
 *
 * For each packet the visible code:
 *  - takes the FIB index from im->fib_index_by_sw_if_index[] for the RX
 *    interface, unless sw_if_index[VLIB_TX] is not ~0, in which case that
 *    value is used as the FIB index instead;
 *  - obtains a load-balance index from ip4_fib_table_lookup_lb ();
 *  - computes the flow hash with the load-balance hash config and masks
 *    it with lb_n_buckets_minus_1 to select a bucket (a dpo);
 *  - takes the next node from dpo->dpoi_next_node and stores
 *    dpo->dpoi_index in ip.adj_index[VLIB_TX];
 *  - adds one packet and the buffer-chain length to the load-balance
 *    combined counter of the current cpu.
 *
 * Packets are handled two at a time while at least four remain (the
 * third and fourth are only prefetched), then one at a time.
 *
 * @param vm    vlib main of the calling thread
 * @param node  node runtime; supplies cached_next_index and the trace flag
 * @param frame frame holding the buffer indices to process
 * @return frame->n_vectors
 */
2725 ip4_lookup_multicast (vlib_main_t * vm,
2726 vlib_node_runtime_t * node, vlib_frame_t * frame)
2728 ip4_main_t *im = &ip4_main;
2729 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
2730 u32 n_left_from, n_left_to_next, *from, *to_next;
2731 ip_lookup_next_t next;
2732 u32 cpu_index = os_get_cpu_number ();
2734 from = vlib_frame_vector_args (frame);
2735 n_left_from = frame->n_vectors;
/* Start by filling the frame of the next node used last time. */
2736 next = node->cached_next_index;
2738 while (n_left_from > 0)
2740 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: needs four input buffers (two to process, two to prefetch)
 * and two free slots in the output frame. */
2742 while (n_left_from >= 4 && n_left_to_next >= 2)
2744 vlib_buffer_t *p0, *p1;
2745 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2746 ip_lookup_next_t next0, next1;
2747 ip4_header_t *ip0, *ip1;
2748 u32 fib_index0, fib_index1;
2749 const dpo_id_t *dpo0, *dpo1;
2750 const load_balance_t *lb0, *lb1;
2752 /* Prefetch next iteration. */
2754 vlib_buffer_t *p2, *p3;
2756 p2 = vlib_get_buffer (vm, from[2]);
2757 p3 = vlib_get_buffer (vm, from[3]);
2759 vlib_prefetch_buffer_header (p2, LOAD);
2760 vlib_prefetch_buffer_header (p3, LOAD);
2762 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2763 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively place both buffer indices in the current output frame. */
2766 pi0 = to_next[0] = from[0];
2767 pi1 = to_next[1] = from[1];
2769 p0 = vlib_get_buffer (vm, pi0);
2770 p1 = vlib_get_buffer (vm, pi1);
2772 ip0 = vlib_buffer_get_current (p0);
2773 ip1 = vlib_buffer_get_current (p1);
/* FIB selection: the table bound to the RX interface, overridden by
 * sw_if_index[VLIB_TX] whenever that field is not ~0. */
2776 vec_elt (im->fib_index_by_sw_if_index,
2777 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2779 vec_elt (im->fib_index_by_sw_if_index,
2780 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2782 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
2783 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2785 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
2786 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
2788 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2790 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index1),
2793 lb0 = load_balance_get (lb_index0);
2794 lb1 = load_balance_get (lb_index1);
/* The bucket mask used below relies on a non-zero, power-of-two
 * bucket count. */
2796 ASSERT (lb0->lb_n_buckets > 0);
2797 ASSERT (is_pow2 (lb0->lb_n_buckets));
2798 ASSERT (lb1->lb_n_buckets > 0);
2799 ASSERT (is_pow2 (lb1->lb_n_buckets));
2801 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2802 (ip0, lb0->lb_hash_config);
2804 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2805 (ip1, lb1->lb_hash_config);
/* Bucket index = flow hash masked with (bucket count - 1). */
2807 dpo0 = load_balance_get_bucket_i (lb0,
2808 (vnet_buffer (p0)->ip.flow_hash &
2809 (lb0->lb_n_buckets_minus_1)));
2810 dpo1 = load_balance_get_bucket_i (lb1,
2811 (vnet_buffer (p1)->ip.flow_hash &
2812 (lb1->lb_n_buckets_minus_1)));
2814 next0 = dpo0->dpoi_next_node;
2815 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2816 next1 = dpo1->dpoi_next_node;
2817 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Per-load-balance counters: one packet plus the chain length in bytes. */
2819 if (1) /* $$$$$$ HACK FIXME */
2820 vlib_increment_combined_counter
2821 (cm, cpu_index, lb_index0, 1,
2822 vlib_buffer_length_in_chain (vm, p0));
2823 if (1) /* $$$$$$ HACK FIXME */
2824 vlib_increment_combined_counter
2825 (cm, cpu_index, lb_index1, 1,
2826 vlib_buffer_length_in_chain (vm, p1));
2830 n_left_to_next -= 2;
/* wrong_next: bit 0 set when p0 needs a next node other than the one
 * being filled, bit 1 set when p1 does.  The fix-up paths below give
 * slots back and hand the affected buffers to their own next node. */
2833 wrong_next = (next0 != next) + 2 * (next1 != next);
2834 if (PREDICT_FALSE (wrong_next != 0))
2842 n_left_to_next += 1;
2843 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2849 n_left_to_next += 1;
2850 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2856 n_left_to_next += 2;
2857 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2858 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2862 vlib_put_next_frame (vm, node, next, n_left_to_next);
2864 vlib_get_next_frame (vm, node, next, to_next,
/* Single loop: same steps as above for one buffer at a time. */
2871 while (n_left_from > 0 && n_left_to_next > 0)
2876 ip_lookup_next_t next0;
2878 const dpo_id_t *dpo0;
2879 const load_balance_t *lb0;
2884 p0 = vlib_get_buffer (vm, pi0);
2886 ip0 = vlib_buffer_get_current (p0);
2888 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2889 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2890 fib_index0 = (vnet_buffer (p0)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
2891 fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
2893 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0),
2896 lb0 = load_balance_get (lb_index0);
2898 ASSERT (lb0->lb_n_buckets > 0);
2899 ASSERT (is_pow2 (lb0->lb_n_buckets));
2901 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2902 (ip0, lb0->lb_hash_config);
2904 dpo0 = load_balance_get_bucket_i (lb0,
2905 (vnet_buffer (p0)->ip.flow_hash &
2906 (lb0->lb_n_buckets_minus_1)));
2908 next0 = dpo0->dpoi_next_node;
2909 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2911 if (1) /* $$$$$$ HACK FIXME */
2912 vlib_increment_combined_counter
2913 (cm, cpu_index, lb_index0, 1,
2914 vlib_buffer_length_in_chain (vm, p0));
2918 n_left_to_next -= 1;
/* next0 differs from the frame being filled: take the slot back, flush
 * that frame and fetch a new one before enqueueing the buffer. */
2921 if (PREDICT_FALSE (next0 != next))
2923 n_left_to_next += 1;
2924 vlib_put_next_frame (vm, node, next, n_left_to_next);
2926 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2929 n_left_to_next -= 1;
2933 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs only when the node has the trace flag set. */
2936 if (node->flags & VLIB_NODE_FLAG_TRACE)
2937 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2939 return frame->n_vectors;
/*
 * Graph-node registration for ip4-lookup-multicast: dispatch function
 * ip4_lookup_multicast, u32-sized vector elements, traces rendered by
 * format_ip4_lookup_trace.  It declares no next nodes of its own
 * (n_next_nodes = 0) and is registered as a sibling of ip4-lookup.
 */
2942 VLIB_REGISTER_NODE (ip4_lookup_multicast_node, static) =
2944 .function = ip4_lookup_multicast,.name =
2945 "ip4-lookup-multicast",.vector_size = sizeof (u32),.sibling_of =
2946 "ip4-lookup",.format_trace = format_ip4_lookup_trace,.n_next_nodes = 0,};
/* NOTE(review): presumably emits per-CPU-architecture variants of the
 * node function -- confirm against the macro definition. */
2948 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node,
2949 ip4_lookup_multicast);
/*
 * Graph-node registration for ip4-multicast.  The dispatch function is
 * ip4_drop and the only next node listed is error-drop at index 0;
 * traces are rendered by format_ip4_forward_next_trace.
 */
2951 VLIB_REGISTER_NODE (ip4_multicast_node, static) =
2953 .function = ip4_drop,.name = "ip4-multicast",.vector_size =
2954 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_next_nodes =
2957 [0] = "error-drop",}
/*
 * Cross-check two lookup paths for one address.
 *
 * Walks the mtrie of the FIB identified by fib_index0 in four steps
 * (steps 0 to 3) for address a, falls back to the mtrie default leaf
 * when the walk ends on the empty leaf, and compares the index stored
 * in the resulting leaf with what ip4_fib_table_lookup_lb () returns
 * for the same FIB and address.
 *
 * @param a          IPv4 address to look up
 * @param fib_index0 FIB index passed to ip4_fib_get ()
 * @return non-zero when both lookups agree, 0 otherwise
 */
2961 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2963 ip4_fib_mtrie_t *mtrie0;
2964 ip4_fib_mtrie_leaf_t leaf0;
2967 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* One lookup step per step index, starting from the root leaf. */
2969 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2970 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2971 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2972 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2973 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2975 /* Handle default route. */
2976 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2978 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2980 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler registered for the test lookup command.
 *
 * Accepts, in any order, table <n>, count <n> (parsed with %f) and an
 * IPv4 address.  It then calls ip4_lookup_validate () n times, adding
 * one (in host byte order) to the address after each call, and prints
 * either the number of errors or a no-errors message.
 *
 * NOTE(review): ip4_base_address is declared without an initializer;
 * confirm it is always set before the validation loop when no address
 * is given on the command line.
 *
 * @param vm    vlib main, used for CLI output
 * @param input unformat stream holding the command arguments
 * @param cmd   CLI command descriptor (not referenced by the visible code)
 * @return a clib error for a rejected table or unknown input
 */
2983 static clib_error_t *
2984 test_lookup_command_fn (vlib_main_t * vm,
2985 unformat_input_t * input, vlib_cli_command_t * cmd)
2992 ip4_address_t ip4_base_address;
2995 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2997 if (unformat (input, "table %d", &table_id))
2999 /* Make sure the entry exists. */
/* NOTE(review): the error below is returned only when ip4_fib_get ()
 * yields a non-NULL fib whose index differs from table_id; a NULL
 * result passes this check -- confirm that is intended. */
3000 fib = ip4_fib_get (table_id);
3001 if ((fib) && (fib->index != table_id))
3002 return clib_error_return (0, "<fib-index> %d does not exist",
3005 else if (unformat (input, "count %f", &count))
3008 else if (unformat (input, "%U",
3009 unformat_ip4_address, &ip4_base_address))
/* Any other token is rejected. */
3012 return clib_error_return (0, "unknown input `%U'",
3013 format_unformat_error, input);
/* Validate n consecutive addresses starting at the base address. */
3018 for (i = 0; i < n; i++)
3020 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Increment in host byte order, store back in network byte order. */
3023 ip4_base_address.as_u32 =
3024 clib_host_to_net_u32 (1 +
3025 clib_net_to_host_u32 (ip4_base_address.as_u32));
3029 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3031 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3037 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3038 * given FIB table to determine if there is a conflict with the
3039 * adjacency table. The fib-id can be determined by using the
3040 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3043 * @todo This command uses fib-id, other commands use table-id (not
3044 * just a name, they are different indexes). Would like to change this
3045 * to table-id for consistency.
3048 * Example of how to run the test lookup command:
3049 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3050 * No errors in 2 lookups
/* CLI registration: binds the test lookup path to test_lookup_command_fn. */
3054 VLIB_CLI_COMMAND (lookup_test_command, static) =
3056 .path = "test lookup",
3057 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3058 .function = test_lookup_command_fn,
/*
 * Set the flow-hash configuration of an IPv4 FIB table.
 *
 * The table id is looked up in ip4_main.fib_index_by_table_id; the hash
 * value found there is the FIB index handed to ip4_fib_get (), and the
 * flow_hash_config field of that FIB is overwritten.
 *
 * @param table_id         user-visible table id
 * @param flow_hash_config new flow-hash bit mask to store
 * @return VNET_API_ERROR_NO_SUCH_FIB on the failure path (presumably
 *         when table_id has no entry in the hash -- confirm)
 */
3063 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3065 ip4_main_t *im4 = &ip4_main;
3067 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3070 return VNET_API_ERROR_NO_SUCH_FIB;
3072 fib = ip4_fib_get (p[0]);
3074 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler registered for the set ip flow-hash command.
 *
 * Parses table <id> plus the flow-hash keywords; each keyword branch
 * ORs its value into flow_hash_config and sets matched.  The result is
 * applied with vnet_set_ip4_flow_hash ().  VNET_API_ERROR_NO_SUCH_FIB
 * is turned into a CLI error, and a clib_warning reports an illegal
 * flow hash config.
 * NOTE(review): the keyword branches appear to be generated from
 * foreach_flow_hash_bit -- confirm against its definition.
 *
 * @param vm    vlib main (not referenced by the visible code)
 * @param input unformat stream holding the command arguments
 * @param cmd   CLI command descriptor (not referenced by the visible code)
 * @return a clib error for unknown input or an unknown FIB table
 */
3078 static clib_error_t *
3079 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3080 unformat_input_t * input,
3081 vlib_cli_command_t * cmd)
3085 u32 flow_hash_config = 0;
3088 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3090 if (unformat (input, "table %d", &table_id))
3093 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3094 foreach_flow_hash_bit
3101 return clib_error_return (0, "unknown input `%U'",
3102 format_unformat_error, input);
/* Apply the accumulated configuration to the selected table. */
3104 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3110 case VNET_API_ERROR_NO_SUCH_FIB:
3111 return clib_error_return (0, "no such FIB table %d", table_id);
3114 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3122 * Configure the set of IPv4 fields used by the flow hash.
3125 * Example of how to set the flow hash on a given table:
3126 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3127 * Example of how to display the configured flow hash:
3128 * @cliexstart{show ip fib}
3129 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3132 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3133 * [0] [@0]: dpo-drop ip6
3136 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3137 * [0] [@0]: dpo-drop ip6
3140 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3141 * [0] [@0]: dpo-drop ip6
3144 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3145 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3148 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3149 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3150 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3151 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3152 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3155 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3156 * [0] [@0]: dpo-drop ip6
3157 * 255.255.255.255/32
3159 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3160 * [0] [@0]: dpo-drop ip6
3161 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3164 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3165 * [0] [@0]: dpo-drop ip6
3168 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3169 * [0] [@0]: dpo-drop ip6
3172 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3173 * [0] [@4]: ipv4-glean: af_packet0
3176 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3177 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3180 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3181 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3184 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3185 * [0] [@4]: ipv4-glean: af_packet1
3188 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3189 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3192 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3193 * [0] [@0]: dpo-drop ip6
3196 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3197 * [0] [@0]: dpo-drop ip6
3198 * 255.255.255.255/32
3200 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3201 * [0] [@0]: dpo-drop ip6
/* CLI registration: binds the set ip flow-hash path to
 * set_ip_flow_hash_command_fn. */
3205 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3207 .path = "set ip flow-hash",
3209 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3210 .function = set_ip_flow_hash_command_fn,
/*
 * Bind an IPv4 classify table to an interface, or remove the binding.
 *
 * Stores table_index in lookup_main.classify_table_index_by_sw_if_index
 * for the interface.  If the interface has a first IPv4 address, a FIB
 * prefix is built from it in the FIB used by the interface; a classify
 * dpo is added for that prefix under FIB_SOURCE_CLASSIFY when
 * table_index is not ~0, and the FIB_SOURCE_CLASSIFY entry is removed
 * otherwise (presumably the else branch -- confirm).
 *
 * @param vm          vlib main (not referenced by the visible code)
 * @param sw_if_index software interface index; must be in use in the
 *                    sw_interfaces pool
 * @param table_index classify table index; ~0 skips the table-pool check
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE for a free interface
 *         index, VNET_API_ERROR_NO_SUCH_ENTRY for a free table index
 */
3215 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3218 vnet_main_t *vnm = vnet_get_main ();
3219 vnet_interface_main_t *im = &vnm->interface_main;
3220 ip4_main_t *ipm = &ip4_main;
3221 ip_lookup_main_t *lm = &ipm->lookup_main;
3222 vnet_classify_main_t *cm = &vnet_classify_main;
3223 ip4_address_t *if_addr;
/* Argument validation. */
3225 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3226 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3228 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3229 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Record the binding, growing the per-interface vector if needed. */
3231 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3232 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3234 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* FIB changes are made only when the interface has an address. */
3236 if (NULL != if_addr)
3238 fib_prefix_t pfx = {
3240 .fp_proto = FIB_PROTOCOL_IP4,
3241 .fp_addr.ip4 = *if_addr,
3245 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3249 if (table_index != (u32) ~ 0)
3251 dpo_id_t dpo = DPO_INVALID;
3256 classify_dpo_create (DPO_PROTO_IP4, table_index));
3258 fib_table_entry_special_dpo_add (fib_index,
3260 FIB_SOURCE_CLASSIFY,
3261 FIB_ENTRY_FLAG_NONE, &dpo);
3266 fib_table_entry_special_remove (fib_index,
3267 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler registered for the set ip classify command.
 *
 * Parses table-index <n> and intfc <interface>, requires both to be
 * present, and applies them with vnet_set_ip4_classify_intfc ().  The
 * two API error codes handled below are mapped to CLI error messages.
 *
 * @param vm    vlib main, forwarded to vnet_set_ip4_classify_intfc ()
 * @param input unformat stream holding the command arguments
 * @param cmd   CLI command descriptor (not referenced by the visible code)
 * @return a clib error for a missing argument, an unknown interface or
 *         an unknown classifier table
 */
3274 static clib_error_t *
3275 set_ip_classify_command_fn (vlib_main_t * vm,
3276 unformat_input_t * input,
3277 vlib_cli_command_t * cmd)
3279 u32 table_index = ~0;
/* Separate flag for the table index: presence is tracked independently
 * of the parsed value. */
3280 int table_index_set = 0;
3281 u32 sw_if_index = ~0;
3284 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3286 if (unformat (input, "table-index %d", &table_index))
3287 table_index_set = 1;
3288 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3289 vnet_get_main (), &sw_if_index))
/* Both arguments are mandatory. */
3295 if (table_index_set == 0)
3296 return clib_error_return (0, "classify table-index must be specified");
3298 if (sw_if_index == ~0)
3299 return clib_error_return (0, "interface / subif must be specified");
3301 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3308 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3309 return clib_error_return (0, "No such interface");
3311 case VNET_API_ERROR_NO_SUCH_ENTRY:
3312 return clib_error_return (0, "No such classifier table");
3318 * Assign a classification table to an interface. The classification
3319 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3320 * commands. Once the table is created, use this command to filter packets
3324 * Example of how to assign a classification table to an interface:
3325 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* CLI registration: binds the set ip classify path to
 * set_ip_classify_command_fn. */
3328 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3330 .path = "set ip classify",
3332 "set ip classify intfc <interface> table-index <classify-idx>",
3333 .function = set_ip_classify_command_fn,
3338 * fd.io coding-style-patch-verification: ON
3341 * eval: (c-set-style "gnu")