2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 * @brief IPv4 Forwarding.
59 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the trace helper.  The lookup and load-balance
   node functions in this file call it with VLIB_TX when the node has
   VLIB_NODE_FLAG_TRACE set. */
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 cpu_index = os_get_cpu_number ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
100 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
102 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
104 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
106 flow_hash_config_t flow_hash_config0, flow_hash_config1;
107 flow_hash_config_t flow_hash_config2, flow_hash_config3;
108 u32 hash_c0, hash_c1, hash_c2, hash_c3;
109 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
111 /* Prefetch next iteration. */
113 vlib_buffer_t *p4, *p5, *p6, *p7;
115 p4 = vlib_get_buffer (vm, from[4]);
116 p5 = vlib_get_buffer (vm, from[5]);
117 p6 = vlib_get_buffer (vm, from[6]);
118 p7 = vlib_get_buffer (vm, from[7]);
120 vlib_prefetch_buffer_header (p4, LOAD);
121 vlib_prefetch_buffer_header (p5, LOAD);
122 vlib_prefetch_buffer_header (p6, LOAD);
123 vlib_prefetch_buffer_header (p7, LOAD);
125 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
128 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
131 pi0 = to_next[0] = from[0];
132 pi1 = to_next[1] = from[1];
133 pi2 = to_next[2] = from[2];
134 pi3 = to_next[3] = from[3];
141 p0 = vlib_get_buffer (vm, pi0);
142 p1 = vlib_get_buffer (vm, pi1);
143 p2 = vlib_get_buffer (vm, pi2);
144 p3 = vlib_get_buffer (vm, pi3);
146 ip0 = vlib_buffer_get_current (p0);
147 ip1 = vlib_buffer_get_current (p1);
148 ip2 = vlib_buffer_get_current (p2);
149 ip3 = vlib_buffer_get_current (p3);
151 dst_addr0 = &ip0->dst_address;
152 dst_addr1 = &ip1->dst_address;
153 dst_addr2 = &ip2->dst_address;
154 dst_addr3 = &ip3->dst_address;
157 vec_elt (im->fib_index_by_sw_if_index,
158 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
160 vec_elt (im->fib_index_by_sw_if_index,
161 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
163 vec_elt (im->fib_index_by_sw_if_index,
164 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
166 vec_elt (im->fib_index_by_sw_if_index,
167 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
169 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
170 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
172 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
173 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
175 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
176 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
178 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
179 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
182 if (!lookup_for_responses_to_locally_received_packets)
184 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
185 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
186 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
187 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
190 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
191 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
192 leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
193 leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
196 tcp0 = (void *) (ip0 + 1);
197 tcp1 = (void *) (ip1 + 1);
198 tcp2 = (void *) (ip2 + 1);
199 tcp3 = (void *) (ip3 + 1);
201 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
202 || ip0->protocol == IP_PROTOCOL_UDP);
203 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
204 || ip1->protocol == IP_PROTOCOL_UDP);
205 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
206 || ip2->protocol == IP_PROTOCOL_UDP);
207 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
208 || ip1->protocol == IP_PROTOCOL_UDP);
210 if (!lookup_for_responses_to_locally_received_packets)
212 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
213 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
214 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
215 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
218 if (!lookup_for_responses_to_locally_received_packets)
220 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
221 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
222 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
223 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
226 if (!lookup_for_responses_to_locally_received_packets)
228 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
229 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
230 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
231 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
234 if (lookup_for_responses_to_locally_received_packets)
236 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
237 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
238 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
239 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
243 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
244 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
245 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
246 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
249 ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
250 lb0 = load_balance_get (lb_index0);
251 lb1 = load_balance_get (lb_index1);
252 lb2 = load_balance_get (lb_index2);
253 lb3 = load_balance_get (lb_index3);
255 /* Use flow hash to compute multipath adjacency. */
256 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
257 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
258 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
259 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
260 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
262 flow_hash_config0 = lb0->lb_hash_config;
263 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
264 ip4_compute_flow_hash (ip0, flow_hash_config0);
266 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
268 flow_hash_config1 = lb1->lb_hash_config;
269 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
270 ip4_compute_flow_hash (ip1, flow_hash_config1);
272 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
274 flow_hash_config2 = lb2->lb_hash_config;
275 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
276 ip4_compute_flow_hash (ip2, flow_hash_config2);
278 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
280 flow_hash_config3 = lb3->lb_hash_config;
281 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
282 ip4_compute_flow_hash (ip3, flow_hash_config3);
285 ASSERT (lb0->lb_n_buckets > 0);
286 ASSERT (is_pow2 (lb0->lb_n_buckets));
287 ASSERT (lb1->lb_n_buckets > 0);
288 ASSERT (is_pow2 (lb1->lb_n_buckets));
289 ASSERT (lb2->lb_n_buckets > 0);
290 ASSERT (is_pow2 (lb2->lb_n_buckets));
291 ASSERT (lb3->lb_n_buckets > 0);
292 ASSERT (is_pow2 (lb3->lb_n_buckets));
294 dpo0 = load_balance_get_bucket_i (lb0,
296 (lb0->lb_n_buckets_minus_1)));
297 dpo1 = load_balance_get_bucket_i (lb1,
299 (lb1->lb_n_buckets_minus_1)));
300 dpo2 = load_balance_get_bucket_i (lb2,
302 (lb2->lb_n_buckets_minus_1)));
303 dpo3 = load_balance_get_bucket_i (lb3,
305 (lb3->lb_n_buckets_minus_1)));
307 next0 = dpo0->dpoi_next_node;
308 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
309 next1 = dpo1->dpoi_next_node;
310 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
311 next2 = dpo2->dpoi_next_node;
312 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
313 next3 = dpo3->dpoi_next_node;
314 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
316 vlib_increment_combined_counter
317 (cm, cpu_index, lb_index0, 1,
318 vlib_buffer_length_in_chain (vm, p0)
319 + sizeof (ethernet_header_t));
320 vlib_increment_combined_counter
321 (cm, cpu_index, lb_index1, 1,
322 vlib_buffer_length_in_chain (vm, p1)
323 + sizeof (ethernet_header_t));
324 vlib_increment_combined_counter
325 (cm, cpu_index, lb_index2, 1,
326 vlib_buffer_length_in_chain (vm, p2)
327 + sizeof (ethernet_header_t));
328 vlib_increment_combined_counter
329 (cm, cpu_index, lb_index3, 1,
330 vlib_buffer_length_in_chain (vm, p3)
331 + sizeof (ethernet_header_t));
333 vlib_validate_buffer_enqueue_x4 (vm, node, next,
334 to_next, n_left_to_next,
336 next0, next1, next2, next3);
339 while (n_left_from > 0 && n_left_to_next > 0)
343 __attribute__ ((unused)) tcp_header_t *tcp0;
344 ip_lookup_next_t next0;
345 const load_balance_t *lb0;
346 ip4_fib_mtrie_t *mtrie0;
347 ip4_fib_mtrie_leaf_t leaf0;
348 ip4_address_t *dst_addr0;
349 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
350 flow_hash_config_t flow_hash_config0;
351 const dpo_id_t *dpo0;
357 p0 = vlib_get_buffer (vm, pi0);
359 ip0 = vlib_buffer_get_current (p0);
361 dst_addr0 = &ip0->dst_address;
364 vec_elt (im->fib_index_by_sw_if_index,
365 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
367 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
368 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
370 if (!lookup_for_responses_to_locally_received_packets)
372 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
374 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
377 tcp0 = (void *) (ip0 + 1);
379 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
380 || ip0->protocol == IP_PROTOCOL_UDP);
382 if (!lookup_for_responses_to_locally_received_packets)
383 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
385 if (!lookup_for_responses_to_locally_received_packets)
386 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
388 if (!lookup_for_responses_to_locally_received_packets)
389 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
391 if (lookup_for_responses_to_locally_received_packets)
392 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
395 /* Handle default route. */
396 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
400 lb0 = load_balance_get (lbi0);
402 /* Use flow hash to compute multipath adjacency. */
403 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
404 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
406 flow_hash_config0 = lb0->lb_hash_config;
408 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
409 ip4_compute_flow_hash (ip0, flow_hash_config0);
412 ASSERT (lb0->lb_n_buckets > 0);
413 ASSERT (is_pow2 (lb0->lb_n_buckets));
415 dpo0 = load_balance_get_bucket_i (lb0,
417 (lb0->lb_n_buckets_minus_1)));
419 next0 = dpo0->dpoi_next_node;
420 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
422 vlib_increment_combined_counter
423 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
430 if (PREDICT_FALSE (next0 != next))
433 vlib_put_next_frame (vm, node, next, n_left_to_next);
435 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
442 vlib_put_next_frame (vm, node, next, n_left_to_next);
445 if (node->flags & VLIB_NODE_FLAG_TRACE)
446 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
448 return frame->n_vectors;
451 /** @brief IPv4 lookup node.
454 This is the main IPv4 lookup dispatch node.
456 @param vm vlib_main_t corresponding to the current thread
457 @param node vlib_node_runtime_t
458 @param frame vlib_frame_t whose contents should be dispatched
460 @par Graph mechanics: buffer metadata, next index usage
463 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
464 - Indicates the @c sw_if_index value of the interface that the
465 packet was received on.
466 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
467 - When the value is @c ~0 then the node performs a longest prefix
468 match (LPM) for the packet destination address in the FIB attached
469 to the receive interface.
470 - Otherwise perform LPM for the packet destination address in the
471 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
472 value (0, 1, ...) and not a VRF id.
475 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
476 - The index of the DPO chosen from the lookup result (@c dpo->dpoi_index).
479 - Dispatches the packet to the node index found in
480 @c dpo->dpoi_next_node
481 (where @c dpo is the load-balance bucket selected for the packet).
/* Node function registered as "ip4-lookup" below: hands the frame to
   ip4_lookup_inline and returns whatever that returns. */
484 ip4_lookup (vlib_main_t * vm,
485 vlib_node_runtime_t * node, vlib_frame_t * frame)
487 return ip4_lookup_inline (vm, node, frame,
488 /* lookup_for_responses_to_locally_received_packets */
/* Trace formatter, declared here so the node registrations can refer to
   it. */
493 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registers the "ip4-lookup" graph node: ip4_lookup is its dispatch
   function, frames carry u32 elements, and the next-node table is
   IP4_LOOKUP_NEXT_NODES with IP_LOOKUP_N_NEXT entries. */
495 VLIB_REGISTER_NODE (ip4_lookup_node) =
497 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
498 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
499 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
501 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/* Node function of "ip4-load-balance".  For each buffer it treats
   vnet_buffer(b)->ip.adj_index[VLIB_TX] as a load-balance index, picks a
   bucket of that load-balance, overwrites adj_index[VLIB_TX] with the
   bucket's DPO index and enqueues the buffer to the bucket's next node.
   Returns frame->n_vectors. */
504 ip4_load_balance (vlib_main_t * vm,
505 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* This node accounts packets in the "via" counters of the load-balance. */
507 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
508 u32 n_left_from, n_left_to_next, *from, *to_next;
509 ip_lookup_next_t next;
510 u32 cpu_index = os_get_cpu_number ();
512 from = vlib_frame_vector_args (frame);
513 n_left_from = frame->n_vectors;
514 next = node->cached_next_index;
/* Unlike ip4_lookup_inline, tracing is done before the buffers are
   processed. */
516 if (node->flags & VLIB_NODE_FLAG_TRACE)
517 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
519 while (n_left_from > 0)
521 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: two buffers per iteration; four must remain because
   from[2] and from[3] are prefetched. */
524 while (n_left_from >= 4 && n_left_to_next >= 2)
526 ip_lookup_next_t next0, next1;
527 const load_balance_t *lb0, *lb1;
528 vlib_buffer_t *p0, *p1;
529 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
530 const ip4_header_t *ip0, *ip1;
531 const dpo_id_t *dpo0, *dpo1;
533 /* Prefetch next iteration. */
535 vlib_buffer_t *p2, *p3;
537 p2 = vlib_get_buffer (vm, from[2]);
538 p3 = vlib_get_buffer (vm, from[3]);
540 vlib_prefetch_buffer_header (p2, STORE);
541 vlib_prefetch_buffer_header (p3, STORE);
543 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
544 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
547 pi0 = to_next[0] = from[0];
548 pi1 = to_next[1] = from[1];
555 p0 = vlib_get_buffer (vm, pi0);
556 p1 = vlib_get_buffer (vm, pi1);
558 ip0 = vlib_buffer_get_current (p0);
559 ip1 = vlib_buffer_get_current (p1);
560 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
561 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
563 lb0 = load_balance_get (lbi0);
564 lb1 = load_balance_get (lbi1);
567 * this node is for via FIBs we can re-use the hash value from the
568 * to node if present.
569 * We don't want to use the same hash value at each level in the recursion
570 * graph as that would lead to polarisation
574 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
576 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
/* A non-zero stored hash is reused, shifted right by one bit and written
   back; otherwise a fresh hash is computed from the IP header. */
578 hc0 = vnet_buffer (p0)->ip.flow_hash =
579 vnet_buffer (p0)->ip.flow_hash >> 1;
583 hc0 = vnet_buffer (p0)->ip.flow_hash =
584 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
587 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
589 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
591 hc1 = vnet_buffer (p1)->ip.flow_hash =
592 vnet_buffer (p1)->ip.flow_hash >> 1;
596 hc1 = vnet_buffer (p1)->ip.flow_hash =
597 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
/* Bucket index is the hash masked with lb_n_buckets_minus_1. */
602 load_balance_get_bucket_i (lb0,
603 hc0 & (lb0->lb_n_buckets_minus_1));
605 load_balance_get_bucket_i (lb1,
606 hc1 & (lb1->lb_n_buckets_minus_1));
608 next0 = dpo0->dpoi_next_node;
609 next1 = dpo1->dpoi_next_node;
/* Replace the load-balance index with the index of the chosen DPO. */
611 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
612 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* One packet and its buffer chain length are charged to the
   load-balance the buffer arrived with (lbi0 / lbi1). */
614 vlib_increment_combined_counter
615 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
616 vlib_increment_combined_counter
617 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
619 vlib_validate_buffer_enqueue_x2 (vm, node, next,
620 to_next, n_left_to_next,
621 pi0, pi1, next0, next1);
/* Single loop: same steps as above, one buffer at a time. */
624 while (n_left_from > 0 && n_left_to_next > 0)
626 ip_lookup_next_t next0;
627 const load_balance_t *lb0;
630 const ip4_header_t *ip0;
631 const dpo_id_t *dpo0;
640 p0 = vlib_get_buffer (vm, pi0);
642 ip0 = vlib_buffer_get_current (p0);
643 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
645 lb0 = load_balance_get (lbi0);
648 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
650 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
652 hc0 = vnet_buffer (p0)->ip.flow_hash =
653 vnet_buffer (p0)->ip.flow_hash >> 1;
657 hc0 = vnet_buffer (p0)->ip.flow_hash =
658 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
663 load_balance_get_bucket_i (lb0,
664 hc0 & (lb0->lb_n_buckets_minus_1));
666 next0 = dpo0->dpoi_next_node;
667 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
669 vlib_increment_combined_counter
670 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
672 vlib_validate_buffer_enqueue_x1 (vm, node, next,
673 to_next, n_left_to_next,
677 vlib_put_next_frame (vm, node, next, n_left_to_next);
680 return frame->n_vectors;
/* Registers the "ip4-load-balance" graph node as a sibling of
   "ip4-lookup"; it reuses the lookup trace formatter. */
683 VLIB_REGISTER_NODE (ip4_load_balance_node) =
685 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
686 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
687 format_ip4_lookup_trace,};
689 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
691 /* get first interface address */
/* Looks up an IPv4 address configured on sw_if_index by iterating with
   foreach_ip_interface_address (unnumbered interfaces are honoured).
   The matching ip_interface_address_t is reported through *result_ia,
   which is set to 0 when no address was found.
   NOTE(review): the statements that end the iteration and return the
   result are not visible in this excerpt -- confirm against the full
   file. */
693 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
694 ip_interface_address_t ** result_ia)
696 ip_lookup_main_t *lm = &im->lookup_main;
697 ip_interface_address_t *ia = 0;
698 ip4_address_t *result = 0;
701 foreach_ip_interface_address
702 (lm, ia, sw_if_index,
703 1 /* honor unnumbered */ ,
706 ip_interface_address_get_address (lm, ia);
712 *result_ia = result ? ia : 0;
/* Installs the FIB entries that go with interface address `a` in table
   fib_index, all sourced from FIB_SOURCE_INTERFACE:
     - prefix length <= 30: a CONNECTED|ATTACHED entry for the prefix,
       whose adjacency is remembered in a->neighbor_probe_adj_index, plus
       DROP entries for the subnet's network and broadcast addresses when
       they differ from the interface address;
     - prefix length == 31: an ATTACHED entry for the other address of
       the pair (lowest address bit flipped);
     - when the interface has a classify table configured, a classify DPO
       entry;
     - finally a CONNECTED|LOCAL entry for pfx. */
717 ip4_add_interface_routes (u32 sw_if_index,
718 ip4_main_t * im, u32 fib_index,
719 ip_interface_address_t * a)
721 ip_lookup_main_t *lm = &im->lookup_main;
722 ip4_address_t *address = ip_interface_address_get_address (lm, a);
724 .fp_len = a->address_length,
725 .fp_proto = FIB_PROTOCOL_IP4,
726 .fp_addr.ip4 = *address,
/* ~0 means "no probe adjacency"; only the <= /30 branch replaces it. */
729 a->neighbor_probe_adj_index = ~0;
731 if (pfx.fp_len <= 30)
733 /* a /30 or shorter - add a glean for the network address */
734 fib_node_index_t fei;
736 fei = fib_table_entry_update_one_path (fib_index, &pfx,
737 FIB_SOURCE_INTERFACE,
738 (FIB_ENTRY_FLAG_CONNECTED |
739 FIB_ENTRY_FLAG_ATTACHED),
741 /* No next-hop address */
747 // no out-label stack
749 FIB_ROUTE_PATH_FLAG_NONE);
750 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
752 /* Add the two broadcast addresses as drop */
753 fib_prefix_t net_pfx = {
755 .fp_proto = FIB_PROTOCOL_IP4,
756 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
/* Address with all host bits cleared; skipped when it is the interface
   address itself. */
758 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
759 fib_table_entry_special_add(fib_index,
761 FIB_SOURCE_INTERFACE,
762 (FIB_ENTRY_FLAG_DROP |
763 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
/* Address with all host bits set. */
765 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
766 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
767 fib_table_entry_special_add(fib_index,
769 FIB_SOURCE_INTERFACE,
770 (FIB_ENTRY_FLAG_DROP |
771 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
774 else if (pfx.fp_len == 31)
/* The mask is 1 converted to network byte order, so the XOR below flips
   the lowest bit of the address. */
776 u32 mask = clib_host_to_net_u32(1);
777 fib_prefix_t net_pfx = pfx;
780 net_pfx.fp_addr.ip4.as_u32 ^= mask;
782 /* a /31 - add the other end as an attached host */
783 fib_table_entry_update_one_path (fib_index, &net_pfx,
784 FIB_SOURCE_INTERFACE,
785 (FIB_ENTRY_FLAG_ATTACHED),
793 FIB_ROUTE_PATH_FLAG_NONE);
797 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
799 u32 classify_table_index =
800 lm->classify_table_index_by_sw_if_index[sw_if_index];
/* ~0 marks an interface without a classify table. */
801 if (classify_table_index != (u32) ~ 0)
803 dpo_id_t dpo = DPO_INVALID;
808 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
810 fib_table_entry_special_dpo_add (fib_index,
813 FIB_ENTRY_FLAG_NONE, &dpo);
818 fib_table_entry_update_one_path (fib_index, &pfx,
819 FIB_SOURCE_INTERFACE,
820 (FIB_ENTRY_FLAG_CONNECTED |
821 FIB_ENTRY_FLAG_LOCAL),
828 FIB_ROUTE_PATH_FLAG_NONE);
/* Removes the FIB_SOURCE_INTERFACE entries that
   ip4_add_interface_routes installs for an address: for a prefix of
   length <= 30 the special entries for the network and broadcast
   addresses (when they differ from the interface address) and the prefix
   entry; for a /31 the entry of the peer address; and finally the entry
   for pfx. */
832 ip4_del_interface_routes (ip4_main_t * im,
834 ip4_address_t * address, u32 address_length)
837 .fp_len = address_length,
838 .fp_proto = FIB_PROTOCOL_IP4,
839 .fp_addr.ip4 = *address,
842 if (pfx.fp_len <= 30)
844 fib_prefix_t net_pfx = {
846 .fp_proto = FIB_PROTOCOL_IP4,
847 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
/* Address with all host bits cleared. */
849 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
850 fib_table_entry_special_remove(fib_index,
852 FIB_SOURCE_INTERFACE);
/* Address with all host bits set. */
853 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
854 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
855 fib_table_entry_special_remove(fib_index,
857 FIB_SOURCE_INTERFACE);
858 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
860 else if (pfx.fp_len == 31)
/* Same lowest-bit flip as in ip4_add_interface_routes to name the peer. */
862 u32 mask = clib_host_to_net_u32(1);
863 fib_prefix_t net_pfx = pfx;
866 net_pfx.fp_addr.ip4.as_u32 ^= mask;
868 fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
872 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 on an interface.
   im->ip_enabled_by_sw_if_index[sw_if_index] is the count: it is
   pre-incremented and compared with 1 on one path, and pre-decremented
   and compared with 0 (after asserting it is positive) on the other.
   The "ip4-drop" feature on the "ip4-unicast" and "ip4-multicast" arcs
   is then toggled. */
876 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
878 ip4_main_t *im = &ip4_main;
880 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
883 * enable/disable only on the 1<->0 transition
887 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
892 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
893 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
896 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
/* The multicast drop feature is passed !is_enable. */
900 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
901 sw_if_index, !is_enable, 0, 0);
/* Adds or removes (is_del) an IPv4 address/length on sw_if_index.
   Visible steps: validate the per-interface FIB index vector, check the
   new address against the addresses already on the interface, update the
   interface address pool through ip_interface_address_add_del, toggle
   IPv4 on the interface, add or delete the matching interface routes,
   and invoke the registered add/del callbacks when the pool size
   changed.
   NOTE(review): addr_fib is a vector grown with vec_add1; the code that
   frees it and returns `error` is not visible in this excerpt -- confirm
   it is released on every exit path. */
904 static clib_error_t *
905 ip4_add_del_interface_address_internal (vlib_main_t * vm,
907 ip4_address_t * address,
908 u32 address_length, u32 is_del)
910 vnet_main_t *vnm = vnet_get_main ();
911 ip4_main_t *im = &ip4_main;
912 ip_lookup_main_t *lm = &im->lookup_main;
913 clib_error_t *error = 0;
914 u32 if_address_index, elts_before;
915 ip4_address_fib_t ip4_af, *addr_fib = 0;
917 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
918 ip4_addr_fib_init (&ip4_af, address,
919 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
920 vec_add1 (addr_fib, ip4_af);
923 * there is no support for adj-fib handling in the presence of overlapping
924 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
930 /* When adding an address check that it does not conflict
931 with an existing address. */
932 ip_interface_address_t *ia;
933 foreach_ip_interface_address
934 (&im->lookup_main, ia, sw_if_index,
935 0 /* honor unnumbered */ ,
938 ip_interface_address_get_address
939 (&im->lookup_main, ia);
940 if (ip4_destination_matches_route
941 (im, address, x, ia->address_length) ||
942 ip4_destination_matches_route (im,
948 ("failed to add %U which conflicts with %U for interface %U",
949 format_ip4_address_and_length, address,
951 format_ip4_address_and_length, x,
953 format_vnet_sw_if_index_name, vnm,
/* Pool size before the update; compared again below to tell a real
   add/remove from a duplicate. */
959 elts_before = pool_elts (lm->if_address_pool);
961 error = ip_interface_address_add_del
962 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
966 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
969 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
971 ip4_add_interface_routes (sw_if_index,
972 im, ip4_af.fib_index,
974 (lm->if_address_pool, if_address_index));
976 /* If pool did not grow/shrink: add duplicate address. */
977 if (elts_before != pool_elts (lm->if_address_pool))
979 ip4_add_del_interface_address_callback_t *cb;
980 vec_foreach (cb, im->add_del_interface_address_callbacks)
981 cb->function (im, cb->function_opaque, sw_if_index,
982 address, address_length, if_address_index, is_del);
/* Entry point for adding or deleting an interface address; passes its
   arguments unchanged to ip4_add_del_interface_address_internal and
   returns that function's result. */
991 ip4_add_del_interface_address (vlib_main_t * vm,
993 ip4_address_t * address,
994 u32 address_length, u32 is_del)
996 return ip4_add_del_interface_address_internal
997 (vm, sw_if_index, address, address_length, is_del);
1000 /* Built-in ip4 unicast rx feature path definition */
/* "ip4-unicast" feature arc, entered from "ip4-input" and
   "ip4-input-no-checksum".  The runs_before constraints below give the
   order
     ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
     ip4-source-check-via-any -> ip4-policer-classify ->
     ipsec-input-ip4 -> vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup
   with ip4-source-and-port-range-check-rx also placed before
   ip4-policer-classify and ip4-drop placed before ip4-lookup. */
1002 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
1004 .arc_name = "ip4-unicast",
1005 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1006 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
1009 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1011 .arc_name = "ip4-unicast",
1012 .node_name = "ip4-flow-classify",
1013 .runs_before = VNET_FEATURES ("ip4-inacl"),
1016 VNET_FEATURE_INIT (ip4_inacl, static) =
1018 .arc_name = "ip4-unicast",
1019 .node_name = "ip4-inacl",
1020 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1023 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1025 .arc_name = "ip4-unicast",
1026 .node_name = "ip4-source-check-via-rx",
1027 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1030 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1032 .arc_name = "ip4-unicast",
1033 .node_name = "ip4-source-check-via-any",
1034 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1037 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1039 .arc_name = "ip4-unicast",
1040 .node_name = "ip4-source-and-port-range-check-rx",
1041 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1044 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1046 .arc_name = "ip4-unicast",
1047 .node_name = "ip4-policer-classify",
1048 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1051 VNET_FEATURE_INIT (ip4_ipsec, static) =
1053 .arc_name = "ip4-unicast",
1054 .node_name = "ipsec-input-ip4",
1055 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1058 VNET_FEATURE_INIT (ip4_vpath, static) =
1060 .arc_name = "ip4-unicast",
1061 .node_name = "vpath-input-ip4",
1062 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1065 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1067 .arc_name = "ip4-unicast",
1068 .node_name = "ip4-vxlan-bypass",
1069 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* The drop feature that ip4_sw_interface_enable_disable and
   ip4_sw_interface_add_del toggle per interface. */
1072 VNET_FEATURE_INIT (ip4_drop, static) =
1074 .arc_name = "ip4-unicast",
1075 .node_name = "ip4-drop",
1076 .runs_before = VNET_FEATURES ("ip4-lookup"),
1079 VNET_FEATURE_INIT (ip4_lookup, static) =
1081 .arc_name = "ip4-unicast",
1082 .node_name = "ip4-lookup",
1083 .runs_before = 0, /* not before any other features */
1086 /* Built-in ip4 multicast rx feature path definition */
/* "ip4-multicast" feature arc, entered from the same two input nodes.
   vpath-input-ip4 and ip4-drop are both ordered before
   ip4-mfib-forward-lookup, which is the last feature on the arc. */
1087 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1089 .arc_name = "ip4-multicast",
1090 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1091 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1094 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1096 .arc_name = "ip4-multicast",
1097 .node_name = "vpath-input-ip4",
1098 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1101 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1103 .arc_name = "ip4-multicast",
1104 .node_name = "ip4-drop",
1105 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1108 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1110 .arc_name = "ip4-multicast",
1111 .node_name = "ip4-mfib-forward-lookup",
1112 .runs_before = 0, /* last feature */
1115 /* Source and port-range check ip4 tx feature path definition */
/* "ip4-output" feature arc, entered from "ip4-rewrite" and
   "ip4-midchain".  Order given by the runs_before constraints:
     ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
     interface-output. */
1116 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1118 .arc_name = "ip4-output",
1119 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1120 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1123 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1125 .arc_name = "ip4-output",
1126 .node_name = "ip4-source-and-port-range-check-tx",
1127 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1130 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1132 .arc_name = "ip4-output",
1133 .node_name = "ipsec-output-ip4",
1134 .runs_before = VNET_FEATURES ("interface-output"),
1137 /* Built-in ip4 tx feature path definition */
1138 VNET_FEATURE_INIT (ip4_interface_output, static) =
1140 .arc_name = "ip4-output",
1141 .node_name = "interface-output",
1142 .runs_before = 0, /* not before any other features */
/* Software-interface add/del hook, registered through
   VNET_SW_INTERFACE_ADD_DEL_FUNCTION.  It makes sure the unicast and
   multicast FIB index vectors have a slot for sw_if_index, then calls
   vnet_feature_enable_disable for "ip4-drop" on both the unicast and
   multicast arcs.  Always returns 0 (no error). */
1146 static clib_error_t *
1147 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1149 ip4_main_t *im = &ip4_main;
1151 /* Fill in lookup tables with default table (0). */
1152 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1153 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1155 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1158 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1161 return /* no error */ 0;
1164 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1166 /* Global IP4 main. */
/* The single ip4_main_t instance; the functions in this file reach it
   through &ip4_main. */
1167 ip4_main_t ip4_main;
/* Init function (registered with VLIB_INIT_FUNCTION) for IPv4 lookup.
   After vnet_feature_init has run it fills im->fib_masks with
   network-byte-order masks indexed by prefix length, initialises the
   lookup main, creates and locks unicast and multicast FIB table 0,
   hooks the packet-generator header parser onto the ip4-lookup node and
   builds the ARP request packet template. */
1170 ip4_lookup_init (vlib_main_t * vm)
1172 ip4_main_t *im = &ip4_main;
1173 clib_error_t *error;
1176 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1179 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* Shifts the i low-order one bits of pow2_mask (i) left by (32 - i). */
1184 m = pow2_mask (i) << (32 - i);
1187 im->fib_masks[i] = clib_host_to_net_u32 (m);
1190 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1192 /* Create FIB with index 0 and table id of 0. */
1193 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1194 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1198 pn = pg_get_node (ip4_lookup_node.index);
1199 pn->unformat_edit = unformat_pg_ip4_header;
1203 ethernet_arp_header_t h;
1205 memset (&h, 0, sizeof (h));
1207 /* Set target ethernet address to all zeros. */
1208 memset (h.ip4_over_ethernet[1].ethernet, 0,
1209 sizeof (h.ip4_over_ethernet[1].ethernet));
/* _16 stores a 16-bit field in network byte order, _8 stores a byte
   as-is. */
1211 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1212 #define _8(f,v) h.f = v;
1213 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1214 _16 (l3_type, ETHERNET_TYPE_IP4);
1215 _8 (n_l2_address_bytes, 6);
1216 _8 (n_l3_address_bytes, 4);
1217 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1221 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1224 /* alloc chunk size */ 8,
1231 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record shared by the ip4 forwarding nodes in this file.  The
   formatters below read fib_index, dpo_index, flow_hash and packet_data
   from it. */
1235 /* Adjacency taken. */
1240 /* Packet data, possibly *after* rewrite. */
1241 u8 packet_data[64 - 1 * sizeof (u32)];
1243 ip4_forward_next_trace_t;
/* Trace formatter: consumes (vm, node, trace record) from the va_list, the
   first two unused, and appends the captured packet bytes formatted as an
   IPv4 header, preceded by white space of the current indent. */
1246 format_ip4_forward_next_trace (u8 * s, va_list * args)
1248 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1249 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1250 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1251 uword indent = format_get_indent (s);
1252 s = format (s, "%U%U",
1253 format_white_space, indent,
1254 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for lookup traces: prints the record's fib index, dpo
   index and flow hash on one line, then the captured packet bytes as an IPv4
   header on the next line at the current indent.  The vm and node arguments
   are consumed but unused. */
1259 format_ip4_lookup_trace (u8 * s, va_list * args)
1261 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1262 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1263 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1264 uword indent = format_get_indent (s);
1266 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1267 t->fib_index, t->dpo_index, t->flow_hash);
1268 s = format (s, "\n%U%U",
1269 format_white_space, indent,
1270 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite traces: prints the dpo index, the adjacency it
   names (via format_ip_adjacency) and the flow hash, then the captured
   packet bytes via format_ip_adjacency_packet_data on the next line.
   NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index; the trace-capture code stores either the buffer's TX
   sw_if_index or a FIB index in one field - confirm the label is intended. */
1275 format_ip4_rewrite_trace (u8 * s, va_list * args)
1277 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1278 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1279 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1280 uword indent = format_get_indent (s);
1282 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1283 t->fib_index, t->dpo_index, format_ip_adjacency,
1284 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1285 s = format (s, "\n%U%U",
1286 format_white_space, indent,
1287 format_ip_adjacency_packet_data,
1288 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1292 /* Common trace function for all ip4-forward next nodes. */
/* For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds an
   ip4_forward_next_trace_t record holding the adjacency/DPO index selected
   by which_adj_index, the flow hash, and the first sizeof (packet_data)
   bytes starting at the buffer's current position.  Buffers are handled in
   pairs first (with a prefetch of the following two) and then singly. */
1294 ip4_forward_next_trace (vlib_main_t * vm,
1295 vlib_node_runtime_t * node,
1296 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1299 ip4_main_t *im = &ip4_main;
1301 n_left = frame->n_vectors;
1302 from = vlib_frame_vector_args (frame);
1307 vlib_buffer_t *b0, *b1;
1308 ip4_forward_next_trace_t *t0, *t1;
1310 /* Prefetch next iteration. */
1311 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1312 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1317 b0 = vlib_get_buffer (vm, bi0);
1318 b1 = vlib_get_buffer (vm, bi1);
1320 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1322 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1323 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1324 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Value selected below: the buffer's TX sw_if_index when it is set (not ~0),
   otherwise the FIB index bound to the buffer's RX interface. */
1326 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1327 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1328 vec_elt (im->fib_index_by_sw_if_index,
1329 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1331 clib_memcpy (t0->packet_data,
1332 vlib_buffer_get_current (b0),
1333 sizeof (t0->packet_data));
1335 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1337 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1338 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1339 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1341 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1342 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1343 vec_elt (im->fib_index_by_sw_if_index,
1344 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1345 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1346 sizeof (t1->packet_data));
/* Single-buffer pass: same capture as above for one buffer at a time. */
1356 ip4_forward_next_trace_t *t0;
1360 b0 = vlib_get_buffer (vm, bi0);
1362 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1364 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1365 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1366 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1368 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1369 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1370 vec_elt (im->fib_index_by_sw_if_index,
1371 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1372 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1373 sizeof (t0->packet_data));
/* Common body of ip4_drop and ip4_punt: passes the frame's buffer indices to
   vlib_error_drop_buffers together with ip4_input_node.index and error_code,
   then records traces (using the VLIB_TX adjacency index) when the node has
   VLIB_NODE_FLAG_TRACE set. */
1381 ip4_drop_or_punt (vlib_main_t * vm,
1382 vlib_node_runtime_t * node,
1383 vlib_frame_t * frame, ip4_error_t error_code)
1385 u32 *buffers = vlib_frame_vector_args (frame);
1386 uword n_packets = frame->n_vectors;
1388 vlib_error_drop_buffers (vm, node, buffers,
1392 ip4_input_node.index, error_code);
1394 if (node->flags & VLIB_NODE_FLAG_TRACE)
1395 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for "drop": ip4_drop_or_punt with IP4_ERROR_ADJACENCY_DROP. */
1401 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1403 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function for "punt": ip4_drop_or_punt with IP4_ERROR_ADJACENCY_PUNT. */
1407 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1409 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph-node registration for ip4_drop: vectors of u32 buffer indices,
   traces formatted by format_ip4_forward_next_trace.  The multiarch macro
   below ties ip4_drop to this node. */
1413 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1415 .function = ip4_drop,.
1417 .vector_size = sizeof (u32),
1418 .format_trace = format_ip4_forward_next_trace,
1425 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph-node registration for ip4_punt: vectors of u32 buffer indices,
   traces formatted by format_ip4_forward_next_trace.  The multiarch macro
   below ties ip4_punt to this node. */
1427 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1429 .function = ip4_punt,
1431 .vector_size = sizeof (u32),
1432 .format_trace = format_ip4_forward_next_trace,
1439 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1442 /* Compute TCP/UDP/ICMP4 checksum in software. */
/* The sum is seeded with the payload length plus (protocol << 16), then the
   source and destination addresses are added, then the payload bytes.  The
   payload may continue in chained buffers, which are followed through
   p0->next_buffer.  The 16-bit result is the bitwise complement of
   ip_csum_fold (sum0). */
1444 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1448 u32 ip_header_length, payload_length_host_byte_order;
1449 u32 n_this_buffer, n_bytes_left;
1451 void *data_this_buffer;
1453 /* Initialize checksum with ip header. */
1454 ip_header_length = ip4_header_bytes (ip0);
1455 payload_length_host_byte_order =
1456 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1458 clib_host_to_net_u32 (payload_length_host_byte_order +
1459 (ip0->protocol << 16));
/* 32-bit words: add the two addresses separately.  Otherwise a single
   unaligned u64 load starting at src_address is added - presumably covering
   source and destination together. */
1461 if (BITS (uword) == 32)
1464 ip_csum_with_carry (sum0,
1465 clib_mem_unaligned (&ip0->src_address, u32));
1467 ip_csum_with_carry (sum0,
1468 clib_mem_unaligned (&ip0->dst_address, u32));
1472 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Start with the whole payload, but if header + payload exceeds what the
   first buffer holds, limit this pass to the bytes actually present after
   the IP header (or 0 if the buffer does not even hold the header). */
1474 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1475 data_this_buffer = (void *) ip0 + ip_header_length;
1476 if (n_this_buffer + ip_header_length > p0->current_length)
1478 p0->current_length >
1479 ip_header_length ? p0->current_length - ip_header_length : 0;
/* Accumulate this buffer's bytes; when bytes remain, move on to the next
   buffer in the chain (asserted to exist). */
1482 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1483 n_bytes_left -= n_this_buffer;
1484 if (n_bytes_left == 0)
1487 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1488 p0 = vlib_get_buffer (vm, p0->next_buffer);
1489 data_this_buffer = vlib_buffer_get_current (p0);
1490 n_this_buffer = p0->current_length;
1493 sum16 = ~ip_csum_fold (sum0);
/* Validates the L4 checksum of a TCP or UDP packet (asserted) and records
   the outcome in p0->flags: IP_BUFFER_L4_CHECKSUM_COMPUTED is always set,
   IP_BUFFER_L4_CHECKSUM_CORRECT is set when the packet is UDP with a zero
   checksum field or when the computed checksum value is 0. */
1499 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1501 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1505 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1506 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): (ip0 + 1) places the L4 header directly after a fixed-size
   ip4_header_t, i.e. it does not account for IP options - confirm that is
   acceptable for callers. */
1508 udp0 = (void *) (ip0 + 1);
1509 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1511 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1512 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1516 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is shifted into the CHECKSUM_CORRECT bit position. */
1518 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1519 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Registers the "ip4-local" feature arc, whose only start node is the
   "ip4-local" graph node. */
1525 VNET_FEATURE_ARC_INIT (ip4_local) =
1527 .arc_name = "ip4-local",
1528 .start_nodes = VNET_FEATURES ("ip4-local"),
/*
 * Shared worker for the "ip4-local" and "ip4-local-end-of-arc" nodes.
 *
 * For each packet the visible code:
 *   - looks up the packet's SOURCE address in the mtrie of the FIB selected
 *     for it, and stores the resulting load-balance index in
 *     ip.adj_index[VLIB_RX] and ip.adj_index[VLIB_TX];
 *   - checks the TCP/UDP checksum and, for UDP, that the UDP length does not
 *     exceed the IP length;
 *   - derives an error code, using IP4_ERROR_UNKNOWN_PROTOCOL as the
 *     "nothing wrong so far" value;
 *   - picks the next node from lm->local_next_by_ip_protocol[], or
 *     IP_LOCAL_NEXT_DROP when an error was found;
 *   - when head_of_feature_arc is non-zero, starts the ip4-local feature arc
 *     for packets without an error.
 *
 * Packets are processed two at a time, then one at a time.  Returns
 * frame->n_vectors.
 */
1533 ip4_local_inline (vlib_main_t * vm,
1534 vlib_node_runtime_t * node,
1535 vlib_frame_t * frame, int head_of_feature_arc)
1537 ip4_main_t *im = &ip4_main;
1538 ip_lookup_main_t *lm = &im->lookup_main;
1539 ip_local_next_t next_index;
1540 u32 *from, *to_next, n_left_from, n_left_to_next;
1541 vlib_node_runtime_t *error_node =
1542 vlib_node_get_runtime (vm, ip4_input_node.index);
1543 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1545 from = vlib_frame_vector_args (frame);
1546 n_left_from = frame->n_vectors;
1547 next_index = node->cached_next_index;
/* Trace records for the whole frame are captured before any processing. */
1549 if (node->flags & VLIB_NODE_FLAG_TRACE)
1550 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1552 while (n_left_from > 0)
1554 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
1556 while (n_left_from >= 4 && n_left_to_next >= 2)
1558 vlib_buffer_t *p0, *p1;
1559 ip4_header_t *ip0, *ip1;
1560 udp_header_t *udp0, *udp1;
1561 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1562 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1563 const dpo_id_t *dpo0, *dpo1;
1564 const load_balance_t *lb0, *lb1;
1565 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1566 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1567 i32 len_diff0, len_diff1;
1568 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1569 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1570 u32 sw_if_index0, sw_if_index1;
1572 pi0 = to_next[0] = from[0];
1573 pi1 = to_next[1] = from[1];
1577 n_left_to_next -= 2;
1579 next0 = next1 = IP_LOCAL_NEXT_DROP;
1581 p0 = vlib_get_buffer (vm, pi0);
1582 p1 = vlib_get_buffer (vm, pi1);
1584 ip0 = vlib_buffer_get_current (p0);
1585 ip1 = vlib_buffer_get_current (p1);
1587 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1588 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1590 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1591 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
/* NOTE(review): fib_index0/fib_index1 are loaded from
   fib_index_by_sw_if_index here and again just below with the same
   arguments; the second load looks redundant. */
1593 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1594 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1596 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1598 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1599 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1601 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1603 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1604 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
/* Start the source-address lookup; the remaining mtrie steps are
   interleaved with the checks that follow. */
1606 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1607 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1609 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1610 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1612 /* Treat IP frag packets as "experimental" protocol for now
1613 until support of IP frag reassembly is implemented */
1614 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1615 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
/* When this is not the head of the arc both errors are preset to
   IP4_ERROR_UNKNOWN_PROTOCOL.  NOTE(review): the rest of this branch is not
   visible in this excerpt. */
1617 if (head_of_feature_arc == 0)
1619 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1623 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1624 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1625 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1626 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1631 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1632 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1634 udp0 = ip4_next_header (ip0);
1635 udp1 = ip4_next_header (ip1);
1637 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1638 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1639 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1642 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1644 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1646 /* Verify UDP length. */
1647 ip_len0 = clib_net_to_host_u16 (ip0->length);
1648 ip_len1 = clib_net_to_host_u16 (ip1->length);
1649 udp_len0 = clib_net_to_host_u16 (udp0->length);
1650 udp_len1 = clib_net_to_host_u16 (udp1->length);
1652 len_diff0 = ip_len0 - udp_len0;
1653 len_diff1 = ip_len1 - udp_len1;
1655 len_diff0 = is_udp0 ? len_diff0 : 0;
1656 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not TCP/UDP with an already-known good
   checksum, so checksums are validated in software where not yet computed. */
1658 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1659 & good_tcp_udp0 & good_tcp_udp1)))
1664 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1665 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1667 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1668 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1673 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1674 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1676 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1677 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
/* A UDP length larger than the IP total length (negative difference) marks
   the packet as bad. */
1681 good_tcp_udp0 &= len_diff0 >= 0;
1682 good_tcp_udp1 &= len_diff1 >= 0;
1685 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1687 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* From here on IP4_ERROR_UNKNOWN_PROTOCOL doubles as "no error found yet":
   later tests only replace the error while it still has this value. */
1689 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1691 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1692 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* TCP_CHECKSUM + is_udp yields UDP_CHECKSUM for UDP packets; the ASSERT
   guards that the two error codes are adjacent. */
1694 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1695 error0 = (is_tcp_udp0 && !good_tcp_udp0
1696 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1697 error1 = (is_tcp_udp1 && !good_tcp_udp1
1698 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1701 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1703 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1705 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1706 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1707 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1709 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1710 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1711 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1713 lb0 = load_balance_get (lbi0);
1714 lb1 = load_balance_get (lbi1);
1715 dpo0 = load_balance_get_bucket_i (lb0, 0);
1716 dpo1 = load_balance_get_bucket_i (lb1, 0);
1719 * Must have a route to source otherwise we drop the packet.
1720 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1723 * - the source is a recieve => it's from us => bogus, do this
1724 * first since it sets a different error code.
1725 * - uRPF check for any route to source - accept if passes.
1726 * - allow packets destined to the broadcast address from unknown sources
1728 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1729 dpo0->dpoi_type == DPO_RECEIVE) ?
1730 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1731 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1732 !fib_urpf_check_size (lb0->lb_urpf) &&
1733 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1734 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1735 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1736 dpo1->dpoi_type == DPO_RECEIVE) ?
1737 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1738 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1739 !fib_urpf_check_size (lb1->lb_urpf) &&
1740 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1741 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
/* Dispatch on protocol, but any real error forces IP_LOCAL_NEXT_DROP. */
1745 next0 = lm->local_next_by_ip_protocol[proto0];
1746 next1 = lm->local_next_by_ip_protocol[proto1];
1749 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1751 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1753 p0->error = error0 ? error_node->errors[error0] : 0;
1754 p1->error = error1 ? error_node->errors[error1] : 0;
/* Only packets without an error enter the ip4-local feature arc. */
1756 if (head_of_feature_arc)
1758 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1759 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1760 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1761 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1764 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1765 n_left_to_next, pi0, pi1,
/* Single loop: remaining packets one at a time, same steps as above. */
1769 while (n_left_from > 0 && n_left_to_next > 0)
1774 ip4_fib_mtrie_t *mtrie0;
1775 ip4_fib_mtrie_leaf_t leaf0;
1776 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1778 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1779 load_balance_t *lb0;
1780 const dpo_id_t *dpo0;
1783 pi0 = to_next[0] = from[0];
1787 n_left_to_next -= 1;
1789 next0 = IP_LOCAL_NEXT_DROP;
1791 p0 = vlib_get_buffer (vm, pi0);
1793 ip0 = vlib_buffer_get_current (p0);
1795 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1797 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1799 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1802 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1803 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1805 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1807 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1809 /* Treat IP frag packets as "experimental" protocol for now
1810 until support of IP frag reassembly is implemented */
1811 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1813 if (head_of_feature_arc == 0)
1815 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1819 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1820 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1824 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1826 udp0 = ip4_next_header (ip0);
1828 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1829 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1832 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1834 /* Verify UDP length. */
1835 ip_len0 = clib_net_to_host_u16 (ip0->length);
1836 udp_len0 = clib_net_to_host_u16 (udp0->length);
1838 len_diff0 = ip_len0 - udp_len0;
1840 len_diff0 = is_udp0 ? len_diff0 : 0;
1842 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1847 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1848 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1850 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1851 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1855 good_tcp_udp0 &= len_diff0 >= 0;
1858 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1860 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1862 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1864 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1865 error0 = (is_tcp_udp0 && !good_tcp_udp0
1866 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1869 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
/* NOTE(review): adj_index[VLIB_TX] is stored here and stored again, together
   with adj_index[VLIB_RX], two statements further down. */
1871 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1872 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1874 lb0 = load_balance_get (lbi0);
1875 dpo0 = load_balance_get_bucket_i (lb0, 0);
1877 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1878 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1880 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1881 dpo0->dpoi_type == DPO_RECEIVE) ?
1882 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1883 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1884 !fib_urpf_check_size (lb0->lb_urpf) &&
1885 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1886 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1890 next0 = lm->local_next_by_ip_protocol[proto0];
1893 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1895 p0->error = error0 ? error_node->errors[error0] : 0;
1897 if (head_of_feature_arc)
1899 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1900 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1903 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1904 n_left_to_next, pi0, next0);
1908 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1911 return frame->n_vectors;
/* Node function of "ip4-local": runs ip4_local_inline with
   head_of_feature_arc set to 1. */
1915 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1917 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Graph-node registration for "ip4-local".  The next nodes visible here are
   "error-drop", "error-punt", "ip4-udp-lookup" and "ip4-icmp-input";
   ip4_register_protocol adds further next nodes to this node at run time. */
1921 VLIB_REGISTER_NODE (ip4_local_node) =
1923 .function = ip4_local,
1924 .name = "ip4-local",
1925 .vector_size = sizeof (u32),
1926 .format_trace = format_ip4_forward_next_trace,
1927 .n_next_nodes = IP_LOCAL_N_NEXT,
1930 [IP_LOCAL_NEXT_DROP] = "error-drop",
1931 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1932 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1933 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1937 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Node function of "ip4-local-end-of-arc": runs ip4_local_inline with
   head_of_feature_arc set to 0, so the feature arc is not started again. */
1940 ip4_local_end_of_arc (vlib_main_t * vm,
1941 vlib_node_runtime_t * node, vlib_frame_t * frame)
1943 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
/* Graph-node registration for "ip4-local-end-of-arc"; declared as a sibling
   of "ip4-local" and using the same trace formatter. */
1947 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1948 .function = ip4_local_end_of_arc,
1949 .name = "ip4-local-end-of-arc",
1950 .vector_size = sizeof (u32),
1952 .format_trace = format_ip4_forward_next_trace,
1953 .sibling_of = "ip4-local",
1956 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Places "ip4-local-end-of-arc" on the "ip4-local" feature arc with no
   runs_before constraint. */
1958 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1959 .arc_name = "ip4-local",
1960 .node_name = "ip4-local-end-of-arc",
1961 .runs_before = 0, /* not before any other features */
/* Registers node_index as the handler for IP protocol number `protocol`:
   adds it as a next node of ip4-local and stores the resulting next index
   in lm->local_next_by_ip_protocol[protocol].  The protocol number is
   asserted to be within the table. */
1966 ip4_register_protocol (u32 protocol, u32 node_index)
1968 vlib_main_t *vm = vlib_get_main ();
1969 ip4_main_t *im = &ip4_main;
1970 ip_lookup_main_t *lm = &im->lookup_main;
1972 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1973 lm->local_next_by_ip_protocol[protocol] =
1974 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler for "show ip local": prints a heading, then the number of
   every protocol whose entry in lm->local_next_by_ip_protocol is not
   IP_LOCAL_NEXT_PUNT.  The input and cmd arguments are not used by the
   visible code. */
1977 static clib_error_t *
1978 show_ip_local_command_fn (vlib_main_t * vm,
1979 unformat_input_t * input, vlib_cli_command_t * cmd)
1981 ip4_main_t *im = &ip4_main;
1982 ip_lookup_main_t *lm = &im->lookup_main;
1985 vlib_cli_output (vm, "Protocols handled by ip4_local");
1986 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1988 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1989 vlib_cli_output (vm, "%d", i);
1997 * Display the set of protocols handled by the local IPv4 stack.
2000 * Example of how to display local protocol table:
2001 * @cliexstart{show ip local}
2002 * Protocols handled by ip4_local
2009 VLIB_CLI_COMMAND (show_ip_local, static) =
/* Binds the "show ip local" path to show_ip_local_command_fn. */
2011 .path = "show ip local",
2012 .function = show_ip_local_command_fn,
2013 .short_help = "show ip local",
2018 ip4_arp_inline (vlib_main_t * vm,
2019 vlib_node_runtime_t * node,
2020 vlib_frame_t * frame, int is_glean)
2022 vnet_main_t *vnm = vnet_get_main ();
2023 ip4_main_t *im = &ip4_main;
2024 ip_lookup_main_t *lm = &im->lookup_main;
2025 u32 *from, *to_next_drop;
2026 uword n_left_from, n_left_to_next_drop, next_index;
2027 static f64 time_last_seed_change = -1e100;
2028 static u32 hash_seeds[3];
2029 static uword hash_bitmap[256 / BITS (uword)];
2032 if (node->flags & VLIB_NODE_FLAG_TRACE)
2033 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2035 time_now = vlib_time_now (vm);
2036 if (time_now - time_last_seed_change > 1e-3)
2039 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2040 sizeof (hash_seeds));
2041 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2042 hash_seeds[i] = r[i];
2044 /* Mark all hash keys as been no-seen before. */
2045 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2048 time_last_seed_change = time_now;
2051 from = vlib_frame_vector_args (frame);
2052 n_left_from = frame->n_vectors;
2053 next_index = node->cached_next_index;
2054 if (next_index == IP4_ARP_NEXT_DROP)
2055 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2057 while (n_left_from > 0)
2059 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2060 to_next_drop, n_left_to_next_drop);
2062 while (n_left_from > 0 && n_left_to_next_drop > 0)
2064 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2065 ip_adjacency_t *adj0;
2072 p0 = vlib_get_buffer (vm, pi0);
2074 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2075 adj0 = ip_get_adjacency (lm, adj_index0);
2076 ip0 = vlib_buffer_get_current (p0);
2082 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2083 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2088 * this is the Glean case, so we are ARPing for the
2089 * packet's destination
2091 a0 ^= ip0->dst_address.data_u32;
2095 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2099 hash_v3_finalize32 (a0, b0, c0);
2101 c0 &= BITS (hash_bitmap) - 1;
2102 c0 = c0 / BITS (uword);
2103 m0 = (uword) 1 << (c0 % BITS (uword));
2105 bm0 = hash_bitmap[c0];
2106 drop0 = (bm0 & m0) != 0;
2108 /* Mark it as seen. */
2109 hash_bitmap[c0] = bm0 | m0;
2113 to_next_drop[0] = pi0;
2115 n_left_to_next_drop -= 1;
2118 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2119 IP4_ARP_ERROR_REQUEST_SENT];
2122 * the adj has been updated to a rewrite but the node the DPO that got
2123 * us here hasn't - yet. no big deal. we'll drop while we wait.
2125 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2132 * Can happen if the control-plane is programming tables
2133 * with traffic flowing; at least that's today's lame excuse.
2135 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2136 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2138 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2141 /* Send ARP request. */
2145 ethernet_arp_header_t *h0;
2146 vnet_hw_interface_t *hw_if0;
2149 vlib_packet_template_get_packet (vm,
2150 &im->ip4_arp_request_packet_template,
2153 /* Add rewrite/encap string for ARP packet. */
2154 vnet_rewrite_one_header (adj0[0], h0,
2155 sizeof (ethernet_header_t));
2157 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2159 /* Src ethernet address in ARP header. */
2160 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2162 sizeof (h0->ip4_over_ethernet[0].ethernet));
2166 /* The interface's source address is stashed in the Glean Adj */
2167 h0->ip4_over_ethernet[0].ip4 =
2168 adj0->sub_type.glean.receive_addr.ip4;
2170 /* Copy in destination address we are requesting. This is the
2171 * glean case, so it's the packet's destination.*/
2172 h0->ip4_over_ethernet[1].ip4.data_u32 =
2173 ip0->dst_address.data_u32;
2177 /* Src IP address in ARP header. */
2178 if (ip4_src_address_for_packet (lm, sw_if_index0,
2180 ip4_over_ethernet[0].ip4))
2182 /* No source address available */
2184 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2185 vlib_buffer_free (vm, &bi0, 1);
2189 /* Copy in destination address we are requesting from the
2191 h0->ip4_over_ethernet[1].ip4.data_u32 =
2192 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2195 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2196 b0 = vlib_get_buffer (vm, bi0);
2197 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2199 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2201 vlib_set_next_frame_buffer (vm, node,
2202 adj0->rewrite_header.next_index,
2207 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2210 return frame->n_vectors;
/* Node function of "ip4-arp": ip4_arp_inline with is_glean set to 0. */
2214 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2216 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of "ip4-glean": ip4_arp_inline with is_glean set to 1. */
2220 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2222 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions indexed by IP4_ARP_ERROR_* code; used as the
   error_strings table of both the ip4-arp and ip4-glean nodes. */
2225 static char *ip4_arp_error_strings[] = {
2226 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2227 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2228 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2229 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2230 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2231 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph-node registration for "ip4-arp": function ip4_arp, error counters
   from ip4_arp_error_strings, and "error-drop" as the IP4_ARP_NEXT_DROP
   next node. */
2234 VLIB_REGISTER_NODE (ip4_arp_node) =
2236 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2237 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2238 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2239 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2241 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph-node registration for "ip4-glean": function ip4_glean, sharing the
   error strings and next-node layout of the ip4-arp node. */
2244 VLIB_REGISTER_NODE (ip4_glean_node) =
2246 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2247 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2248 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2249 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2251 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* foreach_notrace_ip4_arp_error lists ip4-arp error codes (the list itself
   is not visible in this excerpt).  arp_notrace_init, registered as an init
   function below, expands that list and calls
   vnet_pcap_drop_trace_filter_add_del for the ip4-arp node's counter of each
   listed error, so that those drops are left out of the pcap drop trace. */
2254 #define foreach_notrace_ip4_arp_error \
2261 arp_notrace_init (vlib_main_t * vm)
2263 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2265 /* don't trace ARP request packets */
2267 vnet_pcap_drop_trace_filter_add_del \
2268 (rt->errors[IP4_ARP_ERROR_##a], \
2270 foreach_notrace_ip4_arp_error;
2275 VLIB_INIT_FUNCTION (arp_notrace_init);
2278 /* Send an ARP request to see if given destination is reachable on given interface. */
/* Returns an error when the interface is not admin-up, or when no interface
   address matching dst is found (in which case vnm->api_errno is also set
   to VNET_API_ERROR_NO_MATCHING_INTERFACE).  Otherwise builds an ARP request
   from the template, fills in the sender hardware address, sender IP (src)
   and target IP (dst), prepends the rewrite of the address's neighbor-probe
   adjacency, hands the buffer to the hardware interface's output node and
   returns 0. */
2280 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2282 vnet_main_t *vnm = vnet_get_main ();
2283 ip4_main_t *im = &ip4_main;
2284 ethernet_arp_header_t *h;
2286 ip_interface_address_t *ia;
2287 ip_adjacency_t *adj;
2288 vnet_hw_interface_t *hi;
2289 vnet_sw_interface_t *si;
2293 si = vnet_get_sw_interface (vnm, sw_if_index);
2295 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2297 return clib_error_return (0, "%U: interface %U down",
2298 format_ip4_address, dst,
2299 format_vnet_sw_if_index_name, vnm,
2304 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2307 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2308 return clib_error_return
2310 "no matching interface address for destination %U (interface %U)",
2311 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2315 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2318 vlib_packet_template_get_packet (vm,
2319 &im->ip4_arp_request_packet_template,
2322 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender fields come from the interface, target IP is the probed address. */
2324 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2325 sizeof (h->ip4_over_ethernet[0].ethernet));
2327 h->ip4_over_ethernet[0].ip4 = src[0];
2328 h->ip4_over_ethernet[1].ip4 = dst[0];
2330 b = vlib_get_buffer (vm, bi);
2331 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2332 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2334 /* Add encapsulation string for software interface (e.g. ethernet header). */
2335 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2336 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the request to the hardware interface's output node in a frame. */
2339 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2340 u32 *to_next = vlib_frame_vector_args (f);
2343 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2346 return /* no error */ 0;
/* Next-node indices used by the ip4 rewrite code: DROP is the default next,
   ICMP_ERROR is chosen when the TTL expires. */
2351 IP4_REWRITE_NEXT_DROP,
2352 IP4_REWRITE_NEXT_ICMP_ERROR,
2353 } ip4_rewrite_next_t;
/* Shared worker behind the ip4-rewrite, ip4-midchain and ip4-rewrite-mcast
 * node functions.  For every buffer in the frame it reads the adjacency
 * index from vnet_buffer (b)->ip.adj_index[VLIB_TX], patches the header
 * checksum for the TTL decrement unless the buffer carries
 * VNET_BUFFER_LOCALLY_ORIGINATED, compares the chain length with the
 * adjacency's max_l3_packet_bytes, applies the adjacency rewrite and
 * enqueues to the adjacency's next_index.  A buffer whose error is not
 * IP4_ERROR_NONE keeps IP4_REWRITE_NEXT_DROP, or goes to
 * IP4_REWRITE_NEXT_ICMP_ERROR on TTL expiry.
 *
 * do_counters, is_midchain and is_mcast are passed as literal 0/1 by the
 * three wrappers.  The conditions that test them are not visible in this
 * listing; presumably they guard the adjacency-counter, midchain
 * fixup_func and vnet_fixup_one_header steps respectively -- TODO confirm.
 *
 * Returns frame->n_vectors. */
2356 ip4_rewrite_inline (vlib_main_t * vm,
2357 vlib_node_runtime_t * node,
2358 vlib_frame_t * frame,
2359 int do_counters, int is_midchain, int is_mcast)
2361 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2362 u32 *from = vlib_frame_vector_args (frame);
2363 u32 n_left_from, n_left_to_next, *to_next, next_index;
/* Errors are looked up in the ip4-input node's runtime, so they are
 * attributed to that node rather than to the rewrite node itself. */
2364 vlib_node_runtime_t *error_node =
2365 vlib_node_get_runtime (vm, ip4_input_node.index);
2367 n_left_from = frame->n_vectors;
2368 next_index = node->cached_next_index;
/* Passed to the combined-counter prefetch/increment calls below. */
2369 u32 cpu_index = os_get_cpu_number ();
2371 while (n_left_from > 0)
2373 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-buffer loop: handles two packets per pass and needs two more in
 * the frame (from[2], from[3]) to prefetch for the following pass. */
2375 while (n_left_from >= 4 && n_left_to_next >= 2)
2377 ip_adjacency_t *adj0, *adj1;
2378 vlib_buffer_t *p0, *p1;
2379 ip4_header_t *ip0, *ip1;
2380 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2381 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2382 u32 tx_sw_if_index0, tx_sw_if_index1;
2384 /* Prefetch next iteration. */
2386 vlib_buffer_t *p2, *p3;
2388 p2 = vlib_get_buffer (vm, from[2]);
2389 p3 = vlib_get_buffer (vm, from[3]);
2391 vlib_prefetch_buffer_header (p2, STORE);
2392 vlib_prefetch_buffer_header (p3, STORE);
2394 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2395 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
/* Speculatively place both buffer indices in the current next frame;
 * vlib_validate_buffer_enqueue_x2 at the end of the pass receives the
 * actual next0/next1. */
2398 pi0 = to_next[0] = from[0];
2399 pi1 = to_next[1] = from[1];
2404 n_left_to_next -= 2;
2406 p0 = vlib_get_buffer (vm, pi0);
2407 p1 = vlib_get_buffer (vm, pi1);
2409 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2410 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2413 * pre-fetch the per-adjacency counters
2417 vlib_prefetch_combined_counter (&adjacency_counters,
2418 cpu_index, adj_index0);
2419 vlib_prefetch_combined_counter (&adjacency_counters,
2420 cpu_index, adj_index1);
2423 ip0 = vlib_buffer_get_current (p0);
2424 ip1 = vlib_buffer_get_current (p1);
/* Start from "no error, but drop"; next0/next1 are only replaced on the
 * TTL-expired path or once the packet has passed every check. */
2426 error0 = error1 = IP4_ERROR_NONE;
2427 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2429 /* Decrement TTL & update checksum.
2430 Works either endian, so no need for byte swap. */
2431 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2433 i32 ttl0 = ip0->ttl;
2435 /* Input node should have rejected packets with ttl 0. */
2436 ASSERT (ip0->ttl > 0);
/* Incremental checksum update: TTL is the high-order byte of its 16-bit
 * header word, so a decrement of one lowers that word by 0x0100 and the
 * one's-complement checksum rises by the same amount; the following line
 * folds the carry back in. */
2438 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2439 checksum0 += checksum0 >= 0xffff;
2441 ip0->checksum = checksum0;
2446 * If the ttl drops below 1 when forwarding, generate
2449 if (PREDICT_FALSE (ttl0 <= 0))
2451 error0 = IP4_ERROR_TIME_EXPIRED;
2452 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2453 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2454 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2456 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2459 /* Verify checksum. */
2460 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
/* The locally-originated flag is cleared on this buffer. */
2464 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2466 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2468 i32 ttl1 = ip1->ttl;
2470 /* Input node should have rejected packets with ttl 0. */
2471 ASSERT (ip1->ttl > 0);
2473 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2474 checksum1 += checksum1 >= 0xffff;
2476 ip1->checksum = checksum1;
2481 * If the ttl drops below 1 when forwarding, generate
2484 if (PREDICT_FALSE (ttl1 <= 0))
2486 error1 = IP4_ERROR_TIME_EXPIRED;
2487 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2488 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2489 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2491 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2494 /* Verify checksum. */
/* NOTE(review): ip0 is re-asserted here inside the p1 branch; looks
 * redundant with the ip0 assert above -- confirm it is intentional. */
2495 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2496 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2500 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2503 /* Rewrite packet header and updates lengths. */
2504 adj0 = ip_get_adjacency (lm, adj_index0);
2505 adj1 = ip_get_adjacency (lm, adj_index1);
2507 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2508 rw_len0 = adj0[0].rewrite_header.data_bytes;
2509 rw_len1 = adj1[0].rewrite_header.data_bytes;
2510 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2511 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2513 /* Check MTU of outgoing interface. */
/* NOTE(review): unlike the single-buffer loop below, no assignment of
 * p0->error / p1->error from error_node is visible in this loop --
 * confirm against the full file. */
2515 (vlib_buffer_length_in_chain (vm, p0) >
2517 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2520 (vlib_buffer_length_in_chain (vm, p1) >
2522 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2525 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2526 * to see the IP header */
2527 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2529 next0 = adj0[0].rewrite_header.next_index;
/* Move the buffer start back by the rewrite length so the current data
 * pointer covers the prepended rewrite bytes. */
2530 p0->current_data -= rw_len0;
2531 p0->current_length += rw_len0;
2532 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2533 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2536 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2537 vnet_feature_arc_start (lm->output_feature_arc_index,
2538 tx_sw_if_index0, &next0, p0);
2540 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2542 next1 = adj1[0].rewrite_header.next_index;
2543 p1->current_data -= rw_len1;
2544 p1->current_length += rw_len1;
2546 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2547 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2550 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2551 vnet_feature_arc_start (lm->output_feature_arc_index,
2552 tx_sw_if_index1, &next1, p1);
2555 /* Guess we are only writing on simple Ethernet header. */
2556 vnet_rewrite_two_headers (adj0[0], adj1[0],
2557 ip0, ip1, sizeof (ethernet_header_t));
2560 * Bump the per-adjacency counters
2564 vlib_increment_combined_counter
2565 (&adjacency_counters,
2568 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2570 vlib_increment_combined_counter
2571 (&adjacency_counters,
2574 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2579 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2580 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2585 * copy bytes from the IP address into the MAC rewrite
2587 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2588 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2591 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2592 to_next, n_left_to_next,
2593 pi0, pi1, next0, next1);
/* Single-buffer loop: handles whatever the dual loop left over. */
2596 while (n_left_from > 0 && n_left_to_next > 0)
2598 ip_adjacency_t *adj0;
2601 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2602 u32 tx_sw_if_index0;
2604 pi0 = to_next[0] = from[0];
2606 p0 = vlib_get_buffer (vm, pi0);
2608 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2610 adj0 = ip_get_adjacency (lm, adj_index0);
2612 ip0 = vlib_buffer_get_current (p0);
2614 error0 = IP4_ERROR_NONE;
2615 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2617 /* Decrement TTL & update checksum. */
2618 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2620 i32 ttl0 = ip0->ttl;
/* Same incremental checksum adjustment as in the dual-buffer loop. */
2622 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2624 checksum0 += checksum0 >= 0xffff;
2626 ip0->checksum = checksum0;
2628 ASSERT (ip0->ttl > 0);
2634 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2636 if (PREDICT_FALSE (ttl0 <= 0))
2639 * If the ttl drops below 1 when forwarding, generate
2642 error0 = IP4_ERROR_TIME_EXPIRED;
2643 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2644 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2645 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2646 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2652 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2656 vlib_prefetch_combined_counter (&adjacency_counters,
2657 cpu_index, adj_index0);
2659 /* Guess we are only writing on simple Ethernet header. */
2660 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2664 * copy bytes from the IP address into the MAC rewrite
2666 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2669 /* Update packet buffer attributes/set output interface. */
2670 rw_len0 = adj0[0].rewrite_header.data_bytes;
2671 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2674 vlib_increment_combined_counter
2675 (&adjacency_counters,
2676 cpu_index, adj_index0, 1,
2677 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2679 /* Check MTU of outgoing interface. */
2680 error0 = (vlib_buffer_length_in_chain (vm, p0)
2681 > adj0[0].rewrite_header.max_l3_packet_bytes
2682 ? IP4_ERROR_MTU_EXCEEDED : error0);
/* Record the error, indexed into the ip4-input node's error table. */
2684 p0->error = error_node->errors[error0];
2686 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2687 * to see the IP header */
2688 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2690 p0->current_data -= rw_len0;
2691 p0->current_length += rw_len0;
2692 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2694 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2695 next0 = adj0[0].rewrite_header.next_index;
2699 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2703 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2704 vnet_feature_arc_start (lm->output_feature_arc_index,
2705 tx_sw_if_index0, &next0, p0);
2712 n_left_to_next -= 1;
2714 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2715 to_next, n_left_to_next,
2719 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2722 /* Need to do trace after rewrites to pick up new packet data. */
2723 if (node->flags & VLIB_NODE_FLAG_TRACE)
2724 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2726 return frame->n_vectors;
2730 /** @brief IPv4 rewrite node.
2733 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2734 header checksum, fetch the ip adjacency, check the outbound mtu,
2735 apply the adjacency rewrite, and send pkts to the adjacency
2736 rewrite header's rewrite_next_index.
2738 @param vm vlib_main_t corresponding to the current thread
2739 @param node vlib_node_runtime_t
2740 @param frame vlib_frame_t whose contents should be dispatched
2742 @par Graph mechanics: buffer metadata, next index usage
2745 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2746 - the rewrite adjacency index
2747 - <code>adj->lookup_next_index</code>
2748 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2749 the packet will be dropped.
2750 - <code>adj->rewrite_header</code>
2751 - Rewrite string length, rewrite string, next_index
2754 - <code>b->current_data, b->current_length</code>
2755 - Updated net of applying the rewrite string
2757 <em>Next Indices:</em>
2758 - <code> adj->rewrite_header.next_index </code>
/* Node function registered for "ip4-rewrite".  Runs ip4_rewrite_inline with
 * is_midchain = 0 and is_mcast = 0; do_counters is 1 only when
 * adj_are_counters_enabled () is true.  Returns ip4_rewrite_inline's result. */
2762 ip4_rewrite (vlib_main_t * vm,
2763 vlib_node_runtime_t * node, vlib_frame_t * frame)
2765 if (adj_are_counters_enabled ())
2766 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2768 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/* Node function registered for "ip4-midchain".  Runs ip4_rewrite_inline with
 * is_midchain = 1 and is_mcast = 0; do_counters is 1 only when
 * adj_are_counters_enabled () is true.  Returns ip4_rewrite_inline's result. */
2772 ip4_midchain (vlib_main_t * vm,
2773 vlib_node_runtime_t * node, vlib_frame_t * frame)
2775 if (adj_are_counters_enabled ())
2776 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2778 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/* Node function registered for "ip4-rewrite-mcast".  Runs ip4_rewrite_inline
 * with is_midchain = 0 and is_mcast = 1; do_counters is 1 only when
 * adj_are_counters_enabled () is true.  Returns ip4_rewrite_inline's result. */
2782 ip4_rewrite_mcast (vlib_main_t * vm,
2783 vlib_node_runtime_t * node, vlib_frame_t * frame)
2785 if (adj_are_counters_enabled ())
2786 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2788 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/* Registers the "ip4-rewrite" graph node, served by ip4_rewrite.  Its two
 * fixed next nodes are "error-drop" and "ip4-icmp-error", indexed by
 * ip4_rewrite_next_t. */
2792 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2793 .function = ip4_rewrite,
2794 .name = "ip4-rewrite",
2795 .vector_size = sizeof (u32),
2797 .format_trace = format_ip4_rewrite_trace,
2801 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2802 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2805 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Registers the "ip4-rewrite-mcast" graph node, served by ip4_rewrite_mcast
 * and declared a sibling of "ip4-rewrite". */
2807 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2808 .function = ip4_rewrite_mcast,
2809 .name = "ip4-rewrite-mcast",
2810 .vector_size = sizeof (u32),
2812 .format_trace = format_ip4_rewrite_trace,
2813 .sibling_of = "ip4-rewrite",
2815 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/* Registers the "ip4-midchain" graph node, served by ip4_midchain and
 * declared a sibling of "ip4-rewrite".
 * NOTE(review): this node uses format_ip4_forward_next_trace, whereas the two
 * other rewrite nodes use format_ip4_rewrite_trace -- confirm it is intended. */
2817 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2818 .function = ip4_midchain,
2819 .name = "ip4-midchain",
2820 .vector_size = sizeof (u32),
2821 .format_trace = format_ip4_forward_next_trace,
2822 .sibling_of = "ip4-rewrite",
2824 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* CLI handler for "set interface ip table <interface> <table-id>".
 *
 * Parses an interface and a numeric table id, builds an error if the
 * interface already has an IPv4 address, then finds or creates (and locks)
 * the unicast FIB and the multicast FIB for that table id and records their
 * indices in ip4_main's per-interface vectors.
 *
 * Parse and address-conflict failures produce a clib_error_t; the return
 * statement itself is not visible in this listing. */
2827 static clib_error_t *
2828 add_del_interface_table (vlib_main_t * vm,
2829 unformat_input_t * input, vlib_cli_command_t * cmd)
2831 vnet_main_t *vnm = vnet_get_main ();
2832 ip_interface_address_t *ia;
2833 clib_error_t *error = 0;
2834 u32 sw_if_index, table_id;
2838 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2840 error = clib_error_return (0, "unknown interface `%U'",
2841 format_unformat_error, input);
2845 if (unformat (input, "%d", &table_id))
2849 error = clib_error_return (0, "expected table id `%U'",
2850 format_unformat_error, input);
2855 * If the interface already has in IP address, then a change int
2856 * VRF is not allowed. The IP address applied must first be removed.
2857 * We do not do that automatically here, since VPP has no knowledge
2858 * of whether thoses subnets are valid in the destination VRF.
2861 foreach_ip_interface_address (&ip4_main.lookup_main,
2863 1 /* honor unnumbered */,
2867 a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2868 error = clib_error_return (0, "interface %U has address %U",
2869 format_vnet_sw_if_index_name, vnm,
2871 format_ip4_address, a);
2877 ip4_main_t *im = &ip4_main;
/* Unicast FIB: find or create the table, then bind the interface to it. */
2880 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2882 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2883 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
/* Multicast FIB: same steps; fib_index is reused to hold the mFIB index. */
2885 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2886 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2887 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2895 * Place the indicated interface into the supplied IPv4 FIB table (also known
2896 * as a VRF). If the FIB table does not exist, this command creates it. To
2897 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2898 * FIB table will only be displayed if a route has been added to the table, or
2899 * an IP Address is assigned to an interface in the table (which adds a route
2902 * @note IP addresses added after setting the interface IP table are added to
2903 * the indicated FIB table. If an IP address is added prior to changing the
2904 * table then this is an error. The control plane must remove these addresses
2905 * first and then change the table. VPP will not automatically move the
2906 * addresses from the old to the new table as it does not know the validity
2910 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2911 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Binds the CLI path "set interface ip table" to add_del_interface_table. */
2914 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2916 .path = "set interface ip table",
2917 .function = add_del_interface_table,
2918 .short_help = "set interface ip table <interface> <table-id>",
/* Cross-check helper used by the "test lookup" CLI.
 *
 * Resolves address a through the mtrie of FIB fib_index0 (step_one followed
 * by steps 1, 2 and 3), takes the index stored in the final leaf, and
 * returns non-zero when it equals the result of ip4_fib_table_lookup_lb for
 * the same FIB and address. */
2923 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2925 ip4_fib_mtrie_t *mtrie0;
2926 ip4_fib_mtrie_leaf_t leaf0;
2929 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2931 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2932 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2933 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2934 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2936 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2938 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/* CLI handler for "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]".
 *
 * Parses the optional table, the optional count and the base address, then
 * runs ip4_lookup_validate on n consecutive addresses starting at the base
 * and prints how many lookups disagreed.
 *
 * NOTE(review): the value parsed after "table" is passed straight to
 * ip4_fib_get and ip4_lookup_validate, i.e. it is used as a FIB index, not
 * looked up as a table id. */
2941 static clib_error_t *
2942 test_lookup_command_fn (vlib_main_t * vm,
2943 unformat_input_t * input, vlib_cli_command_t * cmd)
2950 ip4_address_t ip4_base_address;
2953 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2955 if (unformat (input, "table %d", &table_id))
2957 /* Make sure the entry exists. */
2958 fib = ip4_fib_get (table_id);
2959 if ((fib) && (fib->index != table_id))
2960 return clib_error_return (0, "<fib-index> %d does not exist",
2963 else if (unformat (input, "count %f", &count))
2966 else if (unformat (input, "%U",
2967 unformat_ip4_address, &ip4_base_address))
2970 return clib_error_return (0, "unknown input `%U'",
2971 format_unformat_error, input);
2976 for (i = 0; i < n; i++)
2978 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: add 1 in host byte order and store the
 * result back in network byte order. */
2981 ip4_base_address.as_u32 =
2982 clib_host_to_net_u32 (1 +
2983 clib_net_to_host_u32 (ip4_base_address.as_u32));
2987 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2989 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2995 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2996 * given FIB table to determine if there is a conflict with the
2997 * adjacency table. The fib-id can be determined by using the
2998 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3001 * @todo This command uses fib-id, other commands use table-id (not
3002 * just a name, they are different indexes). Would like to change this
3003 * to table-id for consistency.
3006 * Example of how to run the test lookup command:
3007 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3008 * No errors in 2 lookups
/* Binds the CLI path "test lookup" to test_lookup_command_fn. */
3012 VLIB_CLI_COMMAND (lookup_test_command, static) =
3014 .path = "test lookup",
3015 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3016 .function = test_lookup_command_fn,
/* Stores flow_hash_config on the IPv4 FIB identified by table_id.
 *
 * The FIB index is looked up in ip4_main.fib_index_by_table_id.
 * VNET_API_ERROR_NO_SUCH_FIB is one possible return value; its guarding
 * condition is not visible in this listing, presumably a failed hash
 * lookup -- TODO confirm.  The success return value is not visible either. */
3021 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3023 ip4_main_t *im4 = &ip4_main;
3025 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3028 return VNET_API_ERROR_NO_SUCH_FIB;
3030 fib = ip4_fib_get (p[0]);
3032 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 *
 * Parses "table <id>" plus any of the keywords generated through
 * foreach_flow_hash_bit, OR-ing each matched keyword's value into
 * flow_hash_config, then applies the result with vnet_set_ip4_flow_hash.
 * Unknown input and VNET_API_ERROR_NO_SUCH_FIB are reported as CLI errors;
 * the clib_warning call logs an illegal flow hash config. */
3036 static clib_error_t *
3037 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3038 unformat_input_t * input,
3039 vlib_cli_command_t * cmd)
3043 u32 flow_hash_config = 0;
3046 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3048 if (unformat (input, "table %d", &table_id))
3051 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3052 foreach_flow_hash_bit
3059 return clib_error_return (0, "unknown input `%U'",
3060 format_unformat_error, input);
3062 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3068 case VNET_API_ERROR_NO_SUCH_FIB:
3069 return clib_error_return (0, "no such FIB table %d", table_id);
3072 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3080 * Configure the set of IPv4 fields used by the flow hash.
3083 * Example of how to set the flow hash on a given table:
3084 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3085 * Example of displaying the configured flow hash:
3086 * @cliexstart{show ip fib}
3087 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3090 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3091 * [0] [@0]: dpo-drop ip6
3094 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3095 * [0] [@0]: dpo-drop ip6
3098 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3099 * [0] [@0]: dpo-drop ip6
3102 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3103 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3106 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3107 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3108 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3109 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3110 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3113 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3114 * [0] [@0]: dpo-drop ip6
3115 * 255.255.255.255/32
3117 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3118 * [0] [@0]: dpo-drop ip6
3119 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3122 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3123 * [0] [@0]: dpo-drop ip6
3126 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3127 * [0] [@0]: dpo-drop ip6
3130 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3131 * [0] [@4]: ipv4-glean: af_packet0
3134 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3135 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3138 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3139 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3142 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3143 * [0] [@4]: ipv4-glean: af_packet1
3146 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3147 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3150 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3151 * [0] [@0]: dpo-drop ip6
3154 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3155 * [0] [@0]: dpo-drop ip6
3156 * 255.255.255.255/32
3158 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3159 * [0] [@0]: dpo-drop ip6
/* Binds the CLI path "set ip flow-hash" to set_ip_flow_hash_command_fn. */
3163 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3165 .path = "set ip flow-hash",
3167 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3168 .function = set_ip_flow_hash_command_fn,
/* Attaches classifier table table_index to interface sw_if_index, or
 * detaches it when table_index is ~0.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * pool index, and VNET_API_ERROR_NO_SUCH_ENTRY when table_index is neither
 * ~0 nor a live classifier table.  Otherwise records table_index in
 * lm->classify_table_index_by_sw_if_index and, if the interface has a first
 * IPv4 address, adds a FIB_SOURCE_CLASSIFY special entry carrying a classify
 * DPO for that address's prefix in the interface's FIB, or removes that
 * entry when table_index is ~0.  The success return value is not visible in
 * this listing. */
3173 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3176 vnet_main_t *vnm = vnet_get_main ();
3177 vnet_interface_main_t *im = &vnm->interface_main;
3178 ip4_main_t *ipm = &ip4_main;
3179 ip_lookup_main_t *lm = &ipm->lookup_main;
3180 vnet_classify_main_t *cm = &vnet_classify_main;
3181 ip4_address_t *if_addr;
3183 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3184 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3186 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3187 return VNET_API_ERROR_NO_SUCH_ENTRY;
3189 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3190 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3192 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* The FIB entry is only touched when the interface has an address. */
3194 if (NULL != if_addr)
3196 fib_prefix_t pfx = {
3198 .fp_proto = FIB_PROTOCOL_IP4,
3199 .fp_addr.ip4 = *if_addr,
3203 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3207 if (table_index != (u32) ~ 0)
3209 dpo_id_t dpo = DPO_INVALID;
3214 classify_dpo_create (DPO_PROTO_IP4, table_index));
3216 fib_table_entry_special_dpo_add (fib_index,
3218 FIB_SOURCE_CLASSIFY,
3219 FIB_ENTRY_FLAG_NONE, &dpo);
3224 fib_table_entry_special_remove (fib_index,
3225 &pfx, FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify intfc <interface> table-index <idx>".
 *
 * Both arguments are required: a missing table-index or interface yields a
 * CLI error.  The parsed values are passed to vnet_set_ip4_classify_intfc,
 * and its VNET_API_ERROR_NO_MATCHING_INTERFACE / VNET_API_ERROR_NO_SUCH_ENTRY
 * results are turned into CLI error messages. */
3232 static clib_error_t *
3233 set_ip_classify_command_fn (vlib_main_t * vm,
3234 unformat_input_t * input,
3235 vlib_cli_command_t * cmd)
3237 u32 table_index = ~0;
/* Tracked separately because ~0 is itself an accepted table_index value. */
3238 int table_index_set = 0;
3239 u32 sw_if_index = ~0;
3242 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3244 if (unformat (input, "table-index %d", &table_index))
3245 table_index_set = 1;
3246 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3247 vnet_get_main (), &sw_if_index))
3253 if (table_index_set == 0)
3254 return clib_error_return (0, "classify table-index must be specified");
3256 if (sw_if_index == ~0)
3257 return clib_error_return (0, "interface / subif must be specified");
3259 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3266 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3267 return clib_error_return (0, "No such interface");
3269 case VNET_API_ERROR_NO_SUCH_ENTRY:
3270 return clib_error_return (0, "No such classifier table");
3276 * Assign a classification table to an interface. The classification
3277 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3278 * commands. Once the table is created, use this command to filter packets
3282 * Example of how to assign a classification table to an interface:
3283 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Binds the CLI path "set ip classify" to set_ip_classify_command_fn. */
3286 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3288 .path = "set ip classify",
3290 "set ip classify intfc <interface> table-index <classify-idx>",
3291 .function = set_ip_classify_command_fn,
3296 * fd.io coding-style-patch-verification: ON
3299 * eval: (c-set-style "gnu")