2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/load_balance_map.h>
53 #include <vnet/dpo/classify_dpo.h>
54 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
58 * @brief IPv4 Forwarding.
60 * This file contains the source code for IPv4 forwarding.
/*
 * ip4_lookup_inline: per-frame IPv4 destination lookup.
 *
 * For each buffer index in the frame the visible code:
 *   - reads a FIB index from im->fib_index_by_sw_if_index using the RX
 *     interface, and prefers sw_if_index[VLIB_TX] when that is not ~0;
 *   - walks the FIB mtrie (step one, then steps 2 and 3) on the IPv4
 *     destination address and takes the load-balance index from the leaf;
 *   - picks a load-balance bucket: bucket 0 when there is a single bucket,
 *     otherwise the bucket selected by the flow hash masked with
 *     lb_n_buckets_minus_1;
 *   - stores the bucket DPO index in ip.adj_index[VLIB_TX], increments the
 *     combined counter for the load-balance index and enqueues the buffer
 *     towards the DPO next node.
 *
 * When lookup_for_responses_to_locally_received_packets is non-zero the
 * mtrie walk is skipped and the load-balance index is read from
 * ip.adj_index[VLIB_RX] instead.
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): this extract is missing lines (braces, else keywords, the
 * frame parameter, several assignment targets).  The comments below only
 * describe statements that are actually shown.
 */
64 ip4_lookup_inline (vlib_main_t * vm,
65 vlib_node_runtime_t * node,
67 int lookup_for_responses_to_locally_received_packets)
69 ip4_main_t *im = &ip4_main;
/* Combined (packet, byte) counters, indexed below by load-balance index. */
70 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
71 u32 n_left_from, n_left_to_next, *from, *to_next;
72 ip_lookup_next_t next;
73 u32 thread_index = vlib_get_thread_index ();
75 from = vlib_frame_vector_args (frame);
76 n_left_from = frame->n_vectors;
/* Start by filling the frame for the next node used last time. */
77 next = node->cached_next_index;
79 while (n_left_from > 0)
81 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/*
 * Quad loop: four buffers are processed per iteration.  Eight must remain
 * because buffers 4..7 are prefetched for the following iteration.
 */
83 while (n_left_from >= 8 && n_left_to_next >= 4)
85 vlib_buffer_t *p0, *p1, *p2, *p3;
86 ip4_header_t *ip0, *ip1, *ip2, *ip3;
87 ip_lookup_next_t next0, next1, next2, next3;
88 const load_balance_t *lb0, *lb1, *lb2, *lb3;
89 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
90 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
91 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
92 u32 pi0, fib_index0, lb_index0;
93 u32 pi1, fib_index1, lb_index1;
94 u32 pi2, fib_index2, lb_index2;
95 u32 pi3, fib_index3, lb_index3;
96 flow_hash_config_t flow_hash_config0, flow_hash_config1;
97 flow_hash_config_t flow_hash_config2, flow_hash_config3;
98 u32 hash_c0, hash_c1, hash_c2, hash_c3;
99 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
101 /* Prefetch next iteration. */
103 vlib_buffer_t *p4, *p5, *p6, *p7;
105 p4 = vlib_get_buffer (vm, from[4]);
106 p5 = vlib_get_buffer (vm, from[5]);
107 p6 = vlib_get_buffer (vm, from[6]);
108 p7 = vlib_get_buffer (vm, from[7]);
110 vlib_prefetch_buffer_header (p4, LOAD);
111 vlib_prefetch_buffer_header (p5, LOAD);
112 vlib_prefetch_buffer_header (p6, LOAD);
113 vlib_prefetch_buffer_header (p7, LOAD);
/* Only an IPv4 header worth of packet data is prefetched. */
115 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
116 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
117 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
118 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
/* Speculatively copy the four buffer indices into the current next frame. */
121 pi0 = to_next[0] = from[0];
122 pi1 = to_next[1] = from[1];
123 pi2 = to_next[2] = from[2];
124 pi3 = to_next[3] = from[3];
131 p0 = vlib_get_buffer (vm, pi0);
132 p1 = vlib_get_buffer (vm, pi1);
133 p2 = vlib_get_buffer (vm, pi2);
134 p3 = vlib_get_buffer (vm, pi3);
136 ip0 = vlib_buffer_get_current (p0);
137 ip1 = vlib_buffer_get_current (p1);
138 ip2 = vlib_buffer_get_current (p2);
139 ip3 = vlib_buffer_get_current (p3);
141 dst_addr0 = &ip0->dst_address;
142 dst_addr1 = &ip1->dst_address;
143 dst_addr2 = &ip2->dst_address;
144 dst_addr3 = &ip3->dst_address;
/*
 * FIB index configured for the RX interface.
 * NOTE(review): the assignment targets (presumably fib_index0..3) are on
 * lines that are not visible here - confirm.
 */
147 vec_elt (im->fib_index_by_sw_if_index,
148 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
150 vec_elt (im->fib_index_by_sw_if_index,
151 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
153 vec_elt (im->fib_index_by_sw_if_index,
154 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
156 vec_elt (im->fib_index_by_sw_if_index,
157 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
/*
 * A sw_if_index[VLIB_TX] other than ~0 is used in place of the RX-derived
 * FIB index (assignment targets again not visible).
 */
159 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
160 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
162 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
163 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
165 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
166 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
168 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
169 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
/* Mtrie walk, first step: fetch each FIB mtrie and do the initial lookup. */
172 if (!lookup_for_responses_to_locally_received_packets)
174 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
175 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
176 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
177 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
179 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
180 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
181 leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
182 leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
/* Mtrie walk, step with index 2. */
185 if (!lookup_for_responses_to_locally_received_packets)
187 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
188 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
189 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
190 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
/* Mtrie walk, step with index 3. */
193 if (!lookup_for_responses_to_locally_received_packets)
195 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
196 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
197 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
198 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
/*
 * Source of the load-balance index: the value already stored in
 * ip.adj_index[VLIB_RX] for the local-response case, otherwise the mtrie
 * leaf (the else keyword is on a line not visible here).
 */
201 if (lookup_for_responses_to_locally_received_packets)
203 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
204 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
205 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
206 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
210 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
211 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
212 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
213 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
/* Index 0 is never expected as a lookup result. */
216 ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
217 lb0 = load_balance_get (lb_index0);
218 lb1 = load_balance_get (lb_index1);
219 lb2 = load_balance_get (lb_index2);
220 lb3 = load_balance_get (lb_index3);
/* The bucket mask used below relies on a non-zero power-of-two count. */
222 ASSERT (lb0->lb_n_buckets > 0);
223 ASSERT (is_pow2 (lb0->lb_n_buckets));
224 ASSERT (lb1->lb_n_buckets > 0);
225 ASSERT (is_pow2 (lb1->lb_n_buckets));
226 ASSERT (lb2->lb_n_buckets > 0);
227 ASSERT (is_pow2 (lb2->lb_n_buckets));
228 ASSERT (lb3->lb_n_buckets > 0);
229 ASSERT (is_pow2 (lb3->lb_n_buckets));
231 /* Use flow hash to compute multipath adjacency. */
/*
 * The stored flow hash is reset to 0 first; it is only computed (and
 * written back to the buffer) when the load-balance has several buckets.
 */
232 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
233 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
234 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
235 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
236 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
238 flow_hash_config0 = lb0->lb_hash_config;
239 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
240 ip4_compute_flow_hash (ip0, flow_hash_config0);
242 load_balance_get_fwd_bucket (lb0,
244 (lb0->lb_n_buckets_minus_1)));
/* Single-bucket case: always bucket 0. */
248 dpo0 = load_balance_get_bucket_i (lb0, 0);
250 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
252 flow_hash_config1 = lb1->lb_hash_config;
253 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
254 ip4_compute_flow_hash (ip1, flow_hash_config1);
256 load_balance_get_fwd_bucket (lb1,
258 (lb1->lb_n_buckets_minus_1)));
262 dpo1 = load_balance_get_bucket_i (lb1, 0);
264 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
266 flow_hash_config2 = lb2->lb_hash_config;
267 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
268 ip4_compute_flow_hash (ip2, flow_hash_config2);
270 load_balance_get_fwd_bucket (lb2,
272 (lb2->lb_n_buckets_minus_1)));
276 dpo2 = load_balance_get_bucket_i (lb2, 0);
278 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
280 flow_hash_config3 = lb3->lb_hash_config;
281 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
282 ip4_compute_flow_hash (ip3, flow_hash_config3);
284 load_balance_get_fwd_bucket (lb3,
286 (lb3->lb_n_buckets_minus_1)));
290 dpo3 = load_balance_get_bucket_i (lb3, 0);
/* Publish the result: next node and DPO index come from the chosen bucket. */
293 next0 = dpo0->dpoi_next_node;
294 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
295 next1 = dpo1->dpoi_next_node;
296 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
297 next2 = dpo2->dpoi_next_node;
298 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
299 next3 = dpo3->dpoi_next_node;
300 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/* One packet plus the full chain length in bytes per load-balance index. */
302 vlib_increment_combined_counter
303 (cm, thread_index, lb_index0, 1,
304 vlib_buffer_length_in_chain (vm, p0));
305 vlib_increment_combined_counter
306 (cm, thread_index, lb_index1, 1,
307 vlib_buffer_length_in_chain (vm, p1));
308 vlib_increment_combined_counter
309 (cm, thread_index, lb_index2, 1,
310 vlib_buffer_length_in_chain (vm, p2));
311 vlib_increment_combined_counter
312 (cm, thread_index, lb_index3, 1,
313 vlib_buffer_length_in_chain (vm, p3));
/* Validate the speculative enqueue against the per-buffer next nodes. */
315 vlib_validate_buffer_enqueue_x4 (vm, node, next,
316 to_next, n_left_to_next,
318 next0, next1, next2, next3);
/*
 * Single loop: same processing, one buffer at a time, for whatever the quad
 * loop left over.
 */
321 while (n_left_from > 0 && n_left_to_next > 0)
325 ip_lookup_next_t next0;
326 const load_balance_t *lb0;
327 ip4_fib_mtrie_t *mtrie0;
328 ip4_fib_mtrie_leaf_t leaf0;
329 ip4_address_t *dst_addr0;
330 u32 pi0, fib_index0, lbi0;
331 flow_hash_config_t flow_hash_config0;
332 const dpo_id_t *dpo0;
338 p0 = vlib_get_buffer (vm, pi0);
340 ip0 = vlib_buffer_get_current (p0);
342 dst_addr0 = &ip0->dst_address;
345 vec_elt (im->fib_index_by_sw_if_index,
346 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
348 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
349 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
351 if (!lookup_for_responses_to_locally_received_packets)
353 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
355 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
358 if (!lookup_for_responses_to_locally_received_packets)
359 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
361 if (!lookup_for_responses_to_locally_received_packets)
362 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
364 if (lookup_for_responses_to_locally_received_packets)
365 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
368 /* Handle default route. */
369 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
373 lb0 = load_balance_get (lbi0);
375 ASSERT (lb0->lb_n_buckets > 0);
376 ASSERT (is_pow2 (lb0->lb_n_buckets));
378 /* Use flow hash to compute multipath adjacency. */
379 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
380 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
382 flow_hash_config0 = lb0->lb_hash_config;
384 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
385 ip4_compute_flow_hash (ip0, flow_hash_config0);
387 load_balance_get_fwd_bucket (lb0,
389 (lb0->lb_n_buckets_minus_1)));
393 dpo0 = load_balance_get_bucket_i (lb0, 0);
396 next0 = dpo0->dpoi_next_node;
397 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
399 vlib_increment_combined_counter (cm, thread_index, lbi0, 1,
400 vlib_buffer_length_in_chain (vm,
/*
 * The buffer wants a different next node than the frame being filled: the
 * current frame is handed back and a new one is fetched.
 * NOTE(review): the bookkeeping between these calls is not visible here.
 */
408 if (PREDICT_FALSE (next0 != next))
411 vlib_put_next_frame (vm, node, next, n_left_to_next);
413 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
420 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing is done once for the whole frame, after all buffers are handled. */
423 if (node->flags & VLIB_NODE_FLAG_TRACE)
424 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
426 return frame->n_vectors;
429 /** @brief IPv4 lookup node.
432 This is the main IPv4 lookup dispatch node.
434 @param vm vlib_main_t corresponding to the current thread
435 @param node vlib_node_runtime_t
436 @param frame vlib_frame_t whose contents should be dispatched
438 @par Graph mechanics: buffer metadata, next index usage
441 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
442 - Indicates the @c sw_if_index value of the interface that the
443 packet was received on.
444 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
445 - When the value is @c ~0 then the node performs a longest prefix
446 match (LPM) for the packet destination address in the FIB attached
447 to the receive interface.
448 - Otherwise perform LPM for the packet destination address in the
449 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
450 value (0, 1, ...) and not a VRF id.
453 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
454 - The index (@c dpoi_index) of the DPO in the selected load-balance bucket.
457 - Dispatches the packet to the next node recorded in the selected
458 load-balance bucket, @c dpo->dpoi_next_node
459 (where @c dpo is the bucket DPO chosen by the lookup).
/*
 * ip4_lookup: node function for ip4-lookup.  Forwards the frame to
 * ip4_lookup_inline.
 * NOTE(review): the value passed for
 * lookup_for_responses_to_locally_received_packets is on a line that is not
 * visible in this extract.
 */
462 ip4_lookup (vlib_main_t * vm,
463 vlib_node_runtime_t * node, vlib_frame_t * frame)
465 return ip4_lookup_inline (vm, node, frame,
466 /* lookup_for_responses_to_locally_received_packets */
/* Forward declaration: the trace formatter is defined later in this file. */
471 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/*
 * Graph node registration for ip4-lookup: runs ip4_lookup on vectors of u32
 * buffer indices and formats traces with format_ip4_lookup_trace.  The next
 * node table comes from IP4_LOOKUP_NEXT_NODES (IP_LOOKUP_N_NEXT entries).
 */
474 VLIB_REGISTER_NODE (ip4_lookup_node) =
476 .function = ip4_lookup,
477 .name = "ip4-lookup",
478 .vector_size = sizeof (u32),
479 .format_trace = format_ip4_lookup_trace,
480 .n_next_nodes = IP_LOOKUP_N_NEXT,
481 .next_nodes = IP4_LOOKUP_NEXT_NODES,
485 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/*
 * ip4_load_balance: node function for ip4-load-balance.
 *
 * Each buffer arrives with a load-balance index in ip.adj_index[VLIB_TX].
 * The visible code selects a bucket of that load-balance, overwrites
 * ip.adj_index[VLIB_TX] with the bucket DPO index, increments the
 * lbm_via_counters entry for the load-balance index and enqueues the buffer
 * to the DPO next node.
 *
 * Bucket selection with more than one bucket: a non-zero flow hash already
 * stored in the buffer is shifted right by one bit (and stored back);
 * otherwise a hash is computed from the IPv4 header.  The hash is masked
 * with lb_n_buckets_minus_1.  With a single bucket, bucket 0 is used.
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): braces, else keywords and some declarations are on lines
 * that are not visible in this extract.
 */
488 ip4_load_balance (vlib_main_t * vm,
489 vlib_node_runtime_t * node, vlib_frame_t * frame)
491 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
492 u32 n_left_from, n_left_to_next, *from, *to_next;
493 ip_lookup_next_t next;
494 u32 thread_index = vlib_get_thread_index ();
496 from = vlib_frame_vector_args (frame);
497 n_left_from = frame->n_vectors;
498 next = node->cached_next_index;
/* Unlike ip4_lookup_inline, tracing happens before the buffers are processed. */
500 if (node->flags & VLIB_NODE_FLAG_TRACE)
501 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
503 while (n_left_from > 0)
505 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/*
 * Dual loop: two buffers per iteration; four must remain because buffers 2
 * and 3 are prefetched for the following iteration.
 */
508 while (n_left_from >= 4 && n_left_to_next >= 2)
510 ip_lookup_next_t next0, next1;
511 const load_balance_t *lb0, *lb1;
512 vlib_buffer_t *p0, *p1;
513 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
514 const ip4_header_t *ip0, *ip1;
515 const dpo_id_t *dpo0, *dpo1;
517 /* Prefetch next iteration. */
519 vlib_buffer_t *p2, *p3;
521 p2 = vlib_get_buffer (vm, from[2]);
522 p3 = vlib_get_buffer (vm, from[3]);
524 vlib_prefetch_buffer_header (p2, STORE);
525 vlib_prefetch_buffer_header (p3, STORE);
527 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
528 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
/* Speculatively copy both buffer indices into the current next frame. */
531 pi0 = to_next[0] = from[0];
532 pi1 = to_next[1] = from[1];
539 p0 = vlib_get_buffer (vm, pi0);
540 p1 = vlib_get_buffer (vm, pi1);
542 ip0 = vlib_buffer_get_current (p0);
543 ip1 = vlib_buffer_get_current (p1);
/* On entry adj_index[VLIB_TX] holds the load-balance index to resolve. */
544 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
545 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
547 lb0 = load_balance_get (lbi0);
548 lb1 = load_balance_get (lbi1);
551 * this node is for via FIBs we can re-use the hash value from the
552 * to node if present.
553 * We don't want to use the same hash value at each level in the recursion
554 * graph as that would lead to polarisation
558 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
560 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
562 hc0 = vnet_buffer (p0)->ip.flow_hash =
563 vnet_buffer (p0)->ip.flow_hash >> 1;
567 hc0 = vnet_buffer (p0)->ip.flow_hash =
568 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
570 dpo0 = load_balance_get_fwd_bucket
571 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
575 dpo0 = load_balance_get_bucket_i (lb0, 0);
577 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
579 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
581 hc1 = vnet_buffer (p1)->ip.flow_hash =
582 vnet_buffer (p1)->ip.flow_hash >> 1;
586 hc1 = vnet_buffer (p1)->ip.flow_hash =
587 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
589 dpo1 = load_balance_get_fwd_bucket
590 (lb1, (hc1 & (lb1->lb_n_buckets_minus_1)));
594 dpo1 = load_balance_get_bucket_i (lb1, 0);
597 next0 = dpo0->dpoi_next_node;
598 next1 = dpo1->dpoi_next_node;
/* Replace the load-balance index with the index of the chosen bucket DPO. */
600 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are keyed by the incoming load-balance index, not the bucket. */
603 vlib_increment_combined_counter
604 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
605 vlib_increment_combined_counter
606 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
608 vlib_validate_buffer_enqueue_x2 (vm, node, next,
609 to_next, n_left_to_next,
610 pi0, pi1, next0, next1);
/* Single loop: same processing for the remaining buffers, one at a time. */
613 while (n_left_from > 0 && n_left_to_next > 0)
615 ip_lookup_next_t next0;
616 const load_balance_t *lb0;
619 const ip4_header_t *ip0;
620 const dpo_id_t *dpo0;
629 p0 = vlib_get_buffer (vm, pi0);
631 ip0 = vlib_buffer_get_current (p0);
632 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
634 lb0 = load_balance_get (lbi0);
637 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
639 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
641 hc0 = vnet_buffer (p0)->ip.flow_hash =
642 vnet_buffer (p0)->ip.flow_hash >> 1;
646 hc0 = vnet_buffer (p0)->ip.flow_hash =
647 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
649 dpo0 = load_balance_get_fwd_bucket
650 (lb0, (hc0 & (lb0->lb_n_buckets_minus_1)));
654 dpo0 = load_balance_get_bucket_i (lb0, 0);
657 next0 = dpo0->dpoi_next_node;
658 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
660 vlib_increment_combined_counter
661 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
663 vlib_validate_buffer_enqueue_x1 (vm, node, next,
664 to_next, n_left_to_next,
668 vlib_put_next_frame (vm, node, next, n_left_to_next);
671 return frame->n_vectors;
/*
 * Graph node registration for ip4-load-balance.  It is declared a sibling of
 * ip4-lookup and references the same trace formatter,
 * format_ip4_lookup_trace.
 * NOTE(review): the field name in front of format_ip4_lookup_trace is on a
 * line that is not visible in this extract.
 */
675 VLIB_REGISTER_NODE (ip4_load_balance_node) =
677 .function = ip4_load_balance,
678 .name = "ip4-load-balance",
679 .vector_size = sizeof (u32),
680 .sibling_of = "ip4-lookup",
682 format_ip4_lookup_trace,
686 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
688 /* get first interface address */
/*
 * ip4_interface_first_address: look up an IPv4 address configured on
 * sw_if_index, iterating with foreach_ip_interface_address and honoring
 * unnumbered interfaces (flag 1).
 *
 * *result_ia receives the matching interface-address entry, or 0 when no
 * address was found.
 *
 * NOTE(review): the loop body, any guard on result_ia and the return
 * statement are on lines that are not visible in this extract.
 */
690 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
691 ip_interface_address_t ** result_ia)
693 ip_lookup_main_t *lm = &im->lookup_main;
694 ip_interface_address_t *ia = 0;
695 ip4_address_t *result = 0;
698 foreach_ip_interface_address
699 (lm, ia, sw_if_index,
700 1 /* honor unnumbered */ ,
703 ip_interface_address_get_address (lm, ia);
/* Only report the entry when an address was actually found. */
709 *result_ia = result ? ia : 0;
/*
 * ip4_add_interface_routes: install the FIB entries that go with interface
 * address a in fib_index, all with source FIB_SOURCE_INTERFACE.
 *
 * Visible behavior:
 *   - prefix length <= 30: a connected, attached entry for the prefix, plus
 *     drop entries (exempt from loose uRPF) for the lowest and highest
 *     address of the subnet, each skipped when it equals the interface
 *     address itself;
 *   - prefix length == 31: an attached entry for the peer address, obtained
 *     by flipping the lowest address bit;
 *   - when the interface has a classify table, a classify DPO is created and
 *     added as a special entry;
 *   - finally a connected, local entry.
 *
 * NOTE(review): several argument lines and any updates of pfx.fp_len between
 * the steps are not visible in this extract, so the exact prefix used by
 * each call cannot be confirmed from here.
 */
714 ip4_add_interface_routes (u32 sw_if_index,
715 ip4_main_t * im, u32 fib_index,
716 ip_interface_address_t * a)
718 ip_lookup_main_t *lm = &im->lookup_main;
719 ip4_address_t *address = ip_interface_address_get_address (lm, a);
721 .fp_len = a->address_length,
722 .fp_proto = FIB_PROTOCOL_IP4,
723 .fp_addr.ip4 = *address,
726 if (pfx.fp_len <= 30)
728 /* a /30 or shorter - add a glean for the network address */
729 fib_table_entry_update_one_path (fib_index, &pfx,
730 FIB_SOURCE_INTERFACE,
731 (FIB_ENTRY_FLAG_CONNECTED |
732 FIB_ENTRY_FLAG_ATTACHED),
734 /* No next-hop address */
740 // no out-label stack
742 FIB_ROUTE_PATH_FLAG_NONE);
744 /* Add the two broadcast addresses as drop */
745 fib_prefix_t net_pfx = {
747 .fp_proto = FIB_PROTOCOL_IP4,
/* Lowest address of the subnet: interface address ANDed with the mask. */
748 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
750 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
751 fib_table_entry_special_add(fib_index,
753 FIB_SOURCE_INTERFACE,
754 (FIB_ENTRY_FLAG_DROP |
755 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* Highest address of the subnet: set every host bit. */
756 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
757 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
758 fib_table_entry_special_add(fib_index,
760 FIB_SOURCE_INTERFACE,
761 (FIB_ENTRY_FLAG_DROP |
762 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
764 else if (pfx.fp_len == 31)
/* The value 1 in network byte order: toggles the last bit of the address. */
766 u32 mask = clib_host_to_net_u32(1);
767 fib_prefix_t net_pfx = pfx;
770 net_pfx.fp_addr.ip4.as_u32 ^= mask;
772 /* a /31 - add the other end as an attached host */
773 fib_table_entry_update_one_path (fib_index, &net_pfx,
774 FIB_SOURCE_INTERFACE,
775 (FIB_ENTRY_FLAG_ATTACHED),
783 FIB_ROUTE_PATH_FLAG_NONE);
/* Optional classify entry, only when a table is bound to this interface. */
787 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
789 u32 classify_table_index =
790 lm->classify_table_index_by_sw_if_index[sw_if_index];
791 if (classify_table_index != (u32) ~ 0)
793 dpo_id_t dpo = DPO_INVALID;
798 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
800 fib_table_entry_special_dpo_add (fib_index,
803 FIB_ENTRY_FLAG_NONE, &dpo);
/* Connected + local entry for the interface address. */
808 fib_table_entry_update_one_path (fib_index, &pfx,
809 FIB_SOURCE_INTERFACE,
810 (FIB_ENTRY_FLAG_CONNECTED |
811 FIB_ENTRY_FLAG_LOCAL),
818 FIB_ROUTE_PATH_FLAG_NONE);
/*
 * ip4_del_interface_routes: remove the FIB_SOURCE_INTERFACE entries that
 * belong to an interface address; the counterpart of
 * ip4_add_interface_routes.
 *
 * Visible behavior:
 *   - prefix length <= 30: remove the special entries for the lowest and
 *     highest subnet address (each only when it differs from the interface
 *     address), then delete the entry for pfx;
 *   - prefix length == 31: delete the entry for the peer address (lowest
 *     address bit flipped);
 *   - finally delete the entry for pfx.
 *
 * NOTE(review): the fib_index parameter line and any updates of pfx.fp_len
 * are not visible in this extract.
 */
822 ip4_del_interface_routes (ip4_main_t * im,
824 ip4_address_t * address, u32 address_length)
827 .fp_len = address_length,
828 .fp_proto = FIB_PROTOCOL_IP4,
829 .fp_addr.ip4 = *address,
832 if (pfx.fp_len <= 30)
834 fib_prefix_t net_pfx = {
836 .fp_proto = FIB_PROTOCOL_IP4,
/* Lowest address of the subnet. */
837 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
839 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
840 fib_table_entry_special_remove(fib_index,
842 FIB_SOURCE_INTERFACE);
/* Highest address of the subnet. */
843 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
844 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
845 fib_table_entry_special_remove(fib_index,
847 FIB_SOURCE_INTERFACE);
848 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
850 else if (pfx.fp_len == 31)
852 u32 mask = clib_host_to_net_u32(1);
853 fib_prefix_t net_pfx = pfx;
/* Peer address of the /31: flip the last address bit. */
856 net_pfx.fp_addr.ip4.as_u32 ^= mask;
858 fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
862 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/*
 * ip4_sw_interface_enable_disable: reference-counted enable/disable of IPv4
 * on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] is a counter (grown on demand,
 * new slots set to 0).  Enabling increments it, disabling decrements it
 * (asserting it was positive).  The ip4-drop feature on the ip4-unicast and
 * ip4-multicast arcs is toggled with the inverse of is_enable.
 *
 * NOTE(review): the statements guarded by the two counter tests (presumably
 * early returns so the feature is only touched on the 0 <-> 1 transition)
 * are on lines that are not visible in this extract.
 */
866 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
868 ip4_main_t *im = &ip4_main;
870 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
873 * enable/disable only on the 1<->0 transition
877 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
882 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
883 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
/* ip4-drop is enabled exactly when IPv4 is disabled on the interface. */
886 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
890 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
891 sw_if_index, !is_enable, 0, 0);
/*
 * ip4_add_del_interface_address_internal: add or delete an IPv4 address on
 * an interface and keep the FIB and registered listeners in sync.
 *
 * Visible steps:
 *   1. sw_if_index 0 (local0) is rejected with an error.
 *   2. The address is paired with the FIB index of the interface.
 *   3. The new address is compared against each address already on the
 *      interface (unnumbered not honored) with
 *      ip4_destination_matches_route; a conflict produces an error.
 *   4. ip_interface_address_add_del updates the address pool.
 *   5. IPv4 is enabled (add) or disabled (delete) on the interface and the
 *      interface routes are added or deleted.
 *   6. Registered add/del callbacks run only when the pool element count
 *      changed.
 *
 * Returns a clib_error_t pointer; error is initialised to 0.
 *
 * NOTE(review): the sw_if_index parameter line, the is_del tests, goto/return
 * statements and cleanup are on lines not visible in this extract.
 */
894 static clib_error_t *
895 ip4_add_del_interface_address_internal (vlib_main_t * vm,
897 ip4_address_t * address,
898 u32 address_length, u32 is_del)
900 vnet_main_t *vnm = vnet_get_main ();
901 ip4_main_t *im = &ip4_main;
902 ip_lookup_main_t *lm = &im->lookup_main;
903 clib_error_t *error = 0;
904 u32 if_address_index, elts_before;
905 ip4_address_fib_t ip4_af, *addr_fib = 0;
907 /* local0 interface doesn't support IP addressing */
908 if (sw_if_index == 0)
911 clib_error_create ("local0 interface doesn't support IP addressing");
/* Make sure the per-interface FIB index vector covers this interface. */
914 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
915 ip4_addr_fib_init (&ip4_af, address,
916 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
917 vec_add1 (addr_fib, ip4_af);
920 * there is no support for adj-fib handling in the presence of overlapping
921 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
927 /* When adding an address check that it does not conflict
928 with an existing address. */
929 ip_interface_address_t *ia;
930 foreach_ip_interface_address
931 (&im->lookup_main, ia, sw_if_index,
932 0 /* honor unnumbered */ ,
935 ip_interface_address_get_address
936 (&im->lookup_main, ia);
/* The overlap test is made in both directions (new vs existing prefix). */
937 if (ip4_destination_matches_route
938 (im, address, x, ia->address_length) ||
939 ip4_destination_matches_route (im,
945 ("failed to add %U which conflicts with %U for interface %U",
946 format_ip4_address_and_length, address,
948 format_ip4_address_and_length, x,
950 format_vnet_sw_if_index_name, vnm,
/* Remember the pool size so a real add/delete can be detected afterwards. */
956 elts_before = pool_elts (lm->if_address_pool);
958 error = ip_interface_address_add_del
959 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
963 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
966 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
968 ip4_add_interface_routes (sw_if_index,
969 im, ip4_af.fib_index,
971 (lm->if_address_pool, if_address_index));
973 /* If pool did not grow/shrink: add duplicate address. */
/* Listeners are only notified when the pool actually grew or shrank. */
974 if (elts_before != pool_elts (lm->if_address_pool))
976 ip4_add_del_interface_address_callback_t *cb;
977 vec_foreach (cb, im->add_del_interface_address_callbacks)
978 cb->function (im, cb->function_opaque, sw_if_index,
979 address, address_length, if_address_index, is_del);
/*
 * ip4_add_del_interface_address: entry point for adding (is_del == 0) or
 * deleting an interface address.  Passes every argument unchanged to
 * ip4_add_del_interface_address_internal and returns its result.
 * NOTE(review): the return type and the sw_if_index parameter are on lines
 * not visible in this extract.
 */
988 ip4_add_del_interface_address (vlib_main_t * vm,
990 ip4_address_t * address,
991 u32 address_length, u32 is_del)
993 return ip4_add_del_interface_address_internal
994 (vm, sw_if_index, address, address_length, is_del);
997 /* Built-in ip4 unicast rx feature path definition */
/*
 * The ip4-unicast arc starts at ip4-input and ip4-input-no-checksum.  Each
 * feature below names the feature(s) it must run before; ip4-lookup names
 * none.
 */
999 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
1001 .arc_name = "ip4-unicast",
1002 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1003 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
1006 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1008 .arc_name = "ip4-unicast",
1009 .node_name = "ip4-flow-classify",
1010 .runs_before = VNET_FEATURES ("ip4-inacl"),
1013 VNET_FEATURE_INIT (ip4_inacl, static) =
1015 .arc_name = "ip4-unicast",
1016 .node_name = "ip4-inacl",
1017 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1020 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1022 .arc_name = "ip4-unicast",
1023 .node_name = "ip4-source-check-via-rx",
1024 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1027 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1029 .arc_name = "ip4-unicast",
1030 .node_name = "ip4-source-check-via-any",
1031 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1034 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1036 .arc_name = "ip4-unicast",
1037 .node_name = "ip4-source-and-port-range-check-rx",
1038 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1041 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1043 .arc_name = "ip4-unicast",
1044 .node_name = "ip4-policer-classify",
1045 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1048 VNET_FEATURE_INIT (ip4_ipsec, static) =
1050 .arc_name = "ip4-unicast",
1051 .node_name = "ipsec-input-ip4",
1052 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1055 VNET_FEATURE_INIT (ip4_vpath, static) =
1057 .arc_name = "ip4-unicast",
1058 .node_name = "vpath-input-ip4",
1059 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1062 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1064 .arc_name = "ip4-unicast",
1065 .node_name = "ip4-vxlan-bypass",
1066 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* ip4-drop is the feature toggled by ip4_sw_interface_enable_disable. */
1069 VNET_FEATURE_INIT (ip4_drop, static) =
1071 .arc_name = "ip4-unicast",
1072 .node_name = "ip4-drop",
1073 .runs_before = VNET_FEATURES ("ip4-lookup"),
1076 VNET_FEATURE_INIT (ip4_lookup, static) =
1078 .arc_name = "ip4-unicast",
1079 .node_name = "ip4-lookup",
1080 .runs_before = 0, /* not before any other features */
1083 /* Built-in ip4 multicast rx feature path definition */
1084 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1086 .arc_name = "ip4-multicast",
1087 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1088 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1091 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1093 .arc_name = "ip4-multicast",
1094 .node_name = "vpath-input-ip4",
1095 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1098 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1100 .arc_name = "ip4-multicast",
1101 .node_name = "ip4-drop",
1102 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1105 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1107 .arc_name = "ip4-multicast",
1108 .node_name = "ip4-mfib-forward-lookup",
1109 .runs_before = 0, /* last feature */
1112 /* Built-in ip4 output (tx) feature arc definition; starts at ip4-rewrite, ip4-midchain and ip4-dvr-dpo */
1113 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1115 .arc_name = "ip4-output",
1116 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"),
1117 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
/* Source and port-range check on the tx path. */
1120 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1122 .arc_name = "ip4-output",
1123 .node_name = "ip4-source-and-port-range-check-tx",
1124 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1127 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1129 .arc_name = "ip4-output",
1130 .node_name = "ipsec-output-ip4",
1131 .runs_before = VNET_FEATURES ("interface-output"),
1134 /* Built-in ip4 tx feature path definition */
1135 VNET_FEATURE_INIT (ip4_interface_output, static) =
1137 .arc_name = "ip4-output",
1138 .node_name = "interface-output",
1139 .runs_before = 0, /* not before any other features */
/*
 * ip4_sw_interface_add_del: software-interface add/delete hook, registered
 * through VNET_SW_INTERFACE_ADD_DEL_FUNCTION.
 *
 * Visible behavior: grows the per-interface FIB and mFIB index vectors to
 * cover sw_if_index, walks the interface addresses (honoring unnumbered) and
 * calls ip4_add_del_interface_address with is_del = 1 for each, and toggles
 * the ip4-drop feature on the ip4-unicast and ip4-multicast arcs.
 * Always returns 0 (no error).
 *
 * NOTE(review): the test on is_add that guards the address removal, and the
 * trailing arguments of the two vnet_feature_enable_disable calls, are on
 * lines not visible in this extract.
 */
1143 static clib_error_t *
1144 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1146 ip4_main_t *im = &ip4_main;
1148 /* Fill in lookup tables with default table (0). */
1149 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1150 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
/* im4 points at the same global as im above. */
1154 ip4_main_t *im4 = &ip4_main;
1155 ip_lookup_main_t *lm4 = &im4->lookup_main;
1156 ip_interface_address_t *ia = 0;
1157 ip4_address_t *address;
1158 vlib_main_t *vm = vlib_get_main ();
1161 foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */,
1163 address = ip_interface_address_get_address (lm4, ia);
/* Last argument 1 is is_del: each address found is removed. */
1164 ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
1169 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1172 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1175 return /* no error */ 0;
1178 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1180 /* Global IP4 main: the single ip4_main_t instance referenced throughout this file via &ip4_main. */
1181 ip4_main_t ip4_main;
/*
 * ip4_lookup_init: one-time initialisation of the IPv4 lookup state,
 * registered with VLIB_INIT_FUNCTION.
 *
 * Visible steps:
 *   1. Run the init functions this module depends on (vnet_feature_init,
 *      ip4_mtrie_module_init, fib_module_init, mfib_module_init).
 *   2. Fill im->fib_masks[i] with the network-byte-order mask for prefix
 *      length i.
 *   3. Initialise the shared lookup main for IPv4 (is_ip6 = 0).
 *   4. Create and lock unicast and multicast FIB tables with table id 0.
 *   5. Hook the packet-generator edit function for the ip4-lookup node.
 *   6. Build the ARP request packet template.
 *
 * NOTE(review): the error returns after each init call, any guard around the
 * mask shift, and the function return are on lines not visible in this
 * extract.
 */
1184 ip4_lookup_init (vlib_main_t * vm)
1186 ip4_main_t *im = &ip4_main;
1187 clib_error_t *error;
1190 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1192 if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init)))
1194 if ((error = vlib_call_init_function (vm, fib_module_init)))
1196 if ((error = vlib_call_init_function (vm, mfib_module_init)))
1199 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* i low bits set, moved to the top of the 32-bit word: a length-i mask. */
1204 m = pow2_mask (i) << (32 - i);
1207 im->fib_masks[i] = clib_host_to_net_u32 (m);
1210 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1212 /* Create FIB with index 0 and table id of 0. */
1213 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1214 FIB_SOURCE_DEFAULT_ROUTE);
1215 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0,
1216 MFIB_SOURCE_DEFAULT_ROUTE);
1220 pn = pg_get_node (ip4_lookup_node.index);
1221 pn->unformat_edit = unformat_pg_ip4_header;
/* ARP request template: start from an all-zero header, then set fixed fields. */
1225 ethernet_arp_header_t h;
1227 memset (&h, 0, sizeof (h));
1229 /* Set target ethernet address to all zeros. */
1230 memset (h.ip4_over_ethernet[1].ethernet, 0,
1231 sizeof (h.ip4_over_ethernet[1].ethernet));
/* Helpers: _16 stores a 16-bit field in network byte order, _8 stores a byte. */
1233 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1234 #define _8(f,v) h.f = v;
1235 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1236 _16 (l3_type, ETHERNET_TYPE_IP4);
1237 _8 (n_l2_address_bytes, 6);
1238 _8 (n_l3_address_bytes, 4);
1239 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1243 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1246 /* alloc chunk size */ 8,
1253 VLIB_INIT_FUNCTION (ip4_lookup_init);
/*
 * Trace record shared by the ip4 forwarding nodes in this file.
 * Besides packet_data, the formatters and ip4_forward_next_trace() also
 * read or write the fields fib_index, dpo_index and flow_hash; their
 * declarations are not visible in this extract.
 */
1257 /* Adjacency taken. */
1262 /* Packet data, possibly *after* rewrite. */
1263 u8 packet_data[64 - 1 * sizeof (u32)];
1265 ip4_forward_next_trace_t;
/*
 * Trace formatter: appends the captured packet bytes, formatted as an
 * IPv4 header and indented to the current indent of s.
 *
 * args carries, in order: vlib_main_t * (unused), vlib_node_t * (unused)
 * and the ip4_forward_next_trace_t * to print.
 */
1268 format_ip4_forward_next_trace (u8 * s, va_list * args)
1270 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1271 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1272 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1273 u32 indent = format_get_indent (s);
1274 s = format (s, "%U%U",
1275 format_white_space, indent,
1276 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup traces: prints the FIB index, DPO index and
 * flow hash on one line, then the captured packet bytes formatted as an
 * IPv4 header on the next line, indented to the current indent of s.
 *
 * args carries, in order: vlib_main_t * (unused), vlib_node_t * (unused)
 * and the ip4_forward_next_trace_t * to print.
 */
1281 format_ip4_lookup_trace (u8 * s, va_list * args)
1283 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1284 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1285 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1286 u32 indent = format_get_indent (s);
1288 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1289 t->fib_index, t->dpo_index, t->flow_hash);
1290 s = format (s, "\n%U%U",
1291 format_white_space, indent,
1292 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite traces: prints the adjacency selected by
 * t->dpo_index and the flow hash, then the captured packet bytes
 * formatted through format_ip_adjacency_packet_data on the next line.
 *
 * The value printed under the label "tx_sw_if_index" is t->fib_index.
 * NOTE(review): presumably that field holds the TX sw_if_index when this
 * formatter is used -- confirm against the code that fills the trace.
 *
 * args carries, in order: vlib_main_t * (unused), vlib_node_t * (unused)
 * and the ip4_forward_next_trace_t * to print.
 */
1297 format_ip4_rewrite_trace (u8 * s, va_list * args)
1299 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1300 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1301 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1302 u32 indent = format_get_indent (s);
1304 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1305 t->fib_index, t->dpo_index, format_ip_adjacency,
1306 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1307 s = format (s, "\n%U%U",
1308 format_white_space, indent,
1309 format_ip_adjacency_packet_data,
1310 t->dpo_index, t->packet_data, sizeof (t->packet_data));
/*
 * Walks every buffer index in frame and, for each buffer that has
 * VLIB_BUFFER_IS_TRACED set, adds an ip4_forward_next_trace_t record to
 * the trace of node:
 *  - dpo_index  <- ip.adj_index[which_adj_index] of the buffer,
 *  - flow_hash  <- ip.flow_hash of the buffer,
 *  - packet_data <- the first sizeof (packet_data) bytes at the current
 *    buffer position.
 * A further value is computed as sw_if_index[VLIB_TX] when that is not
 * ~0, else the FIB index of the RX interface; the field it is assigned to
 * is on a line not visible in this extract (presumably fib_index).
 *
 * Buffers are handled two at a time while enough remain (with a prefetch
 * of the following two), then one at a time.
 */
1314 /* Common trace function for all ip4-forward next nodes. */
1316 ip4_forward_next_trace (vlib_main_t * vm,
1317 vlib_node_runtime_t * node,
1318 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1321 ip4_main_t *im = &ip4_main;
1323 n_left = frame->n_vectors;
1324 from = vlib_frame_vector_args (frame);
/* Dual-buffer path. */
1329 vlib_buffer_t *b0, *b1;
1330 ip4_forward_next_trace_t *t0, *t1;
1332 /* Prefetch next iteration. */
1333 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1334 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1339 b0 = vlib_get_buffer (vm, bi0);
1340 b1 = vlib_get_buffer (vm, bi1);
1342 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1344 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1345 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1346 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1348 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1349 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1350 vec_elt (im->fib_index_by_sw_if_index,
1351 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1353 clib_memcpy (t0->packet_data,
1354 vlib_buffer_get_current (b0),
1355 sizeof (t0->packet_data));
1357 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1359 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1360 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1361 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1363 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1364 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1365 vec_elt (im->fib_index_by_sw_if_index,
1366 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1367 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1368 sizeof (t1->packet_data));
/* Single-buffer path for the remainder. */
1378 ip4_forward_next_trace_t *t0;
1382 b0 = vlib_get_buffer (vm, bi0);
1384 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1386 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1387 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1388 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1390 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1391 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1392 vec_elt (im->fib_index_by_sw_if_index,
1393 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1394 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1395 sizeof (t0->packet_data));
/*
 * Sums the L4 pseudo-header and payload of the IPv4 packet whose header
 * is ip0 and which lives in buffer p0 (possibly chained).
 *
 * Steps visible here:
 *  - the payload length is the IP total length minus the IP header
 *    length; it is combined with the protocol number and converted to
 *    network order as the first pseudo-header term;
 *  - source and destination addresses are added, as two 32-bit loads on
 *    32-bit machines or one 64-bit load starting at src_address otherwise;
 *  - the payload is summed buffer by buffer, following next_buffer until
 *    the whole payload length has been consumed;
 *  - the folded sum is complemented into sum16.
 *
 * The trailing parameter(s), the loop header and the return statement
 * are on lines not visible in this extract; the caller below passes
 * (vm, p0, ip0) and uses the result as the 16-bit checksum.
 */
1402 /* Compute TCP/UDP/ICMP4 checksum in software. */
1404 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1408 u32 ip_header_length, payload_length_host_byte_order;
1409 u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
1411 void *data_this_buffer;
1413 /* Initialize checksum with ip header. */
1414 ip_header_length = ip4_header_bytes (ip0);
1415 payload_length_host_byte_order =
1416 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1418 clib_host_to_net_u32 (payload_length_host_byte_order +
1419 (ip0->protocol << 16));
1421 if (BITS (uword) == 32)
1424 ip_csum_with_carry (sum0,
1425 clib_mem_unaligned (&ip0->src_address, u32));
1427 ip_csum_with_carry (sum0,
1428 clib_mem_unaligned (&ip0->dst_address, u32));
1432 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Work out how much of the payload sits in the first buffer. */
1434 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1435 data_this_buffer = (void *) ip0 + ip_header_length;
/* Bytes from the start of the IP header to the end of this buffer. */
1436 n_ip_bytes_this_buffer =
1437 p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
1438 if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
1440 n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
1441 n_ip_bytes_this_buffer - ip_header_length : 0;
/* Sum this buffer, then move on to the next one in the chain. */
1445 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1446 n_bytes_left -= n_this_buffer;
1447 if (n_bytes_left == 0)
1450 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1451 p0 = vlib_get_buffer (vm, p0->next_buffer);
1452 data_this_buffer = vlib_buffer_get_current (p0);
1453 n_this_buffer = p0->current_length;
1456 sum16 = ~ip_csum_fold (sum0);
/*
 * Validates the TCP or UDP checksum of the IPv4 packet at the current
 * position of buffer p0 and records the outcome in p0->flags:
 *  - a UDP packet whose checksum field is 0 is marked computed and
 *    correct without summing anything;
 *  - otherwise the checksum is computed in software and the buffer is
 *    marked computed, plus correct when the computed sum is 0.
 *
 * The protocol must be TCP or UDP (asserted). The return statement is
 * not visible in this extract; the caller in ip4_local_validate_l4 treats
 * the result as the updated buffer flags.
 *
 * NOTE(review): the UDP header is located with (ip0 + 1), i.e. directly
 * after one ip4_header_t, while ip4_local_validate_l4 uses
 * ip4_next_header (ip). If ip4_header_t does not cover IP options these
 * two disagree for packets carrying options -- confirm.
 */
1462 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1464 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1468 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1469 || ip0->protocol == IP_PROTOCOL_UDP);
1471 udp0 = (void *) (ip0 + 1);
/* A zero UDP checksum is accepted as-is. */
1472 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1474 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1475 | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
1479 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is shifted into the position of the CORRECT flag. */
1481 p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED
1482 | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT));
/* Declares the "ip4-local" feature arc, which starts at the "ip4-local"
 * node. ip4_local_inline reads its arc index from
 * vnet_feat_arc_ip4_local.feature_arc_index. */
1488 VNET_FEATURE_ARC_INIT (ip4_local) =
1490 .arc_name = "ip4-local",
1491 .start_nodes = VNET_FEATURES ("ip4-local"),
/*
 * L4 validation helper for ip4_local_inline.
 *
 * Runs the TCP/UDP checksum validation on buffer p and stores in
 * *good_tcp_udp whether the buffer ended up flagged as checksum-correct.
 * It then compares the IP total length with the UDP length field: when
 * the UDP length is larger, *good_tcp_udp is cleared and *error is set
 * to IP4_ERROR_UDP_LENGTH; otherwise *error is left untouched.
 *
 * NOTE(review): the condition guarding the UDP length check is not
 * visible in this extract; presumably it is is_udp -- confirm.
 */
1496 ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip,
1497 u8 is_udp, u8 * error, u8 * good_tcp_udp)
1500 flags0 = ip4_tcp_udp_validate_checksum (vm, p);
1501 *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
1505 u32 ip_len, udp_len;
1507 udp = ip4_next_header (ip);
1508 /* Verify UDP length. */
1509 ip_len = clib_net_to_host_u16 (ip->length);
1510 udp_len = clib_net_to_host_u16 (udp->length);
1512 len_diff = ip_len - udp_len;
1513 *good_tcp_udp &= len_diff >= 0;
1514 *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error;
/* True when a software L4 check is still needed: the packet is TCP or
 * UDP and its flags show neither an already computed L4 checksum nor a
 * pending TCP/UDP checksum offload. */
1518 #define ip4_local_do_l4_check(is_tcp_udp, flags) \
1519 (is_tcp_udp && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED \
1520 || flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM \
1521 || flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM))
/*
 * Shared body of the "ip4-local" and "ip4-local-end-of-arc" nodes.
 *
 * head_of_feature_arc is 1 when called from ip4_local and 0 when called
 * from ip4_local_end_of_arc.
 *
 * For every packet in frame (two at a time while at least four remain,
 * then one at a time) the visible code:
 *  - starts from next = IP_LOCAL_NEXT_DROP and
 *    error = IP4_ERROR_UNKNOWN_PROTOCOL; the latter doubles as the
 *    "nothing wrong so far" value in all later tests;
 *  - records the L3 header offset in the buffer metadata;
 *  - maps fragments to protocol number 0xfe;
 *  - validates the TCP/UDP checksum (and UDP length) in software when
 *    the buffer flags do not show it as already done or offloaded, and
 *    turns a bad result into the TCP or UDP checksum error;
 *  - looks the source address up in the mtrie of the selected FIB and
 *    stores the resulting index in ip.adj_index[VLIB_RX] and [VLIB_TX];
 *  - flags a source that resolves to a receive DPO as spoofed, and a
 *    source failing the uRPF size check as a source lookup miss unless
 *    the destination is 255.255.255.255;
 *  - picks the next node from lm->local_next_by_ip_protocol[], replaced
 *    by drop when any error was recorded;
 *  - sets p->error from the error table of the ip4-input node runtime;
 *  - when head_of_feature_arc is set and no error was recorded, starts
 *    the "ip4-local" feature arc for the packet.
 *
 * Several branch bodies and assignment targets are on lines not visible
 * in this extract.
 *
 * NOTE(review): the dual loop tests head_of_feature_arc == 0 on its own
 * and tests VNET_BUFFER_F_IS_NATED later, while the single loop combines
 * both in one condition; confirm the two paths are meant to differ.
 *
 * Returns frame->n_vectors.
 */
1524 ip4_local_inline (vlib_main_t * vm,
1525 vlib_node_runtime_t * node,
1526 vlib_frame_t * frame, int head_of_feature_arc)
1528 ip4_main_t *im = &ip4_main;
1529 ip_lookup_main_t *lm = &im->lookup_main;
1530 ip_local_next_t next_index;
1531 u32 *from, *to_next, n_left_from, n_left_to_next;
/* Errors are accounted against the ip4-input node. */
1532 vlib_node_runtime_t *error_node =
1533 vlib_node_get_runtime (vm, ip4_input_node.index);
1534 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1536 from = vlib_frame_vector_args (frame);
1537 n_left_from = frame->n_vectors;
1538 next_index = node->cached_next_index;
1540 if (node->flags & VLIB_NODE_FLAG_TRACE)
1541 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1543 while (n_left_from > 0)
1545 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet path. */
1547 while (n_left_from >= 4 && n_left_to_next >= 2)
1549 vlib_buffer_t *p0, *p1;
1550 ip4_header_t *ip0, *ip1;
1551 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1552 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1553 const dpo_id_t *dpo0, *dpo1;
1554 const load_balance_t *lb0, *lb1;
1555 u32 pi0, next0, fib_index0, lbi0;
1556 u32 pi1, next1, fib_index1, lbi1;
1557 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1558 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1559 u32 sw_if_index0, sw_if_index1;
/* Speculatively enqueue both packets to the current next frame. */
1561 pi0 = to_next[0] = from[0];
1562 pi1 = to_next[1] = from[1];
1566 n_left_to_next -= 2;
1568 next0 = next1 = IP_LOCAL_NEXT_DROP;
1569 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1571 p0 = vlib_get_buffer (vm, pi0);
1572 p1 = vlib_get_buffer (vm, pi1);
1574 ip0 = vlib_buffer_get_current (p0);
1575 ip1 = vlib_buffer_get_current (p1);
1577 vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1578 vnet_buffer (p1)->l3_hdr_offset = p1->current_data;
1580 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1581 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1583 /* Treat IP frag packets as "experimental" protocol for now
1584 until support of IP frag reassembly is implemented */
1585 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1586 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1588 if (head_of_feature_arc == 0)
1591 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1592 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1593 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1594 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
/* Start from what the buffer flags already say about the checksum. */
1597 (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1598 || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1599 || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1600 good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1601 || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1603 VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
/* Software validation only when at least one packet still needs it. */
1605 if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)
1606 || ip4_local_do_l4_check (is_tcp_udp1,
1610 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1613 ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1,
/* The UDP error must directly follow the TCP one so that adding is_udp
 * to the TCP error code selects the right error. */
1617 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1618 error0 = (is_tcp_udp0 && !good_tcp_udp0
1619 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1620 error1 = (is_tcp_udp1 && !good_tcp_udp1
1621 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
/* FIB selection: the RX interface FIB, with sw_if_index[VLIB_TX] used
 * instead when it is not ~0. */
1623 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1625 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1626 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1628 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1630 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1631 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1633 /* TODO maybe move to lookup? */
1634 vnet_buffer (p0)->ip.fib_index = fib_index0;
1635 vnet_buffer (p1)->ip.fib_index = fib_index1;
/* Look up the SOURCE address of each packet in the mtrie. */
1637 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1638 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1640 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1641 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1642 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1644 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1646 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1648 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address,
1651 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1652 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1653 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1655 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1656 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1657 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1659 lb0 = load_balance_get (lbi0);
1660 lb1 = load_balance_get (lbi1);
1661 dpo0 = load_balance_get_bucket_i (lb0, 0);
1662 dpo1 = load_balance_get_bucket_i (lb1, 0);
1665 * Must have a route to source otherwise we drop the packet.
1666 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1669 * - the source is a recieve => it's from us => bogus, do this
1670 * first since it sets a different error code.
1671 * - uRPF check for any route to source - accept if passes.
1672 * - allow packets destined to the broadcast address from unknown sources
1674 if (p0->flags & VNET_BUFFER_F_IS_NATED)
1677 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1678 dpo0->dpoi_type == DPO_RECEIVE) ?
1679 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1680 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1681 !fib_urpf_check_size (lb0->lb_urpf) &&
1682 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1683 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1686 if (p1->flags & VNET_BUFFER_F_IS_NATED)
1689 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1690 dpo1->dpoi_type == DPO_RECEIVE) ?
1691 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1692 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1693 !fib_urpf_check_size (lb1->lb_urpf) &&
1694 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1695 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1699 next0 = lm->local_next_by_ip_protocol[proto0];
1700 next1 = lm->local_next_by_ip_protocol[proto1];
1703 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1705 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1707 p0->error = error0 ? error_node->errors[error0] : 0;
1708 p1->error = error1 ? error_node->errors[error1] : 0;
1710 if (head_of_feature_arc)
1712 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1713 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1714 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1715 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1718 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1719 n_left_to_next, pi0, pi1,
1723 while (n_left_from > 0 && n_left_to_next > 0)
1727 ip4_fib_mtrie_t *mtrie0;
1728 ip4_fib_mtrie_leaf_t leaf0;
1729 u32 pi0, next0, fib_index0, lbi0;
1730 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1731 load_balance_t *lb0;
1732 const dpo_id_t *dpo0;
1735 pi0 = to_next[0] = from[0];
1739 n_left_to_next -= 1;
1741 next0 = IP_LOCAL_NEXT_DROP;
1742 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1744 p0 = vlib_get_buffer (vm, pi0);
1745 ip0 = vlib_buffer_get_current (p0);
1746 vnet_buffer (p0)->l3_hdr_offset = p0->current_data;
1747 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1749 /* Treat IP frag packets as "experimental" protocol for now
1750 until support of IP frag reassembly is implemented */
1751 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1753 if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED)
1756 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1757 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1760 (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT
1761 || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM
1762 || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0;
1764 if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags)))
1766 ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0,
1770 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1771 error0 = (is_tcp_udp0 && !good_tcp_udp0
1772 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1774 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1776 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1777 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1778 vnet_buffer (p0)->ip.fib_index = fib_index0;
1779 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1780 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1781 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1783 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address,
1785 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1786 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1787 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1789 lb0 = load_balance_get (lbi0);
1790 dpo0 = load_balance_get_bucket_i (lb0, 0);
1792 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1793 dpo0->dpoi_type == DPO_RECEIVE) ?
1794 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1795 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1796 !fib_urpf_check_size (lb0->lb_urpf) &&
1797 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1798 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1801 next0 = lm->local_next_by_ip_protocol[proto0];
1803 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1805 p0->error = error0 ? error_node->errors[error0] : 0;
1807 if (head_of_feature_arc)
1809 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1810 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1813 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1814 n_left_to_next, pi0, next0);
1816 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1819 return frame->n_vectors;
/* Node function of "ip4-local": runs ip4_local_inline with
 * head_of_feature_arc set to 1. */
1823 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1825 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Graph node "ip4-local": runs ip4_local on vectors of u32 buffer
 * indices, formats its traces with format_ip4_forward_next_trace and has
 * IP_LOCAL_N_NEXT next nodes, four of which are named below. */
1829 VLIB_REGISTER_NODE (ip4_local_node) =
1831 .function = ip4_local,
1832 .name = "ip4-local",
1833 .vector_size = sizeof (u32),
1834 .format_trace = format_ip4_forward_next_trace,
1835 .n_next_nodes = IP_LOCAL_N_NEXT,
1838 [IP_LOCAL_NEXT_DROP] = "ip4-drop",
1839 [IP_LOCAL_NEXT_PUNT] = "ip4-punt",
1840 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1841 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1846 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Node function of "ip4-local-end-of-arc": runs ip4_local_inline with
 * head_of_feature_arc set to 0, so the feature arc is not started again. */
1849 ip4_local_end_of_arc (vlib_main_t * vm,
1850 vlib_node_runtime_t * node, vlib_frame_t * frame)
1852 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
/* Graph node "ip4-local-end-of-arc": runs ip4_local_end_of_arc and is
 * declared a sibling of "ip4-local". */
1856 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1857 .function = ip4_local_end_of_arc,
1858 .name = "ip4-local-end-of-arc",
1859 .vector_size = sizeof (u32),
1861 .format_trace = format_ip4_forward_next_trace,
1862 .sibling_of = "ip4-local",
1865 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Registers "ip4-local-end-of-arc" as a feature on the "ip4-local" arc
 * with no runs_before constraint. */
1867 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1868 .arc_name = "ip4-local",
1869 .node_name = "ip4-local-end-of-arc",
1870 .runs_before = 0, /* not before any other features */
/*
 * Makes the graph node node_index the handler for locally received
 * packets of IP protocol number protocol: the node is added as a next
 * node of "ip4-local" and the resulting next index is stored in
 * lm->local_next_by_ip_protocol[protocol].
 *
 * protocol must be a valid index into that table (asserted).
 */
1875 ip4_register_protocol (u32 protocol, u32 node_index)
1877 vlib_main_t *vm = vlib_get_main ();
1878 ip4_main_t *im = &ip4_main;
1879 ip_lookup_main_t *lm = &im->lookup_main;
1881 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1882 lm->local_next_by_ip_protocol[protocol] =
1883 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local".
 *
 * Prints a heading, then one line per IP protocol number whose entry in
 * lm->local_next_by_ip_protocol[] is not IP_LOCAL_NEXT_PUNT. Each line
 * starts with the protocol number followed by a node name; the node is
 * found by translating the stored next index into a node index through
 * the next_nodes[] of "ip4-local".
 */
1886 static clib_error_t *
1887 show_ip_local_command_fn (vlib_main_t * vm,
1888 unformat_input_t * input, vlib_cli_command_t * cmd)
1890 ip4_main_t *im = &ip4_main;
1891 ip_lookup_main_t *lm = &im->lookup_main;
1894 vlib_cli_output (vm, "Protocols handled by ip4_local");
1895 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1897 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1899 u32 node_index = vlib_get_node (vm,
1900 ip4_local_node.index)->
1901 next_nodes[lm->local_next_by_ip_protocol[i]];
1902 vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm,
1912 * Display the set of protocols handled by the local IPv4 stack.
1915 * Example of how to display local protocol table:
1916 * @cliexstart{show ip local}
1917 * Protocols handled by ip4_local
/* Binds the CLI path "show ip local" to show_ip_local_command_fn. */
1924 VLIB_CLI_COMMAND (show_ip_local, static) =
1926 .path = "show ip local",
1927 .function = show_ip_local_command_fn,
1928 .short_help = "show ip local",
/*
 * Shared body of the "ip4-arp" (is_glean == 0) and "ip4-glean"
 * (is_glean == 1) nodes: for packets whose adjacency is not yet resolved
 * it sends an ARP request and drops the original packet.
 *
 * Rate limiting: a function-static bitmap indexed by a seeded hash
 * remembers which addresses were already seen. The seeds are refreshed
 * from the random buffer, and the bitmap is walked (presumably cleared;
 * the loop body is not visible here), whenever more than 1e-3 time units
 * of vlib_time_now have passed since the last refresh.
 *
 * Per packet, as far as this extract shows:
 *  - the adjacency comes from ip.adj_index[VLIB_TX]; its sw_if_index
 *    becomes the TX interface of the packet;
 *  - the hash key mixes in the packet destination (glean case) or the
 *    adjacency next hop (ARP case);
 *  - the packet itself always goes to the drop next frame, counted as
 *    either a throttle drop or "request sent";
 *  - packets whose adjacency is already a rewrite, or is of the wrong
 *    kind for this node, get special handling on lines not fully
 *    visible here;
 *  - otherwise an ARP request is taken from the packet template, the
 *    adjacency rewrite is prepended, the sender MAC and IP and the
 *    target IP are filled in, and the new buffer is sent to the next
 *    node named by the adjacency rewrite header.
 *
 * Returns frame->n_vectors.
 */
1933 ip4_arp_inline (vlib_main_t * vm,
1934 vlib_node_runtime_t * node,
1935 vlib_frame_t * frame, int is_glean)
1937 vnet_main_t *vnm = vnet_get_main ();
1938 ip4_main_t *im = &ip4_main;
1939 ip_lookup_main_t *lm = &im->lookup_main;
1940 u32 *from, *to_next_drop;
1941 uword n_left_from, n_left_to_next_drop, next_index;
/* Throttle state, kept across calls. */
1942 static f64 time_last_seed_change = -1e100;
1943 static u32 hash_seeds[3];
1944 static uword hash_bitmap[256 / BITS (uword)];
1947 if (node->flags & VLIB_NODE_FLAG_TRACE)
1948 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Periodically pick new hash seeds and forget what was seen. */
1950 time_now = vlib_time_now (vm);
1951 if (time_now - time_last_seed_change > 1e-3)
1954 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1955 sizeof (hash_seeds));
1956 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1957 hash_seeds[i] = r[i];
1959 /* Mark all hash keys as not seen before. */
1960 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1963 time_last_seed_change = time_now;
1966 from = vlib_frame_vector_args (frame);
1967 n_left_from = frame->n_vectors;
1968 next_index = node->cached_next_index;
1969 if (next_index == IP4_ARP_NEXT_DROP)
1970 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1972 while (n_left_from > 0)
1974 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1975 to_next_drop, n_left_to_next_drop);
1977 while (n_left_from > 0 && n_left_to_next_drop > 0)
1979 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1980 ip_adjacency_t *adj0;
1987 p0 = vlib_get_buffer (vm, pi0);
1989 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1990 adj0 = adj_get (adj_index0);
1991 ip0 = vlib_buffer_get_current (p0);
1997 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1998 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2003 * this is the Glean case, so we are ARPing for the
2004 * packet's destination
2006 a0 ^= ip0->dst_address.data_u32;
2010 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2014 hash_v3_mix32 (a0, b0, c0);
2015 hash_v3_finalize32 (a0, b0, c0);
2017 c0 &= BITS (hash_bitmap) - 1;
2018 m0 = (uword) 1 << (c0 % BITS (uword));
2019 c0 = c0 / BITS (uword);
2021 bm0 = hash_bitmap[c0];
2022 drop0 = (bm0 & m0) != 0;
2024 /* Mark it as seen. */
2025 hash_bitmap[c0] = bm0 | m0;
2029 to_next_drop[0] = pi0;
2031 n_left_to_next_drop -= 1;
2034 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2035 IP4_ARP_ERROR_REQUEST_SENT];
2038 * the adj has been updated to a rewrite but the node the DPO that got
2039 * us here hasn't - yet. no big deal. we'll drop while we wait.
2041 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2048 * Can happen if the control-plane is programming tables
2049 * with traffic flowing; at least that's today's lame excuse.
2051 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2052 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2054 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2057 /* Send ARP request. */
2061 ethernet_arp_header_t *h0;
2062 vnet_hw_interface_t *hw_if0;
2065 vlib_packet_template_get_packet (vm,
2066 &im->ip4_arp_request_packet_template,
2069 /* Seems we're out of buffers */
2070 if (PREDICT_FALSE (!h0))
2073 /* Add rewrite/encap string for ARP packet. */
2074 vnet_rewrite_one_header (adj0[0], h0,
2075 sizeof (ethernet_header_t));
2077 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2079 /* Src ethernet address in ARP header. */
2080 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2082 sizeof (h0->ip4_over_ethernet[0].ethernet));
2086 /* The interface's source address is stashed in the Glean Adj */
2087 h0->ip4_over_ethernet[0].ip4 =
2088 adj0->sub_type.glean.receive_addr.ip4;
2090 /* Copy in destination address we are requesting. This is the
2091 * glean case, so it's the packet's destination.*/
2092 h0->ip4_over_ethernet[1].ip4.data_u32 =
2093 ip0->dst_address.data_u32;
2097 /* Src IP address in ARP header. */
2098 if (ip4_src_address_for_packet (lm, sw_if_index0,
2100 ip4_over_ethernet[0].ip4))
2102 /* No source address available */
2104 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2105 vlib_buffer_free (vm, &bi0, 1);
2109 /* Copy in destination address we are requesting from the
2111 h0->ip4_over_ethernet[1].ip4.data_u32 =
2112 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2115 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2116 b0 = vlib_get_buffer (vm, bi0);
2117 VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
2118 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2120 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2122 vlib_set_next_frame_buffer (vm, node,
2123 adj0->rewrite_header.next_index,
2128 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2131 return frame->n_vectors;
/* Node function of "ip4-arp": ip4_arp_inline with is_glean = 0, so the
 * ARP target is the next hop of the adjacency. */
2135 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2137 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of "ip4-glean": ip4_arp_inline with is_glean = 1, so the
 * ARP target is the destination address of the packet. */
2141 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2143 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter names for the IP4_ARP_ERROR_* codes, shared by the "ip4-arp"
 * and "ip4-glean" nodes. */
2146 static char *ip4_arp_error_strings[] = {
2147 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2148 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2149 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2150 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2151 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2152 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node running ip4_arp. Its error counters come from
 * ip4_arp_error_strings and its drop next node is "error-drop".
 * The .name line is not visible in this extract. */
2156 VLIB_REGISTER_NODE (ip4_arp_node) =
2158 .function = ip4_arp,
2160 .vector_size = sizeof (u32),
2161 .format_trace = format_ip4_forward_next_trace,
2162 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2163 .error_strings = ip4_arp_error_strings,
2164 .n_next_nodes = IP4_ARP_N_NEXT,
2167 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Graph node "ip4-glean": runs ip4_glean and shares the error strings
 * and the "error-drop" next node with the ARP node. */
2171 VLIB_REGISTER_NODE (ip4_glean_node) =
2173 .function = ip4_glean,
2174 .name = "ip4-glean",
2175 .vector_size = sizeof (u32),
2176 .format_trace = format_ip4_forward_next_trace,
2177 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2178 .error_strings = ip4_arp_error_strings,
2179 .n_next_nodes = IP4_ARP_N_NEXT,
2181 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * foreach_notrace_ip4_arp_error lists the ARP error codes to exclude
 * from pcap drop tracing (the list itself is on lines not visible in
 * this extract). arp_notrace_init, registered as an init function,
 * expands that list and registers the matching error of the ip4_arp_node
 * runtime with vnet_pcap_drop_trace_filter_add_del for each entry.
 */
2186 #define foreach_notrace_ip4_arp_error \
2193 arp_notrace_init (vlib_main_t * vm)
2195 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2197 /* don't trace ARP request packets */
2199 vnet_pcap_drop_trace_filter_add_del \
2200 (rt->errors[IP4_ARP_ERROR_##a], \
2202 foreach_notrace_ip4_arp_error;
2207 VLIB_INIT_FUNCTION (arp_notrace_init);
/*
 * Builds one ARP request for dst and hands it to the output node of the
 * hardware interface behind sw_if_index.
 *
 * Error returns visible here (each a clib_error_t with a message):
 *  - the software interface is not admin-up;
 *  - no interface address matches dst (also sets vnm->api_errno to
 *    VNET_API_ERROR_NO_MATCHING_INTERFACE);
 *  - the hardware interface has no hw_address.
 * Returns 0 on success.
 *
 * The request is taken from the ARP packet template, filled with the
 * interface MAC, the source address and dst, then prefixed with the
 * rewrite of a neighbor adjacency for dst, or of a glean adjacency when
 * the neighbor adjacency is already a rewrite.
 *
 * NOTE(review): no NULL check on the template packet h is visible here,
 * whereas ip4_arp_inline guards the same call with PREDICT_FALSE (!h0);
 * and the buffer obtained from the template does not appear to be freed
 * on the hw_address error return. Both may sit on lines not visible in
 * this extract -- confirm.
 */
2210 /* Send an ARP request to see if given destination is reachable on given interface. */
2212 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2214 vnet_main_t *vnm = vnet_get_main ();
2215 ip4_main_t *im = &ip4_main;
2216 ethernet_arp_header_t *h;
2218 ip_interface_address_t *ia;
2219 ip_adjacency_t *adj;
2220 vnet_hw_interface_t *hi;
2221 vnet_sw_interface_t *si;
2226 si = vnet_get_sw_interface (vnm, sw_if_index);
2228 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2230 return clib_error_return (0, "%U: interface %U down",
2231 format_ip4_address, dst,
2232 format_vnet_sw_if_index_name, vnm,
/* Find the interface address to use as the ARP sender address. */
2237 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2240 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2241 return clib_error_return
2243 "no matching interface address for destination %U (interface %U)",
2244 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2248 h = vlib_packet_template_get_packet (vm,
2249 &im->ip4_arp_request_packet_template,
2252 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2253 if (PREDICT_FALSE (!hi->hw_address))
2255 return clib_error_return (0, "%U: interface %U do not support ip probe",
2256 format_ip4_address, dst,
2257 format_vnet_sw_if_index_name, vnm,
/* Sender MAC and IP, then the target IP being probed. */
2261 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2262 sizeof (h->ip4_over_ethernet[0].ethernet));
2264 h->ip4_over_ethernet[0].ip4 = src[0];
2265 h->ip4_over_ethernet[1].ip4 = dst[0];
2267 b = vlib_get_buffer (vm, bi);
2268 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2269 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2271 ip46_address_t nh = {
2275 ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
2276 VNET_LINK_IP4, &nh, sw_if_index);
2279 /* Peer has been previously resolved, retrieve glean adj instead */
2280 if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE)
2283 ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh);
2287 /* Add encapsulation string for software interface (e.g. ethernet header). */
2288 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2289 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet directly to the interface output node. */
2292 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2293 u32 *to_next = vlib_frame_vector_args (f);
2296 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2300 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: DROP is the initial next
 * for every packet, ICMP_ERROR is chosen when the TTL expires. */
2305 IP4_REWRITE_NEXT_DROP,
2306 IP4_REWRITE_NEXT_ICMP_ERROR,
2307 } ip4_rewrite_next_t;
2310 ip4_rewrite_inline (vlib_main_t * vm,
2311 vlib_node_runtime_t * node,
2312 vlib_frame_t * frame,
2313 int do_counters, int is_midchain, int is_mcast)
2315 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2316 u32 *from = vlib_frame_vector_args (frame);
2317 u32 n_left_from, n_left_to_next, *to_next, next_index;
2318 vlib_node_runtime_t *error_node =
2319 vlib_node_get_runtime (vm, ip4_input_node.index);
2321 n_left_from = frame->n_vectors;
2322 next_index = node->cached_next_index;
2323 u32 thread_index = vlib_get_thread_index ();
2325 while (n_left_from > 0)
2327 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2329 while (n_left_from >= 4 && n_left_to_next >= 2)
2331 ip_adjacency_t *adj0, *adj1;
2332 vlib_buffer_t *p0, *p1;
2333 ip4_header_t *ip0, *ip1;
2334 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2335 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2336 u32 tx_sw_if_index0, tx_sw_if_index1;
2338 /* Prefetch next iteration. */
2340 vlib_buffer_t *p2, *p3;
2342 p2 = vlib_get_buffer (vm, from[2]);
2343 p3 = vlib_get_buffer (vm, from[3]);
2345 vlib_prefetch_buffer_header (p2, STORE);
2346 vlib_prefetch_buffer_header (p3, STORE);
2348 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2349 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2352 pi0 = to_next[0] = from[0];
2353 pi1 = to_next[1] = from[1];
2358 n_left_to_next -= 2;
2360 p0 = vlib_get_buffer (vm, pi0);
2361 p1 = vlib_get_buffer (vm, pi1);
2363 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2364 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2367 * pre-fetch the per-adjacency counters
2371 vlib_prefetch_combined_counter (&adjacency_counters,
2372 thread_index, adj_index0);
2373 vlib_prefetch_combined_counter (&adjacency_counters,
2374 thread_index, adj_index1);
2377 ip0 = vlib_buffer_get_current (p0);
2378 ip1 = vlib_buffer_get_current (p1);
2380 error0 = error1 = IP4_ERROR_NONE;
2381 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2383 /* Decrement TTL & update checksum.
2384 Works either endian, so no need for byte swap. */
2385 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2387 i32 ttl0 = ip0->ttl;
2389 /* Input node should have rejected packets with ttl 0. */
2390 ASSERT (ip0->ttl > 0);
2392 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2393 checksum0 += checksum0 >= 0xffff;
2395 ip0->checksum = checksum0;
2400 * If the ttl drops below 1 when forwarding, generate
2403 if (PREDICT_FALSE (ttl0 <= 0))
2405 error0 = IP4_ERROR_TIME_EXPIRED;
2406 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2407 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2408 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2410 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2413 /* Verify checksum. */
2414 ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2415 (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2419 p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2421 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2423 i32 ttl1 = ip1->ttl;
2425 /* Input node should have rejected packets with ttl 0. */
2426 ASSERT (ip1->ttl > 0);
2428 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2429 checksum1 += checksum1 >= 0xffff;
2431 ip1->checksum = checksum1;
2436 * If the ttl drops below 1 when forwarding, generate
2439 if (PREDICT_FALSE (ttl1 <= 0))
2441 error1 = IP4_ERROR_TIME_EXPIRED;
2442 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2443 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2444 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2446 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2449 /* Verify checksum. */
2450 ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) ||
2451 (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2455 p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2458 /* Rewrite packet header and updates lengths. */
2459 adj0 = adj_get (adj_index0);
2460 adj1 = adj_get (adj_index1);
2462 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2463 rw_len0 = adj0[0].rewrite_header.data_bytes;
2464 rw_len1 = adj1[0].rewrite_header.data_bytes;
2465 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2466 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2468 /* Check MTU of outgoing interface. */
2470 (vlib_buffer_length_in_chain (vm, p0) >
2472 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2475 (vlib_buffer_length_in_chain (vm, p1) >
2477 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2482 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2483 vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2484 IP4_ERROR_SAME_INTERFACE : error0);
2485 error1 = ((adj1[0].rewrite_header.sw_if_index ==
2486 vnet_buffer (p1)->sw_if_index[VLIB_RX]) ?
2487 IP4_ERROR_SAME_INTERFACE : error1);
2490 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2491 * to see the IP header */
2492 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2494 next0 = adj0[0].rewrite_header.next_index;
2495 p0->current_data -= rw_len0;
2496 p0->current_length += rw_len0;
2497 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2498 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2501 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2502 vnet_feature_arc_start (lm->output_feature_arc_index,
2503 tx_sw_if_index0, &next0, p0);
2505 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2507 next1 = adj1[0].rewrite_header.next_index;
2508 p1->current_data -= rw_len1;
2509 p1->current_length += rw_len1;
2511 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2512 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2515 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2516 vnet_feature_arc_start (lm->output_feature_arc_index,
2517 tx_sw_if_index1, &next1, p1);
2520 /* Guess we are only writing on simple Ethernet header. */
2521 vnet_rewrite_two_headers (adj0[0], adj1[0],
2522 ip0, ip1, sizeof (ethernet_header_t));
2525 * Bump the per-adjacency counters
2529 vlib_increment_combined_counter
2530 (&adjacency_counters,
2533 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2535 vlib_increment_combined_counter
2536 (&adjacency_counters,
2539 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2544 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2545 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2550 * copy bytes from the IP address into the MAC rewrite
2552 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2553 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2556 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2557 to_next, n_left_to_next,
2558 pi0, pi1, next0, next1);
2561 while (n_left_from > 0 && n_left_to_next > 0)
2563 ip_adjacency_t *adj0;
2566 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2567 u32 tx_sw_if_index0;
2569 pi0 = to_next[0] = from[0];
2571 p0 = vlib_get_buffer (vm, pi0);
2573 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2575 adj0 = adj_get (adj_index0);
2577 ip0 = vlib_buffer_get_current (p0);
2579 error0 = IP4_ERROR_NONE;
2580 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2582 /* Decrement TTL & update checksum. */
2583 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)))
2585 i32 ttl0 = ip0->ttl;
2587 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2589 checksum0 += checksum0 >= 0xffff;
2591 ip0->checksum = checksum0;
2593 ASSERT (ip0->ttl > 0);
2599 ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) ||
2600 (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM));
2602 if (PREDICT_FALSE (ttl0 <= 0))
2605 * If the ttl drops below 1 when forwarding, generate
2608 error0 = IP4_ERROR_TIME_EXPIRED;
2609 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2610 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2611 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2612 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2618 p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED;
2622 vlib_prefetch_combined_counter (&adjacency_counters,
2623 thread_index, adj_index0);
2625 /* Guess we are only writing on simple Ethernet header. */
2626 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2630 * copy bytes from the IP address into the MAC rewrite
2632 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2635 /* Update packet buffer attributes/set output interface. */
2636 rw_len0 = adj0[0].rewrite_header.data_bytes;
2637 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2640 vlib_increment_combined_counter
2641 (&adjacency_counters,
2642 thread_index, adj_index0, 1,
2643 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2645 /* Check MTU of outgoing interface. */
2646 error0 = (vlib_buffer_length_in_chain (vm, p0)
2647 > adj0[0].rewrite_header.max_l3_packet_bytes
2648 ? IP4_ERROR_MTU_EXCEEDED : error0);
2651 error0 = ((adj0[0].rewrite_header.sw_if_index ==
2652 vnet_buffer (p0)->sw_if_index[VLIB_RX]) ?
2653 IP4_ERROR_SAME_INTERFACE : error0);
2655 p0->error = error_node->errors[error0];
2657 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2658 * to see the IP header */
2659 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2661 p0->current_data -= rw_len0;
2662 p0->current_length += rw_len0;
2663 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2665 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2666 next0 = adj0[0].rewrite_header.next_index;
2670 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2674 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2675 vnet_feature_arc_start (lm->output_feature_arc_index,
2676 tx_sw_if_index0, &next0, p0);
2683 n_left_to_next -= 1;
2685 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2686 to_next, n_left_to_next,
2690 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2693 /* Need to do trace after rewrites to pick up new packet data. */
2694 if (node->flags & VLIB_NODE_FLAG_TRACE)
2695 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2697 return frame->n_vectors;
2701 /** @brief IPv4 rewrite node.
2704 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2705 header checksum, fetch the ip adjacency, check the outbound mtu,
2706 apply the adjacency rewrite, and send pkts to the adjacency
2707 rewrite header's rewrite_next_index.
2709 @param vm vlib_main_t corresponding to the current thread
2710 @param node vlib_node_runtime_t
2711 @param frame vlib_frame_t whose contents should be dispatched
2713 @par Graph mechanics: buffer metadata, next index usage
2716 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2717 - the rewrite adjacency index
2718 - <code>adj->lookup_next_index</code>
2719 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2720 the packet will be dropped.
2721 - <code>adj->rewrite_header</code>
2722 - Rewrite string length, rewrite string, next_index
2725 - <code>b->current_data, b->current_length</code>
2726 - Updated net of applying the rewrite string
2728 <em>Next Indices:</em>
2729 - <code> adj->rewrite_header.next_index </code>
/* Dispatch function registered for the "ip4-rewrite" node.  Delegates to
 * ip4_rewrite_inline, passing 1 as the fourth argument when
 * adj_are_counters_enabled () is true and 0 otherwise; the fifth and sixth
 * arguments are both 0.
 * NOTE(review): the last three arguments look like (do_counters,
 * is_midchain, is_mcast) flags -- confirm against ip4_rewrite_inline. */
2733 ip4_rewrite (vlib_main_t * vm,
2734 vlib_node_runtime_t * node, vlib_frame_t * frame)
2736 if (adj_are_counters_enabled ())
2737 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2739 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/* Dispatch function registered for the "ip4-midchain" node.  Same shape as
 * the other rewrite dispatchers: the fourth argument to ip4_rewrite_inline
 * follows adj_are_counters_enabled (), the fifth is always 1 and the sixth
 * always 0.
 * NOTE(review): the fifth argument presumably selects the midchain
 * behaviour -- confirm against ip4_rewrite_inline. */
2743 ip4_midchain (vlib_main_t * vm,
2744 vlib_node_runtime_t * node, vlib_frame_t * frame)
2746 if (adj_are_counters_enabled ())
2747 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2749 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/* Dispatch function registered for the "ip4-rewrite-mcast" node.  Calls
 * ip4_rewrite_inline with the fourth argument following
 * adj_are_counters_enabled (), the fifth always 0 and the sixth always 1.
 * NOTE(review): the sixth argument presumably selects the multicast
 * behaviour -- confirm against ip4_rewrite_inline. */
2753 ip4_rewrite_mcast (vlib_main_t * vm,
2754 vlib_node_runtime_t * node, vlib_frame_t * frame)
2756 if (adj_are_counters_enabled ())
2757 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2759 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/* Dispatch function registered for the "ip4-mcast-midchain" node.  Calls
 * ip4_rewrite_inline with the fourth argument following
 * adj_are_counters_enabled () and both the fifth and sixth arguments set
 * to 1.
 * NOTE(review): presumably the midchain and multicast variants combined --
 * confirm against ip4_rewrite_inline. */
2763 ip4_mcast_midchain (vlib_main_t * vm,
2764 vlib_node_runtime_t * node, vlib_frame_t * frame)
2766 if (adj_are_counters_enabled ())
2767 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2769 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
/* Registration of the "ip4-rewrite" graph node: dispatched by ip4_rewrite,
 * with u32-sized vector elements and traces formatted by
 * format_ip4_rewrite_trace. */
2773 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2774 .function = ip4_rewrite,
2775 .name = "ip4-rewrite",
2776 .vector_size = sizeof (u32),
2778 .format_trace = format_ip4_rewrite_trace,
/* Next-node names, indexed by the IP4_REWRITE_NEXT_* values. */
2782 [IP4_REWRITE_NEXT_DROP] = "ip4-drop",
2783 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2786 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Registration of the "ip4-rewrite-mcast" graph node: dispatched by
 * ip4_rewrite_mcast, with u32-sized vector elements and traces formatted by
 * format_ip4_rewrite_trace.
 * NOTE(review): sibling_of presumably makes this node share the next-node
 * arcs of "ip4-rewrite" -- confirm against the vlib node documentation. */
2788 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2789 .function = ip4_rewrite_mcast,
2790 .name = "ip4-rewrite-mcast",
2791 .vector_size = sizeof (u32),
2793 .format_trace = format_ip4_rewrite_trace,
2794 .sibling_of = "ip4-rewrite",
2796 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/* Registration of the "ip4-mcast-midchain" graph node: dispatched by
 * ip4_mcast_midchain, with u32-sized vector elements and traces formatted
 * by format_ip4_rewrite_trace.  Unlike the other rewrite node
 * registrations in this file, this one passes `static` to
 * VLIB_REGISTER_NODE.
 * NOTE(review): sibling_of presumably makes this node share the next-node
 * arcs of "ip4-rewrite" -- confirm against the vlib node documentation. */
2798 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2799 .function = ip4_mcast_midchain,
2800 .name = "ip4-mcast-midchain",
2801 .vector_size = sizeof (u32),
2803 .format_trace = format_ip4_rewrite_trace,
2804 .sibling_of = "ip4-rewrite",
2806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
/* Registration of the "ip4-midchain" graph node: dispatched by
 * ip4_midchain, with u32-sized vector elements.
 * NOTE(review): sibling_of presumably makes this node share the next-node
 * arcs of "ip4-rewrite" -- confirm against the vlib node documentation. */
2808 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2809 .function = ip4_midchain,
2810 .name = "ip4-midchain",
2811 .vector_size = sizeof (u32),
/* NOTE(review): the other rewrite nodes in this file use
 * format_ip4_rewrite_trace; confirm the different formatter here is
 * intentional. */
2812 .format_trace = format_ip4_forward_next_trace,
2813 .sibling_of = "ip4-rewrite",
2815 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* Cross-check the mtrie against the FIB table for a single address.
 * Looks up address `a` in the mtrie of the FIB at index `fib_index0`,
 * extracts the index stored in the resulting leaf, and returns non-zero
 * when that index equals the result of ip4_fib_table_lookup_lb () for the
 * same FIB and address, zero otherwise. */
2819 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2821 ip4_fib_mtrie_t *mtrie0;
2822 ip4_fib_mtrie_leaf_t leaf0;
2825 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* One initial step, then two further steps with trailing arguments 2 and 3
 * (presumably the address byte index consumed by each step -- confirm
 * against the mtrie API). */
2827 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2828 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2829 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2831 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Both lookup paths must agree for the address to validate. */
2833 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/* CLI handler behind the "test lookup" command.
 * Accepts "table <n>", "count <f>" and an IPv4 address; any other token
 * produces an "unknown input" error.  It then calls ip4_lookup_validate ()
 * once per iteration of a loop bounded by n, starting from the parsed
 * address and advancing the address by one after each call, and finally
 * reports the outcome through vlib_cli_output ().
 * NOTE(review): the statements that assign n and update errors are not
 * visible in this view -- presumably n is derived from count and errors
 * counts failed validations; confirm. */
2836 static clib_error_t *
2837 test_lookup_command_fn (vlib_main_t * vm,
2838 unformat_input_t * input, vlib_cli_command_t * cmd)
2845 ip4_address_t ip4_base_address;
2848 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2850 if (unformat (input, "table %d", &table_id))
2852 /* Make sure the entry exists. */
2853 fib = ip4_fib_get (table_id);
2854 if ((fib) && (fib->index != table_id))
2855 return clib_error_return (0, "<fib-index> %d does not exist",
2858 else if (unformat (input, "count %f", &count))
2861 else if (unformat (input, "%U",
2862 unformat_ip4_address, &ip4_base_address))
2865 return clib_error_return (0, "unknown input `%U'",
2866 format_unformat_error, input);
2871 for (i = 0; i < n; i++)
2873 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Advance to the next address: convert to host byte order, add one, and
 * convert back to network byte order. */
2876 ip4_base_address.as_u32 =
2877 clib_host_to_net_u32 (1 +
2878 clib_net_to_host_u32 (ip4_base_address.as_u32));
2882 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2884 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2890 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2891 * given FIB table to determine if there is a conflict with the
2892 * adjacency table. The fib-id can be determined by using the
2893 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2896 * @todo This command uses fib-id, other commands use table-id (not
2897 * just a name, they are different indexes). Would like to change this
2898 * to table-id for consistency.
2901 * Example of how to run the test lookup command:
2902 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2903 * No errors in 2 lookups
/* Registers the "test lookup" CLI path and binds it to
 * test_lookup_command_fn. */
2907 VLIB_CLI_COMMAND (lookup_test_command, static) =
2909 .path = "test lookup",
2910 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2911 .function = test_lookup_command_fn,
/* Set the flow-hash configuration of the IPv4 FIB identified by table_id.
 * Resolves table_id to a FIB index with fib_table_find (); returns
 * VNET_API_ERROR_NO_SUCH_FIB when that yields ~0, otherwise passes the
 * resolved index to fib_table_set_flow_hash_config ().
 * NOTE(review): the final argument of that call and the success return
 * value are not visible in this view -- confirm. */
2916 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2920 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
2922 if (~0 == fib_index)
2923 return VNET_API_ERROR_NO_SUCH_FIB;
2925 fib_table_set_flow_hash_config (fib_index, FIB_PROTOCOL_IP4,
/* CLI handler behind the "set ip flow-hash" command.
 * Parses "table <id>" plus the keywords generated by expanding
 * foreach_flow_hash_bit; each matched keyword ORs its value into
 * flow_hash_config and sets `matched`.  The accumulated configuration is
 * then applied with vnet_set_ip4_flow_hash ().  A
 * VNET_API_ERROR_NO_SUCH_FIB result is reported as "no such FIB table";
 * another branch logs a "BUG: illegal flow hash config" warning.
 * NOTE(review): the condition guarding the "unknown input" error and the
 * remaining switch labels are not visible in this view -- confirm. */
2931 static clib_error_t *
2932 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2933 unformat_input_t * input,
2934 vlib_cli_command_t * cmd)
2938 u32 flow_hash_config = 0;
2941 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2943 if (unformat (input, "table %d", &table_id))
2946 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2947 foreach_flow_hash_bit
2954 return clib_error_return (0, "unknown input `%U'",
2955 format_unformat_error, input);
/* Apply the accumulated configuration to the requested table. */
2957 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2963 case VNET_API_ERROR_NO_SUCH_FIB:
2964 return clib_error_return (0, "no such FIB table %d", table_id);
2967 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2975 * Configure the set of IPv4 fields used by the flow hash.
2978 * Example of how to set the flow hash on a given table:
2979 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2980 * Example of how to display the configured flow hash:
2981 * @cliexstart{show ip fib}
2982 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2985 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2986 * [0] [@0]: dpo-drop ip6
2989 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2990 * [0] [@0]: dpo-drop ip6
2993 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2994 * [0] [@0]: dpo-drop ip6
2997 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2998 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3001 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3002 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3003 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3004 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3005 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3008 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3009 * [0] [@0]: dpo-drop ip6
3010 * 255.255.255.255/32
3012 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3013 * [0] [@0]: dpo-drop ip6
3014 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3017 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3018 * [0] [@0]: dpo-drop ip6
3021 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3022 * [0] [@0]: dpo-drop ip6
3025 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3026 * [0] [@4]: ipv4-glean: af_packet0
3029 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3030 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3033 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3034 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3037 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3038 * [0] [@4]: ipv4-glean: af_packet1
3041 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3042 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3045 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3046 * [0] [@0]: dpo-drop ip6
3049 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3050 * [0] [@0]: dpo-drop ip6
3051 * 255.255.255.255/32
3053 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3054 * [0] [@0]: dpo-drop ip6
/* Registers the "set ip flow-hash" CLI path and binds it to
 * set_ip_flow_hash_command_fn. */
3058 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3060 .path = "set ip flow-hash",
3062 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3063 .function = set_ip_flow_hash_command_fn,
/* Bind (or unbind) a classifier table to an interface for IPv4.
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * index in the software-interface pool, and VNET_API_ERROR_NO_SUCH_ENTRY
 * when table_index is not ~0 and is a free index in the classifier table
 * pool.  Otherwise it records table_index for the interface and, if the
 * interface has a first IPv4 address, updates the FIB entry for a prefix
 * built from that address: a table_index other than ~0 installs a
 * FIB_SOURCE_CLASSIFY special DPO entry, ~0 removes it.
 * NOTE(review): the prefix length and the success return value are not
 * visible in this view -- confirm. */
3068 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3071 vnet_main_t *vnm = vnet_get_main ();
3072 vnet_interface_main_t *im = &vnm->interface_main;
3073 ip4_main_t *ipm = &ip4_main;
3074 ip_lookup_main_t *lm = &ipm->lookup_main;
3075 vnet_classify_main_t *cm = &vnet_classify_main;
3076 ip4_address_t *if_addr;
/* Reject interfaces that do not exist. */
3078 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3079 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 is accepted without a table lookup; any other index must be live. */
3081 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3082 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Grow the per-interface vector if needed, then store the binding. */
3084 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3085 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3087 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* The FIB is only touched when the interface has an IPv4 address. */
3089 if (NULL != if_addr)
3091 fib_prefix_t pfx = {
3093 .fp_proto = FIB_PROTOCOL_IP4,
3094 .fp_addr.ip4 = *if_addr,
3098 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3102 if (table_index != (u32) ~ 0)
3104 dpo_id_t dpo = DPO_INVALID;
3109 classify_dpo_create (DPO_PROTO_IP4, table_index));
3111 fib_table_entry_special_dpo_add (fib_index,
3113 FIB_SOURCE_CLASSIFY,
3114 FIB_ENTRY_FLAG_NONE, &dpo);
/* table_index == ~0: drop the classify-sourced entry for the prefix. */
3119 fib_table_entry_special_remove (fib_index,
3120 &pfx, FIB_SOURCE_CLASSIFY);
/* CLI handler behind the "set ip classify" command.
 * Parses "table-index <n>" and "intfc <interface>", requires both to be
 * supplied, then calls vnet_set_ip4_classify_intfc ().  The
 * VNET_API_ERROR_NO_MATCHING_INTERFACE and VNET_API_ERROR_NO_SUCH_ENTRY
 * results are reported as "No such interface" and "No such classifier
 * table" respectively. */
3127 static clib_error_t *
3128 set_ip_classify_command_fn (vlib_main_t * vm,
3129 unformat_input_t * input,
3130 vlib_cli_command_t * cmd)
3132 u32 table_index = ~0;
/* Tracked separately from table_index because ~0 is itself a value the
 * callee accepts. */
3133 int table_index_set = 0;
3134 u32 sw_if_index = ~0;
3137 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3139 if (unformat (input, "table-index %d", &table_index))
3140 table_index_set = 1;
3141 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3142 vnet_get_main (), &sw_if_index))
3148 if (table_index_set == 0)
3149 return clib_error_return (0, "classify table-index must be specified");
3151 if (sw_if_index == ~0)
3152 return clib_error_return (0, "interface / subif must be specified");
3154 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
/* Map API error codes to CLI error messages. */
3161 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3162 return clib_error_return (0, "No such interface");
3164 case VNET_API_ERROR_NO_SUCH_ENTRY:
3165 return clib_error_return (0, "No such classifier table");
3171 * Assign a classification table to an interface. The classification
3172 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3173 * commands. Once the table is created, use this command to filter packets
3177 * Example of how to assign a classification table to an interface:
3178 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the "set ip classify" CLI path and binds it to
 * set_ip_classify_command_fn. */
3181 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3183 .path = "set ip classify",
3185 "set ip classify intfc <interface> table-index <classify-idx>",
3186 .function = set_ip_classify_command_fn,
/* Early configuration handler for the "ip" section.
 * Parses "heap-size <size>" using unformat_memory_size, returns an
 * "invalid heap-size parameter" error for input it cannot parse, and
 * stores the parsed size in ip4_main.mtrie_heap_size.
 * NOTE(review): the declaration and initial value of heapsize are not
 * visible in this view -- confirm what is stored when no heap-size is
 * given. */
3190 static clib_error_t *
3191 ip4_config (vlib_main_t * vm, unformat_input_t * input)
3193 ip4_main_t *im = &ip4_main;
3196 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3198 if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize))
3201 return clib_error_return (0,
3202 "invalid heap-size parameter `%U'",
3203 format_unformat_error, input);
3206 im->mtrie_heap_size = heapsize;
/* Register ip4_config as the early handler for the "ip" section. */
3211 VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip");
3214 * fd.io coding-style-patch-verification: ON
3217 * eval: (c-set-style "gnu")