2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 * @brief IPv4 Forwarding.
59 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the common trace helper. It is defined near the end
   of this file and is called by the lookup and load-balance node functions
   below when VLIB_NODE_FLAG_TRACE is set on the node. */
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
/**
 * Core of the IPv4 lookup: maps every buffer of the frame to a load-balance
 * object and enqueues it to the next node named by the selected bucket.
 *
 * Per buffer, as far as the visible statements show:
 *  - The FIB index is read from im->fib_index_by_sw_if_index for the RX
 *    sw_if_index; a sw_if_index[VLIB_TX] other than ~0 is used instead.
 *  - Unless lookup_for_responses_to_locally_received_packets is set, the
 *    destination address is looked up in the mtrie of that FIB in three steps
 *    and the load-balance index is taken from the resulting leaf. When the
 *    flag is set, the index is read from ip.adj_index[VLIB_RX] and the mtrie
 *    is not consulted.
 *  - ip.flow_hash is reset to 0 and only computed with ip4_compute_flow_hash
 *    when the load-balance has more than one bucket.
 *  - The bucket is fetched with load_balance_get_bucket_i; its dpoi_index is
 *    written to ip.adj_index[VLIB_TX] and its dpoi_next_node is the next node.
 *  - The lbm_to_counters combined counter of the load-balance is incremented.
 *
 * @param vm    vlib main of the current thread
 * @param node  runtime of the node being dispatched
 * @param lookup_for_responses_to_locally_received_packets
 *              non-zero to skip the mtrie walk as described above
 * @return frame->n_vectors
 *
 * NOTE(review): the quad loop adds sizeof (ethernet_header_t) to the byte
 * count passed to the counter while the single loop does not - confirm which
 * accounting is intended.
 */
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 thread_index = vlib_get_thread_index ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Quad loop: four buffers are processed per pass and the following four are
   prefetched, hence the requirement of at least eight remaining buffers. */
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 u32 pi0, fib_index0, lb_index0;
99 u32 pi1, fib_index1, lb_index1;
100 u32 pi2, fib_index2, lb_index2;
101 u32 pi3, fib_index3, lb_index3;
102 flow_hash_config_t flow_hash_config0, flow_hash_config1;
103 flow_hash_config_t flow_hash_config2, flow_hash_config3;
104 u32 hash_c0, hash_c1, hash_c2, hash_c3;
105 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
107 /* Prefetch next iteration. */
109 vlib_buffer_t *p4, *p5, *p6, *p7;
111 p4 = vlib_get_buffer (vm, from[4]);
112 p5 = vlib_get_buffer (vm, from[5]);
113 p6 = vlib_get_buffer (vm, from[6]);
114 p7 = vlib_get_buffer (vm, from[7]);
116 vlib_prefetch_buffer_header (p4, LOAD);
117 vlib_prefetch_buffer_header (p5, LOAD);
118 vlib_prefetch_buffer_header (p6, LOAD);
119 vlib_prefetch_buffer_header (p7, LOAD);
121 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
/* Speculatively copy the buffer indices to the current next frame. */
127 pi0 = to_next[0] = from[0];
128 pi1 = to_next[1] = from[1];
129 pi2 = to_next[2] = from[2];
130 pi3 = to_next[3] = from[3];
137 p0 = vlib_get_buffer (vm, pi0);
138 p1 = vlib_get_buffer (vm, pi1);
139 p2 = vlib_get_buffer (vm, pi2);
140 p3 = vlib_get_buffer (vm, pi3);
142 ip0 = vlib_buffer_get_current (p0);
143 ip1 = vlib_buffer_get_current (p1);
144 ip2 = vlib_buffer_get_current (p2);
145 ip3 = vlib_buffer_get_current (p3);
147 dst_addr0 = &ip0->dst_address;
148 dst_addr1 = &ip1->dst_address;
149 dst_addr2 = &ip2->dst_address;
150 dst_addr3 = &ip3->dst_address;
/* FIB selection, part 1: the FIB bound to the receive interface. */
153 vec_elt (im->fib_index_by_sw_if_index,
154 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
156 vec_elt (im->fib_index_by_sw_if_index,
157 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
159 vec_elt (im->fib_index_by_sw_if_index,
160 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
162 vec_elt (im->fib_index_by_sw_if_index,
163 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
/* FIB selection, part 2: a sw_if_index[VLIB_TX] other than ~0 overrides the
   per-interface FIB index. */
165 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
166 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
168 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
169 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
171 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
172 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
174 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
175 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
/* Mtrie walk, step one of three, on the destination address. */
178 if (!lookup_for_responses_to_locally_received_packets)
180 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
181 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
182 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
183 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
185 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
186 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
187 leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
188 leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
/* tcpN point just past the fixed IPv4 header; they are marked unused. */
191 tcp0 = (void *) (ip0 + 1);
192 tcp1 = (void *) (ip1 + 1);
193 tcp2 = (void *) (ip2 + 1);
194 tcp3 = (void *) (ip3 + 1);
196 if (!lookup_for_responses_to_locally_received_packets)
198 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
199 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
200 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
201 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
204 if (!lookup_for_responses_to_locally_received_packets)
206 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
207 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
208 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
209 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
/* Load-balance index: taken from the RX adjacency slot for responses to
   locally received packets, from the mtrie leaf in the other case. */
212 if (lookup_for_responses_to_locally_received_packets)
214 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
215 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
216 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
217 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
221 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
222 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
223 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
224 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
/* Index 0 is asserted never to be a valid lookup result. */
227 ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
228 lb0 = load_balance_get (lb_index0);
229 lb1 = load_balance_get (lb_index1);
230 lb2 = load_balance_get (lb_index2);
231 lb3 = load_balance_get (lb_index3);
233 /* Use flow hash to compute multipath adjacency. */
234 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
235 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
236 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
237 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
238 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
240 flow_hash_config0 = lb0->lb_hash_config;
241 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
242 ip4_compute_flow_hash (ip0, flow_hash_config0);
244 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
246 flow_hash_config1 = lb1->lb_hash_config;
247 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
248 ip4_compute_flow_hash (ip1, flow_hash_config1);
250 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
252 flow_hash_config2 = lb2->lb_hash_config;
253 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
254 ip4_compute_flow_hash (ip2, flow_hash_config2);
256 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
258 flow_hash_config3 = lb3->lb_hash_config;
259 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
260 ip4_compute_flow_hash (ip3, flow_hash_config3);
/* The bucket count is asserted to be a non-zero power of two; the bucket
   fetch below uses lb_n_buckets_minus_1 (presumably as a mask on the hash). */
263 ASSERT (lb0->lb_n_buckets > 0);
264 ASSERT (is_pow2 (lb0->lb_n_buckets));
265 ASSERT (lb1->lb_n_buckets > 0);
266 ASSERT (is_pow2 (lb1->lb_n_buckets));
267 ASSERT (lb2->lb_n_buckets > 0);
268 ASSERT (is_pow2 (lb2->lb_n_buckets));
269 ASSERT (lb3->lb_n_buckets > 0);
270 ASSERT (is_pow2 (lb3->lb_n_buckets));
272 dpo0 = load_balance_get_bucket_i (lb0,
274 (lb0->lb_n_buckets_minus_1)));
275 dpo1 = load_balance_get_bucket_i (lb1,
277 (lb1->lb_n_buckets_minus_1)));
278 dpo2 = load_balance_get_bucket_i (lb2,
280 (lb2->lb_n_buckets_minus_1)));
281 dpo3 = load_balance_get_bucket_i (lb3,
283 (lb3->lb_n_buckets_minus_1)));
/* Publish the result: next node and DPO index for the downstream node. */
285 next0 = dpo0->dpoi_next_node;
286 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
287 next1 = dpo1->dpoi_next_node;
288 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
289 next2 = dpo2->dpoi_next_node;
290 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
291 next3 = dpo3->dpoi_next_node;
292 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/* NOTE(review): the byte count here includes sizeof (ethernet_header_t); the
   single loop further down counts the buffer chain length only. */
294 vlib_increment_combined_counter
295 (cm, thread_index, lb_index0, 1,
296 vlib_buffer_length_in_chain (vm, p0)
297 + sizeof (ethernet_header_t));
298 vlib_increment_combined_counter
299 (cm, thread_index, lb_index1, 1,
300 vlib_buffer_length_in_chain (vm, p1)
301 + sizeof (ethernet_header_t));
302 vlib_increment_combined_counter
303 (cm, thread_index, lb_index2, 1,
304 vlib_buffer_length_in_chain (vm, p2)
305 + sizeof (ethernet_header_t));
306 vlib_increment_combined_counter
307 (cm, thread_index, lb_index3, 1,
308 vlib_buffer_length_in_chain (vm, p3)
309 + sizeof (ethernet_header_t));
311 vlib_validate_buffer_enqueue_x4 (vm, node, next,
312 to_next, n_left_to_next,
314 next0, next1, next2, next3);
/* Single loop: same processing as above, one buffer at a time. */
317 while (n_left_from > 0 && n_left_to_next > 0)
321 __attribute__ ((unused)) tcp_header_t *tcp0;
322 ip_lookup_next_t next0;
323 const load_balance_t *lb0;
324 ip4_fib_mtrie_t *mtrie0;
325 ip4_fib_mtrie_leaf_t leaf0;
326 ip4_address_t *dst_addr0;
327 u32 pi0, fib_index0, lbi0;
328 flow_hash_config_t flow_hash_config0;
329 const dpo_id_t *dpo0;
335 p0 = vlib_get_buffer (vm, pi0);
337 ip0 = vlib_buffer_get_current (p0);
339 dst_addr0 = &ip0->dst_address;
342 vec_elt (im->fib_index_by_sw_if_index,
343 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
345 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
346 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
348 if (!lookup_for_responses_to_locally_received_packets)
350 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
352 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
355 tcp0 = (void *) (ip0 + 1);
357 if (!lookup_for_responses_to_locally_received_packets)
358 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
360 if (!lookup_for_responses_to_locally_received_packets)
361 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
363 if (lookup_for_responses_to_locally_received_packets)
364 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
367 /* Handle default route. */
368 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
372 lb0 = load_balance_get (lbi0);
374 /* Use flow hash to compute multipath adjacency. */
375 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
376 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
378 flow_hash_config0 = lb0->lb_hash_config;
380 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
381 ip4_compute_flow_hash (ip0, flow_hash_config0);
384 ASSERT (lb0->lb_n_buckets > 0);
385 ASSERT (is_pow2 (lb0->lb_n_buckets));
387 dpo0 = load_balance_get_bucket_i (lb0,
389 (lb0->lb_n_buckets_minus_1)));
391 next0 = dpo0->dpoi_next_node;
392 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
/* NOTE(review): no sizeof (ethernet_header_t) is added here, unlike the quad
   loop above. */
394 vlib_increment_combined_counter
395 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
/* When the buffer goes to a different next node than the cached one, the
   current next frame is put back and a new one is fetched. */
402 if (PREDICT_FALSE (next0 != next))
405 vlib_put_next_frame (vm, node, next, n_left_to_next);
407 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
414 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the lookup, so the trace helper is handed VLIB_TX. */
417 if (node->flags & VLIB_NODE_FLAG_TRACE)
418 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
420 return frame->n_vectors;
/* Node function registered for ip4-lookup; all work is delegated to
   ip4_lookup_inline.
   NOTE(review): the description below still refers to ip_adjacency_t and
   adj->lookup_next_index. The visible lookup code stores dpoi_index of the
   selected load-balance bucket in ip.adj_index[VLIB_TX] and dispatches to
   its dpoi_next_node - confirm and refresh the wording. */
423 /** @brief IPv4 lookup node.
426 This is the main IPv4 lookup dispatch node.
428 @param vm vlib_main_t corresponding to the current thread
429 @param node vlib_node_runtime_t
430 @param frame vlib_frame_t whose contents should be dispatched
432 @par Graph mechanics: buffer metadata, next index usage
435 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
436 - Indicates the @c sw_if_index value of the interface that the
437 packet was received on.
438 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
439 - When the value is @c ~0 then the node performs a longest prefix
440 match (LPM) for the packet destination address in the FIB attached
441 to the receive interface.
442 - Otherwise perform LPM for the packet destination address in the
443 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
444 value (0, 1, ...) and not a VRF id.
447 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
448 - The lookup result adjacency index.
451 - Dispatches the packet to the node index found in
452 ip_adjacency_t @c adj->lookup_next_index
453 (where @c adj is the lookup result adjacency).
456 ip4_lookup (vlib_main_t * vm,
457 vlib_node_runtime_t * node, vlib_frame_t * frame)
459 return ip4_lookup_inline (vm, node, frame,
460 /* lookup_for_responses_to_locally_received_packets */
/* Trace formatter used by the node registrations; defined later in this
   file. */
465 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node ip4-lookup: runs ip4_lookup on vectors of u32 buffer indices and
   has IP_LOOKUP_N_NEXT next nodes taken from IP4_LOOKUP_NEXT_NODES. */
467 VLIB_REGISTER_NODE (ip4_lookup_node) =
469 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
470 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
471 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
473 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/**
 * Node function of ip4-load-balance: picks one bucket of the load-balance
 * object already attached to each buffer.
 *
 * Per buffer:
 *  - ip.adj_index[VLIB_TX] is read as the load-balance index.
 *  - When the load-balance has more than one bucket, a non-zero ip.flow_hash
 *    is reused after a right shift by one bit; ip4_compute_flow_hash with the
 *    hash config of the load-balance supplies the value in the other case
 *    (the branch keywords are not all visible here - TODO confirm).
 *  - The bucket at (hash & lb_n_buckets_minus_1) is fetched; its dpoi_index
 *    replaces ip.adj_index[VLIB_TX] and its dpoi_next_node is the next node.
 *  - The lbm_via_counters combined counter is incremented with the buffer
 *    chain length.
 *
 * Tracing, when enabled on the node, is done before the buffers are processed.
 *
 * @param vm    vlib main of the current thread
 * @param node  runtime of the node being dispatched
 * @param frame frame whose buffers are dispatched
 * @return frame->n_vectors
 */
476 ip4_load_balance (vlib_main_t * vm,
477 vlib_node_runtime_t * node, vlib_frame_t * frame)
479 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
480 u32 n_left_from, n_left_to_next, *from, *to_next;
481 ip_lookup_next_t next;
482 u32 thread_index = vlib_get_thread_index ();
484 from = vlib_frame_vector_args (frame);
485 n_left_from = frame->n_vectors;
486 next = node->cached_next_index;
488 if (node->flags & VLIB_NODE_FLAG_TRACE)
489 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
491 while (n_left_from > 0)
493 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: two buffers per pass, the following two are prefetched. */
496 while (n_left_from >= 4 && n_left_to_next >= 2)
498 ip_lookup_next_t next0, next1;
499 const load_balance_t *lb0, *lb1;
500 vlib_buffer_t *p0, *p1;
501 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
502 const ip4_header_t *ip0, *ip1;
503 const dpo_id_t *dpo0, *dpo1;
505 /* Prefetch next iteration. */
507 vlib_buffer_t *p2, *p3;
509 p2 = vlib_get_buffer (vm, from[2]);
510 p3 = vlib_get_buffer (vm, from[3]);
512 vlib_prefetch_buffer_header (p2, STORE);
513 vlib_prefetch_buffer_header (p3, STORE);
515 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
516 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
519 pi0 = to_next[0] = from[0];
520 pi1 = to_next[1] = from[1];
527 p0 = vlib_get_buffer (vm, pi0);
528 p1 = vlib_get_buffer (vm, pi1);
530 ip0 = vlib_buffer_get_current (p0);
531 ip1 = vlib_buffer_get_current (p1);
532 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
533 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
535 lb0 = load_balance_get (lbi0);
536 lb1 = load_balance_get (lbi1);
539 * this node is for via FIBs we can re-use the hash value from the
540 * to node if present.
541 * We don't want to use the same hash value at each level in the recursion
542 * graph as that would lead to polarisation
546 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
548 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
550 hc0 = vnet_buffer (p0)->ip.flow_hash =
551 vnet_buffer (p0)->ip.flow_hash >> 1;
555 hc0 = vnet_buffer (p0)->ip.flow_hash =
556 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
559 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
561 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
563 hc1 = vnet_buffer (p1)->ip.flow_hash =
564 vnet_buffer (p1)->ip.flow_hash >> 1;
568 hc1 = vnet_buffer (p1)->ip.flow_hash =
569 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
574 load_balance_get_bucket_i (lb0,
575 hc0 & (lb0->lb_n_buckets_minus_1));
577 load_balance_get_bucket_i (lb1,
578 hc1 & (lb1->lb_n_buckets_minus_1));
580 next0 = dpo0->dpoi_next_node;
581 next1 = dpo1->dpoi_next_node;
583 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
584 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
586 vlib_increment_combined_counter
587 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
588 vlib_increment_combined_counter
589 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
591 vlib_validate_buffer_enqueue_x2 (vm, node, next,
592 to_next, n_left_to_next,
593 pi0, pi1, next0, next1);
/* Single loop: same processing, one buffer at a time. */
596 while (n_left_from > 0 && n_left_to_next > 0)
598 ip_lookup_next_t next0;
599 const load_balance_t *lb0;
602 const ip4_header_t *ip0;
603 const dpo_id_t *dpo0;
612 p0 = vlib_get_buffer (vm, pi0);
614 ip0 = vlib_buffer_get_current (p0);
615 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
617 lb0 = load_balance_get (lbi0);
620 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
622 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
624 hc0 = vnet_buffer (p0)->ip.flow_hash =
625 vnet_buffer (p0)->ip.flow_hash >> 1;
629 hc0 = vnet_buffer (p0)->ip.flow_hash =
630 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
635 load_balance_get_bucket_i (lb0,
636 hc0 & (lb0->lb_n_buckets_minus_1));
638 next0 = dpo0->dpoi_next_node;
639 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
641 vlib_increment_combined_counter
642 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
644 vlib_validate_buffer_enqueue_x1 (vm, node, next,
645 to_next, n_left_to_next,
649 vlib_put_next_frame (vm, node, next, n_left_to_next);
652 return frame->n_vectors;
/* Graph node ip4-load-balance: runs ip4_load_balance on vectors of u32 buffer
   indices. It is declared a sibling of ip4-lookup and reuses the ip4-lookup
   trace formatter. */
655 VLIB_REGISTER_NODE (ip4_load_balance_node) =
657 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
658 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
659 format_ip4_lookup_trace,};
661 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
663 /* get first interface address */
/* Walks the addresses configured on sw_if_index (unnumbered interfaces are
   honored) and reports one through the result pointer.
   @param im          IPv4 main structure; its lookup_main is walked
   @param sw_if_index interface whose addresses are examined
   @param result_ia   receives the matching ip_interface_address_t, or 0 when
                      no address was found
   NOTE(review): the statements that pick the address and the return are not
   all visible here; presumably the first address found is returned. */
665 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
666 ip_interface_address_t ** result_ia)
668 ip_lookup_main_t *lm = &im->lookup_main;
669 ip_interface_address_t *ia = 0;
670 ip4_address_t *result = 0;
673 foreach_ip_interface_address
674 (lm, ia, sw_if_index,
675 1 /* honor unnumbered */ ,
678 ip_interface_address_get_address (lm, ia);
684 *result_ia = result ? ia : 0;
/**
 * Installs the FIB entries that go with an interface address.
 *
 * - Prefix length <= 30: a connected, attached entry is added for the prefix
 *   and its adjacency is remembered in a->neighbor_probe_adj_index. The
 *   network address and the broadcast address of the subnet are added as drop
 *   entries unless they equal the interface address itself.
 * - Prefix length == 31: the other end of the link (the address with its
 *   lowest bit flipped) is added as an attached host.
 * - When a classify table is configured for the interface, a classify DPO is
 *   created and installed with fib_table_entry_special_dpo_add.
 * - Finally an entry flagged connected and local is added through &pfx
 *   (any adjustment of pfx before that call is not visible here).
 *
 * @param sw_if_index interface the address belongs to
 * @param im          IPv4 main structure (fib_masks, lookup_main)
 * @param fib_index   FIB the entries are added to
 * @param a           interface address; neighbor_probe_adj_index is updated
 */
689 ip4_add_interface_routes (u32 sw_if_index,
690 ip4_main_t * im, u32 fib_index,
691 ip_interface_address_t * a)
693 ip_lookup_main_t *lm = &im->lookup_main;
694 ip4_address_t *address = ip_interface_address_get_address (lm, a);
696 .fp_len = a->address_length,
697 .fp_proto = FIB_PROTOCOL_IP4,
698 .fp_addr.ip4 = *address,
/* Default when no glean adjacency is created below. */
701 a->neighbor_probe_adj_index = ~0;
703 if (pfx.fp_len <= 30)
705 /* a /30 or shorter - add a glean for the network address */
706 fib_node_index_t fei;
708 fei = fib_table_entry_update_one_path (fib_index, &pfx,
709 FIB_SOURCE_INTERFACE,
710 (FIB_ENTRY_FLAG_CONNECTED |
711 FIB_ENTRY_FLAG_ATTACHED),
713 /* No next-hop address */
719 // no out-label stack
721 FIB_ROUTE_PATH_FLAG_NONE);
722 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
724 /* Add the two broadcast addresses as drop */
725 fib_prefix_t net_pfx = {
727 .fp_proto = FIB_PROTOCOL_IP4,
728 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
/* First the all-zeros host address (network address) ... */
730 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
731 fib_table_entry_special_add(fib_index,
733 FIB_SOURCE_INTERFACE,
734 (FIB_ENTRY_FLAG_DROP |
735 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
/* ... then the all-ones host address (subnet broadcast). */
737 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
738 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
739 fib_table_entry_special_add(fib_index,
741 FIB_SOURCE_INTERFACE,
742 (FIB_ENTRY_FLAG_DROP |
743 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
746 else if (pfx.fp_len == 31)
/* Network-order value 1: XOR with it flips the lowest address bit. */
748 u32 mask = clib_host_to_net_u32(1);
749 fib_prefix_t net_pfx = pfx;
752 net_pfx.fp_addr.ip4.as_u32 ^= mask;
754 /* a /31 - add the other end as an attached host */
755 fib_table_entry_update_one_path (fib_index, &net_pfx,
756 FIB_SOURCE_INTERFACE,
757 (FIB_ENTRY_FLAG_ATTACHED),
765 FIB_ROUTE_PATH_FLAG_NONE);
/* Optional per-interface classifier: only when a table index other than ~0
   is recorded for this interface. */
769 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
771 u32 classify_table_index =
772 lm->classify_table_index_by_sw_if_index[sw_if_index];
773 if (classify_table_index != (u32) ~ 0)
775 dpo_id_t dpo = DPO_INVALID;
780 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
782 fib_table_entry_special_dpo_add (fib_index,
785 FIB_ENTRY_FLAG_NONE, &dpo);
/* Connected, local entry added through pfx. */
790 fib_table_entry_update_one_path (fib_index, &pfx,
791 FIB_SOURCE_INTERFACE,
792 (FIB_ENTRY_FLAG_CONNECTED |
793 FIB_ENTRY_FLAG_LOCAL),
800 FIB_ROUTE_PATH_FLAG_NONE);
/**
 * Removes the FIB entries that ip4_add_interface_routes installs for an
 * interface address, all with source FIB_SOURCE_INTERFACE.
 *
 * - Prefix length <= 30: the special entries for the network and broadcast
 *   addresses are removed (when they differ from the address itself) and the
 *   entry for the prefix is deleted.
 * - Prefix length == 31: the entry for the other end of the link (lowest
 *   address bit flipped) is deleted.
 * - A final delete is issued through &pfx (any adjustment of pfx before that
 *   call is not visible here).
 *
 * @param im             IPv4 main structure (fib_masks)
 * @param address        interface address being removed
 * @param address_length prefix length of that address
 */
804 ip4_del_interface_routes (ip4_main_t * im,
806 ip4_address_t * address, u32 address_length)
809 .fp_len = address_length,
810 .fp_proto = FIB_PROTOCOL_IP4,
811 .fp_addr.ip4 = *address,
814 if (pfx.fp_len <= 30)
816 fib_prefix_t net_pfx = {
818 .fp_proto = FIB_PROTOCOL_IP4,
819 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
/* Network address first, then the subnet broadcast address. */
821 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
822 fib_table_entry_special_remove(fib_index,
824 FIB_SOURCE_INTERFACE);
825 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
826 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
827 fib_table_entry_special_remove(fib_index,
829 FIB_SOURCE_INTERFACE);
830 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
832 else if (pfx.fp_len == 31)
/* Network-order value 1: XOR with it flips the lowest address bit. */
834 u32 mask = clib_host_to_net_u32(1);
835 fib_prefix_t net_pfx = pfx;
838 net_pfx.fp_addr.ip4.as_u32 ^= mask;
840 fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
844 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/**
 * Reference-counted enable/disable of IPv4 on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] is incremented on enable and
 * decremented on disable; per the comment below only the transitions between
 * 0 and 1 go on to toggle the ip4-drop feature. The visible multicast call
 * passes !is_enable, i.e. ip4-drop is switched on when IPv4 is disabled; the
 * unicast call presumably does the same (its arguments are not all visible).
 *
 * @param sw_if_index interface to change
 * @param is_enable   non-zero to enable, zero to disable
 */
848 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
850 ip4_main_t *im = &ip4_main;
/* Grow the counter vector on demand; new slots start at 0. */
852 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
855 * enable/disable only on the 1<->0 transition
859 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
864 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
865 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
868 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
872 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
873 sw_if_index, !is_enable, 0, 0);
/**
 * Adds or deletes an IPv4 address on an interface and keeps the FIB and the
 * registered listeners in sync.
 *
 * Visible steps:
 *  - make sure im->fib_index_by_sw_if_index covers sw_if_index and build the
 *    address/FIB pair used as the key for ip_interface_address_add_del;
 *  - compare the new address against the addresses already on the interface
 *    with ip4_destination_matches_route; the error message text shows that a
 *    conflict is reported as a failure to add;
 *  - call ip_interface_address_add_del, then ip4_sw_interface_enable_disable
 *    with !is_del;
 *  - delete or add the interface routes for the address;
 *  - invoke every add_del_interface_address callback when the number of
 *    elements in the address pool changed.
 *
 * @param vm             vlib main
 * @param address        address to add or delete
 * @param address_length prefix length of the address
 * @param is_del         non-zero to delete, zero to add
 * @return a clib error, or 0 on success (the return statements are not
 *         visible here - TODO confirm)
 */
876 static clib_error_t *
877 ip4_add_del_interface_address_internal (vlib_main_t * vm,
879 ip4_address_t * address,
880 u32 address_length, u32 is_del)
882 vnet_main_t *vnm = vnet_get_main ();
883 ip4_main_t *im = &ip4_main;
884 ip_lookup_main_t *lm = &im->lookup_main;
885 clib_error_t *error = 0;
886 u32 if_address_index, elts_before;
887 ip4_address_fib_t ip4_af, *addr_fib = 0;
889 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
890 ip4_addr_fib_init (&ip4_af, address,
891 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
892 vec_add1 (addr_fib, ip4_af);
895 * there is no support for adj-fib handling in the presence of overlapping
896 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
902 /* When adding an address check that it does not conflict
903 with an existing address. */
904 ip_interface_address_t *ia;
905 foreach_ip_interface_address
906 (&im->lookup_main, ia, sw_if_index,
907 0 /* honor unnumbered */ ,
910 ip_interface_address_get_address
911 (&im->lookup_main, ia);
912 if (ip4_destination_matches_route
913 (im, address, x, ia->address_length) ||
914 ip4_destination_matches_route (im,
920 ("failed to add %U which conflicts with %U for interface %U",
921 format_ip4_address_and_length, address,
923 format_ip4_address_and_length, x,
925 format_vnet_sw_if_index_name, vnm,
/* Remember the pool size so a real add/delete can be told apart from a
   duplicate further down. */
931 elts_before = pool_elts (lm->if_address_pool);
933 error = ip_interface_address_add_del
934 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
938 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
941 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
943 ip4_add_interface_routes (sw_if_index,
944 im, ip4_af.fib_index,
946 (lm->if_address_pool, if_address_index));
948 /* Notify listeners only when the pool grew/shrank; an unchanged pool means a duplicate address was added. */
949 if (elts_before != pool_elts (lm->if_address_pool))
951 ip4_add_del_interface_address_callback_t *cb;
952 vec_foreach (cb, im->add_del_interface_address_callbacks)
953 cb->function (im, cb->function_opaque, sw_if_index,
954 address, address_length, if_address_index, is_del);
/* Entry point for adding or deleting an interface address. It forwards all
   of its arguments unchanged to ip4_add_del_interface_address_internal and
   returns that result. */
963 ip4_add_del_interface_address (vlib_main_t * vm,
965 ip4_address_t * address,
966 u32 address_length, u32 is_del)
968 return ip4_add_del_interface_address_internal
969 (vm, sw_if_index, address, address_length, is_del);
972 /* Built-in ip4 unicast rx feature path definition */
/* The runs_before constraints below yield this relative order:
   ip4-flow-classify, ip4-inacl, ip4-source-check-via-rx,
   ip4-source-check-via-any, ip4-policer-classify, ipsec-input-ip4,
   vpath-input-ip4, ip4-vxlan-bypass, ip4-lookup.
   ip4-source-and-port-range-check-rx is only constrained to run before
   ip4-policer-classify, and ip4-drop only before ip4-lookup. */
974 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
976 .arc_name = "ip4-unicast",
977 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
978 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
981 VNET_FEATURE_INIT (ip4_flow_classify, static) =
983 .arc_name = "ip4-unicast",
984 .node_name = "ip4-flow-classify",
985 .runs_before = VNET_FEATURES ("ip4-inacl"),
988 VNET_FEATURE_INIT (ip4_inacl, static) =
990 .arc_name = "ip4-unicast",
991 .node_name = "ip4-inacl",
992 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
995 VNET_FEATURE_INIT (ip4_source_check_1, static) =
997 .arc_name = "ip4-unicast",
998 .node_name = "ip4-source-check-via-rx",
999 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1002 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1004 .arc_name = "ip4-unicast",
1005 .node_name = "ip4-source-check-via-any",
1006 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1009 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1011 .arc_name = "ip4-unicast",
1012 .node_name = "ip4-source-and-port-range-check-rx",
1013 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1016 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1018 .arc_name = "ip4-unicast",
1019 .node_name = "ip4-policer-classify",
1020 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1023 VNET_FEATURE_INIT (ip4_ipsec, static) =
1025 .arc_name = "ip4-unicast",
1026 .node_name = "ipsec-input-ip4",
1027 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1030 VNET_FEATURE_INIT (ip4_vpath, static) =
1032 .arc_name = "ip4-unicast",
1033 .node_name = "vpath-input-ip4",
1034 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1037 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1039 .arc_name = "ip4-unicast",
1040 .node_name = "ip4-vxlan-bypass",
1041 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* ip4-drop is the feature toggled by ip4_sw_interface_enable_disable and
   ip4_sw_interface_add_del. */
1044 VNET_FEATURE_INIT (ip4_drop, static) =
1046 .arc_name = "ip4-unicast",
1047 .node_name = "ip4-drop",
1048 .runs_before = VNET_FEATURES ("ip4-lookup"),
1051 VNET_FEATURE_INIT (ip4_lookup, static) =
1053 .arc_name = "ip4-unicast",
1054 .node_name = "ip4-lookup",
1055 .runs_before = 0, /* not before any other features */
1058 /* Built-in ip4 multicast rx feature path definition */
/* Same start nodes as the unicast arc. vpath-input-ip4 and ip4-drop are both
   constrained to run before ip4-mfib-forward-lookup, which is the last
   feature of the arc. */
1059 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1061 .arc_name = "ip4-multicast",
1062 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1063 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1066 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1068 .arc_name = "ip4-multicast",
1069 .node_name = "vpath-input-ip4",
1070 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1073 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1075 .arc_name = "ip4-multicast",
1076 .node_name = "ip4-drop",
1077 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1080 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1082 .arc_name = "ip4-multicast",
1083 .node_name = "ip4-mfib-forward-lookup",
1084 .runs_before = 0, /* last feature */
1087 /* Source and port-range check ip4 tx feature path definition */
/* The ip4-output arc starts at ip4-rewrite and ip4-midchain. Resulting order:
   ip4-source-and-port-range-check-tx, ipsec-output-ip4, interface-output. */
1088 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1090 .arc_name = "ip4-output",
1091 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1092 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1095 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1097 .arc_name = "ip4-output",
1098 .node_name = "ip4-source-and-port-range-check-tx",
1099 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1102 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1104 .arc_name = "ip4-output",
1105 .node_name = "ipsec-output-ip4",
1106 .runs_before = VNET_FEATURES ("interface-output"),
1109 /* Built-in ip4 tx feature path definition */
1110 VNET_FEATURE_INIT (ip4_interface_output, static) =
1112 .arc_name = "ip4-output",
1113 .node_name = "interface-output",
1114 .runs_before = 0, /* not before any other features */
/* Interface add/delete hook, registered through
   VNET_SW_INTERFACE_ADD_DEL_FUNCTION below. It grows the per-interface
   unicast and multicast FIB index vectors to cover sw_if_index and toggles
   the ip4-drop feature on both rx arcs (the enable argument of those calls is
   not visible here - TODO confirm it follows is_add). Always returns 0. */
1118 static clib_error_t *
1119 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1121 ip4_main_t *im = &ip4_main;
1123 /* Fill in lookup tables with default table (0). */
1124 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1125 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1127 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1130 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1133 return /* no error */ 0;
1136 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1138 /* Global IPv4 main state; the functions in this file reach it through &ip4_main. */
1139 ip4_main_t ip4_main;
/**
 * Init function of the IPv4 lookup code, registered through
 * VLIB_INIT_FUNCTION below.
 *
 * Visible steps:
 *  - run vnet_feature_init first;
 *  - fill im->fib_masks[i] with the network-byte-order mask derived from
 *    pow2_mask (i) << (32 - i), i.e. presumably the mask for prefix length i;
 *  - initialise the IPv4 lookup_main;
 *  - create and lock the unicast and multicast tables with table id 0;
 *  - hook unformat_pg_ip4_header into the packet-generator node of ip4-lookup;
 *  - build the ARP request packet template: Ethernet hardware type, IPv4
 *    protocol type, 6 and 4 address bytes, request opcode, and an all-zero
 *    target Ethernet address.
 *
 * @param vm vlib main
 */
1142 ip4_lookup_init (vlib_main_t * vm)
1144 ip4_main_t *im = &ip4_main;
1145 clib_error_t *error;
1148 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1151 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* NOTE(review): any guard around this shift for the extreme values of i is
   not visible here - confirm against the full source. */
1156 m = pow2_mask (i) << (32 - i);
1159 im->fib_masks[i] = clib_host_to_net_u32 (m);
1162 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1164 /* Create FIB with index 0 and table id of 0. */
1165 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1166 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1170 pn = pg_get_node (ip4_lookup_node.index);
1171 pn->unformat_edit = unformat_pg_ip4_header;
1175 ethernet_arp_header_t h;
1177 memset (&h, 0, sizeof (h));
1179 /* Set target ethernet address to all zeros. */
1180 memset (h.ip4_over_ethernet[1].ethernet, 0,
1181 sizeof (h.ip4_over_ethernet[1].ethernet));
/* Local helpers: _16 stores a 16-bit field in network byte order, _8 stores a
   byte field as is. */
1183 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1184 #define _8(f,v) h.f = v;
1185 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1186 _16 (l3_type, ETHERNET_TYPE_IP4);
1187 _8 (n_l2_address_bytes, 6);
1188 _8 (n_l3_address_bytes, 4);
1189 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1193 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1196 /* alloc chunk size */ 8,
1203 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding nodes. The format
   functions below read its fib_index, dpo_index, flow_hash and packet_data
   members; the integer members themselves are not visible in this view. */
1207 /* Adjacency taken. */
1212 /* Packet data, possibly *after* rewrite. */
1213 u8 packet_data[64 - 1 * sizeof (u32)];
1215 ip4_forward_next_trace_t;
/* Trace formatter: takes (vlib_main_t *, vlib_node_t *,
 * ip4_forward_next_trace_t *) from args; the first two are unused.
 * Appends white space for the current indent of s followed by the captured
 * packet bytes rendered with format_ip4_header. */
1218 format_ip4_forward_next_trace (u8 * s, va_list * args)
1220 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1221 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1222 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1223 uword indent = format_get_indent (s);
1224 s = format (s, "%U%U",
1225 format_white_space, indent,
1226 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for lookup results: takes (vlib_main_t *, vlib_node_t *,
 * ip4_forward_next_trace_t *) from args; the first two are unused.
 * First line: fib index, dpo index and flow hash (hex).
 * Second line: indent followed by the captured bytes rendered with
 * format_ip4_header. */
1231 format_ip4_lookup_trace (u8 * s, va_list * args)
1233 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1234 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1235 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1236 uword indent = format_get_indent (s);
1238 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1239 t->fib_index, t->dpo_index, t->flow_hash);
1240 s = format (s, "\n%U%U",
1241 format_white_space, indent,
1242 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite nodes: takes (vlib_main_t *, vlib_node_t *,
 * ip4_forward_next_trace_t *) from args; the first two are unused.
 * First line: interface index, dpo index, the adjacency rendered with
 * format_ip_adjacency, and the flow hash (hex).
 * Second line: indent followed by the captured bytes rendered with
 * format_ip_adjacency_packet_data for the same adjacency index. */
1247 format_ip4_rewrite_trace (u8 * s, va_list * args)
1249 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1250 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1251 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1252 uword indent = format_get_indent (s);
/* NOTE(review): the value printed under the tx_sw_if_index label is
 * t->fib_index; confirm that the trace writer stores the TX interface index
 * in that field for this node. */
1254 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1255 t->fib_index, t->dpo_index, format_ip_adjacency,
1256 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1257 s = format (s, "\n%U%U",
1258 format_white_space, indent,
1259 format_ip_adjacency_packet_data,
1260 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1264 /* Common trace function for all ip4-forward next nodes. */
/* Walks the buffer indices carried by frame.  For every buffer that has
 * VLIB_BUFFER_IS_TRACED set it adds an ip4_forward_next_trace_t record
 * holding: the adjacency index selected by which_adj_index (VLIB_RX or
 * VLIB_TX), the flow hash, and the first sizeof (packet_data) bytes found at
 * the current position of the buffer.
 * The code has a two-buffer section (b0/b1) that prefetches from[2] and
 * from[3], followed by a single-buffer section doing the same work. */
1266 ip4_forward_next_trace (vlib_main_t * vm,
1267 vlib_node_runtime_t * node,
1268 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1271 ip4_main_t *im = &ip4_main;
1273 n_left = frame->n_vectors;
1274 from = vlib_frame_vector_args (frame);
1279 vlib_buffer_t *b0, *b1;
1280 ip4_forward_next_trace_t *t0, *t1;
1282 /* Prefetch next iteration. */
1283 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1284 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1289 b0 = vlib_get_buffer (vm, bi0);
1290 b1 = vlib_get_buffer (vm, bi1);
1292 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1294 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1295 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1296 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Selects sw_if_index[VLIB_TX] when it is not ~0, otherwise the FIB index
 * bound to the RX interface. */
1298 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1299 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1300 vec_elt (im->fib_index_by_sw_if_index,
1301 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
/* Always copies sizeof (packet_data) bytes, whatever the packet length. */
1303 clib_memcpy (t0->packet_data,
1304 vlib_buffer_get_current (b0),
1305 sizeof (t0->packet_data));
1307 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1309 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1310 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1311 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1313 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1314 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1315 vec_elt (im->fib_index_by_sw_if_index,
1316 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1317 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1318 sizeof (t1->packet_data));
/* Single-buffer section: same record, one buffer at a time. */
1328 ip4_forward_next_trace_t *t0;
1332 b0 = vlib_get_buffer (vm, bi0);
1334 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1336 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1337 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1338 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1340 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1341 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1342 vec_elt (im->fib_index_by_sw_if_index,
1343 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1344 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1345 sizeof (t0->packet_data));
/* Common body of the ip4-drop and ip4-punt node functions.
 * Hands the buffer indices of frame to vlib_error_drop_buffers, passing
 * ip4_input_node.index and error_code, and afterwards, when
 * VLIB_NODE_FLAG_TRACE is set on the node, records traces through
 * ip4_forward_next_trace using the VLIB_TX adjacency index. */
1353 ip4_drop_or_punt (vlib_main_t * vm,
1354 vlib_node_runtime_t * node,
1355 vlib_frame_t * frame, ip4_error_t error_code)
1357 u32 *buffers = vlib_frame_vector_args (frame);
1358 uword n_packets = frame->n_vectors;
1360 vlib_error_drop_buffers (vm, node, buffers,
1364 ip4_input_node.index, error_code);
1366 if (node->flags & VLIB_NODE_FLAG_TRACE)
1367 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function of ip4_drop_node: ip4_drop_or_punt with
 * IP4_ERROR_ADJACENCY_DROP as the error code. */
1373 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1375 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function of ip4_punt_node: ip4_drop_or_punt with
 * IP4_ERROR_ADJACENCY_PUNT as the error code. */
1379 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1381 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph node registration for ip4_drop_node: node function ip4_drop, u32
 * vector elements, traces rendered by format_ip4_forward_next_trace.
 * The multiarch macro below is applied to the same node/function pair. */
1385 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1387 .function = ip4_drop,.
1389 .vector_size = sizeof (u32),
1390 .format_trace = format_ip4_forward_next_trace,
1397 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph node registration for ip4_punt_node: node function ip4_punt, u32
 * vector elements, traces rendered by format_ip4_forward_next_trace.
 * The multiarch macro below is applied to the same node/function pair. */
1399 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1401 .function = ip4_punt,
1403 .vector_size = sizeof (u32),
1404 .format_trace = format_ip4_forward_next_trace,
1411 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1414 /* Compute TCP/UDP/ICMP4 checksum in software. */
/* The sum is seeded with (payload length + (protocol << 16)) converted to
 * network byte order, then the source and destination addresses are added:
 * as two unaligned u32 loads when uword is 32 bits wide, otherwise as one
 * unaligned u64 load starting at src_address.  The payload is then folded
 * in.  The payload may continue in chained buffers: the byte count taken
 * from the first buffer is clamped to what p0->current_length holds beyond
 * the IP header, and further data is reached through p0->next_buffer.
 * sum16 is the one-complement of the folded sum. */
1416 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1420 u32 ip_header_length, payload_length_host_byte_order;
1421 u32 n_this_buffer, n_bytes_left;
1423 void *data_this_buffer;
1425 /* Initialize checksum with ip header. */
1426 ip_header_length = ip4_header_bytes (ip0);
/* Payload length = total length field minus the IP header length. */
1427 payload_length_host_byte_order =
1428 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1430 clib_host_to_net_u32 (payload_length_host_byte_order +
1431 (ip0->protocol << 16));
1433 if (BITS (uword) == 32)
1436 ip_csum_with_carry (sum0,
1437 clib_mem_unaligned (&ip0->src_address, u32));
1439 ip_csum_with_carry (sum0,
1440 clib_mem_unaligned (&ip0->dst_address, u32));
1444 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1446 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1447 data_this_buffer = (void *) ip0 + ip_header_length;
/* Payload runs past this buffer: take only the bytes the buffer holds after
 * the IP header (0 when it holds no more than the header). */
1448 if (n_this_buffer + ip_header_length > p0->current_length)
1450 p0->current_length >
1451 ip_header_length ? p0->current_length - ip_header_length : 0;
1454 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1455 n_bytes_left -= n_this_buffer;
1456 if (n_bytes_left == 0)
/* Bytes remain, so a chained buffer is expected; continue with its data. */
1459 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1460 p0 = vlib_get_buffer (vm, p0->next_buffer);
1461 data_this_buffer = vlib_buffer_get_current (p0);
1462 n_this_buffer = p0->current_length;
1465 sum16 = ~ip_csum_fold (sum0);
/* Checks the L4 checksum of the TCP or UDP packet at the current position
 * of p0 and records the outcome in p0->flags.
 * A UDP packet whose checksum field is 0 is flagged as computed and correct
 * without summing.  Otherwise the checksum is computed in software;
 * IP_BUFFER_L4_CHECKSUM_COMPUTED is always set and
 * IP_BUFFER_L4_CHECKSUM_CORRECT is set when the computed sum16 is 0.
 * Callers in this file assign the return value to their flags variable.
 * NOTE(review): udp0 is taken at ip0 + 1 (directly after a fixed-size
 * ip4_header_t) whereas ip4_local_inline uses ip4_next_header; confirm the
 * behaviour for headers carrying IP options. */
1471 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1473 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1477 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1478 || ip0->protocol == IP_PROTOCOL_UDP);
1480 udp0 = (void *) (ip0 + 1);
1481 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1483 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1484 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1488 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is 0 or 1; shifting it by the LOG2 constant sets the CORRECT
 * bit only for a good checksum. */
1490 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1491 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Declares the feature arc named ip4-local; its only start node is the
 * ip4-local node.  ip4_local_inline reads the resulting arc index from
 * vnet_feat_arc_ip4_local.feature_arc_index. */
1497 VNET_FEATURE_ARC_INIT (ip4_local) =
1499 .arc_name = "ip4-local",
1500 .start_nodes = VNET_FEATURES ("ip4-local"),
/* Shared body of the ip4-local and ip4-local-end-of-arc node functions.
 * head_of_feature_arc is 1 when called from ip4_local and 0 when called from
 * ip4_local_end_of_arc.
 *
 * Per packet the code: records start_of_ip_header; picks a FIB index
 * (sw_if_index[VLIB_TX] when it is not ~0, otherwise the FIB bound to the RX
 * interface); walks the mtrie of that FIB on the source address in three
 * steps; checks the TCP/UDP checksum (validating in software when it has
 * not been computed yet) and the UDP length; stores the resulting
 * load-balance index in adj_index[VLIB_RX] and adj_index[VLIB_TX]; and maps
 * the outcome to an error code:
 *   IP4_ERROR_UDP_LENGTH            ip length - udp length is negative
 *   IP4_ERROR_TCP_CHECKSUM (+1 UDP) TCP/UDP packet without a good checksum
 *   IP4_ERROR_SPOOFED_LOCAL_PACKETS source resolves to a DPO_RECEIVE
 *   IP4_ERROR_SRC_LOOKUP_MISS       fib_urpf_check_size reports zero for the
 *                                   load-balance uRPF list and destination
 *                                   is not 0xFFFFFFFF
 * IP4_ERROR_UNKNOWN_PROTOCOL is the initial value and doubles as the
 * no-error marker: any other value forces IP_LOCAL_NEXT_DROP, otherwise the
 * next node comes from lm->local_next_by_ip_protocol[proto].  Fragments are
 * dispatched as protocol 0xfe.  At the head of the arc, packets still
 * carrying IP4_ERROR_UNKNOWN_PROTOCOL are passed to vnet_feature_arc_start.
 * Errors are taken from the ip4-input node runtime (error_node).
 * Returns frame->n_vectors.
 *
 * NOTE(review): in the single-packet section adj_index[VLIB_TX] is assigned
 * lbi0 twice in a row; the first store looks redundant. */
1505 ip4_local_inline (vlib_main_t * vm,
1506 vlib_node_runtime_t * node,
1507 vlib_frame_t * frame, int head_of_feature_arc)
1509 ip4_main_t *im = &ip4_main;
1510 ip_lookup_main_t *lm = &im->lookup_main;
1511 ip_local_next_t next_index;
1512 u32 *from, *to_next, n_left_from, n_left_to_next;
1513 vlib_node_runtime_t *error_node =
1514 vlib_node_get_runtime (vm, ip4_input_node.index);
1515 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1517 from = vlib_frame_vector_args (frame);
1518 n_left_from = frame->n_vectors;
1519 next_index = node->cached_next_index;
/* Traces for the whole frame are recorded up front. */
1521 if (node->flags & VLIB_NODE_FLAG_TRACE)
1522 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1524 while (n_left_from > 0)
1526 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets (suffix 0 and 1) per iteration. */
1528 while (n_left_from >= 4 && n_left_to_next >= 2)
1530 vlib_buffer_t *p0, *p1;
1531 ip4_header_t *ip0, *ip1;
1532 udp_header_t *udp0, *udp1;
1533 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1534 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1535 const dpo_id_t *dpo0, *dpo1;
1536 const load_balance_t *lb0, *lb1;
1537 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1538 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1539 i32 len_diff0, len_diff1;
1540 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1541 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1542 u32 sw_if_index0, sw_if_index1;
1544 pi0 = to_next[0] = from[0];
1545 pi1 = to_next[1] = from[1];
1549 n_left_to_next -= 2;
/* Default disposition is drop until a next node is chosen further down. */
1551 next0 = next1 = IP_LOCAL_NEXT_DROP;
1553 p0 = vlib_get_buffer (vm, pi0);
1554 p1 = vlib_get_buffer (vm, pi1);
1556 ip0 = vlib_buffer_get_current (p0);
1557 ip1 = vlib_buffer_get_current (p1);
1559 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1560 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1562 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1563 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1565 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1566 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
/* NOTE(review): fib_index0 and fib_index1 were loaded just above; the two
 * vec_elt loads below repeat the same lookups. */
1568 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1570 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1571 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1573 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1575 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1576 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1578 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1579 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
/* First step of the mtrie walk on the source address; steps 2 and 3 are
 * interleaved with the checksum and length checks below. */
1581 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1582 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1584 /* Treat IP frag packets as "experimental" protocol for now
1585 until support of IP frag reassembly is implemented */
1586 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1587 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1589 if (head_of_feature_arc == 0)
1591 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1595 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1596 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1597 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1598 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1603 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1604 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1606 udp0 = ip4_next_header (ip0);
1607 udp1 = ip4_next_header (ip1);
1609 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1610 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1611 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1613 /* Verify UDP length. */
1614 ip_len0 = clib_net_to_host_u16 (ip0->length);
1615 ip_len1 = clib_net_to_host_u16 (ip1->length);
1616 udp_len0 = clib_net_to_host_u16 (udp0->length);
1617 udp_len1 = clib_net_to_host_u16 (udp1->length);
1619 len_diff0 = ip_len0 - udp_len0;
1620 len_diff1 = ip_len1 - udp_len1;
/* The length difference only matters for UDP; other protocols get 0. */
1622 len_diff0 = is_udp0 ? len_diff0 : 0;
1623 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one of the two packets is not TCP/UDP or does not
 * have a known-good checksum yet. */
1625 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1626 & good_tcp_udp0 & good_tcp_udp1)))
1631 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1632 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1634 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1635 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1640 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1641 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1643 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1644 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
/* A negative length difference also marks the packet as not good. */
1648 good_tcp_udp0 &= len_diff0 >= 0;
1649 good_tcp_udp1 &= len_diff1 >= 0;
1652 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1654 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1656 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1658 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1659 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* Relies on the UDP checksum error code directly following the TCP one, so
 * that adding is_udp selects the right code. */
1661 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1662 error0 = (is_tcp_udp0 && !good_tcp_udp0
1663 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1664 error1 = (is_tcp_udp1 && !good_tcp_udp1
1665 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1668 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1670 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
/* The source lookup result is stored in both adjacency slots. */
1672 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1673 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1674 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1676 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1677 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1678 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1680 lb0 = load_balance_get (lbi0);
1681 lb1 = load_balance_get (lbi1);
1682 dpo0 = load_balance_get_bucket_i (lb0, 0);
1683 dpo1 = load_balance_get_bucket_i (lb1, 0);
1686 * Must have a route to source otherwise we drop the packet.
1687 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1690 * - the source is a recieve => it's from us => bogus, do this
1691 * first since it sets a different error code.
1692 * - uRPF check for any route to source - accept if passes.
1693 * - allow packets destined to the broadcast address from unknown sources
1695 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1696 dpo0->dpoi_type == DPO_RECEIVE) ?
1697 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1698 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1699 !fib_urpf_check_size (lb0->lb_urpf) &&
1700 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1701 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1702 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1703 dpo1->dpoi_type == DPO_RECEIVE) ?
1704 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1705 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1706 !fib_urpf_check_size (lb1->lb_urpf) &&
1707 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1708 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1712 next0 = lm->local_next_by_ip_protocol[proto0];
1713 next1 = lm->local_next_by_ip_protocol[proto1];
1716 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1718 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1720 p0->error = error0 ? error_node->errors[error0] : 0;
1721 p1->error = error1 ? error_node->errors[error1] : 0;
1723 if (head_of_feature_arc)
1725 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1726 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1727 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1728 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1731 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1732 n_left_to_next, pi0, pi1,
1736 while (n_left_from > 0 && n_left_to_next > 0)
1741 ip4_fib_mtrie_t *mtrie0;
1742 ip4_fib_mtrie_leaf_t leaf0;
1743 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1745 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1746 load_balance_t *lb0;
1747 const dpo_id_t *dpo0;
1750 pi0 = to_next[0] = from[0];
1754 n_left_to_next -= 1;
1756 next0 = IP_LOCAL_NEXT_DROP;
1758 p0 = vlib_get_buffer (vm, pi0);
1760 ip0 = vlib_buffer_get_current (p0);
1762 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1764 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1766 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1769 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1770 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1772 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1774 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1776 /* Treat IP frag packets as "experimental" protocol for now
1777 until support of IP frag reassembly is implemented */
1778 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1780 if (head_of_feature_arc == 0)
1782 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1786 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1787 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1791 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1793 udp0 = ip4_next_header (ip0);
1795 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1796 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1798 /* Verify UDP length. */
1799 ip_len0 = clib_net_to_host_u16 (ip0->length);
1800 udp_len0 = clib_net_to_host_u16 (udp0->length);
1802 len_diff0 = ip_len0 - udp_len0;
1804 len_diff0 = is_udp0 ? len_diff0 : 0;
1806 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1811 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1812 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1814 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1815 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1819 good_tcp_udp0 &= len_diff0 >= 0;
1822 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1824 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1826 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1828 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1829 error0 = (is_tcp_udp0 && !good_tcp_udp0
1830 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1833 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1835 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1836 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1838 lb0 = load_balance_get (lbi0);
1839 dpo0 = load_balance_get_bucket_i (lb0, 0);
1841 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1842 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1844 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1845 dpo0->dpoi_type == DPO_RECEIVE) ?
1846 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1847 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1848 !fib_urpf_check_size (lb0->lb_urpf) &&
1849 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1850 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1854 next0 = lm->local_next_by_ip_protocol[proto0];
1857 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1859 p0->error = error0 ? error_node->errors[error0] : 0;
1861 if (head_of_feature_arc)
1863 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1864 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1867 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1868 n_left_to_next, pi0, next0);
1872 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1875 return frame->n_vectors;
/* Node function of ip4_local_node: runs ip4_local_inline with
 * head_of_feature_arc = 1. */
1879 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1881 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Graph node registration for ip4-local: node function ip4_local, u32
 * vector elements, traces rendered by format_ip4_forward_next_trace, and
 * IP_LOCAL_N_NEXT statically named next nodes.  Further next nodes are added
 * at run time by ip4_register_protocol through vlib_node_add_next. */
1885 VLIB_REGISTER_NODE (ip4_local_node) =
1887 .function = ip4_local,
1888 .name = "ip4-local",
1889 .vector_size = sizeof (u32),
1890 .format_trace = format_ip4_forward_next_trace,
1891 .n_next_nodes = IP_LOCAL_N_NEXT,
1894 [IP_LOCAL_NEXT_DROP] = "error-drop",
1895 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1896 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1897 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1901 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Node function of ip4_local_end_of_arc_node: runs ip4_local_inline with
 * head_of_feature_arc = 0, so the feature arc is not started again. */
1904 ip4_local_end_of_arc (vlib_main_t * vm,
1905 vlib_node_runtime_t * node, vlib_frame_t * frame)
1907 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
/* Graph node registration for ip4-local-end-of-arc: node function
 * ip4_local_end_of_arc, u32 vector elements, same trace formatter as
 * ip4-local, and declared as a sibling of ip4-local. */
1911 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1912 .function = ip4_local_end_of_arc,
1913 .name = "ip4-local-end-of-arc",
1914 .vector_size = sizeof (u32),
1916 .format_trace = format_ip4_forward_next_trace,
1917 .sibling_of = "ip4-local",
1920 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Registers the ip4-local-end-of-arc node as a feature on the ip4-local arc;
 * runs_before is 0, i.e. it is not ordered before any other feature. */
1922 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1923 .arc_name = "ip4-local",
1924 .node_name = "ip4-local-end-of-arc",
1925 .runs_before = 0, /* not before any other features */
/* Makes ip4-local dispatch packets of IP protocol number protocol to the
 * graph node node_index: the node is added as a next node of ip4_local_node
 * and the next index returned by vlib_node_add_next is stored in
 * lm->local_next_by_ip_protocol[protocol].
 * protocol must index inside that table (checked with ASSERT only). */
1930 ip4_register_protocol (u32 protocol, u32 node_index)
1932 vlib_main_t *vm = vlib_get_main ();
1933 ip4_main_t *im = &ip4_main;
1934 ip_lookup_main_t *lm = &im->lookup_main;
1936 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1937 lm->local_next_by_ip_protocol[protocol] =
1938 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/* CLI handler for the show ip local command.  Prints a heading and then the
 * number of every protocol whose entry in lm->local_next_by_ip_protocol
 * differs from IP_LOCAL_NEXT_PUNT, one number per output line.
 * input and cmd are not used by the code shown here. */
1941 static clib_error_t *
1942 show_ip_local_command_fn (vlib_main_t * vm,
1943 unformat_input_t * input, vlib_cli_command_t * cmd)
1945 ip4_main_t *im = &ip4_main;
1946 ip_lookup_main_t *lm = &im->lookup_main;
1949 vlib_cli_output (vm, "Protocols handled by ip4_local");
1950 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
/* An entry equal to IP_LOCAL_NEXT_PUNT is treated as not handled. */
1952 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1953 vlib_cli_output (vm, "%d", i);
1961 * Display the set of protocols handled by the local IPv4 stack.
1964 * Example of how to display local protocol table:
1965 * @cliexstart{show ip local}
1966 * Protocols handled by ip4_local
/* Binds the CLI path show ip local to show_ip_local_command_fn. */
1973 VLIB_CLI_COMMAND (show_ip_local, static) =
1975 .path = "show ip local",
1976 .function = show_ip_local_command_fn,
1977 .short_help = "show ip local",
/* Shared body of the ip4-arp (is_glean = 0) and ip4-glean (is_glean = 1)
 * node functions.
 *
 * Every input packet is placed on the IP4_ARP_NEXT_DROP frame.  For each
 * packet a hash of the address being resolved (the packet destination in
 * the glean case, the adjacency next hop otherwise) is looked up in
 * hash_bitmap: when the bit is already set the packet is counted as
 * IP4_ARP_ERROR_DROP, otherwise the bit is set and the packet is counted as
 * IP4_ARP_ERROR_REQUEST_SENT.  The hash seeds are replaced once more than
 * 1e-3 (in the units of vlib_time_now) has passed since the previous change.
 * time_last_seed_change, hash_seeds and hash_bitmap are function-level
 * statics and therefore persist between calls.
 *
 * The ARP request itself is taken from
 * im->ip4_arp_request_packet_template, completed with the hardware address
 * of the TX interface, a source IP address (the glean receive address, or
 * the result of ip4_src_address_for_packet in the non-glean case, where a
 * failure is counted as IP4_ARP_ERROR_NO_SOURCE_ADDRESS and the request
 * buffer is freed) and the target IP address, prefixed with the adjacency
 * rewrite, and handed to adj0->rewrite_header.next_index.
 * An adjacency whose lookup_next_index does not match the node type is
 * counted as IP4_ARP_ERROR_NON_ARP_ADJ.
 * Returns frame->n_vectors.
 *
 * NOTE(review): c0 is divided by BITS (uword) before the bit mask is built
 * from c0 % BITS (uword), so the mask is derived from the word index rather
 * than from the hash value; confirm the intended order.  m0 is also declared
 * u32 while the mask is computed as a uword shift. */
1982 ip4_arp_inline (vlib_main_t * vm,
1983 vlib_node_runtime_t * node,
1984 vlib_frame_t * frame, int is_glean)
1986 vnet_main_t *vnm = vnet_get_main ();
1987 ip4_main_t *im = &ip4_main;
1988 ip_lookup_main_t *lm = &im->lookup_main;
1989 u32 *from, *to_next_drop;
1990 uword n_left_from, n_left_to_next_drop, next_index;
1991 static f64 time_last_seed_change = -1e100;
1992 static u32 hash_seeds[3];
1993 static uword hash_bitmap[256 / BITS (uword)];
1996 if (node->flags & VLIB_NODE_FLAG_TRACE)
1997 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1999 time_now = vlib_time_now (vm);
/* Periodic re-seed; the initial value -1e100 makes the first call re-seed. */
2000 if (time_now - time_last_seed_change > 1e-3)
2003 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2004 sizeof (hash_seeds));
2005 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2006 hash_seeds[i] = r[i];
2008 /* Mark all hash keys as not seen before. */
2009 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2012 time_last_seed_change = time_now;
2015 from = vlib_frame_vector_args (frame);
2016 n_left_from = frame->n_vectors;
2017 next_index = node->cached_next_index;
2018 if (next_index == IP4_ARP_NEXT_DROP)
2019 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2021 while (n_left_from > 0)
/* The frame fetched here is always the one for IP4_ARP_NEXT_DROP. */
2023 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2024 to_next_drop, n_left_to_next_drop);
2026 while (n_left_from > 0 && n_left_to_next_drop > 0)
2028 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2029 ip_adjacency_t *adj0;
2036 p0 = vlib_get_buffer (vm, pi0);
2038 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2039 adj0 = ip_get_adjacency (lm, adj_index0);
2040 ip0 = vlib_buffer_get_current (p0);
/* The TX interface comes from the adjacency rewrite header. */
2046 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2047 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2052 * this is the Glean case, so we are ARPing for the
2053 * packet's destination
2055 a0 ^= ip0->dst_address.data_u32;
2059 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2063 hash_v3_finalize32 (a0, b0, c0);
2065 c0 &= BITS (hash_bitmap) - 1;
2066 c0 = c0 / BITS (uword);
2067 m0 = (uword) 1 << (c0 % BITS (uword));
2069 bm0 = hash_bitmap[c0];
2070 drop0 = (bm0 & m0) != 0;
2072 /* Mark it as seen. */
2073 hash_bitmap[c0] = bm0 | m0;
2077 to_next_drop[0] = pi0;
2079 n_left_to_next_drop -= 1;
2082 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2083 IP4_ARP_ERROR_REQUEST_SENT];
2086 * the adj has been updated to a rewrite but the node the DPO that got
2087 * us here hasn't - yet. no big deal. we'll drop while we wait.
2089 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2096 * Can happen if the control-plane is programming tables
2097 * with traffic flowing; at least that's today's lame excuse.
2099 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2100 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2102 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2105 /* Send ARP request. */
2109 ethernet_arp_header_t *h0;
2110 vnet_hw_interface_t *hw_if0;
2113 vlib_packet_template_get_packet (vm,
2114 &im->ip4_arp_request_packet_template,
2117 /* Add rewrite/encap string for ARP packet. */
2118 vnet_rewrite_one_header (adj0[0], h0,
2119 sizeof (ethernet_header_t));
2121 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2123 /* Src ethernet address in ARP header. */
2124 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2126 sizeof (h0->ip4_over_ethernet[0].ethernet));
2130 /* The interface's source address is stashed in the Glean Adj */
2131 h0->ip4_over_ethernet[0].ip4 =
2132 adj0->sub_type.glean.receive_addr.ip4;
2134 /* Copy in destination address we are requesting. This is the
2135 * glean case, so it's the packet's destination.*/
2136 h0->ip4_over_ethernet[1].ip4.data_u32 =
2137 ip0->dst_address.data_u32;
2141 /* Src IP address in ARP header. */
2142 if (ip4_src_address_for_packet (lm, sw_if_index0,
2144 ip4_over_ethernet[0].ip4))
2146 /* No source address available */
2148 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2149 vlib_buffer_free (vm, &bi0, 1);
2153 /* Copy in destination address we are requesting from the
2155 h0->ip4_over_ethernet[1].ip4.data_u32 =
2156 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2159 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2160 b0 = vlib_get_buffer (vm, bi0);
2161 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2163 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2165 vlib_set_next_frame_buffer (vm, node,
2166 adj0->rewrite_header.next_index,
2171 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2174 return frame->n_vectors;
/* Node function of ip4_arp_node: ip4_arp_inline with is_glean = 0. */
2178 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2180 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of ip4_glean_node: ip4_arp_inline with is_glean = 1. */
2184 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2186 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions indexed by the IP4_ARP_ERROR_* values; used as the
 * error_strings table of both ip4_arp_node and ip4_glean_node. */
2189 static char *ip4_arp_error_strings[] = {
2190 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2191 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2192 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2193 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2194 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2195 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node registration for ip4-arp: node function ip4_arp, u32 vector
 * elements, traces rendered by format_ip4_forward_next_trace, error
 * counters from ip4_arp_error_strings, and IP4_ARP_N_NEXT static next nodes
 * of which IP4_ARP_NEXT_DROP maps to error-drop. */
2198 VLIB_REGISTER_NODE (ip4_arp_node) =
2200 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2201 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2202 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2203 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2205 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph node registration for ip4-glean: node function ip4_glean, otherwise
 * the same settings as ip4-arp (u32 vector elements, same trace formatter,
 * same error strings, IP4_ARP_N_NEXT next nodes with IP4_ARP_NEXT_DROP
 * mapped to error-drop). */
2208 VLIB_REGISTER_NODE (ip4_glean_node) =
2210 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2211 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2212 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2213 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2215 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* foreach_notrace_ip4_arp_error lists the ip4-arp error names handled by
 * arp_notrace_init.  arp_notrace_init is registered below with
 * VLIB_INIT_FUNCTION; for every listed name a it calls
 * vnet_pcap_drop_trace_filter_add_del on the ip4-arp node runtime error
 * rt->errors[IP4_ARP_ERROR_a]. */
2218 #define foreach_notrace_ip4_arp_error \
2225 arp_notrace_init (vlib_main_t * vm)
2227 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2229 /* don't trace ARP request packets */
2231 vnet_pcap_drop_trace_filter_add_del \
2232 (rt->errors[IP4_ARP_ERROR_##a], \
2234 foreach_notrace_ip4_arp_error;
2239 VLIB_INIT_FUNCTION (arp_notrace_init);
2242 /* Send an ARP request to see if given destination is reachable on given interface. */
/* Error returns:
 *   - the interface does not have VNET_SW_INTERFACE_FLAG_ADMIN_UP set;
 *   - ip4_interface_address_matching_destination finds no address for dst
 *     on sw_if_index; in this case vnm->api_errno is also set to
 *     VNET_API_ERROR_NO_MATCHING_INTERFACE.
 * Otherwise an ARP request is taken from
 * im->ip4_arp_request_packet_template, filled with the hardware address of
 * the supporting hardware interface, src[0] as sender IP and dst[0] as
 * target IP, prefixed with the rewrite of the neighbor-probe adjacency of
 * the matching interface address, and put on a frame for the output node of
 * the hardware interface.  Returns 0 in that case. */
2244 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2246 vnet_main_t *vnm = vnet_get_main ();
2247 ip4_main_t *im = &ip4_main;
2248 ethernet_arp_header_t *h;
2250 ip_interface_address_t *ia;
2251 ip_adjacency_t *adj;
2252 vnet_hw_interface_t *hi;
2253 vnet_sw_interface_t *si;
2257 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is administratively down. */
2259 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2261 return clib_error_return (0, "%U: interface %U down",
2262 format_ip4_address, dst,
2263 format_vnet_sw_if_index_name, vnm,
/* The matching interface address is returned through ia. */
2268 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2271 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2272 return clib_error_return
2274 "no matching interface address for destination %U (interface %U)",
2275 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2279 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2282 vlib_packet_template_get_packet (vm,
2283 &im->ip4_arp_request_packet_template,
2286 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address = address of the supporting hardware interface. */
2288 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2289 sizeof (h->ip4_over_ethernet[0].ethernet));
/* Slot 0 is the sender, slot 1 the target being probed. */
2291 h->ip4_over_ethernet[0].ip4 = src[0];
2292 h->ip4_over_ethernet[1].ip4 = dst[0];
2294 b = vlib_get_buffer (vm, bi);
2295 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2296 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2298 /* Add encapsulation string for software interface (e.g. ethernet header). */
2299 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back so that it covers the rewrite just written. */
2300 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2303 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2304 u32 *to_next = vlib_frame_vector_args (f);
2307 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2310 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: DROP is its initial next
 * value, ICMP_ERROR is selected when the TTL expires. */
2315 IP4_REWRITE_NEXT_DROP,
2316 IP4_REWRITE_NEXT_ICMP_ERROR,
2317 } ip4_rewrite_next_t;
2320 ip4_rewrite_inline (vlib_main_t * vm,
2321 vlib_node_runtime_t * node,
2322 vlib_frame_t * frame,
2323 int do_counters, int is_midchain, int is_mcast)
2325 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2326 u32 *from = vlib_frame_vector_args (frame);
2327 u32 n_left_from, n_left_to_next, *to_next, next_index;
2328 vlib_node_runtime_t *error_node =
2329 vlib_node_get_runtime (vm, ip4_input_node.index);
2331 n_left_from = frame->n_vectors;
2332 next_index = node->cached_next_index;
2333 u32 thread_index = vlib_get_thread_index ();
2335 while (n_left_from > 0)
2337 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2339 while (n_left_from >= 4 && n_left_to_next >= 2)
2341 ip_adjacency_t *adj0, *adj1;
2342 vlib_buffer_t *p0, *p1;
2343 ip4_header_t *ip0, *ip1;
2344 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2345 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2346 u32 tx_sw_if_index0, tx_sw_if_index1;
2348 /* Prefetch next iteration. */
2350 vlib_buffer_t *p2, *p3;
2352 p2 = vlib_get_buffer (vm, from[2]);
2353 p3 = vlib_get_buffer (vm, from[3]);
2355 vlib_prefetch_buffer_header (p2, STORE);
2356 vlib_prefetch_buffer_header (p3, STORE);
2358 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2359 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2362 pi0 = to_next[0] = from[0];
2363 pi1 = to_next[1] = from[1];
2368 n_left_to_next -= 2;
2370 p0 = vlib_get_buffer (vm, pi0);
2371 p1 = vlib_get_buffer (vm, pi1);
2373 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2374 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2377 * pre-fetch the per-adjacency counters
2381 vlib_prefetch_combined_counter (&adjacency_counters,
2382 thread_index, adj_index0);
2383 vlib_prefetch_combined_counter (&adjacency_counters,
2384 thread_index, adj_index1);
2387 ip0 = vlib_buffer_get_current (p0);
2388 ip1 = vlib_buffer_get_current (p1);
2390 error0 = error1 = IP4_ERROR_NONE;
2391 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2393 /* Decrement TTL & update checksum.
2394 Works either endian, so no need for byte swap. */
2395 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2397 i32 ttl0 = ip0->ttl;
2399 /* Input node should have rejected packets with ttl 0. */
2400 ASSERT (ip0->ttl > 0);
2402 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2403 checksum0 += checksum0 >= 0xffff;
2405 ip0->checksum = checksum0;
2410 * If the ttl drops below 1 when forwarding, generate
2413 if (PREDICT_FALSE (ttl0 <= 0))
2415 error0 = IP4_ERROR_TIME_EXPIRED;
2416 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2417 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2418 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2420 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2423 /* Verify checksum. */
2424 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2428 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2430 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2432 i32 ttl1 = ip1->ttl;
2434 /* Input node should have rejected packets with ttl 0. */
2435 ASSERT (ip1->ttl > 0);
2437 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2438 checksum1 += checksum1 >= 0xffff;
2440 ip1->checksum = checksum1;
2445 * If the ttl drops below 1 when forwarding, generate
2448 if (PREDICT_FALSE (ttl1 <= 0))
2450 error1 = IP4_ERROR_TIME_EXPIRED;
2451 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2452 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2453 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2455 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2458 /* Verify checksum. */
2459 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2460 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2464 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2467 /* Rewrite packet header and updates lengths. */
2468 adj0 = ip_get_adjacency (lm, adj_index0);
2469 adj1 = ip_get_adjacency (lm, adj_index1);
2471 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2472 rw_len0 = adj0[0].rewrite_header.data_bytes;
2473 rw_len1 = adj1[0].rewrite_header.data_bytes;
2474 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2475 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2477 /* Check MTU of outgoing interface. */
2479 (vlib_buffer_length_in_chain (vm, p0) >
2481 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2484 (vlib_buffer_length_in_chain (vm, p1) >
2486 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2489 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2490 * to see the IP header */
2491 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2493 next0 = adj0[0].rewrite_header.next_index;
2494 p0->current_data -= rw_len0;
2495 p0->current_length += rw_len0;
2496 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2497 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2500 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2501 vnet_feature_arc_start (lm->output_feature_arc_index,
2502 tx_sw_if_index0, &next0, p0);
2504 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2506 next1 = adj1[0].rewrite_header.next_index;
2507 p1->current_data -= rw_len1;
2508 p1->current_length += rw_len1;
2510 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2511 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2514 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2515 vnet_feature_arc_start (lm->output_feature_arc_index,
2516 tx_sw_if_index1, &next1, p1);
2519 /* Guess we are only writing on simple Ethernet header. */
2520 vnet_rewrite_two_headers (adj0[0], adj1[0],
2521 ip0, ip1, sizeof (ethernet_header_t));
2524 * Bump the per-adjacency counters
2528 vlib_increment_combined_counter
2529 (&adjacency_counters,
2532 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2534 vlib_increment_combined_counter
2535 (&adjacency_counters,
2538 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2543 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2544 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2549 * copy bytes from the IP address into the MAC rewrite
2551 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2552 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2555 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2556 to_next, n_left_to_next,
2557 pi0, pi1, next0, next1);
2560 while (n_left_from > 0 && n_left_to_next > 0)
2562 ip_adjacency_t *adj0;
2565 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2566 u32 tx_sw_if_index0;
2568 pi0 = to_next[0] = from[0];
2570 p0 = vlib_get_buffer (vm, pi0);
2572 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2574 adj0 = ip_get_adjacency (lm, adj_index0);
2576 ip0 = vlib_buffer_get_current (p0);
2578 error0 = IP4_ERROR_NONE;
2579 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2581 /* Decrement TTL & update checksum. */
2582 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2584 i32 ttl0 = ip0->ttl;
2586 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2588 checksum0 += checksum0 >= 0xffff;
2590 ip0->checksum = checksum0;
2592 ASSERT (ip0->ttl > 0);
2598 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2600 if (PREDICT_FALSE (ttl0 <= 0))
2603 * If the ttl drops below 1 when forwarding, generate
2606 error0 = IP4_ERROR_TIME_EXPIRED;
2607 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2608 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2609 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2610 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2616 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2620 vlib_prefetch_combined_counter (&adjacency_counters,
2621 thread_index, adj_index0);
2623 /* Guess we are only writing on simple Ethernet header. */
2624 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2628 * copy bytes from the IP address into the MAC rewrite
2630 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2633 /* Update packet buffer attributes/set output interface. */
2634 rw_len0 = adj0[0].rewrite_header.data_bytes;
2635 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2638 vlib_increment_combined_counter
2639 (&adjacency_counters,
2640 thread_index, adj_index0, 1,
2641 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2643 /* Check MTU of outgoing interface. */
2644 error0 = (vlib_buffer_length_in_chain (vm, p0)
2645 > adj0[0].rewrite_header.max_l3_packet_bytes
2646 ? IP4_ERROR_MTU_EXCEEDED : error0);
2648 p0->error = error_node->errors[error0];
2650 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2651 * to see the IP header */
2652 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2654 p0->current_data -= rw_len0;
2655 p0->current_length += rw_len0;
2656 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2658 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2659 next0 = adj0[0].rewrite_header.next_index;
2663 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2667 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2668 vnet_feature_arc_start (lm->output_feature_arc_index,
2669 tx_sw_if_index0, &next0, p0);
2676 n_left_to_next -= 1;
2678 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2679 to_next, n_left_to_next,
2683 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2686 /* Need to do trace after rewrites to pick up new packet data. */
2687 if (node->flags & VLIB_NODE_FLAG_TRACE)
2688 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2690 return frame->n_vectors;
2694 /** @brief IPv4 rewrite node.
2697 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2698 header checksum, fetch the ip adjacency, check the outbound mtu,
2699 apply the adjacency rewrite, and send pkts to the adjacency
2700 rewrite header's rewrite_next_index.
2702 @param vm vlib_main_t corresponding to the current thread
2703 @param node vlib_node_runtime_t
2704 @param frame vlib_frame_t whose contents should be dispatched
2706 @par Graph mechanics: buffer metadata, next index usage
2709 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2710 - the rewrite adjacency index
2711 - <code>adj->lookup_next_index</code>
2712 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2713 the packet will be dropped.
2714 - <code>adj->rewrite_header</code>
2715 - Rewrite string length, rewrite string, next_index
2718 - <code>b->current_data, b->current_length</code>
2719 - Updated net of applying the rewrite string
2721 <em>Next Indices:</em>
2722 - <code> adj->rewrite_header.next_index </code>
2726 ip4_rewrite (vlib_main_t * vm,
2727 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Dispatch to ip4_rewrite_inline with literal flag arguments: the fourth
   argument is 1 only when adj_are_counters_enabled () is true, and the
   last two are both 0 for this node (ip4_midchain passes 1, 0 and
   ip4_rewrite_mcast passes 0, 1 in those two positions). */
2729 if (adj_are_counters_enabled ())
2730 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2732 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/* Node function registered for "ip4-midchain".  Same dispatch as
   ip4_rewrite, except that the fifth ip4_rewrite_inline argument is 1.
   NOTE(review): that flag presumably enables the adjacency's
   sub_type.midchain.fixup_func call -- confirm against the parameter
   list of ip4_rewrite_inline. */
2736 ip4_midchain (vlib_main_t * vm,
2737 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Fourth argument mirrors whether adjacency counters are enabled. */
2739 if (adj_are_counters_enabled ())
2740 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2742 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/* Node function registered for "ip4-rewrite-mcast".  Same dispatch as
   ip4_rewrite, except that the sixth ip4_rewrite_inline argument is 1.
   NOTE(review): that flag presumably enables the step that copies bytes
   from the IP destination address into the MAC rewrite
   (vnet_fixup_one_header) -- confirm against ip4_rewrite_inline. */
2746 ip4_rewrite_mcast (vlib_main_t * vm,
2747 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Fourth argument mirrors whether adjacency counters are enabled. */
2749 if (adj_are_counters_enabled ())
2750 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2752 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/* Registration of the "ip4-rewrite" graph node, served by ip4_rewrite.
   Frame elements are u32-sized and traces are rendered with
   format_ip4_rewrite_trace. */
2756 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2757 .function = ip4_rewrite,
2758 .name = "ip4-rewrite",
2759 .vector_size = sizeof (u32),
2761 .format_trace = format_ip4_rewrite_trace,
/* Next-node names, indexed by the IP4_REWRITE_NEXT_* values. */
2765 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2766 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2769 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Registration of the "ip4-rewrite-mcast" graph node, served by
   ip4_rewrite_mcast.  It is declared a sibling of "ip4-rewrite".
   NOTE(review): sibling_of presumably makes this node share the next-node
   arcs of "ip4-rewrite" instead of listing its own -- confirm against
   the vlib node documentation. */
2771 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2772 .function = ip4_rewrite_mcast,
2773 .name = "ip4-rewrite-mcast",
2774 .vector_size = sizeof (u32),
2776 .format_trace = format_ip4_rewrite_trace,
2777 .sibling_of = "ip4-rewrite",
2779 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/* Registration of the "ip4-midchain" graph node, served by ip4_midchain
   and declared a sibling of "ip4-rewrite".  Unlike the two rewrite nodes,
   its traces are rendered with format_ip4_forward_next_trace. */
2781 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2782 .function = ip4_midchain,
2783 .name = "ip4-midchain",
2784 .vector_size = sizeof (u32),
2785 .format_trace = format_ip4_forward_next_trace,
2786 .sibling_of = "ip4-rewrite",
2788 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* CLI handler for "set interface ip table <interface> <table-id>".
   Parses an interface and a numeric table id, reports an error if the
   interface already carries an IPv4 address, and otherwise records the
   unicast and multicast FIB indices for that table id against the
   interface.

   vm, cmd: CLI context supplied by the command dispatcher.
   input: remaining command-line text to parse.
   Returns a clib_error_t describing the failure; error starts out as 0. */
2791 static clib_error_t *
2792 add_del_interface_table (vlib_main_t * vm,
2793 unformat_input_t * input, vlib_cli_command_t * cmd)
2795 vnet_main_t *vnm = vnet_get_main ();
2796 ip_interface_address_t *ia;
2797 clib_error_t *error = 0;
2798 u32 sw_if_index, table_id;
/* The first token must resolve to a known software interface. */
2802 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2804 error = clib_error_return (0, "unknown interface `%U'",
2805 format_unformat_error, input);
/* The second token is the numeric table id. */
2809 if (unformat (input, "%d", &table_id))
2813 error = clib_error_return (0, "expected table id `%U'",
2814 format_unformat_error, input);
2819 * If the interface already has in IP address, then a change int
2820 * VRF is not allowed. The IP address applied must first be removed.
2821 * We do not do that automatically here, since VPP has no knowledge
2822 * of whether thoses subnets are valid in the destination VRF.
2825 foreach_ip_interface_address (&ip4_main.lookup_main,
2827 1 /* honor unnumbered */,
2831 a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2832 error = clib_error_return (0, "interface %U has address %U",
2833 format_vnet_sw_if_index_name, vnm,
2835 format_ip4_address, a);
/* Find (or create and lock) the IPv4 unicast table for table_id and store
   its index for this interface, growing the per-interface vector first.
   The same steps are then repeated for the multicast table; fib_index is
   reused to hold the mfib index. */
2841 ip4_main_t *im = &ip4_main;
2844 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2846 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2847 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2849 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2850 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2851 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2859 * Place the indicated interface into the supplied IPv4 FIB table (also known
2860 * as a VRF). If the FIB table does not exist, this command creates it. To
2861 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2862 * FIB table will only be displayed if a route has been added to the table, or
2863 * an IP Address is assigned to an interface in the table (which adds a route
2866 * @note IP addresses added after setting the interface IP table are added to
2867 * the indicated FIB table. If an IP address is added prior to changing the
2868 * table then this is an error. The control plane must remove these addresses
2869 * first and then change the table. VPP will not automatically move the
2870 * addresses from the old to the new table as it does not know the validity
2874 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2875 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2878 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
/* Binds the CLI path "set interface ip table" to
   add_del_interface_table. */
2880 .path = "set interface ip table",
2881 .function = add_del_interface_table,
2882 .short_help = "set interface ip table <interface> <table-id>",
/* Cross-check two lookups of address a in the IPv4 FIB at index
   fib_index0: the result of walking that FIB's mtrie, and the result of
   ip4_fib_table_lookup_lb on the same FIB.

   a: address to look up.
   fib_index0: FIB index passed to ip4_fib_get.
   Returns nonzero when both lookups yield the same index, 0 otherwise. */
2887 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2889 ip4_fib_mtrie_t *mtrie0;
2890 ip4_fib_mtrie_leaf_t leaf0;
2893 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* Three-stage mtrie walk: the initial step, then the steps taking 2 and 3
   as their last argument. */
2895 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2896 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2897 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
/* Extract the index stored in the final leaf. */
2899 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Compare against the FIB table lookup of the same address. */
2901 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/* CLI handler for "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]".
   Runs ip4_lookup_validate on a run of consecutive IPv4 addresses and
   prints how many of the lookups disagreed.

   vm: used for CLI output.
   input: remaining command-line text to parse.
   cmd: CLI command descriptor; not referenced in the statements below.
   Returns a clib_error_t for unparsable input or a table value that
   fails the existence check. */
2904 static clib_error_t *
2905 test_lookup_command_fn (vlib_main_t * vm,
2906 unformat_input_t * input, vlib_cli_command_t * cmd)
2913 ip4_address_t ip4_base_address;
/* Options may appear in any order until the input is exhausted. */
2916 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2918 if (unformat (input, "table %d", &table_id))
2920 /* Make sure the entry exists.  Note the parsed value is handed
   straight to ip4_fib_get, i.e. it is used as a FIB index. */
2921 fib = ip4_fib_get (table_id);
2922 if ((fib) && (fib->index != table_id))
2923 return clib_error_return (0, "<fib-index> %d does not exist",
2926 else if (unformat (input, "count %f", &count))
2929 else if (unformat (input, "%U",
2930 unformat_ip4_address, &ip4_base_address))
2933 return clib_error_return (0, "unknown input `%U'",
2934 format_unformat_error, input);
/* Validate n addresses, starting at ip4_base_address. */
2939 for (i = 0; i < n; i++)
2941 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: the +1 is done in host byte order and the
   result is converted back to network order. */
2944 ip4_base_address.as_u32 =
2945 clib_host_to_net_u32 (1 +
2946 clib_net_to_host_u32 (ip4_base_address.as_u32));
/* Report the outcome on the CLI. */
2950 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2952 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2958 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2959 * given FIB table to determine if there is a conflict with the
2960 * adjacency table. The fib-id can be determined by using the
2961 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2964 * @todo This command uses fib-id, other commands use table-id (not
2965 * just a name, they are different indexes). Would like to change this
2966 * to table-id for consistency.
2969 * Example of how to run the test lookup command:
2970 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2971 * No errors in 2 lookups
2975 VLIB_CLI_COMMAND (lookup_test_command, static) =
/* Binds the CLI path "test lookup" to test_lookup_command_fn. */
2977 .path = "test lookup",
2978 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2979 .function = test_lookup_command_fn,
/* Store flow_hash_config on the IPv4 FIB that table_id maps to.

   table_id: key looked up in ip4_main.fib_index_by_table_id.
   flow_hash_config: value written to the FIB's flow_hash_config field.
   Returns VNET_API_ERROR_NO_SUCH_FIB on the lookup-failure path; the CLI
   caller reports that code as "no such FIB table". */
2984 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2986 ip4_main_t *im4 = &ip4_main;
/* Translate the table id into a FIB index via the hash. */
2988 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
2991 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] is the FIB index stored under table_id. */
2993 fib = ip4_fib_get (p[0]);
2995 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".  Builds a flow-hash bitmask from
   the keywords on the command line and applies it to the chosen table
   through vnet_set_ip4_flow_hash.

   vm, cmd: CLI context supplied by the command dispatcher.
   input: remaining command-line text to parse.
   Returns a clib_error_t for unknown input or an unknown FIB table. */
2999 static clib_error_t *
3000 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3001 unformat_input_t * input,
3002 vlib_cli_command_t * cmd)
3006 u32 flow_hash_config = 0;
/* Accept "table <id>" plus any keyword produced by foreach_flow_hash_bit;
   each matched keyword ORs its bit into flow_hash_config. */
3009 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3011 if (unformat (input, "table %d", &table_id))
3014 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3015 foreach_flow_hash_bit
3022 return clib_error_return (0, "unknown input `%U'",
3023 format_unformat_error, input);
/* Apply the collected configuration to the table. */
3025 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
/* Translate the API return code into a CLI result. */
3031 case VNET_API_ERROR_NO_SUCH_FIB:
3032 return clib_error_return (0, "no such FIB table %d", table_id);
/* Any other code is unexpected here and is only logged. */
3035 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3043 * Configure the set of IPv4 fields used by the flow hash.
3046 * Example of how to set the flow hash on a given table:
3047 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3048 * Example of how to display the configured flow hash:
3049 * @cliexstart{show ip fib}
3050 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3053 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3054 * [0] [@0]: dpo-drop ip6
3057 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3058 * [0] [@0]: dpo-drop ip6
3061 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3062 * [0] [@0]: dpo-drop ip6
3065 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3066 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3069 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3070 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3071 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3072 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3073 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3076 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3077 * [0] [@0]: dpo-drop ip6
3078 * 255.255.255.255/32
3080 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3081 * [0] [@0]: dpo-drop ip6
3082 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3085 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3086 * [0] [@0]: dpo-drop ip6
3089 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3090 * [0] [@0]: dpo-drop ip6
3093 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3094 * [0] [@4]: ipv4-glean: af_packet0
3097 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3098 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3101 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3102 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3105 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3106 * [0] [@4]: ipv4-glean: af_packet1
3109 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3110 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3113 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3114 * [0] [@0]: dpo-drop ip6
3117 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3118 * [0] [@0]: dpo-drop ip6
3119 * 255.255.255.255/32
3121 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3122 * [0] [@0]: dpo-drop ip6
3126 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
/* Binds the CLI path "set ip flow-hash" to
   set_ip_flow_hash_command_fn. */
3128 .path = "set ip flow-hash",
3130 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3131 .function = set_ip_flow_hash_command_fn,
/* Attach classify table table_index to interface sw_if_index, or detach
   the current one when table_index is ~0.

   vm: not referenced in the statements below.
   sw_if_index: software interface index; must be in use.
   table_index: classify table index, or ~0.
   Returns VNET_API_ERROR_NO_MATCHING_INTERFACE for an unused interface
   index and VNET_API_ERROR_NO_SUCH_ENTRY for an unused table index. */
3136 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3139 vnet_main_t *vnm = vnet_get_main ();
3140 vnet_interface_main_t *im = &vnm->interface_main;
3141 ip4_main_t *ipm = &ip4_main;
3142 ip_lookup_main_t *lm = &ipm->lookup_main;
3143 vnet_classify_main_t *cm = &vnet_classify_main;
3144 ip4_address_t *if_addr;
/* Reject interface indices that are free slots in the interface pool. */
3146 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3147 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 skips the table existence check; any other value must name a live
   classify table. */
3149 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3150 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Record the table for this interface, growing the vector if needed. */
3152 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3153 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
/* The FIB entry below is only touched when the interface has an IPv4
   address. */
3155 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3157 if (NULL != if_addr)
3159 fib_prefix_t pfx = {
3161 .fp_proto = FIB_PROTOCOL_IP4,
3162 .fp_addr.ip4 = *if_addr,
3166 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
/* With a real table: add a FIB_SOURCE_CLASSIFY entry for the interface
   address whose DPO comes from classify_dpo_create. */
3170 if (table_index != (u32) ~ 0)
3172 dpo_id_t dpo = DPO_INVALID;
3177 classify_dpo_create (DPO_PROTO_IP4, table_index));
3179 fib_table_entry_special_dpo_add (fib_index,
3181 FIB_SOURCE_CLASSIFY,
3182 FIB_ENTRY_FLAG_NONE, &dpo);
/* With ~0: withdraw the FIB_SOURCE_CLASSIFY entry for that prefix. */
3187 fib_table_entry_special_remove (fib_index,
3188 &pfx, FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify intfc <interface> table-index <idx>".
   Parses both arguments, requires each to be present, then calls
   vnet_set_ip4_classify_intfc and turns its error codes into CLI errors.

   vm: forwarded to vnet_set_ip4_classify_intfc.
   input: remaining command-line text to parse.
   cmd: CLI command descriptor; not referenced in the statements below.
   Returns a clib_error_t for a missing argument, an unknown interface or
   an unknown classifier table. */
3195 static clib_error_t *
3196 set_ip_classify_command_fn (vlib_main_t * vm,
3197 unformat_input_t * input,
3198 vlib_cli_command_t * cmd)
/* table_index_set is tracked separately because ~0 is itself a value
   that vnet_set_ip4_classify_intfc accepts for table_index. */
3200 u32 table_index = ~0;
3201 int table_index_set = 0;
3202 u32 sw_if_index = ~0;
3205 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3207 if (unformat (input, "table-index %d", &table_index))
3208 table_index_set = 1;
3209 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3210 vnet_get_main (), &sw_if_index))
/* Both arguments are mandatory. */
3216 if (table_index_set == 0)
3217 return clib_error_return (0, "classify table-index must be specified");
3219 if (sw_if_index == ~0)
3220 return clib_error_return (0, "interface / subif must be specified");
3222 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
/* Map the API error codes onto CLI messages. */
3229 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3230 return clib_error_return (0, "No such interface");
3232 case VNET_API_ERROR_NO_SUCH_ENTRY:
3233 return clib_error_return (0, "No such classifier table");
3239 * Assign a classification table to an interface. The classification
3240 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3241 * commands. Once the table is created, use this command to filter packets
3245 * Example of how to assign a classification table to an interface:
3246 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3249 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3251 .path = "set ip classify",
3253 "set ip classify intfc <interface> table-index <classify-idx>",
3254 .function = set_ip_classify_command_fn,
3259 * fd.io coding-style-patch-verification: ON
3262 * eval: (c-set-style "gnu")