2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 * @brief IPv4 Forwarding.
59 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the common trace helper for the ip4-forward nodes.
   ip4_lookup_inline and ip4_load_balance below call it with VLIB_TX when the
   node has VLIB_NODE_FLAG_TRACE set. */
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
/*
 * ip4_lookup_inline: per-frame IPv4 lookup.
 *
 * For each buffer in the frame a load-balance index is obtained, either from
 * the buffer's ip.adj_index[VLIB_RX] (when
 * lookup_for_responses_to_locally_received_packets is non-zero) or from a
 * three-step mtrie lookup on the destination address. The mtrie belongs to
 * the FIB selected by the RX interface (im->fib_index_by_sw_if_index) unless
 * sw_if_index[VLIB_TX] is not ~0, in which case that value is the FIB index.
 *
 * A bucket DPO is then read from the load-balance object: its dpoi_next_node
 * is the next node for the buffer and its dpoi_index is written to
 * ip.adj_index[VLIB_TX]. The flow hash stored in the buffer is zero unless
 * the load-balance has more than one bucket. The lbm_to_counters combined
 * counter of the load-balance index is incremented per packet.
 *
 * Returns frame->n_vectors.
 */
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 thread_index = vlib_get_thread_index ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Quad loop: four buffers per iteration; eight must remain so that
   from[4]..from[7] can be prefetched for the following iteration. */
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 u32 pi0, fib_index0, lb_index0;
99 u32 pi1, fib_index1, lb_index1;
100 u32 pi2, fib_index2, lb_index2;
101 u32 pi3, fib_index3, lb_index3;
102 flow_hash_config_t flow_hash_config0, flow_hash_config1;
103 flow_hash_config_t flow_hash_config2, flow_hash_config3;
104 u32 hash_c0, hash_c1, hash_c2, hash_c3;
105 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
107 /* Prefetch next iteration. */
109 vlib_buffer_t *p4, *p5, *p6, *p7;
111 p4 = vlib_get_buffer (vm, from[4]);
112 p5 = vlib_get_buffer (vm, from[5]);
113 p6 = vlib_get_buffer (vm, from[6]);
114 p7 = vlib_get_buffer (vm, from[7]);
116 vlib_prefetch_buffer_header (p4, LOAD);
117 vlib_prefetch_buffer_header (p5, LOAD);
118 vlib_prefetch_buffer_header (p6, LOAD);
119 vlib_prefetch_buffer_header (p7, LOAD);
121 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
/* Speculatively copy the four buffer indices into the current next frame. */
127 pi0 = to_next[0] = from[0];
128 pi1 = to_next[1] = from[1];
129 pi2 = to_next[2] = from[2];
130 pi3 = to_next[3] = from[3];
137 p0 = vlib_get_buffer (vm, pi0);
138 p1 = vlib_get_buffer (vm, pi1);
139 p2 = vlib_get_buffer (vm, pi2);
140 p3 = vlib_get_buffer (vm, pi3);
142 ip0 = vlib_buffer_get_current (p0);
143 ip1 = vlib_buffer_get_current (p1);
144 ip2 = vlib_buffer_get_current (p2);
145 ip3 = vlib_buffer_get_current (p3);
147 dst_addr0 = &ip0->dst_address;
148 dst_addr1 = &ip1->dst_address;
149 dst_addr2 = &ip2->dst_address;
150 dst_addr3 = &ip3->dst_address;
/* FIB index bound to the RX interface ... */
153 vec_elt (im->fib_index_by_sw_if_index,
154 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
156 vec_elt (im->fib_index_by_sw_if_index,
157 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
159 vec_elt (im->fib_index_by_sw_if_index,
160 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
162 vec_elt (im->fib_index_by_sw_if_index,
163 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
/* ... kept only when sw_if_index[VLIB_TX] is ~0; otherwise the TX slot
   itself is taken as the FIB index. */
165 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
166 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
168 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
169 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
171 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
172 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
174 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
175 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
/* mtrie lookup, step one; skipped entirely for responses to locally
   received packets. */
178 if (!lookup_for_responses_to_locally_received_packets)
180 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
181 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
182 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
183 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
185 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
186 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
187 leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
188 leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
/* tcp0..tcp3 are declared unused; they point just past the ip4_header_t. */
191 tcp0 = (void *) (ip0 + 1);
192 tcp1 = (void *) (ip1 + 1);
193 tcp2 = (void *) (ip2 + 1);
194 tcp3 = (void *) (ip3 + 1);
196 if (!lookup_for_responses_to_locally_received_packets)
198 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
199 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
200 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
201 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
204 if (!lookup_for_responses_to_locally_received_packets)
206 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
207 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
208 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
209 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
/* Load-balance index: taken from the RX adjacency slot for local responses,
   otherwise from the mtrie leaf. */
212 if (lookup_for_responses_to_locally_received_packets)
214 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
215 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
216 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
217 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
221 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
222 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
223 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
224 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
227 ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
228 lb0 = load_balance_get (lb_index0);
229 lb1 = load_balance_get (lb_index1);
230 lb2 = load_balance_get (lb_index2);
231 lb3 = load_balance_get (lb_index3);
233 /* Use flow hash to compute multipath adjacency. */
234 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
235 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
236 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
237 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
/* The hash is only computed (and stored in the buffer) when there is more
   than one bucket to choose from. */
238 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
240 flow_hash_config0 = lb0->lb_hash_config;
241 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
242 ip4_compute_flow_hash (ip0, flow_hash_config0);
244 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
246 flow_hash_config1 = lb1->lb_hash_config;
247 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
248 ip4_compute_flow_hash (ip1, flow_hash_config1);
250 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
252 flow_hash_config2 = lb2->lb_hash_config;
253 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
254 ip4_compute_flow_hash (ip2, flow_hash_config2);
256 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
258 flow_hash_config3 = lb3->lb_hash_config;
259 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
260 ip4_compute_flow_hash (ip3, flow_hash_config3);
263 ASSERT (lb0->lb_n_buckets > 0);
264 ASSERT (is_pow2 (lb0->lb_n_buckets));
265 ASSERT (lb1->lb_n_buckets > 0);
266 ASSERT (is_pow2 (lb1->lb_n_buckets));
267 ASSERT (lb2->lb_n_buckets > 0);
268 ASSERT (is_pow2 (lb2->lb_n_buckets));
269 ASSERT (lb3->lb_n_buckets > 0);
270 ASSERT (is_pow2 (lb3->lb_n_buckets));
272 dpo0 = load_balance_get_bucket_i (lb0,
274 (lb0->lb_n_buckets_minus_1)));
275 dpo1 = load_balance_get_bucket_i (lb1,
277 (lb1->lb_n_buckets_minus_1)));
278 dpo2 = load_balance_get_bucket_i (lb2,
280 (lb2->lb_n_buckets_minus_1)));
281 dpo3 = load_balance_get_bucket_i (lb3,
283 (lb3->lb_n_buckets_minus_1)));
/* The bucket DPO supplies both the next node and the index stored in the
   buffer's TX adjacency slot. */
285 next0 = dpo0->dpoi_next_node;
286 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
287 next1 = dpo1->dpoi_next_node;
288 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
289 next2 = dpo2->dpoi_next_node;
290 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
291 next3 = dpo3->dpoi_next_node;
292 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/* NOTE(review): the byte count here adds sizeof (ethernet_header_t), whereas
   the single-buffer loop below counts vlib_buffer_length_in_chain () only.
   The two paths therefore account different byte totals for the same
   packet - confirm which is intended. */
294 vlib_increment_combined_counter
295 (cm, thread_index, lb_index0, 1,
296 vlib_buffer_length_in_chain (vm, p0)
297 + sizeof (ethernet_header_t));
298 vlib_increment_combined_counter
299 (cm, thread_index, lb_index1, 1,
300 vlib_buffer_length_in_chain (vm, p1)
301 + sizeof (ethernet_header_t));
302 vlib_increment_combined_counter
303 (cm, thread_index, lb_index2, 1,
304 vlib_buffer_length_in_chain (vm, p2)
305 + sizeof (ethernet_header_t));
306 vlib_increment_combined_counter
307 (cm, thread_index, lb_index3, 1,
308 vlib_buffer_length_in_chain (vm, p3)
309 + sizeof (ethernet_header_t));
311 vlib_validate_buffer_enqueue_x4 (vm, node, next,
312 to_next, n_left_to_next,
314 next0, next1, next2, next3);
/* Single loop: same processing, one buffer at a time, for what is left. */
317 while (n_left_from > 0 && n_left_to_next > 0)
321 __attribute__ ((unused)) tcp_header_t *tcp0;
322 ip_lookup_next_t next0;
323 const load_balance_t *lb0;
324 ip4_fib_mtrie_t *mtrie0;
325 ip4_fib_mtrie_leaf_t leaf0;
326 ip4_address_t *dst_addr0;
327 u32 pi0, fib_index0, lbi0;
328 flow_hash_config_t flow_hash_config0;
329 const dpo_id_t *dpo0;
335 p0 = vlib_get_buffer (vm, pi0);
337 ip0 = vlib_buffer_get_current (p0);
339 dst_addr0 = &ip0->dst_address;
342 vec_elt (im->fib_index_by_sw_if_index,
343 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
345 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
346 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
348 if (!lookup_for_responses_to_locally_received_packets)
350 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
352 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
355 tcp0 = (void *) (ip0 + 1);
357 if (!lookup_for_responses_to_locally_received_packets)
358 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
360 if (!lookup_for_responses_to_locally_received_packets)
361 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
363 if (lookup_for_responses_to_locally_received_packets)
364 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
367 /* Handle default route. */
368 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
372 lb0 = load_balance_get (lbi0);
374 /* Use flow hash to compute multipath adjacency. */
375 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
376 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
378 flow_hash_config0 = lb0->lb_hash_config;
380 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
381 ip4_compute_flow_hash (ip0, flow_hash_config0);
384 ASSERT (lb0->lb_n_buckets > 0);
385 ASSERT (is_pow2 (lb0->lb_n_buckets));
387 dpo0 = load_balance_get_bucket_i (lb0,
389 (lb0->lb_n_buckets_minus_1)));
391 next0 = dpo0->dpoi_next_node;
392 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
394 vlib_increment_combined_counter
395 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
/* The buffer's next node differs from the frame being filled: hand the
   current frame back and obtain another next frame. */
402 if (PREDICT_FALSE (next0 != next))
405 vlib_put_next_frame (vm, node, next, n_left_to_next);
407 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
414 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing is done once per frame, after all buffers were dispatched. */
417 if (node->flags & VLIB_NODE_FLAG_TRACE)
418 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
420 return frame->n_vectors;
423 /** @brief IPv4 lookup node.
426 This is the main IPv4 lookup dispatch node.
428 @param vm vlib_main_t corresponding to the current thread
429 @param node vlib_node_runtime_t
430 @param frame vlib_frame_t whose contents should be dispatched
432 @par Graph mechanics: buffer metadata, next index usage
435 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
436 - Indicates the @c sw_if_index value of the interface that the
437 packet was received on.
438 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
439 - When the value is @c ~0 then the node performs a longest prefix
440 match (LPM) for the packet destination address in the FIB attached
441 to the receive interface.
442 - Otherwise perform LPM for the packet destination address in the
443 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
444 value (0, 1, ...) and not a VRF id.
447 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
448 - The index of the DPO read from the load-balance bucket (@c dpo->dpoi_index).
451 - Dispatches the packet to the node index found in
452 the load-balance bucket DPO, @c dpo->dpoi_next_node
453 (where @c dpo is the bucket selected from the lookup result load-balance).
/* Node function of the ip4-lookup graph node (see the VLIB_REGISTER_NODE
   below): forwards the frame to ip4_lookup_inline and returns its result. */
456 ip4_lookup (vlib_main_t * vm,
457 vlib_node_runtime_t * node, vlib_frame_t * frame)
459 return ip4_lookup_inline (vm, node, frame,
460 /* lookup_for_responses_to_locally_received_packets */
/* Trace formatter, defined further down in this file; declared here because
   the node registration below references it. */
465 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registers the "ip4-lookup" graph node: ip4_lookup is its dispatch
   function, frames carry u32 elements, and the next nodes come from
   IP4_LOOKUP_NEXT_NODES (IP_LOOKUP_N_NEXT entries). */
467 VLIB_REGISTER_NODE (ip4_lookup_node) =
469 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
470 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
471 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
473 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/*
 * ip4_load_balance: node function of "ip4-load-balance".
 *
 * For each buffer the load-balance index is read from
 * ip.adj_index[VLIB_TX]. A bucket is picked with the flow hash masked by
 * lb_n_buckets_minus_1; the bucket DPO's dpoi_next_node becomes the next
 * node and its dpoi_index overwrites ip.adj_index[VLIB_TX]. The
 * lbm_via_counters combined counter of the load-balance index is
 * incremented with the buffer chain length.
 *
 * Unlike ip4_lookup_inline, tracing is done before the buffers are
 * processed. Returns frame->n_vectors.
 */
476 ip4_load_balance (vlib_main_t * vm,
477 vlib_node_runtime_t * node, vlib_frame_t * frame)
479 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
480 u32 n_left_from, n_left_to_next, *from, *to_next;
481 ip_lookup_next_t next;
482 u32 thread_index = vlib_get_thread_index ();
484 from = vlib_frame_vector_args (frame);
485 n_left_from = frame->n_vectors;
486 next = node->cached_next_index;
488 if (node->flags & VLIB_NODE_FLAG_TRACE)
489 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
491 while (n_left_from > 0)
493 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: two buffers per iteration; four must remain so that from[2]
   and from[3] can be prefetched. */
496 while (n_left_from >= 4 && n_left_to_next >= 2)
498 ip_lookup_next_t next0, next1;
499 const load_balance_t *lb0, *lb1;
500 vlib_buffer_t *p0, *p1;
501 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
502 const ip4_header_t *ip0, *ip1;
503 const dpo_id_t *dpo0, *dpo1;
505 /* Prefetch next iteration. */
507 vlib_buffer_t *p2, *p3;
509 p2 = vlib_get_buffer (vm, from[2]);
510 p3 = vlib_get_buffer (vm, from[3]);
512 vlib_prefetch_buffer_header (p2, STORE);
513 vlib_prefetch_buffer_header (p3, STORE);
515 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
516 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
519 pi0 = to_next[0] = from[0];
520 pi1 = to_next[1] = from[1];
527 p0 = vlib_get_buffer (vm, pi0);
528 p1 = vlib_get_buffer (vm, pi1);
530 ip0 = vlib_buffer_get_current (p0);
531 ip1 = vlib_buffer_get_current (p1);
532 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
533 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
535 lb0 = load_balance_get (lbi0);
536 lb1 = load_balance_get (lbi1);
539 * this node is for via FIBs we can re-use the hash value from the
540 * to node if present.
541 * We don't want to use the same hash value at each level in the recursion
542 * graph as that would lead to polarisation
/* With more than one bucket: a non-zero flow hash already stored in the
   buffer is reused shifted right by one bit; the other visible assignment
   computes a fresh hash from the IP header with the load-balance's hash
   config.
   NOTE(review): no initialisation of hc0/hc1 for the single-bucket case is
   visible here - confirm they are zeroed before being used below. */
546 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
548 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
550 hc0 = vnet_buffer (p0)->ip.flow_hash =
551 vnet_buffer (p0)->ip.flow_hash >> 1;
555 hc0 = vnet_buffer (p0)->ip.flow_hash =
556 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
559 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
561 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
563 hc1 = vnet_buffer (p1)->ip.flow_hash =
564 vnet_buffer (p1)->ip.flow_hash >> 1;
568 hc1 = vnet_buffer (p1)->ip.flow_hash =
569 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
/* Bucket selection: hash masked by (number of buckets - 1). */
574 load_balance_get_bucket_i (lb0,
575 hc0 & (lb0->lb_n_buckets_minus_1));
577 load_balance_get_bucket_i (lb1,
578 hc1 & (lb1->lb_n_buckets_minus_1));
580 next0 = dpo0->dpoi_next_node;
581 next1 = dpo1->dpoi_next_node;
583 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
584 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
586 vlib_increment_combined_counter
587 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
588 vlib_increment_combined_counter
589 (cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
591 vlib_validate_buffer_enqueue_x2 (vm, node, next,
592 to_next, n_left_to_next,
593 pi0, pi1, next0, next1);
/* Single loop: same processing for the remaining buffers. */
596 while (n_left_from > 0 && n_left_to_next > 0)
598 ip_lookup_next_t next0;
599 const load_balance_t *lb0;
602 const ip4_header_t *ip0;
603 const dpo_id_t *dpo0;
612 p0 = vlib_get_buffer (vm, pi0);
614 ip0 = vlib_buffer_get_current (p0);
615 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
617 lb0 = load_balance_get (lbi0);
620 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
622 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
624 hc0 = vnet_buffer (p0)->ip.flow_hash =
625 vnet_buffer (p0)->ip.flow_hash >> 1;
629 hc0 = vnet_buffer (p0)->ip.flow_hash =
630 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
635 load_balance_get_bucket_i (lb0,
636 hc0 & (lb0->lb_n_buckets_minus_1));
638 next0 = dpo0->dpoi_next_node;
639 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
641 vlib_increment_combined_counter
642 (cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
644 vlib_validate_buffer_enqueue_x1 (vm, node, next,
645 to_next, n_left_to_next,
649 vlib_put_next_frame (vm, node, next, n_left_to_next);
652 return frame->n_vectors;
/* Registers the "ip4-load-balance" graph node with ip4_load_balance as its
   dispatch function. It is declared a sibling of "ip4-lookup" and reuses
   that node's trace formatter. */
655 VLIB_REGISTER_NODE (ip4_load_balance_node) =
657 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
658 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
659 format_ip4_lookup_trace,};
661 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
663 /* get first interface address */
/* Walks the addresses of sw_if_index (honoring unnumbered interfaces) using
   foreach_ip_interface_address. `result` starts out as 0; *result_ia is set
   to the matching interface-address object when `result` is non-zero and to
   0 otherwise. */
665 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
666 ip_interface_address_t ** result_ia)
668 ip_lookup_main_t *lm = &im->lookup_main;
669 ip_interface_address_t *ia = 0;
670 ip4_address_t *result = 0;
673 foreach_ip_interface_address
674 (lm, ia, sw_if_index,
675 1 /* honor unnumbered */ ,
678 ip_interface_address_get_address (lm, ia);
684 *result_ia = result ? ia : 0;
/*
 * ip4_add_interface_routes: installs the FIB entries that go with interface
 * address `a` in FIB `fib_index`, all sourced as FIB_SOURCE_INTERFACE.
 *
 *  - prefix length <= 30: a connected/attached entry for the prefix (its
 *    adjacency is remembered in a->neighbor_probe_adj_index) plus drop
 *    entries for the two broadcast addresses of the subnet, each skipped
 *    when it equals the interface address itself;
 *  - prefix length == 31: an attached entry for the other address of the
 *    pair;
 *  - an entry flagged CONNECTED | LOCAL, preceded by a classify-DPO entry
 *    when a classify table is bound to sw_if_index.
 */
689 ip4_add_interface_routes (u32 sw_if_index,
690 ip4_main_t * im, u32 fib_index,
691 ip_interface_address_t * a)
693 ip_lookup_main_t *lm = &im->lookup_main;
694 ip4_address_t *address = ip_interface_address_get_address (lm, a);
696 .fp_len = a->address_length,
697 .fp_proto = FIB_PROTOCOL_IP4,
698 .fp_addr.ip4 = *address,
/* ~0 until (and unless) the <= /30 branch below records an adjacency. */
701 a->neighbor_probe_adj_index = ~0;
703 if (pfx.fp_len <= 30)
705 /* a /30 or shorter - add a glean for the network address */
706 fib_node_index_t fei;
708 fei = fib_table_entry_update_one_path (fib_index, &pfx,
709 FIB_SOURCE_INTERFACE,
710 (FIB_ENTRY_FLAG_CONNECTED |
711 FIB_ENTRY_FLAG_ATTACHED),
713 /* No next-hop address */
719 // no out-label stack
721 FIB_ROUTE_PATH_FLAG_NONE);
722 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
724 /* Add the two broadcast addresses as drop */
/* First the address with all host bits cleared ... */
725 fib_prefix_t net_pfx = {
727 .fp_proto = FIB_PROTOCOL_IP4,
728 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
730 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
731 fib_table_entry_special_add(fib_index,
733 FIB_SOURCE_INTERFACE,
734 (FIB_ENTRY_FLAG_DROP |
735 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
/* ... then the address with all host bits set. */
736 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
737 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
738 fib_table_entry_special_add(fib_index,
740 FIB_SOURCE_INTERFACE,
741 (FIB_ENTRY_FLAG_DROP |
742 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT));
744 else if (pfx.fp_len == 31)
/* Flipping the lowest address bit (mask is 1 in network byte order) yields
   the peer address of the /31. */
746 u32 mask = clib_host_to_net_u32(1);
747 fib_prefix_t net_pfx = pfx;
750 net_pfx.fp_addr.ip4.as_u32 ^= mask;
752 /* a /31 - add the other end as an attached host */
753 fib_table_entry_update_one_path (fib_index, &net_pfx,
754 FIB_SOURCE_INTERFACE,
755 (FIB_ENTRY_FLAG_ATTACHED),
763 FIB_ROUTE_PATH_FLAG_NONE);
/* Only when a classify table index other than ~0 is recorded for this
   interface: create a classify DPO and add it as a special entry. */
767 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
769 u32 classify_table_index =
770 lm->classify_table_index_by_sw_if_index[sw_if_index];
771 if (classify_table_index != (u32) ~ 0)
773 dpo_id_t dpo = DPO_INVALID;
778 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
780 fib_table_entry_special_dpo_add (fib_index,
783 FIB_ENTRY_FLAG_NONE, &dpo);
/* Entry for pfx flagged CONNECTED | LOCAL.
   NOTE(review): presumably pfx has been narrowed to the host address by this
   point - confirm against the full source. */
788 fib_table_entry_update_one_path (fib_index, &pfx,
789 FIB_SOURCE_INTERFACE,
790 (FIB_ENTRY_FLAG_CONNECTED |
791 FIB_ENTRY_FLAG_LOCAL),
798 FIB_ROUTE_PATH_FLAG_NONE);
/*
 * ip4_del_interface_routes: removes the FIB_SOURCE_INTERFACE entries that
 * ip4_add_interface_routes installs for address/address_length.
 *
 *  - prefix length <= 30: the two broadcast-address special entries (each
 *    skipped when equal to the interface address) and the prefix entry;
 *  - prefix length == 31: the entry for the other address of the pair;
 *  - finally the entry for pfx itself.
 */
802 ip4_del_interface_routes (ip4_main_t * im,
804 ip4_address_t * address, u32 address_length)
807 .fp_len = address_length,
808 .fp_proto = FIB_PROTOCOL_IP4,
809 .fp_addr.ip4 = *address,
812 if (pfx.fp_len <= 30)
/* Address with all host bits cleared, then with all host bits set. */
814 fib_prefix_t net_pfx = {
816 .fp_proto = FIB_PROTOCOL_IP4,
817 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
819 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
820 fib_table_entry_special_remove(fib_index,
822 FIB_SOURCE_INTERFACE);
823 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
824 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
825 fib_table_entry_special_remove(fib_index,
827 FIB_SOURCE_INTERFACE);
828 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
830 else if (pfx.fp_len == 31)
/* Peer address of the /31: lowest address bit flipped. */
832 u32 mask = clib_host_to_net_u32(1);
833 fib_prefix_t net_pfx = pfx;
836 net_pfx.fp_addr.ip4.as_u32 ^= mask;
838 fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
842 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/*
 * ip4_sw_interface_enable_disable: keeps a per-interface enable count in
 * im->ip_enabled_by_sw_if_index (grown on demand, new slots zero). The
 * counter is pre-incremented on the enable path and pre-decremented on the
 * disable path; the "ip4-drop" feature on the "ip4-unicast" and
 * "ip4-multicast" arcs is toggled with !is_enable, i.e. drop is switched
 * off when IP4 is enabled and back on when it is disabled.
 */
846 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
848 ip4_main_t *im = &ip4_main;
850 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
853 * enable/disable only on the 1<->0 transition
/* Enable path: counter becomes 1 only on the first enable. */
857 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disable path: the counter must be positive before it is decremented. */
862 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
863 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
866 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
870 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
871 sw_if_index, !is_enable, 0, 0);
/*
 * ip4_add_del_interface_address_internal: adds (is_del == 0) or deletes an
 * IPv4 address/length on sw_if_index.
 *
 * Steps visible below: make sure the interface has a FIB index slot, build
 * the address+FIB key, check a new address against the existing addresses
 * of the interface for conflicts, update the interface address pool, toggle
 * IP4 enablement with !is_del, delete or add the interface routes, and
 * invoke the registered add/del callbacks when the pool size changed.
 *
 * Returns a clib_error_t pointer (initialised to 0 and assigned from
 * ip_interface_address_add_del).
 */
874 static clib_error_t *
875 ip4_add_del_interface_address_internal (vlib_main_t * vm,
877 ip4_address_t * address,
878 u32 address_length, u32 is_del)
880 vnet_main_t *vnm = vnet_get_main ();
881 ip4_main_t *im = &ip4_main;
882 ip_lookup_main_t *lm = &im->lookup_main;
883 clib_error_t *error = 0;
884 u32 if_address_index, elts_before;
885 ip4_address_fib_t ip4_af, *addr_fib = 0;
/* Key for the address pool: the address paired with the interface's FIB
   index, appended to the addr_fib vector. */
887 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
888 ip4_addr_fib_init (&ip4_af, address,
889 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
890 vec_add1 (addr_fib, ip4_af);
893 * there is no support for adj-fib handling in the presence of overlapping
894 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
900 /* When adding an address check that it does not conflict
901 with an existing address. */
902 ip_interface_address_t *ia;
903 foreach_ip_interface_address
904 (&im->lookup_main, ia, sw_if_index,
905 0 /* honor unnumbered */ ,
908 ip_interface_address_get_address
909 (&im->lookup_main, ia);
910 if (ip4_destination_matches_route
911 (im, address, x, ia->address_length) ||
912 ip4_destination_matches_route (im,
918 ("failed to add %U which conflicts with %U for interface %U",
919 format_ip4_address_and_length, address,
921 format_ip4_address_and_length, x,
923 format_vnet_sw_if_index_name, vnm,
/* Remember the pool size so a real insertion/removal can be detected. */
929 elts_before = pool_elts (lm->if_address_pool);
931 error = ip_interface_address_add_del
932 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
936 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
939 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
941 ip4_add_interface_routes (sw_if_index,
942 im, ip4_af.fib_index,
944 (lm->if_address_pool, if_address_index));
946 /* Notify the registered callbacks only when the pool actually grew/shrank; an unchanged pool means a duplicate address was added. */
947 if (elts_before != pool_elts (lm->if_address_pool))
949 ip4_add_del_interface_address_callback_t *cb;
950 vec_foreach (cb, im->add_del_interface_address_callbacks)
951 cb->function (im, cb->function_opaque, sw_if_index,
952 address, address_length, if_address_index, is_del);
/* Entry point for adding/deleting an interface address: passes all of its
   arguments unchanged to ip4_add_del_interface_address_internal and returns
   that function's result. */
961 ip4_add_del_interface_address (vlib_main_t * vm,
963 ip4_address_t * address,
964 u32 address_length, u32 is_del)
966 return ip4_add_del_interface_address_internal
967 (vm, sw_if_index, address, address_length, is_del);
/*
 * Feature-arc registrations for IPv4.
 *
 * Three arcs are declared: "ip4-unicast" and "ip4-multicast" (both starting
 * at ip4-input / ip4-input-no-checksum) and "ip4-output" (starting at
 * ip4-rewrite / ip4-midchain). Each VNET_FEATURE_INIT names its arc, its
 * node and the feature(s) it must run before. The features with
 * runs_before = 0 (ip4-lookup, ip4-mfib-forward-lookup, interface-output)
 * are the last ones on their arcs.
 */
970 /* Built-in ip4 unicast rx feature path definition */
972 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
974 .arc_name = "ip4-unicast",
975 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
976 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
979 VNET_FEATURE_INIT (ip4_flow_classify, static) =
981 .arc_name = "ip4-unicast",
982 .node_name = "ip4-flow-classify",
983 .runs_before = VNET_FEATURES ("ip4-inacl"),
986 VNET_FEATURE_INIT (ip4_inacl, static) =
988 .arc_name = "ip4-unicast",
989 .node_name = "ip4-inacl",
990 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
993 VNET_FEATURE_INIT (ip4_source_check_1, static) =
995 .arc_name = "ip4-unicast",
996 .node_name = "ip4-source-check-via-rx",
997 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1000 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1002 .arc_name = "ip4-unicast",
1003 .node_name = "ip4-source-check-via-any",
1004 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1007 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1009 .arc_name = "ip4-unicast",
1010 .node_name = "ip4-source-and-port-range-check-rx",
1011 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1014 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1016 .arc_name = "ip4-unicast",
1017 .node_name = "ip4-policer-classify",
1018 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1021 VNET_FEATURE_INIT (ip4_ipsec, static) =
1023 .arc_name = "ip4-unicast",
1024 .node_name = "ipsec-input-ip4",
1025 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1028 VNET_FEATURE_INIT (ip4_vpath, static) =
1030 .arc_name = "ip4-unicast",
1031 .node_name = "vpath-input-ip4",
1032 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1035 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1037 .arc_name = "ip4-unicast",
1038 .node_name = "ip4-vxlan-bypass",
1039 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* ip4-drop is the feature toggled by ip4_sw_interface_enable_disable and
   ip4_sw_interface_add_del; it is ordered before ip4-lookup. */
1042 VNET_FEATURE_INIT (ip4_drop, static) =
1044 .arc_name = "ip4-unicast",
1045 .node_name = "ip4-drop",
1046 .runs_before = VNET_FEATURES ("ip4-lookup"),
1049 VNET_FEATURE_INIT (ip4_lookup, static) =
1051 .arc_name = "ip4-unicast",
1052 .node_name = "ip4-lookup",
1053 .runs_before = 0, /* not before any other features */
1056 /* Built-in ip4 multicast rx feature path definition */
1057 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1059 .arc_name = "ip4-multicast",
1060 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1061 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1064 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1066 .arc_name = "ip4-multicast",
1067 .node_name = "vpath-input-ip4",
1068 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1071 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1073 .arc_name = "ip4-multicast",
1074 .node_name = "ip4-drop",
1075 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1078 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1080 .arc_name = "ip4-multicast",
1081 .node_name = "ip4-mfib-forward-lookup",
1082 .runs_before = 0, /* last feature */
1085 /* Source and port-range check ip4 tx feature path definition */
1086 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1088 .arc_name = "ip4-output",
1089 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1090 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1093 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1095 .arc_name = "ip4-output",
1096 .node_name = "ip4-source-and-port-range-check-tx",
1097 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1100 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1102 .arc_name = "ip4-output",
1103 .node_name = "ipsec-output-ip4",
1104 .runs_before = VNET_FEATURES ("interface-output"),
1107 /* Built-in ip4 tx feature path definition */
1108 VNET_FEATURE_INIT (ip4_interface_output, static) =
1110 .arc_name = "ip4-output",
1111 .node_name = "interface-output",
1112 .runs_before = 0, /* not before any other features */
/*
 * ip4_sw_interface_add_del: software-interface add/del hook, registered via
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION below. Grows the per-interface unicast
 * and multicast FIB index vectors to cover sw_if_index and calls
 * vnet_feature_enable_disable for "ip4-drop" on the "ip4-unicast" and
 * "ip4-multicast" arcs. Always returns 0 (no error).
 */
1116 static clib_error_t *
1117 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1119 ip4_main_t *im = &ip4_main;
1121 /* Fill in lookup tables with default table (0). */
1122 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1123 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1125 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1128 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1131 return /* no error */ 0;
1134 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1136 /* Global IP4 main. */
1137 ip4_main_t ip4_main;
/*
 * ip4_lookup_init: init function for IPv4 lookup, registered with
 * VLIB_INIT_FUNCTION below.
 *
 * Runs vnet_feature_init first, fills im->fib_masks, initialises the
 * generic lookup main for IPv4, creates and locks unicast and multicast
 * tables with table id 0, hooks the packet-generator header parser onto the
 * ip4-lookup node, and builds the ARP request packet template.
 */
1140 ip4_lookup_init (vlib_main_t * vm)
1142 ip4_main_t *im = &ip4_main;
1143 clib_error_t *error;
1146 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
/* One mask per prefix length i, stored in network byte order.
   NOTE(review): presumably pow2_mask (i) << (32 - i) yields the i
   most-significant bits set - confirm against the pow2_mask definition. */
1149 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1154 m = pow2_mask (i) << (32 - i);
1157 im->fib_masks[i] = clib_host_to_net_u32 (m);
1160 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1162 /* Create FIB with index 0 and table id of 0. */
1163 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1164 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1168 pn = pg_get_node (ip4_lookup_node.index);
1169 pn->unformat_edit = unformat_pg_ip4_header;
/* ARP request template: header zeroed, then hardware type ethernet,
   protocol type IP4, 6-byte L2 / 4-byte L3 addresses and opcode request.
   16-bit fields are converted to network byte order by the _16 helper. */
1173 ethernet_arp_header_t h;
1175 memset (&h, 0, sizeof (h));
1177 /* Set target ethernet address to all zeros. */
1178 memset (h.ip4_over_ethernet[1].ethernet, 0,
1179 sizeof (h.ip4_over_ethernet[1].ethernet));
1181 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1182 #define _8(f,v) h.f = v;
1183 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1184 _16 (l3_type, ETHERNET_TYPE_IP4);
1185 _8 (n_l2_address_bytes, 6);
1186 _8 (n_l3_address_bytes, 4);
1187 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1191 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1194 /* alloc chunk size */ 8,
1201 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record read by the format_ip4_*_trace functions below, which access
   its fib_index, dpo_index, flow_hash and packet_data members. packet_data
   holds 64 bytes minus one u32. */
1205 /* Adjacency taken. */
1210 /* Packet data, possibly *after* rewrite. */
1211 u8 packet_data[64 - 1 * sizeof (u32)];
1213 ip4_forward_next_trace_t;
/*
 * Trace formatter.  Pulls (vlib_main_t *, vlib_node_t *,
 * ip4_forward_next_trace_t *) from args; the first two are unused.
 * Appends to s the captured packet bytes rendered by format_ip4_header,
 * preceded by white space matching the current indent of s.
 */
1216 format_ip4_forward_next_trace (u8 * s, va_list * args)
1218 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1219 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1220 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1221 uword indent = format_get_indent (s);
1222 s = format (s, "%U%U",
1223 format_white_space, indent,
1224 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup nodes.  Takes the same three va_list
 * arguments as format_ip4_forward_next_trace (vm and node unused).
 * Appends a "fib / dpo-idx / flow hash" summary line, then a new line with
 * the captured packet bytes rendered by format_ip4_header at the current
 * indent.
 */
1229 format_ip4_lookup_trace (u8 * s, va_list * args)
1231 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1232 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1233 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1234 uword indent = format_get_indent (s);
1236 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1237 t->fib_index, t->dpo_index, t->flow_hash);
1238 s = format (s, "\n%U%U",
1239 format_white_space, indent,
1240 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite nodes.  Takes the same three va_list
 * arguments as format_ip4_forward_next_trace (vm and node unused).
 * Appends a summary line with the adjacency formatted by
 * format_ip_adjacency, then a new line with the packet bytes rendered by
 * format_ip_adjacency_packet_data at the current indent.
 *
 * NOTE(review): the value printed under the "tx_sw_if_index" label is read
 * from t->fib_index.  Presumably ip4_forward_next_trace stores
 * sw_if_index[VLIB_TX] there when it is set and a FIB index otherwise, in
 * which case the label can be misleading; confirm.
 */
1245 format_ip4_rewrite_trace (u8 * s, va_list * args)
1247 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1248 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1249 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1250 uword indent = format_get_indent (s);
1252 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1253 t->fib_index, t->dpo_index, format_ip_adjacency,
1254 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1255 s = format (s, "\n%U%U",
1256 format_white_space, indent,
1257 format_ip_adjacency_packet_data,
1258 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1262 /* Common trace function for all ip4-forward next nodes.
 *
 * Walks the buffer indices of the frame, two at a time (prefetching the
 * buffers at from[2] and from[3]) and then one at a time.  For every buffer
 * flagged VLIB_BUFFER_IS_TRACED it adds an ip4_forward_next_trace_t record
 * and stores in it:
 *  - dpo_index: ip.adj_index[which_adj_index] of the buffer;
 *  - flow_hash: ip.flow_hash of the buffer;
 *  - sw_if_index[VLIB_TX] when that is not ~0, otherwise the FIB index of
 *    the RX interface (presumably into fib_index; confirm);
 *  - packet_data: the bytes at the buffer's current data pointer.
 *
 * NOTE(review): the copy is always sizeof (packet_data) bytes, regardless
 * of the buffer's current_length, so for short packets it also copies bytes
 * that follow the packet; confirm that much buffer data is always readable.
 */
1264 ip4_forward_next_trace (vlib_main_t * vm,
1265 vlib_node_runtime_t * node,
1266 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1269 ip4_main_t *im = &ip4_main;
1271 n_left = frame->n_vectors;
1272 from = vlib_frame_vector_args (frame);
1277 vlib_buffer_t *b0, *b1;
1278 ip4_forward_next_trace_t *t0, *t1;
1280 /* Prefetch next iteration. */
1281 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1282 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1287 b0 = vlib_get_buffer (vm, bi0);
1288 b1 = vlib_get_buffer (vm, bi1);
1290 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1292 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1293 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1294 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1296 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1297 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1298 vec_elt (im->fib_index_by_sw_if_index,
1299 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1301 clib_memcpy (t0->packet_data,
1302 vlib_buffer_get_current (b0),
1303 sizeof (t0->packet_data));
1305 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1307 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1308 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1309 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1311 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1312 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1313 vec_elt (im->fib_index_by_sw_if_index,
1314 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1315 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1316 sizeof (t1->packet_data));
1326 ip4_forward_next_trace_t *t0;
1330 b0 = vlib_get_buffer (vm, bi0);
1332 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1334 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1335 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1336 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1338 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1339 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1340 vec_elt (im->fib_index_by_sw_if_index,
1341 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1342 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1343 sizeof (t0->packet_data));
/*
 * Shared body of ip4_drop and ip4_punt.
 *
 * Hands the buffer indices of the frame to vlib_error_drop_buffers, passing
 * ip4_input_node.index together with error_code, and then, if tracing is
 * enabled on this node, records traces through ip4_forward_next_trace
 * using the VLIB_TX adjacency index.
 */
1351 ip4_drop_or_punt (vlib_main_t * vm,
1352 vlib_node_runtime_t * node,
1353 vlib_frame_t * frame, ip4_error_t error_code)
1355 u32 *buffers = vlib_frame_vector_args (frame);
1356 uword n_packets = frame->n_vectors;
1358 vlib_error_drop_buffers (vm, node, buffers,
1362 ip4_input_node.index, error_code);
1364 if (node->flags & VLIB_NODE_FLAG_TRACE)
1365 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function of ip4-drop: ip4_drop_or_punt with
   IP4_ERROR_ADJACENCY_DROP as the error code. */
1371 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1373 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function of ip4-punt: ip4_drop_or_punt with
   IP4_ERROR_ADJACENCY_PUNT as the error code. */
1377 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1379 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph-node registration for ip4_drop: u32 vector elements, traces printed
   by format_ip4_forward_next_trace.  A multiarch variant of the node
   function is declared right after the registration. */
1383 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1385 .function = ip4_drop,.
1387 .vector_size = sizeof (u32),
1388 .format_trace = format_ip4_forward_next_trace,
1395 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph-node registration for ip4_punt: u32 vector elements, traces printed
   by format_ip4_forward_next_trace.  A multiarch variant of the node
   function is declared right after the registration. */
1397 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1399 .function = ip4_punt,
1401 .vector_size = sizeof (u32),
1402 .format_trace = format_ip4_forward_next_trace,
1409 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1412 /* Compute TCP/UDP/ICMP4 checksum in software.
 *
 * The sum is seeded with the pseudo-header: payload length plus
 * (protocol << 16), converted to network byte order, followed by the source
 * and destination addresses.  On a 32-bit uword the addresses are added as
 * two unaligned 32-bit loads; otherwise a single unaligned 64-bit load
 * starting at src_address is used (presumably covering the adjacent
 * dst_address as well).
 *
 * The payload is then summed across the buffer chain: the byte count taken
 * from the first buffer is clamped to what current_length holds after the
 * IP header, and following buffers are reached through next_buffer until
 * n_bytes_left reaches zero.  The folded sum is complemented into sum16.
 *
 * NOTE(review): payload_length_host_byte_order is ip0->length minus the
 * header length in u32 arithmetic; no check that ip0->length is at least
 * the header length is made here, so a malformed length would wrap.
 * Presumably validated upstream; confirm.
 */
1414 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1418 u32 ip_header_length, payload_length_host_byte_order;
1419 u32 n_this_buffer, n_bytes_left;
1421 void *data_this_buffer;
1423 /* Initialize checksum with ip header. */
1424 ip_header_length = ip4_header_bytes (ip0);
1425 payload_length_host_byte_order =
1426 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1428 clib_host_to_net_u32 (payload_length_host_byte_order +
1429 (ip0->protocol << 16));
1431 if (BITS (uword) == 32)
1434 ip_csum_with_carry (sum0,
1435 clib_mem_unaligned (&ip0->src_address, u32));
1437 ip_csum_with_carry (sum0,
1438 clib_mem_unaligned (&ip0->dst_address, u32));
1442 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1444 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1445 data_this_buffer = (void *) ip0 + ip_header_length;
/* The first buffer may hold less than the whole payload: clamp. */
1446 if (n_this_buffer + ip_header_length > p0->current_length)
1448 p0->current_length >
1449 ip_header_length ? p0->current_length - ip_header_length : 0;
1452 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1453 n_bytes_left -= n_this_buffer;
1454 if (n_bytes_left == 0)
/* More payload expected: move on to the next buffer of the chain. */
1457 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1458 p0 = vlib_get_buffer (vm, p0->next_buffer);
1459 data_this_buffer = vlib_buffer_get_current (p0);
1460 n_this_buffer = p0->current_length;
1463 sum16 = ~ip_csum_fold (sum0);
/*
 * Validate the TCP/UDP checksum of the IPv4 packet at the buffer's current
 * data pointer and cache the verdict in p0->flags.
 *
 * A UDP packet with a zero checksum field is flagged as both computed and
 * correct without summing anything.  Otherwise the checksum is computed by
 * ip4_tcp_udp_compute_checksum; IP_BUFFER_L4_CHECKSUM_COMPUTED is always
 * set and the IP_BUFFER_L4_CHECKSUM_CORRECT bit is set when the computed
 * value is 0.  The protocol is only checked by ASSERT.
 *
 * NOTE(review): udp0 is taken at ip0 + 1, i.e. right after a fixed-size
 * ip4_header_t, whereas ip4_local_inline uses ip4_next_header (ip0).  The
 * two may differ when the IP header carries options; confirm.
 */
1469 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1471 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1475 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1476 || ip0->protocol == IP_PROTOCOL_UDP);
1478 udp0 = (void *) (ip0 + 1);
1479 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1481 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1482 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1486 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1488 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1489 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Feature arc "ip4-local", started from the "ip4-local" node.
   ip4_local_inline reads its index through
   vnet_feat_arc_ip4_local.feature_arc_index. */
1495 VNET_FEATURE_ARC_INIT (ip4_local) =
1497 .arc_name = "ip4-local",
1498 .start_nodes = VNET_FEATURES ("ip4-local"),
/*
 * Common worker for ip4_local and ip4_local_end_of_arc, selected by
 * head_of_feature_arc.
 *
 * Traces the frame first when tracing is enabled on the node.  Then, for
 * every buffer (two at a time while at least four remain, afterwards one
 * at a time):
 *  - records the start of the IP header in the buffer metadata;
 *  - takes the FIB index of the RX interface, or sw_if_index[VLIB_TX] when
 *    that is not ~0 (presumably as the FIB index to use; confirm), and
 *    walks that FIB's mtrie with the packet's SOURCE address
 *    (lookup_step_one, then steps 2 and 3);
 *  - treats fragments as protocol 0xfe;
 *  - for TCP/UDP, relies on the cached L4-checksum flags, validating the
 *    checksum in software when it has not been computed yet; UDP with a
 *    zero checksum field is accepted, and a UDP length larger than the IP
 *    length yields IP4_ERROR_UDP_LENGTH;
 *  - stores the resulting load-balance index in adj_index[VLIB_RX] and
 *    adj_index[VLIB_TX];
 *  - turns a source that resolves to DPO_RECEIVE into
 *    IP4_ERROR_SPOOFED_LOCAL_PACKETS, and a failed uRPF size check for a
 *    destination other than 0xFFFFFFFF into IP4_ERROR_SRC_LOOKUP_MISS;
 *  - dispatches through lm->local_next_by_ip_protocol[proto], or to
 *    IP_LOCAL_NEXT_DROP when any error other than
 *    IP4_ERROR_UNKNOWN_PROTOCOL was set;
 *  - when head_of_feature_arc is set and no such error was raised, starts
 *    the "ip4-local" feature arc for the RX interface.
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error so far" value in
 * the checks above.  Returns frame->n_vectors.
 */
1503 ip4_local_inline (vlib_main_t * vm,
1504 vlib_node_runtime_t * node,
1505 vlib_frame_t * frame, int head_of_feature_arc)
1507 ip4_main_t *im = &ip4_main;
1508 ip_lookup_main_t *lm = &im->lookup_main;
1509 ip_local_next_t next_index;
1510 u32 *from, *to_next, n_left_from, n_left_to_next;
1511 vlib_node_runtime_t *error_node =
1512 vlib_node_get_runtime (vm, ip4_input_node.index);
1513 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1515 from = vlib_frame_vector_args (frame);
1516 n_left_from = frame->n_vectors;
1517 next_index = node->cached_next_index;
1519 if (node->flags & VLIB_NODE_FLAG_TRACE)
1520 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1522 while (n_left_from > 0)
1524 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1526 while (n_left_from >= 4 && n_left_to_next >= 2)
1528 vlib_buffer_t *p0, *p1;
1529 ip4_header_t *ip0, *ip1;
1530 udp_header_t *udp0, *udp1;
1531 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1532 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1533 const dpo_id_t *dpo0, *dpo1;
1534 const load_balance_t *lb0, *lb1;
1535 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1536 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1537 i32 len_diff0, len_diff1;
1538 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1539 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1540 u32 sw_if_index0, sw_if_index1;
1542 pi0 = to_next[0] = from[0];
1543 pi1 = to_next[1] = from[1];
1547 n_left_to_next -= 2;
1549 next0 = next1 = IP_LOCAL_NEXT_DROP;
1551 p0 = vlib_get_buffer (vm, pi0);
1552 p1 = vlib_get_buffer (vm, pi1);
1554 ip0 = vlib_buffer_get_current (p0);
1555 ip1 = vlib_buffer_get_current (p1);
1557 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1558 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1560 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1561 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1563 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1564 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
/* NOTE(review): the two vec_elt lookups below repeat the ones just above. */
1566 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1568 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1569 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1571 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1573 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1574 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1576 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1577 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1579 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1580 leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
1582 /* Treat IP frag packets as "experimental" protocol for now
1583 until support of IP frag reassembly is implemented */
1584 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1585 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1587 if (head_of_feature_arc == 0)
1589 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1593 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1594 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1595 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1596 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1601 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1602 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1604 udp0 = ip4_next_header (ip0);
1605 udp1 = ip4_next_header (ip1);
1607 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1608 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1609 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1611 /* Verify UDP length. */
1612 ip_len0 = clib_net_to_host_u16 (ip0->length);
1613 ip_len1 = clib_net_to_host_u16 (ip1->length);
1614 udp_len0 = clib_net_to_host_u16 (udp0->length);
1615 udp_len1 = clib_net_to_host_u16 (udp1->length);
1617 len_diff0 = ip_len0 - udp_len0;
1618 len_diff1 = ip_len1 - udp_len1;
1620 len_diff0 = is_udp0 ? len_diff0 : 0;
1621 len_diff1 = is_udp1 ? len_diff1 : 0;
1623 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1624 & good_tcp_udp0 & good_tcp_udp1)))
1629 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1630 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1632 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1633 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1638 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1639 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1641 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1642 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1646 good_tcp_udp0 &= len_diff0 >= 0;
1647 good_tcp_udp1 &= len_diff1 >= 0;
1650 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1652 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1654 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1656 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1657 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* The checksum error code is IP4_ERROR_TCP_CHECKSUM + is_udp, which relies
   on the UDP code directly following the TCP one. */
1659 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1660 error0 = (is_tcp_udp0 && !good_tcp_udp0
1661 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1662 error1 = (is_tcp_udp1 && !good_tcp_udp1
1663 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1666 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1668 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1670 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1671 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1672 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1674 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1675 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1676 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1678 lb0 = load_balance_get (lbi0);
1679 lb1 = load_balance_get (lbi1);
1680 dpo0 = load_balance_get_bucket_i (lb0, 0);
1681 dpo1 = load_balance_get_bucket_i (lb1, 0);
1684 * Must have a route to source otherwise we drop the packet.
1685 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1688 * - the source is a recieve => it's from us => bogus, do this
1689 * first since it sets a different error code.
1690 * - uRPF check for any route to source - accept if passes.
1691 * - allow packets destined to the broadcast address from unknown sources
1693 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1694 dpo0->dpoi_type == DPO_RECEIVE) ?
1695 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1696 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1697 !fib_urpf_check_size (lb0->lb_urpf) &&
1698 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1699 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1700 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1701 dpo1->dpoi_type == DPO_RECEIVE) ?
1702 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1703 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1704 !fib_urpf_check_size (lb1->lb_urpf) &&
1705 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1706 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1710 next0 = lm->local_next_by_ip_protocol[proto0];
1711 next1 = lm->local_next_by_ip_protocol[proto1];
1714 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1716 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1718 p0->error = error0 ? error_node->errors[error0] : 0;
1719 p1->error = error1 ? error_node->errors[error1] : 0;
1721 if (head_of_feature_arc)
1723 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1724 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1725 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1726 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1729 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1730 n_left_to_next, pi0, pi1,
1734 while (n_left_from > 0 && n_left_to_next > 0)
1739 ip4_fib_mtrie_t *mtrie0;
1740 ip4_fib_mtrie_leaf_t leaf0;
1741 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1743 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1744 load_balance_t *lb0;
1745 const dpo_id_t *dpo0;
1748 pi0 = to_next[0] = from[0];
1752 n_left_to_next -= 1;
1754 next0 = IP_LOCAL_NEXT_DROP;
1756 p0 = vlib_get_buffer (vm, pi0);
1758 ip0 = vlib_buffer_get_current (p0);
1760 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1762 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1764 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1767 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1768 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1770 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1772 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
1774 /* Treat IP frag packets as "experimental" protocol for now
1775 until support of IP frag reassembly is implemented */
1776 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1778 if (head_of_feature_arc == 0)
1780 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1784 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1785 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1789 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1791 udp0 = ip4_next_header (ip0);
1793 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1794 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1796 /* Verify UDP length. */
1797 ip_len0 = clib_net_to_host_u16 (ip0->length);
1798 udp_len0 = clib_net_to_host_u16 (udp0->length);
1800 len_diff0 = ip_len0 - udp_len0;
1802 len_diff0 = is_udp0 ? len_diff0 : 0;
1804 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1809 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1810 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1812 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1813 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1817 good_tcp_udp0 &= len_diff0 >= 0;
1820 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1822 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1824 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1826 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1827 error0 = (is_tcp_udp0 && !good_tcp_udp0
1828 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1831 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1833 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1834 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1836 lb0 = load_balance_get (lbi0);
1837 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* NOTE(review): adj_index[VLIB_TX] was already set to lbi0 a few lines
   above; it is stored again here together with adj_index[VLIB_RX]. */
1839 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1840 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1842 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1843 dpo0->dpoi_type == DPO_RECEIVE) ?
1844 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1845 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1846 !fib_urpf_check_size (lb0->lb_urpf) &&
1847 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1848 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1852 next0 = lm->local_next_by_ip_protocol[proto0];
1855 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1857 p0->error = error0 ? error_node->errors[error0] : 0;
1859 if (head_of_feature_arc)
1861 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1862 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1865 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1866 n_left_to_next, pi0, next0);
1870 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1873 return frame->n_vectors;
/* Node function of "ip4-local": runs ip4_local_inline as the head of the
   feature arc. */
1877 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1879 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Graph-node registration for "ip4-local".  It declares IP_LOCAL_N_NEXT
   next nodes; the four named below are drop, punt, UDP lookup and ICMP
   input.  ip4_register_protocol adds further next nodes to this node at
   run time. */
1883 VLIB_REGISTER_NODE (ip4_local_node) =
1885 .function = ip4_local,
1886 .name = "ip4-local",
1887 .vector_size = sizeof (u32),
1888 .format_trace = format_ip4_forward_next_trace,
1889 .n_next_nodes = IP_LOCAL_N_NEXT,
1892 [IP_LOCAL_NEXT_DROP] = "error-drop",
1893 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1894 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1895 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1899 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Node function of "ip4-local-end-of-arc": runs ip4_local_inline with
   head_of_feature_arc = 0, so the feature arc is not started again. */
1902 ip4_local_end_of_arc (vlib_main_t * vm,
1903 vlib_node_runtime_t * node, vlib_frame_t * frame)
1905 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/* Graph-node registration for "ip4-local-end-of-arc", declared as a sibling
   of "ip4-local". */
1909 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1910 .function = ip4_local_end_of_arc,
1911 .name = "ip4-local-end-of-arc",
1912 .vector_size = sizeof (u32),
1914 .format_trace = format_ip4_forward_next_trace,
1915 .sibling_of = "ip4-local",
1918 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Registers "ip4-local-end-of-arc" as a feature on the "ip4-local" arc; it
   is not ordered before any other feature. */
1920 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1921 .arc_name = "ip4-local",
1922 .node_name = "ip4-local-end-of-arc",
1923 .runs_before = 0, /* not before any other features */
/*
 * Register node_index as the handler of an IP protocol for locally
 * received packets: the node is added as a next node of ip4-local and the
 * resulting next index is stored in lm->local_next_by_ip_protocol[protocol].
 * The protocol number is range-checked by ASSERT only.
 */
1928 ip4_register_protocol (u32 protocol, u32 node_index)
1930 vlib_main_t *vm = vlib_get_main ();
1931 ip4_main_t *im = &ip4_main;
1932 ip_lookup_main_t *lm = &im->lookup_main;
1934 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1935 lm->local_next_by_ip_protocol[protocol] =
1936 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local": prints a heading and then the number of
 * every IP protocol whose entry in lm->local_next_by_ip_protocol is not
 * IP_LOCAL_NEXT_PUNT.
 */
1939 static clib_error_t *
1940 show_ip_local_command_fn (vlib_main_t * vm,
1941 unformat_input_t * input, vlib_cli_command_t * cmd)
1943 ip4_main_t *im = &ip4_main;
1944 ip_lookup_main_t *lm = &im->lookup_main;
1947 vlib_cli_output (vm, "Protocols handled by ip4_local");
1948 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1950 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1951 vlib_cli_output (vm, "%d", i);
1959 * Display the set of protocols handled by the local IPv4 stack.
1962 * Example of how to display local protocol table:
1963 * @cliexstart{show ip local}
1964 * Protocols handled by ip4_local
/* CLI registration: "show ip local" is handled by
   show_ip_local_command_fn. */
1971 VLIB_CLI_COMMAND (show_ip_local, static) =
1973 .path = "show ip local",
1974 .function = show_ip_local_command_fn,
1975 .short_help = "show ip local",
/*
 * Common worker for ip4_arp (is_glean = 0) and ip4_glean (is_glean = 1).
 *
 * Traces the frame first when tracing is enabled on the node.  Every input
 * buffer is then placed on the IP4_ARP_NEXT_DROP frame with either
 * IP4_ARP_ERROR_DROP or IP4_ARP_ERROR_REQUEST_SENT as its error.  In
 * addition an ARP request taken from im->ip4_arp_request_packet_template
 * may be sent towards adj0->rewrite_header.next_index.
 *
 * Requests are throttled with a hashed bitmap.  The address being resolved
 * (the packet's destination for glean, the adjacency's next hop otherwise)
 * is mixed into a hash; the bit the hash selects in hash_bitmap tells
 * whether that key was seen before (drop0) and is then set.  The hash seeds
 * are refreshed from the random buffer once more than 1e-3 has elapsed on
 * vlib_time_now since the last change; presumably the bitmap is cleared at
 * the same time.
 *
 * No request is sent when the adjacency has already become a rewrite, or
 * when its lookup_next_index does not match the expected glean/ARP type
 * (IP4_ARP_ERROR_NON_ARP_ADJ).  In the non-glean case the new buffer is
 * freed when no source address is available
 * (IP4_ARP_ERROR_NO_SOURCE_ADDRESS).
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): c0 is divided by BITS (uword) before m0 is derived from
 * c0 % BITS (uword).  After the division c0 is only a word index, so the
 * bit position always equals the word index and each bitmap word can only
 * ever use that single bit.  It looks as if the mask should be derived from
 * the masked hash before the division (m0, declared u32, would then need
 * to be as wide as uword); confirm.
 *
 * NOTE(review): time_last_seed_change, hash_seeds and hash_bitmap are
 * function-static, hence shared by every invocation of this function;
 * confirm that this is intended when several threads run these nodes.
 */
1980 ip4_arp_inline (vlib_main_t * vm,
1981 vlib_node_runtime_t * node,
1982 vlib_frame_t * frame, int is_glean)
1984 vnet_main_t *vnm = vnet_get_main ();
1985 ip4_main_t *im = &ip4_main;
1986 ip_lookup_main_t *lm = &im->lookup_main;
1987 u32 *from, *to_next_drop;
1988 uword n_left_from, n_left_to_next_drop, next_index;
1989 static f64 time_last_seed_change = -1e100;
1990 static u32 hash_seeds[3];
1991 static uword hash_bitmap[256 / BITS (uword)];
1994 if (node->flags & VLIB_NODE_FLAG_TRACE)
1995 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1997 time_now = vlib_time_now (vm);
1998 if (time_now - time_last_seed_change > 1e-3)
2001 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2002 sizeof (hash_seeds));
2003 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2004 hash_seeds[i] = r[i];
2006 /* Mark all hash keys as not having been seen before. */
2007 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2010 time_last_seed_change = time_now;
2013 from = vlib_frame_vector_args (frame);
2014 n_left_from = frame->n_vectors;
2015 next_index = node->cached_next_index;
2016 if (next_index == IP4_ARP_NEXT_DROP)
2017 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2019 while (n_left_from > 0)
2021 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2022 to_next_drop, n_left_to_next_drop);
2024 while (n_left_from > 0 && n_left_to_next_drop > 0)
2026 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2027 ip_adjacency_t *adj0;
2034 p0 = vlib_get_buffer (vm, pi0);
2036 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2037 adj0 = adj_get (adj_index0);
2038 ip0 = vlib_buffer_get_current (p0);
2044 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2045 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2050 * this is the Glean case, so we are ARPing for the
2051 * packet's destination
2053 a0 ^= ip0->dst_address.data_u32;
2057 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2061 hash_v3_finalize32 (a0, b0, c0);
2063 c0 &= BITS (hash_bitmap) - 1;
2064 c0 = c0 / BITS (uword);
2065 m0 = (uword) 1 << (c0 % BITS (uword));
2067 bm0 = hash_bitmap[c0];
2068 drop0 = (bm0 & m0) != 0;
2070 /* Mark it as seen. */
2071 hash_bitmap[c0] = bm0 | m0;
2075 to_next_drop[0] = pi0;
2077 n_left_to_next_drop -= 1;
2080 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2081 IP4_ARP_ERROR_REQUEST_SENT];
2084 * the adj has been updated to a rewrite but the node the DPO that got
2085 * us here hasn't - yet. no big deal. we'll drop while we wait.
2087 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2094 * Can happen if the control-plane is programming tables
2095 * with traffic flowing; at least that's today's lame excuse.
2097 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2098 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2100 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2103 /* Send ARP request. */
2107 ethernet_arp_header_t *h0;
2108 vnet_hw_interface_t *hw_if0;
2111 vlib_packet_template_get_packet (vm,
2112 &im->ip4_arp_request_packet_template,
2115 /* Add rewrite/encap string for ARP packet. */
2116 vnet_rewrite_one_header (adj0[0], h0,
2117 sizeof (ethernet_header_t));
2119 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2121 /* Src ethernet address in ARP header. */
2122 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2124 sizeof (h0->ip4_over_ethernet[0].ethernet));
2128 /* The interface's source address is stashed in the Glean Adj */
2129 h0->ip4_over_ethernet[0].ip4 =
2130 adj0->sub_type.glean.receive_addr.ip4;
2132 /* Copy in destination address we are requesting. This is the
2133 * glean case, so it's the packet's destination.*/
2134 h0->ip4_over_ethernet[1].ip4.data_u32 =
2135 ip0->dst_address.data_u32;
2139 /* Src IP address in ARP header. */
2140 if (ip4_src_address_for_packet (lm, sw_if_index0,
2142 ip4_over_ethernet[0].ip4))
2144 /* No source address available */
2146 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2147 vlib_buffer_free (vm, &bi0, 1);
2151 /* Copy in destination address we are requesting from the
2153 h0->ip4_over_ethernet[1].ip4.data_u32 =
2154 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2157 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2158 b0 = vlib_get_buffer (vm, bi0);
2159 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2161 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2163 vlib_set_next_frame_buffer (vm, node,
2164 adj0->rewrite_header.next_index,
2169 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2172 return frame->n_vectors;
/* Node function of "ip4-arp": ip4_arp_inline with is_glean = 0. */
2176 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2178 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of "ip4-glean": ip4_arp_inline with is_glean = 1. */
2182 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2184 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the IP4_ARP_ERROR_* codes; shared by the
   ip4-arp and ip4-glean node registrations. */
2187 static char *ip4_arp_error_strings[] = {
2188 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2189 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2190 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2191 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2192 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2193 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph-node registration for "ip4-arp": node function ip4_arp, error
   counters from ip4_arp_error_strings, IP4_ARP_N_NEXT statically declared
   next nodes, of which IP4_ARP_NEXT_DROP maps to "error-drop". */
2196 VLIB_REGISTER_NODE (ip4_arp_node) =
2198 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2199 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2200 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2201 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2203 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph-node registration for "ip4-glean": node function ip4_glean, same
   error strings and next-node layout as "ip4-arp". */
2206 VLIB_REGISTER_NODE (ip4_glean_node) =
2208 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2209 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2210 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2211 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2213 [IP4_ARP_NEXT_DROP] = "error-drop",}
/*
 * foreach_notrace_ip4_arp_error lists IP4_ARP_ERROR_* codes, and
 * arp_notrace_init (an init function, see VLIB_INIT_FUNCTION below) calls
 * vnet_pcap_drop_trace_filter_add_del on the matching error counter of the
 * ip4-arp node for each of them, so that drops of ARP request packets are
 * kept out of the pcap drop trace.
 */
2216 #define foreach_notrace_ip4_arp_error \
2223 arp_notrace_init (vlib_main_t * vm)
2225 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2227 /* don't trace ARP request packets */
2229 vnet_pcap_drop_trace_filter_add_del \
2230 (rt->errors[IP4_ARP_ERROR_##a], \
2232 foreach_notrace_ip4_arp_error;
2237 VLIB_INIT_FUNCTION (arp_notrace_init);
2240 /* Send an ARP request to see if given destination is reachable on given interface.
 *
 * Fails with a clib error when the interface is not admin-up, or with
 * vnm->api_errno set to VNET_API_ERROR_NO_MATCHING_INTERFACE when there is
 * no matching interface address for dst (presumably when
 * ip4_interface_address_matching_destination finds none).
 *
 * Otherwise a packet is taken from im->ip4_arp_request_packet_template and
 * filled in: sender hardware address from the interface's hw_address,
 * sender IP from the matching interface address, target IP = dst.  Both
 * sw_if_index[VLIB_RX] and sw_if_index[VLIB_TX] of the buffer are set to
 * sw_if_index, the rewrite of the interface address's neighbor-probe
 * adjacency is prepended, and the buffer is handed to the hardware
 * interface's output node.  Returns 0 on success.
 */
2242 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2244 vnet_main_t *vnm = vnet_get_main ();
2245 ip4_main_t *im = &ip4_main;
2246 ethernet_arp_header_t *h;
2248 ip_interface_address_t *ia;
2249 ip_adjacency_t *adj;
2250 vnet_hw_interface_t *hi;
2251 vnet_sw_interface_t *si;
2255 si = vnet_get_sw_interface (vnm, sw_if_index);
2257 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2259 return clib_error_return (0, "%U: interface %U down",
2260 format_ip4_address, dst,
2261 format_vnet_sw_if_index_name, vnm,
2266 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2269 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2270 return clib_error_return
2272 "no matching interface address for destination %U (interface %U)",
2273 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2277 adj = adj_get (ia->neighbor_probe_adj_index);
2280 vlib_packet_template_get_packet (vm,
2281 &im->ip4_arp_request_packet_template,
2284 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2286 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2287 sizeof (h->ip4_over_ethernet[0].ethernet));
2289 h->ip4_over_ethernet[0].ip4 = src[0];
2290 h->ip4_over_ethernet[1].ip4 = dst[0];
2292 b = vlib_get_buffer (vm, bi);
2293 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2294 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2296 /* Add encapsulation string for software interface (e.g. ethernet header). */
2297 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2298 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2301 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2302 u32 *to_next = vlib_frame_vector_args (f);
2305 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2308 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: DROP is its initial
   next0/next1 value, ICMP_ERROR is chosen when the TTL expires. */
2313 IP4_REWRITE_NEXT_DROP,
2314 IP4_REWRITE_NEXT_ICMP_ERROR,
2315 } ip4_rewrite_next_t;
2318 ip4_rewrite_inline (vlib_main_t * vm,
2319 vlib_node_runtime_t * node,
2320 vlib_frame_t * frame,
2321 int do_counters, int is_midchain, int is_mcast)
2323 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2324 u32 *from = vlib_frame_vector_args (frame);
2325 u32 n_left_from, n_left_to_next, *to_next, next_index;
2326 vlib_node_runtime_t *error_node =
2327 vlib_node_get_runtime (vm, ip4_input_node.index);
2329 n_left_from = frame->n_vectors;
2330 next_index = node->cached_next_index;
2331 u32 thread_index = vlib_get_thread_index ();
2333 while (n_left_from > 0)
2335 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2337 while (n_left_from >= 4 && n_left_to_next >= 2)
2339 ip_adjacency_t *adj0, *adj1;
2340 vlib_buffer_t *p0, *p1;
2341 ip4_header_t *ip0, *ip1;
2342 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2343 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2344 u32 tx_sw_if_index0, tx_sw_if_index1;
2346 /* Prefetch next iteration. */
2348 vlib_buffer_t *p2, *p3;
2350 p2 = vlib_get_buffer (vm, from[2]);
2351 p3 = vlib_get_buffer (vm, from[3]);
2353 vlib_prefetch_buffer_header (p2, STORE);
2354 vlib_prefetch_buffer_header (p3, STORE);
2356 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2357 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2360 pi0 = to_next[0] = from[0];
2361 pi1 = to_next[1] = from[1];
2366 n_left_to_next -= 2;
2368 p0 = vlib_get_buffer (vm, pi0);
2369 p1 = vlib_get_buffer (vm, pi1);
2371 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2372 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2375 * pre-fetch the per-adjacency counters
2379 vlib_prefetch_combined_counter (&adjacency_counters,
2380 thread_index, adj_index0);
2381 vlib_prefetch_combined_counter (&adjacency_counters,
2382 thread_index, adj_index1);
2385 ip0 = vlib_buffer_get_current (p0);
2386 ip1 = vlib_buffer_get_current (p1);
2388 error0 = error1 = IP4_ERROR_NONE;
2389 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2391 /* Decrement TTL & update checksum.
2392 Works either endian, so no need for byte swap. */
2393 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2395 i32 ttl0 = ip0->ttl;
2397 /* Input node should have rejected packets with ttl 0. */
2398 ASSERT (ip0->ttl > 0);
2400 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2401 checksum0 += checksum0 >= 0xffff;
2403 ip0->checksum = checksum0;
2408 * If the ttl drops below 1 when forwarding, generate
2411 if (PREDICT_FALSE (ttl0 <= 0))
2413 error0 = IP4_ERROR_TIME_EXPIRED;
2414 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2415 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2416 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2418 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2421 /* Verify checksum. */
2422 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2426 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2428 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2430 i32 ttl1 = ip1->ttl;
2432 /* Input node should have rejected packets with ttl 0. */
2433 ASSERT (ip1->ttl > 0);
2435 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2436 checksum1 += checksum1 >= 0xffff;
2438 ip1->checksum = checksum1;
2443 * If the ttl drops below 1 when forwarding, generate
2446 if (PREDICT_FALSE (ttl1 <= 0))
2448 error1 = IP4_ERROR_TIME_EXPIRED;
2449 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2450 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2451 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2453 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2456 /* Verify checksum. */
2457 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2458 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2462 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2465 /* Rewrite packet header and updates lengths. */
2466 adj0 = adj_get (adj_index0);
2467 adj1 = adj_get (adj_index1);
2469 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2470 rw_len0 = adj0[0].rewrite_header.data_bytes;
2471 rw_len1 = adj1[0].rewrite_header.data_bytes;
2472 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2473 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2475 /* Check MTU of outgoing interface. */
2477 (vlib_buffer_length_in_chain (vm, p0) >
2479 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2482 (vlib_buffer_length_in_chain (vm, p1) >
2484 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2487 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2488 * to see the IP header */
2489 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2491 next0 = adj0[0].rewrite_header.next_index;
2492 p0->current_data -= rw_len0;
2493 p0->current_length += rw_len0;
2494 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2495 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2498 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2499 vnet_feature_arc_start (lm->output_feature_arc_index,
2500 tx_sw_if_index0, &next0, p0);
2502 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2504 next1 = adj1[0].rewrite_header.next_index;
2505 p1->current_data -= rw_len1;
2506 p1->current_length += rw_len1;
2508 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2509 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2512 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2513 vnet_feature_arc_start (lm->output_feature_arc_index,
2514 tx_sw_if_index1, &next1, p1);
2517 /* Guess we are only writing on simple Ethernet header. */
2518 vnet_rewrite_two_headers (adj0[0], adj1[0],
2519 ip0, ip1, sizeof (ethernet_header_t));
2522 * Bump the per-adjacency counters
2526 vlib_increment_combined_counter
2527 (&adjacency_counters,
2530 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2532 vlib_increment_combined_counter
2533 (&adjacency_counters,
2536 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2541 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2542 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2547 * copy bytes from the IP address into the MAC rewrite
2549 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2550 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2553 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2554 to_next, n_left_to_next,
2555 pi0, pi1, next0, next1);
2558 while (n_left_from > 0 && n_left_to_next > 0)
2560 ip_adjacency_t *adj0;
2563 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2564 u32 tx_sw_if_index0;
2566 pi0 = to_next[0] = from[0];
2568 p0 = vlib_get_buffer (vm, pi0);
2570 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2572 adj0 = adj_get (adj_index0);
2574 ip0 = vlib_buffer_get_current (p0);
2576 error0 = IP4_ERROR_NONE;
2577 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2579 /* Decrement TTL & update checksum. */
2580 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2582 i32 ttl0 = ip0->ttl;
2584 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2586 checksum0 += checksum0 >= 0xffff;
2588 ip0->checksum = checksum0;
2590 ASSERT (ip0->ttl > 0);
2596 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2598 if (PREDICT_FALSE (ttl0 <= 0))
2601 * If the ttl drops below 1 when forwarding, generate
2604 error0 = IP4_ERROR_TIME_EXPIRED;
2605 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2606 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2607 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2608 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2614 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2618 vlib_prefetch_combined_counter (&adjacency_counters,
2619 thread_index, adj_index0);
2621 /* Guess we are only writing on simple Ethernet header. */
2622 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2626 * copy bytes from the IP address into the MAC rewrite
2628 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2631 /* Update packet buffer attributes/set output interface. */
2632 rw_len0 = adj0[0].rewrite_header.data_bytes;
2633 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2636 vlib_increment_combined_counter
2637 (&adjacency_counters,
2638 thread_index, adj_index0, 1,
2639 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2641 /* Check MTU of outgoing interface. */
2642 error0 = (vlib_buffer_length_in_chain (vm, p0)
2643 > adj0[0].rewrite_header.max_l3_packet_bytes
2644 ? IP4_ERROR_MTU_EXCEEDED : error0);
2646 p0->error = error_node->errors[error0];
2648 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2649 * to see the IP header */
2650 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2652 p0->current_data -= rw_len0;
2653 p0->current_length += rw_len0;
2654 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2656 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2657 next0 = adj0[0].rewrite_header.next_index;
2661 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2665 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2666 vnet_feature_arc_start (lm->output_feature_arc_index,
2667 tx_sw_if_index0, &next0, p0);
2674 n_left_to_next -= 1;
2676 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2677 to_next, n_left_to_next,
2681 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2684 /* Need to do trace after rewrites to pick up new packet data. */
2685 if (node->flags & VLIB_NODE_FLAG_TRACE)
2686 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2688 return frame->n_vectors;
2692 /** @brief IPv4 rewrite node.
2695 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2696 header checksum, fetch the ip adjacency, check the outbound mtu,
2697 apply the adjacency rewrite, and send pkts to the adjacency
2698 rewrite header's rewrite_next_index.
2700 @param vm vlib_main_t corresponding to the current thread
2701 @param node vlib_node_runtime_t
2702 @param frame vlib_frame_t whose contents should be dispatched
2704 @par Graph mechanics: buffer metadata, next index usage
2707 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2708 - the rewrite adjacency index
2709 - <code>adj->lookup_next_index</code>
2710 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2711 the packet will be dropped.
2712 - <code>adj->rewrite_header</code>
2713 - Rewrite string length, rewrite string, next_index
2716 - <code>b->current_data, b->current_length</code>
2717 - Updated net of applying the rewrite string
2719 <em>Next Indices:</em>
2720 - <code> adj->rewrite_header.next_index </code>
/*
 * Node function for "ip4-rewrite" (it is the .function member of
 * ip4_rewrite_node). It does no packet work itself; it only chooses the
 * constant flag triple handed to ip4_rewrite_inline and returns that
 * call's result. The first flag is 1 when adj_are_counters_enabled ()
 * reports true and 0 otherwise; the other two flags are always 0 here.
 *
 * NOTE(review): the flag meanings are defined by ip4_rewrite_inline, whose
 * signature is outside this excerpt. Across the four wrappers in this file
 * the second flag is set only by the midchain variants and the third only
 * by the mcast variants -- confirm against the inline's parameter list.
 */
2724 ip4_rewrite (vlib_main_t * vm,
2725 vlib_node_runtime_t * node, vlib_frame_t * frame)
2727 if (adj_are_counters_enabled ())
2728 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2730 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/*
 * Node function for "ip4-midchain" (the .function member of
 * ip4_midchain_node). Forwards the frame to ip4_rewrite_inline with the
 * second flag fixed at 1 and the third at 0; the first flag follows
 * adj_are_counters_enabled ().
 *
 * NOTE(review): the second flag presumably enables the
 * sub_type.midchain.fixup_func call seen in the inline body -- confirm
 * against ip4_rewrite_inline's signature, which is not in this excerpt.
 */
2734 ip4_midchain (vlib_main_t * vm,
2735 vlib_node_runtime_t * node, vlib_frame_t * frame)
2737 if (adj_are_counters_enabled ())
2738 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2740 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/*
 * Node function for "ip4-rewrite-mcast" (the .function member of
 * ip4_rewrite_mcast_node). Forwards the frame to ip4_rewrite_inline with
 * the second flag fixed at 0 and the third at 1; the first flag follows
 * adj_are_counters_enabled ().
 *
 * NOTE(review): the third flag presumably selects the multicast handling
 * in the inline body (the vnet_fixup_one_header calls that copy bytes of
 * the IP address into the MAC rewrite) -- confirm against
 * ip4_rewrite_inline's signature, which is not in this excerpt.
 */
2744 ip4_rewrite_mcast (vlib_main_t * vm,
2745 vlib_node_runtime_t * node, vlib_frame_t * frame)
2747 if (adj_are_counters_enabled ())
2748 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2750 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/*
 * Node function for "ip4-mcast-midchain" (the .function member of
 * ip4_mcast_midchain_node). Forwards the frame to ip4_rewrite_inline with
 * both the second and the third flag fixed at 1; the first flag follows
 * adj_are_counters_enabled ().
 *
 * NOTE(review): flag meanings come from ip4_rewrite_inline, whose
 * signature is not in this excerpt -- presumably midchain plus mcast
 * handling combined; confirm.
 */
2754 ip4_mcast_midchain (vlib_main_t * vm,
2755 vlib_node_runtime_t * node, vlib_frame_t * frame)
2757 if (adj_are_counters_enabled ())
2758 return ip4_rewrite_inline (vm, node, frame, 1, 1, 1);
2760 return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
/*
 * Registration of graph node "ip4-rewrite". The node runs ip4_rewrite,
 * uses u32-sized vector elements and prints traces through
 * format_ip4_rewrite_trace. The other rewrite and midchain nodes
 * registered in this file name this node in their .sibling_of member.
 */
2764 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2765 .function = ip4_rewrite,
2766 .name = "ip4-rewrite",
2767 .vector_size = sizeof (u32),
2769 .format_trace = format_ip4_rewrite_trace,
/* Next-node names, indexed by the IP4_REWRITE_NEXT_* values. */
2773 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2774 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2777 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/*
 * Registration of graph node "ip4-rewrite-mcast", which runs
 * ip4_rewrite_mcast. No next-node list is given in the visible lines;
 * the node is declared a sibling of "ip4-rewrite" instead.
 * NOTE(review): presumably the sibling relation makes it share the next
 * nodes of "ip4-rewrite" -- confirm against the vlib node documentation.
 */
2779 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2780 .function = ip4_rewrite_mcast,
2781 .name = "ip4-rewrite-mcast",
2782 .vector_size = sizeof (u32),
2784 .format_trace = format_ip4_rewrite_trace,
2785 .sibling_of = "ip4-rewrite",
2787 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/*
 * Registration of graph node "ip4-mcast-midchain", which runs
 * ip4_mcast_midchain and is declared a sibling of "ip4-rewrite".
 * NOTE(review): this is the only one of the four rewrite registrations
 * that passes the extra "static" argument to VLIB_REGISTER_NODE --
 * confirm that the difference in linkage is intended.
 */
2789 VLIB_REGISTER_NODE (ip4_mcast_midchain_node, static) = {
2790 .function = ip4_mcast_midchain,
2791 .name = "ip4-mcast-midchain",
2792 .vector_size = sizeof (u32),
2794 .format_trace = format_ip4_rewrite_trace,
2795 .sibling_of = "ip4-rewrite",
2797 VLIB_NODE_FUNCTION_MULTIARCH (ip4_mcast_midchain_node, ip4_mcast_midchain)
/*
 * Registration of graph node "ip4-midchain", which runs ip4_midchain and
 * is declared a sibling of "ip4-rewrite".
 * NOTE(review): this node formats traces with
 * format_ip4_forward_next_trace, whereas the three sibling registrations
 * in this file use format_ip4_rewrite_trace -- confirm which formatter
 * is intended here.
 */
2799 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2800 .function = ip4_midchain,
2801 .name = "ip4-midchain",
2802 .vector_size = sizeof (u32),
2803 .format_trace = format_ip4_forward_next_trace,
2804 .sibling_of = "ip4-rewrite",
2806 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/*
 * CLI handler behind "set interface ip table" (see
 * set_interface_ip_table_command).
 *
 * Parses an interface followed by a decimal table id. The request is
 * refused while the interface still carries an IPv4 address; otherwise
 * the interface is bound to the unicast and multicast FIB tables for
 * that id, which are found or created (and locked) on demand.
 *
 * @param vm     vlib main; not referenced in the lines visible here
 * @param input  unformat stream holding "<interface> <table-id>"
 * @param cmd    CLI command descriptor; not referenced in the visible lines
 * @return a clib_error_t describing the failure; error is initialised to 0.
 *         NOTE(review): the return statement itself is outside this excerpt.
 */
2809 static clib_error_t *
2810 add_del_interface_table (vlib_main_t * vm,
2811 unformat_input_t * input, vlib_cli_command_t * cmd)
2813 vnet_main_t *vnm = vnet_get_main ();
2814 ip_interface_address_t *ia;
2815 clib_error_t *error = 0;
2816 u32 sw_if_index, table_id;
/* First token: the interface, resolved to its sw_if_index. */
2820 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2822 error = clib_error_return (0, "unknown interface `%U'",
2823 format_unformat_error, input);
/* Second token: the table id as a decimal number. */
2827 if (unformat (input, "%d", &table_id))
2831 error = clib_error_return (0, "expected table id `%U'",
2832 format_unformat_error, input);
/* Walk the addresses of the interface; finding one produces the
   "interface ... has address ..." error below. */
2837 * If the interface already has in IP address, then a change int
2838 * VRF is not allowed. The IP address applied must first be removed.
2839 * We do not do that automatically here, since VPP has no knowledge
2840 * of whether thoses subnets are valid in the destination VRF.
2843 foreach_ip_interface_address (&ip4_main.lookup_main,
2845 1 /* honor unnumbered */,
2849 a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2850 error = clib_error_return (0, "interface %U has address %U",
2851 format_vnet_sw_if_index_name, vnm,
2853 format_ip4_address, a);
2859 ip4_main_t *im = &ip4_main;
/* Unicast FIB: find or create the table for table_id and record its
   index for this interface, growing the per-interface vector if needed. */
2862 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2864 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2865 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
/* Multicast FIB: same table id; fib_index is reused to hold the mfib
   index before it is stored in the mfib per-interface vector. */
2867 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2868 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2869 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2877 * Place the indicated interface into the supplied IPv4 FIB table (also known
2878 * as a VRF). If the FIB table does not exist, this command creates it. To
2879 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2880 * FIB table will only be displayed if a route has been added to the table, or
2881 * an IP Address is assigned to an interface in the table (which adds a route
2884 * @note IP addresses added after setting the interface IP table are added to
2885 * the indicated FIB table. If an IP address is added prior to changing the
2886 * table then this is an error. The control plane must remove these addresses
2887 * first and then change the table. VPP will not automatically move the
2888 * addresses from the old to the new table as it does not know the validity
2892 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2893 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Registers the CLI path "set interface ip table" and routes it to
   add_del_interface_table; short_help documents the two expected
   arguments, interface first and table id second. */
2896 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2898 .path = "set interface ip table",
2899 .function = add_del_interface_table,
2900 .short_help = "set interface ip table <interface> <table-id>",
/*
 * Cross-check helper called by test_lookup_command_fn: resolves address
 * a in the FIB with index fib_index0 along two paths and reports whether
 * they agree.
 *
 * The first result is obtained by stepping through the mtrie by hand
 * (step_one, then lookup steps 2 and 3) and reading the index stored in
 * the final leaf; the second comes from ip4_fib_table_lookup_lb on the
 * same FIB.
 *
 * @param a           address to look up
 * @param fib_index0  FIB index handed to ip4_fib_get
 * @return non-zero when both lookups yield the same index, 0 otherwise
 */
2905 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2907 ip4_fib_mtrie_t *mtrie0;
2908 ip4_fib_mtrie_leaf_t leaf0;
2911 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* Manual mtrie walk: each step consumes the leaf returned by the
   previous one. */
2913 leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
2914 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2915 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2917 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Compare against the result of the FIB table lookup. */
2919 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler for "test lookup" (see lookup_test_command): runs
 * ip4_lookup_validate over a run of consecutive IPv4 addresses and
 * prints how many of them failed.
 *
 * Accepted tokens, in any order: "table <n>", "count <f>" and a bare
 * IPv4 address that becomes the starting address. Any other token is
 * rejected with an "unknown input" error.
 *
 * @param vm     vlib main, used for vlib_cli_output
 * @param input  unformat stream with the command arguments
 * @param cmd    CLI command descriptor; not referenced in the visible lines
 * @return an error for an unknown token or a failed table check.
 *         NOTE(review): the final return is outside this excerpt.
 */
2922 static clib_error_t *
2923 test_lookup_command_fn (vlib_main_t * vm,
2924 unformat_input_t * input, vlib_cli_command_t * cmd)
2931 ip4_address_t ip4_base_address;
2934 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2936 if (unformat (input, "table %d", &table_id))
/* NOTE(review): the parsed value goes straight into ip4_fib_get here and
   into the fib_index0 parameter of ip4_lookup_validate below, so it is
   used as a FIB index although the token is spelled "table". A NULL
   result from ip4_fib_get skips the mismatch check. */
2938 /* Make sure the entry exists. */
2939 fib = ip4_fib_get (table_id);
2940 if ((fib) && (fib->index != table_id))
2941 return clib_error_return (0, "<fib-index> %d does not exist",
/* The count is read with a floating-point format; how it becomes the
   loop bound n is not visible in this excerpt. */
2944 else if (unformat (input, "count %f", &count))
2947 else if (unformat (input, "%U",
2948 unformat_ip4_address, &ip4_base_address))
2951 return clib_error_return (0, "unknown input `%U'",
2952 format_unformat_error, input);
/* Validate n consecutive addresses starting at ip4_base_address. */
2957 for (i = 0; i < n; i++)
2959 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Advance to the next address: add 1 in host byte order, then convert
   back to network byte order for storage. */
2962 ip4_base_address.as_u32 =
2963 clib_host_to_net_u32 (1 +
2964 clib_net_to_host_u32 (ip4_base_address.as_u32));
/* Report either the number of failed lookups or a clean run. */
2968 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2970 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2976 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2977 * given FIB table to determine if there is a conflict with the
2978 * adjacency table. The fib-id can be determined by using the
2979 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2982 * @todo This command uses fib-id, other commands use table-id (not
2983 * just a name, they are different indexes). Would like to change this
2984 * to table-id for consistency.
2987 * Example of how to run the test lookup command:
2988 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2989 * No errors in 2 lookups
/* Registers the CLI path "test lookup" and routes it to
   test_lookup_command_fn; per short_help the address is required while
   the table and count arguments are optional. */
2993 VLIB_CLI_COMMAND (lookup_test_command, static) =
2995 .path = "test lookup",
2996 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2997 .function = test_lookup_command_fn,
/*
 * Store flow_hash_config on the IPv4 FIB that belongs to table_id.
 *
 * @param table_id          table id resolved through fib_table_find
 * @param flow_hash_config  value written verbatim to fib->flow_hash_config
 * @return VNET_API_ERROR_NO_SUCH_FIB when fib_table_find yields ~0.
 *         NOTE(review): the success return is outside this excerpt.
 */
3002 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3007 fib_index = fib_table_find (FIB_PROTOCOL_IP4, table_id);
/* ~0 is the "not found" result of fib_table_find as tested here. */
3009 if (~0 == fib_index)
3010 return VNET_API_ERROR_NO_SUCH_FIB;
3012 fib = ip4_fib_get (fib_index);
3014 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler for "set ip flow-hash" (see set_ip_flow_hash_command).
 *
 * Reads "table <id>" plus any number of flow-hash keywords, ORs the bit
 * of every matched keyword into flow_hash_config (which starts at 0) and
 * applies the result through vnet_set_ip4_flow_hash.
 *
 * @param vm     vlib main; not referenced in the lines visible here
 * @param input  unformat stream with the command arguments
 * @param cmd    CLI command descriptor; not referenced in the visible lines
 * @return an error for unparsable input or an unknown FIB table.
 *         NOTE(review): the success return is outside this excerpt.
 */
3018 static clib_error_t *
3019 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3020 unformat_input_t * input,
3021 vlib_cli_command_t * cmd)
3025 u32 flow_hash_config = 0;
3028 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3030 if (unformat (input, "table %d", &table_id))
/* Keyword matching: #a turns the keyword name into the string that is
   matched and v is the bit ORed in.
   NOTE(review): presumably this line is the body of a helper macro that
   foreach_flow_hash_bit expands once per keyword; the macro definition
   lines are not part of this excerpt -- confirm. */
3033 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3034 foreach_flow_hash_bit
3041 return clib_error_return (0, "unknown input `%U'",
3042 format_unformat_error, input);
3044 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
/* Map the API return value onto a CLI error. The switch skeleton is not
   visible here, so which value leads to the BUG warning cannot be told
   from this excerpt. */
3050 case VNET_API_ERROR_NO_SUCH_FIB:
3051 return clib_error_return (0, "no such FIB table %d", table_id);
3054 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3062 * Configure the set of IPv4 fields used by the flow hash.
3065 * Example of how to set the flow hash on a given table:
3066 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3067 * Example of how to display the configured flow hash:
3068 * @cliexstart{show ip fib}
3069 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3072 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3073 * [0] [@0]: dpo-drop ip6
3076 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3077 * [0] [@0]: dpo-drop ip6
3080 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3081 * [0] [@0]: dpo-drop ip6
3084 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3085 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3088 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3089 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3090 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3091 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3092 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3095 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3096 * [0] [@0]: dpo-drop ip6
3097 * 255.255.255.255/32
3099 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3100 * [0] [@0]: dpo-drop ip6
3101 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3104 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3105 * [0] [@0]: dpo-drop ip6
3108 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3109 * [0] [@0]: dpo-drop ip6
3112 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3113 * [0] [@4]: ipv4-glean: af_packet0
3116 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3117 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3120 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3121 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3124 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3125 * [0] [@4]: ipv4-glean: af_packet1
3128 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3129 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3132 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3133 * [0] [@0]: dpo-drop ip6
3136 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3137 * [0] [@0]: dpo-drop ip6
3138 * 255.255.255.255/32
3140 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3141 * [0] [@0]: dpo-drop ip6
/* Registers the CLI path "set ip flow-hash" and routes it to
   set_ip_flow_hash_command_fn. The help string below lists the table id
   followed by the optional hash-field keywords. */
3145 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3147 .path = "set ip flow-hash",
3149 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3150 .function = set_ip_flow_hash_command_fn,
/*
 * Attach a classifier table to an interface, or detach it when
 * table_index is ~0.
 *
 * The table index is recorded per interface in
 * lm->classify_table_index_by_sw_if_index. If the interface has a first
 * IPv4 address, a FIB entry for a prefix built from that address is
 * added with a classify DPO (table set) or removed (table_index == ~0)
 * under FIB_SOURCE_CLASSIFY.
 *
 * @param vm           vlib main; not referenced in the lines visible here
 * @param sw_if_index  interface to configure
 * @param table_index  classifier table index, or ~0 to remove the entry.
 *                     NOTE(review): this parameter is declared on a line
 *                     missing from this excerpt; name taken from its uses.
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE for a free interface index,
 *         VNET_API_ERROR_NO_SUCH_ENTRY for a free classifier table index.
 *         NOTE(review): the success return is outside this excerpt.
 */
3155 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3158 vnet_main_t *vnm = vnet_get_main ();
3159 vnet_interface_main_t *im = &vnm->interface_main;
3160 ip4_main_t *ipm = &ip4_main;
3161 ip_lookup_main_t *lm = &ipm->lookup_main;
3162 vnet_classify_main_t *cm = &vnet_classify_main;
3163 ip4_address_t *if_addr;
/* Reject interface indices that are free in the sw_interfaces pool. */
3165 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3166 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 bypasses the table existence check; it selects the removal path
   further down. */
3168 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3169 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Record the table for this interface, growing the vector if needed. */
3171 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3172 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3174 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* The FIB entry is only touched when the interface has an address. */
3176 if (NULL != if_addr)
3178 fib_prefix_t pfx = {
3180 .fp_proto = FIB_PROTOCOL_IP4,
3181 .fp_addr.ip4 = *if_addr,
3185 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
/* Table given: install an entry whose DPO comes from
   classify_dpo_create. */
3189 if (table_index != (u32) ~ 0)
3191 dpo_id_t dpo = DPO_INVALID;
3196 classify_dpo_create (DPO_PROTO_IP4, table_index));
3198 fib_table_entry_special_dpo_add (fib_index,
3200 FIB_SOURCE_CLASSIFY,
3201 FIB_ENTRY_FLAG_NONE, &dpo);
/* No table: remove the entry for the same prefix and source. */
3206 fib_table_entry_special_remove (fib_index,
3207 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler for "set ip classify" (see set_ip_classify_command).
 *
 * Requires both "table-index <n>" and "intfc <interface>", in any order,
 * then calls vnet_set_ip4_classify_intfc and turns its error codes into
 * CLI error messages.
 *
 * @param vm     vlib main, passed on to vnet_set_ip4_classify_intfc
 * @param input  unformat stream with the command arguments
 * @param cmd    CLI command descriptor; not referenced in the visible lines
 * @return an error when an argument is missing or the interface or
 *         classifier table does not exist.
 *         NOTE(review): the success return is outside this excerpt.
 */
3214 static clib_error_t *
3215 set_ip_classify_command_fn (vlib_main_t * vm,
3216 unformat_input_t * input,
3217 vlib_cli_command_t * cmd)
3219 u32 table_index = ~0;
/* A separate flag records that a table index was given.
   NOTE(review): presumably because ~0 is itself a value that
   vnet_set_ip4_classify_intfc treats specially, so it cannot double as
   a "not set" marker -- confirm. */
3220 int table_index_set = 0;
3221 u32 sw_if_index = ~0;
3224 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3226 if (unformat (input, "table-index %d", &table_index))
3227 table_index_set = 1;
3228 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3229 vnet_get_main (), &sw_if_index))
/* Both arguments are mandatory. */
3235 if (table_index_set == 0)
3236 return clib_error_return (0, "classify table-index must be specified");
3238 if (sw_if_index == ~0)
3239 return clib_error_return (0, "interface / subif must be specified");
3241 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
/* Translate the API error codes into user-facing messages. */
3248 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3249 return clib_error_return (0, "No such interface");
3251 case VNET_API_ERROR_NO_SUCH_ENTRY:
3252 return clib_error_return (0, "No such classifier table");
3258 * Assign a classification table to an interface. The classification
3259 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3260 * commands. Once the table is created, use this command to filter packets
3264 * Example of how to assign a classification table to an interface:
3265 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the CLI path "set ip classify" and routes it to
   set_ip_classify_command_fn. The help string below names the two
   arguments the handler requires. */
3268 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3270 .path = "set ip classify",
3272 "set ip classify intfc <interface> table-index <classify-idx>",
3273 .function = set_ip_classify_command_fn,
3278 * fd.io coding-style-patch-verification: ON
3281 * eval: (c-set-style "gnu")