2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
/**
 * @file
 * @brief IPv4 Forwarding.
 *
 * This file contains the source code for IPv4 forwarding.
 */
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 cpu_index = os_get_cpu_number ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
100 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
102 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
104 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
106 flow_hash_config_t flow_hash_config0, flow_hash_config1;
107 flow_hash_config_t flow_hash_config2, flow_hash_config3;
108 u32 hash_c0, hash_c1, hash_c2, hash_c3;
109 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
111 /* Prefetch next iteration. */
113 vlib_buffer_t *p4, *p5, *p6, *p7;
115 p4 = vlib_get_buffer (vm, from[4]);
116 p5 = vlib_get_buffer (vm, from[5]);
117 p6 = vlib_get_buffer (vm, from[6]);
118 p7 = vlib_get_buffer (vm, from[7]);
120 vlib_prefetch_buffer_header (p4, LOAD);
121 vlib_prefetch_buffer_header (p5, LOAD);
122 vlib_prefetch_buffer_header (p6, LOAD);
123 vlib_prefetch_buffer_header (p7, LOAD);
125 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
128 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
131 pi0 = to_next[0] = from[0];
132 pi1 = to_next[1] = from[1];
133 pi2 = to_next[2] = from[2];
134 pi3 = to_next[3] = from[3];
141 p0 = vlib_get_buffer (vm, pi0);
142 p1 = vlib_get_buffer (vm, pi1);
143 p2 = vlib_get_buffer (vm, pi2);
144 p3 = vlib_get_buffer (vm, pi3);
146 ip0 = vlib_buffer_get_current (p0);
147 ip1 = vlib_buffer_get_current (p1);
148 ip2 = vlib_buffer_get_current (p2);
149 ip3 = vlib_buffer_get_current (p3);
151 dst_addr0 = &ip0->dst_address;
152 dst_addr1 = &ip1->dst_address;
153 dst_addr2 = &ip2->dst_address;
154 dst_addr3 = &ip3->dst_address;
157 vec_elt (im->fib_index_by_sw_if_index,
158 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
160 vec_elt (im->fib_index_by_sw_if_index,
161 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
163 vec_elt (im->fib_index_by_sw_if_index,
164 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
166 vec_elt (im->fib_index_by_sw_if_index,
167 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
169 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
170 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
172 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
173 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
175 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
176 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
178 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
179 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
182 if (!lookup_for_responses_to_locally_received_packets)
184 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
185 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
186 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
187 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
189 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
191 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
192 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
193 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
194 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
197 tcp0 = (void *) (ip0 + 1);
198 tcp1 = (void *) (ip1 + 1);
199 tcp2 = (void *) (ip2 + 1);
200 tcp3 = (void *) (ip3 + 1);
202 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
203 || ip0->protocol == IP_PROTOCOL_UDP);
204 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
205 || ip1->protocol == IP_PROTOCOL_UDP);
206 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
207 || ip2->protocol == IP_PROTOCOL_UDP);
208 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
209 || ip1->protocol == IP_PROTOCOL_UDP);
211 if (!lookup_for_responses_to_locally_received_packets)
213 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
214 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
215 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
216 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
219 if (!lookup_for_responses_to_locally_received_packets)
221 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
222 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
223 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
224 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
227 if (!lookup_for_responses_to_locally_received_packets)
229 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
230 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
231 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
232 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
235 if (lookup_for_responses_to_locally_received_packets)
237 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
238 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
239 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
240 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
244 /* Handle default route. */
247 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
250 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
253 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
256 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
257 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
258 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
259 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
260 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
263 lb0 = load_balance_get (lb_index0);
264 lb1 = load_balance_get (lb_index1);
265 lb2 = load_balance_get (lb_index2);
266 lb3 = load_balance_get (lb_index3);
268 /* Use flow hash to compute multipath adjacency. */
269 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
270 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
271 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
272 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
273 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
275 flow_hash_config0 = lb0->lb_hash_config;
276 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
277 ip4_compute_flow_hash (ip0, flow_hash_config0);
279 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
281 flow_hash_config1 = lb1->lb_hash_config;
282 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
283 ip4_compute_flow_hash (ip1, flow_hash_config1);
285 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
287 flow_hash_config2 = lb2->lb_hash_config;
288 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
289 ip4_compute_flow_hash (ip2, flow_hash_config2);
291 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
293 flow_hash_config3 = lb3->lb_hash_config;
294 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
295 ip4_compute_flow_hash (ip3, flow_hash_config3);
298 ASSERT (lb0->lb_n_buckets > 0);
299 ASSERT (is_pow2 (lb0->lb_n_buckets));
300 ASSERT (lb1->lb_n_buckets > 0);
301 ASSERT (is_pow2 (lb1->lb_n_buckets));
302 ASSERT (lb2->lb_n_buckets > 0);
303 ASSERT (is_pow2 (lb2->lb_n_buckets));
304 ASSERT (lb3->lb_n_buckets > 0);
305 ASSERT (is_pow2 (lb3->lb_n_buckets));
307 dpo0 = load_balance_get_bucket_i (lb0,
309 (lb0->lb_n_buckets_minus_1)));
310 dpo1 = load_balance_get_bucket_i (lb1,
312 (lb1->lb_n_buckets_minus_1)));
313 dpo2 = load_balance_get_bucket_i (lb2,
315 (lb2->lb_n_buckets_minus_1)));
316 dpo3 = load_balance_get_bucket_i (lb3,
318 (lb3->lb_n_buckets_minus_1)));
320 next0 = dpo0->dpoi_next_node;
321 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
322 next1 = dpo1->dpoi_next_node;
323 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
324 next2 = dpo2->dpoi_next_node;
325 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
326 next3 = dpo3->dpoi_next_node;
327 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
329 vlib_increment_combined_counter
330 (cm, cpu_index, lb_index0, 1,
331 vlib_buffer_length_in_chain (vm, p0)
332 + sizeof (ethernet_header_t));
333 vlib_increment_combined_counter
334 (cm, cpu_index, lb_index1, 1,
335 vlib_buffer_length_in_chain (vm, p1)
336 + sizeof (ethernet_header_t));
337 vlib_increment_combined_counter
338 (cm, cpu_index, lb_index2, 1,
339 vlib_buffer_length_in_chain (vm, p2)
340 + sizeof (ethernet_header_t));
341 vlib_increment_combined_counter
342 (cm, cpu_index, lb_index3, 1,
343 vlib_buffer_length_in_chain (vm, p3)
344 + sizeof (ethernet_header_t));
346 vlib_validate_buffer_enqueue_x4 (vm, node, next,
347 to_next, n_left_to_next,
349 next0, next1, next2, next3);
352 while (n_left_from > 0 && n_left_to_next > 0)
356 __attribute__ ((unused)) tcp_header_t *tcp0;
357 ip_lookup_next_t next0;
358 const load_balance_t *lb0;
359 ip4_fib_mtrie_t *mtrie0;
360 ip4_fib_mtrie_leaf_t leaf0;
361 ip4_address_t *dst_addr0;
362 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
363 flow_hash_config_t flow_hash_config0;
364 const dpo_id_t *dpo0;
370 p0 = vlib_get_buffer (vm, pi0);
372 ip0 = vlib_buffer_get_current (p0);
374 dst_addr0 = &ip0->dst_address;
377 vec_elt (im->fib_index_by_sw_if_index,
378 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
380 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
381 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
383 if (!lookup_for_responses_to_locally_received_packets)
385 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
387 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
389 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
392 tcp0 = (void *) (ip0 + 1);
394 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
395 || ip0->protocol == IP_PROTOCOL_UDP);
397 if (!lookup_for_responses_to_locally_received_packets)
398 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
400 if (!lookup_for_responses_to_locally_received_packets)
401 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
403 if (!lookup_for_responses_to_locally_received_packets)
404 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
406 if (lookup_for_responses_to_locally_received_packets)
407 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
410 /* Handle default route. */
413 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
414 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
417 lb0 = load_balance_get (lbi0);
419 /* Use flow hash to compute multipath adjacency. */
420 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
421 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
423 flow_hash_config0 = lb0->lb_hash_config;
425 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
426 ip4_compute_flow_hash (ip0, flow_hash_config0);
429 ASSERT (lb0->lb_n_buckets > 0);
430 ASSERT (is_pow2 (lb0->lb_n_buckets));
432 dpo0 = load_balance_get_bucket_i (lb0,
434 (lb0->lb_n_buckets_minus_1)));
436 next0 = dpo0->dpoi_next_node;
437 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
439 vlib_increment_combined_counter
440 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
447 if (PREDICT_FALSE (next0 != next))
450 vlib_put_next_frame (vm, node, next, n_left_to_next);
452 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
459 vlib_put_next_frame (vm, node, next, n_left_to_next);
462 if (node->flags & VLIB_NODE_FLAG_TRACE)
463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
465 return frame->n_vectors;
468 /** @brief IPv4 lookup node.
471 This is the main IPv4 lookup dispatch node.
473 @param vm vlib_main_t corresponding to the current thread
474 @param node vlib_node_runtime_t
475 @param frame vlib_frame_t whose contents should be dispatched
477 @par Graph mechanics: buffer metadata, next index usage
480 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
481 - Indicates the @c sw_if_index value of the interface that the
482 packet was received on.
483 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
484 - When the value is @c ~0 then the node performs a longest prefix
485 match (LPM) for the packet destination address in the FIB attached
486 to the receive interface.
487 - Otherwise perform LPM for the packet destination address in the
488 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
489 value (0, 1, ...) and not a VRF id.
492 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
493 - The lookup result adjacency index.
496 - Dispatches the packet to the node index found in
497 ip_adjacency_t @c adj->lookup_next_index
498 (where @c adj is the lookup result adjacency).
501 ip4_lookup (vlib_main_t * vm,
502 vlib_node_runtime_t * node, vlib_frame_t * frame)
504 return ip4_lookup_inline (vm, node, frame,
505 /* lookup_for_responses_to_locally_received_packets */
510 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
512 VLIB_REGISTER_NODE (ip4_lookup_node) =
514 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
515 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
516 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
518 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
521 ip4_load_balance (vlib_main_t * vm,
522 vlib_node_runtime_t * node, vlib_frame_t * frame)
524 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
525 u32 n_left_from, n_left_to_next, *from, *to_next;
526 ip_lookup_next_t next;
527 u32 cpu_index = os_get_cpu_number ();
529 from = vlib_frame_vector_args (frame);
530 n_left_from = frame->n_vectors;
531 next = node->cached_next_index;
533 if (node->flags & VLIB_NODE_FLAG_TRACE)
534 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
536 while (n_left_from > 0)
538 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
541 while (n_left_from >= 4 && n_left_to_next >= 2)
543 ip_lookup_next_t next0, next1;
544 const load_balance_t *lb0, *lb1;
545 vlib_buffer_t *p0, *p1;
546 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
547 const ip4_header_t *ip0, *ip1;
548 const dpo_id_t *dpo0, *dpo1;
550 /* Prefetch next iteration. */
552 vlib_buffer_t *p2, *p3;
554 p2 = vlib_get_buffer (vm, from[2]);
555 p3 = vlib_get_buffer (vm, from[3]);
557 vlib_prefetch_buffer_header (p2, STORE);
558 vlib_prefetch_buffer_header (p3, STORE);
560 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
561 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
564 pi0 = to_next[0] = from[0];
565 pi1 = to_next[1] = from[1];
572 p0 = vlib_get_buffer (vm, pi0);
573 p1 = vlib_get_buffer (vm, pi1);
575 ip0 = vlib_buffer_get_current (p0);
576 ip1 = vlib_buffer_get_current (p1);
577 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
578 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
580 lb0 = load_balance_get (lbi0);
581 lb1 = load_balance_get (lbi1);
584 * this node is for via FIBs we can re-use the hash value from the
585 * to node if present.
586 * We don't want to use the same hash value at each level in the recursion
587 * graph as that would lead to polarisation
591 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
593 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
595 hc0 = vnet_buffer (p0)->ip.flow_hash =
596 vnet_buffer (p0)->ip.flow_hash >> 1;
600 hc0 = vnet_buffer (p0)->ip.flow_hash =
601 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
604 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
606 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
608 hc1 = vnet_buffer (p1)->ip.flow_hash =
609 vnet_buffer (p1)->ip.flow_hash >> 1;
613 hc1 = vnet_buffer (p1)->ip.flow_hash =
614 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
619 load_balance_get_bucket_i (lb0,
620 hc0 & (lb0->lb_n_buckets_minus_1));
622 load_balance_get_bucket_i (lb1,
623 hc1 & (lb1->lb_n_buckets_minus_1));
625 next0 = dpo0->dpoi_next_node;
626 next1 = dpo1->dpoi_next_node;
628 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
631 vlib_increment_combined_counter
632 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633 vlib_increment_combined_counter
634 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
636 vlib_validate_buffer_enqueue_x2 (vm, node, next,
637 to_next, n_left_to_next,
638 pi0, pi1, next0, next1);
641 while (n_left_from > 0 && n_left_to_next > 0)
643 ip_lookup_next_t next0;
644 const load_balance_t *lb0;
647 const ip4_header_t *ip0;
648 const dpo_id_t *dpo0;
657 p0 = vlib_get_buffer (vm, pi0);
659 ip0 = vlib_buffer_get_current (p0);
660 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
662 lb0 = load_balance_get (lbi0);
665 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
667 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
669 hc0 = vnet_buffer (p0)->ip.flow_hash =
670 vnet_buffer (p0)->ip.flow_hash >> 1;
674 hc0 = vnet_buffer (p0)->ip.flow_hash =
675 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
680 load_balance_get_bucket_i (lb0,
681 hc0 & (lb0->lb_n_buckets_minus_1));
683 next0 = dpo0->dpoi_next_node;
684 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
686 vlib_increment_combined_counter
687 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
689 vlib_validate_buffer_enqueue_x1 (vm, node, next,
690 to_next, n_left_to_next,
694 vlib_put_next_frame (vm, node, next, n_left_to_next);
697 return frame->n_vectors;
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704 format_ip4_lookup_trace,};
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
708 /* get first interface address */
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711 ip_interface_address_t ** result_ia)
713 ip_lookup_main_t *lm = &im->lookup_main;
714 ip_interface_address_t *ia = 0;
715 ip4_address_t *result = 0;
718 foreach_ip_interface_address
719 (lm, ia, sw_if_index,
720 1 /* honor unnumbered */ ,
723 ip_interface_address_get_address (lm, ia);
729 *result_ia = result ? ia : 0;
734 ip4_add_interface_routes (u32 sw_if_index,
735 ip4_main_t * im, u32 fib_index,
736 ip_interface_address_t * a)
738 ip_lookup_main_t *lm = &im->lookup_main;
739 ip4_address_t *address = ip_interface_address_get_address (lm, a);
741 .fp_len = a->address_length,
742 .fp_proto = FIB_PROTOCOL_IP4,
743 .fp_addr.ip4 = *address,
746 a->neighbor_probe_adj_index = ~0;
750 fib_node_index_t fei;
752 fei = fib_table_entry_update_one_path (fib_index, &pfx,
753 FIB_SOURCE_INTERFACE,
754 (FIB_ENTRY_FLAG_CONNECTED |
755 FIB_ENTRY_FLAG_ATTACHED),
757 /* No next-hop address */
763 // no out-label stack
765 FIB_ROUTE_PATH_FLAG_NONE);
766 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
771 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
773 u32 classify_table_index =
774 lm->classify_table_index_by_sw_if_index[sw_if_index];
775 if (classify_table_index != (u32) ~ 0)
777 dpo_id_t dpo = DPO_INVALID;
782 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
784 fib_table_entry_special_dpo_add (fib_index,
787 FIB_ENTRY_FLAG_NONE, &dpo);
792 fib_table_entry_update_one_path (fib_index, &pfx,
793 FIB_SOURCE_INTERFACE,
794 (FIB_ENTRY_FLAG_CONNECTED |
795 FIB_ENTRY_FLAG_LOCAL),
802 FIB_ROUTE_PATH_FLAG_NONE);
806 ip4_del_interface_routes (ip4_main_t * im,
808 ip4_address_t * address, u32 address_length)
811 .fp_len = address_length,
812 .fp_proto = FIB_PROTOCOL_IP4,
813 .fp_addr.ip4 = *address,
818 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
822 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
826 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
828 ip4_main_t *im = &ip4_main;
830 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
833 * enable/disable only on the 1<->0 transition
837 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
842 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
843 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
846 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
850 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
851 sw_if_index, !is_enable, 0, 0);
854 static clib_error_t *
855 ip4_add_del_interface_address_internal (vlib_main_t * vm,
857 ip4_address_t * address,
858 u32 address_length, u32 is_del)
860 vnet_main_t *vnm = vnet_get_main ();
861 ip4_main_t *im = &ip4_main;
862 ip_lookup_main_t *lm = &im->lookup_main;
863 clib_error_t *error = 0;
864 u32 if_address_index, elts_before;
865 ip4_address_fib_t ip4_af, *addr_fib = 0;
867 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
868 ip4_addr_fib_init (&ip4_af, address,
869 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
870 vec_add1 (addr_fib, ip4_af);
873 * there is no support for adj-fib handling in the presence of overlapping
874 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
880 /* When adding an address check that it does not conflict
881 with an existing address. */
882 ip_interface_address_t *ia;
883 foreach_ip_interface_address
884 (&im->lookup_main, ia, sw_if_index,
885 0 /* honor unnumbered */ ,
888 ip_interface_address_get_address
889 (&im->lookup_main, ia);
890 if (ip4_destination_matches_route
891 (im, address, x, ia->address_length) ||
892 ip4_destination_matches_route (im,
898 ("failed to add %U which conflicts with %U for interface %U",
899 format_ip4_address_and_length, address,
901 format_ip4_address_and_length, x,
903 format_vnet_sw_if_index_name, vnm,
909 elts_before = pool_elts (lm->if_address_pool);
911 error = ip_interface_address_add_del
912 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
916 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
919 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
921 ip4_add_interface_routes (sw_if_index,
922 im, ip4_af.fib_index,
924 (lm->if_address_pool, if_address_index));
926 /* If pool did not grow/shrink: add duplicate address. */
927 if (elts_before != pool_elts (lm->if_address_pool))
929 ip4_add_del_interface_address_callback_t *cb;
930 vec_foreach (cb, im->add_del_interface_address_callbacks)
931 cb->function (im, cb->function_opaque, sw_if_index,
932 address, address_length, if_address_index, is_del);
941 ip4_add_del_interface_address (vlib_main_t * vm,
943 ip4_address_t * address,
944 u32 address_length, u32 is_del)
946 return ip4_add_del_interface_address_internal
947 (vm, sw_if_index, address, address_length, is_del);
950 /* Built-in ip4 unicast rx feature path definition */
952 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
954 .arc_name = "ip4-unicast",
955 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
956 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
959 VNET_FEATURE_INIT (ip4_flow_classify, static) =
961 .arc_name = "ip4-unicast",
962 .node_name = "ip4-flow-classify",
963 .runs_before = VNET_FEATURES ("ip4-inacl"),
966 VNET_FEATURE_INIT (ip4_inacl, static) =
968 .arc_name = "ip4-unicast",
969 .node_name = "ip4-inacl",
970 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
973 VNET_FEATURE_INIT (ip4_source_check_1, static) =
975 .arc_name = "ip4-unicast",
976 .node_name = "ip4-source-check-via-rx",
977 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
980 VNET_FEATURE_INIT (ip4_source_check_2, static) =
982 .arc_name = "ip4-unicast",
983 .node_name = "ip4-source-check-via-any",
984 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
987 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
989 .arc_name = "ip4-unicast",
990 .node_name = "ip4-source-and-port-range-check-rx",
991 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
994 VNET_FEATURE_INIT (ip4_policer_classify, static) =
996 .arc_name = "ip4-unicast",
997 .node_name = "ip4-policer-classify",
998 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1001 VNET_FEATURE_INIT (ip4_ipsec, static) =
1003 .arc_name = "ip4-unicast",
1004 .node_name = "ipsec-input-ip4",
1005 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1008 VNET_FEATURE_INIT (ip4_vpath, static) =
1010 .arc_name = "ip4-unicast",
1011 .node_name = "vpath-input-ip4",
1012 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1015 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1017 .arc_name = "ip4-unicast",
1018 .node_name = "ip4-vxlan-bypass",
1019 .runs_before = VNET_FEATURES ("ip4-lookup"),
1022 VNET_FEATURE_INIT (ip4_drop, static) =
1024 .arc_name = "ip4-unicast",
1025 .node_name = "ip4-drop",
1026 .runs_before = VNET_FEATURES ("ip4-lookup"),
1029 VNET_FEATURE_INIT (ip4_lookup, static) =
1031 .arc_name = "ip4-unicast",
1032 .node_name = "ip4-lookup",
1033 .runs_before = 0, /* not before any other features */
1036 /* Built-in ip4 multicast rx feature path definition */
1037 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1039 .arc_name = "ip4-multicast",
1040 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1041 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1044 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1046 .arc_name = "ip4-multicast",
1047 .node_name = "vpath-input-ip4",
1048 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1051 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1053 .arc_name = "ip4-multicast",
1054 .node_name = "ip4-drop",
1055 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1058 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1060 .arc_name = "ip4-multicast",
1061 .node_name = "ip4-mfib-forward-lookup",
1062 .runs_before = 0, /* last feature */
1065 /* Source and port-range check ip4 tx feature path definition */
1066 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1068 .arc_name = "ip4-output",
1069 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1070 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1073 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1075 .arc_name = "ip4-output",
1076 .node_name = "ip4-source-and-port-range-check-tx",
1077 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1080 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1082 .arc_name = "ip4-output",
1083 .node_name = "ipsec-output-ip4",
1084 .runs_before = VNET_FEATURES ("interface-output"),
1087 /* Built-in ip4 tx feature path definition */
1088 VNET_FEATURE_INIT (ip4_interface_output, static) =
1090 .arc_name = "ip4-output",
1091 .node_name = "interface-output",
1092 .runs_before = 0, /* not before any other features */
1096 static clib_error_t *
1097 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1099 ip4_main_t *im = &ip4_main;
1101 /* Fill in lookup tables with default table (0). */
1102 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1103 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1105 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1108 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1111 return /* no error */ 0;
1114 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1116 /* Global IP4 main. */
1117 ip4_main_t ip4_main;
1120 ip4_lookup_init (vlib_main_t * vm)
1122 ip4_main_t *im = &ip4_main;
1123 clib_error_t *error;
1126 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1129 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1134 m = pow2_mask (i) << (32 - i);
1137 im->fib_masks[i] = clib_host_to_net_u32 (m);
1140 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1142 /* Create FIB with index 0 and table id of 0. */
1143 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1144 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1148 pn = pg_get_node (ip4_lookup_node.index);
1149 pn->unformat_edit = unformat_pg_ip4_header;
1153 ethernet_arp_header_t h;
1155 memset (&h, 0, sizeof (h));
1157 /* Set target ethernet address to all zeros. */
1158 memset (h.ip4_over_ethernet[1].ethernet, 0,
1159 sizeof (h.ip4_over_ethernet[1].ethernet));
1161 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1162 #define _8(f,v) h.f = v;
1163 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1164 _16 (l3_type, ETHERNET_TYPE_IP4);
1165 _8 (n_l2_address_bytes, 6);
1166 _8 (n_l3_address_bytes, 4);
1167 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1171 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1174 /* alloc chunk size */ 8,
1181 VLIB_INIT_FUNCTION (ip4_lookup_init);
1185 /* Adjacency taken. */
1190 /* Packet data, possibly *after* rewrite. */
1191 u8 packet_data[64 - 1 * sizeof (u32)];
1193 ip4_forward_next_trace_t;
/*
 * Trace formatter: pulls (vlib_main_t *, vlib_node_t *, ip4_forward_next_trace_t *)
 * from the va_list and appends the captured packet bytes, formatted as an ip4
 * header at the current indent, to s.  vm and node are fetched only to
 * advance the argument list.
 */
1196 format_ip4_forward_next_trace (u8 * s, va_list * args)
1198 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1199 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1200 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1201 uword indent = format_get_indent (s);
1202 s = format (s, "%U%U",
1203 format_white_space, indent,
1204 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup traces: prints the fib index, dpo index and
 * flow hash of the record on one line, then the captured packet bytes as an
 * ip4 header on the next line at the current indent.
 */
1209 format_ip4_lookup_trace (u8 * s, va_list * args)
1211 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1212 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1213 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1214 uword indent = format_get_indent (s);
1216 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1217 t->fib_index, t->dpo_index, t->flow_hash);
1218 s = format (s, "\n%U%U",
1219 format_white_space, indent,
1220 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite traces: prints the adjacency selected by
 * t->dpo_index (via format_ip_adjacency) and the flow hash, then the captured
 * packet bytes via format_ip_adjacency_packet_data.
 */
1225 format_ip4_rewrite_trace (u8 * s, va_list * args)
1227 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1228 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1229 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1230 uword indent = format_get_indent (s);
/* NOTE(review): the value printed under the label "tx_sw_if_index" is t->fib_index -- confirm this reuse of the field is intended. */
1232 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1233 t->fib_index, t->dpo_index, format_ip_adjacency,
1234 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1235 s = format (s, "\n%U%U",
1236 format_white_space, indent,
1237 format_ip_adjacency_packet_data,
1238 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1242 /* Common trace function for all ip4-forward next nodes. */
/*
 * Walks every buffer index in the frame; for each buffer flagged
 * VLIB_BUFFER_IS_TRACED it adds an ip4_forward_next_trace_t record holding
 * ip.adj_index[which_adj_index], the flow hash, an index taken from
 * sw_if_index[VLIB_TX] (or, when that is ~0, the fib index of the RX
 * interface) and the first sizeof (packet_data) bytes at the buffer's current
 * position.  Buffers are handled two at a time, then one at a time.
 */
1244 ip4_forward_next_trace (vlib_main_t * vm,
1245 vlib_node_runtime_t * node,
1246 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1249 ip4_main_t *im = &ip4_main;
1251 n_left = frame->n_vectors;
1252 from = vlib_frame_vector_args (frame);
1257 vlib_buffer_t *b0, *b1;
1258 ip4_forward_next_trace_t *t0, *t1;
1260 /* Prefetch next iteration. */
1261 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1262 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1267 b0 = vlib_get_buffer (vm, bi0);
1268 b1 = vlib_get_buffer (vm, bi1);
/* Record a trace only for buffers that were selected for tracing. */
1270 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1272 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1273 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1274 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Use sw_if_index[VLIB_TX] when it is set (not ~0), otherwise the fib index bound to the RX interface. */
1276 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1277 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1278 vec_elt (im->fib_index_by_sw_if_index,
1279 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
/* Copies a fixed sizeof (packet_data) bytes from the current data pointer. */
1281 clib_memcpy (t0->packet_data,
1282 vlib_buffer_get_current (b0),
1283 sizeof (t0->packet_data));
/* Same handling for the second buffer of the pair. */
1285 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1287 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1288 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1289 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1291 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1292 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1293 vec_elt (im->fib_index_by_sw_if_index,
1294 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1295 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1296 sizeof (t1->packet_data));
/* Single-buffer path for the remaining buffers; same record as above. */
1306 ip4_forward_next_trace_t *t0;
1310 b0 = vlib_get_buffer (vm, bi0);
1312 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1314 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1315 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1316 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1318 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1319 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1320 vec_elt (im->fib_index_by_sw_if_index,
1321 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1322 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1323 sizeof (t0->packet_data));
/*
 * Common body of ip4_drop and ip4_punt: passes every buffer of the frame to
 * vlib_error_drop_buffers, attributing error_code to ip4_input_node, and
 * records traces (using the VLIB_TX adjacency index) when the node is traced.
 */
1331 ip4_drop_or_punt (vlib_main_t * vm,
1332 vlib_node_runtime_t * node,
1333 vlib_frame_t * frame, ip4_error_t error_code)
1335 u32 *buffers = vlib_frame_vector_args (frame);
1336 uword n_packets = frame->n_vectors;
1338 vlib_error_drop_buffers (vm, node, buffers,
1342 ip4_input_node.index, error_code);
1344 if (node->flags & VLIB_NODE_FLAG_TRACE)
1345 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for ip4_drop_node: hands the frame to ip4_drop_or_punt with IP4_ERROR_ADJACENCY_DROP. */
1351 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1353 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function for ip4_punt_node: hands the frame to ip4_drop_or_punt with IP4_ERROR_ADJACENCY_PUNT. */
1357 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1359 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/*
 * Graph-node registration for ip4_drop: frame elements are u32 values and
 * traces are printed with format_ip4_forward_next_trace.  The MULTIARCH
 * macro that follows is given the same node/function pair.
 */
1363 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1365 .function = ip4_drop,.
1367 .vector_size = sizeof (u32),
1368 .format_trace = format_ip4_forward_next_trace,
1375 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/*
 * Graph-node registration for ip4_punt: frame elements are u32 values and
 * traces are printed with format_ip4_forward_next_trace.  The MULTIARCH
 * macro that follows is given the same node/function pair.
 */
1377 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1379 .function = ip4_punt,
1381 .vector_size = sizeof (u32),
1382 .format_trace = format_ip4_forward_next_trace,
1389 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1392 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with the payload length and protocol plus the source and
 * destination addresses, then extended over the payload, following
 * p0->next_buffer when the payload spans several buffers.  The result is the
 * one's complement of the folded sum.
 */
1394 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1398 u32 ip_header_length, payload_length_host_byte_order;
1399 u32 n_this_buffer, n_bytes_left;
1401 void *data_this_buffer;
1403 /* Initialize checksum with ip header. */
1404 ip_header_length = ip4_header_bytes (ip0);
/* Payload length = total IP length minus the IP header length. */
1405 payload_length_host_byte_order =
1406 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Length and protocol contribution, converted to network byte order. */
1408 clib_host_to_net_u32 (payload_length_host_byte_order +
1409 (ip0->protocol << 16));
/* With 32-bit words the two addresses are added separately ... */
1411 if (BITS (uword) == 32)
1414 ip_csum_with_carry (sum0,
1415 clib_mem_unaligned (&ip0->src_address, u32));
1417 ip_csum_with_carry (sum0,
1418 clib_mem_unaligned (&ip0->dst_address, u32));
/* ... otherwise a single 8-byte read starting at src_address is added (presumably dst_address directly follows src_address -- TODO confirm). */
1422 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1424 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1425 data_this_buffer = (void *) ip0 + ip_header_length;
/* If header + payload exceeds this buffer's current_length, use only what this buffer holds after the header (0 if it holds no more than the header). */
1426 if (n_this_buffer + ip_header_length > p0->current_length)
1428 p0->current_length >
1429 ip_header_length ? p0->current_length - ip_header_length : 0;
1432 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1433 n_bytes_left -= n_this_buffer;
1434 if (n_bytes_left == 0)
/* More payload remains: it must live in a chained buffer. */
1437 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1438 p0 = vlib_get_buffer (vm, p0->next_buffer);
1439 data_this_buffer = vlib_buffer_get_current (p0);
1440 n_this_buffer = p0->current_length;
1443 sum16 = ~ip_csum_fold (sum0);
/*
 * Verifies the L4 checksum of the TCP or UDP packet in p0 and records the
 * outcome in p0->flags: IP_BUFFER_L4_CHECKSUM_COMPUTED is always set, and
 * IP_BUFFER_L4_CHECKSUM_CORRECT is set when the UDP checksum field is zero
 * or when the computed checksum is zero.
 */
1449 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1451 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1455 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1456 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): ip0 + 1 skips exactly sizeof (ip4_header_t), whereas ip4_tcp_udp_compute_checksum uses ip4_header_bytes (ip0) -- confirm behaviour for headers carrying options. */
1458 udp0 = (void *) (ip0 + 1);
/* A UDP checksum field of zero is accepted without computing anything. */
1459 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1461 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1462 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1466 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is shifted into the bit position of the CORRECT flag. */
1468 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1469 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Declares the "ip4-local" feature arc, whose start node is "ip4-local". */
1475 VNET_FEATURE_ARC_INIT (ip4_local) =
1477 .arc_name = "ip4-local",
1478 .start_nodes = VNET_FEATURES ("ip4-local"),
/*
 * Shared body of ip4_local (head_of_feature_arc = 1) and
 * ip4_local_end_of_arc (head_of_feature_arc = 0).
 *
 * Per packet, as far as visible here: remember where the IP header starts,
 * look up the source address in the mtrie of the selected FIB, check the
 * TCP/UDP checksum and the UDP length, apply the source checks
 * (spoofed-local, no route to source), choose the next node from
 * lm->local_next_by_ip_protocol[] and force IP_LOCAL_NEXT_DROP on error.
 * When head_of_feature_arc is set, error-free packets are started on the
 * ip4-local feature arc.  IP4_ERROR_UNKNOWN_PROTOCOL doubles as the
 * "no error found so far" value throughout.  Packets are handled two at a
 * time, then one at a time.  Returns frame->n_vectors.
 */
1483 ip4_local_inline (vlib_main_t * vm,
1484 vlib_node_runtime_t * node,
1485 vlib_frame_t * frame, int head_of_feature_arc)
1487 ip4_main_t *im = &ip4_main;
1488 ip_lookup_main_t *lm = &im->lookup_main;
1489 ip_local_next_t next_index;
1490 u32 *from, *to_next, n_left_from, n_left_to_next;
/* Errors are attributed to the ip4-input node's counters. */
1491 vlib_node_runtime_t *error_node =
1492 vlib_node_get_runtime (vm, ip4_input_node.index);
1493 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1495 from = vlib_frame_vector_args (frame);
1496 n_left_from = frame->n_vectors;
1497 next_index = node->cached_next_index;
1499 if (node->flags & VLIB_NODE_FLAG_TRACE)
1500 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1502 while (n_left_from > 0)
1504 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet loop. */
1506 while (n_left_from >= 4 && n_left_to_next >= 2)
1508 vlib_buffer_t *p0, *p1;
1509 ip4_header_t *ip0, *ip1;
1510 udp_header_t *udp0, *udp1;
1511 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1512 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1513 const dpo_id_t *dpo0, *dpo1;
1514 const load_balance_t *lb0, *lb1;
1515 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1516 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1517 i32 len_diff0, len_diff1;
1518 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1519 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1520 u32 sw_if_index0, sw_if_index1;
/* Speculatively enqueue both buffers to the current next frame. */
1522 pi0 = to_next[0] = from[0];
1523 pi1 = to_next[1] = from[1];
1527 n_left_to_next -= 2;
1529 next0 = next1 = IP_LOCAL_NEXT_DROP;
1531 p0 = vlib_get_buffer (vm, pi0);
1532 p1 = vlib_get_buffer (vm, pi1);
1534 ip0 = vlib_buffer_get_current (p0);
1535 ip1 = vlib_buffer_get_current (p1);
/* Save the offset of the IP header in the buffer metadata. */
1537 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1538 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1540 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1541 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1543 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1544 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
/* NOTE(review): fib_index0/1 were already computed from the same expression just above; the repeats below look redundant. */
1546 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
/* sw_if_index[VLIB_TX], when not ~0, takes precedence over the RX interface's fib index. */
1548 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1549 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1551 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1553 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1554 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1556 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1557 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
/* Source-address lookup: mtrie steps 0..3 are interleaved with the checks below. */
1559 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1562 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1564 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1566 /* Treat IP frag packets as "experimental" protocol for now
1567 until support of IP frag reassembly is implemented */
1568 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1569 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1571 if (head_of_feature_arc == 0)
1573 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1577 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1578 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1579 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1580 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1585 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1586 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1588 udp0 = ip4_next_header (ip0);
1589 udp1 = ip4_next_header (ip1);
1591 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1592 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1593 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1596 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1598 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1600 /* Verify UDP length. */
1601 ip_len0 = clib_net_to_host_u16 (ip0->length);
1602 ip_len1 = clib_net_to_host_u16 (ip1->length);
1603 udp_len0 = clib_net_to_host_u16 (udp0->length);
1604 udp_len1 = clib_net_to_host_u16 (udp1->length);
/* A negative difference means the UDP length exceeds the IP length. */
1606 len_diff0 = ip_len0 - udp_len0;
1607 len_diff1 = ip_len1 - udp_len1;
/* The length check only applies to UDP. */
1609 len_diff0 = is_udp0 ? len_diff0 : 0;
1610 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: taken unless both packets are TCP/UDP with a checksum already known to be good. */
1612 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1613 & good_tcp_udp0 & good_tcp_udp1)))
1618 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1619 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1621 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1622 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1627 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1628 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1630 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1631 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1635 good_tcp_udp0 &= len_diff0 >= 0;
1636 good_tcp_udp1 &= len_diff1 >= 0;
1639 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1641 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* UNKNOWN_PROTOCOL is the "nothing wrong yet" starting value. */
1643 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1645 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1646 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* Relies on the UDP checksum error code being TCP's + 1, so adding is_udp selects the right one. */
1648 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1649 error0 = (is_tcp_udp0 && !good_tcp_udp0
1650 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1651 error1 = (is_tcp_udp1 && !good_tcp_udp1
1652 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1655 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1657 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
/* An empty leaf falls back to the mtrie's default leaf. */
1660 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1663 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* Store the source-lookup result in both the RX and TX adjacency slots. */
1665 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1666 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1667 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1669 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1670 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1671 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1673 lb0 = load_balance_get (lbi0);
1674 lb1 = load_balance_get (lbi1);
1675 dpo0 = load_balance_get_bucket_i (lb0, 0);
1676 dpo1 = load_balance_get_bucket_i (lb1, 0);
1679 * Must have a route to source otherwise we drop the packet.
1680 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1683 * - the source is a receive => it's from us => bogus, do this
1684 * first since it sets a different error code.
1685 * - uRPF check for any route to source - accept if passes.
1686 * - allow packets destined to the broadcast address from unknown sources
1688 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1689 dpo0->dpoi_type == DPO_RECEIVE) ?
1690 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1691 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1692 !fib_urpf_check_size (lb0->lb_urpf) &&
1693 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1694 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1695 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1696 dpo1->dpoi_type == DPO_RECEIVE) ?
1697 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1698 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1699 !fib_urpf_check_size (lb1->lb_urpf) &&
1700 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1701 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
/* Next node comes from the per-protocol table; any real error overrides it with drop. */
1705 next0 = lm->local_next_by_ip_protocol[proto0];
1706 next1 = lm->local_next_by_ip_protocol[proto1];
1709 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1711 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1713 p0->error = error0 ? error_node->errors[error0] : 0;
1714 p1->error = error1 ? error_node->errors[error1] : 0;
/* Only error-free packets enter the feature arc. */
1716 if (head_of_feature_arc)
1718 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1719 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1720 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1721 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1724 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1725 n_left_to_next, pi0, pi1,
/* Single-packet loop: same steps as above for one buffer. */
1729 while (n_left_from > 0 && n_left_to_next > 0)
1734 ip4_fib_mtrie_t *mtrie0;
1735 ip4_fib_mtrie_leaf_t leaf0;
1736 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1738 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1739 load_balance_t *lb0;
1740 const dpo_id_t *dpo0;
1743 pi0 = to_next[0] = from[0];
1747 n_left_to_next -= 1;
1749 next0 = IP_LOCAL_NEXT_DROP;
1751 p0 = vlib_get_buffer (vm, pi0);
1753 ip0 = vlib_buffer_get_current (p0);
1755 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1757 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1759 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1762 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1763 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1765 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1767 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1770 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1772 /* Treat IP frag packets as "experimental" protocol for now
1773 until support of IP frag reassembly is implemented */
1774 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1776 if (head_of_feature_arc == 0)
1778 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1782 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1783 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1787 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1789 udp0 = ip4_next_header (ip0);
1791 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1792 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1795 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1797 /* Verify UDP length. */
1798 ip_len0 = clib_net_to_host_u16 (ip0->length);
1799 udp_len0 = clib_net_to_host_u16 (udp0->length);
1801 len_diff0 = ip_len0 - udp_len0;
1803 len_diff0 = is_udp0 ? len_diff0 : 0;
1805 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1810 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1811 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1813 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1814 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1818 good_tcp_udp0 &= len_diff0 >= 0;
1821 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1823 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1825 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1827 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1828 error0 = (is_tcp_udp0 && !good_tcp_udp0
1829 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1832 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1835 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1837 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1838 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1840 lb0 = load_balance_get (lbi0);
1841 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* NOTE(review): adj_index[VLIB_TX] was already set to lbi0 a few lines above. */
1843 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1844 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1846 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1847 dpo0->dpoi_type == DPO_RECEIVE) ?
1848 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1849 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1850 !fib_urpf_check_size (lb0->lb_urpf) &&
1851 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1852 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1856 next0 = lm->local_next_by_ip_protocol[proto0];
1859 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1861 p0->error = error0 ? error_node->errors[error0] : 0;
1863 if (head_of_feature_arc)
1865 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1866 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1869 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1870 n_left_to_next, pi0, next0);
1874 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1877 return frame->n_vectors;
/* Node function for ip4_local_node: runs ip4_local_inline with head_of_feature_arc = 1. */
1881 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1883 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/*
 * Graph-node registration for "ip4-local".  The fixed next nodes cover drop,
 * punt, UDP lookup and ICMP input; further next nodes are added at run time
 * by ip4_register_protocol through vlib_node_add_next.
 */
1887 VLIB_REGISTER_NODE (ip4_local_node) =
1889 .function = ip4_local,
1890 .name = "ip4-local",
1891 .vector_size = sizeof (u32),
1892 .format_trace = format_ip4_forward_next_trace,
1893 .n_next_nodes = IP_LOCAL_N_NEXT,
1896 [IP_LOCAL_NEXT_DROP] = "error-drop",
1897 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1898 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1899 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1903 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/*
 * Node function for ip4_local_end_of_arc_node: runs ip4_local_inline with
 * head_of_feature_arc = 0 (the inline comment below names the parameter, not
 * its value).
 */
1906 ip4_local_end_of_arc (vlib_main_t * vm,
1907 vlib_node_runtime_t * node, vlib_frame_t * frame)
1909 return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
/*
 * Graph-node registration for "ip4-local-end-of-arc", declared as a sibling
 * of "ip4-local" and using the same trace formatter.
 */
1913 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1914 .function = ip4_local_end_of_arc,
1915 .name = "ip4-local-end-of-arc",
1916 .vector_size = sizeof (u32),
1918 .format_trace = format_ip4_forward_next_trace,
1919 .sibling_of = "ip4-local",
1922 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Registers "ip4-local-end-of-arc" as a feature on the "ip4-local" arc with no runs_before constraint. */
1924 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1925 .arc_name = "ip4-local",
1926 .node_name = "ip4-local-end-of-arc",
1927 .runs_before = 0, /* not before any other features */
/*
 * Route locally received packets of IP protocol `protocol` to the graph node
 * `node_index`: the node is added as a next of ip4-local and the resulting
 * next index is stored in lm->local_next_by_ip_protocol[protocol].
 * `protocol` must be a valid index into that table (checked by ASSERT only).
 */
1932 ip4_register_protocol (u32 protocol, u32 node_index)
1934 vlib_main_t *vm = vlib_get_main ();
1935 ip4_main_t *im = &ip4_main;
1936 ip_lookup_main_t *lm = &im->lookup_main;
1938 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1939 lm->local_next_by_ip_protocol[protocol] =
1940 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local": prints a heading, then the number of
 * every protocol whose entry in lm->local_next_by_ip_protocol is not
 * IP_LOCAL_NEXT_PUNT.  input and cmd are not used in the visible code.
 */
1943 static clib_error_t *
1944 show_ip_local_command_fn (vlib_main_t * vm,
1945 unformat_input_t * input, vlib_cli_command_t * cmd)
1947 ip4_main_t *im = &ip4_main;
1948 ip_lookup_main_t *lm = &im->lookup_main;
1951 vlib_cli_output (vm, "Protocols handled by ip4_local");
1952 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
/* An entry still equal to the punt next is treated as "not handled" and is skipped. */
1954 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1955 vlib_cli_output (vm, "%d", i);
1963 * Display the set of protocols handled by the local IPv4 stack.
1966 * Example of how to display local protocol table:
1967 * @cliexstart{show ip local}
1968 * Protocols handled by ip4_local
/* Binds the CLI path "show ip local" to show_ip_local_command_fn. */
1975 VLIB_CLI_COMMAND (show_ip_local, static) =
1977 .path = "show ip local",
1978 .function = show_ip_local_command_fn,
1979 .short_help = "show ip local",
1984 ip4_arp_inline (vlib_main_t * vm,
1985 vlib_node_runtime_t * node,
1986 vlib_frame_t * frame, int is_glean)
1988 vnet_main_t *vnm = vnet_get_main ();
1989 ip4_main_t *im = &ip4_main;
1990 ip_lookup_main_t *lm = &im->lookup_main;
1991 u32 *from, *to_next_drop;
1992 uword n_left_from, n_left_to_next_drop, next_index;
1993 static f64 time_last_seed_change = -1e100;
1994 static u32 hash_seeds[3];
1995 static uword hash_bitmap[256 / BITS (uword)];
1998 if (node->flags & VLIB_NODE_FLAG_TRACE)
1999 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2001 time_now = vlib_time_now (vm);
2002 if (time_now - time_last_seed_change > 1e-3)
2005 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2006 sizeof (hash_seeds));
2007 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2008 hash_seeds[i] = r[i];
2010 /* Mark all hash keys as been no-seen before. */
2011 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2014 time_last_seed_change = time_now;
2017 from = vlib_frame_vector_args (frame);
2018 n_left_from = frame->n_vectors;
2019 next_index = node->cached_next_index;
2020 if (next_index == IP4_ARP_NEXT_DROP)
2021 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2023 while (n_left_from > 0)
2025 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2026 to_next_drop, n_left_to_next_drop);
2028 while (n_left_from > 0 && n_left_to_next_drop > 0)
2030 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2031 ip_adjacency_t *adj0;
2038 p0 = vlib_get_buffer (vm, pi0);
2040 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2041 adj0 = ip_get_adjacency (lm, adj_index0);
2042 ip0 = vlib_buffer_get_current (p0);
2048 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2049 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2054 * this is the Glean case, so we are ARPing for the
2055 * packet's destination
2057 a0 ^= ip0->dst_address.data_u32;
2061 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2065 hash_v3_finalize32 (a0, b0, c0);
2067 c0 &= BITS (hash_bitmap) - 1;
2068 c0 = c0 / BITS (uword);
2069 m0 = (uword) 1 << (c0 % BITS (uword));
2071 bm0 = hash_bitmap[c0];
2072 drop0 = (bm0 & m0) != 0;
2074 /* Mark it as seen. */
2075 hash_bitmap[c0] = bm0 | m0;
2079 to_next_drop[0] = pi0;
2081 n_left_to_next_drop -= 1;
2084 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2085 IP4_ARP_ERROR_REQUEST_SENT];
2088 * the adj has been updated to a rewrite but the node the DPO that got
2089 * us here hasn't - yet. no big deal. we'll drop while we wait.
2091 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2098 * Can happen if the control-plane is programming tables
2099 * with traffic flowing; at least that's today's lame excuse.
2101 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2102 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2104 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2107 /* Send ARP request. */
2111 ethernet_arp_header_t *h0;
2112 vnet_hw_interface_t *hw_if0;
2115 vlib_packet_template_get_packet (vm,
2116 &im->ip4_arp_request_packet_template,
2119 /* Add rewrite/encap string for ARP packet. */
2120 vnet_rewrite_one_header (adj0[0], h0,
2121 sizeof (ethernet_header_t));
2123 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2125 /* Src ethernet address in ARP header. */
2126 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2128 sizeof (h0->ip4_over_ethernet[0].ethernet));
2132 /* The interface's source address is stashed in the Glean Adj */
2133 h0->ip4_over_ethernet[0].ip4 =
2134 adj0->sub_type.glean.receive_addr.ip4;
2136 /* Copy in destination address we are requesting. This is the
2137 * glean case, so it's the packet's destination.*/
2138 h0->ip4_over_ethernet[1].ip4.data_u32 =
2139 ip0->dst_address.data_u32;
2143 /* Src IP address in ARP header. */
2144 if (ip4_src_address_for_packet (lm, sw_if_index0,
2146 ip4_over_ethernet[0].ip4))
2148 /* No source address available */
2150 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2151 vlib_buffer_free (vm, &bi0, 1);
2155 /* Copy in destination address we are requesting from the
2157 h0->ip4_over_ethernet[1].ip4.data_u32 =
2158 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2161 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2162 b0 = vlib_get_buffer (vm, bi0);
2163 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2165 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2167 vlib_set_next_frame_buffer (vm, node,
2168 adj0->rewrite_header.next_index,
2173 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2176 return frame->n_vectors;
/* Node function for ip4_arp_node: runs ip4_arp_inline with is_glean = 0. */
2180 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2182 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function for ip4_glean_node: runs ip4_arp_inline with is_glean = 1. */
2186 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2188 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions indexed by IP4_ARP_ERROR_*; used as error_strings by both ip4_arp_node and ip4_glean_node. */
2191 static char *ip4_arp_error_strings[] = {
2192 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2193 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2194 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2195 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2196 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2197 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Graph-node registration for "ip4-arp": node function ip4_arp, error
 * strings from ip4_arp_error_strings, and "error-drop" as the
 * IP4_ARP_NEXT_DROP next node.
 */
2200 VLIB_REGISTER_NODE (ip4_arp_node) =
2202 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2203 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2204 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2205 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2207 [IP4_ARP_NEXT_DROP] = "error-drop",}
/*
 * Graph-node registration for "ip4-glean": node function ip4_glean, sharing
 * the error strings and next-node layout of ip4_arp_node.
 */
2210 VLIB_REGISTER_NODE (ip4_glean_node) =
2212 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2213 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2214 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2215 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2217 [IP4_ARP_NEXT_DROP] = "error-drop",}
/*
 * foreach_notrace_ip4_arp_error lists the ip4-arp error codes that
 * arp_notrace_init passes, one by one, to
 * vnet_pcap_drop_trace_filter_add_del, using the error values taken from the
 * ip4_arp_node runtime.  arp_notrace_init is registered as a vlib init
 * function at the end of this block.
 */
2220 #define foreach_notrace_ip4_arp_error \
2227 arp_notrace_init (vlib_main_t * vm)
2229 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2231 /* don't trace ARP request packets */
2233 vnet_pcap_drop_trace_filter_add_del \
2234 (rt->errors[IP4_ARP_ERROR_##a], \
2236 foreach_notrace_ip4_arp_error;
2241 VLIB_INIT_FUNCTION (arp_notrace_init);
2244 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * Returns 0 when the request was handed to the interface's output node.
 * Returns a clib error when the interface is administratively down, or when
 * no interface address matches dst (vnm->api_errno is then set to
 * VNET_API_ERROR_NO_MATCHING_INTERFACE).
 */
2246 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2248 vnet_main_t *vnm = vnet_get_main ();
2249 ip4_main_t *im = &ip4_main;
2250 ethernet_arp_header_t *h;
2252 ip_interface_address_t *ia;
2253 ip_adjacency_t *adj;
2254 vnet_hw_interface_t *hi;
2255 vnet_sw_interface_t *si;
2259 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is not admin-up. */
2261 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2263 return clib_error_return (0, "%U: interface %U down",
2264 format_ip4_address, dst,
2265 format_vnet_sw_if_index_name, vnm,
/* Look up the interface address matching dst; ia receives the corresponding interface-address entry. */
2270 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2273 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2274 return clib_error_return
2276 "no matching interface address for destination %U (interface %U)",
2277 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
/* The adjacency stored with the interface address supplies the encapsulation. */
2281 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
/* Start from the ARP request template, then fill in the addresses. */
2284 vlib_packet_template_get_packet (vm,
2285 &im->ip4_arp_request_packet_template,
2288 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2290 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2291 sizeof (h->ip4_over_ethernet[0].ethernet));
/* Sender = our matching address, target = the address being probed. */
2293 h->ip4_over_ethernet[0].ip4 = src[0];
2294 h->ip4_over_ethernet[1].ip4 = dst[0];
2296 b = vlib_get_buffer (vm, bi);
2297 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2298 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2300 /* Add encapsulation string for software interface (e.g. ethernet header). */
2301 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back so that it covers the rewrite bytes. */
2302 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet directly to the hardware interface's output node. */
2305 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2306 u32 *to_next = vlib_frame_vector_args (f);
2309 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2312 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: plain drop, or the ICMP error path (chosen there when the TTL expires). */
2317 IP4_REWRITE_NEXT_DROP,
2318 IP4_REWRITE_NEXT_ICMP_ERROR,
2319 } ip4_rewrite_next_t;
2322 ip4_rewrite_inline (vlib_main_t * vm,
2323 vlib_node_runtime_t * node,
2324 vlib_frame_t * frame,
2325 int do_counters, int is_midchain, int is_mcast)
2327 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2328 u32 *from = vlib_frame_vector_args (frame);
2329 u32 n_left_from, n_left_to_next, *to_next, next_index;
2330 vlib_node_runtime_t *error_node =
2331 vlib_node_get_runtime (vm, ip4_input_node.index);
2333 n_left_from = frame->n_vectors;
2334 next_index = node->cached_next_index;
2335 u32 cpu_index = os_get_cpu_number ();
2337 while (n_left_from > 0)
2339 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2341 while (n_left_from >= 4 && n_left_to_next >= 2)
2343 ip_adjacency_t *adj0, *adj1;
2344 vlib_buffer_t *p0, *p1;
2345 ip4_header_t *ip0, *ip1;
2346 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2347 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2348 u32 tx_sw_if_index0, tx_sw_if_index1;
2350 /* Prefetch next iteration. */
2352 vlib_buffer_t *p2, *p3;
2354 p2 = vlib_get_buffer (vm, from[2]);
2355 p3 = vlib_get_buffer (vm, from[3]);
2357 vlib_prefetch_buffer_header (p2, STORE);
2358 vlib_prefetch_buffer_header (p3, STORE);
2360 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2361 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2364 pi0 = to_next[0] = from[0];
2365 pi1 = to_next[1] = from[1];
2370 n_left_to_next -= 2;
2372 p0 = vlib_get_buffer (vm, pi0);
2373 p1 = vlib_get_buffer (vm, pi1);
2375 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2376 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2379 * pre-fetch the per-adjacency counters
2383 vlib_prefetch_combined_counter (&adjacency_counters,
2384 cpu_index, adj_index0);
2385 vlib_prefetch_combined_counter (&adjacency_counters,
2386 cpu_index, adj_index1);
2389 /* We should never rewrite a pkt using the MISS adjacency */
2390 ASSERT (adj_index0 && adj_index1);
2392 ip0 = vlib_buffer_get_current (p0);
2393 ip1 = vlib_buffer_get_current (p1);
2395 error0 = error1 = IP4_ERROR_NONE;
2396 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2398 /* Decrement TTL & update checksum.
2399 Works either endian, so no need for byte swap. */
2400 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2402 i32 ttl0 = ip0->ttl;
2404 /* Input node should have rejected packets with ttl 0. */
2405 ASSERT (ip0->ttl > 0);
2407 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2408 checksum0 += checksum0 >= 0xffff;
2410 ip0->checksum = checksum0;
2415 * If the ttl drops below 1 when forwarding, generate
2418 if (PREDICT_FALSE (ttl0 <= 0))
2420 error0 = IP4_ERROR_TIME_EXPIRED;
2421 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2422 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2423 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2425 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2428 /* Verify checksum. */
2429 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2433 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2435 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2437 i32 ttl1 = ip1->ttl;
2439 /* Input node should have rejected packets with ttl 0. */
2440 ASSERT (ip1->ttl > 0);
2442 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2443 checksum1 += checksum1 >= 0xffff;
2445 ip1->checksum = checksum1;
2450 * If the ttl drops below 1 when forwarding, generate
2453 if (PREDICT_FALSE (ttl1 <= 0))
2455 error1 = IP4_ERROR_TIME_EXPIRED;
2456 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2457 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2458 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2460 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2463 /* Verify checksum. */
2464 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2465 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2469 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2472 /* Rewrite packet header and updates lengths. */
2473 adj0 = ip_get_adjacency (lm, adj_index0);
2474 adj1 = ip_get_adjacency (lm, adj_index1);
2476 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2477 rw_len0 = adj0[0].rewrite_header.data_bytes;
2478 rw_len1 = adj1[0].rewrite_header.data_bytes;
2479 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2480 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2482 /* Check MTU of outgoing interface. */
2484 (vlib_buffer_length_in_chain (vm, p0) >
2486 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2489 (vlib_buffer_length_in_chain (vm, p1) >
2491 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2494 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2495 * to see the IP header */
2496 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2498 next0 = adj0[0].rewrite_header.next_index;
2499 p0->current_data -= rw_len0;
2500 p0->current_length += rw_len0;
2501 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2502 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2505 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2506 vnet_feature_arc_start (lm->output_feature_arc_index,
2507 tx_sw_if_index0, &next0, p0);
2509 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2511 next1 = adj1[0].rewrite_header.next_index;
2512 p1->current_data -= rw_len1;
2513 p1->current_length += rw_len1;
2515 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2516 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2519 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2520 vnet_feature_arc_start (lm->output_feature_arc_index,
2521 tx_sw_if_index1, &next1, p1);
2524 /* Guess we are only writing on simple Ethernet header. */
2525 vnet_rewrite_two_headers (adj0[0], adj1[0],
2526 ip0, ip1, sizeof (ethernet_header_t));
2529 * Bump the per-adjacency counters
2533 vlib_increment_combined_counter
2534 (&adjacency_counters,
2537 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2539 vlib_increment_combined_counter
2540 (&adjacency_counters,
2543 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2548 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2549 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2554 * copy bytes from the IP address into the MAC rewrite
2556 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2557 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
2560 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2561 to_next, n_left_to_next,
2562 pi0, pi1, next0, next1);
2565 while (n_left_from > 0 && n_left_to_next > 0)
2567 ip_adjacency_t *adj0;
2570 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2571 u32 tx_sw_if_index0;
2573 pi0 = to_next[0] = from[0];
2575 p0 = vlib_get_buffer (vm, pi0);
2577 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2579 /* We should never rewrite a pkt using the MISS adjacency */
2580 ASSERT (adj_index0);
2582 adj0 = ip_get_adjacency (lm, adj_index0);
2584 ip0 = vlib_buffer_get_current (p0);
2586 error0 = IP4_ERROR_NONE;
2587 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2589 /* Decrement TTL & update checksum. */
2590 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2592 i32 ttl0 = ip0->ttl;
2594 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2596 checksum0 += checksum0 >= 0xffff;
2598 ip0->checksum = checksum0;
2600 ASSERT (ip0->ttl > 0);
2606 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2608 if (PREDICT_FALSE (ttl0 <= 0))
2611 * If the ttl drops below 1 when forwarding, generate
2614 error0 = IP4_ERROR_TIME_EXPIRED;
2615 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2616 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2617 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2618 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2624 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2628 vlib_prefetch_combined_counter (&adjacency_counters,
2629 cpu_index, adj_index0);
2631 /* Guess we are only writing on simple Ethernet header. */
2632 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2636 * copy bytes from the IP address into the MAC rewrite
2638 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2641 /* Update packet buffer attributes/set output interface. */
2642 rw_len0 = adj0[0].rewrite_header.data_bytes;
2643 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2646 vlib_increment_combined_counter
2647 (&adjacency_counters,
2648 cpu_index, adj_index0, 1,
2649 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2651 /* Check MTU of outgoing interface. */
2652 error0 = (vlib_buffer_length_in_chain (vm, p0)
2653 > adj0[0].rewrite_header.max_l3_packet_bytes
2654 ? IP4_ERROR_MTU_EXCEEDED : error0);
2656 p0->error = error_node->errors[error0];
2658 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2659 * to see the IP header */
2660 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2662 p0->current_data -= rw_len0;
2663 p0->current_length += rw_len0;
2664 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2666 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2667 next0 = adj0[0].rewrite_header.next_index;
2671 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2675 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2676 vnet_feature_arc_start (lm->output_feature_arc_index,
2677 tx_sw_if_index0, &next0, p0);
2684 n_left_to_next -= 1;
2686 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2687 to_next, n_left_to_next,
2691 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2694 /* Need to do trace after rewrites to pick up new packet data. */
2695 if (node->flags & VLIB_NODE_FLAG_TRACE)
2696 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2698 return frame->n_vectors;
2702 /** @brief IPv4 rewrite node.
2705 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2706 header checksum, fetch the ip adjacency, check the outbound mtu,
2707 apply the adjacency rewrite, and send pkts to the adjacency
2708 rewrite header's rewrite_next_index.
2710 @param vm vlib_main_t corresponding to the current thread
2711 @param node vlib_node_runtime_t
2712 @param frame vlib_frame_t whose contents should be dispatched
2714 @par Graph mechanics: buffer metadata, next index usage
2717 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2718 - the rewrite adjacency index
2719 - <code>adj->lookup_next_index</code>
2720 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2721 the packet will be dropped.
2722 - <code>adj->rewrite_header</code>
2723 - Rewrite string length, rewrite string, next_index
2726 - <code>b->current_data, b->current_length</code>
2727 - Updated net of applying the rewrite string
2729 <em>Next Indices:</em>
2730 - <code> adj->rewrite_header.next_index </code>
2734 ip4_rewrite (vlib_main_t * vm,
2735 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Dispatch function of the "ip4-rewrite" node: hands the frame to
 * ip4_rewrite_inline and returns its result.  The fourth argument is 1 when
 * adj_are_counters_enabled () is true and 0 otherwise; the fifth and sixth
 * arguments are always 0 here.
 * NOTE(review): the meaning of the three trailing flags is defined by
 * ip4_rewrite_inline's parameter list -- confirm there. */
2737 if (adj_are_counters_enabled ())
2738 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2740 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2744 ip4_midchain (vlib_main_t * vm,
2745 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Dispatch function of the "ip4-midchain" node: calls ip4_rewrite_inline
 * with the fifth argument fixed to 1 and the sixth to 0.  The fourth argument
 * follows adj_are_counters_enabled ().
 * NOTE(review): the fifth flag presumably selects the midchain fixup path --
 * confirm against ip4_rewrite_inline's parameter list. */
2747 if (adj_are_counters_enabled ())
2748 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2750 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2754 ip4_rewrite_mcast (vlib_main_t * vm,
2755 vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Dispatch function of the "ip4-rewrite-mcast" node: calls
 * ip4_rewrite_inline with the fifth argument fixed to 0 and the sixth to 1.
 * The fourth argument follows adj_are_counters_enabled ().
 * NOTE(review): the sixth flag presumably selects the multicast MAC fixup --
 * confirm against ip4_rewrite_inline's parameter list. */
2757 if (adj_are_counters_enabled ())
2758 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2760 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/* Registers graph node "ip4-rewrite" with ip4_rewrite as its dispatch
 * function and format_ip4_rewrite_trace as its trace formatter.  Vector
 * elements are sizeof (u32) bytes.  The DROP next index maps to "error-drop"
 * and the ICMP_ERROR next index maps to "ip4-icmp-error". */
2764 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2765 .function = ip4_rewrite,
2766 .name = "ip4-rewrite",
2767 .vector_size = sizeof (u32),
2769 .format_trace = format_ip4_rewrite_trace,
2773 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2774 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2777 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Registers graph node "ip4-rewrite-mcast" with ip4_rewrite_mcast as its
 * dispatch function.  It is declared a sibling of "ip4-rewrite" and lists no
 * next nodes of its own.
 * NOTE(review): a sibling presumably shares the next-node arcs of
 * "ip4-rewrite" -- confirm against the vlib node documentation. */
2779 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2780 .function = ip4_rewrite_mcast,
2781 .name = "ip4-rewrite-mcast",
2782 .vector_size = sizeof (u32),
2784 .format_trace = format_ip4_rewrite_trace,
2785 .sibling_of = "ip4-rewrite",
2787 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/* Registers graph node "ip4-midchain" with ip4_midchain as its dispatch
 * function, declared a sibling of "ip4-rewrite".  Unlike the two rewrite
 * nodes, its trace formatter is format_ip4_forward_next_trace. */
2789 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2790 .function = ip4_midchain,
2791 .name = "ip4-midchain",
2792 .vector_size = sizeof (u32),
2793 .format_trace = format_ip4_forward_next_trace,
2794 .sibling_of = "ip4-rewrite",
2796 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* CLI handler for "set interface ip table <interface> <table-id>".
 *
 * Parses an interface and a decimal table id from the input.  It builds an
 * error if either is missing ("unknown interface" / "expected table id"), or
 * if the interface already carries an IPv4 address ("interface %U has address
 * %U").  Otherwise it finds or creates the IPv4 unicast and multicast FIB
 * tables for that table id and records their indices against the interface.
 *
 * @param vm     unused in the visible code
 * @param input  CLI input to parse
 * @param cmd    unused in the visible code
 * @return NOTE(review): presumably `error` (0 on success) -- the return
 *         statement is not visible here; confirm. */
2799 static clib_error_t *
2800 add_del_interface_table (vlib_main_t * vm,
2801 unformat_input_t * input, vlib_cli_command_t * cmd)
2803 vnet_main_t *vnm = vnet_get_main ();
2804 ip_interface_address_t *ia;
2805 clib_error_t *error = 0;
2806 u32 sw_if_index, table_id;
2810 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2812 error = clib_error_return (0, "unknown interface `%U'",
2813 format_unformat_error, input);
2817 if (unformat (input, "%d", &table_id))
2821 error = clib_error_return (0, "expected table id `%U'",
2822 format_unformat_error, input);
2827 * If the interface already has in IP address, then a change int
2828 * VRF is not allowed. The IP address applied must first be removed.
2829 * We do not do that automatically here, since VPP has no knowledge
2830 * of whether thoses subnets are valid in the destination VRF.
2833 foreach_ip_interface_address (&ip4_main.lookup_main,
2835 1 /* honor unnumbered */,
2839 a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2840 error = clib_error_return (0, "interface %U has address %U",
2841 format_vnet_sw_if_index_name, vnm,
2843 format_ip4_address, a);
2849 ip4_main_t *im = &ip4_main;
/* Unicast: find or create (and lock) the FIB for table_id, grow the
 * per-interface vector if needed, then record the FIB index. */
2852 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2854 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2855 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
/* Multicast: same steps against the mFIB; the fib_index local is reused. */
2857 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2858 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2859 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2867 * Place the indicated interface into the supplied IPv4 FIB table (also known
2868 * as a VRF). If the FIB table does not exist, this command creates it. To
2869 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2870 * FIB table will only be displayed if a route has been added to the table, or
2871 * an IP Address is assigned to an interface in the table (which adds a route
2874 * @note IP addresses added after setting the interface IP table are added to
2875 * the indicated FIB table. If an IP address is added prior to changing the
2876 * table then this is an error. The control plane must remove these addresses
2877 * first and then change the table. VPP will not automatically move the
2878 * addresses from the old to the new table as it does not know the validity
2882 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2883 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2886 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
/* Binds the CLI path "set interface ip table" to add_del_interface_table. */
2888 .path = "set interface ip table",
2889 .function = add_del_interface_table,
2890 .short_help = "set interface ip table <interface> <table-id>",
/* Cross-checks the mtrie lookup for address `a` in FIB `fib_index0` against
 * ip4_fib_table_lookup_lb ().
 *
 * Starting from the root leaf, it runs four mtrie lookup steps (indices
 * 0..3) and substitutes the mtrie's default_leaf when the result is
 * IP4_FIB_MTRIE_LEAF_EMPTY.  It returns non-zero when the index taken from
 * the final leaf equals the value returned by ip4_fib_table_lookup_lb () for
 * the same address, and 0 otherwise.
 * NOTE(review): presumably one step per address byte -- confirm against
 * ip4_fib_mtrie_lookup_step. */
2895 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2897 ip4_fib_mtrie_t *mtrie0;
2898 ip4_fib_mtrie_leaf_t leaf0;
2901 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2903 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2904 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2905 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2906 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2907 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2909 /* Handle default route. */
2910 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2912 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2914 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/* CLI handler for "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]".
 *
 * Accepts "table %d", "count %f" and a bare IPv4 address in any order; any
 * other token returns an "unknown input" error.  It then calls
 * ip4_lookup_validate () on n addresses, starting at the given address, and
 * prints either the error count or "No errors in %d lookups".
 *
 * NOTE(review): the "table" value is passed straight to ip4_fib_get () and
 * compared with fib->index, so it is treated as a FIB index rather than a
 * table id.
 *
 * @param vm     used for vlib_cli_output
 * @param input  CLI input to parse
 * @param cmd    unused in the visible code */
2917 static clib_error_t *
2918 test_lookup_command_fn (vlib_main_t * vm,
2919 unformat_input_t * input, vlib_cli_command_t * cmd)
2926 ip4_address_t ip4_base_address;
2929 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2931 if (unformat (input, "table %d", &table_id))
2933 /* Make sure the entry exists. */
2934 fib = ip4_fib_get (table_id);
2935 if ((fib) && (fib->index != table_id))
2936 return clib_error_return (0, "<fib-index> %d does not exist",
2939 else if (unformat (input, "count %f", &count))
2942 else if (unformat (input, "%U",
2943 unformat_ip4_address, &ip4_base_address))
2946 return clib_error_return (0, "unknown input `%U'",
2947 format_unformat_error, input);
/* Each iteration validates one address, then advances it by one in host
 * byte order and converts it back to network byte order. */
2952 for (i = 0; i < n; i++)
2954 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2957 ip4_base_address.as_u32 =
2958 clib_host_to_net_u32 (1 +
2959 clib_net_to_host_u32 (ip4_base_address.as_u32));
2963 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2965 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2971 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2972 * given FIB table to determine if there is a conflict with the
2973 * adjacency table. The fib-id can be determined by using the
2974 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2977 * @todo This command uses fib-id, other commands use table-id (not
2978 * just a name, they are different indexes). Would like to change this
2979 * to table-id for consistency.
2982 * Example of how to run the test lookup command:
2983 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2984 * No errors in 2 lookups
2988 VLIB_CLI_COMMAND (lookup_test_command, static) =
/* Binds the CLI path "test lookup" to test_lookup_command_fn. */
2990 .path = "test lookup",
2991 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2992 .function = test_lookup_command_fn,
/* Sets the flow-hash configuration of the IPv4 FIB identified by table_id.
 *
 * Looks table_id up in ip4_main.fib_index_by_table_id, fetches the FIB at
 * the resulting index, and stores flow_hash_config in it.
 *
 * @param table_id          user-visible table id, used as the hash key
 * @param flow_hash_config  value written to fib->flow_hash_config
 * @return VNET_API_ERROR_NO_SUCH_FIB on the early-exit path shown below.
 *         NOTE(review): presumably taken when the lookup finds nothing, and
 *         0 otherwise -- the guard and final return are not visible here. */
2997 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2999 ip4_main_t *im4 = &ip4_main;
3001 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3004 return VNET_API_ERROR_NO_SUCH_FIB;
3006 fib = ip4_fib_get (p[0]);
3008 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 *
 * Parses "table %d" plus any of the keywords generated from
 * foreach_flow_hash_bit; each matched keyword ORs its bit into
 * flow_hash_config and sets `matched`.  The result is applied with
 * vnet_set_ip4_flow_hash ().  VNET_API_ERROR_NO_SUCH_FIB is reported as
 * "no such FIB table %d"; the remaining visible branch logs a
 * "BUG: illegal flow hash config" warning.
 *
 * @param vm     unused in the visible code
 * @param input  CLI input to parse
 * @param cmd    unused in the visible code */
3012 static clib_error_t *
3013 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3014 unformat_input_t * input,
3015 vlib_cli_command_t * cmd)
3019 u32 flow_hash_config = 0;
3022 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3024 if (unformat (input, "table %d", &table_id))
3027 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3028 foreach_flow_hash_bit
3035 return clib_error_return (0, "unknown input `%U'",
3036 format_unformat_error, input);
3038 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3044 case VNET_API_ERROR_NO_SUCH_FIB:
3045 return clib_error_return (0, "no such FIB table %d", table_id);
3048 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3056 * Configure the set of IPv4 fields used by the flow hash.
3059 * Example of how to set the flow hash on a given table:
3060 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3061 * Example of how to display the configured flow hash:
3062 * @cliexstart{show ip fib}
3063 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3066 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3067 * [0] [@0]: dpo-drop ip6
3070 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3071 * [0] [@0]: dpo-drop ip6
3074 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3075 * [0] [@0]: dpo-drop ip6
3078 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3079 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3082 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3083 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3084 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3085 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3086 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3089 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3090 * [0] [@0]: dpo-drop ip6
3091 * 255.255.255.255/32
3093 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3094 * [0] [@0]: dpo-drop ip6
3095 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3098 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3099 * [0] [@0]: dpo-drop ip6
3102 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3103 * [0] [@0]: dpo-drop ip6
3106 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3107 * [0] [@4]: ipv4-glean: af_packet0
3110 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3111 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3114 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3115 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3118 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3119 * [0] [@4]: ipv4-glean: af_packet1
3122 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3123 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3126 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3127 * [0] [@0]: dpo-drop ip6
3130 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3131 * [0] [@0]: dpo-drop ip6
3132 * 255.255.255.255/32
3134 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3135 * [0] [@0]: dpo-drop ip6
3139 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
/* Binds the CLI path "set ip flow-hash" to set_ip_flow_hash_command_fn. */
3141 .path = "set ip flow-hash",
3143 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3144 .function = set_ip_flow_hash_command_fn,
/* Attaches a classifier table to an interface, or detaches it.
 *
 * Validates both indices, records table_index in
 * lm->classify_table_index_by_sw_if_index[sw_if_index], and then, if the
 * interface has a first IPv4 address, updates the FIB entry for that address
 * in the interface's FIB.  A table_index other than ~0 adds a
 * FIB_SOURCE_CLASSIFY special entry carrying a classify DPO.
 * NOTE(review): presumably table_index == ~0 takes the removal branch -- the
 * `else` line is not visible here.
 *
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE if sw_if_index is a free pool
 *         index; VNET_API_ERROR_NO_SUCH_ENTRY if table_index is not ~0 and
 *         is a free index in the classifier table pool. */
3149 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3152 vnet_main_t *vnm = vnet_get_main ();
3153 vnet_interface_main_t *im = &vnm->interface_main;
3154 ip4_main_t *ipm = &ip4_main;
3155 ip_lookup_main_t *lm = &ipm->lookup_main;
3156 vnet_classify_main_t *cm = &vnet_classify_main;
3157 ip4_address_t *if_addr;
3159 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3160 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3162 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3163 return VNET_API_ERROR_NO_SUCH_ENTRY;
3165 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3166 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3168 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3170 if (NULL != if_addr)
3172 fib_prefix_t pfx = {
3174 .fp_proto = FIB_PROTOCOL_IP4,
3175 .fp_addr.ip4 = *if_addr,
3179 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3183 if (table_index != (u32) ~ 0)
3185 dpo_id_t dpo = DPO_INVALID;
3190 classify_dpo_create (DPO_PROTO_IP4, table_index));
3192 fib_table_entry_special_dpo_add (fib_index,
3194 FIB_SOURCE_CLASSIFY,
3195 FIB_ENTRY_FLAG_NONE, &dpo);
3200 fib_table_entry_special_remove (fib_index,
3201 &pfx, FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify intfc <interface> table-index <idx>".
 *
 * Parses "table-index %d" and "intfc <interface>" in any order.  It returns
 * an error if the table index was never supplied or if sw_if_index is still
 * ~0 after parsing.  Otherwise it calls vnet_set_ip4_classify_intfc () and
 * maps VNET_API_ERROR_NO_MATCHING_INTERFACE and VNET_API_ERROR_NO_SUCH_ENTRY
 * to "No such interface" and "No such classifier table".
 *
 * @param vm     forwarded to vnet_set_ip4_classify_intfc
 * @param input  CLI input to parse
 * @param cmd    unused in the visible code */
3208 static clib_error_t *
3209 set_ip_classify_command_fn (vlib_main_t * vm,
3210 unformat_input_t * input,
3211 vlib_cli_command_t * cmd)
3213 u32 table_index = ~0;
3214 int table_index_set = 0;
3215 u32 sw_if_index = ~0;
3218 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3220 if (unformat (input, "table-index %d", &table_index))
3221 table_index_set = 1;
3222 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3223 vnet_get_main (), &sw_if_index))
/* A separate flag tracks whether table-index was given, because ~0 is also
 * the initial value of table_index. */
3229 if (table_index_set == 0)
3230 return clib_error_return (0, "classify table-index must be specified");
3232 if (sw_if_index == ~0)
3233 return clib_error_return (0, "interface / subif must be specified");
3235 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3242 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3243 return clib_error_return (0, "No such interface");
3245 case VNET_API_ERROR_NO_SUCH_ENTRY:
3246 return clib_error_return (0, "No such classifier table");
3252 * Assign a classification table to an interface. The classification
3253 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3254 * commands. Once the table is created, use this command to filter packets
3258 * Example of how to assign a classification table to an interface:
3259 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3262 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
/* Binds the CLI path "set ip classify" to set_ip_classify_command_fn. */
3264 .path = "set ip classify",
3266 "set ip classify intfc <interface> table-index <classify-idx>",
3267 .function = set_ip_classify_command_fn,
3272 * fd.io coding-style-patch-verification: ON
3275 * eval: (c-set-style "gnu")