2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 * @brief IPv4 Forwarding.
59 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the common trace helper. It is called by the lookup
   and load-balance node functions below and defined near the end of this
   file. */
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 cpu_index = os_get_cpu_number ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
100 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
102 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
104 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
106 flow_hash_config_t flow_hash_config0, flow_hash_config1;
107 flow_hash_config_t flow_hash_config2, flow_hash_config3;
108 u32 hash_c0, hash_c1, hash_c2, hash_c3;
109 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
111 /* Prefetch next iteration. */
113 vlib_buffer_t *p4, *p5, *p6, *p7;
115 p4 = vlib_get_buffer (vm, from[4]);
116 p5 = vlib_get_buffer (vm, from[5]);
117 p6 = vlib_get_buffer (vm, from[6]);
118 p7 = vlib_get_buffer (vm, from[7]);
120 vlib_prefetch_buffer_header (p4, LOAD);
121 vlib_prefetch_buffer_header (p5, LOAD);
122 vlib_prefetch_buffer_header (p6, LOAD);
123 vlib_prefetch_buffer_header (p7, LOAD);
125 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
128 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
131 pi0 = to_next[0] = from[0];
132 pi1 = to_next[1] = from[1];
133 pi2 = to_next[2] = from[2];
134 pi3 = to_next[3] = from[3];
141 p0 = vlib_get_buffer (vm, pi0);
142 p1 = vlib_get_buffer (vm, pi1);
143 p2 = vlib_get_buffer (vm, pi2);
144 p3 = vlib_get_buffer (vm, pi3);
146 ip0 = vlib_buffer_get_current (p0);
147 ip1 = vlib_buffer_get_current (p1);
148 ip2 = vlib_buffer_get_current (p2);
149 ip3 = vlib_buffer_get_current (p3);
151 dst_addr0 = &ip0->dst_address;
152 dst_addr1 = &ip1->dst_address;
153 dst_addr2 = &ip2->dst_address;
154 dst_addr3 = &ip3->dst_address;
157 vec_elt (im->fib_index_by_sw_if_index,
158 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
160 vec_elt (im->fib_index_by_sw_if_index,
161 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
163 vec_elt (im->fib_index_by_sw_if_index,
164 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
166 vec_elt (im->fib_index_by_sw_if_index,
167 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
169 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
170 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
172 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
173 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
175 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
176 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
178 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
179 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
182 if (!lookup_for_responses_to_locally_received_packets)
184 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
185 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
186 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
187 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
189 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
191 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
192 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
193 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
194 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
197 tcp0 = (void *) (ip0 + 1);
198 tcp1 = (void *) (ip1 + 1);
199 tcp2 = (void *) (ip2 + 1);
200 tcp3 = (void *) (ip3 + 1);
202 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
203 || ip0->protocol == IP_PROTOCOL_UDP);
204 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
205 || ip1->protocol == IP_PROTOCOL_UDP);
206 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
207 || ip2->protocol == IP_PROTOCOL_UDP);
208 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
209 || ip1->protocol == IP_PROTOCOL_UDP);
211 if (!lookup_for_responses_to_locally_received_packets)
213 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
214 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
215 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
216 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
219 if (!lookup_for_responses_to_locally_received_packets)
221 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
222 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
223 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
224 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
227 if (!lookup_for_responses_to_locally_received_packets)
229 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
230 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
231 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
232 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
235 if (lookup_for_responses_to_locally_received_packets)
237 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
238 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
239 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
240 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
244 /* Handle default route. */
247 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
250 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
253 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
256 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
257 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
258 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
259 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
260 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
263 lb0 = load_balance_get (lb_index0);
264 lb1 = load_balance_get (lb_index1);
265 lb2 = load_balance_get (lb_index2);
266 lb3 = load_balance_get (lb_index3);
268 /* Use flow hash to compute multipath adjacency. */
269 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
270 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
271 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
272 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
273 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
275 flow_hash_config0 = lb0->lb_hash_config;
276 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
277 ip4_compute_flow_hash (ip0, flow_hash_config0);
279 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
281 flow_hash_config1 = lb1->lb_hash_config;
282 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
283 ip4_compute_flow_hash (ip1, flow_hash_config1);
285 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
287 flow_hash_config2 = lb2->lb_hash_config;
288 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
289 ip4_compute_flow_hash (ip2, flow_hash_config2);
291 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
293 flow_hash_config3 = lb3->lb_hash_config;
294 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
295 ip4_compute_flow_hash (ip3, flow_hash_config3);
298 ASSERT (lb0->lb_n_buckets > 0);
299 ASSERT (is_pow2 (lb0->lb_n_buckets));
300 ASSERT (lb1->lb_n_buckets > 0);
301 ASSERT (is_pow2 (lb1->lb_n_buckets));
302 ASSERT (lb2->lb_n_buckets > 0);
303 ASSERT (is_pow2 (lb2->lb_n_buckets));
304 ASSERT (lb3->lb_n_buckets > 0);
305 ASSERT (is_pow2 (lb3->lb_n_buckets));
307 dpo0 = load_balance_get_bucket_i (lb0,
309 (lb0->lb_n_buckets_minus_1)));
310 dpo1 = load_balance_get_bucket_i (lb1,
312 (lb1->lb_n_buckets_minus_1)));
313 dpo2 = load_balance_get_bucket_i (lb2,
315 (lb2->lb_n_buckets_minus_1)));
316 dpo3 = load_balance_get_bucket_i (lb3,
318 (lb3->lb_n_buckets_minus_1)));
320 next0 = dpo0->dpoi_next_node;
321 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
322 next1 = dpo1->dpoi_next_node;
323 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
324 next2 = dpo2->dpoi_next_node;
325 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
326 next3 = dpo3->dpoi_next_node;
327 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
329 vlib_increment_combined_counter
330 (cm, cpu_index, lb_index0, 1,
331 vlib_buffer_length_in_chain (vm, p0)
332 + sizeof (ethernet_header_t));
333 vlib_increment_combined_counter
334 (cm, cpu_index, lb_index1, 1,
335 vlib_buffer_length_in_chain (vm, p1)
336 + sizeof (ethernet_header_t));
337 vlib_increment_combined_counter
338 (cm, cpu_index, lb_index2, 1,
339 vlib_buffer_length_in_chain (vm, p2)
340 + sizeof (ethernet_header_t));
341 vlib_increment_combined_counter
342 (cm, cpu_index, lb_index3, 1,
343 vlib_buffer_length_in_chain (vm, p3)
344 + sizeof (ethernet_header_t));
346 vlib_validate_buffer_enqueue_x4 (vm, node, next,
347 to_next, n_left_to_next,
349 next0, next1, next2, next3);
352 while (n_left_from > 0 && n_left_to_next > 0)
356 __attribute__ ((unused)) tcp_header_t *tcp0;
357 ip_lookup_next_t next0;
358 const load_balance_t *lb0;
359 ip4_fib_mtrie_t *mtrie0;
360 ip4_fib_mtrie_leaf_t leaf0;
361 ip4_address_t *dst_addr0;
362 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
363 flow_hash_config_t flow_hash_config0;
364 const dpo_id_t *dpo0;
370 p0 = vlib_get_buffer (vm, pi0);
372 ip0 = vlib_buffer_get_current (p0);
374 dst_addr0 = &ip0->dst_address;
377 vec_elt (im->fib_index_by_sw_if_index,
378 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
380 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
381 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
383 if (!lookup_for_responses_to_locally_received_packets)
385 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
387 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
389 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
392 tcp0 = (void *) (ip0 + 1);
394 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
395 || ip0->protocol == IP_PROTOCOL_UDP);
397 if (!lookup_for_responses_to_locally_received_packets)
398 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
400 if (!lookup_for_responses_to_locally_received_packets)
401 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
403 if (!lookup_for_responses_to_locally_received_packets)
404 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
406 if (lookup_for_responses_to_locally_received_packets)
407 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
410 /* Handle default route. */
413 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
414 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
417 lb0 = load_balance_get (lbi0);
419 /* Use flow hash to compute multipath adjacency. */
420 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
421 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
423 flow_hash_config0 = lb0->lb_hash_config;
425 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
426 ip4_compute_flow_hash (ip0, flow_hash_config0);
429 ASSERT (lb0->lb_n_buckets > 0);
430 ASSERT (is_pow2 (lb0->lb_n_buckets));
432 dpo0 = load_balance_get_bucket_i (lb0,
434 (lb0->lb_n_buckets_minus_1)));
436 next0 = dpo0->dpoi_next_node;
437 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
439 vlib_increment_combined_counter
440 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
447 if (PREDICT_FALSE (next0 != next))
450 vlib_put_next_frame (vm, node, next, n_left_to_next);
452 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
459 vlib_put_next_frame (vm, node, next, n_left_to_next);
462 if (node->flags & VLIB_NODE_FLAG_TRACE)
463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
465 return frame->n_vectors;
468 /** @brief IPv4 lookup node.
471 This is the main IPv4 lookup dispatch node.
473 @param vm vlib_main_t corresponding to the current thread
474 @param node vlib_node_runtime_t
475 @param frame vlib_frame_t whose contents should be dispatched
477 @par Graph mechanics: buffer metadata, next index usage
480 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
481 - Indicates the @c sw_if_index value of the interface that the
482 packet was received on.
483 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
484 - When the value is @c ~0 then the node performs a longest prefix
485 match (LPM) for the packet destination address in the FIB attached
486 to the receive interface.
487 - Otherwise perform LPM for the packet destination address in the
488 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
489 value (0, 1, ...) and not a VRF id.
492 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
493 - The lookup result adjacency index.
496 - Dispatches the packet to the node index found in
497 ip_adjacency_t @c adj->lookup_next_index
498 (where @c adj is the lookup result adjacency).
/* Dispatch function registered for the "ip4-lookup" node: a thin wrapper
   that hands the frame straight to ip4_lookup_inline and returns its
   result. */
501 ip4_lookup (vlib_main_t * vm,
502 vlib_node_runtime_t * node, vlib_frame_t * frame)
504 return ip4_lookup_inline (vm, node, frame,
505 /* lookup_for_responses_to_locally_received_packets */
/* Trace formatter for the lookup nodes; defined later in this file. */
510 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph-node registration for "ip4-lookup": frames carry u32 elements,
   traces are printed with format_ip4_lookup_trace, and the IP_LOOKUP_N_NEXT
   next nodes come from IP4_LOOKUP_NEXT_NODES. */
512 VLIB_REGISTER_NODE (ip4_lookup_node) =
514 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
515 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
516 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
518 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
521 ip4_load_balance (vlib_main_t * vm,
522 vlib_node_runtime_t * node, vlib_frame_t * frame)
524 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
525 u32 n_left_from, n_left_to_next, *from, *to_next;
526 ip_lookup_next_t next;
527 u32 cpu_index = os_get_cpu_number ();
529 from = vlib_frame_vector_args (frame);
530 n_left_from = frame->n_vectors;
531 next = node->cached_next_index;
533 if (node->flags & VLIB_NODE_FLAG_TRACE)
534 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
536 while (n_left_from > 0)
538 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
541 while (n_left_from >= 4 && n_left_to_next >= 2)
543 ip_lookup_next_t next0, next1;
544 const load_balance_t *lb0, *lb1;
545 vlib_buffer_t *p0, *p1;
546 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
547 const ip4_header_t *ip0, *ip1;
548 const dpo_id_t *dpo0, *dpo1;
550 /* Prefetch next iteration. */
552 vlib_buffer_t *p2, *p3;
554 p2 = vlib_get_buffer (vm, from[2]);
555 p3 = vlib_get_buffer (vm, from[3]);
557 vlib_prefetch_buffer_header (p2, STORE);
558 vlib_prefetch_buffer_header (p3, STORE);
560 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
561 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
564 pi0 = to_next[0] = from[0];
565 pi1 = to_next[1] = from[1];
572 p0 = vlib_get_buffer (vm, pi0);
573 p1 = vlib_get_buffer (vm, pi1);
575 ip0 = vlib_buffer_get_current (p0);
576 ip1 = vlib_buffer_get_current (p1);
577 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
578 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
580 lb0 = load_balance_get (lbi0);
581 lb1 = load_balance_get (lbi1);
584 * this node is for via FIBs we can re-use the hash value from the
585 * to node if present.
586 * We don't want to use the same hash value at each level in the recursion
587 * graph as that would lead to polarisation
589 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
590 hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
592 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
594 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
596 hc0 = vnet_buffer (p0)->ip.flow_hash =
597 vnet_buffer (p0)->ip.flow_hash >> 1;
601 hc0 = vnet_buffer (p0)->ip.flow_hash =
602 ip4_compute_flow_hash (ip0, hc0);
605 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
607 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
609 hc1 = vnet_buffer (p1)->ip.flow_hash =
610 vnet_buffer (p1)->ip.flow_hash >> 1;
614 hc1 = vnet_buffer (p1)->ip.flow_hash =
615 ip4_compute_flow_hash (ip1, hc1);
620 load_balance_get_bucket_i (lb0,
621 hc0 & (lb0->lb_n_buckets_minus_1));
623 load_balance_get_bucket_i (lb1,
624 hc1 & (lb1->lb_n_buckets_minus_1));
626 next0 = dpo0->dpoi_next_node;
627 next1 = dpo1->dpoi_next_node;
629 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
630 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
632 vlib_increment_combined_counter
633 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
634 vlib_increment_combined_counter
635 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
637 vlib_validate_buffer_enqueue_x2 (vm, node, next,
638 to_next, n_left_to_next,
639 pi0, pi1, next0, next1);
642 while (n_left_from > 0 && n_left_to_next > 0)
644 ip_lookup_next_t next0;
645 const load_balance_t *lb0;
648 const ip4_header_t *ip0;
649 const dpo_id_t *dpo0;
658 p0 = vlib_get_buffer (vm, pi0);
660 ip0 = vlib_buffer_get_current (p0);
661 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
663 lb0 = load_balance_get (lbi0);
665 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
666 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
668 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
670 hc0 = vnet_buffer (p0)->ip.flow_hash =
671 vnet_buffer (p0)->ip.flow_hash >> 1;
675 hc0 = vnet_buffer (p0)->ip.flow_hash =
676 ip4_compute_flow_hash (ip0, hc0);
681 load_balance_get_bucket_i (lb0,
682 hc0 & (lb0->lb_n_buckets_minus_1));
684 next0 = dpo0->dpoi_next_node;
685 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
687 vlib_increment_combined_counter
688 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
690 vlib_validate_buffer_enqueue_x1 (vm, node, next,
691 to_next, n_left_to_next,
695 vlib_put_next_frame (vm, node, next, n_left_to_next);
698 return frame->n_vectors;
/* Graph-node registration for "ip4-load-balance".  It is declared a sibling
   of "ip4-lookup" and uses the same trace formatter as the lookup node. */
701 VLIB_REGISTER_NODE (ip4_load_balance_node) =
703 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
704 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
705 format_ip4_lookup_trace,};
707 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
709 /* get first interface address */
/* Walks the addresses configured on sw_if_index (unnumbered interfaces are
   honored) using foreach_ip_interface_address.
   result_ia: on return *result_ia is the ip_interface_address_t that goes
   with the result, or 0 when no address was found. */
711 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
712 ip_interface_address_t ** result_ia)
714 ip_lookup_main_t *lm = &im->lookup_main;
715 ip_interface_address_t *ia = 0;
716 ip4_address_t *result = 0;
719 foreach_ip_interface_address
720 (lm, ia, sw_if_index,
721 1 /* honor unnumbered */ ,
724 ip_interface_address_get_address (lm, ia);
/* Only hand back the interface-address object when an address was found. */
730 *result_ia = result ? ia : 0;
/* Installs the FIB entries that go with interface address `a` in table
   fib_index, all sourced from FIB_SOURCE_INTERFACE:
     - a CONNECTED|ATTACHED entry for the prefix, whose adjacency is saved in
       a->neighbor_probe_adj_index;
     - when a classify table is bound to sw_if_index, a special entry
       carrying a classify DPO;
     - a CONNECTED|LOCAL entry. */
735 ip4_add_interface_routes (u32 sw_if_index,
736 ip4_main_t * im, u32 fib_index,
737 ip_interface_address_t * a)
739 ip_lookup_main_t *lm = &im->lookup_main;
740 ip4_address_t *address = ip_interface_address_get_address (lm, a);
742 .fp_len = a->address_length,
743 .fp_proto = FIB_PROTOCOL_IP4,
744 .fp_addr.ip4 = *address,
/* ~0 marks "no probe adjacency" until the connected entry provides one. */
747 a->neighbor_probe_adj_index = ~0;
751 fib_node_index_t fei;
753 fei = fib_table_entry_update_one_path (fib_index, &pfx,
754 FIB_SOURCE_INTERFACE,
755 (FIB_ENTRY_FLAG_CONNECTED |
756 FIB_ENTRY_FLAG_ATTACHED),
758 /* No next-hop address */
764 // no out-label stack
766 FIB_ROUTE_PATH_FLAG_NONE);
767 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
/* The per-interface classify vector may be shorter than sw_if_index, so
   bounds-check before indexing; (u32) ~0 means no table is bound. */
772 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
774 u32 classify_table_index =
775 lm->classify_table_index_by_sw_if_index[sw_if_index];
776 if (classify_table_index != (u32) ~ 0)
778 dpo_id_t dpo = DPO_INVALID;
783 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
785 fib_table_entry_special_dpo_add (fib_index,
788 FIB_ENTRY_FLAG_NONE, &dpo);
793 fib_table_entry_update_one_path (fib_index, &pfx,
794 FIB_SOURCE_INTERFACE,
795 (FIB_ENTRY_FLAG_CONNECTED |
796 FIB_ENTRY_FLAG_LOCAL),
803 FIB_ROUTE_PATH_FLAG_NONE);
/* Counterpart of ip4_add_interface_routes: removes the entries sourced from
   FIB_SOURCE_INTERFACE for address/address_length in table fib_index.
   fib_table_entry_delete is issued twice on pfx.
   NOTE(review): presumably pfx is adjusted between the two calls (the
   statements in between are not visible here) - confirm. */
807 ip4_del_interface_routes (ip4_main_t * im,
809 ip4_address_t * address, u32 address_length)
812 .fp_len = address_length,
813 .fp_proto = FIB_PROTOCOL_IP4,
814 .fp_addr.ip4 = *address,
819 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
823 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 on an interface.
   im->ip_enabled_by_sw_if_index[sw_if_index] counts the enables; the
   feature arcs are only touched when the count moves between 0 and 1.
   sw_if_index: interface to change.
   is_enable:   non-zero to take a reference, zero to drop one. */
827 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
829 ip4_main_t *im = &ip4_main;
/* Grow the counter vector on demand; new slots start at 0. */
831 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
834 * enable/disable only on the 1<->0 transition
/* Enable path: take a reference and test for the first one. */
838 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disable path: a disable without a prior enable is a caller bug. */
843 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
844 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
847 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
851 vnet_feature_enable_disable ("ip4-multicast",
852 "ip4-mfib-forward-lookup",
853 sw_if_index, is_enable, 0, 0);
/* Adds or deletes an IPv4 address on an interface.
   On add, the new address is first checked against the addresses already on
   the interface and rejected with an error if the prefixes overlap.  The
   address is then added to / removed from the interface address pool, the
   interface's IPv4 enable reference is adjusted, the interface routes are
   added or deleted, and the registered add/del callbacks are invoked.
   Returns 0 on success, otherwise the clib_error_t describing the failure. */
856 static clib_error_t *
857 ip4_add_del_interface_address_internal (vlib_main_t * vm,
859 ip4_address_t * address,
860 u32 address_length, u32 is_del)
862 vnet_main_t *vnm = vnet_get_main ();
863 ip4_main_t *im = &ip4_main;
864 ip_lookup_main_t *lm = &im->lookup_main;
865 clib_error_t *error = 0;
866 u32 if_address_index, elts_before;
867 ip4_address_fib_t ip4_af, *addr_fib = 0;
/* Pair the address with the FIB index bound to this interface. */
869 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
870 ip4_addr_fib_init (&ip4_af, address,
871 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
872 vec_add1 (addr_fib, ip4_af);
875 * there is no support for adj-fib handling in the presence of overlapping
876 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
882 /* When adding an address check that it does not conflict
883 with an existing address. */
884 ip_interface_address_t *ia;
885 foreach_ip_interface_address
886 (&im->lookup_main, ia, sw_if_index,
887 0 /* honor unnumbered */ ,
890 ip_interface_address_get_address
891 (&im->lookup_main, ia);
892 if (ip4_destination_matches_route
893 (im, address, x, ia->address_length) ||
894 ip4_destination_matches_route (im,
900 ("failed to add %U which conflicts with %U for interface %U",
901 format_ip4_address_and_length, address,
903 format_ip4_address_and_length, x,
905 format_vnet_sw_if_index_name, vnm,
/* Remember the pool size so a change can be detected after the call. */
911 elts_before = pool_elts (lm->if_address_pool);
913 error = ip_interface_address_add_del
914 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
918 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
921 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
923 ip4_add_interface_routes (sw_if_index,
924 im, ip4_af.fib_index,
926 (lm->if_address_pool, if_address_index));
928 /* Run the callbacks only when the pool size changed; an unchanged pool means a duplicate address was added. */
929 if (elts_before != pool_elts (lm->if_address_pool))
931 ip4_add_del_interface_address_callback_t *cb;
932 vec_foreach (cb, im->add_del_interface_address_callbacks)
933 cb->function (im, cb->function_opaque, sw_if_index,
934 address, address_length, if_address_index, is_del);
/* Entry point for adding or deleting an interface address: forwards all
   arguments unchanged to ip4_add_del_interface_address_internal and returns
   its result. */
943 ip4_add_del_interface_address (vlib_main_t * vm,
945 ip4_address_t * address,
946 u32 address_length, u32 is_del)
948 return ip4_add_del_interface_address_internal
949 (vm, sw_if_index, address, address_length, is_del);
952 /* Built-in ip4 unicast rx feature path definition */
/* The arc starts at "ip4-input" / "ip4-input-no-checksum" and ends at
   "ip4-lookup".  The runs_before constraints below give this order:
     ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
     ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
     vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup -> ip4-drop
   "ip4-source-and-port-range-check-rx" is only constrained to run before
   "ip4-policer-classify". */
954 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
956 .arc_name = "ip4-unicast",
957 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
958 .end_node = "ip4-lookup",
959 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
962 VNET_FEATURE_INIT (ip4_flow_classify, static) =
964 .arc_name = "ip4-unicast",
965 .node_name = "ip4-flow-classify",
966 .runs_before = VNET_FEATURES ("ip4-inacl"),
969 VNET_FEATURE_INIT (ip4_inacl, static) =
971 .arc_name = "ip4-unicast",
972 .node_name = "ip4-inacl",
973 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
976 VNET_FEATURE_INIT (ip4_source_check_1, static) =
978 .arc_name = "ip4-unicast",
979 .node_name = "ip4-source-check-via-rx",
980 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
983 VNET_FEATURE_INIT (ip4_source_check_2, static) =
985 .arc_name = "ip4-unicast",
986 .node_name = "ip4-source-check-via-any",
987 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
990 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
992 .arc_name = "ip4-unicast",
993 .node_name = "ip4-source-and-port-range-check-rx",
994 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
997 VNET_FEATURE_INIT (ip4_policer_classify, static) =
999 .arc_name = "ip4-unicast",
1000 .node_name = "ip4-policer-classify",
1001 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1004 VNET_FEATURE_INIT (ip4_ipsec, static) =
1006 .arc_name = "ip4-unicast",
1007 .node_name = "ipsec-input-ip4",
1008 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1011 VNET_FEATURE_INIT (ip4_vpath, static) =
1013 .arc_name = "ip4-unicast",
1014 .node_name = "vpath-input-ip4",
1015 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1018 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1020 .arc_name = "ip4-unicast",
1021 .node_name = "ip4-vxlan-bypass",
1022 .runs_before = VNET_FEATURES ("ip4-lookup"),
1025 VNET_FEATURE_INIT (ip4_lookup, static) =
1027 .arc_name = "ip4-unicast",
1028 .node_name = "ip4-lookup",
1029 .runs_before = VNET_FEATURES ("ip4-drop"),
1032 VNET_FEATURE_INIT (ip4_drop, static) =
1034 .arc_name = "ip4-unicast",
1035 .node_name = "ip4-drop",
1036 .runs_before = 0, /* not before any other features */
1040 /* Built-in ip4 multicast rx feature path definition */
/* Feature order on this arc, from the runs_before constraints:
     vpath-input-ip4 -> ip4-mfib-forward-lookup -> ip4-drop
   NOTE(review): end_node names "ip4-lookup-multicast" while the lookup
   feature registered on this arc is "ip4-mfib-forward-lookup" - confirm
   that a node called "ip4-lookup-multicast" still exists. */
1041 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1043 .arc_name = "ip4-multicast",
1044 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1045 .end_node = "ip4-lookup-multicast",
1046 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1049 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1051 .arc_name = "ip4-multicast",
1052 .node_name = "vpath-input-ip4",
1053 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1056 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1058 .arc_name = "ip4-multicast",
1059 .node_name = "ip4-mfib-forward-lookup",
1060 .runs_before = VNET_FEATURES ("ip4-drop"),
1063 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1065 .arc_name = "ip4-multicast",
1066 .node_name = "ip4-drop",
1067 .runs_before = 0, /* last feature */
1070 /* Source and port-range check ip4 tx feature path definition */
/* The arc starts at "ip4-rewrite" / "ip4-midchain" and ends at
   "interface-output".  Feature order from the runs_before constraints:
     ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
     interface-output */
1071 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1073 .arc_name = "ip4-output",
1074 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1075 .end_node = "interface-output",
1076 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1079 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1081 .arc_name = "ip4-output",
1082 .node_name = "ip4-source-and-port-range-check-tx",
1083 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1086 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1088 .arc_name = "ip4-output",
1089 .node_name = "ipsec-output-ip4",
1090 .runs_before = VNET_FEATURES ("interface-output"),
1093 /* Built-in ip4 tx feature path definition */
1094 VNET_FEATURE_INIT (ip4_interface_output, static) =
1096 .arc_name = "ip4-output",
1097 .node_name = "interface-output",
1098 .runs_before = 0, /* not before any other features */
/* Software-interface add/del hook, registered through
   VNET_SW_INTERFACE_ADD_DEL_FUNCTION.  It makes sure the per-interface
   unicast and multicast FIB index vectors cover sw_if_index and toggles the
   "ip4-drop" feature on both the ip4-unicast and ip4-multicast arcs.
   Always returns 0 (no error). */
1102 static clib_error_t *
1103 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1105 ip4_main_t *im = &ip4_main;
1107 /* Fill in lookup tables with default table (0). */
1108 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1109 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1111 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1114 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1117 return /* no error */ 0;
1120 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1122 /* Global IP4 main. */
1123 ip4_main_t ip4_main;
/* Init function for IPv4 lookup, registered with VLIB_INIT_FUNCTION.
   In order: runs vnet_feature_init, fills im->fib_masks with network-order
   prefix masks, initialises the generic lookup main for IPv4, creates and
   locks unicast and multicast table 0, hooks the packet-generator header
   parser onto the ip4-lookup node, and builds the ARP request packet
   template. */
1126 ip4_lookup_init (vlib_main_t * vm)
1128 ip4_main_t *im = &ip4_main;
1129 clib_error_t *error;
/* The feature arcs must be set up before anything here uses them. */
1132 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
/* fib_masks[i] is the mask with the top i bits set, in network order. */
1135 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1140 m = pow2_mask (i) << (32 - i);
1143 im->fib_masks[i] = clib_host_to_net_u32 (m);
1146 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1148 /* Create FIB with index 0 and table id of 0. */
1149 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1150 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1154 pn = pg_get_node (ip4_lookup_node.index);
1155 pn->unformat_edit = unformat_pg_ip4_header;
/* Template ARP request: Ethernet/IPv4, 6-byte hardware and 4-byte protocol
   addresses, opcode "request"; every other field is left zero. */
1159 ethernet_arp_header_t h;
1161 memset (&h, 0, sizeof (h));
1163 /* Set target ethernet address to all zeros. */
1164 memset (h.ip4_over_ethernet[1].ethernet, 0,
1165 sizeof (h.ip4_over_ethernet[1].ethernet));
/* _16 stores a field in network byte order, _8 stores a single byte. */
1167 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1168 #define _8(f,v) h.f = v;
1169 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1170 _16 (l3_type, ETHERNET_TYPE_IP4);
1171 _8 (n_l2_address_bytes, 6);
1172 _8 (n_l3_address_bytes, 4);
1173 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1177 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1180 /* alloc chunk size */ 8,
1187 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record for the ip4 forwarding nodes.  The formatters
   below read its fib_index, dpo_index, flow_hash and packet_data members. */
1191 /* Adjacency taken. */
1196 /* Packet data, possibly *after* rewrite. */
1197 u8 packet_data[64 - 1 * sizeof (u32)];
1199 ip4_forward_next_trace_t;
/* Trace formatter: prints the captured packet bytes as an IPv4 header,
   indented to the current indent of s.
   va_list arguments: vlib_main_t * (unused), vlib_node_t * (unused),
   ip4_forward_next_trace_t *. */
1202 format_ip4_forward_next_trace (u8 * s, va_list * args)
1204 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1205 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1206 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1207 uword indent = format_get_indent (s);
1208 s = format (s, "%U%U",
1209 format_white_space, indent,
1210 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * format_ip4_lookup_trace: trace formatter for lookup results.
 * First line: fib index, dpo index and flow hash taken from the trace record.
 * Second line: the captured packet bytes formatted with format_ip4_header,
 * indented to the indent s had on entry.  vm and node are consumed from args
 * but not used.
 */
1215 format_ip4_lookup_trace (u8 * s, va_list * args)
1217 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1218 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1219 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1220 uword indent = format_get_indent (s);
1222 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1223 t->fib_index, t->dpo_index, t->flow_hash);
1224 s = format (s, "\n%U%U",
1225 format_white_space, indent,
1226 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * format_ip4_rewrite_trace: trace formatter for rewrite nodes.
 * First line: two integers, the adjacency t->dpo_index rendered through
 * format_ip_adjacency with FORMAT_IP_ADJACENCY_NONE, and the flow hash.
 * Second line: the captured bytes rendered by format_ip_adjacency_packet_data
 * for the same adjacency index.  vm and node are consumed but unused.
 *
 * NOTE(review): the number printed under the tx_sw_if_index label is
 * t->fib_index - confirm that this is the value the label promises.
 */
1231 format_ip4_rewrite_trace (u8 * s, va_list * args)
1233 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1234 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1235 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1236 vnet_main_t *vnm = vnet_get_main ();
1237 uword indent = format_get_indent (s);
1239 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1240 t->fib_index, t->dpo_index, format_ip_adjacency,
1241 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1242 s = format (s, "\n%U%U",
1243 format_white_space, indent,
1244 format_ip_adjacency_packet_data,
1245 vnm, t->dpo_index, t->packet_data, sizeof (t->packet_data));
1249 /* Common trace function for all ip4-forward next nodes. */
/*
 * Walks the buffer indices stored in frame: two buffers per iteration with a
 * prefetch of from[2] and from[3], then one buffer per iteration.  For every
 * buffer that has VLIB_BUFFER_IS_TRACED set, a trace record of type
 * ip4_forward_next_trace_t is added and filled with
 *  - dpo_index : ip.adj_index[which_adj_index] of the buffer,
 *  - flow_hash : ip.flow_hash of the buffer,
 *  - packet_data : sizeof (packet_data) bytes copied from the current data
 *    pointer of the buffer.
 * The ?: expressions select sw_if_index[VLIB_TX] when it differs from ~0 and
 * otherwise the FIB index of the RX interface; the line naming the field
 * that receives this value is not part of this listing.
 *
 * which_adj_index selects which of the two adjacency slots is recorded.
 */
1251 ip4_forward_next_trace (vlib_main_t * vm,
1252 vlib_node_runtime_t * node,
1253 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1256 ip4_main_t *im = &ip4_main;
1258 n_left = frame->n_vectors;
1259 from = vlib_frame_vector_args (frame);
/* Dual-buffer part. */
1264 vlib_buffer_t *b0, *b1;
1265 ip4_forward_next_trace_t *t0, *t1;
1267 /* Prefetch next iteration. */
1268 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1269 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1274 b0 = vlib_get_buffer (vm, bi0);
1275 b1 = vlib_get_buffer (vm, bi1);
1277 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1279 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1280 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1281 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1283 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1284 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1285 vec_elt (im->fib_index_by_sw_if_index,
1286 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1288 clib_memcpy (t0->packet_data,
1289 vlib_buffer_get_current (b0),
1290 sizeof (t0->packet_data));
1292 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1294 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1295 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1296 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1298 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1299 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1300 vec_elt (im->fib_index_by_sw_if_index,
1301 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1302 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1303 sizeof (t1->packet_data));
/* Single-buffer part: same record, one buffer at a time. */
1313 ip4_forward_next_trace_t *t0;
1317 b0 = vlib_get_buffer (vm, bi0);
1319 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1321 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1322 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1323 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1325 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1326 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1327 vec_elt (im->fib_index_by_sw_if_index,
1328 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1329 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1330 sizeof (t0->packet_data));
/*
 * ip4_drop_or_punt: shared body of ip4_drop and ip4_punt.
 * Passes every buffer index of frame to vlib_error_drop_buffers, attributing
 * error_code to ip4_input_node, and records traces through
 * ip4_forward_next_trace (VLIB_TX adjacency slot) when the node has
 * VLIB_NODE_FLAG_TRACE set.  Some arguments of the drop call and the return
 * statement are on lines missing from this listing.
 */
1338 ip4_drop_or_punt (vlib_main_t * vm,
1339 vlib_node_runtime_t * node,
1340 vlib_frame_t * frame, ip4_error_t error_code)
1342 u32 *buffers = vlib_frame_vector_args (frame);
1343 uword n_packets = frame->n_vectors;
1345 vlib_error_drop_buffers (vm, node, buffers,
1349 ip4_input_node.index, error_code);
1351 if (node->flags & VLIB_NODE_FLAG_TRACE)
1352 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* ip4_drop: node function of ip4_drop_node; drops the whole frame with IP4_ERROR_ADJACENCY_DROP. */
1358 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1360 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* ip4_punt: node function of ip4_punt_node; same path as ip4_drop but counted as IP4_ERROR_ADJACENCY_PUNT. */
1364 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1366 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/*
 * Node registration for ip4_drop: vector elements are u32 buffer indices and
 * traces are rendered by format_ip4_forward_next_trace.  The remaining
 * initialiser fields are on lines missing from this listing.
 */
1370 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1372 .function = ip4_drop,.
1374 .vector_size = sizeof (u32),
1375 .format_trace = format_ip4_forward_next_trace,
1382 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/*
 * Node registration for ip4_punt: vector elements are u32 buffer indices and
 * traces are rendered by format_ip4_forward_next_trace.  The remaining
 * initialiser fields are on lines missing from this listing.
 */
1384 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1386 .function = ip4_punt,
1388 .vector_size = sizeof (u32),
1389 .format_trace = format_ip4_forward_next_trace,
1396 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1399 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The running sum is seeded with payload length + (protocol << 16) converted
 * with clib_host_to_net_u32, then the source and destination addresses are
 * added (two u32 adds when uword is 32 bits wide, otherwise a single u64
 * read that starts at src_address), then the payload is folded in buffer by
 * buffer along the next_buffer chain.  sum16 is the complement of the folded
 * sum.
 *
 * vm - used to resolve next_buffer indices.
 * p0 - first buffer of the packet.
 * The rest of the parameter list and the return statement are on lines
 * missing from this listing; ip0 is read as the IPv4 header.
 */
1401 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1405 u32 ip_header_length, payload_length_host_byte_order;
1406 u32 n_this_buffer, n_bytes_left;
1408 void *data_this_buffer;
1410 /* Initialize checksum with ip header. */
1411 ip_header_length = ip4_header_bytes (ip0);
1412 payload_length_host_byte_order =
1413 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1415 clib_host_to_net_u32 (payload_length_host_byte_order +
1416 (ip0->protocol << 16));
1418 if (BITS (uword) == 32)
1421 ip_csum_with_carry (sum0,
1422 clib_mem_unaligned (&ip0->src_address, u32));
1424 ip_csum_with_carry (sum0,
1425 clib_mem_unaligned (&ip0->dst_address, u32));
1429 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Payload starts right after the IP header, options included. */
1431 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1432 data_this_buffer = (void *) ip0 + ip_header_length;
/* Clamp the first chunk to what the first buffer actually holds. */
1433 if (n_this_buffer + ip_header_length > p0->current_length)
1435 p0->current_length >
1436 ip_header_length ? p0->current_length - ip_header_length : 0;
1439 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1440 n_bytes_left -= n_this_buffer;
1441 if (n_bytes_left == 0)
/* More payload expected: the chain must continue. */
1444 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1445 p0 = vlib_get_buffer (vm, p0->next_buffer);
1446 data_this_buffer = vlib_buffer_get_current (p0);
1447 n_this_buffer = p0->current_length;
1450 sum16 = ~ip_csum_fold (sum0);
/*
 * ip4_tcp_udp_validate_checksum: verify the L4 checksum of a TCP or UDP
 * packet and record the outcome in p0->flags.
 *
 * A UDP packet whose checksum field is zero is flagged as computed and
 * correct without summing.  Otherwise ip4_tcp_udp_compute_checksum is run
 * and IP_BUFFER_L4_CHECKSUM_COMPUTED is set, together with the CORRECT bit
 * when the computed value is zero.  Callers in this file use the return
 * value as the updated flags; the return statement itself is not part of
 * this listing.
 *
 * NOTE(review): udp0 is taken at (ip0 + 1), i.e. directly after one
 * ip4_header_t - confirm how packets carrying IP options are meant to be
 * handled on this path.
 */
1456 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1458 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1462 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1463 || ip0->protocol == IP_PROTOCOL_UDP);
1465 udp0 = (void *) (ip0 + 1);
1466 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1468 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1469 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1473 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1475 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1476 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * ip4_local: node function of ip4_local_node.  For every buffer it:
 *  - records p->current_data in ip.start_of_ip_header;
 *  - takes the FIB index of the RX interface; a ?: expression then prefers
 *    sw_if_index[VLIB_TX] when that is not ~0;
 *  - walks the mtrie of that FIB on the SOURCE address in four steps that
 *    are interleaved with the L4 checks, falling back to
 *    mtrie->default_leaf on an empty leaf;
 *  - treats fragments as protocol 0xfe;
 *  - reads IP_BUFFER_L4_CHECKSUM_CORRECT from the buffer flags and calls
 *    ip4_tcp_udp_validate_checksum when IP_BUFFER_L4_CHECKSUM_COMPUTED is
 *    not set (the first half of that condition is not in this listing); a
 *    UDP checksum field of zero counts as good;
 *  - for UDP requires ip length - udp length >= 0;
 *  - uses IP4_ERROR_UNKNOWN_PROTOCOL as the starting value of error, which
 *    later checks only overwrite while it is still in place:
 *    IP4_ERROR_UDP_LENGTH, IP4_ERROR_TCP_CHECKSUM (+ 1 for UDP),
 *    IP4_ERROR_SPOOFED_LOCAL_PACKETS when bucket 0 of the source lookup is
 *    a DPO_RECEIVE, and IP4_ERROR_SRC_LOOKUP_MISS when fib_urpf_check_size
 *    returns 0 and the destination is not 0xFFFFFFFF;
 *  - stores the load-balance index of the source lookup in both
 *    ip.adj_index[VLIB_RX] and ip.adj_index[VLIB_TX];
 *  - enqueues to lm->local_next_by_ip_protocol[proto], or to
 *    IP_LOCAL_NEXT_DROP once error differs from IP4_ERROR_UNKNOWN_PROTOCOL.
 * Buffers are processed two at a time while at least four remain and the
 * next frame has room for two, then one at a time.  Errors are attributed to
 * the runtime of ip4_input_node.  Returns frame->n_vectors.
 */
1482 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1484 ip4_main_t *im = &ip4_main;
1485 ip_lookup_main_t *lm = &im->lookup_main;
1486 ip_local_next_t next_index;
1487 u32 *from, *to_next, n_left_from, n_left_to_next;
1488 vlib_node_runtime_t *error_node =
1489 vlib_node_get_runtime (vm, ip4_input_node.index);
1491 from = vlib_frame_vector_args (frame);
1492 n_left_from = frame->n_vectors;
1493 next_index = node->cached_next_index;
/* Tracing is done up front for the whole frame. */
1495 if (node->flags & VLIB_NODE_FLAG_TRACE)
1496 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1498 while (n_left_from > 0)
1500 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-buffer loop. */
1502 while (n_left_from >= 4 && n_left_to_next >= 2)
1504 vlib_buffer_t *p0, *p1;
1505 ip4_header_t *ip0, *ip1;
1506 udp_header_t *udp0, *udp1;
1507 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1508 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1509 const dpo_id_t *dpo0, *dpo1;
1510 const load_balance_t *lb0, *lb1;
1511 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1512 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1513 i32 len_diff0, len_diff1;
1514 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1515 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
/* Speculatively enqueue both buffers to the current next frame. */
1518 pi0 = to_next[0] = from[0];
1519 pi1 = to_next[1] = from[1];
1523 n_left_to_next -= 2;
1525 p0 = vlib_get_buffer (vm, pi0);
1526 p1 = vlib_get_buffer (vm, pi1);
1528 ip0 = vlib_buffer_get_current (p0);
1529 ip1 = vlib_buffer_get_current (p1);
1531 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1532 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1534 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1535 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1537 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1538 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1540 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1541 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
1543 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1544 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1546 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1547 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1549 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
/* Source-address lookup, step 0 of 4. */
1552 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1554 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1556 /* Treat IP frag packets as "experimental" protocol for now
1557 until support of IP frag reassembly is implemented */
1558 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1559 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1560 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1561 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1562 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1563 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1568 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1569 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1571 udp0 = ip4_next_header (ip0);
1572 udp1 = ip4_next_header (ip1);
1574 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1575 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1576 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1579 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1581 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1583 /* Verify UDP length. */
1584 ip_len0 = clib_net_to_host_u16 (ip0->length);
1585 ip_len1 = clib_net_to_host_u16 (ip1->length);
1586 udp_len0 = clib_net_to_host_u16 (udp0->length);
1587 udp_len1 = clib_net_to_host_u16 (udp1->length);
1589 len_diff0 = ip_len0 - udp_len0;
1590 len_diff1 = ip_len1 - udp_len1;
1592 len_diff0 = is_udp0 ? len_diff0 : 0;
1593 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one of the two is not a TCP/UDP packet already known good. */
1595 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1596 & good_tcp_udp0 & good_tcp_udp1)))
1601 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1602 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1604 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1605 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1610 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1611 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1613 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1614 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1618 good_tcp_udp0 &= len_diff0 >= 0;
1619 good_tcp_udp1 &= len_diff1 >= 0;
1622 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1624 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL is the starting value; checks below only replace it while it is still set. */
1626 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1628 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1629 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* The + is_udp arithmetic below relies on the two error codes being adjacent. */
1631 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1632 error0 = (is_tcp_udp0 && !good_tcp_udp0
1633 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1634 error1 = (is_tcp_udp1 && !good_tcp_udp1
1635 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1638 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1640 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1643 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1646 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1648 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1649 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1650 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1652 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1653 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1654 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1656 lb0 = load_balance_get (lbi0);
1657 lb1 = load_balance_get (lbi1);
1658 dpo0 = load_balance_get_bucket_i (lb0, 0);
1659 dpo1 = load_balance_get_bucket_i (lb1, 0);
1662 * Must have a route to source otherwise we drop the packet.
1663 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1666 * - the source is a recieve => it's from us => bogus, do this
1667 * first since it sets a different error code.
1668 * - uRPF check for any route to source - accept if passes.
1669 * - allow packets destined to the broadcast address from unknown sources
1671 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1672 dpo0->dpoi_type == DPO_RECEIVE) ?
1673 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1674 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1675 !fib_urpf_check_size (lb0->lb_urpf) &&
1676 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1677 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1678 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1679 dpo1->dpoi_type == DPO_RECEIVE) ?
1680 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1681 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1682 !fib_urpf_check_size (lb1->lb_urpf) &&
1683 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1684 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
1686 next0 = lm->local_next_by_ip_protocol[proto0];
1687 next1 = lm->local_next_by_ip_protocol[proto1];
1690 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1692 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1694 p0->error = error0 ? error_node->errors[error0] : 0;
1695 p1->error = error1 ? error_node->errors[error1] : 0;
1697 enqueue_code = (next0 != next_index) + 2 * (next1 != next_index);
1699 if (PREDICT_FALSE (enqueue_code != 0))
1701 switch (enqueue_code)
1707 n_left_to_next += 1;
1708 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1714 n_left_to_next += 1;
1715 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1719 /* A B B or A B C */
1721 n_left_to_next += 2;
1722 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1723 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1726 vlib_put_next_frame (vm, node, next_index,
1729 vlib_get_next_frame (vm, node, next_index, to_next,
1737 while (n_left_from > 0 && n_left_to_next > 0)
1742 ip4_fib_mtrie_t *mtrie0;
1743 ip4_fib_mtrie_leaf_t leaf0;
1744 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1746 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1747 load_balance_t *lb0;
1748 const dpo_id_t *dpo0;
1750 pi0 = to_next[0] = from[0];
1754 n_left_to_next -= 1;
1756 p0 = vlib_get_buffer (vm, pi0);
1758 ip0 = vlib_buffer_get_current (p0);
1760 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1762 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1763 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1765 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1766 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1768 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1770 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1773 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1775 /* Treat IP frag packets as "experimental" protocol for now
1776 until support of IP frag reassembly is implemented */
1777 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1778 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1779 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1783 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1785 udp0 = ip4_next_header (ip0);
1787 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1788 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1791 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1793 /* Verify UDP length. */
1794 ip_len0 = clib_net_to_host_u16 (ip0->length);
1795 udp_len0 = clib_net_to_host_u16 (udp0->length);
1797 len_diff0 = ip_len0 - udp_len0;
1799 len_diff0 = is_udp0 ? len_diff0 : 0;
1801 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1806 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1807 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1809 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1810 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1814 good_tcp_udp0 &= len_diff0 >= 0;
1817 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1819 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1821 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1823 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1824 error0 = (is_tcp_udp0 && !good_tcp_udp0
1825 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1828 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1831 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1833 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1834 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1836 lb0 = load_balance_get (lbi0);
1837 dpo0 = load_balance_get_bucket_i (lb0, 0);
/* Both adjacency slots receive the source-lookup result (TX was already set just above). */
1839 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1840 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1842 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1843 dpo0->dpoi_type == DPO_RECEIVE) ?
1844 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1845 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1846 !fib_urpf_check_size (lb0->lb_urpf) &&
1847 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1848 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1850 next0 = lm->local_next_by_ip_protocol[proto0];
1853 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1855 p0->error = error0 ? error_node->errors[error0] : 0;
/* Speculation failed: take the buffer back and switch to the frame of next0. */
1857 if (PREDICT_FALSE (next0 != next_index))
1859 n_left_to_next += 1;
1860 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1863 vlib_get_next_frame (vm, node, next_index, to_next,
1867 n_left_to_next -= 1;
1871 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1874 return frame->n_vectors;
/*
 * Node registration for ip4-local: runs ip4_local on vectors of u32 buffer
 * indices, formats traces with format_ip4_forward_next_trace and declares
 * IP_LOCAL_N_NEXT static next nodes (error-drop, error-punt, ip4-udp-lookup
 * and ip4-icmp-input are the ones named here).
 */
1877 VLIB_REGISTER_NODE (ip4_local_node) =
1879 .function = ip4_local,.name = "ip4-local",.vector_size =
1880 sizeof (u32),.format_trace =
1881 format_ip4_forward_next_trace,.n_next_nodes =
1882 IP_LOCAL_N_NEXT,.next_nodes =
1884 [IP_LOCAL_NEXT_DROP] = "error-drop",
1885 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1886 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1887 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}
1890 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/*
 * ip4_register_protocol: route locally received packets of an IP protocol to
 * a graph node.
 *
 * protocol   - IP protocol number; asserted to be a valid index into
 *              lm->local_next_by_ip_protocol.
 * node_index - node added as a next of ip4_local_node; the next index
 *              returned by vlib_node_add_next is stored in the table and is
 *              what ip4_local reads for that protocol.
 */
1893 ip4_register_protocol (u32 protocol, u32 node_index)
1895 vlib_main_t *vm = vlib_get_main ();
1896 ip4_main_t *im = &ip4_main;
1897 ip_lookup_main_t *lm = &im->lookup_main;
1899 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1900 lm->local_next_by_ip_protocol[protocol] =
1901 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * show_ip_local_command_fn: CLI handler that prints a heading and then the
 * number of every protocol whose entry in lm->local_next_by_ip_protocol is
 * not IP_LOCAL_NEXT_PUNT.  input and cmd are not used in the visible lines.
 */
1904 static clib_error_t *
1905 show_ip_local_command_fn (vlib_main_t * vm,
1906 unformat_input_t * input, vlib_cli_command_t * cmd)
1908 ip4_main_t *im = &ip4_main;
1909 ip_lookup_main_t *lm = &im->lookup_main;
1912 vlib_cli_output (vm, "Protocols handled by ip4_local");
1913 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1915 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1916 vlib_cli_output (vm, "%d", i);
1924 * Display the set of protocols handled by the local IPv4 stack.
1927 * Example of how to display local protocol table:
1928 * @cliexstart{show ip local}
1929 * Protocols handled by ip4_local
/* CLI registration binding the show ip local path to show_ip_local_command_fn. */
1936 VLIB_CLI_COMMAND (show_ip_local, static) =
1938 .path = "show ip local",
1939 .function = show_ip_local_command_fn,
1940 .short_help = "show ip local",
1945 ip4_arp_inline (vlib_main_t * vm,
1946 vlib_node_runtime_t * node,
1947 vlib_frame_t * frame, int is_glean)
1949 vnet_main_t *vnm = vnet_get_main ();
1950 ip4_main_t *im = &ip4_main;
1951 ip_lookup_main_t *lm = &im->lookup_main;
1952 u32 *from, *to_next_drop;
1953 uword n_left_from, n_left_to_next_drop, next_index;
1954 static f64 time_last_seed_change = -1e100;
1955 static u32 hash_seeds[3];
1956 static uword hash_bitmap[256 / BITS (uword)];
1959 if (node->flags & VLIB_NODE_FLAG_TRACE)
1960 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1962 time_now = vlib_time_now (vm);
1963 if (time_now - time_last_seed_change > 1e-3)
1966 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
1967 sizeof (hash_seeds));
1968 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1969 hash_seeds[i] = r[i];
1971 /* Mark all hash keys as been no-seen before. */
1972 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1975 time_last_seed_change = time_now;
1978 from = vlib_frame_vector_args (frame);
1979 n_left_from = frame->n_vectors;
1980 next_index = node->cached_next_index;
1981 if (next_index == IP4_ARP_NEXT_DROP)
1982 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1984 while (n_left_from > 0)
1986 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1987 to_next_drop, n_left_to_next_drop);
1989 while (n_left_from > 0 && n_left_to_next_drop > 0)
1991 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1992 ip_adjacency_t *adj0;
1999 p0 = vlib_get_buffer (vm, pi0);
2001 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2002 adj0 = ip_get_adjacency (lm, adj_index0);
2003 ip0 = vlib_buffer_get_current (p0);
2009 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2010 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2015 * this is the Glean case, so we are ARPing for the
2016 * packet's destination
2018 a0 ^= ip0->dst_address.data_u32;
2022 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2026 hash_v3_finalize32 (a0, b0, c0);
2028 c0 &= BITS (hash_bitmap) - 1;
2029 c0 = c0 / BITS (uword);
2030 m0 = (uword) 1 << (c0 % BITS (uword));
2032 bm0 = hash_bitmap[c0];
2033 drop0 = (bm0 & m0) != 0;
2035 /* Mark it as seen. */
2036 hash_bitmap[c0] = bm0 | m0;
2040 to_next_drop[0] = pi0;
2042 n_left_to_next_drop -= 1;
2045 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2046 IP4_ARP_ERROR_REQUEST_SENT];
2049 * the adj has been updated to a rewrite but the node the DPO that got
2050 * us here hasn't - yet. no big deal. we'll drop while we wait.
2052 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2059 * Can happen if the control-plane is programming tables
2060 * with traffic flowing; at least that's today's lame excuse.
2062 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2063 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2065 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2068 /* Send ARP request. */
2072 ethernet_arp_header_t *h0;
2073 vnet_hw_interface_t *hw_if0;
2076 vlib_packet_template_get_packet (vm,
2077 &im->ip4_arp_request_packet_template,
2080 /* Add rewrite/encap string for ARP packet. */
2081 vnet_rewrite_one_header (adj0[0], h0,
2082 sizeof (ethernet_header_t));
2084 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2086 /* Src ethernet address in ARP header. */
2087 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2089 sizeof (h0->ip4_over_ethernet[0].ethernet));
2093 /* The interface's source address is stashed in the Glean Adj */
2094 h0->ip4_over_ethernet[0].ip4 =
2095 adj0->sub_type.glean.receive_addr.ip4;
2097 /* Copy in destination address we are requesting. This is the
2098 * glean case, so it's the packet's destination.*/
2099 h0->ip4_over_ethernet[1].ip4.data_u32 =
2100 ip0->dst_address.data_u32;
2104 /* Src IP address in ARP header. */
2105 if (ip4_src_address_for_packet (lm, sw_if_index0,
2107 ip4_over_ethernet[0].ip4))
2109 /* No source address available */
2111 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2112 vlib_buffer_free (vm, &bi0, 1);
2116 /* Copy in destination address we are requesting from the
2118 h0->ip4_over_ethernet[1].ip4.data_u32 =
2119 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2122 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2123 b0 = vlib_get_buffer (vm, bi0);
2124 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2126 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2128 vlib_set_next_frame_buffer (vm, node,
2129 adj0->rewrite_header.next_index,
2134 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2137 return frame->n_vectors;
/* ip4_arp: node function of ip4_arp_node; runs ip4_arp_inline with is_glean = 0. */
2141 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2143 return (ip4_arp_inline (vm, node, frame, 0));
/* ip4_glean: node function of ip4_glean_node; runs ip4_arp_inline with is_glean = 1. */
2147 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2149 return (ip4_arp_inline (vm, node, frame, 1));
/*
 * Counter descriptions indexed by the IP4_ARP_ERROR_ values; used as
 * .error_strings by both the ip4-arp and the ip4-glean node registrations.
 */
2152 static char *ip4_arp_error_strings[] = {
2153 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2154 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2155 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2156 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2157 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2158 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Node registration for ip4-arp: runs ip4_arp on vectors of u32 buffer
 * indices, shares ip4_arp_error_strings with ip4-glean and declares
 * IP4_ARP_N_NEXT static next nodes, of which error-drop is named here.
 */
2161 VLIB_REGISTER_NODE (ip4_arp_node) =
2163 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2164 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2165 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2166 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2168 [IP4_ARP_NEXT_DROP] = "error-drop",}
/*
 * Node registration for ip4-glean: runs ip4_glean on vectors of u32 buffer
 * indices, shares ip4_arp_error_strings with ip4-arp and declares
 * IP4_ARP_N_NEXT static next nodes, of which error-drop is named here.
 */
2171 VLIB_REGISTER_NODE (ip4_glean_node) =
2173 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2174 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2175 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2176 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2178 [IP4_ARP_NEXT_DROP] = "error-drop",}
/*
 * arp_notrace_init: init function (registered with VLIB_INIT_FUNCTION at the
 * end of this block) that walks the foreach_notrace_ip4_arp_error list and,
 * for each listed error of the ip4-arp node runtime, calls
 * vnet_pcap_drop_trace_filter_add_del.  The list entries and the remaining
 * arguments of that call are on lines missing from this listing.
 */
2181 #define foreach_notrace_ip4_arp_error \
2188 arp_notrace_init (vlib_main_t * vm)
2190 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2192 /* don't trace ARP request packets */
2194 vnet_pcap_drop_trace_filter_add_del \
2195 (rt->errors[IP4_ARP_ERROR_##a], \
2197 foreach_notrace_ip4_arp_error;
2202 VLIB_INIT_FUNCTION (arp_notrace_init);
2205 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * vm          - vlib main used for buffer and frame handling.
 * dst         - IPv4 address to resolve; becomes the ARP target address.
 * sw_if_index - interface the request is sent on.
 *
 * Fails with a clib error when the interface does not have
 * VNET_SW_INTERFACE_FLAG_ADMIN_UP set, or when no interface address matches
 * dst (vnm->api_errno is then set to VNET_API_ERROR_NO_MATCHING_INTERFACE).
 * Otherwise a packet is taken from im->ip4_arp_request_packet_template, the
 * sender hardware address is copied from the supporting hw interface, the
 * sender and target IPv4 addresses are set from src[0] and dst[0], the
 * rewrite of the neighbor-probe adjacency of the matching interface address
 * is prepended, and the buffer is handed to the output node of the hw
 * interface.  Returns 0 when no error occurred.
 *
 * NOTE(review): the lines assigning src and testing it are not part of this
 * listing; presumably src is the result of
 * ip4_interface_address_matching_destination - confirm.
 */
2207 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2209 vnet_main_t *vnm = vnet_get_main ();
2210 ip4_main_t *im = &ip4_main;
2211 ethernet_arp_header_t *h;
2213 ip_interface_address_t *ia;
2214 ip_adjacency_t *adj;
2215 vnet_hw_interface_t *hi;
2216 vnet_sw_interface_t *si;
2220 si = vnet_get_sw_interface (vnm, sw_if_index);
2222 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2224 return clib_error_return (0, "%U: interface %U down",
2225 format_ip4_address, dst,
2226 format_vnet_sw_if_index_name, vnm,
2231 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2234 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2235 return clib_error_return
2237 "no matching interface address for destination %U (interface %U)",
2238 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2242 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2245 vlib_packet_template_get_packet (vm,
2246 &im->ip4_arp_request_packet_template,
2249 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2251 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2252 sizeof (h->ip4_over_ethernet[0].ethernet));
2254 h->ip4_over_ethernet[0].ip4 = src[0];
2255 h->ip4_over_ethernet[1].ip4 = dst[0];
/* The request is marked as both received on and sent to sw_if_index. */
2257 b = vlib_get_buffer (vm, bi);
2258 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2259 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2261 /* Add encapsulation string for software interface (e.g. ethernet header). */
2262 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2263 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the buffer straight to the output node of the hw interface. */
2266 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2267 u32 *to_next = vlib_frame_vector_args (f);
2270 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2273 return /* no error */ 0;
/*
 * ip4_rewrite_next_t: next indices used by ip4_rewrite_inline.  DROP is the
 * value next0/next1 start from; ICMP_ERROR is chosen when the TTL expires.
 */
2278 IP4_REWRITE_NEXT_DROP,
2279 IP4_REWRITE_NEXT_ICMP_ERROR,
2280 } ip4_rewrite_next_t;
2283 ip4_rewrite_inline (vlib_main_t * vm,
2284 vlib_node_runtime_t * node,
2285 vlib_frame_t * frame, int is_midchain, int is_mcast)
2287 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2288 u32 *from = vlib_frame_vector_args (frame);
2289 u32 n_left_from, n_left_to_next, *to_next, next_index;
2290 vlib_node_runtime_t *error_node =
2291 vlib_node_get_runtime (vm, ip4_input_node.index);
2293 n_left_from = frame->n_vectors;
2294 next_index = node->cached_next_index;
2295 u32 cpu_index = os_get_cpu_number ();
2297 while (n_left_from > 0)
2299 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2301 while (n_left_from >= 4 && n_left_to_next >= 2)
2303 ip_adjacency_t *adj0, *adj1;
2304 vlib_buffer_t *p0, *p1;
2305 ip4_header_t *ip0, *ip1;
2306 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2307 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2308 u32 tx_sw_if_index0, tx_sw_if_index1;
2310 /* Prefetch next iteration. */
2312 vlib_buffer_t *p2, *p3;
2314 p2 = vlib_get_buffer (vm, from[2]);
2315 p3 = vlib_get_buffer (vm, from[3]);
2317 vlib_prefetch_buffer_header (p2, STORE);
2318 vlib_prefetch_buffer_header (p3, STORE);
2320 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2321 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2324 pi0 = to_next[0] = from[0];
2325 pi1 = to_next[1] = from[1];
2330 n_left_to_next -= 2;
2332 p0 = vlib_get_buffer (vm, pi0);
2333 p1 = vlib_get_buffer (vm, pi1);
2335 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2336 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2338 /* We should never rewrite a pkt using the MISS adjacency */
2339 ASSERT (adj_index0 && adj_index1);
2341 ip0 = vlib_buffer_get_current (p0);
2342 ip1 = vlib_buffer_get_current (p1);
2344 error0 = error1 = IP4_ERROR_NONE;
2345 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2347 /* Decrement TTL & update checksum.
2348 Works either endian, so no need for byte swap. */
2349 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2351 i32 ttl0 = ip0->ttl;
2353 /* Input node should have rejected packets with ttl 0. */
2354 ASSERT (ip0->ttl > 0);
2356 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2357 checksum0 += checksum0 >= 0xffff;
2359 ip0->checksum = checksum0;
2364 * If the ttl drops below 1 when forwarding, generate
2367 if (PREDICT_FALSE (ttl0 <= 0))
2369 error0 = IP4_ERROR_TIME_EXPIRED;
2370 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2371 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2372 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2374 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2377 /* Verify checksum. */
2378 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2382 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2384 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2386 i32 ttl1 = ip1->ttl;
2388 /* Input node should have rejected packets with ttl 0. */
2389 ASSERT (ip1->ttl > 0);
2391 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2392 checksum1 += checksum1 >= 0xffff;
2394 ip1->checksum = checksum1;
2399 * If the ttl drops below 1 when forwarding, generate
2402 if (PREDICT_FALSE (ttl1 <= 0))
2404 error1 = IP4_ERROR_TIME_EXPIRED;
2405 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2406 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2407 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2409 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2412 /* Verify checksum. */
2413 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2414 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2418 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2421 /* Rewrite packet header and updates lengths. */
2422 adj0 = ip_get_adjacency (lm, adj_index0);
2423 adj1 = ip_get_adjacency (lm, adj_index1);
2425 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2426 rw_len0 = adj0[0].rewrite_header.data_bytes;
2427 rw_len1 = adj1[0].rewrite_header.data_bytes;
2428 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2429 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2431 /* Check MTU of outgoing interface. */
2433 (vlib_buffer_length_in_chain (vm, p0) >
2435 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2438 (vlib_buffer_length_in_chain (vm, p1) >
2440 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2444 * pre-fetch the per-adjacency counters
2446 vlib_prefetch_combined_counter (&adjacency_counters,
2447 cpu_index, adj_index0);
2448 vlib_prefetch_combined_counter (&adjacency_counters,
2449 cpu_index, adj_index1);
2451 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2452 * to see the IP header */
2453 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2455 next0 = adj0[0].rewrite_header.next_index;
2456 p0->current_data -= rw_len0;
2457 p0->current_length += rw_len0;
2458 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2459 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2461 vnet_feature_arc_start (lm->output_feature_arc_index,
2462 tx_sw_if_index0, &next0, p0);
2464 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2466 next1 = adj1[0].rewrite_header.next_index;
2467 p1->current_data -= rw_len1;
2468 p1->current_length += rw_len1;
2470 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2471 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2473 vnet_feature_arc_start (lm->output_feature_arc_index,
2474 tx_sw_if_index1, &next1, p1);
2477 /* Guess we are only writing on simple Ethernet header. */
2478 vnet_rewrite_two_headers (adj0[0], adj1[0],
2479 ip0, ip1, sizeof (ethernet_header_t));
2482 * Bump the per-adjacency counters
2484 vlib_increment_combined_counter
2485 (&adjacency_counters,
2487 adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2489 vlib_increment_combined_counter
2490 (&adjacency_counters,
2492 adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2496 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2497 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2502 * copy bytes from the IP address into the MAC rewrite
2504 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
2505 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 1);
2508 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2509 to_next, n_left_to_next,
2510 pi0, pi1, next0, next1);
2513 while (n_left_from > 0 && n_left_to_next > 0)
2515 ip_adjacency_t *adj0;
2518 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2519 u32 tx_sw_if_index0;
2521 pi0 = to_next[0] = from[0];
2523 p0 = vlib_get_buffer (vm, pi0);
2525 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2527 /* We should never rewrite a pkt using the MISS adjacency */
2528 ASSERT (adj_index0);
2530 adj0 = ip_get_adjacency (lm, adj_index0);
2532 ip0 = vlib_buffer_get_current (p0);
2534 error0 = IP4_ERROR_NONE;
2535 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2537 /* Decrement TTL & update checksum. */
2538 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2540 i32 ttl0 = ip0->ttl;
2542 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2544 checksum0 += checksum0 >= 0xffff;
2546 ip0->checksum = checksum0;
2548 ASSERT (ip0->ttl > 0);
2554 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2556 if (PREDICT_FALSE (ttl0 <= 0))
2559 * If the ttl drops below 1 when forwarding, generate
2562 error0 = IP4_ERROR_TIME_EXPIRED;
2563 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2564 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2565 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2566 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2572 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2575 vlib_prefetch_combined_counter (&adjacency_counters,
2576 cpu_index, adj_index0);
2578 /* Guess we are only writing on simple Ethernet header. */
2579 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2583 * copy bytes from the IP address into the MAC rewrite
2585 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
2588 /* Update packet buffer attributes/set output interface. */
2589 rw_len0 = adj0[0].rewrite_header.data_bytes;
2590 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2592 vlib_increment_combined_counter
2593 (&adjacency_counters,
2595 adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2597 /* Check MTU of outgoing interface. */
2598 error0 = (vlib_buffer_length_in_chain (vm, p0)
2599 > adj0[0].rewrite_header.max_l3_packet_bytes
2600 ? IP4_ERROR_MTU_EXCEEDED : error0);
2602 p0->error = error_node->errors[error0];
2604 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2605 * to see the IP header */
2606 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2608 p0->current_data -= rw_len0;
2609 p0->current_length += rw_len0;
2610 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2612 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2613 next0 = adj0[0].rewrite_header.next_index;
2617 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2620 vnet_feature_arc_start (lm->output_feature_arc_index,
2621 tx_sw_if_index0, &next0, p0);
2628 n_left_to_next -= 1;
2630 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2631 to_next, n_left_to_next,
2635 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2638 /* Need to do trace after rewrites to pick up new packet data. */
2639 if (node->flags & VLIB_NODE_FLAG_TRACE)
2640 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2642 return frame->n_vectors;
2646 /** @brief IPv4 rewrite node.
2649 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2650 header checksum, fetch the ip adjacency, check the outbound mtu,
2651 apply the adjacency rewrite, and send pkts to the adjacency
2652 rewrite header's rewrite_next_index.
2654 @param vm vlib_main_t corresponding to the current thread
2655 @param node vlib_node_runtime_t
2656 @param frame vlib_frame_t whose contents should be dispatched
2658 @par Graph mechanics: buffer metadata, next index usage
2661 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2662 - the rewrite adjacency index
2663 - <code>adj->lookup_next_index</code>
2664 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2665 the packet will be dropped.
2666 - <code>adj->rewrite_header</code>
2667 - Rewrite string length, rewrite string, next_index
2670 - <code>b->current_data, b->current_length</code>
2671 - Updated net of applying the rewrite string
2673 <em>Next Indices:</em>
2674 - <code> adj->rewrite_header.next_index </code>
/* Node function registered for "ip4-rewrite": hands the frame to
 * ip4_rewrite_inline with both trailing flag arguments set to 0 and
 * returns whatever that call returns.
 * NOTE(review): ip4_midchain and ip4_rewrite_mcast pass (1, 0) and (0, 1),
 * so the two flags presumably select midchain and multicast handling --
 * confirm against ip4_rewrite_inline's parameter list. */
2678 ip4_rewrite (vlib_main_t * vm,
2679 vlib_node_runtime_t * node, vlib_frame_t * frame)
2681 return ip4_rewrite_inline (vm, node, frame, 0, 0);
/* Node function registered for "ip4-midchain": same as ip4_rewrite except
 * that the first trailing flag passed to ip4_rewrite_inline is 1.
 * NOTE(review): presumably this flag enables the adjacency's
 * sub_type.midchain.fixup_func call seen in ip4_rewrite_inline -- confirm. */
2685 ip4_midchain (vlib_main_t * vm,
2686 vlib_node_runtime_t * node, vlib_frame_t * frame)
2688 return ip4_rewrite_inline (vm, node, frame, 1, 0);
/* Node function registered for "ip4-rewrite-mcast": same as ip4_rewrite
 * except that the second trailing flag passed to ip4_rewrite_inline is 1.
 * NOTE(review): presumably this flag enables the vnet_fixup_one_header step
 * (copying destination-address bytes into the MAC rewrite) seen in
 * ip4_rewrite_inline -- confirm. */
2692 ip4_rewrite_mcast (vlib_main_t * vm,
2693 vlib_node_runtime_t * node, vlib_frame_t * frame)
2695 return ip4_rewrite_inline (vm, node, frame, 0, 1);
/* Graph-node registration for "ip4-rewrite".
 * - dispatch function: ip4_rewrite
 * - frame elements are u32 wide
 * - trace records are rendered by format_ip4_rewrite_trace
 * - next nodes, indexed by IP4_REWRITE_NEXT_*: DROP -> "error-drop",
 *   ICMP_ERROR -> "ip4-icmp-error"
 * The trailing VLIB_NODE_FUNCTION_MULTIARCH line ties ip4_rewrite to this
 * registration (NOTE(review): presumably to emit per-CPU-architecture
 * variants -- confirm against the macro definition). */
2699 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2700 .function = ip4_rewrite,
2701 .name = "ip4-rewrite",
2702 .vector_size = sizeof (u32),
2704 .format_trace = format_ip4_rewrite_trace,
2708 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2709 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2712 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Graph-node registration for "ip4-rewrite-mcast", dispatched through
 * ip4_rewrite_mcast and traced with format_ip4_rewrite_trace.
 * It lists no next nodes of its own; it is declared a sibling of
 * "ip4-rewrite" instead (NOTE(review): presumably so that it shares that
 * node's next-node arcs -- confirm against the vlib sibling_of semantics). */
2714 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2715 .function = ip4_rewrite_mcast,
2716 .name = "ip4-rewrite-mcast",
2717 .vector_size = sizeof (u32),
2719 .format_trace = format_ip4_rewrite_trace,
2720 .sibling_of = "ip4-rewrite",
2722 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/* Graph-node registration for "ip4-midchain", dispatched through
 * ip4_midchain and declared a sibling of "ip4-rewrite".
 * Unlike the two rewrite registrations, its trace formatter is
 * format_ip4_forward_next_trace rather than format_ip4_rewrite_trace.
 * NOTE(review): this is the only VLIB_NODE_FUNCTION_MULTIARCH use here that
 * ends in ';' -- harmless if the macro expands to a complete definition,
 * but inconsistent with the other two; confirm which form is intended. */
2724 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2725 .function = ip4_midchain,
2726 .name = "ip4-midchain",
2727 .vector_size = sizeof (u32),
2728 .format_trace = format_ip4_forward_next_trace,
2729 .sibling_of = "ip4-rewrite",
2731 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/* CLI handler for "set interface ip table <interface> <table-id>".
 *
 * Parses an interface and then a numeric table id from `input`.  For that
 * table id it finds or creates (and locks) the IPv4 FIB table and the IPv4
 * mfib table, and stores the resulting indices for the interface in
 * ip4_main.fib_index_by_sw_if_index and ip4_main.mfib_index_by_sw_if_index
 * (both vectors are grown with vec_validate first).
 *
 * `vm` and `cmd` are not referenced in the visible code.
 * A clib error is built when either token fails to parse.
 * NOTE(review): `error` starts at 0 and is presumably the value returned on
 * every path -- the return statement is not visible here, confirm.
 * NOTE(review): despite the add_del_ name, only an "add/set" action is
 * visible; nothing here removes an interface from a table. */
2734 static clib_error_t *
2735 add_del_interface_table (vlib_main_t * vm,
2736 unformat_input_t * input, vlib_cli_command_t * cmd)
2738 vnet_main_t *vnm = vnet_get_main ();
2739 clib_error_t *error = 0;
2740 u32 sw_if_index, table_id;
/* First token: the interface, resolved to its sw_if_index. */
2744 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2746 error = clib_error_return (0, "unknown interface `%U'",
2747 format_unformat_error, input);
/* Second token: the table id. */
2751 if (unformat (input, "%d", &table_id))
2755 error = clib_error_return (0, "expected table id `%U'",
2756 format_unformat_error, input);
2761 ip4_main_t *im = &ip4_main;
2764 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2769 // changing an interface's table has consequences for any connecteds
2770 // and adj-fibs already installed.
2772 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2773 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
/* fib_index is reused from here on to hold the mfib table's index. */
2775 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2777 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2778 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2786 * Place the indicated interface into the supplied IPv4 FIB table (also known
2787 * as a VRF). If the FIB table does not exist, this command creates it. To
2788 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2789 * FIB table will only be displayed if a route has been added to the table, or
2790 * an IP Address is assigned to an interface in the table (which adds a route
2793 * @note IP addresses added after setting the interface IP table end up in
2794 * the indicated FIB table. If the IP address is added prior to adding the
2795 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2796 * but potentially counter-intuitive results occur if you provision interface
2797 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2798 * IP table ID provisioned. It might be marginally useful to evade source RPF
2799 * drops to put an interface address into multiple FIBs.
2802 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2803 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Registers the CLI path "set interface ip table" and routes it to
 * add_del_interface_table. */
2806 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2808 .path = "set interface ip table",
2809 .function = add_del_interface_table,
2810 .short_help = "set interface ip table <interface> <table-id>",
/* Cross-check one address lookup in the IPv4 FIB obtained from
 * ip4_fib_get (fib_index0).
 *
 * Walks that FIB's mtrie from IP4_FIB_MTRIE_LEAF_ROOT with four lookup
 * steps (step indices 0..3; NOTE(review): presumably one per address byte
 * -- confirm), falls back to the mtrie's default leaf when the walk ends
 * on IP4_FIB_MTRIE_LEAF_EMPTY, and extracts the index stored in the
 * resulting leaf.
 *
 * a          - address to look up
 * fib_index0 - index handed to ip4_fib_get ()
 *
 * Returns non-zero when the mtrie result equals the value returned by
 * ip4_fib_table_lookup_lb () for the same FIB and address, 0 otherwise. */
2815 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2817 ip4_fib_mtrie_t *mtrie0;
2818 ip4_fib_mtrie_leaf_t leaf0;
2821 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2823 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2824 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2825 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2826 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2827 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2829 /* Handle default route. */
2830 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2832 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2834 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/* CLI handler for "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]".
 *
 * Accepts, in any order, "table <n>", "count <f>" and a bare IPv4 address.
 * Then runs ip4_lookup_validate () n times against table_id, advancing the
 * address by one after each check, and prints either the number of errors
 * or a "No errors" line.
 *
 * Returns a clib error for an unrecognised token or when the table check
 * below fails.
 *
 * NOTE(review): ip4_base_address is declared without an initializer and no
 * default is visible; if the address token is omitted the loop would start
 * from an indeterminate value -- confirm and consider requiring the token.
 * NOTE(review): table_id is handed to ip4_fib_get () here and, through
 * ip4_lookup_validate (), again in the loop -- i.e. it is used as a FIB
 * index rather than translated from a table id.
 * NOTE(review): how `n` is derived from `count`, and where `errors` is
 * incremented, is not visible here. */
2837 static clib_error_t *
2838 test_lookup_command_fn (vlib_main_t * vm,
2839 unformat_input_t * input, vlib_cli_command_t * cmd)
2846 ip4_address_t ip4_base_address;
2849 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2851 if (unformat (input, "table %d", &table_id))
2853 /* Make sure the entry exists. */
2854 fib = ip4_fib_get (table_id);
2855 if ((fib) && (fib->index != table_id))
2856 return clib_error_return (0, "<fib-index> %d does not exist",
2859 else if (unformat (input, "count %f", &count))
2862 else if (unformat (input, "%U",
2863 unformat_ip4_address, &ip4_base_address))
2866 return clib_error_return (0, "unknown input `%U'",
2867 format_unformat_error, input);
2872 for (i = 0; i < n; i++)
2874 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Next address: add 1 in host byte order, store back in network order. */
2877 ip4_base_address.as_u32 =
2878 clib_host_to_net_u32 (1 +
2879 clib_net_to_host_u32 (ip4_base_address.as_u32));
2883 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2885 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2891 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2892 * given FIB table to determine if there is a conflict with the
2893 * adjacency table. The fib-id can be determined by using the
2894 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2897 * @todo This command uses fib-id, other commands use table-id (not
2898 * just a name, they are different indexes). Would like to change this
2899 * to table-id for consistency.
2902 * Example of how to run the test lookup command:
2903 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2904 * No errors in 2 lookups
/* Registers the CLI path "test lookup" and routes it to
 * test_lookup_command_fn. */
2908 VLIB_CLI_COMMAND (lookup_test_command, static) =
2910 .path = "test lookup",
2911 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2912 .function = test_lookup_command_fn,
/* Set the flow-hash configuration of the IPv4 FIB that belongs to table_id.
 *
 * Looks table_id up in ip4_main.fib_index_by_table_id, fetches the FIB at
 * the index found there and overwrites its flow_hash_config field.
 *
 * table_id         - table id used as the hash key
 * flow_hash_config - value stored verbatim in the FIB
 *
 * Returns VNET_API_ERROR_NO_SUCH_FIB on the failure path.
 * NOTE(review): the condition guarding that return (presumably "hash lookup
 * found nothing") and the success return value are not visible here. */
2917 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2919 ip4_main_t *im4 = &ip4_main;
2921 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
2924 return VNET_API_ERROR_NO_SUCH_FIB;
2926 fib = ip4_fib_get (p[0]);
2928 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 *
 * Reads "table <n>" plus any number of flow-hash keywords from `input`.
 * The keyword branches are generated by expanding foreach_flow_hash_bit:
 * each one matches the keyword's own name, ORs the associated bit value
 * into flow_hash_config (which starts at 0) and sets `matched`.
 * The accumulated value is then applied with vnet_set_ip4_flow_hash ().
 *
 * Result mapping visible here: VNET_API_ERROR_NO_SUCH_FIB becomes a
 * "no such FIB table" clib error; another branch only logs a
 * "BUG: illegal flow hash config" warning.  An unrecognised token yields an
 * "unknown input" clib error.
 *
 * NOTE(review): the initial value of table_id, the role of `matched` after
 * the loop and the remaining switch arms are not visible here. */
2932 static clib_error_t *
2933 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2934 unformat_input_t * input,
2935 vlib_cli_command_t * cmd)
2939 u32 flow_hash_config = 0;
2942 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2944 if (unformat (input, "table %d", &table_id))
2947 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2948 foreach_flow_hash_bit
2955 return clib_error_return (0, "unknown input `%U'",
2956 format_unformat_error, input);
2958 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
2964 case VNET_API_ERROR_NO_SUCH_FIB:
2965 return clib_error_return (0, "no such FIB table %d", table_id);
2968 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
2976 * Configure the set of IPv4 fields used by the flow hash.
2979 * Example of how to set the flow hash on a given table:
2980 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
2981 * Example of how to display the configured flow hash:
2982 * @cliexstart{show ip fib}
2983 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
2986 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
2987 * [0] [@0]: dpo-drop ip6
2990 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
2991 * [0] [@0]: dpo-drop ip6
2994 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
2995 * [0] [@0]: dpo-drop ip6
2998 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
2999 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3002 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3003 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3004 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3005 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3006 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3009 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3010 * [0] [@0]: dpo-drop ip6
3011 * 255.255.255.255/32
3013 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3014 * [0] [@0]: dpo-drop ip6
3015 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3018 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3019 * [0] [@0]: dpo-drop ip6
3022 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3023 * [0] [@0]: dpo-drop ip6
3026 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3027 * [0] [@4]: ipv4-glean: af_packet0
3030 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3031 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3034 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3035 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3038 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3039 * [0] [@4]: ipv4-glean: af_packet1
3042 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3043 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3046 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3047 * [0] [@0]: dpo-drop ip6
3050 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3051 * [0] [@0]: dpo-drop ip6
3052 * 255.255.255.255/32
3054 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3055 * [0] [@0]: dpo-drop ip6
/* Registers the CLI path "set ip flow-hash" and routes it to
 * set_ip_flow_hash_command_fn. */
3059 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3061 .path = "set ip flow-hash",
3063 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3064 .function = set_ip_flow_hash_command_fn,
/* Attach a classifier table to an interface, or detach it.
 *
 * Records table_index for sw_if_index in
 * lookup_main.classify_table_index_by_sw_if_index.  If the interface has a
 * first IPv4 address, a prefix built from that address is then updated in
 * the interface's IPv4 FIB under FIB_SOURCE_CLASSIFY:
 *   - table_index != ~0: a special entry is added that carries a classify
 *     DPO created for table_index;
 *   - otherwise the FIB_SOURCE_CLASSIFY entry for that prefix is removed
 *     (NOTE(review): the else between the two calls is not visible here --
 *     confirm).
 *
 * sw_if_index - interface to configure
 * table_index - classifier table index; ~0 is accepted without a table
 *               lookup
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * slot in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is neither ~0 nor a live entry of the classifier table pool.
 * `vm` is not referenced in the visible code.
 * NOTE(review): the prefix length used for pfx and the success return value
 * are not visible here. */
3069 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3072 vnet_main_t *vnm = vnet_get_main ();
3073 vnet_interface_main_t *im = &vnm->interface_main;
3074 ip4_main_t *ipm = &ip4_main;
3075 ip_lookup_main_t *lm = &ipm->lookup_main;
3076 vnet_classify_main_t *cm = &vnet_classify_main;
3077 ip4_address_t *if_addr;
/* Reject interface indices that are not in use. */
3079 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3080 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 skips the table-existence check. */
3082 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3083 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Grow the per-interface vector if needed, then record the binding. */
3085 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3086 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
/* The FIB update below only happens when the interface has an address. */
3088 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3090 if (NULL != if_addr)
3092 fib_prefix_t pfx = {
3094 .fp_proto = FIB_PROTOCOL_IP4,
3095 .fp_addr.ip4 = *if_addr,
3099 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3103 if (table_index != (u32) ~ 0)
3105 dpo_id_t dpo = DPO_INVALID;
3110 classify_dpo_create (DPO_PROTO_IP4, table_index));
3112 fib_table_entry_special_dpo_add (fib_index,
3114 FIB_SOURCE_CLASSIFY,
3115 FIB_ENTRY_FLAG_NONE, &dpo);
3120 fib_table_entry_special_remove (fib_index,
3121 &pfx, FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify intfc <interface> table-index <idx>".
 *
 * Reads "table-index <n>" and "intfc <interface>" from `input`, in any
 * order.  Both are mandatory: a missing table-index or interface produces a
 * clib error before anything is changed.  The pair is then applied with
 * vnet_set_ip4_classify_intfc ().
 *
 * Result mapping visible here: VNET_API_ERROR_NO_MATCHING_INTERFACE becomes
 * "No such interface" and VNET_API_ERROR_NO_SUCH_ENTRY becomes
 * "No such classifier table".
 * `cmd` is not referenced in the visible code.
 * NOTE(review): handling of unrecognised tokens, the remaining switch arms
 * and the success return value are not visible here. */
3128 static clib_error_t *
3129 set_ip_classify_command_fn (vlib_main_t * vm,
3130 unformat_input_t * input,
3131 vlib_cli_command_t * cmd)
3133 u32 table_index = ~0;
/* Tracked separately because ~0 is itself an accepted table_index value. */
3134 int table_index_set = 0;
3135 u32 sw_if_index = ~0;
3138 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3140 if (unformat (input, "table-index %d", &table_index))
3141 table_index_set = 1;
3142 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3143 vnet_get_main (), &sw_if_index))
3149 if (table_index_set == 0)
3150 return clib_error_return (0, "classify table-index must be specified");
3152 if (sw_if_index == ~0)
3153 return clib_error_return (0, "interface / subif must be specified");
3155 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3162 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3163 return clib_error_return (0, "No such interface");
3165 case VNET_API_ERROR_NO_SUCH_ENTRY:
3166 return clib_error_return (0, "No such classifier table");
3172 * Assign a classification table to an interface. The classification
3173 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3174 * commands. Once the table is created, use this command to filter packets
3178 * Example of how to assign a classification table to an interface:
3179 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the CLI path "set ip classify" and routes it to
 * set_ip_classify_command_fn. */
3182 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3184 .path = "set ip classify",
3186 "set ip classify intfc <interface> table-index <classify-idx>",
3187 .function = set_ip_classify_command_fn,
3192 * fd.io coding-style-patch-verification: ON
3195 * eval: (c-set-style "gnu")