2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 * @brief IPv4 Forwarding.
59 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the common trace helper; its definition appears
   further down in this file.  The lookup and load-balance nodes call it
   with VLIB_TX when the node has tracing enabled. */
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 cpu_index = os_get_cpu_number ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
100 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
102 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
104 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
106 flow_hash_config_t flow_hash_config0, flow_hash_config1;
107 flow_hash_config_t flow_hash_config2, flow_hash_config3;
108 u32 hash_c0, hash_c1, hash_c2, hash_c3;
109 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
111 /* Prefetch next iteration. */
113 vlib_buffer_t *p4, *p5, *p6, *p7;
115 p4 = vlib_get_buffer (vm, from[4]);
116 p5 = vlib_get_buffer (vm, from[5]);
117 p6 = vlib_get_buffer (vm, from[6]);
118 p7 = vlib_get_buffer (vm, from[7]);
120 vlib_prefetch_buffer_header (p4, LOAD);
121 vlib_prefetch_buffer_header (p5, LOAD);
122 vlib_prefetch_buffer_header (p6, LOAD);
123 vlib_prefetch_buffer_header (p7, LOAD);
125 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
128 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
131 pi0 = to_next[0] = from[0];
132 pi1 = to_next[1] = from[1];
133 pi2 = to_next[2] = from[2];
134 pi3 = to_next[3] = from[3];
141 p0 = vlib_get_buffer (vm, pi0);
142 p1 = vlib_get_buffer (vm, pi1);
143 p2 = vlib_get_buffer (vm, pi2);
144 p3 = vlib_get_buffer (vm, pi3);
146 ip0 = vlib_buffer_get_current (p0);
147 ip1 = vlib_buffer_get_current (p1);
148 ip2 = vlib_buffer_get_current (p2);
149 ip3 = vlib_buffer_get_current (p3);
151 dst_addr0 = &ip0->dst_address;
152 dst_addr1 = &ip1->dst_address;
153 dst_addr2 = &ip2->dst_address;
154 dst_addr3 = &ip3->dst_address;
157 vec_elt (im->fib_index_by_sw_if_index,
158 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
160 vec_elt (im->fib_index_by_sw_if_index,
161 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
163 vec_elt (im->fib_index_by_sw_if_index,
164 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
166 vec_elt (im->fib_index_by_sw_if_index,
167 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
169 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
170 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
172 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
173 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
175 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
176 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
178 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
179 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
182 if (!lookup_for_responses_to_locally_received_packets)
184 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
185 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
186 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
187 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
189 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
191 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
192 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
193 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
194 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
197 tcp0 = (void *) (ip0 + 1);
198 tcp1 = (void *) (ip1 + 1);
199 tcp2 = (void *) (ip2 + 1);
200 tcp3 = (void *) (ip3 + 1);
202 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
203 || ip0->protocol == IP_PROTOCOL_UDP);
204 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
205 || ip1->protocol == IP_PROTOCOL_UDP);
206 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
207 || ip2->protocol == IP_PROTOCOL_UDP);
208 is_tcp_udp3 = (ip1->protocol == IP_PROTOCOL_TCP
209 || ip1->protocol == IP_PROTOCOL_UDP);
211 if (!lookup_for_responses_to_locally_received_packets)
213 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
214 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
215 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
216 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
219 if (!lookup_for_responses_to_locally_received_packets)
221 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
222 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
223 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
224 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
227 if (!lookup_for_responses_to_locally_received_packets)
229 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
230 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
231 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
232 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
235 if (lookup_for_responses_to_locally_received_packets)
237 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
238 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
239 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
240 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
244 /* Handle default route. */
247 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
250 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
253 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
256 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
257 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
258 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
259 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
260 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
263 lb0 = load_balance_get (lb_index0);
264 lb1 = load_balance_get (lb_index1);
265 lb2 = load_balance_get (lb_index2);
266 lb3 = load_balance_get (lb_index3);
268 /* Use flow hash to compute multipath adjacency. */
269 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
270 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
271 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
272 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
273 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
275 flow_hash_config0 = lb0->lb_hash_config;
276 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
277 ip4_compute_flow_hash (ip0, flow_hash_config0);
279 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
281 flow_hash_config1 = lb1->lb_hash_config;
282 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
283 ip4_compute_flow_hash (ip1, flow_hash_config1);
285 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
287 flow_hash_config2 = lb2->lb_hash_config;
288 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
289 ip4_compute_flow_hash (ip2, flow_hash_config2);
291 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
293 flow_hash_config3 = lb3->lb_hash_config;
294 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
295 ip4_compute_flow_hash (ip3, flow_hash_config3);
298 ASSERT (lb0->lb_n_buckets > 0);
299 ASSERT (is_pow2 (lb0->lb_n_buckets));
300 ASSERT (lb1->lb_n_buckets > 0);
301 ASSERT (is_pow2 (lb1->lb_n_buckets));
302 ASSERT (lb2->lb_n_buckets > 0);
303 ASSERT (is_pow2 (lb2->lb_n_buckets));
304 ASSERT (lb3->lb_n_buckets > 0);
305 ASSERT (is_pow2 (lb3->lb_n_buckets));
307 dpo0 = load_balance_get_bucket_i (lb0,
309 (lb0->lb_n_buckets_minus_1)));
310 dpo1 = load_balance_get_bucket_i (lb1,
312 (lb1->lb_n_buckets_minus_1)));
313 dpo2 = load_balance_get_bucket_i (lb2,
315 (lb2->lb_n_buckets_minus_1)));
316 dpo3 = load_balance_get_bucket_i (lb3,
318 (lb3->lb_n_buckets_minus_1)));
320 next0 = dpo0->dpoi_next_node;
321 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
322 next1 = dpo1->dpoi_next_node;
323 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
324 next2 = dpo2->dpoi_next_node;
325 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
326 next3 = dpo3->dpoi_next_node;
327 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
329 vlib_increment_combined_counter
330 (cm, cpu_index, lb_index0, 1,
331 vlib_buffer_length_in_chain (vm, p0)
332 + sizeof (ethernet_header_t));
333 vlib_increment_combined_counter
334 (cm, cpu_index, lb_index1, 1,
335 vlib_buffer_length_in_chain (vm, p1)
336 + sizeof (ethernet_header_t));
337 vlib_increment_combined_counter
338 (cm, cpu_index, lb_index2, 1,
339 vlib_buffer_length_in_chain (vm, p2)
340 + sizeof (ethernet_header_t));
341 vlib_increment_combined_counter
342 (cm, cpu_index, lb_index3, 1,
343 vlib_buffer_length_in_chain (vm, p3)
344 + sizeof (ethernet_header_t));
346 vlib_validate_buffer_enqueue_x4 (vm, node, next,
347 to_next, n_left_to_next,
349 next0, next1, next2, next3);
352 while (n_left_from > 0 && n_left_to_next > 0)
356 __attribute__ ((unused)) tcp_header_t *tcp0;
357 ip_lookup_next_t next0;
358 const load_balance_t *lb0;
359 ip4_fib_mtrie_t *mtrie0;
360 ip4_fib_mtrie_leaf_t leaf0;
361 ip4_address_t *dst_addr0;
362 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
363 flow_hash_config_t flow_hash_config0;
364 const dpo_id_t *dpo0;
370 p0 = vlib_get_buffer (vm, pi0);
372 ip0 = vlib_buffer_get_current (p0);
374 dst_addr0 = &ip0->dst_address;
377 vec_elt (im->fib_index_by_sw_if_index,
378 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
380 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
381 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
383 if (!lookup_for_responses_to_locally_received_packets)
385 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
387 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
389 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
392 tcp0 = (void *) (ip0 + 1);
394 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
395 || ip0->protocol == IP_PROTOCOL_UDP);
397 if (!lookup_for_responses_to_locally_received_packets)
398 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
400 if (!lookup_for_responses_to_locally_received_packets)
401 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
403 if (!lookup_for_responses_to_locally_received_packets)
404 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
406 if (lookup_for_responses_to_locally_received_packets)
407 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
410 /* Handle default route. */
413 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
414 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
417 lb0 = load_balance_get (lbi0);
419 /* Use flow hash to compute multipath adjacency. */
420 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
421 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
423 flow_hash_config0 = lb0->lb_hash_config;
425 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
426 ip4_compute_flow_hash (ip0, flow_hash_config0);
429 ASSERT (lb0->lb_n_buckets > 0);
430 ASSERT (is_pow2 (lb0->lb_n_buckets));
432 dpo0 = load_balance_get_bucket_i (lb0,
434 (lb0->lb_n_buckets_minus_1)));
436 next0 = dpo0->dpoi_next_node;
437 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
439 vlib_increment_combined_counter
440 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
447 if (PREDICT_FALSE (next0 != next))
450 vlib_put_next_frame (vm, node, next, n_left_to_next);
452 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
459 vlib_put_next_frame (vm, node, next, n_left_to_next);
462 if (node->flags & VLIB_NODE_FLAG_TRACE)
463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
465 return frame->n_vectors;
/* Node function registered below as "ip4-lookup": it forwards vm, node and
   frame to ip4_lookup_inline().
   NOTE(review): the description that follows speaks of an adjacency and of
   adj->lookup_next_index, whereas ip4_lookup_inline() takes the next node
   from the selected load-balance bucket (dpo->dpoi_next_node) and stores
   dpo->dpoi_index in ip.adj_index[VLIB_TX] - consider refreshing the text. */
468 /** @brief IPv4 lookup node.
471 This is the main IPv4 lookup dispatch node.
473 @param vm vlib_main_t corresponding to the current thread
474 @param node vlib_node_runtime_t
475 @param frame vlib_frame_t whose contents should be dispatched
477 @par Graph mechanics: buffer metadata, next index usage
480 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
481 - Indicates the @c sw_if_index value of the interface that the
482 packet was received on.
483 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
484 - When the value is @c ~0 then the node performs a longest prefix
485 match (LPM) for the packet destination address in the FIB attached
486 to the receive interface.
487 - Otherwise perform LPM for the packet destination address in the
488 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
489 value (0, 1, ...) and not a VRF id.
492 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
493 - The lookup result adjacency index.
496 - Dispatches the packet to the node index found in
497 ip_adjacency_t @c adj->lookup_next_index
498 (where @c adj is the lookup result adjacency).
501 ip4_lookup (vlib_main_t * vm,
502 vlib_node_runtime_t * node, vlib_frame_t * frame)
504 return ip4_lookup_inline (vm, node, frame,
505 /* lookup_for_responses_to_locally_received_packets */
/* Trace formatter defined later in this file; declared here so the node
   registration below can reference it. */
510 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registers graph node "ip4-lookup" with ip4_lookup as its function, u32
   vector elements, IP_LOOKUP_N_NEXT next nodes taken from
   IP4_LOOKUP_NEXT_NODES, and format_ip4_lookup_trace as trace formatter. */
512 VLIB_REGISTER_NODE (ip4_lookup_node) =
514 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
515 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
516 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
518 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/* Node function for "ip4-load-balance".
   For each buffer it reads a load-balance index from
   vnet_buffer(b)->ip.adj_index[VLIB_TX], chooses a bucket with
   (hash & lb_n_buckets_minus_1), overwrites adj_index[VLIB_TX] with the
   bucket's dpoi_index and enqueues the buffer to the bucket's
   dpoi_next_node.  When the load-balance has more than one bucket the hash
   is the buffer's existing ip.flow_hash shifted right by one if that is
   non-zero, otherwise a freshly computed ip4_compute_flow_hash(); the
   value used is written back to ip.flow_hash.  Per-load-balance counters
   (lbm_via_counters) are bumped by one packet and the buffer chain length.
   Buffers are handled two at a time while at least four remain, then one
   at a time.  Returns frame->n_vectors. */
521 ip4_load_balance (vlib_main_t * vm,
522 vlib_node_runtime_t * node, vlib_frame_t * frame)
524 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
525 u32 n_left_from, n_left_to_next, *from, *to_next;
526 ip_lookup_next_t next;
527 u32 cpu_index = os_get_cpu_number ();
529 from = vlib_frame_vector_args (frame);
530 n_left_from = frame->n_vectors;
531 next = node->cached_next_index;
/* Tracing is done up front, i.e. before adj_index[VLIB_TX] is rewritten
   by the loops below. */
533 if (node->flags & VLIB_NODE_FLAG_TRACE)
534 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
536 while (n_left_from > 0)
538 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: process from[0..1], prefetch from[2..3]. */
541 while (n_left_from >= 4 && n_left_to_next >= 2)
543 ip_lookup_next_t next0, next1;
544 const load_balance_t *lb0, *lb1;
545 vlib_buffer_t *p0, *p1;
546 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
547 const ip4_header_t *ip0, *ip1;
548 const dpo_id_t *dpo0, *dpo1;
550 /* Prefetch next iteration. */
552 vlib_buffer_t *p2, *p3;
554 p2 = vlib_get_buffer (vm, from[2]);
555 p3 = vlib_get_buffer (vm, from[3]);
557 vlib_prefetch_buffer_header (p2, STORE);
558 vlib_prefetch_buffer_header (p3, STORE);
560 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
561 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
564 pi0 = to_next[0] = from[0];
565 pi1 = to_next[1] = from[1];
572 p0 = vlib_get_buffer (vm, pi0);
573 p1 = vlib_get_buffer (vm, pi1);
575 ip0 = vlib_buffer_get_current (p0);
576 ip1 = vlib_buffer_get_current (p1);
/* On entry adj_index[VLIB_TX] holds a load-balance index. */
577 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
578 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
580 lb0 = load_balance_get (lbi0);
581 lb1 = load_balance_get (lbi1);
584 * this node is for via FIBs we can re-use the hash value from the
585 * to node if present.
586 * We don't want to use the same hash value at each level in the recursion
587 * graph as that would lead to polarisation
591 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
593 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
595 hc0 = vnet_buffer (p0)->ip.flow_hash =
596 vnet_buffer (p0)->ip.flow_hash >> 1;
600 hc0 = vnet_buffer (p0)->ip.flow_hash =
601 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
604 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
606 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
608 hc1 = vnet_buffer (p1)->ip.flow_hash =
609 vnet_buffer (p1)->ip.flow_hash >> 1;
613 hc1 = vnet_buffer (p1)->ip.flow_hash =
614 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
619 load_balance_get_bucket_i (lb0,
620 hc0 & (lb0->lb_n_buckets_minus_1));
622 load_balance_get_bucket_i (lb1,
623 hc1 & (lb1->lb_n_buckets_minus_1));
625 next0 = dpo0->dpoi_next_node;
626 next1 = dpo1->dpoi_next_node;
628 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
631 vlib_increment_combined_counter
632 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633 vlib_increment_combined_counter
634 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
636 vlib_validate_buffer_enqueue_x2 (vm, node, next,
637 to_next, n_left_to_next,
638 pi0, pi1, next0, next1);
641 while (n_left_from > 0 && n_left_to_next > 0)
643 ip_lookup_next_t next0;
644 const load_balance_t *lb0;
647 const ip4_header_t *ip0;
648 const dpo_id_t *dpo0;
657 p0 = vlib_get_buffer (vm, pi0);
659 ip0 = vlib_buffer_get_current (p0);
660 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
662 lb0 = load_balance_get (lbi0);
665 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
667 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
669 hc0 = vnet_buffer (p0)->ip.flow_hash =
670 vnet_buffer (p0)->ip.flow_hash >> 1;
674 hc0 = vnet_buffer (p0)->ip.flow_hash =
675 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
680 load_balance_get_bucket_i (lb0,
681 hc0 & (lb0->lb_n_buckets_minus_1));
683 next0 = dpo0->dpoi_next_node;
684 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
686 vlib_increment_combined_counter
687 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
689 vlib_validate_buffer_enqueue_x1 (vm, node, next,
690 to_next, n_left_to_next,
694 vlib_put_next_frame (vm, node, next, n_left_to_next);
697 return frame->n_vectors;
/* Registers graph node "ip4-load-balance" with ip4_load_balance as its
   function.  It is declared a sibling of "ip4-lookup" and reuses
   format_ip4_lookup_trace for trace output. */
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704 format_ip4_lookup_trace,};
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
708 /* get first interface address */
/* Iterates the addresses of sw_if_index via foreach_ip_interface_address
   (with the "honor unnumbered" argument set to 1) and reads each one with
   ip_interface_address_get_address().  *result_ia is set to the matching
   ip_interface_address_t when an address was found, otherwise to 0.
   NOTE(review): the lines that assign `result` and return are not visible
   in this listing - presumably the first address is returned; confirm. */
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711 ip_interface_address_t ** result_ia)
713 ip_lookup_main_t *lm = &im->lookup_main;
714 ip_interface_address_t *ia = 0;
715 ip4_address_t *result = 0;
718 foreach_ip_interface_address
719 (lm, ia, sw_if_index,
720 1 /* honor unnumbered */ ,
723 ip_interface_address_get_address (lm, ia);
729 *result_ia = result ? ia : 0;
/* Installs the FIB entries that go with interface address `a` in fib_index:
   - an entry sourced FIB_SOURCE_INTERFACE with flags CONNECTED|ATTACHED
     for the address prefix; its adjacency is remembered in
     a->neighbor_probe_adj_index (which is first reset to ~0);
   - when a classify table is configured for sw_if_index, a special DPO
     entry built from classify_dpo_create();
   - an entry sourced FIB_SOURCE_INTERFACE with flags CONNECTED|LOCAL.
   NOTE(review): the statements that change pfx.fp_len between these steps
   and the guards around them are not visible in this listing - presumably
   the first entry covers the subnet and the later ones the host address;
   confirm against the full file. */
734 ip4_add_interface_routes (u32 sw_if_index,
735 ip4_main_t * im, u32 fib_index,
736 ip_interface_address_t * a)
738 ip_lookup_main_t *lm = &im->lookup_main;
739 ip4_address_t *address = ip_interface_address_get_address (lm, a);
741 .fp_len = a->address_length,
742 .fp_proto = FIB_PROTOCOL_IP4,
743 .fp_addr.ip4 = *address,
746 a->neighbor_probe_adj_index = ~0;
750 fib_node_index_t fei;
752 fei = fib_table_entry_update_one_path (fib_index, &pfx,
753 FIB_SOURCE_INTERFACE,
754 (FIB_ENTRY_FLAG_CONNECTED |
755 FIB_ENTRY_FLAG_ATTACHED),
757 /* No next-hop address */
763 // no out-label stack
765 FIB_ROUTE_PATH_FLAG_NONE);
766 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
/* The classify table vector may be shorter than sw_if_index; only index
   it when in range, and treat ~0 as "no table". */
771 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
773 u32 classify_table_index =
774 lm->classify_table_index_by_sw_if_index[sw_if_index];
775 if (classify_table_index != (u32) ~ 0)
777 dpo_id_t dpo = DPO_INVALID;
782 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
784 fib_table_entry_special_dpo_add (fib_index,
787 FIB_ENTRY_FLAG_NONE, &dpo);
792 fib_table_entry_update_one_path (fib_index, &pfx,
793 FIB_SOURCE_INTERFACE,
794 (FIB_ENTRY_FLAG_CONNECTED |
795 FIB_ENTRY_FLAG_LOCAL),
802 FIB_ROUTE_PATH_FLAG_NONE);
/* Removes FIB_SOURCE_INTERFACE entries for the given address from
   fib_index.  fib_table_entry_delete() is called twice with the same pfx
   variable.
   NOTE(review): the statements between the two calls are not visible here -
   presumably the first call is for the subnet prefix and the second for
   the host prefix, mirroring ip4_add_interface_routes(); confirm.  Only
   FIB_SOURCE_INTERFACE entries are deleted in the visible code. */
806 ip4_del_interface_routes (ip4_main_t * im,
808 ip4_address_t * address, u32 address_length)
811 .fp_len = address_length,
812 .fp_proto = FIB_PROTOCOL_IP4,
813 .fp_addr.ip4 = *address,
818 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
822 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 on an interface.
   im->ip_enabled_by_sw_if_index[sw_if_index] is grown on demand
   (new slots start at 0), pre-incremented on one path and pre-decremented
   on the other; the feature calls at the end toggle "ip4-drop" on the
   "ip4-unicast" arc and "ip4-mfib-forward-lookup" on the "ip4-multicast"
   arc.
   NOTE(review): the branch/return lines are not visible in this listing -
   presumably the feature calls are skipped unless the count crossed the
   0<->1 boundary, as the original comment states; confirm.  The decrement
   is protected only by ASSERT, so a disable without a matching enable is
   unchecked when assertions are compiled out. */
826 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
828 ip4_main_t *im = &ip4_main;
830 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
833 * enable/disable only on the 1<->0 transition
837 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
842 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
843 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
846 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
850 vnet_feature_enable_disable ("ip4-multicast",
851 "ip4-mfib-forward-lookup",
852 sw_if_index, is_enable, 0, 0);
855 static clib_error_t *
856 ip4_add_del_interface_address_internal (vlib_main_t * vm,
858 ip4_address_t * address,
859 u32 address_length, u32 is_del)
861 vnet_main_t *vnm = vnet_get_main ();
862 ip4_main_t *im = &ip4_main;
863 ip_lookup_main_t *lm = &im->lookup_main;
864 clib_error_t *error = 0;
865 u32 if_address_index, elts_before;
866 ip4_address_fib_t ip4_af, *addr_fib = 0;
868 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
869 ip4_addr_fib_init (&ip4_af, address,
870 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
871 vec_add1 (addr_fib, ip4_af);
874 * there is no support for adj-fib handling in the presence of overlapping
875 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
881 /* When adding an address check that it does not conflict
882 with an existing address. */
883 ip_interface_address_t *ia;
884 foreach_ip_interface_address
885 (&im->lookup_main, ia, sw_if_index,
886 0 /* honor unnumbered */ ,
889 ip_interface_address_get_address
890 (&im->lookup_main, ia);
891 if (ip4_destination_matches_route
892 (im, address, x, ia->address_length) ||
893 ip4_destination_matches_route (im,
899 ("failed to add %U which conflicts with %U for interface %U",
900 format_ip4_address_and_length, address,
902 format_ip4_address_and_length, x,
904 format_vnet_sw_if_index_name, vnm,
910 elts_before = pool_elts (lm->if_address_pool);
912 error = ip_interface_address_add_del
913 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
917 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
920 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
922 ip4_add_interface_routes (sw_if_index,
923 im, ip4_af.fib_index,
925 (lm->if_address_pool, if_address_index));
927 /* If pool did not grow/shrink: add duplicate address. */
928 if (elts_before != pool_elts (lm->if_address_pool))
930 ip4_add_del_interface_address_callback_t *cb;
931 vec_foreach (cb, im->add_del_interface_address_callbacks)
932 cb->function (im, cb->function_opaque, sw_if_index,
933 address, address_length, if_address_index, is_del);
/* Public entry point: passes all of its arguments unchanged to
   ip4_add_del_interface_address_internal() and returns its result. */
942 ip4_add_del_interface_address (vlib_main_t * vm,
944 ip4_address_t * address,
945 u32 address_length, u32 is_del)
947 return ip4_add_del_interface_address_internal
948 (vm, sw_if_index, address, address_length, is_del);
/* Feature-arc registrations.  Each VNET_FEATURE_ARC_INIT names an arc, its
   start nodes, its end node and where the arc index is stored in
   ip4_main.lookup_main.  Each VNET_FEATURE_INIT places one node on an arc;
   .runs_before lists the feature(s) this one must precede, so the entries
   below spell out the relative order within each arc. */
951 /* Built-in ip4 unicast rx feature path definition */
953 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
955 .arc_name = "ip4-unicast",
956 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
957 .end_node = "ip4-lookup",
958 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
961 VNET_FEATURE_INIT (ip4_flow_classify, static) =
963 .arc_name = "ip4-unicast",
964 .node_name = "ip4-flow-classify",
965 .runs_before = VNET_FEATURES ("ip4-inacl"),
968 VNET_FEATURE_INIT (ip4_inacl, static) =
970 .arc_name = "ip4-unicast",
971 .node_name = "ip4-inacl",
972 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
975 VNET_FEATURE_INIT (ip4_source_check_1, static) =
977 .arc_name = "ip4-unicast",
978 .node_name = "ip4-source-check-via-rx",
979 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
982 VNET_FEATURE_INIT (ip4_source_check_2, static) =
984 .arc_name = "ip4-unicast",
985 .node_name = "ip4-source-check-via-any",
986 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
/* Only constrained to precede "ip4-policer-classify"; no visible entry
   orders it relative to the source-check features above. */
989 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
991 .arc_name = "ip4-unicast",
992 .node_name = "ip4-source-and-port-range-check-rx",
993 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
996 VNET_FEATURE_INIT (ip4_policer_classify, static) =
998 .arc_name = "ip4-unicast",
999 .node_name = "ip4-policer-classify",
1000 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1003 VNET_FEATURE_INIT (ip4_ipsec, static) =
1005 .arc_name = "ip4-unicast",
1006 .node_name = "ipsec-input-ip4",
1007 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1010 VNET_FEATURE_INIT (ip4_vpath, static) =
1012 .arc_name = "ip4-unicast",
1013 .node_name = "vpath-input-ip4",
1014 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1017 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1019 .arc_name = "ip4-unicast",
1020 .node_name = "ip4-vxlan-bypass",
1021 .runs_before = VNET_FEATURES ("ip4-lookup"),
1024 VNET_FEATURE_INIT (ip4_lookup, static) =
1026 .arc_name = "ip4-unicast",
1027 .node_name = "ip4-lookup",
1028 .runs_before = VNET_FEATURES ("ip4-drop"),
1031 VNET_FEATURE_INIT (ip4_drop, static) =
1033 .arc_name = "ip4-unicast",
1034 .node_name = "ip4-drop",
1035 .runs_before = 0, /* not before any other features */
1039 /* Built-in ip4 multicast rx feature path definition */
/* NOTE(review): .end_node below is "ip4-lookup-multicast", but the features
   registered on this arc in the visible text are "vpath-input-ip4",
   "ip4-mfib-forward-lookup" and "ip4-drop"; the other two arcs name an end
   node that is also registered as a feature.  Confirm that a node called
   "ip4-lookup-multicast" still exists. */
1040 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1042 .arc_name = "ip4-multicast",
1043 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1044 .end_node = "ip4-lookup-multicast",
1045 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1048 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1050 .arc_name = "ip4-multicast",
1051 .node_name = "vpath-input-ip4",
1052 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1055 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1057 .arc_name = "ip4-multicast",
1058 .node_name = "ip4-mfib-forward-lookup",
1059 .runs_before = VNET_FEATURES ("ip4-drop"),
1062 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1064 .arc_name = "ip4-multicast",
1065 .node_name = "ip4-drop",
1066 .runs_before = 0, /* last feature */
1069 /* Source and port-range check ip4 tx feature path definition */
1070 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1072 .arc_name = "ip4-output",
1073 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1074 .end_node = "interface-output",
1075 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1078 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1080 .arc_name = "ip4-output",
1081 .node_name = "ip4-source-and-port-range-check-tx",
1082 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1085 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1087 .arc_name = "ip4-output",
1088 .node_name = "ipsec-output-ip4",
1089 .runs_before = VNET_FEATURES ("interface-output"),
1092 /* Built-in ip4 tx feature path definition */
1093 VNET_FEATURE_INIT (ip4_interface_output, static) =
1095 .arc_name = "ip4-output",
1096 .node_name = "interface-output",
1097 .runs_before = 0, /* not before any other features */
/* Software-interface add/delete hook (registered just below with
   VNET_SW_INTERFACE_ADD_DEL_FUNCTION).  Makes sure the unicast and
   multicast FIB-index vectors have a slot for sw_if_index, then calls
   vnet_feature_enable_disable() for "ip4-drop" on both the "ip4-unicast"
   and "ip4-multicast" arcs.  Always returns 0.
   NOTE(review): the trailing arguments of the two feature calls are not
   visible in this listing - presumably they pass is_add; confirm. */
1101 static clib_error_t *
1102 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1104 ip4_main_t *im = &ip4_main;
1106 /* Fill in lookup tables with default table (0). */
1107 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1108 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1110 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1113 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1116 return /* no error */ 0;
1119 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1121 /* Global IP4 main. */
/* Single instance referenced throughout this file as &ip4_main. */
1122 ip4_main_t ip4_main;
/* Init function for IPv4 lookup (registered below with
   VLIB_INIT_FUNCTION).  In order it:
   - runs vnet_feature_init first and captures its error;
   - fills im->fib_masks[i] with a network-byte-order mask derived from
     pow2_mask (i) << (32 - i);
   - calls ip_lookup_init() for IPv4;
   - creates/locks unicast and multicast FIB table 0;
   - hooks unformat_pg_ip4_header into the packet generator node that
     corresponds to ip4_lookup_node;
   - builds an ARP request header (ethernet hardware type, IPv4 protocol
     type, 6/4 byte address lengths, request opcode, everything else zero)
     and installs it as im->ip4_arp_request_packet_template.
   NOTE(review): some statements (the guards around the mask computation
   and the final return) are not visible in this listing. */
1125 ip4_lookup_init (vlib_main_t * vm)
1127 ip4_main_t *im = &ip4_main;
1128 clib_error_t *error;
1131 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1134 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1139 m = pow2_mask (i) << (32 - i);
1142 im->fib_masks[i] = clib_host_to_net_u32 (m);
1145 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1147 /* Create FIB with index 0 and table id of 0. */
1148 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1149 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1153 pn = pg_get_node (ip4_lookup_node.index);
1154 pn->unformat_edit = unformat_pg_ip4_header;
1158 ethernet_arp_header_t h;
1160 memset (&h, 0, sizeof (h));
1162 /* Set target ethernet address to all zeros. */
1163 memset (h.ip4_over_ethernet[1].ethernet, 0,
1164 sizeof (h.ip4_over_ethernet[1].ethernet));
/* Local helpers: _16 stores a 16-bit field in network byte order, _8
   stores a byte as is. */
1166 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1167 #define _8(f,v) h.f = v;
1168 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1169 _16 (l3_type, ETHERNET_TYPE_IP4);
1170 _8 (n_l2_address_bytes, 6);
1171 _8 (n_l3_address_bytes, 4);
1172 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1176 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1179 /* alloc chunk size */ 8,
1186 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the trace formatters below, which read
   its fib_index, dpo_index, flow_hash and packet_data members.  The scalar
   members themselves are declared on lines not visible in this listing. */
1190 /* Adjacency taken. */
1195 /* Packet data, possibly *after* rewrite. */
1196 u8 packet_data[64 - 1 * sizeof (u32)];
1198 ip4_forward_next_trace_t;
/* Trace formatter: consumes (vlib_main_t *, vlib_node_t *,
   ip4_forward_next_trace_t *) from args - the first two are unused - and
   appends the captured packet bytes rendered by format_ip4_header, indented
   to the current indent of s. */
1201 format_ip4_forward_next_trace (u8 * s, va_list * args)
1203 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1204 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1205 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1206 uword indent = format_get_indent (s);
1207 s = format (s, "%U%U",
1208 format_white_space, indent,
1209 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup nodes: prints the FIB index, DPO index and
 * flow hash on one line, then the captured bytes decoded as an IPv4 header
 * on the next line at the current indent.
 * args: vlib_main_t * (unused), vlib_node_t * (unused),
 *       ip4_forward_next_trace_t * (the record to print).
 */
1214 format_ip4_lookup_trace (u8 * s, va_list * args)
1216 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1217 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1218 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1219 uword indent = format_get_indent (s);
1221 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1222 t->fib_index, t->dpo_index, t->flow_hash);
1223 s = format (s, "\n%U%U",
1224 format_white_space, indent,
1225 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite nodes: prints the adjacency taken (index and
 * formatted adjacency) and the flow hash, then the captured packet bytes
 * formatted through the adjacency (format_ip_adjacency_packet_data).
 * args: vlib_main_t * (unused), vlib_node_t * (unused),
 *       ip4_forward_next_trace_t * (the record to print).
 *
 * NOTE(review): the value printed under the "tx_sw_if_index" label is
 * t->fib_index; confirm that this field really carries the TX interface
 * index for the nodes that use this formatter.
 */
1230 format_ip4_rewrite_trace (u8 * s, va_list * args)
1232 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1233 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1234 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1235 uword indent = format_get_indent (s);
1237 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1238 t->fib_index, t->dpo_index, format_ip_adjacency,
1239 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1240 s = format (s, "\n%U%U",
1241 format_white_space, indent,
1242 format_ip_adjacency_packet_data,
1243 t->dpo_index, t->packet_data, sizeof (t->packet_data));
1247 /* Common trace function for all ip4-forward next nodes. */
/*
 * Walks every buffer of the frame and, for each one flagged
 * VLIB_BUFFER_IS_TRACED, adds an ip4_forward_next_trace_t record holding the
 * adjacency index selected by which_adj_index (VLIB_RX or VLIB_TX slot of
 * ip.adj_index), the flow hash, an interface/FIB index and the first
 * sizeof (packet_data) bytes at the buffer's current position.
 * Buffers are handled two at a time first, then one at a time.
 */
1249 ip4_forward_next_trace (vlib_main_t * vm,
1250 vlib_node_runtime_t * node,
1251 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1254 ip4_main_t *im = &ip4_main;
1256 n_left = frame->n_vectors;
1257 from = vlib_frame_vector_args (frame);
1262 vlib_buffer_t *b0, *b1;
1263 ip4_forward_next_trace_t *t0, *t1;
1265 /* Prefetch next iteration. */
1266 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1267 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1272 b0 = vlib_get_buffer (vm, bi0);
1273 b1 = vlib_get_buffer (vm, bi1);
1275 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1277 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1278 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1279 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Record the TX sw_if_index slot when it is set (!= ~0), otherwise the FIB
   index bound to the RX interface. */
1281 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1282 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1283 vec_elt (im->fib_index_by_sw_if_index,
1284 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1286 clib_memcpy (t0->packet_data,
1287 vlib_buffer_get_current (b0),
1288 sizeof (t0->packet_data));
/* Same capture for the second buffer of the pair. */
1290 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1292 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1293 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1294 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1296 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1297 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1298 vec_elt (im->fib_index_by_sw_if_index,
1299 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1300 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1301 sizeof (t1->packet_data));
/* Single-buffer path for whatever the paired loop left over. */
1311 ip4_forward_next_trace_t *t0;
1315 b0 = vlib_get_buffer (vm, bi0);
1317 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1319 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1320 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1321 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1323 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1324 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1325 vec_elt (im->fib_index_by_sw_if_index,
1326 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1327 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1328 sizeof (t0->packet_data));
/*
 * Shared body of the ip4 drop and punt nodes: hands all buffers of the frame
 * to vlib_error_drop_buffers, charging error_code against the ip4-input
 * node's error counters, and records traces when tracing is enabled on this
 * node.
 */
1336 ip4_drop_or_punt (vlib_main_t * vm,
1337 vlib_node_runtime_t * node,
1338 vlib_frame_t * frame, ip4_error_t error_code)
1340 u32 *buffers = vlib_frame_vector_args (frame);
1341 uword n_packets = frame->n_vectors;
1343 vlib_error_drop_buffers (vm, node, buffers,
1347 ip4_input_node.index, error_code);
1349 if (node->flags & VLIB_NODE_FLAG_TRACE)
1350 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for ip4 drop: counts the frame as IP4_ERROR_ADJACENCY_DROP. */
1356 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1358 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function for ip4 punt: counts the frame as IP4_ERROR_ADJACENCY_PUNT. */
1362 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1364 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph node running ip4_drop; traces use format_ip4_forward_next_trace. */
1368 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1370 .function = ip4_drop,.
1372 .vector_size = sizeof (u32),
1373 .format_trace = format_ip4_forward_next_trace,
1380 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph node running ip4_punt; traces use format_ip4_forward_next_trace. */
1382 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1384 .function = ip4_punt,
1386 .vector_size = sizeof (u32),
1387 .format_trace = format_ip4_forward_next_trace,
1394 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1397 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * vm  - used to follow the buffer chain.
 * p0  - first buffer of the packet; later buffers are reached through
 *       next_buffer.
 * ip0 - IPv4 header of the packet (declared on a line not visible here).
 * The sum is seeded with pseudo-header fields, extended over the L4 payload
 * across all chained buffers, folded to 16 bits and complemented into sum16.
 */
1399 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1403 u32 ip_header_length, payload_length_host_byte_order;
1404 u32 n_this_buffer, n_bytes_left;
1406 void *data_this_buffer;
1408 /* Initialize checksum with ip header. */
1409 ip_header_length = ip4_header_bytes (ip0);
1410 payload_length_host_byte_order =
1411 clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Seed: L4 length plus the protocol number shifted into the upper 16 bits,
   converted to network byte order. */
1413 clib_host_to_net_u32 (payload_length_host_byte_order +
1414 (ip0->protocol << 16));
/* Add source and destination addresses: two 32-bit loads when uword is
   32 bits wide, otherwise a single 64-bit load starting at src_address
   (presumably dst_address directly follows src_address in the header). */
1416 if (BITS (uword) == 32)
1419 ip_csum_with_carry (sum0,
1420 clib_mem_unaligned (&ip0->src_address, u32));
1422 ip_csum_with_carry (sum0,
1423 clib_mem_unaligned (&ip0->dst_address, u32));
1427 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1429 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1430 data_this_buffer = (void *) ip0 + ip_header_length;
/* The first buffer may hold only part of the payload: limit this pass to
   the bytes actually present after the IP header (0 if there are none). */
1431 if (n_this_buffer + ip_header_length > p0->current_length)
1433 p0->current_length >
1434 ip_header_length ? p0->current_length - ip_header_length : 0;
/* Accumulate one buffer at a time; stop once every payload byte is summed,
   otherwise move on to the next buffer of the chain. */
1437 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1438 n_bytes_left -= n_this_buffer;
1439 if (n_bytes_left == 0)
1442 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1443 p0 = vlib_get_buffer (vm, p0->next_buffer);
1444 data_this_buffer = vlib_buffer_get_current (p0);
1445 n_this_buffer = p0->current_length;
1448 sum16 = ~ip_csum_fold (sum0);
/*
 * Verifies the TCP/UDP checksum of the packet in p0 and records the outcome
 * in p0->flags: IP_BUFFER_L4_CHECKSUM_COMPUTED is always set, and
 * IP_BUFFER_L4_CHECKSUM_CORRECT is set when the computed sum is 0.
 * A UDP packet whose checksum field is 0 is marked computed and correct
 * without summing anything.  Only TCP and UDP are expected (asserted).
 */
1454 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1456 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1460 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1461 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): the L4 header is taken to start right after a fixed-size
   ip4_header_t here (ip0 + 1), i.e. IP options are not skipped - confirm. */
1463 udp0 = (void *) (ip0 + 1);
1464 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1466 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1467 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1471 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1473 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1474 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Feature arc "ip4-local"; it starts at the ip4-local node. */
1480 VNET_FEATURE_ARC_INIT (ip4_local) =
1482 .arc_name = "ip4-local",
1483 .start_nodes = VNET_FEATURES ("ip4-local"),
/*
 * Body shared by ip4-local (head_of_feature_arc = 1) and
 * ip4-local-end-of-arc (head_of_feature_arc = 0).
 *
 * Per packet, as far as visible here:
 *  - looks up the packet's SOURCE address in the mtrie of the FIB selected
 *    by the RX interface (or by sw_if_index[VLIB_TX] when that is set),
 *    one step at a time interleaved with the other checks;
 *  - checks the TCP/UDP checksum and the UDP length;
 *  - applies the source checks (spoofed local source, no route to source);
 *  - picks the next node from lm->local_next_by_ip_protocol[proto], or drop
 *    when any check failed, and starts the ip4-local feature arc when this
 *    is the head of the arc.
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL is used as the "no error so far" value: later
 * checks only replace it, and any other value forces IP_LOCAL_NEXT_DROP.
 * Packets are processed in pairs while at least 4 remain, then singly.
 * Returns frame->n_vectors.
 */
1488 ip4_local_inline (vlib_main_t * vm,
1489 vlib_node_runtime_t * node,
1490 vlib_frame_t * frame, int head_of_feature_arc)
1492 ip4_main_t *im = &ip4_main;
1493 ip_lookup_main_t *lm = &im->lookup_main;
1494 ip_local_next_t next_index;
1495 u32 *from, *to_next, n_left_from, n_left_to_next;
/* Errors are always looked up in the ip4-input node's error table. */
1496 vlib_node_runtime_t *error_node =
1497 vlib_node_get_runtime (vm, ip4_input_node.index);
1498 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1500 from = vlib_frame_vector_args (frame);
1501 n_left_from = frame->n_vectors;
1502 next_index = node->cached_next_index;
1504 if (node->flags & VLIB_NODE_FLAG_TRACE)
1505 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1507 while (n_left_from > 0)
1509 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet loop. */
1511 while (n_left_from >= 4 && n_left_to_next >= 2)
1513 vlib_buffer_t *p0, *p1;
1514 ip4_header_t *ip0, *ip1;
1515 udp_header_t *udp0, *udp1;
1516 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1517 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1518 const dpo_id_t *dpo0, *dpo1;
1519 const load_balance_t *lb0, *lb1;
1520 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1521 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1522 i32 len_diff0, len_diff1;
1523 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1524 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1525 u32 sw_if_index0, sw_if_index1;
1527 pi0 = to_next[0] = from[0];
1528 pi1 = to_next[1] = from[1];
1532 n_left_to_next -= 2;
1534 next0 = next1 = IP_LOCAL_NEXT_DROP;
1536 p0 = vlib_get_buffer (vm, pi0);
1537 p1 = vlib_get_buffer (vm, pi1);
1539 ip0 = vlib_buffer_get_current (p0);
1540 ip1 = vlib_buffer_get_current (p1);
1542 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1543 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1545 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1546 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
/* NOTE(review): fib_index0/1 are loaded here and again a few lines below
   with the same expression; the second load looks redundant. */
1548 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1549 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1551 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1553 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1554 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1556 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1558 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1559 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
/* Source-address lookup: steps 0..3 of the mtrie walk are spread through
   the code below. */
1561 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1562 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1564 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1567 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1569 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1571 /* Treat IP frag packets as "experimental" protocol for now
1572 until support of IP frag reassembly is implemented */
1573 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1574 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1576 if (head_of_feature_arc == 0)
1578 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1582 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1583 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1584 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1585 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1590 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1591 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1593 udp0 = ip4_next_header (ip0);
1594 udp1 = ip4_next_header (ip1);
1596 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1597 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1598 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1601 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1603 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1605 /* Verify UDP length. */
1606 ip_len0 = clib_net_to_host_u16 (ip0->length);
1607 ip_len1 = clib_net_to_host_u16 (ip1->length);
1608 udp_len0 = clib_net_to_host_u16 (udp0->length);
1609 udp_len1 = clib_net_to_host_u16 (udp1->length);
1611 len_diff0 = ip_len0 - udp_len0;
1612 len_diff1 = ip_len1 - udp_len1;
1614 len_diff0 = is_udp0 ? len_diff0 : 0;
1615 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Unlikely path: at least one packet is not TCP/UDP or does not yet have a
   known-good checksum; validate in software where it was not computed. */
1617 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1618 & good_tcp_udp0 & good_tcp_udp1)))
1623 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1624 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1626 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1627 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1632 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1633 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1635 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1636 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1640 good_tcp_udp0 &= len_diff0 >= 0;
1641 good_tcp_udp1 &= len_diff1 >= 0;
1644 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1646 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1648 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1650 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1651 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* TCP_CHECKSUM + is_udp selects UDP_CHECKSUM for UDP; the assert guards the
   enum ordering this relies on. */
1653 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1654 error0 = (is_tcp_udp0 && !good_tcp_udp0
1655 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1656 error1 = (is_tcp_udp1 && !good_tcp_udp1
1657 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1660 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1662 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1665 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1668 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1670 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1671 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1672 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1674 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1675 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1676 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1678 lb0 = load_balance_get (lbi0);
1679 lb1 = load_balance_get (lbi1);
1680 dpo0 = load_balance_get_bucket_i (lb0, 0);
1681 dpo1 = load_balance_get_bucket_i (lb1, 0);
1684 * Must have a route to source otherwise we drop the packet.
1685 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1688 * - the source is a recieve => it's from us => bogus, do this
1689 * first since it sets a different error code.
1690 * - uRPF check for any route to source - accept if passes.
1691 * - allow packets destined to the broadcast address from unknown sources
1693 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1694 dpo0->dpoi_type == DPO_RECEIVE) ?
1695 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1696 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1697 !fib_urpf_check_size (lb0->lb_urpf) &&
1698 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1699 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1700 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1701 dpo1->dpoi_type == DPO_RECEIVE) ?
1702 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1703 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1704 !fib_urpf_check_size (lb1->lb_urpf) &&
1705 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1706 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
/* Next node comes from the per-protocol table, overridden by drop as soon
   as any check replaced the UNKNOWN_PROTOCOL placeholder. */
1710 next0 = lm->local_next_by_ip_protocol[proto0];
1711 next1 = lm->local_next_by_ip_protocol[proto1];
1714 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1716 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1718 p0->error = error0 ? error_node->errors[error0] : 0;
1719 p1->error = error1 ? error_node->errors[error1] : 0;
/* Only packets that passed every check enter the ip4-local feature arc. */
1721 if (head_of_feature_arc)
1723 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1724 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1725 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1726 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1729 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1730 n_left_to_next, pi0, pi1,
/* Single-packet loop: same steps as above for one packet. */
1734 while (n_left_from > 0 && n_left_to_next > 0)
1739 ip4_fib_mtrie_t *mtrie0;
1740 ip4_fib_mtrie_leaf_t leaf0;
1741 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1743 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1744 load_balance_t *lb0;
1745 const dpo_id_t *dpo0;
1748 pi0 = to_next[0] = from[0];
1752 n_left_to_next -= 1;
1754 next0 = IP_LOCAL_NEXT_DROP;
1756 p0 = vlib_get_buffer (vm, pi0);
1758 ip0 = vlib_buffer_get_current (p0);
1760 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1762 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1764 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1767 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1768 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1770 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1772 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1775 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1777 /* Treat IP frag packets as "experimental" protocol for now
1778 until support of IP frag reassembly is implemented */
1779 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1781 if (head_of_feature_arc == 0)
1783 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1787 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1788 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1792 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1794 udp0 = ip4_next_header (ip0);
1796 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1797 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1800 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1802 /* Verify UDP length. */
1803 ip_len0 = clib_net_to_host_u16 (ip0->length);
1804 udp_len0 = clib_net_to_host_u16 (udp0->length);
1806 len_diff0 = ip_len0 - udp_len0;
1808 len_diff0 = is_udp0 ? len_diff0 : 0;
1810 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1815 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1816 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1818 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1819 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1823 good_tcp_udp0 &= len_diff0 >= 0;
1826 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1828 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1830 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1832 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1833 error0 = (is_tcp_udp0 && !good_tcp_udp0
1834 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1837 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1840 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1842 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1843 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1845 lb0 = load_balance_get (lbi0);
1846 dpo0 = load_balance_get_bucket_i (lb0, 0);
1848 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1849 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
/* Source checks: a source that resolves to a local receive DPO is treated
   as spoofed; otherwise an empty uRPF list is a source-lookup miss unless
   the destination is the all-ones broadcast address. */
1851 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1852 dpo0->dpoi_type == DPO_RECEIVE) ?
1853 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1854 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1855 !fib_urpf_check_size (lb0->lb_urpf) &&
1856 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1857 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1861 next0 = lm->local_next_by_ip_protocol[proto0];
1864 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1866 p0->error = error0 ? error_node->errors[error0] : 0;
1868 if (head_of_feature_arc)
1870 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1871 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1874 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1875 n_left_to_next, pi0, next0);
1879 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1882 return frame->n_vectors;
/* Node function of ip4-local: runs the shared body as head of the
   ip4-local feature arc. */
1886 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1888 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/*
 * Graph node "ip4-local".  Its static next nodes are drop, punt, UDP lookup
 * and ICMP input; further protocol handlers are added at run time through
 * ip4_register_protocol.
 */
1892 VLIB_REGISTER_NODE (ip4_local_node) =
1894 .function = ip4_local,
1895 .name = "ip4-local",
1896 .vector_size = sizeof (u32),
1897 .format_trace = format_ip4_forward_next_trace,
1898 .n_next_nodes = IP_LOCAL_N_NEXT,
1901 [IP_LOCAL_NEXT_DROP] = "error-drop",
1902 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1903 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1904 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1908 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Node function of ip4-local-end-of-arc: runs the shared body with
   head_of_feature_arc = 0, so the feature arc is not started again. */
1911 ip4_local_end_of_arc (vlib_main_t * vm,
1912 vlib_node_runtime_t * node, vlib_frame_t * frame)
1914 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/* Graph node "ip4-local-end-of-arc"; registered as a sibling of ip4-local. */
1918 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1919 .function = ip4_local_end_of_arc,
1920 .name = "ip4-local-end-of-arc",
1921 .vector_size = sizeof (u32),
1923 .format_trace = format_ip4_forward_next_trace,
1924 .sibling_of = "ip4-local",
1927 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Registers ip4-local-end-of-arc as a feature on the "ip4-local" arc. */
1929 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1930 .arc_name = "ip4-local",
1931 .node_name = "ip4-local-end-of-arc",
1932 .runs_before = 0, /* not before any other features */
/*
 * Registers node_index as the local handler for an IP protocol: the node is
 * added as a next node of ip4-local and the resulting next index is stored
 * in lm->local_next_by_ip_protocol[protocol].
 * protocol must be a valid index of that table (asserted).
 */
1937 ip4_register_protocol (u32 protocol, u32 node_index)
1939 vlib_main_t *vm = vlib_get_main ();
1940 ip4_main_t *im = &ip4_main;
1941 ip_lookup_main_t *lm = &im->lookup_main;
1943 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1944 lm->local_next_by_ip_protocol[protocol] =
1945 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local": prints a heading followed by the number
 * of every IP protocol whose entry in lm->local_next_by_ip_protocol is not
 * IP_LOCAL_NEXT_PUNT.  input and cmd are not referenced in the lines
 * visible here.
 */
1948 static clib_error_t *
1949 show_ip_local_command_fn (vlib_main_t * vm,
1950 unformat_input_t * input, vlib_cli_command_t * cmd)
1952 ip4_main_t *im = &ip4_main;
1953 ip_lookup_main_t *lm = &im->lookup_main;
1956 vlib_cli_output (vm, "Protocols handled by ip4_local");
1957 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
1959 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1960 vlib_cli_output (vm, "%d", i);
1968 * Display the set of protocols handled by the local IPv4 stack.
1971 * Example of how to display local protocol table:
1972 * @cliexstart{show ip local}
1973 * Protocols handled by ip4_local
1980 VLIB_CLI_COMMAND (show_ip_local, static) =
/* Binds the "show ip local" CLI path to show_ip_local_command_fn. */
1982 .path = "show ip local",
1983 .function = show_ip_local_command_fn,
1984 .short_help = "show ip local",
/*
 * Body shared by the ip4-arp (is_glean = 0) and ip4-glean (is_glean = 1)
 * nodes.
 *
 * Every packet of the frame is placed on the drop next frame
 * (IP4_ARP_NEXT_DROP).  For packets that are not throttled, and whose
 * adjacency is still of the expected kind, an ARP request is built from
 * im->ip4_arp_request_packet_template and sent towards
 * adj0->rewrite_header.next_index.  In the glean case the request targets
 * the packet's destination address and uses the address stored in the glean
 * adjacency as sender; otherwise it targets the adjacency's next hop and
 * uses a source address chosen by ip4_src_address_for_packet.
 *
 * Throttling uses function-static state (seeds, bitmap, timestamp), so it
 * is shared by every caller of this function.
 * Returns frame->n_vectors.
 */
1989 ip4_arp_inline (vlib_main_t * vm,
1990 vlib_node_runtime_t * node,
1991 vlib_frame_t * frame, int is_glean)
1993 vnet_main_t *vnm = vnet_get_main ();
1994 ip4_main_t *im = &ip4_main;
1995 ip_lookup_main_t *lm = &im->lookup_main;
1996 u32 *from, *to_next_drop;
1997 uword n_left_from, n_left_to_next_drop, next_index;
1998 static f64 time_last_seed_change = -1e100;
1999 static u32 hash_seeds[3];
2000 static uword hash_bitmap[256 / BITS (uword)];
2003 if (node->flags & VLIB_NODE_FLAG_TRACE)
2004 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Once more than 1e-3 (presumably seconds) has passed since the last
   re-seed, pick new hash seeds and forget which keys were seen. */
2006 time_now = vlib_time_now (vm);
2007 if (time_now - time_last_seed_change > 1e-3)
2010 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2011 sizeof (hash_seeds));
2012 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2013 hash_seeds[i] = r[i];
2015 /* Mark all hash keys as not seen before. */
2016 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2019 time_last_seed_change = time_now;
2022 from = vlib_frame_vector_args (frame);
2023 n_left_from = frame->n_vectors;
2024 next_index = node->cached_next_index;
2025 if (next_index == IP4_ARP_NEXT_DROP)
2026 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2028 while (n_left_from > 0)
2030 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2031 to_next_drop, n_left_to_next_drop);
2033 while (n_left_from > 0 && n_left_to_next_drop > 0)
2035 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2036 ip_adjacency_t *adj0;
2043 p0 = vlib_get_buffer (vm, pi0);
2045 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2046 adj0 = ip_get_adjacency (lm, adj_index0);
2047 ip0 = vlib_buffer_get_current (p0);
2053 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2054 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2059 * this is the Glean case, so we are ARPing for the
2060 * packet's destination
2062 a0 ^= ip0->dst_address.data_u32;
2066 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2070 hash_v3_finalize32 (a0, b0, c0);
/*
 * Map the hash to one bit of hash_bitmap; a bit that is already set means
 * the key was seen since the last re-seed and the packet is only dropped.
 *
 * NOTE(review): c0 is reduced to the word index (c0 / BITS (uword)) BEFORE
 * the bit mask is derived from c0 % BITS (uword), so the bit position
 * always equals the word index and only ARRAY_LEN (hash_bitmap) distinct
 * slots are ever used instead of BITS (hash_bitmap).  Computing the mask
 * before the division looks like the intent - confirm.
 * NOTE(review): m0 is declared u32 while the mask is built as uword; fine
 * for the small shifts that occur today, but it would truncate bits >= 32
 * on a 64-bit uword if the indexing above were corrected.
 */
2072 c0 &= BITS (hash_bitmap) - 1;
2073 c0 = c0 / BITS (uword);
2074 m0 = (uword) 1 << (c0 % BITS (uword));
2076 bm0 = hash_bitmap[c0];
2077 drop0 = (bm0 & m0) != 0;
2079 /* Mark it as seen. */
2080 hash_bitmap[c0] = bm0 | m0;
2084 to_next_drop[0] = pi0;
2086 n_left_to_next_drop -= 1;
2089 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2090 IP4_ARP_ERROR_REQUEST_SENT];
2093 * the adj has been updated to a rewrite but the node the DPO that got
2094 * us here hasn't - yet. no big deal. we'll drop while we wait.
2096 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2103 * Can happen if the control-plane is programming tables
2104 * with traffic flowing; at least that's today's lame excuse.
2106 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2107 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2109 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2112 /* Send ARP request. */
2116 ethernet_arp_header_t *h0;
2117 vnet_hw_interface_t *hw_if0;
2120 vlib_packet_template_get_packet (vm,
2121 &im->ip4_arp_request_packet_template,
2124 /* Add rewrite/encap string for ARP packet. */
2125 vnet_rewrite_one_header (adj0[0], h0,
2126 sizeof (ethernet_header_t));
2128 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2130 /* Src ethernet address in ARP header. */
2131 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2133 sizeof (h0->ip4_over_ethernet[0].ethernet));
2137 /* The interface's source address is stashed in the Glean Adj */
2138 h0->ip4_over_ethernet[0].ip4 =
2139 adj0->sub_type.glean.receive_addr.ip4;
2141 /* Copy in destination address we are requesting. This is the
2142 * glean case, so it's the packet's destination.*/
2143 h0->ip4_over_ethernet[1].ip4.data_u32 =
2144 ip0->dst_address.data_u32;
2148 /* Src IP address in ARP header. */
2149 if (ip4_src_address_for_packet (lm, sw_if_index0,
2151 ip4_over_ethernet[0].ip4))
2153 /* No source address available */
2155 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2156 vlib_buffer_free (vm, &bi0, 1);
2160 /* Copy in destination address we are requesting from the
2162 h0->ip4_over_ethernet[1].ip4.data_u32 =
2163 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2166 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2167 b0 = vlib_get_buffer (vm, bi0);
2168 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2170 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2172 vlib_set_next_frame_buffer (vm, node,
2173 adj0->rewrite_header.next_index,
2178 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2181 return frame->n_vectors;
/* Node function of ip4-arp: the shared ARP body in non-glean mode. */
2185 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2187 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of ip4-glean: the shared ARP body in glean mode. */
2191 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2193 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the ip4-arp and ip4-glean nodes, indexed by
   IP4_ARP_ERROR_*; both node registrations share this table. */
2196 static char *ip4_arp_error_strings[] = {
2197 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2198 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2199 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2200 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2201 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2202 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node "ip4-arp"; its only static next node is error-drop. */
2205 VLIB_REGISTER_NODE (ip4_arp_node) =
2207 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2208 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2209 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2210 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2212 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph node "ip4-glean"; same error table and next nodes as ip4-arp. */
2215 VLIB_REGISTER_NODE (ip4_glean_node) =
2217 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2218 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2219 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2220 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2222 [IP4_ARP_NEXT_DROP] = "error-drop",}
2225 #define foreach_notrace_ip4_arp_error \
2232 arp_notrace_init (vlib_main_t * vm)
/*
 * Init function: for each error listed in foreach_notrace_ip4_arp_error,
 * passes the ip4-arp node's corresponding error counter to
 * vnet_pcap_drop_trace_filter_add_del, so those drops are filtered from the
 * pcap drop trace.
 */
2234 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2236 /* don't trace ARP request packets */
2238 vnet_pcap_drop_trace_filter_add_del \
2239 (rt->errors[IP4_ARP_ERROR_##a], \
2241 foreach_notrace_ip4_arp_error;
2246 VLIB_INIT_FUNCTION (arp_notrace_init);
2249 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * vm          - vlib main, used to allocate and send the request.
 * dst         - IPv4 address to probe.
 * sw_if_index - software interface to send the request on.
 *
 * Returns an error when the interface is administratively down or when no
 * interface address matches dst (vnm->api_errno is then set to
 * VNET_API_ERROR_NO_MATCHING_INTERFACE); returns 0 once the request has
 * been handed to the hardware interface's output node.
 */
2251 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2253 vnet_main_t *vnm = vnet_get_main ();
2254 ip4_main_t *im = &ip4_main;
2255 ethernet_arp_header_t *h;
2257 ip_interface_address_t *ia;
2258 ip_adjacency_t *adj;
2259 vnet_hw_interface_t *hi;
2260 vnet_sw_interface_t *si;
2264 si = vnet_get_sw_interface (vnm, sw_if_index);
2266 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2268 return clib_error_return (0, "%U: interface %U down",
2269 format_ip4_address, dst,
2270 format_vnet_sw_if_index_name, vnm,
/* Find the interface address covering dst; ia receives the matching
   interface address entry. */
2275 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2278 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2279 return clib_error_return
2281 "no matching interface address for destination %U (interface %U)",
2282 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
/* The probe adjacency of that interface address supplies the L2 rewrite. */
2286 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2289 vlib_packet_template_get_packet (vm,
2290 &im->ip4_arp_request_packet_template,
2293 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender = this interface's hardware address and src; target IP = dst. */
2295 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2296 sizeof (h->ip4_over_ethernet[0].ethernet));
2298 h->ip4_over_ethernet[0].ip4 = src[0];
2299 h->ip4_over_ethernet[1].ip4 = dst[0];
2301 b = vlib_get_buffer (vm, bi);
2302 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2303 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2305 /* Add encapsulation string for software interface (e.g. ethernet header). */
2306 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2307 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the single buffer straight to the hardware interface's output node. */
2310 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2311 u32 *to_next = vlib_frame_vector_args (f);
2314 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2317 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: drop is its default next,
   ICMP error is chosen when the TTL expires. */
2322 IP4_REWRITE_NEXT_DROP,
2323 IP4_REWRITE_NEXT_ICMP_ERROR,
2324 } ip4_rewrite_next_t;
2327 ip4_rewrite_inline (vlib_main_t * vm,
2328 vlib_node_runtime_t * node,
2329 vlib_frame_t * frame,
2330 int do_counters, int is_midchain, int is_mcast)
2332 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2333 u32 *from = vlib_frame_vector_args (frame);
2334 u32 n_left_from, n_left_to_next, *to_next, next_index;
2335 vlib_node_runtime_t *error_node =
2336 vlib_node_get_runtime (vm, ip4_input_node.index);
2338 n_left_from = frame->n_vectors;
2339 next_index = node->cached_next_index;
2340 u32 cpu_index = os_get_cpu_number ();
2342 while (n_left_from > 0)
2344 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2346 while (n_left_from >= 4 && n_left_to_next >= 2)
2348 ip_adjacency_t *adj0, *adj1;
2349 vlib_buffer_t *p0, *p1;
2350 ip4_header_t *ip0, *ip1;
2351 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2352 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2353 u32 tx_sw_if_index0, tx_sw_if_index1;
2355 /* Prefetch next iteration. */
2357 vlib_buffer_t *p2, *p3;
2359 p2 = vlib_get_buffer (vm, from[2]);
2360 p3 = vlib_get_buffer (vm, from[3]);
2362 vlib_prefetch_buffer_header (p2, STORE);
2363 vlib_prefetch_buffer_header (p3, STORE);
2365 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2366 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2369 pi0 = to_next[0] = from[0];
2370 pi1 = to_next[1] = from[1];
2375 n_left_to_next -= 2;
2377 p0 = vlib_get_buffer (vm, pi0);
2378 p1 = vlib_get_buffer (vm, pi1);
2380 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2381 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2383 /* We should never rewrite a pkt using the MISS adjacency */
2384 ASSERT (adj_index0 && adj_index1);
2386 ip0 = vlib_buffer_get_current (p0);
2387 ip1 = vlib_buffer_get_current (p1);
2389 error0 = error1 = IP4_ERROR_NONE;
2390 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2392 /* Decrement TTL & update checksum.
2393 Works either endian, so no need for byte swap. */
2394 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2396 i32 ttl0 = ip0->ttl;
2398 /* Input node should have rejected packets with ttl 0. */
2399 ASSERT (ip0->ttl > 0);
2401 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2402 checksum0 += checksum0 >= 0xffff;
2404 ip0->checksum = checksum0;
2409 * If the ttl drops below 1 when forwarding, generate
2412 if (PREDICT_FALSE (ttl0 <= 0))
2414 error0 = IP4_ERROR_TIME_EXPIRED;
2415 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2416 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2417 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2419 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2422 /* Verify checksum. */
2423 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2427 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2429 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2431 i32 ttl1 = ip1->ttl;
2433 /* Input node should have rejected packets with ttl 0. */
2434 ASSERT (ip1->ttl > 0);
2436 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2437 checksum1 += checksum1 >= 0xffff;
2439 ip1->checksum = checksum1;
2444 * If the ttl drops below 1 when forwarding, generate
2447 if (PREDICT_FALSE (ttl1 <= 0))
2449 error1 = IP4_ERROR_TIME_EXPIRED;
2450 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2451 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2452 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2454 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2457 /* Verify checksum. */
2458 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2459 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2463 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2466 /* Rewrite packet header and updates lengths. */
2467 adj0 = ip_get_adjacency (lm, adj_index0);
2468 adj1 = ip_get_adjacency (lm, adj_index1);
2470 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2471 rw_len0 = adj0[0].rewrite_header.data_bytes;
2472 rw_len1 = adj1[0].rewrite_header.data_bytes;
2473 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2474 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2476 /* Check MTU of outgoing interface. */
2478 (vlib_buffer_length_in_chain (vm, p0) >
2480 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2483 (vlib_buffer_length_in_chain (vm, p1) >
2485 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2489 * pre-fetch the per-adjacency counters
2493 vlib_prefetch_combined_counter (&adjacency_counters,
2494 cpu_index, adj_index0);
2495 vlib_prefetch_combined_counter (&adjacency_counters,
2496 cpu_index, adj_index1);
2499 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2500 * to see the IP header */
2501 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2503 next0 = adj0[0].rewrite_header.next_index;
2504 p0->current_data -= rw_len0;
2505 p0->current_length += rw_len0;
2506 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2507 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2510 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2511 vnet_feature_arc_start (lm->output_feature_arc_index,
2512 tx_sw_if_index0, &next0, p0);
2514 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2516 next1 = adj1[0].rewrite_header.next_index;
2517 p1->current_data -= rw_len1;
2518 p1->current_length += rw_len1;
2520 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2521 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2524 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2525 vnet_feature_arc_start (lm->output_feature_arc_index,
2526 tx_sw_if_index1, &next1, p1);
2529 /* Guess we are only writing on simple Ethernet header. */
2530 vnet_rewrite_two_headers (adj0[0], adj1[0],
2531 ip0, ip1, sizeof (ethernet_header_t));
2534 * Bump the per-adjacency counters
2538 vlib_increment_combined_counter
2539 (&adjacency_counters,
2542 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2544 vlib_increment_combined_counter
2545 (&adjacency_counters,
2548 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2553 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2554 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2559 * copy bytes from the IP address into the MAC rewrite
2561 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
2562 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 1);
2565 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2566 to_next, n_left_to_next,
2567 pi0, pi1, next0, next1);
2570 while (n_left_from > 0 && n_left_to_next > 0)
2572 ip_adjacency_t *adj0;
2575 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2576 u32 tx_sw_if_index0;
2578 pi0 = to_next[0] = from[0];
2580 p0 = vlib_get_buffer (vm, pi0);
2582 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2584 /* We should never rewrite a pkt using the MISS adjacency */
2585 ASSERT (adj_index0);
2587 adj0 = ip_get_adjacency (lm, adj_index0);
2589 ip0 = vlib_buffer_get_current (p0);
2591 error0 = IP4_ERROR_NONE;
2592 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2594 /* Decrement TTL & update checksum. */
2595 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2597 i32 ttl0 = ip0->ttl;
2599 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2601 checksum0 += checksum0 >= 0xffff;
2603 ip0->checksum = checksum0;
2605 ASSERT (ip0->ttl > 0);
2611 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2613 if (PREDICT_FALSE (ttl0 <= 0))
2616 * If the ttl drops below 1 when forwarding, generate
2619 error0 = IP4_ERROR_TIME_EXPIRED;
2620 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2621 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2622 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2623 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2629 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2632 vlib_prefetch_combined_counter (&adjacency_counters,
2633 cpu_index, adj_index0);
2635 /* Guess we are only writing on simple Ethernet header. */
2636 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2640 * copy bytes from the IP address into the MAC rewrite
2642 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
2645 /* Update packet buffer attributes/set output interface. */
2646 rw_len0 = adj0[0].rewrite_header.data_bytes;
2647 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2649 vlib_increment_combined_counter
2650 (&adjacency_counters,
2652 adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2654 /* Check MTU of outgoing interface. */
2655 error0 = (vlib_buffer_length_in_chain (vm, p0)
2656 > adj0[0].rewrite_header.max_l3_packet_bytes
2657 ? IP4_ERROR_MTU_EXCEEDED : error0);
2659 p0->error = error_node->errors[error0];
2661 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2662 * to see the IP header */
2663 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2665 p0->current_data -= rw_len0;
2666 p0->current_length += rw_len0;
2667 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2669 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2670 next0 = adj0[0].rewrite_header.next_index;
2674 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2678 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2679 vnet_feature_arc_start (lm->output_feature_arc_index,
2680 tx_sw_if_index0, &next0, p0);
2687 n_left_to_next -= 1;
2689 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2690 to_next, n_left_to_next,
2694 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2697 /* Need to do trace after rewrites to pick up new packet data. */
2698 if (node->flags & VLIB_NODE_FLAG_TRACE)
2699 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2701 return frame->n_vectors;
2705 /** @brief IPv4 rewrite node.
2708 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2709 header checksum, fetch the ip adjacency, check the outbound mtu,
2710 apply the adjacency rewrite, and send pkts to the adjacency
2711 rewrite header's rewrite_next_index.
2713 @param vm vlib_main_t corresponding to the current thread
2714 @param node vlib_node_runtime_t
2715 @param frame vlib_frame_t whose contents should be dispatched
2717 @par Graph mechanics: buffer metadata, next index usage
2720 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2721 - the rewrite adjacency index
2722 - <code>adj->lookup_next_index</code>
2723 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2724 the packet will be dropped.
2725 - <code>adj->rewrite_header</code>
2726 - Rewrite string length, rewrite string, next_index
2729 - <code>b->current_data, b->current_length</code>
2730 - Updated net of applying the rewrite string
2732 <em>Next Indices:</em>
2733 - <code> adj->rewrite_header.next_index </code>
2737 ip4_rewrite (vlib_main_t * vm,
2738 vlib_node_runtime_t * node, vlib_frame_t * frame)
2740 if (adj_are_counters_enabled ())
2741 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2743 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
2747 ip4_midchain (vlib_main_t * vm,
2748 vlib_node_runtime_t * node, vlib_frame_t * frame)
2750 if (adj_are_counters_enabled ())
2751 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2753 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
2757 ip4_rewrite_mcast (vlib_main_t * vm,
2758 vlib_node_runtime_t * node, vlib_frame_t * frame)
2760 if (adj_are_counters_enabled ())
2761 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2763 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
2767 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2768 .function = ip4_rewrite,
2769 .name = "ip4-rewrite",
2770 .vector_size = sizeof (u32),
2772 .format_trace = format_ip4_rewrite_trace,
2776 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2777 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2780 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
2782 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2783 .function = ip4_rewrite_mcast,
2784 .name = "ip4-rewrite-mcast",
2785 .vector_size = sizeof (u32),
2787 .format_trace = format_ip4_rewrite_trace,
2788 .sibling_of = "ip4-rewrite",
2790 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
2792 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2793 .function = ip4_midchain,
2794 .name = "ip4-midchain",
2795 .vector_size = sizeof (u32),
2796 .format_trace = format_ip4_forward_next_trace,
2797 .sibling_of = "ip4-rewrite",
2799 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
2802 static clib_error_t *
2803 add_del_interface_table (vlib_main_t * vm,
2804 unformat_input_t * input, vlib_cli_command_t * cmd)
2806 vnet_main_t *vnm = vnet_get_main ();
2807 ip_interface_address_t *ia;
2808 clib_error_t *error = 0;
2809 u32 sw_if_index, table_id;
2813 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2815 error = clib_error_return (0, "unknown interface `%U'",
2816 format_unformat_error, input);
2820 if (unformat (input, "%d", &table_id))
2824 error = clib_error_return (0, "expected table id `%U'",
2825 format_unformat_error, input);
2830 * If the interface already has in IP address, then a change int
2831 * VRF is not allowed. The IP address applied must first be removed.
2832 * We do not do that automatically here, since VPP has no knowledge
2833 * of whether thoses subnets are valid in the destination VRF.
2836 foreach_ip_interface_address (&ip4_main.lookup_main,
2838 1 /* honor unnumbered */,
2842 a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2843 error = clib_error_return (0, "interface %U has address %U",
2844 format_vnet_sw_if_index_name, vnm,
2846 format_ip4_address, a);
2852 ip4_main_t *im = &ip4_main;
2855 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2857 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2858 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2860 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2861 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2862 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2870 * Place the indicated interface into the supplied IPv4 FIB table (also known
2871 * as a VRF). If the FIB table does not exist, this command creates it. To
2872 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2873 * FIB table will only be displayed if a route has been added to the table, or
2874 * an IP Address is assigned to an interface in the table (which adds a route
2877 * @note IP addresses added after setting the interface IP table are added to
2878 * the indicated FIB table. If an IP address is added prior to changing the
2879 * table then this is an error. The control plane must remove these addresses
2880 * first and then change the table. VPP will not automatically move the
2881 * addresses from the old to the new table as it does not know the validity
2885 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2886 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2889 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2891 .path = "set interface ip table",
2892 .function = add_del_interface_table,
2893 .short_help = "set interface ip table <interface> <table-id>",
2898 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2900 ip4_fib_mtrie_t *mtrie0;
2901 ip4_fib_mtrie_leaf_t leaf0;
2904 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2906 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2907 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2908 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2909 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2910 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2912 /* Handle default route. */
2913 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2915 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2917 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
2920 static clib_error_t *
2921 test_lookup_command_fn (vlib_main_t * vm,
2922 unformat_input_t * input, vlib_cli_command_t * cmd)
2929 ip4_address_t ip4_base_address;
2932 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2934 if (unformat (input, "table %d", &table_id))
2936 /* Make sure the entry exists. */
2937 fib = ip4_fib_get (table_id);
2938 if ((fib) && (fib->index != table_id))
2939 return clib_error_return (0, "<fib-index> %d does not exist",
2942 else if (unformat (input, "count %f", &count))
2945 else if (unformat (input, "%U",
2946 unformat_ip4_address, &ip4_base_address))
2949 return clib_error_return (0, "unknown input `%U'",
2950 format_unformat_error, input);
2955 for (i = 0; i < n; i++)
2957 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2960 ip4_base_address.as_u32 =
2961 clib_host_to_net_u32 (1 +
2962 clib_net_to_host_u32 (ip4_base_address.as_u32));
2966 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2968 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2974 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2975 * given FIB table to determine if there is a conflict with the
2976 * adjacency table. The fib-id can be determined by using the
2977 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2980 * @todo This command uses fib-id, other commands use table-id (not
2981 * just a name, they are different indexes). Would like to change this
2982 * to table-id for consistency.
2985 * Example of how to run the test lookup command:
2986 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2987 * No errors in 2 lookups
2991 VLIB_CLI_COMMAND (lookup_test_command, static) =
2993 .path = "test lookup",
2994 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2995 .function = test_lookup_command_fn,
3000 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3002 ip4_main_t *im4 = &ip4_main;
3004 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3007 return VNET_API_ERROR_NO_SUCH_FIB;
3009 fib = ip4_fib_get (p[0]);
3011 fib->flow_hash_config = flow_hash_config;
3015 static clib_error_t *
3016 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3017 unformat_input_t * input,
3018 vlib_cli_command_t * cmd)
3022 u32 flow_hash_config = 0;
3025 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3027 if (unformat (input, "table %d", &table_id))
3030 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3031 foreach_flow_hash_bit
3038 return clib_error_return (0, "unknown input `%U'",
3039 format_unformat_error, input);
3041 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3047 case VNET_API_ERROR_NO_SUCH_FIB:
3048 return clib_error_return (0, "no such FIB table %d", table_id);
3051 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3059 * Configure the set of IPv4 fields used by the flow hash.
3062 * Example of how to set the flow hash on a given table:
3063 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3064 * Example of how to display the configured flow hash:
3065 * @cliexstart{show ip fib}
3066 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3069 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3070 * [0] [@0]: dpo-drop ip6
3073 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3074 * [0] [@0]: dpo-drop ip6
3077 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3078 * [0] [@0]: dpo-drop ip6
3081 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3082 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3085 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3086 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3087 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3088 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3089 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3092 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3093 * [0] [@0]: dpo-drop ip6
3094 * 255.255.255.255/32
3096 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3097 * [0] [@0]: dpo-drop ip6
3098 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3101 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3102 * [0] [@0]: dpo-drop ip6
3105 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3106 * [0] [@0]: dpo-drop ip6
3109 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3110 * [0] [@4]: ipv4-glean: af_packet0
3113 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3114 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3117 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3118 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3121 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3122 * [0] [@4]: ipv4-glean: af_packet1
3125 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3126 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3129 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3130 * [0] [@0]: dpo-drop ip6
3133 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3134 * [0] [@0]: dpo-drop ip6
3135 * 255.255.255.255/32
3137 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3138 * [0] [@0]: dpo-drop ip6
3142 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3144 .path = "set ip flow-hash",
3146 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3147 .function = set_ip_flow_hash_command_fn,
3152 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3155 vnet_main_t *vnm = vnet_get_main ();
3156 vnet_interface_main_t *im = &vnm->interface_main;
3157 ip4_main_t *ipm = &ip4_main;
3158 ip_lookup_main_t *lm = &ipm->lookup_main;
3159 vnet_classify_main_t *cm = &vnet_classify_main;
3160 ip4_address_t *if_addr;
3162 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3163 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3165 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3166 return VNET_API_ERROR_NO_SUCH_ENTRY;
3168 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3169 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3171 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3173 if (NULL != if_addr)
3175 fib_prefix_t pfx = {
3177 .fp_proto = FIB_PROTOCOL_IP4,
3178 .fp_addr.ip4 = *if_addr,
3182 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3186 if (table_index != (u32) ~ 0)
3188 dpo_id_t dpo = DPO_INVALID;
3193 classify_dpo_create (DPO_PROTO_IP4, table_index));
3195 fib_table_entry_special_dpo_add (fib_index,
3197 FIB_SOURCE_CLASSIFY,
3198 FIB_ENTRY_FLAG_NONE, &dpo);
3203 fib_table_entry_special_remove (fib_index,
3204 &pfx, FIB_SOURCE_CLASSIFY);
3211 static clib_error_t *
3212 set_ip_classify_command_fn (vlib_main_t * vm,
3213 unformat_input_t * input,
3214 vlib_cli_command_t * cmd)
3216 u32 table_index = ~0;
3217 int table_index_set = 0;
3218 u32 sw_if_index = ~0;
3221 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3223 if (unformat (input, "table-index %d", &table_index))
3224 table_index_set = 1;
3225 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3226 vnet_get_main (), &sw_if_index))
3232 if (table_index_set == 0)
3233 return clib_error_return (0, "classify table-index must be specified");
3235 if (sw_if_index == ~0)
3236 return clib_error_return (0, "interface / subif must be specified");
3238 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3245 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3246 return clib_error_return (0, "No such interface");
3248 case VNET_API_ERROR_NO_SUCH_ENTRY:
3249 return clib_error_return (0, "No such classifier table");
3255 * Assign a classification table to an interface. The classification
3256 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3257 * commands. Once the table is create, use this command to filter packets
3261 * Example of how to assign a classification table to an interface:
3262 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3265 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3267 .path = "set ip classify",
3269 "set ip classify intfc <interface> table-index <classify-idx>",
3270 .function = set_ip_classify_command_fn,
3275 * fd.io coding-style-patch-verification: ON
3278 * eval: (c-set-style "gnu")