2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 * @brief IPv4 Forwarding.
59 * This file contains the source code for IPv4 forwarding.
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
/*
 * ip4_lookup_inline - destination lookup for every buffer of a frame.
 *
 * @param vm     vlib main of the calling thread
 * @param node   runtime of the calling graph node
 * @param frame  frame whose buffer indices are processed
 * @param lookup_for_responses_to_locally_received_packets
 *               when zero, the destination address is walked through the
 *               FIB mtrie (four lookup steps, with the mtrie default leaf
 *               used when the result is IP4_FIB_MTRIE_LEAF_EMPTY);
 *               when non-zero, the mtrie is skipped and the load-balance
 *               index is read from ip.adj_index[VLIB_RX]
 * @return frame->n_vectors
 *
 * Buffers are handled four at a time while at least eight remain (the
 * following four are prefetched), then one at a time.  For each buffer the
 * load-balance object is fetched, a flow hash is computed only when the
 * load-balance has more than one bucket, the bucket's DPO supplies the next
 * node and ip.adj_index[VLIB_TX], and the load-balance "to" counter is
 * incremented with the buffer chain length.
 */
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 cpu_index = os_get_cpu_number ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Quad loop: needs 8 buffers left so that from[4..7] can be prefetched. */
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
100 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
102 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
104 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
106 flow_hash_config_t flow_hash_config0, flow_hash_config1;
107 flow_hash_config_t flow_hash_config2, flow_hash_config3;
108 u32 hash_c0, hash_c1, hash_c2, hash_c3;
109 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
111 /* Prefetch next iteration. */
113 vlib_buffer_t *p4, *p5, *p6, *p7;
115 p4 = vlib_get_buffer (vm, from[4]);
116 p5 = vlib_get_buffer (vm, from[5]);
117 p6 = vlib_get_buffer (vm, from[6]);
118 p7 = vlib_get_buffer (vm, from[7]);
120 vlib_prefetch_buffer_header (p4, LOAD);
121 vlib_prefetch_buffer_header (p5, LOAD);
122 vlib_prefetch_buffer_header (p6, LOAD);
123 vlib_prefetch_buffer_header (p7, LOAD);
125 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
128 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
131 pi0 = to_next[0] = from[0];
132 pi1 = to_next[1] = from[1];
133 pi2 = to_next[2] = from[2];
134 pi3 = to_next[3] = from[3];
141 p0 = vlib_get_buffer (vm, pi0);
142 p1 = vlib_get_buffer (vm, pi1);
143 p2 = vlib_get_buffer (vm, pi2);
144 p3 = vlib_get_buffer (vm, pi3);
146 ip0 = vlib_buffer_get_current (p0);
147 ip1 = vlib_buffer_get_current (p1);
148 ip2 = vlib_buffer_get_current (p2);
149 ip3 = vlib_buffer_get_current (p3);
151 dst_addr0 = &ip0->dst_address;
152 dst_addr1 = &ip1->dst_address;
153 dst_addr2 = &ip2->dst_address;
154 dst_addr3 = &ip3->dst_address;
/* FIB index of the RX interface ... */
157 vec_elt (im->fib_index_by_sw_if_index,
158 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
160 vec_elt (im->fib_index_by_sw_if_index,
161 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
163 vec_elt (im->fib_index_by_sw_if_index,
164 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
166 vec_elt (im->fib_index_by_sw_if_index,
167 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
/* ... kept unless sw_if_index[VLIB_TX] is set (!= ~0), which overrides it. */
169 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
170 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
172 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
173 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
175 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
176 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
178 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
179 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
182 if (!lookup_for_responses_to_locally_received_packets)
184 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
185 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
186 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
187 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
189 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
191 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
192 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
193 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
194 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
197 tcp0 = (void *) (ip0 + 1);
198 tcp1 = (void *) (ip1 + 1);
199 tcp2 = (void *) (ip2 + 1);
200 tcp3 = (void *) (ip3 + 1);
202 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
203 || ip0->protocol == IP_PROTOCOL_UDP);
204 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
205 || ip1->protocol == IP_PROTOCOL_UDP);
206 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
207 || ip2->protocol == IP_PROTOCOL_UDP);
/* Fixed: the fourth packet's flag must be derived from ip3, not ip1. */
208 is_tcp_udp3 = (ip3->protocol == IP_PROTOCOL_TCP
209 || ip3->protocol == IP_PROTOCOL_UDP);
211 if (!lookup_for_responses_to_locally_received_packets)
213 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
214 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
215 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
216 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
219 if (!lookup_for_responses_to_locally_received_packets)
221 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
222 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
223 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
224 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
227 if (!lookup_for_responses_to_locally_received_packets)
229 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
230 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
231 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
232 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
/* Local-response mode: the load-balance index was stored by an earlier node. */
235 if (lookup_for_responses_to_locally_received_packets)
237 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
238 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
239 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
240 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
244 /* Handle default route. */
247 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
250 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
253 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
256 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
257 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
258 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
259 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
260 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
263 lb0 = load_balance_get (lb_index0);
264 lb1 = load_balance_get (lb_index1);
265 lb2 = load_balance_get (lb_index2);
266 lb3 = load_balance_get (lb_index3);
268 /* Use flow hash to compute multipath adjacency. */
269 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
270 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
271 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
272 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
/* The hash is only computed when there is more than one bucket to pick from. */
273 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
275 flow_hash_config0 = lb0->lb_hash_config;
276 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
277 ip4_compute_flow_hash (ip0, flow_hash_config0);
279 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
281 flow_hash_config1 = lb1->lb_hash_config;
282 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
283 ip4_compute_flow_hash (ip1, flow_hash_config1);
285 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
287 flow_hash_config2 = lb2->lb_hash_config;
288 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
289 ip4_compute_flow_hash (ip2, flow_hash_config2);
291 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
293 flow_hash_config3 = lb3->lb_hash_config;
294 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
295 ip4_compute_flow_hash (ip3, flow_hash_config3);
/* Bucket selection masks with lb_n_buckets_minus_1, hence the power-of-2 check. */
298 ASSERT (lb0->lb_n_buckets > 0);
299 ASSERT (is_pow2 (lb0->lb_n_buckets));
300 ASSERT (lb1->lb_n_buckets > 0);
301 ASSERT (is_pow2 (lb1->lb_n_buckets));
302 ASSERT (lb2->lb_n_buckets > 0);
303 ASSERT (is_pow2 (lb2->lb_n_buckets));
304 ASSERT (lb3->lb_n_buckets > 0);
305 ASSERT (is_pow2 (lb3->lb_n_buckets));
307 dpo0 = load_balance_get_bucket_i (lb0,
309 (lb0->lb_n_buckets_minus_1)));
310 dpo1 = load_balance_get_bucket_i (lb1,
312 (lb1->lb_n_buckets_minus_1)));
313 dpo2 = load_balance_get_bucket_i (lb2,
315 (lb2->lb_n_buckets_minus_1)));
316 dpo3 = load_balance_get_bucket_i (lb3,
318 (lb3->lb_n_buckets_minus_1)));
320 next0 = dpo0->dpoi_next_node;
321 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
322 next1 = dpo1->dpoi_next_node;
323 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
324 next2 = dpo2->dpoi_next_node;
325 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
326 next3 = dpo3->dpoi_next_node;
327 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/*
 * Fixed: count the buffer chain length only, exactly as the single-buffer
 * loop below does.  The quad loop used to add sizeof (ethernet_header_t),
 * so the byte counter depended on which loop happened to process a packet.
 */
329 vlib_increment_combined_counter
330 (cm, cpu_index, lb_index0, 1,
331 vlib_buffer_length_in_chain (vm, p0));
333 vlib_increment_combined_counter
334 (cm, cpu_index, lb_index1, 1,
335 vlib_buffer_length_in_chain (vm, p1));
337 vlib_increment_combined_counter
338 (cm, cpu_index, lb_index2, 1,
339 vlib_buffer_length_in_chain (vm, p2));
341 vlib_increment_combined_counter
342 (cm, cpu_index, lb_index3, 1,
343 vlib_buffer_length_in_chain (vm, p3));
346 vlib_validate_buffer_enqueue_x4 (vm, node, next,
347 to_next, n_left_to_next,
349 next0, next1, next2, next3);
/* Single loop: same steps as above for one buffer at a time. */
352 while (n_left_from > 0 && n_left_to_next > 0)
356 __attribute__ ((unused)) tcp_header_t *tcp0;
357 ip_lookup_next_t next0;
358 const load_balance_t *lb0;
359 ip4_fib_mtrie_t *mtrie0;
360 ip4_fib_mtrie_leaf_t leaf0;
361 ip4_address_t *dst_addr0;
362 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
363 flow_hash_config_t flow_hash_config0;
364 const dpo_id_t *dpo0;
370 p0 = vlib_get_buffer (vm, pi0);
372 ip0 = vlib_buffer_get_current (p0);
374 dst_addr0 = &ip0->dst_address;
377 vec_elt (im->fib_index_by_sw_if_index,
378 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
380 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
381 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
383 if (!lookup_for_responses_to_locally_received_packets)
385 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
387 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
389 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
392 tcp0 = (void *) (ip0 + 1);
394 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
395 || ip0->protocol == IP_PROTOCOL_UDP);
397 if (!lookup_for_responses_to_locally_received_packets)
398 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
400 if (!lookup_for_responses_to_locally_received_packets)
401 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
403 if (!lookup_for_responses_to_locally_received_packets)
404 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
406 if (lookup_for_responses_to_locally_received_packets)
407 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
410 /* Handle default route. */
413 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
414 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
417 lb0 = load_balance_get (lbi0);
419 /* Use flow hash to compute multipath adjacency. */
420 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
421 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
423 flow_hash_config0 = lb0->lb_hash_config;
425 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
426 ip4_compute_flow_hash (ip0, flow_hash_config0);
429 ASSERT (lb0->lb_n_buckets > 0);
430 ASSERT (is_pow2 (lb0->lb_n_buckets));
432 dpo0 = load_balance_get_bucket_i (lb0,
434 (lb0->lb_n_buckets_minus_1)));
436 next0 = dpo0->dpoi_next_node;
437 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
439 vlib_increment_combined_counter
440 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
/* A buffer headed for a different next node closes the current next frame and opens another. */
447 if (PREDICT_FALSE (next0 != next))
450 vlib_put_next_frame (vm, node, next, n_left_to_next);
452 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
459 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the lookup, using the TX adjacency index selector. */
462 if (node->flags & VLIB_NODE_FLAG_TRACE)
463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
465 return frame->n_vectors;
468 /** @brief IPv4 lookup node.
471 This is the main IPv4 lookup dispatch node.
473 @param vm vlib_main_t corresponding to the current thread
474 @param node vlib_node_runtime_t
475 @param frame vlib_frame_t whose contents should be dispatched
477 @par Graph mechanics: buffer metadata, next index usage
480 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
481 - Indicates the @c sw_if_index value of the interface that the
482 packet was received on.
483 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
484 - When the value is @c ~0 then the node performs a longest prefix
485 match (LPM) for the packet destination address in the FIB attached
486 to the receive interface.
487 - Otherwise perform LPM for the packet destination address in the
488 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
489 value (0, 1, ...) and not a VRF id.
492 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
493 - The index of the DPO held in the selected load-balance bucket (@c dpo->dpoi_index).
496 - Dispatches the packet to the next node recorded in the selected
497 load-balance bucket's DPO, @c dpo->dpoi_next_node
498 (where @c dpo is the bucket chosen with the packet's flow hash).
/*
 * Node dispatch function registered for "ip4-lookup": hands the frame to
 * ip4_lookup_inline and returns its result.
 * NOTE(review): the value passed for
 * lookup_for_responses_to_locally_received_packets is not shown here;
 * presumably 0 (normal mtrie lookup) - confirm.
 */
501 ip4_lookup (vlib_main_t * vm,
502 vlib_node_runtime_t * node, vlib_frame_t * frame)
504 return ip4_lookup_inline (vm, node, frame,
505 /* lookup_for_responses_to_locally_received_packets */
/* Forward declaration: the trace formatter is referenced by the node registration below. */
510 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/*
 * Graph-node registration for "ip4-lookup": dispatch function ip4_lookup,
 * u32-sized vector elements, traces rendered by format_ip4_lookup_trace,
 * and IP_LOOKUP_N_NEXT next nodes taken from IP4_LOOKUP_NEXT_NODES.
 */
512 VLIB_REGISTER_NODE (ip4_lookup_node) =
514 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
515 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
516 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
518 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
521 ip4_load_balance (vlib_main_t * vm,
522 vlib_node_runtime_t * node, vlib_frame_t * frame)
524 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
525 u32 n_left_from, n_left_to_next, *from, *to_next;
526 ip_lookup_next_t next;
527 u32 cpu_index = os_get_cpu_number ();
529 from = vlib_frame_vector_args (frame);
530 n_left_from = frame->n_vectors;
531 next = node->cached_next_index;
533 if (node->flags & VLIB_NODE_FLAG_TRACE)
534 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
536 while (n_left_from > 0)
538 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
541 while (n_left_from >= 4 && n_left_to_next >= 2)
543 ip_lookup_next_t next0, next1;
544 const load_balance_t *lb0, *lb1;
545 vlib_buffer_t *p0, *p1;
546 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
547 const ip4_header_t *ip0, *ip1;
548 const dpo_id_t *dpo0, *dpo1;
550 /* Prefetch next iteration. */
552 vlib_buffer_t *p2, *p3;
554 p2 = vlib_get_buffer (vm, from[2]);
555 p3 = vlib_get_buffer (vm, from[3]);
557 vlib_prefetch_buffer_header (p2, STORE);
558 vlib_prefetch_buffer_header (p3, STORE);
560 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
561 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
564 pi0 = to_next[0] = from[0];
565 pi1 = to_next[1] = from[1];
572 p0 = vlib_get_buffer (vm, pi0);
573 p1 = vlib_get_buffer (vm, pi1);
575 ip0 = vlib_buffer_get_current (p0);
576 ip1 = vlib_buffer_get_current (p1);
577 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
578 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
580 lb0 = load_balance_get (lbi0);
581 lb1 = load_balance_get (lbi1);
584 * this node is for via FIBs we can re-use the hash value from the
585 * to node if present.
586 * We don't want to use the same hash value at each level in the recursion
587 * graph as that would lead to polarisation
589 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
590 hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
592 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
594 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
596 hc0 = vnet_buffer (p0)->ip.flow_hash =
597 vnet_buffer (p0)->ip.flow_hash >> 1;
601 hc0 = vnet_buffer (p0)->ip.flow_hash =
602 ip4_compute_flow_hash (ip0, hc0);
605 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
607 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
609 hc1 = vnet_buffer (p1)->ip.flow_hash =
610 vnet_buffer (p1)->ip.flow_hash >> 1;
614 hc1 = vnet_buffer (p1)->ip.flow_hash =
615 ip4_compute_flow_hash (ip1, hc1);
620 load_balance_get_bucket_i (lb0,
621 hc0 & (lb0->lb_n_buckets_minus_1));
623 load_balance_get_bucket_i (lb1,
624 hc1 & (lb1->lb_n_buckets_minus_1));
626 next0 = dpo0->dpoi_next_node;
627 next1 = dpo1->dpoi_next_node;
629 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
630 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
632 vlib_increment_combined_counter
633 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
634 vlib_increment_combined_counter
635 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
637 vlib_validate_buffer_enqueue_x2 (vm, node, next,
638 to_next, n_left_to_next,
639 pi0, pi1, next0, next1);
642 while (n_left_from > 0 && n_left_to_next > 0)
644 ip_lookup_next_t next0;
645 const load_balance_t *lb0;
648 const ip4_header_t *ip0;
649 const dpo_id_t *dpo0;
658 p0 = vlib_get_buffer (vm, pi0);
660 ip0 = vlib_buffer_get_current (p0);
661 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
663 lb0 = load_balance_get (lbi0);
665 hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
666 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
668 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
670 hc0 = vnet_buffer (p0)->ip.flow_hash =
671 vnet_buffer (p0)->ip.flow_hash >> 1;
675 hc0 = vnet_buffer (p0)->ip.flow_hash =
676 ip4_compute_flow_hash (ip0, hc0);
681 load_balance_get_bucket_i (lb0,
682 hc0 & (lb0->lb_n_buckets_minus_1));
684 next0 = dpo0->dpoi_next_node;
685 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
687 vlib_increment_combined_counter
688 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
690 vlib_validate_buffer_enqueue_x1 (vm, node, next,
691 to_next, n_left_to_next,
695 vlib_put_next_frame (vm, node, next, n_left_to_next);
698 return frame->n_vectors;
/*
 * Graph-node registration for "ip4-load-balance": dispatch function
 * ip4_load_balance, u32-sized vector elements, declared as a sibling of
 * "ip4-lookup", with traces rendered by format_ip4_lookup_trace.
 */
701 VLIB_REGISTER_NODE (ip4_load_balance_node) =
703 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
704 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
705 format_ip4_lookup_trace,};
707 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
709 /* get first interface address */
/*
 * Walks the addresses configured on sw_if_index with
 * foreach_ip_interface_address (the "honor unnumbered" argument is 1) and
 * reads each one through ip_interface_address_get_address.
 *
 * @param im         ip4 main; its lookup_main holds the interface addresses
 * @param sw_if_index interface whose addresses are walked
 * @param result_ia  out: set to the matching ip_interface_address_t when an
 *                   address was found (result != 0), otherwise to 0
 * NOTE(review): the loop body's assignment/break and the return statement
 * are not shown here; presumably the function returns `result` - confirm.
 */
711 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
712 ip_interface_address_t ** result_ia)
714 ip_lookup_main_t *lm = &im->lookup_main;
715 ip_interface_address_t *ia = 0;
716 ip4_address_t *result = 0;
719 foreach_ip_interface_address
720 (lm, ia, sw_if_index,
721 1 /* honor unnumbered */ ,
724 ip_interface_address_get_address (lm, ia);
730 *result_ia = result ? ia : 0;
/*
 * Installs the FIB entries that go with an interface address.
 *
 * @param sw_if_index interface the address is configured on
 * @param im          ip4 main (its lookup_main is used)
 * @param fib_index   FIB table the entries are added to
 * @param a           interface address; supplies the address and the
 *                    prefix length, and receives neighbor_probe_adj_index
 *
 * Visible steps: a FIB_SOURCE_INTERFACE entry flagged CONNECTED|ATTACHED
 * is added and its adjacency (fib_entry_get_adj) is stored in
 * a->neighbor_probe_adj_index; if the interface has a classify table, a
 * classify DPO is created and added as a special entry; finally a
 * FIB_SOURCE_INTERFACE entry flagged CONNECTED|LOCAL is added.
 * NOTE(review): the conditions guarding these steps and the changes made to
 * pfx between them are not shown here.
 */
735 ip4_add_interface_routes (u32 sw_if_index,
736 ip4_main_t * im, u32 fib_index,
737 ip_interface_address_t * a)
739 ip_lookup_main_t *lm = &im->lookup_main;
740 ip4_address_t *address = ip_interface_address_get_address (lm, a);
742 .fp_len = a->address_length,
743 .fp_proto = FIB_PROTOCOL_IP4,
744 .fp_addr.ip4 = *address,
/* Start with "no adjacency"; overwritten below once the entry exists. */
747 a->neighbor_probe_adj_index = ~0;
751 fib_node_index_t fei;
753 fei = fib_table_entry_update_one_path (fib_index, &pfx,
754 FIB_SOURCE_INTERFACE,
755 (FIB_ENTRY_FLAG_CONNECTED |
756 FIB_ENTRY_FLAG_ATTACHED),
758 /* No next-hop address */
764 // no out-label stack
766 FIB_ROUTE_PATH_FLAG_NONE);
767 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
/* Only interfaces with a classify table configured (index != ~0) get the classify DPO entry. */
772 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
774 u32 classify_table_index =
775 lm->classify_table_index_by_sw_if_index[sw_if_index];
776 if (classify_table_index != (u32) ~ 0)
778 dpo_id_t dpo = DPO_INVALID;
783 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
785 fib_table_entry_special_dpo_add (fib_index,
788 FIB_ENTRY_FLAG_NONE, &dpo);
793 fib_table_entry_update_one_path (fib_index, &pfx,
794 FIB_SOURCE_INTERFACE,
795 (FIB_ENTRY_FLAG_CONNECTED |
796 FIB_ENTRY_FLAG_LOCAL),
803 FIB_ROUTE_PATH_FLAG_NONE);
/*
 * Removes the FIB_SOURCE_INTERFACE entries for an interface address from
 * the table fib_index.
 *
 * @param im             ip4 main
 * @param address        interface address being removed
 * @param address_length prefix length of that address
 *
 * NOTE(review): two fib_table_entry_delete calls are visible on the same
 * pfx variable; presumably pfx.fp_len is changed between them so that
 * both the connected prefix and the host route are removed - confirm.
 */
807 ip4_del_interface_routes (ip4_main_t * im,
809 ip4_address_t * address, u32 address_length)
812 .fp_len = address_length,
813 .fp_proto = FIB_PROTOCOL_IP4,
814 .fp_addr.ip4 = *address,
819 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
823 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/*
 * Reference-counted enable/disable of IPv4 processing on an interface.
 *
 * @param sw_if_index interface to act on
 * @param is_enable   non-zero to take a reference, zero to drop one
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] is the per-interface count
 * (grown on demand and initialised to 0).  The feature-arc calls at the
 * end are meant to run only on the 0->1 and 1->0 transitions of that
 * count.
 * NOTE(review): the early returns following the two counter tests are not
 * shown here - confirm.
 */
827 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
829 ip4_main_t *im = &ip4_main;
831 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
834 * enable/disable only on the 1<->0 transition
/* Enable path: anything other than the first reference needs no further work. */
838 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* Disable path: the count must be positive before it is decremented. */
843 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
844 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
847 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
851 vnet_feature_enable_disable ("ip4-multicast",
852 "ip4-mfib-forward-lookup",
853 sw_if_index, is_enable, 0, 0);
/*
 * Adds or removes an IPv4 address on an interface and keeps the FIB,
 * the per-interface enable count and registered callbacks in step.
 *
 * @param vm             vlib main
 * @param address        address to add or delete
 * @param address_length prefix length of the address
 * @param is_del         non-zero to delete, zero to add
 * @return a clib_error_t describing the failure, or 0 (the initial value
 *         of `error`) on success
 *
 * Visible steps: build an ip4_address_fib_t for the interface's FIB;
 * check the new address against the addresses already on the interface
 * and report a conflict; call ip_interface_address_add_del; update the
 * enable count; delete or add the interface routes; and notify the
 * add/del callbacks when the address pool size changed.
 * NOTE(review): the branch conditions and the error/cleanup path are not
 * fully shown here.
 */
856 static clib_error_t *
857 ip4_add_del_interface_address_internal (vlib_main_t * vm,
859 ip4_address_t * address,
860 u32 address_length, u32 is_del)
862 vnet_main_t *vnm = vnet_get_main ();
863 ip4_main_t *im = &ip4_main;
864 ip_lookup_main_t *lm = &im->lookup_main;
865 clib_error_t *error = 0;
866 u32 if_address_index, elts_before;
867 ip4_address_fib_t ip4_af, *addr_fib = 0;
/* Make sure the interface has a FIB index slot before reading it. */
869 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
870 ip4_addr_fib_init (&ip4_af, address,
871 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
872 vec_add1 (addr_fib, ip4_af);
875 * there is no support for adj-fib handling in the presence of overlapping
876 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
882 /* When adding an address check that it does not conflict
883 with an existing address. */
884 ip_interface_address_t *ia;
885 foreach_ip_interface_address
886 (&im->lookup_main, ia, sw_if_index,
887 0 /* honor unnumbered */ ,
890 ip_interface_address_get_address
891 (&im->lookup_main, ia);
/* The overlap test is applied in both directions (second call continues below). */
892 if (ip4_destination_matches_route
893 (im, address, x, ia->address_length) ||
894 ip4_destination_matches_route (im,
900 ("failed to add %U which conflicts with %U for interface %U",
901 format_ip4_address_and_length, address,
903 format_ip4_address_and_length, x,
905 format_vnet_sw_if_index_name, vnm,
/* Remember the pool size so a real add/remove can be told from a duplicate. */
911 elts_before = pool_elts (lm->if_address_pool);
913 error = ip_interface_address_add_del
914 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
918 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
921 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
923 ip4_add_interface_routes (sw_if_index,
924 im, ip4_af.fib_index,
926 (lm->if_address_pool, if_address_index));
928 /* If pool did not grow/shrink: add duplicate address. */
/* Callbacks therefore run only when the pool size actually changed. */
929 if (elts_before != pool_elts (lm->if_address_pool))
931 ip4_add_del_interface_address_callback_t *cb;
932 vec_foreach (cb, im->add_del_interface_address_callbacks)
933 cb->function (im, cb->function_opaque, sw_if_index,
934 address, address_length, if_address_index, is_del);
/*
 * Public entry point for adding/removing an interface address: forwards
 * all of its arguments unchanged to
 * ip4_add_del_interface_address_internal and returns that result.
 */
943 ip4_add_del_interface_address (vlib_main_t * vm,
945 ip4_address_t * address,
946 u32 address_length, u32 is_del)
948 return ip4_add_del_interface_address_internal
949 (vm, sw_if_index, address, address_length, is_del);
952 /* Built-in ip4 unicast rx feature path definition */
/*
 * The "ip4-unicast" arc starts at "ip4-input" / "ip4-input-no-checksum"
 * and ends at "ip4-lookup"; its arc index is stored in
 * ip4_main.lookup_main.ucast_feature_arc_index.
 *
 * Ordering declared by the runs_before fields below:
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
 *   vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup -> ip4-drop
 * with ip4-source-and-port-range-check-rx also placed before
 * ip4-policer-classify.
 */
954 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
956 .arc_name = "ip4-unicast",
957 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
958 .end_node = "ip4-lookup",
959 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
962 VNET_FEATURE_INIT (ip4_flow_classify, static) =
964 .arc_name = "ip4-unicast",
965 .node_name = "ip4-flow-classify",
966 .runs_before = VNET_FEATURES ("ip4-inacl"),
969 VNET_FEATURE_INIT (ip4_inacl, static) =
971 .arc_name = "ip4-unicast",
972 .node_name = "ip4-inacl",
973 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
976 VNET_FEATURE_INIT (ip4_source_check_1, static) =
978 .arc_name = "ip4-unicast",
979 .node_name = "ip4-source-check-via-rx",
980 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
983 VNET_FEATURE_INIT (ip4_source_check_2, static) =
985 .arc_name = "ip4-unicast",
986 .node_name = "ip4-source-check-via-any",
987 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
990 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
992 .arc_name = "ip4-unicast",
993 .node_name = "ip4-source-and-port-range-check-rx",
994 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
997 VNET_FEATURE_INIT (ip4_policer_classify, static) =
999 .arc_name = "ip4-unicast",
1000 .node_name = "ip4-policer-classify",
1001 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1004 VNET_FEATURE_INIT (ip4_ipsec, static) =
1006 .arc_name = "ip4-unicast",
1007 .node_name = "ipsec-input-ip4",
1008 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1011 VNET_FEATURE_INIT (ip4_vpath, static) =
1013 .arc_name = "ip4-unicast",
1014 .node_name = "vpath-input-ip4",
1015 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1018 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1020 .arc_name = "ip4-unicast",
1021 .node_name = "ip4-vxlan-bypass",
1022 .runs_before = VNET_FEATURES ("ip4-lookup"),
1025 VNET_FEATURE_INIT (ip4_lookup, static) =
1027 .arc_name = "ip4-unicast",
1028 .node_name = "ip4-lookup",
1029 .runs_before = VNET_FEATURES ("ip4-drop"),
1032 VNET_FEATURE_INIT (ip4_drop, static) =
1034 .arc_name = "ip4-unicast",
1035 .node_name = "ip4-drop",
1036 .runs_before = 0, /* not before any other features */
1040 /* Built-in ip4 multicast rx feature path definition */
/*
 * The "ip4-multicast" arc starts at "ip4-input" / "ip4-input-no-checksum"
 * and ends at "ip4-lookup-multicast"; its arc index is stored in
 * ip4_main.lookup_main.mcast_feature_arc_index.
 *
 * Ordering declared by the runs_before fields below:
 *   vpath-input-ip4 -> ip4-mfib-forward-lookup -> ip4-drop
 */
1041 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1043 .arc_name = "ip4-multicast",
1044 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1045 .end_node = "ip4-lookup-multicast",
1046 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1049 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1051 .arc_name = "ip4-multicast",
1052 .node_name = "vpath-input-ip4",
1053 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1056 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1058 .arc_name = "ip4-multicast",
1059 .node_name = "ip4-mfib-forward-lookup",
1060 .runs_before = VNET_FEATURES ("ip4-drop"),
1063 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1065 .arc_name = "ip4-multicast",
1066 .node_name = "ip4-drop",
1067 .runs_before = 0, /* last feature */
1070 /* Source and port-range check ip4 tx feature path definition */
/*
 * The "ip4-output" arc starts at "ip4-rewrite" / "ip4-midchain" and ends
 * at "interface-output"; its arc index is stored in
 * ip4_main.lookup_main.output_feature_arc_index.
 *
 * Ordering declared by the runs_before fields below:
 *   ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
 *   interface-output
 */
1071 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1073 .arc_name = "ip4-output",
1074 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1075 .end_node = "interface-output",
1076 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1079 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1081 .arc_name = "ip4-output",
1082 .node_name = "ip4-source-and-port-range-check-tx",
1083 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1086 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1088 .arc_name = "ip4-output",
1089 .node_name = "ipsec-output-ip4",
1090 .runs_before = VNET_FEATURES ("interface-output"),
1093 /* Built-in ip4 tx feature path definition */
1094 VNET_FEATURE_INIT (ip4_interface_output, static) =
1096 .arc_name = "ip4-output",
1097 .node_name = "interface-output",
1098 .runs_before = 0, /* not before any other features */
/*
 * Software-interface add/delete hook (registered through
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION below).
 *
 * @param vnm         vnet main
 * @param sw_if_index interface being added or deleted
 * @param is_add      non-zero on add, zero on delete
 * @return always 0 (no error)
 *
 * Grows the unicast and multicast FIB-index vectors to cover sw_if_index,
 * then toggles the "ip4-drop" feature on both the "ip4-unicast" and the
 * "ip4-multicast" arcs for that interface.
 * NOTE(review): the remaining arguments of the two
 * vnet_feature_enable_disable calls are not shown here; presumably the
 * enable flag is is_add - confirm.
 */
1102 static clib_error_t *
1103 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1105 ip4_main_t *im = &ip4_main;
1107 /* Fill in lookup tables with default table (0). */
1108 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1109 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1111 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1114 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1117 return /* no error */ 0;
1120 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1122 /* Global IP4 main. */
/* Single process-wide instance; the functions in this file reach it through &ip4_main. */
1123 ip4_main_t ip4_main;
/*
 * Init function for IPv4 lookup (registered through VLIB_INIT_FUNCTION
 * below).
 *
 * @param vm vlib main
 *
 * Visible steps: run vnet_feature_init first and keep its error; fill
 * im->fib_masks[i] with the network-byte-order mask for prefix length i;
 * initialise the lookup main for IPv4; create the unicast and multicast
 * tables with table id 0; hook the packet-generator edit function for the
 * ip4-lookup node; and build the ARP request packet template.
 * NOTE(review): the return statements and the guard around the i == 32
 * mask case are not shown here.
 */
1126 ip4_lookup_init (vlib_main_t * vm)
1128 ip4_main_t *im = &ip4_main;
1129 clib_error_t *error;
1132 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1135 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* i high-order bits set, in host order; converted to network order below. */
1140 m = pow2_mask (i) << (32 - i);
1143 im->fib_masks[i] = clib_host_to_net_u32 (m);
1146 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1148 /* Create FIB with index 0 and table id of 0. */
1149 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1150 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1154 pn = pg_get_node (ip4_lookup_node.index);
1155 pn->unformat_edit = unformat_pg_ip4_header;
/* ARP request template: zeroed header with the fixed fields filled in below. */
1159 ethernet_arp_header_t h;
1161 memset (&h, 0, sizeof (h));
1163 /* Set target ethernet address to all zeros. */
1164 memset (h.ip4_over_ethernet[1].ethernet, 0,
1165 sizeof (h.ip4_over_ethernet[1].ethernet));
/* _16 stores a 16-bit field in network byte order; _8 stores a byte as-is. */
1167 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1168 #define _8(f,v) h.f = v;
1169 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1170 _16 (l3_type, ETHERNET_TYPE_IP4);
1171 _8 (n_l2_address_bytes, 6);
1172 _8 (n_l3_address_bytes, 4);
1173 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1177 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1180 /* alloc chunk size */ 8,
1187 VLIB_INIT_FUNCTION (ip4_lookup_init);
/*
 * Per-packet trace record shared by the ip4 forwarding nodes.  The trace
 * formatters in this file read its dpo_index, flow_hash, fib_index and
 * packet_data members.
 * NOTE(review): only packet_data's declaration is shown here; the other
 * members are presumably u32 - confirm.
 */
1191 /* Adjacency taken. */
1196 /* Packet data, possibly *after* rewrite. */
1197 u8 packet_data[64 - 1 * sizeof (u32)];
1199 ip4_forward_next_trace_t;
/*
 * Trace formatter: prints the captured packet bytes as an IPv4 header,
 * indented to the current indent of s.
 *
 * @param s    vector being appended to
 * @param args va_list holding, in order: vlib_main_t * (unused),
 *             vlib_node_t * (unused), ip4_forward_next_trace_t *
 */
1202 format_ip4_forward_next_trace (u8 * s, va_list * args)
1204 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1205 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1206 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1207 uword indent = format_get_indent (s);
1208 s = format (s, "%U%U",
1209 format_white_space, indent,
1210 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup traces: first a summary line with the record's
 * fib_index, dpo_index and flow_hash, then the captured packet bytes rendered
 * through format_ip4_header on a new, indented line.
 * args supplies, in order: vlib_main_t *, vlib_node_t * (both consumed but
 * unused) and the ip4_forward_next_trace_t * to print.
 */
1215 format_ip4_lookup_trace (u8 * s, va_list * args)
1217 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1218 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1219 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1220 uword indent = format_get_indent (s);
1222 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1223 t->fib_index, t->dpo_index, t->flow_hash);
1224 s = format (s, "\n%U%U",
1225 format_white_space, indent,
1226 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite traces: a summary line (interface index,
 * dpo/adjacency index, the adjacency rendered with format_ip_adjacency, and
 * the flow hash) followed by the captured bytes rendered through
 * format_ip_adjacency_packet_data on a new, indented line.
 * args supplies, in order: vlib_main_t *, vlib_node_t * (both consumed but
 * unused) and the ip4_forward_next_trace_t * to print.
 */
1231 format_ip4_rewrite_trace (u8 * s, va_list * args)
1233 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1234 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1235 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1236 vnet_main_t *vnm = vnet_get_main ();
1237 uword indent = format_get_indent (s);
/* Note: the number printed under the "tx_sw_if_index" label is the record's
   fib_index member; dpo_index is passed twice, once as a plain number and
   once as the argument of format_ip_adjacency. */
1239 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1240 t->fib_index, t->dpo_index, format_ip_adjacency,
1241 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1242 s = format (s, "\n%U%U",
1243 format_white_space, indent,
1244 format_ip_adjacency_packet_data,
1245 vnm, t->dpo_index, t->packet_data, sizeof (t->packet_data));
1249 /* Common trace function for all ip4-forward next nodes. */
/*
 * Walks the buffer indices of frame and, for every buffer flagged
 * VLIB_BUFFER_IS_TRACED, adds an ip4_forward_next_trace_t record to the
 * node's trace.
 *   vm, node        - passed straight through to vlib_add_trace.
 *   frame           - source of the buffer indices (n_vectors entries).
 *   which_adj_index - selects which slot of the buffer's ip.adj_index[]
 *                     (VLIB_RX or VLIB_TX) is stored as dpo_index.
 * Buffers are handled two at a time (b0/b1, with from[2]/from[3]
 * prefetched) and then one at a time; both paths fill the record the same way.
 */
1251 ip4_forward_next_trace (vlib_main_t * vm,
1252 vlib_node_runtime_t * node,
1253 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1256 ip4_main_t *im = &ip4_main;
1258 n_left = frame->n_vectors;
1259 from = vlib_frame_vector_args (frame);
1264 vlib_buffer_t *b0, *b1;
1265 ip4_forward_next_trace_t *t0, *t1;
1267 /* Prefetch next iteration. */
1268 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1269 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1274 b0 = vlib_get_buffer (vm, bi0);
1275 b1 = vlib_get_buffer (vm, bi1);
/* Only buffers flagged as traced get a record. */
1277 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1279 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1280 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1281 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* The expression below yields the buffer's sw_if_index[VLIB_TX] when that is
   set (not ~0), otherwise the FIB index bound to the RX interface. */
1283 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1284 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1285 vec_elt (im->fib_index_by_sw_if_index,
1286 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
/* Capture sizeof (packet_data) bytes starting at the buffer's current
   position. */
1288 clib_memcpy (t0->packet_data,
1289 vlib_buffer_get_current (b0),
1290 sizeof (t0->packet_data));
1292 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1294 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1295 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1296 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1298 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1299 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1300 vec_elt (im->fib_index_by_sw_if_index,
1301 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1302 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1303 sizeof (t1->packet_data));
/* Single-buffer path: same record contents as above. */
1313 ip4_forward_next_trace_t *t0;
1317 b0 = vlib_get_buffer (vm, bi0);
1319 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1321 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1322 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1323 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1325 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1326 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1327 vec_elt (im->fib_index_by_sw_if_index,
1328 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1329 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1330 sizeof (t0->packet_data));
/*
 * Shared body of ip4_drop and ip4_punt: hands every buffer of frame to
 * vlib_error_drop_buffers, with error_code attributed to ip4_input_node, and
 * records packet traces when tracing is enabled on the calling node.
 *   error_code - the ip4_error_t attached to the buffers
 *                (IP4_ERROR_ADJACENCY_DROP or IP4_ERROR_ADJACENCY_PUNT from
 *                the two callers in this file).
 */
1338 ip4_drop_or_punt (vlib_main_t * vm,
1339 vlib_node_runtime_t * node,
1340 vlib_frame_t * frame, ip4_error_t error_code)
1342 u32 *buffers = vlib_frame_vector_args (frame);
1343 uword n_packets = frame->n_vectors;
1345 vlib_error_drop_buffers (vm, node, buffers,
1349 ip4_input_node.index, error_code);
/* Traces record the VLIB_TX adjacency index of each traced buffer. */
1351 if (node->flags & VLIB_NODE_FLAG_TRACE)
1352 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function of ip4_drop_node: disposes of the whole frame through
   ip4_drop_or_punt with IP4_ERROR_ADJACENCY_DROP and returns its result. */
1358 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1360 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function of ip4_punt_node: disposes of the whole frame through
   ip4_drop_or_punt with IP4_ERROR_ADJACENCY_PUNT and returns its result. */
1364 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1366 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph-node registration for ip4_drop: frames carry u32 buffer indices and
   traces are printed with format_ip4_forward_next_trace. */
1370 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1372 .function = ip4_drop,.
1374 .vector_size = sizeof (u32),
1375 .format_trace = format_ip4_forward_next_trace,
1382 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph-node registration for ip4_punt: frames carry u32 buffer indices and
   traces are printed with format_ip4_forward_next_trace. */
1384 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1386 .function = ip4_punt,
1388 .vector_size = sizeof (u32),
1389 .format_trace = format_ip4_forward_next_trace,
1396 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
1399 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with the L4 payload length and the IP protocol number,
 * then the source and destination addresses are added, then the payload is
 * summed buffer by buffer along the chain that starts at p0. The folded,
 * complemented result ends up in sum16.
 *   vm  - used to resolve next_buffer indices when the payload spans a chain.
 *   p0  - first buffer of the packet.
 *   ip0 - the IPv4 header inside p0 (used below as the parameter following
 *         p0 on the signature line).
 */
1401 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1405 u32 ip_header_length, payload_length_host_byte_order;
1406 u32 n_this_buffer, n_bytes_left;
1408 void *data_this_buffer;
1410 /* Initialize checksum with ip header. */
1411 ip_header_length = ip4_header_bytes (ip0);
/* NOTE(review): unsigned subtraction - no guard is visible here against
   ip0->length being smaller than the header length; confirm callers have
   validated the header. */
1412 payload_length_host_byte_order =
1413 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1415 clib_host_to_net_u32 (payload_length_host_byte_order +
1416 (ip0->protocol << 16));
/* Add the addresses: two 32-bit adds when uword is 32 bits wide ... */
1418 if (BITS (uword) == 32)
1421 ip_csum_with_carry (sum0,
1422 clib_mem_unaligned (&ip0->src_address, u32));
1424 ip_csum_with_carry (sum0,
1425 clib_mem_unaligned (&ip0->dst_address, u32));
/* ... otherwise a single unaligned 64-bit read starting at src_address
   (assumes dst_address immediately follows src_address in the header). */
1429 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1431 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1432 data_this_buffer = (void *) ip0 + ip_header_length;
/* If the first buffer holds less than header + payload, only sum what it
   actually contains past the IP header (possibly nothing). */
1433 if (n_this_buffer + ip_header_length > p0->current_length)
1435 p0->current_length >
1436 ip_header_length ? p0->current_length - ip_header_length : 0;
1439 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1440 n_bytes_left -= n_this_buffer;
1441 if (n_bytes_left == 0)
/* More payload remains, so the chain must continue. */
1444 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1445 p0 = vlib_get_buffer (vm, p0->next_buffer);
1446 data_this_buffer = vlib_buffer_get_current (p0);
/* NOTE(review): for chained buffers the length is taken from current_length
   without being capped to n_bytes_left; if a later buffer holds more bytes
   than remain, the unsigned n_bytes_left would wrap instead of reaching 0.
   Confirm this cannot happen. */
1447 n_this_buffer = p0->current_length;
1450 sum16 = ~ip_csum_fold (sum0);
/*
 * Validates the TCP/UDP checksum of the packet in p0 and records the outcome
 * in p0->flags:
 *  - a UDP packet whose checksum field is 0 is marked both
 *    IP_BUFFER_L4_CHECKSUM_COMPUTED and IP_BUFFER_L4_CHECKSUM_CORRECT without
 *    computing anything;
 *  - otherwise the checksum is computed in software and COMPUTED is set,
 *    with CORRECT set only when the computed value is 0.
 * The IP header is taken from the buffer's current position; callers must
 * only pass TCP or UDP packets (asserted).
 */
1456 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1458 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1462 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1463 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): ip0 + 1 places the L4 header directly after the fixed-size
   ip4_header_t, i.e. it does not account for IP options, whereas the
   checksum routine uses ip4_header_bytes (ip0). Confirm packets with options
   cannot reach this point. */
1465 udp0 = (void *) (ip0 + 1);
1466 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1468 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1469 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1473 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is 0 or 1; shifting it by LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT
   sets the CORRECT flag only for a zero result. */
1475 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1476 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Declares the "ip4-local" feature arc, which starts at the ip4-local node.
   Its feature_arc_index is read by ip4_local_inline. */
1482 VNET_FEATURE_ARC_INIT (ip4_local) =
1484 .arc_name = "ip4-local",
1485 .start_nodes = VNET_FEATURES ("ip4-local"),
/*
 * Shared body of the ip4-local and ip4-local-end-of-arc nodes.
 *
 * For each packet of frame it records the IP header offset in the buffer
 * metadata, looks up the packet's *source* address in the mtrie of the
 * selected FIB, derives an error code (UDP length, TCP/UDP checksum,
 * spoofed local source, source lookup miss) and selects the next node from
 * lm->local_next_by_ip_protocol[]. IP4_ERROR_UNKNOWN_PROTOCOL serves as the
 * "nothing wrong so far" value throughout; any other error code selects
 * IP_LOCAL_NEXT_DROP.
 *
 *   head_of_feature_arc - 1 when called from ip4_local, 0 when called from
 *                         ip4_local_end_of_arc. When non-zero, packets whose
 *                         error code is still IP4_ERROR_UNKNOWN_PROTOCOL are
 *                         handed to vnet_feature_arc_start on the "ip4-local"
 *                         arc.
 * Returns frame->n_vectors.
 */
1490 ip4_local_inline (vlib_main_t * vm,
1491 vlib_node_runtime_t * node,
1492 vlib_frame_t * frame, int head_of_feature_arc)
1494 ip4_main_t *im = &ip4_main;
1495 ip_lookup_main_t *lm = &im->lookup_main;
1496 ip_local_next_t next_index;
1497 u32 *from, *to_next, n_left_from, n_left_to_next;
/* Errors are attributed to the ip4-input node's error counters. */
1498 vlib_node_runtime_t *error_node =
1499 vlib_node_get_runtime (vm, ip4_input_node.index);
1500 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1502 from = vlib_frame_vector_args (frame);
1503 n_left_from = frame->n_vectors;
1504 next_index = node->cached_next_index;
1506 if (node->flags & VLIB_NODE_FLAG_TRACE)
1507 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1509 while (n_left_from > 0)
1511 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets (suffix 0 and 1) per iteration. */
1513 while (n_left_from >= 4 && n_left_to_next >= 2)
1515 vlib_buffer_t *p0, *p1;
1516 ip4_header_t *ip0, *ip1;
1517 udp_header_t *udp0, *udp1;
1518 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1519 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1520 const dpo_id_t *dpo0, *dpo1;
1521 const load_balance_t *lb0, *lb1;
1522 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1523 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1524 i32 len_diff0, len_diff1;
1525 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1526 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1527 u32 sw_if_index0, sw_if_index1;
1529 pi0 = to_next[0] = from[0];
1530 pi1 = to_next[1] = from[1];
1534 n_left_to_next -= 2;
1536 next0 = next1 = IP_LOCAL_NEXT_DROP;
1538 p0 = vlib_get_buffer (vm, pi0);
1539 p1 = vlib_get_buffer (vm, pi1);
1541 ip0 = vlib_buffer_get_current (p0);
1542 ip1 = vlib_buffer_get_current (p1);
/* Remember where the IP header starts within the buffer. */
1544 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1545 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1547 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1548 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
1550 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1551 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
/* NOTE(review): fib_index0/fib_index1 were already loaded just above; the
   repeated vec_elt lookups below look redundant. The conditional
   expressions prefer the buffer's sw_if_index[VLIB_TX] over the RX
   interface's FIB whenever it is set (not ~0). */
1553 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1555 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1556 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1558 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1560 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1561 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
1563 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1564 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
/* Source-address lookup: mtrie steps 0..3 are interleaved with the
   checksum and length work below. */
1566 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1569 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1571 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1573 /* Treat IP frag packets as "experimental" protocol for now
1574 until support of IP frag reassembly is implemented */
1575 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1576 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
1578 if (head_of_feature_arc == 0)
1580 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1584 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1585 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1586 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1587 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1592 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1593 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1595 udp0 = ip4_next_header (ip0);
1596 udp1 = ip4_next_header (ip1);
1598 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1599 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1600 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1603 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1605 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1607 /* Verify UDP length. */
1608 ip_len0 = clib_net_to_host_u16 (ip0->length);
1609 ip_len1 = clib_net_to_host_u16 (ip1->length);
1610 udp_len0 = clib_net_to_host_u16 (udp0->length);
1611 udp_len1 = clib_net_to_host_u16 (udp1->length);
1613 len_diff0 = ip_len0 - udp_len0;
1614 len_diff1 = ip_len1 - udp_len1;
/* The length difference only matters for UDP; force it to 0 otherwise. */
1616 len_diff0 = is_udp0 ? len_diff0 : 0;
1617 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: taken unless both packets are TCP/UDP with a checksum already
   known to be good; validates in software where not yet computed. */
1619 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1620 & good_tcp_udp0 & good_tcp_udp1)))
1625 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1626 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1628 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1629 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1634 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1635 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1637 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1638 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1642 good_tcp_udp0 &= len_diff0 >= 0;
1643 good_tcp_udp1 &= len_diff1 >= 0;
1646 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1648 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1650 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1652 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1653 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* The checksum error is chosen as IP4_ERROR_TCP_CHECKSUM + is_udpN, which
   relies on the UDP error value directly following the TCP one. */
1655 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1656 error0 = (is_tcp_udp0 && !good_tcp_udp0
1657 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1658 error1 = (is_tcp_udp1 && !good_tcp_udp1
1659 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
1662 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1664 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1667 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1670 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* Store the result of the source lookup in both adj_index slots. */
1672 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1673 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1674 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1676 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1677 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1678 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1680 lb0 = load_balance_get (lbi0);
1681 lb1 = load_balance_get (lbi1);
1682 dpo0 = load_balance_get_bucket_i (lb0, 0);
1683 dpo1 = load_balance_get_bucket_i (lb1, 0);
1686 * Must have a route to source otherwise we drop the packet.
1687 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1690 * - the source is a recieve => it's from us => bogus, do this
1691 * first since it sets a different error code.
1692 * - uRPF check for any route to source - accept if passes.
1693 * - allow packets destined to the broadcast address from unknown sources
1695 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1696 dpo0->dpoi_type == DPO_RECEIVE) ?
1697 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1698 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1699 !fib_urpf_check_size (lb0->lb_urpf) &&
1700 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1701 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1702 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1703 dpo1->dpoi_type == DPO_RECEIVE) ?
1704 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1705 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1706 !fib_urpf_check_size (lb1->lb_urpf) &&
1707 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1708 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
/* Next node by IP protocol, overridden by drop for any real error. */
1710 next0 = lm->local_next_by_ip_protocol[proto0];
1711 next1 = lm->local_next_by_ip_protocol[proto1];
1715 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1717 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
/* An error code of 0 leaves the buffer without an error. */
1719 p0->error = error0 ? error_node->errors[error0] : 0;
1720 p1->error = error1 ? error_node->errors[error1] : 0;
1722 if (head_of_feature_arc)
1724 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1725 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1726 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1727 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1730 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1731 n_left_to_next, pi0, pi1,
/* Single loop: same processing as above, one packet per iteration. */
1735 while (n_left_from > 0 && n_left_to_next > 0)
1740 ip4_fib_mtrie_t *mtrie0;
1741 ip4_fib_mtrie_leaf_t leaf0;
1742 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1744 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1745 load_balance_t *lb0;
1746 const dpo_id_t *dpo0;
1749 pi0 = to_next[0] = from[0];
1753 n_left_to_next -= 1;
1755 next0 = IP_LOCAL_NEXT_DROP;
1757 p0 = vlib_get_buffer (vm, pi0);
1759 ip0 = vlib_buffer_get_current (p0);
1761 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1763 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1765 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1768 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1769 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1771 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1773 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1776 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1778 /* Treat IP frag packets as "experimental" protocol for now
1779 until support of IP frag reassembly is implemented */
1780 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1782 if (head_of_feature_arc == 0)
1784 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1788 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1789 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1793 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1795 udp0 = ip4_next_header (ip0);
1797 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1798 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1801 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1803 /* Verify UDP length. */
1804 ip_len0 = clib_net_to_host_u16 (ip0->length);
1805 udp_len0 = clib_net_to_host_u16 (udp0->length);
1807 len_diff0 = ip_len0 - udp_len0;
1809 len_diff0 = is_udp0 ? len_diff0 : 0;
1811 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1816 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1817 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1819 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1820 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1824 good_tcp_udp0 &= len_diff0 >= 0;
1827 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1829 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1831 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1833 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1834 error0 = (is_tcp_udp0 && !good_tcp_udp0
1835 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1838 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1841 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1843 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1844 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1846 lb0 = load_balance_get (lbi0);
1847 dpo0 = load_balance_get_bucket_i (lb0, 0);
1849 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1850 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
1852 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1853 dpo0->dpoi_type == DPO_RECEIVE) ?
1854 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1855 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1856 !fib_urpf_check_size (lb0->lb_urpf) &&
1857 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1858 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1862 next0 = lm->local_next_by_ip_protocol[proto0];
1865 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1867 p0->error = error0 ? error_node->errors[error0] : 0;
1869 if (head_of_feature_arc)
1871 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1872 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1875 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1876 n_left_to_next, pi0, next0);
1880 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1883 return frame->n_vectors;
/* Node function of ip4_local_node: runs ip4_local_inline as the head of the
   "ip4-local" feature arc and returns its result. */
1887 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1889 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Graph-node registration for "ip4-local". It declares IP_LOCAL_N_NEXT next
   nodes; the entries visible here map drop/punt to the error nodes and
   UDP/ICMP to their input nodes. ip4_register_protocol adds further next
   nodes to this node at run time. */
1893 VLIB_REGISTER_NODE (ip4_local_node) =
1895 .function = ip4_local,
1896 .name = "ip4-local",
1897 .vector_size = sizeof (u32),
1898 .format_trace = format_ip4_forward_next_trace,
1899 .n_next_nodes = IP_LOCAL_N_NEXT,
1902 [IP_LOCAL_NEXT_DROP] = "error-drop",
1903 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1904 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1905 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1909 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Node function of ip4_local_end_of_arc_node: runs ip4_local_inline with
   head_of_feature_arc = 0, so the feature arc is not started again, and
   returns its result. */
1912 ip4_local_end_of_arc (vlib_main_t * vm,
1913 vlib_node_runtime_t * node, vlib_frame_t * frame)
1915 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/* Graph-node registration for "ip4-local-end-of-arc". It is declared a
   sibling of "ip4-local" and uses the same trace formatter. */
1919 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1920 .function = ip4_local_end_of_arc,
1921 .name = "ip4-local-end-of-arc",
1922 .vector_size = sizeof (u32),
1924 .format_trace = format_ip4_forward_next_trace,
1925 .sibling_of = "ip4-local",
1928 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Registers the "ip4-local-end-of-arc" node as a feature on the "ip4-local"
   arc, with no ordering constraint placing it before any other feature. */
1930 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1931 .arc_name = "ip4-local",
1932 .node_name = "ip4-local-end-of-arc",
1933 .runs_before = 0, /* not before any other features */
/*
 * Registers a graph node as the local handler for an IP protocol number.
 *   protocol   - IP protocol number; must index inside
 *                lm->local_next_by_ip_protocol[] (asserted).
 *   node_index - node to receive such packets; it is added as a next node of
 *                ip4-local and the resulting next index is stored in the
 *                per-protocol table that ip4_local_inline consults.
 */
1938 ip4_register_protocol (u32 protocol, u32 node_index)
1940 vlib_main_t *vm = vlib_get_main ();
1941 ip4_main_t *im = &ip4_main;
1942 ip_lookup_main_t *lm = &im->lookup_main;
1944 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1945 lm->local_next_by_ip_protocol[protocol] =
1946 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local": prints a heading, then the number of every
 * protocol whose entry in lm->local_next_by_ip_protocol[] is something other
 * than IP_LOCAL_NEXT_PUNT, one per line. The input and cmd arguments are not
 * used by the visible code.
 */
1949 static clib_error_t *
1950 show_ip_local_command_fn (vlib_main_t * vm,
1951 unformat_input_t * input, vlib_cli_command_t * cmd)
1953 ip4_main_t *im = &ip4_main;
1954 ip_lookup_main_t *lm = &im->lookup_main;
1957 vlib_cli_output (vm, "Protocols handled by ip4_local");
1958 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
/* Entries still set to punt are treated as "not handled" and skipped. */
1960 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1961 vlib_cli_output (vm, "%d", i);
/* CLI registration binding the "show ip local" path to
   show_ip_local_command_fn. */
1969 * Display the set of protocols handled by the local IPv4 stack.
1972 * Example of how to display local protocol table:
1973 * @cliexstart{show ip local}
1974 * Protocols handled by ip4_local
1981 VLIB_CLI_COMMAND (show_ip_local, static) =
1983 .path = "show ip local",
1984 .function = show_ip_local_command_fn,
1985 .short_help = "show ip local",
/*
 * Shared body of the ip4-arp and ip4-glean nodes.
 *
 * Every packet of frame is placed on the IP4_ARP_NEXT_DROP next frame. For
 * packets that are not throttled, and whose adjacency is of the expected
 * kind, an ARP request is built from im->ip4_arp_request_packet_template and
 * sent via the adjacency's rewrite_header.next_index.
 *
 *   is_glean - non-zero: the address being resolved is the packet's own
 *              destination, and the ARP source IP comes from the glean
 *              adjacency. Zero: the address being resolved is the
 *              adjacency's next hop, and the ARP source IP is chosen with
 *              ip4_src_address_for_packet.
 *
 * Throttling uses function-static state (hash_seeds, hash_bitmap,
 * time_last_seed_change): an address whose hash bit is already set in the
 * bitmap is counted as IP4_ARP_ERROR_DROP instead of
 * IP4_ARP_ERROR_REQUEST_SENT. The seeds are re-drawn and the bitmap cleared
 * once more than 1e-3 (in the units of vlib_time_now) has passed since the
 * last change.
 * Returns frame->n_vectors.
 */
1990 ip4_arp_inline (vlib_main_t * vm,
1991 vlib_node_runtime_t * node,
1992 vlib_frame_t * frame, int is_glean)
1994 vnet_main_t *vnm = vnet_get_main ();
1995 ip4_main_t *im = &ip4_main;
1996 ip_lookup_main_t *lm = &im->lookup_main;
1997 u32 *from, *to_next_drop;
1998 uword n_left_from, n_left_to_next_drop, next_index;
/* Static: this state persists across calls and is shared by every caller of
   this function. */
1999 static f64 time_last_seed_change = -1e100;
2000 static u32 hash_seeds[3];
2001 static uword hash_bitmap[256 / BITS (uword)];
2004 if (node->flags & VLIB_NODE_FLAG_TRACE)
2005 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2007 time_now = vlib_time_now (vm);
2008 if (time_now - time_last_seed_change > 1e-3)
2011 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2012 sizeof (hash_seeds));
2013 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2014 hash_seeds[i] = r[i];
2016 /* Mark all hash keys as not seen before. */
2017 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2020 time_last_seed_change = time_now;
2023 from = vlib_frame_vector_args (frame);
2024 n_left_from = frame->n_vectors;
2025 next_index = node->cached_next_index;
2026 if (next_index == IP4_ARP_NEXT_DROP)
2027 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2029 while (n_left_from > 0)
2031 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2032 to_next_drop, n_left_to_next_drop);
2034 while (n_left_from > 0 && n_left_to_next_drop > 0)
2036 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2037 ip_adjacency_t *adj0;
2044 p0 = vlib_get_buffer (vm, pi0);
2046 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2047 adj0 = ip_get_adjacency (lm, adj_index0);
2048 ip0 = vlib_buffer_get_current (p0);
/* The output interface is taken from the adjacency's rewrite header. */
2054 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2055 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
2060 * this is the Glean case, so we are ARPing for the
2061 * packet's destination
2063 a0 ^= ip0->dst_address.data_u32;
2067 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2071 hash_v3_finalize32 (a0, b0, c0);
/* Map the hash onto a (word, bit) position of hash_bitmap.
   NOTE(review): m0 is derived from c0 *after* c0 has been divided down to a
   word index (0 .. ARRAY_LEN (hash_bitmap) - 1), so the bit chosen within a
   word always equals the word index and only that many distinct slots are
   ever used out of BITS (hash_bitmap). Computing m0 before the division
   looks like the intent. m0 is also declared u32 while the mask is built as
   a uword. Confirm and fix in the full source. */
2073 c0 &= BITS (hash_bitmap) - 1;
2074 c0 = c0 / BITS (uword);
2075 m0 = (uword) 1 << (c0 % BITS (uword));
2077 bm0 = hash_bitmap[c0];
2078 drop0 = (bm0 & m0) != 0;
2080 /* Mark it as seen. */
2081 hash_bitmap[c0] = bm0 | m0;
/* The original packet always goes onto the drop frame. */
2085 to_next_drop[0] = pi0;
2087 n_left_to_next_drop -= 1;
2090 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2091 IP4_ARP_ERROR_REQUEST_SENT];
2094 * the adj has been updated to a rewrite but the node the DPO that got
2095 * us here hasn't - yet. no big deal. we'll drop while we wait.
2097 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2104 * Can happen if the control-plane is programming tables
2105 * with traffic flowing; at least that's today's lame excuse.
2107 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2108 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2110 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2113 /* Send ARP request. */
2117 ethernet_arp_header_t *h0;
2118 vnet_hw_interface_t *hw_if0;
2121 vlib_packet_template_get_packet (vm,
2122 &im->ip4_arp_request_packet_template,
2125 /* Add rewrite/encap string for ARP packet. */
2126 vnet_rewrite_one_header (adj0[0], h0,
2127 sizeof (ethernet_header_t));
2129 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2131 /* Src ethernet address in ARP header. */
2132 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2134 sizeof (h0->ip4_over_ethernet[0].ethernet));
2138 /* The interface's source address is stashed in the Glean Adj */
2139 h0->ip4_over_ethernet[0].ip4 =
2140 adj0->sub_type.glean.receive_addr.ip4;
2142 /* Copy in destination address we are requesting. This is the
2143 * glean case, so it's the packet's destination.*/
2144 h0->ip4_over_ethernet[1].ip4.data_u32 =
2145 ip0->dst_address.data_u32;
2149 /* Src IP address in ARP header. */
2150 if (ip4_src_address_for_packet (lm, sw_if_index0,
2152 ip4_over_ethernet[0].ip4))
2154 /* No source address available */
2156 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2157 vlib_buffer_free (vm, &bi0, 1);
2161 /* Copy in destination address we are requesting from the
2163 h0->ip4_over_ethernet[1].ip4.data_u32 =
2164 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2167 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2168 b0 = vlib_get_buffer (vm, bi0);
2169 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2171 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2173 vlib_set_next_frame_buffer (vm, node,
2174 adj0->rewrite_header.next_index,
2179 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2182 return frame->n_vectors;
/* Node function of ip4_arp_node: runs ip4_arp_inline with is_glean = 0 (the
   address resolved is the adjacency's next hop) and returns its result. */
2186 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2188 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function of ip4_glean_node: runs ip4_arp_inline with is_glean = 1
   (the address resolved is the packet's destination) and returns its
   result. */
2192 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2194 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the IP4_ARP_ERROR_* codes, indexed by error code.
   The table is shared by the ip4-arp and ip4-glean node registrations. */
2197 static char *ip4_arp_error_strings[] = {
2198 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2199 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2200 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2201 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2202 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2203 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph-node registration for "ip4-arp": node function ip4_arp, error
   counters from ip4_arp_error_strings, IP4_ARP_N_NEXT next nodes, with the
   drop slot wired to "error-drop". */
2206 VLIB_REGISTER_NODE (ip4_arp_node) =
2208 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2209 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2210 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2211 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2213 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph-node registration for "ip4-glean": node function ip4_glean, with the
   same error strings and next-node layout as "ip4-arp". */
2216 VLIB_REGISTER_NODE (ip4_glean_node) =
2218 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2219 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2220 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2221 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2223 [IP4_ARP_NEXT_DROP] = "error-drop",}
/*
 * arp_notrace_init (registered below with VLIB_INIT_FUNCTION) fetches the
 * ip4-arp node runtime and, for every error named by
 * foreach_notrace_ip4_arp_error, passes rt->errors[IP4_ARP_ERROR_<name>] to
 * vnet_pcap_drop_trace_filter_add_del, so that those drops are filtered from
 * the pcap drop trace.
 */
2226 #define foreach_notrace_ip4_arp_error \
2233 arp_notrace_init (vlib_main_t * vm)
2235 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2237 /* don't trace ARP request packets */
2239 vnet_pcap_drop_trace_filter_add_del \
2240 (rt->errors[IP4_ARP_ERROR_##a], \
2242 foreach_notrace_ip4_arp_error;
2247 VLIB_INIT_FUNCTION (arp_notrace_init);
2250 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 *   vm          - vlib main, used for buffer and frame handling.
 *   dst         - IPv4 address to probe; copied into the ARP target field.
 *   sw_if_index - software interface to send the request on.
 * Returns 0 on success, or a clib error when the interface is not admin-up
 * or when no interface address matches dst (in which case vnm->api_errno is
 * also set to VNET_API_ERROR_NO_MATCHING_INTERFACE).
 * The request is built from im->ip4_arp_request_packet_template and queued
 * directly to the hardware interface's output node.
 */
2252 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2254 vnet_main_t *vnm = vnet_get_main ();
2255 ip4_main_t *im = &ip4_main;
2256 ethernet_arp_header_t *h;
2258 ip_interface_address_t *ia;
2259 ip_adjacency_t *adj;
2260 vnet_hw_interface_t *hi;
2261 vnet_sw_interface_t *si;
2265 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is administratively down. */
2267 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2269 return clib_error_return (0, "%U: interface %U down",
2270 format_ip4_address, dst,
2271 format_vnet_sw_if_index_name, vnm,
/* Find the interface address that covers dst; ia receives the matching
   interface-address entry. */
2276 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2279 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2280 return clib_error_return
2282 "no matching interface address for destination %U (interface %U)",
2283 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
/* The probe adjacency of that interface address supplies the L2 rewrite. */
2287 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2290 vlib_packet_template_get_packet (vm,
2291 &im->ip4_arp_request_packet_template,
2294 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address = the hardware interface's address. */
2296 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2297 sizeof (h->ip4_over_ethernet[0].ethernet));
/* Sender IP = src[0], target IP = dst. */
2299 h->ip4_over_ethernet[0].ip4 = src[0];
2300 h->ip4_over_ethernet[1].ip4 = dst[0];
2302 b = vlib_get_buffer (vm, bi);
2303 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2304 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2306 /* Add encapsulation string for software interface (e.g. ethernet header). */
2307 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back so that it covers the rewrite just written. */
2308 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet straight to the interface's output node in its own frame. */
2311 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2312 u32 *to_next = vlib_frame_vector_args (f);
2315 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2318 return /* no error */ 0;
/* Next-node choices used by ip4_rewrite_inline: DROP is its initial value
   for each packet, and ICMP_ERROR is selected when the TTL expires. */
2323 IP4_REWRITE_NEXT_DROP,
2324 IP4_REWRITE_NEXT_ICMP_ERROR,
2325 } ip4_rewrite_next_t;
2328 ip4_rewrite_inline (vlib_main_t * vm,
2329 vlib_node_runtime_t * node,
2330 vlib_frame_t * frame, int is_midchain, int is_mcast)
2332 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2333 u32 *from = vlib_frame_vector_args (frame);
2334 u32 n_left_from, n_left_to_next, *to_next, next_index;
2335 vlib_node_runtime_t *error_node =
2336 vlib_node_get_runtime (vm, ip4_input_node.index);
2338 n_left_from = frame->n_vectors;
2339 next_index = node->cached_next_index;
2340 u32 cpu_index = os_get_cpu_number ();
2342 while (n_left_from > 0)
2344 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2346 while (n_left_from >= 4 && n_left_to_next >= 2)
2348 ip_adjacency_t *adj0, *adj1;
2349 vlib_buffer_t *p0, *p1;
2350 ip4_header_t *ip0, *ip1;
2351 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2352 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2353 u32 tx_sw_if_index0, tx_sw_if_index1;
2355 /* Prefetch next iteration. */
2357 vlib_buffer_t *p2, *p3;
2359 p2 = vlib_get_buffer (vm, from[2]);
2360 p3 = vlib_get_buffer (vm, from[3]);
2362 vlib_prefetch_buffer_header (p2, STORE);
2363 vlib_prefetch_buffer_header (p3, STORE);
2365 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2366 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2369 pi0 = to_next[0] = from[0];
2370 pi1 = to_next[1] = from[1];
2375 n_left_to_next -= 2;
2377 p0 = vlib_get_buffer (vm, pi0);
2378 p1 = vlib_get_buffer (vm, pi1);
2380 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2381 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2383 /* We should never rewrite a pkt using the MISS adjacency */
2384 ASSERT (adj_index0 && adj_index1);
2386 ip0 = vlib_buffer_get_current (p0);
2387 ip1 = vlib_buffer_get_current (p1);
2389 error0 = error1 = IP4_ERROR_NONE;
2390 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2392 /* Decrement TTL & update checksum.
2393 Works either endian, so no need for byte swap. */
2394 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2396 i32 ttl0 = ip0->ttl;
2398 /* Input node should have reject packets with ttl 0. */
2399 ASSERT (ip0->ttl > 0);
2401 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2402 checksum0 += checksum0 >= 0xffff;
2404 ip0->checksum = checksum0;
2409 * If the ttl drops below 1 when forwarding, generate
2412 if (PREDICT_FALSE (ttl0 <= 0))
2414 error0 = IP4_ERROR_TIME_EXPIRED;
2415 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2416 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2417 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2419 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2422 /* Verify checksum. */
2423 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2427 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2429 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2431 i32 ttl1 = ip1->ttl;
2433 /* Input node should have rejected packets with ttl 0. */
2434 ASSERT (ip1->ttl > 0);
2436 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2437 checksum1 += checksum1 >= 0xffff;
2439 ip1->checksum = checksum1;
2444 * If the ttl drops below 1 when forwarding, generate
2447 if (PREDICT_FALSE (ttl1 <= 0))
2449 error1 = IP4_ERROR_TIME_EXPIRED;
2450 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2451 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2452 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2454 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2457 /* Verify checksum. */
2458 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2459 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2463 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2466 /* Rewrite packet header and updates lengths. */
2467 adj0 = ip_get_adjacency (lm, adj_index0);
2468 adj1 = ip_get_adjacency (lm, adj_index1);
2470 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2471 rw_len0 = adj0[0].rewrite_header.data_bytes;
2472 rw_len1 = adj1[0].rewrite_header.data_bytes;
2473 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2474 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2476 /* Check MTU of outgoing interface. */
2478 (vlib_buffer_length_in_chain (vm, p0) >
2480 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2483 (vlib_buffer_length_in_chain (vm, p1) >
2485 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2489 * pre-fetch the per-adjacency counters
2491 vlib_prefetch_combined_counter (&adjacency_counters,
2492 cpu_index, adj_index0);
2493 vlib_prefetch_combined_counter (&adjacency_counters,
2494 cpu_index, adj_index1);
2496 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2497 * to see the IP header */
2498 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2500 next0 = adj0[0].rewrite_header.next_index;
2501 p0->current_data -= rw_len0;
2502 p0->current_length += rw_len0;
2503 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2504 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2506 vnet_feature_arc_start (lm->output_feature_arc_index,
2507 tx_sw_if_index0, &next0, p0);
2509 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2511 next1 = adj1[0].rewrite_header.next_index;
2512 p1->current_data -= rw_len1;
2513 p1->current_length += rw_len1;
2515 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2516 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2518 vnet_feature_arc_start (lm->output_feature_arc_index,
2519 tx_sw_if_index1, &next1, p1);
2522 /* Guess we are only writing on simple Ethernet header. */
2523 vnet_rewrite_two_headers (adj0[0], adj1[0],
2524 ip0, ip1, sizeof (ethernet_header_t));
2527 * Bump the per-adjacency counters
2529 vlib_increment_combined_counter
2530 (&adjacency_counters,
2532 adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2534 vlib_increment_combined_counter
2535 (&adjacency_counters,
2537 adj_index1, 1, vlib_buffer_length_in_chain (vm, p1) + rw_len1);
2541 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2542 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2547 * copy bytes from the IP address into the MAC rewrite
2549 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
2550 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1, 1);
2553 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2554 to_next, n_left_to_next,
2555 pi0, pi1, next0, next1);
2558 while (n_left_from > 0 && n_left_to_next > 0)
2560 ip_adjacency_t *adj0;
2563 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2564 u32 tx_sw_if_index0;
2566 pi0 = to_next[0] = from[0];
2568 p0 = vlib_get_buffer (vm, pi0);
2570 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2572 /* We should never rewrite a pkt using the MISS adjacency */
2573 ASSERT (adj_index0);
2575 adj0 = ip_get_adjacency (lm, adj_index0);
2577 ip0 = vlib_buffer_get_current (p0);
2579 error0 = IP4_ERROR_NONE;
2580 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2582 /* Decrement TTL & update checksum. */
2583 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2585 i32 ttl0 = ip0->ttl;
2587 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2589 checksum0 += checksum0 >= 0xffff;
2591 ip0->checksum = checksum0;
2593 ASSERT (ip0->ttl > 0);
2599 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2601 if (PREDICT_FALSE (ttl0 <= 0))
2604 * If the ttl drops below 1 when forwarding, generate
2607 error0 = IP4_ERROR_TIME_EXPIRED;
2608 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2609 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2610 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2611 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2617 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2620 vlib_prefetch_combined_counter (&adjacency_counters,
2621 cpu_index, adj_index0);
2623 /* Guess we are only writing on simple Ethernet header. */
2624 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2628 * copy bytes from the IP address into the MAC rewrite
2630 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0, 1);
2633 /* Update packet buffer attributes/set output interface. */
2634 rw_len0 = adj0[0].rewrite_header.data_bytes;
2635 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2637 vlib_increment_combined_counter
2638 (&adjacency_counters,
2640 adj_index0, 1, vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2642 /* Check MTU of outgoing interface. */
2643 error0 = (vlib_buffer_length_in_chain (vm, p0)
2644 > adj0[0].rewrite_header.max_l3_packet_bytes
2645 ? IP4_ERROR_MTU_EXCEEDED : error0);
2647 p0->error = error_node->errors[error0];
2649 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2650 * to see the IP header */
2651 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2653 p0->current_data -= rw_len0;
2654 p0->current_length += rw_len0;
2655 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2657 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2658 next0 = adj0[0].rewrite_header.next_index;
2662 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2665 vnet_feature_arc_start (lm->output_feature_arc_index,
2666 tx_sw_if_index0, &next0, p0);
2673 n_left_to_next -= 1;
2675 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2676 to_next, n_left_to_next,
2680 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2683 /* Need to do trace after rewrites to pick up new packet data. */
2684 if (node->flags & VLIB_NODE_FLAG_TRACE)
2685 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2687 return frame->n_vectors;
2691 /** @brief IPv4 rewrite node.
2694 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2695 header checksum, fetch the ip adjacency, check the outbound mtu,
2696 apply the adjacency rewrite, and send pkts to the adjacency
2697 rewrite header's rewrite_next_index.
2699 @param vm vlib_main_t corresponding to the current thread
2700 @param node vlib_node_runtime_t
2701 @param frame vlib_frame_t whose contents should be dispatched
2703 @par Graph mechanics: buffer metadata, next index usage
2706 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2707 - the rewrite adjacency index
2708 - <code>adj->lookup_next_index</code>
2709 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2710 the packet will be dropped.
2711 - <code>adj->rewrite_header</code>
2712 - Rewrite string length, rewrite string, next_index
2715 - <code>b->current_data, b->current_length</code>
2716 - Updated net of applying the rewrite string
2718 <em>Next Indices:</em>
2719 - <code> adj->rewrite_header.next_index </code>
/* Dispatch function of the "ip4-rewrite" node: hands the frame to the
 * shared ip4_rewrite_inline with both trailing flag arguments set to 0.
 * NOTE(review): the two flags presumably select midchain and mcast
 * handling respectively (compare ip4_midchain and ip4_rewrite_mcast) --
 * confirm against the ip4_rewrite_inline signature. */
2723 ip4_rewrite (vlib_main_t * vm,
2724 vlib_node_runtime_t * node, vlib_frame_t * frame)
2726 return ip4_rewrite_inline (vm, node, frame, 0, 0);
/* Dispatch function of the "ip4-midchain" node: same shared inline as
 * ip4_rewrite, but with the first trailing flag set to 1 and the second 0.
 * NOTE(review): the first flag presumably enables the midchain fixup
 * path -- confirm against the ip4_rewrite_inline signature. */
2730 ip4_midchain (vlib_main_t * vm,
2731 vlib_node_runtime_t * node, vlib_frame_t * frame)
2733 return ip4_rewrite_inline (vm, node, frame, 1, 0);
/* Dispatch function of the "ip4-rewrite-mcast" node: same shared inline
 * as ip4_rewrite, but with the first trailing flag 0 and the second 1.
 * NOTE(review): the second flag presumably enables the multicast MAC
 * fixup path -- confirm against the ip4_rewrite_inline signature. */
2737 ip4_rewrite_mcast (vlib_main_t * vm,
2738 vlib_node_runtime_t * node, vlib_frame_t * frame)
2740 return ip4_rewrite_inline (vm, node, frame, 0, 1);
/* Registration of the "ip4-rewrite" graph node. ip4_rewrite is the
 * dispatch function, frames carry u32 elements, traces are rendered by
 * format_ip4_rewrite_trace, and IP4_REWRITE_NEXT_DROP /
 * IP4_REWRITE_NEXT_ICMP_ERROR map to the "error-drop" and
 * "ip4-icmp-error" nodes. */
2744 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2745 .function = ip4_rewrite,
2746 .name = "ip4-rewrite",
2747 .vector_size = sizeof (u32),
2749 .format_trace = format_ip4_rewrite_trace,
2753 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2754 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2757 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Registration of the "ip4-rewrite-mcast" graph node, dispatched by
 * ip4_rewrite_mcast and declared a sibling of "ip4-rewrite".
 * NOTE(review): presumably the sibling relation is what gives this node
 * the same next nodes as "ip4-rewrite" -- confirm. */
2759 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2760 .function = ip4_rewrite_mcast,
2761 .name = "ip4-rewrite-mcast",
2762 .vector_size = sizeof (u32),
2764 .format_trace = format_ip4_rewrite_trace,
2765 .sibling_of = "ip4-rewrite",
2767 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/* Registration of the "ip4-midchain" graph node, dispatched by
 * ip4_midchain and declared a sibling of "ip4-rewrite". Unlike the two
 * rewrite nodes, its traces are rendered by
 * format_ip4_forward_next_trace. */
2769 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2770 .function = ip4_midchain,
2771 .name = "ip4-midchain",
2772 .vector_size = sizeof (u32),
2773 .format_trace = format_ip4_forward_next_trace,
2774 .sibling_of = "ip4-rewrite",
2776 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/*
 * CLI handler for "set interface ip table".
 *
 * Parses an interface followed by a numeric table id. On a parse failure
 * it builds an "unknown interface" or "expected table id" error.
 * Otherwise it finds or creates (and locks) the IPv4 unicast FIB and the
 * IPv4 multicast FIB, and stores their indices for the interface in
 * ip4_main.fib_index_by_sw_if_index and ip4_main.mfib_index_by_sw_if_index.
 *
 * NOTE(review): despite the "add_del" name, only the bind direction is
 * visible here -- confirm whether an unbind path is expected.
 */
2779 static clib_error_t *
2780 add_del_interface_table (vlib_main_t * vm,
2781 unformat_input_t * input, vlib_cli_command_t * cmd)
2783 vnet_main_t *vnm = vnet_get_main ();
2784 clib_error_t *error = 0;
2785 u32 sw_if_index, table_id;
/* First token: the interface to move. */
2789 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2791 error = clib_error_return (0, "unknown interface `%U'",
2792 format_unformat_error, input);
/* Second token: the decimal table id. */
2796 if (unformat (input, "%d", &table_id))
2800 error = clib_error_return (0, "expected table id `%U'",
2801 format_unformat_error, input);
2806 ip4_main_t *im = &ip4_main;
/* Unicast FIB: find or create it, taking a lock on the table. */
2809 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2814 // changing an interface's table has consequences for any connecteds
2815 // and adj-fibs already installed.
/* Grow the per-interface vector if needed before recording the index. */
2817 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2818 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
/* Multicast FIB: same procedure; fib_index is reused as scratch. */
2820 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2822 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2823 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2831 * Place the indicated interface into the supplied IPv4 FIB table (also known
2832 * as a VRF). If the FIB table does not exist, this command creates it. To
2833 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2834 * A FIB table will only be displayed if a route has been added to the table, or
2835 * an IP Address is assigned to an interface in the table (which adds a route
2838 * @note IP addresses added after setting the interface IP table end up in
2839 * the indicated FIB table. If the IP address is added prior to adding the
2840 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2841 * but potentially counter-intuitive results occur if you provision interface
2842 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2843 * IP table ID provisioned. It might be marginally useful to evade source RPF
2844 * drops to put an interface address into multiple FIBs.
2847 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2848 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Binds the "set interface ip table" CLI path to
 * add_del_interface_table. */
2851 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2853 .path = "set interface ip table",
2854 .function = add_del_interface_table,
2855 .short_help = "set interface ip table <interface> <table-id>",
/*
 * Consistency check between the mtrie and the FIB table for one address.
 *
 * Looks the address up in the mtrie of the FIB at fib_index0, falling
 * back to the mtrie's default leaf when the walk ends on an empty leaf,
 * and takes the index stored in the resulting leaf. Returns non-zero
 * when that index equals the one returned by ip4_fib_table_lookup_lb for
 * the same FIB and address, zero otherwise.
 */
2860 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2862 ip4_fib_mtrie_t *mtrie0;
2863 ip4_fib_mtrie_leaf_t leaf0;
2866 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* Four lookup steps from the root, with step indices 0..3
 * (presumably one per address byte -- confirm). */
2868 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2869 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2870 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2871 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2872 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2874 /* Handle default route. */
2875 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2877 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* The mtrie result must agree with the table lookup. */
2879 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler for "test lookup".
 *
 * Accepts, in any order, "table <n>", "count <n>" and an IPv4 address;
 * any other token yields an "unknown input" error. It then runs n
 * validations with ip4_lookup_validate, stepping the address by one
 * after each, and prints either the error count or a success message.
 *
 * NOTE(review): the "table" value is passed straight to ip4_fib_get and
 * compared with fib->index, so it is used as a FIB index rather than a
 * table id (the error text says "<fib-index>").
 * NOTE(review): ip4_base_address has no initializer in its declaration
 * and is only written when an address token is parsed -- confirm that an
 * address is always supplied before the loop reads it.
 */
2882 static clib_error_t *
2883 test_lookup_command_fn (vlib_main_t * vm,
2884 unformat_input_t * input, vlib_cli_command_t * cmd)
2891 ip4_address_t ip4_base_address;
2894 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2896 if (unformat (input, "table %d", &table_id))
2898 /* Make sure the entry exists. */
2899 fib = ip4_fib_get (table_id);
2900 if ((fib) && (fib->index != table_id))
2901 return clib_error_return (0, "<fib-index> %d does not exist",
2904 else if (unformat (input, "count %f", &count))
2907 else if (unformat (input, "%U",
2908 unformat_ip4_address, &ip4_base_address))
2911 return clib_error_return (0, "unknown input `%U'",
2912 format_unformat_error, input);
/* Validate n consecutive addresses starting at the base address. */
2917 for (i = 0; i < n; i++)
2919 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Next address: convert to host order, add 1, convert back. */
2922 ip4_base_address.as_u32 =
2923 clib_host_to_net_u32 (1 +
2924 clib_net_to_host_u32 (ip4_base_address.as_u32));
2928 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2930 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2936 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2937 * given FIB table to determine if there is a conflict with the
2938 * adjacency table. The fib-id can be determined by using the
2939 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2942 * @todo This command uses fib-id, other commands use table-id (not
2943 * just a name, they are different indexes). Would like to change this
2944 * to table-id for consistency.
2947 * Example of how to run the test lookup command:
2948 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2949 * No errors in 2 lookups
/* Binds the "test lookup" CLI path to test_lookup_command_fn. */
2953 VLIB_CLI_COMMAND (lookup_test_command, static) =
2955 .path = "test lookup",
2956 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
2957 .function = test_lookup_command_fn,
/*
 * Sets the flow-hash configuration of the IPv4 FIB identified by
 * table_id.
 *
 * The table id is translated to a FIB index through
 * ip4_main.fib_index_by_table_id, and the configuration is stored in
 * that FIB's flow_hash_config field. Returns VNET_API_ERROR_NO_SUCH_FIB
 * on the failure path (presumably when the hash lookup misses --
 * confirm).
 */
2962 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2964 ip4_main_t *im4 = &ip4_main;
2966 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
2969 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] holds the FIB index registered for this table id. */
2971 fib = ip4_fib_get (p[0]);
2973 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler for "set ip flow-hash".
 *
 * Reads "table <n>" plus any of the keywords generated from
 * foreach_flow_hash_bit; each matched keyword ORs its bit value into
 * flow_hash_config and sets the matched flag. The accumulated
 * configuration is applied with vnet_set_ip4_flow_hash. A
 * VNET_API_ERROR_NO_SUCH_FIB result becomes a "no such FIB table" error;
 * the result handling also contains a clib_warning for an illegal flow
 * hash config.
 */
2977 static clib_error_t *
2978 set_ip_flow_hash_command_fn (vlib_main_t * vm,
2979 unformat_input_t * input,
2980 vlib_cli_command_t * cmd)
2984 u32 flow_hash_config = 0;
2987 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2989 if (unformat (input, "table %d", &table_id))
2992 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
2993 foreach_flow_hash_bit
3000 return clib_error_return (0, "unknown input `%U'",
3001 format_unformat_error, input);
/* Apply the accumulated configuration to the requested table. */
3003 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3009 case VNET_API_ERROR_NO_SUCH_FIB:
3010 return clib_error_return (0, "no such FIB table %d", table_id);
3013 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3021 * Configure the set of IPv4 fields used by the flow hash.
3024 * Example of how to set the flow hash on a given table:
3025 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3026 * Example of how to display the configured flow hash:
3027 * @cliexstart{show ip fib}
3028 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3031 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3032 * [0] [@0]: dpo-drop ip6
3035 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3036 * [0] [@0]: dpo-drop ip6
3039 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3040 * [0] [@0]: dpo-drop ip6
3043 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3044 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3047 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3048 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3049 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3050 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3051 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3054 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3055 * [0] [@0]: dpo-drop ip6
3056 * 255.255.255.255/32
3058 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3059 * [0] [@0]: dpo-drop ip6
3060 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3063 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3064 * [0] [@0]: dpo-drop ip6
3067 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3068 * [0] [@0]: dpo-drop ip6
3071 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3072 * [0] [@4]: ipv4-glean: af_packet0
3075 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3076 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3079 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3080 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3083 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3084 * [0] [@4]: ipv4-glean: af_packet1
3087 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3088 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3091 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3092 * [0] [@0]: dpo-drop ip6
3095 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3096 * [0] [@0]: dpo-drop ip6
3097 * 255.255.255.255/32
3099 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3100 * [0] [@0]: dpo-drop ip6
/* Binds the "set ip flow-hash" CLI path to
 * set_ip_flow_hash_command_fn. */
3104 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3106 .path = "set ip flow-hash",
3108 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3109 .function = set_ip_flow_hash_command_fn,
/*
 * Attaches a classifier table to an interface, or detaches it when
 * table_index is ~0.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is not a
 * live entry in the software-interface pool, and
 * VNET_API_ERROR_NO_SUCH_ENTRY when a table_index other than ~0 is not a
 * live classifier table. Otherwise table_index is recorded in
 * lookup_main.classify_table_index_by_sw_if_index.
 *
 * If the interface has a first IPv4 address, a FIB entry for that
 * address is also updated in the interface's FIB: with a real
 * table_index a classify DPO is installed under FIB_SOURCE_CLASSIFY;
 * with ~0 the FIB_SOURCE_CLASSIFY entry is removed.
 */
3114 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3117 vnet_main_t *vnm = vnet_get_main ();
3118 vnet_interface_main_t *im = &vnm->interface_main;
3119 ip4_main_t *ipm = &ip4_main;
3120 ip_lookup_main_t *lm = &ipm->lookup_main;
3121 vnet_classify_main_t *cm = &vnet_classify_main;
3122 ip4_address_t *if_addr;
3124 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3125 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 means "no table", so it is exempt from the existence check. */
3127 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3128 return VNET_API_ERROR_NO_SUCH_ENTRY;
3130 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3131 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3133 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* Only interfaces that already have an address get a FIB entry update. */
3135 if (NULL != if_addr)
3137 fib_prefix_t pfx = {
3139 .fp_proto = FIB_PROTOCOL_IP4,
3140 .fp_addr.ip4 = *if_addr,
3144 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3148 if (table_index != (u32) ~ 0)
3150 dpo_id_t dpo = DPO_INVALID;
3155 classify_dpo_create (DPO_PROTO_IP4, table_index));
3157 fib_table_entry_special_dpo_add (fib_index,
3159 FIB_SOURCE_CLASSIFY,
3160 FIB_ENTRY_FLAG_NONE, &dpo);
/* Detach case: drop the classify-sourced entry for this prefix. */
3165 fib_table_entry_special_remove (fib_index,
3166 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler for "set ip classify".
 *
 * Reads "table-index <n>" and "intfc <interface>" in any order. Both are
 * required: a missing table index or interface yields a dedicated error.
 * The pair is then applied with vnet_set_ip4_classify_intfc, and its
 * VNET_API_ERROR_NO_MATCHING_INTERFACE / VNET_API_ERROR_NO_SUCH_ENTRY
 * results are turned into "No such interface" / "No such classifier
 * table" errors.
 */
3173 static clib_error_t *
3174 set_ip_classify_command_fn (vlib_main_t * vm,
3175 unformat_input_t * input,
3176 vlib_cli_command_t * cmd)
3178 u32 table_index = ~0;
3179 int table_index_set = 0;
3180 u32 sw_if_index = ~0;
3183 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3185 if (unformat (input, "table-index %d", &table_index))
3186 table_index_set = 1;
3187 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3188 vnet_get_main (), &sw_if_index))
/* A separate flag tracks whether a table index was given, because ~0
 * is also the initial value of table_index. */
3194 if (table_index_set == 0)
3195 return clib_error_return (0, "classify table-index must be specified");
3197 if (sw_if_index == ~0)
3198 return clib_error_return (0, "interface / subif must be specified");
3200 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3207 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3208 return clib_error_return (0, "No such interface");
3210 case VNET_API_ERROR_NO_SUCH_ENTRY:
3211 return clib_error_return (0, "No such classifier table");
3217 * Assign a classification table to an interface. The classification
3218 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3219 * commands. Once the table is created, use this command to filter packets
3223 * Example of how to assign a classification table to an interface:
3224 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Binds the "set ip classify" CLI path to set_ip_classify_command_fn. */
3227 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3229 .path = "set ip classify",
3231 "set ip classify intfc <interface> table-index <classify-idx>",
3232 .function = set_ip_classify_command_fn,
3237 * fd.io coding-style-patch-verification: ON
3240 * eval: (c-set-style "gnu")