2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
53 #include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
57 * @brief IPv4 Forwarding.
59 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the packet-trace helper.  The nodes below call it
   as ip4_forward_next_trace (vm, node, frame, VLIB_TX) when the node runtime
   has VLIB_NODE_FLAG_TRACE set.  The definition is not part of this section. */
63 ip4_forward_next_trace (vlib_main_t * vm,
64 vlib_node_runtime_t * node,
66 vlib_rx_or_tx_t which_adj_index);
/*
 * IPv4 destination lookup for one frame of buffers.
 *
 * For every packet the code below:
 *  - picks the FIB index from the RX interface, overridden by
 *    sw_if_index[VLIB_TX] when that field is not ~0;
 *  - unless lookup_for_responses_to_locally_received_packets is set,
 *    walks the mtrie in four steps (0..3) on the destination address and
 *    falls back to the mtrie default leaf when the result is empty;
 *    when the flag is set, the load-balance index is read from
 *    ip.adj_index[VLIB_RX] instead;
 *  - computes a flow hash only when the load-balance has more than one
 *    bucket, then selects the bucket DPO with load_balance_get_bucket_i;
 *  - writes the DPO index to ip.adj_index[VLIB_TX], increments the
 *    lbm_to_counters combined counter (one packet, buffer-chain bytes)
 *    and enqueues the buffer to dpo->dpoi_next_node.
 *
 * Returns frame->n_vectors.
 */
69 ip4_lookup_inline (vlib_main_t * vm,
70 vlib_node_runtime_t * node,
72 int lookup_for_responses_to_locally_received_packets)
74 ip4_main_t *im = &ip4_main;
75 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters;
76 u32 n_left_from, n_left_to_next, *from, *to_next;
77 ip_lookup_next_t next;
78 u32 cpu_index = os_get_cpu_number ();
80 from = vlib_frame_vector_args (frame);
81 n_left_from = frame->n_vectors;
82 next = node->cached_next_index;
84 while (n_left_from > 0)
86 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Quad loop: four packets per iteration.  The following four buffers
   (from[4..7]) are prefetched, hence the n_left_from >= 8 guard. */
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t *p0, *p1, *p2, *p3;
91 ip4_header_t *ip0, *ip1, *ip2, *ip3;
92 __attribute__ ((unused)) tcp_header_t *tcp0, *tcp1, *tcp2, *tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t *lb0, *lb1, *lb2, *lb3;
95 ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__ ((unused)) u32 pi0, fib_index0, lb_index0,
100 __attribute__ ((unused)) u32 pi1, fib_index1, lb_index1,
102 __attribute__ ((unused)) u32 pi2, fib_index2, lb_index2,
104 __attribute__ ((unused)) u32 pi3, fib_index3, lb_index3,
106 flow_hash_config_t flow_hash_config0, flow_hash_config1;
107 flow_hash_config_t flow_hash_config2, flow_hash_config3;
108 u32 hash_c0, hash_c1, hash_c2, hash_c3;
109 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
111 /* Prefetch next iteration. */
113 vlib_buffer_t *p4, *p5, *p6, *p7;
115 p4 = vlib_get_buffer (vm, from[4]);
116 p5 = vlib_get_buffer (vm, from[5]);
117 p6 = vlib_get_buffer (vm, from[6]);
118 p7 = vlib_get_buffer (vm, from[7]);
120 vlib_prefetch_buffer_header (p4, LOAD);
121 vlib_prefetch_buffer_header (p5, LOAD);
122 vlib_prefetch_buffer_header (p6, LOAD);
123 vlib_prefetch_buffer_header (p7, LOAD);
125 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
126 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
127 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
128 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
/* Speculatively copy the four buffer indices into the current next frame. */
131 pi0 = to_next[0] = from[0];
132 pi1 = to_next[1] = from[1];
133 pi2 = to_next[2] = from[2];
134 pi3 = to_next[3] = from[3];
141 p0 = vlib_get_buffer (vm, pi0);
142 p1 = vlib_get_buffer (vm, pi1);
143 p2 = vlib_get_buffer (vm, pi2);
144 p3 = vlib_get_buffer (vm, pi3);
146 ip0 = vlib_buffer_get_current (p0);
147 ip1 = vlib_buffer_get_current (p1);
148 ip2 = vlib_buffer_get_current (p2);
149 ip3 = vlib_buffer_get_current (p3);
151 dst_addr0 = &ip0->dst_address;
152 dst_addr1 = &ip1->dst_address;
153 dst_addr2 = &ip2->dst_address;
154 dst_addr3 = &ip3->dst_address;
/* FIB index: start from the table bound to the RX interface ... */
157 vec_elt (im->fib_index_by_sw_if_index,
158 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
160 vec_elt (im->fib_index_by_sw_if_index,
161 vnet_buffer (p1)->sw_if_index[VLIB_RX]);
163 vec_elt (im->fib_index_by_sw_if_index,
164 vnet_buffer (p2)->sw_if_index[VLIB_RX]);
166 vec_elt (im->fib_index_by_sw_if_index,
167 vnet_buffer (p3)->sw_if_index[VLIB_RX]);
/* ... and override it with sw_if_index[VLIB_TX] when that is not ~0. */
169 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
170 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
172 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
173 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
175 (vnet_buffer (p2)->sw_if_index[VLIB_TX] ==
176 (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX];
178 (vnet_buffer (p3)->sw_if_index[VLIB_TX] ==
179 (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX];
/* mtrie walk, step 0 of 4 (skipped when the load-balance index is
   already supplied in adj_index[VLIB_RX]). */
182 if (!lookup_for_responses_to_locally_received_packets)
184 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
185 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
186 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
187 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
189 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
191 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
192 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
193 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
194 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
197 tcp0 = (void *) (ip0 + 1);
198 tcp1 = (void *) (ip1 + 1);
199 tcp2 = (void *) (ip2 + 1);
200 tcp3 = (void *) (ip3 + 1);
202 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
203 || ip0->protocol == IP_PROTOCOL_UDP);
204 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
205 || ip1->protocol == IP_PROTOCOL_UDP);
206 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
207 || ip2->protocol == IP_PROTOCOL_UDP);
/* Packet 3 is classified from its own header (this previously read ip1). */
208 is_tcp_udp3 = (ip3->protocol == IP_PROTOCOL_TCP
209 || ip3->protocol == IP_PROTOCOL_UDP);
/* mtrie walk, steps 1..3. */
211 if (!lookup_for_responses_to_locally_received_packets)
213 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
214 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
215 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
216 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
219 if (!lookup_for_responses_to_locally_received_packets)
221 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
222 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
223 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
224 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
227 if (!lookup_for_responses_to_locally_received_packets)
229 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
230 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
231 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
232 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
/* Load-balance index: taken from the buffer for locally received
   responses, otherwise from the mtrie leaf. */
235 if (lookup_for_responses_to_locally_received_packets)
237 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
238 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
239 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
240 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
244 /* Handle default route. */
247 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
250 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
253 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
256 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
257 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
258 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
259 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
260 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
263 lb0 = load_balance_get (lb_index0);
264 lb1 = load_balance_get (lb_index1);
265 lb2 = load_balance_get (lb_index2);
266 lb3 = load_balance_get (lb_index3);
268 /* Use flow hash to compute multipath adjacency. */
/* The hash defaults to 0 and is only computed for multi-bucket
   load-balances. */
269 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
270 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
271 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
272 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
273 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
275 flow_hash_config0 = lb0->lb_hash_config;
276 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
277 ip4_compute_flow_hash (ip0, flow_hash_config0);
279 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
281 flow_hash_config1 = lb1->lb_hash_config;
282 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
283 ip4_compute_flow_hash (ip1, flow_hash_config1);
285 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
287 flow_hash_config2 = lb2->lb_hash_config;
288 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
289 ip4_compute_flow_hash (ip2, flow_hash_config2);
291 if (PREDICT_FALSE (lb3->lb_n_buckets > 1))
293 flow_hash_config3 = lb3->lb_hash_config;
294 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
295 ip4_compute_flow_hash (ip3, flow_hash_config3);
/* Bucket selection masks with lb_n_buckets_minus_1, so the bucket
   count must be a non-zero power of two. */
298 ASSERT (lb0->lb_n_buckets > 0);
299 ASSERT (is_pow2 (lb0->lb_n_buckets));
300 ASSERT (lb1->lb_n_buckets > 0);
301 ASSERT (is_pow2 (lb1->lb_n_buckets));
302 ASSERT (lb2->lb_n_buckets > 0);
303 ASSERT (is_pow2 (lb2->lb_n_buckets));
304 ASSERT (lb3->lb_n_buckets > 0);
305 ASSERT (is_pow2 (lb3->lb_n_buckets));
307 dpo0 = load_balance_get_bucket_i (lb0,
309 (lb0->lb_n_buckets_minus_1)));
310 dpo1 = load_balance_get_bucket_i (lb1,
312 (lb1->lb_n_buckets_minus_1)));
313 dpo2 = load_balance_get_bucket_i (lb2,
315 (lb2->lb_n_buckets_minus_1)));
316 dpo3 = load_balance_get_bucket_i (lb3,
318 (lb3->lb_n_buckets_minus_1)));
/* Publish the result: next node and DPO index for the downstream node. */
320 next0 = dpo0->dpoi_next_node;
321 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
322 next1 = dpo1->dpoi_next_node;
323 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
324 next2 = dpo2->dpoi_next_node;
325 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
326 next3 = dpo3->dpoi_next_node;
327 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/* Byte count is the buffer-chain length only, exactly as in the
   single-packet loop below and in ip4_load_balance, so the counter does
   not depend on which loop handled the packet. */
329 vlib_increment_combined_counter
330 (cm, cpu_index, lb_index0, 1,
331 vlib_buffer_length_in_chain (vm, p0));
333 vlib_increment_combined_counter
334 (cm, cpu_index, lb_index1, 1,
335 vlib_buffer_length_in_chain (vm, p1));
337 vlib_increment_combined_counter
338 (cm, cpu_index, lb_index2, 1,
339 vlib_buffer_length_in_chain (vm, p2));
341 vlib_increment_combined_counter
342 (cm, cpu_index, lb_index3, 1,
343 vlib_buffer_length_in_chain (vm, p3));
346 vlib_validate_buffer_enqueue_x4 (vm, node, next,
347 to_next, n_left_to_next,
349 next0, next1, next2, next3);
/* Single loop: remaining packets, one at a time, same steps as above. */
352 while (n_left_from > 0 && n_left_to_next > 0)
356 __attribute__ ((unused)) tcp_header_t *tcp0;
357 ip_lookup_next_t next0;
358 const load_balance_t *lb0;
359 ip4_fib_mtrie_t *mtrie0;
360 ip4_fib_mtrie_leaf_t leaf0;
361 ip4_address_t *dst_addr0;
362 __attribute__ ((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
363 flow_hash_config_t flow_hash_config0;
364 const dpo_id_t *dpo0;
370 p0 = vlib_get_buffer (vm, pi0);
372 ip0 = vlib_buffer_get_current (p0);
374 dst_addr0 = &ip0->dst_address;
377 vec_elt (im->fib_index_by_sw_if_index,
378 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
380 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
381 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
383 if (!lookup_for_responses_to_locally_received_packets)
385 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
387 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
389 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
392 tcp0 = (void *) (ip0 + 1);
394 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
395 || ip0->protocol == IP_PROTOCOL_UDP);
397 if (!lookup_for_responses_to_locally_received_packets)
398 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
400 if (!lookup_for_responses_to_locally_received_packets)
401 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
403 if (!lookup_for_responses_to_locally_received_packets)
404 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
406 if (lookup_for_responses_to_locally_received_packets)
407 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
410 /* Handle default route. */
413 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
414 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
417 lb0 = load_balance_get (lbi0);
419 /* Use flow hash to compute multipath adjacency. */
420 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
421 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
423 flow_hash_config0 = lb0->lb_hash_config;
425 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
426 ip4_compute_flow_hash (ip0, flow_hash_config0);
429 ASSERT (lb0->lb_n_buckets > 0);
430 ASSERT (is_pow2 (lb0->lb_n_buckets));
432 dpo0 = load_balance_get_bucket_i (lb0,
434 (lb0->lb_n_buckets_minus_1)));
436 next0 = dpo0->dpoi_next_node;
437 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
439 vlib_increment_combined_counter
440 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
/* The buffer was enqueued speculatively to `next`; when the real next
   node differs, the current frame is handed back and a new one fetched. */
447 if (PREDICT_FALSE (next0 != next))
450 vlib_put_next_frame (vm, node, next, n_left_to_next);
452 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
459 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the lookup so adj_index[VLIB_TX] holds the result. */
462 if (node->flags & VLIB_NODE_FLAG_TRACE)
463 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
465 return frame->n_vectors;
468 /** @brief IPv4 lookup node.
471 This is the main IPv4 lookup dispatch node.
473 @param vm vlib_main_t corresponding to the current thread
474 @param node vlib_node_runtime_t
475 @param frame vlib_frame_t whose contents should be dispatched
477 @par Graph mechanics: buffer metadata, next index usage
480 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
481 - Indicates the @c sw_if_index value of the interface that the
482 packet was received on.
483 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
484 - When the value is @c ~0 then the node performs a longest prefix
485 match (LPM) for the packet destination address in the FIB attached
486 to the receive interface.
487 - Otherwise perform LPM for the packet destination address in the
488 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
489 value (0, 1, ...) and not a VRF id.
492 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
493 - The index of the DPO selected from the lookup result load-balance (@c dpo->dpoi_index).
496 - Dispatches the packet to the node index found in
497 the selected load-balance bucket's @c dpo->dpoi_next_node
498 (where @c dpo is the DPO chosen from the lookup result load-balance).
/* Graph-node dispatch function for "ip4-lookup": hands the frame to
   ip4_lookup_inline and returns whatever it returns. */
501 ip4_lookup (vlib_main_t * vm,
502 vlib_node_runtime_t * node, vlib_frame_t * frame)
504 return ip4_lookup_inline (vm, node, frame,
505 /* lookup_for_responses_to_locally_received_packets */
/* Trace formatter shared by the lookup nodes; declared here because the
   node registrations below reference it before its definition. */
510 static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registers the "ip4-lookup" graph node: dispatch function ip4_lookup,
   u32 vector elements, IP_LOOKUP_N_NEXT next nodes taken from
   IP4_LOOKUP_NEXT_NODES. */
512 VLIB_REGISTER_NODE (ip4_lookup_node) =
514 .function = ip4_lookup,.name = "ip4-lookup",.vector_size =
515 sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes =
516 IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,};
518 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup);
/*
 * Dispatch function for the "ip4-load-balance" node.
 *
 * Each buffer arrives with a load-balance index in ip.adj_index[VLIB_TX].
 * For multi-bucket load-balances the flow hash already stored in the buffer
 * is reused after shifting it right by one bit; when no hash is stored
 * (value 0) a fresh one is computed from the IP header.  The selected
 * bucket's DPO index replaces ip.adj_index[VLIB_TX], the lbm_via_counters
 * combined counter is incremented (one packet, buffer-chain bytes) and the
 * buffer is enqueued to dpo->dpoi_next_node.
 *
 * Returns frame->n_vectors.
 */
521 ip4_load_balance (vlib_main_t * vm,
522 vlib_node_runtime_t * node, vlib_frame_t * frame)
524 vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters;
525 u32 n_left_from, n_left_to_next, *from, *to_next;
526 ip_lookup_next_t next;
527 u32 cpu_index = os_get_cpu_number ();
529 from = vlib_frame_vector_args (frame);
530 n_left_from = frame->n_vectors;
531 next = node->cached_next_index;
/* Unlike ip4_lookup_inline, tracing is done before the buffers are
   processed, so the trace sees adj_index[VLIB_TX] as received. */
533 if (node->flags & VLIB_NODE_FLAG_TRACE)
534 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
536 while (n_left_from > 0)
538 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Dual loop: two packets per iteration, prefetching the next two
   (from[2..3]), hence the n_left_from >= 4 guard. */
541 while (n_left_from >= 4 && n_left_to_next >= 2)
543 ip_lookup_next_t next0, next1;
544 const load_balance_t *lb0, *lb1;
545 vlib_buffer_t *p0, *p1;
546 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
547 const ip4_header_t *ip0, *ip1;
548 const dpo_id_t *dpo0, *dpo1;
550 /* Prefetch next iteration. */
552 vlib_buffer_t *p2, *p3;
554 p2 = vlib_get_buffer (vm, from[2]);
555 p3 = vlib_get_buffer (vm, from[3]);
557 vlib_prefetch_buffer_header (p2, STORE);
558 vlib_prefetch_buffer_header (p3, STORE);
560 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
561 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
564 pi0 = to_next[0] = from[0];
565 pi1 = to_next[1] = from[1];
572 p0 = vlib_get_buffer (vm, pi0);
573 p1 = vlib_get_buffer (vm, pi1);
575 ip0 = vlib_buffer_get_current (p0);
576 ip1 = vlib_buffer_get_current (p1);
/* On entry adj_index[VLIB_TX] holds the load-balance index to resolve. */
577 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
578 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
580 lb0 = load_balance_get (lbi0);
581 lb1 = load_balance_get (lbi1);
584 * this node is for via FIBs we can re-use the hash value from the
585 * to node if present.
586 * We don't want to use the same hash value at each level in the recursion
587 * graph as that would lead to polarisation
591 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
/* A non-zero stored hash is reused, shifted by one bit and written
   back so the next recursion level sees a different value. */
593 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
595 hc0 = vnet_buffer (p0)->ip.flow_hash =
596 vnet_buffer (p0)->ip.flow_hash >> 1;
600 hc0 = vnet_buffer (p0)->ip.flow_hash =
601 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
604 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
606 if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
608 hc1 = vnet_buffer (p1)->ip.flow_hash =
609 vnet_buffer (p1)->ip.flow_hash >> 1;
613 hc1 = vnet_buffer (p1)->ip.flow_hash =
614 ip4_compute_flow_hash (ip1, lb1->lb_hash_config);
/* Bucket = hash masked with (bucket count - 1). */
619 load_balance_get_bucket_i (lb0,
620 hc0 & (lb0->lb_n_buckets_minus_1));
622 load_balance_get_bucket_i (lb1,
623 hc1 & (lb1->lb_n_buckets_minus_1));
625 next0 = dpo0->dpoi_next_node;
626 next1 = dpo1->dpoi_next_node;
628 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
629 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Counters are indexed by the incoming load-balance index. */
631 vlib_increment_combined_counter
632 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
633 vlib_increment_combined_counter
634 (cm, cpu_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
636 vlib_validate_buffer_enqueue_x2 (vm, node, next,
637 to_next, n_left_to_next,
638 pi0, pi1, next0, next1);
/* Single loop: remaining packets, same steps as the dual loop. */
641 while (n_left_from > 0 && n_left_to_next > 0)
643 ip_lookup_next_t next0;
644 const load_balance_t *lb0;
647 const ip4_header_t *ip0;
648 const dpo_id_t *dpo0;
657 p0 = vlib_get_buffer (vm, pi0);
659 ip0 = vlib_buffer_get_current (p0);
660 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
662 lb0 = load_balance_get (lbi0);
665 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
667 if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
669 hc0 = vnet_buffer (p0)->ip.flow_hash =
670 vnet_buffer (p0)->ip.flow_hash >> 1;
674 hc0 = vnet_buffer (p0)->ip.flow_hash =
675 ip4_compute_flow_hash (ip0, lb0->lb_hash_config);
680 load_balance_get_bucket_i (lb0,
681 hc0 & (lb0->lb_n_buckets_minus_1));
683 next0 = dpo0->dpoi_next_node;
684 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
686 vlib_increment_combined_counter
687 (cm, cpu_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
689 vlib_validate_buffer_enqueue_x1 (vm, node, next,
690 to_next, n_left_to_next,
694 vlib_put_next_frame (vm, node, next, n_left_to_next);
697 return frame->n_vectors;
/* Registers the "ip4-load-balance" graph node.  It is declared a sibling of
   "ip4-lookup" and uses the same trace formatter; no next nodes are listed
   in this registration. */
700 VLIB_REGISTER_NODE (ip4_load_balance_node) =
702 .function = ip4_load_balance,.name = "ip4-load-balance",.vector_size =
703 sizeof (u32),.sibling_of = "ip4-lookup",.format_trace =
704 format_ip4_lookup_trace,};
706 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance);
708 /* get first interface address */
/* Walks the IPv4 addresses of sw_if_index (unnumbered interfaces honored)
   and fetches the address of the visited entry.  Through result_ia the
   caller receives the matching ip_interface_address_t when an address was
   found, or 0 otherwise.
   NOTE(review): the loop exit and the return statement are not visible in
   this listing; presumably the first address is returned - confirm. */
710 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
711 ip_interface_address_t ** result_ia)
713 ip_lookup_main_t *lm = &im->lookup_main;
714 ip_interface_address_t *ia = 0;
715 ip4_address_t *result = 0;
718 foreach_ip_interface_address
719 (lm, ia, sw_if_index,
720 1 /* honor unnumbered */ ,
723 ip_interface_address_get_address (lm, ia);
729 *result_ia = result ? ia : 0;
/*
 * Installs the FIB entries that go with interface address `a` in table
 * fib_index, all with source FIB_SOURCE_INTERFACE:
 *  - prefix length <= 30: a connected/attached entry for the prefix (its
 *    adjacency is remembered in a->neighbor_probe_adj_index) plus drop
 *    entries for the all-zeros and all-ones host addresses of the subnet,
 *    each skipped when it equals the interface address itself;
 *  - prefix length == 31: an attached entry for the other address of the
 *    pair (lowest address bit flipped);
 *  - when a classify table is bound to the interface, a special DPO entry
 *    built from classify_dpo_create;
 *  - finally a connected/local entry.
 */
734 ip4_add_interface_routes (u32 sw_if_index,
735 ip4_main_t * im, u32 fib_index,
736 ip_interface_address_t * a)
738 ip_lookup_main_t *lm = &im->lookup_main;
739 ip4_address_t *address = ip_interface_address_get_address (lm, a);
741 .fp_len = a->address_length,
742 .fp_proto = FIB_PROTOCOL_IP4,
743 .fp_addr.ip4 = *address,
/* ~0 means "no neighbor-probe adjacency"; only the <= /30 branch sets it. */
746 a->neighbor_probe_adj_index = ~0;
748 if (pfx.fp_len <= 30)
750 /* a /30 or shorter - add a glean for the network address */
751 fib_node_index_t fei;
753 fei = fib_table_entry_update_one_path (fib_index, &pfx,
754 FIB_SOURCE_INTERFACE,
755 (FIB_ENTRY_FLAG_CONNECTED |
756 FIB_ENTRY_FLAG_ATTACHED),
758 /* No next-hop address */
764 // no out-label stack
766 FIB_ROUTE_PATH_FLAG_NONE);
767 a->neighbor_probe_adj_index = fib_entry_get_adj (fei);
769 /* Add the two broadcast addresses as drop */
770 fib_prefix_t net_pfx = {
772 .fp_proto = FIB_PROTOCOL_IP4,
/* Host bits cleared: the subnet's network address. */
773 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
775 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
776 fib_table_entry_special_add(fib_index,
778 FIB_SOURCE_INTERFACE,
779 (FIB_ENTRY_FLAG_DROP |
780 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
/* Host bits all set: the subnet's directed-broadcast address. */
782 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
783 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
784 fib_table_entry_special_add(fib_index,
786 FIB_SOURCE_INTERFACE,
787 (FIB_ENTRY_FLAG_DROP |
788 FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
791 else if (pfx.fp_len == 31)
/* The mask is 1 in network byte order, i.e. the lowest address bit. */
793 u32 mask = clib_host_to_net_u32(1);
794 fib_prefix_t net_pfx = pfx;
797 net_pfx.fp_addr.ip4.as_u32 ^= mask;
799 /* a /31 - add the other end as an attached host */
800 fib_table_entry_update_one_path (fib_index, &net_pfx,
801 FIB_SOURCE_INTERFACE,
802 (FIB_ENTRY_FLAG_ATTACHED),
810 FIB_ROUTE_PATH_FLAG_NONE);
/* Optional classifier hook: only when a table index other than ~0 is
   recorded for this interface. */
814 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
816 u32 classify_table_index =
817 lm->classify_table_index_by_sw_if_index[sw_if_index];
818 if (classify_table_index != (u32) ~ 0)
820 dpo_id_t dpo = DPO_INVALID;
825 classify_dpo_create (DPO_PROTO_IP4, classify_table_index));
827 fib_table_entry_special_dpo_add (fib_index,
830 FIB_ENTRY_FLAG_NONE, &dpo);
/* Connected + local entry for pfx. */
835 fib_table_entry_update_one_path (fib_index, &pfx,
836 FIB_SOURCE_INTERFACE,
837 (FIB_ENTRY_FLAG_CONNECTED |
838 FIB_ENTRY_FLAG_LOCAL),
845 FIB_ROUTE_PATH_FLAG_NONE);
/*
 * Removes the FIB_SOURCE_INTERFACE entries for an interface address,
 * mirroring ip4_add_interface_routes:
 *  - prefix length <= 30: the two special (network / broadcast) entries,
 *    each only when it differs from the interface address, then the
 *    prefix entry;
 *  - prefix length == 31: the entry for the other address of the pair;
 *  - finally the entry for pfx.
 */
849 ip4_del_interface_routes (ip4_main_t * im,
851 ip4_address_t * address, u32 address_length)
854 .fp_len = address_length,
855 .fp_proto = FIB_PROTOCOL_IP4,
856 .fp_addr.ip4 = *address,
859 if (pfx.fp_len <= 30)
861 fib_prefix_t net_pfx = {
863 .fp_proto = FIB_PROTOCOL_IP4,
/* Host bits cleared: the subnet's network address. */
864 .fp_addr.ip4.as_u32 = address->as_u32 & im->fib_masks[pfx.fp_len],
866 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
867 fib_table_entry_special_remove(fib_index,
869 FIB_SOURCE_INTERFACE);
/* Host bits all set: the subnet's directed-broadcast address. */
870 net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len];
871 if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32)
872 fib_table_entry_special_remove(fib_index,
874 FIB_SOURCE_INTERFACE);
875 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
877 else if (pfx.fp_len == 31)
/* Flip the lowest address bit to obtain the peer of the /31. */
879 u32 mask = clib_host_to_net_u32(1);
880 fib_prefix_t net_pfx = pfx;
883 net_pfx.fp_addr.ip4.as_u32 ^= mask;
885 fib_table_entry_delete (fib_index, &net_pfx, FIB_SOURCE_INTERFACE);
889 fib_table_entry_delete (fib_index, &pfx, FIB_SOURCE_INTERFACE);
/*
 * Reference-counted enable/disable of IPv4 on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] counts enables.  The
 * "ip4-drop" feature on the "ip4-unicast" and "ip4-multicast" arcs is
 * toggled with !is_enable, i.e. drop is switched off when IPv4 is enabled
 * and back on when it is disabled.
 * NOTE(review): the statements guarded by the two counter tests are not
 * visible in this listing; presumably they return early so the features
 * are only touched on the 0<->1 transition - confirm.
 */
893 ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable)
895 ip4_main_t *im = &ip4_main;
/* Grow the per-interface counter vector on demand, new slots start at 0. */
897 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
900 * enable/disable only on the 1<->0 transition
904 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* A disable without a matching enable would underflow the counter. */
909 ASSERT (im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
910 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
913 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
917 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop",
918 sw_if_index, !is_enable, 0, 0);
/*
 * Adds or deletes (is_del) an IPv4 address/prefix-length on sw_if_index.
 *
 * Steps visible below: make sure the interface has a FIB index slot, build
 * the (address, fib_index) key, check a new address against the addresses
 * already present on the interface, update the interface address pool via
 * ip_interface_address_add_del, adjust the IPv4 enable reference count,
 * add or remove the interface routes, and notify the registered
 * add/del-address callbacks when the pool size changed.
 *
 * Returns the clib_error_t produced along the way.
 * NOTE(review): the error-path jumps and the final return are not visible
 * in this listing - confirm that every path reaches vec_free (addr_fib).
 */
921 static clib_error_t *
922 ip4_add_del_interface_address_internal (vlib_main_t * vm,
924 ip4_address_t * address,
925 u32 address_length, u32 is_del)
927 vnet_main_t *vnm = vnet_get_main ();
928 ip4_main_t *im = &ip4_main;
929 ip_lookup_main_t *lm = &im->lookup_main;
930 clib_error_t *error = 0;
931 u32 if_address_index, elts_before;
932 ip4_address_fib_t ip4_af, *addr_fib = 0;
934 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
935 ip4_addr_fib_init (&ip4_af, address,
936 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
/* One-element vector holding the key; released with vec_free below. */
937 vec_add1 (addr_fib, ip4_af);
940 * there is no support for adj-fib handling in the presence of overlapping
941 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
947 /* When adding an address check that it does not conflict
948 with an existing address. */
949 ip_interface_address_t *ia;
950 foreach_ip_interface_address
951 (&im->lookup_main, ia, sw_if_index,
952 0 /* honor unnumbered: no (0) */ ,
955 ip_interface_address_get_address
956 (&im->lookup_main, ia);
/* The conflict test is applied in both directions (new vs. existing
   and, in the second call, presumably existing vs. new). */
957 if (ip4_destination_matches_route
958 (im, address, x, ia->address_length) ||
959 ip4_destination_matches_route (im,
965 ("failed to add %U which conflicts with %U for interface %U",
966 format_ip4_address_and_length, address,
968 format_ip4_address_and_length, x,
970 format_vnet_sw_if_index_name, vnm,
/* Remember the pool size to detect whether the call below really
   added or removed an entry. */
976 elts_before = pool_elts (lm->if_address_pool);
978 error = ip_interface_address_add_del
979 (lm, sw_if_index, addr_fib, address_length, is_del, &if_address_index);
983 ip4_sw_interface_enable_disable (sw_if_index, !is_del);
986 ip4_del_interface_routes (im, ip4_af.fib_index, address, address_length);
988 ip4_add_interface_routes (sw_if_index,
989 im, ip4_af.fib_index,
991 (lm->if_address_pool, if_address_index));
993 /* Notify callbacks only when the pool grew/shrank; an unchanged pool size means the address was a duplicate. */
994 if (elts_before != pool_elts (lm->if_address_pool))
996 ip4_add_del_interface_address_callback_t *cb;
997 vec_foreach (cb, im->add_del_interface_address_callbacks)
998 cb->function (im, cb->function_opaque, sw_if_index,
999 address, address_length, if_address_index, is_del);
1003 vec_free (addr_fib);
/* Public entry point: forwards all arguments unchanged to
   ip4_add_del_interface_address_internal and returns its error value. */
1008 ip4_add_del_interface_address (vlib_main_t * vm,
1010 ip4_address_t * address,
1011 u32 address_length, u32 is_del)
1013 return ip4_add_del_interface_address_internal
1014 (vm, sw_if_index, address, address_length, is_del);
1017 /* Built-in ip4 unicast rx feature path definition */
/* The "ip4-unicast" arc starts at ip4-input / ip4-input-no-checksum.  The
   runs_before constraints below yield this ordering:
   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
   ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
   vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup.
   ip4-source-and-port-range-check-rx is only constrained to run before
   ip4-policer-classify, and ip4-drop before ip4-lookup. */
1019 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
1021 .arc_name = "ip4-unicast",
1022 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1023 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
1026 VNET_FEATURE_INIT (ip4_flow_classify, static) =
1028 .arc_name = "ip4-unicast",
1029 .node_name = "ip4-flow-classify",
1030 .runs_before = VNET_FEATURES ("ip4-inacl"),
1033 VNET_FEATURE_INIT (ip4_inacl, static) =
1035 .arc_name = "ip4-unicast",
1036 .node_name = "ip4-inacl",
1037 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
1040 VNET_FEATURE_INIT (ip4_source_check_1, static) =
1042 .arc_name = "ip4-unicast",
1043 .node_name = "ip4-source-check-via-rx",
1044 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
1047 VNET_FEATURE_INIT (ip4_source_check_2, static) =
1049 .arc_name = "ip4-unicast",
1050 .node_name = "ip4-source-check-via-any",
1051 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1054 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) =
1056 .arc_name = "ip4-unicast",
1057 .node_name = "ip4-source-and-port-range-check-rx",
1058 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
1061 VNET_FEATURE_INIT (ip4_policer_classify, static) =
1063 .arc_name = "ip4-unicast",
1064 .node_name = "ip4-policer-classify",
1065 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
1068 VNET_FEATURE_INIT (ip4_ipsec, static) =
1070 .arc_name = "ip4-unicast",
1071 .node_name = "ipsec-input-ip4",
1072 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
1075 VNET_FEATURE_INIT (ip4_vpath, static) =
1077 .arc_name = "ip4-unicast",
1078 .node_name = "vpath-input-ip4",
1079 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
1082 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) =
1084 .arc_name = "ip4-unicast",
1085 .node_name = "ip4-vxlan-bypass",
1086 .runs_before = VNET_FEATURES ("ip4-lookup"),
/* ip4-drop is the feature toggled by ip4_sw_interface_enable_disable and
   ip4_sw_interface_add_del. */
1089 VNET_FEATURE_INIT (ip4_drop, static) =
1091 .arc_name = "ip4-unicast",
1092 .node_name = "ip4-drop",
1093 .runs_before = VNET_FEATURES ("ip4-lookup"),
1096 VNET_FEATURE_INIT (ip4_lookup, static) =
1098 .arc_name = "ip4-unicast",
1099 .node_name = "ip4-lookup",
1100 .runs_before = 0, /* not before any other features */
1103 /* Built-in ip4 multicast rx feature path definition */
/* The "ip4-multicast" arc starts at the same input nodes as the unicast
   arc.  vpath-input-ip4 and ip4-drop are both constrained to run before
   ip4-mfib-forward-lookup, which is the last feature on the arc. */
1104 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1106 .arc_name = "ip4-multicast",
1107 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1108 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1111 VNET_FEATURE_INIT (ip4_vpath_mc, static) =
1113 .arc_name = "ip4-multicast",
1114 .node_name = "vpath-input-ip4",
1115 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1118 VNET_FEATURE_INIT (ip4_mc_drop, static) =
1120 .arc_name = "ip4-multicast",
1121 .node_name = "ip4-drop",
1122 .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
1125 VNET_FEATURE_INIT (ip4_lookup_mc, static) =
1127 .arc_name = "ip4-multicast",
1128 .node_name = "ip4-mfib-forward-lookup",
1129 .runs_before = 0, /* last feature */
1132 /* Source and port-range check ip4 tx feature path definition */
/* The "ip4-output" arc starts at ip4-rewrite / ip4-midchain.  Ordering from
   the runs_before constraints:
   ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
   interface-output. */
1133 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1135 .arc_name = "ip4-output",
1136 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1137 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1140 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) =
1142 .arc_name = "ip4-output",
1143 .node_name = "ip4-source-and-port-range-check-tx",
1144 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1147 VNET_FEATURE_INIT (ip4_ipsec_output, static) =
1149 .arc_name = "ip4-output",
1150 .node_name = "ipsec-output-ip4",
1151 .runs_before = VNET_FEATURES ("interface-output"),
1154 /* Built-in ip4 tx feature path definition */
1155 VNET_FEATURE_INIT (ip4_interface_output, static) =
1157 .arc_name = "ip4-output",
1158 .node_name = "interface-output",
1159 .runs_before = 0, /* not before any other features */
/* Software-interface add/del hook (registered just below with
   VNET_SW_INTERFACE_ADD_DEL_FUNCTION).  Ensures the unicast and multicast
   FIB-index vectors have a slot for sw_if_index and toggles the "ip4-drop"
   feature on both rx arcs.  Always returns 0 (no error).
   NOTE(review): the enable/disable arguments of the two feature calls are
   not visible in this listing - confirm how they depend on is_add. */
1163 static clib_error_t *
1164 ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
1166 ip4_main_t *im = &ip4_main;
1168 /* Fill in lookup tables with default table (0). */
1169 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1170 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
1172 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1175 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1178 return /* no error */ 0;
1181 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
/* Single global instance of the IPv4 state.  The functions in this file
   reach it as &ip4_main (FIB index vectors, lookup_main, fib_masks, the ARP
   request packet template). */
1183 /* Global IP4 main. */
1184 ip4_main_t ip4_main;
/*
 * One-time IPv4 lookup initialisation, registered with VLIB_INIT_FUNCTION
 * at the end of this block.
 *
 * Steps, in order:
 *   - run vnet_feature_init first (its error, if any, is captured in `error`);
 *   - fill im->fib_masks;
 *   - initialise the generic lookup state for IPv4;
 *   - create unicast and multicast tables with table id 0;
 *   - hook the packet-generator header parser onto the ip4-lookup node;
 *   - build the ARP request packet template used by ip4-arp / ip4-glean and
 *     ip4_probe_neighbor.
 */
1187 ip4_lookup_init (vlib_main_t * vm)
1189 ip4_main_t *im = &ip4_main;
1190 clib_error_t *error;
1193 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
/* fib_masks[i] is built from pow2_mask (i) shifted into the top of a 32-bit
   word and stored in network byte order -- presumably the netmask for a
   prefix of length i (TODO confirm pow2_mask semantics). */
1196 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1201 m = pow2_mask (i) << (32 - i);
1204 im->fib_masks[i] = clib_host_to_net_u32 (m);
1207 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1209 /* Create FIB with index 0 and table id of 0. */
1210 fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
1211 mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0);
/* Let the packet generator parse IPv4 headers for streams aimed at the
   ip4-lookup node. */
1215 pn = pg_get_node (ip4_lookup_node.index);
1216 pn->unformat_edit = unformat_pg_ip4_header;
/* Build a zeroed ARP header and fill in the constant fields of an
   Ethernet/IPv4 ARP request; multi-byte fields go in network byte order. */
1220 ethernet_arp_header_t h;
1222 memset (&h, 0, sizeof (h));
1224 /* Set target ethernet address to all zeros. */
1225 memset (h.ip4_over_ethernet[1].ethernet, 0,
1226 sizeof (h.ip4_over_ethernet[1].ethernet));
/* Local helper macros: _16 stores a host-order 16-bit value in network
   order, _8 stores a byte as-is. */
1228 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1229 #define _8(f,v) h.f = v;
1230 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1231 _16 (l3_type, ETHERNET_TYPE_IP4);
1232 _8 (n_l2_address_bytes, 6);
1233 _8 (n_l3_address_bytes, 4);
1234 _16 (opcode, ETHERNET_ARP_OPCODE_request);
/* Register the header as the packet template stored in
   im->ip4_arp_request_packet_template. */
1238 vlib_packet_template_init (vm, &im->ip4_arp_request_packet_template,
1241 /* alloc chunk size */ 8,
1248 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding nodes in this file.
   ip4_forward_next_trace fills in fib_index, dpo_index, flow_hash and
   packet_data; the format_ip4_*_trace functions print them. */
1252 /* Adjacency taken. */
1257 /* Packet data, possibly *after* rewrite. */
/* Sized as 64 bytes minus one u32. */
1258 u8 packet_data[64 - 1 * sizeof (u32)];
1260 ip4_forward_next_trace_t;
/*
 * Trace formatter used by the drop, punt, local, arp and glean nodes.
 *
 * va_list arguments: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *.
 *
 * Appends the captured packet bytes, decoded as an IPv4 header, at the
 * current indentation of `s`.
 */
1263 format_ip4_forward_next_trace (u8 * s, va_list * args)
1265 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1266 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1267 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1268 uword indent = format_get_indent (s);
1269 s = format (s, "%U%U",
1270 format_white_space, indent,
1271 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup results.
 *
 * va_list arguments: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *.
 *
 * Prints the FIB index, DPO index and flow hash on one line, then the
 * captured packet bytes decoded as an IPv4 header on the next line.
 */
1276 format_ip4_lookup_trace (u8 * s, va_list * args)
1278 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1279 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1280 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1281 uword indent = format_get_indent (s);
1283 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1284 t->fib_index, t->dpo_index, t->flow_hash);
1285 s = format (s, "\n%U%U",
1286 format_white_space, indent,
1287 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite results.
 *
 * va_list arguments: vlib_main_t * (unused), vlib_node_t * (unused),
 * ip4_forward_next_trace_t *.
 *
 * Prints the adjacency selected for the packet and the flow hash, then the
 * captured packet bytes interpreted through that adjacency.
 */
1292 format_ip4_rewrite_trace (u8 * s, va_list * args)
1294 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1295 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1296 ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
1297 uword indent = format_get_indent (s);
/* NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index.  ip4_forward_next_trace stores sw_if_index[VLIB_TX] there
   when it is set and a FIB index otherwise, so the label is only accurate
   in the first case -- confirm this is intended. */
1299 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1300 t->fib_index, t->dpo_index, format_ip_adjacency,
1301 t->dpo_index, FORMAT_IP_ADJACENCY_NONE, t->flow_hash);
1302 s = format (s, "\n%U%U",
1303 format_white_space, indent,
1304 format_ip_adjacency_packet_data,
1305 t->dpo_index, t->packet_data, sizeof (t->packet_data));
/*
 * Adds a trace record for every traced buffer in `frame`.
 *
 * vm, node        - current vlib main and node runtime (the record is
 *                   attached to `node`).
 * frame           - frame whose buffer indices are walked.
 * which_adj_index - VLIB_RX or VLIB_TX; selects which ip.adj_index slot of
 *                   the buffer metadata is recorded as dpo_index.
 *
 * For each buffer with VLIB_BUFFER_IS_TRACED set the record captures the
 * selected adjacency index, the flow hash, a table/interface index (see
 * below) and the first sizeof (packet_data) bytes at the buffer's current
 * position.  Buffers are handled two at a time first, then one at a time.
 */
1309 /* Common trace function for all ip4-forward next nodes. */
1311 ip4_forward_next_trace (vlib_main_t * vm,
1312 vlib_node_runtime_t * node,
1313 vlib_frame_t * frame, vlib_rx_or_tx_t which_adj_index)
1316 ip4_main_t *im = &ip4_main;
1318 n_left = frame->n_vectors;
1319 from = vlib_frame_vector_args (frame);
/* Dual-buffer pass. */
1324 vlib_buffer_t *b0, *b1;
1325 ip4_forward_next_trace_t *t0, *t1;
1327 /* Prefetch next iteration. */
1328 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1329 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1334 b0 = vlib_get_buffer (vm, bi0);
1335 b1 = vlib_get_buffer (vm, bi1);
1337 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1339 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1340 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1341 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* Recorded index: sw_if_index[VLIB_TX] when it is set (not ~0), otherwise
   the FIB index bound to the RX interface. */
1343 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1344 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1345 vec_elt (im->fib_index_by_sw_if_index,
1346 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1348 clib_memcpy (t0->packet_data,
1349 vlib_buffer_get_current (b0),
1350 sizeof (t0->packet_data));
/* Same capture for the second buffer of the pair. */
1352 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1354 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1355 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1356 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1358 (vnet_buffer (b1)->sw_if_index[VLIB_TX] !=
1359 (u32) ~ 0) ? vnet_buffer (b1)->sw_if_index[VLIB_TX] :
1360 vec_elt (im->fib_index_by_sw_if_index,
1361 vnet_buffer (b1)->sw_if_index[VLIB_RX]);
1362 clib_memcpy (t1->packet_data, vlib_buffer_get_current (b1),
1363 sizeof (t1->packet_data));
/* Single-buffer pass for whatever is left over. */
1373 ip4_forward_next_trace_t *t0;
1377 b0 = vlib_get_buffer (vm, bi0);
1379 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1381 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1382 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1383 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1385 (vnet_buffer (b0)->sw_if_index[VLIB_TX] !=
1386 (u32) ~ 0) ? vnet_buffer (b0)->sw_if_index[VLIB_TX] :
1387 vec_elt (im->fib_index_by_sw_if_index,
1388 vnet_buffer (b0)->sw_if_index[VLIB_RX]);
1389 clib_memcpy (t0->packet_data, vlib_buffer_get_current (b0),
1390 sizeof (t0->packet_data));
/*
 * Shared body of the ip4-drop and ip4-punt nodes.
 *
 * Hands every buffer of `frame` to vlib_error_drop_buffers, attributing
 * `error_code` to the ip4-input node's error counters, and traces the frame
 * (using the VLIB_TX adjacency slot) when tracing is enabled on `node`.
 */
1398 ip4_drop_or_punt (vlib_main_t * vm,
1399 vlib_node_runtime_t * node,
1400 vlib_frame_t * frame, ip4_error_t error_code)
1402 u32 *buffers = vlib_frame_vector_args (frame);
1403 uword n_packets = frame->n_vectors;
1405 vlib_error_drop_buffers (vm, node, buffers,
1409 ip4_input_node.index, error_code);
1411 if (node->flags & VLIB_NODE_FLAG_TRACE)
1412 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for "ip4-drop": disposes of the whole frame with error
   IP4_ERROR_ADJACENCY_DROP and returns ip4_drop_or_punt's result. */
1418 ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1420 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP);
/* Node function for "ip4-punt": disposes of the whole frame with error
   IP4_ERROR_ADJACENCY_PUNT and returns ip4_drop_or_punt's result. */
1424 ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1426 return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT);
/* Graph-node registration for the drop node: runs ip4_drop over frames of
   u32 buffer indices and formats traces with format_ip4_forward_next_trace.
   The trailing macro registers ip4_drop for multi-architecture dispatch. */
1430 VLIB_REGISTER_NODE (ip4_drop_node, static) =
1432 .function = ip4_drop,.
1434 .vector_size = sizeof (u32),
1435 .format_trace = format_ip4_forward_next_trace,
1442 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop);
/* Graph-node registration for the punt node: runs ip4_punt over frames of
   u32 buffer indices and formats traces with format_ip4_forward_next_trace.
   The trailing macro registers ip4_punt for multi-architecture dispatch. */
1444 VLIB_REGISTER_NODE (ip4_punt_node, static) =
1446 .function = ip4_punt,
1448 .vector_size = sizeof (u32),
1449 .format_trace = format_ip4_forward_next_trace,
1456 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt);
/*
 * Computes the L4 checksum over the pseudo-header and payload of the packet
 * whose IPv4 header is `ip0`, following the buffer chain that starts at `p0`
 * when the payload does not fit in the first buffer.
 *
 * The result is the one's complement of the folded sum.  The caller in this
 * file (ip4_tcp_udp_validate_checksum) treats 0 as "checksum correct".
 */
1459 /* Compute TCP/UDP/ICMP4 checksum in software. */
1461 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1465 u32 ip_header_length, payload_length_host_byte_order;
1466 u32 n_this_buffer, n_bytes_left;
1468 void *data_this_buffer;
/* Pseudo-header part 1: L4 length (IP total length minus IP header length)
   combined with the protocol number, converted to network byte order. */
1470 /* Initialize checksum with ip header. */
1471 ip_header_length = ip4_header_bytes (ip0);
1472 payload_length_host_byte_order =
1473 clib_net_to_host_u16 (ip0->length) - ip_header_length;
1475 clib_host_to_net_u32 (payload_length_host_byte_order +
1476 (ip0->protocol << 16));
/* Pseudo-header part 2: source and destination addresses.  With a 32-bit
   uword they are added as two unaligned 32-bit words; otherwise a single
   unaligned 64-bit load starting at src_address is added. */
1478 if (BITS (uword) == 32)
1481 ip_csum_with_carry (sum0,
1482 clib_mem_unaligned (&ip0->src_address, u32));
1484 ip_csum_with_carry (sum0,
1485 clib_mem_unaligned (&ip0->dst_address, u32));
1489 ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Payload starts right after the IP header.  If the first buffer holds less
   than header + payload, only sum what it actually contains for now (or
   nothing, if it does not even hold the full header). */
1491 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1492 data_this_buffer = (void *) ip0 + ip_header_length;
1493 if (n_this_buffer + ip_header_length > p0->current_length)
1495 p0->current_length >
1496 ip_header_length ? p0->current_length - ip_header_length : 0;
/* Accumulate the current segment; stop once the whole payload is covered,
   otherwise move on to the next buffer in the chain (asserted to exist). */
1499 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1500 n_bytes_left -= n_this_buffer;
1501 if (n_bytes_left == 0)
1504 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1505 p0 = vlib_get_buffer (vm, p0->next_buffer);
1506 data_this_buffer = vlib_buffer_get_current (p0);
1507 n_this_buffer = p0->current_length;
/* Fold to 16 bits and complement. */
1510 sum16 = ~ip_csum_fold (sum0);
/*
 * Verifies the TCP/UDP checksum of the packet at the current position of
 * `p0` and records the outcome in p0->flags:
 *   IP_BUFFER_L4_CHECKSUM_COMPUTED is always set,
 *   IP_BUFFER_L4_CHECKSUM_CORRECT  is set when the computed checksum is 0,
 *                                  or when a UDP packet carries checksum 0.
 *
 * Callers in this file assign the return value to their local copy of the
 * buffer flags.  Asserts that the protocol is TCP or UDP.
 */
1516 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1518 ip4_header_t *ip0 = vlib_buffer_get_current (p0);
1522 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1523 || ip0->protocol == IP_PROTOCOL_UDP);
/* NOTE(review): the UDP header is located at (ip0 + 1), i.e. assuming an
   IPv4 header without options, whereas ip4_local_inline uses
   ip4_next_header () -- confirm packets with IP options cannot reach here. */
1525 udp0 = (void *) (ip0 + 1);
/* UDP checksum field of zero: accept without computing anything. */
1526 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1528 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1529 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1533 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
/* (sum16 == 0) is shifted into the bit position of the CORRECT flag. */
1535 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1536 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/* Declares the "ip4-local" feature arc, entered from the "ip4-local" node.
   ip4_local_inline reads its index via
   vnet_feat_arc_ip4_local.feature_arc_index. */
1542 VNET_FEATURE_ARC_INIT (ip4_local) =
1544 .arc_name = "ip4-local",
1545 .start_nodes = VNET_FEATURES ("ip4-local"),
/*
 * Shared body of the "ip4-local" and "ip4-local-end-of-arc" nodes: handles
 * packets addressed to this host.
 *
 * head_of_feature_arc - non-zero when called from ip4-local (start of the
 *                       "ip4-local" feature arc), zero when called from
 *                       ip4-local-end-of-arc.
 *
 * Per packet, as far as the code below shows:
 *   - the current buffer offset is saved as the start of the IP header;
 *   - the source address is looked up in the FIB mtrie (4 steps, interleaved
 *     with the other checks);
 *   - UDP length and TCP/UDP checksum are checked;
 *   - the source-lookup result is used for a spoofing / reverse-path check;
 *   - the next node is picked from lm->local_next_by_ip_protocol, or drop
 *     when any check failed;
 *   - at the head of the arc, packets without error are started on the
 *     feature arc.
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error found so far" value:
 * later checks only fire while error is still equal to it, and any other
 * value forces IP_LOCAL_NEXT_DROP.
 *
 * Returns frame->n_vectors.
 */
1550 ip4_local_inline (vlib_main_t * vm,
1551 vlib_node_runtime_t * node,
1552 vlib_frame_t * frame, int head_of_feature_arc)
1554 ip4_main_t *im = &ip4_main;
1555 ip_lookup_main_t *lm = &im->lookup_main;
1556 ip_local_next_t next_index;
1557 u32 *from, *to_next, n_left_from, n_left_to_next;
/* Error counters are attributed to the ip4-input node. */
1558 vlib_node_runtime_t *error_node =
1559 vlib_node_get_runtime (vm, ip4_input_node.index);
1560 u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
1562 from = vlib_frame_vector_args (frame);
1563 n_left_from = frame->n_vectors;
1564 next_index = node->cached_next_index;
1566 if (node->flags & VLIB_NODE_FLAG_TRACE)
1567 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1569 while (n_left_from > 0)
1571 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet loop. */
1573 while (n_left_from >= 4 && n_left_to_next >= 2)
1575 vlib_buffer_t *p0, *p1;
1576 ip4_header_t *ip0, *ip1;
1577 udp_header_t *udp0, *udp1;
1578 ip4_fib_mtrie_t *mtrie0, *mtrie1;
1579 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1580 const dpo_id_t *dpo0, *dpo1;
1581 const load_balance_t *lb0, *lb1;
1582 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1583 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1584 i32 len_diff0, len_diff1;
1585 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1586 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1587 u32 sw_if_index0, sw_if_index1;
/* Speculatively enqueue both buffers to the current next frame. */
1589 pi0 = to_next[0] = from[0];
1590 pi1 = to_next[1] = from[1];
1594 n_left_to_next -= 2;
1596 next0 = next1 = IP_LOCAL_NEXT_DROP;
1598 p0 = vlib_get_buffer (vm, pi0);
1599 p1 = vlib_get_buffer (vm, pi1);
1601 ip0 = vlib_buffer_get_current (p0);
1602 ip1 = vlib_buffer_get_current (p1);
/* Remember where the IP header starts in each buffer. */
1604 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1605 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
1607 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1608 sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX];
/* FIB index: the table bound to the RX interface, unless
   sw_if_index[VLIB_TX] is set (not ~0), in which case that value is used. */
1610 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1611 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
/* NOTE(review): fib_index0 / fib_index1 are loaded from the same vector
   again below; the repeated loads look redundant. */
1613 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1615 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1616 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1618 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1);
1620 (vnet_buffer (p1)->sw_if_index[VLIB_TX] ==
1621 (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX];
/* Source-address lookup, step 0 of 4. */
1623 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1624 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1626 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1629 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1631 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1633 /* Treat IP frag packets as "experimental" protocol for now
1634 until support of IP frag reassembly is implemented */
1635 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1636 proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol;
/* When not at the head of the feature arc, start from "no error". */
1638 if (head_of_feature_arc == 0)
1640 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1644 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1645 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1646 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1647 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
/* Checksum considered good if the buffer is already flagged as correct. */
1652 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1653 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1655 udp0 = ip4_next_header (ip0);
1656 udp1 = ip4_next_header (ip1);
1658 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1659 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1660 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
/* Source-address lookup, step 1 of 4. */
1663 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1665 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1667 /* Verify UDP length. */
1668 ip_len0 = clib_net_to_host_u16 (ip0->length);
1669 ip_len1 = clib_net_to_host_u16 (ip1->length);
1670 udp_len0 = clib_net_to_host_u16 (udp0->length);
1671 udp_len1 = clib_net_to_host_u16 (udp1->length);
/* Negative difference means the UDP length exceeds the IP length; the
   difference is ignored (forced to 0) for non-UDP packets. */
1673 len_diff0 = ip_len0 - udp_len0;
1674 len_diff1 = ip_len1 - udp_len1;
1676 len_diff0 = is_udp0 ? len_diff0 : 0;
1677 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not TCP/UDP or is not yet known to have
   a good checksum.  Validate in software where not already computed. */
1679 if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1
1680 & good_tcp_udp0 & good_tcp_udp1)))
1685 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1686 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1688 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1689 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1694 && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1695 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1697 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1698 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
/* A bad UDP length also counts as "not good". */
1702 good_tcp_udp0 &= len_diff0 >= 0;
1703 good_tcp_udp1 &= len_diff1 >= 0;
/* Source-address lookup, step 2 of 4. */
1706 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1708 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* Error selection: length error first, then overridden by a checksum error
   for TCP/UDP packets that are not good. */
1710 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1712 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1713 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* The checksum error is computed as TCP_CHECKSUM + is_udp, which relies on
   the two enum values being adjacent. */
1715 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1716 error0 = (is_tcp_udp0 && !good_tcp_udp0
1717 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1718 error1 = (is_tcp_udp1 && !good_tcp_udp1
1719 ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1);
/* Source-address lookup, step 3 of 4; an empty leaf falls back to the
   mtrie's default leaf. */
1722 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1724 ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1727 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1730 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* Store the source-lookup result in both adjacency slots of the buffer. */
1732 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 =
1733 ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1734 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1736 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 =
1737 ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1738 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
/* Only bucket 0 of the load-balance object is inspected. */
1740 lb0 = load_balance_get (lbi0);
1741 lb1 = load_balance_get (lbi1);
1742 dpo0 = load_balance_get_bucket_i (lb0, 0);
1743 dpo1 = load_balance_get_bucket_i (lb1, 0);
1746 * Must have a route to source otherwise we drop the packet.
1747 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1750 * - the source is a recieve => it's from us => bogus, do this
1751 * first since it sets a different error code.
1752 * - uRPF check for any route to source - accept if passes.
1753 * - allow packets destined to the broadcast address from unknown sources
1755 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1756 dpo0->dpoi_type == DPO_RECEIVE) ?
1757 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1758 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1759 !fib_urpf_check_size (lb0->lb_urpf) &&
1760 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1761 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1762 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1763 dpo1->dpoi_type == DPO_RECEIVE) ?
1764 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1);
1765 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1766 !fib_urpf_check_size (lb1->lb_urpf) &&
1767 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1768 ? IP4_ERROR_SRC_LOOKUP_MISS : error1);
/* Dispatch by IP protocol, or drop if any error was recorded. */
1772 next0 = lm->local_next_by_ip_protocol[proto0];
1773 next1 = lm->local_next_by_ip_protocol[proto1];
1776 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1778 error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1780 p0->error = error0 ? error_node->errors[error0] : 0;
1781 p1->error = error1 ? error_node->errors[error1] : 0;
/* At the head of the arc, error-free packets are handed to the first
   enabled "ip4-local" feature (which may change next0 / next1). */
1783 if (head_of_feature_arc)
1785 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1786 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1787 if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1788 vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1);
1791 vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
1792 n_left_to_next, pi0, pi1,
/* Single-packet loop: same sequence of steps as above for one buffer. */
1796 while (n_left_from > 0 && n_left_to_next > 0)
1801 ip4_fib_mtrie_t *mtrie0;
1802 ip4_fib_mtrie_leaf_t leaf0;
1803 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1805 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1806 load_balance_t *lb0;
1807 const dpo_id_t *dpo0;
1810 pi0 = to_next[0] = from[0];
1814 n_left_to_next -= 1;
1816 next0 = IP_LOCAL_NEXT_DROP;
1818 p0 = vlib_get_buffer (vm, pi0);
1820 ip0 = vlib_buffer_get_current (p0);
1822 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1824 sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
1826 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0);
1829 (vnet_buffer (p0)->sw_if_index[VLIB_TX] ==
1830 (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX];
1832 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1834 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1837 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1839 /* Treat IP frag packets as "experimental" protocol for now
1840 until support of IP frag reassembly is implemented */
1841 proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol;
1843 if (head_of_feature_arc == 0)
1845 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1849 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1850 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1854 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1856 udp0 = ip4_next_header (ip0);
1858 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1859 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1862 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1864 /* Verify UDP length. */
1865 ip_len0 = clib_net_to_host_u16 (ip0->length);
1866 udp_len0 = clib_net_to_host_u16 (udp0->length);
1868 len_diff0 = ip_len0 - udp_len0;
1870 len_diff0 = is_udp0 ? len_diff0 : 0;
1872 if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0)))
1877 && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1878 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1880 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1881 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1885 good_tcp_udp0 &= len_diff0 >= 0;
1888 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1890 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1892 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1894 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1895 error0 = (is_tcp_udp0 && !good_tcp_udp0
1896 ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0);
1899 ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1902 IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1904 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1905 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1907 lb0 = load_balance_get (lbi0);
1908 dpo0 = load_balance_get_bucket_i (lb0, 0);
1910 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1911 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
/* Same source checks as in the dual loop: a source that resolves to a
   DPO_RECEIVE is treated as spoofed; a failed fib_urpf_check_size on a
   non-broadcast destination is a source-lookup miss. */
1913 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1914 dpo0->dpoi_type == DPO_RECEIVE) ?
1915 IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0);
1916 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1917 !fib_urpf_check_size (lb0->lb_urpf) &&
1918 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1919 ? IP4_ERROR_SRC_LOOKUP_MISS : error0);
1923 next0 = lm->local_next_by_ip_protocol[proto0];
1926 error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1928 p0->error = error0 ? error_node->errors[error0] : 0;
1930 if (head_of_feature_arc)
1932 if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
1933 vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0);
1936 vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1937 n_left_to_next, pi0, next0);
1941 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1944 return frame->n_vectors;
/* Node function for "ip4-local": runs the shared local-delivery code as the
   head of the "ip4-local" feature arc. */
1948 ip4_local (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
1950 return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
/* Graph-node registration for "ip4-local".  The static next nodes are
   error-drop, error-punt, ip4-udp-lookup and ip4-icmp-input;
   ip4_register_protocol adds further next nodes to this node at run time. */
1954 VLIB_REGISTER_NODE (ip4_local_node) =
1956 .function = ip4_local,
1957 .name = "ip4-local",
1958 .vector_size = sizeof (u32),
1959 .format_trace = format_ip4_forward_next_trace,
1960 .n_next_nodes = IP_LOCAL_N_NEXT,
1963 [IP_LOCAL_NEXT_DROP] = "error-drop",
1964 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1965 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1966 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",},
1970 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local);
/* Node function for "ip4-local-end-of-arc": runs the shared local-delivery
   code with head_of_feature_arc == 0, so the packet is not started on the
   "ip4-local" feature arc again. */
1973 ip4_local_end_of_arc (vlib_main_t * vm,
1974 vlib_node_runtime_t * node, vlib_frame_t * frame)
1976 return ip4_local_inline (vm, node, frame, 0 /* not head of feature arc */ );
/* Graph-node registration for "ip4-local-end-of-arc".  It is declared a
   sibling of "ip4-local" -- presumably so both share the same next-node
   indices, which ip4_local_inline relies on (TODO confirm .sibling_of
   semantics). */
1980 VLIB_REGISTER_NODE (ip4_local_end_of_arc_node,static) = {
1981 .function = ip4_local_end_of_arc,
1982 .name = "ip4-local-end-of-arc",
1983 .vector_size = sizeof (u32),
1985 .format_trace = format_ip4_forward_next_trace,
1986 .sibling_of = "ip4-local",
1989 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_end_of_arc_node, ip4_local_end_of_arc)
/* Feature registration: "ip4-local-end-of-arc" is the final feature on the
   "ip4-local" arc (no runs_before successors). */
1991 VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
1992 .arc_name = "ip4-local",
1993 .node_name = "ip4-local-end-of-arc",
1994 .runs_before = 0, /* not before any other features */
/*
 * Registers a graph node as the handler for locally delivered packets of a
 * given IP protocol.
 *
 * protocol   - IP protocol number; asserted to be a valid index into
 *              lm->local_next_by_ip_protocol.
 * node_index - node to receive such packets.  It is added as a next node of
 *              ip4-local and the resulting next index is stored in the
 *              per-protocol dispatch table used by ip4_local_inline.
 */
1999 ip4_register_protocol (u32 protocol, u32 node_index)
2001 vlib_main_t *vm = vlib_get_main ();
2002 ip4_main_t *im = &ip4_main;
2003 ip_lookup_main_t *lm = &im->lookup_main;
2005 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
2006 lm->local_next_by_ip_protocol[protocol] =
2007 vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local".
 *
 * Prints a heading followed by one line per IP protocol number whose entry
 * in lm->local_next_by_ip_protocol differs from IP_LOCAL_NEXT_PUNT, i.e.
 * every protocol that is not simply punted.
 */
2010 static clib_error_t *
2011 show_ip_local_command_fn (vlib_main_t * vm,
2012 unformat_input_t * input, vlib_cli_command_t * cmd)
2014 ip4_main_t *im = &ip4_main;
2015 ip_lookup_main_t *lm = &im->lookup_main;
2018 vlib_cli_output (vm, "Protocols handled by ip4_local");
2019 for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++)
2021 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
2022 vlib_cli_output (vm, "%d", i);
/* CLI registration binding the "show ip local" path to
   show_ip_local_command_fn. */
2030 * Display the set of protocols handled by the local IPv4 stack.
2033 * Example of how to display local protocol table:
2034 * @cliexstart{show ip local}
2035 * Protocols handled by ip4_local
2042 VLIB_CLI_COMMAND (show_ip_local, static) =
2044 .path = "show ip local",
2045 .function = show_ip_local_command_fn,
2046 .short_help = "show ip local",
/*
 * Shared body of the "ip4-arp" and "ip4-glean" nodes: for packets whose
 * adjacency is not yet resolved, send an ARP request and dispose of the
 * original packet via the drop next.
 *
 * is_glean - 1 for ip4-glean (ARP for the packet's destination address),
 *            0 for ip4-arp (ARP for the adjacency's next-hop address).
 *
 * ARP requests are rate-limited with a small hashed bitmap held in function
 * statics (hash_seeds / hash_bitmap): a key whose bit is already set is
 * counted as IP4_ARP_ERROR_DROP and no request is generated.  The seeds are
 * redrawn and the bitmap reset when more than 1e-3 seconds have elapsed
 * since the last reseed.
 *
 * Every input buffer is enqueued to IP4_ARP_NEXT_DROP; its error is either
 * IP4_ARP_ERROR_DROP or IP4_ARP_ERROR_REQUEST_SENT (or one of the special
 * cases further down).
 *
 * Returns frame->n_vectors.
 */
2051 ip4_arp_inline (vlib_main_t * vm,
2052 vlib_node_runtime_t * node,
2053 vlib_frame_t * frame, int is_glean)
2055 vnet_main_t *vnm = vnet_get_main ();
2056 ip4_main_t *im = &ip4_main;
2057 ip_lookup_main_t *lm = &im->lookup_main;
2058 u32 *from, *to_next_drop;
2059 uword n_left_from, n_left_to_next_drop, next_index;
/* Rate-limiter state; static, so it persists across calls and is shared by
   the arp and glean nodes. */
2060 static f64 time_last_seed_change = -1e100;
2061 static u32 hash_seeds[3];
2062 static uword hash_bitmap[256 / BITS (uword)];
2065 if (node->flags & VLIB_NODE_FLAG_TRACE)
2066 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Periodic reseed: new random seeds and a fresh bitmap. */
2068 time_now = vlib_time_now (vm);
2069 if (time_now - time_last_seed_change > 1e-3)
2072 u32 *r = clib_random_buffer_get_data (&vm->random_buffer,
2073 sizeof (hash_seeds));
2074 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
2075 hash_seeds[i] = r[i];
2077 /* Mark all hash keys as not seen before. */
2078 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
2081 time_last_seed_change = time_now;
2084 from = vlib_frame_vector_args (frame);
2085 n_left_from = frame->n_vectors;
2086 next_index = node->cached_next_index;
2087 if (next_index == IP4_ARP_NEXT_DROP)
2088 next_index = IP4_ARP_N_NEXT; /* point to first interface */
2090 while (n_left_from > 0)
2092 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
2093 to_next_drop, n_left_to_next_drop);
2095 while (n_left_from > 0 && n_left_to_next_drop > 0)
2097 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
2098 ip_adjacency_t *adj0;
2105 p0 = vlib_get_buffer (vm, pi0);
2107 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2108 adj0 = ip_get_adjacency (lm, adj_index0);
2109 ip0 = vlib_buffer_get_current (p0);
/* The TX interface comes from the adjacency's rewrite header. */
2115 sw_if_index0 = adj0->rewrite_header.sw_if_index;
2116 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
/* Mix the address being resolved into the hash key. */
2121 * this is the Glean case, so we are ARPing for the
2122 * packet's destination
2124 a0 ^= ip0->dst_address.data_u32;
2128 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
2132 hash_v3_finalize32 (a0, b0, c0);
/* Reduce the hash to a bit position in hash_bitmap.
   NOTE(review): c0 is divided by BITS (uword) before m0 is derived from
   c0 % BITS (uword).  After the division c0 is only a word index
   (0 .. ARRAY_LEN (hash_bitmap) - 1), so m0 can select only the lowest few
   bits of each word and most of the bitmap is never used.  Computing m0
   before the division looks like the intent -- confirm and fix. */
2134 c0 &= BITS (hash_bitmap) - 1;
2135 c0 = c0 / BITS (uword);
2136 m0 = (uword) 1 << (c0 % BITS (uword));
2138 bm0 = hash_bitmap[c0];
2139 drop0 = (bm0 & m0) != 0;
2141 /* Mark it as seen. */
2142 hash_bitmap[c0] = bm0 | m0;
/* The original packet always goes to the drop next. */
2146 to_next_drop[0] = pi0;
2148 n_left_to_next_drop -= 1;
2151 node->errors[drop0 ? IP4_ARP_ERROR_DROP :
2152 IP4_ARP_ERROR_REQUEST_SENT];
2155 * the adj has been updated to a rewrite but the node the DPO that got
2156 * us here hasn't - yet. no big deal. we'll drop while we wait.
2158 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
2165 * Can happen if the control-plane is programming tables
2166 * with traffic flowing; at least that's today's lame excuse.
2168 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
2169 || (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
2171 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
2174 /* Send ARP request. */
2178 ethernet_arp_header_t *h0;
2179 vnet_hw_interface_t *hw_if0;
/* Start from the ARP request template built in ip4_lookup_init. */
2182 vlib_packet_template_get_packet (vm,
2183 &im->ip4_arp_request_packet_template,
2186 /* Add rewrite/encap string for ARP packet. */
2187 vnet_rewrite_one_header (adj0[0], h0,
2188 sizeof (ethernet_header_t));
2190 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
2192 /* Src ethernet address in ARP header. */
2193 clib_memcpy (h0->ip4_over_ethernet[0].ethernet,
2195 sizeof (h0->ip4_over_ethernet[0].ethernet));
2199 /* The interface's source address is stashed in the Glean Adj */
2200 h0->ip4_over_ethernet[0].ip4 =
2201 adj0->sub_type.glean.receive_addr.ip4;
2203 /* Copy in destination address we are requesting. This is the
2204 * glean case, so it's the packet's destination.*/
2205 h0->ip4_over_ethernet[1].ip4.data_u32 =
2206 ip0->dst_address.data_u32;
2210 /* Src IP address in ARP header. */
2211 if (ip4_src_address_for_packet (lm, sw_if_index0,
2213 ip4_over_ethernet[0].ip4))
/* No usable source address: count the error and release the ARP buffer
   that was just allocated. */
2215 /* No source address available */
2217 node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2218 vlib_buffer_free (vm, &bi0, 1);
2222 /* Copy in destination address we are requesting from the
2224 h0->ip4_over_ethernet[1].ip4.data_u32 =
2225 adj0->sub_type.nbr.next_hop.ip4.as_u32;
/* The ARP request inherits the trace flag of the triggering packet, is
   moved back to expose the rewrite bytes, and is sent to the adjacency's
   next node. */
2228 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2229 b0 = vlib_get_buffer (vm, bi0);
2230 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2232 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2234 vlib_set_next_frame_buffer (vm, node,
2235 adj0->rewrite_header.next_index,
2240 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2243 return frame->n_vectors;
/* Node function for "ip4-arp": ARP for the adjacency's next hop
   (is_glean == 0). */
2247 ip4_arp (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2249 return (ip4_arp_inline (vm, node, frame, 0));
/* Node function for "ip4-glean": ARP for the packet's destination address
   (is_glean == 1). */
2253 ip4_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
2255 return (ip4_arp_inline (vm, node, frame, 1));
/* Counter descriptions for the IP4_ARP_ERROR_* codes, indexed by error code.
   Used as .error_strings by both the ip4-arp and ip4-glean nodes. */
2258 static char *ip4_arp_error_strings[] = {
2259 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2260 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2261 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2262 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2263 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2264 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph-node registration for "ip4-arp": runs ip4_arp, reports the
   ip4_arp_error_strings counters, and has "error-drop" as its only static
   next node. */
2267 VLIB_REGISTER_NODE (ip4_arp_node) =
2269 .function = ip4_arp,.name = "ip4-arp",.vector_size =
2270 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2271 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2272 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2274 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Graph-node registration for "ip4-glean": runs ip4_glean and otherwise
   mirrors the ip4-arp registration (same error strings, same static next
   node). */
2277 VLIB_REGISTER_NODE (ip4_glean_node) =
2279 .function = ip4_glean,.name = "ip4-glean",.vector_size =
2280 sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors =
2281 ARRAY_LEN (ip4_arp_error_strings),.error_strings =
2282 ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes =
2284 [IP4_ARP_NEXT_DROP] = "error-drop",}
/* Init function (registered with VLIB_INIT_FUNCTION below) that walks the
   foreach_notrace_ip4_arp_error list and, for each listed IP4_ARP_ERROR_*
   code, passes the ip4-arp node's corresponding error to
   vnet_pcap_drop_trace_filter_add_del -- so, per the comment inside, drops
   of ARP request packets are kept out of the pcap drop trace. */
2287 #define foreach_notrace_ip4_arp_error \
2294 arp_notrace_init (vlib_main_t * vm)
2296 vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
2298 /* don't trace ARP request packets */
2300 vnet_pcap_drop_trace_filter_add_del \
2301 (rt->errors[IP4_ARP_ERROR_##a], \
2303 foreach_notrace_ip4_arp_error;
2308 VLIB_INIT_FUNCTION (arp_notrace_init);
/*
 * Builds and transmits one ARP request for `dst` on interface `sw_if_index`.
 *
 * Returns 0 on success, or a clib error when
 *   - the interface is not admin-up, or
 *   - no interface address matches the destination (vnm->api_errno is then
 *     set to VNET_API_ERROR_NO_MATCHING_INTERFACE).
 */
2311 /* Send an ARP request to see if given destination is reachable on given interface. */
2313 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2315 vnet_main_t *vnm = vnet_get_main ();
2316 ip4_main_t *im = &ip4_main;
2317 ethernet_arp_header_t *h;
2319 ip_interface_address_t *ia;
2320 ip_adjacency_t *adj;
2321 vnet_hw_interface_t *hi;
2322 vnet_sw_interface_t *si;
2326 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is administratively down. */
2328 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2330 return clib_error_return (0, "%U: interface %U down",
2331 format_ip4_address, dst,
2332 format_vnet_sw_if_index_name, vnm,
/* Find the interface address that covers `dst`; it also yields `ia`, whose
   neighbor_probe_adj_index is used below. */
2337 ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2340 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2341 return clib_error_return
2343 "no matching interface address for destination %U (interface %U)",
2344 format_ip4_address, dst, format_vnet_sw_if_index_name, vnm,
2348 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
/* Start from the ARP request template built in ip4_lookup_init. */
2351 vlib_packet_template_get_packet (vm,
2352 &im->ip4_arp_request_packet_template,
2355 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address = the interface's own hardware address. */
2357 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address,
2358 sizeof (h->ip4_over_ethernet[0].ethernet));
/* Sender IP = matching interface address, target IP = dst. */
2360 h->ip4_over_ethernet[0].ip4 = src[0];
2361 h->ip4_over_ethernet[1].ip4 = dst[0];
2363 b = vlib_get_buffer (vm, bi);
2364 vnet_buffer (b)->sw_if_index[VLIB_RX] =
2365 vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2367 /* Add encapsulation string for software interface (e.g. ethernet header). */
2368 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2369 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the packet directly to the hardware interface's output node. */
2372 vlib_frame_t *f = vlib_get_frame_to_node (vm, hi->output_node_index);
2373 u32 *to_next = vlib_frame_vector_args (f);
2376 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2379 return /* no error */ 0;
/* Next-node indices used by the ip4 rewrite code; ip4_rewrite_inline
   initialises each packet's next to IP4_REWRITE_NEXT_DROP. */
2384 IP4_REWRITE_NEXT_DROP,
2385 IP4_REWRITE_NEXT_ICMP_ERROR,
2386 } ip4_rewrite_next_t;
/*
 * ip4_rewrite_inline - shared worker called by ip4_rewrite, ip4_midchain and
 * ip4_rewrite_mcast with constant do_counters, is_midchain and is_mcast
 * arguments.
 *
 * For each buffer index in the frame it:
 *   - reads the TX adjacency index from the buffer metadata;
 *   - for buffers not flagged VNET_BUFFER_LOCALLY_ORIGINATED, patches the
 *     IPv4 header checksum and, when the TTL is exhausted, marks the buffer
 *     IP4_ERROR_TIME_EXPIRED and steers it to IP4_REWRITE_NEXT_ICMP_ERROR;
 *   - clears VNET_BUFFER_LOCALLY_ORIGINATED;
 *   - compares the chain length with the adjacency's max_l3_packet_bytes
 *     (IP4_ERROR_MTU_EXCEEDED on overflow);
 *   - when no error was recorded, moves current_data back by the rewrite
 *     length, grows current_length by the same amount, and takes the next
 *     index and TX sw_if_index from the adjacency rewrite header.
 *
 * @param vm     vlib main passed through to the vlib helpers
 * @param node   runtime of the calling node (supplies cached_next_index
 *               and the trace flag)
 * @param frame  frame whose vector holds u32 buffer indices
 * @param do_counters, is_midchain, is_mcast
 *               NOTE(review): the statements that test these flags are not
 *               visible in this listing; presumably they gate the counter,
 *               midchain-fixup and mcast-fixup sections respectively.
 * @return frame->n_vectors
 *
 * NOTE(review): this listing is missing short lines (braces, else branches,
 * pointer/count advances, the TTL store); comments below describe only
 * what is visible.
 */
2389 ip4_rewrite_inline (vlib_main_t * vm,
2390 vlib_node_runtime_t * node,
2391 vlib_frame_t * frame,
2392 int do_counters, int is_midchain, int is_mcast)
2394 ip_lookup_main_t *lm = &ip4_main.lookup_main;
2395 u32 *from = vlib_frame_vector_args (frame);
2396 u32 n_left_from, n_left_to_next, *to_next, next_index;
/* Error codes stored on buffers are looked up in the ip4-input node's
 * runtime (see the p0->error assignment in the single-buffer loop). */
2397 vlib_node_runtime_t *error_node =
2398 vlib_node_get_runtime (vm, ip4_input_node.index);
2400 n_left_from = frame->n_vectors;
2401 next_index = node->cached_next_index;
2402 u32 cpu_index = os_get_cpu_number ();
2404 while (n_left_from > 0)
2406 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-buffer path: handles two buffers per iteration.  The bound is 4
 * because from[2] and from[3] are read for the prefetch below. */
2408 while (n_left_from >= 4 && n_left_to_next >= 2)
2410 ip_adjacency_t *adj0, *adj1;
2411 vlib_buffer_t *p0, *p1;
2412 ip4_header_t *ip0, *ip1;
2413 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2414 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2415 u32 tx_sw_if_index0, tx_sw_if_index1;
2417 /* Prefetch next iteration. */
2419 vlib_buffer_t *p2, *p3;
2421 p2 = vlib_get_buffer (vm, from[2]);
2422 p3 = vlib_get_buffer (vm, from[3]);
2424 vlib_prefetch_buffer_header (p2, STORE);
2425 vlib_prefetch_buffer_header (p3, STORE);
2427 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2428 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
/* Speculatively place both buffer indices in the current next frame. */
2431 pi0 = to_next[0] = from[0];
2432 pi1 = to_next[1] = from[1];
2437 n_left_to_next -= 2;
2439 p0 = vlib_get_buffer (vm, pi0);
2440 p1 = vlib_get_buffer (vm, pi1);
2442 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2443 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2446 * pre-fetch the per-adjacency counters
2450 vlib_prefetch_combined_counter (&adjacency_counters,
2451 cpu_index, adj_index0);
2452 vlib_prefetch_combined_counter (&adjacency_counters,
2453 cpu_index, adj_index1);
2456 /* We should never rewrite a pkt using the MISS adjacency */
2457 ASSERT (adj_index0 && adj_index1);
2459 ip0 = vlib_buffer_get_current (p0);
2460 ip1 = vlib_buffer_get_current (p1);
/* Defaults: no error, and the drop next unless a later step overrides it. */
2462 error0 = error1 = IP4_ERROR_NONE;
2463 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2465 /* Decrement TTL & update checksum.
2466 Works either endian, so no need for byte swap. */
2467 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2469 i32 ttl0 = ip0->ttl;
2471 /* Input node should have rejected packets with ttl 0. */
2472 ASSERT (ip0->ttl > 0);
/* Incremental checksum update: add 0x0100 (network order) to the stored
 * checksum, then fold the carry back in on the next line.
 * NOTE(review): the TTL decrement/store itself is not visible here. */
2474 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2475 checksum0 += checksum0 >= 0xffff;
2477 ip0->checksum = checksum0;
2482 * If the ttl drops below 1 when forwarding, generate
2485 if (PREDICT_FALSE (ttl0 <= 0))
2487 error0 = IP4_ERROR_TIME_EXPIRED;
2488 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2489 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2490 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2492 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2495 /* Verify checksum. */
2496 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2500 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
/* Same TTL/checksum handling for the second buffer. */
2502 if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2504 i32 ttl1 = ip1->ttl;
2506 /* Input node should have rejected packets with ttl 0. */
2507 ASSERT (ip1->ttl > 0);
2509 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2510 checksum1 += checksum1 >= 0xffff;
2512 ip1->checksum = checksum1;
2517 * If the ttl drops below 1 when forwarding, generate
2520 if (PREDICT_FALSE (ttl1 <= 0))
2522 error1 = IP4_ERROR_TIME_EXPIRED;
2523 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2524 icmp4_error_set_vnet_buffer (p1, ICMP4_time_exceeded,
2525 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2527 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
/* NOTE(review): ip0's checksum is asserted again here next to ip1's;
 * this looks redundant with the earlier ip0 assert - confirm intent. */
2530 /* Verify checksum. */
2531 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2532 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2536 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2539 /* Rewrite packet header and updates lengths. */
2540 adj0 = ip_get_adjacency (lm, adj_index0);
2541 adj1 = ip_get_adjacency (lm, adj_index1);
2543 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2544 rw_len0 = adj0[0].rewrite_header.data_bytes;
2545 rw_len1 = adj1[0].rewrite_header.data_bytes;
2546 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
2547 vnet_buffer (p1)->ip.save_rewrite_length = rw_len1;
2549 /* Check MTU of outgoing interface. */
2551 (vlib_buffer_length_in_chain (vm, p0) >
2553 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2556 (vlib_buffer_length_in_chain (vm, p1) >
2558 rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED :
2561 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2562 * to see the IP header */
2563 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2565 next0 = adj0[0].rewrite_header.next_index;
2566 p0->current_data -= rw_len0;
2567 p0->current_length += rw_len0;
2568 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2569 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2572 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2573 vnet_feature_arc_start (lm->output_feature_arc_index,
2574 tx_sw_if_index0, &next0, p0);
2576 if (PREDICT_TRUE (error1 == IP4_ERROR_NONE))
2578 next1 = adj1[0].rewrite_header.next_index;
2579 p1->current_data -= rw_len1;
2580 p1->current_length += rw_len1;
2582 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2583 vnet_buffer (p1)->sw_if_index[VLIB_TX] = tx_sw_if_index1;
2586 (adj1[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2587 vnet_feature_arc_start (lm->output_feature_arc_index,
2588 tx_sw_if_index1, &next1, p1);
2591 /* Guess we are only writing on simple Ethernet header. */
2592 vnet_rewrite_two_headers (adj0[0], adj1[0],
2593 ip0, ip1, sizeof (ethernet_header_t));
2596 * Bump the per-adjacency counters
2600 vlib_increment_combined_counter
2601 (&adjacency_counters,
2604 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2606 vlib_increment_combined_counter
2607 (&adjacency_counters,
2610 vlib_buffer_length_in_chain (vm, p1) + rw_len1);
/* Per-adjacency fixup callbacks.
 * NOTE(review): presumably executed only when is_midchain is set; the
 * guard is not visible in this listing. */
2615 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2616 adj1->sub_type.midchain.fixup_func (vm, adj1, p1);
2621 * copy bytes from the IP address into the MAC rewrite
2623 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2624 vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1);
/* Confirm or repair the speculative enqueue using the final next0/next1. */
2627 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2628 to_next, n_left_to_next,
2629 pi0, pi1, next0, next1);
/* Single-buffer path for whatever the dual-buffer loop left over. */
2632 while (n_left_from > 0 && n_left_to_next > 0)
2634 ip_adjacency_t *adj0;
2637 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2638 u32 tx_sw_if_index0;
2640 pi0 = to_next[0] = from[0];
2642 p0 = vlib_get_buffer (vm, pi0);
2644 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2646 /* We should never rewrite a pkt using the MISS adjacency */
2647 ASSERT (adj_index0);
2649 adj0 = ip_get_adjacency (lm, adj_index0);
2651 ip0 = vlib_buffer_get_current (p0);
2653 error0 = IP4_ERROR_NONE;
2654 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2656 /* Decrement TTL & update checksum. */
2657 if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2659 i32 ttl0 = ip0->ttl;
2661 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2663 checksum0 += checksum0 >= 0xffff;
2665 ip0->checksum = checksum0;
2667 ASSERT (ip0->ttl > 0);
2673 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2675 if (PREDICT_FALSE (ttl0 <= 0))
2678 * If the ttl drops below 1 when forwarding, generate
2681 error0 = IP4_ERROR_TIME_EXPIRED;
2682 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2683 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
2684 icmp4_error_set_vnet_buffer (p0, ICMP4_time_exceeded,
2685 ICMP4_time_exceeded_ttl_exceeded_in_transit,
2691 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2695 vlib_prefetch_combined_counter (&adjacency_counters,
2696 cpu_index, adj_index0);
2698 /* Guess we are only writing on simple Ethernet header. */
2699 vnet_rewrite_one_header (adj0[0], ip0, sizeof (ethernet_header_t));
2703 * copy bytes from the IP address into the MAC rewrite
2705 vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0);
2708 /* Update packet buffer attributes/set output interface. */
2709 rw_len0 = adj0[0].rewrite_header.data_bytes;
2710 vnet_buffer (p0)->ip.save_rewrite_length = rw_len0;
/* Counter is bumped by one packet and (chain length + rewrite length)
 * bytes; in this loop that happens before the MTU check below. */
2713 vlib_increment_combined_counter
2714 (&adjacency_counters,
2715 cpu_index, adj_index0, 1,
2716 vlib_buffer_length_in_chain (vm, p0) + rw_len0);
2718 /* Check MTU of outgoing interface. */
2719 error0 = (vlib_buffer_length_in_chain (vm, p0)
2720 > adj0[0].rewrite_header.max_l3_packet_bytes
2721 ? IP4_ERROR_MTU_EXCEEDED : error0);
2723 p0->error = error_node->errors[error0];
2725 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2726 * to see the IP header */
2727 if (PREDICT_TRUE (error0 == IP4_ERROR_NONE))
2729 p0->current_data -= rw_len0;
2730 p0->current_length += rw_len0;
2731 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2733 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2734 next0 = adj0[0].rewrite_header.next_index;
2738 adj0->sub_type.midchain.fixup_func (vm, adj0, p0);
2742 (adj0[0].rewrite_header.flags & VNET_REWRITE_HAS_FEATURES))
2743 vnet_feature_arc_start (lm->output_feature_arc_index,
2744 tx_sw_if_index0, &next0, p0);
2751 n_left_to_next -= 1;
2753 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2754 to_next, n_left_to_next,
2758 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2761 /* Need to do trace after rewrites to pick up new packet data. */
2762 if (node->flags & VLIB_NODE_FLAG_TRACE)
2763 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* The whole input frame is reported as processed. */
2765 return frame->n_vectors;
2769 /** @brief IPv4 rewrite node.
2772 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2773 header checksum, fetch the ip adjacency, check the outbound mtu,
2774 apply the adjacency rewrite, and send pkts to the adjacency
2775 rewrite header's rewrite_next_index.
2777 @param vm vlib_main_t corresponding to the current thread
2778 @param node vlib_node_runtime_t
2779 @param frame vlib_frame_t whose contents should be dispatched
2781 @par Graph mechanics: buffer metadata, next index usage
2784 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2785 - the rewrite adjacency index
2786 - <code>adj->lookup_next_index</code>
2787 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2788 the packet will be dropped.
2789 - <code>adj->rewrite_header</code>
2790 - Rewrite string length, rewrite string, next_index
2793 - <code>b->current_data, b->current_length</code>
2794 - Updated net of applying the rewrite string
2796 <em>Next Indices:</em>
2797 - <code> adj->rewrite_header.next_index </code>
/* Node function registered for "ip4-rewrite".  Calls ip4_rewrite_inline
 * with is_midchain = 0 and is_mcast = 0; do_counters is 1 when
 * adj_are_counters_enabled () is true, otherwise 0. */
2801 ip4_rewrite (vlib_main_t * vm,
2802 vlib_node_runtime_t * node, vlib_frame_t * frame)
2804 if (adj_are_counters_enabled ())
2805 return ip4_rewrite_inline (vm, node, frame, 1, 0, 0);
2807 return ip4_rewrite_inline (vm, node, frame, 0, 0, 0);
/* Node function registered for "ip4-midchain".  Calls ip4_rewrite_inline
 * with is_midchain = 1 and is_mcast = 0; do_counters is 1 when
 * adj_are_counters_enabled () is true, otherwise 0. */
2811 ip4_midchain (vlib_main_t * vm,
2812 vlib_node_runtime_t * node, vlib_frame_t * frame)
2814 if (adj_are_counters_enabled ())
2815 return ip4_rewrite_inline (vm, node, frame, 1, 1, 0);
2817 return ip4_rewrite_inline (vm, node, frame, 0, 1, 0);
/* Node function registered for "ip4-rewrite-mcast".  Calls
 * ip4_rewrite_inline with is_midchain = 0 and is_mcast = 1; do_counters is
 * 1 when adj_are_counters_enabled () is true, otherwise 0. */
2821 ip4_rewrite_mcast (vlib_main_t * vm,
2822 vlib_node_runtime_t * node, vlib_frame_t * frame)
2824 if (adj_are_counters_enabled ())
2825 return ip4_rewrite_inline (vm, node, frame, 1, 0, 1);
2827 return ip4_rewrite_inline (vm, node, frame, 0, 0, 1);
/* Graph-node registration for "ip4-rewrite": dispatch function ip4_rewrite,
 * u32 vector elements, and the two next nodes indexed by
 * ip4_rewrite_next_t.
 * NOTE(review): some initializer lines (e.g. the next-node count and the
 * closing brace) are not visible in this listing. */
2831 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2832 .function = ip4_rewrite,
2833 .name = "ip4-rewrite",
2834 .vector_size = sizeof (u32),
2836 .format_trace = format_ip4_rewrite_trace,
2840 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2841 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2844 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Graph-node registration for "ip4-rewrite-mcast": dispatch function
 * ip4_rewrite_mcast, declared a sibling of "ip4-rewrite" instead of
 * listing its own next nodes. */
2846 VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = {
2847 .function = ip4_rewrite_mcast,
2848 .name = "ip4-rewrite-mcast",
2849 .vector_size = sizeof (u32),
2851 .format_trace = format_ip4_rewrite_trace,
2852 .sibling_of = "ip4-rewrite",
2854 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_mcast_node, ip4_rewrite_mcast)
/* Graph-node registration for "ip4-midchain": dispatch function
 * ip4_midchain, declared a sibling of "ip4-rewrite".  Unlike the other two
 * rewrite nodes it uses format_ip4_forward_next_trace for tracing. */
2856 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2857 .function = ip4_midchain,
2858 .name = "ip4-midchain",
2859 .vector_size = sizeof (u32),
2860 .format_trace = format_ip4_forward_next_trace,
2861 .sibling_of = "ip4-rewrite",
2863 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain);
/*
 * CLI handler for "set interface ip table <interface> <table-id>".
 *
 * Parses an interface and a numeric table id from the input.  It builds an
 * error when the interface cannot be parsed ("unknown interface"), when no
 * table id follows ("expected table id"), or when the address walk below
 * finds an address on the interface ("interface ... has address ...").
 * Otherwise it finds or creates (and locks) the IPv4 unicast and multicast
 * FIB tables for table_id and records their indices for sw_if_index.
 *
 * @param vm    vlib main (unused in the visible lines)
 * @param input CLI input to parse
 * @param cmd   CLI command descriptor (unused in the visible lines)
 * @return a clib_error_t on failure
 *         NOTE(review): the return statements are not visible in this
 *         listing; presumably `error` (0 on success) is returned.
 */
2866 static clib_error_t *
2867 add_del_interface_table (vlib_main_t * vm,
2868 unformat_input_t * input, vlib_cli_command_t * cmd)
2870 vnet_main_t *vnm = vnet_get_main ();
2871 ip_interface_address_t *ia;
2872 clib_error_t *error = 0;
2873 u32 sw_if_index, table_id;
2877 if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2879 error = clib_error_return (0, "unknown interface `%U'",
2880 format_unformat_error, input);
2884 if (unformat (input, "%d", &table_id))
2888 error = clib_error_return (0, "expected table id `%U'",
2889 format_unformat_error, input);
2894 * If the interface already has in IP address, then a change int
2895 * VRF is not allowed. The IP address applied must first be removed.
2896 * We do not do that automatically here, since VPP has no knowledge
2897 * of whether thoses subnets are valid in the destination VRF.
2900 foreach_ip_interface_address (&ip4_main.lookup_main,
2902 1 /* honor unnumbered */,
2906 a = ip_interface_address_get_address (&ip4_main.lookup_main, ia);
2907 error = clib_error_return (0, "interface %U has address %U",
2908 format_vnet_sw_if_index_name, vnm,
2910 format_ip4_address, a);
2916 ip4_main_t *im = &ip4_main;
/* Unicast table: find or create it, then bind the interface to it. */
2919 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2921 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2922 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
/* Multicast table: the same fib_index variable is reused for the mfib
 * index. */
2924 fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id);
2925 vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
2926 im->mfib_index_by_sw_if_index[sw_if_index] = fib_index;
2934 * Place the indicated interface into the supplied IPv4 FIB table (also known
2935 * as a VRF). If the FIB table does not exist, this command creates it. To
2936 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2937 * FIB table will only be displayed if a route has been added to the table, or
2938 * an IP Address is assigned to an interface in the table (which adds a route
2941 * @note IP addresses added after setting the interface IP table are added to
2942 * the indicated FIB table. If an IP address is added prior to changing the
2943 * table then this is an error. The control plane must remove these addresses
2944 * first and then change the table. VPP will not automatically move the
2945 * addresses from the old to the new table as it does not know the validity
2949 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2950 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Registers the "set interface ip table" CLI path and binds it to
 * add_del_interface_table. */
2953 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
2955 .path = "set interface ip table",
2956 .function = add_del_interface_table,
2957 .short_help = "set interface ip table <interface> <table-id>",
/*
 * Cross-checks two lookups of address `a` in the FIB at index fib_index0.
 *
 * Walks the FIB's mtrie with four lookup steps (step indices 0..3),
 * substitutes the mtrie's default leaf when the walk ends on
 * IP4_FIB_MTRIE_LEAF_EMPTY, and compares the index stored in the resulting
 * leaf with the value returned by ip4_fib_table_lookup_lb for the same
 * address.
 *
 * @param a           address to look up
 * @param fib_index0  index passed to ip4_fib_get
 * @return non-zero when both lookups agree, zero otherwise
 */
2962 ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
2964 ip4_fib_mtrie_t *mtrie0;
2965 ip4_fib_mtrie_leaf_t leaf0;
2968 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2970 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2971 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2972 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2973 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2974 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2976 /* Handle default route. */
2977 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2979 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2981 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
/*
 * CLI handler for "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]".
 *
 * Accepts, in any order, "table <n>", "count <f>" (parsed with %f) and an
 * IPv4 base address; any other token yields an "unknown input" error.
 * It then calls ip4_lookup_validate n times, adding one (in host byte
 * order) to the address after each call, and prints either the number of
 * errors or a "No errors" line.
 *
 * NOTE(review): the declarations and the statements that derive n from
 * count and that bump `errors` are not visible in this listing.
 */
2984 static clib_error_t *
2985 test_lookup_command_fn (vlib_main_t * vm,
2986 unformat_input_t * input, vlib_cli_command_t * cmd)
2993 ip4_address_t ip4_base_address;
2996 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
2998 if (unformat (input, "table %d", &table_id))
3000 /* Make sure the entry exists. */
/* NOTE(review): the parsed value is handed to ip4_fib_get as an index
 * before any check is visible; confirm ip4_fib_get tolerates arbitrary
 * values. */
3001 fib = ip4_fib_get (table_id);
3002 if ((fib) && (fib->index != table_id))
3003 return clib_error_return (0, "<fib-index> %d does not exist",
3006 else if (unformat (input, "count %f", &count))
3009 else if (unformat (input, "%U",
3010 unformat_ip4_address, &ip4_base_address))
3013 return clib_error_return (0, "unknown input `%U'",
3014 format_unformat_error, input);
3019 for (i = 0; i < n; i++)
3021 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: convert to host order, add one, convert back. */
3024 ip4_base_address.as_u32 =
3025 clib_host_to_net_u32 (1 +
3026 clib_net_to_host_u32 (ip4_base_address.as_u32));
3030 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3032 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3038 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3039 * given FIB table to determine if there is a conflict with the
3040 * adjacency table. The fib-id can be determined by using the
3041 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3044 * @todo This command uses fib-id, other commands use table-id (not
3045 * just a name, they are different indexes). Would like to change this
3046 * to table-id for consistency.
3049 * Example of how to run the test lookup command:
3050 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3051 * No errors in 2 lookups
/* Registers the "test lookup" CLI path and binds it to
 * test_lookup_command_fn. */
3055 VLIB_CLI_COMMAND (lookup_test_command, static) =
3057 .path = "test lookup",
3058 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3059 .function = test_lookup_command_fn,
/*
 * Stores flow_hash_config on the IPv4 FIB identified by table_id.
 *
 * table_id is looked up in im4->fib_index_by_table_id; the FIB at the
 * resulting index has its flow_hash_config field overwritten.
 *
 * @param table_id          user-visible table id to look up
 * @param flow_hash_config  value written to fib->flow_hash_config
 * @return VNET_API_ERROR_NO_SUCH_FIB on the failure path
 *         NOTE(review): the guard for that return (presumably a failed
 *         hash lookup) and the success return are not visible in this
 *         listing.
 */
3064 vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3066 ip4_main_t *im4 = &ip4_main;
3068 uword *p = hash_get (im4->fib_index_by_table_id, table_id);
3071 return VNET_API_ERROR_NO_SUCH_FIB;
3073 fib = ip4_fib_get (p[0]);
3075 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler for "set ip flow-hash table <table-id> [src] [dst] ...".
 *
 * Parses "table <n>" plus any of the flag names expanded from
 * foreach_flow_hash_bit, OR-ing each matched flag's value into
 * flow_hash_config, then applies the result with vnet_set_ip4_flow_hash.
 * VNET_API_ERROR_NO_SUCH_FIB is reported as "no such FIB table"; another
 * branch logs a "BUG: illegal flow hash config" warning.
 *
 * NOTE(review): the helper macro definition around the `#a` line, the
 * `matched` bookkeeping and the switch on rv are only partly visible in
 * this listing.
 */
3079 static clib_error_t *
3080 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3081 unformat_input_t * input,
3082 vlib_cli_command_t * cmd)
3086 u32 flow_hash_config = 0;
3089 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3091 if (unformat (input, "table %d", &table_id))
3094 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3095 foreach_flow_hash_bit
3102 return clib_error_return (0, "unknown input `%U'",
3103 format_unformat_error, input);
3105 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3111 case VNET_API_ERROR_NO_SUCH_FIB:
3112 return clib_error_return (0, "no such FIB table %d", table_id);
3115 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3123 * Configure the set of IPv4 fields used by the flow hash.
3126 * Example of how to set the flow hash on a given table:
3127 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3128 * Example of how to display the configured flow hash:
3129 * @cliexstart{show ip fib}
3130 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3133 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3134 * [0] [@0]: dpo-drop ip6
3137 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3138 * [0] [@0]: dpo-drop ip6
3141 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3142 * [0] [@0]: dpo-drop ip6
3145 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3146 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3149 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3150 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3151 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3152 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3153 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3156 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3157 * [0] [@0]: dpo-drop ip6
3158 * 255.255.255.255/32
3160 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3161 * [0] [@0]: dpo-drop ip6
3162 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3165 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3166 * [0] [@0]: dpo-drop ip6
3169 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3170 * [0] [@0]: dpo-drop ip6
3173 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3174 * [0] [@4]: ipv4-glean: af_packet0
3177 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3178 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3181 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3182 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3185 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3186 * [0] [@4]: ipv4-glean: af_packet1
3189 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3190 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3193 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3194 * [0] [@0]: dpo-drop ip6
3197 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3198 * [0] [@0]: dpo-drop ip6
3199 * 255.255.255.255/32
3201 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3202 * [0] [@0]: dpo-drop ip6
/* Registers the "set ip flow-hash" CLI path and binds it to
 * set_ip_flow_hash_command_fn. */
3206 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
3208 .path = "set ip flow-hash",
3210 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3211 .function = set_ip_flow_hash_command_fn,
/*
 * Binds (or unbinds) a classifier table to an interface for IPv4.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * slot in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is not ~0 and is a free slot in the classifier table pool.
 * Otherwise it records table_index for the interface and, if the interface
 * has a first IPv4 address, either adds a FIB_SOURCE_CLASSIFY special entry
 * for that address pointing at a classify DPO (table_index != ~0) or
 * removes that entry.
 *
 * @param vm           vlib main (unused in the visible lines)
 * @param sw_if_index  interface to configure
 * @param table_index  classifier table index; ~0 takes the removal branch
 *                     (the parameter's declaration line is not visible in
 *                     this listing)
 * @return one of the error codes above on failure
 *         NOTE(review): the success return is not visible in this listing.
 */
3216 vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3219 vnet_main_t *vnm = vnet_get_main ();
3220 vnet_interface_main_t *im = &vnm->interface_main;
3221 ip4_main_t *ipm = &ip4_main;
3222 ip_lookup_main_t *lm = &ipm->lookup_main;
3223 vnet_classify_main_t *cm = &vnet_classify_main;
3224 ip4_address_t *if_addr;
3226 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3227 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3229 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3230 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Remember the table per interface, growing the vector if needed. */
3232 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3233 lm->classify_table_index_by_sw_if_index[sw_if_index] = table_index;
3235 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* FIB state is only touched when the interface has an address. */
3237 if (NULL != if_addr)
3239 fib_prefix_t pfx = {
3241 .fp_proto = FIB_PROTOCOL_IP4,
3242 .fp_addr.ip4 = *if_addr,
3246 fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
3250 if (table_index != (u32) ~ 0)
3252 dpo_id_t dpo = DPO_INVALID;
3257 classify_dpo_create (DPO_PROTO_IP4, table_index));
3259 fib_table_entry_special_dpo_add (fib_index,
3261 FIB_SOURCE_CLASSIFY,
3262 FIB_ENTRY_FLAG_NONE, &dpo);
3267 fib_table_entry_special_remove (fib_index,
3268 &pfx, FIB_SOURCE_CLASSIFY);
/*
 * CLI handler for "set ip classify intfc <interface> table-index <idx>".
 *
 * Parses "table-index <n>" and "intfc <interface>" in any order.  Both are
 * mandatory: a missing table index or interface produces an error.  The
 * values are applied with vnet_set_ip4_classify_intfc, whose
 * NO_MATCHING_INTERFACE and NO_SUCH_ENTRY results are turned into
 * "No such interface" and "No such classifier table" errors.
 *
 * NOTE(review): the switch on rv and the success return are only partly
 * visible in this listing.
 */
3275 static clib_error_t *
3276 set_ip_classify_command_fn (vlib_main_t * vm,
3277 unformat_input_t * input,
3278 vlib_cli_command_t * cmd)
3280 u32 table_index = ~0;
3281 int table_index_set = 0;
3282 u32 sw_if_index = ~0;
3285 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
3287 if (unformat (input, "table-index %d", &table_index))
3288 table_index_set = 1;
3289 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3290 vnet_get_main (), &sw_if_index))
/* A separate flag is kept because ~0 is also a value table_index can
 * legitimately hold after parsing. */
3296 if (table_index_set == 0)
3297 return clib_error_return (0, "classify table-index must be specified");
3299 if (sw_if_index == ~0)
3300 return clib_error_return (0, "interface / subif must be specified");
3302 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3309 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3310 return clib_error_return (0, "No such interface");
3312 case VNET_API_ERROR_NO_SUCH_ENTRY:
3313 return clib_error_return (0, "No such classifier table");
3319 * Assign a classification table to an interface. The classification
3320 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3321 * commands. Once the table is created, use this command to filter packets
3325 * Example of how to assign a classification table to an interface:
3326 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the "set ip classify" CLI path and binds it to
 * set_ip_classify_command_fn. */
3329 VLIB_CLI_COMMAND (set_ip_classify_command, static) =
3331 .path = "set ip classify",
3333 "set ip classify intfc <interface> table-index <classify-idx>",
3334 .function = set_ip_classify_command_fn,
3339 * fd.io coding-style-patch-verification: ON
3342 * eval: (c-set-style "gnu")