2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
56 * @brief IPv4 Forwarding.
58 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the trace helper called by the lookup and
   load-balance nodes below; its definition appears later in this file. */
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
/*
 * ip4_lookup_inline: per-frame IPv4 destination lookup (called by ip4_lookup).
 *
 * For each buffer in the frame:
 *  - select a FIB index (table of the RX interface, overridden by
 *    sw_if_index[VLIB_TX] when that is not ~0),
 *  - resolve the destination address to a load-balance index through the
 *    FIB mtrie (4 lookup steps, default leaf when the result is empty),
 *  - compute a flow hash only when the load-balance has more than 1 bucket,
 *  - pick the bucket DPO, store its index in ip.adj_index[VLIB_TX] and use
 *    its dpoi_next_node as the next node,
 *  - increment the per-load-balance "to" combined counter.
 *
 * vm, node, frame - vlib main, node runtime and frame being dispatched.
 * lookup_for_responses_to_locally_received_packets - when non-zero the mtrie
 *   walk is skipped and the load-balance index is read from
 *   ip.adj_index[VLIB_RX].
 *
 * Returns frame->n_vectors.
 */
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t * im = &ip4_main;
74 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, * from, * to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next,
86 to_next, n_left_to_next);
/* Quad loop: 4 packets per iteration; needs 8 left so that the
   following 4 can be prefetched. */
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t * p0, * p1, * p2, * p3;
91 ip4_header_t * ip0, * ip1, * ip2, * ip3;
92 __attribute__((unused)) tcp_header_t * tcp0, * tcp1, * tcp2, * tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t * lb0, * lb1, * lb2, * lb3;
95 ip4_fib_mtrie_t * mtrie0, * mtrie1, * mtrie2, * mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t * dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100 __attribute__((unused)) u32 pi2, fib_index2, lb_index2, is_tcp_udp2;
101 __attribute__((unused)) u32 pi3, fib_index3, lb_index3, is_tcp_udp3;
102 flow_hash_config_t flow_hash_config0, flow_hash_config1;
103 flow_hash_config_t flow_hash_config2, flow_hash_config3;
104 u32 hash_c0, hash_c1, hash_c2, hash_c3;
105 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
107 /* Prefetch next iteration. */
109 vlib_buffer_t * p4, * p5, * p6, * p7;
111 p4 = vlib_get_buffer (vm, from[4]);
112 p5 = vlib_get_buffer (vm, from[5]);
113 p6 = vlib_get_buffer (vm, from[6]);
114 p7 = vlib_get_buffer (vm, from[7]);
116 vlib_prefetch_buffer_header (p4, LOAD);
117 vlib_prefetch_buffer_header (p5, LOAD);
118 vlib_prefetch_buffer_header (p6, LOAD);
119 vlib_prefetch_buffer_header (p7, LOAD);
121 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue the 4 buffers to the current next frame. */
127 pi0 = to_next[0] = from[0];
128 pi1 = to_next[1] = from[1];
129 pi2 = to_next[2] = from[2];
130 pi3 = to_next[3] = from[3];
137 p0 = vlib_get_buffer (vm, pi0);
138 p1 = vlib_get_buffer (vm, pi1);
139 p2 = vlib_get_buffer (vm, pi2);
140 p3 = vlib_get_buffer (vm, pi3);
142 ip0 = vlib_buffer_get_current (p0);
143 ip1 = vlib_buffer_get_current (p1);
144 ip2 = vlib_buffer_get_current (p2);
145 ip3 = vlib_buffer_get_current (p3);
147 dst_addr0 = &ip0->dst_address;
148 dst_addr1 = &ip1->dst_address;
149 dst_addr2 = &ip2->dst_address;
150 dst_addr3 = &ip3->dst_address;
/* FIB selection: table bound to the RX interface, unless
   sw_if_index[VLIB_TX] carries an explicit FIB index (!= ~0). */
152 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
153 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
154 fib_index2 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155 fib_index3 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p3)->sw_if_index[VLIB_RX]);
156 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
157 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
158 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
159 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
160 fib_index2 = (vnet_buffer(p2)->sw_if_index[VLIB_TX] == (u32)~0) ?
161 fib_index2 : vnet_buffer(p2)->sw_if_index[VLIB_TX];
162 fib_index3 = (vnet_buffer(p3)->sw_if_index[VLIB_TX] == (u32)~0) ?
163 fib_index3 : vnet_buffer(p3)->sw_if_index[VLIB_TX];
/* mtrie walk, steps 0..3, interleaved with unrelated header work. */
166 if (! lookup_for_responses_to_locally_received_packets)
168 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
169 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
170 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
171 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
173 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
175 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
176 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
177 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
178 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
181 tcp0 = (void *) (ip0 + 1);
182 tcp1 = (void *) (ip1 + 1);
183 tcp2 = (void *) (ip2 + 1);
184 tcp3 = (void *) (ip3 + 1);
/* Each is_tcp_udpN is derived from its own packet's header
   (is_tcp_udp3 previously read ip1 by mistake). */
186 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
187 || ip0->protocol == IP_PROTOCOL_UDP);
188 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
189 || ip1->protocol == IP_PROTOCOL_UDP);
190 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
191 || ip2->protocol == IP_PROTOCOL_UDP);
192 is_tcp_udp3 = (ip3->protocol == IP_PROTOCOL_TCP
193 || ip3->protocol == IP_PROTOCOL_UDP);
195 if (! lookup_for_responses_to_locally_received_packets)
197 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
198 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
199 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
200 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
203 if (! lookup_for_responses_to_locally_received_packets)
205 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
206 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
207 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
208 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
211 if (! lookup_for_responses_to_locally_received_packets)
213 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
214 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
215 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
216 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
/* Local-response case: the load-balance index was left in
   ip.adj_index[VLIB_RX] by an earlier node. */
219 if (lookup_for_responses_to_locally_received_packets)
221 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
222 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
223 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
224 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
228 /* Handle default route. */
229 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
230 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
231 leaf2 = (leaf2 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
232 leaf3 = (leaf3 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
233 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
234 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
235 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
236 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
239 lb0 = load_balance_get (lb_index0);
240 lb1 = load_balance_get (lb_index1);
241 lb2 = load_balance_get (lb_index2);
242 lb3 = load_balance_get (lb_index3);
244 /* Use flow hash to compute multipath adjacency. */
245 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
246 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
247 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
248 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
249 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
251 flow_hash_config0 = lb0->lb_hash_config;
252 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
253 ip4_compute_flow_hash (ip0, flow_hash_config0);
255 if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
257 flow_hash_config1 = lb1->lb_hash_config;
258 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
259 ip4_compute_flow_hash (ip1, flow_hash_config1);
261 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
263 flow_hash_config2 = lb2->lb_hash_config;
264 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
265 ip4_compute_flow_hash (ip2, flow_hash_config2);
267 if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
269 flow_hash_config3 = lb3->lb_hash_config;
270 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
271 ip4_compute_flow_hash (ip3, flow_hash_config3);
/* Bucket selection below masks the hash with lb_n_buckets_minus_1,
   which relies on a non-zero power-of-2 bucket count. */
274 ASSERT (lb0->lb_n_buckets > 0);
275 ASSERT (is_pow2 (lb0->lb_n_buckets));
276 ASSERT (lb1->lb_n_buckets > 0);
277 ASSERT (is_pow2 (lb1->lb_n_buckets));
278 ASSERT (lb2->lb_n_buckets > 0);
279 ASSERT (is_pow2 (lb2->lb_n_buckets));
280 ASSERT (lb3->lb_n_buckets > 0);
281 ASSERT (is_pow2 (lb3->lb_n_buckets));
283 dpo0 = load_balance_get_bucket_i(lb0,
285 (lb0->lb_n_buckets_minus_1)));
286 dpo1 = load_balance_get_bucket_i(lb1,
288 (lb1->lb_n_buckets_minus_1)));
289 dpo2 = load_balance_get_bucket_i(lb2,
291 (lb2->lb_n_buckets_minus_1)));
292 dpo3 = load_balance_get_bucket_i(lb3,
294 (lb3->lb_n_buckets_minus_1)));
296 next0 = dpo0->dpoi_next_node;
297 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
298 next1 = dpo1->dpoi_next_node;
299 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
300 next2 = dpo2->dpoi_next_node;
301 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
302 next3 = dpo3->dpoi_next_node;
303 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/* Count one packet and its buffer-chain length in bytes.  The byte
   count must match the single-packet loop below (no extra
   ethernet-header bytes), otherwise totals would depend on where a
   packet happened to sit in the frame. */
305 vlib_increment_combined_counter
306 (cm, cpu_index, lb_index0, 1,
307 vlib_buffer_length_in_chain (vm, p0));
309 vlib_increment_combined_counter
310 (cm, cpu_index, lb_index1, 1,
311 vlib_buffer_length_in_chain (vm, p1));
313 vlib_increment_combined_counter
314 (cm, cpu_index, lb_index2, 1,
315 vlib_buffer_length_in_chain (vm, p2));
317 vlib_increment_combined_counter
318 (cm, cpu_index, lb_index3, 1,
319 vlib_buffer_length_in_chain (vm, p3));
/* Fix up the speculative enqueue for any packet whose next differs. */
322 vlib_validate_buffer_enqueue_x4 (vm, node, next,
323 to_next, n_left_to_next,
325 next0, next1, next2, next3);
/* Single loop: remaining packets, one per iteration, same steps as above. */
328 while (n_left_from > 0 && n_left_to_next > 0)
332 __attribute__((unused)) tcp_header_t * tcp0;
333 ip_lookup_next_t next0;
334 const load_balance_t *lb0;
335 ip4_fib_mtrie_t * mtrie0;
336 ip4_fib_mtrie_leaf_t leaf0;
337 ip4_address_t * dst_addr0;
338 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
339 flow_hash_config_t flow_hash_config0;
340 const dpo_id_t *dpo0;
346 p0 = vlib_get_buffer (vm, pi0);
348 ip0 = vlib_buffer_get_current (p0);
350 dst_addr0 = &ip0->dst_address;
352 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
353 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
354 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
356 if (! lookup_for_responses_to_locally_received_packets)
358 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
360 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
362 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
365 tcp0 = (void *) (ip0 + 1);
367 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
368 || ip0->protocol == IP_PROTOCOL_UDP);
370 if (! lookup_for_responses_to_locally_received_packets)
371 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
373 if (! lookup_for_responses_to_locally_received_packets)
374 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
376 if (! lookup_for_responses_to_locally_received_packets)
377 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
379 if (lookup_for_responses_to_locally_received_packets)
380 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
383 /* Handle default route. */
384 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
385 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
388 lb0 = load_balance_get (lbi0);
390 /* Use flow hash to compute multipath adjacency. */
391 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
392 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
394 flow_hash_config0 = lb0->lb_hash_config;
396 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
397 ip4_compute_flow_hash (ip0, flow_hash_config0);
400 ASSERT (lb0->lb_n_buckets > 0);
401 ASSERT (is_pow2 (lb0->lb_n_buckets));
403 dpo0 = load_balance_get_bucket_i(lb0,
405 (lb0->lb_n_buckets_minus_1)));
407 next0 = dpo0->dpoi_next_node;
408 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
410 vlib_increment_combined_counter
411 (cm, cpu_index, lbi0, 1,
412 vlib_buffer_length_in_chain (vm, p0));
/* The packet's next differs from the frame being filled: hand the
   current frame back and obtain another one. */
419 if (PREDICT_FALSE (next0 != next))
422 vlib_put_next_frame (vm, node, next, n_left_to_next);
424 vlib_get_next_frame (vm, node, next,
425 to_next, n_left_to_next);
432 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the lookup so ip.adj_index[VLIB_TX] is already set. */
435 if (node->flags & VLIB_NODE_FLAG_TRACE)
436 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
438 return frame->n_vectors;
441 /** @brief IPv4 lookup node.
444 This is the main IPv4 lookup dispatch node.
446 @param vm vlib_main_t corresponding to the current thread
447 @param node vlib_node_runtime_t
448 @param frame vlib_frame_t whose contents should be dispatched
450 @par Graph mechanics: buffer metadata, next index usage
453 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
454 - Indicates the @c sw_if_index value of the interface that the
455 packet was received on.
456 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
457 - When the value is @c ~0 then the node performs a longest prefix
458 match (LPM) for the packet destination address in the FIB attached
459 to the receive interface.
460 - Otherwise perform LPM for the packet destination address in the
461 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
462 value (0, 1, ...) and not a VRF id.
465 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
466 - The index (@c dpoi_index) of the DPO chosen from the lookup result load-balance.
469 - Dispatches the packet to the node index found in
470 dpo_id_t @c dpo->dpoi_next_node
471 (where @c dpo is the load-balance bucket selected for the packet).
474 ip4_lookup (vlib_main_t * vm,
475 vlib_node_runtime_t * node,
476 vlib_frame_t * frame)
478 return ip4_lookup_inline (vm, node, frame,
479 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter used by the node registrations below; defined later in this file. */
483 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node "ip4-lookup": runs ip4_lookup on frames of u32 buffer indices;
   its next nodes are the IP_LOOKUP_N_NEXT entries of IP4_LOOKUP_NEXT_NODES. */
485 VLIB_REGISTER_NODE (ip4_lookup_node) = {
486 .function = ip4_lookup,
487 .name = "ip4-lookup",
488 .vector_size = sizeof (u32),
490 .format_trace = format_ip4_lookup_trace,
491 .n_next_nodes = IP_LOOKUP_N_NEXT,
492 .next_nodes = IP4_LOOKUP_NEXT_NODES,
495 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/*
 * ip4_load_balance: node function of "ip4-load-balance".
 *
 * Each buffer arrives with a load-balance index in ip.adj_index[VLIB_TX].
 * The node picks a bucket of that load-balance, replaces
 * ip.adj_index[VLIB_TX] with the bucket DPO's index, counts the packet in
 * the load-balance "via" counters and enqueues it to the DPO's next node.
 *
 * Flow hash handling (only when the load-balance has more than 1 bucket):
 *  - a non-zero ip.flow_hash left by an earlier node is re-used, shifted
 *    right by one bit so successive recursion levels do not all pick their
 *    bucket from the same bits;
 *  - otherwise the hash is computed from the IP header using this
 *    load-balance's own lb_hash_config.
 * The buffer's flow_hash must therefore not be cleared before it is tested,
 * and the hash configuration, not the (zero) hash value, is what
 * ip4_compute_flow_hash takes as its second argument.
 *
 * Returns frame->n_vectors.
 */
498 ip4_load_balance (vlib_main_t * vm,
499 vlib_node_runtime_t * node,
500 vlib_frame_t * frame)
502 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
503 u32 n_left_from, n_left_to_next, * from, * to_next;
504 ip_lookup_next_t next;
505 u32 cpu_index = os_get_cpu_number();
507 from = vlib_frame_vector_args (frame);
508 n_left_from = frame->n_vectors;
509 next = node->cached_next_index;
/* Trace before processing: ip.adj_index[VLIB_TX] still holds the
   incoming load-balance index at this point. */
511 if (node->flags & VLIB_NODE_FLAG_TRACE)
512 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
514 while (n_left_from > 0)
516 vlib_get_next_frame (vm, node, next,
517 to_next, n_left_to_next);
/* Dual loop: 2 packets per iteration while prefetching the next 2. */
520 while (n_left_from >= 4 && n_left_to_next >= 2)
522 ip_lookup_next_t next0, next1;
523 const load_balance_t *lb0, *lb1;
524 vlib_buffer_t * p0, *p1;
525 u32 pi0, lbi0, hc0, pi1, lbi1, hc1;
526 const ip4_header_t *ip0, *ip1;
527 const dpo_id_t *dpo0, *dpo1;
529 /* Prefetch next iteration. */
531 vlib_buffer_t * p2, * p3;
533 p2 = vlib_get_buffer (vm, from[2]);
534 p3 = vlib_get_buffer (vm, from[3]);
536 vlib_prefetch_buffer_header (p2, STORE);
537 vlib_prefetch_buffer_header (p3, STORE);
539 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
540 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
543 pi0 = to_next[0] = from[0];
544 pi1 = to_next[1] = from[1];
551 p0 = vlib_get_buffer (vm, pi0);
552 p1 = vlib_get_buffer (vm, pi1);
554 ip0 = vlib_buffer_get_current (p0);
555 ip1 = vlib_buffer_get_current (p1);
556 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
557 lbi1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
559 lb0 = load_balance_get(lbi0);
560 lb1 = load_balance_get(lbi1);
563 * this node is for via FIBs we can re-use the hash value from the
564 * to node if present.
565 * We don't want to use the same hash value at each level in the recursion
566 * graph as that would lead to polarisation
/* Only the local hash values start at 0 (bucket 0 for single-bucket
   load-balances); the buffer's flow_hash is left intact for the test below. */
568 hc0 = 0;
569 hc1 = 0;
571 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
573 if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
575 hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
579 hc0 = vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, lb0->lb_hash_config);
582 if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
584 if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
586 hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
590 hc1 = vnet_buffer(p1)->ip.flow_hash = ip4_compute_flow_hash(ip1, lb1->lb_hash_config);
594 dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
595 dpo1 = load_balance_get_bucket_i(lb1, hc1 & (lb1->lb_n_buckets_minus_1));
597 next0 = dpo0->dpoi_next_node;
598 next1 = dpo1->dpoi_next_node;
600 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
601 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
603 vlib_increment_combined_counter
604 (cm, cpu_index, lbi0, 1,
605 vlib_buffer_length_in_chain (vm, p0));
606 vlib_increment_combined_counter
607 (cm, cpu_index, lbi1, 1,
608 vlib_buffer_length_in_chain (vm, p1));
610 vlib_validate_buffer_enqueue_x2 (vm, node, next,
611 to_next, n_left_to_next,
612 pi0, pi1, next0, next1);
/* Single loop: remaining packets, same steps as the dual loop. */
615 while (n_left_from > 0 && n_left_to_next > 0)
617 ip_lookup_next_t next0;
618 const load_balance_t *lb0;
621 const ip4_header_t *ip0;
622 const dpo_id_t *dpo0;
631 p0 = vlib_get_buffer (vm, pi0);
633 ip0 = vlib_buffer_get_current (p0);
634 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
636 lb0 = load_balance_get(lbi0);
638 hc0 = 0;
639 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
641 if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
643 hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
647 hc0 = vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, lb0->lb_hash_config);
651 dpo0 = load_balance_get_bucket_i(lb0, hc0 & (lb0->lb_n_buckets_minus_1));
653 next0 = dpo0->dpoi_next_node;
654 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
656 vlib_increment_combined_counter
657 (cm, cpu_index, lbi0, 1,
658 vlib_buffer_length_in_chain (vm, p0));
660 vlib_validate_buffer_enqueue_x1 (vm, node, next,
661 to_next, n_left_to_next,
665 vlib_put_next_frame (vm, node, next, n_left_to_next);
668 return frame->n_vectors;
/* Graph node "ip4-load-balance": runs ip4_load_balance and is declared a
   sibling of "ip4-lookup"; it reuses the lookup trace formatter. */
671 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
672 .function = ip4_load_balance,
673 .name = "ip4-load-balance",
674 .vector_size = sizeof (u32),
675 .sibling_of = "ip4-lookup",
677 .format_trace = format_ip4_lookup_trace,
680 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
/*
 * Iterates the IPv4 addresses configured on sw_if_index (unnumbered
 * interfaces are honored) and reports one through *result_ia: the
 * ip_interface_address_t the iteration stopped on when an address was
 * found, 0 otherwise.
 * NOTE(review): the statements that set `result` and the return statement
 * are not visible here; presumably the first address visited is returned.
 */
682 /* get first interface address */
684 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
685 ip_interface_address_t ** result_ia)
687 ip_lookup_main_t * lm = &im->lookup_main;
688 ip_interface_address_t * ia = 0;
689 ip4_address_t * result = 0;
691 foreach_ip_interface_address (lm, ia, sw_if_index,
692 1 /* honor unnumbered */,
694 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
699 *result_ia = result ? ia : 0;
/*
 * Installs the FIB entries that go with interface address `a` in table
 * fib_index, all sourced from FIB_SOURCE_INTERFACE:
 *  - a CONNECTED|ATTACHED entry, whose adjacency is remembered in
 *    a->neighbor_probe_adj_index;
 *  - a CONNECTED|LOCAL entry;
 *  - when a classify table is configured for sw_if_index, a special entry
 *    whose DPO is created by classify_dpo_create.
 * NOTE(review): the prefix variable, several call arguments and the
 * conditions guarding each step are not visible here.
 */
704 ip4_add_interface_routes (u32 sw_if_index,
705 ip4_main_t * im, u32 fib_index,
706 ip_interface_address_t * a)
708 ip_lookup_main_t * lm = &im->lookup_main;
709 ip4_address_t * address = ip_interface_address_get_address (lm, a);
/* Prefix built from the interface address and its length. */
711 .fp_len = a->address_length,
712 .fp_proto = FIB_PROTOCOL_IP4,
713 .fp_addr.ip4 = *address,
/* Start with no neighbor-probe adjacency; set below from the new entry. */
716 a->neighbor_probe_adj_index = ~0;
720 fib_node_index_t fei;
722 fei = fib_table_entry_update_one_path(fib_index,
724 FIB_SOURCE_INTERFACE,
725 (FIB_ENTRY_FLAG_CONNECTED |
726 FIB_ENTRY_FLAG_ATTACHED),
728 NULL, /* No next-hop address */
730 ~0, // invalid FIB index
732 NULL, // no out-label stack
733 FIB_ROUTE_PATH_FLAG_NONE);
734 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* Only interfaces that have a slot in the classify table vector, and a
   table index other than ~0 in it, get a classify DPO entry. */
739 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
741 u32 classify_table_index =
742 lm->classify_table_index_by_sw_if_index [sw_if_index];
743 if (classify_table_index != (u32) ~0)
745 dpo_id_t dpo = DPO_INVALID;
750 classify_dpo_create(DPO_PROTO_IP4,
751 classify_table_index));
753 fib_table_entry_special_dpo_add(fib_index,
762 fib_table_entry_update_one_path(fib_index,
764 FIB_SOURCE_INTERFACE,
765 (FIB_ENTRY_FLAG_CONNECTED |
766 FIB_ENTRY_FLAG_LOCAL),
770 ~0, // invalid FIB index
772 NULL, // no out-label stack
773 FIB_ROUTE_PATH_FLAG_NONE);
/*
 * Removes FIB entries sourced from FIB_SOURCE_INTERFACE for the given
 * address/address_length in table fib_index; two fib_table_entry_delete
 * calls are made.
 * NOTE(review): the prefix passed to each delete and any guarding
 * condition are not visible here; presumably they mirror the two entries
 * added by ip4_add_interface_routes.
 */
777 ip4_del_interface_routes (ip4_main_t * im,
779 ip4_address_t * address,
783 .fp_len = address_length,
784 .fp_proto = FIB_PROTOCOL_IP4,
785 .fp_addr.ip4 = *address,
790 fib_table_entry_delete(fib_index,
792 FIB_SOURCE_INTERFACE);
796 fib_table_entry_delete(fib_index,
798 FIB_SOURCE_INTERFACE);
/*
 * Reference-counted enable/disable of IPv4 on a software interface.
 * im->ip_enabled_by_sw_if_index[sw_if_index] holds the count; the
 * "ip4-lookup" (unicast arc) and "ip4-lookup-multicast" (multicast arc)
 * features are toggled through vnet_feature_enable_disable.
 * NOTE(review): the early returns that follow the two counter tests are
 * not visible here; per the comment below they presumably skip the feature
 * calls unless the count moved between 0 and 1.
 */
802 ip4_sw_interface_enable_disable (u32 sw_if_index,
805 ip4_main_t * im = &ip4_main;
/* Grow the per-interface counter vector on demand, new slots set to 0. */
807 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
810 * enable/disable only on the 1<->0 transition
814 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
/* A disable must be paired with an earlier enable. */
819 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
820 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
823 vnet_feature_enable_disable ("ip4-unicast", "ip4-lookup", sw_if_index,
826 vnet_feature_enable_disable ("ip4-multicast", "ip4-lookup-multicast", sw_if_index,
/*
 * Adds or removes an IPv4 address on a software interface.
 *
 * Steps visible here:
 *  - reject an address that overlaps (in either direction) an address
 *    already present on the interface, returning a clib error;
 *  - update the interface address pool via ip_interface_address_add_del;
 *  - adjust the interface's IPv4 enable count
 *    (ip4_sw_interface_enable_disable with !is_del);
 *  - delete or add the matching interface routes;
 *  - call every registered add/del-address callback when the pool size
 *    changed.
 * NOTE(review): remaining parameters, the is_del tests and the return
 * path are not visible here.
 */
831 static clib_error_t *
832 ip4_add_del_interface_address_internal (vlib_main_t * vm,
834 ip4_address_t * address,
838 vnet_main_t * vnm = vnet_get_main();
839 ip4_main_t * im = &ip4_main;
840 ip_lookup_main_t * lm = &im->lookup_main;
841 clib_error_t * error = 0;
842 u32 if_address_index, elts_before;
843 ip4_address_fib_t ip4_af, * addr_fib = 0;
/* Pair the address with the FIB index bound to this interface. */
845 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
846 ip4_addr_fib_init (&ip4_af, address,
847 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
848 vec_add1 (addr_fib, ip4_af);
851 * there is no support for adj-fib handling in the presence of overlapping
852 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
857 /* When adding an address check that it does not conflict
858 with an existing address. */
859 ip_interface_address_t * ia;
860 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
861 0 /* honor unnumbered */,
863 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
865 if (ip4_destination_matches_route (im, address, x, ia->address_length)
866 || ip4_destination_matches_route (im, x, address, address_length))
867 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
868 format_ip4_address_and_length, address, address_length,
869 format_ip4_address_and_length, x, ia->address_length,
870 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size to detect whether the add/del changed it. */
874 elts_before = pool_elts (lm->if_address_pool);
876 error = ip_interface_address_add_del
886 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
889 ip4_del_interface_routes (im, ip4_af.fib_index, address,
892 ip4_add_interface_routes (sw_if_index,
893 im, ip4_af.fib_index,
895 (lm->if_address_pool, if_address_index));
897 /* Notify callbacks only if the pool grew/shrank; an unchanged pool means a duplicate address. */
898 if (elts_before != pool_elts (lm->if_address_pool))
900 ip4_add_del_interface_address_callback_t * cb;
901 vec_foreach (cb, im->add_del_interface_address_callbacks)
902 cb->function (im, cb->function_opaque, sw_if_index,
903 address, address_length,
/* Public entry point: forwards its arguments to
   ip4_add_del_interface_address_internal and returns that function's result. */
914 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
915 ip4_address_t * address, u32 address_length,
918 return ip4_add_del_interface_address_internal
919 (vm, sw_if_index, address, address_length,
/*
 * "ip4-unicast" feature arc, entered from "ip4-input" and
 * "ip4-input-no-checksum".  The runs_before constraints below give the
 * order:
 *   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
 *   ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
 *   vpath-input-ip4 -> ip4-vxlan-bypass -> ip4-lookup -> ip4-drop
 * "ip4-source-and-port-range-check-rx" is only constrained to run before
 * "ip4-policer-classify".
 */
923 /* Built-in ip4 unicast rx feature path definition */
924 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
926 .arc_name = "ip4-unicast",
927 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
928 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
931 VNET_FEATURE_INIT (ip4_flow_classify, static) = {
932 .arc_name = "ip4-unicast",
933 .node_name = "ip4-flow-classify",
934 .runs_before = VNET_FEATURES ("ip4-inacl"),
937 VNET_FEATURE_INIT (ip4_inacl, static) = {
938 .arc_name = "ip4-unicast",
939 .node_name = "ip4-inacl",
940 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
943 VNET_FEATURE_INIT (ip4_source_check_1, static) = {
944 .arc_name = "ip4-unicast",
945 .node_name = "ip4-source-check-via-rx",
946 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
949 VNET_FEATURE_INIT (ip4_source_check_2, static) = {
950 .arc_name = "ip4-unicast",
951 .node_name = "ip4-source-check-via-any",
952 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
955 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
956 .arc_name = "ip4-unicast",
957 .node_name = "ip4-source-and-port-range-check-rx",
958 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
961 VNET_FEATURE_INIT (ip4_policer_classify, static) = {
962 .arc_name = "ip4-unicast",
963 .node_name = "ip4-policer-classify",
964 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
967 VNET_FEATURE_INIT (ip4_ipsec, static) = {
968 .arc_name = "ip4-unicast",
969 .node_name = "ipsec-input-ip4",
970 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
973 VNET_FEATURE_INIT (ip4_vpath, static) = {
974 .arc_name = "ip4-unicast",
975 .node_name = "vpath-input-ip4",
976 .runs_before = VNET_FEATURES ("ip4-vxlan-bypass"),
979 VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = {
980 .arc_name = "ip4-unicast",
981 .node_name = "ip4-vxlan-bypass",
982 .runs_before = VNET_FEATURES ("ip4-lookup"),
985 VNET_FEATURE_INIT (ip4_lookup, static) = {
986 .arc_name = "ip4-unicast",
987 .node_name = "ip4-lookup",
988 .runs_before = VNET_FEATURES ("ip4-drop"),
991 VNET_FEATURE_INIT (ip4_drop, static) = {
992 .arc_name = "ip4-unicast",
993 .node_name = "ip4-drop",
994 .runs_before = 0, /* not before any other features */
/*
 * "ip4-multicast" feature arc, entered from "ip4-input" and
 * "ip4-input-no-checksum".  Order given by the runs_before constraints:
 *   vpath-input-ip4 -> ip4-lookup-multicast -> ip4-drop
 */
998 /* Built-in ip4 multicast rx feature path definition */
999 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
1001 .arc_name = "ip4-multicast",
1002 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
1003 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
1006 VNET_FEATURE_INIT (ip4_vpath_mc, static) = {
1007 .arc_name = "ip4-multicast",
1008 .node_name = "vpath-input-ip4",
1009 .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
1012 VNET_FEATURE_INIT (ip4_lookup_mc, static) = {
1013 .arc_name = "ip4-multicast",
1014 .node_name = "ip4-lookup-multicast",
1015 .runs_before = VNET_FEATURES ("ip4-drop"),
1018 VNET_FEATURE_INIT (ip4_mc_drop, static) = {
1019 .arc_name = "ip4-multicast",
1020 .node_name = "ip4-drop",
1021 .runs_before = 0, /* last feature */
/*
 * "ip4-output" feature arc, entered from "ip4-rewrite" and "ip4-midchain".
 * Order given by the runs_before constraints:
 *   ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
 *   interface-output
 */
1024 /* Source and port-range check ip4 tx feature path definition */
1025 VNET_FEATURE_ARC_INIT (ip4_output, static) =
1027 .arc_name = "ip4-output",
1028 .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"),
1029 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
1032 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
1033 .arc_name = "ip4-output",
1034 .node_name = "ip4-source-and-port-range-check-tx",
1035 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
1038 VNET_FEATURE_INIT (ip4_ipsec_output, static) = {
1039 .arc_name = "ip4-output",
1040 .node_name = "ipsec-output-ip4",
1041 .runs_before = VNET_FEATURES ("interface-output"),
1044 /* Built-in ip4 tx feature path definition */
1045 VNET_FEATURE_INIT (ip4_interface_output, static) = {
1046 .arc_name = "ip4-output",
1047 .node_name = "interface-output",
1048 .runs_before = 0, /* not before any other features */
/*
 * Software-interface add/del hook (registered just below with
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION).  Makes sure the interface has a
 * slot in fib_index_by_sw_if_index and toggles the terminal feature of
 * each arc for it: "ip4-drop" on the unicast and multicast arcs,
 * "interface-output" on the output arc.  Always returns 0 (no error).
 * NOTE(review): the enable/disable argument of the three feature calls is
 * not visible here; presumably it follows the add/del flag.
 */
1052 static clib_error_t *
1053 ip4_sw_interface_add_del (vnet_main_t * vnm,
1057 ip4_main_t * im = &ip4_main;
1059 /* Fill in lookup tables with default table (0). */
1060 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
1062 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
1065 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
1068 vnet_feature_enable_disable ("ip4-output", "interface-output", sw_if_index,
1071 return /* no error */ 0;
1074 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1076 /* Global IP4 main: the single ip4_main_t instance that the functions in this file reach through &ip4_main. */
1077 ip4_main_t ip4_main;
/*
 * Init function for IPv4 lookup (registered with VLIB_INIT_FUNCTION below).
 * Visible steps:
 *  - run vnet_feature_init first;
 *  - fill im->fib_masks[i] with the network-byte-order mask for prefix
 *    length i;
 *  - initialise the lookup main for IPv4 and create/lock FIB table id 0;
 *  - hook unformat_pg_ip4_header into the packet-generator node of
 *    ip4-lookup;
 *  - build the ARP request packet template (ethernet / IPv4, opcode
 *    request, target ethernet address zeroed).
 * NOTE(review): the error return after vnet_feature_init, the handling of
 * i == 32 in the mask loop and the final return are not visible here.
 */
1080 ip4_lookup_init (vlib_main_t * vm)
1082 ip4_main_t * im = &ip4_main;
1083 clib_error_t * error;
1086 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
1089 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* i leading one-bits, built in host order and stored in network order. */
1094 m = pow2_mask (i) << (32 - i);
1097 im->fib_masks[i] = clib_host_to_net_u32 (m);
1100 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1102 /* Create FIB with index 0 and table id of 0. */
1103 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1107 pn = pg_get_node (ip4_lookup_node.index);
1108 pn->unformat_edit = unformat_pg_ip4_header;
1112 ethernet_arp_header_t h;
1114 memset (&h, 0, sizeof (h));
1116 /* Set target ethernet address to all zeros. */
1117 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* Field setters: _16 stores a 16-bit value in network byte order, _8 a byte. */
1119 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1120 #define _8(f,v) h.f = v;
1121 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1122 _16 (l3_type, ETHERNET_TYPE_IP4);
1123 _8 (n_l2_address_bytes, 6);
1124 _8 (n_l3_address_bytes, 4);
1125 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1129 vlib_packet_template_init (vm,
1130 &im->ip4_arp_request_packet_template,
1133 /* alloc chunk size */ 8,
1140 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding trace formatters
   below, which read its fib_index, dpo_index, flow_hash and packet_data
   members (the first three declarations are not visible here). */
1143 /* Adjacency taken. */
1148 /* Packet data, possibly *after* rewrite. */
1149 u8 packet_data[64 - 1*sizeof(u32)];
1150 } ip4_forward_next_trace_t;
/* Trace formatter: appends the captured packet bytes, rendered as an IPv4
   header and indented to the current column, to s.  The vm and node
   varargs are consumed but unused. */
1152 u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1154 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1155 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1156 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1157 uword indent = format_get_indent (s);
1158 s = format (s, "%U%U",
1159 format_white_space, indent,
1160 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for lookup traces: first line shows the fib index, the
 * dpo index and the flow hash; the next line shows the captured packet
 * decoded with format_ip4_header, indented to the indent s had on entry.
 *
 * The va_list supplies (vlib_main_t *, vlib_node_t *,
 * ip4_forward_next_trace_t *); the first two are consumed but unused.
 */
1164 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1166 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1167 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1168 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1169 uword indent = format_get_indent (s);
1171 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1172 t->fib_index, t->dpo_index, t->flow_hash);
1173 s = format (s, "\n%U%U",
1174 format_white_space, indent,
1175 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/*
 * Trace formatter for rewrite traces: prints the TX interface index, the
 * dpo index, the adjacency (format_ip_adjacency) and the flow hash, then
 * the captured packet data on a new line via
 * format_ip_adjacency_packet_data.
 *
 * The va_list supplies (vlib_main_t *, vlib_node_t *,
 * ip4_forward_next_trace_t *); the first two are consumed but unused.
 */
1179 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1181 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1182 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1183 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1184 vnet_main_t * vnm = vnet_get_main();
1185 uword indent = format_get_indent (s);
/* The value labelled "tx_sw_if_index" is t->fib_index: ip4_forward_next_trace
   stores sw_if_index[VLIB_TX] in that field whenever it is not ~0. */
1187 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1188 t->fib_index, t->dpo_index, format_ip_adjacency,
1189 t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1191 s = format (s, "\n%U%U",
1192 format_white_space, indent,
1193 format_ip_adjacency_packet_data,
1195 t->packet_data, sizeof (t->packet_data));
1199 /* Common trace function for all ip4-forward next nodes. */
/*
 * For every buffer in frame that has VLIB_BUFFER_IS_TRACED set, add an
 * ip4_forward_next_trace_t record holding the dpo index, the flow hash,
 * a fib/TX-interface index and a copy of the packet bytes at the buffer's
 * current position.
 *
 * which_adj_index selects the ip.adj_index[] slot (VLIB_RX or VLIB_TX)
 * recorded as dpo_index.
 *
 * Buffers are handled in a two-at-a-time section (b0/b1, with prefetch of
 * the following pair) and then a one-at-a-time section.
 */
1201 ip4_forward_next_trace (vlib_main_t * vm,
1202 vlib_node_runtime_t * node,
1203 vlib_frame_t * frame,
1204 vlib_rx_or_tx_t which_adj_index)
1207 ip4_main_t * im = &ip4_main;
1209 n_left = frame->n_vectors;
1210 from = vlib_frame_vector_args (frame);
1215 vlib_buffer_t * b0, * b1;
1216 ip4_forward_next_trace_t * t0, * t1;
1218 /* Prefetch next iteration. */
1219 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1220 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1225 b0 = vlib_get_buffer (vm, bi0);
1226 b1 = vlib_get_buffer (vm, bi1);
1228 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1230 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1231 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1232 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
/* fib_index: sw_if_index[VLIB_TX] when it is set (not ~0), otherwise the
   FIB index bound to the RX interface. */
1233 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1234 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1235 vec_elt (im->fib_index_by_sw_if_index,
1236 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
/* Always copies sizeof (packet_data) bytes from the buffer's current position. */
1238 clib_memcpy (t0->packet_data,
1239 vlib_buffer_get_current (b0),
1240 sizeof (t0->packet_data));
/* Same record for the second buffer of the pair. */
1242 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1244 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1245 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1246 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1247 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1248 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1249 vec_elt (im->fib_index_by_sw_if_index,
1250 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1251 clib_memcpy (t1->packet_data,
1252 vlib_buffer_get_current (b1),
1253 sizeof (t1->packet_data));
/* One-at-a-time section: identical record, single buffer. */
1263 ip4_forward_next_trace_t * t0;
1267 b0 = vlib_get_buffer (vm, bi0);
1269 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1271 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1272 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1273 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1274 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1275 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1276 vec_elt (im->fib_index_by_sw_if_index,
1277 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1278 clib_memcpy (t0->packet_data,
1279 vlib_buffer_get_current (b0),
1280 sizeof (t0->packet_data));
/*
 * Shared body of the ip4-drop and ip4-punt node functions: passes the
 * frame's buffers to vlib_error_drop_buffers (counted against
 * ip4_input_node) and, when the node has tracing enabled, records traces
 * through ip4_forward_next_trace using the VLIB_TX adjacency index.
 *
 * error_code: the ip4 error chosen by the caller (ip4_drop / ip4_punt).
 */
1288 ip4_drop_or_punt (vlib_main_t * vm,
1289 vlib_node_runtime_t * node,
1290 vlib_frame_t * frame,
1291 ip4_error_t error_code)
1293 u32 * buffers = vlib_frame_vector_args (frame);
1294 uword n_packets = frame->n_vectors;
1296 vlib_error_drop_buffers (vm, node,
1301 ip4_input_node.index,
1304 if (node->flags & VLIB_NODE_FLAG_TRACE)
1305 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for ip4-drop: ip4_drop_or_punt with IP4_ERROR_ADJACENCY_DROP. */
1311 ip4_drop (vlib_main_t * vm,
1312 vlib_node_runtime_t * node,
1313 vlib_frame_t * frame)
1314 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function for ip4-punt: ip4_drop_or_punt with IP4_ERROR_ADJACENCY_PUNT. */
1317 ip4_punt (vlib_main_t * vm,
1318 vlib_node_runtime_t * node,
1319 vlib_frame_t * frame)
1320 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* Graph node registration for ip4_drop: vector of u32 buffer indices,
   traces printed with format_ip4_forward_next_trace. */
1322 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1323 .function = ip4_drop,
1325 .vector_size = sizeof (u32),
1327 .format_trace = format_ip4_forward_next_trace,
1335 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/* Graph node registration for ip4_punt: vector of u32 buffer indices,
   traces printed with format_ip4_forward_next_trace. */
1337 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1338 .function = ip4_punt,
1340 .vector_size = sizeof (u32),
1342 .format_trace = format_ip4_forward_next_trace,
1350 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1352 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * The sum is seeded with pseudo-header terms (payload length, protocol,
 * source and destination address) and then extended over the L4 payload,
 * following p0's buffer chain when the payload does not fit in the first
 * buffer.
 *
 * vm: used to resolve next_buffer indices in the chain.
 * p0: first buffer of the packet.
 * ip0: IPv4 header the checksum is computed for (used from the line
 *      "ip4_header_bytes (ip0)" onward).
 */
1354 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1358 u32 ip_header_length, payload_length_host_byte_order;
1359 u32 n_this_buffer, n_bytes_left;
1361 void * data_this_buffer;
1363 /* Initialize checksum with ip header. */
1364 ip_header_length = ip4_header_bytes (ip0);
1365 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
/* Payload length plus (protocol << 16), converted with clib_host_to_net_u32. */
1366 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* Add the addresses: two u32 reads when uword is 32 bits wide, otherwise one
   u64 read starting at src_address (presumably spanning dst_address too --
   relies on the two fields being adjacent). */
1368 if (BITS (uword) == 32)
1370 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1371 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1374 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Payload starts right after the IP header; limit the first pass to the
   bytes actually present in this buffer (0 if the buffer holds no payload). */
1376 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1377 data_this_buffer = (void *) ip0 + ip_header_length;
1378 if (n_this_buffer + ip_header_length > p0->current_length)
1379 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
/* Accumulate this buffer's share, then move on to the next buffer in the
   chain until no payload bytes remain. */
1382 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1383 n_bytes_left -= n_this_buffer;
1384 if (n_bytes_left == 0)
1387 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1388 p0 = vlib_get_buffer (vm, p0->next_buffer);
1389 data_this_buffer = vlib_buffer_get_current (p0);
1390 n_this_buffer = p0->current_length;
/* Result: complement of the folded sum. */
1393 sum16 = ~ ip_csum_fold (sum0);
/*
 * Validate the TCP/UDP checksum of the packet in p0 and record the outcome
 * in p0->flags: IP_BUFFER_L4_CHECKSUM_COMPUTED is always set, and
 * IP_BUFFER_L4_CHECKSUM_CORRECT is set when the checksum is acceptable.
 *
 * The IPv4 header is taken from the buffer's current position and the L4
 * header is assumed to follow immediately at (ip0 + 1).
 */
1399 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1401 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1402 udp_header_t * udp0;
1405 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1406 || ip0->protocol == IP_PROTOCOL_UDP);
1408 udp0 = (void *) (ip0 + 1);
/* A UDP checksum field of zero is accepted without computing anything. */
1409 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1411 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1412 | IP_BUFFER_L4_CHECKSUM_CORRECT);
/* Otherwise compute in software; the checksum is correct when the result is 0. */
1416 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1418 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1419 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * ip4_local: node function for "ip4-local" (packets addressed to this host).
 *
 * Per packet it
 *   - checks the TCP/UDP checksum (validating in software when the buffer
 *     flags do not already say it was computed) and the UDP length,
 *   - looks up the packet's SOURCE address in the mtrie of the FIB bound to
 *     the RX interface, to reject packets whose source resolves to a
 *     receive DPO and packets with no route back to the source,
 *   - picks the next node from lm->local_next_by_ip_protocol[], or
 *     IP_LOCAL_NEXT_DROP when an error was found.
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL doubles as the "no error found so far" value
 * for error0/error1 throughout.
 *
 * Returns frame->n_vectors.
 */
1425 ip4_local (vlib_main_t * vm,
1426 vlib_node_runtime_t * node,
1427 vlib_frame_t * frame)
1429 ip4_main_t * im = &ip4_main;
1430 ip_lookup_main_t * lm = &im->lookup_main;
1431 ip_local_next_t next_index;
1432 u32 * from, * to_next, n_left_from, n_left_to_next;
/* Errors are counted against ip4-input's error counters. */
1433 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1435 from = vlib_frame_vector_args (frame);
1436 n_left_from = frame->n_vectors;
1437 next_index = node->cached_next_index;
1439 if (node->flags & VLIB_NODE_FLAG_TRACE)
1440 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1442 while (n_left_from > 0)
1444 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
1446 while (n_left_from >= 4 && n_left_to_next >= 2)
1448 vlib_buffer_t * p0, * p1;
1449 ip4_header_t * ip0, * ip1;
1450 udp_header_t * udp0, * udp1;
1451 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1452 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1453 const dpo_id_t *dpo0, *dpo1;
1454 const load_balance_t *lb0, *lb1;
1455 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1456 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1457 i32 len_diff0, len_diff1;
1458 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1459 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
/* Speculatively enqueue both packets to the current next frame. */
1462 pi0 = to_next[0] = from[0];
1463 pi1 = to_next[1] = from[1];
1467 n_left_to_next -= 2;
1469 p0 = vlib_get_buffer (vm, pi0);
1470 p1 = vlib_get_buffer (vm, pi1);
1472 ip0 = vlib_buffer_get_current (p0);
1473 ip1 = vlib_buffer_get_current (p1);
/* Record the buffer offset of the IP header. */
1475 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1476 vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data;
/* FIB (and its mtrie) bound to the interface each packet arrived on. */
1478 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1479 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
1480 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1481 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1483 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1484 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
/* Source-address lookup: the four mtrie steps (0..3) are spread out between
   the checks below. */
1486 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1488 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1489 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1491 /* Treat IP frag packets as "experimental" protocol for now
1492 until support of IP frag reassembly is implemented */
1493 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1494 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1495 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1496 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1497 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1498 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
/* Start from what the buffer flags already say about the L4 checksum. */
1503 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1504 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1506 udp0 = ip4_next_header (ip0);
1507 udp1 = ip4_next_header (ip1);
1509 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1510 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1511 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1513 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1514 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1516 /* Verify UDP length. */
1517 ip_len0 = clib_net_to_host_u16 (ip0->length);
1518 ip_len1 = clib_net_to_host_u16 (ip1->length);
1519 udp_len0 = clib_net_to_host_u16 (udp0->length);
1520 udp_len1 = clib_net_to_host_u16 (udp1->length);
/* Negative difference: the UDP length claims more bytes than the IP length. */
1522 len_diff0 = ip_len0 - udp_len0;
1523 len_diff1 = ip_len1 - udp_len1;
/* The length check only applies to UDP. */
1525 len_diff0 = is_udp0 ? len_diff0 : 0;
1526 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: at least one packet is not TCP/UDP with a known-good checksum;
   validate in software where the checksum has not been computed yet. */
1528 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1529 & good_tcp_udp0 & good_tcp_udp1)))
1534 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1535 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1537 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1538 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1543 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1544 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1546 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1547 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
/* A bad UDP length also makes the packet "not good". */
1551 good_tcp_udp0 &= len_diff0 >= 0;
1552 good_tcp_udp1 &= len_diff1 >= 0;
1554 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1555 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* Error classification; UNKNOWN_PROTOCOL means "nothing wrong so far". */
1557 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1559 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1560 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
/* TCP_CHECKSUM + is_udp selects the TCP or the UDP checksum error, which is
   why the two codes must be consecutive. */
1562 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1563 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1564 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1566 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1567 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
/* Final lookup step; an empty leaf falls back to the mtrie's default leaf. */
1570 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1571 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1572 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1573 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* Store the source lookup result in both adj_index slots. */
1575 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1576 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1578 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1579 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
/* Only bucket 0 of the load-balance object is inspected. */
1581 lb0 = load_balance_get(lbi0);
1582 lb1 = load_balance_get(lbi1);
1583 dpo0 = load_balance_get_bucket_i(lb0, 0);
1584 dpo1 = load_balance_get_bucket_i(lb1, 0);
1587 * Must have a route to source otherwise we drop the packet.
1588 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1591 * - the source is a recieve => it's from us => bogus, do this
1592 * first since it sets a different error code.
1593 * - uRPF check for any route to source - accept if passes.
1594 * - allow packets destined to the broadcast address from unknown sources
1596 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1597 dpo0->dpoi_type == DPO_RECEIVE) ?
1598 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1600 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1601 !fib_urpf_check_size(lb0->lb_urpf) &&
1602 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1603 ? IP4_ERROR_SRC_LOOKUP_MISS
1605 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1606 dpo1->dpoi_type == DPO_RECEIVE) ?
1607 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1609 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1610 !fib_urpf_check_size(lb1->lb_urpf) &&
1611 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1612 ? IP4_ERROR_SRC_LOOKUP_MISS
/* Next node by protocol, unless an error was recorded: then drop. */
1615 next0 = lm->local_next_by_ip_protocol[proto0];
1616 next1 = lm->local_next_by_ip_protocol[proto1];
1618 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1619 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1621 p0->error = error0 ? error_node->errors[error0] : 0;
1622 p1->error = error1 ? error_node->errors[error1] : 0;
/* Bit 0: packet 0 mis-speculated; bit 1: packet 1 mis-speculated. */
1624 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1626 if (PREDICT_FALSE (enqueue_code != 0))
1628 switch (enqueue_code)
1634 n_left_to_next += 1;
1635 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1641 n_left_to_next += 1;
1642 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1646 /* A B B or A B C */
1648 n_left_to_next += 2;
1649 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1650 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1653 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1655 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Single loop: same processing, one packet per iteration. */
1662 while (n_left_from > 0 && n_left_to_next > 0)
1666 udp_header_t * udp0;
1667 ip4_fib_mtrie_t * mtrie0;
1668 ip4_fib_mtrie_leaf_t leaf0;
1669 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1671 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1672 load_balance_t *lb0;
1673 const dpo_id_t *dpo0;
1675 pi0 = to_next[0] = from[0];
1679 n_left_to_next -= 1;
1681 p0 = vlib_get_buffer (vm, pi0);
1683 ip0 = vlib_buffer_get_current (p0);
1685 vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data;
1687 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1688 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1690 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1692 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1694 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1696 /* Treat IP frag packets as "experimental" protocol for now
1697 until support of IP frag reassembly is implemented */
1698 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1699 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1700 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1704 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1706 udp0 = ip4_next_header (ip0);
1708 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1709 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1711 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1713 /* Verify UDP length. */
1714 ip_len0 = clib_net_to_host_u16 (ip0->length);
1715 udp_len0 = clib_net_to_host_u16 (udp0->length);
1717 len_diff0 = ip_len0 - udp_len0;
1719 len_diff0 = is_udp0 ? len_diff0 : 0;
/* Slow path: validate the checksum in software if not yet computed. */
1721 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1726 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1727 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1729 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1730 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1734 good_tcp_udp0 &= len_diff0 >= 0;
1736 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1738 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1740 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1742 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1743 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1744 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1747 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1748 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1750 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1751 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1753 lb0 = load_balance_get(lbi0);
1754 dpo0 = load_balance_get_bucket_i(lb0, 0);
/* Stores lbi0 in both slots (the VLIB_TX slot was already written above). */
1756 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1757 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0;
/* Same source checks as in the dual loop: receive DPO first, then the
   uRPF check with the broadcast-destination exception. */
1759 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1760 dpo0->dpoi_type == DPO_RECEIVE) ?
1761 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1763 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1764 !fib_urpf_check_size(lb0->lb_urpf) &&
1765 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1766 ? IP4_ERROR_SRC_LOOKUP_MISS
1769 next0 = lm->local_next_by_ip_protocol[proto0];
1771 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1773 p0->error = error0? error_node->errors[error0] : 0;
/* Mis-speculated next: undo the enqueue and switch frames. */
1775 if (PREDICT_FALSE (next0 != next_index))
1777 n_left_to_next += 1;
1778 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1781 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1784 n_left_to_next -= 1;
1788 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1791 return frame->n_vectors;
/* Graph node registration for "ip4-local": run by ip4_local, with
   IP_LOCAL_N_NEXT next nodes; the initial next-node names are listed below. */
1794 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1795 .function = ip4_local,
1796 .name = "ip4-local",
1797 .vector_size = sizeof (u32),
1799 .format_trace = format_ip4_forward_next_trace,
1801 .n_next_nodes = IP_LOCAL_N_NEXT,
1803 [IP_LOCAL_NEXT_DROP] = "error-drop",
1804 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1805 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1806 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1810 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/*
 * Register a handler node for an IP protocol number on the local path.
 *
 * protocol:   IP protocol number; must index within
 *             lm->local_next_by_ip_protocol (checked with ASSERT only).
 * node_index: graph node to receive matching packets; it is added as a
 *             next node of ip4-local and the resulting next index is
 *             stored in the per-protocol table read by ip4_local.
 */
1812 void ip4_register_protocol (u32 protocol, u32 node_index)
1814 vlib_main_t * vm = vlib_get_main();
1815 ip4_main_t * im = &ip4_main;
1816 ip_lookup_main_t * lm = &im->lookup_main;
1818 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1819 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local": prints a heading and then the number of
 * every IP protocol whose entry in lm->local_next_by_ip_protocol is not
 * IP_LOCAL_NEXT_PUNT, one per line.
 *
 * input and cmd are not referenced in the lines of this handler shown here.
 */
1822 static clib_error_t *
1823 show_ip_local_command_fn (vlib_main_t * vm,
1824 unformat_input_t * input,
1825 vlib_cli_command_t * cmd)
1827 ip4_main_t * im = &ip4_main;
1828 ip_lookup_main_t * lm = &im->lookup_main;
1831 vlib_cli_output (vm, "Protocols handled by ip4_local");
1832 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1834 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1835 vlib_cli_output (vm, "%d", i);
1843 * Display the set of protocols handled by the local IPv4 stack.
1846 * Example of how to display local protocol table:
1847 * @cliexstart{show ip local}
1848 * Protocols handled by ip4_local
/* CLI registration: binds the path "show ip local" to show_ip_local_command_fn. */
1855 VLIB_CLI_COMMAND (show_ip_local, static) = {
1856 .path = "show ip local",
1857 .function = show_ip_local_command_fn,
1858 .short_help = "show ip local",
1863 ip4_arp_inline (vlib_main_t * vm,
1864 vlib_node_runtime_t * node,
1865 vlib_frame_t * frame,
1868 vnet_main_t * vnm = vnet_get_main();
1869 ip4_main_t * im = &ip4_main;
1870 ip_lookup_main_t * lm = &im->lookup_main;
1871 u32 * from, * to_next_drop;
1872 uword n_left_from, n_left_to_next_drop, next_index;
1873 static f64 time_last_seed_change = -1e100;
1874 static u32 hash_seeds[3];
1875 static uword hash_bitmap[256 / BITS (uword)];
1878 if (node->flags & VLIB_NODE_FLAG_TRACE)
1879 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1881 time_now = vlib_time_now (vm);
1882 if (time_now - time_last_seed_change > 1e-3)
1885 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1886 sizeof (hash_seeds));
1887 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1888 hash_seeds[i] = r[i];
1890 /* Mark all hash keys as been no-seen before. */
1891 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1894 time_last_seed_change = time_now;
1897 from = vlib_frame_vector_args (frame);
1898 n_left_from = frame->n_vectors;
1899 next_index = node->cached_next_index;
1900 if (next_index == IP4_ARP_NEXT_DROP)
1901 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1903 while (n_left_from > 0)
1905 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1906 to_next_drop, n_left_to_next_drop);
1908 while (n_left_from > 0 && n_left_to_next_drop > 0)
1910 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1911 ip_adjacency_t * adj0;
1918 p0 = vlib_get_buffer (vm, pi0);
1920 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1921 adj0 = ip_get_adjacency (lm, adj_index0);
1922 ip0 = vlib_buffer_get_current (p0);
1928 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1929 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1934 * this is the Glean case, so we are ARPing for the
1935 * packet's destination
1937 a0 ^= ip0->dst_address.data_u32;
1941 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1945 hash_v3_finalize32 (a0, b0, c0);
1947 c0 &= BITS (hash_bitmap) - 1;
1948 c0 = c0 / BITS (uword);
1949 m0 = (uword) 1 << (c0 % BITS (uword));
1951 bm0 = hash_bitmap[c0];
1952 drop0 = (bm0 & m0) != 0;
1954 /* Mark it as seen. */
1955 hash_bitmap[c0] = bm0 | m0;
1959 to_next_drop[0] = pi0;
1961 n_left_to_next_drop -= 1;
1963 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1966 * the adj has been updated to a rewrite but the node the DPO that got
1967 * us here hasn't - yet. no big deal. we'll drop while we wait.
1969 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1976 * Can happen if the control-plane is programming tables
1977 * with traffic flowing; at least that's today's lame excuse.
1979 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1980 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1982 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1985 /* Send ARP request. */
1989 ethernet_arp_header_t * h0;
1990 vnet_hw_interface_t * hw_if0;
1992 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1994 /* Add rewrite/encap string for ARP packet. */
1995 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1997 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1999 /* Src ethernet address in ARP header. */
2000 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
2001 sizeof (h0->ip4_over_ethernet[0].ethernet));
2005 /* The interface's source address is stashed in the Glean Adj */
2006 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
2008 /* Copy in destination address we are requesting. This is the
2009 * glean case, so it's the packet's destination.*/
2010 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
2014 /* Src IP address in ARP header. */
2015 if (ip4_src_address_for_packet(lm, sw_if_index0,
2016 &h0->ip4_over_ethernet[0].ip4))
2018 /* No source address available */
2019 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
2020 vlib_buffer_free(vm, &bi0, 1);
2024 /* Copy in destination address we are requesting from the
2026 h0->ip4_over_ethernet[1].ip4.data_u32 =
2027 adj0->sub_type.nbr.next_hop.ip4.as_u32;
2030 vlib_buffer_copy_trace_flag (vm, p0, bi0);
2031 b0 = vlib_get_buffer (vm, bi0);
2032 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
2034 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
2036 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
2040 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
2043 return frame->n_vectors;
/* Node function for ip4_arp_node: ip4_arp_inline in the non-glean mode (is_glean = 0). */
2047 ip4_arp (vlib_main_t * vm,
2048 vlib_node_runtime_t * node,
2049 vlib_frame_t * frame)
2051 return (ip4_arp_inline(vm, node, frame, 0));
/* Node function for "ip4-glean": ip4_arp_inline in glean mode (is_glean = 1). */
2055 ip4_glean (vlib_main_t * vm,
2056 vlib_node_runtime_t * node,
2057 vlib_frame_t * frame)
2059 return (ip4_arp_inline(vm, node, frame, 1));
/* Counter descriptions indexed by IP4_ARP_ERROR_*; shared by the ip4_arp_node
   and ip4_glean_node registrations. */
2062 static char * ip4_arp_error_strings[] = {
2063 [IP4_ARP_ERROR_DROP] = "address overflow drops",
2064 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
2065 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
2066 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
2067 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
2068 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node registration for ip4_arp: uses the shared ARP error strings
   and starts with IP4_ARP_N_NEXT next nodes, including "error-drop". */
2071 VLIB_REGISTER_NODE (ip4_arp_node) = {
2072 .function = ip4_arp,
2074 .vector_size = sizeof (u32),
2076 .format_trace = format_ip4_forward_next_trace,
2078 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2079 .error_strings = ip4_arp_error_strings,
2081 .n_next_nodes = IP4_ARP_N_NEXT,
2083 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Graph node registration for "ip4-glean": same layout as ip4_arp_node but
   run by ip4_glean. */
2087 VLIB_REGISTER_NODE (ip4_glean_node) = {
2088 .function = ip4_glean,
2089 .name = "ip4-glean",
2090 .vector_size = sizeof (u32),
2092 .format_trace = format_ip4_forward_next_trace,
2094 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2095 .error_strings = ip4_arp_error_strings,
2097 .n_next_nodes = IP4_ARP_N_NEXT,
2099 [IP4_ARP_NEXT_DROP] = "error-drop",
2103 #define foreach_notrace_ip4_arp_error \
2109 clib_error_t * arp_notrace_init (vlib_main_t * vm)
/* Init function (registered below): for each error named by
   foreach_notrace_ip4_arp_error, passes the ip4_arp_node runtime's error
   value to vnet_pcap_drop_trace_filter_add_del. */
2111 vlib_node_runtime_t *rt =
2112 vlib_node_get_runtime (vm, ip4_arp_node.index);
2114 /* don't trace ARP request packets */
2116 vnet_pcap_drop_trace_filter_add_del \
2117 (rt->errors[IP4_ARP_ERROR_##a], \
2119 foreach_notrace_ip4_arp_error;
/* Register arp_notrace_init as a VLIB init function. */
2124 VLIB_INIT_FUNCTION(arp_notrace_init);
2127 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * vm:          vlib main used for buffer and frame handling.
 * dst:         IPv4 address to probe; becomes the ARP target address.
 * sw_if_index: software interface to send the request on.
 *
 * Returns 0 on success.  Returns a clib error when the interface is not
 * admin-up, or when no interface address matches dst (in that case
 * vnm->api_errno is also set to VNET_API_ERROR_NO_MATCHING_INTERFACE).
 */
2129 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2131 vnet_main_t * vnm = vnet_get_main();
2132 ip4_main_t * im = &ip4_main;
2133 ethernet_arp_header_t * h;
2134 ip4_address_t * src;
2135 ip_interface_address_t * ia;
2136 ip_adjacency_t * adj;
2137 vnet_hw_interface_t * hi;
2138 vnet_sw_interface_t * si;
2142 si = vnet_get_sw_interface (vnm, sw_if_index);
/* Refuse to probe through an interface that is administratively down. */
2144 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2146 return clib_error_return (0, "%U: interface %U down",
2147 format_ip4_address, dst,
2148 format_vnet_sw_if_index_name, vnm,
/* Source address: the interface address matching dst; ia receives the
   corresponding interface-address record. */
2152 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2155 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2156 return clib_error_return
2157 (0, "no matching interface address for destination %U (interface %U)",
2158 format_ip4_address, dst,
2159 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* The neighbor-probe adjacency of that interface address supplies the rewrite (encapsulation). */
2162 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
/* Fresh buffer (index bi) holding a copy of the ARP request template. */
2164 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2166 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
/* Sender hardware address, sender IP and target IP. */
2168 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2170 h->ip4_over_ethernet[0].ip4 = src[0];
2171 h->ip4_over_ethernet[1].ip4 = dst[0];
2173 b = vlib_get_buffer (vm, bi);
2174 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2176 /* Add encapsulation string for software interface (e.g. ethernet header). */
2177 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2178 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
/* Hand the request straight to the hardware interface's output node. */
2181 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2182 u32 * to_next = vlib_frame_vector_args (f);
2185 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2188 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline: DROP is its initial next for
   each packet, ICMP_ERROR is chosen when the TTL expires. */
2192 IP4_REWRITE_NEXT_DROP,
2193 IP4_REWRITE_NEXT_ICMP_ERROR,
2194 } ip4_rewrite_next_t;
2197 ip4_rewrite_inline (vlib_main_t * vm,
2198 vlib_node_runtime_t * node,
2199 vlib_frame_t * frame,
2202 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2203 u32 * from = vlib_frame_vector_args (frame);
2204 u32 n_left_from, n_left_to_next, * to_next, next_index;
2205 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2207 n_left_from = frame->n_vectors;
2208 next_index = node->cached_next_index;
2209 u32 cpu_index = os_get_cpu_number();
2211 while (n_left_from > 0)
2213 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2215 while (n_left_from >= 4 && n_left_to_next >= 2)
2217 ip_adjacency_t * adj0, * adj1;
2218 vlib_buffer_t * p0, * p1;
2219 ip4_header_t * ip0, * ip1;
2220 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2221 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2222 u32 tx_sw_if_index0, tx_sw_if_index1;
2224 /* Prefetch next iteration. */
2226 vlib_buffer_t * p2, * p3;
2228 p2 = vlib_get_buffer (vm, from[2]);
2229 p3 = vlib_get_buffer (vm, from[3]);
2231 vlib_prefetch_buffer_header (p2, STORE);
2232 vlib_prefetch_buffer_header (p3, STORE);
2234 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2235 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2238 pi0 = to_next[0] = from[0];
2239 pi1 = to_next[1] = from[1];
2244 n_left_to_next -= 2;
2246 p0 = vlib_get_buffer (vm, pi0);
2247 p1 = vlib_get_buffer (vm, pi1);
2249 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2250 adj_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_TX];
2252 /* We should never rewrite a pkt using the MISS adjacency */
2253 ASSERT(adj_index0 && adj_index1);
2255 ip0 = vlib_buffer_get_current (p0);
2256 ip1 = vlib_buffer_get_current (p1);
2258 error0 = error1 = IP4_ERROR_NONE;
2259 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2261 /* Decrement TTL & update checksum.
2262 Works either endian, so no need for byte swap. */
2263 if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2265 i32 ttl0 = ip0->ttl;
2267 /* Input node should have rejected packets with ttl 0. */
2268 ASSERT (ip0->ttl > 0);
2270 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2271 checksum0 += checksum0 >= 0xffff;
2273 ip0->checksum = checksum0;
2278 * If the ttl drops below 1 when forwarding, generate
2281 if (PREDICT_FALSE(ttl0 <= 0))
2283 error0 = IP4_ERROR_TIME_EXPIRED;
2284 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2285 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2286 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2287 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2290 /* Verify checksum. */
2291 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2295 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2297 if (PREDICT_TRUE(!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2299 i32 ttl1 = ip1->ttl;
2301 /* Input node should have rejected packets with ttl 0. */
2302 ASSERT (ip1->ttl > 0);
2304 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2305 checksum1 += checksum1 >= 0xffff;
2307 ip1->checksum = checksum1;
2312 * If the ttl drops below 1 when forwarding, generate
2315 if (PREDICT_FALSE(ttl1 <= 0))
2317 error1 = IP4_ERROR_TIME_EXPIRED;
2318 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2319 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2320 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2321 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2324 /* Verify checksum. */
2325 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2326 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2330 p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2333 /* Rewrite packet header and updates lengths. */
2334 adj0 = ip_get_adjacency (lm, adj_index0);
2335 adj1 = ip_get_adjacency (lm, adj_index1);
2337 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2338 rw_len0 = adj0[0].rewrite_header.data_bytes;
2339 rw_len1 = adj1[0].rewrite_header.data_bytes;
2340 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2341 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2343 /* Check MTU of outgoing interface. */
2344 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2345 ? IP4_ERROR_MTU_EXCEEDED
2347 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2348 ? IP4_ERROR_MTU_EXCEEDED
2351 next0 = (error0 == IP4_ERROR_NONE)
2352 ? adj0[0].rewrite_header.next_index : next0;
2354 next1 = (error1 == IP4_ERROR_NONE)
2355 ? adj1[0].rewrite_header.next_index : next1;
2358 * We've already accounted for an ethernet_header_t elsewhere
2360 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2361 vlib_increment_combined_counter
2362 (&adjacency_counters,
2363 cpu_index, adj_index0,
2364 /* packet increment */ 0,
2365 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2367 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2368 vlib_increment_combined_counter
2369 (&adjacency_counters,
2370 cpu_index, adj_index1,
2371 /* packet increment */ 0,
2372 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2374 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2375 * to see the IP header */
2376 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2378 p0->current_data -= rw_len0;
2379 p0->current_length += rw_len0;
2380 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2381 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2384 vnet_feature_arc_start(lm->output_feature_arc_index,
2385 tx_sw_if_index0, &next0, p0);
2387 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2389 p1->current_data -= rw_len1;
2390 p1->current_length += rw_len1;
2392 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2393 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2396 vnet_feature_arc_start(lm->output_feature_arc_index,
2397 tx_sw_if_index1, &next1, p1);
2400 /* Guess we are only writing on simple Ethernet header. */
2401 vnet_rewrite_two_headers (adj0[0], adj1[0],
2403 sizeof (ethernet_header_t));
2407 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2408 adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2411 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2412 to_next, n_left_to_next,
2413 pi0, pi1, next0, next1);
2416 while (n_left_from > 0 && n_left_to_next > 0)
2418 ip_adjacency_t * adj0;
2421 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2422 u32 tx_sw_if_index0;
2424 pi0 = to_next[0] = from[0];
2426 p0 = vlib_get_buffer (vm, pi0);
2428 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
2430 /* We should never rewrite a pkt using the MISS adjacency */
2433 adj0 = ip_get_adjacency (lm, adj_index0);
2435 ip0 = vlib_buffer_get_current (p0);
2437 error0 = IP4_ERROR_NONE;
2438 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2440 /* Decrement TTL & update checksum. */
2441 if (PREDICT_TRUE(!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED)))
2443 i32 ttl0 = ip0->ttl;
2445 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2447 checksum0 += checksum0 >= 0xffff;
2449 ip0->checksum = checksum0;
2451 ASSERT (ip0->ttl > 0);
2457 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2459 if (PREDICT_FALSE(ttl0 <= 0))
2462 * If the ttl drops below 1 when forwarding, generate
2465 error0 = IP4_ERROR_TIME_EXPIRED;
2466 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2467 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2468 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2469 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2474 p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED;
2477 /* Guess we are only writing on simple Ethernet header. */
2478 vnet_rewrite_one_header (adj0[0], ip0,
2479 sizeof (ethernet_header_t));
2481 /* Update packet buffer attributes/set output interface. */
2482 rw_len0 = adj0[0].rewrite_header.data_bytes;
2483 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2485 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2486 vlib_increment_combined_counter
2487 (&adjacency_counters,
2488 cpu_index, adj_index0,
2489 /* packet increment */ 0,
2490 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2492 /* Check MTU of outgoing interface. */
2493 error0 = (vlib_buffer_length_in_chain (vm, p0)
2494 > adj0[0].rewrite_header.max_l3_packet_bytes
2495 ? IP4_ERROR_MTU_EXCEEDED
2498 p0->error = error_node->errors[error0];
2500 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2501 * to see the IP header */
2502 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2504 p0->current_data -= rw_len0;
2505 p0->current_length += rw_len0;
2506 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2508 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2509 next0 = adj0[0].rewrite_header.next_index;
2513 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2516 vnet_feature_arc_start(lm->output_feature_arc_index,
2517 tx_sw_if_index0, &next0, p0);
2524 n_left_to_next -= 1;
2526 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2527 to_next, n_left_to_next,
2531 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2534 /* Need to do trace after rewrites to pick up new packet data. */
2535 if (node->flags & VLIB_NODE_FLAG_TRACE)
2536 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
2538 return frame->n_vectors;
2542 /** @brief IPv4 rewrite node.
2545 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2546 header checksum, fetch the ip adjacency, check the outbound mtu,
2547 apply the adjacency rewrite, and send pkts to the adjacency
2548 rewrite header's rewrite_next_index.
2550 @param vm vlib_main_t corresponding to the current thread
2551 @param node vlib_node_runtime_t
2552 @param frame vlib_frame_t whose contents should be dispatched
2554 @par Graph mechanics: buffer metadata, next index usage
2557 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2558 - the rewrite adjacency index
2559 - <code>adj->lookup_next_index</code>
2560 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2561 the packet will be dropped.
2562 - <code>adj->rewrite_header</code>
2563 - Rewrite string length, rewrite string, next_index
2566 - <code>b->current_data, b->current_length</code>
2567 - Updated net of applying the rewrite string
2569 <em>Next Indices:</em>
2570 - <code> adj->rewrite_header.next_index </code>
/* Dispatch function of the "ip4-rewrite" graph node.
 * Hands the frame unchanged to ip4_rewrite_inline with the last argument
 * fixed at 0 and returns whatever that call returns.
 * NOTE(review): the last argument presumably switches the midchain fixup
 * off (ip4_midchain passes 1) -- confirm against ip4_rewrite_inline. */
2574 ip4_rewrite (vlib_main_t * vm,
2575 vlib_node_runtime_t * node,
2576 vlib_frame_t * frame)
2578 return ip4_rewrite_inline (vm, node, frame, 0);
/* Dispatch function of the "ip4-midchain" graph node.
 * Same as ip4_rewrite except that ip4_rewrite_inline is called with the
 * last argument fixed at 1.
 * NOTE(review): that flag presumably enables the per-adjacency
 * sub_type.midchain.fixup_func call -- confirm against ip4_rewrite_inline. */
2582 ip4_midchain (vlib_main_t * vm,
2583 vlib_node_runtime_t * node,
2584 vlib_frame_t * frame)
2586 return ip4_rewrite_inline (vm, node, frame, 1);
/* Registration of the "ip4-rewrite" graph node: ip4_rewrite is the
 * dispatch function, frame elements are u32-sized, and traces are
 * rendered by format_ip4_rewrite_trace. */
2590 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2591 .function = ip4_rewrite,
2592 .name = "ip4-rewrite",
2593 .vector_size = sizeof (u32),
2595 .format_trace = format_ip4_rewrite_trace,
/* Next-node names, indexed by the IP4_REWRITE_NEXT_* values. */
2599 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2600 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2604 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite)
/* Registration of the "ip4-midchain" graph node: ip4_midchain is the
 * dispatch function and traces are rendered by
 * format_ip4_forward_next_trace.  The node is declared a sibling of
 * "ip4-rewrite".
 * NOTE(review): presumably the sibling relation is what supplies this
 * node's next-node arcs, since none are listed here -- confirm. */
2606 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2607 .function = ip4_midchain,
2608 .name = "ip4-midchain",
2609 .vector_size = sizeof (u32),
2611 .format_trace = format_ip4_forward_next_trace,
2613 .sibling_of = "ip4-rewrite",
2616 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/* CLI handler registered for "set interface ip table".
 *
 * Parses an interface (unformat_vnet_sw_interface) followed by a decimal
 * table id.  A parse failure of either token is reported through `error`
 * ("unknown interface" / "expected table id"); `error` stays 0 otherwise.
 * On success the IPv4 FIB for table_id is found or created (and locked)
 * and its index is stored as the interface's FIB index in
 * ip4_main.fib_index_by_sw_if_index.
 *
 * @param vm    vlib main, unused in the visible code
 * @param input CLI input being parsed
 * @param cmd   the CLI command descriptor, unused in the visible code
 */
2618 static clib_error_t *
2619 add_del_interface_table (vlib_main_t * vm,
2620 unformat_input_t * input,
2621 vlib_cli_command_t * cmd)
2623 vnet_main_t * vnm = vnet_get_main();
2624 clib_error_t * error = 0;
2625 u32 sw_if_index, table_id;
2629 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2631 error = clib_error_return (0, "unknown interface `%U'",
2632 format_unformat_error, input);
2636 if (unformat (input, "%d", &table_id))
2640 error = clib_error_return (0, "expected table id `%U'",
2641 format_unformat_error, input);
2646 ip4_main_t * im = &ip4_main;
2649 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2654 // changing an interface's table has consequences for any connecteds
2655 // and adj-fibs already installed.
/* Grow the per-interface vector if needed so sw_if_index is a valid slot. */
2657 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2658 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2666 * Place the indicated interface into the supplied IPv4 FIB table (also known
2667 * as a VRF). If the FIB table does not exist, this command creates it. To
2668 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2669 * A FIB table will only be displayed if a route has been added to the table, or
2670 * an IP Address is assigned to an interface in the table (which adds a route
2673 * @note IP addresses added after setting the interface IP table end up in
2674 * the indicated FIB table. If the IP address is added prior to adding the
2675 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2676 * but potentially counter-intuitive results occur if you provision interface
2677 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2678 * IP table ID provisioned. It might be marginally useful to evade source RPF
2679 * drops to put an interface address into multiple FIBs.
2682 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2683 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Registers the CLI path "set interface ip table"; the command is handled
 * by add_del_interface_table. */
2686 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2687 .path = "set interface ip table",
2688 .function = add_del_interface_table,
2689 .short_help = "set interface ip table <interface> <table-id>",
/* Dispatch function of the "ip4-lookup-multicast" graph node.
 *
 * For every buffer in the frame:
 *  - pick a FIB index: the one bound to the RX interface in
 *    ip4_main.fib_index_by_sw_if_index, unless sw_if_index[VLIB_TX] is
 *    something other than ~0, in which case that value is used instead;
 *  - resolve a load-balance index with ip4_fib_table_lookup_lb;
 *  - compute the flow hash with the load-balance's hash config and use it
 *    to choose a bucket;
 *  - take the next node from the bucket's dpoi_next_node and store its
 *    dpoi_index in ip.adj_index[VLIB_TX];
 *  - bump the load-balance "to" counter by one packet and the buffer
 *    chain length;
 *  - enqueue the buffer, switching next frames when the chosen next node
 *    differs from the one currently being filled.
 *
 * @param vm    vlib main for the current thread
 * @param node  node runtime; cached_next_index seeds the speculative next
 * @param frame frame of u32 buffer indices to process
 * @return frame->n_vectors
 */
2695 ip4_lookup_multicast (vlib_main_t * vm,
2696 vlib_node_runtime_t * node,
2697 vlib_frame_t * frame)
2699 ip4_main_t * im = &ip4_main;
2700 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2701 u32 n_left_from, n_left_to_next, * from, * to_next;
2702 ip_lookup_next_t next;
2703 u32 cpu_index = os_get_cpu_number();
2705 from = vlib_frame_vector_args (frame);
2706 n_left_from = frame->n_vectors;
2707 next = node->cached_next_index;
2709 while (n_left_from > 0)
2711 vlib_get_next_frame (vm, node, next,
2712 to_next, n_left_to_next);
/* Dual loop: two packets per pass.  Four must remain because from[2] and
 * from[3] are prefetched below. */
2714 while (n_left_from >= 4 && n_left_to_next >= 2)
2716 vlib_buffer_t * p0, * p1;
2717 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2718 ip_lookup_next_t next0, next1;
2719 ip4_header_t * ip0, * ip1;
2720 u32 fib_index0, fib_index1;
2721 const dpo_id_t *dpo0, *dpo1;
2722 const load_balance_t * lb0, * lb1;
2724 /* Prefetch next iteration. */
2726 vlib_buffer_t * p2, * p3;
2728 p2 = vlib_get_buffer (vm, from[2]);
2729 p3 = vlib_get_buffer (vm, from[3]);
2731 vlib_prefetch_buffer_header (p2, LOAD);
2732 vlib_prefetch_buffer_header (p3, LOAD);
2734 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2735 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2738 pi0 = to_next[0] = from[0];
2739 pi1 = to_next[1] = from[1];
2741 p0 = vlib_get_buffer (vm, pi0);
2742 p1 = vlib_get_buffer (vm, pi1);
2744 ip0 = vlib_buffer_get_current (p0);
2745 ip1 = vlib_buffer_get_current (p1);
2747 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2748 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
/* A TX sw_if_index other than ~0 is taken as the FIB index, overriding the
 * table bound to the RX interface. */
2749 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2750 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2751 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2752 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2754 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2756 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2759 lb0 = load_balance_get (lb_index0);
2760 lb1 = load_balance_get (lb_index1);
2762 ASSERT (lb0->lb_n_buckets > 0);
2763 ASSERT (is_pow2 (lb0->lb_n_buckets));
2764 ASSERT (lb1->lb_n_buckets > 0);
2765 ASSERT (is_pow2 (lb1->lb_n_buckets));
2767 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2768 (ip0, lb0->lb_hash_config);
2770 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2771 (ip1, lb1->lb_hash_config);
/* The bucket count is asserted above to be a power of two, so masking the
 * hash with lb_n_buckets_minus_1 yields a valid bucket number. */
2773 dpo0 = load_balance_get_bucket_i(lb0,
2774 (vnet_buffer (p0)->ip.flow_hash &
2775 (lb0->lb_n_buckets_minus_1)));
2776 dpo1 = load_balance_get_bucket_i(lb1,
2777 (vnet_buffer (p1)->ip.flow_hash &
2778 (lb1->lb_n_buckets_minus_1)));
2780 next0 = dpo0->dpoi_next_node;
2781 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2782 next1 = dpo1->dpoi_next_node;
2783 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Count one packet plus its chain length against each load-balance. */
2785 if (1) /* $$$$$$ HACK FIXME */
2786 vlib_increment_combined_counter
2787 (cm, cpu_index, lb_index0, 1,
2788 vlib_buffer_length_in_chain (vm, p0));
2789 if (1) /* $$$$$$ HACK FIXME */
2790 vlib_increment_combined_counter
2791 (cm, cpu_index, lb_index1, 1,
2792 vlib_buffer_length_in_chain (vm, p1));
2796 n_left_to_next -= 2;
/* Bit 0 set: packet 0 goes to a different next node than the frame being
 * filled; bit 1 set: packet 1 does. */
2799 wrong_next = (next0 != next) + 2*(next1 != next);
2800 if (PREDICT_FALSE (wrong_next != 0))
2808 n_left_to_next += 1;
2809 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2815 n_left_to_next += 1;
2816 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2822 n_left_to_next += 2;
2823 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2824 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2828 vlib_put_next_frame (vm, node, next, n_left_to_next);
2830 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same per-packet steps for whatever the dual loop left. */
2836 while (n_left_from > 0 && n_left_to_next > 0)
2841 ip_lookup_next_t next0;
2843 const dpo_id_t *dpo0;
2844 const load_balance_t * lb0;
2849 p0 = vlib_get_buffer (vm, pi0);
2851 ip0 = vlib_buffer_get_current (p0);
2853 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2854 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2855 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2856 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2858 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2861 lb0 = load_balance_get (lb_index0);
2863 ASSERT (lb0->lb_n_buckets > 0);
2864 ASSERT (is_pow2 (lb0->lb_n_buckets));
2866 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2867 (ip0, lb0->lb_hash_config);
2869 dpo0 = load_balance_get_bucket_i(lb0,
2870 (vnet_buffer (p0)->ip.flow_hash &
2871 (lb0->lb_n_buckets_minus_1)));
2873 next0 = dpo0->dpoi_next_node;
2874 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2876 if (1) /* $$$$$$ HACK FIXME */
2877 vlib_increment_combined_counter
2878 (cm, cpu_index, lb_index0, 1,
2879 vlib_buffer_length_in_chain (vm, p0));
2883 n_left_to_next -= 1;
2886 if (PREDICT_FALSE (next0 != next))
2888 n_left_to_next += 1;
2889 vlib_put_next_frame (vm, node, next, n_left_to_next);
2891 vlib_get_next_frame (vm, node, next,
2892 to_next, n_left_to_next);
2895 n_left_to_next -= 1;
2899 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing is done once per frame, only when the node's trace flag is set. */
2902 if (node->flags & VLIB_NODE_FLAG_TRACE)
2903 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2905 return frame->n_vectors;
/* Registration of the "ip4-lookup-multicast" graph node: dispatched by
 * ip4_lookup_multicast, declared a sibling of "ip4-lookup", traces
 * rendered by format_ip4_lookup_trace. */
2908 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2909 .function = ip4_lookup_multicast,
2910 .name = "ip4-lookup-multicast",
2911 .vector_size = sizeof (u32),
2912 .sibling_of = "ip4-lookup",
2913 .format_trace = format_ip4_lookup_trace,
2918 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/* Registration of the "ip4-multicast" graph node.  Its dispatch function
 * is ip4_drop, so this node has no handler of its own; traces are rendered
 * by format_ip4_forward_next_trace. */
2920 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2921 .function = ip4_drop,
2922 .name = "ip4-multicast",
2923 .vector_size = sizeof (u32),
2925 .format_trace = format_ip4_forward_next_trace,
/* Cross-check two lookup paths for one address in one FIB.
 *
 * Walks the FIB's mtrie by hand, starting from IP4_FIB_MTRIE_LEAF_ROOT and
 * taking four ip4_fib_mtrie_lookup_step calls (step indices 0..3); an empty
 * result falls back to the mtrie's default leaf.  The index extracted from
 * the final leaf is then compared with what ip4_fib_table_lookup_lb returns
 * for the same address.
 *
 * @param a          address to look up
 * @param fib_index0 FIB index handed to ip4_fib_get
 * @return non-zero when both paths give the same index, 0 otherwise
 */
2933 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2935 ip4_fib_mtrie_t * mtrie0;
2936 ip4_fib_mtrie_leaf_t leaf0;
2939 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2941 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2942 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2943 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2944 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2945 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2947 /* Handle default route. */
2948 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2950 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2952 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler registered for "test lookup".
 *
 * Accepts "table %d", "count %f" and an IPv4 base address; any other
 * token ends parsing with an "unknown input" error.  For each of n
 * iterations it runs ip4_lookup_validate on the current address, then
 * advances the address by one (the increment is done in host byte order
 * and converted back).  A summary line with the error tally, or a
 * "No errors" line, is printed at the end.
 *
 * NOTE(review): table_id is passed straight to ip4_fib_get and to
 * ip4_lookup_validate, whose parameter is a FIB index -- confirm that a
 * table id and a FIB index are meant to be interchangeable here.
 */
2955 static clib_error_t *
2956 test_lookup_command_fn (vlib_main_t * vm,
2957 unformat_input_t * input,
2958 vlib_cli_command_t * cmd)
2965 ip4_address_t ip4_base_address;
2968 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2969 if (unformat (input, "table %d", &table_id))
2971 /* Make sure the entry exists. */
2972 fib = ip4_fib_get(table_id);
/* Rejected only when a FIB was returned and its index differs from the
 * requested one; a null result is not treated as an error here. */
2973 if ((fib) && (fib->index != table_id))
2974 return clib_error_return (0, "<fib-index> %d does not exist",
2977 else if (unformat (input, "count %f", &count))
2980 else if (unformat (input, "%U",
2981 unformat_ip4_address, &ip4_base_address))
2984 return clib_error_return (0, "unknown input `%U'",
2985 format_unformat_error, input);
2990 for (i = 0; i < n; i++)
2992 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next consecutive address. */
2995 ip4_base_address.as_u32 =
2996 clib_host_to_net_u32 (1 +
2997 clib_net_to_host_u32 (ip4_base_address.as_u32));
3001 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3003 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3009 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3010 * given FIB table to determine if there is a conflict with the
3011 * adjacency table. The fib-id can be determined by using the
3012 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3015 * @todo This command uses fib-id, other commands use table-id (not
3016 * just a name, they are different indexes). Would like to change this
3017 * to table-id for consistency.
3020 * Example of how to run the test lookup command:
3021 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3022 * No errors in 2 lookups
/* Registers the CLI path "test lookup"; the command is handled by
 * test_lookup_command_fn. */
3026 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3027 .path = "test lookup",
3028 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3029 .function = test_lookup_command_fn,
/* Set the flow-hash configuration of an IPv4 FIB identified by table id.
 *
 * The table id is translated to a FIB index through
 * ip4_main.fib_index_by_table_id; the FIB found at that index gets
 * flow_hash_config stored in its flow_hash_config field.
 *
 * @param table_id         table id to look up
 * @param flow_hash_config value to store
 * @return VNET_API_ERROR_NO_SUCH_FIB on the failure path.
 *         NOTE(review): presumably taken when the hash lookup finds no
 *         entry; the guarding condition and the success return value are
 *         not visible in this listing -- confirm.
 */
3033 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3035 ip4_main_t * im4 = &ip4_main;
3037 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3040 return VNET_API_ERROR_NO_SUCH_FIB;
3042 fib = ip4_fib_get (p[0]);
3044 fib->flow_hash_config = flow_hash_config;
/* CLI handler registered for "set ip flow-hash".
 *
 * Parses "table %d" plus any of the keywords generated from
 * foreach_flow_hash_bit; each matched keyword ORs its bit value into
 * flow_hash_config, which starts at 0.  The result is applied with
 * vnet_set_ip4_flow_hash and the return code is mapped as follows:
 *  - VNET_API_ERROR_NO_SUCH_FIB -> "no such FIB table" error;
 *  - another path logs "BUG: illegal flow hash config" via clib_warning.
 * An "unknown input" error is also produced on one parse path.
 */
3048 static clib_error_t *
3049 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3050 unformat_input_t * input,
3051 vlib_cli_command_t * cmd)
3055 u32 flow_hash_config = 0;
3058 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3059 if (unformat (input, "table %d", &table_id))
3062 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3063 foreach_flow_hash_bit
3069 return clib_error_return (0, "unknown input `%U'",
3070 format_unformat_error, input);
3072 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3078 case VNET_API_ERROR_NO_SUCH_FIB:
3079 return clib_error_return (0, "no such FIB table %d", table_id);
3082 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3090 * Configure the set of IPv4 fields used by the flow hash.
3093 * Example of how to set the flow hash on a given table:
3094 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3095 * Example of how to display the configured flow hash:
3096 * @cliexstart{show ip fib}
3097 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3100 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3101 * [0] [@0]: dpo-drop ip6
3104 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3105 * [0] [@0]: dpo-drop ip6
3108 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3109 * [0] [@0]: dpo-drop ip6
3112 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3113 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3116 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3117 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3118 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3119 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3120 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3123 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3124 * [0] [@0]: dpo-drop ip6
3125 * 255.255.255.255/32
3127 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3128 * [0] [@0]: dpo-drop ip6
3129 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3132 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3133 * [0] [@0]: dpo-drop ip6
3136 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3137 * [0] [@0]: dpo-drop ip6
3140 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3141 * [0] [@4]: ipv4-glean: af_packet0
3144 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3145 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3148 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3149 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3152 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3153 * [0] [@4]: ipv4-glean: af_packet1
3156 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3157 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3160 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3161 * [0] [@0]: dpo-drop ip6
3164 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3165 * [0] [@0]: dpo-drop ip6
3166 * 255.255.255.255/32
3168 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3169 * [0] [@0]: dpo-drop ip6
/* Registers the CLI path "set ip flow-hash"; the command is handled by
 * set_ip_flow_hash_command_fn. */
3173 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3174 .path = "set ip flow-hash",
3176 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3177 .function = set_ip_flow_hash_command_fn,
/* Attach a classifier table to an interface, or detach it.
 *
 * Validation:
 *  - sw_if_index must be an allocated entry of the sw_interfaces pool,
 *    otherwise VNET_API_ERROR_NO_MATCHING_INTERFACE is returned;
 *  - table_index must be ~0 or an allocated entry of the classifier's
 *    tables pool, otherwise VNET_API_ERROR_NO_SUCH_ENTRY is returned.
 *
 * The table index is then recorded per interface in
 * lookup_main.classify_table_index_by_sw_if_index.  If the interface has a
 * first IPv4 address, the FIB bound to the interface is updated under
 * FIB_SOURCE_CLASSIFY: with table_index != ~0 a special entry carrying a
 * classify DPO is added, otherwise the special entry is removed.
 *
 * @param vm          vlib main, unused in the visible code
 * @param sw_if_index interface to configure
 */
3181 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3184 vnet_main_t * vnm = vnet_get_main();
3185 vnet_interface_main_t * im = &vnm->interface_main;
3186 ip4_main_t * ipm = &ip4_main;
3187 ip_lookup_main_t * lm = &ipm->lookup_main;
3188 vnet_classify_main_t * cm = &vnet_classify_main;
3189 ip4_address_t *if_addr;
3191 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3192 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 means "no table" and skips the existence check. */
3194 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3195 return VNET_API_ERROR_NO_SUCH_ENTRY;
3197 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3198 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3200 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* The FIB is only touched when the interface has an address. */
3202 if (NULL != if_addr)
3204 fib_prefix_t pfx = {
3206 .fp_proto = FIB_PROTOCOL_IP4,
3207 .fp_addr.ip4 = *if_addr,
3211 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3215 if (table_index != (u32) ~0)
3217 dpo_id_t dpo = DPO_INVALID;
3222 classify_dpo_create(DPO_PROTO_IP4, table_index));
3224 fib_table_entry_special_dpo_add(fib_index,
3226 FIB_SOURCE_CLASSIFY,
3227 FIB_ENTRY_FLAG_NONE,
3233 fib_table_entry_special_remove(fib_index,
3235 FIB_SOURCE_CLASSIFY);
/* CLI handler registered for "set ip classify".
 *
 * Parses "table-index %d" and "intfc <interface>".  Both are mandatory: a
 * missing table index or an interface left at ~0 is rejected with an
 * error before anything is changed.  The pair is then applied with
 * vnet_set_ip4_classify_intfc, and its error codes are turned into CLI
 * errors ("No such interface" / "No such classifier table").
 */
3242 static clib_error_t *
3243 set_ip_classify_command_fn (vlib_main_t * vm,
3244 unformat_input_t * input,
3245 vlib_cli_command_t * cmd)
3247 u32 table_index = ~0;
/* Tracked separately because ~0 is itself an accepted table_index value. */
3248 int table_index_set = 0;
3249 u32 sw_if_index = ~0;
3252 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3253 if (unformat (input, "table-index %d", &table_index))
3254 table_index_set = 1;
3255 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3256 vnet_get_main(), &sw_if_index))
3262 if (table_index_set == 0)
3263 return clib_error_return (0, "classify table-index must be specified");
3265 if (sw_if_index == ~0)
3266 return clib_error_return (0, "interface / subif must be specified");
3268 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3275 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3276 return clib_error_return (0, "No such interface");
3278 case VNET_API_ERROR_NO_SUCH_ENTRY:
3279 return clib_error_return (0, "No such classifier table");
3285 * Assign a classification table to an interface. The classification
3286 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3287 * commands. Once the table is created, use this command to filter packets
3291 * Example of how to assign a classification table to an interface:
3292 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3295 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3296 .path = "set ip classify",
3298 "set ip classify intfc <interface> table-index <classify-idx>",
3299 .function = set_ip_classify_command_fn,