2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
56 * @brief IPv4 Forwarding.
58 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the trace helper shared by the ip4 forwarding
   nodes in this file; the definition appears further down.  The last
   argument selects which ip.adj_index[] slot (RX or TX) is recorded. */
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
/**
 * @brief Shared worker for the IPv4 lookup graph nodes.
 *
 * For every buffer in the frame this routine obtains a load-balance index,
 * picks one of its buckets, stores the bucket's DPO index in
 * vnet_buffer(b)->ip.adj_index[VLIB_TX], bumps the per-load-balance
 * combined counter and enqueues the buffer to the bucket's next node.
 *
 * The load-balance index comes from one of two places:
 *  - lookup_for_responses_to_locally_received_packets == 0: a four-step
 *    mtrie walk on the destination address.  The FIB is the one bound to
 *    the RX interface unless sw_if_index[VLIB_TX] is not ~0, in which case
 *    that value is used as the FIB index.
 *  - otherwise: the value already present in ip.adj_index[VLIB_RX].
 *
 * Packets are processed four at a time while at least eight remain (the
 * following four are prefetched), then one at a time.
 *
 * @param vm    vlib main for the current thread
 * @param node  runtime of the calling node
 * @param frame frame whose buffers are dispatched (referenced as `frame`
 *              throughout; its parameter line is not visible in this view)
 * @param lookup_for_responses_to_locally_received_packets see above
 * @return frame->n_vectors
 */
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t * im = &ip4_main;
74 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, * from, * to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next,
86 to_next, n_left_to_next);
/* Quad loop: needs 8 buffers left because buffers 4..7 are prefetched. */
88 while (n_left_from >= 8 && n_left_to_next >= 4)
90 vlib_buffer_t * p0, * p1, * p2, * p3;
91 ip4_header_t * ip0, * ip1, * ip2, * ip3;
92 __attribute__((unused)) tcp_header_t * tcp0, * tcp1, * tcp2, * tcp3;
93 ip_lookup_next_t next0, next1, next2, next3;
94 const load_balance_t * lb0, * lb1, * lb2, * lb3;
95 ip4_fib_mtrie_t * mtrie0, * mtrie1, * mtrie2, * mtrie3;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
97 ip4_address_t * dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
98 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100 __attribute__((unused)) u32 pi2, fib_index2, lb_index2, is_tcp_udp2;
101 __attribute__((unused)) u32 pi3, fib_index3, lb_index3, is_tcp_udp3;
102 flow_hash_config_t flow_hash_config0, flow_hash_config1;
103 flow_hash_config_t flow_hash_config2, flow_hash_config3;
104 u32 hash_c0, hash_c1, hash_c2, hash_c3;
105 const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3;
107 /* Prefetch next iteration. */
109 vlib_buffer_t * p4, * p5, * p6, * p7;
111 p4 = vlib_get_buffer (vm, from[4]);
112 p5 = vlib_get_buffer (vm, from[5]);
113 p6 = vlib_get_buffer (vm, from[6]);
114 p7 = vlib_get_buffer (vm, from[7]);
116 vlib_prefetch_buffer_header (p4, LOAD);
117 vlib_prefetch_buffer_header (p5, LOAD);
118 vlib_prefetch_buffer_header (p6, LOAD);
119 vlib_prefetch_buffer_header (p7, LOAD);
121 CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD);
122 CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD);
123 CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD);
124 CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue the four buffers to the current next frame. */
127 pi0 = to_next[0] = from[0];
128 pi1 = to_next[1] = from[1];
129 pi2 = to_next[2] = from[2];
130 pi3 = to_next[3] = from[3];
137 p0 = vlib_get_buffer (vm, pi0);
138 p1 = vlib_get_buffer (vm, pi1);
139 p2 = vlib_get_buffer (vm, pi2);
140 p3 = vlib_get_buffer (vm, pi3);
142 ip0 = vlib_buffer_get_current (p0);
143 ip1 = vlib_buffer_get_current (p1);
144 ip2 = vlib_buffer_get_current (p2);
145 ip3 = vlib_buffer_get_current (p3);
147 dst_addr0 = &ip0->dst_address;
148 dst_addr1 = &ip1->dst_address;
149 dst_addr2 = &ip2->dst_address;
150 dst_addr3 = &ip3->dst_address;
/* Default FIB is the one bound to the RX interface ... */
152 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
153 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
154 fib_index2 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p2)->sw_if_index[VLIB_RX]);
155 fib_index3 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p3)->sw_if_index[VLIB_RX]);
/* ... unless sw_if_index[VLIB_TX] carries an explicit FIB index (not ~0). */
156 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
157 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
158 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
159 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
160 fib_index2 = (vnet_buffer(p2)->sw_if_index[VLIB_TX] == (u32)~0) ?
161 fib_index2 : vnet_buffer(p2)->sw_if_index[VLIB_TX];
162 fib_index3 = (vnet_buffer(p3)->sw_if_index[VLIB_TX] == (u32)~0) ?
163 fib_index3 : vnet_buffer(p3)->sw_if_index[VLIB_TX];
/* mtrie walk, step 0 of 4; the remaining steps are interleaved below. */
166 if (! lookup_for_responses_to_locally_received_packets)
168 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
169 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
170 mtrie2 = &ip4_fib_get (fib_index2)->mtrie;
171 mtrie3 = &ip4_fib_get (fib_index3)->mtrie;
173 leaf0 = leaf1 = leaf2 = leaf3 = IP4_FIB_MTRIE_LEAF_ROOT;
175 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
176 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
177 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 0);
178 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 0);
181 tcp0 = (void *) (ip0 + 1);
182 tcp1 = (void *) (ip1 + 1);
183 tcp2 = (void *) (ip2 + 1);
184 tcp3 = (void *) (ip3 + 1);
186 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
187 || ip0->protocol == IP_PROTOCOL_UDP);
188 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
189 || ip1->protocol == IP_PROTOCOL_UDP);
190 is_tcp_udp2 = (ip2->protocol == IP_PROTOCOL_TCP
191 || ip2->protocol == IP_PROTOCOL_UDP);
/* Classify packet 3 from its own header (ip3), like packets 0..2. */
192 is_tcp_udp3 = (ip3->protocol == IP_PROTOCOL_TCP
193 || ip3->protocol == IP_PROTOCOL_UDP);
195 if (! lookup_for_responses_to_locally_received_packets)
197 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
198 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
199 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 1);
200 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 1);
203 if (! lookup_for_responses_to_locally_received_packets)
205 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
206 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
207 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
208 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
211 if (! lookup_for_responses_to_locally_received_packets)
213 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
214 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
215 leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
216 leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
/* Locally-received responses reuse the index stored in adj_index[VLIB_RX]. */
219 if (lookup_for_responses_to_locally_received_packets)
221 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
222 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
223 lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX];
224 lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX];
228 /* Handle default route. */
229 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
230 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
231 leaf2 = (leaf2 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie2->default_leaf : leaf2);
232 leaf3 = (leaf3 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie3->default_leaf : leaf3);
233 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
234 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
235 lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
236 lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
239 lb0 = load_balance_get (lb_index0);
240 lb1 = load_balance_get (lb_index1);
241 lb2 = load_balance_get (lb_index2);
242 lb3 = load_balance_get (lb_index3);
244 /* Use flow hash to compute multipath adjacency. */
/* The hash stays 0 unless the load-balance has more than one bucket. */
245 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
246 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
247 hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0;
248 hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0;
249 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
251 flow_hash_config0 = lb0->lb_hash_config;
252 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
253 ip4_compute_flow_hash (ip0, flow_hash_config0);
255 if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
257 flow_hash_config1 = lb1->lb_hash_config;
258 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
259 ip4_compute_flow_hash (ip1, flow_hash_config1);
261 if (PREDICT_FALSE (lb2->lb_n_buckets > 1))
263 flow_hash_config2 = lb2->lb_hash_config;
264 hash_c2 = vnet_buffer (p2)->ip.flow_hash =
265 ip4_compute_flow_hash (ip2, flow_hash_config2);
267 if (PREDICT_FALSE(lb3->lb_n_buckets > 1))
269 flow_hash_config3 = lb3->lb_hash_config;
270 hash_c3 = vnet_buffer (p3)->ip.flow_hash =
271 ip4_compute_flow_hash (ip3, flow_hash_config3);
274 ASSERT (lb0->lb_n_buckets > 0);
275 ASSERT (is_pow2 (lb0->lb_n_buckets));
276 ASSERT (lb1->lb_n_buckets > 0);
277 ASSERT (is_pow2 (lb1->lb_n_buckets));
278 ASSERT (lb2->lb_n_buckets > 0);
279 ASSERT (is_pow2 (lb2->lb_n_buckets));
280 ASSERT (lb3->lb_n_buckets > 0);
281 ASSERT (is_pow2 (lb3->lb_n_buckets));
/* Bucket selection; the first half of each index expression is not visible
   in this view - presumably the flow hash masked with n_buckets - 1. */
283 dpo0 = load_balance_get_bucket_i(lb0,
285 (lb0->lb_n_buckets_minus_1)));
286 dpo1 = load_balance_get_bucket_i(lb1,
288 (lb1->lb_n_buckets_minus_1)));
289 dpo2 = load_balance_get_bucket_i(lb2,
291 (lb2->lb_n_buckets_minus_1)));
292 dpo3 = load_balance_get_bucket_i(lb3,
294 (lb3->lb_n_buckets_minus_1)));
296 next0 = dpo0->dpoi_next_node;
297 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
298 next1 = dpo1->dpoi_next_node;
299 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
300 next2 = dpo2->dpoi_next_node;
301 vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index;
302 next3 = dpo3->dpoi_next_node;
303 vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index;
/* NOTE(review): the byte count here adds sizeof(ethernet_header_t) while
   the single-packet loop below does not - confirm which is intended. */
305 vlib_increment_combined_counter
306 (cm, cpu_index, lb_index0, 1,
307 vlib_buffer_length_in_chain (vm, p0)
308 + sizeof(ethernet_header_t));
309 vlib_increment_combined_counter
310 (cm, cpu_index, lb_index1, 1,
311 vlib_buffer_length_in_chain (vm, p1)
312 + sizeof(ethernet_header_t));
313 vlib_increment_combined_counter
314 (cm, cpu_index, lb_index2, 1,
315 vlib_buffer_length_in_chain (vm, p2)
316 + sizeof(ethernet_header_t));
317 vlib_increment_combined_counter
318 (cm, cpu_index, lb_index3, 1,
319 vlib_buffer_length_in_chain (vm, p3)
320 + sizeof(ethernet_header_t));
322 vlib_validate_buffer_enqueue_x4 (vm, node, next,
323 to_next, n_left_to_next,
325 next0, next1, next2, next3);
/* Single-packet loop: same steps as above for the remaining buffers. */
328 while (n_left_from > 0 && n_left_to_next > 0)
332 __attribute__((unused)) tcp_header_t * tcp0;
333 ip_lookup_next_t next0;
334 const load_balance_t *lb0;
335 ip4_fib_mtrie_t * mtrie0;
336 ip4_fib_mtrie_leaf_t leaf0;
337 ip4_address_t * dst_addr0;
338 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
339 flow_hash_config_t flow_hash_config0;
340 const dpo_id_t *dpo0;
346 p0 = vlib_get_buffer (vm, pi0);
348 ip0 = vlib_buffer_get_current (p0);
350 dst_addr0 = &ip0->dst_address;
352 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
353 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
354 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
356 if (! lookup_for_responses_to_locally_received_packets)
358 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
360 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
362 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
365 tcp0 = (void *) (ip0 + 1);
367 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
368 || ip0->protocol == IP_PROTOCOL_UDP);
370 if (! lookup_for_responses_to_locally_received_packets)
371 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
373 if (! lookup_for_responses_to_locally_received_packets)
374 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
376 if (! lookup_for_responses_to_locally_received_packets)
377 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
379 if (lookup_for_responses_to_locally_received_packets)
380 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
383 /* Handle default route. */
384 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
385 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
388 lb0 = load_balance_get (lbi0);
390 /* Use flow hash to compute multipath adjacency. */
391 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
392 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
394 flow_hash_config0 = lb0->lb_hash_config;
396 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
397 ip4_compute_flow_hash (ip0, flow_hash_config0);
400 ASSERT (lb0->lb_n_buckets > 0);
401 ASSERT (is_pow2 (lb0->lb_n_buckets));
403 dpo0 = load_balance_get_bucket_i(lb0,
405 (lb0->lb_n_buckets_minus_1)));
407 next0 = dpo0->dpoi_next_node;
408 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
410 vlib_increment_combined_counter
411 (cm, cpu_index, lbi0, 1,
412 vlib_buffer_length_in_chain (vm, p0));
/* Wrong speculation: close the current next frame and open another. */
419 if (PREDICT_FALSE (next0 != next))
422 vlib_put_next_frame (vm, node, next, n_left_to_next);
424 vlib_get_next_frame (vm, node, next,
425 to_next, n_left_to_next);
432 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the lookup, so the trace records adj_index[VLIB_TX]
   as written above. */
435 if (node->flags & VLIB_NODE_FLAG_TRACE)
436 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
438 return frame->n_vectors;
441 /** @brief IPv4 lookup node.
444 This is the main IPv4 lookup dispatch node.
446 @param vm vlib_main_t corresponding to the current thread
447 @param node vlib_node_runtime_t
448 @param frame vlib_frame_t whose contents should be dispatched
450 @par Graph mechanics: buffer metadata, next index usage
453 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
454 - Indicates the @c sw_if_index value of the interface that the
455 packet was received on.
456 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
457 - When the value is @c ~0 then the node performs a longest prefix
458 match (LPM) for the packet destination address in the FIB attached
459 to the receive interface.
460 - Otherwise perform LPM for the packet destination address in the
461 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
462 value (0, 1, ...) and not a VRF id.
465 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
466 - The index of the DPO taken from the selected load-balance bucket (@c dpo->dpoi_index).
469 - Dispatches the packet to the next node recorded in the selected
470 load-balance bucket, @c dpo->dpoi_next_node
471 (where @c dpo is the bucket chosen from the lookup result load-balance).
474 ip4_lookup (vlib_main_t * vm,
475 vlib_node_runtime_t * node,
476 vlib_frame_t * frame)
478 return ip4_lookup_inline (vm, node, frame,
479 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Node function for "ip4-lookup": forwards the frame to ip4_lookup_inline
   with the flag set to 0, i.e. the mtrie lookup on the destination address
   is performed rather than reusing ip.adj_index[VLIB_RX]. */
/* Trace formatter for the ip4-lookup node; defined further down. */
483 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-lookup": frames carry u32 buffer
   indices and the next-node table is IP4_LOOKUP_NEXT_NODES with
   IP_LOOKUP_N_NEXT entries. */
485 VLIB_REGISTER_NODE (ip4_lookup_node) = {
486 .function = ip4_lookup,
487 .name = "ip4-lookup",
488 .vector_size = sizeof (u32),
490 .format_trace = format_ip4_lookup_trace,
491 .n_next_nodes = IP_LOOKUP_N_NEXT,
492 .next_nodes = IP4_LOOKUP_NEXT_NODES,
495 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/**
 * @brief Node function for "ip4-load-balance".
 *
 * On entry ip.adj_index[VLIB_TX] holds a load-balance index.  For each
 * buffer the flow hash is computed with that load-balance's hash config,
 * a bucket is chosen with (flow_hash & lb_n_buckets_minus_1),
 * ip.adj_index[VLIB_TX] is overwritten with the bucket's DPO index and the
 * buffer goes to the bucket's next node.  Packets and bytes are counted in
 * load_balance_main.lbm_via_counters.
 *
 * @return frame->n_vectors
 */
498 ip4_load_balance (vlib_main_t * vm,
499 vlib_node_runtime_t * node,
500 vlib_frame_t * frame)
502 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
503 u32 n_left_from, n_left_to_next, * from, * to_next;
504 ip_lookup_next_t next;
505 u32 cpu_index = os_get_cpu_number();
507 from = vlib_frame_vector_args (frame);
508 n_left_from = frame->n_vectors;
509 next = node->cached_next_index;
/* Tracing happens before the loop, so the trace captures
   adj_index[VLIB_TX] as it arrived (the load-balance index). */
511 if (node->flags & VLIB_NODE_FLAG_TRACE)
512 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
514 while (n_left_from > 0)
516 vlib_get_next_frame (vm, node, next,
517 to_next, n_left_to_next);
520 while (n_left_from > 0 && n_left_to_next > 0)
522 ip_lookup_next_t next0;
523 const load_balance_t *lb0;
526 const ip4_header_t *ip0;
527 const dpo_id_t *dpo0;
532 p0 = vlib_get_buffer (vm, pi0);
534 ip0 = vlib_buffer_get_current (p0);
535 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
537 lb0 = load_balance_get(lbi0);
/* Unlike ip4_lookup_inline, the hash is computed unconditionally here. */
538 hc0 = lb0->lb_hash_config;
539 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
541 dpo0 = load_balance_get_bucket_i(lb0,
542 vnet_buffer(p0)->ip.flow_hash &
543 (lb0->lb_n_buckets_minus_1));
545 next0 = dpo0->dpoi_next_node;
546 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
548 vlib_increment_combined_counter
549 (cm, cpu_index, lbi0, 1,
550 vlib_buffer_length_in_chain (vm, p0));
/* Wrong speculation: close the current next frame and open another. */
557 if (PREDICT_FALSE (next0 != next))
560 vlib_put_next_frame (vm, node, next, n_left_to_next);
562 vlib_get_next_frame (vm, node, next,
563 to_next, n_left_to_next);
570 vlib_put_next_frame (vm, node, next, n_left_to_next);
573 return frame->n_vectors;
/* Generic trace formatter shared by several nodes; defined further down. */
576 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-load-balance".  Declared as a sibling
   of "ip4-lookup", so no next-node table of its own is listed here. */
578 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
579 .function = ip4_load_balance,
580 .name = "ip4-load-balance",
581 .vector_size = sizeof (u32),
582 .sibling_of = "ip4-lookup",
584 .format_trace = format_ip4_forward_next_trace,
587 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
589 /* get first interface address */
/* Walks the addresses of sw_if_index (honouring unnumbered interfaces) and
   reports the matching interface-address object through *result_ia, or 0
   when no address was selected.  NOTE(review): the loop body that assigns
   `result` and the return statement are not visible in this view. */
591 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
592 ip_interface_address_t ** result_ia)
594 ip_lookup_main_t * lm = &im->lookup_main;
595 ip_interface_address_t * ia = 0;
596 ip4_address_t * result = 0;
598 foreach_ip_interface_address (lm, ia, sw_if_index,
599 1 /* honor unnumbered */,
601 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
606 *result_ia = result ? ia : 0;
/* Installs the FIB entries that go with interface address `a` in table
   fib_index, all sourced from FIB_SOURCE_INTERFACE:
    - a CONNECTED|ATTACHED entry, whose adjacency is remembered in
      a->neighbor_probe_adj_index;
    - a CONNECTED|LOCAL entry;
    - when the interface has a classify table configured, a special entry
      pointing at a classify DPO.
   NOTE(review): the conditions separating these steps and the prefix
   adjustments between them are not visible in this view. */
611 ip4_add_interface_routes (u32 sw_if_index,
612 ip4_main_t * im, u32 fib_index,
613 ip_interface_address_t * a)
615 ip_lookup_main_t * lm = &im->lookup_main;
616 ip4_address_t * address = ip_interface_address_get_address (lm, a);
618 .fp_len = a->address_length,
619 .fp_proto = FIB_PROTOCOL_IP4,
620 .fp_addr.ip4 = *address,
/* Start from "no adjacency"; only overwritten if the entry below is added. */
623 a->neighbor_probe_adj_index = ~0;
627 fib_node_index_t fei;
629 fei = fib_table_entry_update_one_path(fib_index,
631 FIB_SOURCE_INTERFACE,
632 (FIB_ENTRY_FLAG_CONNECTED |
633 FIB_ENTRY_FLAG_ATTACHED),
635 NULL, /* No next-hop address */
637 ~0, // invalid FIB index
640 FIB_ROUTE_PATH_FLAG_NONE);
641 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* A classify table index of ~0 means none is configured for the interface. */
646 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
648 u32 classify_table_index =
649 lm->classify_table_index_by_sw_if_index [sw_if_index];
650 if (classify_table_index != (u32) ~0)
652 dpo_id_t dpo = DPO_INVALID;
657 classify_dpo_create(DPO_PROTO_IP4,
658 classify_table_index));
660 fib_table_entry_special_dpo_add(fib_index,
669 fib_table_entry_update_one_path(fib_index,
671 FIB_SOURCE_INTERFACE,
672 (FIB_ENTRY_FLAG_CONNECTED |
673 FIB_ENTRY_FLAG_LOCAL),
677 ~0, // invalid FIB index
680 FIB_ROUTE_PATH_FLAG_NONE);
/* Removes FIB_SOURCE_INTERFACE entries for `address`/address_length from
   table fib_index; two fib_table_entry_delete calls are issued.
   NOTE(review): the prefix passed to each call and any condition between
   them are not visible in this view. */
684 ip4_del_interface_routes (ip4_main_t * im,
686 ip4_address_t * address,
690 .fp_len = address_length,
691 .fp_proto = FIB_PROTOCOL_IP4,
692 .fp_addr.ip4 = *address,
697 fib_table_entry_delete(fib_index,
699 FIB_SOURCE_INTERFACE);
703 fib_table_entry_delete(fib_index,
705 FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 on an interface.  A per-interface
   counter in im->ip_enabled_by_sw_if_index is incremented or decremented,
   and the "ip4-lookup" / "ip4-lookup-multicast" features are toggled on the
   unicast and multicast arcs.  NOTE(review): the branch choosing between
   the increment and decrement paths, and what happens when the tests below
   are true (presumably an early return), are not visible in this view. */
709 ip4_sw_interface_enable_disable (u32 sw_if_index,
712 ip4_main_t * im = &ip4_main;
/* Grow the counter vector to cover sw_if_index; new slots start at 0. */
714 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
717 * enable/disable only on the 1<->0 transition
/* Pre-increment: the test is true when the count was already non-zero. */
721 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
726 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
/* Pre-decrement: the test is true while other references remain. */
727 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
730 vnet_feature_enable_disable ("ip4-unicast", "ip4-lookup", sw_if_index,
733 vnet_feature_enable_disable ("ip4-multicast", "ip4-lookup-multicast", sw_if_index,
/* Adds or removes an IPv4 address on an interface.  Visible steps:
    1. make sure the interface has a FIB index slot and build the
       address/FIB key;
    2. reject an address that overlaps an address already on the interface;
    3. update the interface address pool via ip_interface_address_add_del;
    4. enable/disable IPv4 on the interface and delete or add the
       interface routes;
    5. invoke the registered add/del callbacks when the pool size changed.
   Returns a clib_error_t on conflict.  NOTE(review): the remaining
   parameters, the add/delete branching and the final return are not
   visible in this view. */
738 static clib_error_t *
739 ip4_add_del_interface_address_internal (vlib_main_t * vm,
741 ip4_address_t * address,
745 vnet_main_t * vnm = vnet_get_main();
746 ip4_main_t * im = &ip4_main;
747 ip_lookup_main_t * lm = &im->lookup_main;
748 clib_error_t * error = 0;
749 u32 if_address_index, elts_before;
750 ip4_address_fib_t ip4_af, * addr_fib = 0;
752 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
753 ip4_addr_fib_init (&ip4_af, address,
754 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
755 vec_add1 (addr_fib, ip4_af);
758 * there is no support for adj-fib handling in the presence of overlapping
759 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
764 /* When adding an address check that it does not conflict
765 with an existing address. */
766 ip_interface_address_t * ia;
767 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
768 0 /* honor unnumbered */,
770 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Conflict if either prefix covers the other address. */
772 if (ip4_destination_matches_route (im, address, x, ia->address_length)
773 || ip4_destination_matches_route (im, x, address, address_length))
774 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
775 format_ip4_address_and_length, address, address_length,
776 format_ip4_address_and_length, x, ia->address_length,
777 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so a real add/remove can be detected below. */
781 elts_before = pool_elts (lm->if_address_pool);
783 error = ip_interface_address_add_del
793 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
796 ip4_del_interface_routes (im, ip4_af.fib_index, address,
799 ip4_add_interface_routes (sw_if_index,
800 im, ip4_af.fib_index,
802 (lm->if_address_pool, if_address_index));
804 /* An unchanged pool size means a duplicate address; run the callbacks only when the pool grew or shrank. */
805 if (elts_before != pool_elts (lm->if_address_pool))
807 ip4_add_del_interface_address_callback_t * cb;
808 vec_foreach (cb, im->add_del_interface_address_callbacks)
809 cb->function (im, cb->function_opaque, sw_if_index,
810 address, address_length,
/* Entry point for adding or deleting an interface address; forwards its
   arguments to ip4_add_del_interface_address_internal.  NOTE(review): the
   trailing parameter(s) and argument(s) are not visible in this view. */
821 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
822 ip4_address_t * address, u32 address_length,
825 return ip4_add_del_interface_address_internal
826 (vm, sw_if_index, address, address_length,
830 /* Built-in ip4 unicast rx feature path definition */
/* The arc starts at "ip4-input" / "ip4-input-no-checksum".  The
   runs_before constraints below give this order:
     ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
     ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
     vpath-input-ip4 -> ip4-lookup -> ip4-drop
   "ip4-source-and-port-range-check-rx" is only constrained to run before
   "ip4-policer-classify". */
831 VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
833 .arc_name = "ip4-unicast",
834 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
835 .arc_index_ptr = &ip4_main.lookup_main.ucast_feature_arc_index,
838 VNET_FEATURE_INIT (ip4_flow_classify, static) = {
839 .arc_name = "ip4-unicast",
840 .node_name = "ip4-flow-classify",
841 .runs_before = VNET_FEATURES ("ip4-inacl"),
844 VNET_FEATURE_INIT (ip4_inacl, static) = {
845 .arc_name = "ip4-unicast",
846 .node_name = "ip4-inacl",
847 .runs_before = VNET_FEATURES ("ip4-source-check-via-rx"),
850 VNET_FEATURE_INIT (ip4_source_check_1, static) = {
851 .arc_name = "ip4-unicast",
852 .node_name = "ip4-source-check-via-rx",
853 .runs_before = VNET_FEATURES ("ip4-source-check-via-any"),
856 VNET_FEATURE_INIT (ip4_source_check_2, static) = {
857 .arc_name = "ip4-unicast",
858 .node_name = "ip4-source-check-via-any",
859 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
862 VNET_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
863 .arc_name = "ip4-unicast",
864 .node_name = "ip4-source-and-port-range-check-rx",
865 .runs_before = VNET_FEATURES ("ip4-policer-classify"),
868 VNET_FEATURE_INIT (ip4_policer_classify, static) = {
869 .arc_name = "ip4-unicast",
870 .node_name = "ip4-policer-classify",
871 .runs_before = VNET_FEATURES ("ipsec-input-ip4"),
874 VNET_FEATURE_INIT (ip4_ipsec, static) = {
875 .arc_name = "ip4-unicast",
876 .node_name = "ipsec-input-ip4",
877 .runs_before = VNET_FEATURES ("vpath-input-ip4"),
880 VNET_FEATURE_INIT (ip4_vpath, static) = {
881 .arc_name = "ip4-unicast",
882 .node_name = "vpath-input-ip4",
883 .runs_before = VNET_FEATURES ("ip4-lookup"),
886 VNET_FEATURE_INIT (ip4_lookup, static) = {
887 .arc_name = "ip4-unicast",
888 .node_name = "ip4-lookup",
889 .runs_before = VNET_FEATURES ("ip4-drop"),
892 VNET_FEATURE_INIT (ip4_drop, static) = {
893 .arc_name = "ip4-unicast",
894 .node_name = "ip4-drop",
895 .runs_before = 0, /* not before any other features */
899 /* Built-in ip4 multicast rx feature path definition */
/* Same start nodes as the unicast arc.  Order implied by runs_before:
     vpath-input-ip4 -> ip4-lookup-multicast -> ip4-drop */
900 VNET_FEATURE_ARC_INIT (ip4_multicast, static) =
902 .arc_name = "ip4-multicast",
903 .start_nodes = VNET_FEATURES ("ip4-input", "ip4-input-no-checksum"),
904 .arc_index_ptr = &ip4_main.lookup_main.mcast_feature_arc_index,
907 VNET_FEATURE_INIT (ip4_vpath_mc, static) = {
908 .arc_name = "ip4-multicast",
909 .node_name = "vpath-input-ip4",
910 .runs_before = VNET_FEATURES ("ip4-lookup-multicast"),
913 VNET_FEATURE_INIT (ip4_lookup_mc, static) = {
914 .arc_name = "ip4-multicast",
915 .node_name = "ip4-lookup-multicast",
916 .runs_before = VNET_FEATURES ("ip4-drop"),
919 VNET_FEATURE_INIT (ip4_mc_drop, static) = {
920 .arc_name = "ip4-multicast",
921 .node_name = "ip4-drop",
922 .runs_before = 0, /* last feature */
925 /* Source and port-range check ip4 tx feature path definition */
/* The arc starts at "ip4-rewrite-transit" / "ip4-midchain".  Order implied
   by runs_before:
     ip4-source-and-port-range-check-tx -> ipsec-output-ip4 ->
     interface-output */
926 VNET_FEATURE_ARC_INIT (ip4_output, static) =
928 .arc_name = "ip4-output",
929 .start_nodes = VNET_FEATURES ("ip4-rewrite-transit", "ip4-midchain"),
930 .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index,
933 VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
934 .arc_name = "ip4-output",
935 .node_name = "ip4-source-and-port-range-check-tx",
936 .runs_before = VNET_FEATURES ("ipsec-output-ip4"),
939 VNET_FEATURE_INIT (ip4_ipsec_output, static) = {
940 .arc_name = "ip4-output",
941 .node_name = "ipsec-output-ip4",
942 .runs_before = VNET_FEATURES ("interface-output"),
945 /* Built-in ip4 tx feature path definition */
946 VNET_FEATURE_INIT (ip4_interface_output, static) = {
947 .arc_name = "ip4-output",
948 .node_name = "interface-output",
949 .runs_before = 0, /* not before any other features */
/* Software-interface add/delete hook (registered below).  Ensures the
   interface has a slot in fib_index_by_sw_if_index and toggles the
   terminal feature of each ip4 arc: "ip4-drop" on the unicast and
   multicast arcs, "interface-output" on the output arc.  Always returns 0.
   NOTE(review): the enable/disable argument of each call is not visible
   in this view - presumably derived from the add/delete flag. */
953 static clib_error_t *
954 ip4_sw_interface_add_del (vnet_main_t * vnm,
958 ip4_main_t * im = &ip4_main;
960 /* Fill in lookup tables with default table (0). */
961 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
963 vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index,
966 vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index,
969 vnet_feature_enable_disable ("ip4-output", "interface-output", sw_if_index,
972 return /* no error */ 0;
975 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
977 /* Global IP4 main. */
/* Init function for the IPv4 lookup subsystem (registered below).  Runs
   vnet_feature_init first, precomputes the prefix-length masks, initialises
   the lookup main for IPv4, creates FIB table 0, hooks the packet-generator
   header parser onto the ip4-lookup node and builds the ARP request packet
   template.  NOTE(review): the error-return paths and the final return are
   not visible in this view. */
981 ip4_lookup_init (vlib_main_t * vm)
983 ip4_main_t * im = &ip4_main;
984 clib_error_t * error;
987 if ((error = vlib_call_init_function (vm, vnet_feature_init)))
/* fib_masks[i] is the network-byte-order mask for a prefix of length i. */
990 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
995 m = pow2_mask (i) << (32 - i);
998 im->fib_masks[i] = clib_host_to_net_u32 (m);
1001 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1003 /* Create FIB with index 0 and table id of 0. */
1004 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1008 pn = pg_get_node (ip4_lookup_node.index);
1009 pn->unformat_edit = unformat_pg_ip4_header;
/* Build the fixed part of an Ethernet/IPv4 ARP request once, as a template. */
1013 ethernet_arp_header_t h;
1015 memset (&h, 0, sizeof (h));
1017 /* Set target ethernet address to all zeros. */
1018 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
1020 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1021 #define _8(f,v) h.f = v;
1022 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1023 _16 (l3_type, ETHERNET_TYPE_IP4);
1024 _8 (n_l2_address_bytes, 6);
1025 _8 (n_l3_address_bytes, 4);
1026 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1030 vlib_packet_template_init (vm,
1031 &im->ip4_arp_request_packet_template,
1034 /* alloc chunk size */ 8,
1041 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record shared by the ip4 forwarding nodes.  The users
   in this file read and write dpo_index, flow_hash, fib_index and
   packet_data.  NOTE(review): the scalar field declarations are not visible
   in this view. */
1044 /* Adjacency taken. */
1049 /* Packet data, possibly *after* rewrite. */
1050 u8 packet_data[64 - 1*sizeof(u32)];
1051 } ip4_forward_next_trace_t;
/* Trace formatter: prints only the captured packet bytes as an IPv4
   header, indented to the current column.  The va_list supplies vm, node
   and the ip4_forward_next_trace_t record, in that order. */
1053 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1055 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1056 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1057 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1058 uword indent = format_get_indent (s);
1059 s = format (s, "%U%U",
1060 format_white_space, indent,
1061 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for ip4-lookup: prints the FIB index, DPO index and flow
   hash on one line, then the captured packet bytes as an IPv4 header on
   the next line.  The va_list supplies vm, node and the trace record. */
1065 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1067 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1068 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1069 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1070 uword indent = format_get_indent (s);
1072 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1073 t->fib_index, t->dpo_index, t->flow_hash);
1074 s = format (s, "\n%U%U",
1075 format_white_space, indent,
1076 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite-style nodes: prints the TX interface index,
   the DPO index with its adjacency and the flow hash, then the captured
   packet bytes formatted as adjacency packet data.
   NOTE(review): the value printed as "tx_sw_if_index" is t->fib_index; the
   trace helper stores sw_if_index[VLIB_TX] in that field when it is not ~0,
   so presumably the field holds the TX interface here - confirm.  Some
   argument lines of both format calls are not visible in this view. */
1080 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1082 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1083 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1084 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1085 vnet_main_t * vnm = vnet_get_main();
1086 uword indent = format_get_indent (s);
1088 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1089 t->fib_index, t->dpo_index, format_ip_adjacency,
1090 t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1092 s = format (s, "\n%U%U",
1093 format_white_space, indent,
1094 format_ip_adjacency_packet_data,
1096 t->packet_data, sizeof (t->packet_data));
1100 /* Common trace function for all ip4-forward next nodes. */
/* For every buffer in the frame flagged VLIB_BUFFER_IS_TRACED, adds a
   trace record holding:
    - dpo_index: ip.adj_index[which_adj_index];
    - flow_hash: ip.flow_hash;
    - fib_index: sw_if_index[VLIB_TX] when it is not ~0, otherwise the FIB
      index bound to the RX interface;
    - packet_data: the first sizeof(packet_data) bytes from the buffer's
      current position.
   Buffers are handled two at a time with prefetch of the next pair, then
   singly.  NOTE(review): the loop headers and index bookkeeping are not
   visible in this view. */
1102 ip4_forward_next_trace (vlib_main_t * vm,
1103 vlib_node_runtime_t * node,
1104 vlib_frame_t * frame,
1105 vlib_rx_or_tx_t which_adj_index)
1108 ip4_main_t * im = &ip4_main;
1110 n_left = frame->n_vectors;
1111 from = vlib_frame_vector_args (frame);
1116 vlib_buffer_t * b0, * b1;
1117 ip4_forward_next_trace_t * t0, * t1;
1119 /* Prefetch next iteration. */
1120 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1121 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1126 b0 = vlib_get_buffer (vm, bi0);
1127 b1 = vlib_get_buffer (vm, bi1);
1129 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1131 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1132 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1133 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1134 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1135 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1136 vec_elt (im->fib_index_by_sw_if_index,
1137 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1139 clib_memcpy (t0->packet_data,
1140 vlib_buffer_get_current (b0),
1141 sizeof (t0->packet_data));
1143 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1145 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1146 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1147 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1148 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1149 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1150 vec_elt (im->fib_index_by_sw_if_index,
1151 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1152 clib_memcpy (t1->packet_data,
1153 vlib_buffer_get_current (b1),
1154 sizeof (t1->packet_data));
/* Remaining buffers, one at a time. */
1164 ip4_forward_next_trace_t * t0;
1168 b0 = vlib_get_buffer (vm, bi0);
1170 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1172 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1173 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1174 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1175 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1176 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1177 vec_elt (im->fib_index_by_sw_if_index,
1178 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1179 clib_memcpy (t0->packet_data,
1180 vlib_buffer_get_current (b0),
1181 sizeof (t0->packet_data));
/*
 * Shared body of ip4_drop and ip4_punt.
 *
 * Calls vlib_error_drop_buffers for the frame (ip4_input_node.index is one
 * of the arguments) and, when VLIB_NODE_FLAG_TRACE is set on the node,
 * records traces through ip4_forward_next_trace using the VLIB_TX adjacency
 * index.  error_code is the ip4_error_t supplied by the caller.
 */
1189 ip4_drop_or_punt (vlib_main_t * vm,
1190 vlib_node_runtime_t * node,
1191 vlib_frame_t * frame,
1192 ip4_error_t error_code)
1194 u32 * buffers = vlib_frame_vector_args (frame);
1195 uword n_packets = frame->n_vectors;
1197 vlib_error_drop_buffers (vm, node,
1202 ip4_input_node.index,
1205 if (node->flags & VLIB_NODE_FLAG_TRACE)
1206 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for ip4_drop_node: forwards the frame to ip4_drop_or_punt
   with IP4_ERROR_ADJACENCY_DROP and returns its result. */
1212 ip4_drop (vlib_main_t * vm,
1213 vlib_node_runtime_t * node,
1214 vlib_frame_t * frame)
1215 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function for ip4_punt_node: forwards the frame to ip4_drop_or_punt
   with IP4_ERROR_ADJACENCY_PUNT and returns its result. */
1218 ip4_punt (vlib_main_t * vm,
1219 vlib_node_runtime_t * node,
1220 vlib_frame_t * frame)
1221 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* Graph-node registration for ip4_drop: vectors of u32 buffer indices,
   traces rendered by format_ip4_forward_next_trace. */
1223 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1224 .function = ip4_drop,
1226 .vector_size = sizeof (u32),
1228 .format_trace = format_ip4_forward_next_trace,
1236 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/* Graph-node registration for ip4_punt: vectors of u32 buffer indices,
   traces rendered by format_ip4_forward_next_trace. */
1238 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1239 .function = ip4_punt,
1241 .vector_size = sizeof (u32),
1243 .format_trace = format_ip4_forward_next_trace,
1251 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
/*
 * Software L4 checksum over an IPv4 packet that may span chained buffers.
 *
 * Steps visible in the body:
 *   1. Seed the sum with the payload length (IP total length minus IP header
 *      length) plus the protocol number shifted left by 16, converted with
 *      clib_host_to_net_u32.
 *   2. Fold in the source and destination addresses with ip_csum_with_carry:
 *      two unaligned u32 loads when uword is 32 bits, otherwise one unaligned
 *      u64 load starting at src_address (presumably covering both addresses
 *      because dst_address follows src_address -- confirm against
 *      ip4_header_t).
 *   3. Run ip_incremental_checksum over the payload, starting right after the
 *      IP header in p0 and continuing into p0->next_buffer while bytes remain.
 *   4. Fold to 16 bits with ip_csum_fold and complement into sum16.
 *
 * NOTE(review): for every buffer after the first, n_this_buffer is taken
 * straight from current_length and is not limited to n_bytes_left; if the
 * chain holds more bytes than the IP length implies, the u32 n_bytes_left
 * would wrap -- confirm callers rule this out.
 */
1253 /* Compute TCP/UDP/ICMP4 checksum in software. */
1255 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1259 u32 ip_header_length, payload_length_host_byte_order;
1260 u32 n_this_buffer, n_bytes_left;
1262 void * data_this_buffer;
1264 /* Initialize checksum with ip header. */
1265 ip_header_length = ip4_header_bytes (ip0);
1266 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1267 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1269 if (BITS (uword) == 32)
1271 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1272 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1275 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Limit the first pass to the payload bytes actually present in p0. */
1277 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1278 data_this_buffer = (void *) ip0 + ip_header_length;
1279 if (n_this_buffer + ip_header_length > p0->current_length)
1280 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1283 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1284 n_bytes_left -= n_this_buffer;
1285 if (n_bytes_left == 0)
/* More payload expected: it must live in a chained buffer. */
1288 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1289 p0 = vlib_get_buffer (vm, p0->next_buffer);
1290 data_this_buffer = vlib_buffer_get_current (p0);
1291 n_this_buffer = p0->current_length;
1294 sum16 = ~ ip_csum_fold (sum0);
/*
 * Validates the TCP/UDP checksum of the IPv4 packet at the buffer's current
 * data pointer and records the outcome in p0->flags.
 *
 *   - Asserts the protocol is TCP or UDP.
 *   - UDP with a zero checksum field: sets both
 *     IP_BUFFER_L4_CHECKSUM_COMPUTED and IP_BUFFER_L4_CHECKSUM_CORRECT
 *     without computing anything.
 *   - Otherwise: computes the checksum with ip4_tcp_udp_compute_checksum,
 *     sets IP_BUFFER_L4_CHECKSUM_COMPUTED, and sets the CORRECT bit only
 *     when the computed sum16 is 0.
 *
 * ip4_local assigns this function's result to its flags variables, so the
 * return value is presumably the updated p0->flags -- confirm.
 *
 * NOTE(review): udp0 is located with (ip0 + 1), i.e. sizeof (ip4_header_t)
 * past the header start, while ip4_local uses ip4_next_header (ip0);
 * confirm behaviour for headers that carry options.
 */
1300 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1302 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1303 udp_header_t * udp0;
1306 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1307 || ip0->protocol == IP_PROTOCOL_UDP);
1309 udp0 = (void *) (ip0 + 1);
1310 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1312 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1313 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1317 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1319 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1320 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * Node function for ip4_local_node ("ip4-local").
 *
 * When tracing is enabled the whole frame is first passed to
 * ip4_forward_next_trace.  Packets are then processed two at a time
 * (suffix 0/1) and afterwards one at a time; both paths do the same work:
 *
 *   - Look up the packet's SOURCE address in the mtrie of the FIB selected by
 *     im->fib_index_by_sw_if_index[RX interface] (lookup steps 0..3,
 *     substituting mtrie->default_leaf for an empty leaf).  The lookup steps
 *     are interleaved with the checks below.
 *   - Classify the protocol; fragments are mapped to protocol value 0xfe.
 *   - For TCP/UDP, consult IP_BUFFER_L4_CHECKSUM_CORRECT in the flags and, if
 *     the checksum has not been computed yet, call
 *     ip4_tcp_udp_validate_checksum.  A UDP checksum field of 0 counts as
 *     good.
 *   - For UDP, a negative (IP length field - UDP length field) yields
 *     IP4_ERROR_UDP_LENGTH.
 *   - error starts as IP4_ERROR_UNKNOWN_PROTOCOL, which acts as the
 *     "nothing wrong so far" value; a bad TCP/UDP checksum yields
 *     IP4_ERROR_TCP_CHECKSUM + is_udp.
 *   - Source checks, applied only while error is still UNKNOWN_PROTOCOL:
 *     a source that resolves to a DPO_RECEIVE dpo gives
 *     IP4_ERROR_SPOOFED_LOCAL_PACKETS; a source for which
 *     fib_urpf_check_size (lb->lb_urpf) is zero gives
 *     IP4_ERROR_SRC_LOOKUP_MISS unless the destination is 0xFFFFFFFF.
 *   - next is lm->local_next_by_ip_protocol[proto], replaced by
 *     IP_LOCAL_NEXT_DROP whenever error differs from UNKNOWN_PROTOCOL.
 *   - p->error is taken from the ip4-input node's error table (error_node).
 *
 * Returns frame->n_vectors.
 */
1326 ip4_local (vlib_main_t * vm,
1327 vlib_node_runtime_t * node,
1328 vlib_frame_t * frame)
1330 ip4_main_t * im = &ip4_main;
1331 ip_lookup_main_t * lm = &im->lookup_main;
1332 ip_local_next_t next_index;
1333 u32 * from, * to_next, n_left_from, n_left_to_next;
1334 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1336 from = vlib_frame_vector_args (frame);
1337 n_left_from = frame->n_vectors;
1338 next_index = node->cached_next_index;
1340 if (node->flags & VLIB_NODE_FLAG_TRACE)
1341 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1343 while (n_left_from > 0)
1345 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1347 while (n_left_from >= 4 && n_left_to_next >= 2)
1349 vlib_buffer_t * p0, * p1;
1350 ip4_header_t * ip0, * ip1;
1351 udp_header_t * udp0, * udp1;
1352 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1353 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1354 const dpo_id_t *dpo0, *dpo1;
1355 const load_balance_t *lb0, *lb1;
1356 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1357 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1358 i32 len_diff0, len_diff1;
1359 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1360 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1363 pi0 = to_next[0] = from[0];
1364 pi1 = to_next[1] = from[1];
1368 n_left_to_next -= 2;
1370 p0 = vlib_get_buffer (vm, pi0);
1371 p1 = vlib_get_buffer (vm, pi1);
1373 ip0 = vlib_buffer_get_current (p0);
1374 ip1 = vlib_buffer_get_current (p1);
1376 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1377 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1378 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1379 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1381 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1382 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1384 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1386 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1387 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1389 /* Treat IP frag packets as "experimental" protocol for now
1390 until support of IP frag reassembly is implemented */
1391 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1392 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1393 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1394 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1395 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1396 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1401 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1402 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1404 udp0 = ip4_next_header (ip0);
1405 udp1 = ip4_next_header (ip1);
1407 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1408 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1409 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1411 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1412 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1414 /* Verify UDP length. */
1415 ip_len0 = clib_net_to_host_u16 (ip0->length);
1416 ip_len1 = clib_net_to_host_u16 (ip1->length);
1417 udp_len0 = clib_net_to_host_u16 (udp0->length);
1418 udp_len1 = clib_net_to_host_u16 (udp1->length);
1420 len_diff0 = ip_len0 - udp_len0;
1421 len_diff1 = ip_len1 - udp_len1;
1423 len_diff0 = is_udp0 ? len_diff0 : 0;
1424 len_diff1 = is_udp1 ? len_diff1 : 0;
1426 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1427 & good_tcp_udp0 & good_tcp_udp1)))
1432 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1433 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1435 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1436 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1441 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1442 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1444 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1445 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1449 good_tcp_udp0 &= len_diff0 >= 0;
1450 good_tcp_udp1 &= len_diff1 >= 0;
1452 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1453 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1455 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1457 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1458 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1460 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1461 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1462 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1464 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1465 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1468 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1469 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1470 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1471 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1473 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1474 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1476 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1477 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1479 lb0 = load_balance_get(lbi0);
1480 lb1 = load_balance_get(lbi1);
1481 dpo0 = load_balance_get_bucket_i(lb0, 0);
1482 dpo1 = load_balance_get_bucket_i(lb1, 0);
1485 * Must have a route to source otherwise we drop the packet.
1486 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1489 * - the source is a recieve => it's from us => bogus, do this
1490 * first since it sets a different error code.
1491 * - uRPF check for any route to source - accept if passes.
1492 * - allow packets destined to the broadcast address from unknown sources
1494 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1495 dpo0->dpoi_type == DPO_RECEIVE) ?
1496 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1498 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1499 !fib_urpf_check_size(lb0->lb_urpf) &&
1500 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1501 ? IP4_ERROR_SRC_LOOKUP_MISS
1503 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1504 dpo1->dpoi_type == DPO_RECEIVE) ?
1505 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1507 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1508 !fib_urpf_check_size(lb1->lb_urpf) &&
1509 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1510 ? IP4_ERROR_SRC_LOOKUP_MISS
1513 next0 = lm->local_next_by_ip_protocol[proto0];
1514 next1 = lm->local_next_by_ip_protocol[proto1];
1516 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1517 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1519 p0->error = error0 ? error_node->errors[error0] : 0;
1520 p1->error = error1 ? error_node->errors[error1] : 0;
1522 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1524 if (PREDICT_FALSE (enqueue_code != 0))
1526 switch (enqueue_code)
1532 n_left_to_next += 1;
1533 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1539 n_left_to_next += 1;
1540 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1544 /* A B B or A B C */
1546 n_left_to_next += 2;
1547 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1548 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1551 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1553 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1560 while (n_left_from > 0 && n_left_to_next > 0)
1564 udp_header_t * udp0;
1565 ip4_fib_mtrie_t * mtrie0;
1566 ip4_fib_mtrie_leaf_t leaf0;
1567 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1569 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1570 load_balance_t *lb0;
1571 const dpo_id_t *dpo0;
1573 pi0 = to_next[0] = from[0];
1577 n_left_to_next -= 1;
1579 p0 = vlib_get_buffer (vm, pi0);
1581 ip0 = vlib_buffer_get_current (p0);
1583 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1584 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1586 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1588 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1590 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1592 /* Treat IP frag packets as "experimental" protocol for now
1593 until support of IP frag reassembly is implemented */
1594 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1595 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1596 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1600 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1602 udp0 = ip4_next_header (ip0);
1604 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1605 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1607 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1609 /* Verify UDP length. */
1610 ip_len0 = clib_net_to_host_u16 (ip0->length);
1611 udp_len0 = clib_net_to_host_u16 (udp0->length);
1613 len_diff0 = ip_len0 - udp_len0;
1615 len_diff0 = is_udp0 ? len_diff0 : 0;
1617 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1622 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1623 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1625 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1626 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1630 good_tcp_udp0 &= len_diff0 >= 0;
1632 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1634 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1636 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1638 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1639 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1640 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1643 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1644 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1646 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1647 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1649 lb0 = load_balance_get(lbi0);
1650 dpo0 = load_balance_get_bucket_i(lb0, 0);
1652 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1653 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1656 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1657 dpo0->dpoi_type == DPO_RECEIVE) ?
1658 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1660 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1661 !fib_urpf_check_size(lb0->lb_urpf) &&
1662 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1663 ? IP4_ERROR_SRC_LOOKUP_MISS
1666 next0 = lm->local_next_by_ip_protocol[proto0];
1668 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1670 p0->error = error0? error_node->errors[error0] : 0;
1672 if (PREDICT_FALSE (next0 != next_index))
1674 n_left_to_next += 1;
1675 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1678 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1681 n_left_to_next -= 1;
1685 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1688 return frame->n_vectors;
/* Graph-node registration for "ip4-local": runs ip4_local on vectors of u32
   buffer indices.  The statically listed next nodes are error-drop,
   error-punt, ip4-udp-lookup and ip4-icmp-input; ip4_register_protocol adds
   further next nodes to this node through vlib_node_add_next. */
1691 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1692 .function = ip4_local,
1693 .name = "ip4-local",
1694 .vector_size = sizeof (u32),
1696 .format_trace = format_ip4_forward_next_trace,
1698 .n_next_nodes = IP_LOCAL_N_NEXT,
1700 [IP_LOCAL_NEXT_DROP] = "error-drop",
1701 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1702 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1703 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1707 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/*
 * Registers node_index as the handler for an IP protocol number in ip4-local.
 *
 * protocol   - IP protocol number; asserted to be a valid index into
 *              lm->local_next_by_ip_protocol.
 * node_index - graph node to receive matching packets.
 *
 * The node is added as a next node of ip4_local_node with vlib_node_add_next
 * and the resulting next index is stored in
 * lm->local_next_by_ip_protocol[protocol], which ip4_local reads per packet.
 */
1709 void ip4_register_protocol (u32 protocol, u32 node_index)
1711 vlib_main_t * vm = vlib_get_main();
1712 ip4_main_t * im = &ip4_main;
1713 ip_lookup_main_t * lm = &im->lookup_main;
1715 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1716 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler for "show ip local".
 *
 * Prints a heading, then one line with the protocol number for every entry
 * of lm->local_next_by_ip_protocol whose next index is not
 * IP_LOCAL_NEXT_PUNT.  The input and cmd arguments are not referenced by
 * the statements shown here.
 */
1719 static clib_error_t *
1720 show_ip_local_command_fn (vlib_main_t * vm,
1721 unformat_input_t * input,
1722 vlib_cli_command_t * cmd)
1724 ip4_main_t * im = &ip4_main;
1725 ip_lookup_main_t * lm = &im->lookup_main;
1728 vlib_cli_output (vm, "Protocols handled by ip4_local");
1729 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1731 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1732 vlib_cli_output (vm, "%d", i);
1740 * Display the set of protocols handled by the local IPv4 stack.
1743 * Example of how to display local protocol table:
1744 * @cliexstart{show ip local}
1745 * Protocols handled by ip4_local
/* CLI registration: binds the "show ip local" path to show_ip_local_command_fn. */
1752 VLIB_CLI_COMMAND (show_ip_local, static) = {
1753 .path = "show ip local",
1754 .function = show_ip_local_command_fn,
1755 .short_help = "show ip local",
/*
 * Shared body of ip4_arp and ip4_glean, which pass 0 and 1 respectively as
 * the final argument; the body reads that argument as is_glean.
 *
 * Per call:
 *   - Records traces via ip4_forward_next_trace when tracing is enabled.
 *   - If more than 1e-3 s passed since the last seed change, draws fresh
 *     hash_seeds from vm->random_buffer and resets hash_bitmap.  These are
 *     function-local statics, so they persist across calls.
 *
 * Per packet:
 *   - Fetches the adjacency from ip.adj_index[VLIB_TX] and copies its
 *     rewrite_header.sw_if_index into sw_if_index[VLIB_TX].
 *   - Mixes an address into the hash state and finalizes it with
 *     hash_v3_finalize32.  The statements XOR in both the packet's
 *     destination address (glean case) and the adjacency's next hop;
 *     presumably is_glean selects between them -- confirm.
 *   - Uses the hash to test and set a bit in hash_bitmap; a bit that was
 *     already set makes drop0 non-zero.
 *   - Places the original packet on the IP4_ARP_NEXT_DROP frame with error
 *     IP4_ARP_ERROR_DROP (seen before) or IP4_ARP_ERROR_REQUEST_SENT.
 *   - Checks for an adjacency that has already become a rewrite, and sets
 *     IP4_ARP_ERROR_NON_ARP_ADJ when the adjacency type does not match
 *     is_glean.
 *   - Otherwise takes an ARP request from
 *     im->ip4_arp_request_packet_template, fills in the hardware address and
 *     the source/target IPv4 addresses, applies the adjacency rewrite, and
 *     enqueues it to adj0->rewrite_header.next_index.  If
 *     ip4_src_address_for_packet returns non-zero the request buffer is
 *     freed and the error is IP4_ARP_ERROR_NO_SOURCE_ADDRESS.
 *
 * Returns frame->n_vectors.
 */
1760 ip4_arp_inline (vlib_main_t * vm,
1761 vlib_node_runtime_t * node,
1762 vlib_frame_t * frame,
1765 vnet_main_t * vnm = vnet_get_main();
1766 ip4_main_t * im = &ip4_main;
1767 ip_lookup_main_t * lm = &im->lookup_main;
1768 u32 * from, * to_next_drop;
1769 uword n_left_from, n_left_to_next_drop, next_index;
1770 static f64 time_last_seed_change = -1e100;
1771 static u32 hash_seeds[3];
1772 static uword hash_bitmap[256 / BITS (uword)];
1775 if (node->flags & VLIB_NODE_FLAG_TRACE)
1776 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1778 time_now = vlib_time_now (vm);
1779 if (time_now - time_last_seed_change > 1e-3)
1782 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1783 sizeof (hash_seeds));
1784 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1785 hash_seeds[i] = r[i];
1787 /* Mark all hash keys as not seen before. */
1788 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1791 time_last_seed_change = time_now;
1794 from = vlib_frame_vector_args (frame);
1795 n_left_from = frame->n_vectors;
1796 next_index = node->cached_next_index;
1797 if (next_index == IP4_ARP_NEXT_DROP)
1798 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1800 while (n_left_from > 0)
1802 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1803 to_next_drop, n_left_to_next_drop);
1805 while (n_left_from > 0 && n_left_to_next_drop > 0)
1807 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1808 ip_adjacency_t * adj0;
1815 p0 = vlib_get_buffer (vm, pi0);
1817 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1818 adj0 = ip_get_adjacency (lm, adj_index0);
1819 ip0 = vlib_buffer_get_current (p0);
1825 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1826 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1831 * this is the Glean case, so we are ARPing for the
1832 * packet's destination
1834 a0 ^= ip0->dst_address.data_u32;
1838 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1842 hash_v3_finalize32 (a0, b0, c0);
/* NOTE(review): c0 is reduced to a word index before m0 is derived from
   c0 % BITS (uword), so the bit position comes from the word index rather
   than from the low hash bits; m0 is also declared u32 while the shift is
   done in uword.  Confirm whether the mask should be computed before the
   division. */
1844 c0 &= BITS (hash_bitmap) - 1;
1845 c0 = c0 / BITS (uword);
1846 m0 = (uword) 1 << (c0 % BITS (uword));
1848 bm0 = hash_bitmap[c0];
1849 drop0 = (bm0 & m0) != 0;
1851 /* Mark it as seen. */
1852 hash_bitmap[c0] = bm0 | m0;
1856 to_next_drop[0] = pi0;
1858 n_left_to_next_drop -= 1;
1860 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1863 * the adj has been updated to a rewrite but the node the DPO that got
1864 * us here hasn't - yet. no big deal. we'll drop while we wait.
1866 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1873 * Can happen if the control-plane is programming tables
1874 * with traffic flowing; at least that's today's lame excuse.
1876 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1877 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1879 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1882 /* Send ARP request. */
1886 ethernet_arp_header_t * h0;
1887 vnet_hw_interface_t * hw_if0;
1889 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1891 /* Add rewrite/encap string for ARP packet. */
1892 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1894 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1896 /* Src ethernet address in ARP header. */
1897 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1898 sizeof (h0->ip4_over_ethernet[0].ethernet));
1902 /* The interface's source address is stashed in the Glean Adj */
1903 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1905 /* Copy in destination address we are requesting. This is the
1906 * glean case, so it's the packet's destination.*/
1907 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1911 /* Src IP address in ARP header. */
1912 if (ip4_src_address_for_packet(lm, sw_if_index0,
1913 &h0->ip4_over_ethernet[0].ip4))
1915 /* No source address available */
1916 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1917 vlib_buffer_free(vm, &bi0, 1);
1921 /* Copy in destination address we are requesting from the
1923 h0->ip4_over_ethernet[1].ip4.data_u32 =
1924 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1927 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1928 b0 = vlib_get_buffer (vm, bi0);
1929 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1931 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1933 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1937 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1940 return frame->n_vectors;
/* Node function for ip4_arp_node: runs ip4_arp_inline with the final
   (glean) argument set to 0. */
1944 ip4_arp (vlib_main_t * vm,
1945 vlib_node_runtime_t * node,
1946 vlib_frame_t * frame)
1948 return (ip4_arp_inline(vm, node, frame, 0));
/* Node function for ip4_glean_node: runs ip4_arp_inline with the final
   (glean) argument set to 1. */
1952 ip4_glean (vlib_main_t * vm,
1953 vlib_node_runtime_t * node,
1954 vlib_frame_t * frame)
1956 return (ip4_arp_inline(vm, node, frame, 1));
/* Counter descriptions indexed by IP4_ARP_ERROR_*; shared by the ip4_arp_node
   and ip4_glean_node registrations as their error_strings table. */
1959 static char * ip4_arp_error_strings[] = {
1960 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1961 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1962 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1963 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1964 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1965 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph-node registration for ip4_arp: u32 buffer-index vectors, error
   counters described by ip4_arp_error_strings, and "error-drop" as the
   IP4_ARP_NEXT_DROP next node. */
1968 VLIB_REGISTER_NODE (ip4_arp_node) = {
1969 .function = ip4_arp,
1971 .vector_size = sizeof (u32),
1973 .format_trace = format_ip4_forward_next_trace,
1975 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1976 .error_strings = ip4_arp_error_strings,
1978 .n_next_nodes = IP4_ARP_N_NEXT,
1980 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Graph-node registration for "ip4-glean": same shape as ip4_arp_node
   (shared error strings, "error-drop" as IP4_ARP_NEXT_DROP) but dispatching
   to ip4_glean. */
1984 VLIB_REGISTER_NODE (ip4_glean_node) = {
1985 .function = ip4_glean,
1986 .name = "ip4-glean",
1987 .vector_size = sizeof (u32),
1989 .format_trace = format_ip4_forward_next_trace,
1991 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1992 .error_strings = ip4_arp_error_strings,
1994 .n_next_nodes = IP4_ARP_N_NEXT,
1996 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * foreach_notrace_ip4_arp_error names the IP4_ARP_ERROR_* counters that
 * arp_notrace_init hands to vnet_pcap_drop_trace_filter_add_del, using the
 * error values from the ip4_arp_node runtime.  arp_notrace_init is
 * registered to run at startup through VLIB_INIT_FUNCTION.
 */
2000 #define foreach_notrace_ip4_arp_error \
2006 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2008 vlib_node_runtime_t *rt =
2009 vlib_node_get_runtime (vm, ip4_arp_node.index);
2011 /* don't trace ARP request packets */
2013 vnet_pcap_drop_trace_filter_add_del \
2014 (rt->errors[IP4_ARP_ERROR_##a], \
2016 foreach_notrace_ip4_arp_error;
2021 VLIB_INIT_FUNCTION(arp_notrace_init);
/*
 * ip4_probe_neighbor (vm, dst, sw_if_index)
 *
 * Builds one ARP request for dst and hands it to the output node of the
 * hardware interface underneath sw_if_index.
 *
 * Error returns (clib_error_t):
 *   - the software interface lacks VNET_SW_INTERFACE_FLAG_ADMIN_UP;
 *   - ip4_interface_address_matching_destination finds no usable source
 *     address (presumably a NULL src -- confirm); vnm->api_errno is set to
 *     VNET_API_ERROR_NO_MATCHING_INTERFACE in that case.
 *
 * On success the request is taken from im->ip4_arp_request_packet_template,
 * filled with the interface hardware address, src and dst, tagged with
 * sw_if_index for both RX and TX, prefixed with the rewrite of the
 * interface address's neighbor_probe_adj_index adjacency, and enqueued on a
 * frame to hi->output_node_index.  Returns 0.
 */
2024 /* Send an ARP request to see if given destination is reachable on given interface. */
2026 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2028 vnet_main_t * vnm = vnet_get_main();
2029 ip4_main_t * im = &ip4_main;
2030 ethernet_arp_header_t * h;
2031 ip4_address_t * src;
2032 ip_interface_address_t * ia;
2033 ip_adjacency_t * adj;
2034 vnet_hw_interface_t * hi;
2035 vnet_sw_interface_t * si;
2039 si = vnet_get_sw_interface (vnm, sw_if_index);
2041 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2043 return clib_error_return (0, "%U: interface %U down",
2044 format_ip4_address, dst,
2045 format_vnet_sw_if_index_name, vnm,
2049 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2052 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2053 return clib_error_return
2054 (0, "no matching interface address for destination %U (interface %U)",
2055 format_ip4_address, dst,
2056 format_vnet_sw_if_index_name, vnm, sw_if_index);
2059 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2061 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2063 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2065 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2067 h->ip4_over_ethernet[0].ip4 = src[0];
2068 h->ip4_over_ethernet[1].ip4 = dst[0];
2070 b = vlib_get_buffer (vm, bi);
2071 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2073 /* Add encapsulation string for software interface (e.g. ethernet header). */
2074 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2075 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2078 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2079 u32 * to_next = vlib_frame_vector_args (f);
2082 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2085 return /* no error */ 0;
2089 IP4_REWRITE_NEXT_DROP,
2090 IP4_REWRITE_NEXT_ARP,
2091 IP4_REWRITE_NEXT_ICMP_ERROR,
2092 } ip4_rewrite_next_t;
2095 ip4_rewrite_inline (vlib_main_t * vm,
2096 vlib_node_runtime_t * node,
2097 vlib_frame_t * frame,
2098 int rewrite_for_locally_received_packets,
2101 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2102 u32 * from = vlib_frame_vector_args (frame);
2103 u32 n_left_from, n_left_to_next, * to_next, next_index;
2104 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2105 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2107 n_left_from = frame->n_vectors;
2108 next_index = node->cached_next_index;
2109 u32 cpu_index = os_get_cpu_number();
2111 while (n_left_from > 0)
2113 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2115 while (n_left_from >= 4 && n_left_to_next >= 2)
2117 ip_adjacency_t * adj0, * adj1;
2118 vlib_buffer_t * p0, * p1;
2119 ip4_header_t * ip0, * ip1;
2120 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2121 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2122 u32 next0_override, next1_override;
2123 u32 tx_sw_if_index0, tx_sw_if_index1;
2125 if (rewrite_for_locally_received_packets)
2126 next0_override = next1_override = 0;
2128 /* Prefetch next iteration. */
2130 vlib_buffer_t * p2, * p3;
2132 p2 = vlib_get_buffer (vm, from[2]);
2133 p3 = vlib_get_buffer (vm, from[3]);
2135 vlib_prefetch_buffer_header (p2, STORE);
2136 vlib_prefetch_buffer_header (p3, STORE);
2138 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2139 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2142 pi0 = to_next[0] = from[0];
2143 pi1 = to_next[1] = from[1];
2148 n_left_to_next -= 2;
2150 p0 = vlib_get_buffer (vm, pi0);
2151 p1 = vlib_get_buffer (vm, pi1);
2153 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2154 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2156 /* We should never rewrite a pkt using the MISS adjacency */
2157 ASSERT(adj_index0 && adj_index1);
2159 ip0 = vlib_buffer_get_current (p0);
2160 ip1 = vlib_buffer_get_current (p1);
2162 error0 = error1 = IP4_ERROR_NONE;
2163 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2165 /* Decrement TTL & update checksum.
2166 Works either endian, so no need for byte swap. */
2167 if (! rewrite_for_locally_received_packets)
2169 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2171 /* Input node should have rejected packets with ttl 0. */
2172 ASSERT (ip0->ttl > 0);
2173 ASSERT (ip1->ttl > 0);
2175 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2176 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2178 checksum0 += checksum0 >= 0xffff;
2179 checksum1 += checksum1 >= 0xffff;
2181 ip0->checksum = checksum0;
2182 ip1->checksum = checksum1;
2191 * If the ttl drops below 1 when forwarding, generate
2194 if (PREDICT_FALSE(ttl0 <= 0))
2196 error0 = IP4_ERROR_TIME_EXPIRED;
2197 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2198 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2199 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2200 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2202 if (PREDICT_FALSE(ttl1 <= 0))
2204 error1 = IP4_ERROR_TIME_EXPIRED;
2205 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2206 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2207 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2208 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2211 /* Verify checksum. */
2212 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2213 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2216 /* Rewrite packet header and updates lengths. */
2217 adj0 = ip_get_adjacency (lm, adj_index0);
2218 adj1 = ip_get_adjacency (lm, adj_index1);
2220 if (rewrite_for_locally_received_packets)
2222 if (PREDICT_FALSE(adj0->lookup_next_index
2223 == IP_LOOKUP_NEXT_ARP))
2224 next0_override = IP4_REWRITE_NEXT_ARP;
2225 if (PREDICT_FALSE(adj1->lookup_next_index
2226 == IP_LOOKUP_NEXT_ARP))
2227 next1_override = IP4_REWRITE_NEXT_ARP;
2230 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2231 rw_len0 = adj0[0].rewrite_header.data_bytes;
2232 rw_len1 = adj1[0].rewrite_header.data_bytes;
2233 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2234 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2236 /* Check MTU of outgoing interface. */
2237 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2238 ? IP4_ERROR_MTU_EXCEEDED
2240 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2241 ? IP4_ERROR_MTU_EXCEEDED
2244 next0 = (error0 == IP4_ERROR_NONE)
2245 ? adj0[0].rewrite_header.next_index : next0;
2247 if (rewrite_for_locally_received_packets)
2248 next0 = next0 && next0_override ? next0_override : next0;
2250 next1 = (error1 == IP4_ERROR_NONE)
2251 ? adj1[0].rewrite_header.next_index : next1;
2253 if (rewrite_for_locally_received_packets)
2254 next1 = next1 && next1_override ? next1_override : next1;
2257 * We've already accounted for an ethernet_header_t elsewhere
2259 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2260 vlib_increment_combined_counter
2261 (&adjacency_counters,
2262 cpu_index, adj_index0,
2263 /* packet increment */ 0,
2264 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2266 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2267 vlib_increment_combined_counter
2268 (&adjacency_counters,
2269 cpu_index, adj_index1,
2270 /* packet increment */ 0,
2271 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2273 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2274 * to see the IP header */
2275 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2277 p0->current_data -= rw_len0;
2278 p0->current_length += rw_len0;
2279 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2280 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2283 vnet_feature_arc_start(lm->output_feature_arc_index,
2284 tx_sw_if_index0, &next0, p0);
2286 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2288 p1->current_data -= rw_len1;
2289 p1->current_length += rw_len1;
2291 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2292 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2295 vnet_feature_arc_start(lm->output_feature_arc_index,
2296 tx_sw_if_index1, &next1, p1);
2299 /* Guess we are only writing on simple Ethernet header. */
2300 vnet_rewrite_two_headers (adj0[0], adj1[0],
2302 sizeof (ethernet_header_t));
2306 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2307 adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2310 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2311 to_next, n_left_to_next,
2312 pi0, pi1, next0, next1);
2315 while (n_left_from > 0 && n_left_to_next > 0)
2317 ip_adjacency_t * adj0;
2320 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2322 u32 tx_sw_if_index0;
2324 if (rewrite_for_locally_received_packets)
2327 pi0 = to_next[0] = from[0];
2329 p0 = vlib_get_buffer (vm, pi0);
2331 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2333 /* We should never rewrite a pkt using the MISS adjacency */
2336 adj0 = ip_get_adjacency (lm, adj_index0);
2338 ip0 = vlib_buffer_get_current (p0);
2340 error0 = IP4_ERROR_NONE;
2341 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2343 /* Decrement TTL & update checksum. */
2344 if (! rewrite_for_locally_received_packets)
2346 i32 ttl0 = ip0->ttl;
2348 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2350 checksum0 += checksum0 >= 0xffff;
2352 ip0->checksum = checksum0;
2354 ASSERT (ip0->ttl > 0);
2360 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2362 if (PREDICT_FALSE(ttl0 <= 0))
2365 * If the ttl drops below 1 when forwarding, generate
2368 error0 = IP4_ERROR_TIME_EXPIRED;
2369 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2370 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2371 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2372 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2376 if (rewrite_for_locally_received_packets)
2379 * We have to override the next_index in ARP adjacencies,
2380 * because they're set up for ip4-arp, not this node...
2382 if (PREDICT_FALSE(adj0->lookup_next_index
2383 == IP_LOOKUP_NEXT_ARP))
2384 next0_override = IP4_REWRITE_NEXT_ARP;
2387 /* Guess we are only writing on simple Ethernet header. */
2388 vnet_rewrite_one_header (adj0[0], ip0,
2389 sizeof (ethernet_header_t));
2391 /* Update packet buffer attributes/set output interface. */
2392 rw_len0 = adj0[0].rewrite_header.data_bytes;
2393 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2395 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2396 vlib_increment_combined_counter
2397 (&adjacency_counters,
2398 cpu_index, adj_index0,
2399 /* packet increment */ 0,
2400 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2402 /* Check MTU of outgoing interface. */
2403 error0 = (vlib_buffer_length_in_chain (vm, p0)
2404 > adj0[0].rewrite_header.max_l3_packet_bytes
2405 ? IP4_ERROR_MTU_EXCEEDED
2408 p0->error = error_node->errors[error0];
2410 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2411 * to see the IP header */
2412 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2414 p0->current_data -= rw_len0;
2415 p0->current_length += rw_len0;
2416 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2418 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2419 next0 = adj0[0].rewrite_header.next_index;
2423 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2426 vnet_feature_arc_start(lm->output_feature_arc_index,
2427 tx_sw_if_index0, &next0, p0);
2431 if (rewrite_for_locally_received_packets)
2432 next0 = next0 && next0_override ? next0_override : next0;
2437 n_left_to_next -= 1;
2439 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2440 to_next, n_left_to_next,
2444 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2447 /* Need to do trace after rewrites to pick up new packet data. */
2448 if (node->flags & VLIB_NODE_FLAG_TRACE)
2449 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2451 return frame->n_vectors;
2455 /** @brief IPv4 transit rewrite node.
2456 @node ip4-rewrite-transit
2458 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2459 header checksum, fetch the ip adjacency, check the outbound mtu,
2460 apply the adjacency rewrite, and send pkts to the adjacency
2461 rewrite header's rewrite_next_index.
2463 @param vm vlib_main_t corresponding to the current thread
2464 @param node vlib_node_runtime_t
2465 @param frame vlib_frame_t whose contents should be dispatched
2467 @par Graph mechanics: buffer metadata, next index usage
2470 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2471 - the rewrite adjacency index
2472 - <code>adj->lookup_next_index</code>
2473 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2474 the packet will be dropped.
2475 - <code>adj->rewrite_header</code>
2476 - Rewrite string length, rewrite string, next_index
2479 - <code>b->current_data, b->current_length</code>
2480 - Updated net of applying the rewrite string
2482 <em>Next Indices:</em>
2483 - <code> adj->rewrite_header.next_index </code>
2487 ip4_rewrite_transit (vlib_main_t * vm,
2488 vlib_node_runtime_t * node,
2489 vlib_frame_t * frame)
/* Thin wrapper: all work is done by ip4_rewrite_inline.  The transit node
 * passes rewrite_for_locally_received_packets = 0 and 0 for the final
 * argument (NOTE(review): ip4_midchain is the only caller passing 1 there,
 * so it is presumably the midchain flag -- confirm against the
 * ip4_rewrite_inline signature). */
2491 return ip4_rewrite_inline (vm, node, frame,
2492 /* rewrite_for_locally_received_packets */ 0, 0);
2495 /** @brief IPv4 local rewrite node.
2496 @node ip4-rewrite-local
2498 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2499 the outbound interface mtu, apply the adjacency rewrite, and send
2500 pkts to the adjacency rewrite header's rewrite_next_index. Deal
2501 with corner cases of the form "an icmp4 packet arrives with src =
2502 dst = interface addr."
2504 @param vm vlib_main_t corresponding to the current thread
2505 @param node vlib_node_runtime_t
2506 @param frame vlib_frame_t whose contents should be dispatched
2508 @par Graph mechanics: buffer metadata, next index usage
2511 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2512 - the rewrite adjacency index
2513 - <code>adj->lookup_next_index</code>
2514 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2515 the packet will be dropped.
2516 - <code>adj->rewrite_header</code>
2517 - Rewrite string length, rewrite string, next_index
2520 - <code>b->current_data, b->current_length</code>
2521 - Updated net of applying the rewrite string
2523 <em>Next Indices:</em>
2524 - <code> adj->rewrite_header.next_index </code>
2529 ip4_rewrite_local (vlib_main_t * vm,
2530 vlib_node_runtime_t * node,
2531 vlib_frame_t * frame)
/* Thin wrapper: all work is done by ip4_rewrite_inline.  The local node
 * passes rewrite_for_locally_received_packets = 1 and 0 for the final
 * argument (NOTE(review): presumably the midchain flag -- confirm against
 * the ip4_rewrite_inline signature). */
2533 return ip4_rewrite_inline (vm, node, frame,
2534 /* rewrite_for_locally_received_packets */ 1, 0);
2538 ip4_midchain (vlib_main_t * vm,
2539 vlib_node_runtime_t * node,
2540 vlib_frame_t * frame)
/* Node function registered below as "ip4-midchain".  Delegates to
 * ip4_rewrite_inline with rewrite_for_locally_received_packets = 0 and 1
 * for the final argument; this is the only wrapper that passes 1 there
 * (NOTE(review): presumably the flag that enables the adjacency's
 * midchain fixup_func call -- confirm against ip4_rewrite_inline). */
2542 return ip4_rewrite_inline (vm, node, frame,
2543 /* rewrite_for_locally_received_packets */ 0, 1);
/* Graph-node registration for "ip4-rewrite-transit", dispatched to
 * ip4_rewrite_transit.  The next-node table maps the IP4_REWRITE_NEXT_*
 * indices to "error-drop", "ip4-arp" and "ip4-icmp-error".  The
 * "ip4-midchain" and "ip4-rewrite-local" registrations in this file name
 * this node in their .sibling_of field. */
2546 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2547 .function = ip4_rewrite_transit,
2548 .name = "ip4-rewrite-transit",
2549 .vector_size = sizeof (u32),
2551 .format_trace = format_ip4_rewrite_trace,
2555 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2556 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2557 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2561 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
/* Graph-node registration for "ip4-midchain", dispatched to ip4_midchain.
 * Declared as a sibling of "ip4-rewrite-transit"; traces are printed with
 * format_ip4_forward_next_trace rather than the format_ip4_rewrite_trace
 * used by the two rewrite nodes. */
2563 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2564 .function = ip4_midchain,
2565 .name = "ip4-midchain",
2566 .vector_size = sizeof (u32),
2568 .format_trace = format_ip4_forward_next_trace,
2570 .sibling_of = "ip4-rewrite-transit",
2573 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/* Graph-node registration for "ip4-rewrite-local", dispatched to
 * ip4_rewrite_local.  Declared as a sibling of "ip4-rewrite-transit" and
 * traced with format_ip4_rewrite_trace. */
2575 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2576 .function = ip4_rewrite_local,
2577 .name = "ip4-rewrite-local",
2578 .vector_size = sizeof (u32),
2580 .sibling_of = "ip4-rewrite-transit",
2582 .format_trace = format_ip4_rewrite_trace,
2587 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/* CLI handler for "set interface ip table <interface> <table-id>".
 *
 * Parses a software interface (unformat_vnet_sw_interface) followed by a
 * decimal table id.  Either parse failing produces a clib error that echoes
 * the offending input.  On success it obtains a FIB index from
 * fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, ...) and stores it in
 * ip4_main.fib_index_by_sw_if_index[sw_if_index], growing that vector first
 * if needed.
 *
 * NOTE(review): despite the add_del name, nothing visible here handles a
 * "del" case -- confirm. */
2589 static clib_error_t *
2590 add_del_interface_table (vlib_main_t * vm,
2591 unformat_input_t * input,
2592 vlib_cli_command_t * cmd)
2594 vnet_main_t * vnm = vnet_get_main();
2595 clib_error_t * error = 0;
2596 u32 sw_if_index, table_id;
2600 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2602 error = clib_error_return (0, "unknown interface `%U'",
2603 format_unformat_error, input);
2607 if (unformat (input, "%d", &table_id))
2611 error = clib_error_return (0, "expected table id `%U'",
2612 format_unformat_error, input);
2617 ip4_main_t * im = &ip4_main;
2620 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2625 // changing an interface's table has consequences for any connecteds
2626 // and adj-fibs already installed.
/* Only the per-interface FIB index is updated here; nothing visible in this
 * function re-homes routes that were installed before the change. */
2628 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2629 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2637 * Place the indicated interface into the supplied IPv4 FIB table (also known
2638 * as a VRF). If the FIB table does not exist, this command creates it. To
2639 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2640 * FIB table will only be displayed if a route has been added to the table, or
2641 * an IP Address is assigned to an interface in the table (which adds a route
2644 * @note IP addresses added after setting the interface IP table end up in
2645 * the indicated FIB table. If the IP address is added prior to adding the
2646 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2647 * but potentially counter-intuitive results occur if you provision interface
2648 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2649 * IP table ID provisioned. It might be marginally useful to evade source RPF
2650 * drops to put an interface address into multiple FIBs.
2653 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2654 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/* Registers the "set interface ip table" CLI path and binds it to
 * add_del_interface_table. */
2657 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2658 .path = "set interface ip table",
2659 .function = add_del_interface_table,
2660 .short_help = "set interface ip table <interface> <table-id>",
2666 ip4_lookup_multicast (vlib_main_t * vm,
2667 vlib_node_runtime_t * node,
2668 vlib_frame_t * frame)
/* Node function registered below as "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *  - choose a FIB index: the RX interface's entry in
 *    im->fib_index_by_sw_if_index, replaced by sw_if_index[VLIB_TX] when
 *    that field is not ~0;
 *  - obtain a load-balance index with ip4_fib_table_lookup_lb;
 *  - compute the flow hash from the load-balance's lb_hash_config and store
 *    it in the buffer's ip.flow_hash;
 *  - pick a bucket with (flow_hash & lb_n_buckets_minus_1) and take the next
 *    node and ip.adj_index[VLIB_TX] from that bucket's DPO;
 *  - add one packet and the buffer-chain length to the
 *    load_balance_main.lbm_to_counters entry for the load-balance index.
 *
 * Buffers are processed two at a time while at least four remain in the
 * frame and two slots remain in the next frame, then one at a time.
 * Returns frame->n_vectors. */
2670 ip4_main_t * im = &ip4_main;
2671 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2672 u32 n_left_from, n_left_to_next, * from, * to_next;
2673 ip_lookup_next_t next;
2674 u32 cpu_index = os_get_cpu_number();
2676 from = vlib_frame_vector_args (frame);
2677 n_left_from = frame->n_vectors;
2678 next = node->cached_next_index;
2680 while (n_left_from > 0)
2682 vlib_get_next_frame (vm, node, next,
2683 to_next, n_left_to_next);
2685 while (n_left_from >= 4 && n_left_to_next >= 2)
2687 vlib_buffer_t * p0, * p1;
2688 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2689 ip_lookup_next_t next0, next1;
2690 ip4_header_t * ip0, * ip1;
2691 u32 fib_index0, fib_index1;
2692 const dpo_id_t *dpo0, *dpo1;
2693 const load_balance_t * lb0, * lb1;
2695 /* Prefetch next iteration. */
2697 vlib_buffer_t * p2, * p3;
2699 p2 = vlib_get_buffer (vm, from[2]);
2700 p3 = vlib_get_buffer (vm, from[3]);
2702 vlib_prefetch_buffer_header (p2, LOAD);
2703 vlib_prefetch_buffer_header (p3, LOAD);
/* Prefetch one IPv4 header's worth of bytes starting at the buffers' data
 * arrays. */
2705 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2706 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2709 pi0 = to_next[0] = from[0];
2710 pi1 = to_next[1] = from[1];
2712 p0 = vlib_get_buffer (vm, pi0);
2713 p1 = vlib_get_buffer (vm, pi1);
2715 ip0 = vlib_buffer_get_current (p0);
2716 ip1 = vlib_buffer_get_current (p1);
/* Default FIB index is the one bound to the RX interface ... */
2718 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2719 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
/* ... unless sw_if_index[VLIB_TX] is set (not ~0), in which case that value
 * is used as the FIB index. */
2720 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2721 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2722 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2723 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2725 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2727 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2730 lb0 = load_balance_get (lb_index0);
2731 lb1 = load_balance_get (lb_index1);
/* The bucket selection below masks with lb_n_buckets_minus_1, so the bucket
 * count is asserted to be a non-zero power of two. */
2733 ASSERT (lb0->lb_n_buckets > 0);
2734 ASSERT (is_pow2 (lb0->lb_n_buckets));
2735 ASSERT (lb1->lb_n_buckets > 0);
2736 ASSERT (is_pow2 (lb1->lb_n_buckets));
2738 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2739 (ip0, lb0->lb_hash_config);
2741 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2742 (ip1, lb1->lb_hash_config);
2744 dpo0 = load_balance_get_bucket_i(lb0,
2745 (vnet_buffer (p0)->ip.flow_hash &
2746 (lb0->lb_n_buckets_minus_1)));
2747 dpo1 = load_balance_get_bucket_i(lb1,
2748 (vnet_buffer (p1)->ip.flow_hash &
2749 (lb1->lb_n_buckets_minus_1)));
2751 next0 = dpo0->dpoi_next_node;
2752 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2753 next1 = dpo1->dpoi_next_node;
2754 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2756 if (1) /* $$$$$$ HACK FIXME */
2757 vlib_increment_combined_counter
2758 (cm, cpu_index, lb_index0, 1,
2759 vlib_buffer_length_in_chain (vm, p0));
2760 if (1) /* $$$$$$ HACK FIXME */
2761 vlib_increment_combined_counter
2762 (cm, cpu_index, lb_index1, 1,
2763 vlib_buffer_length_in_chain (vm, p1));
2767 n_left_to_next -= 2;
/* wrong_next bit 0 is set when p0's next differs from the cached next,
 * bit 1 when p1's does; 0 means both buffers stay in the current frame. */
2770 wrong_next = (next0 != next) + 2*(next1 != next);
2771 if (PREDICT_FALSE (wrong_next != 0))
2779 n_left_to_next += 1;
2780 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2786 n_left_to_next += 1;
2787 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2793 n_left_to_next += 2;
2794 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2795 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2799 vlib_put_next_frame (vm, node, next, n_left_to_next);
2801 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single-buffer loop: same per-packet steps as the dual loop above. */
2807 while (n_left_from > 0 && n_left_to_next > 0)
2812 ip_lookup_next_t next0;
2814 const dpo_id_t *dpo0;
2815 const load_balance_t * lb0;
2820 p0 = vlib_get_buffer (vm, pi0);
2822 ip0 = vlib_buffer_get_current (p0);
2824 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2825 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2826 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2827 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2829 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2832 lb0 = load_balance_get (lb_index0);
2834 ASSERT (lb0->lb_n_buckets > 0);
2835 ASSERT (is_pow2 (lb0->lb_n_buckets));
2837 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2838 (ip0, lb0->lb_hash_config);
2840 dpo0 = load_balance_get_bucket_i(lb0,
2841 (vnet_buffer (p0)->ip.flow_hash &
2842 (lb0->lb_n_buckets_minus_1)));
2844 next0 = dpo0->dpoi_next_node;
2845 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2847 if (1) /* $$$$$$ HACK FIXME */
2848 vlib_increment_combined_counter
2849 (cm, cpu_index, lb_index0, 1,
2850 vlib_buffer_length_in_chain (vm, p0));
2854 n_left_to_next -= 1;
/* The buffer's next differs from the frame being filled: hand the current
 * frame back and fetch another one before continuing. */
2857 if (PREDICT_FALSE (next0 != next))
2859 n_left_to_next += 1;
2860 vlib_put_next_frame (vm, node, next, n_left_to_next);
2862 vlib_get_next_frame (vm, node, next,
2863 to_next, n_left_to_next);
2866 n_left_to_next -= 1;
2870 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing is done once for the whole frame, after all buffers have been
 * processed. */
2873 if (node->flags & VLIB_NODE_FLAG_TRACE)
2874 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2876 return frame->n_vectors;
/* Graph-node registration for "ip4-lookup-multicast", dispatched to
 * ip4_lookup_multicast.  Declared as a sibling of "ip4-lookup" and traced
 * with format_ip4_lookup_trace. */
2879 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2880 .function = ip4_lookup_multicast,
2881 .name = "ip4-lookup-multicast",
2882 .vector_size = sizeof (u32),
2883 .sibling_of = "ip4-lookup",
2884 .format_trace = format_ip4_lookup_trace,
2889 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/* Graph-node registration for "ip4-multicast".  Its node function is
 * ip4_drop, i.e. the same function is reused rather than a
 * multicast-specific one. */
2891 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2892 .function = ip4_drop,
2893 .name = "ip4-multicast",
2894 .vector_size = sizeof (u32),
2896 .format_trace = format_ip4_forward_next_trace,
/* Cross-check two lookup paths for address a in the FIB with index
 * fib_index0.
 *
 * Walks that FIB's mtrie from IP4_FIB_MTRIE_LEAF_ROOT with four
 * ip4_fib_mtrie_lookup_step calls (step indices 0..3), substitutes the
 * mtrie's default_leaf when the walk ends on IP4_FIB_MTRIE_LEAF_EMPTY, and
 * extracts an index from the final leaf.
 *
 * Returns non-zero when that index equals the value returned by
 * ip4_fib_table_lookup_lb for the same FIB and address, zero otherwise. */
2904 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2906 ip4_fib_mtrie_t * mtrie0;
2907 ip4_fib_mtrie_leaf_t leaf0;
2910 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2912 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2913 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2914 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2915 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2916 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2918 /* Handle default route. */
2919 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2921 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2923 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler for "test lookup".
 *
 * Accepts, in any order, "table %d", "count %f" and an IPv4 base address;
 * any other token yields an "unknown input" error.  It then calls
 * ip4_lookup_validate n times, advancing the address by one (in host byte
 * order) after each check, and prints either the number of failed lookups
 * or "No errors".
 *
 * NOTE(review): the value parsed after "table" is handed straight to
 * ip4_fib_get and to ip4_lookup_validate's fib_index0 parameter, so it is
 * used as a FIB index rather than being translated from a table id. */
2926 static clib_error_t *
2927 test_lookup_command_fn (vlib_main_t * vm,
2928 unformat_input_t * input,
2929 vlib_cli_command_t * cmd)
2936 ip4_address_t ip4_base_address;
2939 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2940 if (unformat (input, "table %d", &table_id))
2942 /* Make sure the entry exists. */
2943 fib = ip4_fib_get(table_id);
/* NOTE(review): the error is raised only when fib is non-NULL and its index
 * differs from table_id; a NULL fib is not rejected by this condition. */
2944 if ((fib) && (fib->index != table_id))
2945 return clib_error_return (0, "<fib-index> %d does not exist",
2948 else if (unformat (input, "count %f", &count))
2951 else if (unformat (input, "%U",
2952 unformat_ip4_address, &ip4_base_address))
2955 return clib_error_return (0, "unknown input `%U'",
2956 format_unformat_error, input);
2961 for (i = 0; i < n; i++)
2963 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Step to the next address: convert to host order, add one, convert back. */
2966 ip4_base_address.as_u32 =
2967 clib_host_to_net_u32 (1 +
2968 clib_net_to_host_u32 (ip4_base_address.as_u32));
2972 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2974 vlib_cli_output (vm, "No errors in %d lookups\n", n);
2980 * Perform a lookup of an IPv4 Address (or range of addresses) in the
2981 * given FIB table to determine if there is a conflict with the
2982 * adjacency table. The fib-id can be determined by using the
2983 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
2986 * @todo This command uses fib-id, other commands use table-id (not
2987 * just a name, they are different indexes). Would like to change this
2988 * to table-id for consistency.
2991 * Example of how to run the test lookup command:
2992 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
2993 * No errors in 2 lookups
/* Registers the "test lookup" CLI path and binds it to
 * test_lookup_command_fn. */
2997 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2998 .path = "test lookup",
2999 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3000 .function = test_lookup_command_fn,
/* Set the flow-hash configuration of the IPv4 FIB registered under
 * table_id.
 *
 * table_id is looked up in ip4_main.fib_index_by_table_id; the hash value
 * found there is the FIB index passed to ip4_fib_get, and flow_hash_config
 * is stored in that FIB's flow_hash_config field.
 *
 * VNET_API_ERROR_NO_SUCH_FIB is the failure return (NOTE(review): the
 * guarding condition is not visible in this view -- presumably a failed
 * hash lookup). */
3004 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3006 ip4_main_t * im4 = &ip4_main;
3008 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3011 return VNET_API_ERROR_NO_SUCH_FIB;
3013 fib = ip4_fib_get (p[0]);
3015 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 *
 * Parses "table %d" plus one keyword per entry generated from
 * foreach_flow_hash_bit; each matched keyword ORs its value into
 * flow_hash_config (which starts at 0).  The result is applied with
 * vnet_set_ip4_flow_hash.
 *
 * VNET_API_ERROR_NO_SUCH_FIB is reported to the user as
 * "no such FIB table <id>"; the "BUG: illegal flow hash config" path only
 * logs a clib_warning. */
3019 static clib_error_t *
3020 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3021 unformat_input_t * input,
3022 vlib_cli_command_t * cmd)
3026 u32 flow_hash_config = 0;
3029 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3030 if (unformat (input, "table %d", &table_id))
3033 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3034 foreach_flow_hash_bit
3040 return clib_error_return (0, "unknown input `%U'",
3041 format_unformat_error, input);
3043 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3049 case VNET_API_ERROR_NO_SUCH_FIB:
3050 return clib_error_return (0, "no such FIB table %d", table_id);
3053 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3061 * Configure the set of IPv4 fields used by the flow hash.
3064 * Example of how to set the flow hash on a given table:
3065 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3066 * Example of how to display the configured flow hash:
3067 * @cliexstart{show ip fib}
3068 * ipv4-VRF:0, fib_index 0, flow hash: src dst sport dport proto
3071 * [@0]: dpo-load-balance: [index:0 buckets:1 uRPF:0 to:[0:0]]
3072 * [0] [@0]: dpo-drop ip6
3075 * [@0]: dpo-load-balance: [index:1 buckets:1 uRPF:1 to:[0:0]]
3076 * [0] [@0]: dpo-drop ip6
3079 * [@0]: dpo-load-balance: [index:3 buckets:1 uRPF:3 to:[0:0]]
3080 * [0] [@0]: dpo-drop ip6
3083 * [@0]: dpo-load-balance: [index:30 buckets:1 uRPF:29 to:[0:0]]
3084 * [0] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3087 * [@0]: dpo-load-balance: [index:31 buckets:4 uRPF:30 to:[0:0]]
3088 * [0] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3089 * [1] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3090 * [2] [@3]: arp-ipv4: via 6.0.0.2 af_packet0
3091 * [3] [@3]: arp-ipv4: via 6.0.0.1 af_packet0
3094 * [@0]: dpo-load-balance: [index:2 buckets:1 uRPF:2 to:[0:0]]
3095 * [0] [@0]: dpo-drop ip6
3096 * 255.255.255.255/32
3098 * [@0]: dpo-load-balance: [index:4 buckets:1 uRPF:4 to:[0:0]]
3099 * [0] [@0]: dpo-drop ip6
3100 * ipv4-VRF:7, fib_index 1, flow hash: dst sport dport proto
3103 * [@0]: dpo-load-balance: [index:12 buckets:1 uRPF:11 to:[0:0]]
3104 * [0] [@0]: dpo-drop ip6
3107 * [@0]: dpo-load-balance: [index:13 buckets:1 uRPF:12 to:[0:0]]
3108 * [0] [@0]: dpo-drop ip6
3111 * [@0]: dpo-load-balance: [index:17 buckets:1 uRPF:16 to:[0:0]]
3112 * [0] [@4]: ipv4-glean: af_packet0
3115 * [@0]: dpo-load-balance: [index:18 buckets:1 uRPF:17 to:[1:84]]
3116 * [0] [@2]: dpo-receive: 172.16.1.1 on af_packet0
3119 * [@0]: dpo-load-balance: [index:21 buckets:1 uRPF:20 to:[0:0]]
3120 * [0] [@5]: ipv4 via 172.16.1.2 af_packet0: IP4: 02:fe:9e:70:7a:2b -> 26:a5:f6:9c:3a:36
3123 * [@0]: dpo-load-balance: [index:19 buckets:1 uRPF:18 to:[0:0]]
3124 * [0] [@4]: ipv4-glean: af_packet1
3127 * [@0]: dpo-load-balance: [index:20 buckets:1 uRPF:19 to:[0:0]]
3128 * [0] [@2]: dpo-receive: 172.16.2.1 on af_packet1
3131 * [@0]: dpo-load-balance: [index:15 buckets:1 uRPF:14 to:[0:0]]
3132 * [0] [@0]: dpo-drop ip6
3135 * [@0]: dpo-load-balance: [index:14 buckets:1 uRPF:13 to:[0:0]]
3136 * [0] [@0]: dpo-drop ip6
3137 * 255.255.255.255/32
3139 * [@0]: dpo-load-balance: [index:16 buckets:1 uRPF:15 to:[0:0]]
3140 * [0] [@0]: dpo-drop ip6
/* Registers the "set ip flow-hash" CLI path and binds it to
 * set_ip_flow_hash_command_fn. */
3144 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3145 .path = "set ip flow-hash",
3147 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3148 .function = set_ip_flow_hash_command_fn,
/* Associate classify table table_index with interface sw_if_index; a
 * table_index of ~0 is the "no table" value.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * index in the sw_interfaces pool, and VNET_API_ERROR_NO_SUCH_ENTRY when
 * table_index is not ~0 but is a free index in the classifier's table pool.
 *
 * Otherwise table_index is recorded in
 * lm->classify_table_index_by_sw_if_index[sw_if_index].  If the interface
 * has a first IPv4 address, a FIB_SOURCE_CLASSIFY special entry for that
 * address is added (with a DPO from classify_dpo_create) when table_index
 * is not ~0; the fib_table_entry_special_remove call is presumably the
 * opposite branch (NOTE(review): the else is not visible here -- confirm). */
3152 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3155 vnet_main_t * vnm = vnet_get_main();
3156 vnet_interface_main_t * im = &vnm->interface_main;
3157 ip4_main_t * ipm = &ip4_main;
3158 ip_lookup_main_t * lm = &ipm->lookup_main;
3159 vnet_classify_main_t * cm = &vnet_classify_main;
3160 ip4_address_t *if_addr;
3162 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3163 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3165 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3166 return VNET_API_ERROR_NO_SUCH_ENTRY;
3168 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3169 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3171 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* The FIB is only touched when the interface already has an address. */
3173 if (NULL != if_addr)
3175 fib_prefix_t pfx = {
3177 .fp_proto = FIB_PROTOCOL_IP4,
3178 .fp_addr.ip4 = *if_addr,
3182 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3186 if (table_index != (u32) ~0)
3188 dpo_id_t dpo = DPO_INVALID;
3193 classify_dpo_create(DPO_PROTO_IP4, table_index));
3195 fib_table_entry_special_dpo_add(fib_index,
3197 FIB_SOURCE_CLASSIFY,
3198 FIB_ENTRY_FLAG_NONE,
3204 fib_table_entry_special_remove(fib_index,
3206 FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify intfc <interface> table-index <idx>".
 *
 * Parses "table-index %d" and "intfc <interface>" in any order.  Both are
 * mandatory: a missing table-index or an unset interface (sw_if_index still
 * ~0) is rejected with a CLI error before anything is changed.  The binding
 * itself is done by vnet_set_ip4_classify_intfc, whose
 * VNET_API_ERROR_NO_MATCHING_INTERFACE and VNET_API_ERROR_NO_SUCH_ENTRY
 * results are turned into "No such interface" and
 * "No such classifier table" errors. */
3213 static clib_error_t *
3214 set_ip_classify_command_fn (vlib_main_t * vm,
3215 unformat_input_t * input,
3216 vlib_cli_command_t * cmd)
3218 u32 table_index = ~0;
/* Tracked separately because ~0 is itself an accepted table_index value in
 * vnet_set_ip4_classify_intfc, so it cannot double as "not given". */
3219 int table_index_set = 0;
3220 u32 sw_if_index = ~0;
3223 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3224 if (unformat (input, "table-index %d", &table_index))
3225 table_index_set = 1;
3226 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3227 vnet_get_main(), &sw_if_index))
3233 if (table_index_set == 0)
3234 return clib_error_return (0, "classify table-index must be specified");
3236 if (sw_if_index == ~0)
3237 return clib_error_return (0, "interface / subif must be specified");
3239 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3246 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3247 return clib_error_return (0, "No such interface");
3249 case VNET_API_ERROR_NO_SUCH_ENTRY:
3250 return clib_error_return (0, "No such classifier table");
3256 * Assign a classification table to an interface. The classification
3257 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3258 * commands. Once the table is created, use this command to filter packets
3262 * Example of how to assign a classification table to an interface:
3263 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
/* Registers the "set ip classify" CLI path and binds it to
 * set_ip_classify_command_fn. */
3266 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3267 .path = "set ip classify",
3269 "set ip classify intfc <interface> table-index <classify-idx>",
3270 .function = set_ip_classify_command_fn,