2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
56 * @brief IPv4 Forwarding.
58 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the common trace helper that is defined further
   down in this file; the lookup and load-balance node functions call it. */
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
/* Core of the IPv4 lookup node.
 *
 * For every buffer in the frame:
 *  - the FIB index is the entry of im->fib_index_by_sw_if_index for the RX
 *    interface, unless sw_if_index[VLIB_TX] is not ~0, in which case that
 *    value is used as the FIB index;
 *  - when lookup_for_responses_to_locally_received_packets is 0 the
 *    destination address is walked through the FIB mtrie in four steps and
 *    an empty leaf falls back to the mtrie's default leaf; otherwise the
 *    load-balance index is read from ip.adj_index[VLIB_RX];
 *  - a flow hash is computed only when the load-balance has more than one
 *    bucket, and the bucket is chosen with (hash & lb_n_buckets_minus_1);
 *  - the bucket's dpoi_index is stored in ip.adj_index[VLIB_TX], the
 *    combined "to" counter of the load-balance is incremented, and the
 *    buffer is enqueued to the bucket's dpoi_next_node.
 *
 * Buffers are handled two at a time while at least four remain, then singly.
 * Returns frame->n_vectors.
 */
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t * im = &ip4_main;
74 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, * from, * to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next,
86 to_next, n_left_to_next);
88 while (n_left_from >= 4 && n_left_to_next >= 2)
90 vlib_buffer_t * p0, * p1;
91 ip4_header_t * ip0, * ip1;
92 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
93 ip_lookup_next_t next0, next1;
94 const load_balance_t * lb0, * lb1;
95 ip4_fib_mtrie_t * mtrie0, * mtrie1;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1;
97 ip4_address_t * dst_addr0, *dst_addr1;
98 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100 flow_hash_config_t flow_hash_config0, flow_hash_config1;
101 u32 hash_c0, hash_c1;
103 const dpo_id_t *dpo0, *dpo1;
105 /* Prefetch next iteration. */
107 vlib_buffer_t * p2, * p3;
109 p2 = vlib_get_buffer (vm, from[2]);
110 p3 = vlib_get_buffer (vm, from[3]);
112 vlib_prefetch_buffer_header (p2, LOAD);
113 vlib_prefetch_buffer_header (p3, LOAD);
115 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
116 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
119 pi0 = to_next[0] = from[0];
120 pi1 = to_next[1] = from[1];
122 p0 = vlib_get_buffer (vm, pi0);
123 p1 = vlib_get_buffer (vm, pi1);
125 ip0 = vlib_buffer_get_current (p0);
126 ip1 = vlib_buffer_get_current (p1);
128 dst_addr0 = &ip0->dst_address;
129 dst_addr1 = &ip1->dst_address;
/* FIB index defaults to the RX interface's table; a TX value other than ~0
   overrides it. */
131 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
132 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
133 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
134 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
135 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
136 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
139 if (! lookup_for_responses_to_locally_received_packets)
141 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
142 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
144 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
146 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
147 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
150 tcp0 = (void *) (ip0 + 1);
151 tcp1 = (void *) (ip1 + 1);
153 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
154 || ip0->protocol == IP_PROTOCOL_UDP);
155 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
156 || ip1->protocol == IP_PROTOCOL_UDP);
158 if (! lookup_for_responses_to_locally_received_packets)
160 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
161 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
164 if (! lookup_for_responses_to_locally_received_packets)
166 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
167 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
170 if (! lookup_for_responses_to_locally_received_packets)
172 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
173 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
176 if (lookup_for_responses_to_locally_received_packets)
178 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
179 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
183 /* Handle default route. */
184 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
185 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
187 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
188 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
191 lb0 = load_balance_get (lb_index0);
192 lb1 = load_balance_get (lb_index1);
194 /* Use flow hash to compute multipath adjacency. */
195 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
196 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
197 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
199 flow_hash_config0 = lb0->lb_hash_config;
200 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
201 ip4_compute_flow_hash (ip0, flow_hash_config0);
203 if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
205 flow_hash_config1 = lb1->lb_hash_config;
206 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
207 ip4_compute_flow_hash (ip1, flow_hash_config1);
210 ASSERT (lb0->lb_n_buckets > 0);
211 ASSERT (is_pow2 (lb0->lb_n_buckets));
212 ASSERT (lb1->lb_n_buckets > 0);
213 ASSERT (is_pow2 (lb1->lb_n_buckets));
215 dpo0 = load_balance_get_bucket_i(lb0,
217 (lb0->lb_n_buckets_minus_1)));
/* The bucket index for p1 must be masked with lb1's own bucket count;
   lb0 and lb1 may have different numbers of buckets. */
218 dpo1 = load_balance_get_bucket_i(lb1,
220 (lb1->lb_n_buckets_minus_1)));
222 next0 = dpo0->dpoi_next_node;
223 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224 next1 = dpo1->dpoi_next_node;
225 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* NOTE(review): this dual-packet path adds sizeof(ethernet_header_t) to the
   counted bytes, while the single-packet path below counts only the buffer
   chain length - confirm which of the two is intended. */
227 vlib_increment_combined_counter
228 (cm, cpu_index, lb_index0, 1,
229 vlib_buffer_length_in_chain (vm, p0)
230 + sizeof(ethernet_header_t));
231 vlib_increment_combined_counter
232 (cm, cpu_index, lb_index1, 1,
233 vlib_buffer_length_in_chain (vm, p1)
234 + sizeof(ethernet_header_t));
/* Bit 0 of wrong_next flags p0, bit 1 flags p1, when the packet's next node
   differs from the frame currently being filled. */
241 wrong_next = (next0 != next) + 2*(next1 != next);
242 if (PREDICT_FALSE (wrong_next != 0))
251 vlib_set_next_frame_buffer (vm, node, next0, pi0);
258 vlib_set_next_frame_buffer (vm, node, next1, pi1);
265 vlib_set_next_frame_buffer (vm, node, next0, pi0);
266 vlib_set_next_frame_buffer (vm, node, next1, pi1);
270 vlib_put_next_frame (vm, node, next, n_left_to_next);
272 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
278 while (n_left_from > 0 && n_left_to_next > 0)
282 __attribute__((unused)) tcp_header_t * tcp0;
283 ip_lookup_next_t next0;
284 const load_balance_t *lb0;
285 ip4_fib_mtrie_t * mtrie0;
286 ip4_fib_mtrie_leaf_t leaf0;
287 ip4_address_t * dst_addr0;
288 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
289 flow_hash_config_t flow_hash_config0;
290 const dpo_id_t *dpo0;
296 p0 = vlib_get_buffer (vm, pi0);
298 ip0 = vlib_buffer_get_current (p0);
300 dst_addr0 = &ip0->dst_address;
302 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
303 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
304 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
306 if (! lookup_for_responses_to_locally_received_packets)
308 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
310 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
312 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
315 tcp0 = (void *) (ip0 + 1);
317 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
318 || ip0->protocol == IP_PROTOCOL_UDP);
320 if (! lookup_for_responses_to_locally_received_packets)
321 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
323 if (! lookup_for_responses_to_locally_received_packets)
324 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
326 if (! lookup_for_responses_to_locally_received_packets)
327 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
329 if (lookup_for_responses_to_locally_received_packets)
330 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
333 /* Handle default route. */
334 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
335 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
338 lb0 = load_balance_get (lbi0);
340 /* Use flow hash to compute multipath adjacency. */
341 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
342 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
344 flow_hash_config0 = lb0->lb_hash_config;
346 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
347 ip4_compute_flow_hash (ip0, flow_hash_config0);
350 ASSERT (lb0->lb_n_buckets > 0);
351 ASSERT (is_pow2 (lb0->lb_n_buckets));
353 dpo0 = load_balance_get_bucket_i(lb0,
355 (lb0->lb_n_buckets_minus_1)));
357 next0 = dpo0->dpoi_next_node;
358 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
360 vlib_increment_combined_counter
361 (cm, cpu_index, lbi0, 1,
362 vlib_buffer_length_in_chain (vm, p0));
369 if (PREDICT_FALSE (next0 != next))
372 vlib_put_next_frame (vm, node, next, n_left_to_next);
374 vlib_get_next_frame (vm, node, next,
375 to_next, n_left_to_next);
382 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the loop, so it records the adj_index[VLIB_TX] values
   written above. */
385 if (node->flags & VLIB_NODE_FLAG_TRACE)
386 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
388 return frame->n_vectors;
391 /** @brief IPv4 lookup node.
394 This is the main IPv4 lookup dispatch node.
396 @param vm vlib_main_t corresponding to the current thread
397 @param node vlib_node_runtime_t
398 @param frame vlib_frame_t whose contents should be dispatched
400 @par Graph mechanics: buffer metadata, next index usage
403 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
404 - Indicates the @c sw_if_index value of the interface that the
405 packet was received on.
406 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
407 - When the value is @c ~0 then the node performs a longest prefix
408 match (LPM) for the packet destination address in the FIB attached
409 to the receive interface.
410 - Otherwise perform LPM for the packet destination address in the
411 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
412 value (0, 1, ...) and not a VRF id.
415 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
416 - The @c dpoi_index of the load-balance bucket selected for the packet.
419 - Dispatches the packet to the node index found in
420 @c dpo->dpoi_next_node
421 (where @c dpo is the load-balance bucket selected by the lookup).
/* Node function behind ip4_lookup_node: hands the frame to ip4_lookup_inline
   with the "responses to locally received packets" mode switched off (0). */
424 ip4_lookup (vlib_main_t * vm,
425 vlib_node_runtime_t * node,
426 vlib_frame_t * frame)
428 return ip4_lookup_inline (vm, node, frame,
429 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter referenced by the node registration below; it is defined
   later in this file. */
433 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Registration of the "ip4-lookup" graph node: ip4_lookup is its function,
   vector elements are sizeof (u32) bytes, and it has IP_LOOKUP_N_NEXT next
   nodes taken from IP4_LOOKUP_NEXT_NODES. */
435 VLIB_REGISTER_NODE (ip4_lookup_node) = {
436 .function = ip4_lookup,
437 .name = "ip4-lookup",
438 .vector_size = sizeof (u32),
440 .format_trace = format_ip4_lookup_trace,
441 .n_next_nodes = IP_LOOKUP_N_NEXT,
442 .next_nodes = IP4_LOOKUP_NEXT_NODES,
445 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/* Node function for "ip4-load-balance".
 *
 * For each buffer the load-balance index is read from
 * ip.adj_index[VLIB_TX]; the flow hash is computed with that load-balance's
 * hash config and stored in ip.flow_hash; the bucket is chosen with
 * (flow_hash & lb_n_buckets_minus_1); the bucket's dpoi_index replaces
 * ip.adj_index[VLIB_TX]; the load-balance's "via" combined counter is
 * incremented with the buffer chain length; and the buffer goes to the
 * bucket's dpoi_next_node.  Returns frame->n_vectors.
 */
448 ip4_load_balance (vlib_main_t * vm,
449 vlib_node_runtime_t * node,
450 vlib_frame_t * frame)
452 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
453 u32 n_left_from, n_left_to_next, * from, * to_next;
454 ip_lookup_next_t next;
455 u32 cpu_index = os_get_cpu_number();
457 from = vlib_frame_vector_args (frame);
458 n_left_from = frame->n_vectors;
459 next = node->cached_next_index;
/* Tracing is done up front, i.e. before ip.adj_index[VLIB_TX] is overwritten
   in the loop below. */
461 if (node->flags & VLIB_NODE_FLAG_TRACE)
462 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
464 while (n_left_from > 0)
466 vlib_get_next_frame (vm, node, next,
467 to_next, n_left_to_next);
470 while (n_left_from > 0 && n_left_to_next > 0)
472 ip_lookup_next_t next0;
473 const load_balance_t *lb0;
476 const ip4_header_t *ip0;
477 const dpo_id_t *dpo0;
482 p0 = vlib_get_buffer (vm, pi0);
484 ip0 = vlib_buffer_get_current (p0);
485 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
487 lb0 = load_balance_get(lbi0);
488 hc0 = lb0->lb_hash_config;
/* Unlike the lookup node, the hash is computed for every packet here,
   regardless of the bucket count. */
489 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
491 dpo0 = load_balance_get_bucket_i(lb0,
492 vnet_buffer(p0)->ip.flow_hash &
493 (lb0->lb_n_buckets_minus_1));
495 next0 = dpo0->dpoi_next_node;
496 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
498 vlib_increment_combined_counter
499 (cm, cpu_index, lbi0, 1,
500 vlib_buffer_length_in_chain (vm, p0));
507 if (PREDICT_FALSE (next0 != next))
510 vlib_put_next_frame (vm, node, next, n_left_to_next);
512 vlib_get_next_frame (vm, node, next,
513 to_next, n_left_to_next);
520 vlib_put_next_frame (vm, node, next, n_left_to_next);
523 return frame->n_vectors;
/* Trace formatter referenced by the node registration below; it is defined
   later in this file. */
526 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Registration of the "ip4-load-balance" graph node.  It is declared as a
   sibling of "ip4-lookup" and lists no next nodes of its own. */
528 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
529 .function = ip4_load_balance,
530 .name = "ip4-load-balance",
531 .vector_size = sizeof (u32),
532 .sibling_of = "ip4-lookup",
534 .format_trace = format_ip4_forward_next_trace,
537 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
/* Iterates over the addresses configured on sw_if_index (the walk honors
   unnumbered interfaces) and stores through result_ia the interface-address
   object that goes with the selected address, or 0 when none was selected. */
539 /* get first interface address */
541 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
542 ip_interface_address_t ** result_ia)
544 ip_lookup_main_t * lm = &im->lookup_main;
545 ip_interface_address_t * ia = 0;
546 ip4_address_t * result = 0;
548 foreach_ip_interface_address (lm, ia, sw_if_index,
549 1 /* honor unnumbered */,
551 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
556 *result_ia = result ? ia : 0;
/* Installs the FIB state that goes with interface address `a` in table
 * fib_index:
 *  - a FIB_SOURCE_INTERFACE entry flagged CONNECTED | ATTACHED, whose
 *    adjacency is remembered in a->neighbor_probe_adj_index (the field is
 *    reset to ~0 first);
 *  - when the interface has a classify table index other than ~0, an entry
 *    backed by a classify DPO for that table;
 *  - a FIB_SOURCE_INTERFACE entry flagged CONNECTED | LOCAL.
 * NOTE(review): which prefix each call installs, and whether the first step
 * is conditional, should be confirmed against the full function.
 */
561 ip4_add_interface_routes (u32 sw_if_index,
562 ip4_main_t * im, u32 fib_index,
563 ip_interface_address_t * a)
565 ip_lookup_main_t * lm = &im->lookup_main;
566 ip4_address_t * address = ip_interface_address_get_address (lm, a);
568 .fp_len = a->address_length,
569 .fp_proto = FIB_PROTOCOL_IP4,
570 .fp_addr.ip4 = *address,
573 a->neighbor_probe_adj_index = ~0;
577 fib_node_index_t fei;
579 fei = fib_table_entry_update_one_path(fib_index,
581 FIB_SOURCE_INTERFACE,
582 (FIB_ENTRY_FLAG_CONNECTED |
583 FIB_ENTRY_FLAG_ATTACHED),
585 NULL, /* No next-hop address */
587 ~0, // invalid FIB index
590 FIB_ROUTE_PATH_FLAG_NONE);
591 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* Only interfaces that fall inside the classify-table vector and carry a
   valid table index get the classify DPO entry. */
596 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
598 u32 classify_table_index =
599 lm->classify_table_index_by_sw_if_index [sw_if_index];
600 if (classify_table_index != (u32) ~0)
602 dpo_id_t dpo = DPO_NULL;
607 classify_dpo_create(FIB_PROTOCOL_IP4,
608 classify_table_index));
610 fib_table_entry_special_dpo_add(fib_index,
619 fib_table_entry_update_one_path(fib_index,
621 FIB_SOURCE_INTERFACE,
622 (FIB_ENTRY_FLAG_CONNECTED |
623 FIB_ENTRY_FLAG_LOCAL),
627 ~0, // invalid FIB index
630 FIB_ROUTE_PATH_FLAG_NONE);
/* Removes the FIB_SOURCE_INTERFACE entries for an interface address from
   table fib_index.  A prefix is built from address and address_length and
   fib_table_entry_delete is called twice.
   NOTE(review): the prefix passed to each call should be confirmed against
   the full function. */
634 ip4_del_interface_routes (ip4_main_t * im,
636 ip4_address_t * address,
640 .fp_len = address_length,
641 .fp_proto = FIB_PROTOCOL_IP4,
642 .fp_addr.ip4 = *address,
647 fib_table_entry_delete(fib_index,
649 FIB_SOURCE_INTERFACE);
653 fib_table_entry_delete(fib_index,
655 FIB_SOURCE_INTERFACE);
/* Enables or disables IPv4 processing on sw_if_index.
 *
 * im->ip_enabled_by_sw_if_index holds a per-interface count that is
 * pre-incremented on one path (compared against 1) and pre-decremented on
 * the other (compared against 0).  For the unicast and multicast rx feature
 * casts, the matching lookup feature is then added to or removed from the
 * interface's feature configuration and the resulting config index is
 * stored back in config_index_by_sw_if_index.
 */
659 ip4_sw_interface_enable_disable (u32 sw_if_index,
662 vlib_main_t * vm = vlib_get_main();
663 ip4_main_t * im = &ip4_main;
664 ip_lookup_main_t * lm = &im->lookup_main;
666 u32 lookup_feature_index;
668 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
671 * enable/disable only on the 1<->0 transition
675 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
680 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
681 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
685 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
687 ip_config_main_t * cm = &lm->feature_config_mains[cast];
688 vnet_config_main_t * vcm = &cm->config_main;
690 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
691 ci = cm->config_index_by_sw_if_index[sw_if_index];
693 if (cast == VNET_IP_RX_UNICAST_FEAT)
694 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
696 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
699 ci = vnet_config_add_feature (vm, vcm,
701 lookup_feature_index,
703 /* # bytes of config data */ 0);
705 ci = vnet_config_del_feature (vm, vcm,
707 lookup_feature_index,
709 /* # bytes of config data */ 0);
710 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/* Adds or deletes an IPv4 address on an interface.
 *
 * The interface's FIB index is looked up (growing fib_index_by_sw_if_index
 * if needed).  Before an add, the new prefix is compared against every
 * address already on the interface with ip4_destination_matches_route in
 * both directions, and an error naming both prefixes is returned on a
 * conflict.  ip_interface_address_add_del then updates the address pool,
 * ip4_sw_interface_enable_disable is called with !is_del, the interface
 * routes are deleted or added, and the registered add/del callbacks are
 * invoked when the pool's element count changed.
 */
714 static clib_error_t *
715 ip4_add_del_interface_address_internal (vlib_main_t * vm,
717 ip4_address_t * address,
721 vnet_main_t * vnm = vnet_get_main();
722 ip4_main_t * im = &ip4_main;
723 ip_lookup_main_t * lm = &im->lookup_main;
724 clib_error_t * error = 0;
725 u32 if_address_index, elts_before;
726 ip4_address_fib_t ip4_af, * addr_fib = 0;
728 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
729 ip4_addr_fib_init (&ip4_af, address,
730 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
731 vec_add1 (addr_fib, ip4_af);
734 * there is no support for adj-fib handling in the presence of overlapping
735 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
740 /* When adding an address check that it does not conflict
741 with an existing address. */
742 ip_interface_address_t * ia;
743 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
744 0 /* honor unnumbered */,
746 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
748 if (ip4_destination_matches_route (im, address, x, ia->address_length)
749 || ip4_destination_matches_route (im, x, address, address_length))
750 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
751 format_ip4_address_and_length, address, address_length,
752 format_ip4_address_and_length, x, ia->address_length,
753 format_vnet_sw_if_index_name, vnm, sw_if_index);
757 elts_before = pool_elts (lm->if_address_pool);
759 error = ip_interface_address_add_del
769 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
772 ip4_del_interface_routes (im, ip4_af.fib_index, address,
775 ip4_add_interface_routes (sw_if_index,
776 im, ip4_af.fib_index,
778 (lm->if_address_pool, if_address_index));
780 /* Notify callbacks only if the address pool grew or shrank; an unchanged pool size means a duplicate address was added. */
781 if (elts_before != pool_elts (lm->if_address_pool))
783 ip4_add_del_interface_address_callback_t * cb;
784 vec_foreach (cb, im->add_del_interface_address_callbacks)
785 cb->function (im, cb->function_opaque, sw_if_index,
786 address, address_length,
/* Entry point for adding or deleting an interface address; forwards its
   arguments to ip4_add_del_interface_address_internal and returns that
   function's result. */
797 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
798 ip4_address_t * address, u32 address_length,
801 return ip4_add_del_interface_address_internal
802 (vm, sw_if_index, address, address_length,
/* Each entry below names a feature node, the node it must run before, and
   the ip4_main field that receives its feature index.  The constraints
   chain from ip4-flow-classify down to ip4-lookup and finally ip4-drop,
   which is not ordered before anything. */
806 /* Built-in ip4 unicast rx feature path definition */
807 VNET_IP4_UNICAST_FEATURE_INIT (ip4_flow_classify, static) = {
808 .node_name = "ip4-flow-classify",
809 .runs_before = ORDER_CONSTRAINTS {"ip4-inacl", 0},
810 .feature_index = &ip4_main.ip4_unicast_rx_feature_flow_classify,
813 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
814 .node_name = "ip4-inacl",
815 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
816 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
820 .node_name = "ip4-source-check-via-rx",
821 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
823 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
827 .node_name = "ip4-source-check-via-any",
828 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
830 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
/* Ordered only relative to ip4-policer-classify; no entry here places it
   relative to the source-check features. */
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
834 .node_name = "ip4-source-and-port-range-check-rx",
835 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
837 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
840 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
841 .node_name = "ip4-policer-classify",
842 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
844 &ip4_main.ip4_unicast_rx_feature_policer_classify,
847 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
848 .node_name = "ipsec-input-ip4",
849 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
850 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
853 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
854 .node_name = "vpath-input-ip4",
855 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
856 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
859 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
860 .node_name = "ip4-lookup",
861 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
862 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
865 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
866 .node_name = "ip4-drop",
867 .runs_before = 0, /* not before any other features */
868 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
/* Multicast counterpart of the unicast table: vpath-input-ip4 runs before
   ip4-lookup-multicast, which runs before ip4-drop (the last feature). */
872 /* Built-in ip4 multicast rx feature path definition */
873 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
874 .node_name = "vpath-input-ip4",
875 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
876 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
879 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
880 .node_name = "ip4-lookup-multicast",
881 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
882 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
885 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
886 .node_name = "ip4-drop",
887 .runs_before = 0, /* last feature */
888 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Names of the nodes from which the rx feature arcs start; consumed by
   ip4_feature_init for casts below VNET_IP_TX_FEAT. */
891 static char * rx_feature_start_nodes[] =
892 { "ip4-input", "ip4-input-no-checksum"};
/* Names of the nodes from which the tx feature arc starts; consumed by
   ip4_feature_init for the remaining casts. */
894 static char * tx_feature_start_nodes[] =
896 "ip4-rewrite-transit",
900 /* Source and port-range check ip4 tx feature path definition */
901 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
902 .node_name = "ip4-source-and-port-range-check-tx",
903 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
905 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
909 /* Built-in ip4 tx feature path definition */
910 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
911 .node_name = "interface-output",
912 .runs_before = 0, /* not before any other features */
913 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/* Initializes one feature arc per cast value in [0, VNET_N_IP_FEAT) by
   calling vnet_feature_arc_init on that cast's config main.  Casts below
   VNET_IP_TX_FEAT start from rx_feature_start_nodes; the others start from
   tx_feature_start_nodes.  The registered features come from
   im->next_feature[cast] and the resulting node names are stored in
   im->feature_nodes[cast]. */
916 static clib_error_t *
917 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
919 ip_lookup_main_t * lm = &im->lookup_main;
920 clib_error_t * error;
922 ip_config_main_t * cm;
923 vnet_config_main_t * vcm;
924 char **feature_start_nodes;
925 int feature_start_len;
927 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
929 cm = &lm->feature_config_mains[cast];
930 vcm = &cm->config_main;
932 if (cast < VNET_IP_TX_FEAT)
934 feature_start_nodes = rx_feature_start_nodes;
935 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
939 feature_start_nodes = tx_feature_start_nodes;
940 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
943 if ((error = vnet_feature_arc_init (vm, vcm,
946 im->next_feature[cast],
947 &im->feature_nodes[cast])))
/* Software-interface add/del hook (registered through
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION).
 *
 * Makes sure fib_index_by_sw_if_index covers sw_if_index, then for every
 * feature cast picks the default feature - unicast rx drop, multicast rx
 * drop, or tx interface-output - and adds it to, or removes it from, the
 * interface's feature configuration.  On the removal path the interface's
 * ip_enabled_by_sw_if_index entry is reset to 0 when it exists.  The
 * resulting config index is stored back and the function returns 0.
 */
954 static clib_error_t *
955 ip4_sw_interface_add_del (vnet_main_t * vnm,
959 vlib_main_t * vm = vnm->vlib_main;
960 ip4_main_t * im = &ip4_main;
961 ip_lookup_main_t * lm = &im->lookup_main;
965 /* Fill in lookup tables with default table (0). */
966 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
968 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
970 ip_config_main_t * cm = &lm->feature_config_mains[cast];
971 vnet_config_main_t * vcm = &cm->config_main;
973 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
974 ci = cm->config_index_by_sw_if_index[sw_if_index];
976 if (cast == VNET_IP_RX_UNICAST_FEAT)
977 feature_index = im->ip4_unicast_rx_feature_drop;
978 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
979 feature_index = im->ip4_multicast_rx_feature_drop;
981 feature_index = im->ip4_tx_feature_interface_output;
984 ci = vnet_config_add_feature (vm, vcm,
988 /* # bytes of config data */ 0);
991 ci = vnet_config_del_feature (vm, vcm, ci,
994 /* # bytes of config data */ 0);
995 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
996 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
998 cm->config_index_by_sw_if_index[sw_if_index] = ci;
1000 * note: do not update the tx feature count here.
1004 return /* no error */ 0;
1007 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1009 /* Global IPv4 state; the node and configuration functions in this file reach it through &ip4_main. */
1010 ip4_main_t ip4_main;
/* Init function for the IPv4 lookup machinery (registered through
 * VLIB_INIT_FUNCTION).
 *
 * Fills im->fib_masks with one mask per index, stored in network byte
 * order; initializes the generic lookup main for IPv4; creates (and locks)
 * the IPv4 FIB table with table id 0; installs the IPv4 header edit parser
 * on the packet-generator node of ip4-lookup; builds the ARP request packet
 * template; and finally runs ip4_feature_init.
 */
1013 ip4_lookup_init (vlib_main_t * vm)
1015 ip4_main_t * im = &ip4_main;
1016 clib_error_t * error;
1019 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1024 m = pow2_mask (i) << (32 - i);
1027 im->fib_masks[i] = clib_host_to_net_u32 (m);
1030 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1032 /* Create FIB with index 0 and table id of 0. */
1033 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1037 pn = pg_get_node (ip4_lookup_node.index);
1038 pn->unformat_edit = unformat_pg_ip4_header;
1042 ethernet_arp_header_t h;
1044 memset (&h, 0, sizeof (h));
1046 /* Set target ethernet address to all zeros. */
1047 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* Helper macros: _16 stores a 16-bit field in network byte order, _8 stores
   a byte field as is. */
1049 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1050 #define _8(f,v) h.f = v;
1051 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1052 _16 (l3_type, ETHERNET_TYPE_IP4);
1053 _8 (n_l2_address_bytes, 6);
1054 _8 (n_l3_address_bytes, 4);
1055 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1059 vlib_packet_template_init (vm,
1060 &im->ip4_arp_request_packet_template,
1063 /* alloc chunk size */ 8,
1067 error = ip4_feature_init (vm, im);
1072 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record filled in by ip4_forward_next_trace and printed
   by the format_ip4_*_trace functions.  Besides the packet bytes it carries
   the dpo_index, flow_hash and fib_index values those functions use. */
1075 /* Adjacency taken. */
1080 /* Packet data, possibly *after* rewrite. */
1081 u8 packet_data[64 - 1*sizeof(u32)];
1082 } ip4_forward_next_trace_t;
/* Trace formatter: takes (vm, node, trace record) from the va_list - the
   first two are unused - and appends the captured packet bytes formatted as
   an IPv4 header, preceded by the current indent. */
1084 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1086 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1087 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1088 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1089 uword indent = format_get_indent (s);
1090 s = format (s, "%U%U",
1091 format_white_space, indent,
1092 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for the lookup node: prints the FIB index, DPO index and
   flow hash from the trace record, then on a new indented line the captured
   packet bytes formatted as an IPv4 header. */
1096 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1098 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1099 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1100 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1101 uword indent = format_get_indent (s);
1103 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1104 t->fib_index, t->dpo_index, t->flow_hash);
1105 s = format (s, "\n%U%U",
1106 format_white_space, indent,
1107 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite nodes: prints the record's index fields, the
   adjacency identified by dpo_index, and the flow hash, then on a new
   indented line the captured packet data via
   format_ip_adjacency_packet_data.
   NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index - confirm that this is the intended field. */
1111 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1113 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1114 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1115 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1116 vnet_main_t * vnm = vnet_get_main();
1117 uword indent = format_get_indent (s);
1119 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1120 t->fib_index, t->dpo_index, format_ip_adjacency,
1121 t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1123 s = format (s, "\n%U%U",
1124 format_white_space, indent,
1125 format_ip_adjacency_packet_data,
1127 t->packet_data, sizeof (t->packet_data));
/* Walks the frame's buffers, two at a time with prefetch and then singly.
 * For every buffer flagged VLIB_BUFFER_IS_TRACED it adds a trace record
 * holding:
 *  - dpo_index: ip.adj_index[which_adj_index];
 *  - flow_hash: ip.flow_hash;
 *  - fib_index: sw_if_index[VLIB_TX] when that is not ~0, otherwise the FIB
 *    index of the RX interface;
 *  - packet_data: sizeof (packet_data) bytes copied from the buffer's
 *    current data pointer.
 */
1131 /* Common trace function for all ip4-forward next nodes. */
1133 ip4_forward_next_trace (vlib_main_t * vm,
1134 vlib_node_runtime_t * node,
1135 vlib_frame_t * frame,
1136 vlib_rx_or_tx_t which_adj_index)
1139 ip4_main_t * im = &ip4_main;
1141 n_left = frame->n_vectors;
1142 from = vlib_frame_vector_args (frame);
1147 vlib_buffer_t * b0, * b1;
1148 ip4_forward_next_trace_t * t0, * t1;
1150 /* Prefetch next iteration. */
1151 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1152 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1157 b0 = vlib_get_buffer (vm, bi0);
1158 b1 = vlib_get_buffer (vm, bi1);
1160 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1162 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1163 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1164 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1165 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1167 vec_elt (im->fib_index_by_sw_if_index,
1168 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1170 clib_memcpy (t0->packet_data,
1171 vlib_buffer_get_current (b0),
1172 sizeof (t0->packet_data));
1174 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1176 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1177 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1178 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1179 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1180 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1181 vec_elt (im->fib_index_by_sw_if_index,
1182 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1183 clib_memcpy (t1->packet_data,
1184 vlib_buffer_get_current (b1),
1185 sizeof (t1->packet_data));
1195 ip4_forward_next_trace_t * t0;
1199 b0 = vlib_get_buffer (vm, bi0);
1201 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1203 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1204 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1205 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1206 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1207 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1208 vec_elt (im->fib_index_by_sw_if_index,
1209 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1210 clib_memcpy (t0->packet_data,
1211 vlib_buffer_get_current (b0),
1212 sizeof (t0->packet_data));
/* Shared body of the drop and punt nodes: passes the frame's buffers to
   vlib_error_drop_buffers (with ip4_input_node.index among its arguments)
   and, when tracing is enabled on the node, records traces through
   ip4_forward_next_trace using the VLIB_TX adjacency index. */
1220 ip4_drop_or_punt (vlib_main_t * vm,
1221 vlib_node_runtime_t * node,
1222 vlib_frame_t * frame,
1223 ip4_error_t error_code)
1225 u32 * buffers = vlib_frame_vector_args (frame);
1226 uword n_packets = frame->n_vectors;
1228 vlib_error_drop_buffers (vm, node,
1233 ip4_input_node.index,
1236 if (node->flags & VLIB_NODE_FLAG_TRACE)
1237 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/*
 * Node function of ip4_drop_node: delegates the whole frame to
 * ip4_drop_or_punt with IP4_ERROR_ADJACENCY_DROP and returns its result.
 */
1243 ip4_drop (vlib_main_t * vm,
1244 vlib_node_runtime_t * node,
1245 vlib_frame_t * frame)
1246 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/*
 * Node function of ip4_punt_node: delegates the whole frame to
 * ip4_drop_or_punt with IP4_ERROR_ADJACENCY_PUNT and returns its result.
 */
1249 ip4_punt (vlib_main_t * vm,
1250 vlib_node_runtime_t * node,
1251 vlib_frame_t * frame)
1252 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/*
 * Graph node registration for the drop node. It runs ip4_drop, carries u32
 * vector elements and formats traces with format_ip4_forward_next_trace.
 * The multiarch macro that follows is applied to the same node/function pair.
 */
1254 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1255 .function = ip4_drop,
1257 .vector_size = sizeof (u32),
1259 .format_trace = format_ip4_forward_next_trace,
1267 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/*
 * Graph node registration for the punt node. It runs ip4_punt, carries u32
 * vector elements and formats traces with format_ip4_forward_next_trace.
 * The multiarch macro that follows is applied to the same node/function pair.
 */
1269 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1270 .function = ip4_punt,
1272 .vector_size = sizeof (u32),
1274 .format_trace = format_ip4_forward_next_trace,
1282 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
/*
 * Software L4 checksum over the pseudo header and the payload of ip0.
 *
 * The running sum is seeded with the payload length and the protocol, then
 * the source and destination addresses are folded in (two u32 reads on
 * 32-bit uword builds, one u64 read starting at src_address otherwise).
 * The payload is then summed with ip_incremental_checksum, one buffer at a
 * time, following p0->next_buffer until all payload bytes are consumed.
 * The final value stored in sum16 is the complement of the folded sum.
 */
1284 /* Compute TCP/UDP/ICMP4 checksum in software. */
1286 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1290 u32 ip_header_length, payload_length_host_byte_order;
1291 u32 n_this_buffer, n_bytes_left;
1293 void * data_this_buffer;
1295 /* Initialize checksum with ip header. */
1296 ip_header_length = ip4_header_bytes (ip0);
1297 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1298 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1300 if (BITS (uword) == 32)
1302 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1303 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1306 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
/* Start with the whole payload, then clamp the first chunk to the bytes
 * actually present in the first buffer after the IP header. */
1308 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1309 data_this_buffer = (void *) ip0 + ip_header_length;
1310 if (n_this_buffer + ip_header_length > p0->current_length)
1311 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1314 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1315 n_bytes_left -= n_this_buffer;
1316 if (n_bytes_left == 0)
/* More payload remains: it must live in a chained buffer. */
1319 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1320 p0 = vlib_get_buffer (vm, p0->next_buffer);
1321 data_this_buffer = vlib_buffer_get_current (p0);
1322 n_this_buffer = p0->current_length;
1325 sum16 = ~ ip_csum_fold (sum0);
/*
 * Validates the L4 checksum of the TCP or UDP packet at the current data
 * pointer of p0 and records the outcome in p0->flags:
 *   - IP_BUFFER_L4_CHECKSUM_COMPUTED is always set;
 *   - IP_BUFFER_L4_CHECKSUM_CORRECT is set when the computed sum is zero,
 *     or directly for UDP packets whose checksum field is zero.
 * The protocol must be TCP or UDP (ASSERT).
 *
 * NOTE(review): udp0 is located at ip0 + 1, i.e. right after a header of
 * sizeof (ip4_header_t) bytes. ip4_local in this file uses ip4_next_header
 * instead; confirm whether packets with IP options can reach this function.
 */
1331 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1333 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1334 udp_header_t * udp0;
1337 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1338 || ip0->protocol == IP_PROTOCOL_UDP);
1340 udp0 = (void *) (ip0 + 1);
/* A UDP checksum of zero is treated as correct without computing the sum. */
1341 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1343 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1344 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1348 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1350 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1351 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * Node function of ip4-local: processes packets addressed to this host.
 *
 * For every packet the function
 *   - traces the frame first when node tracing is enabled;
 *   - looks the source address up in the mtrie of the FIB bound to the RX
 *     interface (four lookup steps, falling back to the default leaf);
 *   - classifies the protocol, mapping fragments to protocol 0xfe;
 *   - for TCP/UDP, checks the L4 checksum (calling
 *     ip4_tcp_udp_validate_checksum when the buffer flags do not already
 *     carry a computed result) and, for UDP, that the UDP length does not
 *     exceed the IP length;
 *   - applies the source checks (source resolving to DPO_RECEIVE, uRPF list
 *     empty and destination not 0xFFFFFFFF);
 *   - picks the next node from lm->local_next_by_ip_protocol, or
 *     IP_LOCAL_NEXT_DROP when an error was recorded.
 *
 * IP4_ERROR_UNKNOWN_PROTOCOL is the initial value of error0/error1 and is
 * the value tested to decide that no error was found. Errors are counted
 * against the runtime of ip4_input_node (error_node).
 *
 * A dual loop handles two packets per iteration while at least four remain;
 * a single loop with the same logic handles the rest.
 * Returns frame->n_vectors.
 */
1357 ip4_local (vlib_main_t * vm,
1358 vlib_node_runtime_t * node,
1359 vlib_frame_t * frame)
1361 ip4_main_t * im = &ip4_main;
1362 ip_lookup_main_t * lm = &im->lookup_main;
1363 ip_local_next_t next_index;
1364 u32 * from, * to_next, n_left_from, n_left_to_next;
1365 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1367 from = vlib_frame_vector_args (frame);
1368 n_left_from = frame->n_vectors;
1369 next_index = node->cached_next_index;
1371 if (node->flags & VLIB_NODE_FLAG_TRACE)
1372 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1374 while (n_left_from > 0)
1376 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
1378 while (n_left_from >= 4 && n_left_to_next >= 2)
1380 vlib_buffer_t * p0, * p1;
1381 ip4_header_t * ip0, * ip1;
1382 udp_header_t * udp0, * udp1;
1383 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1384 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1385 const dpo_id_t *dpo0, *dpo1;
1386 const load_balance_t *lb0, *lb1;
1387 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1388 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1389 i32 len_diff0, len_diff1;
1390 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1391 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1394 pi0 = to_next[0] = from[0];
1395 pi1 = to_next[1] = from[1];
1399 n_left_to_next -= 2;
1401 p0 = vlib_get_buffer (vm, pi0);
1402 p1 = vlib_get_buffer (vm, pi1);
1404 ip0 = vlib_buffer_get_current (p0);
1405 ip1 = vlib_buffer_get_current (p1);
/* The FIB used for the source lookup is the one bound to the RX interface. */
1407 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1408 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1409 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1410 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1412 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1413 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1415 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1417 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1418 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1420 /* Treat IP frag packets as "experimental" protocol for now
1421 until support of IP frag reassembly is implemented */
1422 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1423 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1424 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1425 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1426 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1427 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1432 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1433 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1435 udp0 = ip4_next_header (ip0);
1436 udp1 = ip4_next_header (ip1);
1438 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1439 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1440 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1442 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1443 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1445 /* Verify UDP length. */
1446 ip_len0 = clib_net_to_host_u16 (ip0->length);
1447 ip_len1 = clib_net_to_host_u16 (ip1->length);
1448 udp_len0 = clib_net_to_host_u16 (udp0->length);
1449 udp_len1 = clib_net_to_host_u16 (udp1->length);
1451 len_diff0 = ip_len0 - udp_len0;
1452 len_diff1 = ip_len1 - udp_len1;
1454 len_diff0 = is_udp0 ? len_diff0 : 0;
1455 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: taken unless both packets are TCP/UDP with a known-good
 * checksum. */
1457 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1458 & good_tcp_udp0 & good_tcp_udp1)))
1463 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1464 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1466 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1467 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1472 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1473 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1475 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1476 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1480 good_tcp_udp0 &= len_diff0 >= 0;
1481 good_tcp_udp1 &= len_diff1 >= 0;
1483 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1484 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1486 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1488 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1489 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1491 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1492 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1493 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1495 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1496 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1499 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1500 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1501 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1502 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1504 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1505 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1507 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1508 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1510 lb0 = load_balance_get(lbi0);
1511 lb1 = load_balance_get(lbi1);
1512 dpo0 = load_balance_get_bucket_i(lb0, 0);
1513 dpo1 = load_balance_get_bucket_i(lb1, 0);
1516 * Must have a route to source otherwise we drop the packet.
1517 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1520 * - the source is a recieve => it's from us => bogus, do this
1521 * first since it sets a different error code.
1522 * - uRPF check for any route to source - accept if passes.
1523 * - allow packets destined to the broadcast address from unknown sources
1525 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1526 dpo0->dpoi_type == DPO_RECEIVE) ?
1527 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1529 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1530 !fib_urpf_check_size(lb0->lb_urpf) &&
1531 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1532 ? IP4_ERROR_SRC_LOOKUP_MISS
1534 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1535 dpo1->dpoi_type == DPO_RECEIVE) ?
1536 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1538 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1539 !fib_urpf_check_size(lb1->lb_urpf) &&
1540 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1541 ? IP4_ERROR_SRC_LOOKUP_MISS
1544 next0 = lm->local_next_by_ip_protocol[proto0];
1545 next1 = lm->local_next_by_ip_protocol[proto1];
1547 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1548 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1550 p0->error = error0 ? error_node->errors[error0] : 0;
1551 p1->error = error1 ? error_node->errors[error1] : 0;
1553 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1555 if (PREDICT_FALSE (enqueue_code != 0))
1557 switch (enqueue_code)
1563 n_left_to_next += 1;
1564 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1570 n_left_to_next += 1;
1571 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1575 /* A B B or A B C */
1577 n_left_to_next += 2;
1578 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1579 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1582 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1584 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1591 while (n_left_from > 0 && n_left_to_next > 0)
1595 udp_header_t * udp0;
1596 ip4_fib_mtrie_t * mtrie0;
1597 ip4_fib_mtrie_leaf_t leaf0;
1598 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1600 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1601 load_balance_t *lb0;
1602 const dpo_id_t *dpo0;
1604 pi0 = to_next[0] = from[0];
1608 n_left_to_next -= 1;
1610 p0 = vlib_get_buffer (vm, pi0);
1612 ip0 = vlib_buffer_get_current (p0);
1614 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1615 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1617 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1619 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1621 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1623 /* Treat IP frag packets as "experimental" protocol for now
1624 until support of IP frag reassembly is implemented */
1625 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1626 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1627 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1631 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1633 udp0 = ip4_next_header (ip0);
1635 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1636 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1638 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1640 /* Verify UDP length. */
1641 ip_len0 = clib_net_to_host_u16 (ip0->length);
1642 udp_len0 = clib_net_to_host_u16 (udp0->length);
1644 len_diff0 = ip_len0 - udp_len0;
1646 len_diff0 = is_udp0 ? len_diff0 : 0;
/* Slow path: taken unless the packet is TCP/UDP with a known-good checksum. */
1648 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1653 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1654 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1656 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1657 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1661 good_tcp_udp0 &= len_diff0 >= 0;
1663 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1665 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1667 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1669 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1670 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1671 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1674 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1675 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1677 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1678 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1680 lb0 = load_balance_get(lbi0);
1681 dpo0 = load_balance_get_bucket_i(lb0, 0);
1683 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1684 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1687 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1688 dpo0->dpoi_type == DPO_RECEIVE) ?
1689 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1691 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1692 !fib_urpf_check_size(lb0->lb_urpf) &&
1693 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1694 ? IP4_ERROR_SRC_LOOKUP_MISS
/* Next node comes from the per-protocol table; any recorded error forces
 * the drop next. */
1697 next0 = lm->local_next_by_ip_protocol[proto0];
1699 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1701 p0->error = error0? error_node->errors[error0] : 0;
1703 if (PREDICT_FALSE (next0 != next_index))
1705 n_left_to_next += 1;
1706 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1709 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1712 n_left_to_next -= 1;
1716 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1719 return frame->n_vectors;
/*
 * Graph node registration for ip4-local. It runs ip4_local, carries u32
 * vector elements, formats traces with format_ip4_forward_next_trace and
 * declares IP_LOCAL_N_NEXT next nodes; the visible entries map the drop,
 * punt, UDP lookup and ICMP next indices to their node names.
 */
1722 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1723 .function = ip4_local,
1724 .name = "ip4-local",
1725 .vector_size = sizeof (u32),
1727 .format_trace = format_ip4_forward_next_trace,
1729 .n_next_nodes = IP_LOCAL_N_NEXT,
1731 [IP_LOCAL_NEXT_DROP] = "error-drop",
1732 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1733 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1734 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1738 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/*
 * Registers the graph node node_index as the handler of an IP protocol for
 * locally received packets: the node is added as a next of ip4_local_node
 * and the resulting next index is stored in
 * lm->local_next_by_ip_protocol[protocol].
 * protocol must be a valid index into that table (ASSERT).
 */
1740 void ip4_register_protocol (u32 protocol, u32 node_index)
1742 vlib_main_t * vm = vlib_get_main();
1743 ip4_main_t * im = &ip4_main;
1744 ip_lookup_main_t * lm = &im->lookup_main;
1746 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1747 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler: prints a heading followed by every protocol number whose
 * entry in lm->local_next_by_ip_protocol differs from IP_LOCAL_NEXT_PUNT.
 * The input and cmd arguments are not used by the visible code.
 */
1750 static clib_error_t *
1751 show_ip_local_command_fn (vlib_main_t * vm,
1752 unformat_input_t * input,
1753 vlib_cli_command_t * cmd)
1755 ip4_main_t * im = &ip4_main;
1756 ip_lookup_main_t * lm = &im->lookup_main;
1759 vlib_cli_output (vm, "Protocols handled by ip4_local");
1760 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1762 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1763 vlib_cli_output (vm, "%d", i);
1771 * Display the set of protocols handled by the local IPv4 stack.
1774 * Example of how to display local protocol table:
1775 * @cliexstart{show ip local}
1776 * Protocols handled by ip4_local
/* CLI registration binding the command path below to
 * show_ip_local_command_fn. */
1783 VLIB_CLI_COMMAND (show_ip_local, static) = {
1784 .path = "show ip local",
1785 .function = show_ip_local_command_fn,
1786 .short_help = "show ip local",
/*
 * Shared worker of the ip4-arp and ip4-glean nodes; ip4_arp calls it with a
 * final argument of 0 and ip4_glean with 1 (is_glean).
 *
 * Every packet of the frame is placed on the IP4_ARP_NEXT_DROP next frame.
 * For each one an ARP request may additionally be generated from
 * im->ip4_arp_request_packet_template and enqueued to
 * adj0->rewrite_header.next_index:
 *   - glean case: the request targets the destination address of the packet
 *     and uses the receive address stored in the glean adjacency as source;
 *   - neighbour case: the request targets the next hop of the adjacency and
 *     the source comes from ip4_src_address_for_packet.
 *
 * Requests are rate limited with a small hash bitmap keyed on the target
 * address (hash_seeds, hash_bitmap); the seeds are refreshed from the random
 * buffer when more than 1e-3 has elapsed on the vlib clock since the last
 * change. A packet whose hash bit is already set is counted as
 * IP4_ARP_ERROR_DROP, otherwise as IP4_ARP_ERROR_REQUEST_SENT.
 *
 * The seeds, the bitmap and the timestamp are function-level statics.
 * Returns frame->n_vectors.
 */
1791 ip4_arp_inline (vlib_main_t * vm,
1792 vlib_node_runtime_t * node,
1793 vlib_frame_t * frame,
1796 vnet_main_t * vnm = vnet_get_main();
1797 ip4_main_t * im = &ip4_main;
1798 ip_lookup_main_t * lm = &im->lookup_main;
1799 u32 * from, * to_next_drop;
1800 uword n_left_from, n_left_to_next_drop, next_index;
1801 static f64 time_last_seed_change = -1e100;
1802 static u32 hash_seeds[3];
1803 static uword hash_bitmap[256 / BITS (uword)];
1806 if (node->flags & VLIB_NODE_FLAG_TRACE)
1807 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1809 time_now = vlib_time_now (vm);
1810 if (time_now - time_last_seed_change > 1e-3)
1813 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1814 sizeof (hash_seeds));
1815 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1816 hash_seeds[i] = r[i];
1818 /* Mark all hash keys as not seen before. */
1819 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1822 time_last_seed_change = time_now;
1825 from = vlib_frame_vector_args (frame);
1826 n_left_from = frame->n_vectors;
1827 next_index = node->cached_next_index;
1828 if (next_index == IP4_ARP_NEXT_DROP)
1829 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1831 while (n_left_from > 0)
1833 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1834 to_next_drop, n_left_to_next_drop);
1836 while (n_left_from > 0 && n_left_to_next_drop > 0)
1838 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1839 ip_adjacency_t * adj0;
1846 p0 = vlib_get_buffer (vm, pi0);
1848 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1849 adj0 = ip_get_adjacency (lm, adj_index0);
1850 ip0 = vlib_buffer_get_current (p0);
1853 * this is the Glean case, so we are ARPing for the
1854 * packet's destination
1860 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1861 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1865 a0 ^= ip0->dst_address.data_u32;
1869 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1873 hash_v3_finalize32 (a0, b0, c0);
/*
 * NOTE(review): c0 is divided by BITS (uword) before c0 % BITS (uword) is
 * used to build the mask, so the bit position is derived from the word
 * index rather than from the low bits of the hash. Computing m0 before the
 * division looks like the intent; confirm. m0 is also declared u32 while
 * the mask is built as a uword.
 */
1875 c0 &= BITS (hash_bitmap) - 1;
1876 c0 = c0 / BITS (uword);
1877 m0 = (uword) 1 << (c0 % BITS (uword));
1879 bm0 = hash_bitmap[c0];
1880 drop0 = (bm0 & m0) != 0;
1882 /* Mark it as seen. */
1883 hash_bitmap[c0] = bm0 | m0;
1887 to_next_drop[0] = pi0;
1889 n_left_to_next_drop -= 1;
1891 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1894 * the adj has been updated to a rewrite but the node the DPO that got
1895 * us here hasn't - yet. no big deal. we'll drop while we wait.
1897 if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
1904 * Can happen if the control-plane is programming tables
1905 * with traffic flowing; at least that's today's lame excuse.
1907 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1908 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1910 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1913 /* Send ARP request. */
1917 ethernet_arp_header_t * h0;
1918 vnet_hw_interface_t * hw_if0;
1920 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1922 /* Add rewrite/encap string for ARP packet. */
1923 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1925 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1927 /* Src ethernet address in ARP header. */
1928 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1929 sizeof (h0->ip4_over_ethernet[0].ethernet));
1933 /* The interface's source address is stashed in the Glean Adj */
1934 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1936 /* Copy in destination address we are requesting. This is the
1937 * glean case, so it's the packet's destination.*/
1938 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1942 /* Src IP address in ARP header. */
1943 if (ip4_src_address_for_packet(lm, sw_if_index0,
1944 &h0->ip4_over_ethernet[0].ip4))
1946 /* No source address available */
1947 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1948 vlib_buffer_free(vm, &bi0, 1);
1952 /* Copy in destination address we are requesting from the
1954 h0->ip4_over_ethernet[1].ip4.data_u32 =
1955 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1958 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1959 b0 = vlib_get_buffer (vm, bi0);
1960 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1962 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1964 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1968 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1971 return frame->n_vectors;
/* Node function of ip4_arp_node: runs ip4_arp_inline with its final
 * argument set to 0 (the non-glean case). */
1975 ip4_arp (vlib_main_t * vm,
1976 vlib_node_runtime_t * node,
1977 vlib_frame_t * frame)
1979 return (ip4_arp_inline(vm, node, frame, 0));
/* Node function of ip4_glean_node: runs ip4_arp_inline with its final
 * argument set to 1 (the glean case). */
1983 ip4_glean (vlib_main_t * vm,
1984 vlib_node_runtime_t * node,
1985 vlib_frame_t * frame)
1987 return (ip4_arp_inline(vm, node, frame, 1));
/* Counter descriptions indexed by the IP4_ARP_ERROR_* values; referenced as
 * .error_strings by both the ip4_arp_node and ip4_glean_node registrations. */
1990 static char * ip4_arp_error_strings[] = {
1991 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1992 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1993 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1994 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1995 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1996 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Graph node registration running ip4_arp. It carries u32 vector elements,
 * formats traces with format_ip4_forward_next_trace, takes its error
 * counters from ip4_arp_error_strings and declares IP4_ARP_N_NEXT next
 * nodes, with IP4_ARP_NEXT_DROP mapped to error-drop.
 */
1999 VLIB_REGISTER_NODE (ip4_arp_node) = {
2000 .function = ip4_arp,
2002 .vector_size = sizeof (u32),
2004 .format_trace = format_ip4_forward_next_trace,
2006 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2007 .error_strings = ip4_arp_error_strings,
2009 .n_next_nodes = IP4_ARP_N_NEXT,
2011 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * Graph node registration for ip4-glean, running ip4_glean. It carries u32
 * vector elements, formats traces with format_ip4_forward_next_trace,
 * takes its error counters from ip4_arp_error_strings and declares
 * IP4_ARP_N_NEXT next nodes, with IP4_ARP_NEXT_DROP mapped to error-drop.
 */
2015 VLIB_REGISTER_NODE (ip4_glean_node) = {
2016 .function = ip4_glean,
2017 .name = "ip4-glean",
2018 .vector_size = sizeof (u32),
2020 .format_trace = format_ip4_forward_next_trace,
2022 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2023 .error_strings = ip4_arp_error_strings,
2025 .n_next_nodes = IP4_ARP_N_NEXT,
2027 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * foreach_notrace_ip4_arp_error lists ARP error codes, and arp_notrace_init
 * (registered with VLIB_INIT_FUNCTION) applies a helper macro to each of
 * them: the visible part of that macro calls
 * vnet_pcap_drop_trace_filter_add_del on rt->errors[IP4_ARP_ERROR_<a>],
 * where rt is the runtime of ip4_arp_node. The remaining arguments of that
 * call and the contents of the list are not visible here.
 */
2031 #define foreach_notrace_ip4_arp_error \
2037 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2039 vlib_node_runtime_t *rt =
2040 vlib_node_get_runtime (vm, ip4_arp_node.index);
2042 /* don't trace ARP request packets */
2044 vnet_pcap_drop_trace_filter_add_del \
2045 (rt->errors[IP4_ARP_ERROR_##a], \
2047 foreach_notrace_ip4_arp_error;
2052 VLIB_INIT_FUNCTION(arp_notrace_init);
/*
 * ip4_probe_neighbor: builds one ARP request for dst and hands it to the
 * output node of the hardware interface under sw_if_index.
 *
 * Returns a clib error when the interface is not admin-up, or when no
 * interface address matches dst (vnm->api_errno is then set to
 * VNET_API_ERROR_NO_MATCHING_INTERFACE); returns 0 on success.
 *
 * The request is taken from im->ip4_arp_request_packet_template. Its sender
 * hardware address is the interface hw_address, its sender IP is the
 * matching interface address and its target IP is dst. The encapsulation
 * comes from the neighbor-probe adjacency of the matching interface address.
 */
2055 /* Send an ARP request to see if given destination is reachable on given interface. */
2057 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2059 vnet_main_t * vnm = vnet_get_main();
2060 ip4_main_t * im = &ip4_main;
2061 ethernet_arp_header_t * h;
2062 ip4_address_t * src;
2063 ip_interface_address_t * ia;
2064 ip_adjacency_t * adj;
2065 vnet_hw_interface_t * hi;
2066 vnet_sw_interface_t * si;
2070 si = vnet_get_sw_interface (vnm, sw_if_index);
2072 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2074 return clib_error_return (0, "%U: interface %U down",
2075 format_ip4_address, dst,
2076 format_vnet_sw_if_index_name, vnm,
2080 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2083 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2084 return clib_error_return
2085 (0, "no matching interface address for destination %U (interface %U)",
2086 format_ip4_address, dst,
2087 format_vnet_sw_if_index_name, vnm, sw_if_index);
2090 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2092 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2094 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2096 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2098 h->ip4_over_ethernet[0].ip4 = src[0];
2099 h->ip4_over_ethernet[1].ip4 = dst[0];
2101 b = vlib_get_buffer (vm, bi);
2102 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2104 /* Add encapsulation string for software interface (e.g. ethernet header). */
2105 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the current data pointer back over the rewrite just written. */
2106 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2109 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2110 u32 * to_next = vlib_frame_vector_args (f);
2113 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2116 return /* no error */ 0;
/* Next-node indices used by the ip4 rewrite code: drop, hand over to ARP,
 * or hand over for ICMP error generation. */
2120 IP4_REWRITE_NEXT_DROP,
2121 IP4_REWRITE_NEXT_ARP,
2122 IP4_REWRITE_NEXT_ICMP_ERROR,
2123 } ip4_rewrite_next_t;
2126 ip4_rewrite_inline (vlib_main_t * vm,
2127 vlib_node_runtime_t * node,
2128 vlib_frame_t * frame,
2129 int rewrite_for_locally_received_packets,
2132 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2133 u32 * from = vlib_frame_vector_args (frame);
2134 u32 n_left_from, n_left_to_next, * to_next, next_index;
2135 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2136 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2137 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2139 n_left_from = frame->n_vectors;
2140 next_index = node->cached_next_index;
2141 u32 cpu_index = os_get_cpu_number();
2143 while (n_left_from > 0)
2145 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2147 while (n_left_from >= 4 && n_left_to_next >= 2)
2149 ip_adjacency_t * adj0, * adj1;
2150 vlib_buffer_t * p0, * p1;
2151 ip4_header_t * ip0, * ip1;
2152 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2153 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2154 u32 next0_override, next1_override;
2155 u32 tx_sw_if_index0, tx_sw_if_index1;
2157 if (rewrite_for_locally_received_packets)
2158 next0_override = next1_override = 0;
2160 /* Prefetch next iteration. */
2162 vlib_buffer_t * p2, * p3;
2164 p2 = vlib_get_buffer (vm, from[2]);
2165 p3 = vlib_get_buffer (vm, from[3]);
2167 vlib_prefetch_buffer_header (p2, STORE);
2168 vlib_prefetch_buffer_header (p3, STORE);
2170 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2171 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2174 pi0 = to_next[0] = from[0];
2175 pi1 = to_next[1] = from[1];
2180 n_left_to_next -= 2;
2182 p0 = vlib_get_buffer (vm, pi0);
2183 p1 = vlib_get_buffer (vm, pi1);
2185 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2186 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2188 /* We should never rewrite a pkt using the MISS adjacency */
2189 ASSERT(adj_index0 && adj_index1);
2191 ip0 = vlib_buffer_get_current (p0);
2192 ip1 = vlib_buffer_get_current (p1);
2194 error0 = error1 = IP4_ERROR_NONE;
2195 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2197 /* Decrement TTL & update checksum.
2198 Works either endian, so no need for byte swap. */
2199 if (! rewrite_for_locally_received_packets)
2201 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2203 /* Input node should have rejected packets with ttl 0. */
2204 ASSERT (ip0->ttl > 0);
2205 ASSERT (ip1->ttl > 0);
2207 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2208 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2210 checksum0 += checksum0 >= 0xffff;
2211 checksum1 += checksum1 >= 0xffff;
2213 ip0->checksum = checksum0;
2214 ip1->checksum = checksum1;
2223 * If the ttl drops below 1 when forwarding, generate
2226 if (PREDICT_FALSE(ttl0 <= 0))
2228 error0 = IP4_ERROR_TIME_EXPIRED;
2229 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2230 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2231 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2232 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2234 if (PREDICT_FALSE(ttl1 <= 0))
2236 error1 = IP4_ERROR_TIME_EXPIRED;
2237 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2238 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2239 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2240 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2243 /* Verify checksum. */
2244 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2245 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2248 /* Rewrite packet header and updates lengths. */
2249 adj0 = ip_get_adjacency (lm, adj_index0);
2250 adj1 = ip_get_adjacency (lm, adj_index1);
2252 if (rewrite_for_locally_received_packets)
2254 if (PREDICT_FALSE(adj0->lookup_next_index
2255 == IP_LOOKUP_NEXT_ARP))
2256 next0_override = IP4_REWRITE_NEXT_ARP;
2257 if (PREDICT_FALSE(adj1->lookup_next_index
2258 == IP_LOOKUP_NEXT_ARP))
2259 next1_override = IP4_REWRITE_NEXT_ARP;
2262 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2263 rw_len0 = adj0[0].rewrite_header.data_bytes;
2264 rw_len1 = adj1[0].rewrite_header.data_bytes;
2265 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2266 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2268 /* Check MTU of outgoing interface. */
2269 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2270 ? IP4_ERROR_MTU_EXCEEDED
2272 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2273 ? IP4_ERROR_MTU_EXCEEDED
2276 next0 = (error0 == IP4_ERROR_NONE)
2277 ? adj0[0].rewrite_header.next_index : next0;
2279 if (rewrite_for_locally_received_packets)
2280 next0 = next0 && next0_override ? next0_override : next0;
2282 next1 = (error1 == IP4_ERROR_NONE)
2283 ? adj1[0].rewrite_header.next_index : next1;
2285 if (rewrite_for_locally_received_packets)
2286 next1 = next1 && next1_override ? next1_override : next1;
2289 * We've already accounted for an ethernet_header_t elsewhere
2291 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2292 vlib_increment_combined_counter
2293 (&adjacency_counters,
2294 cpu_index, adj_index0,
2295 /* packet increment */ 0,
2296 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2298 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2299 vlib_increment_combined_counter
2300 (&adjacency_counters,
2301 cpu_index, adj_index1,
2302 /* packet increment */ 0,
2303 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2305 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2306 * to see the IP header */
2307 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2309 p0->current_data -= rw_len0;
2310 p0->current_length += rw_len0;
2311 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2312 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2316 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2319 p0->current_config_index =
2320 vec_elt (cm->config_index_by_sw_if_index,
2322 vnet_get_config_data (&cm->config_main,
2323 &p0->current_config_index,
2325 /* # bytes of config data */ 0);
2328 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2330 p1->current_data -= rw_len1;
2331 p1->current_length += rw_len1;
2333 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2334 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2338 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2341 p1->current_config_index =
2342 vec_elt (cm->config_index_by_sw_if_index,
2344 vnet_get_config_data (&cm->config_main,
2345 &p1->current_config_index,
2347 /* # bytes of config data */ 0);
2351 /* Guess we are only writing on simple Ethernet header. */
2352 vnet_rewrite_two_headers (adj0[0], adj1[0],
2354 sizeof (ethernet_header_t));
2358 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2359 adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2362 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2363 to_next, n_left_to_next,
2364 pi0, pi1, next0, next1);
2367 while (n_left_from > 0 && n_left_to_next > 0)
2369 ip_adjacency_t * adj0;
2372 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2374 u32 tx_sw_if_index0;
2376 if (rewrite_for_locally_received_packets)
2379 pi0 = to_next[0] = from[0];
2381 p0 = vlib_get_buffer (vm, pi0);
2383 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2385 /* We should never rewrite a pkt using the MISS adjacency */
2388 adj0 = ip_get_adjacency (lm, adj_index0);
2390 ip0 = vlib_buffer_get_current (p0);
2392 error0 = IP4_ERROR_NONE;
2393 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2395 /* Decrement TTL & update checksum. */
2396 if (! rewrite_for_locally_received_packets)
2398 i32 ttl0 = ip0->ttl;
2400 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2402 checksum0 += checksum0 >= 0xffff;
2404 ip0->checksum = checksum0;
2406 ASSERT (ip0->ttl > 0);
2412 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2414 if (PREDICT_FALSE(ttl0 <= 0))
2417 * If the ttl drops below 1 when forwarding, generate
2420 error0 = IP4_ERROR_TIME_EXPIRED;
2421 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2422 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2423 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2424 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2428 if (rewrite_for_locally_received_packets)
2431 * We have to override the next_index in ARP adjacencies,
2432 * because they're set up for ip4-arp, not this node...
2434 if (PREDICT_FALSE(adj0->lookup_next_index
2435 == IP_LOOKUP_NEXT_ARP))
2436 next0_override = IP4_REWRITE_NEXT_ARP;
2439 /* Guess we are only writing on simple Ethernet header. */
2440 vnet_rewrite_one_header (adj0[0], ip0,
2441 sizeof (ethernet_header_t));
2443 /* Update packet buffer attributes/set output interface. */
2444 rw_len0 = adj0[0].rewrite_header.data_bytes;
2445 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2447 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2448 vlib_increment_combined_counter
2449 (&adjacency_counters,
2450 cpu_index, adj_index0,
2451 /* packet increment */ 0,
2452 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2454 /* Check MTU of outgoing interface. */
2455 error0 = (vlib_buffer_length_in_chain (vm, p0)
2456 > adj0[0].rewrite_header.max_l3_packet_bytes
2457 ? IP4_ERROR_MTU_EXCEEDED
2460 p0->error = error_node->errors[error0];
2462 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2463 * to see the IP header */
2464 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2466 p0->current_data -= rw_len0;
2467 p0->current_length += rw_len0;
2468 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2470 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2471 next0 = adj0[0].rewrite_header.next_index;
2475 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2479 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2482 p0->current_config_index =
2483 vec_elt (cm->config_index_by_sw_if_index,
2485 vnet_get_config_data (&cm->config_main,
2486 &p0->current_config_index,
2488 /* # bytes of config data */ 0);
2492 if (rewrite_for_locally_received_packets)
2493 next0 = next0 && next0_override ? next0_override : next0;
2498 n_left_to_next -= 1;
2500 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2501 to_next, n_left_to_next,
2505 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2508 /* Need to do trace after rewrites to pick up new packet data. */
2509 if (node->flags & VLIB_NODE_FLAG_TRACE)
2510 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2512 return frame->n_vectors;
2516 /** @brief IPv4 transit rewrite node.
2517 @node ip4-rewrite-transit
2519 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2520 header checksum, fetch the ip adjacency, check the outbound mtu,
2521 apply the adjacency rewrite, and send pkts to the adjacency
2522 rewrite header's next_index.
2524 @param vm vlib_main_t corresponding to the current thread
2525 @param node vlib_node_runtime_t
2526 @param frame vlib_frame_t whose contents should be dispatched
2528 @par Graph mechanics: buffer metadata, next index usage
2531 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2532 - the rewrite adjacency index
2533 - <code>adj->lookup_next_index</code>
2534 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2535 the packet will be dropped.
2536 - <code>adj->rewrite_header</code>
2537 - Rewrite string length, rewrite string, next_index
2540 - <code>b->current_data, b->current_length</code>
2541 - Updated net of applying the rewrite string
2543 <em>Next Indices:</em>
2544 - <code> adj->rewrite_header.next_index </code>
/*
 * Node function for "ip4-rewrite-transit": hands the whole frame to
 * ip4_rewrite_inline with rewrite_for_locally_received_packets = 0.
 * NOTE(review): the trailing 0 is the flag that only ip4_midchain sets
 * to 1; presumably it selects the midchain fixup path -- confirm
 * against ip4_rewrite_inline's parameter list.
 */
2548 ip4_rewrite_transit (vlib_main_t * vm,
2549 vlib_node_runtime_t * node,
2550 vlib_frame_t * frame)
/* The result is whatever ip4_rewrite_inline returns for this frame. */
2552 return ip4_rewrite_inline (vm, node, frame,
2553 /* rewrite_for_locally_received_packets */ 0, 0);
2556 /** @brief IPv4 local rewrite node.
2557 @node ip4-rewrite-local
2559 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2560 the outbound interface mtu, apply the adjacency rewrite, and send
2561 pkts to the adjacency rewrite header's next_index. Deal
2562 with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2563 dst = interface addr."
2565 @param vm vlib_main_t corresponding to the current thread
2566 @param node vlib_node_runtime_t
2567 @param frame vlib_frame_t whose contents should be dispatched
2569 @par Graph mechanics: buffer metadata, next index usage
2572 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2573 - the rewrite adjacency index
2574 - <code>adj->lookup_next_index</code>
2575 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2576 the packet will be dropped.
2577 - <code>adj->rewrite_header</code>
2578 - Rewrite string length, rewrite string, next_index
2581 - <code>b->current_data, b->current_length</code>
2582 - Updated net of applying the rewrite string
2584 <em>Next Indices:</em>
2585 - <code> adj->rewrite_header.next_index </code>
/*
 * Node function for "ip4-rewrite-local": hands the whole frame to
 * ip4_rewrite_inline with rewrite_for_locally_received_packets = 1.
 * NOTE(review): the trailing 0 is the flag that only ip4_midchain sets
 * to 1; presumably it selects the midchain fixup path -- confirm
 * against ip4_rewrite_inline's parameter list.
 */
2590 ip4_rewrite_local (vlib_main_t * vm,
2591 vlib_node_runtime_t * node,
2592 vlib_frame_t * frame)
/* The result is whatever ip4_rewrite_inline returns for this frame. */
2594 return ip4_rewrite_inline (vm, node, frame,
2595 /* rewrite_for_locally_received_packets */ 1, 0);
/*
 * Node function for "ip4-midchain": hands the whole frame to
 * ip4_rewrite_inline with rewrite_for_locally_received_packets = 0 and
 * the trailing flag set to 1 (the two rewrite node functions pass 0).
 * NOTE(review): that flag presumably enables the
 * adj->sub_type.midchain.fixup_func calls made in ip4_rewrite_inline --
 * confirm against its parameter list.
 */
2599 ip4_midchain (vlib_main_t * vm,
2600 vlib_node_runtime_t * node,
2601 vlib_frame_t * frame)
/* The result is whatever ip4_rewrite_inline returns for this frame. */
2603 return ip4_rewrite_inline (vm, node, frame,
2604 /* rewrite_for_locally_received_packets */ 0, 1);
/*
 * Graph-node registration for "ip4-rewrite-transit", dispatched by
 * ip4_rewrite_transit. Frame elements are u32-sized (vector_size).
 */
2607 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2608 .function = ip4_rewrite_transit,
2609 .name = "ip4-rewrite-transit",
2610 .vector_size = sizeof (u32),
/* Packet traces for this node are rendered by format_ip4_rewrite_trace. */
2612 .format_trace = format_ip4_rewrite_trace,
/*
 * Next-node names, indexed by the IP4_REWRITE_NEXT_* values. The
 * "ip4-midchain" and "ip4-rewrite-local" registrations name this node
 * in their .sibling_of field.
 */
2616 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2617 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2618 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
/* Multiarch wrapper declaration for the node function above. */
2622 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
/*
 * Graph-node registration for "ip4-midchain", dispatched by
 * ip4_midchain. Frame elements are u32-sized (vector_size).
 */
2624 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2625 .function = ip4_midchain,
2626 .name = "ip4-midchain",
2627 .vector_size = sizeof (u32),
/*
 * Unlike the two ip4-rewrite-* nodes, traces here are rendered by
 * format_ip4_forward_next_trace rather than format_ip4_rewrite_trace.
 */
2629 .format_trace = format_ip4_forward_next_trace,
/* Declared as a sibling of the transit rewrite node. */
2631 .sibling_of = "ip4-rewrite-transit",
/* Multiarch wrapper declaration for the node function above. */
2634 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/*
 * Graph-node registration for "ip4-rewrite-local", dispatched by
 * ip4_rewrite_local. Frame elements are u32-sized (vector_size).
 */
2636 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2637 .function = ip4_rewrite_local,
2638 .name = "ip4-rewrite-local",
2639 .vector_size = sizeof (u32),
/* Declared as a sibling of the transit rewrite node. */
2641 .sibling_of = "ip4-rewrite-transit",
/* Packet traces for this node are rendered by format_ip4_rewrite_trace. */
2643 .format_trace = format_ip4_rewrite_trace,
/* Multiarch wrapper declaration for the node function above. */
2648 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/*
 * CLI handler registered for "set interface ip table".
 *
 * Parses an interface followed by a decimal table id, obtains an IPv4
 * FIB index from fib_table_find_or_create_and_lock, and records that
 * index for the interface in ip4_main.fib_index_by_sw_if_index.
 *
 * @param vm    vlib_main_t of the calling thread
 * @param input unformat input holding the command arguments
 * @param cmd   CLI command descriptor
 * @return clib_error_t pointer; `error` starts as 0 and is set to an
 *         "unknown interface" or "expected table id" error when parsing
 *         fails. NOTE(review): presumably `error` is what is returned --
 *         confirm.
 */
2650 static clib_error_t *
2651 add_del_interface_table (vlib_main_t * vm,
2652 unformat_input_t * input,
2653 vlib_cli_command_t * cmd)
2655 vnet_main_t * vnm = vnet_get_main();
2656 clib_error_t * error = 0;
2657 u32 sw_if_index, table_id;
/* First argument: the interface, resolved to its sw_if_index. */
2661 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2663 error = clib_error_return (0, "unknown interface `%U'",
2664 format_unformat_error, input);
/* Second argument: the table id as a decimal number. */
2668 if (unformat (input, "%d", &table_id))
2672 error = clib_error_return (0, "expected table id `%U'",
2673 format_unformat_error, input);
2678 ip4_main_t * im = &ip4_main;
/* Find the IPv4 FIB or create it; the helper also takes a lock on it. */
2681 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2686 // changing an interface's table has consequences for any connecteds
2687 // and adj-fibs already installed.
/* Grow the per-interface vector if needed, then bind interface -> FIB. */
2689 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2690 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2698 * Place the indicated interface into the supplied IPv4 FIB table (also known
2699 * as a VRF). If the FIB table does not exist, this command creates it. To
2700 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2701 * FIB table will only be displayed if a route has been added to the table, or
2702 * an IP Address is assigned to an interface in the table (which adds a route
2703 * automatically), or '<em>include-empty</em>' is included.
2705 * @note IP addresses added after setting the interface IP table end up in
2706 * the indicated FIB table. If the IP address is added prior to adding the
2707 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2708 * but potentially counter-intuitive results occur if you provision interface
2709 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2710 * IP table ID provisioned. It might be marginally useful to evade source RPF
2711 * drops to put an interface address into multiple FIBs.
2714 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2715 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
/*
 * CLI registration: binds the path "set interface ip table" to
 * add_del_interface_table and supplies its one-line usage string.
 */
2718 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2719 .path = "set interface ip table",
2720 .function = add_del_interface_table,
2721 .short_help = "set interface ip table <interface> <table-id>",
/*
 * Node function for "ip4-lookup-multicast".
 *
 * For every buffer in the frame: choose a FIB index, look up the
 * load-balance object for the packet, compute the flow hash, pick the
 * load-balance bucket (a DPO) and record its index in
 * vnet_buffer(b)->ip.adj_index[VLIB_TX], bump the per-load-balance
 * combined counter, then enqueue the buffer to the DPO's next node.
 *
 * @param vm    vlib_main_t for the current thread
 * @param node  runtime data of this node; cached_next_index seeds the
 *              speculative next frame
 * @param frame frame of buffer indices to process
 * @return      frame->n_vectors
 */
2727 ip4_lookup_multicast (vlib_main_t * vm,
2728 vlib_node_runtime_t * node,
2729 vlib_frame_t * frame)
2731 ip4_main_t * im = &ip4_main;
2732 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2733 u32 n_left_from, n_left_to_next, * from, * to_next;
2734 ip_lookup_next_t next;
2735 u32 cpu_index = os_get_cpu_number();
2737 from = vlib_frame_vector_args (frame);
2738 n_left_from = frame->n_vectors;
2739 next = node->cached_next_index;
/* Outer loop: keep fetching a next frame until all input is consumed. */
2741 while (n_left_from > 0)
2743 vlib_get_next_frame (vm, node, next,
2744 to_next, n_left_to_next);
/*
 * Dual loop: two packets per iteration. It requires four remaining
 * inputs because from[2] and from[3] are prefetched for the next pass.
 */
2746 while (n_left_from >= 4 && n_left_to_next >= 2)
2748 vlib_buffer_t * p0, * p1;
2749 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2750 ip_lookup_next_t next0, next1;
2751 ip4_header_t * ip0, * ip1;
2752 u32 fib_index0, fib_index1;
2753 const dpo_id_t *dpo0, *dpo1;
2754 const load_balance_t * lb0, * lb1;
2756 /* Prefetch next iteration. */
2758 vlib_buffer_t * p2, * p3;
2760 p2 = vlib_get_buffer (vm, from[2]);
2761 p3 = vlib_get_buffer (vm, from[3]);
2763 vlib_prefetch_buffer_header (p2, LOAD);
2764 vlib_prefetch_buffer_header (p3, LOAD);
2766 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2767 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
2770 pi0 = to_next[0] = from[0];
2771 pi1 = to_next[1] = from[1];
2773 p0 = vlib_get_buffer (vm, pi0);
2774 p1 = vlib_get_buffer (vm, pi1);
2776 ip0 = vlib_buffer_get_current (p0);
2777 ip1 = vlib_buffer_get_current (p1);
/*
 * FIB index: the table bound to the RX interface, unless
 * sw_if_index[VLIB_TX] is set (not ~0), in which case that value is
 * used as the FIB index instead.
 */
2779 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2780 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2781 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2782 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2783 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2784 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2786 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2788 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2791 lb0 = load_balance_get (lb_index0);
2792 lb1 = load_balance_get (lb_index1);
/* The bucket mask below relies on a non-zero power-of-two bucket count. */
2794 ASSERT (lb0->lb_n_buckets > 0);
2795 ASSERT (is_pow2 (lb0->lb_n_buckets));
2796 ASSERT (lb1->lb_n_buckets > 0);
2797 ASSERT (is_pow2 (lb1->lb_n_buckets));
2799 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2800 (ip0, lb0->lb_hash_config);
2802 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2803 (ip1, lb1->lb_hash_config);
2805 dpo0 = load_balance_get_bucket_i(lb0,
2806 (vnet_buffer (p0)->ip.flow_hash &
2807 (lb0->lb_n_buckets_minus_1)));
/*
 * Packet 1's bucket comes from lb1, so the hash must be masked with
 * lb1's bucket count. Masking with lb0's (as this code used to) picks
 * the wrong bucket, or an index beyond lb1's buckets, whenever the two
 * load-balance objects differ in size.
 */
2808 dpo1 = load_balance_get_bucket_i(lb1,
2809 (vnet_buffer (p1)->ip.flow_hash &
2810 (lb1->lb_n_buckets_minus_1)));
2812 next0 = dpo0->dpoi_next_node;
2813 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2814 next1 = dpo1->dpoi_next_node;
2815 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* Count one packet and its chain length against each load-balance index. */
2817 if (1) /* $$$$$$ HACK FIXME */
2818 vlib_increment_combined_counter
2819 (cm, cpu_index, lb_index0, 1,
2820 vlib_buffer_length_in_chain (vm, p0));
2821 if (1) /* $$$$$$ HACK FIXME */
2822 vlib_increment_combined_counter
2823 (cm, cpu_index, lb_index1, 1,
2824 vlib_buffer_length_in_chain (vm, p1));
2828 n_left_to_next -= 2;
/*
 * wrong_next: bit 0 set when packet 0 targets a node other than `next`,
 * bit 1 set when packet 1 does. The fix-ups below give back the
 * speculative slot(s) and enqueue the buffer to its own next node.
 */
2831 wrong_next = (next0 != next) + 2*(next1 != next);
2832 if (PREDICT_FALSE (wrong_next != 0))
2840 n_left_to_next += 1;
2841 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2847 n_left_to_next += 1;
2848 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2854 n_left_to_next += 2;
2855 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2856 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2860 vlib_put_next_frame (vm, node, next, n_left_to_next);
2862 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single loop: same steps, one packet at a time, for the remainder. */
2868 while (n_left_from > 0 && n_left_to_next > 0)
2873 ip_lookup_next_t next0;
2875 const dpo_id_t *dpo0;
2876 const load_balance_t * lb0;
2881 p0 = vlib_get_buffer (vm, pi0);
2883 ip0 = vlib_buffer_get_current (p0);
/* Same FIB selection rule as in the dual loop. */
2885 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2886 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2887 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2888 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2890 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2893 lb0 = load_balance_get (lb_index0);
2895 ASSERT (lb0->lb_n_buckets > 0);
2896 ASSERT (is_pow2 (lb0->lb_n_buckets));
2898 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2899 (ip0, lb0->lb_hash_config);
2901 dpo0 = load_balance_get_bucket_i(lb0,
2902 (vnet_buffer (p0)->ip.flow_hash &
2903 (lb0->lb_n_buckets_minus_1)));
2905 next0 = dpo0->dpoi_next_node;
2906 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2908 if (1) /* $$$$$$ HACK FIXME */
2909 vlib_increment_combined_counter
2910 (cm, cpu_index, lb_index0, 1,
2911 vlib_buffer_length_in_chain (vm, p0));
2915 n_left_to_next -= 1;
/*
 * Mis-speculated next node: give the slot back, hand the current frame
 * over and fetch another one before re-enqueueing.
 */
2918 if (PREDICT_FALSE (next0 != next))
2920 n_left_to_next += 1;
2921 vlib_put_next_frame (vm, node, next, n_left_to_next);
2923 vlib_get_next_frame (vm, node, next,
2924 to_next, n_left_to_next);
2927 n_left_to_next -= 1;
2931 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the lookup, and only when the node's trace flag is set. */
2934 if (node->flags & VLIB_NODE_FLAG_TRACE)
2935 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2937 return frame->n_vectors;
/*
 * Graph-node registration for "ip4-lookup-multicast", dispatched by
 * ip4_lookup_multicast. Frame elements are u32-sized (vector_size).
 */
2940 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2941 .function = ip4_lookup_multicast,
2942 .name = "ip4-lookup-multicast",
2943 .vector_size = sizeof (u32),
/* Declared as a sibling of the "ip4-lookup" node. */
2944 .sibling_of = "ip4-lookup",
/* Packet traces for this node are rendered by format_ip4_lookup_trace. */
2945 .format_trace = format_ip4_lookup_trace,
/* Multiarch wrapper declaration for the node function above. */
2950 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/*
 * Graph-node registration for "ip4-multicast". Note that the node
 * function is ip4_drop, not a multicast-specific routine.
 */
2952 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2953 .function = ip4_drop,
2954 .name = "ip4-multicast",
2955 .vector_size = sizeof (u32),
/* Packet traces are rendered by format_ip4_forward_next_trace. */
2957 .format_trace = format_ip4_forward_next_trace,
/*
 * Cross-check two lookup paths for one address in one FIB.
 *
 * Walks the FIB's mtrie by hand and compares the index stored in the
 * resulting leaf with the value returned by ip4_fib_table_lookup_lb.
 *
 * @param a          IPv4 address to look up
 * @param fib_index0 FIB index handed to ip4_fib_get
 * @return non-zero when both lookups yield the same index, 0 otherwise
 */
2965 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2967 ip4_fib_mtrie_t * mtrie0;
2968 ip4_fib_mtrie_leaf_t leaf0;
2971 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/*
 * Four lookup steps from the root, numbered 0..3.
 * NOTE(review): presumably one step per address byte -- confirm
 * against ip4_fib_mtrie_lookup_step.
 */
2973 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2974 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2975 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2976 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2977 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2979 /* Handle default route. */
2980 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2982 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Agreement between the hand-walked mtrie and the regular lookup. */
2984 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/*
 * CLI handler registered for "test lookup".
 *
 * Accepts "table <n>", "count <n>" and an IPv4 base address in any
 * order, then runs ip4_lookup_validate on n consecutive addresses
 * starting at the base address and reports the outcome on the CLI.
 *
 * Note that the value parsed after "table" is passed unchanged to
 * ip4_fib_get and ip4_lookup_validate, i.e. it is used as a FIB index.
 *
 * @param vm    vlib_main_t used for CLI output
 * @param input unformat input holding the command arguments
 * @param cmd   CLI command descriptor
 * @return a clib_error_t on unknown input or when the FIB check below
 *         fails
 */
2987 static clib_error_t *
2988 test_lookup_command_fn (vlib_main_t * vm,
2989 unformat_input_t * input,
2990 vlib_cli_command_t * cmd)
2997 ip4_address_t ip4_base_address;
3000 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3001 if (unformat (input, "table %d", &table_id))
3003 /* Make sure the entry exists. */
3004 fib = ip4_fib_get(table_id);
3005 if ((fib) && (fib->index != table_id))
3006 return clib_error_return (0, "<fib-index> %d does not exist",
/* The count is parsed as a floating-point value ("%f"). */
3009 else if (unformat (input, "count %f", &count))
3012 else if (unformat (input, "%U",
3013 unformat_ip4_address, &ip4_base_address))
3016 return clib_error_return (0, "unknown input `%U'",
3017 format_unformat_error, input);
/*
 * Validate n addresses. NOTE(review): presumably a failed validation
 * increments `errors` -- confirm.
 */
3022 for (i = 0; i < n; i++)
3024 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Advance to the next address: +1 in host byte order, stored back in network order. */
3027 ip4_base_address.as_u32 =
3028 clib_host_to_net_u32 (1 +
3029 clib_net_to_host_u32 (ip4_base_address.as_u32));
/* Report either the error count or a clean run. */
3033 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3035 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3041 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3042 * given FIB table to determine if there is a conflict with the
3043 * adjacency table. The fib-id can be determined by using the
3044 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3047 * @todo This command uses fib-id, other commands use table-id (not
3048 * just a name, they are different indexes). Would like to change this
3049 * to table-id for consistency.
3052 * Example of how to run the test lookup command:
3053 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3054 * No errors in 2 lookups
/*
 * CLI registration: binds the path "test lookup" to
 * test_lookup_command_fn and supplies its one-line usage string.
 */
3058 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3059 .path = "test lookup",
3060 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3061 .function = test_lookup_command_fn,
/*
 * Set the flow-hash configuration of the IPv4 FIB identified by
 * table_id.
 *
 * @param table_id         table id, looked up in
 *                         ip4_main.fib_index_by_table_id
 * @param flow_hash_config value stored in the FIB's flow_hash_config
 * @return VNET_API_ERROR_NO_SUCH_FIB on the failure path.
 *         NOTE(review): presumably taken when the hash lookup misses,
 *         with 0 returned on success -- confirm.
 */
3065 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3067 ip4_main_t * im4 = &ip4_main;
/* Translate the table id into a FIB index via the hash. */
3069 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3072 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] holds the FIB index for this table id. */
3074 fib = ip4_fib_get (p[0]);
3076 fib->flow_hash_config = flow_hash_config;
/*
 * CLI handler registered for "set ip flow-hash".
 *
 * Parses "table <id>" plus any of the keywords enumerated by
 * foreach_flow_hash_bit; each recognised keyword ORs its bit into
 * flow_hash_config and sets `matched`. The result is applied with
 * vnet_set_ip4_flow_hash.
 *
 * @param vm    vlib_main_t of the calling thread
 * @param input unformat input holding the command arguments
 * @param cmd   CLI command descriptor
 * @return a clib_error_t for unknown input or when
 *         vnet_set_ip4_flow_hash reports VNET_API_ERROR_NO_SUCH_FIB
 */
3080 static clib_error_t *
3081 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3082 unformat_input_t * input,
3083 vlib_cli_command_t * cmd)
3087 u32 flow_hash_config = 0;
3090 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3091 if (unformat (input, "table %d", &table_id))
3094 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3095 foreach_flow_hash_bit
3101 return clib_error_return (0, "unknown input `%U'",
3102 format_unformat_error, input);
/* Apply the accumulated bit mask to the chosen table. */
3104 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
/* Map the API return code onto a CLI error. */
3110 case VNET_API_ERROR_NO_SUCH_FIB:
3111 return clib_error_return (0, "no such FIB table %d", table_id);
/* Any other non-handled return code is logged as a bug. */
3114 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3122 * Configure the set of IPv4 fields used by the flow hash.
3125 * Example of how to set the flow hash on a given table:
3126 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
3127 * Example of how to display the configured flow hash:
3128 * @cliexstart{show ip fib}
3129 * Table 0, fib_index 0, flow hash: src dst sport dport proto
3130 * Destination Packets Bytes Adjacency
3131 * 172.16.2.0/24 0 0 weight 1, index 5
3133 * 172.16.2.1/32 0 0 weight 1, index 6
3135 * Table 7, fib_index 1, flow hash: dst sport dport proto
3136 * Destination Packets Bytes Adjacency
3137 * 172.16.1.0/24 0 0 weight 1, index 3
3139 * 172.16.1.1/32 1 98 weight 1, index 4
3141 * 172.16.1.2/32 0 0 weight 1, index 7
3142 * GigabitEthernet2/0/0
3143 * IP4: 02:fe:6a:07:39:6f -> 16:d9:e0:91:79:86
/*
 * CLI registration: binds the path "set ip flow-hash" to
 * set_ip_flow_hash_command_fn; the string below is its usage text.
 */
3147 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3148 .path = "set ip flow-hash",
3150 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3151 .function = set_ip_flow_hash_command_fn,
/*
 * Bind a classifier table to an interface for IPv4, or unbind it.
 *
 * Records table_index for the interface in
 * lookup_main.classify_table_index_by_sw_if_index. If the interface
 * has an IPv4 address, a FIB entry sourced from FIB_SOURCE_CLASSIFY is
 * then added for it (table_index != ~0) or removed (table_index == ~0).
 *
 * @param vm          vlib_main_t of the calling thread
 * @param sw_if_index interface to configure
 * @param table_index classifier table index; ~0 is accepted without a
 *                    table-existence check and selects the removal path
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE if the interface index
 *         is free, VNET_API_ERROR_NO_SUCH_ENTRY if the classifier table
 *         index is free. NOTE(review): presumably 0 on success --
 *         confirm.
 */
3155 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3158 vnet_main_t * vnm = vnet_get_main();
3159 vnet_interface_main_t * im = &vnm->interface_main;
3160 ip4_main_t * ipm = &ip4_main;
3161 ip_lookup_main_t * lm = &ipm->lookup_main;
3162 vnet_classify_main_t * cm = &vnet_classify_main;
3163 ip4_address_t *if_addr;
/* Reject interface indices that are not allocated. */
3165 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3166 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* ~0 means "no table"; any other index must refer to a live table. */
3168 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3169 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Grow the per-interface vector if needed, then record the binding. */
3171 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3172 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
/* The FIB entry handled below is keyed on the interface's first address. */
3174 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3176 if (NULL != if_addr)
3178 fib_prefix_t pfx = {
3180 .fp_proto = FIB_PROTOCOL_IP4,
3181 .fp_addr.ip4 = *if_addr,
3185 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Binding: create a classify DPO and install it as a special entry. */
3189 if (table_index != (u32) ~0)
3191 dpo_id_t dpo = DPO_NULL;
3196 classify_dpo_create(FIB_PROTOCOL_IP4,
3199 fib_table_entry_special_dpo_add(fib_index,
3201 FIB_SOURCE_CLASSIFY,
3202 FIB_ENTRY_FLAG_NONE,
/* Unbinding: drop the entry previously added by the classify source. */
3208 fib_table_entry_special_remove(fib_index,
3210 FIB_SOURCE_CLASSIFY);
/*
 * CLI handler registered for "set ip classify".
 *
 * Parses "table-index <n>" and "intfc <interface>" in any order; both
 * are mandatory. The pair is then applied with
 * vnet_set_ip4_classify_intfc and its return code mapped to a CLI
 * error.
 *
 * @param vm    vlib_main_t, forwarded to vnet_set_ip4_classify_intfc
 * @param input unformat input holding the command arguments
 * @param cmd   CLI command descriptor
 * @return a clib_error_t when an argument is missing or the interface
 *         or classifier table does not exist
 */
3217 static clib_error_t *
3218 set_ip_classify_command_fn (vlib_main_t * vm,
3219 unformat_input_t * input,
3220 vlib_cli_command_t * cmd)
/* ~0 marks "not supplied" for the interface; the table has its own flag. */
3222 u32 table_index = ~0;
3223 int table_index_set = 0;
3224 u32 sw_if_index = ~0;
3227 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3228 if (unformat (input, "table-index %d", &table_index))
3229 table_index_set = 1;
3230 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3231 vnet_get_main(), &sw_if_index))
/* Both arguments are required. */
3237 if (table_index_set == 0)
3238 return clib_error_return (0, "classify table-index must be specified");
3240 if (sw_if_index == ~0)
3241 return clib_error_return (0, "interface / subif must be specified");
3243 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
/* Translate API error codes into user-facing messages. */
3250 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3251 return clib_error_return (0, "No such interface");
3253 case VNET_API_ERROR_NO_SUCH_ENTRY:
3254 return clib_error_return (0, "No such classifier table");
3260 * Assign a classification table to an interface. The classification
3261 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
3262 * commands. Once the table is created, use this command to filter packets
3266 * Example of how to assign a classification table to an interface:
3267 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3270 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3271 .path = "set ip classify",
3273 "set ip classify intfc <interface> table-index <classify-idx>",
3274 .function = set_ip_classify_command_fn,