2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
56 * @brief IPv4 Forwarding.
58 * This file contains the source code for IPv4 forwarding.
/* Forward declaration of the shared trace helper that is defined further
   down in this file; the lookup and load-balance node functions call it
   when the node has tracing enabled. */
62 ip4_forward_next_trace (vlib_main_t * vm,
63 vlib_node_runtime_t * node,
65 vlib_rx_or_tx_t which_adj_index);
/*
 * ip4_lookup_inline: per-frame IPv4 FIB lookup.
 *
 * For every buffer in the frame this selects a FIB index (the buffer's
 * sw_if_index[VLIB_TX] when that is not ~0, otherwise the FIB bound to the
 * RX interface), walks that FIB's mtrie in four steps on the destination
 * address, and resolves the resulting load-balance object to one bucket
 * (a DPO).  The bucket's dpoi_next_node becomes the packet's next node and
 * its dpoi_index is stored in ip.adj_index[VLIB_TX].
 *
 * When lookup_for_responses_to_locally_received_packets is non-zero the
 * mtrie walk is skipped and the load-balance index is read from
 * ip.adj_index[VLIB_RX] instead.
 *
 * Returns frame->n_vectors.
 */
68 ip4_lookup_inline (vlib_main_t * vm,
69 vlib_node_runtime_t * node,
71 int lookup_for_responses_to_locally_received_packets)
73 ip4_main_t * im = &ip4_main;
/* Per-load-balance "to" counters; bumped once per packet below. */
74 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
75 u32 n_left_from, n_left_to_next, * from, * to_next;
76 ip_lookup_next_t next;
77 u32 cpu_index = os_get_cpu_number();
79 from = vlib_frame_vector_args (frame);
80 n_left_from = frame->n_vectors;
81 next = node->cached_next_index;
83 while (n_left_from > 0)
85 vlib_get_next_frame (vm, node, next,
86 to_next, n_left_to_next);
/* Dual-packet loop: handles two buffers per iteration and prefetches the
   following two, hence the ">= 4" guard on the input side. */
88 while (n_left_from >= 4 && n_left_to_next >= 2)
90 vlib_buffer_t * p0, * p1;
91 ip4_header_t * ip0, * ip1;
92 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
93 ip_lookup_next_t next0, next1;
94 const load_balance_t * lb0, * lb1;
95 ip4_fib_mtrie_t * mtrie0, * mtrie1;
96 ip4_fib_mtrie_leaf_t leaf0, leaf1;
97 ip4_address_t * dst_addr0, *dst_addr1;
98 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
99 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
100 flow_hash_config_t flow_hash_config0, flow_hash_config1;
101 u32 hash_c0, hash_c1;
103 const dpo_id_t *dpo0, *dpo1;
105 /* Prefetch next iteration. */
107 vlib_buffer_t * p2, * p3;
109 p2 = vlib_get_buffer (vm, from[2]);
110 p3 = vlib_get_buffer (vm, from[3]);
112 vlib_prefetch_buffer_header (p2, LOAD);
113 vlib_prefetch_buffer_header (p3, LOAD);
115 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
116 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
/* Speculatively enqueue both buffers to the current next frame. */
119 pi0 = to_next[0] = from[0];
120 pi1 = to_next[1] = from[1];
122 p0 = vlib_get_buffer (vm, pi0);
123 p1 = vlib_get_buffer (vm, pi1);
125 ip0 = vlib_buffer_get_current (p0);
126 ip1 = vlib_buffer_get_current (p1);
128 dst_addr0 = &ip0->dst_address;
129 dst_addr1 = &ip1->dst_address;
/* FIB selection: default to the RX interface's FIB, but let a value other
   than ~0 in sw_if_index[VLIB_TX] override it as an explicit FIB index. */
131 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
132 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
133 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
134 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
135 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
136 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
/* mtrie walk, steps 0..3 on the destination address, interleaved with
   unrelated header work between the steps. */
139 if (! lookup_for_responses_to_locally_received_packets)
141 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
142 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
144 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
146 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
147 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
150 tcp0 = (void *) (ip0 + 1);
151 tcp1 = (void *) (ip1 + 1);
153 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
154 || ip0->protocol == IP_PROTOCOL_UDP);
155 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
156 || ip1->protocol == IP_PROTOCOL_UDP);
158 if (! lookup_for_responses_to_locally_received_packets)
160 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
161 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
164 if (! lookup_for_responses_to_locally_received_packets)
166 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
167 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
170 if (! lookup_for_responses_to_locally_received_packets)
172 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
173 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
/* Shortcut mode: the load-balance index was already stored on the buffer. */
176 if (lookup_for_responses_to_locally_received_packets)
178 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
179 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
183 /* Handle default route. */
184 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
185 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
187 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
188 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
191 lb0 = load_balance_get (lb_index0);
192 lb1 = load_balance_get (lb_index1);
194 /* Use flow hash to compute multipath adjacency. */
195 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
196 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
/* The flow hash is only computed when there is more than one bucket to
   choose from; otherwise it stays 0. */
197 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
199 flow_hash_config0 = lb0->lb_hash_config;
200 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
201 ip4_compute_flow_hash (ip0, flow_hash_config0);
203 if (PREDICT_FALSE(lb1->lb_n_buckets > 1))
205 flow_hash_config1 = lb1->lb_hash_config;
206 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
207 ip4_compute_flow_hash (ip1, flow_hash_config1);
210 ASSERT (lb0->lb_n_buckets > 0);
211 ASSERT (is_pow2 (lb0->lb_n_buckets));
212 ASSERT (lb1->lb_n_buckets > 0);
213 ASSERT (is_pow2 (lb1->lb_n_buckets));
/* Bucket selection.  The bucket mask must come from the same load-balance
   object whose buckets are being indexed: packet 1 uses lb1's mask, not
   lb0's, because the two objects may have different bucket counts. */
215 dpo0 = load_balance_get_bucket_i(lb0,
217 (lb0->lb_n_buckets_minus_1)));
218 dpo1 = load_balance_get_bucket_i(lb1,
220 (lb1->lb_n_buckets_minus_1)));
222 next0 = dpo0->dpoi_next_node;
223 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
224 next1 = dpo1->dpoi_next_node;
225 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
/* NOTE(review): this dual-packet path adds sizeof(ethernet_header_t) to the
   byte count, while the single-packet path below passes the bare buffer
   chain length -- confirm which accounting is intended. */
227 vlib_increment_combined_counter
228 (cm, cpu_index, lb_index0, 1,
229 vlib_buffer_length_in_chain (vm, p0)
230 + sizeof(ethernet_header_t));
231 vlib_increment_combined_counter
232 (cm, cpu_index, lb_index1, 1,
233 vlib_buffer_length_in_chain (vm, p1)
234 + sizeof(ethernet_header_t));
/* Bit 0 set: packet 0 mispredicted; bit 1 set: packet 1 mispredicted. */
241 wrong_next = (next0 != next) + 2*(next1 != next);
242 if (PREDICT_FALSE (wrong_next != 0))
251 vlib_set_next_frame_buffer (vm, node, next0, pi0);
258 vlib_set_next_frame_buffer (vm, node, next1, pi1);
265 vlib_set_next_frame_buffer (vm, node, next0, pi0);
266 vlib_set_next_frame_buffer (vm, node, next1, pi1);
270 vlib_put_next_frame (vm, node, next, n_left_to_next);
272 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single-packet loop: same steps as above for the remaining buffers. */
278 while (n_left_from > 0 && n_left_to_next > 0)
282 __attribute__((unused)) tcp_header_t * tcp0;
283 ip_lookup_next_t next0;
284 const load_balance_t *lb0;
285 ip4_fib_mtrie_t * mtrie0;
286 ip4_fib_mtrie_leaf_t leaf0;
287 ip4_address_t * dst_addr0;
288 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
289 flow_hash_config_t flow_hash_config0;
290 const dpo_id_t *dpo0;
296 p0 = vlib_get_buffer (vm, pi0);
298 ip0 = vlib_buffer_get_current (p0);
300 dst_addr0 = &ip0->dst_address;
302 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
303 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
304 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
306 if (! lookup_for_responses_to_locally_received_packets)
308 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
310 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
312 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
315 tcp0 = (void *) (ip0 + 1);
317 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
318 || ip0->protocol == IP_PROTOCOL_UDP);
320 if (! lookup_for_responses_to_locally_received_packets)
321 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
323 if (! lookup_for_responses_to_locally_received_packets)
324 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
326 if (! lookup_for_responses_to_locally_received_packets)
327 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
329 if (lookup_for_responses_to_locally_received_packets)
330 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
333 /* Handle default route. */
334 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
335 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
338 lb0 = load_balance_get (lbi0);
340 /* Use flow hash to compute multipath adjacency. */
341 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
342 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
344 flow_hash_config0 = lb0->lb_hash_config;
346 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
347 ip4_compute_flow_hash (ip0, flow_hash_config0);
350 ASSERT (lb0->lb_n_buckets > 0);
351 ASSERT (is_pow2 (lb0->lb_n_buckets));
353 dpo0 = load_balance_get_bucket_i(lb0,
355 (lb0->lb_n_buckets_minus_1)));
357 next0 = dpo0->dpoi_next_node;
358 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
360 vlib_increment_combined_counter
361 (cm, cpu_index, lbi0, 1,
362 vlib_buffer_length_in_chain (vm, p0));
/* Misprediction: close the current next frame and open one for next0. */
369 if (PREDICT_FALSE (next0 != next))
372 vlib_put_next_frame (vm, node, next, n_left_to_next);
374 vlib_get_next_frame (vm, node, next,
375 to_next, n_left_to_next);
382 vlib_put_next_frame (vm, node, next, n_left_to_next);
/* Tracing runs after the lookup so the trace sees ip.adj_index[VLIB_TX]
   and ip.flow_hash as written above. */
385 if (node->flags & VLIB_NODE_FLAG_TRACE)
386 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
388 return frame->n_vectors;
391 /** @brief IPv4 lookup node.
394 This is the main IPv4 lookup dispatch node.
396 @param vm vlib_main_t corresponding to the current thread
397 @param node vlib_node_runtime_t
398 @param frame vlib_frame_t whose contents should be dispatched
400 @par Graph mechanics: buffer metadata, next index usage
403 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
404 - Indicates the @c sw_if_index value of the interface that the
405 packet was received on.
406 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
407 - When the value is @c ~0 then the node performs a longest prefix
408 match (LPM) for the packet destination address in the FIB attached
409 to the receive interface.
410 - Otherwise perform LPM for the packet destination address in the
411 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
412 value (0, 1, ...) and not a VRF id.
415 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
416 - The @c dpoi_index of the load-balance bucket (DPO) chosen by the lookup.
419 - Dispatches the packet to the node index found in
420 @c dpo->dpoi_next_node
421 (where @c dpo is the load-balance bucket chosen by the lookup).
/* Node function registered for "ip4-lookup": delegates to
   ip4_lookup_inline with the locally-received-responses shortcut turned
   off (last argument 0), so the FIB mtrie walk is always performed. */
424 ip4_lookup (vlib_main_t * vm,
425 vlib_node_runtime_t * node,
426 vlib_frame_t * frame)
428 return ip4_lookup_inline (vm, node, frame,
429 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter used by the node below; defined later in this file. */
433 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node "ip4-lookup": runs ip4_lookup over vectors of u32 buffer
   indices and has IP_LOOKUP_N_NEXT next nodes taken from
   IP4_LOOKUP_NEXT_NODES. */
435 VLIB_REGISTER_NODE (ip4_lookup_node) = {
436 .function = ip4_lookup,
437 .name = "ip4-lookup",
438 .vector_size = sizeof (u32),
440 .format_trace = format_ip4_lookup_trace,
441 .n_next_nodes = IP_LOOKUP_N_NEXT,
442 .next_nodes = IP4_LOOKUP_NEXT_NODES,
445 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/*
 * ip4_load_balance: node function for "ip4-load-balance".
 *
 * For each buffer, reads a load-balance index from ip.adj_index[VLIB_TX],
 * computes the IPv4 flow hash with that object's hash config, and picks
 * the bucket (DPO) at (flow_hash & lb_n_buckets_minus_1).  The bucket's
 * dpoi_index replaces ip.adj_index[VLIB_TX] and its dpoi_next_node is the
 * next node.  Returns frame->n_vectors.
 */
448 ip4_load_balance (vlib_main_t * vm,
449 vlib_node_runtime_t * node,
450 vlib_frame_t * frame)
/* "via" counters, as opposed to the "to" counters used by the lookup node. */
452 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
453 u32 n_left_from, n_left_to_next, * from, * to_next;
454 ip_lookup_next_t next;
455 u32 cpu_index = os_get_cpu_number();
457 from = vlib_frame_vector_args (frame);
458 n_left_from = frame->n_vectors;
459 next = node->cached_next_index;
/* Tracing happens before the loop, so the trace records the incoming
   ip.adj_index[VLIB_TX] (the load-balance index), not the chosen bucket. */
461 if (node->flags & VLIB_NODE_FLAG_TRACE)
462 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
464 while (n_left_from > 0)
466 vlib_get_next_frame (vm, node, next,
467 to_next, n_left_to_next);
470 while (n_left_from > 0 && n_left_to_next > 0)
472 ip_lookup_next_t next0;
473 const load_balance_t *lb0;
476 const ip4_header_t *ip0;
477 const dpo_id_t *dpo0;
482 p0 = vlib_get_buffer (vm, pi0);
484 ip0 = vlib_buffer_get_current (p0);
485 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
487 lb0 = load_balance_get(lbi0);
/* Unlike the lookup node, the hash is computed unconditionally here. */
488 hc0 = lb0->lb_hash_config;
489 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
491 dpo0 = load_balance_get_bucket_i(lb0,
492 vnet_buffer(p0)->ip.flow_hash &
493 (lb0->lb_n_buckets_minus_1));
495 next0 = dpo0->dpoi_next_node;
496 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
498 vlib_increment_combined_counter
499 (cm, cpu_index, lbi0, 1,
500 vlib_buffer_length_in_chain (vm, p0));
/* Misprediction: close the current next frame and open one for next0. */
507 if (PREDICT_FALSE (next0 != next))
510 vlib_put_next_frame (vm, node, next, n_left_to_next);
512 vlib_get_next_frame (vm, node, next,
513 to_next, n_left_to_next);
520 vlib_put_next_frame (vm, node, next, n_left_to_next);
523 return frame->n_vectors;
/* Trace formatter used by the node below; defined later in this file. */
526 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph node "ip4-load-balance": runs ip4_load_balance over vectors of u32
   buffer indices.  It is declared a sibling of "ip4-lookup" rather than
   listing next nodes of its own. */
528 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
529 .function = ip4_load_balance,
530 .name = "ip4-load-balance",
531 .vector_size = sizeof (u32),
532 .sibling_of = "ip4-lookup",
534 .format_trace = format_ip4_forward_next_trace,
537 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
539 /* get first interface address */
/* Iterates the addresses configured on sw_if_index (with the
   honor-unnumbered argument set to 1) and reports through *result_ia the
   interface-address record that was selected, or 0 when no address was
   found. */
541 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
542 ip_interface_address_t ** result_ia)
544 ip_lookup_main_t * lm = &im->lookup_main;
545 ip_interface_address_t * ia = 0;
546 ip4_address_t * result = 0;
548 foreach_ip_interface_address (lm, ia, sw_if_index,
549 1 /* honor unnumbered */,
551 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
/* ia is only meaningful when an address was actually picked. */
556 *result_ia = result ? ia : 0;
/*
 * ip4_add_interface_routes: installs the FIB entries that go with
 * interface address `a` on sw_if_index, in table fib_index.  All entries
 * are added with source FIB_SOURCE_INTERFACE.
 */
561 ip4_add_interface_routes (u32 sw_if_index,
562 ip4_main_t * im, u32 fib_index,
563 ip_interface_address_t * a)
565 ip_lookup_main_t * lm = &im->lookup_main;
566 ip4_address_t * address = ip_interface_address_get_address (lm, a);
/* Prefix built from the interface address and its configured length. */
568 .fp_len = a->address_length,
569 .fp_proto = FIB_PROTOCOL_IP4,
570 .fp_addr.ip4 = *address,
/* Start with "no probe adjacency"; it is filled in below from the
   CONNECTED|ATTACHED entry. */
573 a->neighbor_probe_adj_index = ~0;
577 fib_node_index_t fei;
579 fei = fib_table_entry_update_one_path(fib_index,
581 FIB_SOURCE_INTERFACE,
582 (FIB_ENTRY_FLAG_CONNECTED |
583 FIB_ENTRY_FLAG_ATTACHED),
585 NULL, /* No next-hop address */
587 ~0, // invalid FIB index
590 FIB_ROUTE_PATH_FLAG_NONE);
591 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* If a classify table is bound to this interface (index != ~0), a classify
   DPO is created for it and added as a special entry. */
596 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
598 u32 classify_table_index =
599 lm->classify_table_index_by_sw_if_index [sw_if_index];
600 if (classify_table_index != (u32) ~0)
602 dpo_id_t dpo = DPO_NULL;
607 classify_dpo_create(FIB_PROTOCOL_IP4,
608 classify_table_index));
610 fib_table_entry_special_dpo_add(fib_index,
/* Entry flagged CONNECTED|LOCAL. */
619 fib_table_entry_update_one_path(fib_index,
621 FIB_SOURCE_INTERFACE,
622 (FIB_ENTRY_FLAG_CONNECTED |
623 FIB_ENTRY_FLAG_LOCAL),
627 ~0, // invalid FIB index
630 FIB_ROUTE_PATH_FLAG_NONE);
/* Counterpart of ip4_add_interface_routes: builds an IPv4 prefix from
   address/address_length and deletes FIB_SOURCE_INTERFACE entries from
   table fib_index (two fib_table_entry_delete calls are visible). */
634 ip4_del_interface_routes (ip4_main_t * im,
636 ip4_address_t * address,
640 .fp_len = address_length,
641 .fp_proto = FIB_PROTOCOL_IP4,
642 .fp_addr.ip4 = *address,
647 fib_table_entry_delete(fib_index,
649 FIB_SOURCE_INTERFACE);
653 fib_table_entry_delete(fib_index,
655 FIB_SOURCE_INTERFACE);
/*
 * ip4_sw_interface_enable_disable: reference-counted enable/disable of IPv4
 * on sw_if_index.  im->ip_enabled_by_sw_if_index[sw_if_index] is
 * incremented or decremented, and the lookup feature is added to / removed
 * from the RX unicast and multicast feature configs for the interface.
 */
659 ip4_sw_interface_enable_disable (u32 sw_if_index,
662 vlib_main_t * vm = vlib_get_main();
663 ip4_main_t * im = &ip4_main;
664 ip_lookup_main_t * lm = &im->lookup_main;
666 u32 lookup_feature_index;
/* Make sure the per-interface counter exists; new slots start at 0. */
668 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
671 * enable/disable only on the 1<->0 transition
675 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
680 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
681 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
/* Only the RX feature classes are touched (loop bound is the multicast RX
   class, inclusive). */
685 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
687 ip_config_main_t * cm = &lm->feature_config_mains[cast];
688 vnet_config_main_t * vcm = &cm->config_main;
690 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
691 ci = cm->config_index_by_sw_if_index[sw_if_index];
693 if (cast == VNET_IP_RX_UNICAST_FEAT)
694 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
696 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
699 ci = vnet_config_add_feature (vm, vcm,
701 lookup_feature_index,
703 /* # bytes of config data */ 0);
705 ci = vnet_config_del_feature (vm, vcm,
707 lookup_feature_index,
709 /* # bytes of config data */ 0);
/* Store the config index returned by the add/del call. */
710 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/*
 * ip4_add_del_interface_address_internal: adds or removes an IPv4 address
 * on sw_if_index.  On add it first rejects addresses that overlap an
 * address already on the interface, then updates the interface address
 * pool, the IPv4 enable count, the interface routes, and finally notifies
 * the registered add/del callbacks.  Returns a clib_error_t (0 when no
 * error is recorded in `error`).
 */
714 static clib_error_t *
715 ip4_add_del_interface_address_internal (vlib_main_t * vm,
717 ip4_address_t * address,
721 vnet_main_t * vnm = vnet_get_main();
722 ip4_main_t * im = &ip4_main;
723 ip_lookup_main_t * lm = &im->lookup_main;
724 clib_error_t * error = 0;
725 u32 if_address_index, elts_before;
726 ip4_address_fib_t ip4_af, * addr_fib = 0;
/* Pair the address with the FIB index bound to this interface. */
728 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
729 ip4_addr_fib_init (&ip4_af, address,
730 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
731 vec_add1 (addr_fib, ip4_af);
734 * there is no support for adj-fib handling in the presence of overlapping
735 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
740 /* When adding an address check that it does not conflict
741 with an existing address. */
742 ip_interface_address_t * ia;
743 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
744 0 /* honor unnumbered: off */,
746 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
748 if (ip4_destination_matches_route (im, address, x, ia->address_length)
749 || ip4_destination_matches_route (im, x, address, address_length))
750 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
751 format_ip4_address_and_length, address, address_length,
752 format_ip4_address_and_length, x, ia->address_length,
753 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so a real grow/shrink can be detected below. */
757 elts_before = pool_elts (lm->if_address_pool);
759 error = ip_interface_address_add_del
769 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
772 ip4_del_interface_routes (im, ip4_af.fib_index, address,
775 ip4_add_interface_routes (sw_if_index,
776 im, ip4_af.fib_index,
778 (lm->if_address_pool, if_address_index));
780 /* Run the callbacks only when the pool actually grew/shrank; an unchanged
   pool size means a duplicate address was added. */
781 if (elts_before != pool_elts (lm->if_address_pool))
783 ip4_add_del_interface_address_callback_t * cb;
784 vec_foreach (cb, im->add_del_interface_address_callbacks)
785 cb->function (im, cb->function_opaque, sw_if_index,
786 address, address_length,
/* Public entry point: forwards its arguments to
   ip4_add_del_interface_address_internal and returns that result. */
797 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
798 ip4_address_t * address, u32 address_length,
801 return ip4_add_del_interface_address_internal
802 (vm, sw_if_index, address, address_length,
806 /* Built-in ip4 unicast rx feature path definition */
/* Each registration names a graph node, lists the feature(s) it must run
   before, and gives the ip4_main field that receives its feature index.
   The runs_before constraints below order the unicast RX features as:
   ip4-flow-classify, ip4-inacl, ip4-source-check-via-rx,
   ip4-source-check-via-any, ip4-policer-classify, ipsec-input-ip4,
   vpath-input-ip4, ip4-lookup, ip4-drop. */
807 VNET_IP4_UNICAST_FEATURE_INIT (ip4_flow_classify, static) = {
808 .node_name = "ip4-flow-classify",
809 .runs_before = ORDER_CONSTRAINTS {"ip4-inacl", 0},
810 .feature_index = &ip4_main.ip4_unicast_rx_feature_flow_classify,
813 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
814 .node_name = "ip4-inacl",
815 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
816 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
820 .node_name = "ip4-source-check-via-rx",
821 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
823 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
827 .node_name = "ip4-source-check-via-any",
828 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
830 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
/* Only constrained to run before ip4-policer-classify; no registration
   here orders it relative to the source-check features. */
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
834 .node_name = "ip4-source-and-port-range-check-rx",
835 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
837 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
840 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
841 .node_name = "ip4-policer-classify",
842 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
844 &ip4_main.ip4_unicast_rx_feature_policer_classify,
847 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
848 .node_name = "ipsec-input-ip4",
849 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
850 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
853 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
854 .node_name = "vpath-input-ip4",
855 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
856 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
859 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
860 .node_name = "ip4-lookup",
861 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
862 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
865 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
866 .node_name = "ip4-drop",
867 .runs_before = 0, /* not before any other features */
868 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
872 /* Built-in ip4 multicast rx feature path definition */
/* Multicast RX order: vpath-input-ip4, ip4-lookup-multicast, ip4-drop. */
873 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
874 .node_name = "vpath-input-ip4",
875 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
876 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
879 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
880 .node_name = "ip4-lookup-multicast",
881 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
882 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
885 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
886 .node_name = "ip4-drop",
887 .runs_before = 0, /* last feature */
888 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Start-node name tables handed to vnet_feature_arc_init by
   ip4_feature_init: one for the RX feature classes, one for TX. */
891 static char * rx_feature_start_nodes[] =
892 { "ip4-input", "ip4-input-no-checksum"};
894 static char * tx_feature_start_nodes[] =
896 "ip4-rewrite-transit",
900 /* Source and port-range check ip4 tx feature path definition */
901 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
902 .node_name = "ip4-source-and-port-range-check-tx",
903 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
905 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
909 /* Built-in ip4 tx feature path definition */
910 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
911 .node_name = "interface-output",
912 .runs_before = 0, /* not before any other features */
913 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/*
 * ip4_feature_init: initialises one feature arc per IPv4 feature class
 * (cast = 0 .. VNET_N_IP_FEAT-1) by calling vnet_feature_arc_init with the
 * start nodes appropriate for that class.  An error returned by
 * vnet_feature_arc_init is captured in `error`.
 */
916 static clib_error_t *
917 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
919 ip_lookup_main_t * lm = &im->lookup_main;
920 clib_error_t * error;
922 ip_config_main_t * cm;
923 vnet_config_main_t * vcm;
924 char **feature_start_nodes;
925 int feature_start_len;
927 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
929 cm = &lm->feature_config_mains[cast];
930 vcm = &cm->config_main;
/* Classes below VNET_IP_TX_FEAT use the RX start nodes; the rest use the
   TX start nodes. */
932 if (cast < VNET_IP_TX_FEAT)
934 feature_start_nodes = rx_feature_start_nodes;
935 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
939 feature_start_nodes = tx_feature_start_nodes;
940 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
943 if ((error = vnet_feature_arc_init (vm, vcm,
946 im->next_feature[cast],
947 &im->feature_nodes[cast])))
/*
 * ip4_sw_interface_add_del: software-interface add/delete hook, registered
 * through VNET_SW_INTERFACE_ADD_DEL_FUNCTION below.  For every IPv4 feature
 * class it adds or removes that class's terminal feature (the drop feature
 * for RX unicast/multicast, interface-output for TX) in the interface's
 * feature config, and on the delete path clears the interface's IPv4
 * enable count.  Always returns 0.
 */
954 static clib_error_t *
955 ip4_sw_interface_add_del (vnet_main_t * vnm,
959 vlib_main_t * vm = vnm->vlib_main;
960 ip4_main_t * im = &ip4_main;
961 ip_lookup_main_t * lm = &im->lookup_main;
965 /* Fill in lookup tables with default table (0). */
966 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
968 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
970 ip_config_main_t * cm = &lm->feature_config_mains[cast];
971 vnet_config_main_t * vcm = &cm->config_main;
973 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
974 ci = cm->config_index_by_sw_if_index[sw_if_index];
/* Pick the terminal feature for this feature class. */
976 if (cast == VNET_IP_RX_UNICAST_FEAT)
977 feature_index = im->ip4_unicast_rx_feature_drop;
978 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
979 feature_index = im->ip4_multicast_rx_feature_drop;
981 feature_index = im->ip4_tx_feature_interface_output;
984 ci = vnet_config_add_feature (vm, vcm,
988 /* # bytes of config data */ 0);
991 ci = vnet_config_del_feature (vm, vcm, ci,
994 /* # bytes of config data */ 0);
/* Reset the enable refcount, but only if the vector already covers this
   interface. */
995 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
996 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
998 cm->config_index_by_sw_if_index[sw_if_index] = ci;
1000 * note: do not update the tx feature count here.
1004 return /* no error */ 0;
1007 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1009 /* Global IP4 main. */
1010 ip4_main_t ip4_main;
/*
 * ip4_lookup_init: one-time initialisation of IPv4 lookup state, registered
 * through VLIB_INIT_FUNCTION below.  It fills im->fib_masks, initialises
 * the generic lookup main, creates FIB table 0, hooks the packet-generator
 * header parser onto the ip4-lookup node, builds the ARP request packet
 * template, and finally runs ip4_feature_init.
 */
1013 ip4_lookup_init (vlib_main_t * vm)
1015 ip4_main_t * im = &ip4_main;
1016 clib_error_t * error;
/* One mask per index i, stored in network byte order. */
1019 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
1024 m = pow2_mask (i) << (32 - i);
1027 im->fib_masks[i] = clib_host_to_net_u32 (m);
1030 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1032 /* Create FIB with index 0 and table id of 0. */
1033 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1037 pn = pg_get_node (ip4_lookup_node.index);
1038 pn->unformat_edit = unformat_pg_ip4_header;
/* ARP request template: zeroed header with the fixed fields set below. */
1042 ethernet_arp_header_t h;
1044 memset (&h, 0, sizeof (h));
1046 /* Set target ethernet address to all zeros. */
1047 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* _16 stores a 16-bit field in network byte order; _8 stores a byte as is. */
1049 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1050 #define _8(f,v) h.f = v;
1051 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1052 _16 (l3_type, ETHERNET_TYPE_IP4);
1053 _8 (n_l2_address_bytes, 6);
1054 _8 (n_l3_address_bytes, 4);
1055 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1059 vlib_packet_template_init (vm,
1060 &im->ip4_arp_request_packet_template,
1063 /* alloc chunk size */ 8,
1067 error = ip4_feature_init (vm, im);
1072 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record shared by the trace formatters in this file.  Besides
   packet_data it carries the dpo_index, flow_hash and fib_index values
   that ip4_forward_next_trace fills in for each traced buffer. */
1075 /* Adjacency taken. */
1080 /* Packet data, possibly *after* rewrite. */
1081 u8 packet_data[64 - 1*sizeof(u32)];
1082 } ip4_forward_next_trace_t;
/* Trace formatter: appends the captured packet bytes, rendered as an IPv4
   header and indented to the current indent of s.  The vm and node
   arguments are consumed from the va_list but not used. */
1084 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1086 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1087 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1088 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1089 uword indent = format_get_indent (s);
1090 s = format (s, "%U%U",
1091 format_white_space, indent,
1092 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for the lookup node: prints the FIB index, DPO index and
   flow hash from the trace record, then the captured packet bytes as an
   IPv4 header on the following line.  The vm and node arguments are
   consumed from the va_list but not used. */
1096 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1098 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1099 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1100 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1101 uword indent = format_get_indent (s);
1103 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1104 t->fib_index, t->dpo_index, t->flow_hash);
1105 s = format (s, "\n%U%U",
1106 format_white_space, indent,
1107 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite nodes: prints the TX interface index, the
   DPO index together with its adjacency rendering, and the flow hash,
   followed by the captured packet bytes on the next line.
   NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index, which ip4_forward_next_trace fills from
   sw_if_index[VLIB_TX] only when that is not ~0 -- otherwise it is a FIB
   index. */
1111 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1113 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1114 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1115 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1116 vnet_main_t * vnm = vnet_get_main();
1117 uword indent = format_get_indent (s);
1119 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1120 t->fib_index, t->dpo_index, format_ip_adjacency,
1121 vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1123 s = format (s, "\n%U%U",
1124 format_white_space, indent,
1125 format_ip_adjacency_packet_data,
1127 t->packet_data, sizeof (t->packet_data));
1131 /* Common trace function for all ip4-forward next nodes. */
/*
 * For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds a
 * trace record holding:
 *   - dpo_index : ip.adj_index[which_adj_index]
 *   - flow_hash : ip.flow_hash
 *   - fib_index : sw_if_index[VLIB_TX] when it is not ~0, otherwise the
 *                 FIB index bound to the RX interface
 *   - packet_data : the first sizeof(packet_data) bytes at the buffer's
 *                   current data pointer
 * Buffers are processed two at a time with prefetch of the next two, then
 * one at a time for the remainder.
 */
1133 ip4_forward_next_trace (vlib_main_t * vm,
1134 vlib_node_runtime_t * node,
1135 vlib_frame_t * frame,
1136 vlib_rx_or_tx_t which_adj_index)
1139 ip4_main_t * im = &ip4_main;
1141 n_left = frame->n_vectors;
1142 from = vlib_frame_vector_args (frame);
1147 vlib_buffer_t * b0, * b1;
1148 ip4_forward_next_trace_t * t0, * t1;
1150 /* Prefetch next iteration. */
1151 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1152 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1157 b0 = vlib_get_buffer (vm, bi0);
1158 b1 = vlib_get_buffer (vm, bi1);
1160 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1162 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1163 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1164 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1165 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1166 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1167 vec_elt (im->fib_index_by_sw_if_index,
1168 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1170 clib_memcpy (t0->packet_data,
1171 vlib_buffer_get_current (b0),
1172 sizeof (t0->packet_data));
1174 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1176 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1177 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1178 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1179 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1180 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1181 vec_elt (im->fib_index_by_sw_if_index,
1182 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1183 clib_memcpy (t1->packet_data,
1184 vlib_buffer_get_current (b1),
1185 sizeof (t1->packet_data));
/* Single-buffer tail: same record layout as above. */
1195 ip4_forward_next_trace_t * t0;
1199 b0 = vlib_get_buffer (vm, bi0);
1201 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1203 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1204 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1205 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1206 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1207 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1208 vec_elt (im->fib_index_by_sw_if_index,
1209 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1210 clib_memcpy (t0->packet_data,
1211 vlib_buffer_get_current (b0),
1212 sizeof (t0->packet_data));
/* Shared body of the ip4 drop and punt nodes: passes every buffer in the
   frame to vlib_error_drop_buffers (error_code is supplied by the caller;
   ip4_input_node.index is one of the arguments) and, when tracing is
   enabled on the node, records a trace for the frame using the VLIB_TX
   adjacency slot. */
1220 ip4_drop_or_punt (vlib_main_t * vm,
1221 vlib_node_runtime_t * node,
1222 vlib_frame_t * frame,
1223 ip4_error_t error_code)
1225 u32 * buffers = vlib_frame_vector_args (frame);
1226 uword n_packets = frame->n_vectors;
1228 vlib_error_drop_buffers (vm, node,
1233 ip4_input_node.index,
1236 if (node->flags & VLIB_NODE_FLAG_TRACE)
1237 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function: runs ip4_drop_or_punt() with IP4_ERROR_ADJACENCY_DROP. */
1243 ip4_drop (vlib_main_t * vm,
1244 vlib_node_runtime_t * node,
1245 vlib_frame_t * frame)
1246 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function: runs ip4_drop_or_punt() with IP4_ERROR_ADJACENCY_PUNT. */
1249 ip4_punt (vlib_main_t * vm,
1250 vlib_node_runtime_t * node,
1251 vlib_frame_t * frame)
1252 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/*
 * Graph node registration for ip4_drop. The frame carries u32 buffer
 * indices and traces are printed by format_ip4_forward_next_trace.
 * The function is also passed to VLIB_NODE_FUNCTION_MULTIARCH.
 */
1254 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1255 .function = ip4_drop,
1257 .vector_size = sizeof (u32),
1259 .format_trace = format_ip4_forward_next_trace,
1267 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/*
 * Graph node registration for ip4_punt. The frame carries u32 buffer
 * indices and traces are printed by format_ip4_forward_next_trace.
 * The function is also passed to VLIB_NODE_FUNCTION_MULTIARCH.
 */
1269 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1270 .function = ip4_punt,
1272 .vector_size = sizeof (u32),
1274 .format_trace = format_ip4_forward_next_trace,
1282 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1284 /* Compute TCP/UDP/ICMP4 checksum in software. */
1286 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1290 u32 ip_header_length, payload_length_host_byte_order;
1291 u32 n_this_buffer, n_bytes_left;
1293 void * data_this_buffer;
1295 /* Initialize checksum with ip header. */
1296 ip_header_length = ip4_header_bytes (ip0);
1297 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1298 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1300 if (BITS (uword) == 32)
1302 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1303 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1306 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1308 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1309 data_this_buffer = (void *) ip0 + ip_header_length;
1310 if (n_this_buffer + ip_header_length > p0->current_length)
1311 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1314 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1315 n_bytes_left -= n_this_buffer;
1316 if (n_bytes_left == 0)
1319 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1320 p0 = vlib_get_buffer (vm, p0->next_buffer);
1321 data_this_buffer = vlib_buffer_get_current (p0);
1322 n_this_buffer = p0->current_length;
1325 sum16 = ~ ip_csum_fold (sum0);
1331 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1333 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1334 udp_header_t * udp0;
1337 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1338 || ip0->protocol == IP_PROTOCOL_UDP);
1340 udp0 = (void *) (ip0 + 1);
1341 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1343 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1344 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1348 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1350 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1351 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * Node function for packets addressed to this host.
 *
 * Buffers are processed two at a time while at least four remain in
 * the frame and two slots remain in the next frame, then one at a
 * time. For each packet the visible code:
 *   - walks the mtrie of the FIB bound to the RX interface with the
 *     packet source address (four lookup steps, falling back to the
 *     default leaf) and stores the resulting index in the buffer;
 *   - classifies the protocol, treating fragments as protocol 0xfe;
 *   - consults or computes the L4 checksum verdict for TCP and UDP,
 *     accepting UDP packets with a zero checksum;
 *   - flags a UDP length larger than the IP length;
 *   - flags sources that resolve to a DPO_RECEIVE object, and sources
 *     failing the uRPF size check unless the destination is
 *     255.255.255.255;
 *   - selects the next node from lm->local_next_by_ip_protocol, or
 *     IP_LOCAL_NEXT_DROP when an error was recorded.
 *
 * Returns frame->n_vectors.
 *
 * NOTE(review): this listing omits short source lines (braces, short
 * statements), so parts of the control flow are not visible here.
 */
1357 ip4_local (vlib_main_t * vm,
1358 vlib_node_runtime_t * node,
1359 vlib_frame_t * frame)
1361 ip4_main_t * im = &ip4_main;
1362 ip_lookup_main_t * lm = &im->lookup_main;
1363 ip_local_next_t next_index;
1364 u32 * from, * to_next, n_left_from, n_left_to_next;
/* Error values set on buffers come from the ip4_input_node runtime. */
1365 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1367 from = vlib_frame_vector_args (frame);
1368 n_left_from = frame->n_vectors;
1369 next_index = node->cached_next_index;
1371 if (node->flags & VLIB_NODE_FLAG_TRACE)
1372 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1374 while (n_left_from > 0)
1376 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual loop: two packets per iteration. */
1378 while (n_left_from >= 4 && n_left_to_next >= 2)
1380 vlib_buffer_t * p0, * p1;
1381 ip4_header_t * ip0, * ip1;
1382 udp_header_t * udp0, * udp1;
1383 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1384 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1385 const dpo_id_t *dpo0, *dpo1;
1386 const load_balance_t *lb0, *lb1;
1387 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1388 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1389 i32 len_diff0, len_diff1;
1390 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1391 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1394 pi0 = to_next[0] = from[0];
1395 pi1 = to_next[1] = from[1];
1399 n_left_to_next -= 2;
1401 p0 = vlib_get_buffer (vm, pi0);
1402 p1 = vlib_get_buffer (vm, pi1);
1404 ip0 = vlib_buffer_get_current (p0);
1405 ip1 = vlib_buffer_get_current (p1);
/* Source lookup uses the FIB bound to the RX interface. */
1407 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1408 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1409 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1410 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1412 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1413 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1415 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
/* The four mtrie steps are interleaved with the header checks. */
1417 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1418 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1420 /* Treat IP frag packets as "experimental" protocol for now
1421 until support of IP frag reassembly is implemented */
1422 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1423 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1424 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1425 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1426 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1427 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1432 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1433 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1435 udp0 = ip4_next_header (ip0);
1436 udp1 = ip4_next_header (ip1);
1438 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1439 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1440 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1442 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1443 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1445 /* Verify UDP length. */
1446 ip_len0 = clib_net_to_host_u16 (ip0->length);
1447 ip_len1 = clib_net_to_host_u16 (ip1->length);
1448 udp_len0 = clib_net_to_host_u16 (udp0->length);
1449 udp_len1 = clib_net_to_host_u16 (udp1->length);
1451 len_diff0 = ip_len0 - udp_len0;
1452 len_diff1 = ip_len1 - udp_len1;
/* The length difference only matters for UDP packets. */
1454 len_diff0 = is_udp0 ? len_diff0 : 0;
1455 len_diff1 = is_udp1 ? len_diff1 : 0;
1457 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1458 & good_tcp_udp0 & good_tcp_udp1)))
1463 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1464 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1466 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1467 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1472 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1473 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1475 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1476 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
/* A UDP length larger than the IP length also marks the packet bad. */
1480 good_tcp_udp0 &= len_diff0 >= 0;
1481 good_tcp_udp1 &= len_diff1 >= 0;
1483 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1484 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL doubles as the no-error-so-far value. */
1486 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1488 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1489 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1491 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1492 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1493 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1495 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1496 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1499 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1500 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1501 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1502 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
/* Both adjacency slots receive the index found for the source. */
1504 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1505 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1507 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1508 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1510 lb0 = load_balance_get(lbi0);
1511 lb1 = load_balance_get(lbi1);
1512 dpo0 = load_balance_get_bucket_i(lb0, 0);
1513 dpo1 = load_balance_get_bucket_i(lb1, 0);
1516 * Must have a route to source otherwise we drop the packet.
1517 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1520 * - the source is a recieve => it's from us => bogus, do this
1521 * first since it sets a different error code.
1522 * - uRPF check for any route to source - accept if passes.
1523 * - allow packets destined to the broadcast address from unknown sources
1525 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1526 dpo0->dpoi_type == DPO_RECEIVE) ?
1527 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1529 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1530 !fib_urpf_check_size(lb0->lb_urpf) &&
1531 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1532 ? IP4_ERROR_SRC_LOOKUP_MISS
1534 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1535 dpo1->dpoi_type == DPO_RECEIVE) ?
1536 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1538 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1539 !fib_urpf_check_size(lb1->lb_urpf) &&
1540 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1541 ? IP4_ERROR_SRC_LOOKUP_MISS
1544 next0 = lm->local_next_by_ip_protocol[proto0];
1545 next1 = lm->local_next_by_ip_protocol[proto1];
1547 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1548 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1550 p0->error = error0 ? error_node->errors[error0] : 0;
1551 p1->error = error1 ? error_node->errors[error1] : 0;
1553 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1555 if (PREDICT_FALSE (enqueue_code != 0))
1557 switch (enqueue_code)
1563 n_left_to_next += 1;
1564 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1570 n_left_to_next += 1;
1571 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1575 /* A B B or A B C */
1577 n_left_to_next += 2;
1578 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1579 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1582 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1584 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1591 while (n_left_from > 0 && n_left_to_next > 0)
1595 udp_header_t * udp0;
1596 ip4_fib_mtrie_t * mtrie0;
1597 ip4_fib_mtrie_leaf_t leaf0;
1598 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1600 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1601 load_balance_t *lb0;
1602 const dpo_id_t *dpo0;
1604 pi0 = to_next[0] = from[0];
1608 n_left_to_next -= 1;
1610 p0 = vlib_get_buffer (vm, pi0);
1612 ip0 = vlib_buffer_get_current (p0);
1614 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1615 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1617 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1619 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1621 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1623 /* Treat IP frag packets as "experimental" protocol for now
1624 until support of IP frag reassembly is implemented */
1625 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1626 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1627 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1631 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1633 udp0 = ip4_next_header (ip0);
1635 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1636 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1638 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1640 /* Verify UDP length. */
1641 ip_len0 = clib_net_to_host_u16 (ip0->length);
1642 udp_len0 = clib_net_to_host_u16 (udp0->length);
1644 len_diff0 = ip_len0 - udp_len0;
1646 len_diff0 = is_udp0 ? len_diff0 : 0;
1648 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1653 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1654 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1656 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1657 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1661 good_tcp_udp0 &= len_diff0 >= 0;
1663 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1665 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1667 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1669 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1670 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1671 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1674 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1675 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1677 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1678 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1680 lb0 = load_balance_get(lbi0);
1681 dpo0 = load_balance_get_bucket_i(lb0, 0);
1683 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1684 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1687 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1688 dpo0->dpoi_type == DPO_RECEIVE) ?
1689 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1691 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1692 !fib_urpf_check_size(lb0->lb_urpf) &&
1693 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1694 ? IP4_ERROR_SRC_LOOKUP_MISS
1697 next0 = lm->local_next_by_ip_protocol[proto0];
1699 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1701 p0->error = error0? error_node->errors[error0] : 0;
1703 if (PREDICT_FALSE (next0 != next_index))
1705 n_left_to_next += 1;
1706 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1709 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1712 n_left_to_next -= 1;
1716 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1719 return frame->n_vectors;
/*
 * Graph node registration for ip4_local under the name "ip4-local".
 * The visible next-node entries map the drop, punt, UDP lookup and
 * ICMP dispositions to their node names; IP_LOCAL_N_NEXT is the size
 * of that table.
 */
1722 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1723 .function = ip4_local,
1724 .name = "ip4-local",
1725 .vector_size = sizeof (u32),
1727 .format_trace = format_ip4_forward_next_trace,
1729 .n_next_nodes = IP_LOCAL_N_NEXT,
1731 [IP_LOCAL_NEXT_DROP] = "error-drop",
1732 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1733 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1734 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1738 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/*
 * Register node_index as the handler for an IP protocol number.
 *
 * The node is added as a next node of ip4_local_node and the returned
 * next index is stored in lm->local_next_by_ip_protocol[protocol],
 * which ip4_local consults per packet. The protocol value is range
 * checked by ASSERT only.
 */
1740 void ip4_register_protocol (u32 protocol, u32 node_index)
1742 vlib_main_t * vm = vlib_get_main();
1743 ip4_main_t * im = &ip4_main;
1744 ip_lookup_main_t * lm = &im->lookup_main;
1746 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1747 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler: prints a heading followed by the number of every IP
 * protocol whose entry in lm->local_next_by_ip_protocol differs from
 * IP_LOCAL_NEXT_PUNT. The input and cmd arguments are not used in the
 * visible code.
 */
1750 static clib_error_t *
1751 show_ip_local_command_fn (vlib_main_t * vm,
1752 unformat_input_t * input,
1753 vlib_cli_command_t * cmd)
1755 ip4_main_t * im = &ip4_main;
1756 ip_lookup_main_t * lm = &im->lookup_main;
1759 vlib_cli_output (vm, "Protocols handled by ip4_local");
1760 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1762 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1763 vlib_cli_output (vm, "%d", i);
1771 * Display the set of protocols handled by the local IPv4 stack.
1774 * Example of how to display local protocol table:
1775 * @cliexstart{show ip local}
1776 * Protocols handled by ip4_local
/* Binds the "show ip local" CLI path to show_ip_local_command_fn. */
1783 VLIB_CLI_COMMAND (show_ip_local, static) = {
1784 .path = "show ip local",
1785 .function = show_ip_local_command_fn,
1786 .short_help = "show ip local",
1791 ip4_arp_inline (vlib_main_t * vm,
1792 vlib_node_runtime_t * node,
1793 vlib_frame_t * frame,
1796 vnet_main_t * vnm = vnet_get_main();
1797 ip4_main_t * im = &ip4_main;
1798 ip_lookup_main_t * lm = &im->lookup_main;
1799 u32 * from, * to_next_drop;
1800 uword n_left_from, n_left_to_next_drop, next_index;
1801 static f64 time_last_seed_change = -1e100;
1802 static u32 hash_seeds[3];
1803 static uword hash_bitmap[256 / BITS (uword)];
1806 if (node->flags & VLIB_NODE_FLAG_TRACE)
1807 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1809 time_now = vlib_time_now (vm);
1810 if (time_now - time_last_seed_change > 1e-3)
1813 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1814 sizeof (hash_seeds));
1815 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1816 hash_seeds[i] = r[i];
1818 /* Mark all hash keys as been no-seen before. */
1819 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1822 time_last_seed_change = time_now;
1825 from = vlib_frame_vector_args (frame);
1826 n_left_from = frame->n_vectors;
1827 next_index = node->cached_next_index;
1828 if (next_index == IP4_ARP_NEXT_DROP)
1829 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1831 while (n_left_from > 0)
1833 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1834 to_next_drop, n_left_to_next_drop);
1836 while (n_left_from > 0 && n_left_to_next_drop > 0)
1838 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1839 ip_adjacency_t * adj0;
1846 p0 = vlib_get_buffer (vm, pi0);
1848 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1849 adj0 = ip_get_adjacency (lm, adj_index0);
1850 ip0 = vlib_buffer_get_current (p0);
1853 * this is the Glean case, so we are ARPing for the
1854 * packet's destination
1860 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1861 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1865 a0 ^= ip0->dst_address.data_u32;
1869 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1873 hash_v3_finalize32 (a0, b0, c0);
1875 c0 &= BITS (hash_bitmap) - 1;
1876 c0 = c0 / BITS (uword);
1877 m0 = (uword) 1 << (c0 % BITS (uword));
1879 bm0 = hash_bitmap[c0];
1880 drop0 = (bm0 & m0) != 0;
1882 /* Mark it as seen. */
1883 hash_bitmap[c0] = bm0 | m0;
1887 to_next_drop[0] = pi0;
1889 n_left_to_next_drop -= 1;
1891 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1897 * Can happen if the control-plane is programming tables
1898 * with traffic flowing; at least that's today's lame excuse.
1900 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1901 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1903 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1906 /* Send ARP request. */
1910 ethernet_arp_header_t * h0;
1911 vnet_hw_interface_t * hw_if0;
1913 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1915 /* Add rewrite/encap string for ARP packet. */
1916 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1918 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1920 /* Src ethernet address in ARP header. */
1921 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1922 sizeof (h0->ip4_over_ethernet[0].ethernet));
1926 /* The interface's source address is stashed in the Glean Adj */
1927 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1929 /* Copy in destination address we are requesting. This is the
1930 * glean case, so it's the packet's destination.*/
1931 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1935 /* Src IP address in ARP header. */
1936 if (ip4_src_address_for_packet(lm, sw_if_index0,
1937 &h0->ip4_over_ethernet[0].ip4))
1939 /* No source address available */
1940 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1941 vlib_buffer_free(vm, &bi0, 1);
1945 /* Copy in destination address we are requesting from the
1947 h0->ip4_over_ethernet[1].ip4.data_u32 =
1948 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1951 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1952 b0 = vlib_get_buffer (vm, bi0);
1953 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1955 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1957 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1961 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1964 return frame->n_vectors;
/* Node function: runs ip4_arp_inline() with its last argument 0 (non-glean case). */
1968 ip4_arp (vlib_main_t * vm,
1969 vlib_node_runtime_t * node,
1970 vlib_frame_t * frame)
1972 return (ip4_arp_inline(vm, node, frame, 0));
/* Node function: runs ip4_arp_inline() with its last argument 1 (glean case). */
1976 ip4_glean (vlib_main_t * vm,
1977 vlib_node_runtime_t * node,
1978 vlib_frame_t * frame)
1980 return (ip4_arp_inline(vm, node, frame, 1));
/*
 * Counter descriptions indexed by the IP4_ARP_ERROR_* values; shared
 * by the ip4_arp_node and ip4_glean_node registrations.
 */
1983 static char * ip4_arp_error_strings[] = {
1984 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1985 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1986 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1987 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1988 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1989 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/*
 * Graph node registration for ip4_arp. Error counters are described by
 * ip4_arp_error_strings and next index IP4_ARP_NEXT_DROP leads to
 * "error-drop".
 */
1992 VLIB_REGISTER_NODE (ip4_arp_node) = {
1993 .function = ip4_arp,
1995 .vector_size = sizeof (u32),
1997 .format_trace = format_ip4_forward_next_trace,
1999 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2000 .error_strings = ip4_arp_error_strings,
2002 .n_next_nodes = IP4_ARP_N_NEXT,
2004 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * Graph node registration for ip4_glean under the name "ip4-glean".
 * It reuses ip4_arp_error_strings for its counters and next index
 * IP4_ARP_NEXT_DROP leads to "error-drop".
 */
2008 VLIB_REGISTER_NODE (ip4_glean_node) = {
2009 .function = ip4_glean,
2010 .name = "ip4-glean",
2011 .vector_size = sizeof (u32),
2013 .format_trace = format_ip4_forward_next_trace,
2015 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
2016 .error_strings = ip4_arp_error_strings,
2018 .n_next_nodes = IP4_ARP_N_NEXT,
2020 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * arp_notrace_init() is run as a VLIB_INIT_FUNCTION. It takes the
 * runtime of ip4_arp_node and, for each entry expanded from
 * foreach_notrace_ip4_arp_error, passes rt->errors[IP4_ARP_ERROR_<a>]
 * to vnet_pcap_drop_trace_filter_add_del().
 *
 * NOTE(review): the macro body and the remaining call arguments are
 * not visible in this listing.
 */
2024 #define foreach_notrace_ip4_arp_error \
2030 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2032 vlib_node_runtime_t *rt =
2033 vlib_node_get_runtime (vm, ip4_arp_node.index);
2035 /* don't trace ARP request packets */
2037 vnet_pcap_drop_trace_filter_add_del \
2038 (rt->errors[IP4_ARP_ERROR_##a], \
2040 foreach_notrace_ip4_arp_error;
2045 VLIB_INIT_FUNCTION(arp_notrace_init);
2048 /* Send an ARP request to see if given destination is reachable on given interface. */
2050 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2052 vnet_main_t * vnm = vnet_get_main();
2053 ip4_main_t * im = &ip4_main;
2054 ethernet_arp_header_t * h;
2055 ip4_address_t * src;
2056 ip_interface_address_t * ia;
2057 ip_adjacency_t * adj;
2058 vnet_hw_interface_t * hi;
2059 vnet_sw_interface_t * si;
2063 si = vnet_get_sw_interface (vnm, sw_if_index);
2065 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2067 return clib_error_return (0, "%U: interface %U down",
2068 format_ip4_address, dst,
2069 format_vnet_sw_if_index_name, vnm,
2073 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2076 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2077 return clib_error_return
2078 (0, "no matching interface address for destination %U (interface %U)",
2079 format_ip4_address, dst,
2080 format_vnet_sw_if_index_name, vnm, sw_if_index);
2083 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2085 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2087 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2089 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2091 h->ip4_over_ethernet[0].ip4 = src[0];
2092 h->ip4_over_ethernet[1].ip4 = dst[0];
2094 b = vlib_get_buffer (vm, bi);
2095 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2097 /* Add encapsulation string for software interface (e.g. ethernet header). */
2098 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2099 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2102 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2103 u32 * to_next = vlib_frame_vector_args (f);
2106 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2109 return /* no error */ 0;
/*
 * Next-node dispositions used by ip4_rewrite_inline: drop on error,
 * hand over to ARP for ARP adjacencies, or generate an ICMP error
 * when the TTL expires.
 */
2113 IP4_REWRITE_NEXT_DROP,
2114 IP4_REWRITE_NEXT_ARP,
2115 IP4_REWRITE_NEXT_ICMP_ERROR,
2116 } ip4_rewrite_next_t;
2119 ip4_rewrite_inline (vlib_main_t * vm,
2120 vlib_node_runtime_t * node,
2121 vlib_frame_t * frame,
2122 int rewrite_for_locally_received_packets,
2125 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2126 u32 * from = vlib_frame_vector_args (frame);
2127 u32 n_left_from, n_left_to_next, * to_next, next_index;
2128 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2129 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2130 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2132 n_left_from = frame->n_vectors;
2133 next_index = node->cached_next_index;
2134 u32 cpu_index = os_get_cpu_number();
2136 while (n_left_from > 0)
2138 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2140 while (n_left_from >= 4 && n_left_to_next >= 2)
2142 ip_adjacency_t * adj0, * adj1;
2143 vlib_buffer_t * p0, * p1;
2144 ip4_header_t * ip0, * ip1;
2145 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2146 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2147 u32 next0_override, next1_override;
2148 u32 tx_sw_if_index0, tx_sw_if_index1;
2150 if (rewrite_for_locally_received_packets)
2151 next0_override = next1_override = 0;
2153 /* Prefetch next iteration. */
2155 vlib_buffer_t * p2, * p3;
2157 p2 = vlib_get_buffer (vm, from[2]);
2158 p3 = vlib_get_buffer (vm, from[3]);
2160 vlib_prefetch_buffer_header (p2, STORE);
2161 vlib_prefetch_buffer_header (p3, STORE);
2163 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2164 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2167 pi0 = to_next[0] = from[0];
2168 pi1 = to_next[1] = from[1];
2173 n_left_to_next -= 2;
2175 p0 = vlib_get_buffer (vm, pi0);
2176 p1 = vlib_get_buffer (vm, pi1);
2178 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2179 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2181 /* We should never rewrite a pkt using the MISS adjacency */
2182 ASSERT(adj_index0 && adj_index1);
2184 ip0 = vlib_buffer_get_current (p0);
2185 ip1 = vlib_buffer_get_current (p1);
2187 error0 = error1 = IP4_ERROR_NONE;
2188 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2190 /* Decrement TTL & update checksum.
2191 Works either endian, so no need for byte swap. */
2192 if (! rewrite_for_locally_received_packets)
2194 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2196 /* Input node should have reject packets with ttl 0. */
2197 ASSERT (ip0->ttl > 0);
2198 ASSERT (ip1->ttl > 0);
2200 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2201 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2203 checksum0 += checksum0 >= 0xffff;
2204 checksum1 += checksum1 >= 0xffff;
2206 ip0->checksum = checksum0;
2207 ip1->checksum = checksum1;
2216 * If the ttl drops below 1 when forwarding, generate
2219 if (PREDICT_FALSE(ttl0 <= 0))
2221 error0 = IP4_ERROR_TIME_EXPIRED;
2222 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2223 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2224 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2225 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2227 if (PREDICT_FALSE(ttl1 <= 0))
2229 error1 = IP4_ERROR_TIME_EXPIRED;
2230 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2231 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2232 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2233 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2236 /* Verify checksum. */
2237 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2238 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2241 /* Rewrite packet header and updates lengths. */
2242 adj0 = ip_get_adjacency (lm, adj_index0);
2243 adj1 = ip_get_adjacency (lm, adj_index1);
2245 if (rewrite_for_locally_received_packets)
2247 if (PREDICT_FALSE(adj0->lookup_next_index
2248 == IP_LOOKUP_NEXT_ARP))
2249 next0_override = IP4_REWRITE_NEXT_ARP;
2250 if (PREDICT_FALSE(adj1->lookup_next_index
2251 == IP_LOOKUP_NEXT_ARP))
2252 next1_override = IP4_REWRITE_NEXT_ARP;
2255 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2256 rw_len0 = adj0[0].rewrite_header.data_bytes;
2257 rw_len1 = adj1[0].rewrite_header.data_bytes;
2258 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2259 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2261 /* Check MTU of outgoing interface. */
2262 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2263 ? IP4_ERROR_MTU_EXCEEDED
2265 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2266 ? IP4_ERROR_MTU_EXCEEDED
2269 next0 = (error0 == IP4_ERROR_NONE)
2270 ? adj0[0].rewrite_header.next_index : next0;
2272 if (rewrite_for_locally_received_packets)
2273 next0 = next0 && next0_override ? next0_override : next0;
2275 next1 = (error1 == IP4_ERROR_NONE)
2276 ? adj1[0].rewrite_header.next_index : next1;
2278 if (rewrite_for_locally_received_packets)
2279 next1 = next1 && next1_override ? next1_override : next1;
2282 * We've already accounted for an ethernet_header_t elsewhere
2284 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2285 vlib_increment_combined_counter
2286 (&adjacency_counters,
2287 cpu_index, adj_index0,
2288 /* packet increment */ 0,
2289 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2291 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2292 vlib_increment_combined_counter
2293 (&adjacency_counters,
2294 cpu_index, adj_index1,
2295 /* packet increment */ 0,
2296 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2298 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2299 * to see the IP headerr */
2300 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2302 p0->current_data -= rw_len0;
2303 p0->current_length += rw_len0;
2304 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2305 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2309 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2312 p0->current_config_index =
2313 vec_elt (cm->config_index_by_sw_if_index,
2315 vnet_get_config_data (&cm->config_main,
2316 &p0->current_config_index,
2318 /* # bytes of config data */ 0);
2321 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2323 p1->current_data -= rw_len1;
2324 p1->current_length += rw_len1;
2326 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2327 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2331 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2334 p1->current_config_index =
2335 vec_elt (cm->config_index_by_sw_if_index,
2337 vnet_get_config_data (&cm->config_main,
2338 &p1->current_config_index,
2340 /* # bytes of config data */ 0);
2344 /* Guess we are only writing on simple Ethernet header. */
2345 vnet_rewrite_two_headers (adj0[0], adj1[0],
2347 sizeof (ethernet_header_t));
2351 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2352 adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2355 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2356 to_next, n_left_to_next,
2357 pi0, pi1, next0, next1);
2360 while (n_left_from > 0 && n_left_to_next > 0)
2362 ip_adjacency_t * adj0;
2365 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2367 u32 tx_sw_if_index0;
2369 if (rewrite_for_locally_received_packets)
2372 pi0 = to_next[0] = from[0];
2374 p0 = vlib_get_buffer (vm, pi0);
2376 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2378 /* We should never rewrite a pkt using the MISS adjacency */
2381 adj0 = ip_get_adjacency (lm, adj_index0);
2383 ip0 = vlib_buffer_get_current (p0);
2385 error0 = IP4_ERROR_NONE;
2386 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2388 /* Decrement TTL & update checksum. */
2389 if (! rewrite_for_locally_received_packets)
2391 i32 ttl0 = ip0->ttl;
2393 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2395 checksum0 += checksum0 >= 0xffff;
2397 ip0->checksum = checksum0;
2399 ASSERT (ip0->ttl > 0);
2405 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2407 if (PREDICT_FALSE(ttl0 <= 0))
2410 * If the ttl drops below 1 when forwarding, generate
2413 error0 = IP4_ERROR_TIME_EXPIRED;
2414 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2415 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2416 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2417 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2421 if (rewrite_for_locally_received_packets)
2424 * We have to override the next_index in ARP adjacencies,
2425 * because they're set up for ip4-arp, not this node...
2427 if (PREDICT_FALSE(adj0->lookup_next_index
2428 == IP_LOOKUP_NEXT_ARP))
2429 next0_override = IP4_REWRITE_NEXT_ARP;
2432 /* Guess we are only writing on simple Ethernet header. */
2433 vnet_rewrite_one_header (adj0[0], ip0,
2434 sizeof (ethernet_header_t));
2436 /* Update packet buffer attributes/set output interface. */
2437 rw_len0 = adj0[0].rewrite_header.data_bytes;
2438 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2440 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2441 vlib_increment_combined_counter
2442 (&adjacency_counters,
2443 cpu_index, adj_index0,
2444 /* packet increment */ 0,
2445 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2447 /* Check MTU of outgoing interface. */
2448 error0 = (vlib_buffer_length_in_chain (vm, p0)
2449 > adj0[0].rewrite_header.max_l3_packet_bytes
2450 ? IP4_ERROR_MTU_EXCEEDED
2453 p0->error = error_node->errors[error0];
          /* Don't adjust the buffer for ttl issue; icmp-error node wants
           * to see the IP header */
2457 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2459 p0->current_data -= rw_len0;
2460 p0->current_length += rw_len0;
2461 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2463 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2464 next0 = adj0[0].rewrite_header.next_index;
2468 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2472 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2475 p0->current_config_index =
2476 vec_elt (cm->config_index_by_sw_if_index,
2478 vnet_get_config_data (&cm->config_main,
2479 &p0->current_config_index,
2481 /* # bytes of config data */ 0);
2485 if (rewrite_for_locally_received_packets)
2486 next0 = next0 && next0_override ? next0_override : next0;
2491 n_left_to_next -= 1;
2493 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2494 to_next, n_left_to_next,
2498 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2501 /* Need to do trace after rewrites to pick up new packet data. */
2502 if (node->flags & VLIB_NODE_FLAG_TRACE)
2503 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2505 return frame->n_vectors;
2509 /** @brief IPv4 transit rewrite node.
2510 @node ip4-rewrite-transit
2512 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2513 header checksum, fetch the ip adjacency, check the outbound mtu,
2514 apply the adjacency rewrite, and send pkts to the adjacency
2515 rewrite header's rewrite_next_index.
2517 @param vm vlib_main_t corresponding to the current thread
2518 @param node vlib_node_runtime_t
2519 @param frame vlib_frame_t whose contents should be dispatched
2521 @par Graph mechanics: buffer metadata, next index usage
2524 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2525 - the rewrite adjacency index
2526 - <code>adj->lookup_next_index</code>
2527 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2528 the packet will be dropped.
2529 - <code>adj->rewrite_header</code>
2530 - Rewrite string length, rewrite string, next_index
2533 - <code>b->current_data, b->current_length</code>
2534 - Updated net of applying the rewrite string
2536 <em>Next Indices:</em>
2537 - <code> adj->rewrite_header.next_index </code>
2541 ip4_rewrite_transit (vlib_main_t * vm,
2542 vlib_node_runtime_t * node,
2543 vlib_frame_t * frame)
2545 return ip4_rewrite_inline (vm, node, frame,
2546 /* rewrite_for_locally_received_packets */ 0, 0);
2549 /** @brief IPv4 local rewrite node.
2550 @node ip4-rewrite-local
2552 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2553 the outbound interface mtu, apply the adjacency rewrite, and send
2554 pkts to the adjacency rewrite header's rewrite_next_index. Deal
2555 with hemorrhoids of the form "some clown sends an icmp4 w/ src =
2556 dst = interface addr."
2558 @param vm vlib_main_t corresponding to the current thread
2559 @param node vlib_node_runtime_t
2560 @param frame vlib_frame_t whose contents should be dispatched
2562 @par Graph mechanics: buffer metadata, next index usage
2565 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2566 - the rewrite adjacency index
2567 - <code>adj->lookup_next_index</code>
2568 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2569 the packet will be dropped.
2570 - <code>adj->rewrite_header</code>
2571 - Rewrite string length, rewrite string, next_index
2574 - <code>b->current_data, b->current_length</code>
2575 - Updated net of applying the rewrite string
2577 <em>Next Indices:</em>
2578 - <code> adj->rewrite_header.next_index </code>
2583 ip4_rewrite_local (vlib_main_t * vm,
2584 vlib_node_runtime_t * node,
2585 vlib_frame_t * frame)
2587 return ip4_rewrite_inline (vm, node, frame,
2588 /* rewrite_for_locally_received_packets */ 1, 0);
2592 ip4_midchain (vlib_main_t * vm,
2593 vlib_node_runtime_t * node,
2594 vlib_frame_t * frame)
2596 return ip4_rewrite_inline (vm, node, frame,
2597 /* rewrite_for_locally_received_packets */ 0, 1);
2600 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2601 .function = ip4_rewrite_transit,
2602 .name = "ip4-rewrite-transit",
2603 .vector_size = sizeof (u32),
2605 .format_trace = format_ip4_rewrite_trace,
2609 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2610 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2611 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2615 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
2617 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2618 .function = ip4_midchain,
2619 .name = "ip4-midchain",
2620 .vector_size = sizeof (u32),
2622 .format_trace = format_ip4_forward_next_trace,
2624 .sibling_of = "ip4-rewrite-transit",
2627 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
2629 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2630 .function = ip4_rewrite_local,
2631 .name = "ip4-rewrite-local",
2632 .vector_size = sizeof (u32),
2634 .sibling_of = "ip4-rewrite-transit",
2636 .format_trace = format_ip4_rewrite_trace,
2641 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
2643 static clib_error_t *
2644 add_del_interface_table (vlib_main_t * vm,
2645 unformat_input_t * input,
2646 vlib_cli_command_t * cmd)
2648 vnet_main_t * vnm = vnet_get_main();
2649 clib_error_t * error = 0;
2650 u32 sw_if_index, table_id;
2654 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2656 error = clib_error_return (0, "unknown interface `%U'",
2657 format_unformat_error, input);
2661 if (unformat (input, "%d", &table_id))
2665 error = clib_error_return (0, "expected table id `%U'",
2666 format_unformat_error, input);
2671 ip4_main_t * im = &ip4_main;
2674 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2679 // changing an interface's table has consequences for any connecteds
2680 // and adj-fibs already installed.
2682 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2683 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2691 * Place the indicated interface into the supplied IPv4 FIB table (also known
2692 * as a VRF). If the FIB table does not exist, this command creates it. To
2693 * display the current IPv4 FIB table, use the command '<em>show ip fib</em>'.
2694 * FIB table will only be displayed if a route has been added to the table, or
2695 * an IP Address is assigned to an interface in the table (which adds a route
2696 * automatically), or '<em>include-empty</em>' is included.
2698 * @note IP addresses added after setting the interface IP table end up in
2699 * the indicated FIB table. If the IP address is added prior to adding the
2700 * interface to the FIB table, it will NOT be part of the FIB table. Predictable
2701 * but potentially counter-intuitive results occur if you provision interface
2702 * addresses in multiple FIBs. Upon RX, packets will be processed in the last
2703 * IP table ID provisioned. It might be marginally useful to evade source RPF
2704 * drops to put an interface address into multiple FIBs.
2707 * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
2708 * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
2711 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2712 .path = "set interface ip table",
2713 .function = add_del_interface_table,
2714 .short_help = "set interface ip table <interface> <table-id>",
2720 ip4_lookup_multicast (vlib_main_t * vm,
2721 vlib_node_runtime_t * node,
2722 vlib_frame_t * frame)
2724 ip4_main_t * im = &ip4_main;
2725 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2726 u32 n_left_from, n_left_to_next, * from, * to_next;
2727 ip_lookup_next_t next;
2728 u32 cpu_index = os_get_cpu_number();
2730 from = vlib_frame_vector_args (frame);
2731 n_left_from = frame->n_vectors;
2732 next = node->cached_next_index;
2734 while (n_left_from > 0)
2736 vlib_get_next_frame (vm, node, next,
2737 to_next, n_left_to_next);
2739 while (n_left_from >= 4 && n_left_to_next >= 2)
2741 vlib_buffer_t * p0, * p1;
2742 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2743 ip_lookup_next_t next0, next1;
2744 ip4_header_t * ip0, * ip1;
2745 u32 fib_index0, fib_index1;
2746 const dpo_id_t *dpo0, *dpo1;
2747 const load_balance_t * lb0, * lb1;
2749 /* Prefetch next iteration. */
2751 vlib_buffer_t * p2, * p3;
2753 p2 = vlib_get_buffer (vm, from[2]);
2754 p3 = vlib_get_buffer (vm, from[3]);
2756 vlib_prefetch_buffer_header (p2, LOAD);
2757 vlib_prefetch_buffer_header (p3, LOAD);
2759 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2760 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2763 pi0 = to_next[0] = from[0];
2764 pi1 = to_next[1] = from[1];
2766 p0 = vlib_get_buffer (vm, pi0);
2767 p1 = vlib_get_buffer (vm, pi1);
2769 ip0 = vlib_buffer_get_current (p0);
2770 ip1 = vlib_buffer_get_current (p1);
2772 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2773 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2774 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2775 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2776 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2777 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2779 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2781 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2784 lb0 = load_balance_get (lb_index0);
2785 lb1 = load_balance_get (lb_index1);
2787 ASSERT (lb0->lb_n_buckets > 0);
2788 ASSERT (is_pow2 (lb0->lb_n_buckets));
2789 ASSERT (lb1->lb_n_buckets > 0);
2790 ASSERT (is_pow2 (lb1->lb_n_buckets));
2792 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2793 (ip0, lb0->lb_hash_config);
2795 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2796 (ip1, lb1->lb_hash_config);
2798 dpo0 = load_balance_get_bucket_i(lb0,
2799 (vnet_buffer (p0)->ip.flow_hash &
2800 (lb0->lb_n_buckets_minus_1)));
2801 dpo1 = load_balance_get_bucket_i(lb1,
2802 (vnet_buffer (p1)->ip.flow_hash &
2803 (lb0->lb_n_buckets_minus_1)));
2805 next0 = dpo0->dpoi_next_node;
2806 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2807 next1 = dpo1->dpoi_next_node;
2808 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2810 if (1) /* $$$$$$ HACK FIXME */
2811 vlib_increment_combined_counter
2812 (cm, cpu_index, lb_index0, 1,
2813 vlib_buffer_length_in_chain (vm, p0));
2814 if (1) /* $$$$$$ HACK FIXME */
2815 vlib_increment_combined_counter
2816 (cm, cpu_index, lb_index1, 1,
2817 vlib_buffer_length_in_chain (vm, p1));
2821 n_left_to_next -= 2;
2824 wrong_next = (next0 != next) + 2*(next1 != next);
2825 if (PREDICT_FALSE (wrong_next != 0))
2833 n_left_to_next += 1;
2834 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2840 n_left_to_next += 1;
2841 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2847 n_left_to_next += 2;
2848 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2849 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2853 vlib_put_next_frame (vm, node, next, n_left_to_next);
2855 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
2861 while (n_left_from > 0 && n_left_to_next > 0)
2866 ip_lookup_next_t next0;
2868 const dpo_id_t *dpo0;
2869 const load_balance_t * lb0;
2874 p0 = vlib_get_buffer (vm, pi0);
2876 ip0 = vlib_buffer_get_current (p0);
2878 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2879 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2880 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2881 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2883 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2886 lb0 = load_balance_get (lb_index0);
2888 ASSERT (lb0->lb_n_buckets > 0);
2889 ASSERT (is_pow2 (lb0->lb_n_buckets));
2891 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2892 (ip0, lb0->lb_hash_config);
2894 dpo0 = load_balance_get_bucket_i(lb0,
2895 (vnet_buffer (p0)->ip.flow_hash &
2896 (lb0->lb_n_buckets_minus_1)));
2898 next0 = dpo0->dpoi_next_node;
2899 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2901 if (1) /* $$$$$$ HACK FIXME */
2902 vlib_increment_combined_counter
2903 (cm, cpu_index, lb_index0, 1,
2904 vlib_buffer_length_in_chain (vm, p0));
2908 n_left_to_next -= 1;
2911 if (PREDICT_FALSE (next0 != next))
2913 n_left_to_next += 1;
2914 vlib_put_next_frame (vm, node, next, n_left_to_next);
2916 vlib_get_next_frame (vm, node, next,
2917 to_next, n_left_to_next);
2920 n_left_to_next -= 1;
2924 vlib_put_next_frame (vm, node, next, n_left_to_next);
2927 if (node->flags & VLIB_NODE_FLAG_TRACE)
2928 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2930 return frame->n_vectors;
2933 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2934 .function = ip4_lookup_multicast,
2935 .name = "ip4-lookup-multicast",
2936 .vector_size = sizeof (u32),
2937 .sibling_of = "ip4-lookup",
2938 .format_trace = format_ip4_lookup_trace,
2943 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
2945 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2946 .function = ip4_drop,
2947 .name = "ip4-multicast",
2948 .vector_size = sizeof (u32),
2950 .format_trace = format_ip4_forward_next_trace,
2958 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2960 ip4_fib_mtrie_t * mtrie0;
2961 ip4_fib_mtrie_leaf_t leaf0;
2964 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2966 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2967 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2968 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2969 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2970 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2972 /* Handle default route. */
2973 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2975 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2977 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
2980 static clib_error_t *
2981 test_lookup_command_fn (vlib_main_t * vm,
2982 unformat_input_t * input,
2983 vlib_cli_command_t * cmd)
2989 ip4_address_t ip4_base_address;
2992 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2993 if (unformat (input, "table %d", &table_id))
2995 else if (unformat (input, "count %f", &count))
2998 else if (unformat (input, "%U",
2999 unformat_ip4_address, &ip4_base_address))
3002 return clib_error_return (0, "unknown input `%U'",
3003 format_unformat_error, input);
3008 for (i = 0; i < n; i++)
3010 if (!ip4_lookup_validate (&ip4_base_address, table_id))
3013 ip4_base_address.as_u32 =
3014 clib_host_to_net_u32 (1 +
3015 clib_net_to_host_u32 (ip4_base_address.as_u32));
3019 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
3021 vlib_cli_output (vm, "No errors in %d lookups\n", n);
3027 * Perform a lookup of an IPv4 Address (or range of addresses) in the
3028 * given FIB table to determine if there is a conflict with the
3029 * adjacency table. The fib-id can be determined by using the
3030 * '<em>show ip fib</em>' command. If fib-id is not entered, default value
3033 * @todo This command uses fib-id, other commands use table-id (not
3034 * just a name, they are different indexes). Would like to change this
3035 * to table-id for consistency.
3038 * Example of how to run the test lookup command:
3039 * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
3040 * No errors in 2 lookups
3044 VLIB_CLI_COMMAND (lookup_test_command, static) = {
3045 .path = "test lookup",
3046 .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
3047 .function = test_lookup_command_fn,
3051 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3053 ip4_main_t * im4 = &ip4_main;
3055 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3058 return VNET_API_ERROR_NO_SUCH_FIB;
3060 fib = ip4_fib_get (p[0]);
3062 fib->flow_hash_config = flow_hash_config;
3066 static clib_error_t *
3067 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3068 unformat_input_t * input,
3069 vlib_cli_command_t * cmd)
3073 u32 flow_hash_config = 0;
3076 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3077 if (unformat (input, "table %d", &table_id))
3080 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3081 foreach_flow_hash_bit
3087 return clib_error_return (0, "unknown input `%U'",
3088 format_unformat_error, input);
3090 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3096 case VNET_API_ERROR_NO_SUCH_FIB:
3097 return clib_error_return (0, "no such FIB table %d", table_id);
3100 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
3108 * Configure the set of IPv4 fields used by the flow hash.
3111 * Example of how to set the flow hash on a given table:
3112 * @cliexcmd{set ip flow-hash table 7 dst sport dport proto}
 * Example of displaying the configured flow hash:
3114 * @cliexstart{show ip fib}
3115 * Table 0, fib_index 0, flow hash: src dst sport dport proto
3116 * Destination Packets Bytes Adjacency
3117 * 172.16.2.0/24 0 0 weight 1, index 5
3119 * 172.16.2.1/32 0 0 weight 1, index 6
3121 * Table 7, fib_index 1, flow hash: dst sport dport proto
3122 * Destination Packets Bytes Adjacency
3123 * 172.16.1.0/24 0 0 weight 1, index 3
3125 * 172.16.1.1/32 1 98 weight 1, index 4
3127 * 172.16.1.2/32 0 0 weight 1, index 7
3128 * GigabitEthernet2/0/0
3129 * IP4: 02:fe:6a:07:39:6f -> 16:d9:e0:91:79:86
3133 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3134 .path = "set ip flow-hash",
3136 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3137 .function = set_ip_flow_hash_command_fn,
3141 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3144 vnet_main_t * vnm = vnet_get_main();
3145 vnet_interface_main_t * im = &vnm->interface_main;
3146 ip4_main_t * ipm = &ip4_main;
3147 ip_lookup_main_t * lm = &ipm->lookup_main;
3148 vnet_classify_main_t * cm = &vnet_classify_main;
3149 ip4_address_t *if_addr;
3151 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3152 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3154 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3155 return VNET_API_ERROR_NO_SUCH_ENTRY;
3157 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3158 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3160 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3162 if (NULL != if_addr)
3164 fib_prefix_t pfx = {
3166 .fp_proto = FIB_PROTOCOL_IP4,
3167 .fp_addr.ip4 = *if_addr,
3171 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3175 if (table_index != (u32) ~0)
3177 dpo_id_t dpo = DPO_NULL;
3182 classify_dpo_create(FIB_PROTOCOL_IP4,
3185 fib_table_entry_special_dpo_add(fib_index,
3187 FIB_SOURCE_CLASSIFY,
3188 FIB_ENTRY_FLAG_NONE,
3194 fib_table_entry_special_remove(fib_index,
3196 FIB_SOURCE_CLASSIFY);
3203 static clib_error_t *
3204 set_ip_classify_command_fn (vlib_main_t * vm,
3205 unformat_input_t * input,
3206 vlib_cli_command_t * cmd)
3208 u32 table_index = ~0;
3209 int table_index_set = 0;
3210 u32 sw_if_index = ~0;
3213 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3214 if (unformat (input, "table-index %d", &table_index))
3215 table_index_set = 1;
3216 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3217 vnet_get_main(), &sw_if_index))
3223 if (table_index_set == 0)
3224 return clib_error_return (0, "classify table-index must be specified");
3226 if (sw_if_index == ~0)
3227 return clib_error_return (0, "interface / subif must be specified");
3229 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3236 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3237 return clib_error_return (0, "No such interface");
3239 case VNET_API_ERROR_NO_SUCH_ENTRY:
3240 return clib_error_return (0, "No such classifier table");
3246 * Assign a classification table to an interface. The classification
3247 * table is created using the '<em>classify table</em>' and '<em>classify session</em>'
 * commands. Once the table is created, use this command to filter packets
3252 * Example of how to assign a classification table to an interface:
3253 * @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
3256 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3257 .path = "set ip classify",
3259 "set ip classify intfc <interface> table-index <classify-idx>",
3260 .function = set_ip_classify_command_fn,