2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
/* Forward declaration of the trace helper that the forwarding nodes in this
   file call when tracing is enabled.  which_adj_index selects which slot of
   vnet_buffer(b)->ip.adj_index[] is recorded for each traced buffer. */
55 ip4_forward_next_trace (vlib_main_t * vm,
56 vlib_node_runtime_t * node,
58 vlib_rx_or_tx_t which_adj_index);
61 ip4_lookup_inline (vlib_main_t * vm,
62 vlib_node_runtime_t * node,
64 int lookup_for_responses_to_locally_received_packets)
66 ip4_main_t * im = &ip4_main;
67 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
68 u32 n_left_from, n_left_to_next, * from, * to_next;
69 ip_lookup_next_t next;
70 u32 cpu_index = os_get_cpu_number();
72 from = vlib_frame_vector_args (frame);
73 n_left_from = frame->n_vectors;
74 next = node->cached_next_index;
76 while (n_left_from > 0)
78 vlib_get_next_frame (vm, node, next,
79 to_next, n_left_to_next);
81 while (n_left_from >= 4 && n_left_to_next >= 2)
83 vlib_buffer_t * p0, * p1;
84 ip4_header_t * ip0, * ip1;
85 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
86 ip_lookup_next_t next0, next1;
87 const load_balance_t * lb0, * lb1;
88 ip4_fib_mtrie_t * mtrie0, * mtrie1;
89 ip4_fib_mtrie_leaf_t leaf0, leaf1;
90 ip4_address_t * dst_addr0, *dst_addr1;
91 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
92 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
93 flow_hash_config_t flow_hash_config0, flow_hash_config1;
96 const dpo_id_t *dpo0, *dpo1;
98 /* Prefetch next iteration. */
100 vlib_buffer_t * p2, * p3;
102 p2 = vlib_get_buffer (vm, from[2]);
103 p3 = vlib_get_buffer (vm, from[3]);
105 vlib_prefetch_buffer_header (p2, LOAD);
106 vlib_prefetch_buffer_header (p3, LOAD);
108 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
109 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
112 pi0 = to_next[0] = from[0];
113 pi1 = to_next[1] = from[1];
115 p0 = vlib_get_buffer (vm, pi0);
116 p1 = vlib_get_buffer (vm, pi1);
118 ip0 = vlib_buffer_get_current (p0);
119 ip1 = vlib_buffer_get_current (p1);
121 dst_addr0 = &ip0->dst_address;
122 dst_addr1 = &ip1->dst_address;
124 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
125 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
126 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
127 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
128 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
129 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
132 if (! lookup_for_responses_to_locally_received_packets)
134 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
135 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
137 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
139 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
140 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
143 tcp0 = (void *) (ip0 + 1);
144 tcp1 = (void *) (ip1 + 1);
146 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
147 || ip0->protocol == IP_PROTOCOL_UDP);
148 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
149 || ip1->protocol == IP_PROTOCOL_UDP);
151 if (! lookup_for_responses_to_locally_received_packets)
153 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
154 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
157 if (! lookup_for_responses_to_locally_received_packets)
159 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
160 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
163 if (! lookup_for_responses_to_locally_received_packets)
165 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
166 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
169 if (lookup_for_responses_to_locally_received_packets)
171 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
172 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
176 /* Handle default route. */
177 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
178 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
180 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
181 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
184 lb0 = load_balance_get (lb_index0);
185 lb1 = load_balance_get (lb_index1);
187 /* Use flow hash to compute multipath adjacency. */
188 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
189 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
190 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
192 flow_hash_config0 = lb0->lb_hash_config;
193 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
194 ip4_compute_flow_hash (ip0, flow_hash_config0);
196 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
198 flow_hash_config1 = lb1->lb_hash_config;
199 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
200 ip4_compute_flow_hash (ip1, flow_hash_config1);
203 ASSERT (lb0->lb_n_buckets > 0);
204 ASSERT (is_pow2 (lb0->lb_n_buckets));
205 ASSERT (lb1->lb_n_buckets > 0);
206 ASSERT (is_pow2 (lb1->lb_n_buckets));
208 dpo0 = load_balance_get_bucket_i(lb0,
210 (lb0->lb_n_buckets_minus_1)));
211 dpo1 = load_balance_get_bucket_i(lb1,
213 (lb0->lb_n_buckets_minus_1)));
215 next0 = dpo0->dpoi_next_node;
216 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
217 next1 = dpo1->dpoi_next_node;
218 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
220 vlib_increment_combined_counter
221 (cm, cpu_index, lb_index0, 1,
222 vlib_buffer_length_in_chain (vm, p0)
223 + sizeof(ethernet_header_t));
224 vlib_increment_combined_counter
225 (cm, cpu_index, lb_index1, 1,
226 vlib_buffer_length_in_chain (vm, p1)
227 + sizeof(ethernet_header_t));
234 wrong_next = (next0 != next) + 2*(next1 != next);
235 if (PREDICT_FALSE (wrong_next != 0))
244 vlib_set_next_frame_buffer (vm, node, next0, pi0);
251 vlib_set_next_frame_buffer (vm, node, next1, pi1);
258 vlib_set_next_frame_buffer (vm, node, next0, pi0);
259 vlib_set_next_frame_buffer (vm, node, next1, pi1);
263 vlib_put_next_frame (vm, node, next, n_left_to_next);
265 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
271 while (n_left_from > 0 && n_left_to_next > 0)
275 __attribute__((unused)) tcp_header_t * tcp0;
276 ip_lookup_next_t next0;
277 const load_balance_t *lb0;
278 ip4_fib_mtrie_t * mtrie0;
279 ip4_fib_mtrie_leaf_t leaf0;
280 ip4_address_t * dst_addr0;
281 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
282 flow_hash_config_t flow_hash_config0;
283 const dpo_id_t *dpo0;
289 p0 = vlib_get_buffer (vm, pi0);
291 ip0 = vlib_buffer_get_current (p0);
293 dst_addr0 = &ip0->dst_address;
295 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
296 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
297 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
299 if (! lookup_for_responses_to_locally_received_packets)
301 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
303 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
305 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
308 tcp0 = (void *) (ip0 + 1);
310 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
311 || ip0->protocol == IP_PROTOCOL_UDP);
313 if (! lookup_for_responses_to_locally_received_packets)
314 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
316 if (! lookup_for_responses_to_locally_received_packets)
317 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
319 if (! lookup_for_responses_to_locally_received_packets)
320 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
322 if (lookup_for_responses_to_locally_received_packets)
323 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
326 /* Handle default route. */
327 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
328 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
331 lb0 = load_balance_get (lbi0);
333 /* Use flow hash to compute multipath adjacency. */
334 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
335 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
337 flow_hash_config0 = lb0->lb_hash_config;
339 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
340 ip4_compute_flow_hash (ip0, flow_hash_config0);
343 ASSERT (lb0->lb_n_buckets > 0);
344 ASSERT (is_pow2 (lb0->lb_n_buckets));
346 dpo0 = load_balance_get_bucket_i(lb0,
348 (lb0->lb_n_buckets_minus_1)));
350 next0 = dpo0->dpoi_next_node;
351 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
353 vlib_increment_combined_counter
354 (cm, cpu_index, lbi0, 1,
355 vlib_buffer_length_in_chain (vm, p0));
362 if (PREDICT_FALSE (next0 != next))
365 vlib_put_next_frame (vm, node, next, n_left_to_next);
367 vlib_get_next_frame (vm, node, next,
368 to_next, n_left_to_next);
375 vlib_put_next_frame (vm, node, next, n_left_to_next);
378 if (node->flags & VLIB_NODE_FLAG_TRACE)
379 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
381 return frame->n_vectors;
384 /** @brief IPv4 lookup node.
387 This is the main IPv4 lookup dispatch node.
389 @param vm vlib_main_t corresponding to the current thread
390 @param node vlib_node_runtime_t
391 @param frame vlib_frame_t whose contents should be dispatched
393 @par Graph mechanics: buffer metadata, next index usage
396 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
397 - Indicates the @c sw_if_index value of the interface that the
398 packet was received on.
399 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
400 - When the value is @c ~0 then the node performs a longest prefix
401 match (LPM) for the packet destination address in the FIB attached
402 to the receive interface.
403 - Otherwise perform LPM for the packet destination address in the
404 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
405 value (0, 1, ...) and not a VRF id.
408 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
409 - Set to the @c dpoi_index of the load-balance bucket chosen for the packet.
412 - Dispatches the packet to the @c dpoi_next_node of the
413 load-balance bucket (a @c dpo_id_t) selected by the lookup,
414 i.e. the bucket picked with the packet's flow hash.
417 ip4_lookup (vlib_main_t * vm,
418 vlib_node_runtime_t * node,
419 vlib_frame_t * frame)
421 return ip4_lookup_inline (vm, node, frame,
422 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter for the lookup node; defined further down in this file. */
426 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-lookup": vector elements are u32 buffer
   indices, traces are rendered by format_ip4_lookup_trace, and the next
   nodes come from IP4_LOOKUP_NEXT_NODES (IP_LOOKUP_N_NEXT of them). */
428 VLIB_REGISTER_NODE (ip4_lookup_node) = {
429 .function = ip4_lookup,
430 .name = "ip4-lookup",
431 .vector_size = sizeof (u32),
433 .format_trace = format_ip4_lookup_trace,
434 .n_next_nodes = IP_LOOKUP_N_NEXT,
435 .next_nodes = IP4_LOOKUP_NEXT_NODES,
438 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/* ip4-load-balance node function.
 *
 * On entry vnet_buffer(b)->ip.adj_index[VLIB_TX] holds a load-balance index.
 * For each buffer the flow hash is computed with that load-balance's hash
 * config, a bucket is selected with (hash & lb_n_buckets_minus_1),
 * ip.adj_index[VLIB_TX] is overwritten with the bucket's dpoi_index and the
 * buffer is enqueued to the bucket's dpoi_next_node.  The "via" combined
 * counter of the load-balance is incremented per packet.
 *
 * Returns frame->n_vectors.
 */
441 ip4_load_balance (vlib_main_t * vm,
442 vlib_node_runtime_t * node,
443 vlib_frame_t * frame)
445 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
446 u32 n_left_from, n_left_to_next, * from, * to_next;
447 ip_lookup_next_t next;
448 u32 cpu_index = os_get_cpu_number();
450 from = vlib_frame_vector_args (frame);
451 n_left_from = frame->n_vectors;
452 next = node->cached_next_index;
/* Tracing happens before the loop, i.e. before ip.adj_index[VLIB_TX] is
   overwritten, so the trace records the incoming load-balance index. */
454 if (node->flags & VLIB_NODE_FLAG_TRACE)
455 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
457 while (n_left_from > 0)
459 vlib_get_next_frame (vm, node, next,
460 to_next, n_left_to_next);
463 while (n_left_from > 0 && n_left_to_next > 0)
465 ip_lookup_next_t next0;
466 const load_balance_t *lb0;
469 const ip4_header_t *ip0;
470 const dpo_id_t *dpo0;
475 p0 = vlib_get_buffer (vm, pi0);
477 ip0 = vlib_buffer_get_current (p0);
478 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
480 lb0 = load_balance_get(lbi0);
/* The hash is computed unconditionally here, replacing whatever value
   ip.flow_hash held before. */
481 hc0 = lb0->lb_hash_config;
482 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
484 dpo0 = load_balance_get_bucket_i(lb0,
485 vnet_buffer(p0)->ip.flow_hash &
486 (lb0->lb_n_buckets_minus_1));
488 next0 = dpo0->dpoi_next_node;
489 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
491 vlib_increment_combined_counter
492 (cm, cpu_index, lbi0, 1,
493 vlib_buffer_length_in_chain (vm, p0));
/* Next-node misprediction: switch to the frame of the actual next node. */
500 if (PREDICT_FALSE (next0 != next))
503 vlib_put_next_frame (vm, node, next, n_left_to_next);
505 vlib_get_next_frame (vm, node, next,
506 to_next, n_left_to_next);
513 vlib_put_next_frame (vm, node, next, n_left_to_next);
516 return frame->n_vectors;
/* Generic trace formatter shared by several nodes; defined further down. */
519 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-load-balance".  It is declared a sibling
   of "ip4-lookup" and registers no next nodes of its own here. */
521 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
522 .function = ip4_load_balance,
523 .name = "ip4-load-balance",
524 .vector_size = sizeof (u32),
525 .sibling_of = "ip4-lookup",
527 .format_trace = format_ip4_forward_next_trace,
530 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
532 /* Get the first IPv4 address found on sw_if_index (unnumbered interfaces are honored); the matching interface-address record, or 0, is stored through result_ia. */
534 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
535 ip_interface_address_t ** result_ia)
537 ip_lookup_main_t * lm = &im->lookup_main;
538 ip_interface_address_t * ia = 0;
539 ip4_address_t * result = 0;
541 foreach_ip_interface_address (lm, ia, sw_if_index,
542 1 /* honor unnumbered */,
544 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
/* ia is only meaningful when an address was found; otherwise report 0. */
549 *result_ia = result ? ia : 0;
/* Install the FIB entries that go with interface address `a` on sw_if_index
 * in FIB fib_index.  The visible calls add an entry flagged
 * CONNECTED|ATTACHED, whose adjacency is remembered in
 * a->neighbor_probe_adj_index, and an entry flagged CONNECTED|LOCAL.  If a
 * classify table is bound to the interface, a classify DPO is created and
 * added as a special entry instead.
 * NOTE(review): the prefixes used for each entry and the conditions guarding
 * the calls are not all visible in this listing -- confirm against the full
 * source.
 */
554 ip4_add_interface_routes (u32 sw_if_index,
555 ip4_main_t * im, u32 fib_index,
556 ip_interface_address_t * a)
558 ip_lookup_main_t * lm = &im->lookup_main;
559 ip4_address_t * address = ip_interface_address_get_address (lm, a);
561 .fp_len = a->address_length,
562 .fp_proto = FIB_PROTOCOL_IP4,
563 .fp_addr.ip4 = *address,
/* Start with "no adjacency"; replaced below when the attached entry is added. */
566 a->neighbor_probe_adj_index = ~0;
570 fib_node_index_t fei;
572 fei = fib_table_entry_update_one_path(fib_index,
574 FIB_SOURCE_INTERFACE,
575 (FIB_ENTRY_FLAG_CONNECTED |
576 FIB_ENTRY_FLAG_ATTACHED),
578 NULL, /* No next-hop address */
580 ~0, // invalid FIB index
583 FIB_ROUTE_PATH_FLAG_NONE);
584 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* Bounds check first: the per-interface classify vector may be shorter than
   sw_if_index. */
589 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
591 u32 classify_table_index =
592 lm->classify_table_index_by_sw_if_index [sw_if_index];
593 if (classify_table_index != (u32) ~0)
595 dpo_id_t dpo = DPO_NULL;
600 classify_dpo_create(FIB_PROTOCOL_IP4,
601 classify_table_index));
603 fib_table_entry_special_dpo_add(fib_index,
612 fib_table_entry_update_one_path(fib_index,
614 FIB_SOURCE_INTERFACE,
615 (FIB_ENTRY_FLAG_CONNECTED |
616 FIB_ENTRY_FLAG_LOCAL),
620 ~0, // invalid FIB index
623 FIB_ROUTE_PATH_FLAG_NONE);
/* Remove the FIB_SOURCE_INTERFACE entries for an interface address from FIB
 * fib_index.  Two fib_table_entry_delete calls are visible, mirroring the
 * two entries that ip4_add_interface_routes installs.
 * NOTE(review): the prefix passed to each delete is not visible in this
 * listing -- confirm against the full source.
 */
627 ip4_del_interface_routes (ip4_main_t * im,
629 ip4_address_t * address,
633 .fp_len = address_length,
634 .fp_proto = FIB_PROTOCOL_IP4,
635 .fp_addr.ip4 = *address,
640 fib_table_entry_delete(fib_index,
642 FIB_SOURCE_INTERFACE);
646 fib_table_entry_delete(fib_index,
648 FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] counts enables.  Only when the
 * count reaches 1 on enable, or 0 on disable, is the lookup feature added to
 * or removed from the unicast and multicast rx feature configs of the
 * interface.
 */
652 ip4_sw_interface_enable_disable (u32 sw_if_index,
655 vlib_main_t * vm = vlib_get_main();
656 ip4_main_t * im = &ip4_main;
657 ip_lookup_main_t * lm = &im->lookup_main;
659 u32 lookup_feature_index;
/* Grow the per-interface counter vector on demand, new slots start at 0. */
661 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
664 * enable/disable only on the 1<->0 transition
668 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
673 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
674 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
/* Covers the rx unicast and rx multicast feature classes only. */
678 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
680 ip_config_main_t * cm = &lm->feature_config_mains[cast];
681 vnet_config_main_t * vcm = &cm->config_main;
683 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
684 ci = cm->config_index_by_sw_if_index[sw_if_index];
686 if (cast == VNET_IP_RX_UNICAST_FEAT)
687 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
689 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
692 ci = vnet_config_add_feature (vm, vcm,
694 lookup_feature_index,
696 /* # bytes of config data */ 0);
698 ci = vnet_config_del_feature (vm, vcm,
700 lookup_feature_index,
702 /* # bytes of config data */ 0);
/* Store the config index returned by the add/del call. */
703 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/* Add or delete an IPv4 address on an interface.
 *
 * Steps visible here: make sure the interface has a FIB index slot, reject
 * an address whose subnet overlaps an address already present on the
 * interface, update the interface address pool, update the IPv4 enable
 * count, add or remove the interface routes, and finally run the registered
 * add/del callbacks when the pool size actually changed.
 *
 * Returns 0 on success or a clib_error_t describing the failure.
 */
707 static clib_error_t *
708 ip4_add_del_interface_address_internal (vlib_main_t * vm,
710 ip4_address_t * address,
714 vnet_main_t * vnm = vnet_get_main();
715 ip4_main_t * im = &ip4_main;
716 ip_lookup_main_t * lm = &im->lookup_main;
717 clib_error_t * error = 0;
718 u32 if_address_index, elts_before;
719 ip4_address_fib_t ip4_af, * addr_fib = 0;
721 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
722 ip4_addr_fib_init (&ip4_af, address,
723 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
724 vec_add1 (addr_fib, ip4_af);
727 * there is no support for adj-fib handling in the presence of overlapping
728 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
733 /* When adding an address check that it does not conflict
734 with an existing address. */
735 ip_interface_address_t * ia;
736 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
737 0 /* honor unnumbered */,
739 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Overlap is tested in both directions: the new address inside an existing
   subnet, or an existing address inside the new subnet. */
741 if (ip4_destination_matches_route (im, address, x, ia->address_length)
742 || ip4_destination_matches_route (im, x, address, address_length))
743 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
744 format_ip4_address_and_length, address, address_length,
745 format_ip4_address_and_length, x, ia->address_length,
746 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so a no-op (duplicate) update can be detected below. */
750 elts_before = pool_elts (lm->if_address_pool);
752 error = ip_interface_address_add_del
762 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
765 ip4_del_interface_routes (im, ip4_af.fib_index, address,
768 ip4_add_interface_routes (sw_if_index,
769 im, ip4_af.fib_index,
771 (lm->if_address_pool, if_address_index));
773 /* Only notify callbacks when the pool grew or shrank; an unchanged pool size means a duplicate address. */
774 if (elts_before != pool_elts (lm->if_address_pool))
776 ip4_add_del_interface_address_callback_t * cb;
777 vec_foreach (cb, im->add_del_interface_address_callbacks)
778 cb->function (im, cb->function_opaque, sw_if_index,
779 address, address_length,
/* Public entry point for adding or deleting an interface address; forwards
   its arguments to ip4_add_del_interface_address_internal and returns that
   function's result. */
790 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
791 ip4_address_t * address, u32 address_length,
794 return ip4_add_del_interface_address_internal
795 (vm, sw_if_index, address, address_length,
799 /* Built-in ip4 unicast rx feature path definition */
/* Each registration names the graph node implementing the feature, lists in
   runs_before the feature(s) it must precede, and gives the ip4_main field
   that receives the feature's index.  The runs_before chain declared here is:
   ip4-flow-classify -> ip4-inacl -> ip4-source-check-via-rx ->
   ip4-source-check-via-any -> ip4-policer-classify -> ipsec-input-ip4 ->
   vpath-input-ip4 -> ip4-lookup -> ip4-drop, with
   ip4-source-and-port-range-check-rx also before ip4-policer-classify. */
800 VNET_IP4_UNICAST_FEATURE_INIT (ip4_flow_classify, static) = {
801 .node_name = "ip4-flow-classify",
802 .runs_before = ORDER_CONSTRAINTS {"ip4-inacl", 0},
803 .feature_index = &ip4_main.ip4_unicast_rx_feature_flow_classify,
806 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
807 .node_name = "ip4-inacl",
808 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
809 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
812 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
813 .node_name = "ip4-source-check-via-rx",
814 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
816 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
819 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
820 .node_name = "ip4-source-check-via-any",
821 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
823 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
826 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
827 .node_name = "ip4-source-and-port-range-check-rx",
828 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
830 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
833 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
834 .node_name = "ip4-policer-classify",
835 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
837 &ip4_main.ip4_unicast_rx_feature_policer_classify,
840 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
841 .node_name = "ipsec-input-ip4",
842 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
843 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
846 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
847 .node_name = "vpath-input-ip4",
848 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
849 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
852 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
853 .node_name = "ip4-lookup",
854 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
855 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
858 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
859 .node_name = "ip4-drop",
860 .runs_before = 0, /* not before any other features */
861 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
865 /* Built-in ip4 multicast rx feature path definition */
/* Declared order: vpath-input-ip4 -> ip4-lookup-multicast -> ip4-drop. */
866 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
867 .node_name = "vpath-input-ip4",
868 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
869 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
872 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
873 .node_name = "ip4-lookup-multicast",
874 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
875 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
878 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
879 .node_name = "ip4-drop",
880 .runs_before = 0, /* last feature */
881 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Names of the graph nodes from which the rx and tx feature arcs start.
   ip4_feature_init selects the rx table for the rx feature classes and the
   tx table otherwise. */
884 static char * rx_feature_start_nodes[] =
885 { "ip4-input", "ip4-input-no-checksum"};
887 static char * tx_feature_start_nodes[] =
889 "ip4-rewrite-transit",
893 /* Source and port-range check ip4 tx feature path definition */
/* Declared to run before "interface-output". */
894 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
895 .node_name = "ip4-source-and-port-range-check-tx",
896 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
898 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
902 /* Built-in ip4 tx feature path definition */
903 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
904 .node_name = "interface-output",
905 .runs_before = 0, /* not before any other features */
906 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/* Initialise the feature arc of every IPv4 feature class.
 *
 * Iterates over all VNET_N_IP_FEAT classes, picks the rx start-node table for
 * classes below VNET_IP_TX_FEAT and the tx table otherwise, and calls
 * vnet_feature_arc_init for the class's config main.  A non-zero error from
 * vnet_feature_arc_init is captured in `error`.
 */
909 static clib_error_t *
910 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
912 ip_lookup_main_t * lm = &im->lookup_main;
913 clib_error_t * error;
915 ip_config_main_t * cm;
916 vnet_config_main_t * vcm;
917 char **feature_start_nodes;
918 int feature_start_len;
920 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
922 cm = &lm->feature_config_mains[cast];
923 vcm = &cm->config_main;
/* Classes numbered below VNET_IP_TX_FEAT are the rx classes. */
925 if (cast < VNET_IP_TX_FEAT)
927 feature_start_nodes = rx_feature_start_nodes;
928 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
932 feature_start_nodes = tx_feature_start_nodes;
933 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
936 if ((error = vnet_feature_arc_init (vm, vcm,
939 im->next_feature[cast],
940 &im->feature_nodes[cast])))
/* Software-interface add/delete hook, registered below with
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION.
 *
 * Makes sure the interface has a FIB index slot, then for every feature
 * class adds or removes that class's terminal feature on the interface:
 * the drop feature for rx unicast and rx multicast, interface-output for
 * the remaining class.  The delete path also resets the interface's IPv4
 * enable count to 0.  Always returns 0.
 */
947 static clib_error_t *
948 ip4_sw_interface_add_del (vnet_main_t * vnm,
952 vlib_main_t * vm = vnm->vlib_main;
953 ip4_main_t * im = &ip4_main;
954 ip_lookup_main_t * lm = &im->lookup_main;
958 /* Fill in lookup tables with default table (0). */
959 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
961 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
963 ip_config_main_t * cm = &lm->feature_config_mains[cast];
964 vnet_config_main_t * vcm = &cm->config_main;
966 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
967 ci = cm->config_index_by_sw_if_index[sw_if_index];
969 if (cast == VNET_IP_RX_UNICAST_FEAT)
970 feature_index = im->ip4_unicast_rx_feature_drop;
971 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
972 feature_index = im->ip4_multicast_rx_feature_drop;
974 feature_index = im->ip4_tx_feature_interface_output;
977 ci = vnet_config_add_feature (vm, vcm,
981 /* # bytes of config data */ 0);
984 ci = vnet_config_del_feature (vm, vcm, ci,
987 /* # bytes of config data */ 0);
/* Guarded by a length check because the enable-count vector may not have
   been grown to cover this interface. */
988 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
989 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
991 cm->config_index_by_sw_if_index[sw_if_index] = ci;
993 * note: do not update the tx feature count here.
997 return /* no error */ 0;
1000 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
1002 /* Global IP4 main: the single ip4_main_t instance that the functions in this file reach through &ip4_main. */
1003 ip4_main_t ip4_main;
/* One-time IPv4 lookup initialisation, registered below with
 * VLIB_INIT_FUNCTION.
 *
 * Visible steps: fill im->fib_masks[] with network-byte-order prefix masks,
 * initialise the generic lookup main for IPv4, create and lock FIB table 0,
 * hook the packet-generator edit function of the ip4-lookup node, build the
 * ARP request packet template, and initialise the feature arcs.
 */
1006 ip4_lookup_init (vlib_main_t * vm)
1008 ip4_main_t * im = &ip4_main;
1009 clib_error_t * error;
1012 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* Mask with the top i bits set, stored in network byte order below. */
1017 m = pow2_mask (i) << (32 - i);
1020 im->fib_masks[i] = clib_host_to_net_u32 (m);
1023 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1025 /* Create FIB with index 0 and table id of 0. */
1026 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1030 pn = pg_get_node (ip4_lookup_node.index);
1031 pn->unformat_edit = unformat_pg_ip4_header;
1035 ethernet_arp_header_t h;
1037 memset (&h, 0, sizeof (h));
1039 /* Set target ethernet address to all zeros. */
1040 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* _16 stores a 16-bit field converted to network byte order, _8 stores a
   byte field as is. */
1042 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1043 #define _8(f,v) h.f = v;
1044 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1045 _16 (l3_type, ETHERNET_TYPE_IP4);
1046 _8 (n_l2_address_bytes, 6);
1047 _8 (n_l3_address_bytes, 4);
1048 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1052 vlib_packet_template_init (vm,
1053 &im->ip4_arp_request_packet_template,
1056 /* alloc chunk size */ 8,
1060 error = ip4_feature_init (vm, im);
1065 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Per-packet trace record filled by ip4_forward_next_trace.  The code in this
   file reads and writes the fields fib_index, dpo_index, flow_hash and
   packet_data; packet_data is sized as 64 bytes minus one u32. */
1068 /* Adjacency taken. */
1073 /* Packet data, possibly *after* rewrite. */
1074 u8 packet_data[64 - 1*sizeof(u32)];
1075 } ip4_forward_next_trace_t;
/* Trace formatter: prints the captured packet bytes as an IPv4 header,
   indented to the current indent of s.  The vm and node arguments are
   consumed from the va_list but not used. */
1077 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1079 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1080 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1081 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1082 uword indent = format_get_indent (s);
1083 s = format (s, "%U%U",
1084 format_white_space, indent,
1085 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for ip4-lookup: first line shows the FIB index, the DPO
   index and the flow hash from the trace record; the second line shows the
   captured packet bytes formatted as an IPv4 header. */
1089 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1091 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1092 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1093 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1094 uword indent = format_get_indent (s);
1096 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1097 t->fib_index, t->dpo_index, t->flow_hash);
1098 s = format (s, "\n%U%U",
1099 format_white_space, indent,
1100 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite-style nodes: prints the DPO index, the
   adjacency it refers to and the flow hash, then the captured packet bytes
   via format_ip_adjacency_packet_data.
   NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index, which ip4_forward_next_trace sets from sw_if_index[VLIB_TX]
   when that is not ~0 and from the RX interface's FIB index otherwise --
   confirm the label is what is wanted. */
1104 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1106 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1107 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1108 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1109 vnet_main_t * vnm = vnet_get_main();
1110 uword indent = format_get_indent (s);
1112 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1113 t->fib_index, t->dpo_index, format_ip_adjacency,
1114 vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1116 s = format (s, "\n%U%U",
1117 format_white_space, indent,
1118 format_ip_adjacency_packet_data,
1120 t->packet_data, sizeof (t->packet_data));
1124 /* Common trace function for all ip4-forward next nodes. */
/* For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds a
 * trace record holding:
 *   - dpo_index: ip.adj_index[which_adj_index];
 *   - flow_hash: ip.flow_hash;
 *   - fib_index: sw_if_index[VLIB_TX] when it is not ~0, otherwise the FIB
 *     index bound to the RX interface;
 *   - packet_data: the first sizeof(packet_data) bytes at the buffer's
 *     current data pointer.
 * Buffers are handled two at a time with prefetch of the following two, then
 * one at a time for the remainder.
 */
1126 ip4_forward_next_trace (vlib_main_t * vm,
1127 vlib_node_runtime_t * node,
1128 vlib_frame_t * frame,
1129 vlib_rx_or_tx_t which_adj_index)
1132 ip4_main_t * im = &ip4_main;
1134 n_left = frame->n_vectors;
1135 from = vlib_frame_vector_args (frame);
1140 vlib_buffer_t * b0, * b1;
1141 ip4_forward_next_trace_t * t0, * t1;
1143 /* Prefetch next iteration. */
1144 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1145 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1150 b0 = vlib_get_buffer (vm, bi0);
1151 b1 = vlib_get_buffer (vm, bi1);
1153 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1155 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1156 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1157 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1158 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1159 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1160 vec_elt (im->fib_index_by_sw_if_index,
1161 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1163 clib_memcpy (t0->packet_data,
1164 vlib_buffer_get_current (b0),
1165 sizeof (t0->packet_data));
1167 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1169 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1170 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1171 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1172 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1173 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1174 vec_elt (im->fib_index_by_sw_if_index,
1175 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1176 clib_memcpy (t1->packet_data,
1177 vlib_buffer_get_current (b1),
1178 sizeof (t1->packet_data));
/* Remainder: same record, one buffer at a time. */
1188 ip4_forward_next_trace_t * t0;
1192 b0 = vlib_get_buffer (vm, bi0);
1194 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1196 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1197 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1198 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1199 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1200 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1201 vec_elt (im->fib_index_by_sw_if_index,
1202 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1203 clib_memcpy (t0->packet_data,
1204 vlib_buffer_get_current (b0),
1205 sizeof (t0->packet_data));
/* Shared body of the ip4-drop and ip4-punt nodes: hands every buffer of the
   frame to vlib_error_drop_buffers, attributing the error to
   ip4_input_node, and then records traces when tracing is enabled on the
   node.  error_code is the IPv4 error chosen by the calling node. */
1213 ip4_drop_or_punt (vlib_main_t * vm,
1214 vlib_node_runtime_t * node,
1215 vlib_frame_t * frame,
1216 ip4_error_t error_code)
1218 u32 * buffers = vlib_frame_vector_args (frame);
1219 uword n_packets = frame->n_vectors;
1221 vlib_error_drop_buffers (vm, node,
1226 ip4_input_node.index,
1229 if (node->flags & VLIB_NODE_FLAG_TRACE)
1230 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* ip4-drop node function: ip4_drop_or_punt with IP4_ERROR_ADJACENCY_DROP. */
1236 ip4_drop (vlib_main_t * vm,
1237 vlib_node_runtime_t * node,
1238 vlib_frame_t * frame)
1239 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function of ip4_punt_node: runs ip4_drop_or_punt() with
 * IP4_ERROR_ADJACENCY_PUNT and returns its result. */
1242 ip4_punt (vlib_main_t * vm,
1243 vlib_node_runtime_t * node,
1244 vlib_frame_t * frame)
1245 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
/* Graph node registration whose function is ip4_drop.  The frame vector
 * holds u32 buffer indices and traces are rendered with
 * format_ip4_forward_next_trace.  Other fields are not visible in this
 * listing. */
1247 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1248 .function = ip4_drop,
1250 .vector_size = sizeof (u32),
1252 .format_trace = format_ip4_forward_next_trace,
1260 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
/* Graph node registration whose function is ip4_punt.  The frame vector
 * holds u32 buffer indices and traces are rendered with
 * format_ip4_forward_next_trace.  Other fields are not visible in this
 * listing. */
1262 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1263 .function = ip4_punt,
1265 .vector_size = sizeof (u32),
1267 .format_trace = format_ip4_forward_next_trace,
1275 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1277 /* Compute TCP/UDP/ICMP4 checksum in software. */
/*
 * vm  - used to resolve chained buffers (vlib_get_buffer).
 * p0  - first buffer of the packet.
 * ip0 - IPv4 header of the packet (third parameter; its declaration line
 *       is not visible in this listing).
 *
 * The running sum starts from the pseudo-header: L4 payload length (IP
 * total length minus IP header length) plus protocol << 16, then the
 * source and destination addresses.  The payload is then summed buffer by
 * buffer along the next_buffer chain until the whole payload length has
 * been consumed.  The final value stored in sum16 is the complement of
 * the folded sum; ip4_tcp_udp_validate_checksum() treats 0 as valid.
 */
1279 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1283 u32 ip_header_length, payload_length_host_byte_order;
1284 u32 n_this_buffer, n_bytes_left;
1286 void * data_this_buffer;
1288 /* Initialize checksum with ip header. */
1289 ip_header_length = ip4_header_bytes (ip0);
1290 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1291 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
/* 32-bit uword: add source and destination as two u32 loads.  Otherwise a
 * single unaligned u64 load at src_address is used, which presumably
 * covers both adjacent address fields (confirm against ip4_header_t). */
1293 if (BITS (uword) == 32)
1295 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1296 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1299 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1301 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1302 data_this_buffer = (void *) ip0 + ip_header_length;
/* Clamp the first chunk to what this buffer holds past the IP header;
 * the remainder is read from the chained buffers below. */
1303 if (n_this_buffer + ip_header_length > p0->current_length)
1304 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1307 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1308 n_bytes_left -= n_this_buffer;
1309 if (n_bytes_left == 0)
/* More payload expected: a next buffer must exist (checked by ASSERT
 * only, so not enforced in builds where ASSERT is compiled out). */
1312 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1313 p0 = vlib_get_buffer (vm, p0->next_buffer);
1314 data_this_buffer = vlib_buffer_get_current (p0);
1315 n_this_buffer = p0->current_length;
1318 sum16 = ~ ip_csum_fold (sum0);
/*
 * Verify the L4 checksum of a TCP or UDP packet in software and record
 * the outcome in p0->flags.
 *
 * The IPv4 header is taken from the current position of p0.  A UDP
 * packet whose checksum field is 0 is marked COMPUTED and CORRECT without
 * computing anything.  Otherwise ip4_tcp_udp_compute_checksum() is run;
 * COMPUTED is always set and CORRECT is set when the result is 0.
 * ip4_local() assigns the return value of this function to its flags
 * variable.
 *
 * NOTE(review): udp0 is located with ip0 + 1, i.e. assuming an IP header
 * without options, while ip4_local() uses ip4_next_header().  With IP
 * options present the zero-checksum test here looks like it reads the
 * wrong bytes - confirm.
 */
1324 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1326 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1327 udp_header_t * udp0;
1330 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1331 || ip0->protocol == IP_PROTOCOL_UDP);
1333 udp0 = (void *) (ip0 + 1);
1334 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1336 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1337 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1341 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1343 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1344 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
/*
 * Node function of ip4_local_node (ip4-local).
 *
 * For every packet in the frame:
 *  - the protocol is read from the IP header; fragments are dispatched
 *    as protocol 0xfe;
 *  - for TCP and UDP the L4 checksum must be good: either the buffer
 *    flags already carry IP_BUFFER_L4_CHECKSUM_CORRECT, or the UDP
 *    checksum field is 0, or ip4_tcp_udp_validate_checksum() is run when
 *    the flags do not say the checksum was computed;
 *  - for UDP the UDP length must not exceed the IP length
 *    (IP4_ERROR_UDP_LENGTH otherwise);
 *  - the source address is looked up, one mtrie step at a time, in the
 *    FIB selected by the RX sw_if_index; the resulting load-balance
 *    index is stored in ip.adj_index[VLIB_RX] and [VLIB_TX];
 *  - a source that resolves to a DPO_RECEIVE object yields
 *    IP4_ERROR_SPOOFED_LOCAL_PACKETS; a source with an empty uRPF list
 *    yields IP4_ERROR_SRC_LOOKUP_MISS unless the destination is
 *    0xFFFFFFFF;
 *  - the next node is lm->local_next_by_ip_protocol[proto], replaced by
 *    IP_LOCAL_NEXT_DROP whenever the error is no longer
 *    IP4_ERROR_UNKNOWN_PROTOCOL (the initial value of errorN).
 *
 * Error counters are taken from the ip4_input_node runtime.  Tracing, when
 * enabled, is done up front with the VLIB_TX adjacency index.
 * Returns frame->n_vectors.
 */
1350 ip4_local (vlib_main_t * vm,
1351 vlib_node_runtime_t * node,
1352 vlib_frame_t * frame)
1354 ip4_main_t * im = &ip4_main;
1355 ip_lookup_main_t * lm = &im->lookup_main;
1356 ip_local_next_t next_index;
1357 u32 * from, * to_next, n_left_from, n_left_to_next;
1358 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1360 from = vlib_frame_vector_args (frame);
1361 n_left_from = frame->n_vectors;
1362 next_index = node->cached_next_index;
1364 if (node->flags & VLIB_NODE_FLAG_TRACE)
1365 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1367 while (n_left_from > 0)
1369 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
/* Dual-packet loop: two packets per iteration. */
1371 while (n_left_from >= 4 && n_left_to_next >= 2)
1373 vlib_buffer_t * p0, * p1;
1374 ip4_header_t * ip0, * ip1;
1375 udp_header_t * udp0, * udp1;
1376 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1377 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1378 const dpo_id_t *dpo0, *dpo1;
1379 const load_balance_t *lb0, *lb1;
1380 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1381 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1382 i32 len_diff0, len_diff1;
1383 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1384 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1387 pi0 = to_next[0] = from[0];
1388 pi1 = to_next[1] = from[1];
1392 n_left_to_next -= 2;
1394 p0 = vlib_get_buffer (vm, pi0);
1395 p1 = vlib_get_buffer (vm, pi1);
1397 ip0 = vlib_buffer_get_current (p0);
1398 ip1 = vlib_buffer_get_current (p1);
1400 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1401 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1402 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1403 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1405 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1406 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1408 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
/* Source-address lookup, step 0 of 4; the remaining steps are
 * interleaved with the checksum and length checks below. */
1410 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1411 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1413 /* Treat IP frag packets as "experimental" protocol for now
1414 until support of IP frag reassembly is implemented */
1415 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1416 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1417 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1418 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1419 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1420 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1425 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1426 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1428 udp0 = ip4_next_header (ip0);
1429 udp1 = ip4_next_header (ip1);
1431 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1432 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1433 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1435 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1436 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1438 /* Verify UDP length. */
1439 ip_len0 = clib_net_to_host_u16 (ip0->length);
1440 ip_len1 = clib_net_to_host_u16 (ip1->length);
1441 udp_len0 = clib_net_to_host_u16 (udp0->length);
1442 udp_len1 = clib_net_to_host_u16 (udp1->length);
1444 len_diff0 = ip_len0 - udp_len0;
1445 len_diff1 = ip_len1 - udp_len1;
1447 len_diff0 = is_udp0 ? len_diff0 : 0;
1448 len_diff1 = is_udp1 ? len_diff1 : 0;
/* Slow path: taken unless both packets are TCP/UDP with a checksum
 * already known to be good. */
1450 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1451 & good_tcp_udp0 & good_tcp_udp1)))
1456 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1457 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1459 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1460 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1465 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1466 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1468 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1469 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1473 good_tcp_udp0 &= len_diff0 >= 0;
1474 good_tcp_udp1 &= len_diff1 >= 0;
1476 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1477 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
/* IP4_ERROR_UNKNOWN_PROTOCOL is the starting value: the next-node
 * selection further down drops only when the error differs from it. */
1479 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1481 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1482 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1484 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1485 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1486 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1488 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1489 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1492 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1493 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1494 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1495 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1497 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1498 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1500 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1501 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1503 lb0 = load_balance_get(lbi0);
1504 lb1 = load_balance_get(lbi1);
1505 dpo0 = load_balance_get_bucket_i(lb0, 0);
1506 dpo1 = load_balance_get_bucket_i(lb1, 0);
1509 * Must have a route to source otherwise we drop the packet.
1510 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1513 * - the source is a recieve => it's from us => bogus, do this
1514 * first since it sets a different error code.
1515 * - uRPF check for any route to source - accept if passes.
1516 * - allow packets destined to the broadcast address from unknown sources
1518 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1519 dpo0->dpoi_type == DPO_RECEIVE) ?
1520 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1522 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1523 !fib_urpf_check_size(lb0->lb_urpf) &&
1524 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1525 ? IP4_ERROR_SRC_LOOKUP_MISS
1527 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1528 dpo1->dpoi_type == DPO_RECEIVE) ?
1529 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1531 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1532 !fib_urpf_check_size(lb1->lb_urpf) &&
1533 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1534 ? IP4_ERROR_SRC_LOOKUP_MISS
1537 next0 = lm->local_next_by_ip_protocol[proto0];
1538 next1 = lm->local_next_by_ip_protocol[proto1];
1540 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1541 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1543 p0->error = error0 ? error_node->errors[error0] : 0;
1544 p1->error = error1 ? error_node->errors[error1] : 0;
1546 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1548 if (PREDICT_FALSE (enqueue_code != 0))
1550 switch (enqueue_code)
1556 n_left_to_next += 1;
1557 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1563 n_left_to_next += 1;
1564 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1568 /* A B B or A B C */
1570 n_left_to_next += 2;
1571 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1572 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1575 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1577 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1584 while (n_left_from > 0 && n_left_to_next > 0)
1588 udp_header_t * udp0;
1589 ip4_fib_mtrie_t * mtrie0;
1590 ip4_fib_mtrie_leaf_t leaf0;
1591 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1593 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1594 load_balance_t *lb0;
1595 const dpo_id_t *dpo0;
1597 pi0 = to_next[0] = from[0];
1601 n_left_to_next -= 1;
1603 p0 = vlib_get_buffer (vm, pi0);
1605 ip0 = vlib_buffer_get_current (p0);
1607 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1608 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1610 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1612 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1614 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1616 /* Treat IP frag packets as "experimental" protocol for now
1617 until support of IP frag reassembly is implemented */
1618 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1619 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1620 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1624 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1626 udp0 = ip4_next_header (ip0);
1628 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1629 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1631 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1633 /* Verify UDP length. */
1634 ip_len0 = clib_net_to_host_u16 (ip0->length);
1635 udp_len0 = clib_net_to_host_u16 (udp0->length);
1637 len_diff0 = ip_len0 - udp_len0;
1639 len_diff0 = is_udp0 ? len_diff0 : 0;
1641 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1646 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1647 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1649 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1650 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1654 good_tcp_udp0 &= len_diff0 >= 0;
1656 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1658 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1660 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1662 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1663 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1664 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1667 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1668 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1670 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1671 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1673 lb0 = load_balance_get(lbi0);
1674 dpo0 = load_balance_get_bucket_i(lb0, 0);
1676 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1677 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1680 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1681 dpo0->dpoi_type == DPO_RECEIVE) ?
1682 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1684 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1685 !fib_urpf_check_size(lb0->lb_urpf) &&
1686 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1687 ? IP4_ERROR_SRC_LOOKUP_MISS
1690 next0 = lm->local_next_by_ip_protocol[proto0];
1692 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1694 p0->error = error0? error_node->errors[error0] : 0;
/* The packet was speculatively written to the current next frame.  If it
 * belongs elsewhere, give the slot back, close this frame and open
 * another one (the line selecting the new next_index is not visible in
 * this listing). */
1696 if (PREDICT_FALSE (next0 != next_index))
1698 n_left_to_next += 1;
1699 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1702 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1705 n_left_to_next -= 1;
1709 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1712 return frame->n_vectors;
/* Graph node registration for ip4_local under the name ip4-local.
 * It has IP_LOCAL_N_NEXT next nodes; the ones visible here are
 * error-drop, error-punt, ip4-udp-lookup and ip4-icmp-input.
 * ip4_register_protocol() adds further next nodes to this node. */
1715 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1716 .function = ip4_local,
1717 .name = "ip4-local",
1718 .vector_size = sizeof (u32),
1720 .format_trace = format_ip4_forward_next_trace,
1722 .n_next_nodes = IP_LOCAL_N_NEXT,
1724 [IP_LOCAL_NEXT_DROP] = "error-drop",
1725 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1726 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1727 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1731 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
/*
 * Register a graph node as the handler for locally received packets of
 * one IP protocol.
 *
 * protocol   - IP protocol number; must index inside
 *              lm->local_next_by_ip_protocol (checked by ASSERT only).
 * node_index - node to add as a next node of ip4_local_node.
 *
 * The next index returned by vlib_node_add_next() is stored in the
 * per-protocol table that ip4_local() uses for dispatch.
 */
1733 void ip4_register_protocol (u32 protocol, u32 node_index)
1735 vlib_main_t * vm = vlib_get_main();
1736 ip4_main_t * im = &ip4_main;
1737 ip_lookup_main_t * lm = &im->lookup_main;
1739 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1740 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
/*
 * CLI handler behind the show_ip_local command.
 *
 * Prints a heading, then the number of every IP protocol whose entry in
 * lm->local_next_by_ip_protocol differs from IP_LOCAL_NEXT_PUNT.
 * input and cmd are not used in the visible lines.
 */
1743 static clib_error_t *
1744 show_ip_local_command_fn (vlib_main_t * vm,
1745 unformat_input_t * input,
1746 vlib_cli_command_t * cmd)
1748 ip4_main_t * im = &ip4_main;
1749 ip_lookup_main_t * lm = &im->lookup_main;
1752 vlib_cli_output (vm, "Protocols handled by ip4_local");
1753 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1755 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1756 vlib_cli_output (vm, "%d", i);
/* CLI registration: binds the command path given below to
 * show_ip_local_command_fn. */
1763 VLIB_CLI_COMMAND (show_ip_local, static) = {
1764 .path = "show ip local",
1765 .function = show_ip_local_command_fn,
1766 .short_help = "Show ip local protocol table",
/*
 * Shared body of ip4_arp() (is_glean == 0) and ip4_glean() (is_glean == 1).
 *
 * Every input buffer is written to the IP4_ARP_NEXT_DROP frame.  An
 * address is mixed into a seeded hash: the packet destination in one case
 * and the adjacency next hop in the other.  The existing comments
 * associate the destination with the glean case.  The hash selects a bit
 * in the static hash_bitmap.  If the bit was already set the packet is
 * counted as IP4_ARP_ERROR_DROP, otherwise as
 * IP4_ARP_ERROR_REQUEST_SENT; an ARP request is then built from
 * im->ip4_arp_request_packet_template and sent to the adjacency rewrite
 * next index.  The seeds are refreshed (and, per the comment below, the
 * seen-state reset) when more than 1e-3 s have passed since the last
 * change.
 *
 * The hash state is held in function-local statics, so it is shared by
 * every caller of this function.
 *
 * NOTE(review): in the bitmap indexing, c0 is divided by BITS (uword)
 * before c0 % BITS (uword) is evaluated, so the bit position is derived
 * from the word index instead of from the hash value; it looks like the
 * modulo was meant to be taken first.  m0 is also declared u32 while the
 * shift is performed in uword.  Confirm and fix together with the lines
 * that are missing from this listing.
 *
 * Returns frame->n_vectors.
 */
1770 ip4_arp_inline (vlib_main_t * vm,
1771 vlib_node_runtime_t * node,
1772 vlib_frame_t * frame,
1775 vnet_main_t * vnm = vnet_get_main();
1776 ip4_main_t * im = &ip4_main;
1777 ip_lookup_main_t * lm = &im->lookup_main;
1778 u32 * from, * to_next_drop;
1779 uword n_left_from, n_left_to_next_drop, next_index;
1780 static f64 time_last_seed_change = -1e100;
1781 static u32 hash_seeds[3];
1782 static uword hash_bitmap[256 / BITS (uword)];
1785 if (node->flags & VLIB_NODE_FLAG_TRACE)
1786 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1788 time_now = vlib_time_now (vm);
1789 if (time_now - time_last_seed_change > 1e-3)
1792 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1793 sizeof (hash_seeds));
1794 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1795 hash_seeds[i] = r[i];
1797 /* Mark all hash keys as not seen before. */
1798 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1801 time_last_seed_change = time_now;
1804 from = vlib_frame_vector_args (frame);
1805 n_left_from = frame->n_vectors;
1806 next_index = node->cached_next_index;
1807 if (next_index == IP4_ARP_NEXT_DROP)
1808 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1810 while (n_left_from > 0)
1812 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1813 to_next_drop, n_left_to_next_drop);
1815 while (n_left_from > 0 && n_left_to_next_drop > 0)
1817 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1818 ip_adjacency_t * adj0;
1825 p0 = vlib_get_buffer (vm, pi0);
1827 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1828 adj0 = ip_get_adjacency (lm, adj_index0);
1829 ip0 = vlib_buffer_get_current (p0);
1832 * this is the Glean case, so we are ARPing for the
1833 * packet's destination
1839 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1840 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1844 a0 ^= ip0->dst_address.data_u32;
1848 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1852 hash_v3_finalize32 (a0, b0, c0);
1854 c0 &= BITS (hash_bitmap) - 1;
1855 c0 = c0 / BITS (uword);
1856 m0 = (uword) 1 << (c0 % BITS (uword));
1858 bm0 = hash_bitmap[c0];
1859 drop0 = (bm0 & m0) != 0;
1861 /* Mark it as seen. */
1862 hash_bitmap[c0] = bm0 | m0;
1866 to_next_drop[0] = pi0;
1868 n_left_to_next_drop -= 1;
1870 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1876 * Can happen if the control-plane is programming tables
1877 * with traffic flowing; at least that's today's lame excuse.
1879 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1880 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1882 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1885 /* Send ARP request. */
1889 ethernet_arp_header_t * h0;
1890 vnet_hw_interface_t * hw_if0;
1892 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1894 /* Add rewrite/encap string for ARP packet. */
1895 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1897 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1899 /* Src ethernet address in ARP header. */
1900 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1901 sizeof (h0->ip4_over_ethernet[0].ethernet));
1905 /* The interface's source address is stashed in the Glean Adj */
1906 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1908 /* Copy in destination address we are requesting. This is the
1909 * glean case, so it's the packet's destination.*/
1910 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1914 /* Src IP address in ARP header. */
1915 if (ip4_src_address_for_packet(lm, sw_if_index0,
1916 &h0->ip4_over_ethernet[0].ip4))
1918 /* No source address available */
1919 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1920 vlib_buffer_free(vm, &bi0, 1);
1924 /* Copy in destination address we are requesting from the
1926 h0->ip4_over_ethernet[1].ip4.data_u32 =
1927 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1930 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1931 b0 = vlib_get_buffer (vm, bi0);
1932 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1934 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1936 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1940 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1943 return frame->n_vectors;
/* Node function of ip4_arp_node: ip4_arp_inline() with is_glean = 0. */
1947 ip4_arp (vlib_main_t * vm,
1948 vlib_node_runtime_t * node,
1949 vlib_frame_t * frame)
1951 return (ip4_arp_inline(vm, node, frame, 0));
/* Node function of ip4_glean_node: ip4_arp_inline() with is_glean = 1. */
1955 ip4_glean (vlib_main_t * vm,
1956 vlib_node_runtime_t * node,
1957 vlib_frame_t * frame)
1959 return (ip4_arp_inline(vm, node, frame, 1));
/* Counter descriptions indexed by IP4_ARP_ERROR_* value.  The same table
 * is used by both ip4_arp_node and ip4_glean_node. */
1962 static char * ip4_arp_error_strings[] = {
1963 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1964 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1965 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1966 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1967 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1968 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
/* Graph node registration whose function is ip4_arp.  Errors come from
 * ip4_arp_error_strings; the node declares IP4_ARP_N_NEXT next nodes, of
 * which IP4_ARP_NEXT_DROP (error-drop) is the one visible here. */
1971 VLIB_REGISTER_NODE (ip4_arp_node) = {
1972 .function = ip4_arp,
1974 .vector_size = sizeof (u32),
1976 .format_trace = format_ip4_forward_next_trace,
1978 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1979 .error_strings = ip4_arp_error_strings,
1981 .n_next_nodes = IP4_ARP_N_NEXT,
1983 [IP4_ARP_NEXT_DROP] = "error-drop",
/* Graph node registration for ip4_glean under the name ip4-glean.  It
 * reuses ip4_arp_error_strings and the IP4_ARP_* next-node layout of
 * ip4_arp_node. */
1987 VLIB_REGISTER_NODE (ip4_glean_node) = {
1988 .function = ip4_glean,
1989 .name = "ip4-glean",
1990 .vector_size = sizeof (u32),
1992 .format_trace = format_ip4_forward_next_trace,
1994 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1995 .error_strings = ip4_arp_error_strings,
1997 .n_next_nodes = IP4_ARP_N_NEXT,
1999 [IP4_ARP_NEXT_DROP] = "error-drop",
/*
 * foreach_notrace_ip4_arp_error lists ARP errors (the entries are not
 * visible in this listing).  arp_notrace_init() is registered with
 * VLIB_INIT_FUNCTION; for each listed error it calls
 * vnet_pcap_drop_trace_filter_add_del() with the matching error index of
 * the ip4_arp_node runtime.  Per the existing comment in the body, the
 * purpose is to keep ARP request packets out of the trace.
 */
2003 #define foreach_notrace_ip4_arp_error \
2009 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2011 vlib_node_runtime_t *rt =
2012 vlib_node_get_runtime (vm, ip4_arp_node.index);
2014 /* don't trace ARP request packets */
2016 vnet_pcap_drop_trace_filter_add_del \
2017 (rt->errors[IP4_ARP_ERROR_##a], \
2019 foreach_notrace_ip4_arp_error;
2024 VLIB_INIT_FUNCTION(arp_notrace_init);
2027 /* Send an ARP request to see if given destination is reachable on given interface. */
/*
 * vm          - vlib main used for buffer and frame handling.
 * dst         - IPv4 address to probe; becomes the target of the request.
 * sw_if_index - software interface to send the request on.
 *
 * Returns 0 on success, or a clib error when the interface is not admin
 * up or (with vnm->api_errno set to VNET_API_ERROR_NO_MATCHING_INTERFACE)
 * when no interface address matches dst.
 *
 * The request is built from im->ip4_arp_request_packet_template, filled
 * with the hardware address of the interface and the matching interface
 * address as sender, prefixed with the rewrite of the
 * neighbor_probe_adj_index adjacency of that address, and handed to the
 * output node of the hardware interface in its own frame.
 */
2029 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2031 vnet_main_t * vnm = vnet_get_main();
2032 ip4_main_t * im = &ip4_main;
2033 ethernet_arp_header_t * h;
2034 ip4_address_t * src;
2035 ip_interface_address_t * ia;
2036 ip_adjacency_t * adj;
2037 vnet_hw_interface_t * hi;
2038 vnet_sw_interface_t * si;
2042 si = vnet_get_sw_interface (vnm, sw_if_index);
2044 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2046 return clib_error_return (0, "%U: interface %U down",
2047 format_ip4_address, dst,
2048 format_vnet_sw_if_index_name, vnm,
2052 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
/* Failure path; the test guarding it (presumably on src) is not visible
 * in this listing. */
2055 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2056 return clib_error_return
2057 (0, "no matching interface address for destination %U (interface %U)",
2058 format_ip4_address, dst,
2059 format_vnet_sw_if_index_name, vnm, sw_if_index);
2062 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2064 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2066 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2068 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2070 h->ip4_over_ethernet[0].ip4 = src[0];
2071 h->ip4_over_ethernet[1].ip4 = dst[0];
2073 b = vlib_get_buffer (vm, bi);
2074 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2076 /* Add encapsulation string for software interface (e.g. ethernet header). */
2077 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
/* Move the buffer start back over the rewrite bytes just written in
 * front of the ARP header. */
2078 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2081 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2082 u32 * to_next = vlib_frame_vector_args (f);
2085 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2088 return /* no error */ 0;
/* Next-node indices used by ip4_rewrite_inline(): DROP is the default for
 * packets that fail a check, ARP overrides the next node for ARP
 * adjacencies on the locally-received path, ICMP_ERROR is taken when the
 * TTL expires. */
2092 IP4_REWRITE_NEXT_DROP,
2093 IP4_REWRITE_NEXT_ARP,
2094 IP4_REWRITE_NEXT_ICMP_ERROR,
2095 } ip4_rewrite_next_t;
2098 ip4_rewrite_inline (vlib_main_t * vm,
2099 vlib_node_runtime_t * node,
2100 vlib_frame_t * frame,
2101 int rewrite_for_locally_received_packets,
2104 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2105 u32 * from = vlib_frame_vector_args (frame);
2106 u32 n_left_from, n_left_to_next, * to_next, next_index;
2107 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2108 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2109 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2111 n_left_from = frame->n_vectors;
2112 next_index = node->cached_next_index;
2113 u32 cpu_index = os_get_cpu_number();
2115 while (n_left_from > 0)
2117 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2119 while (n_left_from >= 4 && n_left_to_next >= 2)
2121 ip_adjacency_t * adj0, * adj1;
2122 vlib_buffer_t * p0, * p1;
2123 ip4_header_t * ip0, * ip1;
2124 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2125 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2126 u32 next0_override, next1_override;
2127 u32 tx_sw_if_index0, tx_sw_if_index1;
2129 if (rewrite_for_locally_received_packets)
2130 next0_override = next1_override = 0;
2132 /* Prefetch next iteration. */
2134 vlib_buffer_t * p2, * p3;
2136 p2 = vlib_get_buffer (vm, from[2]);
2137 p3 = vlib_get_buffer (vm, from[3]);
2139 vlib_prefetch_buffer_header (p2, STORE);
2140 vlib_prefetch_buffer_header (p3, STORE);
2142 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2143 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2146 pi0 = to_next[0] = from[0];
2147 pi1 = to_next[1] = from[1];
2152 n_left_to_next -= 2;
2154 p0 = vlib_get_buffer (vm, pi0);
2155 p1 = vlib_get_buffer (vm, pi1);
2157 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2158 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2160 /* We should never rewrite a pkt using the MISS adjacency */
2161 ASSERT(adj_index0 && adj_index1);
2163 ip0 = vlib_buffer_get_current (p0);
2164 ip1 = vlib_buffer_get_current (p1);
2166 error0 = error1 = IP4_ERROR_NONE;
2167 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2169 /* Decrement TTL & update checksum.
2170 Works either endian, so no need for byte swap. */
2171 if (! rewrite_for_locally_received_packets)
2173 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2175 /* Input node should have reject packets with ttl 0. */
2176 ASSERT (ip0->ttl > 0);
2177 ASSERT (ip1->ttl > 0);
2179 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2180 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2182 checksum0 += checksum0 >= 0xffff;
2183 checksum1 += checksum1 >= 0xffff;
2185 ip0->checksum = checksum0;
2186 ip1->checksum = checksum1;
2195 * If the ttl drops below 1 when forwarding, generate
2198 if (PREDICT_FALSE(ttl0 <= 0))
2200 error0 = IP4_ERROR_TIME_EXPIRED;
2201 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2202 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2203 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2204 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2206 if (PREDICT_FALSE(ttl1 <= 0))
2208 error1 = IP4_ERROR_TIME_EXPIRED;
2209 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2210 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2211 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2212 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2215 /* Verify checksum. */
2216 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2217 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2220 /* Rewrite packet header and updates lengths. */
2221 adj0 = ip_get_adjacency (lm, adj_index0);
2222 adj1 = ip_get_adjacency (lm, adj_index1);
2224 if (rewrite_for_locally_received_packets)
2226 if (PREDICT_FALSE(adj0->lookup_next_index
2227 == IP_LOOKUP_NEXT_ARP))
2228 next0_override = IP4_REWRITE_NEXT_ARP;
2229 if (PREDICT_FALSE(adj1->lookup_next_index
2230 == IP_LOOKUP_NEXT_ARP))
2231 next1_override = IP4_REWRITE_NEXT_ARP;
2234 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2235 rw_len0 = adj0[0].rewrite_header.data_bytes;
2236 rw_len1 = adj1[0].rewrite_header.data_bytes;
2237 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2238 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2240 /* Check MTU of outgoing interface. */
2241 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2242 ? IP4_ERROR_MTU_EXCEEDED
2244 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2245 ? IP4_ERROR_MTU_EXCEEDED
2248 next0 = (error0 == IP4_ERROR_NONE)
2249 ? adj0[0].rewrite_header.next_index : next0;
2251 if (rewrite_for_locally_received_packets)
2252 next0 = next0 && next0_override ? next0_override : next0;
2254 next1 = (error1 == IP4_ERROR_NONE)
2255 ? adj1[0].rewrite_header.next_index : next1;
2257 if (rewrite_for_locally_received_packets)
2258 next1 = next1 && next1_override ? next1_override : next1;
2261 * We've already accounted for an ethernet_header_t elsewhere
2263 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2264 vlib_increment_combined_counter
2265 (&adjacency_counters,
2266 cpu_index, adj_index0,
2267 /* packet increment */ 0,
2268 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2270 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2271 vlib_increment_combined_counter
2272 (&adjacency_counters,
2273 cpu_index, adj_index1,
2274 /* packet increment */ 0,
2275 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2277 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2278 * to see the IP headerr */
2279 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2281 p0->current_data -= rw_len0;
2282 p0->current_length += rw_len0;
2283 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2284 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2288 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2291 p0->current_config_index =
2292 vec_elt (cm->config_index_by_sw_if_index,
2294 vnet_get_config_data (&cm->config_main,
2295 &p0->current_config_index,
2297 /* # bytes of config data */ 0);
2300 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2302 p1->current_data -= rw_len1;
2303 p1->current_length += rw_len1;
2305 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2306 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2310 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2313 p1->current_config_index =
2314 vec_elt (cm->config_index_by_sw_if_index,
2316 vnet_get_config_data (&cm->config_main,
2317 &p1->current_config_index,
2319 /* # bytes of config data */ 0);
2323 /* Guess we are only writing on simple Ethernet header. */
2324 vnet_rewrite_two_headers (adj0[0], adj1[0],
2326 sizeof (ethernet_header_t));
2330 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2331 adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2334 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2335 to_next, n_left_to_next,
2336 pi0, pi1, next0, next1);
2339 while (n_left_from > 0 && n_left_to_next > 0)
2341 ip_adjacency_t * adj0;
2344 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2346 u32 tx_sw_if_index0;
2348 if (rewrite_for_locally_received_packets)
2351 pi0 = to_next[0] = from[0];
2353 p0 = vlib_get_buffer (vm, pi0);
2355 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2357 /* We should never rewrite a pkt using the MISS adjacency */
2360 adj0 = ip_get_adjacency (lm, adj_index0);
2362 ip0 = vlib_buffer_get_current (p0);
2364 error0 = IP4_ERROR_NONE;
2365 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2367 /* Decrement TTL & update checksum. */
2368 if (! rewrite_for_locally_received_packets)
2370 i32 ttl0 = ip0->ttl;
2372 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2374 checksum0 += checksum0 >= 0xffff;
2376 ip0->checksum = checksum0;
2378 ASSERT (ip0->ttl > 0);
2384 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2386 if (PREDICT_FALSE(ttl0 <= 0))
2389 * If the ttl drops below 1 when forwarding, generate
2392 error0 = IP4_ERROR_TIME_EXPIRED;
2393 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2394 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2395 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2396 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2400 if (rewrite_for_locally_received_packets)
2403 * We have to override the next_index in ARP adjacencies,
2404 * because they're set up for ip4-arp, not this node...
2406 if (PREDICT_FALSE(adj0->lookup_next_index
2407 == IP_LOOKUP_NEXT_ARP))
2408 next0_override = IP4_REWRITE_NEXT_ARP;
2411 /* Guess we are only writing on simple Ethernet header. */
2412 vnet_rewrite_one_header (adj0[0], ip0,
2413 sizeof (ethernet_header_t));
2415 /* Update packet buffer attributes/set output interface. */
2416 rw_len0 = adj0[0].rewrite_header.data_bytes;
2417 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2419 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2420 vlib_increment_combined_counter
2421 (&adjacency_counters,
2422 cpu_index, adj_index0,
2423 /* packet increment */ 0,
2424 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2426 /* Check MTU of outgoing interface. */
2427 error0 = (vlib_buffer_length_in_chain (vm, p0)
2428 > adj0[0].rewrite_header.max_l3_packet_bytes
2429 ? IP4_ERROR_MTU_EXCEEDED
2432 p0->error = error_node->errors[error0];
2434 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2435 * to see the IP header */
2436 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2438 p0->current_data -= rw_len0;
2439 p0->current_length += rw_len0;
2440 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2442 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2443 next0 = adj0[0].rewrite_header.next_index;
2447 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2451 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2454 p0->current_config_index =
2455 vec_elt (cm->config_index_by_sw_if_index,
2457 vnet_get_config_data (&cm->config_main,
2458 &p0->current_config_index,
2460 /* # bytes of config data */ 0);
2464 if (rewrite_for_locally_received_packets)
2465 next0 = next0 && next0_override ? next0_override : next0;
2470 n_left_to_next -= 1;
2472 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2473 to_next, n_left_to_next,
2477 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2480 /* Need to do trace after rewrites to pick up new packet data. */
2481 if (node->flags & VLIB_NODE_FLAG_TRACE)
2482 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2484 return frame->n_vectors;
2488 /** @brief IPv4 transit rewrite node.
2489 @node ip4-rewrite-transit
2491 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2492 header checksum, fetch the ip adjacency, check the outbound mtu,
2493 apply the adjacency rewrite, and send pkts to the adjacency
2494 rewrite header's next_index.
Packets whose TTL expires are directed to the ip4-icmp-error next node
(IP4_REWRITE_NEXT_ICMP_ERROR) so that a time-exceeded error can be built.
This function is a thin wrapper: all of the work happens in
ip4_rewrite_inline, called with rewrite_for_locally_received_packets = 0
and a trailing flag of 0 (ip4_midchain passes 1 for that flag).
2496 @param vm vlib_main_t corresponding to the current thread
2497 @param node vlib_node_runtime_t
2498 @param frame vlib_frame_t whose contents should be dispatched
@return the value returned by ip4_rewrite_inline for this frame
2500 @par Graph mechanics: buffer metadata, next index usage
2503 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2504 - the rewrite adjacency index
2505 - <code>adj->lookup_next_index</code>
2506 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2507 the packet will be dropped.
2508 - <code>adj->rewrite_header</code>
2509 - Rewrite string length, rewrite string, next_index
2512 - <code>b->current_data, b->current_length</code>
2513 - Updated net of applying the rewrite string
2515 <em>Next Indices:</em>
2516 - <code> adj->rewrite_header.next_index </code>
2520 ip4_rewrite_transit (vlib_main_t * vm,
2521 vlib_node_runtime_t * node,
2522 vlib_frame_t * frame)
2524 return ip4_rewrite_inline (vm, node, frame,
2525 /* rewrite_for_locally_received_packets */ 0, 0);
2528 /** @brief IPv4 local rewrite node.
2529 @node ip4-rewrite-local
2531 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2532 the outbound interface mtu, apply the adjacency rewrite, and send
2533 pkts to the adjacency rewrite header's next_index. Also copes
2534 with locally received icmp4 packets whose src and dst are both the
2535 interface address.
Unlike the transit node, the TTL decrement and checksum update are
skipped for locally received packets. ARP adjacencies are set up for
ip4-arp rather than this node, so their next index is overridden with
IP4_REWRITE_NEXT_ARP.
This function is a thin wrapper: all of the work happens in
ip4_rewrite_inline, called with rewrite_for_locally_received_packets = 1
and a trailing flag of 0.
2537 @param vm vlib_main_t corresponding to the current thread
2538 @param node vlib_node_runtime_t
2539 @param frame vlib_frame_t whose contents should be dispatched
@return the value returned by ip4_rewrite_inline for this frame
2541 @par Graph mechanics: buffer metadata, next index usage
2544 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2545 - the rewrite adjacency index
2546 - <code>adj->lookup_next_index</code>
2547 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2548 the packet will be dropped.
2549 - <code>adj->rewrite_header</code>
2550 - Rewrite string length, rewrite string, next_index
2553 - <code>b->current_data, b->current_length</code>
2554 - Updated net of applying the rewrite string
2556 <em>Next Indices:</em>
2557 - <code> adj->rewrite_header.next_index </code>
2562 ip4_rewrite_local (vlib_main_t * vm,
2563 vlib_node_runtime_t * node,
2564 vlib_frame_t * frame)
2566 return ip4_rewrite_inline (vm, node, frame,
2567 /* rewrite_for_locally_received_packets */ 1, 0);
/* Node function for "ip4-midchain": a thin wrapper that runs
 * ip4_rewrite_inline with rewrite_for_locally_received_packets = 0 and
 * the trailing argument set to 1 (the transit and local wrappers pass 0).
 * NOTE(review): the trailing argument presumably enables the
 * adj->sub_type.midchain.fixup_func call seen in ip4_rewrite_inline --
 * confirm against that function's parameter list. */
2571 ip4_midchain (vlib_main_t * vm,
2572 vlib_node_runtime_t * node,
2573 vlib_frame_t * frame)
2575 return ip4_rewrite_inline (vm, node, frame,
2576 /* rewrite_for_locally_received_packets */ 0, 1);
/* Graph node registration for "ip4-rewrite-transit", dispatched by
 * ip4_rewrite_transit.  Its next nodes are error-drop, ip4-arp and
 * ip4-icmp-error, indexed by the IP4_REWRITE_NEXT_* values.  The
 * ip4-midchain and ip4-rewrite-local nodes are declared siblings of it. */
2579 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2580 .function = ip4_rewrite_transit,
2581 .name = "ip4-rewrite-transit",
2582 .vector_size = sizeof (u32),
2584 .format_trace = format_ip4_rewrite_trace,
2588 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2589 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2590 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2594 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
/* Graph node registration for "ip4-midchain", dispatched by ip4_midchain
 * and declared a sibling of "ip4-rewrite-transit".  Traces are formatted
 * with format_ip4_forward_next_trace. */
2596 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2597 .function = ip4_midchain,
2598 .name = "ip4-midchain",
2599 .vector_size = sizeof (u32),
2601 .format_trace = format_ip4_forward_next_trace,
2603 .sibling_of = "ip4-rewrite-transit",
2606 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/* Graph node registration for "ip4-rewrite-local", dispatched by
 * ip4_rewrite_local and declared a sibling of "ip4-rewrite-transit".
 * Traces are formatted with format_ip4_rewrite_trace. */
2608 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2609 .function = ip4_rewrite_local,
2610 .name = "ip4-rewrite-local",
2611 .vector_size = sizeof (u32),
2613 .sibling_of = "ip4-rewrite-transit",
2615 .format_trace = format_ip4_rewrite_trace,
2620 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/* CLI handler behind "set interface ip table".
 *
 * Parses a software interface followed by a numeric table id from the
 * input.  An unparsable interface or a missing table id produces a
 * clib error that quotes the offending input.  On success the IPv4 FIB
 * table is found or created (and locked) and its fib index is stored in
 * ip4_main.fib_index_by_sw_if_index for the interface; the vector is
 * grown first so the slot exists.
 * NOTE(review): presumably the FIB is keyed by the parsed table id --
 * confirm the remaining fib_table_find_or_create_and_lock arguments. */
2622 static clib_error_t *
2623 add_del_interface_table (vlib_main_t * vm,
2624 unformat_input_t * input,
2625 vlib_cli_command_t * cmd)
2627 vnet_main_t * vnm = vnet_get_main();
2628 clib_error_t * error = 0;
2629 u32 sw_if_index, table_id;
2633 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2635 error = clib_error_return (0, "unknown interface `%U'",
2636 format_unformat_error, input);
2640 if (unformat (input, "%d", &table_id))
2644 error = clib_error_return (0, "expected table id `%U'",
2645 format_unformat_error, input);
2650 ip4_main_t * im = &ip4_main;
2653 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2658 // changing an interface's table has consequences for any connecteds
2659 // and adj-fibs already installed.
2661 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2662 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2670 * Place the indicated interface into the supplied VRF
2673 * @cliexstart{set interface ip table}
2675 * vpp# set interface ip table GigabitEthernet2/0/0 2
2677 * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2678 * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2679 * Upon RX, packets will be processed in the last IP table ID provisioned.
2680 * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
/* Registers the "set interface ip table" CLI command; the handler
 * add_del_interface_table expects an interface followed by a table id. */
2683 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2684 .path = "set interface ip table",
2685 .function = add_del_interface_table,
2686 .short_help = "Add/delete FIB table id for interface",
/* Node function for "ip4-lookup-multicast".
 *
 * For every buffer in the frame:
 *  - pick the FIB index from ip4_main.fib_index_by_sw_if_index using the
 *    RX interface, unless sw_if_index[VLIB_TX] is set (not ~0), in which
 *    case that value is used as the FIB index instead;
 *  - look up the load-balance object for the packet in that FIB;
 *  - compute the flow hash with the load-balance's hash config and store
 *    it in vnet_buffer(b)->ip.flow_hash;
 *  - select a bucket with (flow_hash & lb_n_buckets_minus_1), then take
 *    the next node and adj_index[VLIB_TX] from the bucket's DPO;
 *  - bump the per-load-balance "to" combined counter by one packet and
 *    the buffer chain length.
 *
 * Buffers are processed two at a time while at least four remain (the
 * following two are prefetched), then one at a time.
 *
 * @param vm    vlib_main_t for the current thread
 * @param node  vlib_node_runtime_t of this node
 * @param frame vlib_frame_t holding the buffer indices to process
 * @return frame->n_vectors
 */
2691 ip4_lookup_multicast (vlib_main_t * vm,
2692 vlib_node_runtime_t * node,
2693 vlib_frame_t * frame)
2695 ip4_main_t * im = &ip4_main;
2696 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2697 u32 n_left_from, n_left_to_next, * from, * to_next;
2698 ip_lookup_next_t next;
2699 u32 cpu_index = os_get_cpu_number();
2701 from = vlib_frame_vector_args (frame);
2702 n_left_from = frame->n_vectors;
2703 next = node->cached_next_index;
2705 while (n_left_from > 0)
2707 vlib_get_next_frame (vm, node, next,
2708 to_next, n_left_to_next);
/* Dual-buffer loop. */
2710 while (n_left_from >= 4 && n_left_to_next >= 2)
2712 vlib_buffer_t * p0, * p1;
2713 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2714 ip_lookup_next_t next0, next1;
2715 ip4_header_t * ip0, * ip1;
2716 u32 fib_index0, fib_index1;
2717 const dpo_id_t *dpo0, *dpo1;
2718 const load_balance_t * lb0, * lb1;
2720 /* Prefetch next iteration. */
2722 vlib_buffer_t * p2, * p3;
2724 p2 = vlib_get_buffer (vm, from[2]);
2725 p3 = vlib_get_buffer (vm, from[3]);
2727 vlib_prefetch_buffer_header (p2, LOAD);
2728 vlib_prefetch_buffer_header (p3, LOAD);
2730 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2731 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2734 pi0 = to_next[0] = from[0];
2735 pi1 = to_next[1] = from[1];
2737 p0 = vlib_get_buffer (vm, pi0);
2738 p1 = vlib_get_buffer (vm, pi1);
2740 ip0 = vlib_buffer_get_current (p0);
2741 ip1 = vlib_buffer_get_current (p1);
/* FIB index defaults to the RX interface's table; a TX value other
 * than ~0 overrides it. */
2743 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2744 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2745 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2746 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2747 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2748 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2750 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2752 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2755 lb0 = load_balance_get (lb_index0);
2756 lb1 = load_balance_get (lb_index1);
2758 ASSERT (lb0->lb_n_buckets > 0);
2759 ASSERT (is_pow2 (lb0->lb_n_buckets));
2760 ASSERT (lb1->lb_n_buckets > 0);
2761 ASSERT (is_pow2 (lb1->lb_n_buckets));
2763 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2764 (ip0, lb0->lb_hash_config);
2766 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2767 (ip1, lb1->lb_hash_config);
2769 dpo0 = load_balance_get_bucket_i(lb0,
2770 (vnet_buffer (p0)->ip.flow_hash &
2771 (lb0->lb_n_buckets_minus_1)));
/* Mask p1's hash with lb1's own bucket count: lb0 and lb1 come from
 * independent lookups and may have different numbers of buckets. */
2772 dpo1 = load_balance_get_bucket_i(lb1,
2773 (vnet_buffer (p1)->ip.flow_hash &
2774 (lb1->lb_n_buckets_minus_1)));
2776 next0 = dpo0->dpoi_next_node;
2777 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2778 next1 = dpo1->dpoi_next_node;
2779 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2781 if (1) /* $$$$$$ HACK FIXME */
2782 vlib_increment_combined_counter
2783 (cm, cpu_index, lb_index0, 1,
2784 vlib_buffer_length_in_chain (vm, p0));
2785 if (1) /* $$$$$$ HACK FIXME */
2786 vlib_increment_combined_counter
2787 (cm, cpu_index, lb_index1, 1,
2788 vlib_buffer_length_in_chain (vm, p1));
2792 n_left_to_next -= 2;
/* Bit 0 set: p0 goes to a different next node than the cached one;
 * bit 1 set: same for p1. */
2795 wrong_next = (next0 != next) + 2*(next1 != next);
2796 if (PREDICT_FALSE (wrong_next != 0))
2804 n_left_to_next += 1;
2805 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2811 n_left_to_next += 1;
2812 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2818 n_left_to_next += 2;
2819 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2820 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2824 vlib_put_next_frame (vm, node, next, n_left_to_next);
2826 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single-buffer loop for the remainder. */
2832 while (n_left_from > 0 && n_left_to_next > 0)
2837 ip_lookup_next_t next0;
2839 const dpo_id_t *dpo0;
2840 const load_balance_t * lb0;
2845 p0 = vlib_get_buffer (vm, pi0);
2847 ip0 = vlib_buffer_get_current (p0);
2849 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2850 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2851 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2852 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2854 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2857 lb0 = load_balance_get (lb_index0);
2859 ASSERT (lb0->lb_n_buckets > 0);
2860 ASSERT (is_pow2 (lb0->lb_n_buckets));
2862 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2863 (ip0, lb0->lb_hash_config);
2865 dpo0 = load_balance_get_bucket_i(lb0,
2866 (vnet_buffer (p0)->ip.flow_hash &
2867 (lb0->lb_n_buckets_minus_1)));
2869 next0 = dpo0->dpoi_next_node;
2870 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2872 if (1) /* $$$$$$ HACK FIXME */
2873 vlib_increment_combined_counter
2874 (cm, cpu_index, lb_index0, 1,
2875 vlib_buffer_length_in_chain (vm, p0));
2879 n_left_to_next -= 1;
2882 if (PREDICT_FALSE (next0 != next))
2884 n_left_to_next += 1;
2885 vlib_put_next_frame (vm, node, next, n_left_to_next);
2887 vlib_get_next_frame (vm, node, next,
2888 to_next, n_left_to_next);
2891 n_left_to_next -= 1;
2895 vlib_put_next_frame (vm, node, next, n_left_to_next);
2898 if (node->flags & VLIB_NODE_FLAG_TRACE)
2899 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2901 return frame->n_vectors;
/* Graph node registration for "ip4-lookup-multicast", dispatched by
 * ip4_lookup_multicast and declared a sibling of "ip4-lookup".  Traces
 * are formatted with format_ip4_lookup_trace. */
2904 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2905 .function = ip4_lookup_multicast,
2906 .name = "ip4-lookup-multicast",
2907 .vector_size = sizeof (u32),
2908 .sibling_of = "ip4-lookup",
2909 .format_trace = format_ip4_lookup_trace,
2914 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/* Graph node registration for "ip4-multicast".  Its node function is
 * ip4_drop; traces are formatted with format_ip4_forward_next_trace. */
2916 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2917 .function = ip4_drop,
2918 .name = "ip4-multicast",
2919 .vector_size = sizeof (u32),
2921 .format_trace = format_ip4_forward_next_trace,
/* Cross-checks the mtrie against the FIB table lookup for one address.
 *
 * Starting from the root leaf, performs four mtrie lookup steps (step
 * index 0 through 3) on address a in the FIB given by fib_index0.  If the
 * walk ends on an empty leaf, the mtrie's default leaf is used instead.
 *
 * @param a           address to look up
 * @param fib_index0  index of the IPv4 FIB to check
 * @return non-zero when the index stored in the resulting leaf equals the
 *         value returned by ip4_fib_table_lookup_lb for the same address,
 *         zero otherwise
 */
2929 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2931 ip4_fib_mtrie_t * mtrie0;
2932 ip4_fib_mtrie_leaf_t leaf0;
2935 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
2937 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2938 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2939 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2940 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2941 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2943 /* Handle default route. */
2944 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2946 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
2948 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler for "test lookup".
 *
 * Accepts "table <id>", "count <n>" and a base IPv4 address in any order;
 * anything else is rejected with an "unknown input" error.  It then runs
 * ip4_lookup_validate on n consecutive addresses, adding one to the base
 * address (in host byte order) after each check, and prints either the
 * number of failed lookups or a "No errors" line.
 * NOTE(review): table_id is passed to ip4_lookup_validate as its
 * fib_index0 argument -- confirm a table id is what is intended there. */
2951 static clib_error_t *
2952 test_lookup_command_fn (vlib_main_t * vm,
2953 unformat_input_t * input,
2954 vlib_cli_command_t * cmd)
2960 ip4_address_t ip4_base_address;
2963 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2964 if (unformat (input, "table %d", &table_id))
2966 else if (unformat (input, "count %f", &count))
2969 else if (unformat (input, "%U",
2970 unformat_ip4_address, &ip4_base_address))
2973 return clib_error_return (0, "unknown input `%U'",
2974 format_unformat_error, input);
2979 for (i = 0; i < n; i++)
2981 if (!ip4_lookup_validate (&ip4_base_address, table_id))
2984 ip4_base_address.as_u32 =
2985 clib_host_to_net_u32 (1 +
2986 clib_net_to_host_u32 (ip4_base_address.as_u32));
2990 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2992 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/* Registers the "test lookup" CLI command, handled by
 * test_lookup_command_fn. */
2997 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2998 .path = "test lookup",
2999 .short_help = "test lookup",
3000 .function = test_lookup_command_fn,
/* Sets the flow hash configuration of the IPv4 FIB identified by table_id.
 *
 * The table id is resolved to a fib index through
 * ip4_main.fib_index_by_table_id, and flow_hash_config is then stored in
 * that FIB's flow_hash_config field.
 *
 * @param table_id          user-visible FIB table id
 * @param flow_hash_config  flow hash configuration bits to store
 * @return VNET_API_ERROR_NO_SUCH_FIB for an unknown table (presumably when
 *         the hash lookup misses -- TODO confirm)
 */
3003 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
3005 ip4_main_t * im4 = &ip4_main;
3007 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3010 return VNET_API_ERROR_NO_SUCH_FIB;
3012 fib = ip4_fib_get (p[0]);
3014 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 *
 * Parses "table <id>" plus any of the keywords generated from
 * foreach_flow_hash_bit, OR-ing each matched keyword's bit into
 * flow_hash_config (which starts at 0), and rejects anything else with an
 * "unknown input" error.  The result is applied with
 * vnet_set_ip4_flow_hash; VNET_API_ERROR_NO_SUCH_FIB is reported as
 * "no such FIB table <id>", and another switch arm (presumably the
 * default) logs a BUG warning about an illegal flow hash config. */
3018 static clib_error_t *
3019 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3020 unformat_input_t * input,
3021 vlib_cli_command_t * cmd)
3025 u32 flow_hash_config = 0;
3028 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3029 if (unformat (input, "table %d", &table_id))
3032 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3033 foreach_flow_hash_bit
3039 return clib_error_return (0, "unknown input `%U'",
3040 format_unformat_error, input);
3042 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3048 case VNET_API_ERROR_NO_SUCH_FIB:
3049 return clib_error_return (0, "no such FIB table %d", table_id);
3052 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/* Registers the "set ip flow-hash" CLI command, handled by
 * set_ip_flow_hash_command_fn.  The help text starts with the command
 * path so that it matches what the user actually has to type. */
3059 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3060 .path = "set ip flow-hash",
3062 "set ip flow-hash table <fib-id> src dst sport dport proto reverse",
3063 .function = set_ip_flow_hash_command_fn,
/* Binds a classifier table to an interface for IPv4, or unbinds it.
 *
 * Returns VNET_API_ERROR_NO_MATCHING_INTERFACE when sw_if_index is a free
 * slot in the software interface pool, and VNET_API_ERROR_NO_SUCH_ENTRY
 * when table_index is not ~0 but is a free slot in the classifier table
 * pool.  Otherwise table_index is recorded in
 * lookup_main.classify_table_index_by_sw_if_index for the interface.
 *
 * If the interface has a first IPv4 address, a prefix is built from it
 * and the FIB bound to the interface is updated: with table_index != ~0 a
 * classify DPO is created and added as a FIB_SOURCE_CLASSIFY special
 * entry; the FIB_SOURCE_CLASSIFY entry is removed in the other case
 * (presumably the else arm -- TODO confirm). */
3066 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3069 vnet_main_t * vnm = vnet_get_main();
3070 vnet_interface_main_t * im = &vnm->interface_main;
3071 ip4_main_t * ipm = &ip4_main;
3072 ip_lookup_main_t * lm = &ipm->lookup_main;
3073 vnet_classify_main_t * cm = &vnet_classify_main;
3074 ip4_address_t *if_addr;
3076 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3077 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3079 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3080 return VNET_API_ERROR_NO_SUCH_ENTRY;
3082 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3083 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3085 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
3087 if (NULL != if_addr)
3089 fib_prefix_t pfx = {
3091 .fp_proto = FIB_PROTOCOL_IP4,
3092 .fp_addr.ip4 = *if_addr,
3096 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
3100 if (table_index != (u32) ~0)
3102 dpo_id_t dpo = DPO_NULL;
3107 classify_dpo_create(FIB_PROTOCOL_IP4,
3110 fib_table_entry_special_dpo_add(fib_index,
3112 FIB_SOURCE_CLASSIFY,
3113 FIB_ENTRY_FLAG_NONE,
3119 fib_table_entry_special_remove(fib_index,
3121 FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify".
 *
 * Parses "table-index <n>" and "intfc <interface>" in any order.  Both
 * are mandatory: a missing table index or interface yields an error
 * before anything is changed.  The binding itself is done by
 * vnet_set_ip4_classify_intfc, whose NO_MATCHING_INTERFACE and
 * NO_SUCH_ENTRY results are turned into "No such interface" and
 * "No such classifier table" errors. */
3128 static clib_error_t *
3129 set_ip_classify_command_fn (vlib_main_t * vm,
3130 unformat_input_t * input,
3131 vlib_cli_command_t * cmd)
3133 u32 table_index = ~0;
3134 int table_index_set = 0;
3135 u32 sw_if_index = ~0;
3138 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3139 if (unformat (input, "table-index %d", &table_index))
3140 table_index_set = 1;
3141 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3142 vnet_get_main(), &sw_if_index))
3148 if (table_index_set == 0)
3149 return clib_error_return (0, "classify table-index must be specified");
3151 if (sw_if_index == ~0)
3152 return clib_error_return (0, "interface / subif must be specified");
3154 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3161 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3162 return clib_error_return (0, "No such interface");
3164 case VNET_API_ERROR_NO_SUCH_ENTRY:
3165 return clib_error_return (0, "No such classifier table");
3170 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3171 .path = "set ip classify",
3173 "set ip classify intfc <int> table-index <index>",
3174 .function = set_ip_classify_command_fn,