2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * ip/ip4_forward.c: IP v4 forwarding
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vnet/vnet.h>
41 #include <vnet/ip/ip.h>
42 #include <vnet/ethernet/ethernet.h> /* for ethernet_header_t */
43 #include <vnet/ethernet/arp_packet.h> /* for ethernet_arp_header_t */
44 #include <vnet/ppp/ppp.h>
45 #include <vnet/srp/srp.h> /* for srp_hw_interface_class */
46 #include <vnet/api_errno.h> /* for API error numbers */
47 #include <vnet/fib/fib_table.h> /* for FIB table and entry creation */
48 #include <vnet/fib/fib_entry.h> /* for FIB table and entry creation */
49 #include <vnet/fib/fib_urpf_list.h> /* for FIB uRPF check */
50 #include <vnet/fib/ip4_fib.h>
51 #include <vnet/dpo/load_balance.h>
52 #include <vnet/dpo/classify_dpo.h>
/* Forward declaration of the trace helper used by the forwarding nodes in
   this file (lookup, load-balance, drop/punt); it is defined further down.
   which_adj_index selects which ip.adj_index[] slot is recorded. */
55 ip4_forward_next_trace (vlib_main_t * vm,
56 vlib_node_runtime_t * node,
58 vlib_rx_or_tx_t which_adj_index);
61 ip4_lookup_inline (vlib_main_t * vm,
62 vlib_node_runtime_t * node,
64 int lookup_for_responses_to_locally_received_packets)
66 ip4_main_t * im = &ip4_main;
67 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
68 u32 n_left_from, n_left_to_next, * from, * to_next;
69 ip_lookup_next_t next;
70 u32 cpu_index = os_get_cpu_number();
72 from = vlib_frame_vector_args (frame);
73 n_left_from = frame->n_vectors;
74 next = node->cached_next_index;
76 while (n_left_from > 0)
78 vlib_get_next_frame (vm, node, next,
79 to_next, n_left_to_next);
81 while (n_left_from >= 4 && n_left_to_next >= 2)
83 vlib_buffer_t * p0, * p1;
84 ip4_header_t * ip0, * ip1;
85 __attribute__((unused)) tcp_header_t * tcp0, * tcp1;
86 ip_lookup_next_t next0, next1;
87 const load_balance_t * lb0, * lb1;
88 ip4_fib_mtrie_t * mtrie0, * mtrie1;
89 ip4_fib_mtrie_leaf_t leaf0, leaf1;
90 ip4_address_t * dst_addr0, *dst_addr1;
91 __attribute__((unused)) u32 pi0, fib_index0, lb_index0, is_tcp_udp0;
92 __attribute__((unused)) u32 pi1, fib_index1, lb_index1, is_tcp_udp1;
93 flow_hash_config_t flow_hash_config0, flow_hash_config1;
96 const dpo_id_t *dpo0, *dpo1;
98 /* Prefetch next iteration. */
100 vlib_buffer_t * p2, * p3;
102 p2 = vlib_get_buffer (vm, from[2]);
103 p3 = vlib_get_buffer (vm, from[3]);
105 vlib_prefetch_buffer_header (p2, LOAD);
106 vlib_prefetch_buffer_header (p3, LOAD);
108 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
109 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
112 pi0 = to_next[0] = from[0];
113 pi1 = to_next[1] = from[1];
115 p0 = vlib_get_buffer (vm, pi0);
116 p1 = vlib_get_buffer (vm, pi1);
118 ip0 = vlib_buffer_get_current (p0);
119 ip1 = vlib_buffer_get_current (p1);
121 dst_addr0 = &ip0->dst_address;
122 dst_addr1 = &ip1->dst_address;
124 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
125 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
126 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
127 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
128 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
129 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
132 if (! lookup_for_responses_to_locally_received_packets)
134 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
135 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
137 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
139 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
140 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 0);
143 tcp0 = (void *) (ip0 + 1);
144 tcp1 = (void *) (ip1 + 1);
146 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
147 || ip0->protocol == IP_PROTOCOL_UDP);
148 is_tcp_udp1 = (ip1->protocol == IP_PROTOCOL_TCP
149 || ip1->protocol == IP_PROTOCOL_UDP);
151 if (! lookup_for_responses_to_locally_received_packets)
153 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
154 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 1);
157 if (! lookup_for_responses_to_locally_received_packets)
159 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
160 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
163 if (! lookup_for_responses_to_locally_received_packets)
165 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
166 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
169 if (lookup_for_responses_to_locally_received_packets)
171 lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
172 lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX];
176 /* Handle default route. */
177 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
178 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
180 lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
181 lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
184 lb0 = load_balance_get (lb_index0);
185 lb1 = load_balance_get (lb_index1);
187 /* Use flow hash to compute multipath adjacency. */
188 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
189 hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0;
190 if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
192 flow_hash_config0 = lb0->lb_hash_config;
193 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
194 ip4_compute_flow_hash (ip0, flow_hash_config0);
196 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
198 flow_hash_config1 = lb1->lb_hash_config;
199 hash_c1 = vnet_buffer (p1)->ip.flow_hash =
200 ip4_compute_flow_hash (ip1, flow_hash_config1);
203 ASSERT (lb0->lb_n_buckets > 0);
204 ASSERT (is_pow2 (lb0->lb_n_buckets));
205 ASSERT (lb1->lb_n_buckets > 0);
206 ASSERT (is_pow2 (lb1->lb_n_buckets));
208 dpo0 = load_balance_get_bucket_i(lb0,
210 (lb0->lb_n_buckets_minus_1)));
211 dpo1 = load_balance_get_bucket_i(lb1,
213 (lb0->lb_n_buckets_minus_1)));
215 next0 = dpo0->dpoi_next_node;
216 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
217 next1 = dpo1->dpoi_next_node;
218 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
220 vlib_increment_combined_counter
221 (cm, cpu_index, lb_index0, 1,
222 vlib_buffer_length_in_chain (vm, p0)
223 + sizeof(ethernet_header_t));
224 vlib_increment_combined_counter
225 (cm, cpu_index, lb_index1, 1,
226 vlib_buffer_length_in_chain (vm, p1)
227 + sizeof(ethernet_header_t));
234 wrong_next = (next0 != next) + 2*(next1 != next);
235 if (PREDICT_FALSE (wrong_next != 0))
244 vlib_set_next_frame_buffer (vm, node, next0, pi0);
251 vlib_set_next_frame_buffer (vm, node, next1, pi1);
258 vlib_set_next_frame_buffer (vm, node, next0, pi0);
259 vlib_set_next_frame_buffer (vm, node, next1, pi1);
263 vlib_put_next_frame (vm, node, next, n_left_to_next);
265 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
271 while (n_left_from > 0 && n_left_to_next > 0)
275 __attribute__((unused)) tcp_header_t * tcp0;
276 ip_lookup_next_t next0;
277 const load_balance_t *lb0;
278 ip4_fib_mtrie_t * mtrie0;
279 ip4_fib_mtrie_leaf_t leaf0;
280 ip4_address_t * dst_addr0;
281 __attribute__((unused)) u32 pi0, fib_index0, is_tcp_udp0, lbi0;
282 flow_hash_config_t flow_hash_config0;
283 const dpo_id_t *dpo0;
289 p0 = vlib_get_buffer (vm, pi0);
291 ip0 = vlib_buffer_get_current (p0);
293 dst_addr0 = &ip0->dst_address;
295 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
296 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
297 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
299 if (! lookup_for_responses_to_locally_received_packets)
301 mtrie0 = &ip4_fib_get( fib_index0)->mtrie;
303 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
305 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 0);
308 tcp0 = (void *) (ip0 + 1);
310 is_tcp_udp0 = (ip0->protocol == IP_PROTOCOL_TCP
311 || ip0->protocol == IP_PROTOCOL_UDP);
313 if (! lookup_for_responses_to_locally_received_packets)
314 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 1);
316 if (! lookup_for_responses_to_locally_received_packets)
317 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
319 if (! lookup_for_responses_to_locally_received_packets)
320 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
322 if (lookup_for_responses_to_locally_received_packets)
323 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX];
326 /* Handle default route. */
327 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
328 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
331 lb0 = load_balance_get (lbi0);
333 /* Use flow hash to compute multipath adjacency. */
334 hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0;
335 if (PREDICT_FALSE(lb0->lb_n_buckets > 1))
337 flow_hash_config0 = lb0->lb_hash_config;
339 hash_c0 = vnet_buffer (p0)->ip.flow_hash =
340 ip4_compute_flow_hash (ip0, flow_hash_config0);
343 ASSERT (lb0->lb_n_buckets > 0);
344 ASSERT (is_pow2 (lb0->lb_n_buckets));
346 dpo0 = load_balance_get_bucket_i(lb0,
348 (lb0->lb_n_buckets_minus_1)));
350 next0 = dpo0->dpoi_next_node;
351 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
353 vlib_increment_combined_counter
354 (cm, cpu_index, lbi0, 1,
355 vlib_buffer_length_in_chain (vm, p0));
362 if (PREDICT_FALSE (next0 != next))
365 vlib_put_next_frame (vm, node, next, n_left_to_next);
367 vlib_get_next_frame (vm, node, next,
368 to_next, n_left_to_next);
375 vlib_put_next_frame (vm, node, next, n_left_to_next);
378 if (node->flags & VLIB_NODE_FLAG_TRACE)
379 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
381 return frame->n_vectors;
384 /** @brief IPv4 lookup node.
387 This is the main IPv4 lookup dispatch node.
389 @param vm vlib_main_t corresponding to the current thread
390 @param node vlib_node_runtime_t
391 @param frame vlib_frame_t whose contents should be dispatched
393 @par Graph mechanics: buffer metadata, next index usage
396 - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
397 - Indicates the @c sw_if_index value of the interface that the
398 packet was received on.
399 - <code>vnet_buffer(b)->sw_if_index[VLIB_TX]</code>
400 - When the value is @c ~0 then the node performs a longest prefix
401 match (LPM) for the packet destination address in the FIB attached
402 to the receive interface.
403 - Otherwise perform LPM for the packet destination address in the
404 indicated FIB. In this case <code>[VLIB_TX]</code> is a FIB index
405 value (0, 1, ...) and not a VRF id.
408 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
409 - The index (@c dpoi_index) of the DPO selected from the lookup result load-balance.
412 - Dispatches the packet to the node index found in
413 dpo_id_t @c dpo->dpoi_next_node
414 (where @c dpo is the load-balance bucket selected by the flow hash).
/* Node function for "ip4-lookup": runs ip4_lookup_inline with
   lookup_for_responses_to_locally_received_packets set to 0, i.e. a full
   mtrie lookup on the destination address. */
417 ip4_lookup (vlib_main_t * vm,
418 vlib_node_runtime_t * node,
419 vlib_frame_t * frame)
421 return ip4_lookup_inline (vm, node, frame,
422 /* lookup_for_responses_to_locally_received_packets */ 0);
/* Trace formatter for ip4-lookup; defined later in this file. */
426 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-lookup". Its next-node table
   (IP4_LOOKUP_NEXT_NODES, IP_LOOKUP_N_NEXT entries) is what dpoi_next_node
   values index into. */
428 VLIB_REGISTER_NODE (ip4_lookup_node) = {
429 .function = ip4_lookup,
430 .name = "ip4-lookup",
431 .vector_size = sizeof (u32),
433 .format_trace = format_ip4_lookup_trace,
434 .n_next_nodes = IP_LOOKUP_N_NEXT,
435 .next_nodes = IP4_LOOKUP_NEXT_NODES,
438 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup)
/* Node function for "ip4-load-balance".
 *
 * Takes the load-balance index already stored in ip.adj_index[VLIB_TX],
 * always computes the flow hash with that load-balance's hash config,
 * selects a bucket with hash & lb_n_buckets_minus_1, overwrites
 * ip.adj_index[VLIB_TX] with the bucket's dpoi_index, counts the packet in
 * load_balance_main.lbm_via_counters and enqueues it to dpoi_next_node.
 * Packets are handled one at a time. Returns frame->n_vectors.
 */
441 ip4_load_balance (vlib_main_t * vm,
442 vlib_node_runtime_t * node,
443 vlib_frame_t * frame)
445 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_via_counters;
446 u32 n_left_from, n_left_to_next, * from, * to_next;
447 ip_lookup_next_t next;
448 u32 cpu_index = os_get_cpu_number();
450 from = vlib_frame_vector_args (frame);
451 n_left_from = frame->n_vectors;
452 next = node->cached_next_index;
/* Unlike ip4_lookup_inline, tracing is done before processing, so the trace
   records the incoming load-balance index rather than the chosen bucket. */
454 if (node->flags & VLIB_NODE_FLAG_TRACE)
455 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
457 while (n_left_from > 0)
459 vlib_get_next_frame (vm, node, next,
460 to_next, n_left_to_next);
463 while (n_left_from > 0 && n_left_to_next > 0)
465 ip_lookup_next_t next0;
466 const load_balance_t *lb0;
469 const ip4_header_t *ip0;
470 const dpo_id_t *dpo0;
475 p0 = vlib_get_buffer (vm, pi0);
477 ip0 = vlib_buffer_get_current (p0);
478 lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
480 lb0 = load_balance_get(lbi0);
481 hc0 = lb0->lb_hash_config;
482 vnet_buffer(p0)->ip.flow_hash = ip4_compute_flow_hash(ip0, hc0);
484 dpo0 = load_balance_get_bucket_i(lb0,
485 vnet_buffer(p0)->ip.flow_hash &
486 (lb0->lb_n_buckets_minus_1));
488 next0 = dpo0->dpoi_next_node;
489 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
491 vlib_increment_combined_counter
492 (cm, cpu_index, lbi0, 1,
493 vlib_buffer_length_in_chain (vm, p0));
/* On a next-node change, hand back the current frame and fetch a new one. */
500 if (PREDICT_FALSE (next0 != next))
503 vlib_put_next_frame (vm, node, next, n_left_to_next);
505 vlib_get_next_frame (vm, node, next,
506 to_next, n_left_to_next);
513 vlib_put_next_frame (vm, node, next, n_left_to_next);
516 return frame->n_vectors;
/* Generic trace formatter; defined later in this file. */
519 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args);
/* Graph node registration for "ip4-load-balance". It is declared a sibling
   of "ip4-lookup" instead of listing its own next nodes. */
521 VLIB_REGISTER_NODE (ip4_load_balance_node) = {
522 .function = ip4_load_balance,
523 .name = "ip4-load-balance",
524 .vector_size = sizeof (u32),
525 .sibling_of = "ip4-lookup",
527 .format_trace = format_ip4_forward_next_trace,
530 VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance)
532 /* get first interface address */
/* Iterates the IPv4 addresses configured on sw_if_index (honouring
   unnumbered interfaces) and reports the matching interface-address record
   through *result_ia, or 0 when no address was found.
   NOTE(review): *result_ia is written unconditionally, so callers must pass
   a non-NULL result_ia - confirm against the callers. */
534 ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
535 ip_interface_address_t ** result_ia)
537 ip_lookup_main_t * lm = &im->lookup_main;
538 ip_interface_address_t * ia = 0;
539 ip4_address_t * result = 0;
541 foreach_ip_interface_address (lm, ia, sw_if_index,
542 1 /* honor unnumbered */,
544 ip4_address_t * a = ip_interface_address_get_address (lm, ia);
549 *result_ia = result ? ia : 0;
/* Installs the FIB entries that go with interface address `a` in FIB
 * fib_index, all sourced as FIB_SOURCE_INTERFACE:
 *  - a CONNECTED|ATTACHED entry, whose adjacency is remembered in
 *    a->neighbor_probe_adj_index (reset to ~0 first);
 *  - if the interface has a classify table configured, a special entry
 *    backed by a classify DPO;
 *  - a CONNECTED|LOCAL entry.
 * The exact prefixes passed to each call are on lines not visible in this
 * listing.
 */
554 ip4_add_interface_routes (u32 sw_if_index,
555 ip4_main_t * im, u32 fib_index,
556 ip_interface_address_t * a)
558 ip_lookup_main_t * lm = &im->lookup_main;
559 ip4_address_t * address = ip_interface_address_get_address (lm, a);
561 .fp_len = a->address_length,
562 .fp_proto = FIB_PROTOCOL_IP4,
563 .fp_addr.ip4 = *address,
566 a->neighbor_probe_adj_index = ~0;
570 fib_node_index_t fei;
572 fei = fib_table_entry_update_one_path(fib_index,
574 FIB_SOURCE_INTERFACE,
575 (FIB_ENTRY_FLAG_CONNECTED |
576 FIB_ENTRY_FLAG_ATTACHED),
578 NULL, /* No next-hop address */
580 ~0, // invalid FIB index
583 FIB_ROUTE_PATH_FLAG_NONE);
584 a->neighbor_probe_adj_index = fib_entry_get_adj(fei);
/* Optional classify entry: only when this interface has a table index
   other than ~0. */
589 if (sw_if_index < vec_len (lm->classify_table_index_by_sw_if_index))
591 u32 classify_table_index =
592 lm->classify_table_index_by_sw_if_index [sw_if_index];
593 if (classify_table_index != (u32) ~0)
595 dpo_id_t dpo = DPO_NULL;
600 classify_dpo_create(FIB_PROTOCOL_IP4,
601 classify_table_index));
603 fib_table_entry_special_dpo_add(fib_index,
612 fib_table_entry_update_one_path(fib_index,
614 FIB_SOURCE_INTERFACE,
615 (FIB_ENTRY_FLAG_CONNECTED |
616 FIB_ENTRY_FLAG_LOCAL),
620 ~0, // invalid FIB index
623 FIB_ROUTE_PATH_FLAG_NONE);
/* Removes FIB_SOURCE_INTERFACE entries for an interface address from FIB
   fib_index. Two fib_table_entry_delete calls are made, presumably
   mirroring the two entries added by ip4_add_interface_routes (the prefix
   arguments are on lines not visible in this listing). */
627 ip4_del_interface_routes (ip4_main_t * im,
629 ip4_address_t * address,
633 .fp_len = address_length,
634 .fp_proto = FIB_PROTOCOL_IP4,
635 .fp_addr.ip4 = *address,
640 fib_table_entry_delete(fib_index,
642 FIB_SOURCE_INTERFACE);
646 fib_table_entry_delete(fib_index,
648 FIB_SOURCE_INTERFACE);
/* Reference-counted enable/disable of IPv4 processing on an interface.
 *
 * im->ip_enabled_by_sw_if_index[sw_if_index] is incremented on enable and
 * decremented on disable. Only a change between 0 and 1 goes on to add or
 * remove the lookup feature in the RX feature configs, for every cast value
 * up to and including VNET_IP_RX_MULTICAST_FEAT.
 * The second parameter is on a line not visible here; callers in this file
 * pass an enable flag (for example !is_del).
 */
652 ip4_sw_interface_enable_disable (u32 sw_if_index,
655 vlib_main_t * vm = vlib_get_main();
656 ip4_main_t * im = &ip4_main;
657 ip_lookup_main_t * lm = &im->lookup_main;
659 u32 lookup_feature_index;
661 vec_validate_init_empty (im->ip_enabled_by_sw_if_index, sw_if_index, 0);
664 * enable/disable only on the 1<->0 transition
668 if (1 != ++im->ip_enabled_by_sw_if_index[sw_if_index])
673 ASSERT(im->ip_enabled_by_sw_if_index[sw_if_index] > 0);
674 if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index])
678 for (cast = 0; cast <= VNET_IP_RX_MULTICAST_FEAT; cast++)
680 ip_config_main_t * cm = &lm->feature_config_mains[cast];
681 vnet_config_main_t * vcm = &cm->config_main;
683 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
684 ci = cm->config_index_by_sw_if_index[sw_if_index];
/* Pick the unicast or multicast lookup feature for this cast. */
686 if (cast == VNET_IP_RX_UNICAST_FEAT)
687 lookup_feature_index = im->ip4_unicast_rx_feature_lookup;
689 lookup_feature_index = im->ip4_multicast_rx_feature_lookup;
692 ci = vnet_config_add_feature (vm, vcm,
694 lookup_feature_index,
696 /* # bytes of config data */ 0);
698 ci = vnet_config_del_feature (vm, vcm,
700 lookup_feature_index,
702 /* # bytes of config data */ 0);
/* Store the config index returned by the add/del call. */
703 cm->config_index_by_sw_if_index[sw_if_index] = ci;
/* Adds or removes an IPv4 address on an interface.
 *
 * Steps visible in this listing:
 *  - make sure the interface has a FIB index slot and build an
 *    ip4_address_fib_t for the address;
 *  - reject an address that overlaps one already on the interface, in
 *    either direction, with an error naming both prefixes and the
 *    interface;
 *  - update the interface address pool via ip_interface_address_add_del;
 *  - enable or disable IPv4 on the interface (enable flag is !is_del);
 *  - delete or add the matching interface routes;
 *  - if the pool element count changed, invoke every registered
 *    add/del-address callback.
 * Returns a clib_error_t pointer, 0 meaning success.
 */
707 static clib_error_t *
708 ip4_add_del_interface_address_internal (vlib_main_t * vm,
710 ip4_address_t * address,
714 vnet_main_t * vnm = vnet_get_main();
715 ip4_main_t * im = &ip4_main;
716 ip_lookup_main_t * lm = &im->lookup_main;
717 clib_error_t * error = 0;
718 u32 if_address_index, elts_before;
719 ip4_address_fib_t ip4_af, * addr_fib = 0;
721 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
722 ip4_addr_fib_init (&ip4_af, address,
723 vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
724 vec_add1 (addr_fib, ip4_af);
727 * there is no support for adj-fib handling in the presence of overlapping
728 * subnets on interfaces. Easy fix - disallow overlapping subnets, like
733 /* When adding an address check that it does not conflict
734 with an existing address. */
735 ip_interface_address_t * ia;
736 foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
737 0 /* honor unnumbered */,
739 ip4_address_t * x = ip_interface_address_get_address (&im->lookup_main, ia);
/* Conflict if either prefix contains the other address. */
741 if (ip4_destination_matches_route (im, address, x, ia->address_length)
742 || ip4_destination_matches_route (im, x, address, address_length))
743 return clib_error_create ("failed to add %U which conflicts with %U for interface %U",
744 format_ip4_address_and_length, address, address_length,
745 format_ip4_address_and_length, x, ia->address_length,
746 format_vnet_sw_if_index_name, vnm, sw_if_index);
/* Remember the pool size so a real add/delete can be detected below. */
750 elts_before = pool_elts (lm->if_address_pool);
752 error = ip_interface_address_add_del
762 ip4_sw_interface_enable_disable(sw_if_index, !is_del);
765 ip4_del_interface_routes (im, ip4_af.fib_index, address,
768 ip4_add_interface_routes (sw_if_index,
769 im, ip4_af.fib_index,
771 (lm->if_address_pool, if_address_index));
773 /* An unchanged pool size means a duplicate address was added; only notify the callbacks when the pool actually grew or shrank. */
774 if (elts_before != pool_elts (lm->if_address_pool))
776 ip4_add_del_interface_address_callback_t * cb;
777 vec_foreach (cb, im->add_del_interface_address_callbacks)
778 cb->function (im, cb->function_opaque, sw_if_index,
779 address, address_length,
/* Public entry point for adding or removing an interface address: a thin
   wrapper that forwards its arguments to
   ip4_add_del_interface_address_internal and returns its result. */
790 ip4_add_del_interface_address (vlib_main_t * vm, u32 sw_if_index,
791 ip4_address_t * address, u32 address_length,
794 return ip4_add_del_interface_address_internal
795 (vm, sw_if_index, address, address_length,
799 /* Built-in ip4 unicast rx feature path definition */
/* Each registration names a graph node, the feature(s) it must run before,
   and the ip4_main field that receives its feature index. Following the
   runs_before constraints below gives the chain: ip4-inacl ->
   ip4-source-check-via-rx -> ip4-source-check-via-any ->
   ip4-policer-classify -> ipsec-input-ip4 -> vpath-input-ip4 ->
   ip4-lookup -> ip4-drop. ip4-source-and-port-range-check-rx is only
   constrained to run before ip4-policer-classify. */
800 VNET_IP4_UNICAST_FEATURE_INIT (ip4_inacl, static) = {
801 .node_name = "ip4-inacl",
802 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-rx", 0},
803 .feature_index = &ip4_main.ip4_unicast_rx_feature_check_access,
806 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_1, static) = {
807 .node_name = "ip4-source-check-via-rx",
808 .runs_before = ORDER_CONSTRAINTS {"ip4-source-check-via-any", 0},
810 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_rx,
813 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_check_2, static) = {
814 .node_name = "ip4-source-check-via-any",
815 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
817 &ip4_main.ip4_unicast_rx_feature_source_reachable_via_any,
820 VNET_IP4_UNICAST_FEATURE_INIT (ip4_source_and_port_range_check_rx, static) = {
821 .node_name = "ip4-source-and-port-range-check-rx",
822 .runs_before = ORDER_CONSTRAINTS {"ip4-policer-classify", 0},
824 &ip4_main.ip4_unicast_rx_feature_source_and_port_range_check,
827 VNET_IP4_UNICAST_FEATURE_INIT (ip4_policer_classify, static) = {
828 .node_name = "ip4-policer-classify",
829 .runs_before = ORDER_CONSTRAINTS {"ipsec-input-ip4", 0},
831 &ip4_main.ip4_unicast_rx_feature_policer_classify,
834 VNET_IP4_UNICAST_FEATURE_INIT (ip4_ipsec, static) = {
835 .node_name = "ipsec-input-ip4",
836 .runs_before = ORDER_CONSTRAINTS {"vpath-input-ip4", 0},
837 .feature_index = &ip4_main.ip4_unicast_rx_feature_ipsec,
840 VNET_IP4_UNICAST_FEATURE_INIT (ip4_vpath, static) = {
841 .node_name = "vpath-input-ip4",
842 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup", 0},
843 .feature_index = &ip4_main.ip4_unicast_rx_feature_vpath,
846 VNET_IP4_UNICAST_FEATURE_INIT (ip4_lookup, static) = {
847 .node_name = "ip4-lookup",
848 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
849 .feature_index = &ip4_main.ip4_unicast_rx_feature_lookup,
852 VNET_IP4_UNICAST_FEATURE_INIT (ip4_drop, static) = {
853 .node_name = "ip4-drop",
854 .runs_before = 0, /* not before any other features */
855 .feature_index = &ip4_main.ip4_unicast_rx_feature_drop,
859 /* Built-in ip4 multicast rx feature path definition */
/* Ordering given by the runs_before constraints below: vpath-input-ip4 ->
   ip4-lookup-multicast -> ip4-drop. */
860 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_vpath_mc, static) = {
861 .node_name = "vpath-input-ip4",
862 .runs_before = ORDER_CONSTRAINTS {"ip4-lookup-multicast", 0},
863 .feature_index = &ip4_main.ip4_multicast_rx_feature_vpath,
866 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_lookup_mc, static) = {
867 .node_name = "ip4-lookup-multicast",
868 .runs_before = ORDER_CONSTRAINTS {"ip4-drop", 0},
869 .feature_index = &ip4_main.ip4_multicast_rx_feature_lookup,
872 VNET_IP4_MULTICAST_FEATURE_INIT (ip4_mc_drop, static) = {
873 .node_name = "ip4-drop",
874 .runs_before = 0, /* last feature */
875 .feature_index = &ip4_main.ip4_multicast_rx_feature_drop,
/* Names of the graph nodes that start the rx and tx feature paths;
   ip4_feature_init chooses one of these arrays per cast. */
878 static char * rx_feature_start_nodes[] =
879 { "ip4-input", "ip4-input-no-checksum"};
881 static char * tx_feature_start_nodes[] =
883 "ip4-rewrite-transit",
887 /* Source and port-range check ip4 tx feature path definition */
/* This feature is constrained to run before "interface-output", which is
   registered below with no runs_before constraint. */
888 VNET_IP4_TX_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = {
889 .node_name = "ip4-source-and-port-range-check-tx",
890 .runs_before = ORDER_CONSTRAINTS {"interface-output", 0},
892 &ip4_main.ip4_unicast_tx_feature_source_and_port_range_check,
896 /* Built-in ip4 tx feature path definition */
897 VNET_IP4_TX_FEATURE_INIT (interface_output, static) = {
898 .node_name = "interface-output",
899 .runs_before = 0, /* not before any other features */
900 .feature_index = &ip4_main.ip4_tx_feature_interface_output,
/* Initialises the feature configuration for every cast
   (0 .. VNET_N_IP_FEAT-1) through ip_feature_init_cast. Casts below
   VNET_IP_TX_FEAT use the rx start-node list; the others use the tx list.
   A non-zero result from ip_feature_init_cast is treated as an error
   (its handling is on lines not visible in this listing). */
903 static clib_error_t *
904 ip4_feature_init (vlib_main_t * vm, ip4_main_t * im)
906 ip_lookup_main_t * lm = &im->lookup_main;
907 clib_error_t * error;
909 ip_config_main_t * cm;
910 vnet_config_main_t * vcm;
911 char **feature_start_nodes;
912 int feature_start_len;
914 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
916 cm = &lm->feature_config_mains[cast];
917 vcm = &cm->config_main;
919 if (cast < VNET_IP_TX_FEAT)
921 feature_start_nodes = rx_feature_start_nodes;
922 feature_start_len = ARRAY_LEN(rx_feature_start_nodes);
926 feature_start_nodes = tx_feature_start_nodes;
927 feature_start_len = ARRAY_LEN(tx_feature_start_nodes);
930 if ((error = ip_feature_init_cast (vm, cm, vcm,
933 im->next_feature[cast],
934 &im->feature_nodes[cast])))
/* Software-interface add/delete callback, registered through
 * VNET_SW_INTERFACE_ADD_DEL_FUNCTION.
 *
 * For every cast it adds or removes the terminal feature of that path on
 * the interface: ip4-drop for unicast and multicast rx, interface-output
 * for tx. The delete branch also zeroes the interface's IPv4 enable count
 * when that vector covers the interface. Always returns 0.
 */
941 static clib_error_t *
942 ip4_sw_interface_add_del (vnet_main_t * vnm,
946 vlib_main_t * vm = vnm->vlib_main;
947 ip4_main_t * im = &ip4_main;
948 ip_lookup_main_t * lm = &im->lookup_main;
952 /* Fill in lookup tables with default table (0). */
953 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
955 for (cast = 0; cast < VNET_N_IP_FEAT; cast++)
957 ip_config_main_t * cm = &lm->feature_config_mains[cast];
958 vnet_config_main_t * vcm = &cm->config_main;
960 vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
961 ci = cm->config_index_by_sw_if_index[sw_if_index];
/* Choose the terminal feature for this cast. */
963 if (cast == VNET_IP_RX_UNICAST_FEAT)
964 feature_index = im->ip4_unicast_rx_feature_drop;
965 else if (cast == VNET_IP_RX_MULTICAST_FEAT)
966 feature_index = im->ip4_multicast_rx_feature_drop;
968 feature_index = im->ip4_tx_feature_interface_output;
971 ci = vnet_config_add_feature (vm, vcm,
975 /* # bytes of config data */ 0);
978 ci = vnet_config_del_feature (vm, vcm, ci,
981 /* # bytes of config data */ 0);
/* Bounds check: the enable-count vector may not extend to this interface. */
982 if (vec_len(im->ip_enabled_by_sw_if_index) > sw_if_index)
983 im->ip_enabled_by_sw_if_index[sw_if_index] = 0;
985 cm->config_index_by_sw_if_index[sw_if_index] = ci;
987 * note: do not update the tx feature count here.
991 return /* no error */ 0;
994 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip4_sw_interface_add_del);
996 /* Global IP4 main. */
/* One-time initialisation of the IPv4 lookup subsystem, registered through
 * VLIB_INIT_FUNCTION.
 *
 *  - fills im->fib_masks[] with prefix-length masks in network byte order;
 *  - initialises the shared lookup main for IPv4 (is_ip6 = 0);
 *  - creates and locks the IPv4 FIB with table id 0;
 *  - hooks the packet-generator edit function for the ip4-lookup node;
 *  - builds the ARP request packet template (ethernet hardware type, IPv4
 *    protocol type, 6/4 byte address lengths, request opcode, zero target
 *    ethernet address);
 *  - runs ip4_feature_init.
 */
1000 ip4_lookup_init (vlib_main_t * vm)
1002 ip4_main_t * im = &ip4_main;
1003 clib_error_t * error;
1006 for (i = 0; i < ARRAY_LEN (im->fib_masks); i++)
/* Mask with the top i bits set, computed in host order and converted below. */
1011 m = pow2_mask (i) << (32 - i);
1014 im->fib_masks[i] = clib_host_to_net_u32 (m);
1017 ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0);
1019 /* Create FIB with index 0 and table id of 0. */
1020 fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4, 0);
1024 pn = pg_get_node (ip4_lookup_node.index);
1025 pn->unformat_edit = unformat_pg_ip4_header;
1029 ethernet_arp_header_t h;
1031 memset (&h, 0, sizeof (h));
1033 /* Set target ethernet address to all zeros. */
1034 memset (h.ip4_over_ethernet[1].ethernet, 0, sizeof (h.ip4_over_ethernet[1].ethernet));
/* Helper macros: _16 stores a 16-bit field in network order, _8 stores a
   byte field as-is. */
1036 #define _16(f,v) h.f = clib_host_to_net_u16 (v);
1037 #define _8(f,v) h.f = v;
1038 _16 (l2_type, ETHERNET_ARP_HARDWARE_TYPE_ethernet);
1039 _16 (l3_type, ETHERNET_TYPE_IP4);
1040 _8 (n_l2_address_bytes, 6);
1041 _8 (n_l3_address_bytes, 4);
1042 _16 (opcode, ETHERNET_ARP_OPCODE_request);
1046 vlib_packet_template_init (vm,
1047 &im->ip4_arp_request_packet_template,
1050 /* alloc chunk size */ 8,
1054 error = ip4_feature_init (vm, im);
1059 VLIB_INIT_FUNCTION (ip4_lookup_init);
/* Trace record written by ip4_forward_next_trace and read by the
   format_ip4_*_trace functions. Besides packet_data it carries the
   dpo_index, fib_index and flow_hash fields those functions use (their
   declarations are on lines not visible in this listing). */
1062 /* Adjacency taken. */
1067 /* Packet data, possibly *after* rewrite. */
1068 u8 packet_data[64 - 1*sizeof(u32)];
1069 } ip4_forward_next_trace_t;
/* Generic trace formatter: prints only the captured packet bytes as an IPv4
   header, indented to the current indent of s. The vm and node arguments
   are consumed from the va_list but unused. */
1071 static u8 * format_ip4_forward_next_trace (u8 * s, va_list * args)
1073 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1074 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1075 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1076 uword indent = format_get_indent (s);
1077 s = format (s, "%U%U",
1078 format_white_space, indent,
1079 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for ip4-lookup: prints the FIB index, DPO index and flow
   hash on one line, then the captured packet bytes as an IPv4 header on
   the next. */
1083 static u8 * format_ip4_lookup_trace (u8 * s, va_list * args)
1085 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1086 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1087 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1088 uword indent = format_get_indent (s);
1090 s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x",
1091 t->fib_index, t->dpo_index, t->flow_hash);
1092 s = format (s, "\n%U%U",
1093 format_white_space, indent,
1094 format_ip4_header, t->packet_data, sizeof (t->packet_data));
/* Trace formatter for rewrite nodes: prints the adjacency referenced by
   dpo_index together with the flow hash, then the captured packet bytes
   formatted against that adjacency.
   NOTE(review): the value printed under the "tx_sw_if_index" label is
   t->fib_index, which ip4_forward_next_trace fills from
   sw_if_index[VLIB_TX] when that is not ~0 - presumably the TX interface
   once the rewrite has run; confirm. */
1098 static u8 * format_ip4_rewrite_trace (u8 * s, va_list * args)
1100 CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
1101 CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
1102 ip4_forward_next_trace_t * t = va_arg (*args, ip4_forward_next_trace_t *);
1103 vnet_main_t * vnm = vnet_get_main();
1104 uword indent = format_get_indent (s);
1106 s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x",
1107 t->fib_index, t->dpo_index, format_ip_adjacency,
1108 vnm, t->dpo_index, FORMAT_IP_ADJACENCY_NONE,
1110 s = format (s, "\n%U%U",
1111 format_white_space, indent,
1112 format_ip_adjacency_packet_data,
1114 t->packet_data, sizeof (t->packet_data));
1118 /* Common trace function for all ip4-forward next nodes. */
/* For every buffer in the frame that has VLIB_BUFFER_IS_TRACED set, adds an
 * ip4_forward_next_trace_t record holding:
 *  - dpo_index: ip.adj_index[which_adj_index];
 *  - flow_hash: ip.flow_hash;
 *  - fib_index: sw_if_index[VLIB_TX] when it is not ~0, otherwise the FIB
 *    bound to the RX interface;
 *  - packet_data: sizeof(packet_data) bytes copied from the buffer's
 *    current data pointer.
 * Buffers are handled two at a time with prefetch of the following two,
 * then one at a time (the loop headers are on lines not visible in this
 * listing).
 */
1120 ip4_forward_next_trace (vlib_main_t * vm,
1121 vlib_node_runtime_t * node,
1122 vlib_frame_t * frame,
1123 vlib_rx_or_tx_t which_adj_index)
1126 ip4_main_t * im = &ip4_main;
1128 n_left = frame->n_vectors;
1129 from = vlib_frame_vector_args (frame);
1134 vlib_buffer_t * b0, * b1;
1135 ip4_forward_next_trace_t * t0, * t1;
1137 /* Prefetch next iteration. */
1138 vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
1139 vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
1144 b0 = vlib_get_buffer (vm, bi0);
1145 b1 = vlib_get_buffer (vm, bi1);
1147 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1149 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1150 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1151 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1152 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1153 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1154 vec_elt (im->fib_index_by_sw_if_index,
1155 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1157 clib_memcpy (t0->packet_data,
1158 vlib_buffer_get_current (b0),
1159 sizeof (t0->packet_data));
1161 if (b1->flags & VLIB_BUFFER_IS_TRACED)
1163 t1 = vlib_add_trace (vm, node, b1, sizeof (t1[0]));
1164 t1->dpo_index = vnet_buffer (b1)->ip.adj_index[which_adj_index];
1165 t1->flow_hash = vnet_buffer (b1)->ip.flow_hash;
1166 t1->fib_index = (vnet_buffer(b1)->sw_if_index[VLIB_TX] != (u32)~0) ?
1167 vnet_buffer(b1)->sw_if_index[VLIB_TX] :
1168 vec_elt (im->fib_index_by_sw_if_index,
1169 vnet_buffer(b1)->sw_if_index[VLIB_RX]);
1170 clib_memcpy (t1->packet_data,
1171 vlib_buffer_get_current (b1),
1172 sizeof (t1->packet_data));
/* Single-buffer tail. */
1182 ip4_forward_next_trace_t * t0;
1186 b0 = vlib_get_buffer (vm, bi0);
1188 if (b0->flags & VLIB_BUFFER_IS_TRACED)
1190 t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
1191 t0->dpo_index = vnet_buffer (b0)->ip.adj_index[which_adj_index];
1192 t0->flow_hash = vnet_buffer (b0)->ip.flow_hash;
1193 t0->fib_index = (vnet_buffer(b0)->sw_if_index[VLIB_TX] != (u32)~0) ?
1194 vnet_buffer(b0)->sw_if_index[VLIB_TX] :
1195 vec_elt (im->fib_index_by_sw_if_index,
1196 vnet_buffer(b0)->sw_if_index[VLIB_RX]);
1197 clib_memcpy (t0->packet_data,
1198 vlib_buffer_get_current (b0),
1199 sizeof (t0->packet_data));
/* Shared body of the ip4-drop and ip4-punt nodes: passes the whole frame to
   vlib_error_drop_buffers, attributing the buffers to ip4_input_node with
   the supplied error_code, then records traces if tracing is enabled on
   the node. */
1207 ip4_drop_or_punt (vlib_main_t * vm,
1208 vlib_node_runtime_t * node,
1209 vlib_frame_t * frame,
1210 ip4_error_t error_code)
1212 u32 * buffers = vlib_frame_vector_args (frame);
1213 uword n_packets = frame->n_vectors;
1215 vlib_error_drop_buffers (vm, node,
1220 ip4_input_node.index,
1223 if (node->flags & VLIB_NODE_FLAG_TRACE)
1224 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
/* Node function for ip4_drop_node: handles the frame with error code
   IP4_ERROR_ADJACENCY_DROP. */
1230 ip4_drop (vlib_main_t * vm,
1231 vlib_node_runtime_t * node,
1232 vlib_frame_t * frame)
1233 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); }
/* Node function for ip4_punt_node: handles the frame with error code
   IP4_ERROR_ADJACENCY_PUNT. */
1236 ip4_punt (vlib_main_t * vm,
1237 vlib_node_runtime_t * node,
1238 vlib_frame_t * frame)
1239 { return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); }
1241 VLIB_REGISTER_NODE (ip4_drop_node,static) = {
1242 .function = ip4_drop,
1244 .vector_size = sizeof (u32),
1246 .format_trace = format_ip4_forward_next_trace,
1254 VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop)
1256 VLIB_REGISTER_NODE (ip4_punt_node,static) = {
1257 .function = ip4_punt,
1259 .vector_size = sizeof (u32),
1261 .format_trace = format_ip4_forward_next_trace,
1269 VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt)
1271 /* Compute TCP/UDP/ICMP4 checksum in software. */
1273 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
1277 u32 ip_header_length, payload_length_host_byte_order;
1278 u32 n_this_buffer, n_bytes_left;
1280 void * data_this_buffer;
1282 /* Initialize checksum with ip header. */
1283 ip_header_length = ip4_header_bytes (ip0);
1284 payload_length_host_byte_order = clib_net_to_host_u16 (ip0->length) - ip_header_length;
1285 sum0 = clib_host_to_net_u32 (payload_length_host_byte_order + (ip0->protocol << 16));
1287 if (BITS (uword) == 32)
1289 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u32));
1290 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->dst_address, u32));
1293 sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
1295 n_bytes_left = n_this_buffer = payload_length_host_byte_order;
1296 data_this_buffer = (void *) ip0 + ip_header_length;
1297 if (n_this_buffer + ip_header_length > p0->current_length)
1298 n_this_buffer = p0->current_length > ip_header_length ? p0->current_length - ip_header_length : 0;
1301 sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
1302 n_bytes_left -= n_this_buffer;
1303 if (n_bytes_left == 0)
1306 ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
1307 p0 = vlib_get_buffer (vm, p0->next_buffer);
1308 data_this_buffer = vlib_buffer_get_current (p0);
1309 n_this_buffer = p0->current_length;
1312 sum16 = ~ ip_csum_fold (sum0);
1318 ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
1320 ip4_header_t * ip0 = vlib_buffer_get_current (p0);
1321 udp_header_t * udp0;
1324 ASSERT (ip0->protocol == IP_PROTOCOL_TCP
1325 || ip0->protocol == IP_PROTOCOL_UDP);
1327 udp0 = (void *) (ip0 + 1);
1328 if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0)
1330 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1331 | IP_BUFFER_L4_CHECKSUM_CORRECT);
1335 sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0);
1337 p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED
1338 | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT));
1344 ip4_local (vlib_main_t * vm,
1345 vlib_node_runtime_t * node,
1346 vlib_frame_t * frame)
1348 ip4_main_t * im = &ip4_main;
1349 ip_lookup_main_t * lm = &im->lookup_main;
1350 ip_local_next_t next_index;
1351 u32 * from, * to_next, n_left_from, n_left_to_next;
1352 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
1354 from = vlib_frame_vector_args (frame);
1355 n_left_from = frame->n_vectors;
1356 next_index = node->cached_next_index;
1358 if (node->flags & VLIB_NODE_FLAG_TRACE)
1359 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1361 while (n_left_from > 0)
1363 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1365 while (n_left_from >= 4 && n_left_to_next >= 2)
1367 vlib_buffer_t * p0, * p1;
1368 ip4_header_t * ip0, * ip1;
1369 udp_header_t * udp0, * udp1;
1370 ip4_fib_mtrie_t * mtrie0, * mtrie1;
1371 ip4_fib_mtrie_leaf_t leaf0, leaf1;
1372 const dpo_id_t *dpo0, *dpo1;
1373 const load_balance_t *lb0, *lb1;
1374 u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0;
1375 u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1;
1376 i32 len_diff0, len_diff1;
1377 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1378 u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1;
1381 pi0 = to_next[0] = from[0];
1382 pi1 = to_next[1] = from[1];
1386 n_left_to_next -= 2;
1388 p0 = vlib_get_buffer (vm, pi0);
1389 p1 = vlib_get_buffer (vm, pi1);
1391 ip0 = vlib_buffer_get_current (p0);
1392 ip1 = vlib_buffer_get_current (p1);
1394 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1395 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1396 fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
1397 vnet_buffer(p1)->sw_if_index[VLIB_RX]);
1399 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1400 mtrie1 = &ip4_fib_get (fib_index1)->mtrie;
1402 leaf0 = leaf1 = IP4_FIB_MTRIE_LEAF_ROOT;
1404 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1405 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 0);
1407 /* Treat IP frag packets as "experimental" protocol for now
1408 until support of IP frag reassembly is implemented */
1409 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1410 proto1 = ip4_is_fragment(ip1) ? 0xfe : ip1->protocol;
1411 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1412 is_udp1 = proto1 == IP_PROTOCOL_UDP;
1413 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1414 is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP;
1419 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1420 good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1422 udp0 = ip4_next_header (ip0);
1423 udp1 = ip4_next_header (ip1);
1425 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1426 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1427 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1429 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1430 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 1);
1432 /* Verify UDP length. */
1433 ip_len0 = clib_net_to_host_u16 (ip0->length);
1434 ip_len1 = clib_net_to_host_u16 (ip1->length);
1435 udp_len0 = clib_net_to_host_u16 (udp0->length);
1436 udp_len1 = clib_net_to_host_u16 (udp1->length);
1438 len_diff0 = ip_len0 - udp_len0;
1439 len_diff1 = ip_len1 - udp_len1;
1441 len_diff0 = is_udp0 ? len_diff0 : 0;
1442 len_diff1 = is_udp1 ? len_diff1 : 0;
1444 if (PREDICT_FALSE (! (is_tcp_udp0 & is_tcp_udp1
1445 & good_tcp_udp0 & good_tcp_udp1)))
1450 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1451 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1453 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1454 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1459 && ! (flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1460 flags1 = ip4_tcp_udp_validate_checksum (vm, p1);
1462 (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1463 good_tcp_udp1 |= is_udp1 && udp1->checksum == 0;
1467 good_tcp_udp0 &= len_diff0 >= 0;
1468 good_tcp_udp1 &= len_diff1 >= 0;
1470 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1471 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2);
1473 error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL;
1475 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1476 error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1;
1478 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1479 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1480 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1482 error1 = (is_tcp_udp1 && ! good_tcp_udp1
1483 ? IP4_ERROR_TCP_CHECKSUM + is_udp1
1486 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1487 leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3);
1488 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1489 leaf1 = (leaf1 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie1->default_leaf : leaf1);
1491 vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1492 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1494 vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
1495 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1;
1497 lb0 = load_balance_get(lbi0);
1498 lb1 = load_balance_get(lbi1);
1499 dpo0 = load_balance_get_bucket_i(lb0, 0);
1500 dpo1 = load_balance_get_bucket_i(lb1, 0);
1503 * Must have a route to source otherwise we drop the packet.
1504 * ip4 broadcasts are accepted, e.g. to make dhcp client work
1507 * - the source is a recieve => it's from us => bogus, do this
1508 * first since it sets a different error code.
1509 * - uRPF check for any route to source - accept if passes.
1510 * - allow packets destined to the broadcast address from unknown sources
1512 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1513 dpo0->dpoi_type == DPO_RECEIVE) ?
1514 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1516 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1517 !fib_urpf_check_size(lb0->lb_urpf) &&
1518 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1519 ? IP4_ERROR_SRC_LOOKUP_MISS
1521 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1522 dpo1->dpoi_type == DPO_RECEIVE) ?
1523 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1525 error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1526 !fib_urpf_check_size(lb1->lb_urpf) &&
1527 ip1->dst_address.as_u32 != 0xFFFFFFFF)
1528 ? IP4_ERROR_SRC_LOOKUP_MISS
1531 next0 = lm->local_next_by_ip_protocol[proto0];
1532 next1 = lm->local_next_by_ip_protocol[proto1];
1534 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1535 next1 = error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1;
1537 p0->error = error0 ? error_node->errors[error0] : 0;
1538 p1->error = error1 ? error_node->errors[error1] : 0;
1540 enqueue_code = (next0 != next_index) + 2*(next1 != next_index);
1542 if (PREDICT_FALSE (enqueue_code != 0))
1544 switch (enqueue_code)
1550 n_left_to_next += 1;
1551 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1557 n_left_to_next += 1;
1558 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1562 /* A B B or A B C */
1564 n_left_to_next += 2;
1565 vlib_set_next_frame_buffer (vm, node, next0, pi0);
1566 vlib_set_next_frame_buffer (vm, node, next1, pi1);
1569 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1571 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1578 while (n_left_from > 0 && n_left_to_next > 0)
1582 udp_header_t * udp0;
1583 ip4_fib_mtrie_t * mtrie0;
1584 ip4_fib_mtrie_leaf_t leaf0;
1585 u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0;
1587 u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0;
1588 load_balance_t *lb0;
1589 const dpo_id_t *dpo0;
1591 pi0 = to_next[0] = from[0];
1595 n_left_to_next -= 1;
1597 p0 = vlib_get_buffer (vm, pi0);
1599 ip0 = vlib_buffer_get_current (p0);
1601 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
1602 vnet_buffer(p0)->sw_if_index[VLIB_RX]);
1604 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
1606 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
1608 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 0);
1610 /* Treat IP frag packets as "experimental" protocol for now
1611 until support of IP frag reassembly is implemented */
1612 proto0 = ip4_is_fragment(ip0) ? 0xfe : ip0->protocol;
1613 is_udp0 = proto0 == IP_PROTOCOL_UDP;
1614 is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP;
1618 good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1620 udp0 = ip4_next_header (ip0);
1622 /* Don't verify UDP checksum for packets with explicit zero checksum. */
1623 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1625 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 1);
1627 /* Verify UDP length. */
1628 ip_len0 = clib_net_to_host_u16 (ip0->length);
1629 udp_len0 = clib_net_to_host_u16 (udp0->length);
1631 len_diff0 = ip_len0 - udp_len0;
1633 len_diff0 = is_udp0 ? len_diff0 : 0;
1635 if (PREDICT_FALSE (! (is_tcp_udp0 & good_tcp_udp0)))
1640 && ! (flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED))
1641 flags0 = ip4_tcp_udp_validate_checksum (vm, p0);
1643 (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0;
1644 good_tcp_udp0 |= is_udp0 && udp0->checksum == 0;
1648 good_tcp_udp0 &= len_diff0 >= 0;
1650 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
1652 error0 = IP4_ERROR_UNKNOWN_PROTOCOL;
1654 error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0;
1656 ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM);
1657 error0 = (is_tcp_udp0 && ! good_tcp_udp0
1658 ? IP4_ERROR_TCP_CHECKSUM + is_udp0
1661 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
1662 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
1664 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
1665 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0;
1667 lb0 = load_balance_get(lbi0);
1668 dpo0 = load_balance_get_bucket_i(lb0, 0);
1670 vnet_buffer (p0)->ip.adj_index[VLIB_TX] =
1671 vnet_buffer (p0)->ip.adj_index[VLIB_RX] =
1674 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1675 dpo0->dpoi_type == DPO_RECEIVE) ?
1676 IP4_ERROR_SPOOFED_LOCAL_PACKETS :
1678 error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL &&
1679 !fib_urpf_check_size(lb0->lb_urpf) &&
1680 ip0->dst_address.as_u32 != 0xFFFFFFFF)
1681 ? IP4_ERROR_SRC_LOOKUP_MISS
1684 next0 = lm->local_next_by_ip_protocol[proto0];
1686 next0 = error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0;
1688 p0->error = error0? error_node->errors[error0] : 0;
1690 if (PREDICT_FALSE (next0 != next_index))
1692 n_left_to_next += 1;
1693 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1696 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1699 n_left_to_next -= 1;
1703 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1706 return frame->n_vectors;
1709 VLIB_REGISTER_NODE (ip4_local_node,static) = {
1710 .function = ip4_local,
1711 .name = "ip4-local",
1712 .vector_size = sizeof (u32),
1714 .format_trace = format_ip4_forward_next_trace,
1716 .n_next_nodes = IP_LOCAL_N_NEXT,
1718 [IP_LOCAL_NEXT_DROP] = "error-drop",
1719 [IP_LOCAL_NEXT_PUNT] = "error-punt",
1720 [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
1721 [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
1725 VLIB_NODE_FUNCTION_MULTIARCH (ip4_local_node, ip4_local)
1727 void ip4_register_protocol (u32 protocol, u32 node_index)
1729 vlib_main_t * vm = vlib_get_main();
1730 ip4_main_t * im = &ip4_main;
1731 ip_lookup_main_t * lm = &im->lookup_main;
1733 ASSERT (protocol < ARRAY_LEN (lm->local_next_by_ip_protocol));
1734 lm->local_next_by_ip_protocol[protocol] = vlib_node_add_next (vm, ip4_local_node.index, node_index);
1737 static clib_error_t *
1738 show_ip_local_command_fn (vlib_main_t * vm,
1739 unformat_input_t * input,
1740 vlib_cli_command_t * cmd)
1742 ip4_main_t * im = &ip4_main;
1743 ip_lookup_main_t * lm = &im->lookup_main;
1746 vlib_cli_output (vm, "Protocols handled by ip4_local");
1747 for (i = 0; i < ARRAY_LEN(lm->local_next_by_ip_protocol); i++)
1749 if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT)
1750 vlib_cli_output (vm, "%d", i);
1757 VLIB_CLI_COMMAND (show_ip_local, static) = {
1758 .path = "show ip local",
1759 .function = show_ip_local_command_fn,
1760 .short_help = "Show ip local protocol table",
1764 ip4_arp_inline (vlib_main_t * vm,
1765 vlib_node_runtime_t * node,
1766 vlib_frame_t * frame,
1769 vnet_main_t * vnm = vnet_get_main();
1770 ip4_main_t * im = &ip4_main;
1771 ip_lookup_main_t * lm = &im->lookup_main;
1772 u32 * from, * to_next_drop;
1773 uword n_left_from, n_left_to_next_drop, next_index;
1774 static f64 time_last_seed_change = -1e100;
1775 static u32 hash_seeds[3];
1776 static uword hash_bitmap[256 / BITS (uword)];
1779 if (node->flags & VLIB_NODE_FLAG_TRACE)
1780 ip4_forward_next_trace (vm, node, frame, VLIB_TX);
1782 time_now = vlib_time_now (vm);
1783 if (time_now - time_last_seed_change > 1e-3)
1786 u32 * r = clib_random_buffer_get_data (&vm->random_buffer,
1787 sizeof (hash_seeds));
1788 for (i = 0; i < ARRAY_LEN (hash_seeds); i++)
1789 hash_seeds[i] = r[i];
1791 /* Mark all hash keys as been no-seen before. */
1792 for (i = 0; i < ARRAY_LEN (hash_bitmap); i++)
1795 time_last_seed_change = time_now;
1798 from = vlib_frame_vector_args (frame);
1799 n_left_from = frame->n_vectors;
1800 next_index = node->cached_next_index;
1801 if (next_index == IP4_ARP_NEXT_DROP)
1802 next_index = IP4_ARP_N_NEXT; /* point to first interface */
1804 while (n_left_from > 0)
1806 vlib_get_next_frame (vm, node, IP4_ARP_NEXT_DROP,
1807 to_next_drop, n_left_to_next_drop);
1809 while (n_left_from > 0 && n_left_to_next_drop > 0)
1811 u32 pi0, adj_index0, a0, b0, c0, m0, sw_if_index0, drop0;
1812 ip_adjacency_t * adj0;
1819 p0 = vlib_get_buffer (vm, pi0);
1821 adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
1822 adj0 = ip_get_adjacency (lm, adj_index0);
1823 ip0 = vlib_buffer_get_current (p0);
1826 * this is the Glean case, so we are ARPing for the
1827 * packet's destination
1833 sw_if_index0 = adj0->rewrite_header.sw_if_index;
1834 vnet_buffer (p0)->sw_if_index[VLIB_TX] = sw_if_index0;
1838 a0 ^= ip0->dst_address.data_u32;
1842 a0 ^= adj0->sub_type.nbr.next_hop.ip4.data_u32;
1846 hash_v3_finalize32 (a0, b0, c0);
1848 c0 &= BITS (hash_bitmap) - 1;
1849 c0 = c0 / BITS (uword);
1850 m0 = (uword) 1 << (c0 % BITS (uword));
1852 bm0 = hash_bitmap[c0];
1853 drop0 = (bm0 & m0) != 0;
1855 /* Mark it as seen. */
1856 hash_bitmap[c0] = bm0 | m0;
1860 to_next_drop[0] = pi0;
1862 n_left_to_next_drop -= 1;
1864 p0->error = node->errors[drop0 ? IP4_ARP_ERROR_DROP : IP4_ARP_ERROR_REQUEST_SENT];
1870 * Can happen if the control-plane is programming tables
1871 * with traffic flowing; at least that's today's lame excuse.
1873 if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN) ||
1874 (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
1876 p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
1879 /* Send ARP request. */
1883 ethernet_arp_header_t * h0;
1884 vnet_hw_interface_t * hw_if0;
1886 h0 = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi0);
1888 /* Add rewrite/encap string for ARP packet. */
1889 vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t));
1891 hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
1893 /* Src ethernet address in ARP header. */
1894 clib_memcpy (h0->ip4_over_ethernet[0].ethernet, hw_if0->hw_address,
1895 sizeof (h0->ip4_over_ethernet[0].ethernet));
1899 /* The interface's source address is stashed in the Glean Adj */
1900 h0->ip4_over_ethernet[0].ip4 = adj0->sub_type.glean.receive_addr.ip4;
1902 /* Copy in destination address we are requesting. This is the
1903 * glean case, so it's the packet's destination.*/
1904 h0->ip4_over_ethernet[1].ip4.data_u32 = ip0->dst_address.data_u32;
1908 /* Src IP address in ARP header. */
1909 if (ip4_src_address_for_packet(lm, sw_if_index0,
1910 &h0->ip4_over_ethernet[0].ip4))
1912 /* No source address available */
1913 p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
1914 vlib_buffer_free(vm, &bi0, 1);
1918 /* Copy in destination address we are requesting from the
1920 h0->ip4_over_ethernet[1].ip4.data_u32 =
1921 adj0->sub_type.nbr.next_hop.ip4.as_u32;
1924 vlib_buffer_copy_trace_flag (vm, p0, bi0);
1925 b0 = vlib_get_buffer (vm, bi0);
1926 vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
1928 vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
1930 vlib_set_next_frame_buffer (vm, node, adj0->rewrite_header.next_index, bi0);
1934 vlib_put_next_frame (vm, node, IP4_ARP_NEXT_DROP, n_left_to_next_drop);
1937 return frame->n_vectors;
1941 ip4_arp (vlib_main_t * vm,
1942 vlib_node_runtime_t * node,
1943 vlib_frame_t * frame)
1945 return (ip4_arp_inline(vm, node, frame, 0));
1949 ip4_glean (vlib_main_t * vm,
1950 vlib_node_runtime_t * node,
1951 vlib_frame_t * frame)
1953 return (ip4_arp_inline(vm, node, frame, 1));
1956 static char * ip4_arp_error_strings[] = {
1957 [IP4_ARP_ERROR_DROP] = "address overflow drops",
1958 [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
1959 [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
1960 [IP4_ARP_ERROR_REPLICATE_DROP] = "ARP replication completed",
1961 [IP4_ARP_ERROR_REPLICATE_FAIL] = "ARP replication failed",
1962 [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
1965 VLIB_REGISTER_NODE (ip4_arp_node) = {
1966 .function = ip4_arp,
1968 .vector_size = sizeof (u32),
1970 .format_trace = format_ip4_forward_next_trace,
1972 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1973 .error_strings = ip4_arp_error_strings,
1975 .n_next_nodes = IP4_ARP_N_NEXT,
1977 [IP4_ARP_NEXT_DROP] = "error-drop",
1981 VLIB_REGISTER_NODE (ip4_glean_node) = {
1982 .function = ip4_glean,
1983 .name = "ip4-glean",
1984 .vector_size = sizeof (u32),
1986 .format_trace = format_ip4_forward_next_trace,
1988 .n_errors = ARRAY_LEN (ip4_arp_error_strings),
1989 .error_strings = ip4_arp_error_strings,
1991 .n_next_nodes = IP4_ARP_N_NEXT,
1993 [IP4_ARP_NEXT_DROP] = "error-drop",
1997 #define foreach_notrace_ip4_arp_error \
2003 clib_error_t * arp_notrace_init (vlib_main_t * vm)
2005 vlib_node_runtime_t *rt =
2006 vlib_node_get_runtime (vm, ip4_arp_node.index);
2008 /* don't trace ARP request packets */
2010 vnet_pcap_drop_trace_filter_add_del \
2011 (rt->errors[IP4_ARP_ERROR_##a], \
2013 foreach_notrace_ip4_arp_error;
2018 VLIB_INIT_FUNCTION(arp_notrace_init);
2021 /* Send an ARP request to see if given destination is reachable on given interface. */
2023 ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index)
2025 vnet_main_t * vnm = vnet_get_main();
2026 ip4_main_t * im = &ip4_main;
2027 ethernet_arp_header_t * h;
2028 ip4_address_t * src;
2029 ip_interface_address_t * ia;
2030 ip_adjacency_t * adj;
2031 vnet_hw_interface_t * hi;
2032 vnet_sw_interface_t * si;
2036 si = vnet_get_sw_interface (vnm, sw_if_index);
2038 if (!(si->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
2040 return clib_error_return (0, "%U: interface %U down",
2041 format_ip4_address, dst,
2042 format_vnet_sw_if_index_name, vnm,
2046 src = ip4_interface_address_matching_destination (im, dst, sw_if_index, &ia);
2049 vnm->api_errno = VNET_API_ERROR_NO_MATCHING_INTERFACE;
2050 return clib_error_return
2051 (0, "no matching interface address for destination %U (interface %U)",
2052 format_ip4_address, dst,
2053 format_vnet_sw_if_index_name, vnm, sw_if_index);
2056 adj = ip_get_adjacency (&im->lookup_main, ia->neighbor_probe_adj_index);
2058 h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi);
2060 hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
2062 clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet));
2064 h->ip4_over_ethernet[0].ip4 = src[0];
2065 h->ip4_over_ethernet[1].ip4 = dst[0];
2067 b = vlib_get_buffer (vm, bi);
2068 vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index;
2070 /* Add encapsulation string for software interface (e.g. ethernet header). */
2071 vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t));
2072 vlib_buffer_advance (b, -adj->rewrite_header.data_bytes);
2075 vlib_frame_t * f = vlib_get_frame_to_node (vm, hi->output_node_index);
2076 u32 * to_next = vlib_frame_vector_args (f);
2079 vlib_put_frame_to_node (vm, hi->output_node_index, f);
2082 return /* no error */ 0;
/* Static next indices of the ip4 rewrite nodes.  Successfully rewritten
   packets instead use the next index stored in the adjacency's rewrite
   header.  ip4_rewrite_inline() tests "next0 && next0_override", which
   relies on the drop disposition being 0. */
typedef enum {
  IP4_REWRITE_NEXT_DROP,
  IP4_REWRITE_NEXT_ARP,
  IP4_REWRITE_NEXT_ICMP_ERROR,
} ip4_rewrite_next_t;
2092 ip4_rewrite_inline (vlib_main_t * vm,
2093 vlib_node_runtime_t * node,
2094 vlib_frame_t * frame,
2095 int rewrite_for_locally_received_packets,
2098 ip_lookup_main_t * lm = &ip4_main.lookup_main;
2099 u32 * from = vlib_frame_vector_args (frame);
2100 u32 n_left_from, n_left_to_next, * to_next, next_index;
2101 vlib_node_runtime_t * error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
2102 vlib_rx_or_tx_t adj_rx_tx = rewrite_for_locally_received_packets ? VLIB_RX : VLIB_TX;
2103 ip_config_main_t * cm = &lm->feature_config_mains[VNET_IP_TX_FEAT];
2105 n_left_from = frame->n_vectors;
2106 next_index = node->cached_next_index;
2107 u32 cpu_index = os_get_cpu_number();
2109 while (n_left_from > 0)
2111 vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2113 while (n_left_from >= 4 && n_left_to_next >= 2)
2115 ip_adjacency_t * adj0, * adj1;
2116 vlib_buffer_t * p0, * p1;
2117 ip4_header_t * ip0, * ip1;
2118 u32 pi0, rw_len0, next0, error0, checksum0, adj_index0;
2119 u32 pi1, rw_len1, next1, error1, checksum1, adj_index1;
2120 u32 next0_override, next1_override;
2121 u32 tx_sw_if_index0, tx_sw_if_index1;
2123 if (rewrite_for_locally_received_packets)
2124 next0_override = next1_override = 0;
2126 /* Prefetch next iteration. */
2128 vlib_buffer_t * p2, * p3;
2130 p2 = vlib_get_buffer (vm, from[2]);
2131 p3 = vlib_get_buffer (vm, from[3]);
2133 vlib_prefetch_buffer_header (p2, STORE);
2134 vlib_prefetch_buffer_header (p3, STORE);
2136 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE);
2137 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE);
2140 pi0 = to_next[0] = from[0];
2141 pi1 = to_next[1] = from[1];
2146 n_left_to_next -= 2;
2148 p0 = vlib_get_buffer (vm, pi0);
2149 p1 = vlib_get_buffer (vm, pi1);
2151 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2152 adj_index1 = vnet_buffer (p1)->ip.adj_index[adj_rx_tx];
2154 /* We should never rewrite a pkt using the MISS adjacency */
2155 ASSERT(adj_index0 && adj_index1);
2157 ip0 = vlib_buffer_get_current (p0);
2158 ip1 = vlib_buffer_get_current (p1);
2160 error0 = error1 = IP4_ERROR_NONE;
2161 next0 = next1 = IP4_REWRITE_NEXT_DROP;
2163 /* Decrement TTL & update checksum.
2164 Works either endian, so no need for byte swap. */
2165 if (! rewrite_for_locally_received_packets)
2167 i32 ttl0 = ip0->ttl, ttl1 = ip1->ttl;
2169 /* Input node should have reject packets with ttl 0. */
2170 ASSERT (ip0->ttl > 0);
2171 ASSERT (ip1->ttl > 0);
2173 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2174 checksum1 = ip1->checksum + clib_host_to_net_u16 (0x0100);
2176 checksum0 += checksum0 >= 0xffff;
2177 checksum1 += checksum1 >= 0xffff;
2179 ip0->checksum = checksum0;
2180 ip1->checksum = checksum1;
2189 * If the ttl drops below 1 when forwarding, generate
2192 if (PREDICT_FALSE(ttl0 <= 0))
2194 error0 = IP4_ERROR_TIME_EXPIRED;
2195 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2196 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2197 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2198 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2200 if (PREDICT_FALSE(ttl1 <= 0))
2202 error1 = IP4_ERROR_TIME_EXPIRED;
2203 vnet_buffer (p1)->sw_if_index[VLIB_TX] = (u32)~0;
2204 icmp4_error_set_vnet_buffer(p1, ICMP4_time_exceeded,
2205 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2206 next1 = IP4_REWRITE_NEXT_ICMP_ERROR;
2209 /* Verify checksum. */
2210 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2211 ASSERT (ip1->checksum == ip4_header_checksum (ip1));
2214 /* Rewrite packet header and updates lengths. */
2215 adj0 = ip_get_adjacency (lm, adj_index0);
2216 adj1 = ip_get_adjacency (lm, adj_index1);
2218 if (rewrite_for_locally_received_packets)
2220 if (PREDICT_FALSE(adj0->lookup_next_index
2221 == IP_LOOKUP_NEXT_ARP))
2222 next0_override = IP4_REWRITE_NEXT_ARP;
2223 if (PREDICT_FALSE(adj1->lookup_next_index
2224 == IP_LOOKUP_NEXT_ARP))
2225 next1_override = IP4_REWRITE_NEXT_ARP;
2228 /* Worth pipelining. No guarantee that adj0,1 are hot... */
2229 rw_len0 = adj0[0].rewrite_header.data_bytes;
2230 rw_len1 = adj1[0].rewrite_header.data_bytes;
2231 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2232 vnet_buffer(p1)->ip.save_rewrite_length = rw_len1;
2234 /* Check MTU of outgoing interface. */
2235 error0 = (vlib_buffer_length_in_chain (vm, p0) > adj0[0].rewrite_header.max_l3_packet_bytes
2236 ? IP4_ERROR_MTU_EXCEEDED
2238 error1 = (vlib_buffer_length_in_chain (vm, p1) > adj1[0].rewrite_header.max_l3_packet_bytes
2239 ? IP4_ERROR_MTU_EXCEEDED
2242 next0 = (error0 == IP4_ERROR_NONE)
2243 ? adj0[0].rewrite_header.next_index : next0;
2245 if (rewrite_for_locally_received_packets)
2246 next0 = next0 && next0_override ? next0_override : next0;
2248 next1 = (error1 == IP4_ERROR_NONE)
2249 ? adj1[0].rewrite_header.next_index : next1;
2251 if (rewrite_for_locally_received_packets)
2252 next1 = next1 && next1_override ? next1_override : next1;
2255 * We've already accounted for an ethernet_header_t elsewhere
2257 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2258 vlib_increment_combined_counter
2259 (&adjacency_counters,
2260 cpu_index, adj_index0,
2261 /* packet increment */ 0,
2262 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2264 if (PREDICT_FALSE (rw_len1 > sizeof(ethernet_header_t)))
2265 vlib_increment_combined_counter
2266 (&adjacency_counters,
2267 cpu_index, adj_index1,
2268 /* packet increment */ 0,
2269 /* byte increment */ rw_len1-sizeof(ethernet_header_t));
2271 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2272 * to see the IP headerr */
2273 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2275 p0->current_data -= rw_len0;
2276 p0->current_length += rw_len0;
2277 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2278 vnet_buffer (p0)->sw_if_index[VLIB_TX] =
2282 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2285 p0->current_config_index =
2286 vec_elt (cm->config_index_by_sw_if_index,
2288 vnet_get_config_data (&cm->config_main,
2289 &p0->current_config_index,
2291 /* # bytes of config data */ 0);
2294 if (PREDICT_TRUE(error1 == IP4_ERROR_NONE))
2296 p1->current_data -= rw_len1;
2297 p1->current_length += rw_len1;
2299 tx_sw_if_index1 = adj1[0].rewrite_header.sw_if_index;
2300 vnet_buffer (p1)->sw_if_index[VLIB_TX] =
2304 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2307 p1->current_config_index =
2308 vec_elt (cm->config_index_by_sw_if_index,
2310 vnet_get_config_data (&cm->config_main,
2311 &p1->current_config_index,
2313 /* # bytes of config data */ 0);
2317 /* Guess we are only writing on simple Ethernet header. */
2318 vnet_rewrite_two_headers (adj0[0], adj1[0],
2320 sizeof (ethernet_header_t));
2324 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2325 adj1->sub_type.midchain.fixup_func(vm, adj1, p1);
2328 vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
2329 to_next, n_left_to_next,
2330 pi0, pi1, next0, next1);
2333 while (n_left_from > 0 && n_left_to_next > 0)
2335 ip_adjacency_t * adj0;
2338 u32 pi0, rw_len0, adj_index0, next0, error0, checksum0;
2340 u32 tx_sw_if_index0;
2342 if (rewrite_for_locally_received_packets)
2345 pi0 = to_next[0] = from[0];
2347 p0 = vlib_get_buffer (vm, pi0);
2349 adj_index0 = vnet_buffer (p0)->ip.adj_index[adj_rx_tx];
2351 /* We should never rewrite a pkt using the MISS adjacency */
2354 adj0 = ip_get_adjacency (lm, adj_index0);
2356 ip0 = vlib_buffer_get_current (p0);
2358 error0 = IP4_ERROR_NONE;
2359 next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */
2361 /* Decrement TTL & update checksum. */
2362 if (! rewrite_for_locally_received_packets)
2364 i32 ttl0 = ip0->ttl;
2366 checksum0 = ip0->checksum + clib_host_to_net_u16 (0x0100);
2368 checksum0 += checksum0 >= 0xffff;
2370 ip0->checksum = checksum0;
2372 ASSERT (ip0->ttl > 0);
2378 ASSERT (ip0->checksum == ip4_header_checksum (ip0));
2380 if (PREDICT_FALSE(ttl0 <= 0))
2383 * If the ttl drops below 1 when forwarding, generate
2386 error0 = IP4_ERROR_TIME_EXPIRED;
2387 next0 = IP4_REWRITE_NEXT_ICMP_ERROR;
2388 vnet_buffer (p0)->sw_if_index[VLIB_TX] = (u32)~0;
2389 icmp4_error_set_vnet_buffer(p0, ICMP4_time_exceeded,
2390 ICMP4_time_exceeded_ttl_exceeded_in_transit, 0);
2394 if (rewrite_for_locally_received_packets)
2397 * We have to override the next_index in ARP adjacencies,
2398 * because they're set up for ip4-arp, not this node...
2400 if (PREDICT_FALSE(adj0->lookup_next_index
2401 == IP_LOOKUP_NEXT_ARP))
2402 next0_override = IP4_REWRITE_NEXT_ARP;
2405 /* Guess we are only writing on simple Ethernet header. */
2406 vnet_rewrite_one_header (adj0[0], ip0,
2407 sizeof (ethernet_header_t));
2409 /* Update packet buffer attributes/set output interface. */
2410 rw_len0 = adj0[0].rewrite_header.data_bytes;
2411 vnet_buffer(p0)->ip.save_rewrite_length = rw_len0;
2413 if (PREDICT_FALSE (rw_len0 > sizeof(ethernet_header_t)))
2414 vlib_increment_combined_counter
2415 (&adjacency_counters,
2416 cpu_index, adj_index0,
2417 /* packet increment */ 0,
2418 /* byte increment */ rw_len0-sizeof(ethernet_header_t));
2420 /* Check MTU of outgoing interface. */
2421 error0 = (vlib_buffer_length_in_chain (vm, p0)
2422 > adj0[0].rewrite_header.max_l3_packet_bytes
2423 ? IP4_ERROR_MTU_EXCEEDED
2426 p0->error = error_node->errors[error0];
2428 /* Don't adjust the buffer for ttl issue; icmp-error node wants
2429 * to see the IP header */
2430 if (PREDICT_TRUE(error0 == IP4_ERROR_NONE))
2432 p0->current_data -= rw_len0;
2433 p0->current_length += rw_len0;
2434 tx_sw_if_index0 = adj0[0].rewrite_header.sw_if_index;
2436 vnet_buffer (p0)->sw_if_index[VLIB_TX] = tx_sw_if_index0;
2437 next0 = adj0[0].rewrite_header.next_index;
2441 adj0->sub_type.midchain.fixup_func(vm, adj0, p0);
2445 (clib_bitmap_get (lm->tx_sw_if_has_ip_output_features,
2448 p0->current_config_index =
2449 vec_elt (cm->config_index_by_sw_if_index,
2451 vnet_get_config_data (&cm->config_main,
2452 &p0->current_config_index,
2454 /* # bytes of config data */ 0);
2458 if (rewrite_for_locally_received_packets)
2459 next0 = next0 && next0_override ? next0_override : next0;
2464 n_left_to_next -= 1;
2466 vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
2467 to_next, n_left_to_next,
2471 vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2474 /* Need to do trace after rewrites to pick up new packet data. */
2475 if (node->flags & VLIB_NODE_FLAG_TRACE)
2476 ip4_forward_next_trace (vm, node, frame, adj_rx_tx);
2478 return frame->n_vectors;
2482 /** @brief IPv4 transit rewrite node.
2483 @node ip4-rewrite-transit
2485 This is the IPv4 transit-rewrite node: decrement TTL, fix the ipv4
2486 header checksum, fetch the ip adjacency, check the outbound mtu,
2487 apply the adjacency rewrite, and send pkts to the adjacency
2488 rewrite header's next_index.
2490 @param vm vlib_main_t corresponding to the current thread
2491 @param node vlib_node_runtime_t
2492 @param frame vlib_frame_t whose contents should be dispatched
2494 @par Graph mechanics: buffer metadata, next index usage
2497 - <code>vnet_buffer(b)->ip.adj_index[VLIB_TX]</code>
2498 - the rewrite adjacency index
2499 - <code>adj->lookup_next_index</code>
2500 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2501 the packet will be dropped.
2502 - <code>adj->rewrite_header</code>
2503 - Rewrite string length, rewrite string, next_index
2506 - <code>b->current_data, b->current_length</code>
2507 - Updated net of applying the rewrite string
2509 <em>Next Indices:</em>
2510 - <code> adj->rewrite_header.next_index </code>
2514 ip4_rewrite_transit (vlib_main_t * vm,
2515 vlib_node_runtime_t * node,
2516 vlib_frame_t * frame)
/* Thin wrapper: run the shared ip4_rewrite_inline with
 * rewrite_for_locally_received_packets = 0, the mode in which the shared
 * code decrements the TTL and updates the header checksum.
 * NOTE(review): the trailing 0 is presumably the midchain flag (ip4_midchain
 * passes 1 there) -- confirm against ip4_rewrite_inline's parameter list. */
2518 return ip4_rewrite_inline (vm, node, frame,
2519 /* rewrite_for_locally_received_packets */ 0, 0);
2522 /** @brief IPv4 local rewrite node.
2523 @node ip4-rewrite-local
2525 This is the IPv4 local rewrite node. Fetch the ip adjacency, check
2526 the outbound interface mtu, apply the adjacency rewrite, and send
2527 pkts to the adjacency rewrite header's next_index. Also
2528 handles corner cases such as a peer sending an icmp4 packet with
2529 src = dst = interface addr.
2531 @param vm vlib_main_t corresponding to the current thread
2532 @param node vlib_node_runtime_t
2533 @param frame vlib_frame_t whose contents should be dispatched
2535 @par Graph mechanics: buffer metadata, next index usage
2538 - <code>vnet_buffer(b)->ip.adj_index[VLIB_RX]</code>
2539 - the rewrite adjacency index
2540 - <code>adj->lookup_next_index</code>
2541 - Must be IP_LOOKUP_NEXT_REWRITE or IP_LOOKUP_NEXT_ARP, otherwise
2542 the packet will be dropped.
2543 - <code>adj->rewrite_header</code>
2544 - Rewrite string length, rewrite string, next_index
2547 - <code>b->current_data, b->current_length</code>
2548 - Updated net of applying the rewrite string
2550 <em>Next Indices:</em>
2551 - <code> adj->rewrite_header.next_index </code>
2556 ip4_rewrite_local (vlib_main_t * vm,
2557 vlib_node_runtime_t * node,
2558 vlib_frame_t * frame)
/* Thin wrapper: run the shared ip4_rewrite_inline with
 * rewrite_for_locally_received_packets = 1. In that mode the shared code
 * skips the TTL decrement and overrides the next index for adjacencies
 * whose lookup_next_index is IP_LOOKUP_NEXT_ARP.
 * NOTE(review): the trailing 0 is presumably the midchain flag -- confirm
 * against ip4_rewrite_inline's parameter list. */
2560 return ip4_rewrite_inline (vm, node, frame,
2561 /* rewrite_for_locally_received_packets */ 1, 0);
/* Node function registered as "ip4-midchain": delegates to
 * ip4_rewrite_inline with rewrite_for_locally_received_packets = 0 and a
 * final argument of 1.
 * NOTE(review): the final argument presumably enables the midchain path
 * (the adjacency's sub_type.midchain.fixup_func call) -- confirm against
 * ip4_rewrite_inline's parameter list. */
2565 ip4_midchain (vlib_main_t * vm,
2566 vlib_node_runtime_t * node,
2567 vlib_frame_t * frame)
2569 return ip4_rewrite_inline (vm, node, frame,
2570 /* rewrite_for_locally_received_packets */ 0, 1);
/* Registers the "ip4-rewrite-transit" graph node, whose node function is
 * ip4_rewrite_transit. The entries keyed by IP4_REWRITE_NEXT_* name the
 * next nodes used for dropped packets, ARP and ICMP error generation.
 * "ip4-midchain" and "ip4-rewrite-local" declare themselves siblings of
 * this node. */
2573 VLIB_REGISTER_NODE (ip4_rewrite_node) = {
2574 .function = ip4_rewrite_transit,
2575 .name = "ip4-rewrite-transit",
2576 .vector_size = sizeof (u32),
2578 .format_trace = format_ip4_rewrite_trace,
2582 [IP4_REWRITE_NEXT_DROP] = "error-drop",
2583 [IP4_REWRITE_NEXT_ARP] = "ip4-arp",
2584 [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error",
2588 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite_transit)
/* Registers the "ip4-midchain" graph node, whose node function is
 * ip4_midchain. It is declared a sibling of "ip4-rewrite-transit".
 * NOTE(review): presumably sibling_of makes it share that node's next-node
 * arcs -- confirm in the vlib node documentation. */
2590 VLIB_REGISTER_NODE (ip4_midchain_node) = {
2591 .function = ip4_midchain,
2592 .name = "ip4-midchain",
2593 .vector_size = sizeof (u32),
2595 .format_trace = format_ip4_forward_next_trace,
2597 .sibling_of = "ip4-rewrite-transit",
2600 VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain)
/* Registers the "ip4-rewrite-local" graph node, whose node function is
 * ip4_rewrite_local. It is declared a sibling of "ip4-rewrite-transit" and
 * uses the same trace formatter as that node. */
2602 VLIB_REGISTER_NODE (ip4_rewrite_local_node) = {
2603 .function = ip4_rewrite_local,
2604 .name = "ip4-rewrite-local",
2605 .vector_size = sizeof (u32),
2607 .sibling_of = "ip4-rewrite-transit",
2609 .format_trace = format_ip4_rewrite_trace,
2614 VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_local_node, ip4_rewrite_local)
/* CLI handler for "set interface ip table": parses an interface followed by
 * a decimal table id, finds or creates (and locks) the IPv4 FIB for that
 * table, and records the resulting FIB index as the interface's entry in
 * ip4_main.fib_index_by_sw_if_index.
 *
 * Parse failures are reported through a clib_error_t ("unknown interface"
 * or "expected table id"). */
2616 static clib_error_t *
2617 add_del_interface_table (vlib_main_t * vm,
2618 unformat_input_t * input,
2619 vlib_cli_command_t * cmd)
2621 vnet_main_t * vnm = vnet_get_main();
2622 clib_error_t * error = 0;
2623 u32 sw_if_index, table_id;
/* First token: the software interface to move. */
2627 if (! unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index))
2629 error = clib_error_return (0, "unknown interface `%U'",
2630 format_unformat_error, input);
/* Second token: the numeric table id. */
2634 if (unformat (input, "%d", &table_id))
2638 error = clib_error_return (0, "expected table id `%U'",
2639 format_unformat_error, input);
2644 ip4_main_t * im = &ip4_main;
/* Resolve the table id to a FIB index, creating the FIB if needed. */
2647 fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
2652 // changing an interface's table has consequences for any connecteds
2653 // and adj-fibs already installed.
/* Grow the per-interface vector if necessary, then bind the interface. */
2655 vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
2656 im->fib_index_by_sw_if_index[sw_if_index] = fib_index;
2664 * Place the indicated interface into the supplied VRF
2667 * @cliexstart{set interface ip table}
2669 * vpp# set interface ip table GigabitEthernet2/0/0 2
2671 * Interface addresses added after setting the interface IP table end up in the indicated VRF table.
2672 * Predictable but potentially counter-intuitive results occur if you provision interface addresses in multiple FIBs.
2673 * Upon RX, packets will be processed in the last IP table ID provisioned.
2674 * It might be marginally useful to evade source RPF drops to put an interface address into multiple FIBs.
/* Registers the "set interface ip table" CLI path; the handler is
 * add_del_interface_table. */
2677 VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = {
2678 .path = "set interface ip table",
2679 .function = add_del_interface_table,
2680 .short_help = "Add/delete FIB table id for interface",
/* Node function for "ip4-lookup-multicast".
 *
 * For each buffer in the frame: take the FIB index of the RX interface
 * (replaced by sw_if_index[VLIB_TX] when that field is not ~0), obtain a
 * load-balance index from ip4_fib_table_lookup_lb, compute the flow hash
 * with that load-balance's hash config, select a bucket, store the bucket's
 * DPO index in ip.adj_index[VLIB_TX], bump the per-load-balance combined
 * counter and enqueue the buffer to the bucket's next node.
 *
 * Buffers are processed two at a time while at least four remain, then one
 * at a time.
 *
 * @param vm    vlib_main_t for the current thread
 * @param node  vlib_node_runtime_t
 * @param frame vlib_frame_t holding the buffer indices to process
 * @return frame->n_vectors
 */
2685 ip4_lookup_multicast (vlib_main_t * vm,
2686 vlib_node_runtime_t * node,
2687 vlib_frame_t * frame)
2689 ip4_main_t * im = &ip4_main;
2690 vlib_combined_counter_main_t * cm = &load_balance_main.lbm_to_counters;
2691 u32 n_left_from, n_left_to_next, * from, * to_next;
2692 ip_lookup_next_t next;
2693 u32 cpu_index = os_get_cpu_number();
2695 from = vlib_frame_vector_args (frame);
2696 n_left_from = frame->n_vectors;
2697 next = node->cached_next_index;
2699 while (n_left_from > 0)
2701 vlib_get_next_frame (vm, node, next,
2702 to_next, n_left_to_next);
/* Dual-buffer loop. */
2704 while (n_left_from >= 4 && n_left_to_next >= 2)
2706 vlib_buffer_t * p0, * p1;
2707 u32 pi0, pi1, lb_index0, lb_index1, wrong_next;
2708 ip_lookup_next_t next0, next1;
2709 ip4_header_t * ip0, * ip1;
2710 u32 fib_index0, fib_index1;
2711 const dpo_id_t *dpo0, *dpo1;
2712 const load_balance_t * lb0, * lb1;
2714 /* Prefetch next iteration. */
2716 vlib_buffer_t * p2, * p3;
2718 p2 = vlib_get_buffer (vm, from[2]);
2719 p3 = vlib_get_buffer (vm, from[3]);
2721 vlib_prefetch_buffer_header (p2, LOAD);
2722 vlib_prefetch_buffer_header (p3, LOAD);
2724 CLIB_PREFETCH (p2->data, sizeof (ip0[0]), LOAD);
2725 CLIB_PREFETCH (p3->data, sizeof (ip0[0]), LOAD);
2728 pi0 = to_next[0] = from[0];
2729 pi1 = to_next[1] = from[1];
2731 p0 = vlib_get_buffer (vm, pi0);
2732 p1 = vlib_get_buffer (vm, pi1);
2734 ip0 = vlib_buffer_get_current (p0);
2735 ip1 = vlib_buffer_get_current (p1);
/* FIB index comes from the RX interface unless sw_if_index[VLIB_TX]
 * carries an explicit value (anything other than ~0). */
2737 fib_index0 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2738 fib_index1 = vec_elt (im->fib_index_by_sw_if_index, vnet_buffer (p1)->sw_if_index[VLIB_RX]);
2739 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2740 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2741 fib_index1 = (vnet_buffer(p1)->sw_if_index[VLIB_TX] == (u32)~0) ?
2742 fib_index1 : vnet_buffer(p1)->sw_if_index[VLIB_TX];
2744 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2746 lb_index1 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index1),
2749 lb0 = load_balance_get (lb_index0);
2750 lb1 = load_balance_get (lb_index1);
/* The bucket mask below relies on a power-of-two bucket count. */
2752 ASSERT (lb0->lb_n_buckets > 0);
2753 ASSERT (is_pow2 (lb0->lb_n_buckets));
2754 ASSERT (lb1->lb_n_buckets > 0);
2755 ASSERT (is_pow2 (lb1->lb_n_buckets));
2757 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2758 (ip0, lb0->lb_hash_config);
2760 vnet_buffer (p1)->ip.flow_hash = ip4_compute_flow_hash
2761 (ip1, lb1->lb_hash_config);
2763 dpo0 = load_balance_get_bucket_i(lb0,
2764 (vnet_buffer (p0)->ip.flow_hash &
2765 (lb0->lb_n_buckets_minus_1)));
/* Each hash must be masked with its own load-balance's bucket count:
 * p1 uses lb1 (previously lb0, which could index outside lb1's buckets). */
2766 dpo1 = load_balance_get_bucket_i(lb1,
2767 (vnet_buffer (p1)->ip.flow_hash &
2768 (lb1->lb_n_buckets_minus_1)));
2770 next0 = dpo0->dpoi_next_node;
2771 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2772 next1 = dpo1->dpoi_next_node;
2773 vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
2775 if (1) /* $$$$$$ HACK FIXME */
2776 vlib_increment_combined_counter
2777 (cm, cpu_index, lb_index0, 1,
2778 vlib_buffer_length_in_chain (vm, p0));
2779 if (1) /* $$$$$$ HACK FIXME */
2780 vlib_increment_combined_counter
2781 (cm, cpu_index, lb_index1, 1,
2782 vlib_buffer_length_in_chain (vm, p1));
2786 n_left_to_next -= 2;
/* Bit 0 set: p0 goes elsewhere; bit 1 set: p1 goes elsewhere. */
2789 wrong_next = (next0 != next) + 2*(next1 != next);
2790 if (PREDICT_FALSE (wrong_next != 0))
2798 n_left_to_next += 1;
2799 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2805 n_left_to_next += 1;
2806 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2812 n_left_to_next += 2;
2813 vlib_set_next_frame_buffer (vm, node, next0, pi0);
2814 vlib_set_next_frame_buffer (vm, node, next1, pi1);
2818 vlib_put_next_frame (vm, node, next, n_left_to_next);
2820 vlib_get_next_frame (vm, node, next, to_next, n_left_to_next);
/* Single-buffer loop for the remainder. */
2826 while (n_left_from > 0 && n_left_to_next > 0)
2831 ip_lookup_next_t next0;
2833 const dpo_id_t *dpo0;
2834 const load_balance_t * lb0;
2839 p0 = vlib_get_buffer (vm, pi0);
2841 ip0 = vlib_buffer_get_current (p0);
2843 fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
2844 vnet_buffer (p0)->sw_if_index[VLIB_RX]);
2845 fib_index0 = (vnet_buffer(p0)->sw_if_index[VLIB_TX] == (u32)~0) ?
2846 fib_index0 : vnet_buffer(p0)->sw_if_index[VLIB_TX];
2848 lb_index0 = ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0),
2851 lb0 = load_balance_get (lb_index0);
2853 ASSERT (lb0->lb_n_buckets > 0);
2854 ASSERT (is_pow2 (lb0->lb_n_buckets));
2856 vnet_buffer (p0)->ip.flow_hash = ip4_compute_flow_hash
2857 (ip0, lb0->lb_hash_config);
2859 dpo0 = load_balance_get_bucket_i(lb0,
2860 (vnet_buffer (p0)->ip.flow_hash &
2861 (lb0->lb_n_buckets_minus_1)));
2863 next0 = dpo0->dpoi_next_node;
2864 vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
2866 if (1) /* $$$$$$ HACK FIXME */
2867 vlib_increment_combined_counter
2868 (cm, cpu_index, lb_index0, 1,
2869 vlib_buffer_length_in_chain (vm, p0));
2873 n_left_to_next -= 1;
2876 if (PREDICT_FALSE (next0 != next))
2878 n_left_to_next += 1;
2879 vlib_put_next_frame (vm, node, next, n_left_to_next);
2881 vlib_get_next_frame (vm, node, next,
2882 to_next, n_left_to_next);
2885 n_left_to_next -= 1;
2889 vlib_put_next_frame (vm, node, next, n_left_to_next);
2892 if (node->flags & VLIB_NODE_FLAG_TRACE)
2893 ip4_forward_next_trace(vm, node, frame, VLIB_TX);
2895 return frame->n_vectors;
/* Registers the "ip4-lookup-multicast" graph node, whose node function is
 * ip4_lookup_multicast. It is declared a sibling of "ip4-lookup". */
2898 VLIB_REGISTER_NODE (ip4_lookup_multicast_node,static) = {
2899 .function = ip4_lookup_multicast,
2900 .name = "ip4-lookup-multicast",
2901 .vector_size = sizeof (u32),
2902 .sibling_of = "ip4-lookup",
2903 .format_trace = format_ip4_lookup_trace,
2908 VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_multicast_node, ip4_lookup_multicast)
/* Registers the "ip4-multicast" graph node. Its node function is ip4_drop,
 * so buffers reaching this node are handled by the drop routine. */
2910 VLIB_REGISTER_NODE (ip4_multicast_node,static) = {
2911 .function = ip4_drop,
2912 .name = "ip4-multicast",
2913 .vector_size = sizeof (u32),
2915 .format_trace = format_ip4_forward_next_trace,
/* Cross-checks two lookup paths for address `a` in the FIB `fib_index0`.
 *
 * Walks the FIB's mtrie by hand (one lookup step per address byte, 0..3),
 * falls back to the mtrie's default leaf when the walk ends on an empty
 * leaf, and compares the resulting index with the value returned by
 * ip4_fib_table_lookup_lb for the same FIB and address.
 *
 * @return non-zero when both paths agree, 0 otherwise.
 */
2923 int ip4_lookup_validate (ip4_address_t *a, u32 fib_index0)
2925 ip4_fib_mtrie_t * mtrie0;
2926 ip4_fib_mtrie_leaf_t leaf0;
2929 mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
/* Manual four-step walk starting from the root leaf. */
2931 leaf0 = IP4_FIB_MTRIE_LEAF_ROOT;
2932 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 0);
2933 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 1);
2934 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
2935 leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
2937 /* Handle default route. */
2938 leaf0 = (leaf0 == IP4_FIB_MTRIE_LEAF_EMPTY ? mtrie0->default_leaf : leaf0);
2940 lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
/* Compare against the regular lookup routine. */
2942 return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get(fib_index0), a);
/* CLI handler for "test lookup".
 *
 * Accepts "table <n>", "count <f>" and a base IPv4 address in any order;
 * any other token is reported as "unknown input". It then runs
 * ip4_lookup_validate on n consecutive addresses starting at the base
 * address and prints either the error count or a no-errors message.
 *
 * NOTE(review): table_id is passed straight to ip4_lookup_validate, whose
 * second parameter is named fib_index0 -- confirm whether a table-id to
 * FIB-index translation is intended here.
 */
2945 static clib_error_t *
2946 test_lookup_command_fn (vlib_main_t * vm,
2947 unformat_input_t * input,
2948 vlib_cli_command_t * cmd)
2954 ip4_address_t ip4_base_address;
/* Argument parsing. */
2957 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
2958 if (unformat (input, "table %d", &table_id))
2960 else if (unformat (input, "count %f", &count))
2963 else if (unformat (input, "%U",
2964 unformat_ip4_address, &ip4_base_address))
2967 return clib_error_return (0, "unknown input `%U'",
2968 format_unformat_error, input);
/* Validate n addresses; presumably n is derived from count -- TODO confirm. */
2973 for (i = 0; i < n; i++)
2975 if (!ip4_lookup_validate (&ip4_base_address, table_id))
/* Advance to the next address: increment in host order, store in net order. */
2978 ip4_base_address.as_u32 =
2979 clib_host_to_net_u32 (1 +
2980 clib_net_to_host_u32 (ip4_base_address.as_u32));
2984 vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
2986 vlib_cli_output (vm, "No errors in %d lookups\n", n);
/* Registers the "test lookup" CLI path; the handler is
 * test_lookup_command_fn. */
2991 VLIB_CLI_COMMAND (lookup_test_command, static) = {
2992 .path = "test lookup",
2993 .short_help = "test lookup",
2994 .function = test_lookup_command_fn,
/* Sets the flow-hash configuration of the IPv4 FIB identified by table_id.
 *
 * The table id is resolved to a FIB index through
 * ip4_main.fib_index_by_table_id; the FIB's flow_hash_config field is then
 * overwritten with flow_hash_config.
 *
 * @return VNET_API_ERROR_NO_SUCH_FIB on the failure path (presumably when
 *         the table id is not in the hash -- TODO confirm).
 */
2997 int vnet_set_ip4_flow_hash (u32 table_id, u32 flow_hash_config)
2999 ip4_main_t * im4 = &ip4_main;
3001 uword * p = hash_get (im4->fib_index_by_table_id, table_id);
3004 return VNET_API_ERROR_NO_SUCH_FIB;
/* p[0] is the FIB index stored for this table id. */
3006 fib = ip4_fib_get (p[0]);
3008 fib->flow_hash_config = flow_hash_config;
/* CLI handler for "set ip flow-hash".
 *
 * Parses "table <n>" plus any number of flow-hash keywords (one per entry
 * of foreach_flow_hash_bit), OR-ing each keyword's value into
 * flow_hash_config, then applies the result with vnet_set_ip4_flow_hash.
 * An unknown table is reported as "no such FIB table"; the last visible
 * case only logs a "BUG: illegal flow hash config" warning.
 */
3012 static clib_error_t *
3013 set_ip_flow_hash_command_fn (vlib_main_t * vm,
3014 unformat_input_t * input,
3015 vlib_cli_command_t * cmd)
3019 u32 flow_hash_config = 0;
/* Argument parsing; keyword matches also set `matched`. */
3022 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3023 if (unformat (input, "table %d", &table_id))
3026 else if (unformat (input, #a)) { flow_hash_config |= v; matched=1;}
3027 foreach_flow_hash_bit
3033 return clib_error_return (0, "unknown input `%U'",
3034 format_unformat_error, input);
/* Apply the configuration and translate the return code. */
3036 rv = vnet_set_ip4_flow_hash (table_id, flow_hash_config);
3042 case VNET_API_ERROR_NO_SUCH_FIB:
3043 return clib_error_return (0, "no such FIB table %d", table_id);
3046 clib_warning ("BUG: illegal flow hash config 0x%x", flow_hash_config);
/* Registers the "set ip flow-hash" CLI path; the handler is
 * set_ip_flow_hash_command_fn. The help text mirrors the registered path
 * and the tokens the handler parses: "table <n>" followed by optional
 * flow-hash keywords. */
3053 VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
3054 .path = "set ip flow-hash",
3056 "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
3057 .function = set_ip_flow_hash_command_fn,
/* Binds a classifier table to an interface, or unbinds it when table_index
 * is ~0.
 *
 * The table index is stored in
 * lookup_main.classify_table_index_by_sw_if_index. If the interface has a
 * first IPv4 address, a FIB entry for that address is also maintained under
 * FIB_SOURCE_CLASSIFY: added with a classify DPO when a table is given,
 * removed otherwise.
 *
 * @return VNET_API_ERROR_NO_MATCHING_INTERFACE if sw_if_index is not an
 *         allocated software interface; VNET_API_ERROR_NO_SUCH_ENTRY if
 *         table_index is not ~0 and not an allocated classifier table.
 */
3060 int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
3063 vnet_main_t * vnm = vnet_get_main();
3064 vnet_interface_main_t * im = &vnm->interface_main;
3065 ip4_main_t * ipm = &ip4_main;
3066 ip_lookup_main_t * lm = &ipm->lookup_main;
3067 vnet_classify_main_t * cm = &vnet_classify_main;
3068 ip4_address_t *if_addr;
/* Validate both indices before touching any state. */
3070 if (pool_is_free_index (im->sw_interfaces, sw_if_index))
3071 return VNET_API_ERROR_NO_MATCHING_INTERFACE;
3073 if (table_index != ~0 && pool_is_free_index (cm->tables, table_index))
3074 return VNET_API_ERROR_NO_SUCH_ENTRY;
/* Record the binding, growing the per-interface vector if needed. */
3076 vec_validate (lm->classify_table_index_by_sw_if_index, sw_if_index);
3077 lm->classify_table_index_by_sw_if_index [sw_if_index] = table_index;
3079 if_addr = ip4_interface_first_address (ipm, sw_if_index, NULL);
/* Only interfaces that already have an address get a FIB entry. */
3081 if (NULL != if_addr)
3083 fib_prefix_t pfx = {
3085 .fp_proto = FIB_PROTOCOL_IP4,
3086 .fp_addr.ip4 = *if_addr,
3090 fib_index = fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4,
/* Bind: install a classify DPO for the interface address. */
3094 if (table_index != (u32) ~0)
3096 dpo_id_t dpo = DPO_NULL;
3101 classify_dpo_create(FIB_PROTOCOL_IP4,
3104 fib_table_entry_special_dpo_add(fib_index,
3106 FIB_SOURCE_CLASSIFY,
3107 FIB_ENTRY_FLAG_NONE,
/* Unbind (presumably the else branch): drop the classify-sourced entry. */
3113 fib_table_entry_special_remove(fib_index,
3115 FIB_SOURCE_CLASSIFY);
/* CLI handler for "set ip classify".
 *
 * Parses "table-index <n>" and "intfc <interface>"; both are required and
 * a missing one is reported as an error. The binding is applied with
 * vnet_set_ip4_classify_intfc, and its NO_MATCHING_INTERFACE /
 * NO_SUCH_ENTRY return codes are turned into CLI error messages.
 */
3122 static clib_error_t *
3123 set_ip_classify_command_fn (vlib_main_t * vm,
3124 unformat_input_t * input,
3125 vlib_cli_command_t * cmd)
3127 u32 table_index = ~0;
3128 int table_index_set = 0;
3129 u32 sw_if_index = ~0;
/* Argument parsing. */
3132 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
3133 if (unformat (input, "table-index %d", &table_index))
3134 table_index_set = 1;
3135 else if (unformat (input, "intfc %U", unformat_vnet_sw_interface,
3136 vnet_get_main(), &sw_if_index))
/* A separate flag is used because ~0 is itself a meaningful table_index. */
3142 if (table_index_set == 0)
3143 return clib_error_return (0, "classify table-index must be specified");
3145 if (sw_if_index == ~0)
3146 return clib_error_return (0, "interface / subif must be specified");
3148 rv = vnet_set_ip4_classify_intfc (vm, sw_if_index, table_index);
3155 case VNET_API_ERROR_NO_MATCHING_INTERFACE:
3156 return clib_error_return (0, "No such interface");
3158 case VNET_API_ERROR_NO_SUCH_ENTRY:
3159 return clib_error_return (0, "No such classifier table");
3164 VLIB_CLI_COMMAND (set_ip_classify_command, static) = {
3165 .path = "set ip classify",
3167 "set ip classify intfc <int> table-index <index>",
3168 .function = set_ip_classify_command_fn,