X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fip%2Fip4_forward.c;h=000710ed817ead3c5b8fc56a2ce215e49c02f706;hb=1855b8e4;hp=60a7fab3eb483528e75f209d6642fb5b8f10fe93;hpb=2eeeb4b1706150f69402dd8afc43856f801cd89a;p=vpp.git diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c old mode 100755 new mode 100644 index 60a7fab3eb4..000710ed817 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -39,6 +39,7 @@ #include #include +#include #include /* for ethernet_header_t */ #include /* for ethernet_arp_header_t */ #include @@ -53,378 +54,7 @@ #include #include /* for mFIB table and entry creation */ -/** - * @file - * @brief IPv4 Forwarding. - * - * This file contains the source code for IPv4 forwarding. - */ - -always_inline uword -ip4_lookup_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - int lookup_for_responses_to_locally_received_packets) -{ - ip4_main_t *im = &ip4_main; - vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; - u32 n_left_from, n_left_to_next, *from, *to_next; - ip_lookup_next_t next; - u32 thread_index = vlib_get_thread_index (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next = node->cached_next_index; - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - - while (n_left_from >= 8 && n_left_to_next >= 4) - { - vlib_buffer_t *p0, *p1, *p2, *p3; - ip4_header_t *ip0, *ip1, *ip2, *ip3; - ip_lookup_next_t next0, next1, next2, next3; - const load_balance_t *lb0, *lb1, *lb2, *lb3; - ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3; - ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3; - ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3; - u32 pi0, fib_index0, lb_index0; - u32 pi1, fib_index1, lb_index1; - u32 pi2, fib_index2, lb_index2; - u32 pi3, fib_index3, lb_index3; - flow_hash_config_t flow_hash_config0, flow_hash_config1; - flow_hash_config_t flow_hash_config2, flow_hash_config3; - u32 hash_c0, hash_c1, hash_c2, hash_c3; - const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - pi2 = to_next[2] = from[2]; - pi3 = to_next[3] = from[3]; - - from += 4; - to_next += 4; - n_left_to_next -= 4; - n_left_from -= 4; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - p2 = vlib_get_buffer (vm, pi2); - p3 = vlib_get_buffer (vm, pi3); - - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - ip2 = vlib_buffer_get_current (p2); - ip3 = vlib_buffer_get_current (p3); - - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - dst_addr2 = &ip2->dst_address; - dst_addr3 = &ip3->dst_address; - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index1 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); - fib_index2 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p2)->sw_if_index[VLIB_RX]); - fib_index3 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p3)->sw_if_index[VLIB_RX]); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = - (vnet_buffer (p1)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; - fib_index2 = - (vnet_buffer (p2)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX]; - fib_index3 = - (vnet_buffer (p3)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX]; - - - if (!lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - mtrie1 = &ip4_fib_get (fib_index1)->mtrie; - mtrie2 = &ip4_fib_get (fib_index2)->mtrie; - mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); - leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); - leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); - leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3); - } - - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2); - } - - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3); - } - - if (lookup_for_responses_to_locally_received_packets) - { - lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; - lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX]; - lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX]; - } - else - { - lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2); - lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3); - } - - ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3); - lb0 = load_balance_get (lb_index0); - lb1 = load_balance_get (lb_index1); - lb2 = load_balance_get (lb_index2); - lb3 = load_balance_get (lb_index3); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - - /* Use flow hash to compute multipath adjacency. */ - hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; - hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0; - hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - flow_hash_config0 = lb0->lb_hash_config; - hash_c0 = vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) - { - flow_hash_config1 = lb1->lb_hash_config; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = - ip4_compute_flow_hash (ip1, flow_hash_config1); - dpo1 = - load_balance_get_fwd_bucket (lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); - } - else - { - dpo1 = load_balance_get_bucket_i (lb1, 0); - } - if (PREDICT_FALSE (lb2->lb_n_buckets > 1)) - { - flow_hash_config2 = lb2->lb_hash_config; - hash_c2 = vnet_buffer (p2)->ip.flow_hash = - ip4_compute_flow_hash (ip2, flow_hash_config2); - dpo2 = - load_balance_get_fwd_bucket (lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); - } - else - { - dpo2 = load_balance_get_bucket_i (lb2, 0); - } - if (PREDICT_FALSE (lb3->lb_n_buckets > 1)) - { - flow_hash_config3 = lb3->lb_hash_config; - hash_c3 = vnet_buffer (p3)->ip.flow_hash = - ip4_compute_flow_hash (ip3, flow_hash_config3); - dpo3 = - load_balance_get_fwd_bucket (lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); - } - else - { - dpo3 = load_balance_get_bucket_i (lb3, 0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - next1 = dpo1->dpoi_next_node; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - next2 = dpo2->dpoi_next_node; - vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; - next3 = dpo3->dpoi_next_node; - vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; - - vlib_increment_combined_counter - (cm, thread_index, lb_index0, 1, - vlib_buffer_length_in_chain (vm, p0)); - vlib_increment_combined_counter - (cm, thread_index, lb_index1, 1, - vlib_buffer_length_in_chain (vm, p1)); - vlib_increment_combined_counter - (cm, thread_index, lb_index2, 1, - vlib_buffer_length_in_chain (vm, p2)); - vlib_increment_combined_counter - (cm, thread_index, lb_index3, 1, - vlib_buffer_length_in_chain (vm, p3)); - - vlib_validate_buffer_enqueue_x4 (vm, node, next, - to_next, n_left_to_next, - pi0, pi1, pi2, pi3, - next0, next1, next2, next3); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - ip_lookup_next_t next0; - const load_balance_t *lb0; - ip4_fib_mtrie_t *mtrie0; - ip4_fib_mtrie_leaf_t leaf0; - ip4_address_t *dst_addr0; - u32 pi0, fib_index0, lbi0; - flow_hash_config_t flow_hash_config0; - const dpo_id_t *dpo0; - u32 hash_c0; - - pi0 = from[0]; - to_next[0] = pi0; - - p0 = vlib_get_buffer (vm, pi0); - - ip0 = vlib_buffer_get_current (p0); - - dst_addr0 = &ip0->dst_address; - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - if (!lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); - } - - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - - if (lookup_for_responses_to_locally_received_packets) - lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - else - { - /* Handle default route. */ - lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - } - - ASSERT (lbi0); - lb0 = load_balance_get (lbi0); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - - /* Use flow hash to compute multipath adjacency. */ - hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - flow_hash_config0 = lb0->lb_hash_config; - - hash_c0 = vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - vlib_increment_combined_counter (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, - p0)); - - from += 1; - to_next += 1; - n_left_to_next -= 1; - n_left_from -= 1; - - if (PREDICT_FALSE (next0 != next)) - { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next, n_left_to_next); - next = next0; - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; - } - } - - vlib_put_next_frame (vm, node, next, n_left_to_next); - } - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, VLIB_TX); - - return frame->n_vectors; -} +#include /** @brief IPv4 lookup node. @node ip4-lookup @@ -470,22 +100,28 @@ ip4_lookup (vlib_main_t * vm, static u8 *format_ip4_lookup_trace (u8 * s, va_list * args); +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_lookup_node) = { -.function = ip4_lookup,.name = "ip4-lookup",.vector_size = - sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes = - IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,}; + .function = ip4_lookup, + .name = "ip4-lookup", + .vector_size = sizeof (u32), + .format_trace = format_ip4_lookup_trace, + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = IP4_LOOKUP_NEXT_NODES, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup); -always_inline uword +static uword ip4_load_balance (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 thread_index = vlib_get_thread_index (); + u32 thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -665,11 +301,17 @@ ip4_load_balance (vlib_main_t * vm, return frame->n_vectors; } +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_load_balance_node) = { -.function = ip4_load_balance,.name = "ip4-load-balance",.vector_size = - sizeof (u32),.sibling_of = "ip4-lookup",.format_trace = - format_ip4_lookup_trace,}; + .function = ip4_load_balance, + .name = "ip4-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = + format_ip4_lookup_trace, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance); @@ -698,6 +340,45 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, return result; } +static void +ip4_add_subnet_bcast_route (u32 fib_index, + fib_prefix_t *pfx, + u32 sw_if_index) +{ + vnet_sw_interface_flags_t iflags; + + iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index); + + fib_table_entry_special_remove(fib_index, + pfx, + FIB_SOURCE_INTERFACE); + + if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST) + { + fib_table_entry_update_one_path (fib_index, pfx, + FIB_SOURCE_INTERFACE, + FIB_ENTRY_FLAG_NONE, + DPO_PROTO_IP4, + /* No next-hop address */ + &ADJ_BCAST_ADDR, + sw_if_index, + // invalid FIB index + ~0, + 1, + // no out-label stack + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else + { + fib_table_entry_special_add(fib_index, + pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_DROP | + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); + } +} + static void ip4_add_interface_routes (u32 sw_if_index, ip4_main_t * im, u32 fib_index, @@ -743,11 +424,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len]; if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32) - fib_table_entry_special_add(fib_index, - &net_pfx, - FIB_SOURCE_INTERFACE, - (FIB_ENTRY_FLAG_DROP | - FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); + ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index); } else if (pfx.fp_len == 31) { @@ -871,11 +548,11 @@ ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) return; } - vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", + vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); } @@ -904,7 +581,7 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip4_af); - /* FIXME-LATER + /* * there is no support for adj-fib handling in the presence of overlapping * subnets on interfaces. Easy fix - disallow overlapping subnets, like * most routers do. @@ -913,31 +590,44 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, if (!is_del) { /* When adding an address check that it does not conflict - with an existing address. */ + with an existing address on any interface in this table. */ ip_interface_address_t *ia; - foreach_ip_interface_address - (&im->lookup_main, ia, sw_if_index, - 0 /* honor unnumbered */ , - ({ - ip4_address_t * x = - ip_interface_address_get_address - (&im->lookup_main, ia); - if (ip4_destination_matches_route - (im, address, x, ia->address_length) || - ip4_destination_matches_route (im, - x, - address, - address_length)) - return - clib_error_create - ("failed to add %U which conflicts with %U for interface %U", - format_ip4_address_and_length, address, - address_length, - format_ip4_address_and_length, x, - ia->address_length, - format_vnet_sw_if_index_name, vnm, - sw_if_index); - })); + vnet_sw_interface_t *sif; + + pool_foreach(sif, vnm->interface_main.sw_interfaces, + ({ + if (im->fib_index_by_sw_if_index[sw_if_index] == + im->fib_index_by_sw_if_index[sif->sw_if_index]) + { + foreach_ip_interface_address + (&im->lookup_main, ia, sif->sw_if_index, + 0 /* honor unnumbered */ , + ({ + ip4_address_t * x = + ip_interface_address_get_address + (&im->lookup_main, ia); + if (ip4_destination_matches_route + (im, address, x, ia->address_length) || + ip4_destination_matches_route (im, + x, + address, + address_length)) + { + vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS; + + return + clib_error_create + ("failed to add %U which conflicts with %U for interface %U", + format_ip4_address_and_length, address, + address_length, + format_ip4_address_and_length, x, + ia->address_length, + format_vnet_sw_if_index_name, vnm, + sif->sw_if_index); + } + })); + } + })); } /* *INDENT-ON* */ @@ -982,6 +672,45 @@ ip4_add_del_interface_address (vlib_main_t * vm, (vm, sw_if_index, address, address_length, is_del); } +void +ip4_directed_broadcast (u32 sw_if_index, u8 enable) +{ + ip_interface_address_t *ia; + ip4_main_t *im; + + im = &ip4_main; + + /* + * when directed broadcast is enabled, the subnet braodcast route will forward + * packets using an adjacency with a broadcast MAC. otherwise it drops + */ + /* *INDENT-OFF* */ + foreach_ip_interface_address(&im->lookup_main, ia, + sw_if_index, 0, + ({ + if (ia->address_length <= 30) + { + ip4_address_t *ipa; + + ipa = ip_interface_address_get_address (&im->lookup_main, ia); + + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]), + }, + }; + + ip4_add_subnet_bcast_route + (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4, + sw_if_index), + &pfx, sw_if_index); + } + })); + /* *INDENT-ON* */ +} + /* Built-in ip4 unicast rx feature path definition */ /* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_unicast, static) = @@ -1054,10 +783,10 @@ VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = .runs_before = VNET_FEATURES ("ip4-lookup"), }; -VNET_FEATURE_INIT (ip4_drop, static) = +VNET_FEATURE_INIT (ip4_not_enabled, static) = { .arc_name = "ip4-unicast", - .node_name = "ip4-drop", + .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-lookup"), }; @@ -1083,10 +812,10 @@ VNET_FEATURE_INIT (ip4_vpath_mc, static) = .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip4_mc_drop, static) = +VNET_FEATURE_INIT (ip4_mc_not_enabled, static) = { .arc_name = "ip4-multicast", - .node_name = "ip4-drop", + .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; @@ -1101,7 +830,7 @@ VNET_FEATURE_INIT (ip4_lookup_mc, static) = VNET_FEATURE_ARC_INIT (ip4_output, static) = { .arc_name = "ip4-output", - .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"), + .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"), .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, }; @@ -1109,6 +838,13 @@ VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = { .arc_name = "ip4-output", .node_name = "ip4-source-and-port-range-check-tx", + .runs_before = VNET_FEATURES ("ip4-outacl"), +}; + +VNET_FEATURE_INIT (ip4_outacl, static) = +{ + .arc_name = "ip4-output", + .node_name = "ip4-outacl", .runs_before = VNET_FEATURES ("ipsec-output-ip4"), }; @@ -1145,8 +881,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) ip4_address_t *address; vlib_main_t *vm = vlib_get_main (); + vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); /* *INDENT-OFF* */ - foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */, + foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm4, ia); ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); @@ -1154,11 +891,11 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) /* *INDENT-ON* */ } - vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, is_add, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index, - is_add, 0, 0); + vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", + sw_if_index, is_add, 0, 0); return /* no error */ 0; } @@ -1177,6 +914,12 @@ ip4_lookup_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, vnet_feature_init))) return error; + if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init))) + return (error); + if ((error = vlib_call_init_function (vm, fib_module_init))) + return error; + if ((error = vlib_call_init_function (vm, mfib_module_init))) + return error; for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) { @@ -1475,8 +1218,9 @@ VNET_FEATURE_ARC_INIT (ip4_local) = /* *INDENT-ON* */ static inline void -ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip, - u8 is_udp, u8 * error, u8 * good_tcp_udp) +ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p, + ip4_header_t * ip, u8 is_udp, u8 * error, + u8 * good_tcp_udp) { u32 flags0; flags0 = ip4_tcp_udp_validate_checksum (vm, p); @@ -1497,10 +1241,247 @@ ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip, } } -#define ip4_local_do_l4_check(is_tcp_udp, flags) \ - (is_tcp_udp && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED \ - || flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM \ - || flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) +#define ip4_local_csum_is_offloaded(_b) \ + _b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM \ + || _b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM + +#define ip4_local_need_csum_check(is_tcp_udp, _b) \ + (is_tcp_udp && !(_b->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED \ + || ip4_local_csum_is_offloaded (_b))) + +#define ip4_local_csum_is_valid(_b) \ + (_b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT \ + || (ip4_local_csum_is_offloaded (_b))) != 0 + +static inline void +ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b, + ip4_header_t * ih, u8 * error) +{ + u8 is_udp, is_tcp_udp, good_tcp_udp; + + is_udp = ih->protocol == IP_PROTOCOL_UDP; + is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP; + + if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b))) + ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp); + else + good_tcp_udp = ip4_local_csum_is_valid (b); + + ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); + *error = (is_tcp_udp && !good_tcp_udp + ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error); +} + +static inline void +ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b, + ip4_header_t ** ih, u8 * error) +{ + u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2]; + + is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP; + is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP; + + is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP; + is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP; + + good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]); + good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]); + + if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0]) + || ip4_local_need_csum_check (is_tcp_udp[1], b[1]))) + { + if (is_tcp_udp[0]) + ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0], + &good_tcp_udp[0]); + if (is_tcp_udp[1]) + ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1], + &good_tcp_udp[1]); + } + + error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ? + IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]); + error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ? + IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]); +} + +static inline void +ip4_local_set_next_and_error (vlib_node_runtime_t * error_node, + vlib_buffer_t * b, u16 * next, u8 error, + u8 head_of_feature_arc) +{ + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + u32 next_index; + + *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next; + b->error = error ? error_node->errors[error] : 0; + if (head_of_feature_arc) + { + next_index = *next; + if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) + { + vnet_feature_arc_start (arc_index, + vnet_buffer (b)->sw_if_index[VLIB_RX], + &next_index, b); + *next = next_index; + } + } +} + +typedef struct +{ + ip4_address_t src; + u32 lbi; + u8 error; +} ip4_local_last_check_t; + +static inline void +ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0, + ip4_local_last_check_t * last_check, u8 * error0) +{ + ip4_fib_mtrie_leaf_t leaf0; + ip4_fib_mtrie_t *mtrie0; + const dpo_id_t *dpo0; + load_balance_t *lb0; + u32 lbi0; + + vnet_buffer (b)->ip.fib_index = + vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index; + + if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32)) + { + mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie; + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + + vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0; + vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0; + + lb0 = load_balance_get (lbi0); + dpo0 = load_balance_get_bucket_i (lb0, 0); + + /* + * Must have a route to source otherwise we drop the packet. + * ip4 broadcasts are accepted, e.g. to make dhcp client work + * + * The checks are: + * - the source is a recieve => it's from us => bogus, do this + * first since it sets a different error code. + * - uRPF check for any route to source - accept if passes. + * - allow packets destined to the broadcast address from unknown sources + */ + + *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL + && dpo0->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0); + *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL + && !fib_urpf_check_size (lb0->lb_urpf) + && ip0->dst_address.as_u32 != 0xFFFFFFFF) ? + IP4_ERROR_SRC_LOOKUP_MISS : *error0); + + last_check->src.as_u32 = ip0->src_address.as_u32; + last_check->lbi = lbi0; + last_check->error = *error0; + } + else + { + vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi; + vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi; + *error0 = last_check->error; + } +} + +static inline void +ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip, + ip4_local_last_check_t * last_check, u8 * error) +{ + ip4_fib_mtrie_leaf_t leaf[2]; + ip4_fib_mtrie_t *mtrie[2]; + const dpo_id_t *dpo[2]; + load_balance_t *lb[2]; + u32 not_last_hit = 0; + u32 lbi[2]; + + not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32; + not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32; + + vnet_buffer (b[0])->ip.fib_index = + vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b[0])->sw_if_index[VLIB_TX] : + vnet_buffer (b[0])->ip.fib_index; + + vnet_buffer (b[1])->ip.fib_index = + vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b[1])->sw_if_index[VLIB_TX] : + vnet_buffer (b[1])->ip.fib_index; + + if (PREDICT_FALSE (not_last_hit)) + { + mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie; + mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie; + + leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address); + leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address); + + leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0], + &ip[0]->src_address, 2); + leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1], + &ip[1]->src_address, 2); + + leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0], + &ip[0]->src_address, 3); + leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1], + &ip[1]->src_address, 3); + + lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]); + lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]); + + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0]; + vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0]; + + vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1]; + vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1]; + + lb[0] = load_balance_get (lbi[0]); + lb[1] = load_balance_get (lbi[1]); + + dpo[0] = load_balance_get_bucket_i (lb[0], 0); + dpo[1] = load_balance_get_bucket_i (lb[1], 0); + + error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL && + dpo[0]->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]); + error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL && + !fib_urpf_check_size (lb[0]->lb_urpf) && + ip[0]->dst_address.as_u32 != 0xFFFFFFFF) + ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]); + + error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL && + dpo[1]->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]); + error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL && + !fib_urpf_check_size (lb[1]->lb_urpf) && + ip[1]->dst_address.as_u32 != 0xFFFFFFFF) + ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]); + + last_check->src.as_u32 = ip[1]->src_address.as_u32; + last_check->lbi = lbi[1]; + last_check->error = error[1]; + } + else + { + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi; + vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi; + + vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi; + vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi; + + error[0] = last_check->error; + error[1] = last_check->error; + } +} static inline uword ip4_local_inline (vlib_main_t * vm, @@ -1509,295 +1490,115 @@ ip4_local_inline (vlib_main_t * vm, { ip4_main_t *im = &ip4_main; ip_lookup_main_t *lm = &im->lookup_main; - ip_local_next_t next_index; - u32 *from, *to_next, n_left_from, n_left_to_next; + u32 *from, n_left_from; vlib_node_runtime_t *error_node = vlib_node_get_runtime (vm, ip4_input_node.index); - u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + u16 nexts[VLIB_FRAME_SIZE], *next; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + ip4_header_t *ip[2]; + u8 error[2]; + + ip4_local_last_check_t last_check = { + .src = {.as_u32 = 0}, + .lbi = ~0, + .error = IP4_ERROR_UNKNOWN_PROTOCOL + }; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; - next_index = node->cached_next_index; if (node->flags & VLIB_NODE_FLAG_TRACE) ip4_forward_next_trace (vm, node, frame, VLIB_TX); - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *p0, *p1; - ip4_header_t *ip0, *ip1; - ip4_fib_mtrie_t *mtrie0, *mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - const dpo_id_t *dpo0, *dpo1; - const load_balance_t *lb0, *lb1; - u32 pi0, next0, fib_index0, lbi0; - u32 pi1, next1, fib_index1, lbi1; - u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u32 sw_if_index0, sw_if_index1; + while (n_left_from >= 6) + { + u32 is_nat, not_batch = 0; - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; + /* Prefetch next iteration. */ + { + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); - next0 = next1 = IP_LOCAL_NEXT_DROP; - error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD); + } - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); + error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL; - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); + ip[0] = vlib_buffer_get_current (b[0]); + ip[1] = vlib_buffer_get_current (b[1]); - vnet_buffer (p0)->l3_hdr_offset = p0->current_data; - vnet_buffer (p1)->l3_hdr_offset = p1->current_data; - - sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; - - /* Treat IP frag packets as "experimental" protocol for now - until support of IP frag reassembly is implemented */ - proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; - proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol; - - if (head_of_feature_arc == 0) - goto skip_checks; - - is_udp0 = proto0 == IP_PROTOCOL_UDP; - is_udp1 = proto1 == IP_PROTOCOL_UDP; - is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; - is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP; - - good_tcp_udp0 = - (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT - || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM - || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0; - good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT - || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM - || p1->flags & - VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0; - - if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags) - || ip4_local_do_l4_check (is_tcp_udp1, - p1->flags))) - { - if (is_tcp_udp0) - ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0, - &good_tcp_udp0); - if (is_tcp_udp1) - ip4_local_validate_l4 (vm, p1, ip1, is_udp1, &error1, - &good_tcp_udp1); - } + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data; + vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data; - ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); - error0 = (is_tcp_udp0 && !good_tcp_udp0 - ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0); - error1 = (is_tcp_udp1 && !good_tcp_udp1 - ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1); - - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); - fib_index1 = - (vnet_buffer (p1)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; - - /* TODO maybe move to lookup? */ - vnet_buffer (p0)->ip.fib_index = fib_index0; - vnet_buffer (p1)->ip.fib_index = fib_index1; - - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - mtrie1 = &ip4_fib_get (fib_index1)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, - 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, - 2); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, - 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, - 3); - - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = - ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; - - vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = - ip4_fib_mtrie_leaf_get_adj_index (leaf1); - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1; + is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED; + not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED); - lb0 = load_balance_get (lbi0); - lb1 = load_balance_get (lbi1); - dpo0 = load_balance_get_bucket_i (lb0, 0); - dpo1 = load_balance_get_bucket_i (lb1, 0); + if (head_of_feature_arc == 0 || (is_nat && not_batch == 0)) + goto skip_checks; - /* - * Must have a route to source otherwise we drop the packet. - * ip4 broadcasts are accepted, e.g. to make dhcp client work - * - * The checks are: - * - the source is a recieve => it's from us => bogus, do this - * first since it sets a different error code. - * - uRPF check for any route to source - accept if passes. - * - allow packets destined to the broadcast address from unknown sources - */ - if (p0->flags & VNET_BUFFER_F_IS_NATED) - goto skip_check0; - - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - dpo0->dpoi_type == DPO_RECEIVE) ? - IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0); - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - !fib_urpf_check_size (lb0->lb_urpf) && - ip0->dst_address.as_u32 != 0xFFFFFFFF) - ? IP4_ERROR_SRC_LOOKUP_MISS : error0); - - skip_check0: - if (p1->flags & VNET_BUFFER_F_IS_NATED) - goto skip_checks; - - error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL && - dpo1->dpoi_type == DPO_RECEIVE) ? - IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1); - error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL && - !fib_urpf_check_size (lb1->lb_urpf) && - ip1->dst_address.as_u32 != 0xFFFFFFFF) - ? IP4_ERROR_SRC_LOOKUP_MISS : error1); - - skip_checks: - - next0 = lm->local_next_by_ip_protocol[proto0]; - next1 = lm->local_next_by_ip_protocol[proto1]; - - next0 = - error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; - next1 = - error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1; - - p0->error = error0 ? error_node->errors[error0] : 0; - p1->error = error1 ? error_node->errors[error1] : 0; - - if (head_of_feature_arc) + if (PREDICT_TRUE (not_batch == 0)) + { + ip4_local_check_l4_csum_x2 (vm, b, ip, error); + ip4_local_check_src_x2 (b, ip, &last_check, error); + } + else + { + if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED)) { - if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); - if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1); + ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]); + ip4_local_check_src (b[0], ip[0], &last_check, &error[0]); + } + if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED)) + { + ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]); + ip4_local_check_src (b[1], ip[1], &last_check, &error[1]); } - - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, - next0, next1); } - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - ip4_fib_mtrie_t *mtrie0; - ip4_fib_mtrie_leaf_t leaf0; - u32 pi0, next0, fib_index0, lbi0; - u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - load_balance_t *lb0; - const dpo_id_t *dpo0; - u32 sw_if_index0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - next0 = IP_LOCAL_NEXT_DROP; - error0 = IP4_ERROR_UNKNOWN_PROTOCOL; - - p0 = vlib_get_buffer (vm, pi0); - ip0 = vlib_buffer_get_current (p0); - vnet_buffer (p0)->l3_hdr_offset = p0->current_data; - sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - - /* Treat IP frag packets as "experimental" protocol for now - until support of IP frag reassembly is implemented */ - proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; - - if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED) - goto skip_check; + skip_checks: - is_udp0 = proto0 == IP_PROTOCOL_UDP; - is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; + next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol]; + next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol]; + ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0], + head_of_feature_arc); + ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1], + head_of_feature_arc); - good_tcp_udp0 = - (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT - || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM - || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0; - - if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags))) - { - ip4_local_validate_l4 (vm, p0, ip0, is_udp0, &error0, - &good_tcp_udp0); - } + b += 2; + next += 2; + n_left_from -= 2; + } - ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); - error0 = (is_tcp_udp0 && !good_tcp_udp0 - ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0); - - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - vnet_buffer (p0)->ip.fib_index = fib_index0; - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, - 2); - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, - 3); - lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0; + while (n_left_from > 0) + { + error[0] = IP4_ERROR_UNKNOWN_PROTOCOL; - lb0 = load_balance_get (lbi0); - dpo0 = load_balance_get_bucket_i (lb0, 0); + ip[0] = vlib_buffer_get_current (b[0]); + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data; - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - dpo0->dpoi_type == DPO_RECEIVE) ? - IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0); - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - !fib_urpf_check_size (lb0->lb_urpf) && - ip0->dst_address.as_u32 != 0xFFFFFFFF) - ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED)) + goto skip_check; - skip_check: - next0 = lm->local_next_by_ip_protocol[proto0]; - next0 = - error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]); + ip4_local_check_src (b[0], ip[0], &last_check, &error[0]); - p0->error = error0 ? error_node->errors[error0] : 0; + skip_check: - if (head_of_feature_arc) - { - if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); - } + next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol]; + ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0], + head_of_feature_arc); - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); - } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + b += 1; + next += 1; + n_left_from -= 1; } + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); return frame->n_vectors; } @@ -2134,25 +1935,36 @@ static char *ip4_arp_error_strings[] = { [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request", }; +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_arp_node) = { - .function = ip4_arp,.name = "ip4-arp",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = - ARRAY_LEN (ip4_arp_error_strings),.error_strings = - ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = + .function = ip4_arp, + .name = "ip4-arp", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { - [IP4_ARP_NEXT_DROP] = "error-drop",} -,}; + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; VLIB_REGISTER_NODE (ip4_glean_node) = { - .function = ip4_glean,.name = "ip4-glean",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = - ARRAY_LEN (ip4_arp_error_strings),.error_strings = - ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = - { - [IP4_ARP_NEXT_DROP] = "error-drop",} -,}; + .function = ip4_glean, + .name = "ip4-glean", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ #define foreach_notrace_ip4_arp_error \ _(DROP) \ @@ -2180,7 +1992,8 @@ VLIB_INIT_FUNCTION (arp_notrace_init); /* Send an ARP request to see if given destination is reachable on given interface. */ clib_error_t * -ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) +ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index, + u8 refresh) { vnet_main_t *vnm = vnet_get_main (); ip4_main_t *im = &ip4_main; @@ -2193,6 +2006,7 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) vlib_buffer_t *b; adj_index_t ai; u32 bi = 0; + u8 unicast_rewrite = 0; si = vnet_get_sw_interface (vnm, sw_if_index); @@ -2220,6 +2034,9 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) &im->ip4_arp_request_packet_template, &bi); + if (!h) + return clib_error_return (0, "ARP request packet allocation failed"); + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); if (PREDICT_FALSE (!hi->hw_address)) { @@ -2250,13 +2067,24 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) /* Peer has been previously resolved, retrieve glean adj instead */ if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) { - adj_unlock (ai); - ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh); - adj = adj_get (ai); + if (refresh) + unicast_rewrite = 1; + else + { + adj_unlock (ai); + ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, + VNET_LINK_IP4, sw_if_index, &nh); + adj = adj_get (ai); + } } /* Add encapsulation string for software interface (e.g. ethernet header). */ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + if (unicast_rewrite) + { + u16 *etype = vlib_buffer_get_current (b) - 2; + etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP); + } vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); { @@ -2275,8 +2103,45 @@ typedef enum { IP4_REWRITE_NEXT_DROP, IP4_REWRITE_NEXT_ICMP_ERROR, + IP4_REWRITE_NEXT_FRAGMENT, + IP4_REWRITE_N_NEXT /* Last */ } ip4_rewrite_next_t; +/** + * This bits of an IPv4 address to mask to construct a multicast + * MAC address + */ +#if CLIB_ARCH_IS_BIG_ENDIAN +#define IP4_MCAST_ADDR_MASK 0x007fffff +#else +#define IP4_MCAST_ADDR_MASK 0xffff7f00 +#endif + +always_inline void +ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, + u16 adj_packet_bytes, bool df, u32 * next, u32 * error) +{ + if (packet_len > adj_packet_bytes) + { + *error = IP4_ERROR_MTU_EXCEEDED; + if (df) + { + icmp4_error_set_vnet_buffer + (b, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + adj_packet_bytes); + *next = IP4_REWRITE_NEXT_ICMP_ERROR; + } + else + { + /* IP fragmentation */ + ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes, + IP4_FRAG_NEXT_IP4_LOOKUP, 0); + *next = IP4_REWRITE_NEXT_FRAGMENT; + } + } +} + always_inline uword ip4_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2291,7 +2156,7 @@ ip4_rewrite_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vlib_get_thread_index (); + u32 thread_index = vm->thread_index; while (n_left_from > 0) { @@ -2437,17 +2302,29 @@ ip4_rewrite_inline (vlib_main_t * vm, vnet_buffer (p1)->ip.save_rewrite_length = rw_len1; /* Check MTU of outgoing interface. */ - error0 = - (vlib_buffer_length_in_chain (vm, p0) > - adj0[0]. - rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : - error0); - error1 = - (vlib_buffer_length_in_chain (vm, p1) > - adj1[0]. - rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : - error1); + ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length), + adj1[0].rewrite_header.max_l3_packet_bytes, + ip1->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next1, &error1); + + if (is_mcast) + { + error0 = ((adj0[0].rewrite_header.sw_if_index == + vnet_buffer (p0)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error0); + error1 = ((adj1[0].rewrite_header.sw_if_index == + vnet_buffer (p1)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error1); + } + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; /* Don't adjust the buffer for ttl issue; icmp-error node wants * to see the IP headerr */ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE)) @@ -2502,16 +2379,26 @@ ip4_rewrite_inline (vlib_main_t * vm, if (is_midchain) { - adj0->sub_type.midchain.fixup_func (vm, adj0, p0); - adj1->sub_type.midchain.fixup_func (vm, adj1, p1); + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + adj1->sub_type.midchain.fixup_func + (vm, adj1, p1, adj0->sub_type.midchain.fixup_data); } if (is_mcast) { /* * copy bytes from the IP address into the MAC rewrite */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); - vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, + (u8 *) ip0); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip1->dst_address.as_u32, + (u8 *) ip1); } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -2590,7 +2477,11 @@ ip4_rewrite_inline (vlib_main_t * vm, /* * copy bytes from the IP address into the MAC rewrite */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, + (u8 *) ip0); } /* Update packet buffer attributes/set output interface. */ @@ -2604,10 +2495,18 @@ ip4_rewrite_inline (vlib_main_t * vm, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ - error0 = (vlib_buffer_length_in_chain (vm, p0) - > adj0[0].rewrite_header.max_l3_packet_bytes - ? IP4_ERROR_MTU_EXCEEDED : error0); + ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + if (is_mcast) + { + error0 = ((adj0[0].rewrite_header.sw_if_index == + vnet_buffer (p0)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error0); + } p0->error = error_node->errors[error0]; /* Don't adjust the buffer for ttl issue; icmp-error node wants @@ -2623,7 +2522,8 @@ ip4_rewrite_inline (vlib_main_t * vm, if (is_midchain) { - adj0->sub_type.midchain.fixup_func (vm, adj0, p0); + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); } if (PREDICT_FALSE @@ -2683,7 +2583,7 @@ ip4_rewrite_inline (vlib_main_t * vm, Next Indices: - adj->rewrite_header.next_index - or @c error-drop + or @c ip4-drop */ static uword ip4_rewrite (vlib_main_t * vm, @@ -2695,6 +2595,16 @@ ip4_rewrite (vlib_main_t * vm, return ip4_rewrite_inline (vm, node, frame, 0, 0, 0); } +static uword +ip4_rewrite_bcast (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 0, 0); + else + return ip4_rewrite_inline (vm, node, frame, 0, 0, 0); +} + static uword ip4_midchain (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -2733,13 +2643,23 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .format_trace = format_ip4_rewrite_trace, - .n_next_nodes = 2, + .n_next_nodes = IP4_REWRITE_N_NEXT, .next_nodes = { - [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_DROP] = "ip4-drop", [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag", }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite) + +VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = { + .function = ip4_rewrite, + .name = "ip4-rewrite-bcast", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_bcast_node, ip4_rewrite_bcast) VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = { .function = ip4_rewrite_mcast, @@ -3143,6 +3063,29 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = }; /* *INDENT-ON* */ +static clib_error_t * +ip4_config (vlib_main_t * vm, unformat_input_t * input) +{ + ip4_main_t *im = &ip4_main; + uword heapsize = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize)) + ; + else + return clib_error_return (0, + "invalid heap-size parameter `%U'", + format_unformat_error, input); + } + + im->mtrie_heap_size = heapsize; + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip"); + /* * fd.io coding-style-patch-verification: ON *