X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fip%2Fip4_forward.c;h=000710ed817ead3c5b8fc56a2ce215e49c02f706;hb=1855b8e4;hp=8263e01c1887e0bd7219c835fae5ed76571c43fd;hpb=9f5a2b6310ce5c8e59c32ca6f27d8a187b0e4346;p=vpp.git diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c old mode 100755 new mode 100644 index 8263e01c188..000710ed817 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -39,6 +39,7 @@ #include #include +#include #include /* for ethernet_header_t */ #include /* for ethernet_arp_header_t */ #include @@ -53,384 +54,7 @@ #include #include /* for mFIB table and entry creation */ -/** - * @file - * @brief IPv4 Forwarding. - * - * This file contains the source code for IPv4 forwarding. - */ - -void -ip4_forward_next_trace (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - vlib_rx_or_tx_t which_adj_index); - -always_inline uword -ip4_lookup_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - int lookup_for_responses_to_locally_received_packets) -{ - ip4_main_t *im = &ip4_main; - vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; - u32 n_left_from, n_left_to_next, *from, *to_next; - ip_lookup_next_t next; - u32 thread_index = vlib_get_thread_index (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next = node->cached_next_index; - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - - while (n_left_from >= 8 && n_left_to_next >= 4) - { - vlib_buffer_t *p0, *p1, *p2, *p3; - ip4_header_t *ip0, *ip1, *ip2, *ip3; - ip_lookup_next_t next0, next1, next2, next3; - const load_balance_t *lb0, *lb1, *lb2, *lb3; - ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3; - ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3; - ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3; - u32 pi0, fib_index0, lb_index0; - u32 pi1, fib_index1, lb_index1; - u32 pi2, fib_index2, lb_index2; - u32 pi3, fib_index3, lb_index3; - flow_hash_config_t flow_hash_config0, flow_hash_config1; - flow_hash_config_t flow_hash_config2, flow_hash_config3; - u32 hash_c0, hash_c1, hash_c2, hash_c3; - const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - pi2 = to_next[2] = from[2]; - pi3 = to_next[3] = from[3]; - - from += 4; - to_next += 4; - n_left_to_next -= 4; - n_left_from -= 4; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - p2 = vlib_get_buffer (vm, pi2); - p3 = vlib_get_buffer (vm, pi3); - - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - ip2 = vlib_buffer_get_current (p2); - ip3 = vlib_buffer_get_current (p3); - - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - dst_addr2 = &ip2->dst_address; - dst_addr3 = &ip3->dst_address; - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index1 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); - fib_index2 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p2)->sw_if_index[VLIB_RX]); - fib_index3 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p3)->sw_if_index[VLIB_RX]); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = - (vnet_buffer (p1)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; - fib_index2 = - (vnet_buffer (p2)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX]; - fib_index3 = - (vnet_buffer (p3)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX]; - - - if (!lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - mtrie1 = &ip4_fib_get (fib_index1)->mtrie; - mtrie2 = &ip4_fib_get (fib_index2)->mtrie; - mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); - leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); - leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); - leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3); - } - - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2); - } - - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3); - } - - if (lookup_for_responses_to_locally_received_packets) - { - lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; - lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX]; - lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX]; - } - else - { - lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2); - lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3); - } - - ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3); - lb0 = load_balance_get (lb_index0); - lb1 = load_balance_get (lb_index1); - lb2 = load_balance_get (lb_index2); - lb3 = load_balance_get (lb_index3); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - - /* Use flow hash to compute multipath adjacency. */ - hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; - hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0; - hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - flow_hash_config0 = lb0->lb_hash_config; - hash_c0 = vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) - { - flow_hash_config1 = lb1->lb_hash_config; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = - ip4_compute_flow_hash (ip1, flow_hash_config1); - dpo1 = - load_balance_get_fwd_bucket (lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); - } - else - { - dpo1 = load_balance_get_bucket_i (lb1, 0); - } - if (PREDICT_FALSE (lb2->lb_n_buckets > 1)) - { - flow_hash_config2 = lb2->lb_hash_config; - hash_c2 = vnet_buffer (p2)->ip.flow_hash = - ip4_compute_flow_hash (ip2, flow_hash_config2); - dpo2 = - load_balance_get_fwd_bucket (lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); - } - else - { - dpo2 = load_balance_get_bucket_i (lb2, 0); - } - if (PREDICT_FALSE (lb3->lb_n_buckets > 1)) - { - flow_hash_config3 = lb3->lb_hash_config; - hash_c3 = vnet_buffer (p3)->ip.flow_hash = - ip4_compute_flow_hash (ip3, flow_hash_config3); - dpo3 = - load_balance_get_fwd_bucket (lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); - } - else - { - dpo3 = load_balance_get_bucket_i (lb3, 0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - next1 = dpo1->dpoi_next_node; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - next2 = dpo2->dpoi_next_node; - vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; - next3 = dpo3->dpoi_next_node; - vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; - - vlib_increment_combined_counter - (cm, thread_index, lb_index0, 1, - vlib_buffer_length_in_chain (vm, p0)); - vlib_increment_combined_counter - (cm, thread_index, lb_index1, 1, - vlib_buffer_length_in_chain (vm, p1)); - vlib_increment_combined_counter - (cm, thread_index, lb_index2, 1, - vlib_buffer_length_in_chain (vm, p2)); - vlib_increment_combined_counter - (cm, thread_index, lb_index3, 1, - vlib_buffer_length_in_chain (vm, p3)); - - vlib_validate_buffer_enqueue_x4 (vm, node, next, - to_next, n_left_to_next, - pi0, pi1, pi2, pi3, - next0, next1, next2, next3); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - ip_lookup_next_t next0; - const load_balance_t *lb0; - ip4_fib_mtrie_t *mtrie0; - ip4_fib_mtrie_leaf_t leaf0; - ip4_address_t *dst_addr0; - u32 pi0, fib_index0, lbi0; - flow_hash_config_t flow_hash_config0; - const dpo_id_t *dpo0; - u32 hash_c0; - - pi0 = from[0]; - to_next[0] = pi0; - - p0 = vlib_get_buffer (vm, pi0); - - ip0 = vlib_buffer_get_current (p0); - - dst_addr0 = &ip0->dst_address; - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - if (!lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); - } - - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - - if (lookup_for_responses_to_locally_received_packets) - lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - else - { - /* Handle default route. */ - lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - } - - ASSERT (lbi0); - lb0 = load_balance_get (lbi0); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - - /* Use flow hash to compute multipath adjacency. */ - hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - flow_hash_config0 = lb0->lb_hash_config; - - hash_c0 = vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - vlib_increment_combined_counter (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, - p0)); - - from += 1; - to_next += 1; - n_left_to_next -= 1; - n_left_from -= 1; - - if (PREDICT_FALSE (next0 != next)) - { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next, n_left_to_next); - next = next0; - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; - } - } - - vlib_put_next_frame (vm, node, next, n_left_to_next); - } - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, VLIB_TX); - - return frame->n_vectors; -} +#include /** @brief IPv4 lookup node. @node ip4-lookup @@ -476,22 +100,28 @@ ip4_lookup (vlib_main_t * vm, static u8 *format_ip4_lookup_trace (u8 * s, va_list * args); +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_lookup_node) = { -.function = ip4_lookup,.name = "ip4-lookup",.vector_size = - sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes = - IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,}; + .function = ip4_lookup, + .name = "ip4-lookup", + .vector_size = sizeof (u32), + .format_trace = format_ip4_lookup_trace, + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = IP4_LOOKUP_NEXT_NODES, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup); -always_inline uword +static uword ip4_load_balance (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { vlib_combined_counter_main_t *cm = &load_balance_main.lbm_via_counters; u32 n_left_from, n_left_to_next, *from, *to_next; ip_lookup_next_t next; - u32 thread_index = vlib_get_thread_index (); + u32 thread_index = vm->thread_index; from = vlib_frame_vector_args (frame); n_left_from = frame->n_vectors; @@ -671,11 +301,17 @@ ip4_load_balance (vlib_main_t * vm, return frame->n_vectors; } +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_load_balance_node) = { -.function = ip4_load_balance,.name = "ip4-load-balance",.vector_size = - sizeof (u32),.sibling_of = "ip4-lookup",.format_trace = - format_ip4_lookup_trace,}; + .function = ip4_load_balance, + .name = "ip4-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = + format_ip4_lookup_trace, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance); @@ -704,6 +340,45 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index, return result; } +static void +ip4_add_subnet_bcast_route (u32 fib_index, + fib_prefix_t *pfx, + u32 sw_if_index) +{ + vnet_sw_interface_flags_t iflags; + + iflags = vnet_sw_interface_get_flags(vnet_get_main(), sw_if_index); + + fib_table_entry_special_remove(fib_index, + pfx, + FIB_SOURCE_INTERFACE); + + if (iflags & VNET_SW_INTERFACE_FLAG_DIRECTED_BCAST) + { + fib_table_entry_update_one_path (fib_index, pfx, + FIB_SOURCE_INTERFACE, + FIB_ENTRY_FLAG_NONE, + DPO_PROTO_IP4, + /* No next-hop address */ + &ADJ_BCAST_ADDR, + sw_if_index, + // invalid FIB index + ~0, + 1, + // no out-label stack + NULL, + FIB_ROUTE_PATH_FLAG_NONE); + } + else + { + fib_table_entry_special_add(fib_index, + pfx, + FIB_SOURCE_INTERFACE, + (FIB_ENTRY_FLAG_DROP | + FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); + } +} + static void ip4_add_interface_routes (u32 sw_if_index, ip4_main_t * im, u32 fib_index, @@ -724,7 +399,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, /* No next-hop address */ NULL, sw_if_index, @@ -749,11 +424,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); net_pfx.fp_addr.ip4.as_u32 |= ~im->fib_masks[pfx.fp_len]; if (net_pfx.fp_addr.ip4.as_u32 != pfx.fp_addr.ip4.as_u32) - fib_table_entry_special_add(fib_index, - &net_pfx, - FIB_SOURCE_INTERFACE, - (FIB_ENTRY_FLAG_DROP | - FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT)); + ip4_add_subnet_bcast_route(fib_index, &net_pfx, sw_if_index); } else if (pfx.fp_len == 31) { @@ -767,7 +438,7 @@ ip4_add_interface_routes (u32 sw_if_index, fib_table_entry_update_one_path (fib_index, &net_pfx, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_ATTACHED), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &net_pfx.fp_addr, sw_if_index, // invalid FIB index @@ -803,7 +474,7 @@ ip4_add_interface_routes (u32 sw_if_index, FIB_SOURCE_INTERFACE, (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL), - FIB_PROTOCOL_IP4, + DPO_PROTO_IP4, &pfx.fp_addr, sw_if_index, // invalid FIB index @@ -877,11 +548,11 @@ ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) return; } - vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", + vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); } @@ -898,12 +569,19 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, u32 if_address_index, elts_before; ip4_address_fib_t ip4_af, *addr_fib = 0; + /* local0 interface doesn't support IP addressing */ + if (sw_if_index == 0) + { + return + clib_error_create ("local0 interface doesn't support IP addressing"); + } + vec_validate (im->fib_index_by_sw_if_index, sw_if_index); ip4_addr_fib_init (&ip4_af, address, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip4_af); - /* FIXME-LATER + /* * there is no support for adj-fib handling in the presence of overlapping * subnets on interfaces. Easy fix - disallow overlapping subnets, like * most routers do. @@ -912,31 +590,44 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, if (!is_del) { /* When adding an address check that it does not conflict - with an existing address. */ + with an existing address on any interface in this table. */ ip_interface_address_t *ia; - foreach_ip_interface_address - (&im->lookup_main, ia, sw_if_index, - 0 /* honor unnumbered */ , - ({ - ip4_address_t * x = - ip_interface_address_get_address - (&im->lookup_main, ia); - if (ip4_destination_matches_route - (im, address, x, ia->address_length) || - ip4_destination_matches_route (im, - x, - address, - address_length)) - return - clib_error_create - ("failed to add %U which conflicts with %U for interface %U", - format_ip4_address_and_length, address, - address_length, - format_ip4_address_and_length, x, - ia->address_length, - format_vnet_sw_if_index_name, vnm, - sw_if_index); - })); + vnet_sw_interface_t *sif; + + pool_foreach(sif, vnm->interface_main.sw_interfaces, + ({ + if (im->fib_index_by_sw_if_index[sw_if_index] == + im->fib_index_by_sw_if_index[sif->sw_if_index]) + { + foreach_ip_interface_address + (&im->lookup_main, ia, sif->sw_if_index, + 0 /* honor unnumbered */ , + ({ + ip4_address_t * x = + ip_interface_address_get_address + (&im->lookup_main, ia); + if (ip4_destination_matches_route + (im, address, x, ia->address_length) || + ip4_destination_matches_route (im, + x, + address, + address_length)) + { + vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS; + + return + clib_error_create + ("failed to add %U which conflicts with %U for interface %U", + format_ip4_address_and_length, address, + address_length, + format_ip4_address_and_length, x, + ia->address_length, + format_vnet_sw_if_index_name, vnm, + sif->sw_if_index); + } + })); + } + })); } /* *INDENT-ON* */ @@ -981,6 +672,45 @@ ip4_add_del_interface_address (vlib_main_t * vm, (vm, sw_if_index, address, address_length, is_del); } +void +ip4_directed_broadcast (u32 sw_if_index, u8 enable) +{ + ip_interface_address_t *ia; + ip4_main_t *im; + + im = &ip4_main; + + /* + * when directed broadcast is enabled, the subnet braodcast route will forward + * packets using an adjacency with a broadcast MAC. otherwise it drops + */ + /* *INDENT-OFF* */ + foreach_ip_interface_address(&im->lookup_main, ia, + sw_if_index, 0, + ({ + if (ia->address_length <= 30) + { + ip4_address_t *ipa; + + ipa = ip_interface_address_get_address (&im->lookup_main, ia); + + fib_prefix_t pfx = { + .fp_len = 32, + .fp_proto = FIB_PROTOCOL_IP4, + .fp_addr = { + .ip4.as_u32 = (ipa->as_u32 | ~im->fib_masks[ia->address_length]), + }, + }; + + ip4_add_subnet_bcast_route + (fib_table_get_index_for_sw_if_index(FIB_PROTOCOL_IP4, + sw_if_index), + &pfx, sw_if_index); + } + })); + /* *INDENT-ON* */ +} + /* Built-in ip4 unicast rx feature path definition */ /* *INDENT-OFF* */ VNET_FEATURE_ARC_INIT (ip4_unicast, static) = @@ -1053,10 +783,10 @@ VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = .runs_before = VNET_FEATURES ("ip4-lookup"), }; -VNET_FEATURE_INIT (ip4_drop, static) = +VNET_FEATURE_INIT (ip4_not_enabled, static) = { .arc_name = "ip4-unicast", - .node_name = "ip4-drop", + .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-lookup"), }; @@ -1082,10 +812,10 @@ VNET_FEATURE_INIT (ip4_vpath_mc, static) = .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip4_mc_drop, static) = +VNET_FEATURE_INIT (ip4_mc_not_enabled, static) = { .arc_name = "ip4-multicast", - .node_name = "ip4-drop", + .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; @@ -1100,7 +830,7 @@ VNET_FEATURE_INIT (ip4_lookup_mc, static) = VNET_FEATURE_ARC_INIT (ip4_output, static) = { .arc_name = "ip4-output", - .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"), + .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"), .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, }; @@ -1108,6 +838,13 @@ VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = { .arc_name = "ip4-output", .node_name = "ip4-source-and-port-range-check-tx", + .runs_before = VNET_FEATURES ("ip4-outacl"), +}; + +VNET_FEATURE_INIT (ip4_outacl, static) = +{ + .arc_name = "ip4-output", + .node_name = "ip4-outacl", .runs_before = VNET_FEATURES ("ipsec-output-ip4"), }; @@ -1144,8 +881,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) ip4_address_t *address; vlib_main_t *vm = vlib_get_main (); + vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); /* *INDENT-OFF* */ - foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */, + foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm4, ia); ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); @@ -1153,11 +891,11 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) /* *INDENT-ON* */ } - vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, is_add, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index, - is_add, 0, 0); + vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", + sw_if_index, is_add, 0, 0); return /* no error */ 0; } @@ -1176,6 +914,12 @@ ip4_lookup_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, vnet_feature_init))) return error; + if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init))) + return (error); + if ((error = vlib_call_init_function (vm, fib_module_init))) + return error; + if ((error = vlib_call_init_function (vm, mfib_module_init))) + return error; for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) { @@ -1191,8 +935,10 @@ ip4_lookup_init (vlib_main_t * vm) ip_lookup_init (&im->lookup_main, /* is_ip6 */ 0); /* Create FIB with index 0 and table id of 0. */ - fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); - mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0); + fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + FIB_SOURCE_DEFAULT_ROUTE); + mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, 0, + MFIB_SOURCE_DEFAULT_ROUTE); { pg_node_t *pn; @@ -1249,7 +995,7 @@ format_ip4_forward_next_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); - uword indent = format_get_indent (s); + u32 indent = format_get_indent (s); s = format (s, "%U%U", format_white_space, indent, format_ip4_header, t->packet_data, sizeof (t->packet_data)); @@ -1262,7 +1008,7 @@ format_ip4_lookup_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); - uword indent = format_get_indent (s); + u32 indent = format_get_indent (s); s = format (s, "fib %d dpo-idx %d flow hash: 0x%08x", t->fib_index, t->dpo_index, t->flow_hash); @@ -1278,7 +1024,7 @@ format_ip4_rewrite_trace (u8 * s, va_list * args) CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *); - uword indent = format_get_indent (s); + u32 indent = format_get_indent (s); s = format (s, "tx_sw_if_index %d dpo-idx %d : %U flow hash: 0x%08x", t->fib_index, t->dpo_index, format_ip_adjacency, @@ -1378,68 +1124,6 @@ ip4_forward_next_trace (vlib_main_t * vm, } } -static uword -ip4_drop_or_punt (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, ip4_error_t error_code) -{ - u32 *buffers = vlib_frame_vector_args (frame); - uword n_packets = frame->n_vectors; - - vlib_error_drop_buffers (vm, node, buffers, - /* stride */ 1, - n_packets, - /* next */ 0, - ip4_input_node.index, error_code); - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, VLIB_TX); - - return n_packets; -} - -static uword -ip4_drop (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_DROP); -} - -static uword -ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) -{ - return ip4_drop_or_punt (vm, node, frame, IP4_ERROR_ADJACENCY_PUNT); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (ip4_drop_node, static) = -{ - .function = ip4_drop, - .name = "ip4-drop", - .vector_size = sizeof (u32), - .format_trace = format_ip4_forward_next_trace, - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-drop", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_drop_node, ip4_drop); - -VLIB_REGISTER_NODE (ip4_punt_node, static) = -{ - .function = ip4_punt, - .name = "ip4-punt", - .vector_size = sizeof (u32), - .format_trace = format_ip4_forward_next_trace, - .n_next_nodes = 1, - .next_nodes = { - [0] = "error-punt", - }, -}; - -VLIB_NODE_FUNCTION_MULTIARCH (ip4_punt_node, ip4_punt); -/* *INDENT-ON */ - /* Compute TCP/UDP/ICMP4 checksum in software. */ u16 ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, @@ -1447,7 +1131,7 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, { ip_csum_t sum0; u32 ip_header_length, payload_length_host_byte_order; - u32 n_this_buffer, n_bytes_left; + u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer; u16 sum16; void *data_this_buffer; @@ -1474,10 +1158,13 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, n_bytes_left = n_this_buffer = payload_length_host_byte_order; data_this_buffer = (void *) ip0 + ip_header_length; - if (n_this_buffer + ip_header_length > p0->current_length) - n_this_buffer = - p0->current_length > - ip_header_length ? p0->current_length - ip_header_length : 0; + n_ip_bytes_this_buffer = + p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data); + if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer) + { + n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ? + n_ip_bytes_this_buffer - ip_header_length : 0; + } while (1) { sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); @@ -1509,398 +1196,409 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0) udp0 = (void *) (ip0 + 1); if (ip0->protocol == IP_PROTOCOL_UDP && udp0->checksum == 0) { - p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED - | IP_BUFFER_L4_CHECKSUM_CORRECT); + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | VNET_BUFFER_F_L4_CHECKSUM_CORRECT); return p0->flags; } - sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0); - - p0->flags |= (IP_BUFFER_L4_CHECKSUM_COMPUTED - | ((sum16 == 0) << LOG2_IP_BUFFER_L4_CHECKSUM_CORRECT)); - - return p0->flags; -} - -/* *INDENT-OFF* */ -VNET_FEATURE_ARC_INIT (ip4_local) = -{ - .arc_name = "ip4-local", - .start_nodes = VNET_FEATURES ("ip4-local"), -}; -/* *INDENT-ON* */ - -static inline uword -ip4_local_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int head_of_feature_arc) -{ - ip4_main_t *im = &ip4_main; - ip_lookup_main_t *lm = &im->lookup_main; - ip_local_next_t next_index; - u32 *from, *to_next, n_left_from, n_left_to_next; - vlib_node_runtime_t *error_node = - vlib_node_get_runtime (vm, ip4_input_node.index); - u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next_index = node->cached_next_index; - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, VLIB_TX); - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - - while (n_left_from >= 4 && n_left_to_next >= 2) - { - vlib_buffer_t *p0, *p1; - ip4_header_t *ip0, *ip1; - udp_header_t *udp0, *udp1; - ip4_fib_mtrie_t *mtrie0, *mtrie1; - ip4_fib_mtrie_leaf_t leaf0, leaf1; - const dpo_id_t *dpo0, *dpo1; - const load_balance_t *lb0, *lb1; - u32 pi0, ip_len0, udp_len0, flags0, next0, fib_index0, lbi0; - u32 pi1, ip_len1, udp_len1, flags1, next1, fib_index1, lbi1; - i32 len_diff0, len_diff1; - u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - u8 error1, is_udp1, is_tcp_udp1, good_tcp_udp1, proto1; - u32 sw_if_index0, sw_if_index1; - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - - next0 = next1 = IP_LOCAL_NEXT_DROP; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; - vnet_buffer (p1)->ip.start_of_ip_header = p1->current_data; - - sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; - - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); - - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - fib_index1 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index1); - fib_index1 = - (vnet_buffer (p1)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; - - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - mtrie1 = &ip4_fib_get (fib_index1)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); - leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address); - - /* Treat IP frag packets as "experimental" protocol for now - until support of IP frag reassembly is implemented */ - proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; - proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol; - - if (head_of_feature_arc == 0) - { - error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; - goto skip_checks; - } - - is_udp0 = proto0 == IP_PROTOCOL_UDP; - is_udp1 = proto1 == IP_PROTOCOL_UDP; - is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; - is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP; - - flags0 = p0->flags; - flags1 = p1->flags; + sum16 = ip4_tcp_udp_compute_checksum (vm, p0, ip0); - good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; - good_tcp_udp1 = (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + p0->flags |= (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED + | ((sum16 == 0) << VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT)); - udp0 = ip4_next_header (ip0); - udp1 = ip4_next_header (ip1); + return p0->flags; +} - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; - good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; +/* *INDENT-OFF* */ +VNET_FEATURE_ARC_INIT (ip4_local) = +{ + .arc_name = "ip4-local", + .start_nodes = VNET_FEATURES ("ip4-local"), +}; +/* *INDENT-ON* */ - /* Verify UDP length. */ - ip_len0 = clib_net_to_host_u16 (ip0->length); - ip_len1 = clib_net_to_host_u16 (ip1->length); - udp_len0 = clib_net_to_host_u16 (udp0->length); - udp_len1 = clib_net_to_host_u16 (udp1->length); +static inline void +ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p, + ip4_header_t * ip, u8 is_udp, u8 * error, + u8 * good_tcp_udp) +{ + u32 flags0; + flags0 = ip4_tcp_udp_validate_checksum (vm, p); + *good_tcp_udp = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + if (is_udp) + { + udp_header_t *udp; + u32 ip_len, udp_len; + i32 len_diff; + udp = ip4_next_header (ip); + /* Verify UDP length. */ + ip_len = clib_net_to_host_u16 (ip->length); + udp_len = clib_net_to_host_u16 (udp->length); + + len_diff = ip_len - udp_len; + *good_tcp_udp &= len_diff >= 0; + *error = len_diff < 0 ? IP4_ERROR_UDP_LENGTH : *error; + } +} - len_diff0 = ip_len0 - udp_len0; - len_diff1 = ip_len1 - udp_len1; +#define ip4_local_csum_is_offloaded(_b) \ + _b->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM \ + || _b->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM - len_diff0 = is_udp0 ? len_diff0 : 0; - len_diff1 = is_udp1 ? len_diff1 : 0; +#define ip4_local_need_csum_check(is_tcp_udp, _b) \ + (is_tcp_udp && !(_b->flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED \ + || ip4_local_csum_is_offloaded (_b))) - if (PREDICT_FALSE (!(is_tcp_udp0 & is_tcp_udp1 - & good_tcp_udp0 & good_tcp_udp1))) - { - if (is_tcp_udp0) - { - if (is_tcp_udp0 - && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) - flags0 = ip4_tcp_udp_validate_checksum (vm, p0); - good_tcp_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; - good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; - } - if (is_tcp_udp1) - { - if (is_tcp_udp1 - && !(flags1 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) - flags1 = ip4_tcp_udp_validate_checksum (vm, p1); - good_tcp_udp1 = - (flags1 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; - good_tcp_udp1 |= is_udp1 && udp1->checksum == 0; - } - } +#define ip4_local_csum_is_valid(_b) \ + (_b->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT \ + || (ip4_local_csum_is_offloaded (_b))) != 0 - good_tcp_udp0 &= len_diff0 >= 0; - good_tcp_udp1 &= len_diff1 >= 0; +static inline void +ip4_local_check_l4_csum (vlib_main_t * vm, vlib_buffer_t * b, + ip4_header_t * ih, u8 * error) +{ + u8 is_udp, is_tcp_udp, good_tcp_udp; - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 2); + is_udp = ih->protocol == IP_PROTOCOL_UDP; + is_tcp_udp = is_udp || ih->protocol == IP_PROTOCOL_TCP; - error0 = error1 = IP4_ERROR_UNKNOWN_PROTOCOL; + if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp, b))) + ip4_local_l4_csum_validate (vm, b, ih, is_udp, error, &good_tcp_udp); + else + good_tcp_udp = ip4_local_csum_is_valid (b); - error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0; - error1 = len_diff1 < 0 ? IP4_ERROR_UDP_LENGTH : error1; + ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); + *error = (is_tcp_udp && !good_tcp_udp + ? IP4_ERROR_TCP_CHECKSUM + is_udp : *error); +} - ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); - error0 = (is_tcp_udp0 && !good_tcp_udp0 - ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0); - error1 = (is_tcp_udp1 && !good_tcp_udp1 - ? IP4_ERROR_TCP_CHECKSUM + is_udp1 : error1); +static inline void +ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b, + ip4_header_t ** ih, u8 * error) +{ + u8 is_udp[2], is_tcp_udp[2], good_tcp_udp[2]; - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); - leaf1 = - ip4_fib_mtrie_lookup_step (mtrie1, leaf1, &ip1->src_address, 3); + is_udp[0] = ih[0]->protocol == IP_PROTOCOL_UDP; + is_udp[1] = ih[1]->protocol == IP_PROTOCOL_UDP; - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0 = - ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; + is_tcp_udp[0] = is_udp[0] || ih[0]->protocol == IP_PROTOCOL_TCP; + is_tcp_udp[1] = is_udp[1] || ih[1]->protocol == IP_PROTOCOL_TCP; - vnet_buffer (p1)->ip.adj_index[VLIB_RX] = lbi1 = - ip4_fib_mtrie_leaf_get_adj_index (leaf1); - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = lbi1; + good_tcp_udp[0] = ip4_local_csum_is_valid (b[0]); + good_tcp_udp[1] = ip4_local_csum_is_valid (b[1]); - lb0 = load_balance_get (lbi0); - lb1 = load_balance_get (lbi1); - dpo0 = load_balance_get_bucket_i (lb0, 0); - dpo1 = load_balance_get_bucket_i (lb1, 0); + if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0]) + || ip4_local_need_csum_check (is_tcp_udp[1], b[1]))) + { + if (is_tcp_udp[0]) + ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0], + &good_tcp_udp[0]); + if (is_tcp_udp[1]) + ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1], + &good_tcp_udp[1]); + } - /* - * Must have a route to source otherwise we drop the packet. - * ip4 broadcasts are accepted, e.g. to make dhcp client work - * - * The checks are: - * - the source is a recieve => it's from us => bogus, do this - * first since it sets a different error code. - * - uRPF check for any route to source - accept if passes. - * - allow packets destined to the broadcast address from unknown sources - */ - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - dpo0->dpoi_type == DPO_RECEIVE) ? - IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0); - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - !fib_urpf_check_size (lb0->lb_urpf) && - ip0->dst_address.as_u32 != 0xFFFFFFFF) - ? IP4_ERROR_SRC_LOOKUP_MISS : error0); - error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL && - dpo1->dpoi_type == DPO_RECEIVE) ? - IP4_ERROR_SPOOFED_LOCAL_PACKETS : error1); - error1 = ((error1 == IP4_ERROR_UNKNOWN_PROTOCOL && - !fib_urpf_check_size (lb1->lb_urpf) && - ip1->dst_address.as_u32 != 0xFFFFFFFF) - ? IP4_ERROR_SRC_LOOKUP_MISS : error1); - - skip_checks: - - next0 = lm->local_next_by_ip_protocol[proto0]; - next1 = lm->local_next_by_ip_protocol[proto1]; - - next0 = - error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; - next1 = - error1 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next1; - - p0->error = error0 ? error_node->errors[error0] : 0; - p1->error = error1 ? error_node->errors[error1] : 0; - - if (head_of_feature_arc) - { - if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); - if (PREDICT_TRUE (error1 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, sw_if_index1, &next1, p1); - } + error[0] = (is_tcp_udp[0] && !good_tcp_udp[0] ? + IP4_ERROR_TCP_CHECKSUM + is_udp[0] : error[0]); + error[1] = (is_tcp_udp[1] && !good_tcp_udp[1] ? + IP4_ERROR_TCP_CHECKSUM + is_udp[1] : error[1]); +} - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, pi0, pi1, - next0, next1); - } +static inline void +ip4_local_set_next_and_error (vlib_node_runtime_t * error_node, + vlib_buffer_t * b, u16 * next, u8 error, + u8 head_of_feature_arc) +{ + u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index; + u32 next_index; - while (n_left_from > 0 && n_left_to_next > 0) + *next = error != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : *next; + b->error = error ? error_node->errors[error] : 0; + if (head_of_feature_arc) + { + next_index = *next; + if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) { - vlib_buffer_t *p0; - ip4_header_t *ip0; - udp_header_t *udp0; - ip4_fib_mtrie_t *mtrie0; - ip4_fib_mtrie_leaf_t leaf0; - u32 pi0, next0, ip_len0, udp_len0, flags0, fib_index0, lbi0; - i32 len_diff0; - u8 error0, is_udp0, is_tcp_udp0, good_tcp_udp0, proto0; - load_balance_t *lb0; - const dpo_id_t *dpo0; - u32 sw_if_index0; - - pi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - - next0 = IP_LOCAL_NEXT_DROP; - - p0 = vlib_get_buffer (vm, pi0); - - ip0 = vlib_buffer_get_current (p0); - - vnet_buffer (p0)->ip.start_of_ip_header = p0->current_data; + vnet_feature_arc_start (arc_index, + vnet_buffer (b)->sw_if_index[VLIB_RX], + &next_index, b); + *next = next_index; + } + } +} - sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; +typedef struct +{ + ip4_address_t src; + u32 lbi; + u8 error; +} ip4_local_last_check_t; - fib_index0 = vec_elt (im->fib_index_by_sw_if_index, sw_if_index0); +static inline void +ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0, + ip4_local_last_check_t * last_check, u8 * error0) +{ + ip4_fib_mtrie_leaf_t leaf0; + ip4_fib_mtrie_t *mtrie0; + const dpo_id_t *dpo0; + load_balance_t *lb0; + u32 lbi0; - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; + vnet_buffer (b)->ip.fib_index = + vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index; - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; + if (PREDICT_FALSE (last_check->src.as_u32 != ip0->src_address.as_u32)) + { + mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie; + leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); + leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); + + vnet_buffer (b)->ip.adj_index[VLIB_TX] = lbi0; + vnet_buffer (b)->ip.adj_index[VLIB_RX] = lbi0; + + lb0 = load_balance_get (lbi0); + dpo0 = load_balance_get_bucket_i (lb0, 0); + + /* + * Must have a route to source otherwise we drop the packet. + * ip4 broadcasts are accepted, e.g. to make dhcp client work + * + * The checks are: + * - the source is a recieve => it's from us => bogus, do this + * first since it sets a different error code. + * - uRPF check for any route to source - accept if passes. + * - allow packets destined to the broadcast address from unknown sources + */ + + *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL + && dpo0->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : *error0); + *error0 = ((*error0 == IP4_ERROR_UNKNOWN_PROTOCOL + && !fib_urpf_check_size (lb0->lb_urpf) + && ip0->dst_address.as_u32 != 0xFFFFFFFF) ? + IP4_ERROR_SRC_LOOKUP_MISS : *error0); + + last_check->src.as_u32 = ip0->src_address.as_u32; + last_check->lbi = lbi0; + last_check->error = *error0; + } + else + { + vnet_buffer (b)->ip.adj_index[VLIB_TX] = last_check->lbi; + vnet_buffer (b)->ip.adj_index[VLIB_RX] = last_check->lbi; + *error0 = last_check->error; + } +} - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address); +static inline void +ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip, + ip4_local_last_check_t * last_check, u8 * error) +{ + ip4_fib_mtrie_leaf_t leaf[2]; + ip4_fib_mtrie_t *mtrie[2]; + const dpo_id_t *dpo[2]; + load_balance_t *lb[2]; + u32 not_last_hit = 0; + u32 lbi[2]; - /* Treat IP frag packets as "experimental" protocol for now - until support of IP frag reassembly is implemented */ - proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; + not_last_hit |= ip[0]->src_address.as_u32 ^ last_check->src.as_u32; + not_last_hit |= ip[1]->src_address.as_u32 ^ last_check->src.as_u32; - if (head_of_feature_arc == 0) - { - error0 = IP4_ERROR_UNKNOWN_PROTOCOL; - goto skip_check; - } + vnet_buffer (b[0])->ip.fib_index = + vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b[0])->sw_if_index[VLIB_TX] : + vnet_buffer (b[0])->ip.fib_index; - is_udp0 = proto0 == IP_PROTOCOL_UDP; - is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; + vnet_buffer (b[1])->ip.fib_index = + vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ? + vnet_buffer (b[1])->sw_if_index[VLIB_TX] : + vnet_buffer (b[1])->ip.fib_index; - flags0 = p0->flags; + if (PREDICT_FALSE (not_last_hit)) + { + mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie; + mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie; + + leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address); + leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address); + + leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0], + &ip[0]->src_address, 2); + leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1], + &ip[1]->src_address, 2); + + leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0], + &ip[0]->src_address, 3); + leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1], + &ip[1]->src_address, 3); + + lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]); + lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]); + + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = lbi[0]; + vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = lbi[0]; + + vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = lbi[1]; + vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = lbi[1]; + + lb[0] = load_balance_get (lbi[0]); + lb[1] = load_balance_get (lbi[1]); + + dpo[0] = load_balance_get_bucket_i (lb[0], 0); + dpo[1] = load_balance_get_bucket_i (lb[1], 0); + + error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL && + dpo[0]->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[0]); + error[0] = ((error[0] == IP4_ERROR_UNKNOWN_PROTOCOL && + !fib_urpf_check_size (lb[0]->lb_urpf) && + ip[0]->dst_address.as_u32 != 0xFFFFFFFF) + ? IP4_ERROR_SRC_LOOKUP_MISS : error[0]); + + error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL && + dpo[1]->dpoi_type == DPO_RECEIVE) ? + IP4_ERROR_SPOOFED_LOCAL_PACKETS : error[1]); + error[1] = ((error[1] == IP4_ERROR_UNKNOWN_PROTOCOL && + !fib_urpf_check_size (lb[1]->lb_urpf) && + ip[1]->dst_address.as_u32 != 0xFFFFFFFF) + ? IP4_ERROR_SRC_LOOKUP_MISS : error[1]); + + last_check->src.as_u32 = ip[1]->src_address.as_u32; + last_check->lbi = lbi[1]; + last_check->error = error[1]; + } + else + { + vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = last_check->lbi; + vnet_buffer (b[0])->ip.adj_index[VLIB_RX] = last_check->lbi; - good_tcp_udp0 = (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; + vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = last_check->lbi; + vnet_buffer (b[1])->ip.adj_index[VLIB_RX] = last_check->lbi; - udp0 = ip4_next_header (ip0); + error[0] = last_check->error; + error[1] = last_check->error; + } +} - /* Don't verify UDP checksum for packets with explicit zero checksum. */ - good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; +static inline uword +ip4_local_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, int head_of_feature_arc) +{ + ip4_main_t *im = &ip4_main; + ip_lookup_main_t *lm = &im->lookup_main; + u32 *from, n_left_from; + vlib_node_runtime_t *error_node = + vlib_node_get_runtime (vm, ip4_input_node.index); + u16 nexts[VLIB_FRAME_SIZE], *next; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + ip4_header_t *ip[2]; + u8 error[2]; + + ip4_local_last_check_t last_check = { + .src = {.as_u32 = 0}, + .lbi = ~0, + .error = IP4_ERROR_UNKNOWN_PROTOCOL + }; - /* Verify UDP length. */ - ip_len0 = clib_net_to_host_u16 (ip0->length); - udp_len0 = clib_net_to_host_u16 (udp0->length); + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; - len_diff0 = ip_len0 - udp_len0; + if (node->flags & VLIB_NODE_FLAG_TRACE) + ip4_forward_next_trace (vm, node, frame, VLIB_TX); - len_diff0 = is_udp0 ? len_diff0 : 0; + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; - if (PREDICT_FALSE (!(is_tcp_udp0 & good_tcp_udp0))) - { - if (is_tcp_udp0) - { - if (is_tcp_udp0 - && !(flags0 & IP_BUFFER_L4_CHECKSUM_COMPUTED)) - flags0 = ip4_tcp_udp_validate_checksum (vm, p0); - good_tcp_udp0 = - (flags0 & IP_BUFFER_L4_CHECKSUM_CORRECT) != 0; - good_tcp_udp0 |= is_udp0 && udp0->checksum == 0; - } - } + while (n_left_from >= 6) + { + u32 is_nat, not_batch = 0; - good_tcp_udp0 &= len_diff0 >= 0; + /* Prefetch next iteration. */ + { + vlib_prefetch_buffer_header (b[4], LOAD); + vlib_prefetch_buffer_header (b[5], LOAD); - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2); + CLIB_PREFETCH (b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD); + } - error0 = IP4_ERROR_UNKNOWN_PROTOCOL; + error[0] = error[1] = IP4_ERROR_UNKNOWN_PROTOCOL; - error0 = len_diff0 < 0 ? IP4_ERROR_UDP_LENGTH : error0; + ip[0] = vlib_buffer_get_current (b[0]); + ip[1] = vlib_buffer_get_current (b[1]); - ASSERT (IP4_ERROR_TCP_CHECKSUM + 1 == IP4_ERROR_UDP_CHECKSUM); - error0 = (is_tcp_udp0 && !good_tcp_udp0 - ? IP4_ERROR_TCP_CHECKSUM + is_udp0 : error0); + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data; + vnet_buffer (b[1])->l3_hdr_offset = b[1]->current_data; - leaf0 = - ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3); + is_nat = b[0]->flags & VNET_BUFFER_F_IS_NATED; + not_batch |= is_nat ^ (b[1]->flags & VNET_BUFFER_F_IS_NATED); - lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = lbi0; + if (head_of_feature_arc == 0 || (is_nat && not_batch == 0)) + goto skip_checks; - lb0 = load_balance_get (lbi0); - dpo0 = load_balance_get_bucket_i (lb0, 0); + if (PREDICT_TRUE (not_batch == 0)) + { + ip4_local_check_l4_csum_x2 (vm, b, ip, error); + ip4_local_check_src_x2 (b, ip, &last_check, error); + } + else + { + if (!(b[0]->flags & VNET_BUFFER_F_IS_NATED)) + { + ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]); + ip4_local_check_src (b[0], ip[0], &last_check, &error[0]); + } + if (!(b[1]->flags & VNET_BUFFER_F_IS_NATED)) + { + ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]); + ip4_local_check_src (b[1], ip[1], &last_check, &error[1]); + } + } - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = - vnet_buffer (p0)->ip.adj_index[VLIB_RX] = lbi0; + skip_checks: - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - dpo0->dpoi_type == DPO_RECEIVE) ? - IP4_ERROR_SPOOFED_LOCAL_PACKETS : error0); - error0 = ((error0 == IP4_ERROR_UNKNOWN_PROTOCOL && - !fib_urpf_check_size (lb0->lb_urpf) && - ip0->dst_address.as_u32 != 0xFFFFFFFF) - ? IP4_ERROR_SRC_LOOKUP_MISS : error0); + next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol]; + next[1] = lm->local_next_by_ip_protocol[ip[1]->protocol]; + ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0], + head_of_feature_arc); + ip4_local_set_next_and_error (error_node, b[1], &next[1], error[1], + head_of_feature_arc); - skip_check: + b += 2; + next += 2; + n_left_from -= 2; + } - next0 = lm->local_next_by_ip_protocol[proto0]; + while (n_left_from > 0) + { + error[0] = IP4_ERROR_UNKNOWN_PROTOCOL; - next0 = - error0 != IP4_ERROR_UNKNOWN_PROTOCOL ? IP_LOCAL_NEXT_DROP : next0; + ip[0] = vlib_buffer_get_current (b[0]); + vnet_buffer (b[0])->l3_hdr_offset = b[0]->current_data; - p0->error = error0 ? error_node->errors[error0] : 0; + if (head_of_feature_arc == 0 || (b[0]->flags & VNET_BUFFER_F_IS_NATED)) + goto skip_check; - if (head_of_feature_arc) - { - if (PREDICT_TRUE (error0 == (u8) IP4_ERROR_UNKNOWN_PROTOCOL)) - vnet_feature_arc_start (arc_index, sw_if_index0, &next0, p0); - } + ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]); + ip4_local_check_src (b[0], ip[0], &last_check, &error[0]); - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, pi0, next0); + skip_check: - } + next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol]; + ip4_local_set_next_and_error (error_node, b[0], &next[0], error[0], + head_of_feature_arc); - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + b += 1; + next += 1; + n_left_from -= 1; } + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); return frame->n_vectors; } @@ -1920,10 +1618,11 @@ VLIB_REGISTER_NODE (ip4_local_node) = .n_next_nodes = IP_LOCAL_N_NEXT, .next_nodes = { - [IP_LOCAL_NEXT_DROP] = "error-drop", - [IP_LOCAL_NEXT_PUNT] = "error-punt", + [IP_LOCAL_NEXT_DROP] = "ip4-drop", + [IP_LOCAL_NEXT_PUNT] = "ip4-punt", [IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup", - [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",}, + [IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input", + }, }; /* *INDENT-ON* */ @@ -1979,7 +1678,13 @@ show_ip_local_command_fn (vlib_main_t * vm, for (i = 0; i < ARRAY_LEN (lm->local_next_by_ip_protocol); i++) { if (lm->local_next_by_ip_protocol[i] != IP_LOCAL_NEXT_PUNT) - vlib_cli_output (vm, "%d", i); + { + u32 node_index = vlib_get_node (vm, + ip4_local_node.index)-> + next_nodes[lm->local_next_by_ip_protocol[i]]; + vlib_cli_output (vm, "%d: %U", i, format_vlib_node_name, vm, + node_index); + } } return 0; } @@ -2089,11 +1794,12 @@ ip4_arp_inline (vlib_main_t * vm, } b0 ^= sw_if_index0; + hash_v3_mix32 (a0, b0, c0); hash_v3_finalize32 (a0, b0, c0); c0 &= BITS (hash_bitmap) - 1; - c0 = c0 / BITS (uword); m0 = (uword) 1 << (c0 % BITS (uword)); + c0 = c0 / BITS (uword); bm0 = hash_bitmap[c0]; drop0 = (bm0 & m0) != 0; @@ -2143,6 +1849,10 @@ ip4_arp_inline (vlib_main_t * vm, &im->ip4_arp_request_packet_template, &bi0); + /* Seems we're out of buffers */ + if (PREDICT_FALSE (!h0)) + continue; + /* Add rewrite/encap string for ARP packet. */ vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t)); @@ -2187,6 +1897,7 @@ ip4_arp_inline (vlib_main_t * vm, vlib_buffer_copy_trace_flag (vm, p0, bi0); b0 = vlib_get_buffer (vm, bi0); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0; vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes); @@ -2224,25 +1935,36 @@ static char *ip4_arp_error_strings[] = { [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request", }; +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_arp_node) = { - .function = ip4_arp,.name = "ip4-arp",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = - ARRAY_LEN (ip4_arp_error_strings),.error_strings = - ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = + .function = ip4_arp, + .name = "ip4-arp", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { - [IP4_ARP_NEXT_DROP] = "error-drop",} -,}; + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; VLIB_REGISTER_NODE (ip4_glean_node) = { - .function = ip4_glean,.name = "ip4-glean",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = - ARRAY_LEN (ip4_arp_error_strings),.error_strings = - ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = - { - [IP4_ARP_NEXT_DROP] = "error-drop",} -,}; + .function = ip4_glean, + .name = "ip4-glean", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ #define foreach_notrace_ip4_arp_error \ _(DROP) \ @@ -2270,7 +1992,8 @@ VLIB_INIT_FUNCTION (arp_notrace_init); /* Send an ARP request to see if given destination is reachable on given interface. */ clib_error_t * -ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) +ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index, + u8 refresh) { vnet_main_t *vnm = vnet_get_main (); ip4_main_t *im = &ip4_main; @@ -2283,6 +2006,7 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) vlib_buffer_t *b; adj_index_t ai; u32 bi = 0; + u8 unicast_rewrite = 0; si = vnet_get_sw_interface (vnm, sw_if_index); @@ -2306,19 +2030,21 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) sw_if_index); } - ip46_address_t nh = { - .ip4 = *dst, - }; - - ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, - VNET_LINK_IP4, &nh, sw_if_index); - adj = adj_get (ai); - h = vlib_packet_template_get_packet (vm, &im->ip4_arp_request_packet_template, &bi); + if (!h) + return clib_error_return (0, "ARP request packet allocation failed"); + hi = vnet_get_sup_hw_interface (vnm, sw_if_index); + if (PREDICT_FALSE (!hi->hw_address)) + { + return clib_error_return (0, "%U: interface %U do not support ip probe", + format_ip4_address, dst, + format_vnet_sw_if_index_name, vnm, + sw_if_index); + } clib_memcpy (h->ip4_over_ethernet[0].ethernet, hi->hw_address, sizeof (h->ip4_over_ethernet[0].ethernet)); @@ -2330,8 +2056,35 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index; + ip46_address_t nh = { + .ip4 = *dst, + }; + + ai = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, + VNET_LINK_IP4, &nh, sw_if_index); + adj = adj_get (ai); + + /* Peer has been previously resolved, retrieve glean adj instead */ + if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) + { + if (refresh) + unicast_rewrite = 1; + else + { + adj_unlock (ai); + ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, + VNET_LINK_IP4, sw_if_index, &nh); + adj = adj_get (ai); + } + } + /* Add encapsulation string for software interface (e.g. ethernet header). */ vnet_rewrite_one_header (adj[0], h, sizeof (ethernet_header_t)); + if (unicast_rewrite) + { + u16 *etype = vlib_buffer_get_current (b) - 2; + etype[0] = clib_host_to_net_u16 (ETHERNET_TYPE_ARP); + } vlib_buffer_advance (b, -adj->rewrite_header.data_bytes); { @@ -2350,8 +2103,45 @@ typedef enum { IP4_REWRITE_NEXT_DROP, IP4_REWRITE_NEXT_ICMP_ERROR, + IP4_REWRITE_NEXT_FRAGMENT, + IP4_REWRITE_N_NEXT /* Last */ } ip4_rewrite_next_t; +/** + * This bits of an IPv4 address to mask to construct a multicast + * MAC address + */ +#if CLIB_ARCH_IS_BIG_ENDIAN +#define IP4_MCAST_ADDR_MASK 0x007fffff +#else +#define IP4_MCAST_ADDR_MASK 0xffff7f00 +#endif + +always_inline void +ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, + u16 adj_packet_bytes, bool df, u32 * next, u32 * error) +{ + if (packet_len > adj_packet_bytes) + { + *error = IP4_ERROR_MTU_EXCEEDED; + if (df) + { + icmp4_error_set_vnet_buffer + (b, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + adj_packet_bytes); + *next = IP4_REWRITE_NEXT_ICMP_ERROR; + } + else + { + /* IP fragmentation */ + ip_frag_set_vnet_buffer (b, 0, adj_packet_bytes, + IP4_FRAG_NEXT_IP4_LOOKUP, 0); + *next = IP4_REWRITE_NEXT_FRAGMENT; + } + } +} + always_inline uword ip4_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2366,7 +2156,7 @@ ip4_rewrite_inline (vlib_main_t * vm, n_left_from = frame->n_vectors; next_index = node->cached_next_index; - u32 thread_index = vlib_get_thread_index (); + u32 thread_index = vm->thread_index; while (n_left_from > 0) { @@ -2428,7 +2218,7 @@ ip4_rewrite_inline (vlib_main_t * vm, /* Decrement TTL & update checksum. Works either endian, so no need for byte swap. */ - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 ttl0 = ip0->ttl; @@ -2457,13 +2247,14 @@ ip4_rewrite_inline (vlib_main_t * vm, } /* Verify checksum. */ - ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) || + (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)); } else { - p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } - if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p1->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 ttl1 = ip1->ttl; @@ -2492,11 +2283,12 @@ ip4_rewrite_inline (vlib_main_t * vm, } /* Verify checksum. */ - ASSERT (ip1->checksum == ip4_header_checksum (ip1)); + ASSERT ((ip1->checksum == ip4_header_checksum (ip1)) || + (p1->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)); } else { - p1->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p1->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } /* Rewrite packet header and updates lengths. */ @@ -2510,17 +2302,29 @@ ip4_rewrite_inline (vlib_main_t * vm, vnet_buffer (p1)->ip.save_rewrite_length = rw_len1; /* Check MTU of outgoing interface. */ - error0 = - (vlib_buffer_length_in_chain (vm, p0) > - adj0[0]. - rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : - error0); - error1 = - (vlib_buffer_length_in_chain (vm, p1) > - adj1[0]. - rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : - error1); + ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length), + adj1[0].rewrite_header.max_l3_packet_bytes, + ip1->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next1, &error1); + if (is_mcast) + { + error0 = ((adj0[0].rewrite_header.sw_if_index == + vnet_buffer (p0)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error0); + error1 = ((adj1[0].rewrite_header.sw_if_index == + vnet_buffer (p1)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error1); + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; /* Don't adjust the buffer for ttl issue; icmp-error node wants * to see the IP headerr */ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE)) @@ -2575,16 +2379,26 @@ ip4_rewrite_inline (vlib_main_t * vm, if (is_midchain) { - adj0->sub_type.midchain.fixup_func (vm, adj0, p0); - adj1->sub_type.midchain.fixup_func (vm, adj1, p1); + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + adj1->sub_type.midchain.fixup_func + (vm, adj1, p1, adj0->sub_type.midchain.fixup_data); } if (is_mcast) { /* * copy bytes from the IP address into the MAC rewrite */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); - vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, + (u8 *) ip0); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip1->dst_address.as_u32, + (u8 *) ip1); } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -2614,7 +2428,7 @@ ip4_rewrite_inline (vlib_main_t * vm, next0 = IP4_REWRITE_NEXT_DROP; /* drop on error */ /* Decrement TTL & update checksum. */ - if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_LOCALLY_ORIGINATED))) + if (PREDICT_TRUE (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))) { i32 ttl0 = ip0->ttl; @@ -2630,7 +2444,8 @@ ip4_rewrite_inline (vlib_main_t * vm, ip0->ttl = ttl0; - ASSERT (ip0->checksum == ip4_header_checksum (ip0)); + ASSERT ((ip0->checksum == ip4_header_checksum (ip0)) || + (p0->flags & VNET_BUFFER_F_OFFLOAD_IP_CKSUM)); if (PREDICT_FALSE (ttl0 <= 0)) { @@ -2648,7 +2463,7 @@ ip4_rewrite_inline (vlib_main_t * vm, } else { - p0->flags &= ~VNET_BUFFER_LOCALLY_ORIGINATED; + p0->flags &= ~VNET_BUFFER_F_LOCALLY_ORIGINATED; } if (do_counters) @@ -2662,7 +2477,11 @@ ip4_rewrite_inline (vlib_main_t * vm, /* * copy bytes from the IP address into the MAC rewrite */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, + (u8 *) ip0); } /* Update packet buffer attributes/set output interface. */ @@ -2676,10 +2495,18 @@ ip4_rewrite_inline (vlib_main_t * vm, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ - error0 = (vlib_buffer_length_in_chain (vm, p0) - > adj0[0].rewrite_header.max_l3_packet_bytes - ? IP4_ERROR_MTU_EXCEEDED : error0); + ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + if (is_mcast) + { + error0 = ((adj0[0].rewrite_header.sw_if_index == + vnet_buffer (p0)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error0); + } p0->error = error_node->errors[error0]; /* Don't adjust the buffer for ttl issue; icmp-error node wants @@ -2695,7 +2522,8 @@ ip4_rewrite_inline (vlib_main_t * vm, if (is_midchain) { - adj0->sub_type.midchain.fixup_func (vm, adj0, p0); + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); } if (PREDICT_FALSE @@ -2755,7 +2583,7 @@ ip4_rewrite_inline (vlib_main_t * vm, Next Indices: - adj->rewrite_header.next_index - or @c error-drop + or @c ip4-drop */ static uword ip4_rewrite (vlib_main_t * vm, @@ -2767,6 +2595,16 @@ ip4_rewrite (vlib_main_t * vm, return ip4_rewrite_inline (vm, node, frame, 0, 0, 0); } +static uword +ip4_rewrite_bcast (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + if (adj_are_counters_enabled ()) + return ip4_rewrite_inline (vm, node, frame, 1, 0, 0); + else + return ip4_rewrite_inline (vm, node, frame, 0, 0, 0); +} + static uword ip4_midchain (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -2805,13 +2643,23 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .format_trace = format_ip4_rewrite_trace, - .n_next_nodes = 2, + .n_next_nodes = IP4_REWRITE_N_NEXT, .next_nodes = { - [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_DROP] = "ip4-drop", [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", + [IP4_REWRITE_NEXT_FRAGMENT] = "ip4-frag", }, }; -VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_node, ip4_rewrite) + +VLIB_REGISTER_NODE (ip4_rewrite_bcast_node) = { + .function = ip4_rewrite, + .name = "ip4-rewrite-bcast", + .vector_size = sizeof (u32), + + .format_trace = format_ip4_rewrite_trace, + .sibling_of = "ip4-rewrite", +}; +VLIB_NODE_FUNCTION_MULTIARCH (ip4_rewrite_bcast_node, ip4_rewrite_bcast) VLIB_REGISTER_NODE (ip4_rewrite_mcast_node) = { .function = ip4_rewrite_mcast, @@ -2843,101 +2691,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = { VLIB_NODE_FUNCTION_MULTIARCH (ip4_midchain_node, ip4_midchain); /* *INDENT-ON */ -static clib_error_t * -add_del_interface_table (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - vnet_main_t *vnm = vnet_get_main (); - ip_interface_address_t *ia; - clib_error_t *error = 0; - u32 sw_if_index, table_id; - - sw_if_index = ~0; - - if (!unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index)) - { - error = clib_error_return (0, "unknown interface `%U'", - format_unformat_error, input); - goto done; - } - - if (unformat (input, "%d", &table_id)) - ; - else - { - error = clib_error_return (0, "expected table id `%U'", - format_unformat_error, input); - goto done; - } - - /* - * If the interface already has in IP address, then a change int - * VRF is not allowed. The IP address applied must first be removed. - * We do not do that automatically here, since VPP has no knowledge - * of whether thoses subnets are valid in the destination VRF. - */ - /* *INDENT-OFF* */ - foreach_ip_interface_address (&ip4_main.lookup_main, - ia, sw_if_index, - 1 /* honor unnumbered */, - ({ - ip4_address_t * a; - - a = ip_interface_address_get_address (&ip4_main.lookup_main, ia); - error = clib_error_return (0, "interface %U has address %U", - format_vnet_sw_if_index_name, vnm, - sw_if_index, - format_ip4_address, a); - goto done; - })); - /* *INDENT-ON* */ - -{ - ip4_main_t *im = &ip4_main; - u32 fib_index; - - fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - - vec_validate (im->fib_index_by_sw_if_index, sw_if_index); - im->fib_index_by_sw_if_index[sw_if_index] = fib_index; - - fib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id); - vec_validate (im->mfib_index_by_sw_if_index, sw_if_index); - im->mfib_index_by_sw_if_index[sw_if_index] = fib_index; -} - -done: -return error; -} - -/*? - * Place the indicated interface into the supplied IPv4 FIB table (also known - * as a VRF). If the FIB table does not exist, this command creates it. To - * display the current IPv4 FIB table, use the command 'show ip fib'. - * FIB table will only be displayed if a route has been added to the table, or - * an IP Address is assigned to an interface in the table (which adds a route - * automatically). - * - * @note IP addresses added after setting the interface IP table are added to - * the indicated FIB table. If an IP address is added prior to changing the - * table then this is an error. The control plane must remove these addresses - * first and then change the table. VPP will not automatically move the - * addresses from the old to the new table as it does not know the validity - * of such a change. - * - * @cliexpar - * Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id): - * @cliexcmd{set interface ip table GigabitEthernet2/0/0 2} - ?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (set_interface_ip_table_command, static) = -{ - .path = "set interface ip table", - .function = add_del_interface_table, - .short_help = "set interface ip table ", -}; -/* *INDENT-ON* */ - int ip4_lookup_validate (ip4_address_t * a, u32 fib_index0) { @@ -3310,6 +3063,29 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = }; /* *INDENT-ON* */ +static clib_error_t * +ip4_config (vlib_main_t * vm, unformat_input_t * input) +{ + ip4_main_t *im = &ip4_main; + uword heapsize = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize)) + ; + else + return clib_error_return (0, + "invalid heap-size parameter `%U'", + format_unformat_error, input); + } + + im->mtrie_heap_size = heapsize; + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip"); + /* * fd.io coding-style-patch-verification: ON *