X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fip%2Fip4_forward.c;h=40fe9dc447e0820311d0d4a4a7858bc5e189f747;hb=8a9c8f1412cb1258340b18a8eb622a835ef3c37b;hp=c2a2f79e02f1922aaea62c0d3accb3968124ae78;hpb=cea194d8f973a2f2b5ef72d212533057174cc70a;p=vpp.git diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c old mode 100755 new mode 100644 index c2a2f79e02f..40fe9dc447e --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -53,378 +53,7 @@ #include #include /* for mFIB table and entry creation */ -/** - * @file - * @brief IPv4 Forwarding. - * - * This file contains the source code for IPv4 forwarding. - */ - -always_inline uword -ip4_lookup_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, - int lookup_for_responses_to_locally_received_packets) -{ - ip4_main_t *im = &ip4_main; - vlib_combined_counter_main_t *cm = &load_balance_main.lbm_to_counters; - u32 n_left_from, n_left_to_next, *from, *to_next; - ip_lookup_next_t next; - u32 thread_index = vlib_get_thread_index (); - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - next = node->cached_next_index; - - while (n_left_from > 0) - { - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - - while (n_left_from >= 8 && n_left_to_next >= 4) - { - vlib_buffer_t *p0, *p1, *p2, *p3; - ip4_header_t *ip0, *ip1, *ip2, *ip3; - ip_lookup_next_t next0, next1, next2, next3; - const load_balance_t *lb0, *lb1, *lb2, *lb3; - ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3; - ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3; - ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3; - u32 pi0, fib_index0, lb_index0; - u32 pi1, fib_index1, lb_index1; - u32 pi2, fib_index2, lb_index2; - u32 pi3, fib_index3, lb_index3; - flow_hash_config_t flow_hash_config0, flow_hash_config1; - flow_hash_config_t flow_hash_config2, flow_hash_config3; - u32 hash_c0, hash_c1, hash_c2, hash_c3; - const dpo_id_t *dpo0, *dpo1, *dpo2, *dpo3; - - /* Prefetch next iteration. */ - { - vlib_buffer_t *p4, *p5, *p6, *p7; - - p4 = vlib_get_buffer (vm, from[4]); - p5 = vlib_get_buffer (vm, from[5]); - p6 = vlib_get_buffer (vm, from[6]); - p7 = vlib_get_buffer (vm, from[7]); - - vlib_prefetch_buffer_header (p4, LOAD); - vlib_prefetch_buffer_header (p5, LOAD); - vlib_prefetch_buffer_header (p6, LOAD); - vlib_prefetch_buffer_header (p7, LOAD); - - CLIB_PREFETCH (p4->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p5->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p6->data, sizeof (ip0[0]), LOAD); - CLIB_PREFETCH (p7->data, sizeof (ip0[0]), LOAD); - } - - pi0 = to_next[0] = from[0]; - pi1 = to_next[1] = from[1]; - pi2 = to_next[2] = from[2]; - pi3 = to_next[3] = from[3]; - - from += 4; - to_next += 4; - n_left_to_next -= 4; - n_left_from -= 4; - - p0 = vlib_get_buffer (vm, pi0); - p1 = vlib_get_buffer (vm, pi1); - p2 = vlib_get_buffer (vm, pi2); - p3 = vlib_get_buffer (vm, pi3); - - ip0 = vlib_buffer_get_current (p0); - ip1 = vlib_buffer_get_current (p1); - ip2 = vlib_buffer_get_current (p2); - ip3 = vlib_buffer_get_current (p3); - - dst_addr0 = &ip0->dst_address; - dst_addr1 = &ip1->dst_address; - dst_addr2 = &ip2->dst_address; - dst_addr3 = &ip3->dst_address; - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index1 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p1)->sw_if_index[VLIB_RX]); - fib_index2 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p2)->sw_if_index[VLIB_RX]); - fib_index3 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p3)->sw_if_index[VLIB_RX]); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - fib_index1 = - (vnet_buffer (p1)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index1 : vnet_buffer (p1)->sw_if_index[VLIB_TX]; - fib_index2 = - (vnet_buffer (p2)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index2 : vnet_buffer (p2)->sw_if_index[VLIB_TX]; - fib_index3 = - (vnet_buffer (p3)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index3 : vnet_buffer (p3)->sw_if_index[VLIB_TX]; - - - if (!lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - mtrie1 = &ip4_fib_get (fib_index1)->mtrie; - mtrie2 = &ip4_fib_get (fib_index2)->mtrie; - mtrie3 = &ip4_fib_get (fib_index3)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); - leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1); - leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2); - leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3); - } - - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2); - } - - if (!lookup_for_responses_to_locally_received_packets) - { - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3); - leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3); - leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3); - } - - if (lookup_for_responses_to_locally_received_packets) - { - lb_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - lb_index1 = vnet_buffer (p1)->ip.adj_index[VLIB_RX]; - lb_index2 = vnet_buffer (p2)->ip.adj_index[VLIB_RX]; - lb_index3 = vnet_buffer (p3)->ip.adj_index[VLIB_RX]; - } - else - { - lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1); - lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2); - lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3); - } - - ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3); - lb0 = load_balance_get (lb_index0); - lb1 = load_balance_get (lb_index1); - lb2 = load_balance_get (lb_index2); - lb3 = load_balance_get (lb_index3); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - ASSERT (lb1->lb_n_buckets > 0); - ASSERT (is_pow2 (lb1->lb_n_buckets)); - ASSERT (lb2->lb_n_buckets > 0); - ASSERT (is_pow2 (lb2->lb_n_buckets)); - ASSERT (lb3->lb_n_buckets > 0); - ASSERT (is_pow2 (lb3->lb_n_buckets)); - - /* Use flow hash to compute multipath adjacency. */ - hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = 0; - hash_c2 = vnet_buffer (p2)->ip.flow_hash = 0; - hash_c3 = vnet_buffer (p3)->ip.flow_hash = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - flow_hash_config0 = lb0->lb_hash_config; - hash_c0 = vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - if (PREDICT_FALSE (lb1->lb_n_buckets > 1)) - { - flow_hash_config1 = lb1->lb_hash_config; - hash_c1 = vnet_buffer (p1)->ip.flow_hash = - ip4_compute_flow_hash (ip1, flow_hash_config1); - dpo1 = - load_balance_get_fwd_bucket (lb1, - (hash_c1 & - (lb1->lb_n_buckets_minus_1))); - } - else - { - dpo1 = load_balance_get_bucket_i (lb1, 0); - } - if (PREDICT_FALSE (lb2->lb_n_buckets > 1)) - { - flow_hash_config2 = lb2->lb_hash_config; - hash_c2 = vnet_buffer (p2)->ip.flow_hash = - ip4_compute_flow_hash (ip2, flow_hash_config2); - dpo2 = - load_balance_get_fwd_bucket (lb2, - (hash_c2 & - (lb2->lb_n_buckets_minus_1))); - } - else - { - dpo2 = load_balance_get_bucket_i (lb2, 0); - } - if (PREDICT_FALSE (lb3->lb_n_buckets > 1)) - { - flow_hash_config3 = lb3->lb_hash_config; - hash_c3 = vnet_buffer (p3)->ip.flow_hash = - ip4_compute_flow_hash (ip3, flow_hash_config3); - dpo3 = - load_balance_get_fwd_bucket (lb3, - (hash_c3 & - (lb3->lb_n_buckets_minus_1))); - } - else - { - dpo3 = load_balance_get_bucket_i (lb3, 0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - next1 = dpo1->dpoi_next_node; - vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index; - next2 = dpo2->dpoi_next_node; - vnet_buffer (p2)->ip.adj_index[VLIB_TX] = dpo2->dpoi_index; - next3 = dpo3->dpoi_next_node; - vnet_buffer (p3)->ip.adj_index[VLIB_TX] = dpo3->dpoi_index; - - vlib_increment_combined_counter - (cm, thread_index, lb_index0, 1, - vlib_buffer_length_in_chain (vm, p0)); - vlib_increment_combined_counter - (cm, thread_index, lb_index1, 1, - vlib_buffer_length_in_chain (vm, p1)); - vlib_increment_combined_counter - (cm, thread_index, lb_index2, 1, - vlib_buffer_length_in_chain (vm, p2)); - vlib_increment_combined_counter - (cm, thread_index, lb_index3, 1, - vlib_buffer_length_in_chain (vm, p3)); - - vlib_validate_buffer_enqueue_x4 (vm, node, next, - to_next, n_left_to_next, - pi0, pi1, pi2, pi3, - next0, next1, next2, next3); - } - - while (n_left_from > 0 && n_left_to_next > 0) - { - vlib_buffer_t *p0; - ip4_header_t *ip0; - ip_lookup_next_t next0; - const load_balance_t *lb0; - ip4_fib_mtrie_t *mtrie0; - ip4_fib_mtrie_leaf_t leaf0; - ip4_address_t *dst_addr0; - u32 pi0, fib_index0, lbi0; - flow_hash_config_t flow_hash_config0; - const dpo_id_t *dpo0; - u32 hash_c0; - - pi0 = from[0]; - to_next[0] = pi0; - - p0 = vlib_get_buffer (vm, pi0); - - ip0 = vlib_buffer_get_current (p0); - - dst_addr0 = &ip0->dst_address; - - fib_index0 = - vec_elt (im->fib_index_by_sw_if_index, - vnet_buffer (p0)->sw_if_index[VLIB_RX]); - fib_index0 = - (vnet_buffer (p0)->sw_if_index[VLIB_TX] == - (u32) ~ 0) ? fib_index0 : vnet_buffer (p0)->sw_if_index[VLIB_TX]; - - if (!lookup_for_responses_to_locally_received_packets) - { - mtrie0 = &ip4_fib_get (fib_index0)->mtrie; - - leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0); - } - - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2); - - if (!lookup_for_responses_to_locally_received_packets) - leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3); - - if (lookup_for_responses_to_locally_received_packets) - lbi0 = vnet_buffer (p0)->ip.adj_index[VLIB_RX]; - else - { - /* Handle default route. */ - lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0); - } - - ASSERT (lbi0); - lb0 = load_balance_get (lbi0); - - ASSERT (lb0->lb_n_buckets > 0); - ASSERT (is_pow2 (lb0->lb_n_buckets)); - - /* Use flow hash to compute multipath adjacency. */ - hash_c0 = vnet_buffer (p0)->ip.flow_hash = 0; - if (PREDICT_FALSE (lb0->lb_n_buckets > 1)) - { - flow_hash_config0 = lb0->lb_hash_config; - - hash_c0 = vnet_buffer (p0)->ip.flow_hash = - ip4_compute_flow_hash (ip0, flow_hash_config0); - dpo0 = - load_balance_get_fwd_bucket (lb0, - (hash_c0 & - (lb0->lb_n_buckets_minus_1))); - } - else - { - dpo0 = load_balance_get_bucket_i (lb0, 0); - } - - next0 = dpo0->dpoi_next_node; - vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index; - - vlib_increment_combined_counter (cm, thread_index, lbi0, 1, - vlib_buffer_length_in_chain (vm, - p0)); - - from += 1; - to_next += 1; - n_left_to_next -= 1; - n_left_from -= 1; - - if (PREDICT_FALSE (next0 != next)) - { - n_left_to_next += 1; - vlib_put_next_frame (vm, node, next, n_left_to_next); - next = next0; - vlib_get_next_frame (vm, node, next, to_next, n_left_to_next); - to_next[0] = pi0; - to_next += 1; - n_left_to_next -= 1; - } - } - - vlib_put_next_frame (vm, node, next, n_left_to_next); - } - - if (node->flags & VLIB_NODE_FLAG_TRACE) - ip4_forward_next_trace (vm, node, frame, VLIB_TX); - - return frame->n_vectors; -} +#include /** @brief IPv4 lookup node. @node ip4-lookup @@ -470,11 +99,17 @@ ip4_lookup (vlib_main_t * vm, static u8 *format_ip4_lookup_trace (u8 * s, va_list * args); +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_lookup_node) = { -.function = ip4_lookup,.name = "ip4-lookup",.vector_size = - sizeof (u32),.format_trace = format_ip4_lookup_trace,.n_next_nodes = - IP_LOOKUP_N_NEXT,.next_nodes = IP4_LOOKUP_NEXT_NODES,}; + .function = ip4_lookup, + .name = "ip4-lookup", + .vector_size = sizeof (u32), + .format_trace = format_ip4_lookup_trace, + .n_next_nodes = IP_LOOKUP_N_NEXT, + .next_nodes = IP4_LOOKUP_NEXT_NODES, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_lookup_node, ip4_lookup); @@ -665,11 +300,17 @@ ip4_load_balance (vlib_main_t * vm, return frame->n_vectors; } +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_load_balance_node) = { -.function = ip4_load_balance,.name = "ip4-load-balance",.vector_size = - sizeof (u32),.sibling_of = "ip4-lookup",.format_trace = - format_ip4_lookup_trace,}; + .function = ip4_load_balance, + .name = "ip4-load-balance", + .vector_size = sizeof (u32), + .sibling_of = "ip4-lookup", + .format_trace = + format_ip4_lookup_trace, +}; +/* *INDENT-ON* */ VLIB_NODE_FUNCTION_MULTIARCH (ip4_load_balance_node, ip4_load_balance); @@ -871,11 +512,11 @@ ip4_sw_interface_enable_disable (u32 sw_if_index, u32 is_enable) if (0 != --im->ip_enabled_by_sw_if_index[sw_if_index]) return; } - vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", + vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", sw_if_index, !is_enable, 0, 0); } @@ -904,7 +545,7 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, vec_elt (im->fib_index_by_sw_if_index, sw_if_index)); vec_add1 (addr_fib, ip4_af); - /* FIXME-LATER + /* * there is no support for adj-fib handling in the presence of overlapping * subnets on interfaces. Easy fix - disallow overlapping subnets, like * most routers do. @@ -913,31 +554,44 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm, if (!is_del) { /* When adding an address check that it does not conflict - with an existing address. */ + with an existing address on any interface in this table. */ ip_interface_address_t *ia; - foreach_ip_interface_address - (&im->lookup_main, ia, sw_if_index, - 0 /* honor unnumbered */ , - ({ - ip4_address_t * x = - ip_interface_address_get_address - (&im->lookup_main, ia); - if (ip4_destination_matches_route - (im, address, x, ia->address_length) || - ip4_destination_matches_route (im, - x, - address, - address_length)) - return - clib_error_create - ("failed to add %U which conflicts with %U for interface %U", - format_ip4_address_and_length, address, - address_length, - format_ip4_address_and_length, x, - ia->address_length, - format_vnet_sw_if_index_name, vnm, - sw_if_index); - })); + vnet_sw_interface_t *sif; + + pool_foreach(sif, vnm->interface_main.sw_interfaces, + ({ + if (im->fib_index_by_sw_if_index[sw_if_index] == + im->fib_index_by_sw_if_index[sif->sw_if_index]) + { + foreach_ip_interface_address + (&im->lookup_main, ia, sif->sw_if_index, + 0 /* honor unnumbered */ , + ({ + ip4_address_t * x = + ip_interface_address_get_address + (&im->lookup_main, ia); + if (ip4_destination_matches_route + (im, address, x, ia->address_length) || + ip4_destination_matches_route (im, + x, + address, + address_length)) + { + vnm->api_errno = VNET_API_ERROR_DUPLICATE_IF_ADDRESS; + + return + clib_error_create + ("failed to add %U which conflicts with %U for interface %U", + format_ip4_address_and_length, address, + address_length, + format_ip4_address_and_length, x, + ia->address_length, + format_vnet_sw_if_index_name, vnm, + sif->sw_if_index); + } + })); + } + })); } /* *INDENT-ON* */ @@ -1054,10 +708,10 @@ VNET_FEATURE_INIT (ip4_vxlan_bypass, static) = .runs_before = VNET_FEATURES ("ip4-lookup"), }; -VNET_FEATURE_INIT (ip4_drop, static) = +VNET_FEATURE_INIT (ip4_not_enabled, static) = { .arc_name = "ip4-unicast", - .node_name = "ip4-drop", + .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-lookup"), }; @@ -1083,10 +737,10 @@ VNET_FEATURE_INIT (ip4_vpath_mc, static) = .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; -VNET_FEATURE_INIT (ip4_mc_drop, static) = +VNET_FEATURE_INIT (ip4_mc_not_enabled, static) = { .arc_name = "ip4-multicast", - .node_name = "ip4-drop", + .node_name = "ip4-not-enabled", .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"), }; @@ -1101,7 +755,7 @@ VNET_FEATURE_INIT (ip4_lookup_mc, static) = VNET_FEATURE_ARC_INIT (ip4_output, static) = { .arc_name = "ip4-output", - .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain"), + .start_nodes = VNET_FEATURES ("ip4-rewrite", "ip4-midchain", "ip4-dvr-dpo"), .arc_index_ptr = &ip4_main.lookup_main.output_feature_arc_index, }; @@ -1109,6 +763,13 @@ VNET_FEATURE_INIT (ip4_source_and_port_range_check_tx, static) = { .arc_name = "ip4-output", .node_name = "ip4-source-and-port-range-check-tx", + .runs_before = VNET_FEATURES ("ip4-outacl"), +}; + +VNET_FEATURE_INIT (ip4_outacl, static) = +{ + .arc_name = "ip4-output", + .node_name = "ip4-outacl", .runs_before = VNET_FEATURES ("ipsec-output-ip4"), }; @@ -1145,8 +806,9 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) ip4_address_t *address; vlib_main_t *vm = vlib_get_main (); + vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0); /* *INDENT-OFF* */ - foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* honor unnumbered */, + foreach_ip_interface_address (lm4, ia, sw_if_index, 0, ({ address = ip_interface_address_get_address (lm4, ia); ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1); @@ -1154,11 +816,11 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add) /* *INDENT-ON* */ } - vnet_feature_enable_disable ("ip4-unicast", "ip4-drop", sw_if_index, + vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index, is_add, 0, 0); - vnet_feature_enable_disable ("ip4-multicast", "ip4-drop", sw_if_index, - is_add, 0, 0); + vnet_feature_enable_disable ("ip4-multicast", "ip4-not-enabled", + sw_if_index, is_add, 0, 0); return /* no error */ 0; } @@ -1177,6 +839,12 @@ ip4_lookup_init (vlib_main_t * vm) if ((error = vlib_call_init_function (vm, vnet_feature_init))) return error; + if ((error = vlib_call_init_function (vm, ip4_mtrie_module_init))) + return (error); + if ((error = vlib_call_init_function (vm, fib_module_init))) + return error; + if ((error = vlib_call_init_function (vm, mfib_module_init))) + return error; for (i = 0; i < ARRAY_LEN (im->fib_masks); i++) { @@ -1498,7 +1166,9 @@ ip4_local_validate_l4 (vlib_main_t * vm, vlib_buffer_t * p, ip4_header_t * ip, } #define ip4_local_do_l4_check(is_tcp_udp, flags) \ - (is_tcp_udp && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED)) + (is_tcp_udp && !(flags & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED \ + || flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM \ + || flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) static inline uword ip4_local_inline (vlib_main_t * vm, @@ -1560,10 +1230,8 @@ ip4_local_inline (vlib_main_t * vm, sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; sw_if_index1 = vnet_buffer (p1)->sw_if_index[VLIB_RX]; - /* Treat IP frag packets as "experimental" protocol for now - until support of IP frag reassembly is implemented */ - proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; - proto1 = ip4_is_fragment (ip1) ? 0xfe : ip1->protocol; + proto0 = ip0->protocol; + proto1 = ip1->protocol; if (head_of_feature_arc == 0) goto skip_checks; @@ -1574,9 +1242,13 @@ ip4_local_inline (vlib_main_t * vm, is_tcp_udp1 = is_udp1 || proto1 == IP_PROTOCOL_TCP; good_tcp_udp0 = - (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; - good_tcp_udp1 = - (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT + || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM + || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0; + good_tcp_udp1 = (p1->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT + || (p1->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM + || p1->flags & + VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0; if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags) || ip4_local_do_l4_check (is_tcp_udp1, @@ -1722,17 +1394,18 @@ ip4_local_inline (vlib_main_t * vm, vnet_buffer (p0)->l3_hdr_offset = p0->current_data; sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX]; - /* Treat IP frag packets as "experimental" protocol for now - until support of IP frag reassembly is implemented */ - proto0 = ip4_is_fragment (ip0) ? 0xfe : ip0->protocol; + proto0 = ip0->protocol; if (head_of_feature_arc == 0 || p0->flags & VNET_BUFFER_F_IS_NATED) goto skip_check; is_udp0 = proto0 == IP_PROTOCOL_UDP; is_tcp_udp0 = is_udp0 || proto0 == IP_PROTOCOL_TCP; + good_tcp_udp0 = - (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0; + (p0->flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT + || (p0->flags & VNET_BUFFER_F_OFFLOAD_TCP_CKSUM + || p0->flags & VNET_BUFFER_F_OFFLOAD_UDP_CKSUM)) != 0; if (PREDICT_FALSE (ip4_local_do_l4_check (is_tcp_udp0, p0->flags))) { @@ -2039,6 +1712,10 @@ ip4_arp_inline (vlib_main_t * vm, &im->ip4_arp_request_packet_template, &bi0); + /* Seems we're out of buffers */ + if (PREDICT_FALSE (!h0)) + continue; + /* Add rewrite/encap string for ARP packet. */ vnet_rewrite_one_header (adj0[0], h0, sizeof (ethernet_header_t)); @@ -2121,25 +1798,36 @@ static char *ip4_arp_error_strings[] = { [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request", }; +/* *INDENT-OFF* */ VLIB_REGISTER_NODE (ip4_arp_node) = { - .function = ip4_arp,.name = "ip4-arp",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = - ARRAY_LEN (ip4_arp_error_strings),.error_strings = - ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = + .function = ip4_arp, + .name = "ip4-arp", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { - [IP4_ARP_NEXT_DROP] = "error-drop",} -,}; + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; VLIB_REGISTER_NODE (ip4_glean_node) = { - .function = ip4_glean,.name = "ip4-glean",.vector_size = - sizeof (u32),.format_trace = format_ip4_forward_next_trace,.n_errors = - ARRAY_LEN (ip4_arp_error_strings),.error_strings = - ip4_arp_error_strings,.n_next_nodes = IP4_ARP_N_NEXT,.next_nodes = - { - [IP4_ARP_NEXT_DROP] = "error-drop",} -,}; + .function = ip4_glean, + .name = "ip4-glean", + .vector_size = sizeof (u32), + .format_trace = format_ip4_forward_next_trace, + .n_errors = ARRAY_LEN (ip4_arp_error_strings), + .error_strings = ip4_arp_error_strings, + .n_next_nodes = IP4_ARP_N_NEXT, + .next_nodes = { + [IP4_ARP_NEXT_DROP] = "error-drop", + }, +}; +/* *INDENT-ON* */ #define foreach_notrace_ip4_arp_error \ _(DROP) \ @@ -2238,7 +1926,8 @@ ip4_probe_neighbor (vlib_main_t * vm, ip4_address_t * dst, u32 sw_if_index) if (adj->lookup_next_index == IP_LOOKUP_NEXT_REWRITE) { adj_unlock (ai); - ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, sw_if_index, &nh); + ai = adj_glean_add_or_lock (FIB_PROTOCOL_IP4, + VNET_LINK_IP4, sw_if_index, &nh); adj = adj_get (ai); } @@ -2264,6 +1953,39 @@ typedef enum IP4_REWRITE_NEXT_ICMP_ERROR, } ip4_rewrite_next_t; +/** + * This bits of an IPv4 address to mask to construct a multicast + * MAC address + */ +#if CLIB_ARCH_IS_BIG_ENDIAN +#define IP4_MCAST_ADDR_MASK 0x007fffff +#else +#define IP4_MCAST_ADDR_MASK 0xffff7f00 +#endif + +always_inline void +ip4_mtu_check (vlib_buffer_t * b, u16 packet_len, + u16 adj_packet_bytes, bool df, u32 * next, u32 * error) +{ + if (packet_len > adj_packet_bytes) + { + *error = IP4_ERROR_MTU_EXCEEDED; + if (df) + { + icmp4_error_set_vnet_buffer + (b, ICMP4_destination_unreachable, + ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set, + adj_packet_bytes); + *next = IP4_REWRITE_NEXT_ICMP_ERROR; + } + else + { + /* Add support for fragmentation here */ + *next = IP4_REWRITE_NEXT_DROP; + } + } +} + always_inline uword ip4_rewrite_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -2424,17 +2146,29 @@ ip4_rewrite_inline (vlib_main_t * vm, vnet_buffer (p1)->ip.save_rewrite_length = rw_len1; /* Check MTU of outgoing interface. */ - error0 = - (vlib_buffer_length_in_chain (vm, p0) > - adj0[0]. - rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : - error0); - error1 = - (vlib_buffer_length_in_chain (vm, p1) > - adj1[0]. - rewrite_header.max_l3_packet_bytes ? IP4_ERROR_MTU_EXCEEDED : - error1); + ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + ip4_mtu_check (p1, clib_net_to_host_u16 (ip1->length), + adj1[0].rewrite_header.max_l3_packet_bytes, + ip1->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next1, &error1); + if (is_mcast) + { + error0 = ((adj0[0].rewrite_header.sw_if_index == + vnet_buffer (p0)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error0); + error1 = ((adj1[0].rewrite_header.sw_if_index == + vnet_buffer (p1)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error1); + } + + p0->error = error_node->errors[error0]; + p1->error = error_node->errors[error1]; /* Don't adjust the buffer for ttl issue; icmp-error node wants * to see the IP headerr */ if (PREDICT_TRUE (error0 == IP4_ERROR_NONE)) @@ -2489,16 +2223,26 @@ ip4_rewrite_inline (vlib_main_t * vm, if (is_midchain) { - adj0->sub_type.midchain.fixup_func (vm, adj0, p0); - adj1->sub_type.midchain.fixup_func (vm, adj1, p1); + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); + adj1->sub_type.midchain.fixup_func + (vm, adj1, p1, adj0->sub_type.midchain.fixup_data); } if (is_mcast) { /* * copy bytes from the IP address into the MAC rewrite */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); - vnet_fixup_one_header (adj1[0], &ip1->dst_address, ip1); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, + (u8 *) ip0); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip1->dst_address.as_u32, + (u8 *) ip1); } vlib_validate_buffer_enqueue_x2 (vm, node, next_index, @@ -2577,7 +2321,11 @@ ip4_rewrite_inline (vlib_main_t * vm, /* * copy bytes from the IP address into the MAC rewrite */ - vnet_fixup_one_header (adj0[0], &ip0->dst_address, ip0); + vnet_ip_mcast_fixup_header (IP4_MCAST_ADDR_MASK, + adj0-> + rewrite_header.dst_mcast_offset, + &ip0->dst_address.as_u32, + (u8 *) ip0); } /* Update packet buffer attributes/set output interface. */ @@ -2591,10 +2339,18 @@ ip4_rewrite_inline (vlib_main_t * vm, vlib_buffer_length_in_chain (vm, p0) + rw_len0); /* Check MTU of outgoing interface. */ - error0 = (vlib_buffer_length_in_chain (vm, p0) - > adj0[0].rewrite_header.max_l3_packet_bytes - ? IP4_ERROR_MTU_EXCEEDED : error0); + ip4_mtu_check (p0, clib_net_to_host_u16 (ip0->length), + adj0[0].rewrite_header.max_l3_packet_bytes, + ip0->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_HEADER_FLAG_DONT_FRAGMENT), + &next0, &error0); + if (is_mcast) + { + error0 = ((adj0[0].rewrite_header.sw_if_index == + vnet_buffer (p0)->sw_if_index[VLIB_RX]) ? + IP4_ERROR_SAME_INTERFACE : error0); + } p0->error = error_node->errors[error0]; /* Don't adjust the buffer for ttl issue; icmp-error node wants @@ -2610,7 +2366,8 @@ ip4_rewrite_inline (vlib_main_t * vm, if (is_midchain) { - adj0->sub_type.midchain.fixup_func (vm, adj0, p0); + adj0->sub_type.midchain.fixup_func + (vm, adj0, p0, adj0->sub_type.midchain.fixup_data); } if (PREDICT_FALSE @@ -2670,7 +2427,7 @@ ip4_rewrite_inline (vlib_main_t * vm, Next Indices: - adj->rewrite_header.next_index - or @c error-drop + or @c ip4-drop */ static uword ip4_rewrite (vlib_main_t * vm, @@ -2722,7 +2479,7 @@ VLIB_REGISTER_NODE (ip4_rewrite_node) = { .n_next_nodes = 2, .next_nodes = { - [IP4_REWRITE_NEXT_DROP] = "error-drop", + [IP4_REWRITE_NEXT_DROP] = "ip4-drop", [IP4_REWRITE_NEXT_ICMP_ERROR] = "ip4-icmp-error", }, }; @@ -3130,6 +2887,29 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) = }; /* *INDENT-ON* */ +static clib_error_t * +ip4_config (vlib_main_t * vm, unformat_input_t * input) +{ + ip4_main_t *im = &ip4_main; + uword heapsize = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "heap-size %U", unformat_memory_size, &heapsize)) + ; + else + return clib_error_return (0, + "invalid heap-size parameter `%U'", + format_unformat_error, input); + } + + im->mtrie_heap_size = heapsize; + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (ip4_config, "ip"); + /* * fd.io coding-style-patch-verification: ON *