From 00bff193e7e3cc9caba447ce001f57ed215fbddf Mon Sep 17 00:00:00 2001 From: Kingwel Xie Date: Thu, 7 Mar 2019 01:25:32 -0500 Subject: [PATCH] ipsec: ipsec-if optimizations & bug fixes 1. changed to vlib_buffer_enqueue_to_next 2. error counter fixes; stats added to last_sw_if_index when interface changed 3. udp-encap support Change-Id: I70b0814aa37181fea4d70fa3c96c608adb5afe49 Signed-off-by: Kingwel Xie --- src/vnet/ipsec/ipsec_if.c | 4 + src/vnet/ipsec/ipsec_if_in.c | 559 +++++++++++++++++++++++-------------------- 2 files changed, 309 insertions(+), 254 deletions(-) diff --git a/src/vnet/ipsec/ipsec_if.c b/src/vnet/ipsec/ipsec_if.c index 5150e95d6de..f40e94dbe84 100644 --- a/src/vnet/ipsec/ipsec_if.c +++ b/src/vnet/ipsec/ipsec_if.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -603,6 +604,9 @@ ipsec_tunnel_if_init (vlib_main_t * vm) im->ipsec_if_pool_index_by_key = hash_create (0, sizeof (uword)); im->ipsec_if_real_dev_by_show_dev = hash_create (0, sizeof (uword)); + udp_register_dst_port (vm, UDP_DST_PORT_ipsec, ipsec_if_input_node.index, + 1); + return 0; } diff --git a/src/vnet/ipsec/ipsec_if_in.c b/src/vnet/ipsec/ipsec_if_in.c index 9979446b71a..b12e36c4fbc 100644 --- a/src/vnet/ipsec/ipsec_if_in.c +++ b/src/vnet/ipsec/ipsec_if_in.c @@ -61,322 +61,367 @@ format_ipsec_if_input_trace (u8 * s, va_list * args) return s; } -VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame) + +always_inline uword +ipsec_if_input_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * from_frame) { ipsec_main_t *im = &ipsec_main; vnet_main_t *vnm = im->vnet_main; vnet_interface_main_t *vim = &vnm->interface_main; - u32 *from, *to_next = 0, next_index; - u32 n_left_from, last_sw_if_index = ~0; + + int is_trace = node->flags & VLIB_NODE_FLAG_TRACE; u32 thread_index = vm->thread_index; + + u32 n_left_from, *from; + u16 nexts[VLIB_FRAME_SIZE], *next; + vlib_buffer_t 
*bufs[VLIB_FRAME_SIZE], **b; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; + + clib_memset_u16 (nexts, im->esp4_decrypt_next_index, n_left_from); + u64 n_bytes = 0, n_packets = 0; - const ipsec_tunnel_if_t *last_t = NULL; + u32 n_disabled = 0, n_no_tunnel = 0; + + u32 last_sw_if_index = ~0; + u32 last_tunnel_id = ~0; + u64 last_key = ~0; + vlib_combined_counter_main_t *rx_counter; vlib_combined_counter_main_t *drop_counter; - u32 n_disabled = 0, n_no_tunnel = 0; rx_counter = vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX; drop_counter = vim->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - next_index = node->cached_next_index; - - while (n_left_from > 0) + while (n_left_from >= 2) { - u32 n_left_to_next; + u32 sw_if_index0, sw_if_index1; + ip4_header_t *ip0, *ip1; + esp_header_t *esp0, *esp1; + u32 len0, len1; + u16 buf_adv0, buf_adv1; + u32 tid0, tid1; + ipsec_tunnel_if_t *t0, *t1; + u64 key0, key1; + + if (n_left_from >= 4) + { + CLIB_PREFETCH (b[2], CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (b[2]->data, CLIB_CACHE_LINE_BYTES, LOAD); + CLIB_PREFETCH (b[3], CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (b[3]->data, CLIB_CACHE_LINE_BYTES, LOAD); + } - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + ip0 = (ip4_header_t *) (b[0]->data + vnet_buffer (b[0])->l3_hdr_offset); + ip1 = (ip4_header_t *) (b[1]->data + vnet_buffer (b[1])->l3_hdr_offset); - while (n_left_from >= 4 && n_left_to_next >= 2) + /* NAT UDP port 4500 case, don't advance any more */ + if (ip0->protocol == IP_PROTOCOL_UDP) { - u32 bi0, bi1, next0, next1, sw_if_index0, sw_if_index1; - const esp_header_t *esp0, *esp1; - const ip4_header_t *ip0, *ip1; - vlib_buffer_t *b0, *b1; - uword *p0, *p1; - u32 len0, len1; - u64 key0, key1; - - /* Prefetch next 
iteration. */ - { - vlib_buffer_t *p2, *p3; - - p2 = vlib_get_buffer (vm, from[2]); - p3 = vlib_get_buffer (vm, from[3]); - - vlib_prefetch_buffer_header (p2, STORE); - vlib_prefetch_buffer_header (p3, STORE); - - CLIB_PREFETCH (p2->data, sizeof (ip0[0]), STORE); - CLIB_PREFETCH (p3->data, sizeof (ip0[0]), STORE); - } - - bi0 = to_next[0] = from[0]; - bi1 = to_next[1] = from[1]; - - from += 2; - n_left_from -= 2; - to_next += 2; - n_left_to_next -= 2; - next0 = next1 = IPSEC_INPUT_NEXT_DROP; - - b0 = vlib_get_buffer (vm, bi0); - b1 = vlib_get_buffer (vm, bi1); - ip0 = vlib_buffer_get_current (b0); - ip1 = vlib_buffer_get_current (b1); - esp0 = (const esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - esp1 = (const esp_header_t *) ((u8 *) ip1 + ip4_header_bytes (ip1)); - - key0 = (u64) ip0->src_address.as_u32 << 32 | (u64) esp0->spi; - key1 = (u64) ip1->src_address.as_u32 << 32 | (u64) esp1->spi; - - p0 = hash_get (im->ipsec_if_pool_index_by_key, key0); - p1 = hash_get (im->ipsec_if_pool_index_by_key, key1); - - /* stats for the tunnel include all the data after the IP header - just like a norml IP-IP tunnel */ - vlib_buffer_advance (b0, ip4_header_bytes (ip0)); - vlib_buffer_advance (b1, ip4_header_bytes (ip1)); - len0 = vlib_buffer_length_in_chain (vm, b0); - len1 = vlib_buffer_length_in_chain (vm, b1); - - if (PREDICT_TRUE (NULL != p0)) - { - const ipsec_tunnel_if_t *t0; + esp0 = + (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0) + + sizeof (udp_header_t)); + buf_adv0 = 0; + } + else + { + esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); + buf_adv0 = ip4_header_bytes (ip0); + } + if (ip1->protocol == IP_PROTOCOL_UDP) + { + esp1 = + (esp_header_t *) ((u8 *) ip1 + ip4_header_bytes (ip1) + + sizeof (udp_header_t)); + buf_adv1 = 0; + } + else + { + esp1 = (esp_header_t *) ((u8 *) ip1 + ip4_header_bytes (ip1)); + buf_adv1 = ip4_header_bytes (ip1); + } - t0 = pool_elt_at_index (im->tunnel_interfaces, p0[0]); - vnet_buffer (b0)->ipsec.sad_index 
= t0->input_sa_index; + vlib_buffer_advance (b[0], buf_adv0); + vlib_buffer_advance (b[1], buf_adv1); - if (PREDICT_TRUE (t0->hw_if_index != ~0)) - { - vnet_buffer (b0)->ipsec.flags = 0; - sw_if_index0 = t0->sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0; - - if (PREDICT_FALSE - (!(t0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))) - { - vlib_increment_combined_counter - (drop_counter, thread_index, sw_if_index0, 1, len0); - b0->error = node->errors[IPSEC_IF_INPUT_ERROR_DISABLED]; - n_disabled++; - goto pkt1; - } - - if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index)) - { - n_packets++; - n_bytes += len0; - } - else - { - if (last_t) - { - vlib_increment_combined_counter - (rx_counter, thread_index, sw_if_index0, - n_packets, n_bytes); - } - - last_sw_if_index = sw_if_index0; - last_t = t0; - n_packets = 1; - n_bytes = len0; - } - } - else - { - vnet_buffer (b0)->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL; - } + len0 = vlib_buffer_length_in_chain (vm, b[0]); + len1 = vlib_buffer_length_in_chain (vm, b[1]); - next0 = im->esp4_decrypt_next_index; + key0 = (u64) ip0->src_address.as_u32 << 32 | (u64) esp0->spi; + key1 = (u64) ip1->src_address.as_u32 << 32 | (u64) esp1->spi; + + if (key0 == last_key) + { + tid0 = last_tunnel_id; + } + else + { + uword *p = hash_get (im->ipsec_if_pool_index_by_key, key0); + if (p) + { + tid0 = p[0]; + last_tunnel_id = tid0; + last_key = key0; } else { - b0->error = node->errors[IPSEC_IF_INPUT_ERROR_NO_TUNNEL]; n_no_tunnel++; + next[0] = IPSEC_INPUT_NEXT_DROP; + goto pkt1; } + } - pkt1: - if (PREDICT_TRUE (NULL != p1)) - { - const ipsec_tunnel_if_t *t1; + t0 = pool_elt_at_index (im->tunnel_interfaces, tid0); + vnet_buffer (b[0])->ipsec.sad_index = t0->input_sa_index; - t1 = pool_elt_at_index (im->tunnel_interfaces, p1[0]); - vnet_buffer (b1)->ipsec.sad_index = t1->input_sa_index; + if (PREDICT_TRUE (t0->hw_if_index != ~0)) + { + vnet_buffer (b[0])->ipsec.flags = 0; + sw_if_index0 = t0->sw_if_index; + vnet_buffer 
(b[0])->sw_if_index[VLIB_RX] = sw_if_index0; - if (PREDICT_TRUE (t1->hw_if_index != ~0)) - { - vnet_buffer (b1)->ipsec.flags = 0; - sw_if_index1 = t1->sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1; - - if (PREDICT_FALSE - (!(t1->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))) - { - vlib_increment_combined_counter - (drop_counter, thread_index, sw_if_index1, 1, len1); - b1->error = node->errors[IPSEC_IF_INPUT_ERROR_DISABLED]; - n_disabled++; - goto trace1; - } - - if (PREDICT_TRUE (sw_if_index1 == last_sw_if_index)) - { - n_packets++; - n_bytes += len1; - } - else - { - if (last_t) - { - vlib_increment_combined_counter - (rx_counter, thread_index, sw_if_index1, - n_packets, n_bytes); - } - - last_sw_if_index = sw_if_index1; - last_t = t1; - n_packets = 1; - n_bytes = len1; - } - } - else + if (PREDICT_FALSE (!(t0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))) + { + vlib_increment_combined_counter + (drop_counter, thread_index, sw_if_index0, 1, len0); + n_disabled++; + next[0] = IPSEC_INPUT_NEXT_DROP; + goto pkt1; + } + + if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index)) + { + n_packets++; + n_bytes += len0; + } + else + { + if (n_packets) { - vnet_buffer (b1)->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL; + vlib_increment_combined_counter + (rx_counter, thread_index, last_sw_if_index, + n_packets, n_bytes); } - next1 = im->esp4_decrypt_next_index; + last_sw_if_index = sw_if_index0; + n_packets = 1; + n_bytes = len0; + } + } + else + { + vnet_buffer (b[0])->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL; + } + + pkt1: + if (key1 == last_key) + { + tid1 = last_tunnel_id; + } + else + { + uword *p = hash_get (im->ipsec_if_pool_index_by_key, key1); + if (p) + { + tid1 = p[0]; + last_tunnel_id = tid1; + last_key = key1; } else { - b1->error = node->errors[IPSEC_IF_INPUT_ERROR_NO_TUNNEL]; n_no_tunnel++; + next[1] = IPSEC_INPUT_NEXT_DROP; + goto trace1; + } + } + + t1 = pool_elt_at_index (im->tunnel_interfaces, tid1); + vnet_buffer (b[1])->ipsec.sad_index = 
t1->input_sa_index; + + if (PREDICT_TRUE (t1->hw_if_index != ~0)) + { + vnet_buffer (b[1])->ipsec.flags = 0; + sw_if_index1 = t1->sw_if_index; + vnet_buffer (b[1])->sw_if_index[VLIB_RX] = sw_if_index1; + + if (PREDICT_FALSE (!(t1->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))) + { + vlib_increment_combined_counter + (drop_counter, thread_index, sw_if_index1, 1, len1); + n_disabled++; + next[1] = IPSEC_INPUT_NEXT_DROP; + goto trace1; + } + + if (PREDICT_TRUE (sw_if_index1 == last_sw_if_index)) + { + n_packets++; + n_bytes += len1; + } + else + { + if (n_packets) + { + vlib_increment_combined_counter + (rx_counter, thread_index, last_sw_if_index, + n_packets, n_bytes); + } + + last_sw_if_index = sw_if_index1; + n_packets = 1; + n_bytes = len1; } + } + else + { + vnet_buffer (b[1])->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL; + } - trace1: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + trace1: + if (is_trace) + { + if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) { ipsec_if_input_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); + vlib_add_trace (vm, node, b[0], sizeof (*tr)); tr->spi = clib_host_to_net_u32 (esp0->spi); tr->seq = clib_host_to_net_u32 (esp0->seq); } - if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED)) { ipsec_if_input_trace_t *tr = - vlib_add_trace (vm, node, b1, sizeof (*tr)); + vlib_add_trace (vm, node, b[1], sizeof (*tr)); tr->spi = clib_host_to_net_u32 (esp1->spi); tr->seq = clib_host_to_net_u32 (esp1->seq); } + } + + /* next */ + b += 2; + next += 2; + n_left_from -= 2; + } + while (n_left_from > 0) + { + u32 sw_if_index0; + ip4_header_t *ip0; + esp_header_t *esp0; + u32 len0; + u16 buf_adv0; + u32 tid0; + ipsec_tunnel_if_t *t0; + u64 key0; + + ip0 = (ip4_header_t *) (b[0]->data + vnet_buffer (b[0])->l3_hdr_offset); + + /* NAT UDP port 4500 case, don't advance any more */ + if (ip0->protocol == IP_PROTOCOL_UDP) + { + esp0 = + (esp_header_t *) ((u8 *) ip0 + 
ip4_header_bytes (ip0) + + sizeof (udp_header_t)); + buf_adv0 = 0; + } + else + { + esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); + buf_adv0 = ip4_header_bytes (ip0); + } - vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, - n_left_to_next, - bi0, bi1, next0, next1); + /* stats for the tunnel include all the data after the IP header + just like a normal IP-IP tunnel */ + vlib_buffer_advance (b[0], buf_adv0); + len0 = vlib_buffer_length_in_chain (vm, b[0]); + + key0 = (u64) ip0->src_address.as_u32 << 32 | (u64) esp0->spi; + if (key0 == last_key) + { + tid0 = last_tunnel_id; } - while (n_left_from > 0 && n_left_to_next > 0) + else { - u32 bi0, next0, sw_if_index0; - const esp_header_t *esp0; - const ip4_header_t *ip0; - vlib_buffer_t *b0; - uword *p; - u32 len0; - - bi0 = to_next[0] = from[0]; - from += 1; - n_left_from -= 1; - to_next += 1; - n_left_to_next -= 1; - b0 = vlib_get_buffer (vm, bi0); - ip0 = vlib_buffer_get_current (b0); - esp0 = (const esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0)); - - next0 = IPSEC_INPUT_NEXT_DROP; - - u64 key = (u64) ip0->src_address.as_u32 << 32 | (u64) esp0->spi; - - p = hash_get (im->ipsec_if_pool_index_by_key, key); - - /* stats for the tunnel include all the data after the IP header - just like a norml IP-IP tunnel */ - vlib_buffer_advance (b0, ip4_header_bytes (ip0)); - len0 = vlib_buffer_length_in_chain (vm, b0); - - if (PREDICT_TRUE (NULL != p)) + uword *p = hash_get (im->ipsec_if_pool_index_by_key, key0); + if (p) { - const ipsec_tunnel_if_t *t0; + tid0 = p[0]; + last_tunnel_id = tid0; + last_key = key0; + } + else + { + n_no_tunnel++; + next[0] = IPSEC_INPUT_NEXT_DROP; + goto trace00; + } + } - t0 = pool_elt_at_index (im->tunnel_interfaces, p[0]); - vnet_buffer (b0)->ipsec.sad_index = t0->input_sa_index; + t0 = pool_elt_at_index (im->tunnel_interfaces, tid0); + vnet_buffer (b[0])->ipsec.sad_index = t0->input_sa_index; - if (PREDICT_TRUE (t0->hw_if_index != ~0)) - { - vnet_buffer
(b0)->ipsec.flags = 0; - sw_if_index0 = t0->sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0; - - if (PREDICT_FALSE - (!(t0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))) - { - vlib_increment_combined_counter - (drop_counter, thread_index, sw_if_index0, 1, len0); - b0->error = node->errors[IPSEC_IF_INPUT_ERROR_DISABLED]; - n_disabled++; - goto trace; - } - - if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index)) - { - n_packets++; - n_bytes += len0; - } - else - { - if (last_t) - { - vlib_increment_combined_counter - (rx_counter, thread_index, sw_if_index0, - n_packets, n_bytes); - } - - last_sw_if_index = sw_if_index0; - last_t = t0; - n_packets = 1; - n_bytes = len0; - } - } - else - { - vnet_buffer (b0)->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL; - } + if (PREDICT_TRUE (t0->hw_if_index != ~0)) + { + vnet_buffer (b[0])->ipsec.flags = 0; + sw_if_index0 = t0->sw_if_index; + vnet_buffer (b[0])->sw_if_index[VLIB_RX] = sw_if_index0; + + if (PREDICT_FALSE (!(t0->flags & VNET_HW_INTERFACE_FLAG_LINK_UP))) + { + vlib_increment_combined_counter + (drop_counter, thread_index, sw_if_index0, 1, len0); + n_disabled++; + next[0] = IPSEC_INPUT_NEXT_DROP; + goto trace00; + } - next0 = im->esp4_decrypt_next_index; + if (PREDICT_TRUE (sw_if_index0 == last_sw_if_index)) + { + n_packets++; + n_bytes += len0; } else { - b0->error = node->errors[IPSEC_IF_INPUT_ERROR_NO_TUNNEL]; - n_no_tunnel++; + if (n_packets) + { + vlib_increment_combined_counter + (rx_counter, thread_index, last_sw_if_index, + n_packets, n_bytes); + } + + last_sw_if_index = sw_if_index0; + n_packets = 1; + n_bytes = len0; } + } + else + { + vnet_buffer (b[0])->ipsec.flags = IPSEC_FLAG_IPSEC_GRE_TUNNEL; + } - trace: - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + trace00: + if (is_trace) + { + if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) { ipsec_if_input_trace_t *tr = - vlib_add_trace (vm, node, b0, sizeof (*tr)); + vlib_add_trace (vm, node, b[0], sizeof (*tr)); tr->spi = 
clib_host_to_net_u32 (esp0->spi); tr->seq = clib_host_to_net_u32 (esp0->seq); } - - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + /* next */ + b += 1; + next += 1; + n_left_from -= 1; } - if (last_t) + if (n_packets) { vlib_increment_combined_counter (rx_counter, thread_index, @@ -386,25 +431,31 @@ VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm, vlib_node_increment_counter (vm, ipsec_if_input_node.index, IPSEC_IF_INPUT_ERROR_RX, from_frame->n_vectors - n_disabled); - vlib_node_increment_counter (vm, ipsec_if_input_node.index, IPSEC_IF_INPUT_ERROR_DISABLED, n_disabled); vlib_node_increment_counter (vm, ipsec_if_input_node.index, - IPSEC_IF_INPUT_ERROR_DISABLED, n_no_tunnel); + IPSEC_IF_INPUT_ERROR_NO_TUNNEL, n_no_tunnel); + + vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors); return from_frame->n_vectors; } +VLIB_NODE_FN (ipsec_if_input_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return ipsec_if_input_inline (vm, node, from_frame); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (ipsec_if_input_node) = { .name = "ipsec-if-input", .vector_size = sizeof (u32), .format_trace = format_ipsec_if_input_trace, .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = ARRAY_LEN(ipsec_if_input_error_strings), .error_strings = ipsec_if_input_error_strings, - .sibling_of = "ipsec4-input-feature", }; /* *INDENT-ON* */ -- 2.16.6