From 02833ff3294f4abbd8e3d52b38446e0f8f533ffc Mon Sep 17 00:00:00 2001 From: Srikanth A Date: Wed, 2 Oct 2019 17:48:58 -0700 Subject: [PATCH] tcp: custom checksum calculations for IPv4/IPv6 Type: feature Based on the configuration, we can disable the checksum offload capability and calculate the checksum in software while pushing the TCP and IP headers. This saves some cycles when the VPP stack is used on legacy hardware devices. Signed-off-by: Srikanth A Change-Id: Ic1b3fcf3040917e47ee65263694ebf7437ac5668 (cherry picked from commit 3642782a2748503f5b5ccf89d1575c1d489948ef) --- src/vnet/ip/ip.h | 79 +++++++++++++++++++++++++++++++ src/vnet/ip/ip4_forward.c | 46 ++---------------- src/vnet/ip/ip6.h | 3 +- src/vnet/ip/ip6_forward.c | 69 ++++++--------------------- src/vnet/tcp/tcp.h | 3 ++ src/vnet/tcp/tcp_output.c | 116 +++++++++++++++++++++++++++++++++++++--------- 6 files changed, 194 insertions(+), 122 deletions(-) diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h index 7a82dcf8f2b..65ccaef40c2 100644 --- a/src/vnet/ip/ip.h +++ b/src/vnet/ip/ip.h @@ -195,6 +195,85 @@ ip_incremental_checksum_buffer (vlib_main_t * vm, return sum; } +always_inline u16 +ip_calculate_l4_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip_csum_t sum0, u32 payload_length, + u8 * iph, u32 ip_header_size, u8 * l4h) +{ + u16 sum16; + u8 *data_this_buffer, length_odd; + u32 n_bytes_left, n_this_buffer, n_ip_bytes_this_buffer; + + n_bytes_left = payload_length; + + if (l4h) /* packet l4 header and no buffer chain involved */ + { + ASSERT (p0 == NULL); + n_this_buffer = payload_length; + data_this_buffer = l4h; + } + else + { + ASSERT (p0); + if (iph) /* ip header pointer set to packet in buffer */ + { + ASSERT (ip_header_size); + n_this_buffer = payload_length; + data_this_buffer = iph + ip_header_size; /* at l4 header */ + n_ip_bytes_this_buffer = + p0->current_length - (((u8 *) iph - p0->data) - p0->current_data); + if (PREDICT_FALSE (payload_length + ip_header_size > + n_ip_bytes_this_buffer)) + { + n_this_buffer 
= n_ip_bytes_this_buffer - ip_header_size; + if (PREDICT_FALSE (n_this_buffer >> 31)) + { /* error - ip header don't fit this buffer */ + ASSERT (0); + return 0xfefe; + } + } + } + else /* packet in buffer with no ip header */ + { /* buffer current pointer at l4 header */ + n_this_buffer = p0->current_length; + data_this_buffer = vlib_buffer_get_current (p0); + } + n_this_buffer = clib_min (n_this_buffer, n_bytes_left); + } + + while (1) + { + sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + ASSERT (0); /* error - more buffer expected */ + return 0xfefe; + } + + length_odd = (n_this_buffer & 1); + + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = clib_min (p0->current_length, n_bytes_left); + + if (PREDICT_FALSE (length_odd)) + { + /* Prepend a 0 byte to maintain 2-byte checksum alignment */ + data_this_buffer--; + n_this_buffer++; + n_bytes_left++; + data_this_buffer[0] = 0; + } + } + + sum16 = ~ip_csum_fold (sum0); + return sum16; +} + void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index); extern vlib_node_registration_t ip4_inacl_node; diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c index d4717c6db4b..40c396c4f3b 100644 --- a/src/vnet/ip/ip4_forward.c +++ b/src/vnet/ip/ip4_forward.c @@ -1303,10 +1303,6 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, { ip_csum_t sum0; u32 ip_header_length, payload_length_host_byte_order; - u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer; - u16 sum16; - u8 *data_this_buffer; - u8 length_odd; /* Initialize checksum with ip header. 
*/ ip_header_length = ip4_header_bytes (ip0); @@ -1329,45 +1325,9 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64)); - n_bytes_left = n_this_buffer = payload_length_host_byte_order; - data_this_buffer = (u8 *) ip0 + ip_header_length; - n_ip_bytes_this_buffer = - p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data); - if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer) - { - n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ? - n_ip_bytes_this_buffer - ip_header_length : 0; - } - - while (1) - { - sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); - n_bytes_left -= n_this_buffer; - if (n_bytes_left == 0) - break; - - ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT); - if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) - return 0xfefe; - - length_odd = (n_this_buffer & 1); - - p0 = vlib_get_buffer (vm, p0->next_buffer); - data_this_buffer = vlib_buffer_get_current (p0); - n_this_buffer = clib_min (p0->current_length, n_bytes_left); - - if (PREDICT_FALSE (length_odd)) - { - /* Prepend a 0 or the resulting checksum will be incorrect. 
*/ - data_this_buffer--; - n_this_buffer++; - n_bytes_left++; - data_this_buffer[0] = 0; - } - } - - sum16 = ~ip_csum_fold (sum0); - return sum16; + return ip_calculate_l4_checksum (vm, p0, sum0, + payload_length_host_byte_order, (u8 *) ip0, + ip_header_length, NULL); } u32 diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h index 810fd70ff0c..94c5080a0aa 100644 --- a/src/vnet/ip/ip6.h +++ b/src/vnet/ip/ip6.h @@ -683,7 +683,8 @@ vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b, sizeof (ip6h->src_address)); clib_memcpy_fast (ip6h->dst_address.as_u8, dst->as_u8, sizeof (ip6h->src_address)); - b->flags |= VNET_BUFFER_F_IS_IP6; + vnet_buffer (b)->l3_hdr_offset = (u8 *) ip6h - b->data; + b->flags |= VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID; return ip6h; } diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c index ea13116d8aa..47fb57ae201 100644 --- a/src/vnet/ip/ip6_forward.c +++ b/src/vnet/ip/ip6_forward.c @@ -1011,11 +1011,10 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, ip6_header_t * ip0, int *bogus_lengthp) { ip_csum_t sum0; - u16 sum16, payload_length_host_byte_order; - u32 i, n_this_buffer, n_bytes_left; + u16 payload_length_host_byte_order; + u32 i; u32 headers_size = sizeof (ip0[0]); u8 *data_this_buffer; - u8 length_odd; ASSERT (bogus_lengthp); *bogus_lengthp = 0; @@ -1027,14 +1026,10 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) { - sum0 = ip_csum_with_carry (sum0, - clib_mem_unaligned (&ip0-> - src_address.as_uword[i], - uword)); - sum0 = - ip_csum_with_carry (sum0, - clib_mem_unaligned (&ip0->dst_address.as_uword[i], - uword)); + sum0 = ip_csum_with_carry + (sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword)); + sum0 = ip_csum_with_carry + (sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword)); } /* some icmp packets may come with a "router alert" hop-by-hop extension header 
(e.g., mldv2 packets) @@ -1056,52 +1051,14 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, headers_size += skip_bytes; } - n_bytes_left = n_this_buffer = payload_length_host_byte_order; - if (p0) - { - u32 n_ip_bytes_this_buffer = - p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data); - if (n_this_buffer + headers_size > n_ip_bytes_this_buffer) - { - n_this_buffer = p0->current_length > headers_size ? - n_ip_bytes_this_buffer - headers_size : 0; - } - } - - while (1) - { - sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer); - n_bytes_left -= n_this_buffer; - if (n_bytes_left == 0) - break; - - ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT); - if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) - { - *bogus_lengthp = 1; - return 0xfefe; - } - - length_odd = (n_this_buffer & 1); - - p0 = vlib_get_buffer (vm, p0->next_buffer); - data_this_buffer = vlib_buffer_get_current (p0); - n_this_buffer = clib_min (p0->current_length, n_bytes_left); - - if (PREDICT_FALSE (length_odd)) - { - /* Prepend a 0 or the resulting checksum will be incorrect. 
*/ - data_this_buffer--; - n_this_buffer++; - n_bytes_left++; - data_this_buffer[0] = 0; - } - } - - sum16 = ~ip_csum_fold (sum0); - - return sum16; + return ip_calculate_l4_checksum (vm, p0, sum0, + payload_length_host_byte_order, + (u8 *) ip0, headers_size, NULL); + else + return ip_calculate_l4_checksum (vm, 0, sum0, + payload_length_host_byte_order, NULL, 0, + data_this_buffer); } u32 diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index a1b7d4cbd0d..1bf32818171 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -120,6 +120,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; _(RATE_SAMPLE, "Conn does rate sampling") \ _(TRACK_BURST, "Track burst") \ _(ZERO_RWND_SENT, "Zero RWND sent") \ + _(NO_CSUM_OFFLOAD, "No Checksum Offload") \ typedef enum _tcp_connection_flag_bits { @@ -1233,6 +1234,8 @@ vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq, th->window = wnd; th->checksum = 0; th->urgent_pointer = 0; + vnet_buffer (b)->l4_hdr_offset = (u8 *) th - b->data; + b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; return th; } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 7ebea37fc43..a7b0e398d36 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -444,6 +444,78 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); } + +/* Compute TCP checksum in software when offloading is disabled for a connection */ +u16 +ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0, + ip46_address_t * src, ip46_address_t * dst) +{ + ip_csum_t sum0; + u16 payload_length_host_byte_order; + u32 i; + + /* Initialize checksum with ip header. 
*/ + sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) + + clib_host_to_net_u16 (IP_PROTOCOL_TCP); + payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0); + + for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++) + { + sum0 = ip_csum_with_carry + (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword)); + sum0 = ip_csum_with_carry + (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword)); + } + + return ip_calculate_l4_checksum (vm, p0, sum0, + payload_length_host_byte_order, NULL, 0, + NULL); +} + +u16 +ip4_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0, + ip46_address_t * src, ip46_address_t * dst) +{ + ip_csum_t sum0; + u32 payload_length_host_byte_order; + + payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0); + sum0 = + clib_host_to_net_u32 (payload_length_host_byte_order + + (IP_PROTOCOL_TCP << 16)); + + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32)); + sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32)); + + return ip_calculate_l4_checksum (vm, p0, sum0, + payload_length_host_byte_order, NULL, 0, + NULL); +} + +static inline u16 +tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b) +{ + u16 checksum = 0; + if (PREDICT_FALSE (tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)) + { + tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); + vlib_main_t *vm = wrk->vm; + + if (tc->c_is_ip4) + checksum = ip4_tcp_compute_checksum_custom + (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip); + else + checksum = ip6_tcp_compute_checksum_custom + (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip); + } + else + { + b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; + } + return checksum; +} + + /** * Prepare ACK */ @@ -466,6 +538,9 @@ tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state, tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd); tcp_options_write ((u8 *) (th + 1), snd_opts); + + th->checksum = tcp_compute_checksum (tc, b); + vnet_buffer (b)->tcp.connection_index 
= tc->c_c_index; if (wnd == 0) @@ -517,6 +592,7 @@ tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b) initial_wnd); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; tcp_options_write ((u8 *) (th + 1), &snd_opts); + th->checksum = tcp_compute_checksum (tc, b); } /** @@ -541,6 +617,7 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) tcp_options_write ((u8 *) (th + 1), snd_opts); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + th->checksum = tcp_compute_checksum (tc, b); } always_inline void @@ -786,7 +863,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, { ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, - &pkt_ih4->src_address, IP_PROTOCOL_TCP, 1); + &pkt_ih4->src_address, IP_PROTOCOL_TCP, + (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD))); th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); } else @@ -833,6 +911,7 @@ tcp_send_reset (tcp_connection_t * tc) tc->rcv_nxt, tcp_hdr_opts_len, flags, advertise_wnd); opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts); + th->checksum = tcp_compute_checksum (tc, b); ASSERT (opts_write_len == tc->snd_opts_len); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4); @@ -851,7 +930,8 @@ tcp_push_ip_hdr (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, { ip4_header_t *ih; ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4, - &tc->c_rmt_ip4, IP_PROTOCOL_TCP, 1); + &tc->c_rmt_ip4, IP_PROTOCOL_TCP, + (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD))); th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih); } else @@ -1082,6 +1162,9 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt, tc->bytes_out += data_len; tc->data_segs_out += 1; + + th->checksum = tcp_compute_checksum (tc, b); + TCP_EVT (TCP_EVT_PKTIZE, tc); } @@ -2160,30 +2243,19 @@ always_inline void tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0, tcp_connection_t * 
tc0, u8 is_ip4) { - tcp_header_t *th0 = 0; + u8 __clib_unused *ih0; + tcp_header_t __clib_unused *th0 = vlib_buffer_get_current (b0); - th0 = vlib_buffer_get_current (b0); TCP_EVT (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length); + if (is_ip4) - { - vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4, - IP_PROTOCOL_TCP, 1); - b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; - vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - th0->checksum = 0; - } + ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4, + IP_PROTOCOL_TCP, + (!(tc0->flags & TCP_CONN_NO_CSUM_OFFLOAD))); else - { - ip6_header_t *ih0; - ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, - &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); - b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; - vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data; - vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - b0->flags |= - VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID; - th0->checksum = 0; - } + ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6, + IP_PROTOCOL_TCP); + } always_inline void -- 2.16.6