From 08bca80c998bdae87060306350124714606a1604 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 18 Jun 2018 22:21:40 +0200 Subject: [PATCH] ip: vectorized ip checksum Change-Id: Ida678e6f31daa8decb18189da712a350336326e2 Signed-off-by: Damjan Marion --- src/vnet/ip/ip4_input.h | 97 +++++++++++++--------------------------------- src/vnet/ip/ip_packet.h | 91 +++++++++++++++++++++++++++++++++++++++++++ src/vppinfra/vector_avx2.h | 28 +++++++++++++ 3 files changed, 147 insertions(+), 69 deletions(-) diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h index e0873039990..889b423d700 100644 --- a/src/vnet/ip/ip4_input.h +++ b/src/vnet/ip/ip4_input.h @@ -56,6 +56,26 @@ typedef enum IP4_INPUT_N_NEXT, } ip4_input_next_t; +static_always_inline void +check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum) +{ + if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45)) + { + if ((ip->ip_version_and_header_length & 0xf) != 5) + { + *error = IP4_ERROR_OPTIONS; + if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0) + *error = IP4_ERROR_BAD_CHECKSUM; + } + else + *error = IP4_ERROR_VERSION; + } + else + if (PREDICT_FALSE (verify_checksum && + ip_csum (ip, sizeof (ip4_header_t)) != 0)) + *error = IP4_ERROR_BAD_CHECKSUM; +} + always_inline void ip4_input_check_x4 (vlib_main_t * vm, vlib_node_runtime_t * error_node, @@ -71,22 +91,10 @@ ip4_input_check_x4 (vlib_main_t * vm, error0 = error1 = error2 = error3 = IP4_ERROR_NONE; - /* Punt packets with options or wrong version. */ - if (PREDICT_FALSE (ip[0]->ip_version_and_header_length != 0x45)) - error0 = (ip[0]->ip_version_and_header_length & 0xf) != 5 ? - IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; - - if (PREDICT_FALSE (ip[1]->ip_version_and_header_length != 0x45)) - error1 = (ip[1]->ip_version_and_header_length & 0xf) != 5 ? - IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; - - if (PREDICT_FALSE (ip[2]->ip_version_and_header_length != 0x45)) - error2 = (ip[2]->ip_version_and_header_length & 0xf) != 5 ? - IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; - - if (PREDICT_FALSE (ip[3]->ip_version_and_header_length != 0x45)) - error3 = (ip[3]->ip_version_and_header_length & 0xf) != 5 ? - IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + check_ver_opt_csum (ip[0], &error0, verify_checksum); + check_ver_opt_csum (ip[1], &error1, verify_checksum); + check_ver_opt_csum (ip[2], &error2, verify_checksum); + check_ver_opt_csum (ip[3], &error3, verify_checksum); if (PREDICT_FALSE (ip[0]->ttl < 1)) error0 = IP4_ERROR_TIME_EXPIRED; @@ -97,26 +105,6 @@ ip4_input_check_x4 (vlib_main_t * vm, if (PREDICT_FALSE (ip[3]->ttl < 1)) error3 = IP4_ERROR_TIME_EXPIRED; - /* Verify header checksum. */ - if (verify_checksum) - { - ip_csum_t sum0, sum1, sum2, sum3; - - ip4_partial_header_checksum_x1 (ip[0], sum0); - ip4_partial_header_checksum_x1 (ip[1], sum1); - ip4_partial_header_checksum_x1 (ip[2], sum2); - ip4_partial_header_checksum_x1 (ip[3], sum3); - - error0 = 0xffff != ip_csum_fold (sum0) ? - IP4_ERROR_BAD_CHECKSUM : error0; - error1 = 0xffff != ip_csum_fold (sum1) ? - IP4_ERROR_BAD_CHECKSUM : error1; - error2 = 0xffff != ip_csum_fold (sum2) ? - IP4_ERROR_BAD_CHECKSUM : error2; - error3 = 0xffff != ip_csum_fold (sum3) ? - IP4_ERROR_BAD_CHECKSUM : error3; - } - /* Drop fragmentation offset 1 packets. */ error0 = ip4_get_fragment_offset (ip[0]) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; @@ -226,34 +214,14 @@ ip4_input_check_x2 (vlib_main_t * vm, error0 = error1 = IP4_ERROR_NONE; - /* Punt packets with options or wrong version. */ - if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) - error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? - IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; - - if (PREDICT_FALSE (ip1->ip_version_and_header_length != 0x45)) - error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ? - IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; + check_ver_opt_csum (ip0, &error0, verify_checksum); + check_ver_opt_csum (ip1, &error1, verify_checksum); if (PREDICT_FALSE (ip0->ttl < 1)) error0 = IP4_ERROR_TIME_EXPIRED; if (PREDICT_FALSE (ip1->ttl < 1)) error1 = IP4_ERROR_TIME_EXPIRED; - /* Verify header checksum. */ - if (verify_checksum) - { - ip_csum_t sum0, sum1; - - ip4_partial_header_checksum_x1 (ip0, sum0); - ip4_partial_header_checksum_x1 (ip1, sum1); - - error0 = 0xffff != ip_csum_fold (sum0) ? - IP4_ERROR_BAD_CHECKSUM : error0; - error1 = 0xffff != ip_csum_fold (sum1) ? - IP4_ERROR_BAD_CHECKSUM : error1; - } - /* Drop fragmentation offset 1 packets. */ error0 = ip4_get_fragment_offset (ip0) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; @@ -320,22 +288,13 @@ ip4_input_check_x1 (vlib_main_t * vm, error0 = IP4_ERROR_NONE; + check_ver_opt_csum (ip0, &error0, verify_checksum); + /* Punt packets with options or wrong version. */ if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45)) error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ? IP4_ERROR_OPTIONS : IP4_ERROR_VERSION; - /* Verify header checksum. */ - if (verify_checksum) - { - ip_csum_t sum0; - - ip4_partial_header_checksum_x1 (ip0, sum0); - - error0 = 0xffff != ip_csum_fold (sum0) ? - IP4_ERROR_BAD_CHECKSUM : error0; - } - /* Drop fragmentation offset 1 packets. */ error0 = ip4_get_fragment_offset (ip0) == 1 ? IP4_ERROR_FRAGMENT_OFFSET_ONE : error0; diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h index 6c86e3e046e..c4990976188 100644 --- a/src/vnet/ip/ip_packet.h +++ b/src/vnet/ip/ip_packet.h @@ -86,6 +86,97 @@ typedef enum /* IP checksum support. */ +static_always_inline u16 +ip_csum (void *data, u16 n_left) +{ + u32 sum; +#ifdef CLIB_HAVE_VEC256 + u16x16 v1, v2; + u32x8 zero = { 0 }; + u32x8 sum8 = { 0 }; + u32x4 sum4; +#endif + + /* if there is odd number of bytes, pad by zero and store in sum */ + sum = (n_left & 1) ? ((u8 *) data)[n_left - 1] << 8 : 0; + + /* we deal with words */ + n_left >>= 1; + +#ifdef CLIB_HAVE_VEC256 + while (n_left >= 32) + { + v1 = u16x16_load_unaligned (data); + v2 = u16x16_load_unaligned (data + 32); + +#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN + v1 = u16x16_byte_swap (v1); + v2 = u16x16_byte_swap (v2); +#endif + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1)); + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1)); + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v2)); + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v2)); + n_left -= 32; + data += 64; + } + + if (n_left >= 16) + { + v1 = u16x16_load_unaligned (data); +#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN + v1 = u16x16_byte_swap (v1); +#endif + v1 = u16x16_byte_swap (u16x16_load_unaligned (data)); + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1)); + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1)); + n_left -= 16; + data += 32; + } + + if (n_left) + { + v1 = u16x16_load_unaligned (data); +#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN + v1 = u16x16_byte_swap (v1); +#endif + v1 = u16x16_mask_last (v1, 16 - n_left); + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1)); + sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1)); + } + + sum8 = u32x8_hadd (sum8, zero); + sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8); + sum = sum4[0] + sum4[1]; + +#else + /* scalar version */ + while (n_left >= 8) + { + sum += clib_net_to_host_u16 (*((u16 *) data + 0)); + sum += clib_net_to_host_u16 (*((u16 *) data + 1)); + sum += clib_net_to_host_u16 (*((u16 *) data + 2)); + sum += clib_net_to_host_u16 (*((u16 *) data + 3)); + sum += clib_net_to_host_u16 (*((u16 *) data + 4)); + sum += clib_net_to_host_u16 (*((u16 *) data + 5)); + sum += clib_net_to_host_u16 (*((u16 *) data + 6)); + sum += clib_net_to_host_u16 (*((u16 *) data + 7)); + n_left -= 8; + data += 16; + } + while (n_left) + { + sum += clib_net_to_host_u16 (*(u16 *) data); + n_left -= 1; + data += 2; + } +#endif + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return ~((u16) sum); +} + /* Incremental checksum update. */ typedef uword ip_csum_t; diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h index 6b298be8bfd..66c46f226aa 100644 --- a/src/vppinfra/vector_avx2.h +++ b/src/vppinfra/vector_avx2.h @@ -137,6 +137,34 @@ u32x8_hadd (u32x8 v1, u32x8 v2) return (u32x8) _mm256_hadd_epi32 ((__m256i) v1, (__m256i) v2); } +static_always_inline u16x16 +u16x16_mask_last (u16x16 v, u8 n_last) +{ + const u16x16 masks[17] = { + {0}, + {-1}, + {-1, -1}, + {-1, -1, -1}, + {-1, -1, -1, -1}, + {-1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, + }; + + ASSERT (n_last < 17); + + return v & masks[16 - n_last]; +} + #endif /* included_vector_avx2_h */ /* -- 2.16.6