X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=lib%2Flibrte_eal%2Fcommon%2Finclude%2Farch%2Farm%2Frte_vect.h;h=e5c1d358b57c5e31e07cad635d9a1667c43a4135;hb=refs%2Fheads%2Fupstream-17.11-stable;hp=4107c99884a7aaa558f1569c528e2c3539ed8a31;hpb=7595afa4d30097c1177b69257118d8ad89a539be;p=deb_dpdk.git

diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 4107c998..e5c1d358 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -1,7 +1,7 @@
 /*-
  *   BSD LICENSE
  *
- *   Copyright(c) 2015 Cavium Networks. All rights reserved.
+ *   Copyright(c) 2015 Cavium, Inc. All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
  *   modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
  *     notice, this list of conditions and the following disclaimer in
  *     the documentation and/or other materials provided with the
  *     distribution.
- *   * Neither the name of Cavium Networks nor the names of its
+ *   * Neither the name of Cavium, Inc nor the names of its
  *     contributors may be used to endorse or promote products derived
  *     from this software without specific prior written permission.
  *
@@ -35,6 +35,7 @@
 
 #include <stdint.h>
 #include "generic/rte_vect.h"
+#include "rte_debug.h"
 #include "arm_neon.h"
 
 #ifdef __cplusplus
@@ -76,8 +77,128 @@ vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
 	return vld1q_u8(rte_ret.u8);
 }
 
+static inline uint16_t
+vaddvq_u16(uint16x8_t a)
+{
+	uint32x4_t m = vpaddlq_u16(a);
+	uint64x2_t n = vpaddlq_u32(m);
+	uint64x1_t o = vget_low_u64(n) + vget_high_u64(n);
+
+	return vget_lane_u32((uint32x2_t)o, 0);
+}
+
+#endif
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
+static inline uint32x4_t
+vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
+		uint32x4_t b, const int lane_b)
+{
+	return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
+}
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
+
+#if (GCC_VERSION < 40900)
+typedef uint64_t poly64_t;
+typedef uint64x2_t poly64x2_t;
+typedef uint8_t poly128_t __attribute__((vector_size(16), aligned(16)));
+
+static inline uint32x4_t
+vceqzq_u32(uint32x4_t a)
+{
+	return (a == 0);
+}
+#endif
+
+/* NEON intrinsic vreinterpretq_u64_p128() is supported since GCC version 7 */
+static inline uint64x2_t
+vreinterpretq_u64_p128(poly128_t x)
+{
+	return (uint64x2_t)x;
+}
+
+/* NEON intrinsic vreinterpretq_p64_u64() is supported since GCC version 7 */
+static inline poly64x2_t
+vreinterpretq_p64_u64(uint64x2_t x)
+{
+	return (poly64x2_t)x;
+}
+
+/* NEON intrinsic vgetq_lane_p64() is supported since GCC version 7 */
+static inline poly64_t
+vgetq_lane_p64(poly64x2_t x, const int lane)
+{
+	RTE_ASSERT(lane >= 0 && lane <= 1);
+
+	poly64_t *p = (poly64_t *)&x;
+
+	return p[lane];
+}
+#endif
 #endif
 
+/*
+ * If (0 <= index <= 15), then call the ASIMD ext instruction on the
+ * 128 bit regs v0 and v1 with the appropriate index.
+ *
+ * Else returns a zero vector.
+ */
+static inline uint8x16_t
+vextract(uint8x16_t v0, uint8x16_t v1, const int index)
+{
+	switch (index) {
+	case 0: return vextq_u8(v0, v1, 0);
+	case 1: return vextq_u8(v0, v1, 1);
+	case 2: return vextq_u8(v0, v1, 2);
+	case 3: return vextq_u8(v0, v1, 3);
+	case 4: return vextq_u8(v0, v1, 4);
+	case 5: return vextq_u8(v0, v1, 5);
+	case 6: return vextq_u8(v0, v1, 6);
+	case 7: return vextq_u8(v0, v1, 7);
+	case 8: return vextq_u8(v0, v1, 8);
+	case 9: return vextq_u8(v0, v1, 9);
+	case 10: return vextq_u8(v0, v1, 10);
+	case 11: return vextq_u8(v0, v1, 11);
+	case 12: return vextq_u8(v0, v1, 12);
+	case 13: return vextq_u8(v0, v1, 13);
+	case 14: return vextq_u8(v0, v1, 14);
+	case 15: return vextq_u8(v0, v1, 15);
+	}
+	return vdupq_n_u8(0);
+}
+
+/**
+ * Shifts right 128 bit register by specified number of bytes
+ *
+ * Value of shift parameter must be in range 0 - 16
+ */
+static inline uint64x2_t
+vshift_bytes_right(uint64x2_t reg, const unsigned int shift)
+{
+	return vreinterpretq_u64_u8(vextract(
+		vreinterpretq_u8_u64(reg),
+		vdupq_n_u8(0),
+		shift));
+}
+
+/**
+ * Shifts left 128 bit register by specified number of bytes
+ *
+ * Value of shift parameter must be in range 0 - 16
+ */
+static inline uint64x2_t
+vshift_bytes_left(uint64x2_t reg, const unsigned int shift)
+{
+	return vreinterpretq_u64_u8(vextract(
+		vdupq_n_u8(0),
+		vreinterpretq_u8_u64(reg),
+		16 - shift));
+}
+
 #ifdef __cplusplus
 }
 #endif
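
Note (appended commentary, not part of the patch): vextq_u8() requires its index argument to be a compile-time constant, which is why the patch's vextract() dispatches a runtime index through a sixteen-way switch of immediate forms. The standalone sketch below checks the byte-shift semantics that vshift_bytes_right() and vshift_bytes_left() build on that trick, using fixed shift counts against a scalar reference; ref_shift_right() and ref_shift_left() are hypothetical helpers written here only for the comparison. It assumes a little-endian ARM target with NEON, e.g. gcc -O2 -o shift_demo shift_demo.c on AArch64.

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arm_neon.h>

/* Scalar reference: shift a 16-byte little-endian value right by n bytes. */
static void
ref_shift_right(const uint8_t in[16], uint8_t out[16], unsigned int n)
{
	memset(out, 0, 16);
	memcpy(out, in + n, 16 - n);
}

/* Scalar reference: shift left by n bytes; the n low bytes become zero. */
static void
ref_shift_left(const uint8_t in[16], uint8_t out[16], unsigned int n)
{
	memset(out, 0, 16);
	memcpy(out + n, in, 16 - n);
}

int
main(void)
{
	uint8_t in[16], out[16], ref[16];
	unsigned int i;

	for (i = 0; i < 16; i++)
		in[i] = (uint8_t)(i + 1);

	uint8x16_t v = vld1q_u8(in);

	/* Right shift by 3: what vshift_bytes_right(reg, 3) expands to. */
	vst1q_u8(out, vextq_u8(v, vdupq_n_u8(0), 3));
	ref_shift_right(in, ref, 3);
	printf("right by 3: %s\n", memcmp(out, ref, 16) ? "MISMATCH" : "ok");

	/* Left shift by 5: what vshift_bytes_left(reg, 5) expands to (16 - 5 = 11). */
	vst1q_u8(out, vextq_u8(vdupq_n_u8(0), v, 11));
	ref_shift_left(in, ref, 5);
	printf("left by 5: %s\n", memcmp(out, ref, 16) ? "MISMATCH" : "ok");

	return 0;
}

One observable quirk of the switch fall-through: vshift_bytes_left(reg, 0) resolves to vextract(zero, reg, 16), which takes the fall-through and returns the zero vector rather than reg, so callers passing a shift count of 0 to the left-shift helper should take care.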
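A similar sketch for the vaddvq_u16() fallback: the single-instruction horizontal add exists only as an AArch64 intrinsic, so the patch emulates it with two pairwise widening adds (8 x u16 to 4 x u32 to 2 x u64), folds the two 64-bit halves with vget_low_u64() plus vget_high_u64(), and reads the low 32 bits of the result. The body is copied below under the hypothetical name sum_u16x8_fallback() so it can be built and checked even on AArch64, where vaddvq_u16() is native; same toolchain assumptions as above.

#include <stdio.h>
#include <stdint.h>
#include <arm_neon.h>

/*
 * Copy of the patch's vaddvq_u16() fallback under a different name, to
 * avoid colliding with the native intrinsic on AArch64. The uint64x1_t
 * addition and the vector cast rely on the GCC/Clang vector extensions,
 * as in the original.
 */
static inline uint16_t
sum_u16x8_fallback(uint16x8_t a)
{
	uint32x4_t m = vpaddlq_u16(a);	/* 8 x u16 -> 4 x u32 pairwise sums */
	uint64x2_t n = vpaddlq_u32(m);	/* 4 x u32 -> 2 x u64 pairwise sums */
	uint64x1_t o = vget_low_u64(n) + vget_high_u64(n); /* fold halves */

	/* Lane 0 holds the low 32 bits on a little-endian target. */
	return vget_lane_u32((uint32x2_t)o, 0);
}

int
main(void)
{
	uint16_t in[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint16_t expect = 0;
	unsigned int i;

	for (i = 0; i < 8; i++)
		expect += in[i];

	uint16_t got = sum_u16x8_fallback(vld1q_u16(in));
	printf("sum: got %u, expect %u\n", (unsigned int)got,
	       (unsigned int)expect);
	return 0;
}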