while (n >= 8)
{
#ifdef CLIB_HAVE_VEC256
- u64x4 b0 = u32x4_extend_to_u64x4 (*(u32x4u *) (bi + 0));
- u64x4 b1 = u32x4_extend_to_u64x4 (*(u32x4u *) (bi + 4));
+ u64x4 b0 = u64x4_from_u32x4 (*(u32x4u *) (bi + 0));
+ u64x4 b1 = u64x4_from_u32x4 (*(u32x4u *) (bi + 4));
*(u64x4u *) (fill + 0) = bi2addr (b0);
*(u64x4u *) (fill + 4) = bi2addr (b1);
#else
/* if count is not const, compiler will not unroll while loop
   so we maintain two-in-parallel variant */
while (count >= 8)
{
- u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
- u64x4 b1 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi + 4));
+ u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi));
+ u64x4 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 4));
/* shift and add to get vlib_buffer_t pointer */
u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4);
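For reference, a minimal standalone sketch of the index-to-pointer step above, written with raw AVX2 intrinsics instead of the vppinfra wrappers; the name `indices_to_pointers`, the `base` argument (standing in for `buffer_mem_start + offset`) and the shift of 6 (64-byte cache lines) are assumptions for illustration, not VPP API:

#include <immintrin.h>
#include <stdint.h>

/* Widen four u32 buffer indices to u64, shift by log2 of the
   cache-line size and add the base address -- the same widen,
   shift-and-add sequence performed by u64x4_from_u32x4 and the
   store above. */
static inline void
indices_to_pointers (const uint32_t idx[4], uint64_t out[4], uint64_t base)
{
  __m128i i32 = _mm_loadu_si128 ((const __m128i *) idx);
  __m256i i64 = _mm256_cvtepu32_epi64 (i32);	/* zero-extend u32 -> u64 */
  __m256i p = _mm256_add_epi64 (_mm256_slli_epi64 (i64, 6),
				_mm256_set1_epi64x ((long long) base));
  _mm256_storeu_si256 ((__m256i *) out, p);
}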
while (count >= 4)
{
#ifdef CLIB_HAVE_VEC256
- u64x4 b0 = u32x4_extend_to_u64x4 (u32x4_load_unaligned (bi));
+ u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi));
/* shift and add to get vlib_buffer_t pointer */
u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
#elif defined (CLIB_HAVE_VEC128)
u64x2 off = u64x2_splat (buffer_mem_start + offset);
u32x4 bi4 = u32x4_load_unaligned (bi);
- u64x2 b0 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+ u64x2 b0 = u64x2_from_u32x4 ((u32x4) bi4);
#if defined (__aarch64__)
- u64x2 b1 = u32x4_extend_to_u64x2_high ((u32x4) bi4);
+ u64x2 b1 = u64x2_from_u32x4_high ((u32x4) bi4);
#else
bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1);
- u64x2 b1 = u32x4_extend_to_u64x2 ((u32x4) bi4);
+ u64x2 b1 = u64x2_from_u32x4 ((u32x4) bi4);
#endif
u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2);
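The two branches differ because NEON has a dedicated high-half widen while SSE4.1's pmovzxdq only reads the low two lanes; the non-aarch64 path therefore rotates the high pair into the low half first. A hedged standalone illustration with raw SSE4.1 intrinsics (the function name is hypothetical):

#include <smmintrin.h>

/* Widen lanes 2 and 3 of a u32x4 to u64x2 on x86: rotate them into
   the low half (the same lane order 2,3,0,1 as u32x4_shuffle above),
   then zero-extend with pmovzxdq. */
static inline __m128i
widen_high_u32_pair (__m128i v)
{
  __m128i hi = _mm_shuffle_epi32 (v, _MM_SHUFFLE (1, 0, 3, 2));
  return _mm_cvtepu32_epi64 (hi);
}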
v1 = u16x16_byte_swap (v1);
v2 = u16x16_byte_swap (v2);
#endif
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1));
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1));
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v2));
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v2));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v2));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v2));
n_left -= 32;
data += 64;
}
v1 = u16x16_byte_swap (v1);
#endif
v1 = u16x16_byte_swap (u16x16_load_unaligned (data));
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1));
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
n_left -= 16;
data += 32;
}
v1 = u16x16_byte_swap (v1);
#endif
v1 = u16x16_mask_last (v1, 16 - n_left);
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1));
- sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
+ sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
}
sum8 = u32x8_hadd (sum8, zero);
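The widening additions exist so that sums of 16-bit words cannot lose carries: each u16 lane is zero-extended to a u32 lane before accumulating, and the lanes are folded together only at the end. A hedged scalar equivalent of the same idea (byte-swapped 16-bit loads accumulated in 32 bits, carries folded afterwards), not VPP's actual API:

#include <stddef.h>
#include <stdint.h>

static inline uint16_t
csum16 (const uint8_t *data, size_t n)
{
  uint32_t sum = 0;			/* wide accumulator, like sum8 */
  for (size_t i = 0; i + 1 < n; i += 2)	/* byte-swapped u16 load */
    sum += (uint32_t) ((data[i] << 8) | data[i + 1]);
  if (n & 1)				/* odd trailing byte */
    sum += (uint32_t) data[n - 1] << 8;
  while (sum >> 16)			/* fold carries back in */
    sum = (sum & 0xffff) + (sum >> 16);
  return (uint16_t) ~sum;
}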
return _mm256_movemask_epi8 ((__m256i) v);
}
-/* _extend_to_ */
+/* _from_ */
/* *INDENT-OFF* */
#define _(f,t,i) \
static_always_inline t \
-f##_extend_to_##t (f x) \
+t##_from_##f (f x) \
{ return (t) _mm256_cvt##i ((__m128i) x); }
_(u16x8, u32x8, epu16_epi32)
}
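The rename flips the macro so the destination type leads and every call reads as a conversion, `dst_from_src (x)`. A hedged usage sketch of one generated function, assuming the vppinfra vector headers are in scope and that `u16x8_load_unaligned` is available as in the other load helpers:

#include <vppinfra/vector.h>

static_always_inline u32x8
widen_words (void *p)
{
  /* was: u16x8_extend_to_u32x8 (...) before the rename */
  return u32x8_from_u16x8 (u16x8_load_unaligned (p));
}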
static_always_inline u64x2
-u32x4_extend_to_u64x2 (u32x4 v)
+u64x2_from_u32x4 (u32x4 v)
{
return vmovl_u32 (vget_low_u32 (v));
}
static_always_inline u64x2
-u32x4_extend_to_u64x2_high (u32x4 v)
+u64x2_from_u32x4_high (u32x4 v)
{
return vmovl_high_u32 (v);
}
#endif
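For completeness, a hedged standalone check of the two NEON helpers above; vmovl_high_u32 is an AArch64-only intrinsic, which is exactly why the callers guard it with #if defined (__aarch64__). The function name is hypothetical:

#include <arm_neon.h>

/* Widen the low and high u32 pairs of a u32x4, mirroring
   u64x2_from_u32x4 and u64x2_from_u32x4_high. */
static inline void
widen_both (uint32x4_t v, uint64x2_t *lo, uint64x2_t *hi)
{
  *lo = vmovl_u32 (vget_low_u32 (v));	/* lanes 0,1 -> u64 */
  *hi = vmovl_high_u32 (v);		/* lanes 2,3 -> u64 */
}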
}
-/* _extend_to_ */
+/* _from_ */
/* *INDENT-OFF* */
#define _(f,t,i) \
static_always_inline t \
-f##_extend_to_##t (f x) \
+t##_from_##f (f x) \
{ return (t) _mm_cvt##i ((__m128i) x); }
_(u8x16, u16x8, epu8_epi16)
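Expanded, this SSE instantiation generates u16x8_from_u8x16, which compiles to a single pmovzxbw; a hedged sketch of what the macro body produces (the `_sketch` suffix is added here to avoid shadowing the real definition):

#include <smmintrin.h>

/* What _(u8x16, u16x8, epu8_epi16) expands to after the rename:
   zero-extend the low 8 bytes to 8 x u16. */
static inline __m128i
u16x8_from_u8x16_sketch (__m128i x)
{
  return _mm_cvtepu8_epi16 (x);
}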