sample_next_t next_index;
u32 pkts_swapped = 0;
- /* Vector shuffle mask to swap src, dst */
- u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
src_dst0 = ((u8x16 *) en0)[0];
src_dst1 = ((u8x16 *) en1)[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
- src_dst1 = u8x16_shuffle (src_dst1, swapmac);
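+ /* Inline shuffle indices: result bytes 0-5 take input bytes 6-11
+    (src MAC), bytes 6-11 take input bytes 0-5 (dst MAC), and bytes
+    12-15 are left in place. */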
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
+ src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
((u8x16 *) en1)[0] = src_dst1;
en0 = vlib_buffer_get_current (b0);
src_dst0 = ((u8x16 *) en0)[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
#ifdef VERSION_3
-#define u8x16_shuffle __builtin_shuffle
/* This would normally be a stack local, but since it's a constant... */
static const u16 nexts[VLIB_FRAME_SIZE] = { 0 };
u32 n_left_from, *from;
u32 pkts_swapped = 0;
- /* Vector shuffle mask to swap src, dst */
- u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
/* See comment below about sending all pkts to the same place... */
u16 *next __attribute__ ((unused));
src_dst2 = ((u8x16 *) vlib_buffer_get_current (b[2]))[0];
src_dst3 = ((u8x16 *) vlib_buffer_get_current (b[3]))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
- src_dst1 = u8x16_shuffle (src_dst1, swapmac);
- src_dst2 = u8x16_shuffle (src_dst2, swapmac);
- src_dst3 = u8x16_shuffle (src_dst3, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst2 = u8x16_shuffle (src_dst2, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst3 = u8x16_shuffle (src_dst3, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
((u8x16 *) vlib_buffer_get_current (b[1]))[0] = src_dst1;
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
vnet_buffer (b[0])->sw_if_index[VLIB_RX];
#ifdef VERSION_4
-#define u8x16_shuffle __builtin_shuffle
-
-static u8x16 swapmac =
- { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
-
/* Final stage in the pipeline, do the mac swap */
static inline u32
last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12,
+ 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b))[0] = src_dst0;
vnet_buffer (b)->sw_if_index[VLIB_TX] =
vnet_buffer (b)->sw_if_index[VLIB_RX];
#undef _vector_size
+ /* _shuffle and _shuffle2 */
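+ /* Older GCC lacks clang's __builtin_shufflevector; emulate it with
+    __builtin_shuffle, which takes the indices as a constant mask vector. */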
+#if defined(__GNUC__) && !defined(__clang__)
+#define __builtin_shufflevector(v1, v2, ...) \
+ __builtin_shuffle ((v1), (v2), (__typeof__ (v1)){ __VA_ARGS__ })
+#endif
+
+#define u8x16_shuffle(v1, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v1), __VA_ARGS__)
+#define u8x32_shuffle(v1, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v1), __VA_ARGS__)
+#define u8x64_shuffle(v1, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v1), __VA_ARGS__)
+
+#define u16x8_shuffle(v1, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v1), __VA_ARGS__)
+#define u16x16_shuffle(v1, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v1), __VA_ARGS__)
+#define u16x32_shuffle(v1, ...) \
+  (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v1), __VA_ARGS__)
+
+#define u32x4_shuffle(v1, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v1), __VA_ARGS__)
+#define u32x8_shuffle(v1, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v1), __VA_ARGS__)
+#define u32x16_shuffle(v1, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v1), __VA_ARGS__)
+
+#define u64x2_shuffle(v1, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v1), __VA_ARGS__)
+#define u64x4_shuffle(v1, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v1), __VA_ARGS__)
+#define u64x8_shuffle(v1, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v1), __VA_ARGS__)
+
+#define u8x16_shuffle2(v1, v2, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v2), __VA_ARGS__)
+#define u8x32_shuffle2(v1, v2, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v2), __VA_ARGS__)
+#define u8x64_shuffle2(v1, v2, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v2), __VA_ARGS__)
+
+#define u16x8_shuffle2(v1, v2, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v2), __VA_ARGS__)
+#define u16x16_shuffle2(v1, v2, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v2), __VA_ARGS__)
+#define u16x32_shuffle2(v1, v2, ...) \
+  (u16x32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v2), __VA_ARGS__)
+
+#define u32x4_shuffle2(v1, v2, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v2), __VA_ARGS__)
+#define u32x8_shuffle2(v1, v2, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v2), __VA_ARGS__)
+#define u32x16_shuffle2(v1, v2, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v2), __VA_ARGS__)
+
+#define u64x2_shuffle2(v1, v2, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v2), __VA_ARGS__)
+#define u64x4_shuffle2(v1, v2, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v2), __VA_ARGS__)
+#define u64x8_shuffle2(v1, v2, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v2), __VA_ARGS__)
+
#define VECTOR_WORD_TYPE(t) t##x
#define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t))