+/* Byte-wise align-right of two 16-byte vectors: passes a and b (cast to
+   __m128i) to _mm_alignr_epi8 with byte count imm and yields the result
+   as a u8x16.  imm is forwarded as the intrinsic's immediate operand.
+   Every argument and the whole expansion are parenthesized so compound
+   argument expressions and postfix operators applied to the result
+   (e.g. subscripting) parse as intended. */
+#define u8x16_align_right(a, b, imm) \
+  ((u8x16) _mm_alignr_epi8 ((__m128i) (a), (__m128i) (b), (imm)))
+
+/* Horizontal reduction of a u32x4 down to one scalar using u32x4_min
+   (defined elsewhere; presumably the lane-wise unsigned minimum).
+   Passing the same vector as both align-right operands rotates it, so
+   two fold steps (by 8 bytes, then by 4 bytes) combine all four lanes
+   into lane 0, which is returned. */
+static_always_inline u32
+u32x4_min_scalar (u32x4 v)
+{
+  u32x4 rotated;
+
+  /* fold lanes 2,3 onto lanes 0,1 */
+  rotated = (u32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 8);
+  v = u32x4_min (v, rotated);
+
+  /* fold lane 1 onto lane 0 */
+  rotated = (u32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 4);
+  v = u32x4_min (v, rotated);
+
+  return v[0];
+}
+
+/* Horizontal reduction of a u32x4 down to one scalar using u32x4_max
+   (defined elsewhere; presumably the lane-wise unsigned maximum).
+   Passing the same vector as both align-right operands rotates it, so
+   two fold steps (by 8 bytes, then by 4 bytes) combine all four lanes
+   into lane 0, which is returned. */
+static_always_inline u32
+u32x4_max_scalar (u32x4 v)
+{
+  u32x4 rotated;
+
+  /* fold lanes 2,3 onto lanes 0,1 */
+  rotated = (u32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 8);
+  v = u32x4_max (v, rotated);
+
+  /* fold lane 1 onto lane 0 */
+  rotated = (u32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 4);
+  v = u32x4_max (v, rotated);
+
+  return v[0];
+}
+
+/* Horizontal reduction of an i32x4 down to one scalar using i32x4_min
+   (defined elsewhere; presumably the lane-wise signed minimum).
+   Passing the same vector as both align-right operands rotates it, so
+   two fold steps (by 8 bytes, then by 4 bytes) combine all four lanes
+   into lane 0, which is returned.
+   NOTE(review): the return type is u32 although the lanes are signed, so
+   lane 0 is converted to unsigned on return -- confirm this is intended. */
+static_always_inline u32
+i32x4_min_scalar (i32x4 v)
+{
+  i32x4 rotated;
+
+  /* fold lanes 2,3 onto lanes 0,1 */
+  rotated = (i32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 8);
+  v = i32x4_min (v, rotated);
+
+  /* fold lane 1 onto lane 0 */
+  rotated = (i32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 4);
+  v = i32x4_min (v, rotated);
+
+  /* same conversion the declared return type performs implicitly */
+  return (u32) v[0];
+}
+
+/* Horizontal reduction of an i32x4 down to one scalar using i32x4_max
+   (defined elsewhere; presumably the lane-wise signed maximum).
+   Passing the same vector as both align-right operands rotates it, so
+   two fold steps (by 8 bytes, then by 4 bytes) combine all four lanes
+   into lane 0, which is returned.
+   NOTE(review): the return type is u32 although the lanes are signed, so
+   lane 0 is converted to unsigned on return -- confirm this is intended. */
+static_always_inline u32
+i32x4_max_scalar (i32x4 v)
+{
+  i32x4 rotated;
+
+  /* fold lanes 2,3 onto lanes 0,1 */
+  rotated = (i32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 8);
+  v = i32x4_max (v, rotated);
+
+  /* fold lane 1 onto lane 0 */
+  rotated = (i32x4) u8x16_align_right ((u8x16) v, (u8x16) v, 4);
+  v = i32x4_max (v, rotated);
+
+  /* same conversion the declared return type performs implicitly */
+  return (u32) v[0];
+}
+