+static_always_inline u8x16
+aes_byte_mask (u8x16 x, u8 n_bytes)
+{
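+  /* keep the first n_bytes of x and zero the remaining bytes;
+     byte_mask_scale provides the per-lane byte index (0..15) */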
+  u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
+  __m128i zero = { };
+
+  return (u8x16) _mm_blendv_epi8 (zero, (__m128i) x, (__m128i) mask);
+}
+
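+/* Load only the first n_bytes at p, zeroing the remaining vector lanes. The
+   AVX-512 path uses a byte-masked load; the fallback loads all 16 bytes and
+   masks the result with aes_byte_mask. */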
+static_always_inline u8x16
+aes_load_partial (u8x16u * p, int n_bytes)
+{
+  ASSERT (n_bytes <= 16);
+#ifdef __AVX512F__
+  __m128i zero = { };
+  return (u8x16) _mm_mask_loadu_epi8 (zero, (1 << n_bytes) - 1, p);
+#else
+  return aes_byte_mask (CLIB_MEM_OVERFLOW_LOAD (*, p), n_bytes);
+#endif
+}
+
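+/* Store only the first n_bytes of r to p; bytes past n_bytes at p are left
+   untouched. */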
+static_always_inline void
+aes_store_partial (void *p, u8x16 r, int n_bytes)
+{
+#ifdef __AVX512F__
+  _mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, (__m128i) r);
+#else
+  u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
+  _mm_maskmoveu_si128 ((__m128i) r, (__m128i) mask, p);
+#endif
+}
+
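+/* Encrypt one block: whiten with round_keys[0], apply the middle rounds,
+   then the final round with the last round key. */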
+static_always_inline u8x16
+aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
+{
+  int i;
+  block ^= round_keys[0];
+  for (i = 1; i < AES_KEY_ROUNDS (ks); i += 1)
+    block = aes_enc_round (block, round_keys[i]);
+  return aes_enc_last_round (block, round_keys[i]);
+}
+