/* Copy byte counts out of a compressed CQE's mini-CQE array.
 *
 * With CQE compression, a "title" CQE at ring slot cq_ci is followed by
 * arrays of mini-CQEs packed into later CQE slots: the first array is
 * read from slot (cq_ci + 1) & mask, and each subsequent group of 8
 * mini-CQEs from slot (cq_ci + 8 * k) & mask — matching the stride
 * used below.
 * NOTE(review): exact mini-CQE placement follows the mlx5 hardware
 * layout; confirm against the mlx5 PRM if modifying the stride logic.
 *
 * @param rxq       receive queue owning the CQ ring
 * @param skip      number of leading mini-CQEs already consumed by a
 *                  previous call (used to resume a partial array)
 * @param n_left    number of mini-CQEs to process now
 * @param cq_ci     consumer index of the compressed (title) CQE
 * @param mask      CQ ring mask (ring size is a power of two)
 * @param byte_cnt  output array, one byte count per mini-CQE (still in
 *                  network byte order; caller swaps later)
 */
static_always_inline void
process_mini_cqes (rdma_rxq_t * rxq, u32 skip, u32 n_left, u32 cq_ci,
		   u32 mask, u32 * byte_cnt)
{
  mlx5dv_mini_cqe_t *mcqe;
  u32 mcqe_array_index = (cq_ci + 1) & mask;
  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);

  /* slot stride below is computed from the title CQE's own index */
  mcqe_array_index = cq_ci;

  if (skip)
    {
      /* skip whole 8-entry groups first */
      u32 n = skip & ~7;

      if (n)
	{
	  mcqe_array_index = (mcqe_array_index + n) & mask;
	  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
	  skip -= n;
	}

      if (skip)
	{
	  /* finish the partially consumed group, then advance to the
	     next group's slot */
	  n = clib_min (8 - skip, n_left);
	  for (int i = 0; i < n; i++)
	    byte_cnt[i] = mcqe[skip + i].byte_count;
	  mcqe_array_index = (mcqe_array_index + 8) & mask;
	  mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
	  n_left -= n;
	  byte_cnt += n;
	}

    }

  /* bulk copy: 8 mini-CQEs per iteration, one CQE-slot group each */
  while (n_left >= 8)
    {
      for (int i = 0; i < 8; i++)
	byte_cnt[i] = mcqe[i].byte_count;

      n_left -= 8;
      byte_cnt += 8;
      mcqe_array_index = (mcqe_array_index + 8) & mask;
      mcqe = (mlx5dv_mini_cqe_t *) (rxq->cqes + mcqe_array_index);
    }

  /* tail: fewer than 8 entries remain in the current group */
  if (n_left)
    {
      for (int i = 0; i < n_left; i++)
	byte_cnt[i] = mcqe[i].byte_count;
    }
}
+
+static_always_inline void
+cqe_set_owner (mlx5dv_cqe_t * cqe, u32 n_left, u8 owner)
+{
+ while (n_left >= 8)
+ {
+ cqe[0].opcode_cqefmt_se_owner = owner;
+ cqe[1].opcode_cqefmt_se_owner = owner;
+ cqe[2].opcode_cqefmt_se_owner = owner;
+ cqe[3].opcode_cqefmt_se_owner = owner;
+ cqe[4].opcode_cqefmt_se_owner = owner;
+ cqe[5].opcode_cqefmt_se_owner = owner;
+ cqe[6].opcode_cqefmt_se_owner = owner;
+ cqe[7].opcode_cqefmt_se_owner = owner;
+ n_left -= 8;
+ cqe += 8;
+ }
+ while (n_left)
+ {
+ cqe[0].opcode_cqefmt_se_owner = owner;
+ n_left--;
+ cqe++;
+ }
+}
+
+static_always_inline void
+compressed_cqe_reset_owner (rdma_rxq_t * rxq, u32 n_mini_cqes, u32 cq_ci,
+ u32 mask, u32 log2_cq_size)
+{
+ u8 owner;
+ u32 offset, cq_size = 1 << log2_cq_size;
+
+
+ /* first CQE is reset by hardware */
+ cq_ci++;
+ n_mini_cqes--;
+
+ offset = cq_ci & mask;
+ owner = 0xf0 | ((cq_ci >> log2_cq_size) & 1);
+
+ if (offset + n_mini_cqes < cq_size)
+ {
+ cqe_set_owner (rxq->cqes + offset, n_mini_cqes, owner);
+ }
+ else
+ {
+ u32 n = cq_size - offset;
+ cqe_set_owner (rxq->cqes + offset, n, owner);
+ cqe_set_owner (rxq->cqes, n_mini_cqes - n, owner ^ 1);
+ }
+
+}
+
+static_always_inline uword
+rdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq,
+ u32 * byte_cnt, u16 * cqe_flags)
+{
+ u32 n_rx_packets = 0;
+ u32 log2_cq_size = rxq->log2_cq_size;
+ u32 mask = pow2_mask (log2_cq_size);
+ u32 cq_ci = rxq->cq_ci;
+
+ if (rxq->n_mini_cqes_left)
+ {
+ /* partially processed mini-cqe array */
+ u32 n_mini_cqes = rxq->n_mini_cqes;
+ u32 n_mini_cqes_left = rxq->n_mini_cqes_left;
+ process_mini_cqes (rxq, n_mini_cqes - n_mini_cqes_left,
+ n_mini_cqes_left, cq_ci, mask, byte_cnt);
+ compressed_cqe_reset_owner (rxq, n_mini_cqes, cq_ci, mask,
+ log2_cq_size);
+ clib_memset_u16 (cqe_flags, rxq->last_cqe_flags, n_mini_cqes_left);
+ n_rx_packets = n_mini_cqes_left;
+ byte_cnt += n_mini_cqes_left;
+ cqe_flags += n_mini_cqes_left;
+ rxq->n_mini_cqes_left = 0;
+ rxq->cq_ci = cq_ci = cq_ci + n_mini_cqes;
+ }
+
+ while (n_rx_packets < VLIB_FRAME_SIZE)
+ {
+ u8 cqe_last_byte, owner;
+ mlx5dv_cqe_t *cqe = rxq->cqes + (cq_ci & mask);
+
+ clib_prefetch_load (rxq->cqes + ((cq_ci + 8) & mask));
+
+ owner = (cq_ci >> log2_cq_size) & 1;
+ cqe_last_byte = cqe->opcode_cqefmt_se_owner;
+
+ if ((cqe_last_byte & 0x1) != owner)
+ break;
+
+ cqe_last_byte &= 0xfe; /* remove owner bit */
+
+ if (cqe_last_byte == 0x2c)
+ {
+ u32 n_mini_cqes = clib_net_to_host_u32 (cqe->mini_cqe_num);
+ u32 n_left = VLIB_FRAME_SIZE - n_rx_packets;
+ u16 flags = cqe->flags;
+
+ if (n_left >= n_mini_cqes)
+ {
+ process_mini_cqes (rxq, 0, n_mini_cqes, cq_ci, mask, byte_cnt);
+ clib_memset_u16 (cqe_flags, flags, n_mini_cqes);
+ compressed_cqe_reset_owner (rxq, n_mini_cqes, cq_ci, mask,
+ log2_cq_size);
+ n_rx_packets += n_mini_cqes;
+ byte_cnt += n_mini_cqes;
+ cqe_flags += n_mini_cqes;
+ cq_ci += n_mini_cqes;
+ }
+ else
+ {
+ process_mini_cqes (rxq, 0, n_left, cq_ci, mask, byte_cnt);
+ clib_memset_u16 (cqe_flags, flags, n_left);
+ n_rx_packets = VLIB_FRAME_SIZE;
+ rxq->n_mini_cqes = n_mini_cqes;
+ rxq->n_mini_cqes_left = n_mini_cqes - n_left;
+ rxq->last_cqe_flags = flags;
+ goto done;
+ }
+ continue;
+ }
+
+ if (cqe_last_byte == 0x20)
+ {
+ byte_cnt[0] = cqe->byte_cnt;
+ cqe_flags[0] = cqe->flags;
+ n_rx_packets++;
+ cq_ci++;
+ byte_cnt++;
+ continue;
+ }
+
+ rd->flags |= RDMA_DEVICE_F_ERROR;
+ break;
+ }
+
+done:
+ if (n_rx_packets)
+ rxq->cq_db[0] = rxq->cq_ci = cq_ci;
+ return n_rx_packets;
+}
+