rdma: fix chained buffer tx path in dv mode 52/32952/2
authorBenoît Ganne <bganne@cisco.com>
Fri, 25 Jun 2021 15:49:01 +0000 (17:49 +0200)
committerDamjan Marion <dmarion@me.com>
Wed, 30 Jun 2021 18:58:42 +0000 (18:58 +0000)
When switching to the direct verb chain buffer tx path, we must account
for all remaining packets, including the packets that would wrap
around.
Previously we were using the 'n' counter but ignoring the 'n_wrap'
counter: if some packets would have wrapped around in the default path,
they would be ignored by the chained buffer tx path.
Compute the correct number of remaining packets based on the old and
current txq tail instead.
Also simplify the chained tx function parameters.

Type: fix

Change-Id: If12b41a8f143fda80290342e2904792f7501c559
Signed-off-by: Benoît Ganne <bganne@cisco.com>
src/plugins/rdma/output.c

index afc4845..3cc3ab7 100644 (file)
@@ -143,23 +143,22 @@ rdma_mlx5_wqe_init (rdma_mlx5_wqe_t * wqe, const void *tmpl,
  * plenty of descriptors available
  */
 static_always_inline u32
-rdma_device_output_tx_mlx5_chained (vlib_main_t * vm,
-                                   const vlib_node_runtime_t * node,
-                                   const rdma_device_t * rd,
-                                   rdma_txq_t * txq, u32 n_left_from, u32 n,
-                                   u32 * bi, vlib_buffer_t ** b,
-                                   rdma_mlx5_wqe_t * wqe, u16 tail)
+rdma_device_output_tx_mlx5_chained (vlib_main_t *vm,
+                                   const vlib_node_runtime_t *node,
+                                   const rdma_device_t *rd, rdma_txq_t *txq,
+                                   const u32 n_left_from, const u32 *bi,
+                                   vlib_buffer_t **b, u16 tail)
 {
-  rdma_mlx5_wqe_t *last = wqe;
   u32 wqe_n = RDMA_TXQ_AVAIL_SZ (txq, txq->head, tail);
   u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
   u32 mask = pow2_mask (txq->bufs_log2sz);
   u32 dseg_mask = RDMA_TXQ_DV_DSEG_SZ (txq) - 1;
   const u32 lkey = clib_host_to_net_u32 (rd->lkey);
+  const u32 done = RDMA_TXQ_USED_SZ (txq->tail, tail);
+  u32 n = n_left_from - done;
+  rdma_mlx5_wqe_t *last = txq->dv_sq_wqes + (tail & sq_mask);
 
-  vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
-                                   RDMA_TXQ_BUF_SZ (txq), n_left_from - n);
-  bi += n_left_from - n;
+  bi += done;
 
   while (n >= 1 && wqe_n >= 1)
     {
@@ -273,11 +272,10 @@ rdma_device_output_tx_mlx5_chained (vlib_main_t * vm,
 }
 
 static_always_inline u32
-rdma_device_output_tx_mlx5 (vlib_main_t * vm,
-                           const vlib_node_runtime_t * node,
-                           const rdma_device_t * rd, rdma_txq_t * txq,
-                           const u32 n_left_from, u32 * bi,
-                           vlib_buffer_t ** b)
+rdma_device_output_tx_mlx5 (vlib_main_t *vm, const vlib_node_runtime_t *node,
+                           const rdma_device_t *rd, rdma_txq_t *txq,
+                           const u32 n_left_from, const u32 *bi,
+                           vlib_buffer_t **b)
 {
 
   u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
@@ -300,8 +298,7 @@ wrap_around:
       u32 flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
       if (PREDICT_FALSE (flags & VLIB_BUFFER_NEXT_PRESENT))
        return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq,
-                                                  n_left_from, n, bi, b, wqe,
-                                                  tail);
+                                                  n_left_from, bi, b, tail);
 
       vlib_prefetch_buffer_header (b[4], LOAD);
       rdma_mlx5_wqe_init (wqe + 0, txq->dv_wqe_tmpl, b[0], tail + 0);
@@ -325,8 +322,7 @@ wrap_around:
     {
       if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT))
        return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq,
-                                                  n_left_from, n, bi, b, wqe,
-                                                  tail);
+                                                  n_left_from, bi, b, tail);
 
       rdma_mlx5_wqe_init (wqe, txq->dv_wqe_tmpl, b[0], tail);
 
@@ -344,8 +340,6 @@ wrap_around:
     }
 
   rdma_device_output_tx_mlx5_doorbell (txq, &wqe[-1], tail, sq_mask);
-  vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
-                                   RDMA_TXQ_BUF_SZ (txq), n_left_from);
   txq->tail = tail;
   return n_left_from;
 }
@@ -393,7 +387,6 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
                              const rdma_device_t * rd, rdma_txq_t * txq,
                              u32 n_left_from, u32 * bi, vlib_buffer_t ** b)
 {
-  const u32 mask = pow2_mask (txq->bufs_log2sz);
   struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr;
   struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
   u32 n = n_left_from;
@@ -479,8 +472,6 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
                        n_left_from - (w - wr));
       n_left_from = w - wr;
     }
-  vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
-                                   RDMA_TXQ_BUF_SZ (txq), n_left_from);
   txq->tail += n_left_from;
   return n_left_from;
 }
@@ -505,6 +496,7 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
                           u32 n_left_from, u32 * bi, int is_mlx5dv)
 {
   vlib_buffer_t *b[VLIB_FRAME_SIZE];
+  const u32 mask = pow2_mask (txq->bufs_log2sz);
 
   /* do not enqueue more packet than ring space */
   n_left_from = clib_min (n_left_from, RDMA_TXQ_AVAIL_SZ (txq, txq->head,
@@ -513,6 +505,10 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
   if (PREDICT_FALSE (n_left_from == 0))
     return 0;
 
+  /* speculatively copy buffer indices */
+  vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
+                                   RDMA_TXQ_BUF_SZ (txq), n_left_from);
+
   vlib_get_buffers (vm, bi, b, n_left_from);
 
   n_left_from = is_mlx5dv ?