X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fplugins%2Frdma%2Foutput.c;h=afc48451137df7d4662447f3d8e84e0160e68096;hb=94b80770ffd35c20beb303bc2a6e81b2c6163ba6;hp=015208c023e8ee12a9f5abd58d6d309b218b6d24;hpb=dc812d9a71f2f5105e4aaba50fd98ea3b0b50a9b;p=vpp.git

diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c
index 015208c023e..afc48451137 100644
--- a/src/plugins/rdma/output.c
+++ b/src/plugins/rdma/output.c
@@ -23,10 +23,6 @@
 #include
 #include
 
-#ifndef MLX5_ETH_L2_INLINE_HEADER_SIZE
-#define MLX5_ETH_L2_INLINE_HEADER_SIZE 18
-#endif
-
 #define RDMA_TX_RETRIES 5
 
 #define RDMA_TXQ_DV_DSEG_SZ(txq) (RDMA_MLX5_WQE_DS * RDMA_TXQ_DV_SQ_SZ(txq))
@@ -104,8 +100,7 @@ rdma_device_output_tx_mlx5_doorbell (rdma_txq_t * txq, rdma_mlx5_wqe_t * last,
   CLIB_MEMORY_STORE_BARRIER ();
   txq->dv_sq_dbrec[MLX5_SND_DBR] = htobe32 (tail);
   CLIB_COMPILER_BARRIER ();
-  txq->dv_sq_db[0] = *(u64 *) (txq->dv_sq_wqes + (txq->tail & sq_mask));
-  txq->tail = tail;
+  txq->dv_sq_db[0] = *(u64 *) last;
 }
 
 static_always_inline void
@@ -113,18 +108,32 @@ rdma_mlx5_wqe_init (rdma_mlx5_wqe_t * wqe, const void *tmpl,
 		    vlib_buffer_t * b, const u16 tail)
 {
   u16 sz = b->current_length;
-  u16 inline_sz = clib_min (sz, MLX5_ETH_L2_INLINE_HEADER_SIZE);
+  const void *cur = vlib_buffer_get_current (b);
+  uword addr = pointer_to_uword (cur);
 
   clib_memcpy_fast (wqe, tmpl, RDMA_MLX5_WQE_SZ);
-
-  wqe->ctrl.opmod_idx_opcode |= ((u32) htobe16 (tail)) << 8;
   /* speculatively copy at least MLX5_ETH_L2_INLINE_HEADER_SIZE (18-bytes) */
-  const void *cur = vlib_buffer_get_current (b);
-  clib_memcpy_fast (wqe->eseg.inline_hdr_start,
-		    cur, MLX5_ETH_L2_INLINE_HEADER_SIZE);
-  wqe->eseg.inline_hdr_sz = htobe16 (inline_sz);
-  wqe->dseg.byte_count = htobe32 (sz - inline_sz);
-  wqe->dseg.addr = htobe64 (pointer_to_uword (cur) + inline_sz);
+  STATIC_ASSERT (STRUCT_SIZE_OF (struct mlx5_wqe_eth_seg, inline_hdr_start)
+		 + STRUCT_SIZE_OF (struct mlx5_wqe_eth_seg,
+				   inline_hdr) >=
+		 MLX5_ETH_L2_INLINE_HEADER_SIZE, "wrong size");
+  clib_memcpy_fast (wqe->eseg.inline_hdr_start, cur,
+		    MLX5_ETH_L2_INLINE_HEADER_SIZE);
+
+  wqe->wqe_index_lo = tail;
+  wqe->wqe_index_hi = tail >> 8;
+  if (PREDICT_TRUE (sz >= MLX5_ETH_L2_INLINE_HEADER_SIZE))
+    {
+      /* inline_hdr_sz is set to MLX5_ETH_L2_INLINE_HEADER_SIZE
+         in the template */
+      wqe->dseg.byte_count = htobe32 (sz - MLX5_ETH_L2_INLINE_HEADER_SIZE);
+      wqe->dseg.addr = htobe64 (addr + MLX5_ETH_L2_INLINE_HEADER_SIZE);
+    }
+  else
+    {
+      /* dseg.byte_count and dseg.addr are set to 0 in the template */
+      wqe->eseg.inline_hdr_sz = htobe16 (sz);
+    }
 }
 
 /*
@@ -146,10 +155,11 @@ rdma_device_output_tx_mlx5_chained (vlib_main_t * vm,
   u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
   u32 mask = pow2_mask (txq->bufs_log2sz);
   u32 dseg_mask = RDMA_TXQ_DV_DSEG_SZ (txq) - 1;
-  const u32 lkey = wqe[0].dseg.lkey;
+  const u32 lkey = clib_host_to_net_u32 (rd->lkey);
 
-  vlib_buffer_copy_indices (txq->bufs + (txq->tail & mask), bi,
-			    n_left_from - n);
+  vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
+				    RDMA_TXQ_BUF_SZ (txq), n_left_from - n);
+  bi += n_left_from - n;
 
   while (n >= 1 && wqe_n >= 1)
     {
@@ -255,10 +265,10 @@ rdma_device_output_tx_mlx5_chained (vlib_main_t * vm,
       n -= 1;
     }
 
-  if (n == n_left_from)
-    return 0;			/* we fail to enqueue even a single packet */
+  if (n != n_left_from)
+    rdma_device_output_tx_mlx5_doorbell (txq, last, tail, sq_mask);
 
-  rdma_device_output_tx_mlx5_doorbell (txq, last, tail, sq_mask);
+  txq->tail = tail;
   return n_left_from - n;
 }
 
@@ -269,15 +279,23 @@ rdma_device_output_tx_mlx5 (vlib_main_t * vm,
 			    const u32 n_left_from, u32 * bi,
 			    vlib_buffer_t ** b)
 {
+  u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
   u32 mask = pow2_mask (txq->bufs_log2sz);
-  rdma_mlx5_wqe_t *wqe = txq->dv_sq_wqes + (txq->tail & sq_mask);
-  u32 n = n_left_from;
+  rdma_mlx5_wqe_t *wqe;
+  u32 n, n_wrap;
   u16 tail = txq->tail;
 
   ASSERT (RDMA_TXQ_BUF_SZ (txq) <= RDMA_TXQ_DV_SQ_SZ (txq));
 
-  while (n >= 4)
+  /* avoid wrap-around logic in core loop */
+  n = clib_min (n_left_from, RDMA_TXQ_BUF_SZ (txq) - (tail & mask));
+  n_wrap = n_left_from - n;
+
+wrap_around:
+  wqe = txq->dv_sq_wqes + (tail & sq_mask);
+
+  while (n >= 8)
     {
       u32 flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
       if (PREDICT_FALSE (flags & VLIB_BUFFER_NEXT_PRESENT))
@@ -285,18 +303,16 @@ rdma_device_output_tx_mlx5 (vlib_main_t * vm,
 						   n_left_from, n, bi, b, wqe,
 						   tail);
 
-      if (PREDICT_TRUE (n >= 8))
-	{
-	  vlib_prefetch_buffer_header (b + 4, LOAD);
-	  vlib_prefetch_buffer_header (b + 5, LOAD);
-	  vlib_prefetch_buffer_header (b + 6, LOAD);
-	  vlib_prefetch_buffer_header (b + 7, LOAD);
-	  clib_prefetch_load (wqe + 4);
-	}
-
+      vlib_prefetch_buffer_header (b[4], LOAD);
       rdma_mlx5_wqe_init (wqe + 0, txq->dv_wqe_tmpl, b[0], tail + 0);
+
+      vlib_prefetch_buffer_header (b[5], LOAD);
       rdma_mlx5_wqe_init (wqe + 1, txq->dv_wqe_tmpl, b[1], tail + 1);
+
+      vlib_prefetch_buffer_header (b[6], LOAD);
       rdma_mlx5_wqe_init (wqe + 2, txq->dv_wqe_tmpl, b[2], tail + 2);
+
+      vlib_prefetch_buffer_header (b[7], LOAD);
       rdma_mlx5_wqe_init (wqe + 3, txq->dv_wqe_tmpl, b[3], tail + 3);
 
       b += 4;
@@ -320,9 +336,17 @@ rdma_device_output_tx_mlx5 (vlib_main_t * vm,
       n -= 1;
     }
 
-  vlib_buffer_copy_indices (txq->bufs + (txq->tail & mask), bi, n_left_from);
+  if (n_wrap)
+    {
+      n = n_wrap;
+      n_wrap = 0;
+      goto wrap_around;
+    }
 
   rdma_device_output_tx_mlx5_doorbell (txq, &wqe[-1], tail, sq_mask);
+  vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
+				    RDMA_TXQ_BUF_SZ (txq), n_left_from);
+  txq->tail = tail;
   return n_left_from;
 }
 
@@ -369,61 +393,52 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
 			      const rdma_device_t * rd, rdma_txq_t * txq,
 			      u32 n_left_from, u32 * bi, vlib_buffer_t ** b)
 {
+  const u32 mask = pow2_mask (txq->bufs_log2sz);
   struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr;
   struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
-  u32 mask = txq->bufs_log2sz;
   u32 n = n_left_from;
 
-  memset (w, 0, n_left_from * sizeof (w[0]));
-
-  while (n >= 4)
+  while (n >= 8)
     {
-      if (PREDICT_TRUE (n >= 8))
-	{
-	  vlib_prefetch_buffer_header (b[4 + 0], LOAD);
-	  vlib_prefetch_buffer_header (b[4 + 1], LOAD);
-	  vlib_prefetch_buffer_header (b[4 + 2], LOAD);
-	  vlib_prefetch_buffer_header (b[4 + 3], LOAD);
-
-	  CLIB_PREFETCH (&s[4 + 0], 4 * sizeof (s[0]), STORE);
-
-	  CLIB_PREFETCH (&w[4 + 0], CLIB_CACHE_LINE_BYTES, STORE);
-	  CLIB_PREFETCH (&w[4 + 1], CLIB_CACHE_LINE_BYTES, STORE);
-	  CLIB_PREFETCH (&w[4 + 2], CLIB_CACHE_LINE_BYTES, STORE);
-	  CLIB_PREFETCH (&w[4 + 3], CLIB_CACHE_LINE_BYTES, STORE);
-	}
-
+      vlib_prefetch_buffer_header (b[4], LOAD);
       s[0].addr = vlib_buffer_get_current_va (b[0]);
       s[0].length = b[0]->current_length;
      s[0].lkey = rd->lkey;
 
+      vlib_prefetch_buffer_header (b[5], LOAD);
       s[1].addr = vlib_buffer_get_current_va (b[1]);
       s[1].length = b[1]->current_length;
       s[1].lkey = rd->lkey;
 
+      vlib_prefetch_buffer_header (b[6], LOAD);
       s[2].addr = vlib_buffer_get_current_va (b[2]);
       s[2].length = b[2]->current_length;
       s[2].lkey = rd->lkey;
 
+      vlib_prefetch_buffer_header (b[7], LOAD);
       s[3].addr = vlib_buffer_get_current_va (b[3]);
       s[3].length = b[3]->current_length;
       s[3].lkey = rd->lkey;
 
+      clib_memset_u8 (&w[0], 0, sizeof (w[0]));
       w[0].next = &w[0] + 1;
       w[0].sg_list = &s[0];
       w[0].num_sge = 1;
       w[0].opcode = IBV_WR_SEND;
 
+      clib_memset_u8 (&w[1], 0, sizeof (w[1]));
       w[1].next = &w[1] + 1;
       w[1].sg_list = &s[1];
       w[1].num_sge = 1;
       w[1].opcode = IBV_WR_SEND;
 
+      clib_memset_u8 (&w[2], 0, sizeof (w[2]));
       w[2].next = &w[2] + 1;
       w[2].sg_list = &s[2];
       w[2].num_sge = 1;
       w[2].opcode = IBV_WR_SEND;
 
+      clib_memset_u8 (&w[3], 0, sizeof (w[3]));
       w[3].next = &w[3] + 1;
       w[3].sg_list = &s[3];
       w[3].num_sge = 1;
@@ -441,6 +456,7 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
       s[0].length = b[0]->current_length;
       s[0].lkey = rd->lkey;
 
+      clib_memset_u8 (&w[0], 0, sizeof (w[0]));
       w[0].next = &w[0] + 1;
       w[0].sg_list = &s[0];
       w[0].num_sge = 1;
@@ -463,8 +479,8 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
 				 n_left_from - (w - wr));
       n_left_from = w - wr;
     }
-
-  vlib_buffer_copy_indices (txq->bufs + (txq->tail & mask), bi, n_left_from);
+  vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
+				    RDMA_TXQ_BUF_SZ (txq), n_left_from);
   txq->tail += n_left_from;
   return n_left_from;
 }
@@ -489,45 +505,33 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
 			   u32 n_left_from, u32 * bi, int is_mlx5dv)
 {
   vlib_buffer_t *b[VLIB_FRAME_SIZE];
-  u32 mask = pow2_mask (txq->bufs_log2sz);
 
   /* do not enqueue more packets than ring space */
   n_left_from = clib_min (n_left_from, RDMA_TXQ_AVAIL_SZ (txq, txq->head,
							   txq->tail));
-  /* avoid wrap-around logic in core loop */
-  n_left_from = clib_min (n_left_from, RDMA_TXQ_BUF_SZ (txq) -
-			  (txq->tail & mask));
-
   /* if ring is full, do nothing */
   if (PREDICT_FALSE (n_left_from == 0))
     return 0;
 
   vlib_get_buffers (vm, bi, b, n_left_from);
 
-  return is_mlx5dv ?
-    rdma_device_output_tx_mlx5 (vm, node, rd, txq, n_left_from, bi, b) :
-    rdma_device_output_tx_ibverb (vm, node, rd, txq, n_left_from, bi, b);
+  n_left_from = is_mlx5dv ?
+    rdma_device_output_tx_mlx5 (vm, node, rd, txq, n_left_from, bi,
+				b) : rdma_device_output_tx_ibverb (vm, node,
+								   rd, txq,
+								   n_left_from,
+								   bi, b);
+
+  return n_left_from;
 }
 
 static_always_inline uword
-rdma_device_output_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
-		       vlib_frame_t * frame, rdma_device_t * rd,
-		       int is_mlx5dv)
+rdma_device_output_tx (vlib_main_t *vm, vlib_node_runtime_t *node,
+		       rdma_device_t *rd, rdma_txq_t *txq, u32 *from,
+		       u32 n_left_from, int is_mlx5dv)
 {
-  u32 thread_index = vm->thread_index;
-  rdma_txq_t *txq =
-    vec_elt_at_index (rd->txqs, thread_index % vec_len (rd->txqs));
-  u32 *from;
-  u32 n_left_from;
   int i;
 
-  ASSERT (RDMA_TXQ_BUF_SZ (txq) >= VLIB_FRAME_SIZE);
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-
-  clib_spinlock_lock_if_init (&txq->lock);
-
   for (i = 0; i < RDMA_TX_RETRIES && n_left_from > 0; i++)
     {
       u32 n_enq;
@@ -539,16 +543,7 @@ rdma_device_output_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
       from += n_enq;
     }
 
-  clib_spinlock_unlock_if_init (&txq->lock);
-
-  if (PREDICT_FALSE (n_left_from))
-    {
-      vlib_buffer_free (vm, from, n_left_from);
-      vlib_error_count (vm, node->node_index,
-			RDMA_TX_ERROR_NO_FREE_SLOTS, n_left_from);
-    }
-
-  return frame->n_vectors - n_left_from;
+  return n_left_from;
 }
 
 VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
@@ -558,11 +553,34 @@ VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
   rdma_main_t *rm = &rdma_main;
   vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
   rdma_device_t *rd = pool_elt_at_index (rm->devices, ord->dev_instance);
+  rdma_txq_t *txq =
+    vec_elt_at_index (rd->txqs, vm->thread_index % vec_len (rd->txqs));
+  u32 *from, n_buffers, n_left;
+
+  ASSERT (RDMA_TXQ_BUF_SZ (txq) >= VLIB_FRAME_SIZE);
+
+  from = vlib_frame_vector_args (frame);
+  n_buffers = frame->n_vectors;
+
+  clib_spinlock_lock_if_init (&txq->lock);
 
   if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
-    return rdma_device_output_tx (vm, node, frame, rd, 1 /* is_mlx5dv */ );
+    n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers,
+				    1 /* is_mlx5dv */);
+  else
+    n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers,
+				    0 /* is_mlx5dv */);
+
+  clib_spinlock_unlock_if_init (&txq->lock);
+
+  if (PREDICT_FALSE (n_left))
+    {
+      vlib_buffer_free (vm, from + n_buffers - n_left, n_left);
+      vlib_error_count (vm, node->node_index, RDMA_TX_ERROR_NO_FREE_SLOTS,
+			n_left);
+    }
 
-  return rdma_device_output_tx (vm, node, frame, rd, 0 /* is_mlx5dv */ );
+  return n_buffers - n_left;
 }
 
 /*
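
Note on the index copy used throughout the new tx path: vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask, RDMA_TXQ_BUF_SZ (txq), n) stores n buffer indices into the txq->bufs ring starting at slot (txq->tail & mask) and wraps at the end of the ring, which is why the explicit "avoid wrap-around" clamp could be dropped from rdma_device_output_tx_try. The sketch below only illustrates the expected wrap-around semantics; the helper name and argument order are taken from the call sites in this patch, but the body, the sketch's function name and the assumption of a power-of-two ring size are illustrative, not the vppinfra implementation.

#include <vppinfra/clib.h>
#include <vppinfra/string.h>	/* clib_memcpy_fast */

/* illustration only: copy n 32-bit buffer indices from src into a ring of
   ring_size slots, starting at slot 'start' and wrapping to slot 0 */
static inline void
copy_indices_to_ring_sketch (u32 * ring, const u32 * src, u32 start,
			     u32 ring_size, u32 n)
{
  /* slots available before the end of the ring */
  u32 n_before_wrap = ring_size - start;

  if (n <= n_before_wrap)
    {
      /* fits without wrapping: single copy */
      clib_memcpy_fast (ring + start, src, n * sizeof (u32));
    }
  else
    {
      /* copy up to the end of the ring, then continue at slot 0 */
      clib_memcpy_fast (ring + start, src, n_before_wrap * sizeof (u32));
      clib_memcpy_fast (ring, src + n_before_wrap,
			(n - n_before_wrap) * sizeof (u32));
    }
}

The mlx5 path above handles the same wrap for the WQE ring itself with the wrap_around: label: it first processes the slots up to the end of txq->bufs, then jumps back once with n = n_wrap to finish the remainder at the start of the ring before ringing the doorbell.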