src/plugins/rdma/output.c

   1 /*
   2  *------------------------------------------------------------------
   3  * Copyright (c) 2018 Cisco and/or its affiliates.
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at:
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  *------------------------------------------------------------------
  16  */
  17
  18 #include <vlib/vlib.h>
  19 #include <vlib/unix/unix.h>
  20 #include <vlib/pci/pci.h>
  21 #include <vppinfra/ring.h>
  22 #include <vnet/ethernet/ethernet.h>
  23 #include <vnet/devices/devices.h>
  24
  25 #include <rdma/rdma.h>
  26
  27 static_always_inline void
  28 rdma_device_output_free (vlib_main_t * vm, rdma_txq_t * txq)
  29 {
  30   struct ibv_wc wc[VLIB_FRAME_SIZE];
  31   u32 to_free[VLIB_FRAME_SIZE];
  32   int n_free;
  33   int i;
  34
  35   n_free = ibv_poll_cq (txq->cq, VLIB_FRAME_SIZE, wc);
  36   if (n_free <= 0)
  37     return;
  38
  39   for (i = 0; i < n_free; i++)
  40     to_free[i] = wc[i].wr_id;
  41
  42   vlib_buffer_free (vm, to_free, n_free);
  43 }
  44
  45 VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
  46                                              vlib_node_runtime_t * node,
  47                                              vlib_frame_t * frame)
  48 {
  49   rdma_main_t *rm = &rdma_main;
  50   vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
  51   rdma_device_t *rd = pool_elt_at_index (rm->devices, ord->dev_instance);
  52   u32 thread_index = vm->thread_index;
  53   rdma_txq_t *txq =
  54     vec_elt_at_index (rd->txqs, thread_index % vec_len (rd->txqs));
  55   u32 *from, *f, n_left_from;
  56   u32 n_tx_packets, n_tx_failed;
  57   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  58   struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr;
  59   struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
  60   int i;
  61
  62   f = from = vlib_frame_vector_args (frame);
  63   n_left_from = frame->n_vectors;
  64   vlib_get_buffers (vm, from, bufs, n_left_from);
  65
  66   memset (w, 0, n_left_from * sizeof (w[0]));
  67
  68   while (n_left_from >= 4)
  69     {
  70       if (PREDICT_TRUE (n_left_from >= 8))
  71         {
  72           vlib_prefetch_buffer_header (b[4 + 0], LOAD);
  73           vlib_prefetch_buffer_header (b[4 + 1], LOAD);
  74           vlib_prefetch_buffer_header (b[4 + 2], LOAD);
  75           vlib_prefetch_buffer_header (b[4 + 3], LOAD);
  76
  77           CLIB_PREFETCH (&s[4 + 0], 4 * sizeof (s[0]), STORE);
  78
  79           CLIB_PREFETCH (&w[4 + 0], CLIB_CACHE_LINE_BYTES, STORE);
  80           CLIB_PREFETCH (&w[4 + 1], CLIB_CACHE_LINE_BYTES, STORE);
  81           CLIB_PREFETCH (&w[4 + 2], CLIB_CACHE_LINE_BYTES, STORE);
  82           CLIB_PREFETCH (&w[4 + 3], CLIB_CACHE_LINE_BYTES, STORE);
  83         }
  84
  85       s[0].addr = vlib_buffer_get_current_va (b[0]);
  86       s[0].length = b[0]->current_length;
  87       s[0].lkey = rd->mr->lkey;
  88
  89       s[1].addr = vlib_buffer_get_current_va (b[1]);
  90       s[1].length = b[1]->current_length;
  91       s[1].lkey = rd->mr->lkey;
  92
  93       s[2].addr = vlib_buffer_get_current_va (b[2]);
  94       s[2].length = b[2]->current_length;
  95       s[2].lkey = rd->mr->lkey;
  96
  97       s[3].addr = vlib_buffer_get_current_va (b[3]);
  98       s[3].length = b[3]->current_length;
  99       s[3].lkey = rd->mr->lkey;
 100
 101       w[0].wr_id = f[0];
 102       w[0].next = &w[0] + 1;
 103       w[0].sg_list = &s[0];
 104       w[0].num_sge = 1;
 105       w[0].opcode = IBV_WR_SEND;
 106
 107       w[1].wr_id = f[1];
 108       w[1].next = &w[1] + 1;
 109       w[1].sg_list = &s[1];
 110       w[1].num_sge = 1;
 111       w[1].opcode = IBV_WR_SEND;
 112
 113       w[2].wr_id = f[2];
 114       w[2].next = &w[2] + 1;
 115       w[2].sg_list = &s[2];
 116       w[2].num_sge = 1;
 117       w[2].opcode = IBV_WR_SEND;
 118
 119       w[3].wr_id = f[3];
 120       w[3].next = &w[3] + 1;
 121       w[3].sg_list = &s[3];
 122       w[3].num_sge = 1;
 123       w[3].opcode = IBV_WR_SEND;
 124
 125       s += 4;
 126       f += 4;
 127       w += 4;
 128       b += 4;
 129       n_left_from -= 4;
 130     }
 131
 132   while (n_left_from >= 1)
 133     {
 134       s[0].addr = vlib_buffer_get_current_va (b[0]);
 135       s[0].length = b[0]->current_length;
 136       s[0].lkey = rd->mr->lkey;
 137
 138       w[0].wr_id = f[0];
 139       w[0].next = &w[0] + 1;
 140       w[0].sg_list = &s[0];
 141       w[0].num_sge = 1;
 142       w[0].opcode = IBV_WR_SEND;
 143
 144       s += 1;
 145       f += 1;
 146       w += 1;
 147       b += 1;
 148       n_left_from -= 1;
 149     }
 150
 151   w[-1].next = 0;               /* fix next pointer in WR linked-list last item */
 152
 153   w = wr;
 154   clib_spinlock_lock_if_init (&txq->lock);
 155   for (i = 0; i < 5; i++)
 156     {
 157       rdma_device_output_free (vm, txq);
 158       if (0 == ibv_post_send (txq->qp, w, &w))
 159         break;
 160     }
 161   clib_spinlock_unlock_if_init (&txq->lock);
 162
 163   n_tx_packets = w == wr ? frame->n_vectors : w - wr;
 164   n_tx_failed = frame->n_vectors - n_tx_packets;
 165
 166   if (PREDICT_FALSE (n_tx_failed))
 167     {
 168       vlib_buffer_free (vm, &from[n_tx_packets], n_tx_failed);
 169       vlib_error_count (vm, node->node_index,
 170                         RDMA_TX_ERROR_NO_FREE_SLOTS, n_tx_failed);
 171     }
 172
 173   return n_tx_packets;
 174 }
 175
 176 /*
 177  * fd.io coding-style-patch-verification: ON
 178  *
 179  * Local Variables:
 180  * eval: (c-set-style "gnu")
 181  * End:
 182  */