4 * Copyright 2016 6WIND S.A.
5 * Copyright 2016 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
37 #pragma GCC diagnostic ignored "-Wpedantic"
39 #include <infiniband/verbs.h>
41 #pragma GCC diagnostic error "-Wpedantic"
44 #include <rte_mempool.h>
45 #include <rte_malloc.h>
48 #include "mlx5_rxtx.h"
/*
 * Context handed to the rte_mempool_mem_iter() callback
 * (mr_update_mempool_cb) as its opaque pointer.
 * NOTE(review): trailing member(s) (a table-size field, judging by
 * data->tbl_sz used in the callback) and the closing brace are not visible
 * here -- confirm against the full source.
 */
50 struct mr_update_mempool_data {
51 struct rte_eth_dev *dev;
52 struct mlx5_mr_cache *lkp_tbl;
/*
 * Binary search over a lookup table sorted by .start address. The table is
 * expected to begin with a sentinel entry (start == 0, lkey == UINT32_MAX)
 * so the comparison loop never underruns.
 * NOTE(review): the return type, parameter list tail, loop header and final
 * miss-return are not visible in this view -- the visible fragments are
 * consistent with a classic branchless bisection over [base, base + n).
 */
57 * Look up LKEY from given lookup table by Binary Search, store the last index
58 * and return searched LKEY.
61 * Pointer to lookup table.
63 * Size of lookup table.
65 * Pointer to index. Even on search failure, returns index where it stops
66 * searching so that index can be used when inserting a new entry.
71 * Searched LKEY on success, UINT32_MAX on no match.
74 mlx5_mr_lookup(struct mlx5_mr_cache *lkp_tbl, uint16_t n, uint16_t *idx,
79 /* First entry must be NULL for comparison. */
80 assert(n == 0 || (lkp_tbl[0].start == 0 &&
81 lkp_tbl[0].lkey == UINT32_MAX));
/* Halve the search interval each iteration. */
84 register uint16_t delta = n >> 1;
86 if (addr < lkp_tbl[base + delta].start) {
/* base now points at the greatest .start <= addr. */
93 assert(addr >= lkp_tbl[base].start);
/* Hit only if addr also falls below the entry's end boundary. */
95 if (addr < lkp_tbl[base].end)
96 return lkp_tbl[base].lkey;
/*
 * Insert a new cache entry, keeping the table sorted by .start so that
 * mlx5_mr_lookup() can keep bisecting. mlx5_mr_lookup() doubles as the
 * duplicate check and, on miss, yields the insertion index via idx.
 * NOTE(review): the return statements and variable declarations are not
 * visible here; the caller apparently receives the new table size.
 */
102 * Insert an entry to LKEY lookup table.
105 * Pointer to lookup table. The size of array must be enough to add one more
108 * Size of lookup table.
110 * Pointer to new entry to insert.
113 * Size of returning lookup table.
116 mlx5_mr_insert(struct mlx5_mr_cache *lkp_tbl, uint16_t n,
117 struct mlx5_mr_cache *entry)
122 /* Check if entry exist. */
123 if (mlx5_mr_lookup(lkp_tbl, n, &idx, entry->start) != UINT32_MAX)
/* Shift the tail right by one slot to open a hole at idx. */
127 shift = (n - idx) * sizeof(struct mlx5_mr_cache);
129 memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
130 lkp_tbl[idx] = *entry;
/*
 * NOTE(review): "%lx" for uintptr_t is not portable (wrong on LLP64);
 * PRIxPTR from <inttypes.h> would be correct -- code change, flagged only.
 */
131 DRV_LOG(DEBUG, "%p: inserted lkp_tbl[%u], start = 0x%lx, end = 0x%lx",
132 (void *)lkp_tbl, idx, lkp_tbl[idx].start, lkp_tbl[idx].end);
/*
 * Resolve addr against the device-global MR cache (priv->mr_cache) and copy
 * the matching entry into the caller's per-queue lookup table.
 * NOTE(review): the sentinel-entry initializer body and the lookup call's
 * trailing arguments are elided in this view; n is incremented only when the
 * table is empty (first sentinel), judging by the visible fragment --
 * confirm against the full source.
 */
137 * Incrementally update LKEY lookup table for a specific address from registered
141 * Pointer to Ethernet device structure.
143 * Pointer to lookup table to fill. The size of array must be at least
146 * Size of lookup table.
151 * Size of returning lookup table.
154 mlx5_mr_update_addr(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
155 uint16_t n, uintptr_t addr)
157 struct priv *priv = dev->data->dev_private;
/* Only consumed by assert(); unused in release (NDEBUG) builds. */
159 uint32_t ret __rte_unused;
162 /* First entry must be NULL for comparison. */
163 lkp_tbl[n++] = (struct mlx5_mr_cache) {
/* The global table covers all registered memsegs, so this cannot miss. */
167 ret = mlx5_mr_lookup(*priv->mr_cache, MR_TABLE_SZ(priv->mr_n),
169 /* Lookup must succeed, the global cache is all-inclusive. */
170 assert(ret != UINT32_MAX);
/* NOTE(review): "%lx" for uintptr_t -- see portability note elsewhere. */
171 DRV_LOG(DEBUG, "port %u adding LKEY (0x%x) for addr 0x%lx",
172 dev->data->port_id, (*priv->mr_cache)[idx].lkey, addr);
173 return mlx5_mr_insert(lkp_tbl, n, &(*priv->mr_cache)[idx]);
/*
 * Slow path of LKEY resolution: consult the per-queue bottom-half table
 * first; on miss, pull the entry from the global cache via
 * mlx5_mr_update_addr(), then retry. Either way, promote the entry into the
 * small linear cache (mr_ctrl->cache[]) replacing the current head (oldest
 * victim) and mark it most-recently-used.
 * NOTE(review): local declarations (lkey, bh_idx), the early-return of the
 * hit path and the final return are elided in this view.
 */
177 * Bottom-half of LKEY search on datapath. Firstly search in cache_bh[] and if
178 * misses, search in the global MR cache table and update the new entry to
179 * per-queue local caches.
182 * Pointer to Ethernet device structure.
184 * Pointer to per-queue MR control structure.
191 static inline uint32_t
192 mlx5_mr_mb2mr_bh(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
/* Victim slot in the linear (top-half) cache; overwritten on any path. */
197 struct mlx5_mr_cache *mr_cache = &mr_ctrl->cache[mr_ctrl->head];
199 /* Binary-search MR translation table. */
200 lkey = mlx5_mr_lookup(*mr_ctrl->cache_bh, mr_ctrl->bh_n, &bh_idx, addr);
201 if (likely(lkey != UINT32_MAX)) {
/* Hit: promote into the linear cache and advance the victim index. */
203 *mr_cache = (*mr_ctrl->cache_bh)[bh_idx];
204 mr_ctrl->mru = mr_ctrl->head;
205 /* Point to the next victim, the oldest. */
206 mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
209 /* Missed in the per-queue lookup table. Search in the global cache. */
210 mr_ctrl->bh_n = mlx5_mr_update_addr(dev, *mr_ctrl->cache_bh,
211 mr_ctrl->bh_n, addr);
212 /* Search again with updated entries. */
213 lkey = mlx5_mr_lookup(*mr_ctrl->cache_bh, mr_ctrl->bh_n, &bh_idx, addr);
214 /* Must always succeed. */
215 assert(lkey != UINT32_MAX);
/* Same promotion as on the hit path. */
217 *mr_cache = (*mr_ctrl->cache_bh)[bh_idx];
218 mr_ctrl->mru = mr_ctrl->head;
219 /* Point to the next victim, the oldest. */
220 mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
/*
 * Rx-side wrapper: recover the queue control structure (and through it the
 * device) from the datapath queue pointer, then delegate to
 * mlx5_mr_mb2mr_bh().
 */
225 * Bottom-half of mlx5_rx_mb2mr() if search on mr_cache_bh[] fails.
228 * Pointer to Rx queue structure.
236 mlx5_rx_mb2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr)
238 struct mlx5_rxq_ctrl *rxq_ctrl =
239 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
/* NOTE(review): the DRV_LOG(DEBUG, ... opening line is elided here. */
242 "port %u not found in rxq->mr_cache[], last-hit=%u, head=%u",
243 PORT_ID(rxq_ctrl->priv), rxq->mr_ctrl.mru, rxq->mr_ctrl.head);
244 return mlx5_mr_mb2mr_bh(ETH_DEV(rxq_ctrl->priv), &rxq->mr_ctrl, addr);
/*
 * Tx-side wrapper, mirror image of mlx5_rx_mb2mr_bh(): recover the Tx queue
 * control structure and delegate to mlx5_mr_mb2mr_bh().
 */
248 * Bottom-half of mlx5_tx_mb2mr() if search on cache_bh[] fails.
251 * Pointer to Tx queue structure.
259 mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr)
261 struct mlx5_txq_ctrl *txq_ctrl =
262 container_of(txq, struct mlx5_txq_ctrl, txq);
/* NOTE(review): the DRV_LOG(DEBUG, ... opening line is elided here. */
265 "port %u not found in txq->mr_cache[], last-hit=%u, head=%u",
266 PORT_ID(txq_ctrl->priv), txq->mr_ctrl.mru, txq->mr_ctrl.head);
267 return mlx5_mr_mb2mr_bh(ETH_DEV(txq_ctrl->priv), &txq->mr_ctrl, addr);
270 /* Called by mr_update_mempool() when iterating the memory chunks. */
/*
 * Per-chunk callback for rte_mempool_mem_iter(): resolves the chunk's base
 * address into the caller's lookup table.
 * NOTE(review): mp and mem_idx are tagged __rte_unused yet referenced inside
 * DRV_LOG(); harmless if DRV_LOG compiles out at some log levels, and the
 * attribute only suppresses the would-be warning. Also note the
 * mlx5_mr_update_addr() return value (new table size) appears to be
 * captured on an elided line (data->tbl_sz, presumably) -- confirm.
 */
272 mr_update_mempool_cb(struct rte_mempool *mp __rte_unused,
273 void *opaque, struct rte_mempool_memhdr *memhdr,
274 unsigned int mem_idx __rte_unused)
276 struct mr_update_mempool_data *data = opaque;
278 DRV_LOG(DEBUG, "port %u adding chunk[%u] of %s",
279 data->dev->data->port_id, mem_idx, mp->name);
281 mlx5_mr_update_addr(data->dev, data->lkp_tbl, data->tbl_sz,
282 (uintptr_t)memhdr->addr);
/*
 * Walk every memory chunk of a mempool and fold each chunk's MR entry into
 * the given per-queue lookup table (via mr_update_mempool_cb ->
 * mlx5_mr_update_addr).
 * NOTE(review): the designated initializers of `data` and the return of the
 * final table size are elided in this view.
 */
286 * Incrementally update LKEY lookup table for a specific Memory Pool from
287 * registered Memory Regions.
290 * Pointer to Ethernet device.
291 * @param[out] lkp_tbl
292 * Pointer to lookup table to fill. The size of array must be at least
293 * (priv->static_mr_n + 1).
295 * Size of lookup table.
297 * Pointer to Memory Pool.
300 * Size of returning lookup table.
303 mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
304 uint16_t n, struct rte_mempool *mp)
306 struct mr_update_mempool_data data = {
312 rte_mempool_mem_iter(mp, mr_update_mempool_cb, &data);
316 /* Called by qsort() to compare MR entries. */
/*
 * Orders struct mlx5_mr elements by their memseg virtual base address,
 * ascending, so the global cache table stays bisectable.
 * NOTE(review): the `static int` signature line, braces and the three
 * return statements (-1 / 1 / 0, presumably) are elided in this view.
 */
318 mr_comp_addr(const void *m1, const void *m2)
320 const struct mlx5_mr *mi1 = m1;
321 const struct mlx5_mr *mi2 = m2;
323 if (mi1->memseg->addr < mi2->memseg->addr)
325 else if (mi1->memseg->addr > mi2->memseg->addr)
/*
 * One-shot registration of the whole physical memory layout:
 *   1. count memsegs, 2. allocate priv->mr[] and priv->mr_cache[],
 *   3. ibv_reg_mr() each memseg, 4. sort by VA, 5. build the sorted,
 *   sentinel-prefixed global cache table consumed by mlx5_mr_lookup().
 * On ibv_reg_mr() failure, falls through to the cleanup path that
 * deregisters whatever was registered and frees both arrays.
 * NOTE(review): several statements are elided in this view (loop counting
 * into priv->mr_n, rte_errno assignments, goto-error jumps, final return).
 */
332 * Register entire physical memory to Verbs.
335 * Pointer to Ethernet device.
338 * 0 on success, a negative errno value otherwise and rte_errno is set.
341 mlx5_mr_register_memseg(struct rte_eth_dev *dev)
343 struct priv *priv = dev->data->dev_private;
344 const struct rte_memseg *ms = rte_eal_get_physmem_layout();
346 struct mlx5_mr_cache *mr_cache;
351 /* Count the existing memsegs in the system. */
352 for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i)
354 priv->mr = rte_calloc(__func__, priv->mr_n, sizeof(*mr), 0);
355 if (priv->mr == NULL) {
357 "port %u cannot allocate memory for array of static MR",
/* Global cache gets one extra slot for the sentinel entry. */
362 priv->mr_cache = rte_calloc(__func__, MR_TABLE_SZ(priv->mr_n),
363 sizeof(*mr_cache), 0);
364 if (priv->mr_cache == NULL) {
366 "port %u cannot allocate memory for array of MR cache",
/* Register each memseg with the Verbs protection domain. */
372 for (i = 0; i < priv->mr_n; ++i) {
373 mr = &(*priv->mr)[i];
375 mr->ibv_mr = ibv_reg_mr(priv->pd,
376 mr->memseg->addr, mr->memseg->len,
377 IBV_ACCESS_LOCAL_WRITE);
378 if (mr->ibv_mr == NULL) {
379 rte_dump_physmem_layout(stderr);
380 DRV_LOG(ERR, "port %u cannot register memseg[%u]",
381 dev->data->port_id, i);
385 /* Sort by virtual address. */
386 qsort(*priv->mr, priv->mr_n, sizeof(struct mlx5_mr), mr_comp_addr);
387 /* First entry must be NULL for comparison. */
388 (*priv->mr_cache)[0] = (struct mlx5_mr_cache) {
391 /* Compile global all-inclusive MR cache table. */
392 for (i = 0; i < priv->mr_n; ++i) {
393 mr = &(*priv->mr)[i];
/* +1 skips the sentinel slot. */
394 mr_cache = &(*priv->mr_cache)[i + 1];
395 /* Paranoid, mr[] must be sorted. */
396 assert(i == 0 || mr->memseg->addr > (mr - 1)->memseg->addr);
397 *mr_cache = (struct mlx5_mr_cache) {
398 .start = (uintptr_t)mr->memseg->addr,
399 .end = (uintptr_t)mr->memseg->addr + mr->memseg->len,
/* LKEYs are stored pre-swapped to big-endian for the datapath. */
400 .lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey)
/* Error path: undo partial registrations, then free both arrays. */
405 for (i = 0; i < priv->mr_n; ++i) {
406 mr = &(*priv->mr)[i];
407 if (mr->ibv_mr != NULL)
408 ibv_dereg_mr(mr->ibv_mr);
411 rte_free(priv->mr_cache);
/*
 * Teardown counterpart of mlx5_mr_register_memseg(): deregister every
 * static MR and release both the MR array and the global cache table.
 * NOTE(review): the freeing of priv->mr and its NULL reset appear on lines
 * elided from this view (between the loop and rte_free(priv->mr_cache)) --
 * confirm against the full source.
 */
417 * Deregister all Memory Regions.
420 * Pointer to Ethernet device.
423 mlx5_mr_deregister_memseg(struct rte_eth_dev *dev)
425 struct priv *priv = dev->data->dev_private;
430 for (i = 0; i < priv->mr_n; ++i) {
433 mr = &(*priv->mr)[i];
434 /* Physical memory can't be changed dynamically. */
435 assert(mr->memseg != NULL);
436 assert(mr->ibv_mr != NULL);
437 ibv_dereg_mr(mr->ibv_mr);
440 rte_free(priv->mr_cache);
442 priv->mr_cache = NULL;