/*
 * Copyright (c) 2017-2019 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <errno.h>

#include <rte_config.h>

#include <rte_mbuf.h>
#include <rte_ethdev.h>
#include <rte_cryptodev.h>
#include <rte_vfio.h>
#include <rte_version.h>

#include <vlib/vlib.h>
#include <dpdk/buffer.h>

STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE == RTE_PKTMBUF_HEADROOM,
	       "VLIB_BUFFER_PRE_DATA_SIZE must be equal to RTE_PKTMBUF_HEADROOM");

extern struct rte_mempool **dpdk_mempool_by_buffer_pool_index;
extern struct rte_mempool **dpdk_no_cache_mempool_by_buffer_pool_index;
extern struct rte_mbuf *dpdk_mbuf_template_by_pool_index;
#ifndef CLIB_MARCH_VARIANT
struct rte_mempool **dpdk_mempool_by_buffer_pool_index = 0;
struct rte_mempool **dpdk_no_cache_mempool_by_buffer_pool_index = 0;
struct rte_mbuf *dpdk_mbuf_template_by_pool_index = 0;
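
/* Create the per-pool DPDK mempool pair: a normal mempool with a per-lcore
   cache for the common case, and a no-cache twin used when a free must
   honor the vlib ref_count. Both are created "empty" and then grafted onto
   buffers VPP has already carved out of its own physmem. */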
clib_error_t *
dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
{
  uword buffer_mem_start = vm->buffer_main->buffer_mem_start;
  struct rte_mempool *mp, *nmp;
  struct rte_pktmbuf_pool_private priv;
  enum rte_iova_mode iova_mode;
  u32 i;
  u8 *name = 0;

  u32 elt_size =
    sizeof (struct rte_mbuf) + sizeof (vlib_buffer_t) + bp->data_size;

  /* create empty mempools */
  vec_validate_aligned (dpdk_mempool_by_buffer_pool_index, bp->index,
			CLIB_CACHE_LINE_BYTES);
  vec_validate_aligned (dpdk_no_cache_mempool_by_buffer_pool_index, bp->index,
			CLIB_CACHE_LINE_BYTES);

  /* normal mempool */
  name = format (name, "vpp pool %u%c", bp->index, 0);
  mp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
				 elt_size, 512, sizeof (priv),
				 bp->numa_node, 0);
  if (!mp)
    {
      vec_free (name);
      return clib_error_return (0,
				"failed to create normal mempool for numa node %u",
				bp->numa_node);
    }

  vec_reset_length (name);

  /* non-cached mempool */
  name = format (name, "vpp pool %u (no cache)%c", bp->index, 0);
  nmp = rte_mempool_create_empty ((char *) name, bp->n_buffers,
				  elt_size, 0, sizeof (priv),
				  bp->numa_node, 0);
  if (!nmp)
    {
      rte_mempool_free (mp);
      vec_free (name);
      return clib_error_return (0,
				"failed to create non-cache mempool for numa node %u",
				bp->numa_node);
    }

  vec_free (name);

  dpdk_mempool_by_buffer_pool_index[bp->index] = mp;
  dpdk_no_cache_mempool_by_buffer_pool_index[bp->index] = nmp;

  mp->pool_id = nmp->pool_id = bp->index;

  rte_mempool_set_ops_byname (mp, "vpp", NULL);
  rte_mempool_set_ops_byname (nmp, "vpp-no-cache", NULL);

  /* Call the mempool priv initializer */
  memset (&priv, 0, sizeof (priv));
  priv.mbuf_data_room_size = VLIB_BUFFER_PRE_DATA_SIZE +
    vlib_buffer_get_default_data_size (vm);
  priv.mbuf_priv_size = VLIB_BUFFER_HDR_SIZE;
  rte_pktmbuf_pool_init (mp, &priv);
  rte_pktmbuf_pool_init (nmp, &priv);

  iova_mode = rte_eal_iova_mode ();
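
  /* DPDK is never asked to populate these mempools; instead, each existing
     VPP buffer is threaded onto the element list by hand-crafting the
     rte_mempool_objhdr that sits just in front of its rte_mbuf */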
  /* populate mempool object buffer header */
  for (i = 0; i < bp->n_buffers; i++)
    {
      struct rte_mempool_objhdr *hdr;
      vlib_buffer_t *b = vlib_get_buffer (vm, bp->buffers[i]);
      struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b);
      hdr = (struct rte_mempool_objhdr *) RTE_PTR_SUB (mb, sizeof (*hdr));
      hdr->mp = mp;
      hdr->iova = (iova_mode == RTE_IOVA_VA) ?
	pointer_to_uword (mb) : vlib_physmem_get_pa (vm, mb);
      STAILQ_INSERT_TAIL (&mp->elt_list, hdr, next);
      STAILQ_INSERT_TAIL (&nmp->elt_list, hdr, next);
      mp->populated_size++;
      nmp->populated_size++;
    }

#if RTE_VERSION >= RTE_VERSION_NUM(22, 3, 0, 0)
  /* this mempool is populated by hand, so DPDK leaves it flagged
     RTE_MEMPOOL_F_NON_IO; clear the flag, these buffers do device IO */
  mp->flags &= ~RTE_MEMPOOL_F_NON_IO;
#endif

  /* call the object initializers */
  rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0);

  /* create mbuf header template from the first buffer in the pool */
  vec_validate_aligned (dpdk_mbuf_template_by_pool_index, bp->index,
			CLIB_CACHE_LINE_BYTES);
  clib_memcpy (vec_elt_at_index (dpdk_mbuf_template_by_pool_index, bp->index),
	       rte_mbuf_from_vlib_buffer (vlib_buffer_ptr_from_index
					  (buffer_mem_start, *bp->buffers,
					   0)), sizeof (struct rte_mbuf));

  for (i = 0; i < bp->n_buffers; i++)
    {
      vlib_buffer_t *b;
      b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0);
      vlib_buffer_copy_template (b, &bp->buffer_template);
    }

  /* map DMA pages if at least one physical device exists */
  if (rte_eth_dev_count_avail () || rte_cryptodev_count ())
    {
      uword i;
      size_t page_sz;
      vlib_physmem_map_t *pm;
      int do_vfio_map = 1;

      pm = vlib_physmem_get_map (vm, bp->physmem_map_index);
      page_sz = 1ULL << pm->log2_page_size;

      for (i = 0; i < pm->n_pages; i++)
	{
	  char *va = ((char *) pm->base) + i * page_sz;
	  uword pa = (iova_mode == RTE_IOVA_VA) ?
	    pointer_to_uword (va) : pm->page_table[i];

	  if (do_vfio_map &&
#if RTE_VERSION < RTE_VERSION_NUM(19, 11, 0, 0)
	      rte_vfio_dma_map (pointer_to_uword (va), pa, page_sz))
#else
	      rte_vfio_container_dma_map (RTE_VFIO_DEFAULT_CONTAINER_FD,
					  pointer_to_uword (va), pa, page_sz))
#endif
	    do_vfio_map = 0;

	  struct rte_mempool_memhdr *memhdr;
	  memhdr = clib_mem_alloc (sizeof (*memhdr));
	  memhdr->mp = mp;
	  memhdr->addr = va;
	  memhdr->iova = pa;
	  memhdr->len = page_sz;
	  memhdr->free_cb = 0;
	  memhdr->opaque = 0;

	  STAILQ_INSERT_TAIL (&mp->mem_list, memhdr, next);
	  mp->nb_mem_chunks++;
	}
    }

  return 0;
}

/* pool memory is owned by VPP, so DPDK's alloc/free ops are stubs */
static int
dpdk_ops_vpp_alloc (struct rte_mempool *mp)
{
  clib_warning ("");
  return 0;
}

static void
dpdk_ops_vpp_free (struct rte_mempool *mp)
{
  clib_warning ("");
}

#endif

static_always_inline void
dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj)
{
  /* Only non-replicated packets (b->ref_count == 1) expected */

  struct rte_mbuf *mb = obj;
  vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
  ASSERT (b->ref_count == 1);
  ASSERT (b->buffer_pool_index == bt->buffer_pool_index);
  vlib_buffer_copy_template (b, bt);
}
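
/* mempool enqueue: mbufs freed by DPDK get their vlib metadata re-armed
   from the pool's buffer template, then go back to the VPP buffer pool as
   indices. The metadata pass is unrolled 4-wide; the index conversion runs
   in batch_size chunks so bufs[] can stay on the stack. */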
int
CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp,
					  void *const *obj_table, unsigned n)
{
  const int batch_size = 32;
  vlib_main_t *vm = vlib_get_main ();
  vlib_buffer_t bt;
  u8 buffer_pool_index = mp->pool_id;
  vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
  u32 bufs[batch_size];
  u32 n_left = n;
  void *const *obj = obj_table;

  vlib_buffer_copy_template (&bt, &bp->buffer_template);

  while (n_left >= 4)
    {
      dpdk_ops_vpp_enqueue_one (&bt, obj[0]);
      dpdk_ops_vpp_enqueue_one (&bt, obj[1]);
      dpdk_ops_vpp_enqueue_one (&bt, obj[2]);
      dpdk_ops_vpp_enqueue_one (&bt, obj[3]);
      obj += 4;
      n_left -= 4;
    }

  while (n_left)
    {
      dpdk_ops_vpp_enqueue_one (&bt, obj[0]);
      obj += 1;
      n_left -= 1;
    }

  while (n >= batch_size)
    {
      vlib_get_buffer_indices_with_offset (vm, (void **) obj_table, bufs,
					   batch_size,
					   sizeof (struct rte_mbuf));
      vlib_buffer_pool_put (vm, buffer_pool_index, bufs, batch_size);
      n -= batch_size;
      obj_table += batch_size;
    }

  if (n)
    {
      vlib_get_buffer_indices_with_offset (vm, (void **) obj_table, bufs,
					   n, sizeof (struct rte_mbuf));
      vlib_buffer_pool_put (vm, buffer_pool_index, bufs, n);
    }

  return 0;
}

CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_enqueue);

static_always_inline void
dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old,
				   struct rte_mempool *new, void *obj,
				   vlib_buffer_t * bt)
{
  struct rte_mbuf *mb = obj;
  vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);

  if (clib_atomic_sub_fetch (&b->ref_count, 1) == 0)
    {
      u32 bi = vlib_get_buffer_index (vm, b);
      vlib_buffer_copy_template (b, bt);
      vlib_buffer_pool_put (vm, bt->buffer_pool_index, &bi, 1);
    }
}
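
/* no-cache variant: DPDK can free an mbuf whose vlib ref_count is still
   shared with a replica, so each free atomically drops one reference and
   the buffer returns to the VPP pool only when the last reference goes */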
int
CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue_no_cache) (struct rte_mempool * cmp,
						   void *const *obj_table,
						   unsigned n)
{
  vlib_main_t *vm = vlib_get_main ();
  vlib_buffer_t bt;
  struct rte_mempool *mp;
  mp = dpdk_mempool_by_buffer_pool_index[cmp->pool_id];
  u8 buffer_pool_index = cmp->pool_id;
  vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
  vlib_buffer_copy_template (&bt, &bp->buffer_template);

  while (n >= 4)
    {
      dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[0], &bt);
      dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[1], &bt);
      dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[2], &bt);
      dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[3], &bt);
      obj_table += 4;
      n -= 4;
    }

  while (n)
    {
      dpdk_ops_vpp_enqueue_no_cache_one (vm, cmp, mp, obj_table[0], &bt);
      obj_table += 1;
      n -= 1;
    }

  return 0;
}

CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_enqueue_no_cache);
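
/* stamp an mbuf header from the per-pool template with a few vector
   stores, skipping bytes 0..15 (buf_addr, buf_iova) which are unique per
   buffer and were fixed up once at pool init time */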
static_always_inline void
dpdk_mbuf_init_from_template (struct rte_mbuf **mba, struct rte_mbuf *mt,
			      int count)
{
  /* Assumptions about rte_mbuf layout */
  STATIC_ASSERT_OFFSET_OF (struct rte_mbuf, buf_addr, 0);
  STATIC_ASSERT_OFFSET_OF (struct rte_mbuf, buf_iova, 8);
  STATIC_ASSERT_SIZEOF_ELT (struct rte_mbuf, buf_iova, 8);
  STATIC_ASSERT_SIZEOF (struct rte_mbuf, 128);

  while (count--)
    {
      struct rte_mbuf *mb = mba[0];
      int i;

      /* bytes 0 .. 15 hold buf_addr and buf_iova which we need to preserve */
      /* copy bytes 16 .. 31 */
      *((u8x16 *) mb + 1) = *((u8x16 *) mt + 1);

      /* copy bytes 32 .. 127 */
#ifdef CLIB_HAVE_VEC256
      for (i = 1; i < 4; i++)
	*((u8x32 *) mb + i) = *((u8x32 *) mt + i);
#else
      for (i = 2; i < 8; i++)
	*((u8x16 *) mb + i) = *((u8x16 *) mt + i);
#endif

      mba++;
    }
}
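
/* mempool dequeue: allocate buffers from the VPP pool, step each pointer
   back by sizeof (struct rte_mbuf) to reach the mbuf, and stamp the header
   from the template. DPDK expects all-or-nothing semantics, so a short
   allocation rolls everything back. */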
int
CLIB_MULTIARCH_FN (dpdk_ops_vpp_dequeue) (struct rte_mempool * mp,
					  void **obj_table, unsigned n)
{
  const int batch_size = 32;
  vlib_main_t *vm = vlib_get_main ();
  u32 bufs[batch_size], total = 0, n_alloc = 0;
  u8 buffer_pool_index = mp->pool_id;
  void **obj = obj_table;
  struct rte_mbuf t = dpdk_mbuf_template_by_pool_index[buffer_pool_index];

  while (n >= batch_size)
    {
      n_alloc = vlib_buffer_alloc_from_pool (vm, bufs, batch_size,
					     buffer_pool_index);
      if (n_alloc != batch_size)
	goto alloc_fail;

      vlib_get_buffers_with_offset (vm, bufs, obj, batch_size,
				    -(i32) sizeof (struct rte_mbuf));
      dpdk_mbuf_init_from_template ((struct rte_mbuf **) obj, &t, batch_size);
      total += batch_size;
      obj += batch_size;
      n -= batch_size;
    }

  if (n)
    {
      n_alloc = vlib_buffer_alloc_from_pool (vm, bufs, n, buffer_pool_index);

      if (n_alloc != n)
	goto alloc_fail;

      vlib_get_buffers_with_offset (vm, bufs, obj, n,
				    -(i32) sizeof (struct rte_mbuf));
      dpdk_mbuf_init_from_template ((struct rte_mbuf **) obj, &t, n);
    }

  return 0;

alloc_fail:
  /* dpdk doesn't support partial alloc, so we need to return what we
     already got */
  if (n_alloc)
    vlib_buffer_pool_put (vm, buffer_pool_index, bufs, n_alloc);
  obj = obj_table;
  while (total)
    {
      vlib_get_buffer_indices_with_offset (vm, obj, bufs, batch_size,
					   sizeof (struct rte_mbuf));
      vlib_buffer_pool_put (vm, buffer_pool_index, bufs, batch_size);

      obj += batch_size;
      total -= batch_size;
    }
  return -ENOENT;
}

CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_dequeue);

#ifndef CLIB_MARCH_VARIANT

static int
dpdk_ops_vpp_dequeue_no_cache (struct rte_mempool *mp, void **obj_table,
			       unsigned n)
{
  /* the no-cache pool is free-only; nothing should ever dequeue from it */
  clib_error ("bug");
  return 0;
}

static unsigned
dpdk_ops_vpp_get_count (const struct rte_mempool *mp)
{
  vlib_main_t *vm = vlib_get_main ();

  if (mp)
    {
      vlib_buffer_pool_t *pool = vlib_get_buffer_pool (vm, mp->pool_id);
      if (pool)
	{
	  return pool->n_avail;
	}
    }
  return 0;
}

static unsigned
dpdk_ops_vpp_get_count_no_cache (const struct rte_mempool *mp)
{
  struct rte_mempool *cmp;
  cmp = dpdk_no_cache_mempool_by_buffer_pool_index[mp->pool_id];
  return dpdk_ops_vpp_get_count (cmp);
}
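
/* register the two ops vtables with DPDK and attach every VPP buffer pool
   to its mempool pair; pool_id doubles as the VPP buffer pool index, which
   is how the ops callbacks above find their way back to vlib */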
clib_error_t *
dpdk_buffer_pools_create (vlib_main_t * vm)
{
  clib_error_t *err;
  vlib_buffer_pool_t *bp;
  struct rte_mempool_ops ops = { };

  strncpy (ops.name, "vpp", 4);
  ops.alloc = dpdk_ops_vpp_alloc;
  ops.free = dpdk_ops_vpp_free;
  ops.get_count = dpdk_ops_vpp_get_count;
  ops.enqueue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_enqueue);
  ops.dequeue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_dequeue);
  rte_mempool_register_ops (&ops);

  strncpy (ops.name, "vpp-no-cache", 13);
  ops.get_count = dpdk_ops_vpp_get_count_no_cache;
  ops.enqueue = CLIB_MARCH_FN_POINTER (dpdk_ops_vpp_enqueue_no_cache);
  ops.dequeue = dpdk_ops_vpp_dequeue_no_cache;
  rte_mempool_register_ops (&ops);

  vec_foreach (bp, vm->buffer_main->buffer_pools)
    if (bp->start && (err = dpdk_buffer_pool_init (vm, bp)))
      return err;

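  /* every vlib_buffer_t in these pools is preceded by external metadata,
     the mempool object header plus the rte_mbuf; advertise that to vlib */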
  VLIB_BUFFER_SET_EXT_HDR_SIZE (sizeof (struct rte_mempool_objhdr) +
				sizeof (struct rte_mbuf));

  return 0;
}

#endif

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */