/*
 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in
 *     the documentation and/or other materials provided with the
 *     distribution.
 *   * Neither the name of Intel Corporation nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/resource.h>
#include <sys/mman.h>

#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_log.h>
#include <rte_malloc.h>
#include <rte_memcpy.h>
#include <rte_spinlock.h>
#include <rte_string_fns.h>

#include "compat_netmap.h"
struct netmap_port {
    struct rte_mempool *pool;   /* mbuf pool used for slot<->mbuf copies */
    struct netmap_if *nmif;     /* this port's netmap_if header */
    struct rte_eth_conf eth_conf;
    struct rte_eth_txconf tx_conf;
    struct rte_eth_rxconf rx_conf;
    uint32_t socket_id;
    uint32_t nr_tx_rings, nr_rx_rings;
    uint32_t tx_burst, rx_burst;
    uint32_t fd;                /* index into fd_port[], UINT32_MAX if free */
    uint16_t nr_tx_slots, nr_rx_slots;
};
#ifndef POLLRDNORM
#define POLLRDNORM 0x0040
#endif

#ifndef POLLWRNORM
#define POLLWRNORM 0x0100
#endif
#define FD_PORT_FREE UINT32_MAX
#define FD_PORT_RSRV (FD_PORT_FREE - 1)

/* a "fake" file descriptor carries nothing but the attached port id */
struct fd_port {
    uint32_t port;  /* FD_PORT_FREE when unused, FD_PORT_RSRV when reserved */
};

struct netmap_state {
    struct rte_netmap_conf conf;  /* global limits: rings, slots, bufsz */
    uintptr_t buf_start;          /* start of the packet buffers */
    void *mem;                    /* the single shared memory region */
    uint32_t mem_sz;
    uint32_t netif_memsz;         /* per-interface metadata size */
};
#define COMPAT_NETMAP_MAX_NOFILE       (2 * RTE_MAX_ETHPORTS)
#define COMPAT_NETMAP_MAX_BURST        64
#define COMPAT_NETMAP_MAX_PKT_PER_SYNC (2 * COMPAT_NETMAP_MAX_BURST)

static struct netmap_port ports[RTE_MAX_ETHPORTS];
static struct netmap_state netmap;

static struct fd_port fd_port[COMPAT_NETMAP_MAX_NOFILE];
static const int next_fd_start = RLIMIT_NOFILE + 1;
static rte_spinlock_t netmap_lock;
#define IDX_TO_FD(x) ((x) + next_fd_start)
#define FD_TO_IDX(x) ((x) - next_fd_start)
#define FD_VALID(x)  ((x) >= next_fd_start && \
    (x) < (typeof(x))(RTE_DIM(fd_port) + next_fd_start))
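/*
 * Illustrative note (added; not part of the original sources): the "fake"
 * descriptors handed out by this layer round-trip through the macros above:
 *
 *	int fd = IDX_TO_FD(0);	first fake descriptor
 *	FD_VALID(fd)		non-zero: fd maps inside fd_port[]
 *	FD_TO_IDX(fd)		0 again
 *
 * Because next_fd_start lies above RLIMIT_NOFILE, these values can never
 * collide with descriptors returned by the operating system.
 */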
#define PORT_NUM_RINGS (2 * netmap.conf.max_rings)
#define PORT_NUM_SLOTS (PORT_NUM_RINGS * netmap.conf.max_slots)

#define BUF_IDX(port, ring, slot) \
    (((port) * PORT_NUM_RINGS + (ring)) * netmap.conf.max_slots + \
    (slot))

#define NETMAP_IF_RING_OFS(rid, rings, slots) ({ \
    struct netmap_if *_if; \
    struct netmap_ring *_rg; \
    sizeof(*_if) + \
    (rings) * sizeof(_if->ring_ofs[0]) + \
    (rid) * sizeof(*_rg) + \
    (slots) * sizeof(_rg->slot[0]); \
    })
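/*
 * Worked example (added; the configuration values are hypothetical): with
 * max_rings = 2 and max_slots = 1024, PORT_NUM_RINGS is 4 (RX + TX) and
 * PORT_NUM_SLOTS is 4096. NETMAP_IF_RING_OFS(rid, rings, slots) then gives
 * the byte offset of ring 'rid' from the start of its netmap_if: the
 * netmap_if header, the ring_ofs[] array for 'rings' rings, 'rid' complete
 * netmap_ring headers, and the 'slots' slot descriptors that precede it.
 * Calling it as NETMAP_IF_RING_OFS(PORT_NUM_RINGS, PORT_NUM_RINGS,
 * PORT_NUM_SLOTS) therefore yields the total metadata size of one
 * interface, which is exactly how rte_netmap_init() computes nmif_sz below.
 */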
static void netmap_unregif(uint32_t idx, uint32_t port);

/* Parse a decimal port number out of the "interface" name. */
static int32_t
ifname_to_portid(const char *ifname, uint16_t *port)
{
    char *endptr;
    uint64_t portid;

    errno = 0;
    portid = strtoul(ifname, &endptr, 10);
    if (endptr == ifname || *endptr != '\0' ||
            portid >= RTE_DIM(ports) || errno != 0)
        return -EINVAL;

    *port = (uint16_t)portid;
    return 0;
}
/**
 * Given a dpdk mbuf, fill in the Netmap slot in ring r and its associated
 * buffer with the data held by the mbuf.
 * Note that mbuf chains are not supported.
 */
static void
mbuf_to_slot(struct rte_mbuf *mbuf, struct netmap_ring *r, uint32_t index)
{
    char *data;
    uint16_t length;

    data = rte_pktmbuf_mtod(mbuf, char *);
    length = rte_pktmbuf_data_len(mbuf);

    /* an oversized packet does not fit in a single buffer: drop its data */
    if (length > r->nr_buf_size)
        length = 0;

    r->slot[index].len = length;
    rte_memcpy(NETMAP_BUF(r, r->slot[index].buf_idx), data, length);
}
/**
 * Given a Netmap ring and a slot index for that ring, construct a dpdk mbuf
 * from the data held in the buffer associated with the slot.
 * Allocation/deallocation of the dpdk mbuf is the responsibility of the
 * caller.
 * Note that mbuf chains are not supported.
 */
static void
slot_to_mbuf(struct netmap_ring *r, uint32_t index, struct rte_mbuf *mbuf)
{
    char *data;
    uint16_t length;

    rte_pktmbuf_reset(mbuf);
    length = r->slot[index].len;
    data = rte_pktmbuf_append(mbuf, length);

    if (data != NULL)
        rte_memcpy(data, NETMAP_BUF(r, r->slot[index].buf_idx), length);
}
/* Reserve a free "fake" file descriptor slot and return its fd value. */
static int32_t
fd_reserve(void)
{
    uint32_t i;

    for (i = 0; i != RTE_DIM(fd_port) && fd_port[i].port != FD_PORT_FREE;
            i++)
        ;

    if (i == RTE_DIM(fd_port))
        return -ENOMEM;

    fd_port[i].port = FD_PORT_RSRV;
    return IDX_TO_FD(i);
}
static int32_t
fd_release(int32_t fd)
{
    uint32_t idx, port;

    idx = FD_TO_IDX(fd);

    if (!FD_VALID(fd) || (port = fd_port[idx].port) == FD_PORT_FREE)
        return -EINVAL;

    /* if we still have a valid port attached, release the port */
    if (port < RTE_DIM(ports) && ports[port].fd == idx)
        netmap_unregif(idx, port);

    fd_port[idx].port = FD_PORT_FREE;
    return 0;
}
static int
check_nmreq(struct nmreq *req, uint16_t *port)
{
    int32_t rc;
    uint16_t portid;

    if (req == NULL)
        return -EINVAL;

    if (req->nr_version != NETMAP_API) {
        req->nr_version = NETMAP_API;
        return -EINVAL;
    }

    if ((rc = ifname_to_portid(req->nr_name, &portid)) != 0) {
        RTE_LOG(ERR, USER1, "Invalid interface name \"%s\" "
            "in NIOCGINFO call\n", req->nr_name);
        return rc;
    }

    if (ports[portid].pool == NULL) {
        RTE_LOG(ERR, USER1, "Misconfigured portid %u\n", portid);
        return -EINVAL;
    }

    *port = portid;
    return 0;
}
/**
 * Simulate a Netmap NIOCGINFO ioctl: given a struct nmreq holding an
 * interface name (a port number in our case), fill the struct nmreq with
 * advisory information about the interface: number of rings and their size,
 * total memory required in the map, etc.
 * These values are preconfigured through the rte_eth_conf, rte_eth_txconf,
 * rte_eth_rxconf and rte_netmap_port_conf structures and the calls to
 * rte_netmap_init_port() made by the Netmap application.
 */
static int
ioctl_niocginfo(__rte_unused int fd, void *param)
{
    uint16_t portid;
    struct nmreq *req;
    int32_t rc;

    req = (struct nmreq *)param;
    if ((rc = check_nmreq(req, &portid)) != 0)
        return rc;

    /* Netmap uses ni_(r|t)x_rings + 1, hence the -1 here */
    req->nr_tx_rings = (uint16_t)(ports[portid].nr_tx_rings - 1);
    req->nr_rx_rings = (uint16_t)(ports[portid].nr_rx_rings - 1);
    req->nr_tx_slots = ports[portid].nr_tx_slots;
    req->nr_rx_slots = ports[portid].nr_rx_slots;

    /* in the current implementation all NETIFs share one memory region. */
    req->nr_memsize = netmap.mem_sz;

    return 0;
}
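/*
 * Example call sequence (added sketch; the port name "0" is illustrative):
 *
 *	struct nmreq req;
 *
 *	memset(&req, 0, sizeof(req));
 *	snprintf(req.nr_name, sizeof(req.nr_name), "0");
 *	req.nr_version = NETMAP_API;
 *	if (rte_netmap_ioctl(fd, NIOCGINFO, &req) == 0)
 *		... req.nr_memsize is the size to hand to rte_netmap_mmap() ...
 */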
static void
netmap_ring_setup(struct netmap_ring *ring, uint16_t port, uint32_t ringid,
    uint32_t num_slots)
{
    uint32_t j;

    ring->buf_ofs = netmap.buf_start - (uintptr_t)ring;
    ring->num_slots = num_slots;
    ring->cur = 0;
    ring->reserved = 0;
    ring->nr_buf_size = netmap.conf.max_bufsz;
    ring->flags = 0;
    ring->ts.tv_sec = 0;
    ring->ts.tv_usec = 0;

    for (j = 0; j < ring->num_slots; j++) {
        ring->slot[j].buf_idx = BUF_IDX(port, ringid, j);
        ring->slot[j].len = 0;
        ring->slot[j].flags = 0;
    }
}
static int
netmap_regif(struct nmreq *req, uint32_t idx, uint16_t port)
{
    struct netmap_if *nmif;
    struct netmap_ring *ring;
    uint32_t i, slots, start_ring;
    int32_t rc;

    if (ports[port].fd < RTE_DIM(fd_port)) {
        RTE_LOG(ERR, USER1, "port %u already in use by fd: %u\n",
            port, IDX_TO_FD(ports[port].fd));
        return -EBUSY;
    }
    if (fd_port[idx].port != FD_PORT_RSRV) {
        RTE_LOG(ERR, USER1, "fd: %u is misconfigured\n",
            IDX_TO_FD(idx));
        return -EBUSY;
    }

    nmif = ports[port].nmif;

    /* setup netmap_if fields. */
    memset(nmif, 0, netmap.netif_memsz);

    /* only ALL rings supported right now. */
    if (req->nr_ringid != 0)
        return -EINVAL;

    snprintf(nmif->ni_name, sizeof(nmif->ni_name), "%s", req->nr_name);
    nmif->ni_version = req->nr_version;

    /* Netmap uses ni_(r|t)x_rings + 1 */
    nmif->ni_rx_rings = ports[port].nr_rx_rings - 1;
    nmif->ni_tx_rings = ports[port].nr_tx_rings - 1;

    /*
     * Setup TX rings and slots.
     * Refer to the comments in netmap.h for details.
     */
    slots = 0;
    for (i = 0; i < nmif->ni_tx_rings + 1; i++) {
        nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i,
            PORT_NUM_RINGS, slots);

        ring = NETMAP_TXRING(nmif, i);
        netmap_ring_setup(ring, port, i, ports[port].nr_tx_slots);
        /* every TX slot is free to be filled by the application */
        ring->avail = ring->num_slots;

        slots += ports[port].nr_tx_slots;
    }

    /*
     * Setup RX rings and slots.
     * Refer to the comments in netmap.h for details.
     */
    start_ring = i;
    for (; i < nmif->ni_rx_rings + 1 + start_ring; i++) {
        nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i,
            PORT_NUM_RINGS, slots);

        ring = NETMAP_RXRING(nmif, (i - start_ring));
        netmap_ring_setup(ring, port, i, ports[port].nr_rx_slots);
        /* no packets have been received yet */
        ring->avail = 0;

        slots += ports[port].nr_rx_slots;
    }

    if ((rc = rte_eth_dev_start(port)) < 0) {
        RTE_LOG(ERR, USER1,
            "Couldn't start ethernet device %s (error %d)\n",
            req->nr_name, rc);
        return rc;
    }

    /* setup the fd <--> port relationship. */
    ports[port].fd = idx;
    fd_port[idx].port = port;

    req->nr_memsize = netmap.mem_sz;
    req->nr_offset = (uintptr_t)nmif - (uintptr_t)netmap.mem;

    return 0;
}
/**
 * Simulate a Netmap NIOCREGIF ioctl: bind the file descriptor to the port
 * and switch the port into "netmap mode".
 */
static int
ioctl_niocregif(int32_t fd, void *param)
{
    uint16_t portid;
    int32_t rc;
    uint32_t idx;
    struct nmreq *req;

    req = (struct nmreq *)param;
    if ((rc = check_nmreq(req, &portid)) != 0)
        return rc;

    idx = FD_TO_IDX(fd);

    rte_spinlock_lock(&netmap_lock);
    rc = netmap_regif(req, idx, portid);
    rte_spinlock_unlock(&netmap_lock);

    return rc;
}
static void
netmap_unregif(uint32_t idx, uint32_t port)
{
    fd_port[idx].port = FD_PORT_RSRV;
    ports[port].fd = UINT32_MAX;
    rte_eth_dev_stop(port);
}
/**
 * Simulate a Netmap NIOCUNREGIF ioctl: put an interface running in Netmap
 * mode back in "normal" mode. In our case, we just stop the port associated
 * with this file descriptor.
 */
static int
ioctl_niocunregif(int fd)
{
    uint32_t idx, port;
    int32_t rc;

    idx = FD_TO_IDX(fd);

    rte_spinlock_lock(&netmap_lock);

    port = fd_port[idx].port;
    if (port < RTE_DIM(ports) && ports[port].fd == idx) {
        netmap_unregif(idx, port);
        rc = 0;
    } else {
        RTE_LOG(ERR, USER1,
            "%s: %d is not associated with a valid port\n",
            __func__, fd);
        rc = -EINVAL;
    }

    rte_spinlock_unlock(&netmap_lock);
    return rc;
}
/**
 * A call to rx_sync_ring will try to fill a Netmap RX ring with as many
 * packets as it can hold coming from its dpdk port.
 */
static inline int
rx_sync_ring(struct netmap_ring *ring, uint16_t port, uint16_t ring_number,
    uint16_t max_burst)
{
    int32_t i, n_rx;
    uint16_t burst_size;
    uint32_t cur_slot, n_free_slots;
    struct rte_mbuf *rx_mbufs[COMPAT_NETMAP_MAX_BURST];

    n_free_slots = ring->num_slots - (ring->avail + ring->reserved);
    n_free_slots = RTE_MIN(n_free_slots, max_burst);
    cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);

    while (n_free_slots) {
        burst_size = (uint16_t)RTE_MIN(n_free_slots, RTE_DIM(rx_mbufs));

        /* receive up to burst_size packets from the NIC's queue */
        n_rx = rte_eth_rx_burst(port, ring_number, rx_mbufs,
            burst_size);
        if (n_rx == 0)
            return 0;
        if (unlikely(n_rx < 0))
            return -1;

        /* Put those n_rx packets in the Netmap structures */
        for (i = 0; i < n_rx; i++) {
            mbuf_to_slot(rx_mbufs[i], ring, cur_slot);
            rte_pktmbuf_free(rx_mbufs[i]);
            cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
        }

        /* Update the Netmap ring structure to reflect the change */
        ring->avail += n_rx;
        n_free_slots -= n_rx;
    }

    return 0;
}
static int
rx_sync_if(uint32_t port)
{
    uint16_t burst;
    uint32_t i, rc;
    struct netmap_if *nifp;
    struct netmap_ring *r;

    nifp = ports[port].nmif;
    burst = ports[port].rx_burst;
    rc = 0;

    for (i = 0; i < nifp->ni_rx_rings + 1; i++) {
        r = NETMAP_RXRING(nifp, i);
        rx_sync_ring(r, port, (uint16_t)i, burst);
        rc += r->avail;
    }

    /* the number of packets available across all RX rings */
    return rc;
}
/**
 * Simulate a Netmap NIOCRXSYNC ioctl: receive packets from the dpdk port
 * into the Netmap RX rings attached to this file descriptor.
 */
static int
ioctl_niocrxsync(int fd)
{
    uint32_t idx, port;

    idx = FD_TO_IDX(fd);
    if ((port = fd_port[idx].port) < RTE_DIM(ports) &&
            ports[port].fd == idx) {
        return rx_sync_if(fd_port[idx].port);
    } else
        return -EINVAL;
}
/**
 * A call to tx_sync_ring will try to empty a Netmap TX ring by converting
 * its buffers into rte_mbufs and sending them out on the ring's dpdk port.
 */
static inline int
tx_sync_ring(struct netmap_ring *ring, uint16_t port, uint16_t ring_number,
    struct rte_mempool *pool, uint16_t max_burst)
{
    uint32_t i, n_tx;
    uint16_t burst_size;
    uint32_t cur_slot, n_used_slots;
    struct rte_mbuf *tx_mbufs[COMPAT_NETMAP_MAX_BURST];

    n_used_slots = ring->num_slots - ring->avail;
    n_used_slots = RTE_MIN(n_used_slots, max_burst);
    cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);

    while (n_used_slots) {
        burst_size = (uint16_t)RTE_MIN(n_used_slots, RTE_DIM(tx_mbufs));

        for (i = 0; i < burst_size; i++) {
            tx_mbufs[i] = rte_pktmbuf_alloc(pool);
            if (tx_mbufs[i] == NULL)
                goto err;

            slot_to_mbuf(ring, cur_slot, tx_mbufs[i]);
            cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
        }

        n_tx = rte_eth_tx_burst(port, ring_number, tx_mbufs,
            burst_size);

        /* Update the Netmap ring structure to reflect the change */
        ring->avail += n_tx;
        n_used_slots -= n_tx;

        /* Return the mbufs that failed to transmit to their pool */
        if (unlikely(n_tx != burst_size)) {
            for (i = n_tx; i < burst_size; i++)
                rte_pktmbuf_free(tx_mbufs[i]);
            break;
        }
    }

    return 0;

err:
    /* free the mbufs allocated in this burst but never handed to the NIC */
    while (i-- != 0)
        rte_pktmbuf_free(tx_mbufs[i]);

    RTE_LOG(ERR, USER1,
        "Couldn't get mbuf from mempool; is the mempool too small?\n");
    return -ENOMEM;
}
static int
tx_sync_if(uint32_t port)
{
    uint16_t burst;
    uint32_t i, rc;
    struct netmap_if *nifp;
    struct netmap_ring *r;
    struct rte_mempool *mp;

    nifp = ports[port].nmif;
    mp = ports[port].pool;
    burst = ports[port].tx_burst;
    rc = 0;

    for (i = 0; i < nifp->ni_tx_rings + 1; i++) {
        r = NETMAP_TXRING(nifp, i);
        tx_sync_ring(r, port, (uint16_t)i, mp, burst);
        rc += r->avail;
    }

    return rc;
}
/**
 * Simulate a Netmap NIOCTXSYNC ioctl: transmit the packets the application
 * placed in the Netmap TX rings attached to this file descriptor.
 */
static int
ioctl_nioctxsync(int fd)
{
    uint32_t idx, port;

    idx = FD_TO_IDX(fd);
    if ((port = fd_port[idx].port) < RTE_DIM(ports) &&
            ports[port].fd == idx) {
        return tx_sync_if(fd_port[idx].port);
    } else
        return -EINVAL;
}
/**
 * Give the library a mempool of rte_mbufs with which it can do the
 * rte_mbuf <--> netmap slot conversions.
 */
int
rte_netmap_init(const struct rte_netmap_conf *conf)
{
    size_t buf_ofs, nmif_sz, sz;
    size_t port_rings, port_slots, port_bufs;
    uint32_t i, port_num;

    if (conf == NULL)
        return -EINVAL;

    port_num = RTE_MAX_ETHPORTS;
    port_rings = 2 * conf->max_rings;
    port_slots = port_rings * conf->max_slots;
    port_bufs = port_slots;

    nmif_sz = NETMAP_IF_RING_OFS(port_rings, port_rings, port_slots);
    sz = nmif_sz * port_num;

    buf_ofs = RTE_ALIGN_CEIL(sz, RTE_CACHE_LINE_SIZE);
    sz = buf_ofs + port_bufs * conf->max_bufsz * port_num;

    if (sz > UINT32_MAX ||
            (netmap.mem = rte_zmalloc_socket(__func__, sz,
            RTE_CACHE_LINE_SIZE, conf->socket_id)) == NULL) {
        RTE_LOG(ERR, USER1, "%s: failed to allocate %zu bytes\n",
            __func__, sz);
        return -ENOMEM;
    }

    netmap.mem_sz = sz;
    netmap.netif_memsz = nmif_sz;
    netmap.buf_start = (uintptr_t)netmap.mem + buf_ofs;
    netmap.conf = *conf;

    rte_spinlock_init(&netmap_lock);

    /* Mark all ports as unused and set NETIF pointer. */
    for (i = 0; i != RTE_DIM(ports); i++) {
        ports[i].fd = UINT32_MAX;
        ports[i].nmif = (struct netmap_if *)
            ((uintptr_t)netmap.mem + nmif_sz * i);
    }

    /* Mark all fd_ports as unused. */
    for (i = 0; i != RTE_DIM(fd_port); i++)
        fd_port[i].port = FD_PORT_FREE;

    return 0;
}
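/*
 * Typical initialization (added sketch; all field values are illustrative):
 *
 *	static struct rte_netmap_conf netmap_conf = {
 *		.socket_id = SOCKET_ID_ANY,
 *		.max_bufsz = 2048,
 *		.max_rings = 4,
 *		.max_slots = 1024,
 *	};
 *
 *	if (rte_netmap_init(&netmap_conf) != 0)
 *		rte_exit(EXIT_FAILURE, "rte_netmap_init() failed\n");
 */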
int
rte_netmap_init_port(uint16_t portid, const struct rte_netmap_port_conf *conf)
{
    int32_t ret;
    uint16_t i;
    uint16_t rx_slots, tx_slots;

    if (conf == NULL ||
            portid >= RTE_DIM(ports) ||
            conf->nr_tx_rings > netmap.conf.max_rings ||
            conf->nr_rx_rings > netmap.conf.max_rings) {
        RTE_LOG(ERR, USER1, "%s(%u): invalid parameters\n",
            __func__, portid);
        return -EINVAL;
    }

    rx_slots = (uint16_t)rte_align32pow2(conf->nr_rx_slots);
    tx_slots = (uint16_t)rte_align32pow2(conf->nr_tx_slots);

    if (tx_slots > netmap.conf.max_slots ||
            rx_slots > netmap.conf.max_slots) {
        RTE_LOG(ERR, USER1, "%s(%u): invalid parameters\n",
            __func__, portid);
        return -EINVAL;
    }

    ret = rte_eth_dev_configure(portid, conf->nr_rx_rings,
        conf->nr_tx_rings, conf->eth_conf);
    if (ret < 0) {
        RTE_LOG(ERR, USER1, "Couldn't configure port %u\n", portid);
        return ret;
    }

    ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &rx_slots, &tx_slots);
    if (ret < 0) {
        RTE_LOG(ERR, USER1,
            "Couldn't adjust number of descriptors for port %u\n",
            portid);
        return ret;
    }

    for (i = 0; i < conf->nr_tx_rings; i++) {
        ret = rte_eth_tx_queue_setup(portid, i, tx_slots,
            conf->socket_id, NULL);
        if (ret < 0) {
            RTE_LOG(ERR, USER1,
                "failed to configure TX queue %u of port %u\n",
                i, portid);
            return ret;
        }

        ret = rte_eth_rx_queue_setup(portid, i, rx_slots,
            conf->socket_id, NULL, conf->pool);
        if (ret < 0) {
            RTE_LOG(ERR, USER1,
                "failed to configure RX queue %u of port %u\n",
                i, portid);
            return ret;
        }
    }

    /* copy the configuration to private storage. */
    ports[portid].eth_conf = conf->eth_conf[0];
    ports[portid].pool = conf->pool;
    ports[portid].socket_id = conf->socket_id;
    ports[portid].nr_tx_rings = conf->nr_tx_rings;
    ports[portid].nr_rx_rings = conf->nr_rx_rings;
    ports[portid].nr_tx_slots = tx_slots;
    ports[portid].nr_rx_slots = rx_slots;
    ports[portid].tx_burst = conf->tx_burst;
    ports[portid].rx_burst = conf->rx_burst;

    return 0;
}
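/*
 * Per-port setup (added sketch; the values are illustrative, and eth_conf
 * and pktmbuf_pool are assumed to have been prepared by the caller):
 *
 *	struct rte_netmap_port_conf port_conf = {
 *		.eth_conf = &eth_conf,
 *		.pool = pktmbuf_pool,
 *		.socket_id = SOCKET_ID_ANY,
 *		.nr_tx_rings = 1,
 *		.nr_rx_rings = 1,
 *		.nr_tx_slots = 512,
 *		.nr_rx_slots = 512,
 *		.tx_burst = 32,
 *		.rx_burst = 32,
 *	};
 *
 *	if (rte_netmap_init_port(portid, &port_conf) != 0)
 *		rte_exit(EXIT_FAILURE, "Couldn't setup port %u\n", portid);
 */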
int
rte_netmap_close(int fd)
{
    int32_t rc;

    rte_spinlock_lock(&netmap_lock);
    rc = fd_release(fd);
    rte_spinlock_unlock(&netmap_lock);

    if (rc < 0) {
        errno = -rc;
        rc = -1;
    }
    return rc;
}
int
rte_netmap_ioctl(int fd, uint32_t op, void *param)
{
    int ret;

    if (!FD_VALID(fd)) {
        errno = EBADF;
        return -1;
    }

    switch (op) {
    case NIOCGINFO:
        ret = ioctl_niocginfo(fd, param);
        break;
    case NIOCREGIF:
        ret = ioctl_niocregif(fd, param);
        break;
    case NIOCUNREGIF:
        ret = ioctl_niocunregif(fd);
        break;
    case NIOCRXSYNC:
        ret = ioctl_niocrxsync(fd);
        break;
    case NIOCTXSYNC:
        ret = ioctl_nioctxsync(fd);
        break;
    default:
        ret = -ENOTTY;
    }

    if (ret < 0) {
        errno = -ret;
        ret = -1;
    } else
        ret = 0;

    return ret;
}
/**
 * Simulate a Netmap mmap(): return a pointer into the shared memory region
 * allocated at init time, at the requested offset.
 */
void *
rte_netmap_mmap(void *addr, size_t length,
    int prot, int flags, int fd, off_t offset)
{
    static const int cprot = PROT_WRITE | PROT_READ;

    if (!FD_VALID(fd) || length + offset > netmap.mem_sz ||
            (prot & cprot) != cprot ||
            ((flags & MAP_FIXED) != 0 && addr != NULL)) {
        errno = EINVAL;
        return MAP_FAILED;
    }

    return (void *)((uintptr_t)netmap.mem + (uintptr_t)offset);
}
/**
 * Return a "fake" file descriptor with a value above RLIMIT_NOFILE so that
 * any attempt to use that file descriptor with the usual API will fail.
 */
int
rte_netmap_open(__rte_unused const char *pathname, __rte_unused int flags)
{
    int fd;

    rte_spinlock_lock(&netmap_lock);
    fd = fd_reserve();
    rte_spinlock_unlock(&netmap_lock);

    if (fd < 0) {
        errno = -fd;
        fd = -1;
    }
    return fd;
}
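/*
 * End-to-end example (added sketch, compiled only when
 * COMPAT_NETMAP_EXAMPLE is defined; not part of the original library):
 * the canonical netmap open/ioctl/mmap sequence expressed against the
 * compat API above. Error handling is minimal on purpose.
 */
#ifdef COMPAT_NETMAP_EXAMPLE
static struct netmap_if *
example_attach(const char *ifname)
{
    struct nmreq req;
    void *mem;
    int fd;

    /* flags are ignored by the compat layer, so 0 is fine here */
    fd = rte_netmap_open("/dev/netmap", 0);
    if (fd < 0)
        return NULL;

    memset(&req, 0, sizeof(req));
    snprintf(req.nr_name, sizeof(req.nr_name), "%s", ifname);
    req.nr_version = NETMAP_API;

    if (rte_netmap_ioctl(fd, NIOCGINFO, &req) != 0 ||
            rte_netmap_ioctl(fd, NIOCREGIF, &req) != 0)
        return NULL;

    mem = rte_netmap_mmap(NULL, req.nr_memsize,
        PROT_READ | PROT_WRITE, 0, fd, 0);
    if (mem == MAP_FAILED)
        return NULL;

    /* NETMAP_IF() comes from the regular netmap headers */
    return NETMAP_IF(mem, req.nr_offset);
}
#endif /* COMPAT_NETMAP_EXAMPLE */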
/**
 * Does not support timeouts other than 0 (non-blocking) or a negative
 * (infinite) timeout.
 */
int
rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout)
{
    int32_t count_it, ret;
    uint32_t i, idx, port;
    uint32_t want_rx, want_tx;

    if (timeout > 0)
        return -1;

    ret = 0;
    do {
        for (i = 0; i < nfds; i++) {
            count_it = 0;

            if (!FD_VALID(fds[i].fd) || fds[i].events == 0) {
                fds[i].revents = 0;
                continue;
            }

            idx = FD_TO_IDX(fds[i].fd);
            if ((port = fd_port[idx].port) >= RTE_DIM(ports) ||
                    ports[port].fd != idx) {
                fds[i].revents |= POLLERR;
                ret++;
                continue;
            }

            want_rx = fds[i].events & (POLLIN | POLLRDNORM);
            want_tx = fds[i].events & (POLLOUT | POLLWRNORM);

            if (want_rx && rx_sync_if(port) > 0) {
                fds[i].revents = (uint16_t)
                    (fds[i].revents | want_rx);
                count_it = 1;
            }
            if (want_tx && tx_sync_if(port) > 0) {
                fds[i].revents = (uint16_t)
                    (fds[i].revents | want_tx);
                count_it = 1;
            }

            ret += count_it;
        }
    /* keep spinning only while nothing is ready and the timeout is infinite */
    } while (ret == 0 && timeout < 0);

    return ret;
}
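/*
 * Example event loop (added sketch, compiled only when
 * COMPAT_NETMAP_EXAMPLE is defined; not part of the original library):
 * drive the rings through rte_netmap_poll() the way a netmap application
 * would, using the descriptor returned by example_attach() above.
 */
#ifdef COMPAT_NETMAP_EXAMPLE
static void
example_loop(int fd)
{
    struct pollfd pfd;

    memset(&pfd, 0, sizeof(pfd));
    pfd.fd = fd;
    pfd.events = POLLIN | POLLOUT;

    for (;;) {
        pfd.revents = 0;

        /* timeout 0: a single non-blocking RX/TX sync pass */
        if (rte_netmap_poll(&pfd, 1, 0) < 0 ||
                (pfd.revents & POLLERR) != 0)
            break;

        /*
         * The RX/TX sync has already moved packets between the netmap
         * rings and the dpdk queues; walk the rings here via
         * NETMAP_RXRING()/NETMAP_TXRING() to process them.
         */
    }
}
#endif /* COMPAT_NETMAP_EXAMPLE */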