1 /* SPDX-License-Identifier: Apache-2.0
2 * Copyright(c) 2021 Cisco Systems, Inc.
10 #include <sys/socket.h>
14 #include <sys/epoll.h>
16 #include <vppinfra/cache.h>
18 #include <daq_module_api.h>
22 #define DAQ_VPP_VERSION 1
/* Per-architecture definitions of VPP_DAQ_PAUSE(): the x86 pause builtin,
 * the ARM yield instruction, or an empty expansion. */
25 #define VPP_DAQ_PAUSE() __builtin_ia32_pause ()
26 #elif defined(__aarch64__) || defined(__arm__)
27 #define VPP_DAQ_PAUSE() __asm__("yield")
29 #define VPP_DAQ_PAUSE()
/* Configuration variables advertised by this module; this table is what
 * vpp_daq_get_variable_descs hands back to the caller.
 * NOTE(review): vpp_daq_instantiate also parses input_mode and socket_name;
 * confirm both are listed in the part of the table not shown here. */
32 static DAQ_VariableDesc_t vpp_variable_descriptions[] = {
33 { "debug", "Enable debugging output to stdout",
34 DAQ_VAR_DESC_FORBIDS_ARGUMENT },
/* Private copy of the base API table; filled in by vpp_daq_module_load and
 * zeroed again by vpp_daq_module_unload. */
37 static DAQ_BaseAPI_t daq_base_api;
/* Report an error for modinst through the set_errbuf callback of the base
 * API copy above. */
39 #define SET_ERROR(modinst, ...) daq_base_api.set_errbuf (modinst, __VA_ARGS__)
/* Message pool state; its info member is what vpp_daq_get_msg_pool_info
 * fills in and copies out to the caller. */
41 typedef struct _vpp_msg_pool
43 DAQ_MsgPoolInfo_t info;
/* Per-descriptor bookkeeping. Code in this file reads its pkthdr, msg, index
 * and qpair_index members; one array of these is allocated per queue pair. */
46 typedef struct _vpp_desc_data
/* One buffer pool received from the peer: code in this file uses its fd, size
 * and base members, where base is a read-only shared mapping of fd. */
54 typedef struct _vpp_bpool
/* One queue pair shared with the peer. The descriptor table, rings and head
 * counters are pointers into the shared memory mapping created by
 * vpp_daq_instantiate. */
61 typedef struct _vpp_qpair
63 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
/* descriptor table inside the shared memory region */
65 daq_vpp_desc_t *descs;
/* head counters: enq_head is loaded by the receive path, deq_head is stored
 * by the finalize path */
68 volatile uint32_t *enq_head;
69 volatile uint32_t *deq_head;
/* local per-descriptor data, queue_size entries, allocated in
 * vpp_daq_instantiate and freed in vpp_daq_destroy */
73 VPPDescData *desc_data;
/* Input mode of an instance. It selects between returning at once (polling)
 * and waiting in epoll_wait inside vpp_daq_msg_receive, and decides whether
 * vpp_daq_msg_finalize writes to the dequeue fd. INTERRUPT has value 0, so
 * it is what a zero-initialised context starts with. */
79 DAQ_VPP_INPUT_MODE_INTERRUPT = 0,
80 DAQ_VPP_INPUT_MODE_POLLING,
81 } daq_vpp_input_mode_t;
/* Per-instance state of the module; allocated and populated by
 * vpp_daq_instantiate and torn down by vpp_daq_destroy. */
83 typedef struct _vpp_context
/* module instance handle, recorded as the owner of every message */
90 DAQ_ModuleInstance_h modinst;
/* array of num_qpairs queue pairs */
103 VPPQueuePair *qpairs;
/* result array for epoll_wait, one slot per queue pair */
107 struct epoll_event *epoll_events;
/* array of num_bpools buffer pools */
112 VPPBufferPool *bpools;
/* set from the input_mode configuration variable */
114 daq_vpp_input_mode_t input_mode;
/* borrowed pointer: either the socket_name configuration value or
 * DAQ_VPP_DEFAULT_SOCKET_PATH */
115 const char *socket_name;
/* Context shared across calls to vpp_daq_instantiate: it is stored here once
 * an instance has been built and is also handed back through ctxt_ptr on an
 * early path of that function. */
118 static VPP_Context_t *global_vpp_ctx = 0;
/* Spin until the queue pair lock is taken: try to swap p->lock to 1 with
 * acquire ordering and, while that fails, wait on relaxed loads until the
 * lock reads as zero before retrying. */
121 vpp_daq_qpair_lock (VPPQueuePair *p)
124 while (!__atomic_compare_exchange_n (&p->lock, &free, 1, 0, __ATOMIC_ACQUIRE,
127 while (__atomic_load_n (&p->lock, __ATOMIC_RELAXED))
/* Release the queue pair lock by storing 0 with release ordering, which
 * pairs with the acquire in vpp_daq_qpair_lock. */
135 vpp_daq_qpair_unlock (VPPQueuePair *p)
137 __atomic_store_n (&p->lock, 0, __ATOMIC_RELEASE);
/* Module load hook: checks that the base API table has the version and size
 * this module was built against, then keeps a private copy of it in
 * daq_base_api. */
141 vpp_daq_module_load (const DAQ_BaseAPI_t *base_api)
143 if (base_api->api_version != DAQ_BASE_API_VERSION ||
144 base_api->api_size != sizeof (DAQ_BaseAPI_t))
147 daq_base_api = *base_api;
/* Module unload hook: wipes the private copy of the base API table. */
153 vpp_daq_module_unload (void)
155 memset (&daq_base_api, 0, sizeof (daq_base_api));
/* Expose the configuration variable table.
 * var_desc_table - receives a pointer to vpp_variable_descriptions
 * Returns the number of entries in that table. */
160 vpp_daq_get_variable_descs (const DAQ_VariableDesc_t **var_desc_table)
162 *var_desc_table = vpp_variable_descriptions;
164 return sizeof (vpp_variable_descriptions) / sizeof (DAQ_VariableDesc_t);
/* Receive one daq_vpp_msg_t from fd together with its ancillary data.
 * fd    - connected socket
 * msg   - buffer for the message payload
 * n_fds - number of file descriptors expected in an SCM_RIGHTS message
 * fds   - destination for those descriptors
 * Returns DAQ_ERROR_NODEV unless recvmsg delivers exactly
 * sizeof (daq_vpp_msg_t) bytes. The control buffer is sized for n_fds
 * descriptors plus one struct ucred. */
168 vpp_daq_recvmsg (int fd, daq_vpp_msg_t *msg, int n_fds, int *fds)
171 CMSG_SPACE (sizeof (int) * n_fds) + CMSG_SPACE (sizeof (struct ucred));
173 struct msghdr mh = {};
175 struct cmsghdr *cmsg;
177 iov[0].iov_base = (void *) msg;
178 iov[0].iov_len = sizeof (daq_vpp_msg_t);
181 mh.msg_control = ctl;
182 mh.msg_controllen = ctl_sz;
184 memset (ctl, 0, ctl_sz);
/* a short read, an error or a closed peer all end up as NODEV */
187 if ((rv = recvmsg (fd, &mh, 0)) != sizeof (daq_vpp_msg_t))
188 return DAQ_ERROR_NODEV;
/* walk the control messages, handling only SOL_SOCKET entries */
190 cmsg = CMSG_FIRSTHDR (&mh);
193 if (cmsg->cmsg_level == SOL_SOCKET)
195 if (cmsg->cmsg_type == SCM_CREDENTIALS)
199 else if (cmsg->cmsg_type == SCM_RIGHTS)
/* NOTE(review): n_fds descriptors are copied without a visible check of
 * cmsg->cmsg_len; confirm the peer always sends at least n_fds of them. */
201 memcpy (fds, CMSG_DATA (cmsg), n_fds * sizeof (int));
204 cmsg = CMSG_NXTHDR (&mh, cmsg);
/* Tear down a context: unmap the shared memory region and the buffer pools,
 * release per queue pair resources, free the epoll event array and close the
 * epoll fd. Also called by vpp_daq_instantiate on its failure path.
 * handle - the VPP_Context_t created by vpp_daq_instantiate
 *
 * NOTE(review): the context comes from calloc, so untouched fields are 0
 * rather than -1 or MAP_FAILED; confirm every fd and mapping field is set to
 * its sentinel before a failure path can reach the tests below.
 * NOTE(review): the loops index bpools and qpairs by the stored counts, and
 * vpp_daq_instantiate sets the counts before allocating the arrays; confirm
 * a failed allocation cannot reach these loops with a NULL array. */
211 vpp_daq_destroy (void *handle)
213 VPP_Context_t *vc = (VPP_Context_t *) handle;
215 if (vc->shm_base != MAP_FAILED)
216 munmap (vc->shm_base, vc->shm_size);
218 if (vc->shm_fd != -1)
/* unmap every buffer pool that holds a valid mapping */
223 for (int i = 0; i < vc->num_bpools; i++)
225 VPPBufferPool *bp = vc->bpools + i;
228 if (bp->base && bp->base != MAP_FAILED)
229 munmap (bp->base, bp->size);
/* per queue pair: deal with both fds, then free the descriptor data */
236 for (int i = 0; i < vc->num_qpairs; i++)
238 VPPQueuePair *qp = vc->qpairs + i;
239 if (qp->enq_fd != -1)
241 if (qp->deq_fd != -1)
244 free (qp->desc_data);
249 free (vc->epoll_events);
251 if (vc->epoll_fd != -1)
252 close (vc->epoll_fd);
/* ERR(rv, ...): error helper used by vpp_daq_instantiate. It records a
 * formatted message through SET_ERROR and relies on a variable named modinst
 * being in scope where it is expanded. The rest of the macro body is not
 * part of this excerpt. */
256 #define ERR(rv, ...) \
258 SET_ERROR (modinst, __VA_ARGS__); \
/* Create a module instance: read the configuration variables, connect to the
 * peer over a SOCK_SEQPACKET unix socket, exchange the hello and config
 * messages, map the shared memory region and every buffer pool, and set up
 * each queue pair together with its epoll registration.
 *
 * modcfg   - configuration handle queried through the base API
 * modinst  - instance handle stored in the context and used by ERR
 * ctxt_ptr - receives the context pointer
 *
 * rval starts out as DAQ_ERROR; each ERR call site names the code reported
 * for that failure. */
264 vpp_daq_instantiate (const DAQ_ModuleConfig_h modcfg,
265 DAQ_ModuleInstance_h modinst, void **ctxt_ptr)
267 VPP_Context_t *vc = 0;
268 int rval = DAQ_ERROR;
270 struct sockaddr_un sun = { .sun_family = AF_UNIX };
271 int i, fd = -1, shm_fd = -1;
/* hands back the global context; the guarding condition is outside this
 * excerpt */
277 *ctxt_ptr = global_vpp_ctx;
281 vc = calloc (1, sizeof (VPP_Context_t));
284 ERR (DAQ_ERROR_NOMEM,
285 "%s: Couldn't allocate memory for the new VPP context!", __func__);
/* Walk the configuration variables.
 * NOTE(review): varValue is handed to strcmp and stored as is; confirm it
 * cannot be NULL when a key is given without a value. An input_mode value
 * other than the two literals below is ignored by the visible code. */
287 const char *varKey, *varValue;
288 daq_base_api.config_first_variable (modcfg, &varKey, &varValue);
291 if (!strcmp (varKey, "debug"))
293 else if (!strcmp (varKey, "input_mode"))
295 if (!strcmp (varValue, "interrupt"))
296 vc->input_mode = DAQ_VPP_INPUT_MODE_INTERRUPT;
297 else if (!strcmp (varValue, "polling"))
298 vc->input_mode = DAQ_VPP_INPUT_MODE_POLLING;
300 else if (!strcmp (varKey, "socket_name"))
302 vc->socket_name = varValue;
304 daq_base_api.config_next_variable (modcfg, &varKey, &varValue);
307 input = daq_base_api.config_get_input (modcfg);
309 if (!vc->socket_name)
310 /* try to use default socket path */
311 vc->socket_name = DAQ_VPP_DEFAULT_SOCKET_PATH;
313 if ((fd = socket (AF_UNIX, SOCK_SEQPACKET, 0)) < 0)
314 ERR (DAQ_ERROR_NODEV, "%s: Couldn't create socket!", __func__);
/* sun was zero-filled by its initializer and at most size - 1 bytes are
 * copied, so sun_path stays NUL-terminated; an over-long socket name is
 * silently truncated */
316 strncpy (sun.sun_path, vc->socket_name, sizeof (sun.sun_path) - 1);
318 if (connect (fd, (struct sockaddr *) &sun, sizeof (struct sockaddr_un)) != 0)
319 ERR (DAQ_ERROR_NODEV, "%s: Couldn't connect to socket! '%s'", __func__,
322 /* craft and send connect message */
323 msg.type = DAQ_VPP_MSG_TYPE_HELLO;
/* NOTE(review): the size passed is DAQ_VPP_INST_NAME_LEN - 1, so snprintf
 * writes at most LEN - 2 characters plus the terminator; confirm that
 * inst_name holds LEN bytes and that the extra byte is intentional. */
324 snprintf ((char *) &msg.hello.inst_name, DAQ_VPP_INST_NAME_LEN - 1, "%s",
327 if (send (fd, &msg, sizeof (msg), 0) != sizeof (msg))
328 ERR (DAQ_ERROR_NODEV, "%s: Couldn't send connect message!", __func__);
330 /* receive config message */
331 rval = vpp_daq_recvmsg (fd, &msg, 1, &shm_fd);
333 if (rval != DAQ_SUCCESS || msg.type != DAQ_VPP_MSG_TYPE_CONFIG ||
335 ERR (DAQ_ERROR_NODEV, "%s: Couldn't receive config message!", __func__);
337 vc->modinst = modinst;
/* sizes and counts are taken from the config message of the peer */
341 vc->num_bpools = msg.config.num_bpools;
342 vc->num_qpairs = msg.config.num_qpairs;
343 vc->shm_size = msg.config.shm_size;
346 vc->bpools = calloc (vc->num_bpools, sizeof (VPPBufferPool));
347 vc->qpairs = calloc (vc->num_qpairs, sizeof (VPPQueuePair));
348 vc->epoll_events = calloc (vc->num_qpairs, sizeof (struct epoll_event));
/* NOTE(review): the result of the epoll_events allocation is not part of
 * this check although vpp_daq_msg_receive passes it to epoll_wait. */
350 if (vc->bpools == 0 || vc->qpairs == 0)
351 ERR (DAQ_ERROR_NOMEM,
352 "%s: Couldn't allocate memory for the new VPP context!", __func__);
/* mark every buffer pool fd as unset before any of them is received */
354 for (i = 0; i < vc->num_bpools; i++)
355 vc->bpools[i].fd = -1;
358 mmap (0, vc->shm_size, PROT_READ | PROT_WRITE, MAP_SHARED, vc->shm_fd, 0);
360 if (base == MAP_FAILED)
361 ERR (DAQ_ERROR_NOMEM,
362 "%s: Couldn't map shared memory for the new VPP context!", __func__);
368 printf ("[%s]\n", input);
369 printf (" Shared memory size: %u\n", vc->shm_size);
370 printf (" Number of buffer pools: %u\n", vc->num_bpools);
371 printf (" Number of queue pairs: %u\n", vc->num_qpairs);
374 /* receive buffer pools */
/* the loop variable below shadows the function-level i */
375 for (int i = 0; i < vc->num_bpools; i++)
377 VPPBufferPool *bp = vc->bpools + i;
378 rval = vpp_daq_recvmsg (fd, &msg, 1, &bp->fd);
379 if (rval != DAQ_SUCCESS || msg.type != DAQ_VPP_MSG_TYPE_BPOOL ||
381 ERR (DAQ_ERROR_NODEV,
382 "%s: Failed to receive buffer pool message for the new "
/* buffer pools are mapped read-only */
385 bp->size = msg.bpool.size;
386 bp->base = mmap (0, bp->size, PROT_READ, MAP_SHARED, bp->fd, 0);
388 if (bp->base == MAP_FAILED)
389 ERR (DAQ_ERROR_NOMEM,
390 "%s: Couldn't map shared memory for the new VPP context!",
393 printf (" Buffer pool %u size: %u\n", i, bp->size);
396 if ((vc->epoll_fd = epoll_create (1)) == -1)
397 ERR (DAQ_ERROR_NODEV,
398 "%s: Couldn't create epoll fd for the new VPP context!", __func__);
400 /* receive queue pairs */
401 for (int i = 0; i < vc->num_qpairs; i++)
403 struct epoll_event ev = { .events = EPOLLIN };
404 int fds[2] = { -1, -1 };
406 VPPQueuePair *qp = vc->qpairs + i;
/* both descriptors must arrive with the queue pair message */
407 rval = vpp_daq_recvmsg (fd, &msg, 2, fds);
408 if (rval != DAQ_SUCCESS || msg.type != DAQ_VPP_MSG_TYPE_QPAIR ||
409 fds[0] == -1 || fds[1] == -1)
410 ERR (DAQ_ERROR_NODEV,
411 "%s: Failed to receive queu pair message for the new "
/* NOTE(review): log2_queue_size and the offsets below come from the peer.
 * No range check on the shift count and no bounds check of the offsets
 * against shm_size is visible here; confirm the peer is trusted or add
 * validation. */
414 qp->queue_size = 1 << msg.qpair.log2_queue_size;
415 qp->descs = (daq_vpp_desc_t *) (base + msg.qpair.desc_table_offset);
416 qp->enq_ring = (uint32_t *) (base + msg.qpair.enq_ring_offset);
417 qp->deq_ring = (uint32_t *) (base + msg.qpair.deq_ring_offset);
418 qp->enq_head = (uint32_t *) (base + msg.qpair.enq_head_offset);
419 qp->deq_head = (uint32_t *) (base + msg.qpair.deq_head_offset);
/* NOTE(review): the call registers enq_fd while the error text below talks
 * about the dequeue fd. */
424 if (epoll_ctl (vc->epoll_fd, EPOLL_CTL_ADD, qp->enq_fd, &ev) == -1)
425 ERR (DAQ_ERROR_NODEV,
426 "%s: Failed to dequeue fd to epoll instance for the new "
430 qsz = qp->queue_size;
/* one VPPDescData per ring slot, zero-initialised */
432 qp->desc_data = calloc (qsz, sizeof (VPPDescData));
434 ERR (DAQ_ERROR_NOMEM,
435 "%s: Couldn't allocate memory for the new VPP context!",
/* pre-fill the parts of each packet header and message that never change;
 * the local msg pointer below shadows the function-level msg */
438 for (int j = 0; j < qsz; j++)
440 VPPDescData *dd = qp->desc_data + j;
441 DAQ_PktHdr_t *pkthdr = &dd->pkthdr;
442 DAQ_Msg_t *msg = &dd->msg;
447 pkthdr->ingress_group = DAQ_PKTHDR_UNKNOWN;
448 pkthdr->egress_group = DAQ_PKTHDR_UNKNOWN;
450 msg->type = DAQ_MSG_TYPE_PACKET;
451 msg->hdr_len = sizeof (DAQ_PktHdr_t);
453 msg->owner = vc->modinst;
459 printf (" Queue pair %u:\n", i);
460 printf (" Size: %u\n", qp->queue_size);
461 printf (" Enqueue fd: %u\n", qp->enq_fd);
462 printf (" Dequeue fd: %u\n", qp->deq_fd);
/* publish the finished context to the caller and to the global pointer */
466 *ctxt_ptr = global_vpp_ctx = vc;
/* cleanup of a partially built context */
470 vpp_daq_destroy (vc);
477 vpp_daq_start (void *handle)
/* Statistics hook: clears the whole DAQ_Stats_t supplied by the caller. */
483 vpp_daq_get_stats (void *handle, DAQ_Stats_t *stats)
485 memset (stats, 0, sizeof (DAQ_Stats_t));
490 vpp_daq_reset_stats (void *handle)
/* Capability hook: the capability mask is built from DAQ_CAPA_BLOCK and
 * DAQ_CAPA_UNPRIV_START. */
495 vpp_daq_get_capabilities (void *handle)
497 uint32_t capabilities = DAQ_CAPA_BLOCK | DAQ_CAPA_UNPRIV_START;
502 vpp_daq_get_datalink_type (void *handle)
/* Collect pending descriptors of one queue pair into msgs, at most max_recv
 * of them. The queue pair lock is held from the moment next_desc is read
 * until the advanced value has been written back.
 * vc       - context, used to resolve buffer pool base addresses
 * qp       - queue pair to drain
 * msgs     - output array of message pointers
 * max_recv - capacity left in msgs */
507 static inline uint32_t
508 vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp,
509 const DAQ_Msg_t *msgs[], unsigned max_recv)
511 uint32_t n_recv, n_left;
/* mask relies on queue_size being a power of two; vpp_daq_instantiate
 * computes it as 1 << log2_queue_size */
512 uint32_t head, next, mask = qp->queue_size - 1;
517 vpp_daq_qpair_lock (qp);
518 next = qp->next_desc;
/* acquire load of the enqueue head kept in shared memory */
519 head = __atomic_load_n (qp->enq_head, __ATOMIC_ACQUIRE);
/* unsigned subtraction yields the pending count even after the counters
 * have wrapped */
520 n_recv = n_left = head - next;
/* clamp to what the caller can take */
522 if (n_left > max_recv)
524 n_left = n_recv = max_recv;
/* NOTE(review): desc_index, buffer_pool and offset are read from shared
 * memory and used without a visible range check; confirm the peer is
 * trusted. */
529 uint32_t desc_index = qp->enq_ring[next & mask];
530 daq_vpp_desc_t *d = qp->descs + desc_index;
531 VPPDescData *dd = qp->desc_data + desc_index;
532 dd->pkthdr.pktlen = d->length;
533 dd->pkthdr.address_space_id = d->address_space_id;
534 dd->msg.data = vc->bpools[d->buffer_pool].base + d->offset;
541 qp->next_desc = next;
542 vpp_daq_qpair_unlock (qp);
/* Receive up to max_recv messages. The queue pairs are first visited in a
 * loop that starts at vc->next_qpair. In polling input mode the status is
 * set to DAQ_RSTAT_OK without waiting; otherwise epoll_wait is called with a
 * timeout argument of 1000 on the queue pair fds and the queue pairs it
 * reports are drained.
 * handle   - the VPP_Context_t of this instance
 * max_recv - capacity of msgs
 * msgs     - output array of message pointers
 * rstat    - receives DAQ_RSTAT_OK, DAQ_RSTAT_TIMEOUT or DAQ_RSTAT_ERROR */
548 vpp_daq_msg_receive (void *handle, const unsigned max_recv,
549 const DAQ_Msg_t *msgs[], DAQ_RecvStatus *rstat)
551 VPP_Context_t *vc = (VPP_Context_t *) handle;
552 uint32_t n_qpairs_left = vc->num_qpairs;
/* NOTE(review): n_events is unsigned, yet it receives the int result of
 * epoll_wait and is later compared with -1 and used as a loop bound; confirm
 * the error case is caught before that loop runs. */
553 uint32_t n, n_events, n_recv = 0;
555 /* first, we visit all qpairs. If we find any work there then we can give
556 * it back immediately. To avoid bias towards qpair 0 we remember what
558 while (n_qpairs_left)
560 VPPQueuePair *qp = vc->qpairs + vc->next_qpair;
562 if ((n = vpp_daq_msg_receive_one (vc, qp, msgs, max_recv - n_recv)))
570 if (vc->next_qpair == vc->num_qpairs)
575 if (vc->input_mode == DAQ_VPP_INPUT_MODE_POLLING)
577 *rstat = DAQ_RSTAT_OK;
583 *rstat = DAQ_RSTAT_OK;
587 n_events = epoll_wait (vc->epoll_fd, vc->epoll_events, vc->num_qpairs, 1000);
591 *rstat = n_events == -1 ? DAQ_RSTAT_ERROR : DAQ_RSTAT_TIMEOUT;
595 for (int i = 0; i < n_events; i++)
598 VPPQueuePair *qp = vc->qpairs + vc->epoll_events[i].data.u32;
600 if ((n = vpp_daq_msg_receive_one (vc, qp, msgs, max_recv - n_recv)))
606 (void) read (qp->enq_fd, &ctr, sizeof (ctr));
609 *rstat = DAQ_RSTAT_OK;
/* Return a message to the peer with a verdict. Under the queue pair lock the
 * descriptor action is set, the descriptor index is placed on the dequeue
 * ring and the dequeue head is stored with release ordering. In interrupt
 * input mode a 64-bit counter increment is then written to the dequeue fd.
 * handle  - the VPP_Context_t of this instance
 * msg     - message obtained from vpp_daq_msg_receive; msg->priv points at
 *           its VPPDescData
 * verdict - DAQ_VERDICT_PASS maps to FORWARD; the other visible action is
 *           DROP */
614 vpp_daq_msg_finalize (void *handle, const DAQ_Msg_t *msg, DAQ_Verdict verdict)
616 VPP_Context_t *vc = (VPP_Context_t *) handle;
617 VPPDescData *dd = msg->priv;
618 VPPQueuePair *qp = vc->qpairs + dd->qpair_index;
621 uint64_t counter_increment = 1;
622 int rv, retv = DAQ_SUCCESS;
624 vpp_daq_qpair_lock (qp);
625 mask = qp->queue_size - 1;
/* plain read of the dequeue head; this side is the one storing it below */
626 head = *qp->deq_head;
627 d = qp->descs + dd->index;
628 if (verdict == DAQ_VERDICT_PASS)
629 d->action = DAQ_VPP_ACTION_FORWARD;
631 d->action = DAQ_VPP_ACTION_DROP;
633 qp->deq_ring[head & mask] = dd->index;
/* release store so the ring entry and action are written before the new
 * head value becomes visible */
635 __atomic_store_n (qp->deq_head, head, __ATOMIC_RELEASE);
637 if (vc->input_mode == DAQ_VPP_INPUT_MODE_INTERRUPT)
/* presumably deq_fd is an eventfd, given the 8-byte counter; TODO confirm */
639 rv = write (qp->deq_fd, &counter_increment, sizeof (counter_increment));
641 if (rv != sizeof (counter_increment))
645 vpp_daq_qpair_unlock (qp);
/* Message pool info hook: writes fixed values (available 128, size 256) into
 * the pool info of the context on every call and copies the structure to the
 * caller.
 * NOTE(review): the two numbers are constants here and are not derived from
 * the queue sizes; confirm that is intended. */
650 vpp_daq_get_msg_pool_info (void *handle, DAQ_MsgPoolInfo_t *info)
652 VPP_Context_t *vc = (VPP_Context_t *) handle;
654 vc->pool.info.available = 128;
655 vc->pool.info.size = 256;
657 *info = vc->pool.info;
/* Module descriptor exported by this file. Members are initialised
 * positionally; the inline comments name the DAQ_ModuleAPI_t field each
 * value is meant for. NULL entries are hooks this module does not
 * implement. */
663 const DAQ_ModuleAPI_t DAQ_MODULE_DATA = {
664 /* .api_version = */ DAQ_MODULE_API_VERSION,
665 /* .api_size = */ sizeof (DAQ_ModuleAPI_t),
666 /* .module_version = */ DAQ_VPP_VERSION,
668 /* .type = */ DAQ_TYPE_INTF_CAPABLE | DAQ_TYPE_INLINE_CAPABLE |
669 DAQ_TYPE_MULTI_INSTANCE,
670 /* .load = */ vpp_daq_module_load,
671 /* .unload = */ vpp_daq_module_unload,
672 /* .get_variable_descs = */ vpp_daq_get_variable_descs,
673 /* .instantiate = */ vpp_daq_instantiate,
674 /* .destroy = */ vpp_daq_destroy,
675 /* .set_filter = */ NULL,
676 /* .start = */ vpp_daq_start,
677 /* .inject = */ NULL,
678 /* .inject_relative = */ NULL,
679 /* .interrupt = */ NULL,
682 /* .get_stats = */ vpp_daq_get_stats,
683 /* .reset_stats = */ vpp_daq_reset_stats,
684 /* .get_snaplen = */ NULL,
685 /* .get_capabilities = */ vpp_daq_get_capabilities,
686 /* .get_datalink_type = */ vpp_daq_get_datalink_type,
687 /* .config_load = */ NULL,
688 /* .config_swap = */ NULL,
689 /* .config_free = */ NULL,
690 /* .msg_receive = */ vpp_daq_msg_receive,
691 /* .msg_finalize = */ vpp_daq_msg_finalize,
692 /* .get_msg_pool_info = */ vpp_daq_get_msg_pool_info,