From: Florin Coras Date: Thu, 2 Aug 2018 17:45:44 +0000 (-0700) Subject: vcl: support for eventfd mq signaling X-Git-Tag: v18.10-rc1~455 X-Git-Url: https://gerrit.fd.io/r/gitweb?a=commitdiff_plain;h=refs%2Fchanges%2F22%2F13922%2F25;p=vpp.git vcl: support for eventfd mq signaling - support eventfd based mq signaling. Based on configuration, vcl epoll/select can use either condvars or epoll on mq eventfds. - add vcl support for memfd segments - vpp explicitly registers cut-through segments with apps/vcl - if using eventfd, make ldp allow one call to libc_epoll_create. Needed for the message queue epfd - update svm_queue_t to allow blocking calls with eventfd signaling. Change-Id: I064151ac370bbe29bb16c968bf4e3659c8286bea Signed-off-by: Florin Coras --- diff --git a/src/svm/message_queue.c b/src/svm/message_queue.c index e97cab898e8..d6a77e783e3 100644 --- a/src/svm/message_queue.c +++ b/src/svm/message_queue.c @@ -15,6 +15,7 @@ #include #include +#include static inline svm_msg_q_ring_t * svm_msg_q_ring_inline (svm_msg_q_t * mq, u32 ring_index) @@ -235,6 +236,38 @@ svm_msg_q_sub_w_lock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg) svm_queue_sub_raw (mq->q, (u8 *) msg); } +void +svm_msg_q_set_consumer_eventfd (svm_msg_q_t * mq, int fd) +{ + mq->q->consumer_evtfd = fd; +} + +void +svm_msg_q_set_producer_eventfd (svm_msg_q_t * mq, int fd) +{ + mq->q->producer_evtfd = fd; +} + +int +svm_msg_q_alloc_consumer_eventfd (svm_msg_q_t * mq) +{ + int fd; + if ((fd = eventfd (0, EFD_NONBLOCK)) < 0) + return -1; + svm_msg_q_set_consumer_eventfd (mq, fd); + return 0; +} + +int +svm_msg_q_alloc_producer_eventfd (svm_msg_q_t * mq) +{ + int fd; + if ((fd = eventfd (0, EFD_NONBLOCK)) < 0) + return -1; + svm_msg_q_set_producer_eventfd (mq, fd); + return 0; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/svm/message_queue.h b/src/svm/message_queue.h index 4c16c97ca7c..28bf14e545e 100644 --- a/src/svm/message_queue.h +++ b/src/svm/message_queue.h @@ -22,7 +22,6 @@ #include #include -#include #include typedef struct svm_msg_q_ring_ @@ -214,6 +213,40 @@ void *svm_msg_q_msg_data (svm_msg_q_t * mq, svm_msg_q_msg_t * msg); */ svm_msg_q_ring_t *svm_msg_q_ring (svm_msg_q_t * mq, u32 ring_index); +/** + * Set event fd for queue consumer + * + * If set, queue will exclusively use eventfds for signaling. Moreover, + * afterwards, the queue should only be used in non-blocking mode. Waiting + * for events should be done externally using something like epoll. + * + * @param mq message queue + * @param fd consumer eventfd + */ +void svm_msg_q_set_consumer_eventfd (svm_msg_q_t * mq, int fd); + +/** + * Set event fd for queue producer + * + * If set, queue will exclusively use eventfds for signaling. Moreover, + * afterwards, the queue should only be used in non-blocking mode. Waiting + * for events should be done externally using something like epoll. + * + * @param mq message queue + * @param fd producer eventfd + */ +void svm_msg_q_set_producer_eventfd (svm_msg_q_t * mq, int fd); + +/** + * Allocate event fd for queue consumer + */ +int svm_msg_q_alloc_consumer_eventfd (svm_msg_q_t * mq); + +/** + * Allocate event fd for queue consumer + */ +int svm_msg_q_alloc_producer_eventfd (svm_msg_q_t * mq); + /** * Check if message queue is full */ @@ -290,12 +323,13 @@ svm_msg_q_unlock (svm_msg_q_t * mq) /** * Wait for message queue event * - * Must be called with mutex held + * Must be called with mutex held. The queue only works non-blocking + * with eventfds, so handle blocking calls as an exception here. */ static inline void svm_msg_q_wait (svm_msg_q_t * mq) { - pthread_cond_wait (&mq->q->condvar, &mq->q->mutex); + svm_queue_wait (mq->q); } /** @@ -309,13 +343,19 @@ svm_msg_q_wait (svm_msg_q_t * mq) static inline int svm_msg_q_timedwait (svm_msg_q_t * mq, double timeout) { - struct timespec ts; + return svm_queue_timedwait (mq->q, timeout); +} + +static inline int +svm_msg_q_get_consumer_eventfd (svm_msg_q_t * mq) +{ + return mq->q->consumer_evtfd; +} - ts.tv_sec = unix_time_now () + (u32) timeout; - ts.tv_nsec = (timeout - (u32) timeout) * 1e9; - if (pthread_cond_timedwait (&mq->q->condvar, &mq->q->mutex, &ts)) - return -1; - return 0; +static inline int +svm_msg_q_get_producer_eventfd (svm_msg_q_t * mq) +{ + return mq->q->producer_evtfd; } #endif /* SRC_SVM_MESSAGE_QUEUE_H_ */ diff --git a/src/svm/queue.c b/src/svm/queue.c index 0fa1fe9b230..771033d7d8a 100644 --- a/src/svm/queue.c +++ b/src/svm/queue.c @@ -27,6 +27,7 @@ #include #include #include +#include svm_queue_t * svm_queue_init (void *base, int nels, int elsize) @@ -127,6 +128,63 @@ svm_queue_send_signal (svm_queue_t * q, u8 is_prod) } } +static inline void +svm_queue_wait_inline (svm_queue_t * q) +{ + if (q->producer_evtfd == -1) + { + pthread_cond_wait (&q->condvar, &q->mutex); + } + else + { + /* Fake a wait for event. We could use epoll but that would mean + * using yet another fd. Should do for now */ + u32 cursize = q->cursize; + pthread_mutex_unlock (&q->mutex); + while (q->cursize == cursize) + CLIB_PAUSE (); + pthread_mutex_lock (&q->mutex); + } +} + +void +svm_queue_wait (svm_queue_t * q) +{ + svm_queue_wait_inline (q); +} + +static inline int +svm_queue_timedwait_inline (svm_queue_t * q, double timeout) +{ + struct timespec ts; + ts.tv_sec = unix_time_now () + (u32) timeout; + ts.tv_nsec = (timeout - (u32) timeout) * 1e9; + + if (q->producer_evtfd == -1) + { + return pthread_cond_timedwait (&q->condvar, &q->mutex, &ts); + } + else + { + double max_time = unix_time_now () + timeout; + u32 cursize = q->cursize; + int rv; + + pthread_mutex_unlock (&q->mutex); + while (q->cursize == cursize && unix_time_now () < max_time) + CLIB_PAUSE (); + rv = unix_time_now () < max_time ? 0 : ETIMEDOUT; + pthread_mutex_lock (&q->mutex); + return rv; + } +} + +int +svm_queue_timedwait (svm_queue_t * q, double timeout) +{ + return svm_queue_timedwait_inline (q, timeout); +} + /* * svm_queue_add_nolock */ @@ -139,9 +197,7 @@ svm_queue_add_nolock (svm_queue_t * q, u8 * elem) if (PREDICT_FALSE (q->cursize == q->maxsize)) { while (q->cursize == q->maxsize) - { - (void) pthread_cond_wait (&q->condvar, &q->mutex); - } + svm_queue_wait_inline (q); } tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); @@ -170,6 +226,9 @@ svm_queue_add_raw (svm_queue_t * q, u8 * elem) q->tail = (q->tail + 1) % q->maxsize; q->cursize++; + + if (q->cursize == 1) + svm_queue_send_signal (q, 1); } @@ -201,9 +260,7 @@ svm_queue_add (svm_queue_t * q, u8 * elem, int nowait) return (-2); } while (q->cursize == q->maxsize) - { - (void) pthread_cond_wait (&q->condvar, &q->mutex); - } + svm_queue_wait_inline (q); } tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); @@ -253,9 +310,7 @@ svm_queue_add2 (svm_queue_t * q, u8 * elem, u8 * elem2, int nowait) return (-2); } while (q->cursize + 1 == q->maxsize) - { - (void) pthread_cond_wait (&q->condvar, &q->mutex); - } + svm_queue_wait_inline (q); } tailp = (i8 *) (&q->data[0] + q->elsize * q->tail); @@ -317,13 +372,9 @@ svm_queue_sub (svm_queue_t * q, u8 * elem, svm_q_conditional_wait_t cond, } else if (cond == SVM_Q_TIMEDWAIT) { - struct timespec ts; - ts.tv_sec = unix_time_now () + time; - ts.tv_nsec = 0; while (q->cursize == 0 && rc == 0) - { - rc = pthread_cond_timedwait (&q->condvar, &q->mutex, &ts); - } + rc = svm_queue_timedwait_inline (q, time); + if (rc == ETIMEDOUT) { pthread_mutex_unlock (&q->mutex); @@ -333,9 +384,7 @@ svm_queue_sub (svm_queue_t * q, u8 * elem, svm_q_conditional_wait_t cond, else { while (q->cursize == 0) - { - (void) pthread_cond_wait (&q->condvar, &q->mutex); - } + svm_queue_wait_inline (q); } } diff --git a/src/svm/queue.h b/src/svm/queue.h index 75e63a49319..3e8031e897b 100644 --- a/src/svm/queue.h +++ b/src/svm/queue.h @@ -81,6 +81,24 @@ int svm_queue_is_full (svm_queue_t * q); int svm_queue_add_nolock (svm_queue_t * q, u8 * elem); int svm_queue_sub_raw (svm_queue_t * q, u8 * elem); +/** + * Wait for queue event + * + * Must be called with mutex held. + */ +void svm_queue_wait (svm_queue_t * q); + +/** + * Timed wait for queue event + * + * Must be called with mutex held. + * + * @param q svm queue + * @param timeout time in seconds + * @return 0 on success, ETIMEDOUT on timeout or an error + */ +int svm_queue_timedwait (svm_queue_t * q, double timeout); + /** * Add element to queue with mutex held * @param q queue diff --git a/src/tests/vnet/session/tcp_echo.c b/src/tests/vnet/session/tcp_echo.c index f37a7272b23..140a198c2f1 100644 --- a/src/tests/vnet/session/tcp_echo.c +++ b/src/tests/vnet/session/tcp_echo.c @@ -263,7 +263,7 @@ wait_for_state_change (echo_main_t * em, connection_state_t state) return -1; if (em->time_to_stop == 1) return 0; - if (!em->our_event_queue) + if (!em->our_event_queue || em->state < STATE_ATTACHED) continue; if (svm_msg_q_sub (em->our_event_queue, &msg, SVM_Q_NOWAIT, 0)) @@ -347,44 +347,17 @@ application_detach (echo_main_t * em) } static int -memfd_segment_attach (void) -{ - ssvm_private_t _ssvm = { 0 }, *ssvm = &_ssvm; - clib_error_t *error; - int rv; - - if ((error = vl_socket_client_recv_fd_msg (&ssvm->fd, 1, 5))) - { - clib_error_report (error); - return -1; - } - - if ((rv = ssvm_slave_init_memfd (ssvm))) - return rv; - - return 0; -} - -static int -fifo_segment_attach (char *name, u32 size, ssvm_segment_type_t type) +ssvm_segment_attach (char *name, ssvm_segment_type_t type, int fd) { svm_fifo_segment_create_args_t _a, *a = &_a; - clib_error_t *error; int rv; memset (a, 0, sizeof (*a)); a->segment_name = (char *) name; - a->segment_size = size; a->segment_type = type; if (type == SSVM_SEGMENT_MEMFD) - { - if ((error = vl_socket_client_recv_fd_msg (&a->memfd_fd, 1, 5))) - { - clib_error_report (error); - return -1; - } - } + a->memfd_fd = fd; if ((rv = svm_fifo_segment_attach (a))) { @@ -392,6 +365,7 @@ fifo_segment_attach (char *name, u32 size, ssvm_segment_type_t type) return rv; } + vec_reset_length (a->new_segment_indices); return 0; } @@ -400,47 +374,57 @@ vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * mp) { echo_main_t *em = &echo_main; - ssvm_segment_type_t seg_type; + int *fds = 0; + u32 n_fds = 0; if (mp->retval) { clib_warning ("attach failed: %U", format_api_error, clib_net_to_host_u32 (mp->retval)); - em->state = STATE_FAILED; - return; + goto failed; } if (mp->segment_name_length == 0) { clib_warning ("segment_name_length zero"); - return; + goto failed; } - seg_type = em->use_sock_api ? SSVM_SEGMENT_MEMFD : SSVM_SEGMENT_SHM; + ASSERT (mp->app_event_queue_address); + em->our_event_queue = uword_to_pointer (mp->app_event_queue_address, + svm_msg_q_t *); - /* Attach to fifo segment */ - if (fifo_segment_attach ((char *) mp->segment_name, mp->segment_size, - seg_type)) + if (mp->n_fds) { - em->state = STATE_FAILED; - return; - } + vec_validate (fds, mp->n_fds); + vl_socket_client_recv_fd_msg (fds, mp->n_fds, 5); + + if (mp->fd_flags & SESSION_FD_F_VPP_MQ_SEGMENT) + if (ssvm_segment_attach (0, SSVM_SEGMENT_MEMFD, fds[n_fds++])) + goto failed; - /* If we're using memfd segments, read and attach to event qs segment */ - if (seg_type == SSVM_SEGMENT_MEMFD) + if (mp->fd_flags & SESSION_FD_F_MEMFD_SEGMENT) + if (ssvm_segment_attach ((char *) mp->segment_name, + SSVM_SEGMENT_MEMFD, fds[n_fds++])) + goto failed; + + if (mp->fd_flags & SESSION_FD_F_MQ_EVENTFD) + svm_msg_q_set_consumer_eventfd (em->our_event_queue, fds[n_fds++]); + + vec_free (fds); + } + else { - if (memfd_segment_attach ()) - { - clib_warning ("failed to attach to evt q segment"); - em->state = STATE_FAILED; - return; - } + if (ssvm_segment_attach ((char *) mp->segment_name, SSVM_SEGMENT_SHM, + -1)) + goto failed; } - ASSERT (mp->app_event_queue_address); - em->our_event_queue = uword_to_pointer (mp->app_event_queue_address, - svm_msg_q_t *); em->state = STATE_ATTACHED; + return; +failed: + em->state = STATE_FAILED; + return; } static void diff --git a/src/vcl.am b/src/vcl.am index 85fbcd7e730..45cf80c1424 100644 --- a/src/vcl.am +++ b/src/vcl.am @@ -28,6 +28,7 @@ libvppcom_la_SOURCES += \ vcl/vcl_debug.h \ vcl/vcl_event.c \ vcl/vcl_private.h \ + vcl/vcl_private.c \ $(libvppinfra_la_SOURCES) \ $(libsvm_la_SOURCES) \ $(libvlibmemoryclient_la_SOURCES) diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c index c26c460bec8..b7b6b3c26b1 100644 --- a/src/vcl/ldp.c +++ b/src/vcl/ldp.c @@ -67,9 +67,17 @@ typedef struct vcl_poll_t *vcl_poll; u8 select_vcl; u8 epoll_wait_vcl; + u8 vcl_needs_real_epoll; /*< vcl needs next epoll_create to + go to libc_epoll */ + int vcl_mq_epfd; + } ldp_main_t; #define LDP_DEBUG ldp->debug +#define LDBG(_lvl, _fmt, _args...) \ + if (ldp->debug > _lvl) \ + clib_warning (_fmt, ##_args) + static ldp_main_t ldp_main = { .sid_bit_val = (1 << LDP_SID_BIT_MIN), .sid_bit_mask = (1 << LDP_SID_BIT_MIN) - 1, @@ -125,107 +133,96 @@ ldp_sid_from_fd (int fd) static inline int ldp_init (void) { - int rv = 0; + int rv; + + if (PREDICT_TRUE (ldp->init)) + return 0; + + ldp->init = 1; + ldp->vcl_needs_real_epoll = 1; + rv = vppcom_app_create (ldp_get_app_name ()); + if (rv != VPPCOM_OK) + { + fprintf (stderr, "\nLDP<%d>: ERROR: ldp_init: vppcom_app_create()" + " failed! rv = %d (%s)\n", + getpid (), rv, vppcom_retval_str (rv)); + ldp->init = 0; + return rv; + } + ldp->vcl_needs_real_epoll = 0; - if (PREDICT_FALSE (!ldp->init)) + char *env_var_str = getenv (LDP_ENV_DEBUG); + if (env_var_str) { - ldp->init = 1; - rv = vppcom_app_create (ldp_get_app_name ()); - if (rv == VPPCOM_OK) + u32 tmp; + if (sscanf (env_var_str, "%u", &tmp) != 1) + clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in" + " the env var " LDP_ENV_DEBUG " (%s)!", getpid (), + env_var_str); + else { - char *env_var_str = getenv (LDP_ENV_DEBUG); - if (env_var_str) - { - u32 tmp; - if (sscanf (env_var_str, "%u", &tmp) != 1) - clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level " - "specified in the env var " LDP_ENV_DEBUG - " (%s)!", getpid (), env_var_str); - else - { - ldp->debug = tmp; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: configured LDP debug level (%u) " - "from the env var " LDP_ENV_DEBUG "!", - getpid (), ldp->debug); - } - } + ldp->debug = tmp; + LDBG (0, "LDP<%d>: configured LDP debug level (%u) from env var " + LDP_ENV_DEBUG "!", getpid (), ldp->debug); + } + } - env_var_str = getenv (LDP_ENV_APP_NAME); - if (env_var_str) - { - ldp_set_app_name (env_var_str); - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: configured LDP app name (%s) " - "from the env var " LDP_ENV_APP_NAME "!", - getpid (), ldp->app_name); - } + env_var_str = getenv (LDP_ENV_APP_NAME); + if (env_var_str) + { + ldp_set_app_name (env_var_str); + LDBG (0, "LDP<%d>: configured LDP app name (%s) from the env var " + LDP_ENV_APP_NAME "!", getpid (), ldp->app_name); + } - env_var_str = getenv (LDP_ENV_SID_BIT); - if (env_var_str) - { - u32 sb; - if (sscanf (env_var_str, "%u", &sb) != 1) - { - clib_warning ("LDP<%d>: WARNING: Invalid LDP sid bit " - "specified in the env var " - LDP_ENV_SID_BIT " (%s)!" - "sid bit value %d (0x%x)", - getpid (), env_var_str, - ldp->sid_bit_val, ldp->sid_bit_val); - } - else if (sb < LDP_SID_BIT_MIN) - { - ldp->sid_bit_val = (1 << LDP_SID_BIT_MIN); - ldp->sid_bit_mask = ldp->sid_bit_val - 1; - - clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) " - "specified in the env var " - LDP_ENV_SID_BIT " (%s) is too small. " - "Using LDP_SID_BIT_MIN (%d)! " - "sid bit value %d (0x%x)", - getpid (), sb, env_var_str, LDP_SID_BIT_MIN, - ldp->sid_bit_val, ldp->sid_bit_val); - } - else if (sb > LDP_SID_BIT_MAX) - { - ldp->sid_bit_val = (1 << LDP_SID_BIT_MAX); - ldp->sid_bit_mask = ldp->sid_bit_val - 1; - - clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) " - "specified in the env var " - LDP_ENV_SID_BIT " (%s) is too big. " - "Using LDP_SID_BIT_MAX (%d)! " - "sid bit value %d (0x%x)", - getpid (), sb, env_var_str, LDP_SID_BIT_MAX, - ldp->sid_bit_val, ldp->sid_bit_val); - } - else - { - ldp->sid_bit_val = (1 << sb); - ldp->sid_bit_mask = ldp->sid_bit_val - 1; - - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: configured LDP sid bit (%u) " - "from " LDP_ENV_SID_BIT - "! sid bit value %d (0x%x)", getpid (), - sb, ldp->sid_bit_val, ldp->sid_bit_val); - } - } + env_var_str = getenv (LDP_ENV_SID_BIT); + if (env_var_str) + { + u32 sb; + if (sscanf (env_var_str, "%u", &sb) != 1) + { + clib_warning ("LDP<%d>: WARNING: Invalid LDP sid bit specified in" + " the env var " LDP_ENV_SID_BIT " (%s)! sid bit " + "value %d (0x%x)", getpid (), env_var_str, + ldp->sid_bit_val, ldp->sid_bit_val); + } + else if (sb < LDP_SID_BIT_MIN) + { + ldp->sid_bit_val = (1 << LDP_SID_BIT_MIN); + ldp->sid_bit_mask = ldp->sid_bit_val - 1; - clib_time_init (&ldp->clib_time); - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: LDP initialization: done!", getpid ()); + clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) specified in the" + " env var " LDP_ENV_SID_BIT " (%s) is too small. " + "Using LDP_SID_BIT_MIN (%d)! sid bit value %d (0x%x)", + getpid (), sb, env_var_str, LDP_SID_BIT_MIN, + ldp->sid_bit_val, ldp->sid_bit_val); + } + else if (sb > LDP_SID_BIT_MAX) + { + ldp->sid_bit_val = (1 << LDP_SID_BIT_MAX); + ldp->sid_bit_mask = ldp->sid_bit_val - 1; + + clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) specified in the" + " env var " LDP_ENV_SID_BIT " (%s) is too big. Using" + " LDP_SID_BIT_MAX (%d)! sid bit value %d (0x%x)", + getpid (), sb, env_var_str, LDP_SID_BIT_MAX, + ldp->sid_bit_val, ldp->sid_bit_val); } else { - fprintf (stderr, "\nLDP<%d>: ERROR: ldp_init: vppcom_app_create()" - " failed! rv = %d (%s)\n", - getpid (), rv, vppcom_retval_str (rv)); - ldp->init = 0; + ldp->sid_bit_val = (1 << sb); + ldp->sid_bit_mask = ldp->sid_bit_val - 1; + + LDBG (0, "LDP<%d>: configured LDP sid bit (%u) from " + LDP_ENV_SID_BIT "! sid bit value %d (0x%x)", getpid (), sb, + ldp->sid_bit_val, ldp->sid_bit_val); } } - return rv; + + clib_time_init (&ldp->clib_time); + LDBG (0, "LDP<%d>: LDP initialization: done!", getpid ()); + + return 0; } int @@ -1151,11 +1148,9 @@ socket (int domain, int type, int protocol) func_str = "vppcom_session_create"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: : calling %s(): " - "proto %u (%s), is_nonblocking %u", - getpid (), func_str, proto, - vppcom_proto_str (proto), is_nonblocking); + LDBG (0, "LDP<%d>: : calling %s(): proto %u (%s), is_nonblocking %u", + getpid (), func_str, proto, vppcom_proto_str (proto), + is_nonblocking); sid = vppcom_session_create (proto, is_nonblocking); if (sid < 0) @@ -1179,8 +1174,7 @@ socket (int domain, int type, int protocol) { func_str = "libc_socket"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: : calling %s()", getpid (), func_str); + LDBG (0, "LDP<%d>: : calling %s()", getpid (), func_str); rv = libc_socket (domain, type, protocol); } @@ -2765,10 +2759,8 @@ listen (int fd, int n) { func_str = "vppcom_session_listen"; - if (LDP_DEBUG > 0) - clib_warning - ("LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x), n %d", - getpid (), fd, fd, func_str, sid, sid, n); + LDBG (0, "LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x), n %d", + getpid (), fd, fd, func_str, sid, sid, n); rv = vppcom_session_listen (sid, n); if (rv != VPPCOM_OK) @@ -2781,9 +2773,8 @@ listen (int fd, int n) { func_str = "libc_listen"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): n %d", - getpid (), fd, fd, func_str, n); + LDBG (0, "LDP<%d>: fd %d (0x%x): calling %s(): n %d", getpid (), fd, + fd, func_str, n); rv = libc_listen (fd, n); } @@ -2966,10 +2957,17 @@ epoll_create1 (int flags) if ((errno = -ldp_init ())) return -1; + if (ldp->vcl_needs_real_epoll) + { + rv = libc_epoll_create1 (flags); + ldp->vcl_needs_real_epoll = 0; + ldp->vcl_mq_epfd = rv; + LDBG (0, "LDP<%d>: created vcl epfd %u", getpid (), rv); + return rv; + } func_str = "vppcom_epoll_create"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: calling %s()", getpid (), func_str); + LDBG (1, "LDP<%d>: calling %s()", getpid (), func_str); rv = vppcom_epoll_create (); @@ -3007,121 +3005,105 @@ epoll_create (int size) int epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) { - int rv; + u32 vep_idx = ldp_sid_from_fd (epfd), sid; const char *func_str; - u32 vep_idx = ldp_sid_from_fd (epfd); + int rv; if ((errno = -ldp_init ())) return -1; - if (PREDICT_TRUE (vep_idx != INVALID_SESSION_ID)) + if (PREDICT_FALSE (vep_idx == INVALID_SESSION_ID)) { - u32 sid = ldp_sid_from_fd (fd); + /* The LDP epoll_create1 always creates VCL epfd's. + * The app should never have a kernel base epoll fd unless it + * was acquired outside of the LD_PRELOAD process context. + * In any case, if we get one, punt it to libc_epoll_ctl. + */ + func_str = "libc_epoll_ctl"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x), " - "sid %d (0x%x)", getpid (), epfd, epfd, - vep_idx, vep_idx, sid, sid); + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): op %d, fd %d (0x%x)," + " event %p", getpid (), epfd, epfd, func_str, op, fd, fd, event); - if (sid != INVALID_SESSION_ID) - { - func_str = "vppcom_epoll_ctl"; + rv = libc_epoll_ctl (epfd, op, fd, event); + goto done; + } - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "vep_idx %d (0x%x), op %d, sid %u (0x%x), event %p", - getpid (), epfd, epfd, func_str, vep_idx, vep_idx, - sid, sid, event); + sid = ldp_sid_from_fd (fd); - rv = vppcom_epoll_ctl (vep_idx, op, sid, event); - if (rv != VPPCOM_OK) - { - errno = -rv; - rv = -1; - } + LDBG (0, "LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x), sid %d (0x%x)", + getpid (), epfd, epfd, vep_idx, vep_idx, sid, sid); + + if (sid != INVALID_SESSION_ID) + { + func_str = "vppcom_epoll_ctl"; + + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): vep_idx %d (0x%x)," + " op %d, sid %u (0x%x), event %p", getpid (), epfd, epfd, + func_str, vep_idx, vep_idx, sid, sid, event); + + rv = vppcom_epoll_ctl (vep_idx, op, sid, event); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; } - else + } + else + { + int libc_epfd; + u32 size = sizeof (epfd); + + func_str = "vppcom_session_attr[GET_LIBC_EPFD]"; + libc_epfd = vppcom_session_attr (vep_idx, VPPCOM_ATTR_GET_LIBC_EPFD, 0, + 0); + LDBG (1, "LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): %s() " + "returned libc_epfd %d (0x%x)", getpid (), epfd, epfd, + vep_idx, vep_idx, func_str, libc_epfd, libc_epfd); + + if (!libc_epfd) { - int libc_epfd; - u32 size = sizeof (epfd); + func_str = "libc_epoll_create1"; - func_str = "vppcom_session_attr[GET_LIBC_EPFD]"; - libc_epfd = vppcom_session_attr (vep_idx, - VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): " - "%s() returned libc_epfd %d (0x%x)", - getpid (), epfd, epfd, vep_idx, vep_idx, - func_str, libc_epfd, libc_epfd); + LDBG (1, "LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): " + "calling %s(): EPOLL_CLOEXEC", getpid (), epfd, epfd, + vep_idx, vep_idx, func_str); - if (!libc_epfd) + libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); + if (libc_epfd < 0) { - func_str = "libc_epoll_create1"; - - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): " - "calling %s(): EPOLL_CLOEXEC", - getpid (), epfd, epfd, vep_idx, vep_idx, - func_str); + rv = libc_epfd; + goto done; + } - libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); - if (libc_epfd < 0) - { - rv = libc_epfd; - goto done; - } + func_str = "vppcom_session_attr[SET_LIBC_EPFD]"; + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): vep_idx %d (0x%x)," + " VPPCOM_ATTR_SET_LIBC_EPFD, libc_epfd %d (0x%x), size %d", + getpid (), epfd, epfd, func_str, vep_idx, vep_idx, libc_epfd, + libc_epfd, size); - func_str = "vppcom_session_attr[SET_LIBC_EPFD]"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "vep_idx %d (0x%x), VPPCOM_ATTR_SET_LIBC_EPFD, " - "libc_epfd %d (0x%x), size %d", - getpid (), epfd, epfd, func_str, - vep_idx, vep_idx, libc_epfd, libc_epfd, size); - - rv = vppcom_session_attr (vep_idx, VPPCOM_ATTR_SET_LIBC_EPFD, - &libc_epfd, &size); - if (rv < 0) - { - errno = -rv; - rv = -1; - goto done; - } - } - else if (PREDICT_FALSE (libc_epfd < 0)) + rv = vppcom_session_attr (vep_idx, VPPCOM_ATTR_SET_LIBC_EPFD, + &libc_epfd, &size); + if (rv < 0) { - errno = -epfd; + errno = -rv; rv = -1; goto done; } - - func_str = "libc_epoll_ctl"; - - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "libc_epfd %d (0x%x), op %d, " - "fd %d (0x%x), event %p", - getpid (), epfd, epfd, func_str, - libc_epfd, libc_epfd, op, fd, fd, event); - - rv = libc_epoll_ctl (libc_epfd, op, fd, event); } - } - else - { - /* The LDP epoll_create1 always creates VCL epfd's. - * The app should never have a kernel base epoll fd unless it - * was acquired outside of the LD_PRELOAD process context. - * In any case, if we get one, punt it to libc_epoll_ctl. - */ + else if (PREDICT_FALSE (libc_epfd < 0)) + { + errno = -epfd; + rv = -1; + goto done; + } + func_str = "libc_epoll_ctl"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "op %d, fd %d (0x%x), event %p", - getpid (), epfd, epfd, func_str, op, fd, fd, event); + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): libc_epfd %d (0x%x), " + "op %d, fd %d (0x%x), event %p", getpid (), epfd, epfd, func_str, + libc_epfd, libc_epfd, op, fd, fd, event); - rv = libc_epoll_ctl (epfd, op, fd, event); + rv = libc_epoll_ctl (libc_epfd, op, fd, event); } done: @@ -3144,15 +3126,13 @@ done: } static inline int -ldp_epoll_pwait (int epfd, struct epoll_event *events, - int maxevents, int timeout, const sigset_t * sigmask) +ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, + int timeout, const sigset_t * sigmask) { - const char *func_str; - int rv = 0; - double time_to_wait = (double) 0; - double time_out, now = 0; + double time_to_wait = (double) 0, time_out, now = 0; u32 vep_idx = ldp_sid_from_fd (epfd); - int libc_epfd; + int libc_epfd, rv = 0; + const char *func_str; if ((errno = -ldp_init ())) return -1; @@ -3163,6 +3143,9 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, return -1; } + if (epfd == ldp->vcl_mq_epfd) + return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); + if (PREDICT_FALSE (vep_idx == INVALID_SESSION_ID)) { clib_warning ("LDP<%d>: ERROR: epfd %d (0x%x): bad vep_idx %d (0x%x)!", @@ -3183,24 +3166,19 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, goto done; } - if (LDP_DEBUG > 2) - clib_warning ("LDP<%d>: epfd %d (0x%x): vep_idx %d (0x%x), " - "libc_epfd %d (0x%x), events %p, maxevents %d, " - "timeout %d, sigmask %p: time_to_wait %.02f", - getpid (), epfd, epfd, vep_idx, vep_idx, - libc_epfd, libc_epfd, events, maxevents, timeout, - sigmask, time_to_wait, time_out); + LDBG (2, "LDP<%d>: epfd %d (0x%x): vep_idx %d (0x%x), libc_epfd %d (0x%x), " + "events %p, maxevents %d, timeout %d, sigmask %p: time_to_wait %.02f", + getpid (), epfd, epfd, vep_idx, vep_idx, libc_epfd, libc_epfd, events, + maxevents, timeout, sigmask, time_to_wait, time_out); do { if (!ldp->epoll_wait_vcl) { func_str = "vppcom_epoll_wait"; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "vep_idx %d (0x%x), events %p, maxevents %d", - getpid (), epfd, epfd, func_str, - vep_idx, vep_idx, events, maxevents); + LDBG (3, "LDP<%d>: epfd %d (0x%x): calling %s(): vep_idx %d (0x%x)," + " events %p, maxevents %d", getpid (), epfd, epfd, func_str, + vep_idx, vep_idx, events, maxevents); rv = vppcom_epoll_wait (vep_idx, events, maxevents, 0); if (rv > 0) @@ -3222,12 +3200,10 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, { func_str = "libc_epoll_pwait"; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "libc_epfd %d (0x%x), events %p, " - "maxevents %d, sigmask %p", - getpid (), epfd, epfd, func_str, - libc_epfd, libc_epfd, events, maxevents, sigmask); + LDBG (3, "LDP<%d>: epfd %d (0x%x): calling %s(): libc_epfd %d " + "(0x%x), events %p, maxevents %d, sigmask %p", getpid (), + epfd, epfd, func_str, libc_epfd, libc_epfd, events, + maxevents, sigmask); rv = libc_epoll_pwait (libc_epfd, events, maxevents, 1, sigmask); if (rv != 0) diff --git a/src/vcl/vcl_bapi.c b/src/vcl/vcl_bapi.c index 311df64528c..55c38032f92 100644 --- a/src/vcl/vcl_bapi.c +++ b/src/vcl/vcl_bapi.c @@ -61,15 +61,35 @@ static void vcm->app_state = STATE_APP_ENABLED; } +static int +ssvm_segment_attach (char *name, ssvm_segment_type_t type, int fd) +{ + svm_fifo_segment_create_args_t _a, *a = &_a; + int rv; + + memset (a, 0, sizeof (*a)); + a->segment_name = (char *) name; + a->segment_type = type; + + if (type == SSVM_SEGMENT_MEMFD) + a->memfd_fd = fd; + + if ((rv = svm_fifo_segment_attach (a))) + { + clib_warning ("svm_fifo_segment_attach ('%s') failed", name); + return rv; + } + vec_reset_length (a->new_segment_indices); + return 0; +} + static void vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * mp) { - static svm_fifo_segment_create_args_t _a; - svm_fifo_segment_create_args_t *a = &_a; - int rv; + u32 n_fds = 0; + int *fds = 0; - memset (a, 0, sizeof (*a)); if (mp->retval) { clib_warning ("VCL<%d>: attach failed: %U", getpid (), @@ -77,30 +97,41 @@ vl_api_application_attach_reply_t_handler (vl_api_application_attach_reply_t * return; } - if (mp->segment_name_length == 0) + vcm->app_event_queue = uword_to_pointer (mp->app_event_queue_address, + svm_msg_q_t *); + if (mp->n_fds) { - clib_warning ("VCL<%d>: segment_name_length zero", getpid ()); - return; - } + vec_validate (fds, mp->n_fds); + vl_socket_client_recv_fd_msg (fds, mp->n_fds, 5); - a->segment_name = (char *) mp->segment_name; - a->segment_size = mp->segment_size; + if (mp->fd_flags & SESSION_FD_F_VPP_MQ_SEGMENT) + if (ssvm_segment_attach ("vpp-mq-seg", SSVM_SEGMENT_MEMFD, + fds[n_fds++])) + return; - ASSERT (mp->app_event_queue_address); + if (mp->fd_flags & SESSION_FD_F_MEMFD_SEGMENT) + if (ssvm_segment_attach ((char *) mp->segment_name, + SSVM_SEGMENT_MEMFD, fds[n_fds++])) + return; - /* Attach to the segment vpp created */ - rv = svm_fifo_segment_attach (a); - vec_reset_length (a->new_segment_indices); - if (PREDICT_FALSE (rv)) + if (mp->fd_flags & SESSION_FD_F_MQ_EVENTFD) + { + svm_msg_q_set_consumer_eventfd (vcm->app_event_queue, fds[n_fds]); + if (vcm->mqs_epfd < 0) + clib_unix_warning ("epoll_create() returned"); + vcl_mq_epoll_add_evfd (vcm->app_event_queue); + n_fds++; + } + + vec_free (fds); + } + else { - clib_warning ("VCL<%d>: svm_fifo_segment_attach ('%s') failed", - getpid (), mp->segment_name); - return; + if (ssvm_segment_attach ((char *) mp->segment_name, SSVM_SEGMENT_SHM, + -1)) + return; } - vcm->app_event_queue = - uword_to_pointer (mp->app_event_queue_address, svm_msg_q_t *); - vcm->app_state = STATE_APP_ATTACHED; } @@ -128,18 +159,19 @@ vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t * static void vl_api_map_another_segment_t_handler (vl_api_map_another_segment_t * mp) { - static svm_fifo_segment_create_args_t _a; - svm_fifo_segment_create_args_t *a = &_a; - int rv; + ssvm_segment_type_t seg_type = SSVM_SEGMENT_SHM; + int fd = -1; vcm->mounting_segment = 1; - memset (a, 0, sizeof (*a)); - a->segment_name = (char *) mp->segment_name; - a->segment_size = mp->segment_size; - /* Attach to the segment vpp created */ - rv = svm_fifo_segment_attach (a); - vec_reset_length (a->new_segment_indices); - if (PREDICT_FALSE (rv)) + + if (mp->fd_flags) + { + vl_socket_client_recv_fd_msg (&fd, 1, 5); + seg_type = SSVM_SEGMENT_MEMFD; + } + + if (PREDICT_FALSE (ssvm_segment_attach ((char *) mp->segment_name, + seg_type, fd))) { clib_warning ("VCL<%d>: svm_fifo_segment_attach ('%s') failed", getpid (), mp->segment_name); @@ -163,6 +195,39 @@ vl_api_unmap_segment_t_handler (vl_api_unmap_segment_t * mp) VDBG (1, "Unmapped segment '%s'", mp->segment_name); } +static void + vl_api_app_cut_through_registration_add_t_handler + (vl_api_app_cut_through_registration_add_t * mp) +{ + vcl_cut_through_registration_t *ctr; + u32 mqc_index = ~0; + int *fds = 0; + + if (mp->n_fds) + { + ASSERT (mp->n_fds == 2); + vec_validate (fds, mp->n_fds); + vl_socket_client_recv_fd_msg (fds, mp->n_fds, 5); + } + + ctr = vcl_ct_registration_lock_and_alloc (); + ctr->mq = uword_to_pointer (mp->evt_q_address, svm_msg_q_t *); + ctr->peer_mq = uword_to_pointer (mp->peer_evt_q_address, svm_msg_q_t *); + VDBG (0, "Adding ct registration %u", vcl_ct_registration_index (ctr)); + + if (mp->fd_flags & SESSION_FD_F_MQ_EVENTFD) + { + svm_msg_q_set_consumer_eventfd (ctr->mq, fds[0]); + svm_msg_q_set_producer_eventfd (ctr->peer_mq, fds[1]); + mqc_index = vcl_mq_epoll_add_evfd (ctr->mq); + ctr->epoll_evt_conn_index = mqc_index; + vec_free (fds); + } + vcl_ct_registration_lookup_add (mp->evt_q_address, + vcl_ct_registration_index (ctr)); + vcl_ct_registration_unlock (); +} + static void vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) { @@ -483,19 +548,20 @@ vl_api_accept_session_t_handler (vl_api_accept_session_t * mp) VCL_SESSION_UNLOCK (); } -#define foreach_sock_msg \ -_(SESSION_ENABLE_DISABLE_REPLY, session_enable_disable_reply) \ -_(BIND_SOCK_REPLY, bind_sock_reply) \ -_(UNBIND_SOCK_REPLY, unbind_sock_reply) \ -_(ACCEPT_SESSION, accept_session) \ -_(CONNECT_SESSION_REPLY, connect_session_reply) \ -_(DISCONNECT_SESSION, disconnect_session) \ -_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \ -_(RESET_SESSION, reset_session) \ -_(APPLICATION_ATTACH_REPLY, application_attach_reply) \ -_(APPLICATION_DETACH_REPLY, application_detach_reply) \ -_(MAP_ANOTHER_SEGMENT, map_another_segment) \ -_(UNMAP_SEGMENT, unmap_segment) +#define foreach_sock_msg \ +_(SESSION_ENABLE_DISABLE_REPLY, session_enable_disable_reply) \ +_(BIND_SOCK_REPLY, bind_sock_reply) \ +_(UNBIND_SOCK_REPLY, unbind_sock_reply) \ +_(ACCEPT_SESSION, accept_session) \ +_(CONNECT_SESSION_REPLY, connect_session_reply) \ +_(DISCONNECT_SESSION, disconnect_session) \ +_(DISCONNECT_SESSION_REPLY, disconnect_session_reply) \ +_(RESET_SESSION, reset_session) \ +_(APPLICATION_ATTACH_REPLY, application_attach_reply) \ +_(APPLICATION_DETACH_REPLY, application_detach_reply) \ +_(MAP_ANOTHER_SEGMENT, map_another_segment) \ +_(UNMAP_SEGMENT, unmap_segment) \ +_(APP_CUT_THROUGH_REGISTRATION_ADD, app_cut_through_registration_add) \ void vppcom_api_hookup (void) @@ -547,7 +613,8 @@ vppcom_app_send_attach (void) (vcm->cfg.app_scope_local ? APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE : 0) | (vcm->cfg.app_scope_global ? APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE : 0) | (app_is_proxy ? APP_OPTIONS_FLAGS_IS_PROXY : 0) | - APP_OPTIONS_FLAGS_USE_MQ_FOR_CTRL_MSGS; + APP_OPTIONS_FLAGS_USE_MQ_FOR_CTRL_MSGS | + (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0); bmp->options[APP_OPTIONS_PROXY_TRANSPORT] = (u64) ((vcm->cfg.app_proxy_transport_tcp ? 1 << TRANSPORT_PROTO_TCP : 0) | (vcm->cfg.app_proxy_transport_udp ? 1 << TRANSPORT_PROTO_UDP : 0)); @@ -709,31 +776,50 @@ vppcom_connect_to_vpp (char *app_name) { api_main_t *am = &api_main; vppcom_cfg_t *vcl_cfg = &vcm->cfg; - int rv = VPPCOM_OK; - - if (!vcl_cfg->vpp_api_filename) - vcl_cfg->vpp_api_filename = format (0, "/vpe-api%c", 0); - VDBG (0, "VCL<%d>: app (%s) connecting to VPP api (%s)...", - getpid (), app_name, vcl_cfg->vpp_api_filename); - - if (vl_client_connect_to_vlib ((char *) vcl_cfg->vpp_api_filename, app_name, - vcm->cfg.vpp_api_q_length) < 0) + if (vcl_cfg->vpp_api_socket_name) { - clib_warning ("VCL<%d>: app (%s) connect failed!", getpid (), app_name); - rv = VPPCOM_ECONNREFUSED; + if (vl_socket_client_connect ((char *) vcl_cfg->vpp_api_socket_name, + app_name, 0 /* default rx/tx buffer */ )) + { + clib_warning ("VCL<%d>: app (%s) socket connect failed!", + getpid (), app_name); + return VPPCOM_ECONNREFUSED; + } + + if (vl_socket_client_init_shm (0)) + { + clib_warning ("VCL<%d>: app (%s) init shm failed!", + getpid (), app_name); + return VPPCOM_ECONNREFUSED; + } } else { - vcm->vl_input_queue = am->shmem_hdr->vl_input_queue; - vcm->my_client_index = (u32) am->my_client_index; - vcm->app_state = STATE_APP_CONN_VPP; + if (!vcl_cfg->vpp_api_filename) + vcl_cfg->vpp_api_filename = format (0, "/vpe-api%c", 0); + + VDBG (0, "VCL<%d>: app (%s) connecting to VPP api (%s)...", getpid (), + app_name, vcl_cfg->vpp_api_filename); + + if (vl_client_connect_to_vlib ((char *) vcl_cfg->vpp_api_filename, + app_name, vcm->cfg.vpp_api_q_length) < 0) + { + clib_warning ("VCL<%d>: app (%s) connect failed!", getpid (), + app_name); + return VPPCOM_ECONNREFUSED; + } - VDBG (0, "VCL<%d>: app (%s) is connected to VPP!", getpid (), app_name); } + vcm->vl_input_queue = am->shmem_hdr->vl_input_queue; + vcm->my_client_index = (u32) am->my_client_index; + vcm->app_state = STATE_APP_CONN_VPP; + + VDBG (0, "VCL<%d>: app (%s) is connected to VPP!", getpid (), app_name); + vcl_evt (VCL_EVT_INIT, vcm); - return rv; + return VPPCOM_OK; } /* diff --git a/src/vcl/vcl_cfg.c b/src/vcl/vcl_cfg.c index ba3e53e2253..f7bddefbb3b 100644 --- a/src/vcl/vcl_cfg.c +++ b/src/vcl/vcl_cfg.c @@ -49,9 +49,11 @@ vppcom_cfg_init (vppcom_cfg_t * vcl_cfg) vcl_cfg->event_log_path = "/dev/shm"; } -#define VCL_CFG_DBG(_lvl, _fmt, _args...) \ +#define VCFG_DBG(_lvl, _fmt, _args...) \ +{ \ if (vcm->debug > _lvl) \ - fprintf (stderr, _fmt, ##_args) + fprintf (stderr, _fmt "\n", ##_args); \ +} void vppcom_cfg_heapsize (char *conf_fname) { @@ -71,17 +73,16 @@ vppcom_cfg_heapsize (char *conf_fname) fp = fopen (conf_fname, "r"); if (fp == NULL) { - VCL_CFG_DBG (0, "VCL<%d>: using default heapsize %lu (0x%lx)", - getpid (), vcl_cfg->heapsize, vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: using default heapsize %lu (0x%lx)", + getpid (), vcl_cfg->heapsize, vcl_cfg->heapsize); goto defaulted; } argv = calloc (1, sizeof (char *)); if (argv == NULL) { - VCL_CFG_DBG (0, "VCL<%d>: calloc failed, using default heapsize %lu" - " (0x%lx)", getpid (), vcl_cfg->heapsize, - vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: calloc failed, using default heapsize %lu" + " (0x%lx)", getpid (), vcl_cfg->heapsize, vcl_cfg->heapsize); goto defaulted; } @@ -98,18 +99,18 @@ vppcom_cfg_heapsize (char *conf_fname) char **tmp = realloc (argv, argc * sizeof (char *)); if (tmp == NULL) { - VCL_CFG_DBG (0, "VCL<%d>: realloc failed, using default " - "heapsize %lu (0x%lx)", getpid (), - vcl_cfg->heapsize, vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: realloc failed, using default " + "heapsize %lu (0x%lx)", getpid (), + vcl_cfg->heapsize, vcl_cfg->heapsize); goto defaulted; } argv = tmp; arg = strndup (p, 1024); if (arg == NULL) { - VCL_CFG_DBG (0, "VCL<%d>: strndup failed, using default " - "heapsize %ld (0x%lx)", getpid (), - vcl_cfg->heapsize, vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: strndup failed, using default " + "heapsize %ld (0x%lx)", getpid (), + vcl_cfg->heapsize, vcl_cfg->heapsize); goto defaulted; } argv[argc - 1] = arg; @@ -123,9 +124,8 @@ vppcom_cfg_heapsize (char *conf_fname) char **tmp = realloc (argv, (argc + 1) * sizeof (char *)); if (tmp == NULL) { - VCL_CFG_DBG (0, "VCL<%d>: realloc failed, using default heapsize %ld " - "(0x%lx)", getpid (), vcl_cfg->heapsize, - vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: realloc failed, using default heapsize %ld " + "(0x%lx)", getpid (), vcl_cfg->heapsize, vcl_cfg->heapsize); goto defaulted; } argv = tmp; @@ -151,9 +151,9 @@ vppcom_cfg_heapsize (char *conf_fname) } if (size == 0) { - VCL_CFG_DBG (0, "VCL<%d>: parse error '%s %s', using default " - "heapsize %ld (0x%lx)", getpid (), argv[i], - argv[i + 1], vcl_cfg->heapsize, vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: parse error '%s %s', using default " + "heapsize %ld (0x%lx)", getpid (), argv[i], + argv[i + 1], vcl_cfg->heapsize, vcl_cfg->heapsize); goto defaulted; } @@ -163,9 +163,9 @@ vppcom_cfg_heapsize (char *conf_fname) vcl_cfg->heapsize = size << 20; else { - VCL_CFG_DBG (0, "VCL<%d>: parse error '%s %s', using default " - "heapsize %ld (0x%lx)", getpid (), argv[i], - argv[i + 1], vcl_cfg->heapsize, vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: parse error '%s %s', using default " + "heapsize %ld (0x%lx)", getpid (), argv[i], + argv[i + 1], vcl_cfg->heapsize, vcl_cfg->heapsize); goto defaulted; } } @@ -181,10 +181,10 @@ defaulted: MAP_SHARED | MAP_ANONYMOUS, -1, 0); if (vcl_mem == MAP_FAILED) { - VCL_CFG_DBG (0, "VCL<%d>: ERROR: mmap(0, %ld == 0x%lx, " - "PROT_READ | PROT_WRITE,MAP_SHARED | MAP_ANONYMOUS, " - "-1, 0) failed!", getpid (), vcl_cfg->heapsize, - vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: ERROR: mmap(0, %ld == 0x%lx, " + "PROT_READ | PROT_WRITE,MAP_SHARED | MAP_ANONYMOUS, " + "-1, 0) failed!", getpid (), vcl_cfg->heapsize, + vcl_cfg->heapsize); ASSERT (vcl_mem != MAP_FAILED); return; } @@ -206,8 +206,8 @@ defaulted: clib_memcpy (vcl_mem, &_vppcom_main, sizeof (_vppcom_main)); vcm = vcl_mem; - VDBG (0, "VCL<%d>: allocated VCL heap = %p, size %lld (0x%llx)", getpid (), - heap, vcl_cfg->heapsize, vcl_cfg->heapsize); + VCFG_DBG (0, "VCL<%d>: allocated VCL heap = %p, size %ld (0x%lx)", + getpid (), heap, vcl_cfg->heapsize, vcl_cfg->heapsize); } void @@ -217,30 +217,29 @@ vppcom_cfg_read_file (char *conf_fname) int fd; unformat_input_t _input, *input = &_input; unformat_input_t _line_input, *line_input = &_line_input; - u8 vc_cfg_input = 0; - u8 *chroot_path; + u8 vc_cfg_input = 0, *chroot_path; struct stat s; u32 uid, gid, q_len; fd = open (conf_fname, O_RDONLY); if (fd < 0) { - VDBG (0, "VCL<%d>: using default configuration.", getpid (), - conf_fname); + VCFG_DBG (0, "VCL<%d>: using default configuration.", getpid ()); goto file_done; } if (fstat (fd, &s) < 0) { - VDBG (0, "VCL<%d>: failed to stat `%s', using default configuration", - getpid (), conf_fname); + VCFG_DBG (0, + "VCL<%d>: failed to stat `%s', using default configuration", + getpid (), conf_fname); goto file_done; } if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode))) { - VDBG (0, "VCL<%d>: not a regular file `%s', using default " - "configuration", getpid (), conf_fname); + VCFG_DBG (0, "VCL<%d>: not a regular file `%s', using default " + "configuration", getpid (), conf_fname); goto file_done; } @@ -262,9 +261,10 @@ vppcom_cfg_read_file (char *conf_fname) if (unformat (line_input, "heapsize %s", &chroot_path)) { vec_terminate_c_string (chroot_path); - VDBG (0, "VCL<%d>: configured heapsize %s, actual heapsize %lld" - " (0x%llx)", getpid (), chroot_path, vcl_cfg->heapsize, - vcl_cfg->heapsize); + VCFG_DBG (0, + "VCL<%d>: configured heapsize %s, actual heapsize %ld" + " (0x%lx)", getpid (), chroot_path, vcl_cfg->heapsize, + vcl_cfg->heapsize); vec_free (chroot_path); } else if (unformat (line_input, "api-prefix %s", &chroot_path)) @@ -276,176 +276,191 @@ vppcom_cfg_read_file (char *conf_fname) chroot_path, 0); vl_set_memory_root_path ((char *) chroot_path); - VDBG (0, "VCL<%d>: configured api-prefix (%s) and api filename" - " (%s)", getpid (), chroot_path, - vcl_cfg->vpp_api_filename); + VCFG_DBG (0, + "VCL<%d>: configured api-prefix (%s) and api filename" + " (%s)", getpid (), chroot_path, + vcl_cfg->vpp_api_filename); chroot_path = 0; /* Don't vec_free() it! */ } + else if (unformat (line_input, "api-socket-name %s", + &vcl_cfg->vpp_api_socket_name)) + { + vec_terminate_c_string (vcl_cfg->vpp_api_socket_name); + VCFG_DBG (0, "VCL<%d>: configured api-socket-name (%s)", + getpid (), vcl_cfg->vpp_api_socket_name); + } else if (unformat (line_input, "vpp-api-q-length %d", &q_len)) { if (q_len < vcl_cfg->vpp_api_q_length) { - clib_warning ("VCL<%d>: ERROR: configured vpp-api-q-length " - "(%u) is too small! Using default: %u ", - getpid (), q_len, vcl_cfg->vpp_api_q_length); + fprintf (stderr, + "VCL<%d>: ERROR: configured vpp-api-q-length " + "(%u) is too small! Using default: %u ", getpid (), + q_len, vcl_cfg->vpp_api_q_length); } else { vcl_cfg->vpp_api_q_length = q_len; - VDBG (0, "VCL<%d>: configured vpp-api-q-length %u", - getpid (), vcl_cfg->vpp_api_q_length); + VCFG_DBG (0, "VCL<%d>: configured vpp-api-q-length %u", + getpid (), vcl_cfg->vpp_api_q_length); } } else if (unformat (line_input, "uid %d", &uid)) { vl_set_memory_uid (uid); - VDBG (0, "VCL<%d>: configured uid %d", getpid (), uid); + VCFG_DBG (0, "VCL<%d>: configured uid %d", getpid (), uid); } else if (unformat (line_input, "gid %d", &gid)) { vl_set_memory_gid (gid); - VDBG (0, "VCL<%d>: configured gid %d", getpid (), gid); + VCFG_DBG (0, "VCL<%d>: configured gid %d", getpid (), gid); } - else if (unformat (line_input, "segment-baseva 0x%lx", + else if (unformat (line_input, "segment-baseva 0x%x", &vcl_cfg->segment_baseva)) { - VDBG (0, "VCL<%d>: configured segment_baseva 0x%lx", getpid (), - vcl_cfg->segment_baseva); + VCFG_DBG (0, "VCL<%d>: configured segment_baseva 0x%lx", + getpid (), vcl_cfg->segment_baseva); } - else if (unformat (line_input, "segment-size 0x%lx", + else if (unformat (line_input, "segment-size 0x%x", &vcl_cfg->segment_size)) { - VDBG (0, "VCL<%d>: configured segment_size 0x%lx (%ld)", - getpid (), vcl_cfg->segment_size, vcl_cfg->segment_size); + VCFG_DBG (0, "VCL<%d>: configured segment_size 0x%x (%d)", + getpid (), vcl_cfg->segment_size, + vcl_cfg->segment_size); } - else if (unformat (line_input, "segment-size %ld", + else if (unformat (line_input, "segment-size %d", &vcl_cfg->segment_size)) { - VDBG (0, "VCL<%d>: configured segment_size %ld (0x%lx)", - getpid (), vcl_cfg->segment_size, vcl_cfg->segment_size); + VCFG_DBG (0, "VCL<%d>: configured segment_size %d (0x%x)", + getpid (), vcl_cfg->segment_size, + vcl_cfg->segment_size); } - else if (unformat (line_input, "add-segment-size 0x%lx", + else if (unformat (line_input, "add-segment-size 0x%x", &vcl_cfg->add_segment_size)) { - VDBG (0, "VCL<%d>: configured add_segment_size 0x%lx (%ld)", - getpid (), vcl_cfg->add_segment_size, - vcl_cfg->add_segment_size); + VCFG_DBG (0, "VCL<%d>: configured add_segment_size 0x%x (%d)", + getpid (), vcl_cfg->add_segment_size, + vcl_cfg->add_segment_size); } - else if (unformat (line_input, "add-segment-size %ld", + else if (unformat (line_input, "add-segment-size %d", &vcl_cfg->add_segment_size)) { - VDBG (0, "VCL<%d>: configured add_segment_size %ld (0x%lx)", - getpid (), vcl_cfg->add_segment_size, - vcl_cfg->add_segment_size); + VCFG_DBG (0, "VCL<%d>: configured add_segment_size %d (0x%x)", + getpid (), vcl_cfg->add_segment_size, + vcl_cfg->add_segment_size); } else if (unformat (line_input, "preallocated-fifo-pairs %d", &vcl_cfg->preallocated_fifo_pairs)) { - VDBG (0, "VCL<%d>: configured preallocated_fifo_pairs %d " - "(0x%x)", getpid (), vcl_cfg->preallocated_fifo_pairs, - vcl_cfg->preallocated_fifo_pairs); + VCFG_DBG (0, "VCL<%d>: configured preallocated_fifo_pairs %d " + "(0x%x)", getpid (), vcl_cfg->preallocated_fifo_pairs, + vcl_cfg->preallocated_fifo_pairs); } else if (unformat (line_input, "rx-fifo-size 0x%lx", &vcl_cfg->rx_fifo_size)) { - VDBG (0, "VCL<%d>: configured rx_fifo_size 0x%lx (%ld)", - getpid (), vcl_cfg->rx_fifo_size, vcl_cfg->rx_fifo_size); + VCFG_DBG (0, "VCL<%d>: configured rx_fifo_size 0x%x (%d)", + getpid (), vcl_cfg->rx_fifo_size, + vcl_cfg->rx_fifo_size); } - else if (unformat (line_input, "rx-fifo-size %ld", + else if (unformat (line_input, "rx-fifo-size %d", &vcl_cfg->rx_fifo_size)) { - VDBG (0, "VCL<%d>: configured rx_fifo_size %ld (0x%lx)", - getpid (), vcl_cfg->rx_fifo_size, vcl_cfg->rx_fifo_size); + VCFG_DBG (0, "VCL<%d>: configured rx_fifo_size %d (0x%x)", + getpid (), vcl_cfg->rx_fifo_size, + vcl_cfg->rx_fifo_size); } else if (unformat (line_input, "tx-fifo-size 0x%lx", &vcl_cfg->tx_fifo_size)) { - VDBG (0, "VCL<%d>: configured tx_fifo_size 0x%lx (%ld)", - getpid (), vcl_cfg->tx_fifo_size, vcl_cfg->tx_fifo_size); + VCFG_DBG (0, "VCL<%d>: configured tx_fifo_size 0x%x (%d)", + getpid (), vcl_cfg->tx_fifo_size, + vcl_cfg->tx_fifo_size); } else if (unformat (line_input, "tx-fifo-size %ld", &vcl_cfg->tx_fifo_size)) { - VDBG (0, "VCL<%d>: configured tx_fifo_size %ld (0x%lx)", - getpid (), vcl_cfg->tx_fifo_size, vcl_cfg->tx_fifo_size); + VCFG_DBG (0, "VCL<%d>: configured tx_fifo_size %d (0x%x)", + getpid (), vcl_cfg->tx_fifo_size, + vcl_cfg->tx_fifo_size); } else if (unformat (line_input, "event-queue-size 0x%lx", &vcl_cfg->event_queue_size)) { - VDBG (0, "VCL<%d>: configured event_queue_size 0x%lx (%ld)", - getpid (), vcl_cfg->event_queue_size, - vcl_cfg->event_queue_size); + VCFG_DBG (0, "VCL<%d>: configured event_queue_size 0x%x (%d)", + getpid (), vcl_cfg->event_queue_size, + vcl_cfg->event_queue_size); } else if (unformat (line_input, "event-queue-size %ld", &vcl_cfg->event_queue_size)) { - VDBG (0, "VCL<%d>: configured event_queue_size %ld (0x%lx)", - getpid (), vcl_cfg->event_queue_size, - vcl_cfg->event_queue_size); + VCFG_DBG (0, "VCL<%d>: configured event_queue_size %d (0x%x)", + getpid (), vcl_cfg->event_queue_size, + vcl_cfg->event_queue_size); } else if (unformat (line_input, "listen-queue-size 0x%lx", &vcl_cfg->listen_queue_size)) { - VDBG (0, "VCL<%d>: configured listen_queue_size 0x%lx (%ld)", - getpid (), vcl_cfg->listen_queue_size, - vcl_cfg->listen_queue_size); + VCFG_DBG (0, "VCL<%d>: configured listen_queue_size 0x%x (%u)", + getpid (), vcl_cfg->listen_queue_size, + vcl_cfg->listen_queue_size); } else if (unformat (line_input, "listen-queue-size %ld", &vcl_cfg->listen_queue_size)) { - VDBG (0, "VCL<%d>: configured listen_queue_size %ld (0x%lx)", - getpid (), vcl_cfg->listen_queue_size, - vcl_cfg->listen_queue_size); + VCFG_DBG (0, "VCL<%d>: configured listen_queue_size %u (0x%x)", + getpid (), vcl_cfg->listen_queue_size, + vcl_cfg->listen_queue_size); } else if (unformat (line_input, "app-timeout %f", &vcl_cfg->app_timeout)) { - VDBG (0, "VCL<%d>: configured app_timeout %f", - getpid (), vcl_cfg->app_timeout); + VCFG_DBG (0, "VCL<%d>: configured app_timeout %f", + getpid (), vcl_cfg->app_timeout); } else if (unformat (line_input, "session-timeout %f", &vcl_cfg->session_timeout)) { - VDBG (0, "VCL<%d>: configured session_timeout %f", - getpid (), vcl_cfg->session_timeout); + VCFG_DBG (0, "VCL<%d>: configured session_timeout %f", + getpid (), vcl_cfg->session_timeout); } else if (unformat (line_input, "accept-timeout %f", &vcl_cfg->accept_timeout)) { - VDBG (0, "VCL<%d>: configured accept_timeout %f", - getpid (), vcl_cfg->accept_timeout); + VCFG_DBG (0, "VCL<%d>: configured accept_timeout %f", + getpid (), vcl_cfg->accept_timeout); } else if (unformat (line_input, "app-proxy-transport-tcp")) { vcl_cfg->app_proxy_transport_tcp = 1; - VDBG (0, "VCL<%d>: configured app_proxy_transport_tcp (%d)", - getpid (), vcl_cfg->app_proxy_transport_tcp); + VCFG_DBG (0, "VCL<%d>: configured app_proxy_transport_tcp (%d)", + getpid (), vcl_cfg->app_proxy_transport_tcp); } else if (unformat (line_input, "app-proxy-transport-udp")) { vcl_cfg->app_proxy_transport_udp = 1; - VDBG (0, "VCL<%d>: configured app_proxy_transport_udp (%d)", - getpid (), vcl_cfg->app_proxy_transport_udp); + VCFG_DBG (0, "VCL<%d>: configured app_proxy_transport_udp (%d)", + getpid (), vcl_cfg->app_proxy_transport_udp); } else if (unformat (line_input, "app-scope-local")) { vcl_cfg->app_scope_local = 1; - VDBG (0, "VCL<%d>: configured app_scope_local (%d)", - getpid (), vcl_cfg->app_scope_local); + VCFG_DBG (0, "VCL<%d>: configured app_scope_local (%d)", + getpid (), vcl_cfg->app_scope_local); } else if (unformat (line_input, "app-scope-global")) { vcl_cfg->app_scope_global = 1; - VDBG (0, "VCL<%d>: configured app_scope_global (%d)", - getpid (), vcl_cfg->app_scope_global); + VCFG_DBG (0, "VCL<%d>: configured app_scope_global (%d)", + getpid (), vcl_cfg->app_scope_global); } else if (unformat (line_input, "namespace-secret %lu", &vcl_cfg->namespace_secret)) { - VDBG (0, "VCL<%d>: configured namespace_secret %lu (0x%lx)", - getpid (), vcl_cfg->namespace_secret, - vcl_cfg->namespace_secret); + VCFG_DBG (0, "VCL<%d>: configured namespace_secret %lu (0x%lx)", + getpid (), vcl_cfg->namespace_secret, + vcl_cfg->namespace_secret); } else if (unformat (line_input, "namespace-id %v", &vcl_cfg->namespace_id)) @@ -455,19 +470,25 @@ vppcom_cfg_read_file (char *conf_fname) if (nsid_vec_len > max_nsid_vec_len) { _vec_len (vcl_cfg->namespace_id) = max_nsid_vec_len; - VDBG (0, "VCL<%d>: configured namespace_id is too long," - " truncated to %d characters!", - getpid (), max_nsid_vec_len); + VCFG_DBG (0, "VCL<%d>: configured namespace_id is too long," + " truncated to %d characters!", + getpid (), max_nsid_vec_len); } - VDBG (0, "VCL<%d>: configured namespace_id %v", - getpid (), vcl_cfg->namespace_id); + VCFG_DBG (0, "VCL<%d>: configured namespace_id %s", + getpid (), (char *) vcl_cfg->namespace_id); + } + else if (unformat (line_input, "use-mq-eventfd")) + { + vcl_cfg->use_mq_eventfd = 1; + VCFG_DBG (0, "VCL<%d>: configured with mq with eventfd", + getpid ()); } else if (unformat (line_input, "}")) { vc_cfg_input = 0; - VDBG (0, "VCL<%d>: completed parsing vppcom config!", - getpid ()); + VCFG_DBG (0, "VCL<%d>: completed parsing vppcom config!", + getpid ()); goto input_done; } else @@ -502,15 +523,15 @@ vppcom_cfg (vppcom_cfg_t * vcl_cfg) u32 tmp; if (sscanf (env_var_str, "%u", &tmp) != 1) { - VCL_CFG_DBG (0, "VCL<%d>: WARNING: Invalid debug level specified " - "in the environment variable " VPPCOM_ENV_DEBUG - " (%s)!\n", getpid (), env_var_str); + VCFG_DBG (0, "VCL<%d>: WARNING: Invalid debug level specified " + "in the environment variable " VPPCOM_ENV_DEBUG + " (%s)!\n", getpid (), env_var_str); } else { vcm->debug = tmp; - VCL_CFG_DBG (0, "VCL<%d>: configured VCL debug level (%u) from " - VPPCOM_ENV_DEBUG "!", getpid (), vcm->debug); + VCFG_DBG (0, "VCL<%d>: configured VCL debug level (%u) from " + VPPCOM_ENV_DEBUG "!", getpid (), vcm->debug); } } conf_fname = getenv (VPPCOM_ENV_CONF); @@ -527,9 +548,9 @@ vppcom_cfg (vppcom_cfg_t * vcl_cfg) vcl_cfg->vpp_api_filename = format (0, "/%s-vpe-api%c", env_var_str, 0); vl_set_memory_root_path ((char *) env_var_str); - VDBG (0, "VCL<%d>: configured api prefix (%s) and filename (%s) " - "from " VPPCOM_ENV_API_PREFIX "!", - getpid (), env_var_str, vcl_cfg->vpp_api_filename); + VCFG_DBG (0, "VCL<%d>: configured api prefix (%s) and filename (%s) " + "from " VPPCOM_ENV_API_PREFIX "!", getpid (), env_var_str, + vcl_cfg->vpp_api_filename); } env_var_str = getenv (VPPCOM_ENV_APP_NAMESPACE_ID); if (env_var_str) @@ -540,54 +561,63 @@ vppcom_cfg (vppcom_cfg_t * vcl_cfg) vec_validate (vcm->cfg.namespace_id, ns_id_vec_len - 1); clib_memcpy (vcm->cfg.namespace_id, env_var_str, ns_id_vec_len); - VDBG (0, "VCL<%d>: configured namespace_id (%v) from " - VPPCOM_ENV_APP_NAMESPACE_ID "!", getpid (), - vcm->cfg.namespace_id); + VCFG_DBG (0, "VCL<%d>: configured namespace_id (%s) from " + VPPCOM_ENV_APP_NAMESPACE_ID "!", getpid (), + (char *) vcm->cfg.namespace_id); } env_var_str = getenv (VPPCOM_ENV_APP_NAMESPACE_SECRET); if (env_var_str) { u64 tmp; if (sscanf (env_var_str, "%lu", &tmp) != 1) - clib_warning ("VCL<%d>: WARNING: Invalid namespace secret " - "specified in the environment variable " - VPPCOM_ENV_APP_NAMESPACE_SECRET - " (%s)!\n", getpid (), env_var_str); + { + VCFG_DBG (0, "VCL<%d>: WARNING: Invalid namespace secret specified" + " in the environment variable " + VPPCOM_ENV_APP_NAMESPACE_SECRET " (%s)!\n", getpid (), + env_var_str); + } else { vcm->cfg.namespace_secret = tmp; - VDBG (0, "VCL<%d>: configured namespace secret (%lu) from " - VPPCOM_ENV_APP_NAMESPACE_SECRET "!", getpid (), - vcm->cfg.namespace_secret); + VCFG_DBG (0, "VCL<%d>: configured namespace secret (%lu) from " + VPPCOM_ENV_APP_NAMESPACE_SECRET "!", getpid (), + vcm->cfg.namespace_secret); } } if (getenv (VPPCOM_ENV_APP_PROXY_TRANSPORT_TCP)) { vcm->cfg.app_proxy_transport_tcp = 1; - VDBG (0, "VCL<%d>: configured app_proxy_transport_tcp (%u) from " - VPPCOM_ENV_APP_PROXY_TRANSPORT_TCP "!", getpid (), - vcm->cfg.app_proxy_transport_tcp); + VCFG_DBG (0, "VCL<%d>: configured app_proxy_transport_tcp (%u) from " + VPPCOM_ENV_APP_PROXY_TRANSPORT_TCP "!", getpid (), + vcm->cfg.app_proxy_transport_tcp); } if (getenv (VPPCOM_ENV_APP_PROXY_TRANSPORT_UDP)) { vcm->cfg.app_proxy_transport_udp = 1; - VDBG (0, "VCL<%d>: configured app_proxy_transport_udp (%u) from " - VPPCOM_ENV_APP_PROXY_TRANSPORT_UDP "!", getpid (), - vcm->cfg.app_proxy_transport_udp); + VCFG_DBG (0, "VCL<%d>: configured app_proxy_transport_udp (%u) from " + VPPCOM_ENV_APP_PROXY_TRANSPORT_UDP "!", getpid (), + vcm->cfg.app_proxy_transport_udp); } if (getenv (VPPCOM_ENV_APP_SCOPE_LOCAL)) { vcm->cfg.app_scope_local = 1; - VDBG (0, "VCL<%d>: configured app_scope_local (%u) from " - VPPCOM_ENV_APP_SCOPE_LOCAL "!", getpid (), - vcm->cfg.app_scope_local); + VCFG_DBG (0, "VCL<%d>: configured app_scope_local (%u) from " + VPPCOM_ENV_APP_SCOPE_LOCAL "!", getpid (), + vcm->cfg.app_scope_local); } if (getenv (VPPCOM_ENV_APP_SCOPE_GLOBAL)) { vcm->cfg.app_scope_global = 1; - VDBG (0, "VCL<%d>: configured app_scope_global (%u) from " - VPPCOM_ENV_APP_SCOPE_GLOBAL "!", getpid (), - vcm->cfg.app_scope_global); + VCFG_DBG (0, "VCL<%d>: configured app_scope_global (%u) from " + VPPCOM_ENV_APP_SCOPE_GLOBAL "!", getpid (), + vcm->cfg.app_scope_global); + } + env_var_str = getenv (VPPCOM_ENV_VPP_API_SOCKET); + if (env_var_str) + { + vcm->cfg.vpp_api_socket_name = format (0, "%s%c", env_var_str, 0); + VCFG_DBG (0, "VCL<%d>: configured api-socket-name (%s)", getpid (), + vcl_cfg->vpp_api_socket_name); } } diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c new file mode 100644 index 00000000000..48abbafe646 --- /dev/null +++ b/src/vcl/vcl_private.c @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2018 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +vcl_cut_through_registration_t * +vcl_ct_registration_lock_and_alloc (void) +{ + vcl_cut_through_registration_t *cr; + pool_get (vcm->cut_through_registrations, cr); + clib_spinlock_lock (&vcm->ct_registration_lock); + memset (cr, 0, sizeof (*cr)); + cr->epoll_evt_conn_index = -1; + return cr; +} + +u32 +vcl_ct_registration_index (vcl_cut_through_registration_t * ctr) +{ + return (ctr - vcm->cut_through_registrations); +} + +void +vcl_ct_registration_unlock (void) +{ + clib_spinlock_unlock (&vcm->ct_registration_lock); +} + +vcl_cut_through_registration_t * +vcl_ct_registration_get (u32 ctr_index) +{ + if (pool_is_free_index (vcm->cut_through_registrations, ctr_index)) + return 0; + return pool_elt_at_index (vcm->cut_through_registrations, ctr_index); +} + +vcl_cut_through_registration_t * +vcl_ct_registration_lock_and_lookup (uword mq_addr) +{ + uword *p; + clib_spinlock_lock (&vcm->ct_registration_lock); + p = hash_get (vcm->ct_registration_by_mq, mq_addr); + if (!p) + return 0; + return vcl_ct_registration_get (p[0]); +} + +void +vcl_ct_registration_lookup_add (uword mq_addr, u32 ctr_index) +{ + hash_set (vcm->ct_registration_by_mq, mq_addr, ctr_index); +} + +void +vcl_ct_registration_lookup_del (uword mq_addr) +{ + hash_unset (vcm->ct_registration_by_mq, mq_addr); +} + +void +vcl_ct_registration_del (vcl_cut_through_registration_t * ctr) +{ + pool_put (vcm->cut_through_registrations, ctr); +} + +vcl_mq_evt_conn_t * +vcl_mq_evt_conn_alloc (void) +{ + vcl_mq_evt_conn_t *mqc; + pool_get (vcm->mq_evt_conns, mqc); + memset (mqc, 0, sizeof (*mqc)); + return mqc; +} + +u32 +vcl_mq_evt_conn_index (vcl_mq_evt_conn_t * mqc) +{ + return (mqc - vcm->mq_evt_conns); +} + +vcl_mq_evt_conn_t * +vcl_mq_evt_conn_get (u32 mq_conn_idx) +{ + return pool_elt_at_index (vcm->mq_evt_conns, mq_conn_idx); +} + +int +vcl_mq_epoll_add_evfd (svm_msg_q_t * mq) +{ + struct epoll_event e = { 0 }; + vcl_mq_evt_conn_t *mqc; + u32 mqc_index; + int mq_fd; + + mq_fd = svm_msg_q_get_consumer_eventfd (mq); + + if (vcm->mqs_epfd < 0 || mq_fd == -1) + return -1; + + mqc = vcl_mq_evt_conn_alloc (); + mqc_index = vcl_mq_evt_conn_index (mqc); + mqc->mq_fd = mq_fd; + mqc->mq = mq; + + e.events = EPOLLIN; + e.data.u32 = mqc_index; + if (epoll_ctl (vcm->mqs_epfd, EPOLL_CTL_ADD, mq_fd, &e) < 0) + { + clib_warning ("failed to add mq eventfd to mq epoll fd"); + return -1; + } + + return mqc_index; +} + +int +vcl_mq_epoll_del_evfd (u32 mqc_index) +{ + vcl_mq_evt_conn_t *mqc; + + if (vcm->mqs_epfd || mqc_index == ~0) + return -1; + + mqc = vcl_mq_evt_conn_get (mqc_index); + if (epoll_ctl (vcm->mqs_epfd, EPOLL_CTL_DEL, mqc->mq_fd, 0) < 0) + { + clib_warning ("failed to del mq eventfd to mq epoll fd"); + return -1; + } + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index af58cdc3ac9..0506355b83c 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -141,7 +141,6 @@ typedef struct vppcom_epoll_t vep; int libc_epfd; svm_msg_q_t *our_evt_q; - u32 ct_registration; u64 options[16]; vce_event_handler_reg_t *poll_reg; vcl_session_msg_t *accept_evts_fifo; @@ -168,12 +167,14 @@ typedef struct vppcom_cfg_t_ u8 app_scope_global; u8 *namespace_id; u64 namespace_secret; + u8 use_mq_eventfd; f64 app_timeout; f64 session_timeout; f64 accept_timeout; u32 event_ring_size; char *event_log_path; u8 *vpp_api_filename; + u8 *vpp_api_socket_name; } vppcom_cfg_t; void vppcom_cfg (vppcom_cfg_t * vcl_cfg); @@ -181,9 +182,18 @@ void vppcom_cfg (vppcom_cfg_t * vcl_cfg); typedef struct vcl_cut_through_registration_ { svm_msg_q_t *mq; + svm_msg_q_t *peer_mq; u32 sid; + u32 epoll_evt_conn_index; /*< mq evt connection index part of + the mqs evtfd epoll (if used) */ } vcl_cut_through_registration_t; +typedef struct vcl_mq_evt_conn_ +{ + svm_msg_q_t *mq; + int mq_fd; +} vcl_mq_evt_conn_t; + typedef struct vppcom_main_t_ { u8 init; @@ -203,6 +213,15 @@ typedef struct vppcom_main_t_ clib_spinlock_t sessions_lockp; vcl_session_t *sessions; + /** Message queues epoll fd. Initialized only if using mqs with eventfds */ + int mqs_epfd; + + /** Pool of event message queue event connections */ + vcl_mq_evt_conn_t *mq_evt_conns; + + /** Per worker buffer for receiving mq epoll events */ + struct epoll_event *mq_events; + /* Hash table for disconnect processing */ uword *session_index_by_vpp_handles; @@ -214,6 +233,8 @@ typedef struct vppcom_main_t_ /* Our event queue */ svm_msg_q_t *app_event_queue; + svm_msg_q_t **vpp_event_queues; + /* unique segment name counter */ u32 unique_segment_index; @@ -237,6 +258,14 @@ typedef struct vppcom_main_t_ /** Pool of cut through registrations */ vcl_cut_through_registration_t *cut_through_registrations; + /** Lock for accessing ct registration pool */ + clib_spinlock_t ct_registration_lock; + + /** Cut-through registration by mq address hash table */ + uword *ct_registration_by_mq; + + svm_msg_q_msg_t *mq_msg_vector; + /** Flag indicating that a new segment is being mounted */ volatile u32 mounting_segment; @@ -284,6 +313,21 @@ do { \ #define VCL_INVALID_SESSION_INDEX ((u32)~0) +static inline vcl_session_t * +vcl_session_alloc (void) +{ + vcl_session_t *s; + pool_get (vcm->sessions, s); + memset (s, 0, sizeof (*s)); + return s; +} + +static inline void +vcl_session_free (vcl_session_t * s) +{ + pool_put (vcm->sessions, s); +} + static inline vcl_session_t * vcl_session_get (u32 session_index) { @@ -374,6 +418,23 @@ vppcom_session_table_lookup_listener (u64 listener_handle) const char *vppcom_session_state_str (session_state_t state); +/* + * Helpers + */ +vcl_cut_through_registration_t *vcl_ct_registration_lock_and_alloc (void); +void vcl_ct_registration_del (vcl_cut_through_registration_t * ctr); +u32 vcl_ct_registration_index (vcl_cut_through_registration_t * ctr); +void vcl_ct_registration_unlock (void); +vcl_cut_through_registration_t *vcl_ct_registration_get (u32 ctr_index); +vcl_cut_through_registration_t *vcl_ct_registration_lock_and_lookup (uword); +void vcl_ct_registration_lookup_add (uword mq_addr, u32 ctr_index); +void vcl_ct_registration_lookup_del (uword mq_addr); +vcl_mq_evt_conn_t *vcl_mq_evt_conn_alloc (void); +u32 vcl_mq_evt_conn_index (vcl_mq_evt_conn_t * mqc); +vcl_mq_evt_conn_t *vcl_mq_evt_conn_get (u32 mq_conn_idx); +int vcl_mq_epoll_add_evfd (svm_msg_q_t * mq); +int vcl_mq_epoll_del_evfd (u32 mqc_index); + /* * VCL Binary API */ diff --git a/src/vcl/vcl_test_server.c b/src/vcl/vcl_test_server.c index b49383eb73a..7b56d497153 100644 --- a/src/vcl/vcl_test_server.c +++ b/src/vcl/vcl_test_server.c @@ -629,7 +629,6 @@ main (int argc, char **argv) printf ("SERVER (fd %d): TX (%d bytes) - '%s'\n", conn->fd, tx_bytes, conn->buf); } - else // Extraneous read data from non-echo tests??? { xtra++; diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index a74e55af634..8572b69aa13 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -248,47 +248,6 @@ vppcom_wait_for_app_state_change (app_state_t app_state) return VPPCOM_ETIMEDOUT; } -static u32 -vcl_ct_registration_add (svm_msg_q_t * mq, u32 sid) -{ - vcl_cut_through_registration_t *cr; - pool_get (vcm->cut_through_registrations, cr); - cr->mq = mq; - cr->sid = sid; - return (cr - vcm->cut_through_registrations); -} - -static void -vcl_ct_registration_del (u32 ct_index) -{ - pool_put_index (vcm->cut_through_registrations, ct_index); -} - -static vcl_session_t * -vcl_ct_session_get_from_fifo (svm_fifo_t * f, u8 type) -{ - vcl_session_t *s; - s = vcl_session_get (f->client_session_index); - if (s) - { - /* rx fifo */ - if (type == 0 && s->rx_fifo == f) - return s; - /* tx fifo */ - if (type == 1 && s->tx_fifo == f) - return s; - } - s = vcl_session_get (f->master_session_index); - if (s) - { - if (type == 0 && s->rx_fifo == f) - return s; - if (type == 1 && s->tx_fifo == f) - return s; - } - return 0; -} - static void vcl_send_session_accepted_reply (svm_msg_q_t * mq, u32 context, session_handle_t handle, int retval) @@ -303,15 +262,34 @@ vcl_send_session_accepted_reply (svm_msg_q_t * mq, u32 context, app_send_ctrl_evt_to_vpp (mq, app_evt); } +static void +vcl_send_session_disconnected_reply (svm_msg_q_t * mq, u32 context, + session_handle_t handle, int retval) +{ + app_session_evt_t _app_evt, *app_evt = &_app_evt; + session_disconnected_reply_msg_t *rmp; + app_alloc_ctrl_evt_to_vpp (mq, app_evt, + SESSION_CTRL_EVT_DISCONNECTED_REPLY); + rmp = (session_disconnected_reply_msg_t *) app_evt->evt->data; + rmp->handle = handle; + rmp->context = context; + rmp->retval = retval; + app_send_ctrl_evt_to_vpp (mq, app_evt); +} + static u32 vcl_session_accepted_handler (session_accepted_msg_t * mp) { vcl_session_t *session, *listen_session; svm_fifo_t *rx_fifo, *tx_fifo; - u32 session_index; + u32 session_index, vpp_wrk_index; + svm_msg_q_t *evt_q; VCL_SESSION_LOCK (); + session = vcl_session_alloc (); + session_index = vcl_session_index (session); + listen_session = vppcom_session_table_lookup_listener (mp->listener_handle); if (!listen_session) { @@ -322,13 +300,11 @@ vcl_session_accepted_handler (session_accepted_msg_t * mp) getpid (), mp->listener_handle); vcl_send_session_accepted_reply (evt_q, mp->context, mp->handle, VNET_API_ERROR_INVALID_ARGUMENT); + vcl_session_free (session); + VCL_SESSION_UNLOCK (); return VCL_INVALID_SESSION_INDEX; } - pool_get (vcm->sessions, session); - memset (session, 0, sizeof (*session)); - session_index = (u32) (session - vcm->sessions); - rx_fifo = uword_to_pointer (mp->server_rx_fifo, svm_fifo_t *); tx_fifo = uword_to_pointer (mp->server_tx_fifo, svm_fifo_t *); @@ -339,10 +315,11 @@ vcl_session_accepted_handler (session_accepted_msg_t * mp) session->our_evt_q = uword_to_pointer (mp->server_event_queue_address, svm_msg_q_t *); vcl_wait_for_memory (session->vpp_evt_q); - session->ct_registration = vcl_ct_registration_add (session->our_evt_q, - session_index); rx_fifo->master_session_index = session_index; tx_fifo->master_session_index = session_index; + vec_validate (vcm->vpp_event_queues, 0); + evt_q = uword_to_pointer (mp->vpp_event_queue_address, svm_msg_q_t *); + vcm->vpp_event_queues[0] = evt_q; } else { @@ -350,6 +327,10 @@ vcl_session_accepted_handler (session_accepted_msg_t * mp) svm_msg_q_t *); rx_fifo->client_session_index = session_index; tx_fifo->client_session_index = session_index; + + vpp_wrk_index = tx_fifo->master_thread_index; + vec_validate (vcm->vpp_event_queues, vpp_wrk_index); + vcm->vpp_event_queues[vpp_wrk_index] = session->vpp_evt_q; } session->vpp_handle = mp->handle; @@ -382,9 +363,10 @@ vcl_session_accepted_handler (session_accepted_msg_t * mp) static u32 vcl_session_connected_handler (session_connected_msg_t * mp) { - vcl_session_t *session = 0; - u32 session_index; + u32 session_index, vpp_wrk_index; svm_fifo_t *rx_fifo, *tx_fifo; + vcl_session_t *session = 0; + svm_msg_q_t *evt_q; int rv = VPPCOM_OK; session_index = mp->context; @@ -425,12 +407,19 @@ done: svm_msg_q_t *); session->our_evt_q = uword_to_pointer (mp->client_event_queue_address, svm_msg_q_t *); - session->ct_registration = vcl_ct_registration_add (session->our_evt_q, - session_index); + + vec_validate (vcm->vpp_event_queues, 0); + evt_q = uword_to_pointer (mp->vpp_event_queue_address, svm_msg_q_t *); + vcm->vpp_event_queues[0] = evt_q; } else - session->vpp_evt_q = uword_to_pointer (mp->vpp_event_queue_address, - svm_msg_q_t *); + { + session->vpp_evt_q = uword_to_pointer (mp->vpp_event_queue_address, + svm_msg_q_t *); + vpp_wrk_index = tx_fifo->master_thread_index; + vec_validate (vcm->vpp_event_queues, vpp_wrk_index); + vcm->vpp_event_queues[vpp_wrk_index] = session->vpp_evt_q; + } session->rx_fifo = rx_fifo; session->tx_fifo = tx_fifo; @@ -609,13 +598,23 @@ done: return rv; } +static svm_msg_q_t * +vcl_session_vpp_evt_q (vcl_session_t * s) +{ + if (vcl_session_is_ct (s)) + return vcm->vpp_event_queues[0]; + else + return vcm->vpp_event_queues[s->tx_fifo->master_thread_index]; +} + static int vppcom_session_disconnect (u32 session_index) { - int rv; + svm_msg_q_t *vpp_evt_q; vcl_session_t *session; - u64 vpp_handle; session_state_t state; + u64 vpp_handle; + int rv; VCL_SESSION_LOCK_AND_GET (session_index, &session); @@ -637,8 +636,9 @@ vppcom_session_disconnect (u32 session_index) if (state & STATE_CLOSE_ON_EMPTY) { - vppcom_send_disconnect_session_reply (vpp_handle, session_index, - 0 /* rv */ ); + vpp_evt_q = vcl_session_vpp_evt_q (session); + vcl_send_session_disconnected_reply (vpp_evt_q, vcm->my_client_index, + vpp_handle, 0); VDBG (1, "VCL<%d>: vpp handle 0x%llx, sid %u: sending disconnect " "REPLY...", getpid (), vpp_handle, session_index); } @@ -666,6 +666,11 @@ vppcom_app_create (char *app_name) { vcm->init = 1; vppcom_cfg (&vcm->cfg); + vcl_cfg = &vcm->cfg; + + vcm->mqs_epfd = -1; + if (vcl_cfg->use_mq_eventfd) + vcm->mqs_epfd = epoll_create (1); clib_spinlock_init (&vcm->session_fifo_lockp); clib_fifo_validate (vcm->client_session_index_fifo, @@ -676,11 +681,16 @@ vppcom_app_create (char *app_name) vcm->main_cpu = os_get_thread_index (); vcm->session_index_by_vpp_handles = hash_create (0, sizeof (uword)); + vcm->ct_registration_by_mq = hash_create (0, sizeof (uword)); + clib_spinlock_init (&vcm->ct_registration_lock); clib_time_init (&vcm->clib_time); vppcom_init_error_string_table (); svm_fifo_segment_main_init (vcl_cfg->segment_baseva, 20 /* timeout in secs */ ); + vec_validate (vcm->mq_events, 64); + vec_validate (vcm->mq_msg_vector, 128); + vec_reset_length (vcm->mq_msg_vector); } if (vcm->my_client_index == ~0) @@ -868,8 +878,21 @@ vppcom_session_close (uint32_t session_index) } VCL_SESSION_LOCK_AND_GET (session_index, &session); - if (session->our_evt_q) - vcl_ct_registration_del (session->ct_registration); + if (vcl_session_is_ct (session)) + { + vcl_cut_through_registration_t *ctr; + uword mq_addr; + + mq_addr = pointer_to_uword (session->our_evt_q); + ctr = vcl_ct_registration_lock_and_lookup (mq_addr); + ASSERT (ctr); + if (ctr->epoll_evt_conn_index != ~0) + vcl_mq_epoll_del_evfd (ctr->epoll_evt_conn_index); + VDBG (0, "Removing ct registration %u", + vcl_ct_registration_index (ctr)); + vcl_ct_registration_del (ctr); + vcl_ct_registration_unlock (); + } vpp_handle = session->vpp_handle; if (vpp_handle != ~0) @@ -1084,6 +1107,7 @@ vppcom_session_accept (uint32_t listen_session_index, vppcom_endpt_t * ep, handle: client_session_index = vcl_session_accepted_handler (&accepted_msg); + listen_session = vcl_session_get (listen_session_index); VCL_SESSION_LOCK_AND_GET (client_session_index, &client_session); rv = client_session_index; @@ -1115,6 +1139,7 @@ handle: svm_msg_q_t *); else vpp_evt_q = client_session->vpp_evt_q; + vcl_send_session_accepted_reply (vpp_evt_q, client_session->client_context, client_session->vpp_handle, 0); @@ -1287,23 +1312,25 @@ vppcom_session_read_internal (uint32_t session_index, void *buf, int n, VCL_SESSION_UNLOCK (); mq = vcl_session_is_ct (s) ? s->our_evt_q : vcm->app_event_queue; + svm_fifo_unset_event (rx_fifo); is_full = svm_fifo_is_full (rx_fifo); if (svm_fifo_is_empty (rx_fifo)) { - svm_fifo_unset_event (rx_fifo); if (is_nonblocking) { rv = VPPCOM_OK; goto done; } - svm_msg_q_lock (mq); while (1) { + svm_msg_q_lock (mq); if (svm_msg_q_is_empty (mq)) svm_msg_q_wait (mq); + svm_msg_q_sub_w_lock (mq, &msg); e = svm_msg_q_msg_data (mq, &msg); + svm_msg_q_unlock (mq); if (!vcl_is_rx_evt_for_session (e, session_index, s->our_evt_q != 0)) { @@ -1311,25 +1338,29 @@ vppcom_session_read_internal (uint32_t session_index, void *buf, int n, svm_msg_q_free_msg (mq, &msg); continue; } + svm_fifo_unset_event (rx_fifo); if (svm_fifo_is_empty (rx_fifo)) { svm_msg_q_free_msg (mq, &msg); continue; } svm_msg_q_free_msg (mq, &msg); - svm_msg_q_unlock (mq); break; } } if (s->is_dgram) - n_read = app_recv_dgram_raw (rx_fifo, buf, n, &s->transport, 1, peek); + n_read = app_recv_dgram_raw (rx_fifo, buf, n, &s->transport, 0, peek); else - n_read = app_recv_stream_raw (rx_fifo, buf, n, 1, peek); + n_read = app_recv_stream_raw (rx_fifo, buf, n, 0, peek); if (vcl_session_is_ct (s) && is_full) - app_send_io_evt_to_vpp (s->vpp_evt_q, rx_fifo, SESSION_IO_EVT_CT_RX, - SVM_Q_WAIT); + { + /* If the peer is not polling send notification */ + if (!svm_fifo_has_event (s->rx_fifo)) + app_send_io_evt_to_vpp (s->vpp_evt_q, s->rx_fifo, + SESSION_IO_EVT_CT_RX, SVM_Q_WAIT); + } if (VPPCOM_DEBUG > 2) { @@ -1452,18 +1483,20 @@ vppcom_session_write (uint32_t session_index, void *buf, size_t n) rv = VPPCOM_EWOULDBLOCK; goto done; } - svm_msg_q_lock (mq); while (1) { + svm_msg_q_lock (mq); if (!svm_fifo_is_full (tx_fifo)) { svm_msg_q_unlock (mq); break; } - if (svm_msg_q_is_empty (mq) && svm_msg_q_timedwait (mq, 10e-6)) - continue; + while (svm_msg_q_is_empty (mq) && svm_msg_q_timedwait (mq, 10e-6)) + ; svm_msg_q_sub_w_lock (mq, &msg); e = svm_msg_q_msg_data (mq, &msg); + svm_msg_q_unlock (mq); + if (!vcl_is_tx_evt_for_session (e, session_index, s->our_evt_q != 0)) { @@ -1477,7 +1510,6 @@ vppcom_session_write (uint32_t session_index, void *buf, size_t n) continue; } svm_msg_q_free_msg (mq, &msg); - svm_msg_q_unlock (mq); break; } } @@ -1510,6 +1542,31 @@ done: return rv; } +static vcl_session_t * +vcl_ct_session_get_from_fifo (svm_fifo_t * f, u8 type) +{ + vcl_session_t *s; + s = vcl_session_get (f->client_session_index); + if (s) + { + /* rx fifo */ + if (type == 0 && s->rx_fifo == f) + return s; + /* tx fifo */ + if (type == 1 && s->tx_fifo == f) + return s; + } + s = vcl_session_get (f->master_session_index); + if (s) + { + if (type == 0 && s->rx_fifo == f) + return s; + if (type == 1 && s->tx_fifo == f) + return s; + } + return 0; +} + static inline int vppcom_session_write_ready (vcl_session_t * session, u32 session_index) { @@ -1552,6 +1609,22 @@ vppcom_session_write_ready (vcl_session_t * session, u32 session_index) return svm_fifo_max_enqueue (session->tx_fifo); } +static inline int +vcl_mq_dequeue_batch (svm_msg_q_t * mq) +{ + svm_msg_q_msg_t *msg; + u32 n_msgs; + int i; + + n_msgs = svm_msg_q_size (mq); + for (i = 0; i < n_msgs; i++) + { + vec_add2 (vcm->mq_msg_vector, msg, 1); + svm_msg_q_sub_w_lock (mq, msg); + } + return n_msgs; +} + static int vcl_select_handle_mq (svm_msg_q_t * mq, unsigned long n_bits, unsigned long *read_map, unsigned long *write_map, @@ -1559,12 +1632,13 @@ vcl_select_handle_mq (svm_msg_q_t * mq, unsigned long n_bits, u32 * bits_set) { session_disconnected_msg_t *disconnected_msg; + session_connected_msg_t *connected_msg; session_accepted_msg_t *accepted_msg; vcl_session_msg_t *vcl_msg; vcl_session_t *session; - svm_msg_q_msg_t msg; + svm_msg_q_msg_t *msg; session_event_t *e; - u32 n_msgs, i, sid; + u32 i, sid; u64 handle; svm_msg_q_lock (mq); @@ -1594,23 +1668,20 @@ vcl_select_handle_mq (svm_msg_q_t * mq, unsigned long n_bits, } } } + vcl_mq_dequeue_batch (mq); svm_msg_q_unlock (mq); - n_msgs = svm_msg_q_size (mq); - for (i = 0; i < n_msgs; i++) + for (i = 0; i < vec_len (vcm->mq_msg_vector); i++) { - if (svm_msg_q_sub (mq, &msg, SVM_Q_WAIT, 0)) - { - clib_warning ("message queue returned"); - continue; - } - e = svm_msg_q_msg_data (mq, &msg); + msg = vec_elt_at_index (vcm->mq_msg_vector, i); + e = svm_msg_q_msg_data (mq, msg); switch (e->event_type) { case FIFO_EVENT_APP_RX: sid = e->fifo->client_session_index; session = vcl_session_get (sid); - if (!session || svm_fifo_is_empty (session->rx_fifo)) + svm_fifo_unset_event (session->rx_fifo); + if (svm_fifo_is_empty (session->rx_fifo)) break; if (sid < n_bits && read_map) { @@ -1632,7 +1703,8 @@ vcl_select_handle_mq (svm_msg_q_t * mq, unsigned long n_bits, case SESSION_IO_EVT_CT_TX: session = vcl_ct_session_get_from_fifo (e->fifo, 0); sid = vcl_session_index (session); - if (!session || svm_fifo_is_empty (session->rx_fifo)) + svm_fifo_unset_event (session->rx_fifo); + if (svm_fifo_is_empty (session->rx_fifo)) break; if (sid < n_bits && read_map) { @@ -1672,6 +1744,10 @@ vcl_select_handle_mq (svm_msg_q_t * mq, unsigned long n_bits, *bits_set += 1; } break; + case SESSION_CTRL_EVT_CONNECTED: + connected_msg = (session_connected_msg_t *) e->data; + vcl_session_connected_handler (connected_msg); + break; case SESSION_CTRL_EVT_DISCONNECTED: disconnected_msg = (session_disconnected_msg_t *) e->data; sid = vcl_session_get_index_from_handle (disconnected_msg->handle); @@ -1685,20 +1761,73 @@ vcl_select_handle_mq (svm_msg_q_t * mq, unsigned long n_bits, clib_warning ("unhandled: %u", e->event_type); break; } - svm_msg_q_free_msg (mq, &msg); + svm_msg_q_free_msg (mq, msg); } + vec_reset_length (vcm->mq_msg_vector); return *bits_set; } +static int +vppcom_select_condvar (unsigned long n_bits, unsigned long *read_map, + unsigned long *write_map, unsigned long *except_map, + double time_to_wait, u32 * bits_set) +{ + double total_wait = 0, wait_slice; + vcl_cut_through_registration_t *cr; + + time_to_wait = (time_to_wait == -1) ? 10e9 : time_to_wait; + wait_slice = vcm->cut_through_registrations ? 10e-6 : time_to_wait; + do + { + /* *INDENT-OFF* */ + pool_foreach (cr, vcm->cut_through_registrations, ({ + vcl_select_handle_mq (cr->mq, n_bits, read_map, write_map, except_map, + 0, bits_set); + })); + /* *INDENT-ON* */ + + vcl_select_handle_mq (vcm->app_event_queue, n_bits, read_map, write_map, + except_map, time_to_wait, bits_set); + total_wait += wait_slice; + if (*bits_set) + return *bits_set; + } + while (total_wait < time_to_wait); + + return 0; +} + +static int +vppcom_select_eventfd (unsigned long n_bits, unsigned long *read_map, + unsigned long *write_map, unsigned long *except_map, + double time_to_wait, u32 * bits_set) +{ + vcl_mq_evt_conn_t *mqc; + int __clib_unused n_read; + int n_mq_evts, i; + u64 buf; + + vec_validate (vcm->mq_events, pool_elts (vcm->mq_evt_conns)); + n_mq_evts = epoll_wait (vcm->mqs_epfd, vcm->mq_events, + vec_len (vcm->mq_events), time_to_wait); + for (i = 0; i < n_mq_evts; i++) + { + mqc = vcl_mq_evt_conn_get (vcm->mq_events[i].data.u32); + n_read = read (mqc->mq_fd, &buf, sizeof (buf)); + vcl_select_handle_mq (mqc->mq, n_bits, read_map, write_map, + except_map, 0, bits_set); + } + + return (n_mq_evts > 0 ? (int) *bits_set : 0); +} + int vppcom_select (unsigned long n_bits, unsigned long *read_map, unsigned long *write_map, unsigned long *except_map, double time_to_wait) { u32 sid, minbits = clib_max (n_bits, BITS (uword)), bits_set = 0; - vcl_cut_through_registration_t *cr; - double total_wait = 0, wait_slice; vcl_session_t *session = 0; int rv; @@ -1736,17 +1865,14 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map, /* *INDENT-OFF* */ clib_bitmap_foreach (sid, vcm->wr_bitmap, ({ - VCL_SESSION_LOCK(); if (!(session = vcl_session_get (sid))) { - VCL_SESSION_UNLOCK(); VDBG (0, "VCL<%d>: session %d specified in write_map is closed.", getpid (), sid); return VPPCOM_EBADFD; } rv = svm_fifo_is_full (session->tx_fifo); - VCL_SESSION_UNLOCK(); if (!rv) { clib_bitmap_set_no_check (write_map, sid, 1); @@ -1757,18 +1883,16 @@ vppcom_select (unsigned long n_bits, unsigned long *read_map, check_rd: if (!read_map) goto check_mq; + clib_bitmap_foreach (sid, vcm->rd_bitmap, ({ - VCL_SESSION_LOCK(); if (!(session = vcl_session_get (sid))) { - VCL_SESSION_UNLOCK(); VDBG (0, "VCL<%d>: session %d specified in write_map is closed.", getpid (), sid); return VPPCOM_EBADFD; } rv = vppcom_session_read_ready (session); - VCL_SESSION_UNLOCK(); if (rv) { clib_bitmap_set_no_check (read_map, sid, 1); @@ -1778,23 +1902,13 @@ check_rd: /* *INDENT-ON* */ check_mq: - wait_slice = vcm->cut_through_registrations ? 10e-6 : time_to_wait; - do - { - /* *INDENT-OFF* */ - pool_foreach (cr, vcm->cut_through_registrations, ({ - vcl_select_handle_mq (cr->mq, n_bits, read_map, write_map, except_map, - 0, &bits_set); - })); - /* *INDENT-ON* */ - vcl_select_handle_mq (vcm->app_event_queue, n_bits, read_map, write_map, - except_map, time_to_wait, &bits_set); - total_wait += wait_slice; - if (bits_set) - return bits_set; - } - while (total_wait < time_to_wait); + if (vcm->cfg.use_mq_eventfd) + vppcom_select_eventfd (n_bits, read_map, write_map, except_map, + time_to_wait, &bits_set); + else + vppcom_select_condvar (n_bits, read_map, write_map, except_map, + time_to_wait, &bits_set); return (bits_set); } @@ -2130,11 +2244,11 @@ vcl_epoll_wait_handle_mq (svm_msg_q_t * mq, struct epoll_event *events, session_disconnected_msg_t *disconnected_msg; session_connected_msg_t *connected_msg; session_accepted_msg_t *accepted_msg; - u32 sid = ~0, session_events, n_msgs; u64 session_evt_data = ~0, handle; + u32 sid = ~0, session_events; vcl_session_msg_t *vcl_msg; vcl_session_t *session; - svm_msg_q_msg_t msg; + svm_msg_q_msg_t *msg; session_event_t *e; u8 add_event; int i; @@ -2153,61 +2267,58 @@ vcl_epoll_wait_handle_mq (svm_msg_q_t * mq, struct epoll_event *events, } else { - if (svm_msg_q_timedwait (mq, wait_for_time / 1e3)) + if (svm_msg_q_timedwait (mq, wait_for_time / 1e3) < 0) { svm_msg_q_unlock (mq); return 0; } } } + vcl_mq_dequeue_batch (mq); svm_msg_q_unlock (mq); - n_msgs = svm_msg_q_size (mq); - for (i = 0; i < n_msgs; i++) + for (i = 0; i < vec_len (vcm->mq_msg_vector); i++) { - if (svm_msg_q_sub (mq, &msg, SVM_Q_WAIT, 0)) - { - clib_warning ("message queue returned"); - continue; - } - e = svm_msg_q_msg_data (mq, &msg); + msg = vec_elt_at_index (vcm->mq_msg_vector, i); + e = svm_msg_q_msg_data (mq, msg); add_event = 0; switch (e->event_type) { case FIFO_EVENT_APP_RX: sid = e->fifo->client_session_index; - clib_spinlock_lock (&vcm->sessions_lockp); session = vcl_session_get (sid); session_events = session->vep.ev.events; - if ((EPOLLIN & session->vep.ev.events) - && !svm_fifo_is_empty (session->rx_fifo)) + if (!(EPOLLIN & session->vep.ev.events)) + break; + svm_fifo_unset_event (session->rx_fifo); + if (!svm_fifo_is_empty (session->rx_fifo)) { add_event = 1; events[*num_ev].events |= EPOLLIN; session_evt_data = session->vep.ev.data.u64; } - clib_spinlock_unlock (&vcm->sessions_lockp); break; case FIFO_EVENT_APP_TX: sid = e->fifo->client_session_index; - clib_spinlock_lock (&vcm->sessions_lockp); session = vcl_session_get (sid); session_events = session->vep.ev.events; - if ((EPOLLOUT & session_events) - && !svm_fifo_is_full (session->tx_fifo)) + if (!(EPOLLOUT & session_events)) + break; + if (!svm_fifo_is_full (session->tx_fifo)) { add_event = 1; events[*num_ev].events |= EPOLLOUT; session_evt_data = session->vep.ev.data.u64; } - clib_spinlock_unlock (&vcm->sessions_lockp); break; case SESSION_IO_EVT_CT_TX: session = vcl_ct_session_get_from_fifo (e->fifo, 0); sid = vcl_session_index (session); session_events = session->vep.ev.events; - if ((EPOLLIN & session->vep.ev.events) - && !svm_fifo_is_empty (session->rx_fifo)) + if (!(EPOLLIN & session->vep.ev.events)) + break; + svm_fifo_unset_event (session->rx_fifo); + if (!svm_fifo_is_empty (session->rx_fifo)) { add_event = 1; events[*num_ev].events |= EPOLLIN; @@ -2218,8 +2329,9 @@ vcl_epoll_wait_handle_mq (svm_msg_q_t * mq, struct epoll_event *events, session = vcl_ct_session_get_from_fifo (e->fifo, 1); sid = vcl_session_index (session); session_events = session->vep.ev.events; - if ((EPOLLOUT & session_events) - && !svm_fifo_is_full (session->tx_fifo)) + if (!(EPOLLOUT & session_events)) + break; + if (!svm_fifo_is_full (session->tx_fifo)) { add_event = 1; events[*num_ev].events |= EPOLLOUT; @@ -2276,11 +2388,10 @@ vcl_epoll_wait_handle_mq (svm_msg_q_t * mq, struct epoll_event *events, break; default: clib_warning ("unhandled: %u", e->event_type); - svm_msg_q_free_msg (mq, &msg); + svm_msg_q_free_msg (mq, msg); continue; } - - svm_msg_q_free_msg (mq, &msg); + svm_msg_q_free_msg (mq, msg); if (add_event) { @@ -2297,17 +2408,70 @@ vcl_epoll_wait_handle_mq (svm_msg_q_t * mq, struct epoll_event *events, break; } } + + vec_reset_length (vcm->mq_msg_vector); return *num_ev; } +static int +vppcom_epoll_wait_condvar (struct epoll_event *events, int maxevents, + double wait_for_time) +{ + vcl_cut_through_registration_t *cr; + double total_wait = 0, wait_slice; + u32 num_ev = 0; + int rv; + + wait_for_time = (wait_for_time == -1) ? (double) 10e9 : wait_for_time; + wait_slice = vcm->cut_through_registrations ? 10e-6 : wait_for_time; + + do + { + /* *INDENT-OFF* */ + pool_foreach (cr, vcm->cut_through_registrations, ({ + vcl_epoll_wait_handle_mq (cr->mq, events, maxevents, 0, &num_ev); + })); + /* *INDENT-ON* */ + + rv = vcl_epoll_wait_handle_mq (vcm->app_event_queue, events, maxevents, + num_ev ? 0 : wait_slice, &num_ev); + if (rv) + total_wait += wait_slice; + if (num_ev) + return num_ev; + } + while (total_wait < wait_for_time); + return (int) num_ev; +} + +static int +vppcom_epoll_wait_eventfd (struct epoll_event *events, int maxevents, + double wait_for_time) +{ + vcl_mq_evt_conn_t *mqc; + int __clib_unused n_read; + int n_mq_evts, i; + u32 n_evts = 0; + u64 buf; + + vec_validate (vcm->mq_events, pool_elts (vcm->mq_evt_conns)); + n_mq_evts = epoll_wait (vcm->mqs_epfd, vcm->mq_events, + vec_len (vcm->mq_events), wait_for_time); + for (i = 0; i < n_mq_evts; i++) + { + mqc = vcl_mq_evt_conn_get (vcm->mq_events[i].data.u32); + n_read = read (mqc->mq_fd, &buf, sizeof (buf)); + vcl_epoll_wait_handle_mq (mqc->mq, events, maxevents, 0, &n_evts); + } + + return (int) n_evts; +} + int vppcom_epoll_wait (uint32_t vep_idx, struct epoll_event *events, int maxevents, double wait_for_time) { - vcl_cut_through_registration_t *cr; vcl_session_t *vep_session; - double total_wait = 0, wait_slice; - u32 num_ev = 0; if (PREDICT_FALSE (maxevents <= 0)) { @@ -2328,25 +2492,11 @@ vppcom_epoll_wait (uint32_t vep_idx, struct epoll_event *events, clib_spinlock_unlock (&vcm->sessions_lockp); memset (events, 0, sizeof (*events) * maxevents); - wait_slice = vcm->cut_through_registrations ? 10e-6 : wait_for_time; - - do - { - /* *INDENT-OFF* */ - pool_foreach (cr, vcm->cut_through_registrations, ({ - vcl_epoll_wait_handle_mq (cr->mq, events, maxevents, 0, &num_ev); - })); - /* *INDENT-ON* */ - vcl_epoll_wait_handle_mq (vcm->app_event_queue, events, maxevents, - num_ev ? 0 : wait_slice, &num_ev); - total_wait += wait_slice; - if (num_ev) - return num_ev; - } - while (total_wait < wait_for_time); + if (vcm->cfg.use_mq_eventfd) + return vppcom_epoll_wait_eventfd (events, maxevents, wait_for_time); - return num_ev; + return vppcom_epoll_wait_condvar (events, maxevents, wait_for_time); } int @@ -2917,12 +3067,15 @@ vppcom_session_recvfrom (uint32_t session_index, void *buffer, return VPPCOM_EAFNOSUPPORT; } - if (session->transport.is_ip4) - clib_memcpy (ep->ip, &session->transport.rmt_ip.ip4, - sizeof (ip4_address_t)); - else - clib_memcpy (ep->ip, &session->transport.rmt_ip.ip6, - sizeof (ip6_address_t)); + if (ep) + { + if (session->transport.is_ip4) + clib_memcpy (ep->ip, &session->transport.rmt_ip.ip4, + sizeof (ip4_address_t)); + else + clib_memcpy (ep->ip, &session->transport.rmt_ip.ip6, + sizeof (ip6_address_t)); + } return rv; } @@ -3054,6 +3207,12 @@ vppcom_poll (vcl_poll_t * vp, uint32_t n_sids, double wait_for_time) return num_ev; } +int +vppcom_mq_epoll_fd (void) +{ + return vcm->mqs_epfd; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vcl/vppcom.h b/src/vcl/vppcom.h index 8d414f6cc77..eff79c3a5ac 100644 --- a/src/vcl/vppcom.h +++ b/src/vcl/vppcom.h @@ -31,17 +31,18 @@ extern "C" /* * VPPCOM Public API Definitions, Enums, and Data Structures */ -#define INVALID_SESSION_ID (~0) -#define VPPCOM_CONF_DEFAULT "/etc/vpp/vcl.conf" -#define VPPCOM_ENV_CONF "VCL_CONFIG" -#define VPPCOM_ENV_DEBUG "VCL_DEBUG" -#define VPPCOM_ENV_API_PREFIX "VCL_API_PREFIX" -#define VPPCOM_ENV_APP_PROXY_TRANSPORT_TCP "VCL_APP_PROXY_TRANSPORT_TCP" -#define VPPCOM_ENV_APP_PROXY_TRANSPORT_UDP "VCL_APP_PROXY_TRANSPORT_UDP" -#define VPPCOM_ENV_APP_NAMESPACE_ID "VCL_APP_NAMESPACE_ID" -#define VPPCOM_ENV_APP_NAMESPACE_SECRET "VCL_APP_NAMESPACE_SECRET" -#define VPPCOM_ENV_APP_SCOPE_LOCAL "VCL_APP_SCOPE_LOCAL" -#define VPPCOM_ENV_APP_SCOPE_GLOBAL "VCL_APP_SCOPE_GLOBAL" +#define INVALID_SESSION_ID (~0) +#define VPPCOM_CONF_DEFAULT "/etc/vpp/vcl.conf" +#define VPPCOM_ENV_CONF "VCL_CONFIG" +#define VPPCOM_ENV_DEBUG "VCL_DEBUG" +#define VPPCOM_ENV_API_PREFIX "VCL_API_PREFIX" +#define VPPCOM_ENV_APP_PROXY_TRANSPORT_TCP "VCL_APP_PROXY_TRANSPORT_TCP" +#define VPPCOM_ENV_APP_PROXY_TRANSPORT_UDP "VCL_APP_PROXY_TRANSPORT_UDP" +#define VPPCOM_ENV_APP_NAMESPACE_ID "VCL_APP_NAMESPACE_ID" +#define VPPCOM_ENV_APP_NAMESPACE_SECRET "VCL_APP_NAMESPACE_SECRET" +#define VPPCOM_ENV_APP_SCOPE_LOCAL "VCL_APP_SCOPE_LOCAL" +#define VPPCOM_ENV_APP_SCOPE_GLOBAL "VCL_APP_SCOPE_GLOBAL" +#define VPPCOM_ENV_VPP_API_SOCKET "VCL_VPP_API_SOCKET" typedef enum { @@ -250,6 +251,7 @@ extern int vppcom_session_sendto (uint32_t session_index, void *buffer, vppcom_endpt_t * ep); extern int vppcom_poll (vcl_poll_t * vp, uint32_t n_sids, double wait_for_time); +extern int vppcom_mq_epoll_fd (void); /* * VPPCOM Event Functions diff --git a/src/vlibmemory/socket_api.c b/src/vlibmemory/socket_api.c index 5587611d65a..51d35792244 100644 --- a/src/vlibmemory/socket_api.c +++ b/src/vlibmemory/socket_api.c @@ -453,7 +453,7 @@ vl_sock_api_send_fd_msg (int socket_fd, int fds[], int n_fds) { struct msghdr mh = { 0 }; struct iovec iov[1]; - char ctl[CMSG_SPACE (sizeof (int)) * n_fds]; + char ctl[CMSG_SPACE (sizeof (int) * n_fds)]; struct cmsghdr *cmsg; char *msg = "fdmsg"; int rv; @@ -470,7 +470,7 @@ vl_sock_api_send_fd_msg (int socket_fd, int fds[], int n_fds) cmsg->cmsg_len = CMSG_LEN (sizeof (int) * n_fds); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; - memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * n_fds); + clib_memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * n_fds); rv = sendmsg (socket_fd, &mh, 0); if (rv < 0) diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index 757e12e1b58..3e127df3bd2 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -312,6 +312,12 @@ application_init (application_t * app, u32 api_client_index, u8 * app_name, } else { + if (options[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD) + { + clib_warning ("mq eventfds can only be used if socket transport is " + "used for api"); + return VNET_API_ERROR_APP_UNSUPPORTED_CFG; + } seg_type = SSVM_SEGMENT_PRIVATE; } @@ -336,6 +342,8 @@ application_init (application_t * app, u32 api_client_index, u8 * app_name, props->tx_fifo_size = options[APP_OPTIONS_TX_FIFO_SIZE]; if (options[APP_OPTIONS_EVT_QUEUE_SIZE]) props->evt_q_size = options[APP_OPTIONS_EVT_QUEUE_SIZE]; + if (options[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD) + props->use_mq_eventfd = 1; if (options[APP_OPTIONS_TLS_ENGINE]) app->tls_engine = options[APP_OPTIONS_TLS_ENGINE]; props->segment_type = seg_type; @@ -970,6 +978,8 @@ application_free_local_session (application_t * app, local_session_t * s) local_session_t * application_get_local_session (application_t * app, u32 session_index) { + if (pool_is_free_index (app->local_sessions, session_index)) + return 0; return pool_elt_at_index (app->local_sessions, session_index); } @@ -1078,6 +1088,23 @@ application_stop_local_listen (application_t * server, session_handle_t lh) return 0; } +static void +application_local_session_fix_eventds (svm_msg_q_t * sq, svm_msg_q_t * cq) +{ + int fd; + + /* + * segment manager initializes only the producer eventds, since vpp is + * typically the producer. But for local sessions, we also pass to the + * apps the mqs they listen on for events from peer apps, so they are also + * consumer fds. + */ + fd = svm_msg_q_get_producer_eventfd (sq); + svm_msg_q_set_consumer_eventfd (sq, fd); + fd = svm_msg_q_get_producer_eventfd (cq); + svm_msg_q_set_consumer_eventfd (cq, fd); +} + int application_local_session_connect (u32 table_index, application_t * client, application_t * server, @@ -1125,8 +1152,12 @@ application_local_session_connect (u32 table_index, application_t * client, return seg_index; } seg = segment_manager_get_segment_w_lock (sm, seg_index); - sq = segment_manager_alloc_queue (seg, props->evt_q_size); - cq = segment_manager_alloc_queue (seg, cprops->evt_q_size); + sq = segment_manager_alloc_queue (seg, props); + cq = segment_manager_alloc_queue (seg, cprops); + + if (props->use_mq_eventfd) + application_local_session_fix_eventds (sq, cq); + ls->server_evt_q = pointer_to_uword (sq); ls->client_evt_q = pointer_to_uword (cq); rv = segment_manager_try_alloc_fifos (seg, props->rx_fifo_size, @@ -1273,7 +1304,7 @@ application_local_session_disconnect (u32 app_index, local_session_t * ls) if (app_index == ls->client_index) { - send_local_session_disconnect_callback (ls->app_index, ls); + mq_send_local_session_disconnected_cb (ls->app_index, ls); } else { @@ -1292,7 +1323,7 @@ application_local_session_disconnect (u32 app_index, local_session_t * ls) } else { - send_local_session_disconnect_callback (client->index, ls); + mq_send_local_session_disconnected_cb (client->index, ls); } } diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index f6c81275826..22df21e6140 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -267,8 +267,8 @@ application_local_session_listener_has_transport (local_session_t * ls) return (tp != TRANSPORT_PROTO_NONE); } -void send_local_session_disconnect_callback (u32 app_index, - local_session_t * ls); +void mq_send_local_session_disconnected_cb (u32 app_index, + local_session_t * ls); int application_connect (u32 client_index, u32 api_context, session_endpoint_t * sep); diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index c8fa37f0b9d..524e7b53663 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -475,7 +475,7 @@ vnet_application_attach (vnet_app_attach_args_t * a) a->session_cb_vft))) return clib_error_return_code (0, rv, 0, "app init: %d", rv); - a->app_event_queue_address = pointer_to_uword (app->event_queue); + a->app_evt_q = app->event_queue; sm = segment_manager_get (app->first_segment_manager); fs = segment_manager_get_segment_w_lock (sm, 0); @@ -569,7 +569,18 @@ vnet_disconnect_session (vnet_disconnect_args_t * a) if (session_handle_is_local (a->handle)) { local_session_t *ls; - ls = application_get_local_session_from_handle (a->handle); + + /* Disconnect reply came to worker 1 not main thread */ + if (vlib_get_thread_index () == 1) + { + vlib_rpc_call_main_thread (vnet_disconnect_session, (u8 *) a, + sizeof (*a)); + return 0; + } + + if (!(ls = application_get_local_session_from_handle (a->handle))) + return 0; + if (ls->app_index != a->app_index && ls->client_index != a->app_index) { clib_warning ("app %u is neither client nor server for session %u", diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index daba169a297..ba6d2660d7f 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -42,7 +42,7 @@ typedef struct _vnet_app_attach_args_t * Results */ ssvm_private_t *segment; - u64 app_event_queue_address; + svm_msg_q_t *app_evt_q; u32 app_index; } vnet_app_attach_args_t; @@ -138,6 +138,7 @@ typedef enum _(USE_GLOBAL_SCOPE, "App can use global session scope") \ _(USE_LOCAL_SCOPE, "App can use local session scope") \ _(USE_MQ_FOR_CTRL_MSGS, "Use message queue for ctr msgs") \ + _(EVT_MQ_USE_EVENTFD, "Use eventfds for signaling") \ typedef enum _app_options { @@ -153,6 +154,27 @@ typedef enum _app_options_flags #undef _ } app_options_flags_t; +#define foreach_fd_type \ + _(VPP_MQ_SEGMENT, "Fd for vpp's event mq segment") \ + _(MEMFD_SEGMENT, "Fd for memfd segment") \ + _(MQ_EVENTFD, "Event fd used by message queue") \ + _(VPP_MQ_EVENTFD, "Event fd used by vpp's message queue") \ + +typedef enum session_fd_type_ +{ +#define _(sym, str) SESSION_FD_##sym, + foreach_fd_type +#undef _ + SESSION_N_FD_TYPE +} session_fd_type_t; + +typedef enum session_fd_flag_ +{ +#define _(sym, str) SESSION_FD_F_##sym = 1 << SESSION_FD_##sym, + foreach_fd_type +#undef _ +} session_fd_flag_t; + int vnet_bind_uri (vnet_bind_args_t *); int vnet_unbind_uri (vnet_unbind_args_t * a); clib_error_t *vnet_connect_uri (vnet_connect_args_t * a); diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c index c7d2482d6bf..6f149ad9412 100644 --- a/src/vnet/session/segment_manager.c +++ b/src/vnet/session/segment_manager.c @@ -88,7 +88,7 @@ segment_manager_del_segment (segment_manager_t * sm, /** * Removes segment after acquiring writer lock */ -always_inline void +static inline void segment_manager_lock_and_del_segment (segment_manager_t * sm, u32 fs_index) { svm_fifo_segment_private_t *fs; @@ -290,8 +290,7 @@ segment_manager_init (segment_manager_t * sm, u32 first_seg_size, segment = segment_manager_get_segment (sm, seg_index); if (i == 0) - sm->event_queue = segment_manager_alloc_queue (segment, - props->evt_q_size); + sm->event_queue = segment_manager_alloc_queue (segment, props); svm_fifo_segment_preallocate_fifo_pairs (segment, props->rx_fifo_size, @@ -311,8 +310,7 @@ segment_manager_init (segment_manager_t * sm, u32 first_seg_size, return seg_index; } segment = segment_manager_get_segment (sm, seg_index); - sm->event_queue = segment_manager_alloc_queue (segment, - props->evt_q_size); + sm->event_queue = segment_manager_alloc_queue (segment, props); } return 0; @@ -623,7 +621,7 @@ segment_manager_evt_q_expected_size (u32 q_len) */ svm_msg_q_t * segment_manager_alloc_queue (svm_fifo_segment_private_t * segment, - u32 queue_size) + segment_manager_properties_t * props) { u32 fifo_evt_size, session_evt_size = 256, notif_q_size; svm_msg_q_cfg_t _cfg, *cfg = &_cfg; @@ -631,21 +629,27 @@ segment_manager_alloc_queue (svm_fifo_segment_private_t * segment, void *oldheap; fifo_evt_size = sizeof (session_event_t); - notif_q_size = clib_max (16, queue_size >> 4); + notif_q_size = clib_max (16, props->evt_q_size >> 4); /* *INDENT-OFF* */ svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { - {queue_size, fifo_evt_size, 0}, + {props->evt_q_size, fifo_evt_size, 0}, {notif_q_size, session_evt_size, 0} }; /* *INDENT-ON* */ cfg->consumer_pid = 0; cfg->n_rings = 2; - cfg->q_nitems = queue_size; + cfg->q_nitems = props->evt_q_size; cfg->ring_cfgs = rc; oldheap = ssvm_push_heap (segment->ssvm.sh); q = svm_msg_q_alloc (cfg); ssvm_pop_heap (oldheap); + + if (props->use_mq_eventfd) + { + if (svm_msg_q_alloc_producer_eventfd (q)) + clib_warning ("failed to alloc eventfd"); + } return q; } diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h index 73cb4827a8b..aae4f2e4e51 100644 --- a/src/vnet/session/segment_manager.h +++ b/src/vnet/session/segment_manager.h @@ -32,8 +32,10 @@ typedef struct _segment_manager_properties /** Configured additional segment size */ u32 add_segment_size; - /** Flag that indicates if additional segments should be created */ - u8 add_segment; + /** Flags */ + u8 add_segment:1; /**< can add new segments */ + u8 use_mq_eventfd:1; /**< use eventfds for mqs */ + u8 reserved:6; /** Segment type: if set to SSVM_N_TYPES, private segments are used */ ssvm_segment_type_t segment_type; @@ -154,7 +156,8 @@ void segment_manager_dealloc_fifos (u32 segment_index, svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo); u32 segment_manager_evt_q_expected_size (u32 q_size); svm_msg_q_t *segment_manager_alloc_queue (svm_fifo_segment_private_t * fs, - u32 queue_size); + segment_manager_properties_t * + props); void segment_manager_dealloc_queue (segment_manager_t * sm, svm_queue_t * q); void segment_manager_app_detach (segment_manager_t * sm); diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api index 98748d8fbbb..58054ae4ba2 100644 --- a/src/vnet/session/session.api +++ b/src/vnet/session/session.api @@ -13,7 +13,7 @@ * limitations under the License. */ -option version = "1.0.3"; +option version = "1.1.0"; /** \brief client->vpp, attach application to session layer @param client_index - opaque cookie to identify the sender @@ -38,6 +38,9 @@ option version = "1.0.3"; @param retval - return code for the request @param app_event_queue_address - vpp event queue address or 0 if this connection shouldn't send events + @param n_fds - number of fds exchanged + @param fd_flags - set of flags that indicate which fds are to be expected + over the socket (set only if socket transport available) @param segment_size - size of first shm segment @param segment_name_length - length of segment name @param segment_name - name of segment client needs to attach to @@ -46,6 +49,8 @@ define application_attach_reply { u32 context; i32 retval; u64 app_event_queue_address; + u8 n_fds; + u8 fd_flags; u32 segment_size; u8 segment_name_length; u8 segment_name[128]; @@ -91,11 +96,16 @@ autoreply define application_detach { /** \brief vpp->client, please map an additional shared memory segment @param client_index - opaque cookie to identify the sender @param context - sender context, to match reply w/ request - @param segment_name - + @param fd_flags - set of flags that indicate which, if any, fds are + to be expected over the socket. This is set only if + socket transport available + @param segment_size - size of the segment to be mapped + @param segment_name - name of the segment to be mapped */ autoreply define map_another_segment { u32 client_index; u32 context; + u8 fd_flags; u32 segment_size; u8 segment_name[128]; }; @@ -391,6 +401,26 @@ define connect_session_reply { u16 lcl_port; }; +/** \brief ask app to add a new cut-through registration + @param client_index - opaque cookie to identify the sender + client to vpp direction only + @param context - sender context, to match reply w/ request + @param evt_q_address - address of the mq in ssvm segment + @param peer_evt_q_address - address of peer's mq in ssvm segment + @param n_fds - number of fds exchanged + @param fd_flags - flag indicating the fds that will be exchanged over + api socket +*/ +autoreply define app_cut_through_registration_add +{ + u32 client_index; + u32 context; + u64 evt_q_address; + u64 peer_evt_q_address; + u8 n_fds; + u8 fd_flags; +}; + /** \brief enable/disable session layer @param client_index - opaque cookie to identify the sender client to vpp direction only diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 56f885b2aca..6378fe8e299 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -1245,6 +1245,11 @@ session_vpp_event_queues_allocate (session_manager_main_t * smm) cfg->q_nitems = evt_q_length; cfg->ring_cfgs = rc; smm->vpp_event_queues[i] = svm_msg_q_alloc (cfg); + if (smm->evt_qs_use_memfd_seg) + { + if (svm_msg_q_alloc_consumer_eventfd (smm->vpp_event_queues[i])) + clib_warning ("eventfd returned"); + } } if (smm->evt_qs_use_memfd_seg) diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 76c10210a50..d1cbc646e52 100755 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -60,16 +60,15 @@ _(APPLICATION_TLS_CERT_ADD, application_tls_cert_add) \ _(APPLICATION_TLS_KEY_ADD, application_tls_key_add) \ static int -session_send_memfd_fd (vl_api_registration_t * reg, const ssvm_private_t * sp) +session_send_fds (vl_api_registration_t * reg, int fds[], int n_fds) { clib_error_t *error; - int fd = sp->fd; if (vl_api_registration_file_index (reg) == VL_API_INVALID_FI) { clib_warning ("can't send memfd fd"); return -1; } - error = vl_api_send_fd_msg (reg, &fd, 1); + error = vl_api_send_fd_msg (reg, fds, n_fds); if (error) { clib_error_report (error); @@ -81,8 +80,10 @@ session_send_memfd_fd (vl_api_registration_t * reg, const ssvm_private_t * sp) static int send_add_segment_callback (u32 api_client_index, const ssvm_private_t * sp) { + int fds[SESSION_N_FD_TYPE], n_fds = 0; vl_api_map_another_segment_t *mp; vl_api_registration_t *reg; + u8 fd_flags = 0; reg = vl_mem_api_client_index_to_registration (api_client_index); if (!reg) @@ -91,24 +92,31 @@ send_add_segment_callback (u32 api_client_index, const ssvm_private_t * sp) return -1; } - if (ssvm_type (sp) == SSVM_SEGMENT_MEMFD - && vl_api_registration_file_index (reg) == VL_API_INVALID_FI) + if (ssvm_type (sp) == SSVM_SEGMENT_MEMFD) { - clib_warning ("can't send memfd fd"); - return -1; + if (vl_api_registration_file_index (reg) == VL_API_INVALID_FI) + { + clib_warning ("can't send memfd fd"); + return -1; + } + + fd_flags |= SESSION_FD_F_MEMFD_SEGMENT; + fds[n_fds] = sp->fd; + n_fds += 1; } - mp = vl_msg_api_alloc_as_if_client (sizeof (*mp)); + mp = vl_mem_api_alloc_as_if_client_w_reg (reg, sizeof (*mp)); memset (mp, 0, sizeof (*mp)); mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_MAP_ANOTHER_SEGMENT); mp->segment_size = sp->ssvm_size; + mp->fd_flags = fd_flags; strncpy ((char *) mp->segment_name, (char *) sp->name, sizeof (mp->segment_name) - 1); vl_msg_api_send_shmem (reg->vl_input_queue, (u8 *) & mp); - if (ssvm_type (sp) == SSVM_SEGMENT_MEMFD) - return session_send_memfd_fd (reg, sp); + if (n_fds) + return session_send_fds (reg, fds, n_fds); return 0; } @@ -126,23 +134,58 @@ send_del_segment_callback (u32 api_client_index, const ssvm_private_t * fs) return -1; } - if (ssvm_type (fs) == SSVM_SEGMENT_MEMFD - && vl_api_registration_file_index (reg) == VL_API_INVALID_FI) + mp = vl_mem_api_alloc_as_if_client_w_reg (reg, sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_UNMAP_SEGMENT); + strcpy ((char *) mp->segment_name, (char *) fs->name); + + vl_msg_api_send_shmem (reg->vl_input_queue, (u8 *) & mp); + + return 0; +} + +static int +send_app_cut_through_registration_add (u32 api_client_index, u64 mq_addr, + u64 peer_mq_addr) +{ + vl_api_app_cut_through_registration_add_t *mp; + vl_api_registration_t *reg; + svm_msg_q_t *mq, *peer_mq; + int fds[2]; + + reg = vl_mem_api_client_index_to_registration (api_client_index); + if (!reg) { - clib_warning ("can't send memfd fd"); + clib_warning ("no registration: %u", api_client_index); return -1; } - mp = vl_msg_api_alloc_as_if_client (sizeof (*mp)); + mp = vl_mem_api_alloc_as_if_client_w_reg (reg, sizeof (*mp)); memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_UNMAP_SEGMENT); - strncpy ((char *) mp->segment_name, (char *) fs->name, - sizeof (mp->segment_name) - 1); + mp->_vl_msg_id = + clib_host_to_net_u16 (VL_API_APP_CUT_THROUGH_REGISTRATION_ADD); + + mp->evt_q_address = mq_addr; + mp->peer_evt_q_address = peer_mq_addr; + + mq = uword_to_pointer (mq_addr, svm_msg_q_t *); + peer_mq = uword_to_pointer (peer_mq_addr, svm_msg_q_t *); + + if (svm_msg_q_get_producer_eventfd (mq) != -1) + { + mp->fd_flags |= SESSION_FD_F_MQ_EVENTFD; + mp->n_fds = 2; + /* app will overwrite exactly the fds we pass here. So + * when we swap mq with peer_mq (accept vs connect) the + * fds will still be valid */ + fds[0] = svm_msg_q_get_consumer_eventfd (mq); + fds[1] = svm_msg_q_get_producer_eventfd (peer_mq); + } vl_msg_api_send_shmem (reg->vl_input_queue, (u8 *) & mp); - if (ssvm_type (fs) == SSVM_SEGMENT_MEMFD) - return session_send_memfd_fd (reg, fs); + if (mp->n_fds != 0) + session_send_fds (reg, fds, mp->n_fds); return 0; } @@ -230,25 +273,25 @@ send_session_accept_callback (stream_session_t * s) } void -send_local_session_disconnect_callback (u32 app_index, local_session_t * ls) +mq_send_local_session_disconnected_cb (u32 app_index, local_session_t * ls) { application_t *app = application_get (app_index); - vl_api_disconnect_session_t *mp; - vl_api_registration_t *reg; - - reg = vl_mem_api_client_index_to_registration (app->api_client_index); - if (!reg) - { - clib_warning ("no registration: %u", app->api_client_index); - return; - } + svm_msg_q_msg_t _msg, *msg = &_msg; + session_disconnected_msg_t *mp; + svm_msg_q_t *app_mq; + session_event_t *evt; - mp = vl_mem_api_alloc_as_if_client_w_reg (reg, sizeof (*mp)); - memset (mp, 0, sizeof (*mp)); - mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_DISCONNECT_SESSION); + app_mq = app->event_queue; + svm_msg_q_lock_and_alloc_msg_w_ring (app_mq, SESSION_MQ_CTRL_EVT_RING, + SVM_Q_WAIT, msg); + svm_msg_q_unlock (app_mq); + evt = svm_msg_q_msg_data (app_mq, msg); + memset (evt, 0, sizeof (*evt)); + evt->event_type = SESSION_CTRL_EVT_DISCONNECTED; + mp = (session_disconnected_msg_t *) evt->data; mp->handle = application_local_session_handle (ls); mp->context = app->api_client_index; - vl_msg_api_send_shmem (reg->vl_input_queue, (u8 *) & mp); + svm_msg_q_add (app_mq, msg, SVM_Q_WAIT); } static void @@ -414,6 +457,12 @@ mq_send_session_accepted_cb (stream_session_t * s) { local_session_t *ls = (local_session_t *) s; local_session_t *ll; + u8 main_thread = vlib_num_workers ()? 1 : 0; + + send_app_cut_through_registration_add (app->api_client_index, + ls->server_evt_q, + ls->client_evt_q); + if (application_local_session_listener_has_transport (ls)) { listener = listen_session_get (ls->listener_index); @@ -436,7 +485,7 @@ mq_send_session_accepted_cb (stream_session_t * s) } mp->handle = application_local_session_handle (ls); mp->port = ls->port; - vpp_queue = session_manager_get_vpp_event_queue (0); + vpp_queue = session_manager_get_vpp_event_queue (main_thread); mp->vpp_event_queue_address = pointer_to_uword (vpp_queue); mp->client_event_queue_address = ls->client_evt_q; mp->server_event_queue_address = ls->server_evt_q; @@ -542,9 +591,15 @@ mq_send_session_connected_cb (u32 app_index, u32 api_context, else { local_session_t *ls = (local_session_t *) s; + u8 main_thread = vlib_num_workers ()? 1 : 0; + + send_app_cut_through_registration_add (app->api_client_index, + ls->client_evt_q, + ls->server_evt_q); + mp->handle = application_local_session_handle (ls); mp->lcl_port = ls->port; - vpp_mq = session_manager_get_vpp_event_queue (0); + vpp_mq = session_manager_get_vpp_event_queue (main_thread); mp->vpp_event_queue_address = pointer_to_uword (vpp_mq); mp->client_event_queue_address = ls->client_evt_q; mp->server_event_queue_address = ls->server_evt_q; @@ -583,12 +638,13 @@ vl_api_session_enable_disable_t_handler (vl_api_session_enable_disable_t * mp) static void vl_api_application_attach_t_handler (vl_api_application_attach_t * mp) { + int rv = 0, fds[SESSION_N_FD_TYPE], n_fds = 0; vl_api_application_attach_reply_t *rmp; ssvm_private_t *segp, *evt_q_segment; vnet_app_attach_args_t _a, *a = &_a; vl_api_registration_t *reg; clib_error_t *error = 0; - int rv = 0; + u8 fd_flags = 0; reg = vl_api_client_index_to_registration (mp->client_index); if (!reg) @@ -629,9 +685,32 @@ vl_api_application_attach_t_handler (vl_api_application_attach_t * mp) { rv = clib_error_get_code (error); clib_error_report (error); + vec_free (a->namespace_id); + goto done; } vec_free (a->namespace_id); + /* Send event queues segment */ + if ((evt_q_segment = session_manager_get_evt_q_segment ())) + { + fd_flags |= SESSION_FD_F_VPP_MQ_SEGMENT; + fds[n_fds] = evt_q_segment->fd; + n_fds += 1; + } + /* Send fifo segment fd if needed */ + if (ssvm_type (a->segment) == SSVM_SEGMENT_MEMFD) + { + fd_flags |= SESSION_FD_F_MEMFD_SEGMENT; + fds[n_fds] = a->segment->fd; + n_fds += 1; + } + if (a->options[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD) + { + fd_flags |= SESSION_FD_F_MQ_EVENTFD; + fds[n_fds] = svm_msg_q_get_producer_eventfd (a->app_evt_q); + n_fds += 1; + } + done: /* *INDENT-OFF* */ @@ -646,20 +725,15 @@ done: memcpy (rmp->segment_name, segp->name, vec_len (segp->name)); rmp->segment_name_length = vec_len (segp->name); } - rmp->app_event_queue_address = a->app_event_queue_address; + rmp->app_event_queue_address = pointer_to_uword (a->app_evt_q); + rmp->n_fds = n_fds; + rmp->fd_flags = fd_flags; } })); /* *INDENT-ON* */ - if (rv) - return; - - /* Send fifo segment fd if needed */ - if (ssvm_type (a->segment) == SSVM_SEGMENT_MEMFD) - session_send_memfd_fd (reg, a->segment); - /* Send event queues segment */ - if ((evt_q_segment = session_manager_get_evt_q_segment ())) - session_send_memfd_fd (reg, evt_q_segment); + if (n_fds) + session_send_fds (reg, fds, n_fds); } static void