X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvcl%2Fldp.c;h=915d1caa40c0074767ca6ee3d4c206463f3416c8;hb=47c40e2d944c9a29677d0542103207ba8372b66a;hp=d31cd2cabd48920d62d8f0f5a348d809812a0e40;hpb=3ee1fe1608cca9f000bed59ba987c864adf37cd6;p=vpp.git diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c index d31cd2cabd4..915d1caa40c 100644 --- a/src/vcl/ldp.c +++ b/src/vcl/ldp.c @@ -29,6 +29,9 @@ #include #include #include +#include +#include +#include #define HAVE_CONSTRUCTOR_ATTRIBUTE #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE @@ -46,15 +49,23 @@ #define DESTRUCTOR_ATTRIBUTE #endif -typedef struct +#define LDP_MAX_NWORKERS 32 + +typedef struct ldp_fd_entry_ +{ + u32 session_index; + u32 fd; + u32 fd_index; +} ldp_fd_entry_t; + +typedef struct ldp_worker_ctx_ { - int init; - char app_name[LDP_APP_NAME_MAX]; - u32 sid_bit_val; - u32 sid_bit_mask; - u32 debug; u8 *io_buffer; clib_time_t clib_time; + + /* + * Select state + */ clib_bitmap_t *rd_bitmap; clib_bitmap_t *wr_bitmap; clib_bitmap_t *ex_bitmap; @@ -64,12 +75,45 @@ typedef struct clib_bitmap_t *libc_rd_bitmap; clib_bitmap_t *libc_wr_bitmap; clib_bitmap_t *libc_ex_bitmap; - vcl_poll_t *vcl_poll; u8 select_vcl; + + /* + * Poll state + */ + vcl_poll_t *vcl_poll; + struct pollfd *libc_poll; + u16 *libc_poll_idxs; + + /* + * Epoll state + */ u8 epoll_wait_vcl; + int vcl_mq_epfd; + +} ldp_worker_ctx_t; + +typedef struct +{ + ldp_worker_ctx_t *workers; + int init; + char app_name[LDP_APP_NAME_MAX]; + u32 sid_bit_val; + u32 sid_bit_mask; + u32 debug; + ldp_fd_entry_t *fd_pool; + clib_rwlock_t fd_table_lock; + uword *session_index_to_fd_table; + + /** vcl needs next epoll_create to go to libc_epoll */ + u8 vcl_needs_real_epoll; } ldp_main_t; + #define LDP_DEBUG ldp->debug +#define LDBG(_lvl, _fmt, _args...) \ + if (ldp->debug > _lvl) \ + clib_warning (_fmt, ##_args) + static ldp_main_t ldp_main = { .sid_bit_val = (1 << LDP_SID_BIT_MIN), .sid_bit_mask = (1 << LDP_SID_BIT_MIN) - 1, @@ -78,6 +122,12 @@ static ldp_main_t ldp_main = { static ldp_main_t *ldp = &ldp_main; +static inline ldp_worker_ctx_t * +ldp_worker_get_current (void) +{ + return (ldp->workers + vppcom_worker_index ()); +} + /* * RETURN: 0 on success or -1 on error. * */ @@ -100,138 +150,203 @@ ldp_get_app_name () return ldp->app_name; } +static int +ldp_fd_alloc (u32 sid) +{ + ldp_fd_entry_t *fde; + + clib_rwlock_writer_lock (&ldp->fd_table_lock); + if (pool_elts (ldp->fd_pool) >= (1ULL << 32) - ldp->sid_bit_val) + { + clib_rwlock_writer_unlock (&ldp->fd_table_lock); + return -1; + } + pool_get (ldp->fd_pool, fde); + fde->session_index = vppcom_session_index (sid); + fde->fd_index = fde - ldp->fd_pool; + fde->fd = fde->fd_index + ldp->sid_bit_val; + hash_set (ldp->session_index_to_fd_table, fde->session_index, fde->fd); + clib_rwlock_writer_unlock (&ldp->fd_table_lock); + return fde->fd; +} + +static ldp_fd_entry_t * +ldp_fd_entry_get_w_lock (u32 fd_index) +{ + clib_rwlock_reader_lock (&ldp->fd_table_lock); + if (pool_is_free_index (ldp->fd_pool, fd_index)) + return 0; + + return pool_elt_at_index (ldp->fd_pool, fd_index); +} + static inline int ldp_fd_from_sid (u32 sid) { - if (PREDICT_FALSE (sid >= ldp->sid_bit_val)) - return -EMFILE; - else - return (sid | ldp->sid_bit_val); + uword *fdp; + int fd; + + clib_rwlock_reader_lock (&ldp->fd_table_lock); + fdp = hash_get (ldp->session_index_to_fd_table, vppcom_session_index (sid)); + fd = fdp ? *fdp : -EMFILE; + clib_rwlock_reader_unlock (&ldp->fd_table_lock); + + return fd; } static inline int ldp_fd_is_sid (int fd) { - return ((u32) fd & ldp->sid_bit_val) ? 1 : 0; + return fd >= ldp->sid_bit_val; } static inline u32 ldp_sid_from_fd (int fd) { - return (ldp_fd_is_sid (fd) ? ((u32) fd & ldp->sid_bit_mask) : - INVALID_SESSION_ID); + u32 fd_index, session_index; + ldp_fd_entry_t *fde; + + if (!ldp_fd_is_sid (fd)) + return INVALID_SESSION_ID; + + fd_index = fd - ldp->sid_bit_val; + fde = ldp_fd_entry_get_w_lock (fd_index); + if (!fde) + { + LDBG (0, "unknown fd %d", fd); + clib_rwlock_reader_unlock (&ldp->fd_table_lock); + return INVALID_SESSION_ID; + } + session_index = fde->session_index; + clib_rwlock_reader_unlock (&ldp->fd_table_lock); + + return vppcom_session_handle (session_index); +} + +static void +ldp_fd_free_w_sid (u32 sid) +{ + ldp_fd_entry_t *fde; + u32 fd_index; + int fd; + + fd = ldp_fd_from_sid (sid); + if (!fd) + return; + + fd_index = fd - ldp->sid_bit_val; + fde = ldp_fd_entry_get_w_lock (fd_index); + if (fde) + { + hash_unset (ldp->session_index_to_fd_table, fde->session_index); + pool_put (ldp->fd_pool, fde); + } + clib_rwlock_writer_unlock (&ldp->fd_table_lock); } static inline int ldp_init (void) { - int rv = 0; + ldp_worker_ctx_t *ldpw; + int rv; + + if (PREDICT_TRUE (ldp->init)) + return 0; - if (PREDICT_FALSE (!ldp->init)) + ldp->init = 1; + ldp->vcl_needs_real_epoll = 1; + rv = vppcom_app_create (ldp_get_app_name ()); + if (rv != VPPCOM_OK) { - ldp->init = 1; - rv = vppcom_app_create (ldp_get_app_name ()); - if (rv == VPPCOM_OK) + fprintf (stderr, "\nLDP<%d>: ERROR: ldp_init: vppcom_app_create()" + " failed! rv = %d (%s)\n", + getpid (), rv, vppcom_retval_str (rv)); + ldp->init = 0; + return rv; + } + ldp->vcl_needs_real_epoll = 0; + pool_alloc (ldp->workers, LDP_MAX_NWORKERS); + ldpw = ldp_worker_get_current (); + + char *env_var_str = getenv (LDP_ENV_DEBUG); + if (env_var_str) + { + u32 tmp; + if (sscanf (env_var_str, "%u", &tmp) != 1) + clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in" + " the env var " LDP_ENV_DEBUG " (%s)!", getpid (), + env_var_str); + else { - char *env_var_str = getenv (LDP_ENV_DEBUG); - if (env_var_str) - { - u32 tmp; - if (sscanf (env_var_str, "%u", &tmp) != 1) - clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level " - "specified in the env var " LDP_ENV_DEBUG - " (%s)!", getpid (), env_var_str); - else - { - ldp->debug = tmp; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: configured LDP debug level (%u) " - "from the env var " LDP_ENV_DEBUG "!", - getpid (), ldp->debug); - } - } + ldp->debug = tmp; + LDBG (0, "LDP<%d>: configured LDP debug level (%u) from env var " + LDP_ENV_DEBUG "!", getpid (), ldp->debug); + } + } - env_var_str = getenv (LDP_ENV_APP_NAME); - if (env_var_str) - { - ldp_set_app_name (env_var_str); - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: configured LDP app name (%s) " - "from the env var " LDP_ENV_APP_NAME "!", - getpid (), ldp->app_name); - } + env_var_str = getenv (LDP_ENV_APP_NAME); + if (env_var_str) + { + ldp_set_app_name (env_var_str); + LDBG (0, "LDP<%d>: configured LDP app name (%s) from the env var " + LDP_ENV_APP_NAME "!", getpid (), ldp->app_name); + } - env_var_str = getenv (LDP_ENV_SID_BIT); - if (env_var_str) - { - u32 sb; - if (sscanf (env_var_str, "%u", &sb) != 1) - { - clib_warning ("LDP<%d>: WARNING: Invalid LDP sid bit " - "specified in the env var " - LDP_ENV_SID_BIT " (%s)!" - "sid bit value %d (0x%x)", - getpid (), env_var_str, - ldp->sid_bit_val, ldp->sid_bit_val); - } - else if (sb < LDP_SID_BIT_MIN) - { - ldp->sid_bit_val = (1 << LDP_SID_BIT_MIN); - ldp->sid_bit_mask = ldp->sid_bit_val - 1; - - clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) " - "specified in the env var " - LDP_ENV_SID_BIT " (%s) is too small. " - "Using LDP_SID_BIT_MIN (%d)! " - "sid bit value %d (0x%x)", - getpid (), sb, env_var_str, LDP_SID_BIT_MIN, - ldp->sid_bit_val, ldp->sid_bit_val); - } - else if (sb > LDP_SID_BIT_MAX) - { - ldp->sid_bit_val = (1 << LDP_SID_BIT_MAX); - ldp->sid_bit_mask = ldp->sid_bit_val - 1; - - clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) " - "specified in the env var " - LDP_ENV_SID_BIT " (%s) is too big. " - "Using LDP_SID_BIT_MAX (%d)! " - "sid bit value %d (0x%x)", - getpid (), sb, env_var_str, LDP_SID_BIT_MAX, - ldp->sid_bit_val, ldp->sid_bit_val); - } - else - { - ldp->sid_bit_val = (1 << sb); - ldp->sid_bit_mask = ldp->sid_bit_val - 1; - - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: configured LDP sid bit (%u) " - "from " LDP_ENV_SID_BIT - "! sid bit value %d (0x%x)", getpid (), - sb, ldp->sid_bit_val, ldp->sid_bit_val); - } - } + env_var_str = getenv (LDP_ENV_SID_BIT); + if (env_var_str) + { + u32 sb; + if (sscanf (env_var_str, "%u", &sb) != 1) + { + clib_warning ("LDP<%d>: WARNING: Invalid LDP sid bit specified in" + " the env var " LDP_ENV_SID_BIT " (%s)! sid bit " + "value %d (0x%x)", getpid (), env_var_str, + ldp->sid_bit_val, ldp->sid_bit_val); + } + else if (sb < LDP_SID_BIT_MIN) + { + ldp->sid_bit_val = (1 << LDP_SID_BIT_MIN); + ldp->sid_bit_mask = ldp->sid_bit_val - 1; + + clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) specified in the" + " env var " LDP_ENV_SID_BIT " (%s) is too small. " + "Using LDP_SID_BIT_MIN (%d)! sid bit value %d (0x%x)", + getpid (), sb, env_var_str, LDP_SID_BIT_MIN, + ldp->sid_bit_val, ldp->sid_bit_val); + } + else if (sb > LDP_SID_BIT_MAX) + { + ldp->sid_bit_val = (1 << LDP_SID_BIT_MAX); + ldp->sid_bit_mask = ldp->sid_bit_val - 1; - clib_time_init (&ldp->clib_time); - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: LDP initialization: done!", getpid ()); + clib_warning ("LDP<%d>: WARNING: LDP sid bit (%u) specified in the" + " env var " LDP_ENV_SID_BIT " (%s) is too big. Using" + " LDP_SID_BIT_MAX (%d)! sid bit value %d (0x%x)", + getpid (), sb, env_var_str, LDP_SID_BIT_MAX, + ldp->sid_bit_val, ldp->sid_bit_val); } else { - fprintf (stderr, "\nLDP<%d>: ERROR: ldp_init: vppcom_app_create()" - " failed! rv = %d (%s)\n", - getpid (), rv, vppcom_retval_str (rv)); - ldp->init = 0; + ldp->sid_bit_val = (1 << sb); + ldp->sid_bit_mask = ldp->sid_bit_val - 1; + + LDBG (0, "LDP<%d>: configured LDP sid bit (%u) from " + LDP_ENV_SID_BIT "! sid bit value %d (0x%x)", getpid (), sb, + ldp->sid_bit_val, ldp->sid_bit_val); } } - return rv; + + clib_time_init (&ldpw->clib_time); + clib_rwlock_init (&ldp->fd_table_lock); + LDBG (0, "LDP<%d>: LDP initialization: done!", getpid ()); + + return 0; } int close (int fd) { - int rv; + int rv, refcnt; const char *func_str; u32 sid = ldp_sid_from_fd (fd); @@ -248,10 +363,8 @@ close (int fd) { func_str = "libc_close"; - if (LDP_DEBUG > 0) - clib_warning - ("LDP<%d>: fd %d (0x%x): calling %s(): epfd %u (0x%x)", - getpid (), fd, fd, func_str, epfd, epfd); + LDBG (0, "LDP<%d>: fd %d (0x%x): calling %s(): epfd %u (0x%x)", + getpid (), fd, fd, func_str, epfd, epfd); rv = libc_close (epfd); if (rv < 0) @@ -272,24 +385,25 @@ close (int fd) func_str = "vppcom_session_close"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x)", - getpid (), fd, fd, func_str, sid, sid); + LDBG (0, "LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x)", + getpid (), fd, fd, func_str, sid, sid); + refcnt = vppcom_session_attr (sid, VPPCOM_ATTR_GET_REFCNT, 0, 0); rv = vppcom_session_close (sid); if (rv != VPPCOM_OK) { errno = -rv; rv = -1; } + if (refcnt == 1) + ldp_fd_free_w_sid (sid); } else { func_str = "libc_close"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: fd %d (0x%x): calling %s()", - getpid (), fd, fd, func_str); + LDBG (0, "LDP<%d>: fd %d (0x%x): calling %s()", getpid (), fd, fd, + func_str); rv = libc_close (fd); } @@ -617,36 +731,35 @@ fcntl (int fd, int cmd, ...) { case F_SETFL: func_str = "vppcom_session_attr[SET_FLAGS]"; - if (LDP_DEBUG > 2) - clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): " - "sid %u (0x%x) flags %d (0x%x), size %d", - getpid (), fd, fd, func_str, sid, sid, - flags, flags, size); + LDBG (2, "LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x) " + "flags %d (0x%x), size %d", getpid (), fd, fd, func_str, sid, + sid, flags, flags, size); - rv = - vppcom_session_attr (sid, VPPCOM_ATTR_SET_FLAGS, &flags, &size); + rv = vppcom_session_attr (sid, VPPCOM_ATTR_SET_FLAGS, &flags, + &size); break; case F_GETFL: func_str = "vppcom_session_attr[GET_FLAGS]"; - if (LDP_DEBUG > 2) - clib_warning - ("LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x), " - "flags %d (0x%x), size %d", getpid (), fd, fd, func_str, sid, - sid, flags, flags, size); + LDBG (2, "LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x), " + "flags %d (0x%x), size %d", getpid (), fd, fd, func_str, sid, + sid, flags, flags, size); - rv = - vppcom_session_attr (sid, VPPCOM_ATTR_GET_FLAGS, &flags, &size); + rv = vppcom_session_attr (sid, VPPCOM_ATTR_GET_FLAGS, &flags, + &size); if (rv == VPPCOM_OK) { - if (LDP_DEBUG > 2) - clib_warning ("LDP<%d>: fd %d (0x%x), cmd %d (F_GETFL): " - "%s() returned flags %d (0x%x)", - getpid (), fd, fd, cmd, func_str, flags, flags); + LDBG (2, "LDP<%d>: fd %d (0x%x), cmd %d (F_GETFL): %s() " + "returned flags %d (0x%x)", getpid (), fd, fd, cmd, + func_str, flags, flags); rv = flags; } break; - + case F_SETFD: + /* TODO handle this */ + LDBG (0, "F_SETFD ignored flags %u", flags); + rv = 0; + break; default: rv = -EOPNOTSUPP; break; @@ -782,13 +895,12 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, const struct timespec *__restrict timeout, const __sigset_t * __restrict sigmask) { - int rv; + uword sid_bits, sid_bits_set, libc_bits, libc_bits_set; + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + u32 minbits = clib_max (nfds, BITS (uword)), sid; char *func_str = "##"; f64 time_out; - int fd; - uword sid_bits, sid_bits_set, libc_bits, libc_bits_set; - u32 minbits = clib_max (nfds, BITS (uword)); - u32 sid; + int rv, fd; if (nfds < 0) { @@ -805,12 +917,11 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, /* select as fine grained sleep */ if (!nfds) { - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: sleeping for %.02f seconds", - getpid (), time_out); + LDBG (3, "LDP<%d>: sleeping for %.02f seconds", getpid (), + time_out); - time_out += clib_time_now (&ldp->clib_time); - while (clib_time_now (&ldp->clib_time) < time_out) + time_out += clib_time_now (&ldpw->clib_time); + while (clib_time_now (&ldpw->clib_time) < time_out) ; return 0; } @@ -828,11 +939,9 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, { func_str = "libc_pselect"; - if (LDP_DEBUG > 3) - clib_warning - ("LDP<%d>: calling %s(): nfds %d, readfds %p, writefds %p, " - "exceptfds %p, timeout %p, sigmask %p", getpid (), func_str, nfds, - readfds, writefds, exceptfds, timeout, sigmask); + LDBG (3, "LDP<%d>: calling %s(): nfds %d, readfds %p, writefds %p, " + "exceptfds %p, timeout %p, sigmask %p", getpid (), func_str, nfds, + readfds, writefds, exceptfds, timeout, sigmask); rv = libc_pselect (nfds, readfds, writefds, exceptfds, timeout, sigmask); @@ -850,107 +959,105 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, } sid_bits = libc_bits = 0; + u32 n_bytes = nfds / 8 + ((nfds % 8) ? 1 : 0); if (readfds) { - clib_bitmap_validate (ldp->sid_rd_bitmap, minbits); - clib_bitmap_validate (ldp->libc_rd_bitmap, minbits); - clib_bitmap_validate (ldp->rd_bitmap, minbits); - clib_memcpy (ldp->rd_bitmap, readfds, - vec_len (ldp->rd_bitmap) * sizeof (clib_bitmap_t)); - FD_ZERO (readfds); + clib_bitmap_validate (ldpw->sid_rd_bitmap, minbits); + clib_bitmap_validate (ldpw->libc_rd_bitmap, minbits); + clib_bitmap_validate (ldpw->rd_bitmap, minbits); + clib_memcpy_fast (ldpw->rd_bitmap, readfds, n_bytes); + memset (readfds, 0, n_bytes); /* *INDENT-OFF* */ - clib_bitmap_foreach (fd, ldp->rd_bitmap, - ({ - sid = ldp_sid_from_fd (fd); - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: readfds: fd %d (0x%x), sid %u (0x%x)", - getpid (), fd, fd, sid, sid); - if (sid == INVALID_SESSION_ID) - clib_bitmap_set_no_check (ldp->libc_rd_bitmap, fd, 1); - else - clib_bitmap_set_no_check (ldp->sid_rd_bitmap, sid, 1); - })); + clib_bitmap_foreach (fd, ldpw->rd_bitmap, ({ + if (fd > nfds) + break; + sid = ldp_sid_from_fd (fd); + LDBG (3, "LDP<%d>: readfds: fd %d (0x%x), sid %u (0x%x)", + getpid (), fd, fd, sid, sid); + if (sid == INVALID_SESSION_ID) + clib_bitmap_set_no_check (ldpw->libc_rd_bitmap, fd, 1); + else + clib_bitmap_set_no_check (ldpw->sid_rd_bitmap, + vppcom_session_index (sid), 1); + })); /* *INDENT-ON* */ - sid_bits_set = clib_bitmap_last_set (ldp->sid_rd_bitmap) + 1; + sid_bits_set = clib_bitmap_last_set (ldpw->sid_rd_bitmap) + 1; sid_bits = (sid_bits_set > sid_bits) ? sid_bits_set : sid_bits; - libc_bits_set = clib_bitmap_last_set (ldp->libc_rd_bitmap) + 1; + libc_bits_set = clib_bitmap_last_set (ldpw->libc_rd_bitmap) + 1; libc_bits = (libc_bits_set > libc_bits) ? libc_bits_set : libc_bits; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: readfds: sid_bits_set %d, sid_bits %d, " - "libc_bits_set %d, libc_bits %d", getpid (), - sid_bits_set, sid_bits, libc_bits_set, libc_bits); + LDBG (3, "LDP<%d>: readfds: sid_bits_set %d, sid_bits %d, " + "libc_bits_set %d, libc_bits %d", getpid (), sid_bits_set, + sid_bits, libc_bits_set, libc_bits); } if (writefds) { - clib_bitmap_validate (ldp->sid_wr_bitmap, minbits); - clib_bitmap_validate (ldp->libc_wr_bitmap, minbits); - clib_bitmap_validate (ldp->wr_bitmap, minbits); - clib_memcpy (ldp->wr_bitmap, writefds, - vec_len (ldp->wr_bitmap) * sizeof (clib_bitmap_t)); - FD_ZERO (writefds); + clib_bitmap_validate (ldpw->sid_wr_bitmap, minbits); + clib_bitmap_validate (ldpw->libc_wr_bitmap, minbits); + clib_bitmap_validate (ldpw->wr_bitmap, minbits); + clib_memcpy_fast (ldpw->wr_bitmap, writefds, n_bytes); + memset (writefds, 0, n_bytes); /* *INDENT-OFF* */ - clib_bitmap_foreach (fd, ldp->wr_bitmap, - ({ - sid = ldp_sid_from_fd (fd); - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: writefds: fd %d (0x%x), sid %u (0x%x)", - getpid (), fd, fd, sid, sid); - if (sid == INVALID_SESSION_ID) - clib_bitmap_set_no_check (ldp->libc_wr_bitmap, fd, 1); - else - clib_bitmap_set_no_check (ldp->sid_wr_bitmap, sid, 1); - })); + clib_bitmap_foreach (fd, ldpw->wr_bitmap, ({ + if (fd > nfds) + break; + sid = ldp_sid_from_fd (fd); + LDBG (3, "LDP<%d>: writefds: fd %d (0x%x), sid %u (0x%x)", + getpid (), fd, fd, sid, sid); + if (sid == INVALID_SESSION_ID) + clib_bitmap_set_no_check (ldpw->libc_wr_bitmap, fd, 1); + else + clib_bitmap_set_no_check (ldpw->sid_wr_bitmap, + vppcom_session_index (sid), 1); + })); /* *INDENT-ON* */ - sid_bits_set = clib_bitmap_last_set (ldp->sid_wr_bitmap) + 1; + sid_bits_set = clib_bitmap_last_set (ldpw->sid_wr_bitmap) + 1; sid_bits = (sid_bits_set > sid_bits) ? sid_bits_set : sid_bits; - libc_bits_set = clib_bitmap_last_set (ldp->libc_wr_bitmap) + 1; + libc_bits_set = clib_bitmap_last_set (ldpw->libc_wr_bitmap) + 1; libc_bits = (libc_bits_set > libc_bits) ? libc_bits_set : libc_bits; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: writefds: sid_bits_set %d, sid_bits %d, " - "libc_bits_set %d, libc_bits %d", getpid (), - sid_bits_set, sid_bits, libc_bits_set, libc_bits); + LDBG (3, "LDP<%d>: writefds: sid_bits_set %d, sid_bits %d, " + "libc_bits_set %d, libc_bits %d", getpid (), + sid_bits_set, sid_bits, libc_bits_set, libc_bits); } if (exceptfds) { - clib_bitmap_validate (ldp->sid_ex_bitmap, minbits); - clib_bitmap_validate (ldp->libc_ex_bitmap, minbits); - clib_bitmap_validate (ldp->ex_bitmap, minbits); - clib_memcpy (ldp->ex_bitmap, exceptfds, - vec_len (ldp->ex_bitmap) * sizeof (clib_bitmap_t)); - FD_ZERO (exceptfds); + clib_bitmap_validate (ldpw->sid_ex_bitmap, minbits); + clib_bitmap_validate (ldpw->libc_ex_bitmap, minbits); + clib_bitmap_validate (ldpw->ex_bitmap, minbits); + clib_memcpy_fast (ldpw->ex_bitmap, exceptfds, n_bytes); + memset (exceptfds, 0, n_bytes); /* *INDENT-OFF* */ - clib_bitmap_foreach (fd, ldp->ex_bitmap, - ({ - sid = ldp_sid_from_fd (fd); - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: exceptfds: fd %d (0x%x), sid %u (0x%x)", - getpid (), fd, fd, sid, sid); - if (sid == INVALID_SESSION_ID) - clib_bitmap_set_no_check (ldp->libc_ex_bitmap, fd, 1); - else - clib_bitmap_set_no_check (ldp->sid_ex_bitmap, sid, 1); - })); + clib_bitmap_foreach (fd, ldpw->ex_bitmap, ({ + if (fd > nfds) + break; + sid = ldp_sid_from_fd (fd); + LDBG (3, "LDP<%d>: exceptfds: fd %d (0x%x), sid %u (0x%x)", + getpid (), fd, fd, sid, sid); + if (sid == INVALID_SESSION_ID) + clib_bitmap_set_no_check (ldpw->libc_ex_bitmap, fd, 1); + else + clib_bitmap_set_no_check (ldpw->sid_ex_bitmap, + vppcom_session_index (sid), 1); + })); /* *INDENT-ON* */ - sid_bits_set = clib_bitmap_last_set (ldp->sid_ex_bitmap) + 1; + sid_bits_set = clib_bitmap_last_set (ldpw->sid_ex_bitmap) + 1; sid_bits = (sid_bits_set > sid_bits) ? sid_bits_set : sid_bits; - libc_bits_set = clib_bitmap_last_set (ldp->libc_ex_bitmap) + 1; + libc_bits_set = clib_bitmap_last_set (ldpw->libc_ex_bitmap) + 1; libc_bits = (libc_bits_set > libc_bits) ? libc_bits_set : libc_bits; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: exceptfds: sid_bits_set %d, sid_bits %d, " - "libc_bits_set %d, libc_bits %d", getpid (), - sid_bits_set, sid_bits, libc_bits_set, libc_bits); + LDBG (3, "LDP<%d>: exceptfds: sid_bits_set %d, sid_bits %d, " + "libc_bits_set %d, libc_bits %d", getpid (), + sid_bits_set, sid_bits, libc_bits_set, libc_bits); } if (PREDICT_FALSE (!sid_bits && !libc_bits)) @@ -964,27 +1071,27 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, { if (sid_bits) { - if (!ldp->select_vcl) + if (!ldpw->select_vcl) { func_str = "vppcom_select"; if (readfds) - clib_memcpy (ldp->rd_bitmap, ldp->sid_rd_bitmap, - vec_len (ldp->rd_bitmap) * - sizeof (clib_bitmap_t)); + clib_memcpy_fast (ldpw->rd_bitmap, ldpw->sid_rd_bitmap, + vec_len (ldpw->rd_bitmap) * + sizeof (clib_bitmap_t)); if (writefds) - clib_memcpy (ldp->wr_bitmap, ldp->sid_wr_bitmap, - vec_len (ldp->wr_bitmap) * - sizeof (clib_bitmap_t)); + clib_memcpy_fast (ldpw->wr_bitmap, ldpw->sid_wr_bitmap, + vec_len (ldpw->wr_bitmap) * + sizeof (clib_bitmap_t)); if (exceptfds) - clib_memcpy (ldp->ex_bitmap, ldp->sid_ex_bitmap, - vec_len (ldp->ex_bitmap) * - sizeof (clib_bitmap_t)); + clib_memcpy_fast (ldpw->ex_bitmap, ldpw->sid_ex_bitmap, + vec_len (ldpw->ex_bitmap) * + sizeof (clib_bitmap_t)); rv = vppcom_select (sid_bits, - readfds ? ldp->rd_bitmap : NULL, - writefds ? ldp->wr_bitmap : NULL, - exceptfds ? ldp->ex_bitmap : NULL, 0); + readfds ? ldpw->rd_bitmap : NULL, + writefds ? ldpw->wr_bitmap : NULL, + exceptfds ? ldpw->ex_bitmap : NULL, 0); if (rv < 0) { errno = -rv; @@ -995,9 +1102,9 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, if (readfds) { /* *INDENT-OFF* */ - clib_bitmap_foreach (sid, ldp->rd_bitmap, + clib_bitmap_foreach (sid, ldpw->rd_bitmap, ({ - fd = ldp_fd_from_sid (sid); + fd = ldp_fd_from_sid (vppcom_session_handle (sid)); if (PREDICT_FALSE (fd < 0)) { errno = EBADFD; @@ -1011,9 +1118,9 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, if (writefds) { /* *INDENT-OFF* */ - clib_bitmap_foreach (sid, ldp->wr_bitmap, + clib_bitmap_foreach (sid, ldpw->wr_bitmap, ({ - fd = ldp_fd_from_sid (sid); + fd = ldp_fd_from_sid (vppcom_session_handle (sid)); if (PREDICT_FALSE (fd < 0)) { errno = EBADFD; @@ -1027,9 +1134,9 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, if (exceptfds) { /* *INDENT-OFF* */ - clib_bitmap_foreach (sid, ldp->ex_bitmap, + clib_bitmap_foreach (sid, ldpw->ex_bitmap, ({ - fd = ldp_fd_from_sid (sid); + fd = ldp_fd_from_sid (vppcom_session_handle (sid)); if (PREDICT_FALSE (fd < 0)) { errno = EBADFD; @@ -1040,12 +1147,12 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, })); /* *INDENT-ON* */ } - ldp->select_vcl = 1; + ldpw->select_vcl = 1; goto done; } } else - ldp->select_vcl = 0; + ldpw->select_vcl = 0; } if (libc_bits) { @@ -1054,14 +1161,17 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, func_str = "libc_pselect"; if (readfds) - clib_memcpy (readfds, ldp->libc_rd_bitmap, - vec_len (ldp->rd_bitmap) * sizeof (clib_bitmap_t)); + clib_memcpy_fast (readfds, ldpw->libc_rd_bitmap, + vec_len (ldpw->rd_bitmap) * + sizeof (clib_bitmap_t)); if (writefds) - clib_memcpy (writefds, ldp->libc_wr_bitmap, - vec_len (ldp->wr_bitmap) * sizeof (clib_bitmap_t)); + clib_memcpy_fast (writefds, ldpw->libc_wr_bitmap, + vec_len (ldpw->wr_bitmap) * + sizeof (clib_bitmap_t)); if (exceptfds) - clib_memcpy (exceptfds, ldp->libc_ex_bitmap, - vec_len (ldp->ex_bitmap) * sizeof (clib_bitmap_t)); + clib_memcpy_fast (exceptfds, ldpw->libc_ex_bitmap, + vec_len (ldpw->ex_bitmap) * + sizeof (clib_bitmap_t)); tspec.tv_sec = tspec.tv_nsec = 0; rv = libc_pselect (libc_bits, readfds ? readfds : NULL, @@ -1071,20 +1181,20 @@ ldp_pselect (int nfds, fd_set * __restrict readfds, goto done; } } - while ((time_out == -1) || (clib_time_now (&ldp->clib_time) < time_out)); + while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out)); rv = 0; done: /* TBD: set timeout to amount of time left */ - vec_reset_length (ldp->rd_bitmap); - vec_reset_length (ldp->sid_rd_bitmap); - vec_reset_length (ldp->libc_rd_bitmap); - vec_reset_length (ldp->wr_bitmap); - vec_reset_length (ldp->sid_wr_bitmap); - vec_reset_length (ldp->libc_wr_bitmap); - vec_reset_length (ldp->ex_bitmap); - vec_reset_length (ldp->sid_ex_bitmap); - vec_reset_length (ldp->libc_ex_bitmap); + clib_bitmap_zero (ldpw->rd_bitmap); + clib_bitmap_zero (ldpw->sid_rd_bitmap); + clib_bitmap_zero (ldpw->libc_rd_bitmap); + clib_bitmap_zero (ldpw->wr_bitmap); + clib_bitmap_zero (ldpw->sid_wr_bitmap); + clib_bitmap_zero (ldpw->libc_wr_bitmap); + clib_bitmap_zero (ldpw->ex_bitmap); + clib_bitmap_zero (ldpw->sid_ex_bitmap); + clib_bitmap_zero (ldpw->libc_ex_bitmap); if (LDP_DEBUG > 3) { @@ -1151,11 +1261,9 @@ socket (int domain, int type, int protocol) func_str = "vppcom_session_create"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: : calling %s(): " - "proto %u (%s), is_nonblocking %u", - getpid (), func_str, proto, - vppcom_proto_str (proto), is_nonblocking); + LDBG (0, "LDP<%d>: : calling %s(): proto %u (%s), is_nonblocking %u", + getpid (), func_str, proto, vppcom_proto_str (proto), + is_nonblocking); sid = vppcom_session_create (proto, is_nonblocking); if (sid < 0) @@ -1166,7 +1274,7 @@ socket (int domain, int type, int protocol) else { func_str = "ldp_fd_from_sid"; - rv = ldp_fd_from_sid (sid); + rv = ldp_fd_alloc (sid); if (rv < 0) { (void) vppcom_session_close (sid); @@ -1179,8 +1287,7 @@ socket (int domain, int type, int protocol) { func_str = "libc_socket"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: : calling %s()", getpid (), func_str); + LDBG (0, "LDP<%d>: : calling %s()", getpid (), func_str); rv = libc_socket (domain, type, protocol); } @@ -1232,10 +1339,9 @@ socketpair (int domain, int type, int protocol, int fds[2]) { func_str = "libc_socket"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: : calling %s()", getpid (), func_str); + LDBG (1, "LDP<%d>: : calling %s()", getpid (), func_str); - rv = libc_socket (domain, type, protocol); + rv = libc_socketpair (domain, type, protocol, fds); } if (LDP_DEBUG > 1) @@ -1669,7 +1775,7 @@ send (int fd, const void *buf, size_t n, int flags) getpid (), fd, fd, func_str, sid, sid, buf, n, flags); size = vppcom_session_sendto (sid, (void *) buf, n, flags, NULL); - if (size != VPPCOM_OK) + if (size < VPPCOM_OK) { errno = -size; size = -1; @@ -1708,6 +1814,7 @@ send (int fd, const void *buf, size_t n, int flags) ssize_t sendfile (int out_fd, int in_fd, off_t * offset, size_t len) { + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); ssize_t size = 0; const char *func_str; u32 sid = ldp_sid_from_fd (out_fd); @@ -1736,7 +1843,7 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) out_fd, out_fd, func_str, sid, sid, rv, vppcom_retval_str (rv)); - vec_reset_length (ldp->io_buffer); + vec_reset_length (ldpw->io_buffer); errno = -rv; size = -1; goto done; @@ -1772,7 +1879,7 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) ("LDP<%d>: ERROR: fd %d (0x%x): %s(): sid %u (0x%x), " "returned %d (%s)!", getpid (), out_fd, out_fd, func_str, sid, sid, size, vppcom_retval_str (size)); - vec_reset_length (ldp->io_buffer); + vec_reset_length (ldpw->io_buffer); errno = -size; size = -1; goto done; @@ -1804,8 +1911,8 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) continue; } bytes_to_read = clib_min (n_bytes_left, bytes_to_read); - vec_validate (ldp->io_buffer, bytes_to_read); - nbytes = libc_read (in_fd, ldp->io_buffer, bytes_to_read); + vec_validate (ldpw->io_buffer, bytes_to_read); + nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read); if (nbytes < 0) { func_str = "libc_read"; @@ -1813,13 +1920,13 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s(): in_fd (%d), " "io_buffer %p, bytes_to_read %lu, rv %d, " "errno %d", getpid (), out_fd, out_fd, func_str, - in_fd, ldp->io_buffer, bytes_to_read, nbytes, + in_fd, ldpw->io_buffer, bytes_to_read, nbytes, errno_val); errno = errno_val; if (results == 0) { - vec_reset_length (ldp->io_buffer); + vec_reset_length (ldpw->io_buffer); size = -1; goto done; } @@ -1830,10 +1937,10 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x), " "buf %p, nbytes %u: results %d, n_bytes_left %d", getpid (), - out_fd, out_fd, func_str, sid, sid, ldp->io_buffer, nbytes, + out_fd, out_fd, func_str, sid, sid, ldpw->io_buffer, nbytes, results, n_bytes_left); - size = vppcom_session_write (sid, ldp->io_buffer, nbytes); + size = vppcom_session_write (sid, ldpw->io_buffer, nbytes); if (size < 0) { if (size == VPPCOM_EAGAIN) @@ -1859,12 +1966,12 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) "sid %u, io_buffer %p, nbytes %u " "returned %d (%s)", getpid (), out_fd, out_fd, func_str, - sid, ldp->io_buffer, nbytes, + sid, ldpw->io_buffer, nbytes, size, vppcom_retval_str (size)); } if (results == 0) { - vec_reset_length (ldp->io_buffer); + vec_reset_length (ldpw->io_buffer); errno = -size; size = -1; goto done; @@ -1879,7 +1986,7 @@ sendfile (int out_fd, int in_fd, off_t * offset, size_t len) while (n_bytes_left > 0); update_offset: - vec_reset_length (ldp->io_buffer); + vec_reset_length (ldpw->io_buffer); if (offset) { off_t off = lseek (in_fd, *offset, SEEK_SET); @@ -2765,10 +2872,8 @@ listen (int fd, int n) { func_str = "vppcom_session_listen"; - if (LDP_DEBUG > 0) - clib_warning - ("LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x), n %d", - getpid (), fd, fd, func_str, sid, sid, n); + LDBG (0, "LDP<%d>: fd %d (0x%x): calling %s(): sid %u (0x%x), n %d", + getpid (), fd, fd, func_str, sid, sid, n); rv = vppcom_session_listen (sid, n); if (rv != VPPCOM_OK) @@ -2781,9 +2886,8 @@ listen (int fd, int n) { func_str = "libc_listen"; - if (LDP_DEBUG > 0) - clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): n %d", - getpid (), fd, fd, func_str, n); + LDBG (0, "LDP<%d>: fd %d (0x%x): calling %s(): n %d", getpid (), fd, + fd, func_str, n); rv = libc_listen (fd, n); } @@ -2856,7 +2960,7 @@ ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr, "accept sid %u (0x%x), ep %p, flags 0x%x", getpid (), listen_fd, listen_fd, func_str, accept_sid, accept_sid, ep, flags); - rv = ldp_fd_from_sid ((u32) accept_sid); + rv = ldp_fd_alloc ((u32) accept_sid); if (rv < 0) { (void) vppcom_session_close ((u32) accept_sid); @@ -2922,11 +3026,8 @@ shutdown (int fd, int how) if (sid != INVALID_SESSION_ID) { - func_str = __func__; - - clib_warning ("LDP<%d>: LDP-TBD", getpid ()); - errno = ENOSYS; - rv = -1; + func_str = "vppcom_session_close[TODO]"; + rv = close (fd); } else { @@ -2960,16 +3061,24 @@ shutdown (int fd, int how) int epoll_create1 (int flags) { + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); const char *func_str; int rv; if ((errno = -ldp_init ())) return -1; + if (ldp->vcl_needs_real_epoll) + { + rv = libc_epoll_create1 (flags); + ldp->vcl_needs_real_epoll = 0; + ldpw->vcl_mq_epfd = rv; + LDBG (0, "LDP<%d>: created vcl epfd %u", getpid (), rv); + return rv; + } func_str = "vppcom_epoll_create"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: calling %s()", getpid (), func_str); + LDBG (1, "LDP<%d>: calling %s()", getpid (), func_str); rv = vppcom_epoll_create (); @@ -2979,7 +3088,7 @@ epoll_create1 (int flags) rv = -1; } else - rv = ldp_fd_from_sid ((u32) rv); + rv = ldp_fd_alloc ((u32) rv); if (LDP_DEBUG > 1) { @@ -3007,121 +3116,105 @@ epoll_create (int size) int epoll_ctl (int epfd, int op, int fd, struct epoll_event *event) { - int rv; + u32 vep_idx = ldp_sid_from_fd (epfd), sid; const char *func_str; - u32 vep_idx = ldp_sid_from_fd (epfd); + int rv; if ((errno = -ldp_init ())) return -1; - if (PREDICT_TRUE (vep_idx != INVALID_SESSION_ID)) + if (PREDICT_FALSE (vep_idx == INVALID_SESSION_ID)) { - u32 sid = ldp_sid_from_fd (fd); + /* The LDP epoll_create1 always creates VCL epfd's. + * The app should never have a kernel base epoll fd unless it + * was acquired outside of the LD_PRELOAD process context. + * In any case, if we get one, punt it to libc_epoll_ctl. + */ + func_str = "libc_epoll_ctl"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x), " - "sid %d (0x%x)", getpid (), epfd, epfd, - vep_idx, vep_idx, sid, sid); + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): op %d, fd %d (0x%x)," + " event %p", getpid (), epfd, epfd, func_str, op, fd, fd, event); - if (sid != INVALID_SESSION_ID) - { - func_str = "vppcom_epoll_ctl"; + rv = libc_epoll_ctl (epfd, op, fd, event); + goto done; + } - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "vep_idx %d (0x%x), op %d, sid %u (0x%x), event %p", - getpid (), epfd, epfd, func_str, vep_idx, vep_idx, - sid, sid, event); + sid = ldp_sid_from_fd (fd); - rv = vppcom_epoll_ctl (vep_idx, op, sid, event); - if (rv != VPPCOM_OK) - { - errno = -rv; - rv = -1; - } + LDBG (0, "LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x), sid %d (0x%x)", + getpid (), epfd, epfd, vep_idx, vep_idx, sid, sid); + + if (sid != INVALID_SESSION_ID) + { + func_str = "vppcom_epoll_ctl"; + + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): vep_idx %d (0x%x)," + " op %d, sid %u (0x%x), event %p", getpid (), epfd, epfd, + func_str, vep_idx, vep_idx, sid, sid, event); + + rv = vppcom_epoll_ctl (vep_idx, op, sid, event); + if (rv != VPPCOM_OK) + { + errno = -rv; + rv = -1; } - else + } + else + { + int libc_epfd; + u32 size = sizeof (epfd); + + func_str = "vppcom_session_attr[GET_LIBC_EPFD]"; + libc_epfd = vppcom_session_attr (vep_idx, VPPCOM_ATTR_GET_LIBC_EPFD, 0, + 0); + LDBG (1, "LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): %s() " + "returned libc_epfd %d (0x%x)", getpid (), epfd, epfd, + vep_idx, vep_idx, func_str, libc_epfd, libc_epfd); + + if (!libc_epfd) { - int libc_epfd; - u32 size = sizeof (epfd); + func_str = "libc_epoll_create1"; - func_str = "vppcom_session_attr[GET_LIBC_EPFD]"; - libc_epfd = vppcom_session_attr (vep_idx, - VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): " - "%s() returned libc_epfd %d (0x%x)", - getpid (), epfd, epfd, vep_idx, vep_idx, - func_str, libc_epfd, libc_epfd); + LDBG (1, "LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): " + "calling %s(): EPOLL_CLOEXEC", getpid (), epfd, epfd, + vep_idx, vep_idx, func_str); - if (!libc_epfd) + libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); + if (libc_epfd < 0) { - func_str = "libc_epoll_create1"; + rv = libc_epfd; + goto done; + } - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x), vep_idx %d (0x%x): " - "calling %s(): EPOLL_CLOEXEC", - getpid (), epfd, epfd, vep_idx, vep_idx, - func_str); + func_str = "vppcom_session_attr[SET_LIBC_EPFD]"; + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): vep_idx %d (0x%x)," + " VPPCOM_ATTR_SET_LIBC_EPFD, libc_epfd %d (0x%x), size %d", + getpid (), epfd, epfd, func_str, vep_idx, vep_idx, libc_epfd, + libc_epfd, size); - libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC); - if (libc_epfd < 0) - { - rv = libc_epfd; - goto done; - } - - func_str = "vppcom_session_attr[SET_LIBC_EPFD]"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "vep_idx %d (0x%x), VPPCOM_ATTR_SET_LIBC_EPFD, " - "libc_epfd %d (0x%x), size %d", - getpid (), epfd, epfd, func_str, - vep_idx, vep_idx, libc_epfd, libc_epfd, size); - - rv = vppcom_session_attr (vep_idx, VPPCOM_ATTR_SET_LIBC_EPFD, - &libc_epfd, &size); - if (rv < 0) - { - errno = -rv; - rv = -1; - goto done; - } - } - else if (PREDICT_FALSE (libc_epfd < 0)) + rv = vppcom_session_attr (vep_idx, VPPCOM_ATTR_SET_LIBC_EPFD, + &libc_epfd, &size); + if (rv < 0) { - errno = -epfd; + errno = -rv; rv = -1; goto done; } - - func_str = "libc_epoll_ctl"; - - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "libc_epfd %d (0x%x), op %d, " - "fd %d (0x%x), event %p", - getpid (), epfd, epfd, func_str, - libc_epfd, libc_epfd, op, fd, fd, event); - - rv = libc_epoll_ctl (libc_epfd, op, fd, event); } - } - else - { - /* The LDP epoll_create1 always creates VCL epfd's. - * The app should never have a kernel base epoll fd unless it - * was acquired outside of the LD_PRELOAD process context. - * In any case, if we get one, punt it to libc_epoll_ctl. - */ + else if (PREDICT_FALSE (libc_epfd < 0)) + { + errno = -epfd; + rv = -1; + goto done; + } + func_str = "libc_epoll_ctl"; - if (LDP_DEBUG > 1) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "op %d, fd %d (0x%x), event %p", - getpid (), epfd, epfd, func_str, op, fd, fd, event); + LDBG (1, "LDP<%d>: epfd %d (0x%x): calling %s(): libc_epfd %d (0x%x), " + "op %d, fd %d (0x%x), event %p", getpid (), epfd, epfd, func_str, + libc_epfd, libc_epfd, op, fd, fd, event); - rv = libc_epoll_ctl (epfd, op, fd, event); + rv = libc_epoll_ctl (libc_epfd, op, fd, event); } done: @@ -3144,15 +3237,14 @@ done: } static inline int -ldp_epoll_pwait (int epfd, struct epoll_event *events, - int maxevents, int timeout, const sigset_t * sigmask) +ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents, + int timeout, const sigset_t * sigmask) { - const char *func_str; - int rv = 0; - double time_to_wait = (double) 0; - double time_out, now = 0; + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); + double time_to_wait = (double) 0, time_out, now = 0; u32 vep_idx = ldp_sid_from_fd (epfd); - int libc_epfd; + int libc_epfd, rv = 0; + const char *func_str; if ((errno = -ldp_init ())) return -1; @@ -3163,6 +3255,9 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, return -1; } + if (epfd == ldpw->vcl_mq_epfd) + return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask); + if (PREDICT_FALSE (vep_idx == INVALID_SESSION_ID)) { clib_warning ("LDP<%d>: ERROR: epfd %d (0x%x): bad vep_idx %d (0x%x)!", @@ -3171,8 +3266,8 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, return -1; } - time_to_wait = ((timeout >= 0) ? (double) timeout / (double) 1000 : 0); - time_out = clib_time_now (&ldp->clib_time) + time_to_wait; + time_to_wait = ((timeout >= 0) ? (double) timeout : 0); + time_out = clib_time_now (&ldpw->clib_time) + time_to_wait; func_str = "vppcom_session_attr[GET_LIBC_EPFD]"; libc_epfd = vppcom_session_attr (vep_idx, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0); @@ -3183,29 +3278,24 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, goto done; } - if (LDP_DEBUG > 2) - clib_warning ("LDP<%d>: epfd %d (0x%x): vep_idx %d (0x%x), " - "libc_epfd %d (0x%x), events %p, maxevents %d, " - "timeout %d, sigmask %p: time_to_wait %.02f", - getpid (), epfd, epfd, vep_idx, vep_idx, - libc_epfd, libc_epfd, events, maxevents, timeout, - sigmask, time_to_wait, time_out); + LDBG (2, "LDP<%d>: epfd %d (0x%x): vep_idx %d (0x%x), libc_epfd %d (0x%x), " + "events %p, maxevents %d, timeout %d, sigmask %p: time_to_wait %.02f", + getpid (), epfd, epfd, vep_idx, vep_idx, libc_epfd, libc_epfd, events, + maxevents, timeout, sigmask, time_to_wait, time_out); do { - if (!ldp->epoll_wait_vcl) + if (!ldpw->epoll_wait_vcl) { func_str = "vppcom_epoll_wait"; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "vep_idx %d (0x%x), events %p, maxevents %d", - getpid (), epfd, epfd, func_str, - vep_idx, vep_idx, events, maxevents); + LDBG (3, "LDP<%d>: epfd %d (0x%x): calling %s(): vep_idx %d (0x%x)," + " events %p, maxevents %d", getpid (), epfd, epfd, func_str, + vep_idx, vep_idx, events, maxevents); rv = vppcom_epoll_wait (vep_idx, events, maxevents, 0); if (rv > 0) { - ldp->epoll_wait_vcl = 1; + ldpw->epoll_wait_vcl = 1; goto done; } else if (rv < 0) @@ -3216,18 +3306,16 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, } } else - ldp->epoll_wait_vcl = 0; + ldpw->epoll_wait_vcl = 0; if (libc_epfd > 0) { func_str = "libc_epoll_pwait"; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: epfd %d (0x%x): calling %s(): " - "libc_epfd %d (0x%x), events %p, " - "maxevents %d, sigmask %p", - getpid (), epfd, epfd, func_str, - libc_epfd, libc_epfd, events, maxevents, sigmask); + LDBG (3, "LDP<%d>: epfd %d (0x%x): calling %s(): libc_epfd %d " + "(0x%x), events %p, maxevents %d, sigmask %p", getpid (), + epfd, epfd, func_str, libc_epfd, libc_epfd, events, + maxevents, sigmask); rv = libc_epoll_pwait (libc_epfd, events, maxevents, 1, sigmask); if (rv != 0) @@ -3235,7 +3323,7 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, } if (timeout != -1) - now = clib_time_now (&ldp->clib_time); + now = clib_time_now (&ldpw->clib_time); } while (now < time_out); @@ -3276,68 +3364,65 @@ epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout) int poll (struct pollfd *fds, nfds_t nfds, int timeout) { + ldp_worker_ctx_t *ldpw = ldp_worker_get_current (); const char *func_str = __func__; - int rv, i, n_libc_fds, n_revents; + int rv, i, n_revents = 0; u32 sid; vcl_poll_t *vp; double wait_for_time; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: fds %p, nfds %d, timeout %d", - getpid (), fds, nfds, timeout); + LDBG (3, "LDP<%d>: fds %p, nfds %d, timeout %d", getpid (), fds, nfds, + timeout); if (timeout >= 0) wait_for_time = (f64) timeout / 1000; else wait_for_time = -1; - n_libc_fds = 0; for (i = 0; i < nfds; i++) { - if (fds[i].fd >= 0) - { - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: fds[%d].fd %d (0x%0x), .events = 0x%x, " - ".revents = 0x%x", getpid (), i, fds[i].fd, - fds[i].fd, fds[i].events, fds[i].revents); + if (fds[i].fd < 0) + continue; - sid = ldp_sid_from_fd (fds[i].fd); - if (sid != INVALID_SESSION_ID) - { - fds[i].fd = -fds[i].fd; - vec_add2 (ldp->vcl_poll, vp, 1); - vp->fds_ndx = i; - vp->sid = sid; - vp->events = fds[i].events; + LDBG (3, "LDP<%d>: fds[%d] fd %d (0x%0x) events = 0x%x revents = 0x%x", + getpid (), i, fds[i].fd, fds[i].fd, fds[i].events, + fds[i].revents); + + sid = ldp_sid_from_fd (fds[i].fd); + if (sid != INVALID_SESSION_ID) + { + fds[i].fd = -fds[i].fd; + vec_add2 (ldpw->vcl_poll, vp, 1); + vp->fds_ndx = i; + vp->sid = sid; + vp->events = fds[i].events; #ifdef __USE_XOPEN2K - if (fds[i].events & POLLRDNORM) - vp->events |= POLLIN; - if (fds[i].events & POLLWRNORM) - vp->events |= POLLOUT; + if (fds[i].events & POLLRDNORM) + vp->events |= POLLIN; + if (fds[i].events & POLLWRNORM) + vp->events |= POLLOUT; #endif - vp->revents = &fds[i].revents; - } - else - n_libc_fds++; + vp->revents = fds[i].revents; + } + else + { + vec_add1 (ldpw->libc_poll, fds[i]); + vec_add1 (ldpw->libc_poll_idxs, i); } } - n_revents = 0; do { - if (vec_len (ldp->vcl_poll)) + if (vec_len (ldpw->vcl_poll)) { func_str = "vppcom_poll"; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: calling %s(): " - "vcl_poll %p, n_sids %u (0x%x): " - "n_libc_fds %u", - getpid (), func_str, ldp->vcl_poll, - vec_len (ldp->vcl_poll), vec_len (ldp->vcl_poll), - n_libc_fds); + LDBG (3, "LDP<%d>: calling %s(): vcl_poll %p, n_sids %u (0x%x): " + "n_libc_fds %u", getpid (), func_str, ldpw->vcl_poll, + vec_len (ldpw->vcl_poll), vec_len (ldpw->vcl_poll), + vec_len (ldpw->libc_poll)); - rv = vppcom_poll (ldp->vcl_poll, vec_len (ldp->vcl_poll), 0); + rv = vppcom_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0); if (rv < 0) { errno = -rv; @@ -3348,15 +3433,14 @@ poll (struct pollfd *fds, nfds_t nfds, int timeout) n_revents += rv; } - if (n_libc_fds) + if (vec_len (ldpw->libc_poll)) { func_str = "libc_poll"; - if (LDP_DEBUG > 3) - clib_warning ("LDP<%d>: calling %s(): fds %p, nfds %u: n_sids %u", - getpid (), fds, nfds, vec_len (ldp->vcl_poll)); + LDBG (3, "LDP<%d>: calling %s(): fds %p, nfds %u: n_sids %u", + getpid (), fds, nfds, vec_len (ldpw->vcl_poll)); - rv = libc_poll (fds, nfds, 0); + rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0); if (rv < 0) goto done; else @@ -3370,13 +3454,14 @@ poll (struct pollfd *fds, nfds_t nfds, int timeout) } } while ((wait_for_time == -1) || - (clib_time_now (&ldp->clib_time) < wait_for_time)); + (clib_time_now (&ldpw->clib_time) < wait_for_time)); rv = 0; done: - vec_foreach (vp, ldp->vcl_poll) + vec_foreach (vp, ldpw->vcl_poll) { fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd; + fds[vp->fds_ndx].revents = vp->revents; #ifdef __USE_XOPEN2K if ((fds[vp->fds_ndx].revents & POLLIN) && (fds[vp->fds_ndx].events & POLLRDNORM)) @@ -3386,7 +3471,14 @@ done: fds[vp->fds_ndx].revents |= POLLWRNORM; #endif } - vec_reset_length (ldp->vcl_poll); + vec_reset_length (ldpw->vcl_poll); + + for (i = 0; i < vec_len (ldpw->libc_poll); i++) + { + fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents; + } + vec_reset_length (ldpw->libc_poll_idxs); + vec_reset_length (ldpw->libc_poll); if (LDP_DEBUG > 3) { @@ -3403,7 +3495,7 @@ done: { clib_warning ("LDP<%d>: returning %d (0x%x): n_sids %u, " "n_libc_fds %d", getpid (), rv, rv, - vec_len (ldp->vcl_poll), n_libc_fds); + vec_len (ldpw->vcl_poll), vec_len (ldpw->libc_poll)); for (i = 0; i < nfds; i++) {