* Epoll state
*/
u8 epoll_wait_vcl;
+ u8 mq_epfd_added;
int vcl_mq_epfd;
} ldp_worker_ctx_t;
#define LDBG(_lvl, _fmt, _args...) \
if (ldp->debug > _lvl) \
- clib_warning ("ldp<%d>: " _fmt, getpid(), ##_args)
+ { \
+ int errno_saved = errno; \
+ fprintf (stderr, "ldp<%d>: " _fmt, getpid(), ##_args); \
+ errno = errno_saved; \
+ }
static ldp_main_t ldp_main = {
.vlsh_bit_val = (1 << LDP_SID_BIT_MIN),
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
- do
+ for (i = 0; i < iovcnt; ++i)
{
- for (i = 0; i < iovcnt; ++i)
+ rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len);
+ if (rv <= 0)
+ break;
+ else
{
- rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len);
- if (rv < 0)
+ total += rv;
+ if (rv < iov[i].iov_len)
break;
- else
- {
- total += rv;
- if (rv < iov[i].iov_len)
- break;
- }
}
}
- while ((rv >= 0) && (total == 0));
-
- if (rv < 0)
+ if (rv < 0 && total == 0)
{
errno = -rv;
size = -1;
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
- do
+ for (i = 0; i < iovcnt; ++i)
{
- for (i = 0; i < iovcnt; ++i)
+ rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len);
+ if (rv < 0)
+ break;
+ else
{
- rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len);
- if (rv < 0)
+ total += rv;
+ if (rv < iov[i].iov_len)
break;
- else
- {
- total += rv;
- if (rv < iov[i].iov_len)
- break;
- }
}
}
- while ((rv >= 0) && (total == 0));
- if (rv < 0)
+ if (rv < 0 && total == 0)
{
errno = -rv;
size = -1;
return size;
}
-int
-fcntl (int fd, int cmd, ...)
+static int
+fcntl_internal (int fd, int cmd, va_list ap)
{
vls_handle_t vlsh;
int rv = 0;
- va_list ap;
-
- if ((errno = -ldp_init ()))
- return -1;
-
- va_start (ap, cmd);
vlsh = ldp_fd_to_vlsh (fd);
LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd);
}
else
{
+#ifdef HAVE_FCNTL64
+ rv = libc_vfcntl64 (fd, cmd, ap);
+#else
rv = libc_vfcntl (fd, cmd, ap);
+#endif
}
+ return rv;
+}
+
+int
+fcntl (int fd, int cmd, ...)
+{
+ va_list ap;
+ int rv;
+
+ if ((errno = -ldp_init ()))
+ return -1;
+
+ va_start (ap, cmd);
+ rv = fcntl_internal (fd, cmd, ap);
va_end (ap);
return rv;
}
+int
+fcntl64 (int fd, int cmd, ...)
+{
+ va_list ap;
+ int rv;
+
+ if ((errno = -ldp_init ()))
+ return -1;
+
+ va_start (ap, cmd);
+ rv = fcntl_internal (fd, cmd, ap);
+ va_end (ap);
+ return rv;
+}
+
int
ioctl (int fd, unsigned long int cmd, ...)
{
if (vlsh == VLS_INVALID_HANDLE)
clib_bitmap_set_no_check (*libcb, fd, 1);
else
- clib_bitmap_set_no_check (*vclb, vlsh_to_session_index (vlsh), 1);
+ *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1);
}));
/* *INDENT-ON* */
si_bits_set = clib_bitmap_last_set (*vclb) + 1;
*si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits;
+ clib_bitmap_validate (*resultb, *si_bits);
libc_bits_set = clib_bitmap_last_set (*libcb) + 1;
*libc_bits = (libc_bits_set > *libc_bits) ? libc_bits_set : *libc_bits;
/* *INDENT-OFF* */
clib_bitmap_foreach (si, vclb, ({
vlsh = vls_session_index_to_vlsh (si);
+ ASSERT (vlsh != VLS_INVALID_HANDLE);
fd = ldp_vlsh_to_fd (vlsh);
if (PREDICT_FALSE (fd < 0))
{
{
if (readfds)
clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap,
- vec_len (ldpw->rd_bitmap) *
+ vec_len (ldpw->si_rd_bitmap) *
sizeof (clib_bitmap_t));
if (writefds)
clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap,
- vec_len (ldpw->wr_bitmap) *
+ vec_len (ldpw->si_wr_bitmap) *
sizeof (clib_bitmap_t));
if (exceptfds)
clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap,
- vec_len (ldpw->ex_bitmap) *
+ vec_len (ldpw->si_ex_bitmap) *
sizeof (clib_bitmap_t));
rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL,
{
errno = -rv;
rv = -1;
+ goto done;
}
else if (rv > 0)
{
size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0);
if (size < 0)
{
- LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %d (%s)!",
+ LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %ld (%s)!",
out_fd, vlsh, size, vppcom_retval_str (size));
vec_reset_length (ldpw->io_buffer);
errno = -size;
{
size = vls_recvfrom (vlsh, buf, n, flags, NULL);
if (size < 0)
- errno = -size;
+ {
+ errno = -size;
+ size = -1;
+ }
}
else
{
return size;
}
+static int
+ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags,
+ __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
+{
+ vppcom_endpt_t *ep = 0;
+ vppcom_endpt_t _ep;
+
+ if (addr)
+ {
+ ep = &_ep;
+ switch (addr->sa_family)
+ {
+ case AF_INET:
+ ep->is_ip4 = VPPCOM_IS_IP4;
+ ep->ip =
+ (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr;
+ ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port;
+ break;
+
+ case AF_INET6:
+ ep->is_ip4 = VPPCOM_IS_IP6;
+ ep->ip =
+ (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
+ ep->port =
+ (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port;
+ break;
+
+ default:
+ return EAFNOSUPPORT;
+ }
+ }
+
+ return vls_sendto (vlsh, (void *) buf, n, flags, ep);
+}
+
+static int
+ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n,
+ int flags, __SOCKADDR_ARG addr,
+ socklen_t * __restrict addr_len)
+{
+ u8 src_addr[sizeof (struct sockaddr_in6)];
+ vppcom_endpt_t ep;
+ ssize_t size;
+ int rv;
+
+ if (addr)
+ {
+ ep.ip = src_addr;
+ size = vls_recvfrom (vlsh, buf, n, flags, &ep);
+
+ if (size > 0)
+ {
+ rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
+ if (rv < 0)
+ size = rv;
+ }
+ }
+ else
+ size = vls_recvfrom (vlsh, buf, n, flags, NULL);
+
+ return size;
+}
+
ssize_t
sendto (int fd, const void *buf, size_t n, int flags,
__CONST_SOCKADDR_ARG addr, socklen_t addr_len)
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != INVALID_SESSION_ID)
{
- vppcom_endpt_t *ep = 0;
- vppcom_endpt_t _ep;
-
- if (addr)
- {
- ep = &_ep;
- switch (addr->sa_family)
- {
- case AF_INET:
- ep->is_ip4 = VPPCOM_IS_IP4;
- ep->ip =
- (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr;
- ep->port =
- (uint16_t) ((const struct sockaddr_in *) addr)->sin_port;
- break;
-
- case AF_INET6:
- ep->is_ip4 = VPPCOM_IS_IP6;
- ep->ip =
- (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
- ep->port =
- (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port;
- break;
-
- default:
- errno = EAFNOSUPPORT;
- size = -1;
- goto done;
- }
- }
-
- size = vls_sendto (vlsh, (void *) buf, n, flags, ep);
+ size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len);
if (size < 0)
{
errno = -size;
size = libc_sendto (fd, buf, n, flags, addr, addr_len);
}
-done:
return size;
}
recvfrom (int fd, void *__restrict buf, size_t n, int flags,
__SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
{
- vls_handle_t sid;
- ssize_t size, rv;
+ vls_handle_t vlsh;
+ ssize_t size;
if ((errno = -ldp_init ()))
return -1;
- sid = ldp_fd_to_vlsh (fd);
- if (sid != VLS_INVALID_HANDLE)
+ vlsh = ldp_fd_to_vlsh (fd);
+ if (vlsh != VLS_INVALID_HANDLE)
{
- vppcom_endpt_t ep;
- u8 src_addr[sizeof (struct sockaddr_in6)];
-
- if (addr)
- {
- ep.ip = src_addr;
- size = vls_recvfrom (sid, buf, n, flags, &ep);
-
- if (size > 0)
- {
- rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
- if (rv < 0)
- size = rv;
- }
- }
- else
- size = vls_recvfrom (sid, buf, n, flags, NULL);
-
+ size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len);
if (size < 0)
{
errno = -size;
}
ssize_t
-sendmsg (int fd, const struct msghdr * message, int flags)
+sendmsg (int fd, const struct msghdr * msg, int flags)
{
vls_handle_t vlsh;
ssize_t size;
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
- LDBG (0, "LDP-TBD");
- errno = ENOSYS;
- size = -1;
+ struct iovec *iov = msg->msg_iov;
+ ssize_t total = 0;
+ int i, rv;
+
+ for (i = 0; i < msg->msg_iovlen; ++i)
+ {
+ rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
+ msg->msg_name, msg->msg_namelen);
+ if (rv < 0)
+ break;
+ else
+ {
+ total += rv;
+ if (rv < iov[i].iov_len)
+ break;
+ }
+ }
+
+ if (rv < 0 && total == 0)
+ {
+ errno = -rv;
+ size = -1;
+ }
+ else
+ size = total;
}
else
{
- size = libc_sendmsg (fd, message, flags);
+ size = libc_sendmsg (fd, msg, flags);
}
return size;
#endif
ssize_t
-recvmsg (int fd, struct msghdr * message, int flags)
+recvmsg (int fd, struct msghdr * msg, int flags)
{
vls_handle_t vlsh;
ssize_t size;
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
- LDBG (0, "LDP-TBD");
- errno = ENOSYS;
- size = -1;
+ struct iovec *iov = msg->msg_iov;
+ ssize_t max_deq, total = 0;
+ int i, rv;
+
+ max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
+ if (!max_deq)
+ return 0;
+
+ for (i = 0; i < msg->msg_iovlen; i++)
+ {
+ rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
+ (i == 0 ? msg->msg_name : NULL),
+ (i == 0 ? &msg->msg_namelen : NULL));
+ if (rv <= 0)
+ break;
+ else
+ {
+ total += rv;
+ if (rv < iov[i].iov_len)
+ break;
+ }
+ if (total >= max_deq)
+ break;
+ }
+
+ if (rv < 0 && total == 0)
+ {
+ errno = -rv;
+ size = -1;
+ }
+ else
+ size = total;
}
else
{
- size = libc_recvmsg (fd, message, flags);
+ size = libc_recvmsg (fd, msg, flags);
}
return size;
rv = -EFAULT;
break;
case TCP_CONGESTION:
- strcpy (optval, "cubic");
*optlen = strlen ("cubic");
+ strncpy (optval, "cubic", *optlen + 1);
rv = 0;
break;
default:
ep.ip = src_addr;
LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u,"
- " ep %p, flags 0x%x", listen_fd, listen_vlsh, ep, flags);
+ " ep %p, flags 0x%x", listen_fd, listen_vlsh, &ep, flags);
accept_vlsh = vls_accept (listen_vlsh, &ep, flags);
if (accept_vlsh < 0)
if ((errno = -ldp_init ()))
return -1;
- if (ldp->vcl_needs_real_epoll)
+ if (ldp->vcl_needs_real_epoll || vls_use_real_epoll ())
{
/* Make sure workers have been allocated */
if (!ldp->workers)
if (vlsh != VLS_INVALID_HANDLE)
{
LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
- " event %p", epfd, vep_vlsh, vlsh, event);
+ " event %p", epfd, vep_vlsh, op, vlsh, event);
rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
if (rv != VPPCOM_OK)
return rv;
}
+static inline int
+ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
+ int maxevents, int timeout, const sigset_t * sigmask)
+{
+ ldp_worker_ctx_t *ldpw;
+ int libc_epfd, rv = 0, num_ev;
+ vls_handle_t ep_vlsh;
+
+ if ((errno = -ldp_init ()))
+ return -1;
+
+ if (PREDICT_FALSE (!events || (timeout < -1)))
+ {
+ errno = EFAULT;
+ return -1;
+ }
+
+ /* Make sure the vcl worker is valid. Could be that epoll fd was created on
+ * one thread but it is now used on another */
+ if (PREDICT_FALSE (vppcom_worker_index () == ~0))
+ vls_register_vcl_worker ();
+
+ ldpw = ldp_worker_get_current ();
+ if (epfd == ldpw->vcl_mq_epfd)
+ return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
+
+ ep_vlsh = ldp_fd_to_vlsh (epfd);
+ if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
+ {
+ LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
+ errno = EBADFD;
+ return -1;
+ }
+
+ libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
+ if (PREDICT_FALSE (!libc_epfd))
+ {
+ u32 size = sizeof (epfd);
+
+ LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
+ "EPOLL_CLOEXEC", epfd, ep_vlsh);
+ libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
+ if (libc_epfd < 0)
+ {
+ rv = libc_epfd;
+ goto done;
+ }
+
+ rv = vls_attr (ep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size);
+ if (rv < 0)
+ {
+ errno = -rv;
+ rv = -1;
+ goto done;
+ }
+ }
+ if (PREDICT_FALSE (libc_epfd <= 0))
+ {
+ errno = -libc_epfd;
+ rv = -1;
+ goto done;
+ }
+
+ if (PREDICT_FALSE (!ldpw->mq_epfd_added))
+ {
+ struct epoll_event e = { 0 };
+ e.events = EPOLLIN;
+ e.data.fd = ldpw->vcl_mq_epfd;
+ if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) <
+ 0)
+ {
+ LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d",
+ epfd, ldpw->vcl_mq_epfd, libc_epfd);
+ rv = -1;
+ goto done;
+ }
+ ldpw->mq_epfd_added = 1;
+ }
+
+ rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0);
+ if (rv > 0)
+ goto done;
+ else if (PREDICT_FALSE (rv < 0))
+ {
+ errno = -rv;
+ rv = -1;
+ goto done;
+ }
+
+ rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask);
+ if (rv <= 0)
+ goto done;
+ for (int i = 0; i < rv; i++)
+ {
+ if (events[i].data.fd == ldpw->vcl_mq_epfd)
+ {
+ /* We should remove mq epoll fd from events. */
+ rv--;
+ if (i != rv)
+ {
+ events[i].events = events[rv].events;
+ events[i].data.u64 = events[rv].data.u64;
+ }
+ num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0);
+ if (PREDICT_TRUE (num_ev > 0))
+ rv += num_ev;
+ break;
+ }
+ }
+
+done:
+ return rv;
+}
+
int
epoll_pwait (int epfd, struct epoll_event *events,
int maxevents, int timeout, const sigset_t * sigmask)
{
- return ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
+ if (vls_use_eventfd ())
+ return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout,
+ sigmask);
+ else
+ return ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
}
int
epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout)
{
- return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL);
+ if (vls_use_eventfd ())
+ return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL);
+ else
+ return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL);
}
int
vcl_poll_t *vp;
double max_time;
- LDBG (3, "fds %p, nfds %d, timeout %d", fds, nfds, timeout);
+ LDBG (3, "fds %p, nfds %ld, timeout %d", fds, nfds, timeout);
if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
clib_time_init (&ldpw->clib_time);