#include <stdarg.h>
#include <sys/resource.h>
#include <netinet/tcp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <vcl/ldp_socket_wrapper.h>
#include <vcl/ldp.h>
#define SOCKADDR_GET_SA(__addr) _addr;
#endif
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ORIGINAL_DST
+/* from <linux/netfilter_ipv4.h> */
+#define SO_ORIGINAL_DST 80
+#endif
typedef struct ldp_worker_ctx_
{
u8 *io_buffer;
static inline void
ldp_set_app_name (char *app_name)
{
- snprintf (ldp->app_name, LDP_APP_NAME_MAX,
- "ldp-%d-%s", getpid (), app_name);
+ snprintf (ldp->app_name, LDP_APP_NAME_MAX, "%s-ldp-%d", app_name, getpid ()); /* stores "<app_name>-ldp-<pid>" in ldp->app_name; snprintf is given LDP_APP_NAME_MAX as the size limit */
}
static inline char *
ldp_get_app_name ()
{
if (ldp->app_name[0] == '\0')
- ldp_set_app_name ("app");
+ ldp_set_app_name (program_invocation_short_name); /* name still empty: build one from program_invocation_short_name before returning it */
return ldp->app_name;
}
{
if (ldp->workers)
return;
- pool_alloc (ldp->workers, LDP_MAX_NWORKERS);
+ ldp->workers = vec_new (ldp_worker_ctx_t, LDP_MAX_NWORKERS);
}
-static int
-ldp_init (void)
+static void
+ldp_init_cfg (void)
{
- ldp_worker_ctx_t *ldpw;
- int rv;
-
- ASSERT (!ldp->init);
-
- ldp->init = 1;
- ldp->vcl_needs_real_epoll = 1;
- rv = vls_app_create (ldp_get_app_name ());
- if (rv != VPPCOM_OK)
- {
- ldp->vcl_needs_real_epoll = 0;
- if (rv == VPPCOM_EEXIST)
- return 0;
- LDBG (2, "\nERROR: ldp_init: vppcom_app_create()"
- " failed! rv = %d (%s)\n", rv, vppcom_retval_str (rv));
- ldp->init = 0;
- return rv;
- }
- ldp->vcl_needs_real_epoll = 0;
- ldp_alloc_workers ();
- ldpw = ldp_worker_get_current ();
-
char *env_var_str = getenv (LDP_ENV_DEBUG);
if (env_var_str)
{
{
ldp->transparent_tls = 1;
}
+}
- /* *INDENT-OFF* */
- pool_foreach (ldpw, ldp->workers) {
+static int
+ldp_init (void)
+{
+ ldp_worker_ctx_t *ldpw;
+ int rv;
+
+ if (ldp->init)
+ {
+ LDBG (0, "LDP is initialized already");
+ return 0;
+ }
+
+ ldp_init_cfg ();
+ ldp->init = 1;
+ ldp->vcl_needs_real_epoll = 1;
+ rv = vls_app_create (ldp_get_app_name ());
+ if (rv != VPPCOM_OK)
+ {
+ ldp->vcl_needs_real_epoll = 0;
+ if (rv == VPPCOM_EEXIST)
+ return 0;
+ LDBG (2,
+ "\nERROR: ldp_init: vppcom_app_create()"
+ " failed! rv = %d (%s)\n",
+ rv, vppcom_retval_str (rv));
+ ldp->init = 0;
+ return rv;
+ }
+ ldp->vcl_needs_real_epoll = 0;
+ ldp_alloc_workers ();
+
+ vec_foreach (ldpw, ldp->workers)
clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time));
- }
- /* *INDENT-ON* */
LDBG (0, "LDP initialization: done!");
epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
if (epfd > 0)
{
+ ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ u32 size = sizeof (epfd);
+
LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd);
- rv = libc_close (epfd);
- if (rv < 0)
- {
- u32 size = sizeof (epfd);
- epfd = 0;
+ libc_close (epfd);
+ ldpw->mq_epfd_added = 0;
- (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
- }
+ epfd = 0;
+ (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
}
else if (PREDICT_FALSE (epfd < 0))
{
case FIONBIO:
{
- u32 flags = va_arg (ap, int) ? O_NONBLOCK : 0;
+ u32 flags = *(va_arg (ap, int *)) ? O_NONBLOCK : 0;
u32 size = sizeof (flags);
/* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than
static inline int
ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n,
- vppcom_endpt_tlv_t *ep_tlv, int flags,
+ vppcom_endpt_tlv_t *app_tlvs, int flags,
__CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
{
const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vppcom_endpt_t *ep = 0;
vppcom_endpt_t _ep;
- if (ep_tlv)
- {
- _ep.app_data = *ep_tlv;
- }
+ _ep.app_tlvs = app_tlvs;
if (addr)
{
return size;
}
+/**
+ * Translate the ancillary data attached to @a msg into VCL endpoint TLVs.
+ *
+ * Recognised control messages:
+ *  - SOL_UDP / UDP_SEGMENT: appended as VCL_UDP_SEGMENT with the uint16_t
+ *    payload of the control message.
+ *  - SOL_IP / IP_PKTINFO: appended as VCL_IP_PKTINFO with the ipi_spec_dst
+ *    address of the control message.
+ * Unsupported levels/types and control messages too short to hold their
+ * payload are logged and skipped.
+ *
+ * @param vlsh      session handle (not used by the parser)
+ * @param msg       message whose control buffer is walked
+ * @param app_tlvs  in/out vector; each TLV header is appended followed
+ *                  directly by its value, and the (possibly moved) vector
+ *                  pointer is stored back on return
+ * @return always 0
+ */
+static int
+ldp_parse_cmsg (vls_handle_t vlsh, const struct msghdr *msg,
+		vppcom_endpt_tlv_t **app_tlvs)
+{
+  uint8_t *ad, *at = (uint8_t *) *app_tlvs;
+  vppcom_endpt_tlv_t *adh;
+  struct in_pktinfo *pi;
+  struct cmsghdr *cmsg;
+
+  for (cmsg = CMSG_FIRSTHDR (msg); cmsg != NULL;
+       cmsg = CMSG_NXTHDR ((struct msghdr *) msg, cmsg))
+    {
+      switch (cmsg->cmsg_level)
+	{
+	case SOL_UDP:
+	  switch (cmsg->cmsg_type)
+	    {
+	    case UDP_SEGMENT:
+	      /* Do not read past a truncated control message */
+	      if (cmsg->cmsg_len < CMSG_LEN (sizeof (uint16_t)))
+		{
+		  LDBG (1, "UDP_SEGMENT cmsg too short: len %u",
+			(unsigned) cmsg->cmsg_len);
+		  break;
+		}
+	      vec_add2 (at, adh, sizeof (*adh));
+	      adh->data_type = VCL_UDP_SEGMENT;
+	      adh->data_len = sizeof (uint16_t);
+	      vec_add2 (at, ad, sizeof (uint16_t));
+	      /* Byte copy instead of a type-punned load/store */
+	      clib_memcpy_fast (ad, CMSG_DATA (cmsg), sizeof (uint16_t));
+	      break;
+	    default:
+	      LDBG (1, "SOL_UDP cmsg_type %u not supported", cmsg->cmsg_type);
+	      break;
+	    }
+	  break;
+	case SOL_IP:
+	  switch (cmsg->cmsg_type)
+	    {
+	    case IP_PKTINFO:
+	      /* Do not read past a truncated control message */
+	      if (cmsg->cmsg_len < CMSG_LEN (sizeof (struct in_pktinfo)))
+		{
+		  LDBG (1, "IP_PKTINFO cmsg too short: len %u",
+			(unsigned) cmsg->cmsg_len);
+		  break;
+		}
+	      vec_add2 (at, adh, sizeof (*adh));
+	      adh->data_type = VCL_IP_PKTINFO;
+	      adh->data_len = sizeof (struct in_addr);
+	      vec_add2 (at, ad, sizeof (struct in_addr));
+	      pi = (void *) CMSG_DATA (cmsg);
+	      clib_memcpy_fast (ad, &pi->ipi_spec_dst,
+				sizeof (struct in_addr));
+	      break;
+	    default:
+	      LDBG (1, "SOL_IP cmsg_type %u not supported", cmsg->cmsg_type);
+	      break;
+	    }
+	  break;
+	default:
+	  LDBG (1, "cmsg_level %u not supported", cmsg->cmsg_level);
+	  break;
+	}
+    }
+  /* vec_add2 may have reallocated the vector; hand the current base back */
+  *app_tlvs = (vppcom_endpt_tlv_t *) at;
+  return 0;
+}
+
+
+/**
+ * Fill the first control message of @a msg with an IP_PKTINFO entry when
+ * the session has IP_PKTINFO enabled.
+ *
+ * The first control header is always zeroed. If the session reports
+ * IP_PKTINFO as set and its local address can be read, the header is
+ * populated with level SOL_IP, type IP_PKTINFO and a struct in_pktinfo whose
+ * ipi_addr holds that local address. Nothing is written when the control
+ * buffer is missing or too small.
+ *
+ * @param vlsh  session handle queried for IP_PKTINFO and local address
+ * @param msg   message whose control buffer receives the entry
+ * @return always 0
+ */
+static int
+ldp_make_cmsg (vls_handle_t vlsh, struct msghdr *msg)
+{
+  u32 optval = 0, optlen = sizeof (optval);
+  struct cmsghdr *cmsg;
+
+  /* CMSG_FIRSTHDR yields NULL when the control buffer cannot hold a header */
+  cmsg = CMSG_FIRSTHDR (msg);
+  if (!cmsg)
+    return 0;
+  memset (cmsg, 0, sizeof (*cmsg));
+
+  /* NOTE(review): assumes vls_attr returns 0 on success for this attribute,
+   * as the GET_LCL_ADDR call below already does; the previous check was
+   * inverted and read optval uninitialised on failure. Confirm. */
+  if (vls_attr (vlsh, VPPCOM_ATTR_GET_IP_PKTINFO, (void *) &optval, &optlen))
+    return 0;
+
+  if (optval)
+    {
+      vppcom_endpt_t ep;
+      u8 addr_buf[sizeof (struct in_addr)];
+      u32 size = sizeof (ep);
+
+      /* Header plus payload must fit in the caller's control buffer */
+      if (msg->msg_controllen < CMSG_LEN (sizeof (struct in_pktinfo)))
+	return 0;
+
+      ep.ip = addr_buf;
+
+      if (!vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size))
+	{
+	  struct in_pktinfo pi = {};
+
+	  clib_memcpy (&pi.ipi_addr, ep.ip, sizeof (struct in_addr));
+	  cmsg->cmsg_level = SOL_IP;
+	  cmsg->cmsg_type = IP_PKTINFO;
+	  cmsg->cmsg_len = CMSG_LEN (sizeof (pi));
+	  clib_memcpy (CMSG_DATA (cmsg), &pi, sizeof (pi));
+	}
+    }
+
+  return 0;
+}
+
+
ssize_t
sendmsg (int fd, const struct msghdr * msg, int flags)
{
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
+ vppcom_endpt_tlv_t *app_tlvs = 0;
struct iovec *iov = msg->msg_iov;
ssize_t total = 0;
int i, rv = 0;
- struct cmsghdr *cmsg;
- uint16_t *valp;
- vppcom_endpt_tlv_t _app_data;
- vppcom_endpt_tlv_t *p_app_data = NULL;
- cmsg = CMSG_FIRSTHDR (msg);
- if (cmsg && cmsg->cmsg_type == UDP_SEGMENT)
- {
- p_app_data = &_app_data;
- valp = (void *) CMSG_DATA (cmsg);
- p_app_data->data_type = VCL_UDP_SEGMENT;
- p_app_data->data_len = sizeof (*valp);
- p_app_data->value = *valp;
- }
+ ldp_parse_cmsg (vlsh, msg, &app_tlvs);
for (i = 0; i < msg->msg_iovlen; ++i)
{
- rv =
- ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, p_app_data,
- flags, msg->msg_name, msg->msg_namelen);
+ rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, app_tlvs,
+ flags, msg->msg_name, msg->msg_namelen);
if (rv < 0)
break;
else
}
}
+ vec_free (app_tlvs);
+
if (rv < 0 && total == 0)
{
errno = -rv;
if (size < 0)
{
int errno_val = errno;
- perror (func_str);
clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
"rv %d, errno = %d", getpid (), fd, fd,
func_str, size, errno_val);
{
struct iovec *iov = msg->msg_iov;
ssize_t max_deq, total = 0;
- int i, rv;
+ int i, rv = 0;
max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
if (!max_deq)
size = -1;
}
else
- size = total;
+ {
+ if (msg->msg_controllen)
+ ldp_make_cmsg (vlsh, msg);
+ size = total;
+ }
}
else
{
break;
}
break;
+ case SOL_IP:
+ switch (optname)
+ {
+ case SO_ORIGINAL_DST:
+ rv =
+ vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, optval, optlen);
+ break;
+ default:
+ LDBG (0,
+ "ERROR: fd %d: getsockopt SOL_IP: vlsh %u "
+ "optname %d unsupported!",
+ fd, vlsh, optname);
+ break;
+ }
+ break;
case SOL_IPV6:
switch (optname)
{
break;
}
break;
+ case SOL_IP:
+ switch (optname)
+ {
+ case IP_PKTINFO:
+ rv = vls_attr (vlsh, VPPCOM_ATTR_SET_IP_PKTINFO, (void *) optval,
+ &optlen);
+ break;
+ default:
+ LDBG (0,
+ "ERROR: fd %d: setsockopt SOL_IP: vlsh %u optname %d"
+ "unsupported!",
+ fd, vlsh, optname);
+ break;
+ }
+ break;
default:
break;
}
* was acquired outside of the LD_PRELOAD process context.
* In any case, if we get one, punt it to libc_epoll_ctl.
*/
- LDBG (1, "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
- " event %p", epfd, op, fd, event);
+ LDBG (1,
+ "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
+ " events 0x%x",
+ epfd, op, fd, event ? event->events : 0);
rv = libc_epoll_ctl (epfd, op, fd, event);
goto done;
if (vlsh != VLS_INVALID_HANDLE)
{
- LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
- " event %p", epfd, vep_vlsh, op, vlsh, event);
+ LDBG (1,
+ "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
+ " events 0x%x",
+ epfd, vep_vlsh, op, vlsh, event ? event->events : 0);
rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
if (rv != VPPCOM_OK)
ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
int timeout, const sigset_t * sigmask)
{
- ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ ldp_worker_ctx_t *ldpw;
double time_to_wait = (double) 0, max_time;
int libc_epfd, rv = 0;
vls_handle_t ep_vlsh;
return -1;
}
+ if (PREDICT_FALSE (vppcom_worker_index () == ~0))
+ vls_register_vcl_worker ();
+
+ ldpw = ldp_worker_get_current ();
if (epfd == ldpw->vcl_mq_epfd)
return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
int maxevents, int timeout, const sigset_t * sigmask)
{
+ int libc_epfd, rv = 0, num_ev, libc_num_ev, vcl_wups = 0;
+ struct epoll_event *libc_evts;
ldp_worker_ctx_t *ldpw;
- int libc_epfd, rv = 0, num_ev;
vls_handle_t ep_vlsh;
ldp_init_check ();
/* Request to only drain unhandled to prevent libc_epoll_wait starved */
rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2);
if (rv > 0)
- goto done;
+ {
+ timeout = 0;
+ if (rv >= maxevents)
+ goto done;
+ maxevents -= rv;
+ }
else if (PREDICT_FALSE (rv < 0))
{
errno = -rv;
goto done;
}
- rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask);
- if (rv <= 0)
- goto done;
- for (int i = 0; i < rv; i++)
+epoll_again:
+
+ libc_evts = &events[rv];
+ libc_num_ev =
+ libc_epoll_pwait (libc_epfd, libc_evts, maxevents, timeout, sigmask);
+ if (libc_num_ev <= 0)
+ {
+ rv = rv >= 0 ? rv : -1;
+ goto done;
+ }
+
+ for (int i = 0; i < libc_num_ev; i++)
{
- if (events[i].data.fd == ldpw->vcl_mq_epfd)
+ if (libc_evts[i].data.fd == ldpw->vcl_mq_epfd)
{
/* We should remove mq epoll fd from events. */
- rv--;
- if (i != rv)
+ libc_num_ev--;
+ if (i != libc_num_ev)
{
- events[i].events = events[rv].events;
- events[i].data.u64 = events[rv].data.u64;
+ libc_evts[i].events = libc_evts[libc_num_ev].events;
+ libc_evts[i].data.u64 = libc_evts[libc_num_ev].data.u64;
}
- num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0);
+ num_ev = vls_epoll_wait (ep_vlsh, &libc_evts[libc_num_ev],
+ maxevents - libc_num_ev, 0);
if (PREDICT_TRUE (num_ev > 0))
rv += num_ev;
+ /* Woken up by vcl but no events generated. Accept it once */
+ if (rv == 0 && libc_num_ev == 0 && timeout && vcl_wups++ < 1)
+ goto epoll_again;
break;
}
}
+ rv += libc_num_ev;
+
done:
return rv;
}