2 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
22 #include <sys/resource.h>
23 #include <netinet/tcp.h>
24 #include <linux/udp.h>
26 #include <vcl/ldp_socket_wrapper.h>
30 #include <vcl/vcl_locked.h>
31 #include <vppinfra/time.h>
32 #include <vppinfra/bitmap.h>
33 #include <vppinfra/lock.h>
34 #include <vppinfra/pool.h>
35 #include <vppinfra/hash.h>
/* Function-attribute helper macros. HAVE_CONSTRUCTOR_ATTRIBUTE and
 * HAVE_DESTRUCTOR_ATTRIBUTE are defined right before their own #ifdef, and
 * the guarded definitions expand to __attribute__ ((constructor)) and
 * __attribute__ ((destructor)); the alternative definitions are empty.
 * NOTE(review): the lines separating the two alternatives of each macro are
 * not visible in this view. */
37 #define HAVE_CONSTRUCTOR_ATTRIBUTE
38 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
39 #define CONSTRUCTOR_ATTRIBUTE \
40 __attribute__ ((constructor))
42 #define CONSTRUCTOR_ATTRIBUTE
43 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
45 #define HAVE_DESTRUCTOR_ATTRIBUTE
46 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
47 #define DESTRUCTOR_ATTRIBUTE \
48 __attribute__ ((destructor))
50 #define DESTRUCTOR_ATTRIBUTE
/* LDP_MAX_NWORKERS is the element count handed to pool_alloc () when the
 * worker pool is set up.
 *
 * ldp_worker_ctx_ holds per-worker state: a clib time base, three groups of
 * scratch bitmaps used by the select/pselect path (the plain rd/wr/ex set,
 * the session-index "si_" set and the "libc_" set, each for read, write and
 * except) and a struct pollfd pointer.
 * NOTE(review): other members of this struct are not visible in this view. */
53 #define LDP_MAX_NWORKERS 32
55 typedef struct ldp_worker_ctx_
58 clib_time_t clib_time;
63 clib_bitmap_t *rd_bitmap;
64 clib_bitmap_t *wr_bitmap;
65 clib_bitmap_t *ex_bitmap;
66 clib_bitmap_t *si_rd_bitmap;
67 clib_bitmap_t *si_wr_bitmap;
68 clib_bitmap_t *si_ex_bitmap;
69 clib_bitmap_t *libc_rd_bitmap;
70 clib_bitmap_t *libc_wr_bitmap;
71 clib_bitmap_t *libc_ex_bitmap;
77 struct pollfd *libc_poll;
/* Compile-time size checks backing the casts between bitmap words and
 * fd_set storage done in the select path of this file. */
89 /* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. Make sure
90 * they are the same size */
91 STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (fd_mask),
92 "ldp bitmap size mismatch");
93 STATIC_ASSERT (sizeof (vcl_si_set) == sizeof (fd_mask),
94 "ldp bitmap size mismatch");
/* Members of the LDP main state (presumably ldp_main_t; the struct header
 * and the remaining members are not visible in this view): the worker
 * context pool, the application name buffer of LDP_APP_NAME_MAX bytes and
 * the vcl_needs_real_epoll flag. */
98 ldp_worker_ctx_t *workers;
100 char app_name[LDP_APP_NAME_MAX];
105 /** vcl needs next epoll_create to go to libc_epoll */
106 u8 vcl_needs_real_epoll;
109 * crypto state used only for testing
/* LDP_DEBUG is the current debug level read from the global ldp state.
 *
 * LDBG prints "ldp<pid>: " followed by the formatted message and a newline
 * to stderr when ldp->debug is strictly greater than _lvl. errno is saved
 * before fprintf () and restored afterwards, so logging does not change the
 * errno seen by the caller.
 * NOTE(review): the expansion starts with a bare `if`, so it is not wrapped
 * in a do/while (0) in the lines visible here. */
115 #define LDP_DEBUG ldp->debug
117 #define LDBG(_lvl, _fmt, _args...) \
118 if (ldp->debug > _lvl) \
120 int errno_saved = errno; \
121 fprintf (stderr, "ldp<%d>: " _fmt "\n", getpid(), ##_args); \
122 errno = errno_saved; \
/* File-local LDP main state and the pointer used to reach it everywhere
 * else in this file. Defaults visible here: vlsh_bit_val is
 * 1 << LDP_SID_BIT_MIN, vlsh_bit_mask is that value minus one, debug starts
 * at LDP_DEBUG_INIT and transparent TLS is disabled. */
125 static ldp_main_t ldp_main = {
126 .vlsh_bit_val = (1 << LDP_SID_BIT_MIN),
127 .vlsh_bit_mask = (1 << LDP_SID_BIT_MIN) - 1,
128 .debug = LDP_DEBUG_INIT,
129 .transparent_tls = 0,
133 static ldp_main_t *ldp = &ldp_main;
/* Return the worker context of the caller: the element of ldp->workers at
 * the index reported by vppcom_worker_index (). No bounds check is done in
 * the visible code. */
135 static inline ldp_worker_ctx_t *
136 ldp_worker_get_current (void)
138 return (ldp->workers + vppcom_worker_index ());
142 * RETURN: 0 on success or -1 on error.
/* Store the application name in ldp->app_name as "ldp-<pid>-<app_name>".
 * snprintf () bounds the write to LDP_APP_NAME_MAX bytes, so longer names
 * are truncated. */
145 ldp_set_app_name (char *app_name)
147 snprintf (ldp->app_name, LDP_APP_NAME_MAX,
148 "ldp-%d-%s", getpid (), app_name);
/* Body of the app-name getter (presumably ldp_get_app_name (); the
 * signature is not visible in this view). If no name has been set yet the
 * default "app" is installed through ldp_set_app_name (), then the stored
 * name is returned. */
154 if (ldp->app_name[0] == '\0')
155 ldp_set_app_name ("app");
157 return ldp->app_name;
/* Map a VLS handle to the fd number exposed to the application by adding
 * ldp->vlsh_bit_val. Inverse of ldp_fd_to_vlsh (). */
161 ldp_vlsh_to_fd (vls_handle_t vlsh)
163 return (vlsh + ldp->vlsh_bit_val);
/* Map an application fd back to a VLS handle. fds below ldp->vlsh_bit_val
 * are not LDP-managed and yield VLS_INVALID_HANDLE; otherwise the handle is
 * fd minus ldp->vlsh_bit_val. */
166 static inline vls_handle_t
167 ldp_fd_to_vlsh (int fd)
169 if (fd < ldp->vlsh_bit_val)
170 return VLS_INVALID_HANDLE;
172 return (fd - ldp->vlsh_bit_val);
/* Set up the worker context pool by calling pool_alloc () on ldp->workers
 * with LDP_MAX_NWORKERS.
 * NOTE(review): any guard against repeated allocation is not visible in
 * this view. */
176 ldp_alloc_workers (void)
180 pool_alloc (ldp->workers, LDP_MAX_NWORKERS);
/* LDP initialisation body (presumably ldp_init (); the signature is not
 * visible in this view).
 *
 * Steps visible here:
 *  - vcl_needs_real_epoll is raised around vls_app_create () and cleared
 *    afterwards, on both the failure and the success path;
 *  - the worker pool is allocated and the current worker fetched;
 *  - LDP_ENV_DEBUG, LDP_ENV_APP_NAME, LDP_ENV_SID_BIT and LDP_ENV_TLS_TRANS
 *    are read from the environment;
 *  - a sid bit below LDP_SID_BIT_MIN or above LDP_SID_BIT_MAX is replaced by
 *    that limit, and vlsh_bit_val / vlsh_bit_mask are derived from the
 *    resulting bit;
 *  - a warning is logged when vlsh_bit_val exceeds FD_SETSIZE / 2, because
 *    select/pselect cannot cover such fds;
 *  - clib_time of every worker in the pool is zeroed.
 */
186 ldp_worker_ctx_t *ldpw;
192 ldp->vcl_needs_real_epoll = 1;
193 rv = vls_app_create (ldp_get_app_name ());
196 ldp->vcl_needs_real_epoll = 0;
197 if (rv == VPPCOM_EEXIST)
199 LDBG (2, "\nERROR: ldp_init: vppcom_app_create()"
200 " failed! rv = %d (%s)\n", rv, vppcom_retval_str (rv));
204 ldp->vcl_needs_real_epoll = 0;
205 ldp_alloc_workers ();
206 ldpw = ldp_worker_get_current ();
208 char *env_var_str = getenv (LDP_ENV_DEBUG);
212 if (sscanf (env_var_str, "%u", &tmp) != 1)
213 clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in"
214 " the env var " LDP_ENV_DEBUG " (%s)!", getpid (),
219 LDBG (0, "configured LDP debug level (%u) from env var "
220 LDP_ENV_DEBUG "!", ldp->debug);
224 env_var_str = getenv (LDP_ENV_APP_NAME);
227 ldp_set_app_name (env_var_str);
228 LDBG (0, "configured LDP app name (%s) from the env var "
229 LDP_ENV_APP_NAME "!", ldp->app_name);
232 env_var_str = getenv (LDP_ENV_SID_BIT);
236 if (sscanf (env_var_str, "%u", &sb) != 1)
238 LDBG (0, "WARNING: Invalid LDP sid bit specified in the env var "
239 LDP_ENV_SID_BIT " (%s)! sid bit value %d (0x%x)", env_var_str,
240 ldp->vlsh_bit_val, ldp->vlsh_bit_val);
242 else if (sb < LDP_SID_BIT_MIN)
244 ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MIN);
245 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
247 LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
248 LDP_ENV_SID_BIT " (%s) is too small. Using LDP_SID_BIT_MIN"
249 " (%d)! sid bit value %d (0x%x)", sb, env_var_str,
250 LDP_SID_BIT_MIN, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
252 else if (sb > LDP_SID_BIT_MAX)
254 ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MAX);
255 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
257 LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
258 LDP_ENV_SID_BIT " (%s) is too big. Using LDP_SID_BIT_MAX"
259 " (%d)! sid bit value %d (0x%x)", sb, env_var_str,
260 LDP_SID_BIT_MAX, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
264 ldp->vlsh_bit_val = (1 << sb);
265 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
267 LDBG (0, "configured LDP sid bit (%u) from "
268 LDP_ENV_SID_BIT "! sid bit value %d (0x%x)", sb,
269 ldp->vlsh_bit_val, ldp->vlsh_bit_val);
272 /* Make sure there are enough bits in the fd set for vcl sessions */
273 if (ldp->vlsh_bit_val > FD_SETSIZE / 2)
275 /* Only valid for select/pselect, so just WARNING and not exit */
277 "WARNING: LDP vlsh bit value %d > FD_SETSIZE/2 %d, "
278 "select/pselect not supported now!",
279 ldp->vlsh_bit_val, FD_SETSIZE / 2);
282 env_var_str = getenv (LDP_ENV_TLS_TRANS);
285 ldp->transparent_tls = 1;
289 pool_foreach (ldpw, ldp->workers) {
290 clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time));
294 LDBG (0, "LDP initialization: done!");
/* Lazy-initialisation guard used at the top of the interposed calls: when
 * ldp->init is not set, ldp_init () is run and its negated return value is
 * stored in errno; a non-zero result selects the failure branch.
 * NOTE(review): the body of that failure branch is not visible in this
 * view. */
299 #define ldp_init_check() \
300 if (PREDICT_FALSE (!ldp->init)) \
302 if ((errno = -ldp_init ())) \
/* Body of the close () wrapper (signature not visible in this view).
 *
 * For an fd that maps to a VLS handle, the libc epoll fd attached to the
 * session (VPPCOM_ATTR_GET_LIBC_EPFD) is looked up first; the visible code
 * closes it with libc_close () and writes the attribute back with
 * VPPCOM_ATTR_SET_LIBC_EPFD, and treats a negative lookup result as a
 * separate case. The session itself is then closed with vls_close ().
 * Any other fd goes straight to libc_close (). */
314 vlsh = ldp_fd_to_vlsh (fd);
315 if (vlsh != VLS_INVALID_HANDLE)
317 epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
320 LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd);
322 rv = libc_close (epfd);
325 u32 size = sizeof (epfd);
328 (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
331 else if (PREDICT_FALSE (epfd < 0))
338 LDBG (0, "fd %d: calling vls_close: vlsh %u", fd, vlsh);
340 rv = vls_close (vlsh);
349 LDBG (0, "fd %d: calling libc_close", fd);
350 rv = libc_close (fd);
/* read () wrapper: fds that map to a VLS handle are served by vls_read (),
 * all others by libc_read (). */
358 read (int fd, void *buf, size_t nbytes)
365 vlsh = ldp_fd_to_vlsh (fd);
366 if (vlsh != VLS_INVALID_HANDLE)
368 size = vls_read (vlsh, buf, nbytes);
377 size = libc_read (fd, buf, nbytes);
/* readv () wrapper. For a VLS-backed fd each iovec entry is filled with a
 * separate vls_read () call; the visible checks react to a read shorter
 * than the entry length and to an error occurring before any byte was
 * accumulated. Other fds go to libc_readv ().
 * NOTE(review): `rv < iov[i].iov_len` compares an int with a size_t, so a
 * negative rv is converted to a large unsigned value in that comparison. */
384 readv (int fd, const struct iovec * iov, int iovcnt)
386 int rv = 0, i, total = 0;
392 vlsh = ldp_fd_to_vlsh (fd);
393 if (vlsh != VLS_INVALID_HANDLE)
395 for (i = 0; i < iovcnt; ++i)
397 rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len);
403 if (rv < iov[i].iov_len)
407 if (rv < 0 && total == 0)
417 size = libc_readv (fd, iov, iovcnt);
/* write () wrapper: fds that map to a VLS handle are served by
 * vls_write_msg () (the const qualifier of buf is cast away for that call),
 * all others by libc_write (). */
424 write (int fd, const void *buf, size_t nbytes)
431 vlsh = ldp_fd_to_vlsh (fd);
432 if (vlsh != VLS_INVALID_HANDLE)
434 size = vls_write_msg (vlsh, (void *) buf, nbytes);
443 size = libc_write (fd, buf, nbytes);
/* writev () wrapper. For a VLS-backed fd each iovec entry is sent with its
 * own vls_write_msg () call; the visible checks react to a write shorter
 * than the entry length and to an error occurring before any byte was
 * accumulated. Other fds go to libc_writev (). */
450 writev (int fd, const struct iovec * iov, int iovcnt)
452 ssize_t size = 0, total = 0;
458 vlsh = ldp_fd_to_vlsh (fd);
459 if (vlsh != VLS_INVALID_HANDLE)
461 for (i = 0; i < iovcnt; ++i)
463 rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len);
469 if (rv < iov[i].iov_len)
474 if (rv < 0 && total == 0)
484 size = libc_writev (fd, iov, iovcnt);
/* Shared implementation of fcntl () and fcntl64 ().
 *
 * For a VLS-backed fd one int argument is pulled from the va_list and used
 * as the flags value; the visible code sets or gets the session flags via
 * VPPCOM_ATTR_SET_FLAGS / VPPCOM_ATTR_GET_FLAGS and only logs F_SETFD as
 * ignored. Other fds are forwarded to libc_vfcntl64 () or libc_vfcntl ().
 * NOTE(review): the command dispatch and the condition choosing between the
 * two libc variants are not visible in this view. */
491 fcntl_internal (int fd, int cmd, va_list ap)
496 vlsh = ldp_fd_to_vlsh (fd);
497 LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd);
498 if (vlsh != VLS_INVALID_HANDLE)
500 int flags = va_arg (ap, int);
503 size = sizeof (flags);
508 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
512 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size);
517 /* TODO handle this */
518 LDBG (0, "F_SETFD ignored flags %u", flags);
534 rv = libc_vfcntl64 (fd, cmd, ap);
536 rv = libc_vfcntl (fd, cmd, ap);
/* fcntl () wrapper: hands the variadic arguments to fcntl_internal () as a
 * va_list. */
544 fcntl (int fd, int cmd, ...)
552 rv = fcntl_internal (fd, cmd, ap);
/* fcntl64 () wrapper: same as fcntl (), it hands the variadic arguments to
 * fcntl_internal () as a va_list. */
559 fcntl64 (int fd, int cmd, ...)
567 rv = fcntl_internal (fd, cmd, ap);
/* ioctl () wrapper. For a VLS-backed fd the visible code answers one
 * request with VPPCOM_ATTR_GET_NREAD and another by turning the int
 * argument into O_NONBLOCK (non-zero) or 0 and applying it with
 * VPPCOM_ATTR_SET_FLAGS, which overwrites the session flags rather than
 * merging them. Other fds are forwarded to libc_vioctl ().
 * NOTE(review): the request codes selecting each branch are not visible in
 * this view. */
573 ioctl (int fd, unsigned long int cmd, ...)
583 vlsh = ldp_fd_to_vlsh (fd);
584 if (vlsh != VLS_INVALID_HANDLE)
589 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
594 u32 flags = va_arg (ap, int) ? O_NONBLOCK : 0;
595 u32 size = sizeof (flags);
597 /* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than
598 * non-blocking, the flags should be read here and merged
601 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
617 rv = libc_vioctl (fd, cmd, ap);
/* Split one caller fd_set into a VCL session bitmap and a libc bitmap.
 *
 * original  - caller's fd_set; copied into *resultb and then zeroed
 * resultb   - working copy of the caller's set
 * libcb     - receives the fds that do not map to a VLS handle
 * vclb      - receives the session index of every fd that does
 * minbits   - minimum size all three bitmaps are validated to
 * n_bytes   - number of bytes of the fd_set to copy and clear
 * si_bits   - in/out: raised to (last set bit in *vclb) + 1 if larger
 * libc_bits - in/out: raised to (last set bit in *libcb) + 1 if larger
 *
 * NOTE(review): nfds is not referenced in the lines visible here. */
625 ldp_select_init_maps (fd_set * __restrict original,
626 clib_bitmap_t ** resultb, clib_bitmap_t ** libcb,
627 clib_bitmap_t ** vclb, int nfds, u32 minbits,
628 u32 n_bytes, uword * si_bits, uword * libc_bits)
630 uword si_bits_set, libc_bits_set;
634 clib_bitmap_validate (*vclb, minbits);
635 clib_bitmap_validate (*libcb, minbits);
636 clib_bitmap_validate (*resultb, minbits);
637 clib_memcpy_fast (*resultb, original, n_bytes);
638 memset (original, 0, n_bytes);
641 clib_bitmap_foreach (fd, *resultb) {
644 vlsh = ldp_fd_to_vlsh (fd);
645 if (vlsh == VLS_INVALID_HANDLE)
646 clib_bitmap_set_no_check (*libcb, fd, 1);
648 *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1);
652 si_bits_set = clib_bitmap_last_set (*vclb) + 1;
653 *si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits;
654 clib_bitmap_validate (*resultb, *si_bits);
656 libc_bits_set = clib_bitmap_last_set (*libcb) + 1;
657 *libc_bits = (libc_bits_set > *libc_bits) ? libc_bits_set : *libc_bits;
/* Translate a bitmap of ready VCL session indices back into fds: every set
 * session index is converted to its VLS handle and then to the fd given by
 * ldp_vlsh_to_fd (); a negative fd is handled as an unlikely error case.
 * NOTE(review): the statement that records the fd in libcb and the return
 * values are not visible in this view. */
661 ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
671 clib_bitmap_foreach (si, vclb) {
672 vlsh = vls_session_index_to_vlsh (si);
673 ASSERT (vlsh != VLS_INVALID_HANDLE);
674 fd = ldp_vlsh_to_fd (vlsh);
675 if (PREDICT_FALSE (fd < 0))
/* Merge libc select results into the caller's fd_set: FD_SET () is applied
 * to libcb for every bit set in result. */
688 ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb)
696 clib_bitmap_foreach (fd, result)
697 FD_SET ((int)fd, libcb);
/* Common implementation behind select () and pselect ().
 *
 * The timeout is converted to seconds as an f64 and offset by the worker's
 * clib_time_now () to form an absolute deadline. When nfds does not exceed
 * ldp->vlsh_bit_val no VLS-backed fd can be in the sets, and the call is
 * handed to libc_pselect () with the caller's sets.
 *
 * Otherwise each of the three fd_sets is split by ldp_select_init_maps ()
 * into a session bitmap and a libc bitmap. A loop then copies the session
 * bitmaps into the working bitmaps and calls vls_select (), copies the libc
 * bitmaps into the working bitmaps and calls libc_pselect (), and maps ready
 * bits back into the caller's sets. It repeats while time_out is -1 or the
 * deadline has not passed. All nine per-worker bitmaps are zeroed at the
 * end.
 * NOTE(review): the exit paths out of the loop and the return statements
 * are not visible in this view. */
702 ldp_pselect (int nfds, fd_set * __restrict readfds,
703 fd_set * __restrict writefds,
704 fd_set * __restrict exceptfds,
705 const struct timespec *__restrict timeout,
706 const __sigset_t * __restrict sigmask)
708 u32 minbits = clib_max (nfds, BITS (uword)), n_bytes;
709 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
710 struct timespec libc_tspec = { 0 };
711 f64 time_out, vcl_timeout = 0;
712 uword si_bits, libc_bits;
713 int rv, bits_set = 0;
721 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
722 clib_time_init (&ldpw->clib_time);
726 time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ?
727 (f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9;
729 time_out += clib_time_now (&ldpw->clib_time);
731 /* select as fine grained sleep */
734 while (clib_time_now (&ldpw->clib_time) < time_out)
747 if (nfds <= ldp->vlsh_bit_val)
749 rv = libc_pselect (nfds, readfds, writefds, exceptfds,
754 si_bits = libc_bits = 0;
755 n_bytes = nfds / 8 + ((nfds % 8) ? 1 : 0);
758 ldp_select_init_maps (readfds, &ldpw->rd_bitmap, &ldpw->libc_rd_bitmap,
759 &ldpw->si_rd_bitmap, nfds, minbits, n_bytes,
760 &si_bits, &libc_bits);
762 ldp_select_init_maps (writefds, &ldpw->wr_bitmap,
763 &ldpw->libc_wr_bitmap, &ldpw->si_wr_bitmap, nfds,
764 minbits, n_bytes, &si_bits, &libc_bits);
766 ldp_select_init_maps (exceptfds, &ldpw->ex_bitmap,
767 &ldpw->libc_ex_bitmap, &ldpw->si_ex_bitmap, nfds,
768 minbits, n_bytes, &si_bits, &libc_bits);
770 if (PREDICT_FALSE (!si_bits && !libc_bits))
778 libc_tspec = timeout ? *timeout : libc_tspec;
785 clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap,
786 vec_len (ldpw->si_rd_bitmap) *
787 sizeof (clib_bitmap_t));
789 clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap,
790 vec_len (ldpw->si_wr_bitmap) *
791 sizeof (clib_bitmap_t));
793 clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap,
794 vec_len (ldpw->si_ex_bitmap) *
795 sizeof (clib_bitmap_t));
797 rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL,
798 writefds ? ldpw->wr_bitmap : NULL,
799 exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout);
808 if (ldp_select_vcl_map_to_libc (ldpw->rd_bitmap, readfds))
814 if (ldp_select_vcl_map_to_libc (ldpw->wr_bitmap, writefds))
820 if (ldp_select_vcl_map_to_libc (ldpw->ex_bitmap, exceptfds))
831 clib_memcpy_fast (ldpw->rd_bitmap, ldpw->libc_rd_bitmap,
832 vec_len (ldpw->libc_rd_bitmap) *
833 sizeof (clib_bitmap_t));
835 clib_memcpy_fast (ldpw->wr_bitmap, ldpw->libc_wr_bitmap,
836 vec_len (ldpw->libc_wr_bitmap) *
837 sizeof (clib_bitmap_t));
839 clib_memcpy_fast (ldpw->ex_bitmap, ldpw->libc_ex_bitmap,
840 vec_len (ldpw->libc_ex_bitmap) *
841 sizeof (clib_bitmap_t));
843 rv = libc_pselect (libc_bits,
844 readfds ? (fd_set *) ldpw->rd_bitmap : NULL,
845 writefds ? (fd_set *) ldpw->wr_bitmap : NULL,
846 exceptfds ? (fd_set *) ldpw->ex_bitmap : NULL,
847 &libc_tspec, sigmask);
850 ldp_select_libc_map_merge (ldpw->rd_bitmap, readfds);
851 ldp_select_libc_map_merge (ldpw->wr_bitmap, writefds);
852 ldp_select_libc_map_merge (ldpw->ex_bitmap, exceptfds);
863 while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out));
867 /* TBD: set timeout to amount of time left */
868 clib_bitmap_zero (ldpw->rd_bitmap);
869 clib_bitmap_zero (ldpw->si_rd_bitmap);
870 clib_bitmap_zero (ldpw->libc_rd_bitmap);
871 clib_bitmap_zero (ldpw->wr_bitmap);
872 clib_bitmap_zero (ldpw->si_wr_bitmap);
873 clib_bitmap_zero (ldpw->libc_wr_bitmap);
874 clib_bitmap_zero (ldpw->ex_bitmap);
875 clib_bitmap_zero (ldpw->si_ex_bitmap);
876 clib_bitmap_zero (ldpw->libc_ex_bitmap);
/* select () wrapper: converts the struct timeval timeout to a struct
 * timespec (microseconds * 1000 gives nanoseconds) and calls ldp_pselect ()
 * without a signal mask. A NULL timeout is forwarded as NULL.
 * NOTE(review): the guard around the tspec assignments is not visible in
 * this view. */
882 select (int nfds, fd_set * __restrict readfds,
883 fd_set * __restrict writefds,
884 fd_set * __restrict exceptfds, struct timeval *__restrict timeout)
886 struct timespec tspec;
890 tspec.tv_sec = timeout->tv_sec;
891 tspec.tv_nsec = timeout->tv_usec * 1000;
893 return ldp_pselect (nfds, readfds, writefds, exceptfds,
894 timeout ? &tspec : NULL, NULL);
/* pselect () wrapper around ldp_pselect ().
 * NOTE(review): the caller's sigmask is not forwarded; 0 is passed as the
 * last argument, so the requested signal mask is never applied. Confirm
 * whether this is intentional. */
899 pselect (int nfds, fd_set * __restrict readfds,
900 fd_set * __restrict writefds,
901 fd_set * __restrict exceptfds,
902 const struct timespec *__restrict timeout,
903 const __sigset_t * __restrict sigmask)
905 return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout, 0);
909 /* If transparent TLS mode is turned on, then ldp will load key and cert.
/* Load the TLS certificate and key named by the LDP_ENV_TLS_CERT and
 * LDP_ENV_TLS_KEY environment variables and register them with
 * vppcom_add_cert_key_pair (); the returned index is stored in
 * ldp->ckpair_index.
 *
 * Each file is read with a single fread () into a 4096-byte stack buffer,
 * so at most 4096 bytes of either file are used.
 * NOTE(review): fread () results are stored in int without a visible error
 * or truncation check; the fclose () calls and return values are not
 * visible in this view. */
912 load_cert_key_pair (void)
914 char *cert_str = getenv (LDP_ENV_TLS_CERT);
915 char *key_str = getenv (LDP_ENV_TLS_KEY);
916 char cert_buf[4096], key_buf[4096];
917 int cert_size, key_size;
918 vppcom_cert_key_pair_t crypto;
922 if (!cert_str || !key_str)
924 LDBG (0, "ERROR: failed to read LDP environment %s\n",
929 fp = fopen (cert_str, "r");
932 LDBG (0, "ERROR: failed to open cert file %s \n", cert_str);
935 cert_size = fread (cert_buf, sizeof (char), sizeof (cert_buf), fp);
938 fp = fopen (key_str, "r");
941 LDBG (0, "ERROR: failed to open key file %s \n", key_str);
944 key_size = fread (key_buf, sizeof (char), sizeof (key_buf), fp);
947 crypto.cert = cert_buf;
948 crypto.key = key_buf;
949 crypto.cert_len = cert_size;
950 crypto.key_len = key_size;
951 ckp_index = vppcom_add_cert_key_pair (&crypto);
954 LDBG (0, "ERROR: failed to add cert key pair\n");
958 ldp->ckpair_index = ckp_index;
/* Attach the process-wide cert/key pair to a session. The pair is loaded on
 * first use (ckpair_index still ~0) through load_cert_key_pair (); its index
 * is then set on the session with VPPCOM_ATTR_SET_CKPAIR and the vls_attr ()
 * result is returned. */
964 assign_cert_key_pair (vls_handle_t vlsh)
968 if (ldp->ckpair_index == ~0 && load_cert_key_pair () < 0)
971 ckp_len = sizeof (ldp->ckpair_index);
972 return vls_attr (vlsh, VPPCOM_ATTR_SET_CKPAIR, &ldp->ckpair_index, &ckp_len);
/* socket () wrapper.
 *
 * AF_INET / AF_INET6 sockets of type SOCK_STREAM or SOCK_DGRAM (after
 * masking off SOCK_CLOEXEC and SOCK_NONBLOCK) are created as VLS sessions.
 * The protocol is TLS when transparent TLS is enabled, otherwise UDP for
 * datagram and TCP for stream sockets; in transparent TLS mode the cert/key
 * pair is attached as well. The returned value is the fd derived from the
 * VLS handle. Everything else is forwarded to libc_socket ().
 * NOTE(review): SOCK_CLOEXEC is masked off and not used further in the
 * lines visible here. */
976 socket (int domain, int type, int protocol)
978 int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
979 u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0;
984 if (((domain == AF_INET) || (domain == AF_INET6)) &&
985 ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM)))
988 if (ldp->transparent_tls)
990 proto = VPPCOM_PROTO_TLS;
993 proto = ((sock_type == SOCK_DGRAM) ?
994 VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP);
996 LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u",
997 proto, vppcom_proto_str (proto), is_nonblocking);
999 vlsh = vls_create (proto, is_nonblocking);
1007 if (ldp->transparent_tls)
1009 if (assign_cert_key_pair (vlsh) < 0)
1012 rv = ldp_vlsh_to_fd (vlsh);
1017 LDBG (0, "calling libc_socket");
1018 rv = libc_socket (domain, type, protocol);
1025 * Create two new sockets, of type TYPE in domain DOMAIN and using
1026 * protocol PROTOCOL, which are connected to each other, and put file
1027 * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
1028 * one will be chosen automatically.
1029 * Returns 0 on success, -1 for errors.
/* socketpair () wrapper. The AF_INET / AF_INET6 stream and datagram case is
 * not implemented for VCL: the visible code only logs "LDP-TBD" for it.
 * All other combinations are forwarded to libc_socketpair ().
 * NOTE(review): the value returned for the unimplemented case is not
 * visible in this view. */
1032 socketpair (int domain, int type, int protocol, int fds[2])
1034 int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
1038 if (((domain == AF_INET) || (domain == AF_INET6)) &&
1039 ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM)))
1041 LDBG (0, "LDP-TBD");
1047 LDBG (1, "calling libc_socketpair");
1048 rv = libc_socketpair (domain, type, protocol, fds);
/* bind () wrapper.
 *
 * For a VLS-backed fd the sockaddr is converted into a vppcom endpoint:
 * len must equal sizeof (struct sockaddr_in) or sizeof (struct
 * sockaddr_in6) for the matching family, ep.ip points into the caller's
 * address and the port is copied as stored in the sockaddr. Any other
 * family sets errno to EAFNOSUPPORT. The endpoint is then passed to
 * vls_bind (). Other fds are forwarded to libc_bind ().
 * NOTE(review): addr is dereferenced for sa_family without a NULL check in
 * the lines visible here. */
1055 bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
1062 vlsh = ldp_fd_to_vlsh (fd);
1063 if (vlsh != VLS_INVALID_HANDLE)
1067 switch (addr->sa_family)
1070 if (len != sizeof (struct sockaddr_in))
1072 LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!",
1078 ep.is_ip4 = VPPCOM_IS_IP4;
1079 ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
1080 ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
1084 if (len != sizeof (struct sockaddr_in6))
1086 LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!",
1092 ep.is_ip4 = VPPCOM_IS_IP6;
1093 ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1094 ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
1098 LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!",
1099 fd, vlsh, addr->sa_family);
1100 errno = EAFNOSUPPORT;
1104 LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh,
1107 rv = vls_bind (vlsh, &ep);
1108 if (rv != VPPCOM_OK)
1116 LDBG (0, "fd %d: calling libc_bind: addr %p, len %u", fd, addr, len);
1117 rv = libc_bind (fd, addr, len);
1121 LDBG (1, "fd %d: returning %d", fd, rv);
/* Fill a caller-supplied sockaddr from a vppcom endpoint.
 *
 * Nothing is written unless addr, len and ep are all non-NULL. The family
 * is chosen from ep->is_ip4, the port is copied as stored in the endpoint,
 * *len is clamped down to the size of sockaddr_in / sockaddr_in6, and the
 * address bytes copied are limited to what fits in *len after the
 * non-address part of the structure (copy_len = *len - sa_len).
 * NOTE(review): the port field is written before *len is examined, and the
 * guard on copy_len before memcpy () is not visible in this view. */
1127 ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len,
1128 vppcom_endpt_t * ep)
1131 int sa_len, copy_len;
1135 if (addr && len && ep)
1137 addr->sa_family = (ep->is_ip4 == VPPCOM_IS_IP4) ? AF_INET : AF_INET6;
1138 switch (addr->sa_family)
1141 ((struct sockaddr_in *) addr)->sin_port = ep->port;
1142 if (*len > sizeof (struct sockaddr_in))
1143 *len = sizeof (struct sockaddr_in);
1144 sa_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr);
1145 copy_len = *len - sa_len;
1147 memcpy (&((struct sockaddr_in *) addr)->sin_addr, ep->ip,
1152 ((struct sockaddr_in6 *) addr)->sin6_port = ep->port;
1153 if (*len > sizeof (struct sockaddr_in6))
1154 *len = sizeof (struct sockaddr_in6);
1155 sa_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr);
1156 copy_len = *len - sa_len;
1158 memcpy (((struct sockaddr_in6 *) addr)->sin6_addr.
1159 __in6_u.__u6_addr8, ep->ip, copy_len);
/* getsockname () wrapper. For a VLS-backed fd the local endpoint is fetched
 * with VPPCOM_ATTR_GET_LCL_ADDR (addr_buf is sized for an IPv6 address) and
 * converted into the caller's sockaddr by ldp_copy_ep_to_sockaddr (). Other
 * fds are forwarded to libc_getsockname (). */
1172 getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
1179 vlsh = ldp_fd_to_vlsh (fd);
1180 if (vlsh != VLS_INVALID_HANDLE)
1183 u8 addr_buf[sizeof (struct in6_addr)];
1184 u32 size = sizeof (ep);
1188 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size);
1189 if (rv != VPPCOM_OK)
1196 rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
1197 if (rv != VPPCOM_OK)
1206 rv = libc_getsockname (fd, addr, len);
/* connect () wrapper.
 *
 * A NULL addr is logged as an error before any dispatch. For a VLS-backed
 * fd the sockaddr is converted into a vppcom endpoint with the same rules
 * as bind (): exact length for sockaddr_in / sockaddr_in6, EAFNOSUPPORT for
 * any other family. The endpoint is then passed to vls_connect (). Other
 * fds are forwarded to libc_connect (). */
1213 connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
1222 LDBG (0, "ERROR: fd %d: NULL addr, len %u", fd, len);
1228 vlsh = ldp_fd_to_vlsh (fd);
1229 if (vlsh != VLS_INVALID_HANDLE)
1233 switch (addr->sa_family)
1236 if (len != sizeof (struct sockaddr_in))
1238 LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!",
1244 ep.is_ip4 = VPPCOM_IS_IP4;
1245 ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
1246 ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
1250 if (len != sizeof (struct sockaddr_in6))
1252 LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!",
1258 ep.is_ip4 = VPPCOM_IS_IP6;
1259 ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1260 ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
1264 LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!",
1265 fd, vlsh, addr->sa_family);
1266 errno = EAFNOSUPPORT;
1270 LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd,
1273 rv = vls_connect (vlsh, &ep);
1274 if (rv != VPPCOM_OK)
1282 LDBG (0, "fd %d: calling libc_connect(): addr %p, len %u",
1285 rv = libc_connect (fd, addr, len);
1289 LDBG (1, "fd %d: returning %d (0x%x)", fd, rv, rv);
/* getpeername () wrapper. For a VLS-backed fd the peer endpoint is fetched
 * with VPPCOM_ATTR_GET_PEER_ADDR (addr_buf is sized for an IPv6 address)
 * and converted into the caller's sockaddr by ldp_copy_ep_to_sockaddr ().
 * Other fds are forwarded to libc_getpeername (). */
1294 getpeername (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
1301 vlsh = ldp_fd_to_vlsh (fd);
1302 if (vlsh != VLS_INVALID_HANDLE)
1305 u8 addr_buf[sizeof (struct in6_addr)];
1306 u32 size = sizeof (ep);
1309 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size);
1310 if (rv != VPPCOM_OK)
1317 rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
1318 if (rv != VPPCOM_OK)
1327 rv = libc_getpeername (fd, addr, len);
/* send () wrapper: a VLS-backed fd is served by vls_sendto () with a NULL
 * endpoint, and a result below VPPCOM_OK is treated as an error; other fds
 * are forwarded to libc_send (). */
1334 send (int fd, const void *buf, size_t n, int flags)
1336 vls_handle_t vlsh = ldp_fd_to_vlsh (fd);
1341 if (vlsh != VLS_INVALID_HANDLE)
1343 size = vls_sendto (vlsh, (void *) buf, n, flags, NULL);
1344 if (size < VPPCOM_OK)
1352 size = libc_send (fd, buf, n, flags);
/* sendfile () wrapper.
 *
 * When out_fd is VLS-backed the transfer is emulated in user space: the
 * session flags are read (to honour O_NONBLOCK), in_fd is positioned with
 * lseek () at *offset, and a loop repeatedly asks the session how much can
 * be written (VPPCOM_ATTR_GET_NWRITE), reads at most that many bytes from
 * in_fd through libc_read () into the per-worker io_buffer and writes them
 * with vls_write (), until n_bytes_left reaches zero. io_buffer is reset on
 * every exit path visible here. Other out_fds are forwarded to
 * libc_sendfile ().
 *
 * NOTE(review): the final update is `*offset += results + 1`, which
 * advances the offset by one more than `results`; confirm against the
 * sendfile(2) contract. vec_validate () is called with bytes_to_read as the
 * index argument. */
1359 sendfile (int out_fd, int in_fd, off_t * offset, size_t len)
1361 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
1367 vlsh = ldp_fd_to_vlsh (out_fd);
1368 if (vlsh != VLS_INVALID_HANDLE)
1371 ssize_t results = 0;
1372 size_t n_bytes_left = len;
1373 size_t bytes_to_read;
1376 u32 flags, flags_len = sizeof (flags);
1378 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len);
1379 if (PREDICT_FALSE (rv != VPPCOM_OK))
1381 LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!",
1382 out_fd, vlsh, rv, vppcom_retval_str (rv));
1384 vec_reset_length (ldpw->io_buffer);
1392 off_t off = lseek (in_fd, *offset, SEEK_SET);
1393 if (PREDICT_FALSE (off == -1))
1399 ASSERT (off == *offset);
1404 size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0);
1407 LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %ld (%s)!",
1408 out_fd, vlsh, size, vppcom_retval_str (size));
1409 vec_reset_length (ldpw->io_buffer);
1415 bytes_to_read = size;
1416 if (bytes_to_read == 0)
1418 if (flags & O_NONBLOCK)
1427 bytes_to_read = clib_min (n_bytes_left, bytes_to_read);
1428 vec_validate (ldpw->io_buffer, bytes_to_read);
1429 nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read);
1434 vec_reset_length (ldpw->io_buffer);
1441 size = vls_write (vlsh, ldpw->io_buffer, nbytes);
1444 if (size == VPPCOM_EAGAIN)
1446 if (flags & O_NONBLOCK)
1457 vec_reset_length (ldpw->io_buffer);
1466 ASSERT (n_bytes_left >= nbytes);
1467 n_bytes_left = n_bytes_left - nbytes;
1469 while (n_bytes_left > 0);
1472 vec_reset_length (ldpw->io_buffer);
1475 off_t off = lseek (in_fd, *offset, SEEK_SET);
1476 if (PREDICT_FALSE (off == -1))
1482 ASSERT (off == *offset);
1483 *offset += results + 1;
1495 size = libc_sendfile (out_fd, in_fd, offset, len);
/* sendfile64 () wrapper: delegates unchanged to sendfile (). */
1503 sendfile64 (int out_fd, int in_fd, off_t * offset, size_t len)
1505 return sendfile (out_fd, in_fd, offset, len);
/* recv () wrapper: a VLS-backed fd is served by vls_recvfrom () with a NULL
 * endpoint; other fds are forwarded to libc_recv (). */
1509 recv (int fd, void *buf, size_t n, int flags)
1516 vlsh = ldp_fd_to_vlsh (fd);
1517 if (vlsh != VLS_INVALID_HANDLE)
1519 size = vls_recvfrom (vlsh, buf, n, flags, NULL);
1528 size = libc_recv (fd, buf, n, flags);
/* __recv_chk () wrapper: forwards to recv ().
 * NOTE(review): buflen is not used in the lines visible here. */
1535 __recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
1540 return recv (fd, buf, n, flags);
/* Send helper shared by sendto () and sendmsg ().
 *
 * An optional TLV (ep_tlv) is copied into the endpoint's app_data, and an
 * optional destination sockaddr is converted into the endpoint (IPv4 or
 * IPv6 address pointer plus port). The data is then sent with
 * vls_sendto (); ep stays NULL unless it is pointed at the local endpoint.
 *
 * NOTE(review): an unsupported address family returns EAFNOSUPPORT
 * directly, i.e. a positive value, not a negative error code; addr_len is
 * not referenced in the lines visible here. */
1544 ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n,
1545 vppcom_endpt_tlv_t *ep_tlv, int flags,
1546 __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
1548 vppcom_endpt_t *ep = 0;
1553 _ep.app_data = *ep_tlv;
1559 switch (addr->sa_family)
1562 ep->is_ip4 = VPPCOM_IS_IP4;
1564 (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr;
1565 ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port;
1569 ep->is_ip4 = VPPCOM_IS_IP6;
1571 (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1573 (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port;
1577 return EAFNOSUPPORT;
1581 return vls_sendto (vlsh, (void *) buf, n, flags, ep);
/* Receive helper shared by recvfrom () and recvmsg (). One path receives
 * with an endpoint and converts it into the caller's sockaddr through
 * ldp_copy_ep_to_sockaddr (); the other receives with a NULL endpoint.
 * NOTE(review): the condition selecting between the two paths is not
 * visible in this view (presumably whether addr was supplied). */
1585 ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n,
1586 int flags, __SOCKADDR_ARG addr,
1587 socklen_t * __restrict addr_len)
1589 u8 src_addr[sizeof (struct sockaddr_in6)];
1597 size = vls_recvfrom (vlsh, buf, n, flags, &ep);
1601 rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
1607 size = vls_recvfrom (vlsh, buf, n, flags, NULL);
/* sendto () wrapper: a VLS-backed fd is served by ldp_vls_sendo () without
 * a TLV; other fds are forwarded to libc_sendto (). */
1613 sendto (int fd, const void *buf, size_t n, int flags,
1614 __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
1621 vlsh = ldp_fd_to_vlsh (fd);
1622 if (vlsh != VLS_INVALID_HANDLE)
1624 size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len);
1633 size = libc_sendto (fd, buf, n, flags, addr, addr_len);
/* recvfrom () wrapper: a VLS-backed fd is served by ldp_vls_recvfrom ();
 * other fds are forwarded to libc_recvfrom (). */
1640 recvfrom (int fd, void *__restrict buf, size_t n, int flags,
1641 __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
1648 vlsh = ldp_fd_to_vlsh (fd);
1649 if (vlsh != VLS_INVALID_HANDLE)
1651 size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len);
1660 size = libc_recvfrom (fd, buf, n, flags, addr, addr_len);
/* sendmsg () wrapper.
 *
 * For a VLS-backed fd only the first control message is inspected: when its
 * cmsg_type is UDP_SEGMENT, its payload is passed down as a VCL_UDP_SEGMENT
 * TLV. Each iovec entry is then sent with a separate ldp_vls_sendo () call
 * to msg_name; the visible checks react to a send shorter than the entry
 * length and to an error before any byte was accumulated. Other fds are
 * forwarded to libc_sendmsg ().
 * NOTE(review): cmsg_level is not checked in the lines visible here. */
1667 sendmsg (int fd, const struct msghdr * msg, int flags)
1674 vlsh = ldp_fd_to_vlsh (fd);
1675 if (vlsh != VLS_INVALID_HANDLE)
1677 struct iovec *iov = msg->msg_iov;
1680 struct cmsghdr *cmsg;
1682 vppcom_endpt_tlv_t _app_data;
1683 vppcom_endpt_tlv_t *p_app_data = NULL;
1685 cmsg = CMSG_FIRSTHDR (msg);
1686 if (cmsg && cmsg->cmsg_type == UDP_SEGMENT)
1688 p_app_data = &_app_data;
1689 valp = (void *) CMSG_DATA (cmsg);
1690 p_app_data->data_type = VCL_UDP_SEGMENT;
1691 p_app_data->data_len = sizeof (*valp);
1692 p_app_data->value = *valp;
1695 for (i = 0; i < msg->msg_iovlen; ++i)
1698 ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, p_app_data,
1699 flags, msg->msg_name, msg->msg_namelen);
1705 if (rv < iov[i].iov_len)
1710 if (rv < 0 && total == 0)
1720 size = libc_sendmsg (fd, msg, flags);
/* sendmmsg () wrapper. The VLS-backed case is not implemented: the visible
 * code only emits an "LDP-TBD" warning for it. Other fds are forwarded to
 * libc_sendmmsg (), with warnings logged before the call and for both the
 * failure (including the saved errno) and the success outcome.
 * NOTE(review): the handle is held in a u32 here while the rest of the file
 * uses vls_handle_t; the conditions guarding the warnings are not visible
 * in this view. */
1728 sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
1731 const char *func_str;
1732 u32 sh = ldp_fd_to_vlsh (fd);
1736 if (sh != VLS_INVALID_HANDLE)
1738 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
1744 func_str = "libc_sendmmsg";
1747 clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
1748 "vmessages %p, vlen %u, flags 0x%x",
1749 getpid (), fd, fd, func_str, vmessages, vlen, flags);
1751 size = libc_sendmmsg (fd, vmessages, vlen, flags);
1758 int errno_val = errno;
1760 clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
1761 "rv %d, errno = %d", getpid (), fd, fd,
1762 func_str, size, errno_val);
1766 clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
1767 getpid (), fd, fd, size, size);
/* recvmsg () wrapper.
 *
 * For a VLS-backed fd the number of readable bytes is queried once
 * (VPPCOM_ATTR_GET_NREAD) as max_deq, then each iovec entry is filled with
 * a separate ldp_vls_recvfrom () call. The source address is requested only
 * for the first entry (msg_name / msg_namelen); later entries pass NULL.
 * The visible checks react to a receive shorter than the entry length, to
 * total reaching max_deq, and to an error before any byte was accumulated.
 * Other fds are forwarded to libc_recvmsg (). */
1774 recvmsg (int fd, struct msghdr * msg, int flags)
1781 vlsh = ldp_fd_to_vlsh (fd);
1782 if (vlsh != VLS_INVALID_HANDLE)
1784 struct iovec *iov = msg->msg_iov;
1785 ssize_t max_deq, total = 0;
1788 max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
1792 for (i = 0; i < msg->msg_iovlen; i++)
1794 rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
1795 (i == 0 ? msg->msg_name : NULL),
1796 (i == 0 ? &msg->msg_namelen : NULL));
1802 if (rv < iov[i].iov_len)
1805 if (total >= max_deq)
1809 if (rv < 0 && total == 0)
1819 size = libc_recvmsg (fd, msg, flags);
/* recvmmsg () wrapper. The VLS-backed case is not implemented: the visible
 * code only emits an "LDP-TBD" warning for it. Other fds are forwarded to
 * libc_recvmmsg (), with warnings logged before the call and for both the
 * failure (including the saved errno) and the success outcome.
 * NOTE(review): the handle is held in a u32 here while the rest of the file
 * uses vls_handle_t; the conditions guarding the warnings are not visible
 * in this view. */
1827 recvmmsg (int fd, struct mmsghdr *vmessages,
1828 unsigned int vlen, int flags, struct timespec *tmo)
1831 const char *func_str;
1832 u32 sh = ldp_fd_to_vlsh (fd);
1836 if (sh != VLS_INVALID_HANDLE)
1838 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
1844 func_str = "libc_recvmmsg";
1847 clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
1848 "vmessages %p, vlen %u, flags 0x%x, tmo %p",
1849 getpid (), fd, fd, func_str, vmessages, vlen,
1852 size = libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
1859 int errno_val = errno;
1861 clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
1862 "rv %d, errno = %d", getpid (), fd, fd,
1863 func_str, size, errno_val);
1867 clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
1868 getpid (), fd, fd, size, size);
/* getsockopt () wrapper.
 *
 * For a VLS-backed fd the option is translated into a vls_attr () query.
 * Visible mappings: TCP nodelay, user MSS, keepidle and keepintvl; IPv6
 * v6only; and at socket level listen state, keepalive, protocol (reported
 * back as SOCK_DGRAM or SOCK_STREAM), tx/rx fifo length, reuseaddr,
 * reuseport, broadcast, domain and pending error. A TCP_INFO request with
 * an exactly sized buffer is answered with a zeroed struct. Unsupported
 * options are logged per level. Other fds are forwarded to
 * libc_getsockopt ().
 *
 * NOTE(review): the TCP_CONGESTION branch overwrites *optlen with
 * strlen ("cubic") and then copies *optlen + 1 bytes into optval, without
 * consulting the buffer length the caller passed in. The protocol branch
 * dereferences optval regardless of the vls_attr () result in the lines
 * visible here. */
1875 getsockopt (int fd, int level, int optname,
1876 void *__restrict optval, socklen_t * __restrict optlen)
1883 vlsh = ldp_fd_to_vlsh (fd);
1884 if (vlsh != VLS_INVALID_HANDLE)
1894 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY,
1898 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS,
1902 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE,
1906 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL,
1910 if (optval && optlen && (*optlen == sizeof (struct tcp_info)))
1912 LDBG (1, "fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, "
1913 "optlen %d: #LDP-NOP#", fd, vlsh, optval, *optlen);
1914 memset (optval, 0, *optlen);
1920 case TCP_CONGESTION:
1921 *optlen = strlen ("cubic");
1922 strncpy (optval, "cubic", *optlen + 1);
1926 LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, "
1927 "optname %d unsupported!", fd, vlsh, optname);
1935 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen);
1938 LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u "
1939 "optname %d unsupported!", fd, vlsh, optname);
1947 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen);
1950 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, optlen);
1953 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen);
1954 *(int *) optval = *(int *) optval ? SOCK_DGRAM : SOCK_STREAM;
1957 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN,
1961 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN,
1965 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen);
1968 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEPORT, optval, optlen);
1971 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen);
1974 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_DOMAIN, optval, optlen);
1977 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen);
1979 case SO_BINDTODEVICE:
1983 LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u "
1984 "optname %d unsupported!", fd, vlsh, optname);
1992 if (rv != VPPCOM_OK)
2000 rv = libc_getsockopt (fd, level, optname, optval, optlen);
/* setsockopt () wrapper.
 *
 * For a VLS-backed fd the option is translated into a vls_attr () setter.
 * Visible mappings: TCP nodelay, user MSS, keepidle and keepintvl; IPv6
 * v6only; and at socket level keepalive, reuseaddr, reuseport and
 * broadcast. optval is passed with its const qualifier cast away and
 * &optlen as the length. Unsupported options are logged per level. Other
 * fds are forwarded to libc_setsockopt ().
 * NOTE(review): the handling of the TCP_CONGESTION case is not visible in
 * this view. The SOL_TCP and SOL_IPV6 "unsupported" messages concatenate
 * "vlsh %u" directly with "optname", without a separating space. */
2007 setsockopt (int fd, int level, int optname,
2008 const void *optval, socklen_t optlen)
2015 vlsh = ldp_fd_to_vlsh (fd);
2016 if (vlsh != VLS_INVALID_HANDLE)
2026 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY,
2027 (void *) optval, &optlen);
2030 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS,
2031 (void *) optval, &optlen);
2034 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE,
2035 (void *) optval, &optlen);
2038 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL,
2039 (void *) optval, &optlen);
2041 case TCP_CONGESTION:
2047 LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u"
2048 "optname %d unsupported!", fd, vlsh, optname);
2056 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY,
2057 (void *) optval, &optlen);
2060 LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vlsh %u"
2061 "optname %d unsupported!", fd, vlsh, optname);
2069 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE,
2070 (void *) optval, &optlen);
2073 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR,
2074 (void *) optval, &optlen);
2077 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEPORT, (void *) optval,
2081 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST,
2082 (void *) optval, &optlen);
2088 LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vlsh %u "
2089 "optname %d unsupported!", fd, vlsh, optname);
2097 if (rv != VPPCOM_OK)
2105 rv = libc_setsockopt (fd, level, optname, optval, optlen);
/* listen () wrapper: a VLS-backed fd is served by vls_listen () with the
 * backlog n, and any result other than VPPCOM_OK is treated as an error;
 * other fds are forwarded to libc_listen (). */
2112 listen (int fd, int n)
2119 vlsh = ldp_fd_to_vlsh (fd);
2120 if (vlsh != VLS_INVALID_HANDLE)
2122 LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n);
2124 rv = vls_listen (vlsh, n);
2125 if (rv != VPPCOM_OK)
2133 LDBG (0, "fd %d: calling libc_listen(): n %d", fd, n);
2134 rv = libc_listen (fd, n);
2137 LDBG (1, "fd %d: returning %d", fd, rv);
/*
 * Common implementation behind accept() and accept4().
 *
 * When listen_fd resolves to a VLS handle, a connection is accepted with
 * vls_accept(). A negative handle is turned into errno (errno = -handle).
 * Otherwise the peer endpoint is copied into the caller's addr / addr_len
 * with ldp_copy_ep_to_sockaddr(); if that copy fails the freshly accepted
 * session is closed again, and if it succeeds the accepted handle is
 * converted to a descriptor with ldp_vlsh_to_fd().
 *
 * When listen_fd has no VLS handle the call goes to libc_accept4().
 *
 * listen_fd - listening descriptor
 * addr      - where the peer address is stored
 * addr_len  - in/out length of addr
 * flags     - accept4() flags, passed through to vls_accept() / libc_accept4()
 */
2142 ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr,
2143 socklen_t * __restrict addr_len, int flags)
2145 vls_handle_t listen_vlsh, accept_vlsh;
2150 listen_vlsh = ldp_fd_to_vlsh (listen_fd);
2151 if (listen_vlsh != VLS_INVALID_HANDLE)
/* Scratch buffer the size of a sockaddr_in6.
 * NOTE(review): presumably backs the endpoint's address storage - confirm. */
2154 u8 src_addr[sizeof (struct sockaddr_in6)];
2155 memset (&ep, 0, sizeof (ep));
/* NOTE(review): the message names vppcom_session_accept but the call made
 * below is vls_accept(). */
2158 LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u,"
2159 " ep %p, flags 0x%x", listen_fd, listen_vlsh, &ep, flags);
2161 accept_vlsh = vls_accept (listen_vlsh, &ep, flags);
/* A negative handle carries the negated error code */
2162 if (accept_vlsh < 0)
2164 errno = -accept_vlsh;
/* Hand the peer address back to the caller */
2169 rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
2170 if (rv != VPPCOM_OK)
/* Address copy failed: do not leave the accepted session open. The result
 * of vls_close() is deliberately discarded. */
2172 (void) vls_close (accept_vlsh);
2178 rv = ldp_vlsh_to_fd (accept_vlsh);
/* listen_fd is not a VCL session: defer to libc */
2184 LDBG (0, "listen fd %d: calling libc_accept4(): addr %p, addr_len %p,"
2185 " flags 0x%x", listen_fd, addr, addr_len, flags);
2187 rv = libc_accept4 (listen_fd, addr, addr_len, flags);
2190 LDBG (1, "listen fd %d: accept returning %d", listen_fd, rv);
/*
 * accept4() interposer: forwards all four arguments to ldp_accept4() and
 * returns its result.
 */
2196 accept4 (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len,
2199 return ldp_accept4 (fd, addr, addr_len, flags);
/*
 * accept() interposer: same as accept4() with flags fixed to 0; returns the
 * result of ldp_accept4().
 */
2203 accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
2205 return ldp_accept4 (fd, addr, addr_len, 0);
/*
 * shutdown() interposer.
 *
 * When fd resolves to a VLS handle the request goes to vls_shutdown();
 * otherwise it goes to libc_shutdown(). `how` is forwarded unchanged on both
 * paths.
 *
 * NOTE(review): unlike listen(), no check of rv against VPPCOM_OK follows the
 * vls_shutdown() call here; confirm whether a VCL error needs translating
 * into errno / -1 before it reaches the application.
 */
2209 shutdown (int fd, int how)
2216 vlsh = ldp_fd_to_vlsh (fd);
2217 if (vlsh != VLS_INVALID_HANDLE)
/* NOTE(review): fd is an int but is printed with %u here; the libc branch
 * below uses %d. */
2219 LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how);
2220 rv = vls_shutdown (vlsh, how);
/* fd is not a VCL session: defer to libc */
2224 LDBG (0, "fd %d: calling libc_shutdown: how %d", fd, how);
2225 rv = libc_shutdown (fd, how);
/*
 * epoll_create1() interposer.
 *
 * Two outcomes:
 *   - If ldp->vcl_needs_real_epoll is set or vls_use_real_epoll() is true, a
 *     kernel epoll fd is created with libc_epoll_create1 (flags), the
 *     vcl_needs_real_epoll request is cleared and the fd is recorded as the
 *     current worker's vcl_mq_epfd.
 *   - Otherwise a VCL epoll session is created with vls_epoll_create() and
 *     its handle is converted to a descriptor with ldp_vlsh_to_fd().
 *
 * flags - only consulted on the libc path; vls_epoll_create() takes no
 *         arguments.
 */
2232 epoll_create1 (int flags)
2234 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2240 if (ldp->vcl_needs_real_epoll || vls_use_real_epoll ())
2242 /* Make sure workers have been allocated */
2245 ldp_alloc_workers ();
/* Re-fetch the worker pointer after the allocation above */
2246 ldpw = ldp_worker_get_current ();
2248 rv = libc_epoll_create1 (flags);
2249 ldp->vcl_needs_real_epoll = 0;
/* NOTE(review): rv is stored without checking that libc_epoll_create1()
 * succeeded, so vcl_mq_epfd can end up holding -1; it is also logged with
 * %u below. */
2250 ldpw->vcl_mq_epfd = rv;
2251 LDBG (0, "created vcl epfd %u", rv);
/* Regular case: create a VCL epoll session */
2255 vlsh = vls_epoll_create ();
2256 if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE))
2263 rv = ldp_vlsh_to_fd (vlsh);
2265 LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh);
/*
 * epoll_create() interposer: the size argument is not used; the call is
 * equivalent to epoll_create1 (0).
 */
2270 epoll_create (int size)
2272 return epoll_create1 (0);
/*
 * epoll_ctl() interposer.
 *
 * The route taken depends on what epfd and fd resolve to:
 *   - epfd has no VLS handle: the request is passed to libc_epoll_ctl() on
 *     epfd itself.
 *   - epfd and fd both have VLS handles: the request goes to
 *     vls_epoll_ctl() using those handles.
 *   - epfd has a VLS handle but fd does not: the request is issued with
 *     libc_epoll_ctl() on a libc epoll fd tied to the VCL epoll session.
 *     That fd is read with VPPCOM_ATTR_GET_LIBC_EPFD; a new one can be
 *     created with libc_epoll_create1 (EPOLL_CLOEXEC) and stored back with
 *     VPPCOM_ATTR_SET_LIBC_EPFD (presumably when none is recorded yet -
 *     confirm).
 *
 * epfd, op, fd, event - as for epoll_ctl(2); op and event are forwarded
 * unchanged on every path.
 *
 * NOTE(review): the comment in the first branch says the LDP epoll_create1
 * always creates VCL epfd's, yet epoll_create1() in this file also has a
 * path that returns a libc epoll fd; the wording looks stale.
 *
 * NOTE(review): `size` is initialised from sizeof (epfd) although it
 * presumably describes libc_epfd when used with VPPCOM_ATTR_SET_LIBC_EPFD;
 * confirm both have the same type.
 */
2276 epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
2278 vls_handle_t vep_vlsh, vlsh;
2283 vep_vlsh = ldp_fd_to_vlsh (epfd);
2284 if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE))
2286 /* The LDP epoll_create1 always creates VCL epfd's.
2287 * The app should never have a kernel base epoll fd unless it
2288 * was acquired outside of the LD_PRELOAD process context.
2289 * In any case, if we get one, punt it to libc_epoll_ctl.
2291 LDBG (1, "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
2292 " event %p", epfd, op, fd, event);
2294 rv = libc_epoll_ctl (epfd, op, fd, event);
2298 vlsh = ldp_fd_to_vlsh (fd);
2300 LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd,
2303 if (vlsh != VLS_INVALID_HANDLE)
2305 LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
2306 " event %p", epfd, vep_vlsh, op, vlsh, event);
2308 rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
2309 if (rv != VPPCOM_OK)
2318 u32 size = sizeof (epfd);
2320 libc_epfd = vls_attr (vep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2323 LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
2324 "EPOLL_CLOEXEC", epfd, vep_vlsh);
2326 libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
2333 rv = vls_attr (vep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd,
2342 else if (PREDICT_FALSE (libc_epfd < 0))
2349 LDBG (1, "epfd %d: calling libc_epoll_ctl: libc_epfd %d, op %d, fd %d,"
2350 " event %p", epfd, libc_epfd, op, fd, event);
2352 rv = libc_epoll_ctl (libc_epfd, op, fd, event);
/*
 * epoll_wait()/epoll_pwait() implementation used when vls_use_eventfd() is
 * false (see the epoll_pwait() and epoll_wait() wrappers).
 *
 * If epfd is the current worker's vcl_mq_epfd the call is handed directly to
 * libc_epoll_pwait(). Otherwise epfd must resolve to a VLS handle, and the
 * function polls in a loop: vls_epoll_wait() and libc_epoll_pwait() are both
 * called with a zero timeout, and the loop repeats until the deadline
 * computed from `timeout` passes (or forever when timeout is -1). The
 * per-worker epoll_wait_vcl flag gates the VCL wait on each pass.
 *
 * epfd      - epoll descriptor handed out by this library
 * events    - output array; must not be NULL
 * maxevents - capacity of events
 * timeout   - milliseconds; -1 means no deadline, values below -1 are
 *             rejected
 * sigmask   - passed through to libc_epoll_pwait()
 */
2360 ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
2361 int timeout, const sigset_t * sigmask)
2363 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2364 double time_to_wait = (double) 0, max_time;
2365 int libc_epfd, rv = 0;
2366 vls_handle_t ep_vlsh;
/* Argument validation */
2370 if (PREDICT_FALSE (!events || (timeout < -1)))
/* The worker's message-queue epoll fd is a plain kernel epoll fd */
2376 if (epfd == ldpw->vcl_mq_epfd)
2377 return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
2379 ep_vlsh = ldp_fd_to_vlsh (epfd);
2380 if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
2382 LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
/* Lazily initialise the worker clock, then turn the millisecond timeout
 * into an absolute deadline in seconds. A negative timeout contributes 0
 * here; the loop condition handles -1 separately. */
2387 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
2388 clib_time_init (&ldpw->clib_time);
2389 time_to_wait = ((timeout >= 0) ? (double) timeout / 1000 : 0);
2390 max_time = clib_time_now (&ldpw->clib_time) + time_to_wait;
/* libc epoll fd associated with this VCL epoll session */
2392 libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2393 if (PREDICT_FALSE (libc_epfd < 0))
2400 LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, "
2401 "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh,
2402 libc_epfd, events, maxevents, timeout, sigmask, time_to_wait);
/* Non-blocking VCL wait, skipped on a pass where epoll_wait_vcl is set.
 * NOTE(review): the flag looks like a fairness toggle so that libc events
 * are not starved after VCL returned events - confirm. */
2405 if (!ldpw->epoll_wait_vcl)
2407 rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0);
2410 ldpw->epoll_wait_vcl = 1;
2421 ldpw->epoll_wait_vcl = 0;
/* Non-blocking kernel wait on the associated libc epoll fd */
2425 rv = libc_epoll_pwait (libc_epfd, events, maxevents, 0, sigmask);
/* Keep polling until the deadline, or indefinitely for timeout == -1 */
2430 while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time));
/*
 * epoll_wait()/epoll_pwait() implementation used when vls_use_eventfd() is
 * true (see the epoll_pwait() and epoll_wait() wrappers).
 *
 * Rather than polling in a loop, this variant waits in libc_epoll_pwait()
 * with the caller's timeout on the libc epoll fd tied to the VCL epoll
 * session. The worker's vcl_mq_epfd is added to that libc epoll fd, and when
 * it shows up in the returned events its slot is dropped and
 * vls_epoll_wait() is called to append VCL events after the remaining libc
 * ones.
 *
 * epfd      - epoll descriptor handed out by this library
 * events    - output array; must not be NULL
 * maxevents - capacity of events
 * timeout   - milliseconds, passed to libc_epoll_pwait(); values below -1
 *             are rejected
 * sigmask   - passed through to libc_epoll_pwait()
 */
2437 ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
2438 int maxevents, int timeout, const sigset_t * sigmask)
2440 ldp_worker_ctx_t *ldpw;
2441 int libc_epfd, rv = 0, num_ev;
2442 vls_handle_t ep_vlsh;
/* Argument validation */
2446 if (PREDICT_FALSE (!events || (timeout < -1)))
2452 /* Make sure the vcl worker is valid. Could be that epoll fd was created on
2453 * one thread but it is now used on another */
2454 if (PREDICT_FALSE (vppcom_worker_index () == ~0))
2455 vls_register_vcl_worker ();
/* Fetched only after the worker registration above */
2457 ldpw = ldp_worker_get_current ();
/* The worker's message-queue epoll fd is a plain kernel epoll fd */
2458 if (epfd == ldpw->vcl_mq_epfd)
2459 return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
2461 ep_vlsh = ldp_fd_to_vlsh (epfd);
2462 if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
2464 LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
/* A value of 0 is treated as "no libc epoll fd recorded yet": create one
 * and store it on the session. */
2469 libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2470 if (PREDICT_FALSE (!libc_epfd))
/* NOTE(review): sizeof (epfd) is used for the length of libc_epfd; both are
 * int so the value is the same, but sizeof (libc_epfd) would say what is
 * meant. */
2472 u32 size = sizeof (epfd);
2474 LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
2475 "EPOLL_CLOEXEC", epfd, ep_vlsh);
2476 libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
2483 rv = vls_attr (ep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size);
2491 if (PREDICT_FALSE (libc_epfd <= 0))
/* One-time registration of the worker's mq epoll fd with the libc epoll fd.
 * NOTE(review): mq_epfd_added lives on the worker while libc_epfd belongs
 * to this epoll session; if one worker waits on several epoll sessions only
 * the first libc_epfd seen would get the mq fd - confirm this is intended. */
2498 if (PREDICT_FALSE (!ldpw->mq_epfd_added))
2500 struct epoll_event e = { 0 };
/* Tag the event with the mq fd so it can be recognised after the wait */
2502 e.data.fd = ldpw->vcl_mq_epfd;
2503 if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) <
2506 LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d",
2507 epfd, ldpw->vcl_mq_epfd, libc_epfd);
2511 ldpw->mq_epfd_added = 1;
2514 /* Request to only drain unhandled to prevent libc_epoll_wait starved */
2515 rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2);
2518 else if (PREDICT_FALSE (rv < 0))
/* Block in the kernel for up to the caller's timeout */
2525 rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask);
/* Look for the mq fd among the returned events.
 * NOTE(review): data is whatever was registered for each descriptor, so an
 * application-supplied value whose data.fd view equals vcl_mq_epfd would
 * match too - confirm that cannot happen. */
2528 for (int i = 0; i < rv; i++)
2530 if (events[i].data.fd == ldpw->vcl_mq_epfd)
2532 /* We should remove mq epoll fd from events. */
/* Overwrite slot i with the entry at index rv */
2536 events[i].events = events[rv].events;
2537 events[i].data.u64 = events[rv].data.u64;
/* Append VCL events after the libc ones, without blocking */
2539 num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0);
2540 if (PREDICT_TRUE (num_ev > 0))
/*
 * epoll_pwait() interposer: dispatches to ldp_epoll_pwait_eventfd() when
 * vls_use_eventfd() is true and to ldp_epoll_pwait() otherwise. Arguments
 * are forwarded unchanged.
 */
2551 epoll_pwait (int epfd, struct epoll_event *events,
2552 int maxevents, int timeout, const sigset_t * sigmask)
2554 if (vls_use_eventfd ())
2555 return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout,
2558 return ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
/*
 * epoll_wait() interposer: same dispatch as epoll_pwait(), with a NULL
 * signal mask.
 */
2562 epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout)
2564 if (vls_use_eventfd ())
2565 return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL);
2567 return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL);
/*
 * poll() interposer.
 *
 * The fds array is split in two: entries whose fd resolves to a VLS handle
 * are appended to the worker's vcl_poll vector, all others are copied into
 * libc_poll (with their original index remembered in libc_poll_idxs). Both
 * sets are then polled with a zero timeout - vppcom_poll() for the VCL
 * entries and libc_poll() for the rest - in a loop that runs until the
 * deadline derived from `timeout` passes, or without a deadline when timeout
 * is negative. Afterwards the results are written back into fds and the
 * per-worker vectors are reset for the next call.
 *
 * fds     - caller's pollfd array; fd fields of VCL entries are negated for
 *           the duration of the call and restored before returning
 * nfds    - number of entries in fds
 * timeout - milliseconds; negative means no deadline
 */
2571 poll (struct pollfd *fds, nfds_t nfds, int timeout)
2573 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2574 int rv, i, n_revents = 0;
/* NOTE(review): nfds is an nfds_t but is printed with %ld. */
2579 LDBG (3, "fds %p, nfds %ld, timeout %d", fds, nfds, timeout);
/* Lazily initialise the worker clock */
2581 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
2582 clib_time_init (&ldpw->clib_time);
/* Absolute deadline in seconds; a negative timeout contributes 0 and is
 * handled by the loop condition instead. */
2584 max_time = (timeout >= 0) ? (f64) timeout / 1000 : 0;
2585 max_time += clib_time_now (&ldpw->clib_time);
/* Partition the caller's entries.
 * NOTE(review): i is an int while nfds is nfds_t, so this comparison mixes
 * signed and unsigned operands. */
2587 for (i = 0; i < nfds; i++)
2592 vlsh = ldp_fd_to_vlsh (fds[i].fd);
2593 if (vlsh != VLS_INVALID_HANDLE)
/* Mark the entry as VCL-owned by flipping the sign of fd; undone in the
 * write-back loop below. */
2595 fds[i].fd = -fds[i].fd;
2596 vec_add2 (ldpw->vcl_poll, vp, 1);
2598 vp->sh = vlsh_to_sh (vlsh);
2599 vp->events = fds[i].events;
2600 #ifdef __USE_XOPEN2K
/* Requests for the *NORM variants are also expressed as POLLIN / POLLOUT
 * towards VCL */
2601 if (fds[i].events & POLLRDNORM)
2602 vp->events |= POLLIN;
2603 if (fds[i].events & POLLWRNORM)
2604 vp->events |= POLLOUT;
2606 vp->revents = fds[i].revents;
/* Not a VCL session: keep a copy for libc_poll() plus its index in fds */
2610 vec_add1 (ldpw->libc_poll, fds[i]);
2611 vec_add1 (ldpw->libc_poll_idxs, i);
/* Non-blocking poll of the VCL entries */
2617 if (vec_len (ldpw->vcl_poll))
2619 rv = vppcom_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0);
/* Non-blocking poll of the kernel entries */
2630 if (vec_len (ldpw->libc_poll))
2632 rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0);
/* Repeat until the deadline, or indefinitely for a negative timeout */
2645 while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time));
/* Write VCL results back into the caller's array */
2649 vec_foreach (vp, ldpw->vcl_poll)
/* Restore the original fd value */
2651 fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd;
2652 fds[vp->fds_ndx].revents = vp->revents;
2653 #ifdef __USE_XOPEN2K
/* Report the *NORM variants only if the caller asked for them */
2654 if ((fds[vp->fds_ndx].revents & POLLIN) &&
2655 (fds[vp->fds_ndx].events & POLLRDNORM))
2656 fds[vp->fds_ndx].revents |= POLLRDNORM;
2657 if ((fds[vp->fds_ndx].revents & POLLOUT) &&
2658 (fds[vp->fds_ndx].events & POLLWRNORM))
2659 fds[vp->fds_ndx].revents |= POLLWRNORM;
2662 vec_reset_length (ldpw->vcl_poll);
/* Write libc results back using the remembered indices */
2664 for (i = 0; i < vec_len (ldpw->libc_poll); i++)
2666 fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents;
2668 vec_reset_length (ldpw->libc_poll_idxs);
2669 vec_reset_length (ldpw->libc_poll);
/*
 * ppoll() interposer placeholder. The only action taken with the call is a
 * clib_warning() tagged "LDP-TBD" carrying the process id; fds, nfds,
 * timeout and sigmask are not used by it.
 * NOTE(review): presumably reports failure to the caller - confirm the
 * errno / return value.
 */
2676 ppoll (struct pollfd *fds, nfds_t nfds,
2677 const struct timespec *timeout, const sigset_t * sigmask)
2681 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
/*
 * Prototypes for the library load / unload hooks. CONSTRUCTOR_ATTRIBUTE and
 * DESTRUCTOR_ATTRIBUTE are defined at the top of this file and expand to
 * __attribute__ ((constructor)) and __attribute__ ((destructor)).
 */
2689 void CONSTRUCTOR_ATTRIBUTE ldp_constructor (void);
2691 void DESTRUCTOR_ATTRIBUTE ldp_destructor (void);
2694 * This function is called when the library is loaded
/*
 * Load-time hook: runs swrap_constructor() and then ldp_init(). A non-zero
 * result from ldp_init() is reported on stderr; on success a "done" message
 * is emitted through clib_warning() when LDP_DEBUG is greater than 0.
 */
2697 ldp_constructor (void)
2699 swrap_constructor ();
2700 if (ldp_init () != 0)
2702 fprintf (stderr, "\nLDP<%d>: ERROR: ldp_constructor: failed!\n",
2706 else if (LDP_DEBUG > 0)
2707 clib_warning ("LDP<%d>: LDP constructor: done!\n", getpid ());
2711 * This function is called when the library is unloaded
/*
 * Unload-time hook: runs swrap_destructor() and prints a "done" message to
 * stderr with fprintf(), tagged with the function name, line number and
 * process id. fprintf() is used instead of clib_warning() for the reason
 * given in the comment inside the body.
 */
2714 ldp_destructor (void)
2717 swrap_destructor ();
2722 /* Don't use clib_warning() here because that calls writev()
2723 * which will call ldp_init().
2726 fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n",
2727 __func__, __LINE__, getpid ());
2732 * fd.io coding-style-patch-verification: ON
2735 * eval: (c-set-style "gnu")