2 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
22 #include <sys/resource.h>
23 #include <netinet/tcp.h>
25 #include <vcl/ldp_socket_wrapper.h>
29 #include <vcl/vcl_locked.h>
30 #include <vppinfra/time.h>
31 #include <vppinfra/bitmap.h>
32 #include <vppinfra/lock.h>
33 #include <vppinfra/pool.h>
34 #include <vppinfra/hash.h>
/* Build-time helpers: CONSTRUCTOR_ATTRIBUTE / DESTRUCTOR_ATTRIBUTE expand to
 * the GCC constructor / destructor function attributes when the matching
 * HAVE_* macro is defined (both are defined right here), and to nothing
 * otherwise.  LDP_MAX_NWORKERS is the element count handed to pool_alloc ()
 * by ldp_alloc_workers (). */
36 #define HAVE_CONSTRUCTOR_ATTRIBUTE
37 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
38 #define CONSTRUCTOR_ATTRIBUTE \
39 __attribute__ ((constructor))
41 #define CONSTRUCTOR_ATTRIBUTE
42 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
44 #define HAVE_DESTRUCTOR_ATTRIBUTE
45 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
46 #define DESTRUCTOR_ATTRIBUTE \
47 __attribute__ ((destructor))
49 #define DESTRUCTOR_ATTRIBUTE
52 #define LDP_MAX_NWORKERS 32
/* Per-worker LDP state.
 * - clib_time: time base read with clib_time_now () in ldp_pselect ().
 * - rd/wr/ex_bitmap: working copies handed to vls_select () and
 *   libc_pselect () and read back as their results.
 * - si_*_bitmap: bits indexed by VCL session index (filled by
 *   ldp_select_init_maps ()).
 * - libc_*_bitmap: bits indexed by fds that are not LDP-managed.
 * - libc_poll: pollfd array; its user is not visible in this excerpt. */
54 typedef struct ldp_worker_ctx_
57 clib_time_t clib_time;
62 clib_bitmap_t *rd_bitmap;
63 clib_bitmap_t *wr_bitmap;
64 clib_bitmap_t *ex_bitmap;
65 clib_bitmap_t *si_rd_bitmap;
66 clib_bitmap_t *si_wr_bitmap;
67 clib_bitmap_t *si_ex_bitmap;
68 clib_bitmap_t *libc_rd_bitmap;
69 clib_bitmap_t *libc_wr_bitmap;
70 clib_bitmap_t *libc_ex_bitmap;
76 struct pollfd *libc_poll;
/* Compile-time guard for the select path, which copies fd_set contents into
 * clib bitmaps with memcpy and casts bitmaps to (fd_set *) in
 * ldp_pselect (). */
88 /* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. Make sure
89 * they are the same size */
90 STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (fd_mask),
91 "ldp bitmap size mismatch");
92 STATIC_ASSERT (sizeof (vcl_si_set) == sizeof (fd_mask),
93 "ldp bitmap size mismatch");
/* Fields of the process-wide LDP state (the enclosing typedef line is not
 * visible in this excerpt).  workers is the pool indexed by
 * vppcom_worker_index (); app_name is the name passed to vls_app_create ()
 * by the init code. */
97 ldp_worker_ctx_t *workers;
99 char app_name[LDP_APP_NAME_MAX];
105 /** vcl needs next epoll_create to go to libc_epoll */
106 u8 vcl_needs_real_epoll;
/* Debug logging.  LDP_DEBUG is the current debug level.  LDBG (lvl, fmt, ...)
 * emits a clib_warning prefixed with "ldp<pid>: " only when ldp->debug is
 * strictly greater than lvl, and saves/restores errno around the call so
 * that logging cannot change the errno seen by the application. */
109 #define LDP_DEBUG ldp->debug
111 #define LDBG(_lvl, _fmt, _args...) \
112 if (ldp->debug > _lvl) \
114 int errno_saved = errno; \
115 clib_warning ("ldp<%d>: " _fmt, getpid(), ##_args); \
116 errno = errno_saved; \
/* Default LDP state.  vlsh_bit_val is the fd value at which LDP-managed
 * handles start (it is added/subtracted by ldp_vlsh_to_fd () and
 * ldp_fd_to_vlsh ()); vlsh_bit_mask is that value minus one.  Both may be
 * overridden from the environment by the init code.  ldp is the pointer
 * through which the rest of the file reaches this state. */
119 static ldp_main_t ldp_main = {
120 .vlsh_bit_val = (1 << LDP_SID_BIT_MIN),
121 .vlsh_bit_mask = (1 << LDP_SID_BIT_MIN) - 1,
122 .debug = LDP_DEBUG_INIT,
123 .transparent_tls = 0,
126 static ldp_main_t *ldp = &ldp_main;
/* Return the worker context stored in ldp->workers at the index reported by
 * vppcom_worker_index ().  No bounds or NULL check is done here. */
128 static inline ldp_worker_ctx_t *
129 ldp_worker_get_current (void)
131 return (ldp->workers + vppcom_worker_index ());
135 * RETURN: 0 on success or -1 on error.
/* Build the application name "ldp-<pid>-<app_name>" into ldp->app_name.
 * snprintf bounds the write to LDP_APP_NAME_MAX bytes, so a long app_name is
 * truncated. */
138 ldp_set_app_name (char *app_name)
140 snprintf (ldp->app_name, LDP_APP_NAME_MAX,
141 "ldp-%d-%s", getpid (), app_name);
/* Body of the app-name getter (its signature line is not visible here;
 * presumably ldp_get_app_name, which the init code calls).  If no name has
 * been set yet, fall back to the default "app" before returning the
 * buffer. */
147 if (ldp->app_name[0] == '\0')
148 ldp_set_app_name ("app");
150 return ldp->app_name;
/* Convert a VLS handle into the fd number handed to the application by
 * adding the configured offset ldp->vlsh_bit_val. */
154 ldp_vlsh_to_fd (vls_handle_t vlsh)
156 return (vlsh + ldp->vlsh_bit_val);
/* Inverse of ldp_vlsh_to_fd ().  An fd below ldp->vlsh_bit_val is not
 * LDP-managed and yields VLS_INVALID_HANDLE; every wrapper uses that result
 * to choose between the vls_* path and the libc_* path.  Otherwise the
 * offset is subtracted to recover the VLS handle. */
159 static inline vls_handle_t
160 ldp_fd_to_vlsh (int fd)
162 if (fd < ldp->vlsh_bit_val)
163 return VLS_INVALID_HANDLE;
165 return (fd - ldp->vlsh_bit_val);
/* Call pool_alloc () on ldp->workers for LDP_MAX_NWORKERS entries.  Any
 * guard against repeated allocation is on lines not visible here. */
169 ldp_alloc_workers (void)
173 pool_alloc (ldp->workers, LDP_MAX_NWORKERS);
/* Body of the one-time LDP initialisation (signature not visible here; the
 * wrappers call it as ldp_init () and negate its result into errno).
 * Steps visible in this excerpt:
 *  1. Fast path when ldp->init is already set.
 *  2. Create the VLS application; vcl_needs_real_epoll is raised around the
 *     call so that the epoll_create issued meanwhile is served by libc.
 *  3. Allocate worker contexts.
 *  4. Read configuration from the environment: debug level, app name,
 *     sid bit (clamped to [LDP_SID_BIT_MIN, LDP_SID_BIT_MAX]) and the
 *     transparent-TLS switch.
 *  5. Zero the clib_time of every worker so that ldp_pselect () initialises
 *     it lazily. */
179 ldp_worker_ctx_t *ldpw;
182 if (PREDICT_TRUE (ldp->init))
186 ldp->vcl_needs_real_epoll = 1;
187 rv = vls_app_create (ldp_get_app_name ());
190 ldp->vcl_needs_real_epoll = 0;
191 if (rv == VPPCOM_EEXIST)
193 LDBG (2, "\nERROR: ldp_init: vppcom_app_create()"
194 " failed! rv = %d (%s)\n", rv, vppcom_retval_str (rv));
198 ldp->vcl_needs_real_epoll = 0;
199 ldp_alloc_workers ();
200 ldpw = ldp_worker_get_current ();
/* Debug level from LDP_ENV_DEBUG. */
202 char *env_var_str = getenv (LDP_ENV_DEBUG);
206 if (sscanf (env_var_str, "%u", &tmp) != 1)
207 clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in"
208 " the env var " LDP_ENV_DEBUG " (%s)!", getpid (),
213 LDBG (0, "configured LDP debug level (%u) from env var "
214 LDP_ENV_DEBUG "!", ldp->debug);
/* Application name from LDP_ENV_APP_NAME. */
218 env_var_str = getenv (LDP_ENV_APP_NAME);
221 ldp_set_app_name (env_var_str);
222 LDBG (0, "configured LDP app name (%s) from the env var "
223 LDP_ENV_APP_NAME "!", ldp->app_name);
/* Sid bit from LDP_ENV_SID_BIT: selects the fd offset (1 << sb) that
 * separates libc fds from LDP-managed ones. */
226 env_var_str = getenv (LDP_ENV_SID_BIT);
230 if (sscanf (env_var_str, "%u", &sb) != 1)
232 LDBG (0, "WARNING: Invalid LDP sid bit specified in the env var "
233 LDP_ENV_SID_BIT " (%s)! sid bit value %d (0x%x)", env_var_str,
234 ldp->vlsh_bit_val, ldp->vlsh_bit_val);
236 else if (sb < LDP_SID_BIT_MIN)
238 ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MIN);
239 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
241 LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
242 LDP_ENV_SID_BIT " (%s) is too small. Using LDP_SID_BIT_MIN"
243 " (%d)! sid bit value %d (0x%x)", sb, env_var_str,
244 LDP_SID_BIT_MIN, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
246 else if (sb > LDP_SID_BIT_MAX)
248 ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MAX);
249 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
251 LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
252 LDP_ENV_SID_BIT " (%s) is too big. Using LDP_SID_BIT_MAX"
253 " (%d)! sid bit value %d (0x%x)", sb, env_var_str,
254 LDP_SID_BIT_MAX, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
258 ldp->vlsh_bit_val = (1 << sb);
259 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
261 LDBG (0, "configured LDP sid bit (%u) from "
262 LDP_ENV_SID_BIT "! sid bit value %d (0x%x)", sb,
263 ldp->vlsh_bit_val, ldp->vlsh_bit_val);
266 /* Make sure there are enough bits in the fd set for vcl sessions */
267 if (ldp->vlsh_bit_val > FD_SETSIZE / 2)
269 LDBG (0, "ERROR: LDP vlsh bit value %d > FD_SETSIZE/2 %d!",
270 ldp->vlsh_bit_val, FD_SETSIZE / 2);
/* Transparent TLS switch from LDP_ENV_TLS_TRANS. */
275 env_var_str = getenv (LDP_ENV_TLS_TRANS);
278 ldp->transparent_tls = 1;
282 pool_foreach (ldpw, ldp->workers, ({
283 clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time));
287 LDBG (0, "LDP initialization: done!");
/* Body of the close wrapper (signature not visible; presumably close (fd)).
 * For an LDP-managed fd, the libc epoll fd attached to the session is
 * fetched with VPPCOM_ATTR_GET_LIBC_EPFD, closed through libc_close and then
 * reset on the session before vls_close () is called.  A negative epfd is
 * handled as an error on lines not shown.  Any other fd is forwarded to
 * libc_close (). */
298 if ((errno = -ldp_init ()))
301 vlsh = ldp_fd_to_vlsh (fd);
302 if (vlsh != VLS_INVALID_HANDLE)
304 epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
307 LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd);
309 rv = libc_close (epfd);
312 u32 size = sizeof (epfd);
315 (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
318 else if (PREDICT_FALSE (epfd < 0))
325 LDBG (0, "fd %d: calling vls_close: vlsh %u", fd, vlsh);
327 rv = vls_close (vlsh);
336 LDBG (0, "fd %d: calling libc_close", fd);
337 rv = libc_close (fd);
/* read(2) wrapper: after making sure LDP is initialised, an LDP-managed fd
 * is served by vls_read (), any other fd by libc_read (). */
345 read (int fd, void *buf, size_t nbytes)
350 if ((errno = -ldp_init ()))
353 vlsh = ldp_fd_to_vlsh (fd);
354 if (vlsh != VLS_INVALID_HANDLE)
356 size = vls_read (vlsh, buf, nbytes);
365 size = libc_read (fd, buf, nbytes);
/* readv(2) wrapper.  For an LDP-managed fd each iovec entry is filled by its
 * own vls_read () call; a read shorter than the entry length is tested for
 * (presumably ending the scan), and a negative result with nothing
 * accumulated in total is tested for (presumably the error return).  Other
 * fds go to libc_readv (). */
372 readv (int fd, const struct iovec * iov, int iovcnt)
374 int rv = 0, i, total = 0;
378 if ((errno = -ldp_init ()))
381 vlsh = ldp_fd_to_vlsh (fd);
382 if (vlsh != VLS_INVALID_HANDLE)
384 for (i = 0; i < iovcnt; ++i)
386 rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len);
392 if (rv < iov[i].iov_len)
396 if (rv < 0 && total == 0)
406 size = libc_readv (fd, iov, iovcnt);
/* write(2) wrapper: an LDP-managed fd is written with vls_write_msg (), any
 * other fd with libc_write ().  The const qualifier of buf is cast away
 * because vls_write_msg () takes a non-const pointer. */
413 write (int fd, const void *buf, size_t nbytes)
418 if ((errno = -ldp_init ()))
421 vlsh = ldp_fd_to_vlsh (fd);
422 if (vlsh != VLS_INVALID_HANDLE)
424 size = vls_write_msg (vlsh, (void *) buf, nbytes);
433 size = libc_write (fd, buf, nbytes);
/* writev(2) wrapper.  For an LDP-managed fd each iovec entry is sent with
 * its own vls_write_msg () call; a write shorter than the entry length is
 * tested for (presumably ending the scan), and a negative result with
 * nothing accumulated in total is tested for (presumably the error return).
 * Other fds go to libc_writev (). */
440 writev (int fd, const struct iovec * iov, int iovcnt)
442 ssize_t size = 0, total = 0;
446 if ((errno = -ldp_init ()))
449 vlsh = ldp_fd_to_vlsh (fd);
450 if (vlsh != VLS_INVALID_HANDLE)
452 for (i = 0; i < iovcnt; ++i)
454 rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len);
460 if (rv < iov[i].iov_len)
465 if (rv < 0 && total == 0)
475 size = libc_writev (fd, iov, iovcnt);
/* Shared implementation behind fcntl () and fcntl64 ().  For an LDP-managed
 * fd the integer argument is pulled from ap and the command is mapped onto
 * session attributes: flags are written with VPPCOM_ATTR_SET_FLAGS, read
 * with VPPCOM_ATTR_GET_FLAGS, and F_SETFD is only logged and ignored.  Other
 * fds are forwarded to libc_vfcntl64 () or libc_vfcntl () (the lines
 * selecting between them are not visible here). */
482 fcntl_internal (int fd, int cmd, va_list ap)
487 vlsh = ldp_fd_to_vlsh (fd);
488 LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd);
489 if (vlsh != VLS_INVALID_HANDLE)
491 int flags = va_arg (ap, int);
494 size = sizeof (flags);
499 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
503 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size);
508 /* TODO handle this */
509 LDBG (0, "F_SETFD ignored flags %u", flags);
525 rv = libc_vfcntl64 (fd, cmd, ap);
527 rv = libc_vfcntl (fd, cmd, ap);
/* fcntl(2) wrapper: initialise LDP, then hand the variadic argument list to
 * fcntl_internal (). */
535 fcntl (int fd, int cmd, ...)
540 if ((errno = -ldp_init ()))
544 rv = fcntl_internal (fd, cmd, ap);
/* fcntl64 entry point: same behaviour as fcntl (), both delegate to
 * fcntl_internal (). */
551 fcntl64 (int fd, int cmd, ...)
556 if ((errno = -ldp_init ()))
560 rv = fcntl_internal (fd, cmd, ap);
/* ioctl(2) wrapper.  For an LDP-managed fd two requests are visible: one
 * returns the readable byte count via VPPCOM_ATTR_GET_NREAD, the other turns
 * the integer argument into O_NONBLOCK or 0 and writes it with
 * VPPCOM_ATTR_SET_FLAGS (the case labels are not visible; presumably
 * FIONREAD and FIONBIO).  Note that the second request overwrites the whole
 * flag word instead of merging.  Other fds go to libc_vioctl (). */
566 ioctl (int fd, unsigned long int cmd, ...)
572 if ((errno = -ldp_init ()))
577 vlsh = ldp_fd_to_vlsh (fd);
578 if (vlsh != VLS_INVALID_HANDLE)
583 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
588 u32 flags = va_arg (ap, int) ? O_NONBLOCK : 0;
589 u32 size = sizeof (flags);
591 /* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than
592 * non-blocking, the flags should be read here and merged
595 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
611 rv = libc_vioctl (fd, cmd, ap);
/* Split one caller-supplied fd_set into a libc part and a VCL part.
 *   original  caller's set; copied into *resultb and then zeroed
 *   resultb   scratch bitmap receiving the copy of original
 *   libcb     out: bits for fds that are not LDP-managed (indexed by fd)
 *   vclb      out: bits for LDP-managed fds (indexed by session index)
 *   minbits   minimum size the three bitmaps are validated to
 *   n_bytes   number of bytes of original to copy/clear
 *   si_bits   in/out: raised to (highest set session index + 1)
 *   libc_bits in/out: raised to (highest set libc fd + 1)
 * si_bits and libc_bits only ever grow, so the same variables can be passed
 * for the read, write and except sets in turn. */
619 ldp_select_init_maps (fd_set * __restrict original,
620 clib_bitmap_t ** resultb, clib_bitmap_t ** libcb,
621 clib_bitmap_t ** vclb, int nfds, u32 minbits,
622 u32 n_bytes, uword * si_bits, uword * libc_bits)
624 uword si_bits_set, libc_bits_set;
628 clib_bitmap_validate (*vclb, minbits);
629 clib_bitmap_validate (*libcb, minbits);
630 clib_bitmap_validate (*resultb, minbits);
631 clib_memcpy_fast (*resultb, original, n_bytes);
632 memset (original, 0, n_bytes);
635 clib_bitmap_foreach (fd, *resultb, ({
638 vlsh = ldp_fd_to_vlsh (fd);
639 if (vlsh == VLS_INVALID_HANDLE)
640 clib_bitmap_set_no_check (*libcb, fd, 1);
642 *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1);
646 si_bits_set = clib_bitmap_last_set (*vclb) + 1;
647 *si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits;
648 clib_bitmap_validate (*resultb, *si_bits);
650 libc_bits_set = clib_bitmap_last_set (*libcb) + 1;
651 *libc_bits = (libc_bits_set > *libc_bits) ? libc_bits_set : *libc_bits;
/* Translate a bitmap of ready VCL session indexes back into the caller's
 * fd_set: each set index is converted to its VLS handle and then to the
 * application-visible fd.  A negative fd is treated as a failure; the
 * handling and the store into libcb are on lines not visible here.
 * ldp_pselect () treats a non-zero return as an error. */
655 ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
665 clib_bitmap_foreach (si, vclb, ({
666 vlsh = vls_session_index_to_vlsh (si);
667 ASSERT (vlsh != VLS_INVALID_HANDLE);
668 fd = ldp_vlsh_to_fd (vlsh);
669 if (PREDICT_FALSE (fd < 0))
/* Merge the libc select results into the caller's fd_set: every bit set in
 * result is set in libcb with FD_SET (existing bits are left untouched). */
682 ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb)
690 clib_bitmap_foreach (fd, result, ({
691 FD_SET ((int)fd, libcb);
/* Common implementation of select () and pselect ().
 *
 * Fast paths visible here: with a timeout and no fds the function busy-waits
 * on clib_time_now () ("select as fine grained sleep"); when nfds does not
 * reach past ldp->vlsh_bit_val no LDP-managed fd can be present and the call
 * is passed straight to libc_pselect ().
 *
 * Otherwise each fd_set is split into a VCL bitmap and a libc bitmap, and a
 * polling loop alternates between vls_select () on the VCL side and
 * libc_pselect () (with the zeroed libc_tspec unless a timeout was supplied)
 * on the libc side, folding ready descriptors back into the caller's sets,
 * until something is ready or the deadline passes (time_out == -1 means no
 * deadline).  All per-worker bitmaps are cleared on the way out.
 *
 * NOTE(review): libc_tspec is assigned *timeout when timeout is non-NULL;
 * whether that makes libc_pselect () block for the full timeout on each
 * iteration depends on lines not visible here - confirm. */
697 ldp_pselect (int nfds, fd_set * __restrict readfds,
698 fd_set * __restrict writefds,
699 fd_set * __restrict exceptfds,
700 const struct timespec *__restrict timeout,
701 const __sigset_t * __restrict sigmask)
703 u32 minbits = clib_max (nfds, BITS (uword)), n_bytes;
704 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
705 struct timespec libc_tspec = { 0 };
706 f64 time_out, vcl_timeout = 0;
707 uword si_bits, libc_bits;
708 int rv, bits_set = 0;
716 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
717 clib_time_init (&ldpw->clib_time);
721 time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ?
722 (f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9;
724 /* select as fine grained sleep */
727 time_out += clib_time_now (&ldpw->clib_time);
728 while (clib_time_now (&ldpw->clib_time) < time_out)
/* No fd at or above the LDP offset can be in the sets: libc only. */
741 if (nfds <= ldp->vlsh_bit_val)
743 rv = libc_pselect (nfds, readfds, writefds, exceptfds,
748 si_bits = libc_bits = 0;
749 n_bytes = nfds / 8 + ((nfds % 8) ? 1 : 0);
/* Split each requested set into its VCL and libc halves. */
752 ldp_select_init_maps (readfds, &ldpw->rd_bitmap, &ldpw->libc_rd_bitmap,
753 &ldpw->si_rd_bitmap, nfds, minbits, n_bytes,
754 &si_bits, &libc_bits);
756 ldp_select_init_maps (writefds, &ldpw->wr_bitmap,
757 &ldpw->libc_wr_bitmap, &ldpw->si_wr_bitmap, nfds,
758 minbits, n_bytes, &si_bits, &libc_bits);
760 ldp_select_init_maps (exceptfds, &ldpw->ex_bitmap,
761 &ldpw->libc_ex_bitmap, &ldpw->si_ex_bitmap, nfds,
762 minbits, n_bytes, &si_bits, &libc_bits);
764 if (PREDICT_FALSE (!si_bits && !libc_bits))
772 libc_tspec = timeout ? *timeout : libc_tspec;
/* VCL side: reload the working bitmaps from the session-index sets, since
 * the select call overwrites them with results. */
779 clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap,
780 vec_len (ldpw->si_rd_bitmap) *
781 sizeof (clib_bitmap_t));
783 clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap,
784 vec_len (ldpw->si_wr_bitmap) *
785 sizeof (clib_bitmap_t));
787 clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap,
788 vec_len (ldpw->si_ex_bitmap) *
789 sizeof (clib_bitmap_t));
791 rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL,
792 writefds ? ldpw->wr_bitmap : NULL,
793 exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout);
802 if (ldp_select_vcl_map_to_libc (ldpw->rd_bitmap, readfds))
808 if (ldp_select_vcl_map_to_libc (ldpw->wr_bitmap, writefds))
814 if (ldp_select_vcl_map_to_libc (ldpw->ex_bitmap, exceptfds))
/* libc side: same pattern with the fd-indexed sets. */
825 clib_memcpy_fast (ldpw->rd_bitmap, ldpw->libc_rd_bitmap,
826 vec_len (ldpw->libc_rd_bitmap) *
827 sizeof (clib_bitmap_t));
829 clib_memcpy_fast (ldpw->wr_bitmap, ldpw->libc_wr_bitmap,
830 vec_len (ldpw->libc_wr_bitmap) *
831 sizeof (clib_bitmap_t));
833 clib_memcpy_fast (ldpw->ex_bitmap, ldpw->libc_ex_bitmap,
834 vec_len (ldpw->libc_ex_bitmap) *
835 sizeof (clib_bitmap_t));
837 rv = libc_pselect (libc_bits,
838 readfds ? (fd_set *) ldpw->rd_bitmap : NULL,
839 writefds ? (fd_set *) ldpw->wr_bitmap : NULL,
840 exceptfds ? (fd_set *) ldpw->ex_bitmap : NULL,
841 &libc_tspec, sigmask);
844 ldp_select_libc_map_merge (ldpw->rd_bitmap, readfds);
845 ldp_select_libc_map_merge (ldpw->wr_bitmap, writefds);
846 ldp_select_libc_map_merge (ldpw->ex_bitmap, exceptfds);
857 while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out));
861 /* TBD: set timeout to amount of time left */
862 clib_bitmap_zero (ldpw->rd_bitmap);
863 clib_bitmap_zero (ldpw->si_rd_bitmap);
864 clib_bitmap_zero (ldpw->libc_rd_bitmap);
865 clib_bitmap_zero (ldpw->wr_bitmap);
866 clib_bitmap_zero (ldpw->si_wr_bitmap);
867 clib_bitmap_zero (ldpw->libc_wr_bitmap);
868 clib_bitmap_zero (ldpw->ex_bitmap);
869 clib_bitmap_zero (ldpw->si_ex_bitmap);
870 clib_bitmap_zero (ldpw->libc_ex_bitmap);
/* select(2) wrapper: converts the optional struct timeval into a struct
 * timespec (microseconds scaled to nanoseconds) and delegates to
 * ldp_pselect () with no signal mask.  A NULL timeout is forwarded as NULL. */
876 select (int nfds, fd_set * __restrict readfds,
877 fd_set * __restrict writefds,
878 fd_set * __restrict exceptfds, struct timeval *__restrict timeout)
880 struct timespec tspec;
884 tspec.tv_sec = timeout->tv_sec;
885 tspec.tv_nsec = timeout->tv_usec * 1000;
887 return ldp_pselect (nfds, readfds, writefds, exceptfds,
888 timeout ? &tspec : NULL, NULL);
/* pselect(2) wrapper: delegates to ldp_pselect ().
 * The caller's sigmask is now forwarded instead of being replaced by 0;
 * ldp_pselect () passes it on to libc_pselect (), so dropping it here meant
 * the signal mask requested by the application was never applied. */
893 pselect (int nfds, fd_set * __restrict readfds,
894 fd_set * __restrict writefds,
895 fd_set * __restrict exceptfds,
896 const struct timespec *__restrict timeout,
897 const __sigset_t * __restrict sigmask)
899 return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout, sigmask);
903 /* If transparent TLS mode is turned on, then ldp will load key and cert.
/* Load the TLS certificate for a session in transparent-TLS mode.  The file
 * path comes from the LDP_ENV_TLS_CERT environment variable; up to
 * sizeof (inbuf) bytes are read with fread () and handed to
 * vppcom_session_tls_add_cert () for the session behind vlsh.  Errors are
 * logged when the file cannot be opened or the variable is unset; socket ()
 * treats a negative return as failure. */
906 load_tls_cert (vls_handle_t vlsh)
908 char *env_var_str = getenv (LDP_ENV_TLS_CERT);
916 fp = fopen (env_var_str, "r");
919 LDBG (0, "ERROR: failed to open cert file %s \n", env_var_str);
922 cert_size = fread (inbuf, sizeof (char), sizeof (inbuf), fp);
924 vppcom_session_tls_add_cert (vlsh_to_session_index (vlsh), tls_cert,
930 LDBG (0, "ERROR: failed to read LDP environment %s\n",
/* Load the TLS private key for a session in transparent-TLS mode.  Mirrors
 * load_tls_cert (): the path comes from LDP_ENV_TLS_KEY, up to
 * sizeof (inbuf) bytes are read and handed to vppcom_session_tls_add_key ().
 * Errors are logged when the file cannot be opened or the variable is
 * unset. */
938 load_tls_key (vls_handle_t vlsh)
940 char *env_var_str = getenv (LDP_ENV_TLS_KEY);
948 fp = fopen (env_var_str, "r");
951 LDBG (0, "ERROR: failed to open key file %s \n", env_var_str);
954 key_size = fread (inbuf, sizeof (char), sizeof (inbuf), fp);
956 vppcom_session_tls_add_key (vlsh_to_session_index (vlsh), tls_key,
962 LDBG (0, "ERROR: failed to read LDP environment %s\n", LDP_ENV_TLS_KEY);
/* socket(2) wrapper.  AF_INET / AF_INET6 sockets of type SOCK_STREAM or
 * SOCK_DGRAM (after masking SOCK_CLOEXEC and SOCK_NONBLOCK out of type) are
 * created as VLS sessions: the protocol is TLS when transparent TLS is on,
 * otherwise UDP for datagram and TCP for stream.  In transparent-TLS mode
 * the certificate and key are loaded right after creation.  The returned fd
 * is the VLS handle shifted by ldp_vlsh_to_fd ().  Every other
 * domain/type is forwarded to libc_socket () unchanged. */
969 socket (int domain, int type, int protocol)
971 int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
972 u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0;
975 if ((errno = -ldp_init ()))
978 if (((domain == AF_INET) || (domain == AF_INET6)) &&
979 ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM)))
982 if (ldp->transparent_tls)
984 proto = VPPCOM_PROTO_TLS;
987 proto = ((sock_type == SOCK_DGRAM) ?
988 VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP);
990 LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u",
991 proto, vppcom_proto_str (proto), is_nonblocking);
993 vlsh = vls_create (proto, is_nonblocking);
1001 if (ldp->transparent_tls)
1003 if (load_tls_cert (vlsh) < 0 || load_tls_key (vlsh) < 0)
1008 rv = ldp_vlsh_to_fd (vlsh);
1013 LDBG (0, "calling libc_socket");
1014 rv = libc_socket (domain, type, protocol);
1021 * Create two new sockets, of type TYPE in domain DOMAIN and using
1022 * protocol PROTOCOL, which are connected to each other, and put file
1023 * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
1024 * one will be chosen automatically.
1025 * Returns 0 on success, -1 for errors.
/* socketpair(2) wrapper.  The IP stream/datagram combination that socket ()
 * would route to VLS is not implemented (only "LDP-TBD" is logged; the
 * resulting return value is set on lines not visible here).  Everything
 * else is forwarded to libc_socketpair (). */
1028 socketpair (int domain, int type, int protocol, int fds[2])
1030 int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
1032 if ((errno = -ldp_init ()))
1035 if (((domain == AF_INET) || (domain == AF_INET6)) &&
1036 ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM)))
1038 LDBG (0, "LDP-TBD");
1044 LDBG (1, "calling libc_socketpair");
1045 rv = libc_socketpair (domain, type, protocol, fds);
/* bind(2) wrapper.  For an LDP-managed fd the sockaddr is converted into a
 * vppcom endpoint: the address length must exactly match sockaddr_in or
 * sockaddr_in6 for the given family, ep.ip points into the caller's
 * sockaddr (no copy), and the port is taken as stored in the sockaddr (no
 * byte-order conversion here).  Unsupported families set EAFNOSUPPORT.  The
 * endpoint is then passed to vls_bind ().  Other fds go to libc_bind (). */
1052 bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
1057 if ((errno = -ldp_init ()))
1060 vlsh = ldp_fd_to_vlsh (fd);
1061 if (vlsh != VLS_INVALID_HANDLE)
1065 switch (addr->sa_family)
1068 if (len != sizeof (struct sockaddr_in))
1070 LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!",
1076 ep.is_ip4 = VPPCOM_IS_IP4;
1077 ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
1078 ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
1082 if (len != sizeof (struct sockaddr_in6))
1084 LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!",
1090 ep.is_ip4 = VPPCOM_IS_IP6;
1091 ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1092 ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
1096 LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!",
1097 fd, vlsh, addr->sa_family);
1098 errno = EAFNOSUPPORT;
1102 LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh,
1105 rv = vls_bind (vlsh, &ep);
1106 if (rv != VPPCOM_OK)
1114 LDBG (0, "fd %d: calling libc_bind: addr %p, len %u", fd, addr, len);
1115 rv = libc_bind (fd, addr, len);
1119 LDBG (1, "fd %d: returning %d", fd, rv);
/* Fill a caller-supplied sockaddr from a vppcom endpoint.
 *   addr  destination sockaddr (sockaddr_in or sockaddr_in6 layout)
 *   len   in: size of the caller's buffer; out: clamped to the size of the
 *         sockaddr type for the endpoint's family
 *   ep    source endpoint (is_ip4, port, ip)
 * Nothing is written unless addr, len and ep are all non-NULL.  copy_len is
 * the clamped length minus the non-address part of the structure, i.e. the
 * number of address bytes that fit in the caller's buffer.
 * NOTE(review): sa_family and the port are stored before *len is examined,
 * so a buffer shorter than the fixed header would still be written to -
 * confirm callers always pass at least a full sockaddr. */
1125 ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len,
1126 vppcom_endpt_t * ep)
1129 int sa_len, copy_len;
1131 if ((errno = -ldp_init ()))
1134 if (addr && len && ep)
1136 addr->sa_family = (ep->is_ip4 == VPPCOM_IS_IP4) ? AF_INET : AF_INET6;
1137 switch (addr->sa_family)
1140 ((struct sockaddr_in *) addr)->sin_port = ep->port;
1141 if (*len > sizeof (struct sockaddr_in))
1142 *len = sizeof (struct sockaddr_in);
1143 sa_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr);
1144 copy_len = *len - sa_len;
1146 memcpy (&((struct sockaddr_in *) addr)->sin_addr, ep->ip,
1151 ((struct sockaddr_in6 *) addr)->sin6_port = ep->port;
1152 if (*len > sizeof (struct sockaddr_in6))
1153 *len = sizeof (struct sockaddr_in6);
1154 sa_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr);
1155 copy_len = *len - sa_len;
1157 memcpy (((struct sockaddr_in6 *) addr)->sin6_addr.
1158 __in6_u.__u6_addr8, ep->ip, copy_len);
/* getsockname(2) wrapper.  For an LDP-managed fd the local endpoint is
 * fetched with VPPCOM_ATTR_GET_LCL_ADDR (addr_buf provides storage large
 * enough for an IPv6 address) and converted with
 * ldp_copy_ep_to_sockaddr ().  Other fds go to libc_getsockname (). */
1171 getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
1176 if ((errno = -ldp_init ()))
1179 vlsh = ldp_fd_to_vlsh (fd);
1180 if (vlsh != VLS_INVALID_HANDLE)
1183 u8 addr_buf[sizeof (struct in6_addr)];
1184 u32 size = sizeof (ep);
1188 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size);
1189 if (rv != VPPCOM_OK)
1196 rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
1197 if (rv != VPPCOM_OK)
1206 rv = libc_getsockname (fd, addr, len);
/* connect(2) wrapper.  A NULL addr is rejected up front (logged).  For an
 * LDP-managed fd the sockaddr is validated and converted into a vppcom
 * endpoint exactly as in bind () (strict length check per family, ep.ip
 * aliasing the caller's sockaddr, EAFNOSUPPORT for other families) and
 * passed to vls_connect ().  Other fds go to libc_connect (). */
1213 connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
1218 if ((errno = -ldp_init ()))
1223 LDBG (0, "ERROR: fd %d: NULL addr, len %u", fd, len);
1229 vlsh = ldp_fd_to_vlsh (fd);
1230 if (vlsh != VLS_INVALID_HANDLE)
1234 switch (addr->sa_family)
1237 if (len != sizeof (struct sockaddr_in))
1239 LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!",
1245 ep.is_ip4 = VPPCOM_IS_IP4;
1246 ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
1247 ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
1251 if (len != sizeof (struct sockaddr_in6))
1253 LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!",
1259 ep.is_ip4 = VPPCOM_IS_IP6;
1260 ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1261 ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
1265 LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!",
1266 fd, vlsh, addr->sa_family);
1267 errno = EAFNOSUPPORT;
1271 LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd,
1274 rv = vls_connect (vlsh, &ep);
1275 if (rv != VPPCOM_OK)
1283 LDBG (0, "fd %d: calling libc_connect(): addr %p, len %u",
1286 rv = libc_connect (fd, addr, len);
1290 LDBG (1, "fd %d: returning %d (0x%x)", fd, rv, rv);
/* getpeername(2) wrapper.  Same shape as getsockname (), but the remote
 * endpoint is queried with VPPCOM_ATTR_GET_PEER_ADDR before being converted
 * with ldp_copy_ep_to_sockaddr ().  Other fds go to libc_getpeername (). */
1295 getpeername (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
1300 if ((errno = -ldp_init ()))
1303 vlsh = ldp_fd_to_vlsh (fd);
1304 if (vlsh != VLS_INVALID_HANDLE)
1307 u8 addr_buf[sizeof (struct in6_addr)];
1308 u32 size = sizeof (ep);
1311 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size);
1312 if (rv != VPPCOM_OK)
1319 rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
1320 if (rv != VPPCOM_OK)
1329 rv = libc_getpeername (fd, addr, len);
/* send(2) wrapper: an LDP-managed fd is sent with vls_sendto () and no
 * destination endpoint; a result below VPPCOM_OK is the error case.  Other
 * fds go to libc_send ().
 * NOTE(review): unlike the sibling wrappers, the fd is classified with
 * ldp_fd_to_vlsh () in the declaration, i.e. before ldp_init () has run.
 * On a first call the init code may still change ldp->vlsh_bit_val from the
 * environment, so the classification could use the pre-init offset. */
1336 send (int fd, const void *buf, size_t n, int flags)
1338 vls_handle_t vlsh = ldp_fd_to_vlsh (fd);
1341 if ((errno = -ldp_init ()))
1344 if (vlsh != VLS_INVALID_HANDLE)
1346 size = vls_sendto (vlsh, (void *) buf, n, flags, NULL);
1347 if (size < VPPCOM_OK)
1355 size = libc_send (fd, buf, n, flags);
/* sendfile(2) wrapper.  When out_fd is LDP-managed the transfer is emulated
 * in user space: the session flags are read to learn whether it is
 * non-blocking, in_fd is positioned at *offset when an offset is given, and
 * a loop then (1) asks the session how many bytes can be written
 * (VPPCOM_ATTR_GET_NWRITE), (2) reads at most that many bytes from in_fd
 * into the per-worker io_buffer with libc_read (), and (3) writes them with
 * vls_write (), until len bytes have been handled.  The io_buffer length is
 * reset on every exit path.  When out_fd is not LDP-managed the call goes to
 * libc_sendfile ().
 * NOTE(review): the final "*offset += results + 1" looks like an off-by-one
 * if results is the number of bytes transferred (sendfile(2) leaves *offset
 * at the byte following the last byte read) - the lines updating results
 * are not visible here, confirm.
 * NOTE(review): ldp_worker_get_current () runs in the declaration, before
 * ldp_init () has had a chance to allocate the workers. */
1362 sendfile (int out_fd, int in_fd, off_t * offset, size_t len)
1364 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
1368 if ((errno = -ldp_init ()))
1371 vlsh = ldp_fd_to_vlsh (out_fd);
1372 if (vlsh != VLS_INVALID_HANDLE)
1375 ssize_t results = 0;
1376 size_t n_bytes_left = len;
1377 size_t bytes_to_read;
1380 u32 flags, flags_len = sizeof (flags);
1382 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len);
1383 if (PREDICT_FALSE (rv != VPPCOM_OK))
1385 LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!",
1386 out_fd, vlsh, rv, vppcom_retval_str (rv));
1388 vec_reset_length (ldpw->io_buffer);
1396 off_t off = lseek (in_fd, *offset, SEEK_SET);
1397 if (PREDICT_FALSE (off == -1))
1403 ASSERT (off == *offset);
1408 size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0);
1411 LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %d (%s)!",
1412 out_fd, vlsh, size, vppcom_retval_str (size));
1413 vec_reset_length (ldpw->io_buffer);
1419 bytes_to_read = size;
1420 if (bytes_to_read == 0)
1422 if (flags & O_NONBLOCK)
1431 bytes_to_read = clib_min (n_bytes_left, bytes_to_read);
1432 vec_validate (ldpw->io_buffer, bytes_to_read);
1433 nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read);
1438 vec_reset_length (ldpw->io_buffer);
1445 size = vls_write (vlsh, ldpw->io_buffer, nbytes);
1448 if (size == VPPCOM_EAGAIN)
1450 if (flags & O_NONBLOCK)
1461 vec_reset_length (ldpw->io_buffer);
1470 ASSERT (n_bytes_left >= nbytes);
1471 n_bytes_left = n_bytes_left - nbytes;
1473 while (n_bytes_left > 0);
1476 vec_reset_length (ldpw->io_buffer);
1479 off_t off = lseek (in_fd, *offset, SEEK_SET);
1480 if (PREDICT_FALSE (off == -1))
1486 ASSERT (off == *offset);
1487 *offset += results + 1;
1499 size = libc_sendfile (out_fd, in_fd, offset, len);
/* sendfile64 entry point: forwards unchanged to sendfile (). */
1507 sendfile64 (int out_fd, int in_fd, off_t * offset, size_t len)
1509 return sendfile (out_fd, in_fd, offset, len);
/* recv(2) wrapper: an LDP-managed fd is read with vls_recvfrom () without a
 * source endpoint; other fds go to libc_recv (). */
1513 recv (int fd, void *buf, size_t n, int flags)
1518 if ((errno = -ldp_init ()))
1521 vlsh = ldp_fd_to_vlsh (fd);
1522 if (vlsh != VLS_INVALID_HANDLE)
1524 size = vls_recvfrom (vlsh, buf, n, flags, NULL);
1533 size = libc_recv (fd, buf, n, flags);
/* Send n bytes on a VLS session, optionally to an explicit destination.
 * When addr is supplied it is translated into a vppcom endpoint (ep stays
 * NULL otherwise); ep.ip aliases the caller's sockaddr and the port is taken
 * as stored.  Returns whatever vls_sendto () returns.
 * An unsupported address family is now reported as -EAFNOSUPPORT: callers
 * follow the negative-errno convention (sendmsg () tests rv < 0), so the
 * former positive EAFNOSUPPORT would have been mistaken for a byte count. */
1540 ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags,
1541 __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
1543 vppcom_endpt_t *ep = 0;
1549 switch (addr->sa_family)
1552 ep->is_ip4 = VPPCOM_IS_IP4;
1554 (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr;
1555 ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port;
1559 ep->is_ip4 = VPPCOM_IS_IP6;
1561 (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1563 (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port;
1567 return -EAFNOSUPPORT;
1571 return vls_sendto (vlsh, (void *) buf, n, flags, ep);
/* Receive on a VLS session, optionally reporting the sender.  Two paths are
 * visible: one receives with an endpoint (src_addr provides storage sized
 * for a sockaddr_in6) and converts it into the caller's sockaddr with
 * ldp_copy_ep_to_sockaddr (); the other receives with a NULL endpoint.  The
 * condition choosing between them is not visible here (presumably whether
 * addr was supplied). */
1575 ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n,
1576 int flags, __SOCKADDR_ARG addr,
1577 socklen_t * __restrict addr_len)
1579 u8 src_addr[sizeof (struct sockaddr_in6)];
1587 size = vls_recvfrom (vlsh, buf, n, flags, &ep);
1591 rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
1597 size = vls_recvfrom (vlsh, buf, n, flags, NULL);
/* sendto(2) wrapper: an LDP-managed fd is sent through ldp_vls_sendo (),
 * which also translates the optional destination address; other fds go to
 * libc_sendto ().
 * The handle test now uses VLS_INVALID_HANDLE, the value ldp_fd_to_vlsh ()
 * actually returns and the one every other wrapper compares against,
 * instead of the session-layer constant INVALID_SESSION_ID. */
1603 sendto (int fd, const void *buf, size_t n, int flags,
1604 __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
1609 if ((errno = -ldp_init ()))
1612 vlsh = ldp_fd_to_vlsh (fd);
1613 if (vlsh != VLS_INVALID_HANDLE)
1615 size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len);
1624 size = libc_sendto (fd, buf, n, flags, addr, addr_len);
/* recvfrom(2) wrapper: an LDP-managed fd is served by ldp_vls_recvfrom (),
 * other fds by libc_recvfrom (). */
1631 recvfrom (int fd, void *__restrict buf, size_t n, int flags,
1632 __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
1637 if ((errno = -ldp_init ()))
1640 vlsh = ldp_fd_to_vlsh (fd);
1641 if (vlsh != VLS_INVALID_HANDLE)
1643 size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len);
1652 size = libc_recvfrom (fd, buf, n, flags, addr, addr_len);
/* sendmsg(2) wrapper.  For an LDP-managed fd every iovec entry of the
 * message is sent with its own ldp_vls_sendo () call, each to the
 * destination in msg_name; a short send is tested for (presumably ending the
 * scan) and a negative result with nothing accumulated in total is tested
 * for (presumably the error return).  Ancillary data (msg_control) is not
 * referenced on the visible lines.  Other fds go to libc_sendmsg (). */
1659 sendmsg (int fd, const struct msghdr * msg, int flags)
1664 if ((errno = -ldp_init ()))
1667 vlsh = ldp_fd_to_vlsh (fd);
1668 if (vlsh != VLS_INVALID_HANDLE)
1670 struct iovec *iov = msg->msg_iov;
1674 for (i = 0; i < msg->msg_iovlen; ++i)
1676 rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
1677 msg->msg_name, msg->msg_namelen);
1683 if (rv < iov[i].iov_len)
1688 if (rv < 0 && total == 0)
1698 size = libc_sendmsg (fd, msg, flags);
/* sendmmsg(2) wrapper.  Not implemented for LDP-managed fds (only an
 * "LDP-TBD" warning is emitted on that path); other fds are forwarded to
 * libc_sendmmsg () with entry/exit tracing via clib_warning.
 * NOTE(review): the fd is classified before ldp_init () runs, the result is
 * stored in a u32 and compared with INVALID_SESSION_ID rather than
 * VLS_INVALID_HANDLE, unlike most wrappers in this file. */
1706 sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
1709 const char *func_str;
1710 u32 sh = ldp_fd_to_vlsh (fd);
1712 if ((errno = -ldp_init ()))
1715 if (sh != INVALID_SESSION_ID)
1717 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
1723 func_str = "libc_sendmmsg";
1726 clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
1727 "vmessages %p, vlen %u, flags 0x%x",
1728 getpid (), fd, fd, func_str, vmessages, vlen, flags);
1730 size = libc_sendmmsg (fd, vmessages, vlen, flags);
1737 int errno_val = errno;
1739 clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
1740 "rv %d, errno = %d", getpid (), fd, fd,
1741 func_str, size, errno_val);
1745 clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
1746 getpid (), fd, fd, size, size);
/* recvmsg(2) wrapper.  For an LDP-managed fd the number of readable bytes is
 * sampled once (VPPCOM_ATTR_GET_NREAD) and the iovec entries are then filled
 * one by one with ldp_vls_recvfrom (); only the first call asks for the
 * sender address (msg_name / msg_namelen).  A short read and total reaching
 * the sampled byte count are both tested for (presumably ending the scan);
 * a negative result with nothing accumulated in total is tested for
 * (presumably the error return).  Other fds go to libc_recvmsg (). */
1753 recvmsg (int fd, struct msghdr * msg, int flags)
1758 if ((errno = -ldp_init ()))
1761 vlsh = ldp_fd_to_vlsh (fd);
1762 if (vlsh != VLS_INVALID_HANDLE)
1764 struct iovec *iov = msg->msg_iov;
1765 ssize_t max_deq, total = 0;
1768 max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
1772 for (i = 0; i < msg->msg_iovlen; i++)
1774 rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
1775 (i == 0 ? msg->msg_name : NULL),
1776 (i == 0 ? &msg->msg_namelen : NULL));
1782 if (rv < iov[i].iov_len)
1785 if (total >= max_deq)
1789 if (rv < 0 && total == 0)
1799 size = libc_recvmsg (fd, msg, flags);
/* recvmmsg(2) wrapper.  Not implemented for LDP-managed fds (only an
 * "LDP-TBD" warning is emitted on that path); other fds are forwarded to
 * libc_recvmmsg () with entry/exit tracing via clib_warning.
 * NOTE(review): as in sendmmsg (), the fd is classified before ldp_init ()
 * runs and compared with INVALID_SESSION_ID through a u32. */
1807 recvmmsg (int fd, struct mmsghdr *vmessages,
1808 unsigned int vlen, int flags, struct timespec *tmo)
1811 const char *func_str;
1812 u32 sh = ldp_fd_to_vlsh (fd);
1814 if ((errno = -ldp_init ()))
1817 if (sh != INVALID_SESSION_ID)
1819 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
1825 func_str = "libc_recvmmsg";
1828 clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
1829 "vmessages %p, vlen %u, flags 0x%x, tmo %p",
1830 getpid (), fd, fd, func_str, vmessages, vlen,
1833 size = libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
1840 int errno_val = errno;
1842 clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
1843 "rv %d, errno = %d", getpid (), fd, fd,
1844 func_str, size, errno_val);
1848 clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
1849 getpid (), fd, fd, size, size);
/* getsockopt(2) wrapper.  For an LDP-managed fd the option is translated
 * into a vls_attr () query, grouped by level (the log messages name SOL_TCP,
 * SOL_IPV6 and SOL_SOCKET; the case labels are mostly not visible here):
 *   TCP:    NODELAY, USER_MSS, KEEPIDLE, KEEPINTVL attributes; TCP_INFO is a
 *           no-op that zero-fills optval when *optlen matches
 *           sizeof (struct tcp_info); TCP_CONGESTION always reports "cubic".
 *   IPv6:   V6ONLY.
 *   Socket: LISTEN, KEEPALIVE, PROTOCOL (mapped to SOCK_DGRAM/SOCK_STREAM),
 *           TX/RX FIFO length, REUSEADDR, BROADCAST, ERROR.
 * Unsupported options are logged.  Other fds go to libc_getsockopt ().
 * NOTE(review): the TCP_CONGESTION branch overwrites *optlen with
 * strlen ("cubic") and copies *optlen + 1 bytes without looking at the
 * buffer size the caller passed in - a caller buffer shorter than 6 bytes
 * would be overrun. */
1856 getsockopt (int fd, int level, int optname,
1857 void *__restrict optval, socklen_t * __restrict optlen)
1862 if ((errno = -ldp_init ()))
1865 vlsh = ldp_fd_to_vlsh (fd);
1866 if (vlsh != VLS_INVALID_HANDLE)
1876 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY,
1880 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS,
1884 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE,
1888 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL,
1892 if (optval && optlen && (*optlen == sizeof (struct tcp_info)))
1894 LDBG (1, "fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, "
1895 "optlen %d: #LDP-NOP#", fd, vlsh, optval, *optlen);
1896 memset (optval, 0, *optlen);
1902 case TCP_CONGESTION:
1903 *optlen = strlen ("cubic");
1904 strncpy (optval, "cubic", *optlen + 1);
1908 LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, "
1909 "optname %d unsupported!", fd, vlsh, optname);
1917 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen);
1920 LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u "
1921 "optname %d unsupported!", fd, vlsh, optname);
1929 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen);
1932 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, optlen);
1935 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen);
1936 *(int *) optval = *(int *) optval ? SOCK_DGRAM : SOCK_STREAM;
1939 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN,
1943 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN,
1947 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen);
1950 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen);
1953 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen);
1956 LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u "
1957 "optname %d unsupported!", fd, vlsh, optname);
1965 if (rv != VPPCOM_OK)
1973 rv = libc_getsockopt (fd, level, optname, optval, optlen);
1980 setsockopt (int fd, int level, int optname,
1981 const void *optval, socklen_t optlen)
1986 if ((errno = -ldp_init ()))
1989 vlsh = ldp_fd_to_vlsh (fd);
1990 if (vlsh != VLS_INVALID_HANDLE)
2000 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY,
2001 (void *) optval, &optlen);
2004 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS,
2005 (void *) optval, &optlen);
2008 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE,
2009 (void *) optval, &optlen);
2012 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL,
2013 (void *) optval, &optlen);
2015 case TCP_CONGESTION:
2021 LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u"
2022 "optname %d unsupported!", fd, vlsh, optname);
2030 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY,
2031 (void *) optval, &optlen);
2034 LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vlsh %u"
2035 "optname %d unsupported!", fd, vlsh, optname);
2043 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE,
2044 (void *) optval, &optlen);
2047 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR,
2048 (void *) optval, &optlen);
2051 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST,
2052 (void *) optval, &optlen);
2055 LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vlsh %u "
2056 "optname %d unsupported!", fd, vlsh, optname);
2064 if (rv != VPPCOM_OK)
2072 rv = libc_setsockopt (fd, level, optname, optval, optlen);
/* listen(2) wrapper: an LDP-managed fd is put into listening state with
 * vls_listen (), passing the backlog n through; other fds go to
 * libc_listen (). */
2079 listen (int fd, int n)
2084 if ((errno = -ldp_init ()))
2087 vlsh = ldp_fd_to_vlsh (fd);
2088 if (vlsh != VLS_INVALID_HANDLE)
2090 LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n);
2092 rv = vls_listen (vlsh, n);
2093 if (rv != VPPCOM_OK)
2101 LDBG (0, "fd %d: calling libc_listen(): n %d", fd, n);
2102 rv = libc_listen (fd, n);
2105 LDBG (1, "fd %d: returning %d", fd, rv);
/* Common implementation of accept () and accept4 ().  For an LDP-managed
 * listener the connection is accepted with vls_accept (), which fills the
 * zero-initialised endpoint ep (src_addr provides storage sized for a
 * sockaddr_in6).  A negative handle is an error and is negated into errno.
 * On success the peer endpoint is converted into the caller's sockaddr; if
 * that conversion fails the freshly accepted session is closed again.  The
 * result is the new handle mapped through ldp_vlsh_to_fd ().  Other
 * listeners go to libc_accept4 ().
 * Fix: the trace message printed "ep %p" but passed the endpoint structure
 * itself by value; it now passes &ep so the argument matches the
 * conversion. */
2110 ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr,
2111 socklen_t * __restrict addr_len, int flags)
2113 vls_handle_t listen_vlsh, accept_vlsh;
2116 if ((errno = -ldp_init ()))
2119 listen_vlsh = ldp_fd_to_vlsh (listen_fd);
2120 if (listen_vlsh != VLS_INVALID_HANDLE)
2123 u8 src_addr[sizeof (struct sockaddr_in6)];
2124 memset (&ep, 0, sizeof (ep));
2127 LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u,"
2128 " ep %p, flags 0x%x", listen_fd, listen_vlsh, &ep, flags);
2130 accept_vlsh = vls_accept (listen_vlsh, &ep, flags);
2131 if (accept_vlsh < 0)
2133 errno = -accept_vlsh;
2138 rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
2139 if (rv != VPPCOM_OK)
2141 (void) vls_close (accept_vlsh);
2147 rv = ldp_vlsh_to_fd (accept_vlsh);
2153 LDBG (0, "listen fd %d: calling libc_accept4(): addr %p, addr_len %p,"
2154 " flags 0x%x", listen_fd, addr, addr_len, flags);
2156 rv = libc_accept4 (listen_fd, addr, addr_len, flags);
2159 LDBG (1, "listen fd %d: accept returning %d", listen_fd, rv);
/*
 * accept4 () interposer: a thin wrapper that forwards fd, addr, addr_len
 * and flags, unchanged, to ldp_accept4 () and returns its result.
 */
2165 accept4 (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len,
2168 return ldp_accept4 (fd, addr, addr_len, flags);
/*
 * accept () interposer: same path as accept4 () with the flags argument
 * fixed at 0, i.e. it returns ldp_accept4 (fd, addr, addr_len, 0).
 */
2172 accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
2174 return ldp_accept4 (fd, addr, addr_len, 0);
/*
 * shutdown () interposer.
 *
 * fd:  descriptor to shut down; translated with ldp_fd_to_vlsh ().
 * how: shutdown mode; its address is given to the VPPCOM_ATTR_SET_SHUT
 *      attribute call on the VLS path, its value to libc_shutdown ()
 *      on the libc path.
 */
2178 shutdown (int fd, int how)
/* Sized from flags and reused as the length argument of both vls_attr ()
 * calls below. */
2182 u32 flags_len = sizeof (flags);
/* errno receives the negated ldp_init () result; non-zero means init failed. */
2184 if ((errno = -ldp_init ()))
2187 vlsh = ldp_fd_to_vlsh (fd);
2188 if (vlsh != VLS_INVALID_HANDLE)
2190 LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how);
/* Record the requested mode on the session; a non-zero vls_attr () result
 * takes the error branch. */
2192 if (vls_attr (vlsh, VPPCOM_ATTR_SET_SHUT, &how, &flags_len))
/* Read the session's shutdown state back into flags; non-zero is again
 * the error branch. */
2198 if (vls_attr (vlsh, VPPCOM_ATTR_GET_SHUT, &flags, &flags_len))
/* A read-back state equal to SHUT_RDWR is singled out for a follow-up step. */
2204 if (flags == SHUT_RDWR)
/* libc path: fd and how are handed to libc_shutdown () untouched. */
2209 LDBG (0, "fd %d: calling libc_shutdown: how %d", fd, how);
2210 rv = libc_shutdown (fd, how);
/*
 * epoll_create1 () interposer.
 *
 * flags: used only by the libc_epoll_create1 () call below;
 *        vls_epoll_create () takes no arguments.
 *
 * Two kinds of epoll descriptors are produced here:
 *  - while ldp->vcl_needs_real_epoll is set, a libc epoll fd that is stored
 *    as the current worker's vcl_mq_epfd (the flag is cleared afterwards);
 *  - a VLS epoll session from vls_epoll_create (), reported to the caller
 *    as the fd that ldp_vlsh_to_fd () maps it to.
 */
2217 epoll_create1 (int flags)
2219 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
/* errno receives the negated ldp_init () result; non-zero means init failed. */
2223 if ((errno = -ldp_init ()))
2226 if (ldp->vcl_needs_real_epoll)
2228 /* Make sure workers have been allocated */
2231 ldp_alloc_workers ();
/* Re-read the worker pointer after the allocation call above. */
2232 ldpw = ldp_worker_get_current ();
2234 rv = libc_epoll_create1 (flags);
/* One-shot request: clear it and remember the fd on the worker. */
2235 ldp->vcl_needs_real_epoll = 0;
2236 ldpw->vcl_mq_epfd = rv;
2237 LDBG (0, "created vcl epfd %u", rv);
/* VLS epoll session; VLS_INVALID_HANDLE is the failure value checked next. */
2241 vlsh = vls_epoll_create ();
2242 if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE))
2249 rv = ldp_vlsh_to_fd (vlsh);
2251 LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh);
/*
 * epoll_create () interposer: size is not used; the call is served by
 * epoll_create1 (0).
 */
2256 epoll_create (int size)
2258 return epoll_create1 (0);
/*
 * epoll_ctl () interposer.
 *
 * epfd:  epoll descriptor; translated to vep_vlsh with ldp_fd_to_vlsh ().
 * op:    EPOLL_CTL_* operation, forwarded unchanged.
 * fd:    descriptor the operation applies to.
 * event: event description, forwarded unchanged.
 *
 * Three destinations are visible:
 *  - epfd is not a VLS handle: libc_epoll_ctl () on epfd itself;
 *  - epfd and fd are both VLS handles: vls_epoll_ctl ();
 *  - epfd is a VLS handle but fd is not: libc_epoll_ctl () on the libc
 *    epoll fd kept on the VLS epoll session (VPPCOM_ATTR_GET_LIBC_EPFD).
 *    That fd is created with libc_epoll_create1 (EPOLL_CLOEXEC) and saved
 *    with VPPCOM_ATTR_SET_LIBC_EPFD -- apparently when none is stored yet.
 */
2262 epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
2264 vls_handle_t vep_vlsh, vlsh;
/* errno receives the negated ldp_init () result; non-zero means init failed. */
2267 if ((errno = -ldp_init ()))
2270 vep_vlsh = ldp_fd_to_vlsh (epfd);
2271 if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE))
2273 /* The LDP epoll_create1 always creates VCL epfd's.
2274 * The app should never have a kernel base epoll fd unless it
2275 * was acquired outside of the LD_PRELOAD process context.
2276 * In any case, if we get one, punt it to libc_epoll_ctl.
2278 LDBG (1, "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
2279 " event %p", epfd, op, fd, event);
2281 rv = libc_epoll_ctl (epfd, op, fd, event);
2285 vlsh = ldp_fd_to_vlsh (fd);
2287 LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd,
2290 if (vlsh != VLS_INVALID_HANDLE)
/* One argument per conversion: op feeds "op %d", so vlsh and event line up
 * with "vlsh %u" and "event %p". */
2292 LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
2293 " event %p", epfd, vep_vlsh, op, vlsh, event);
2295 rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
/* Any result other than VPPCOM_OK is handled as an error. */
2296 if (rv != VPPCOM_OK)
/* Length argument for the VPPCOM_ATTR_SET_LIBC_EPFD call below. */
2305 u32 size = sizeof (epfd);
/* Ask the VLS epoll session for its companion libc epoll fd. */
2307 libc_epfd = vls_attr (vep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2310 LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
2311 "EPOLL_CLOEXEC", epfd, vep_vlsh);
2313 libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
2320 rv = vls_attr (vep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd,
2329 else if (PREDICT_FALSE (libc_epfd < 0))
2336 LDBG (1, "epfd %d: calling libc_epoll_ctl: libc_epfd %d, op %d, fd %d,"
2337 " event %p", epfd, libc_epfd, op, fd, event);
2339 rv = libc_epoll_ctl (libc_epfd, op, fd, event);
/*
 * epoll_pwait () implementation that alternates between VLS and libc.
 *
 * epfd:      epoll descriptor supplied by the application.
 * events:    output array; a NULL pointer is rejected by the argument check.
 * maxevents: capacity of events, forwarded unchanged.
 * timeout:   milliseconds; -1 keeps the loop running without a deadline,
 *            values below -1 are rejected by the argument check.
 * sigmask:   forwarded to libc_epoll_pwait () only.
 *
 * Both vls_epoll_wait () and the libc_epoll_pwait () call on libc_epfd are
 * issued with a timeout of 0; the waiting is done by the enclosing loop,
 * which runs until max_time unless timeout is -1.
 */
2347 ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
2348 int timeout, const sigset_t * sigmask)
2350 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2351 double time_to_wait = (double) 0, max_time;
2352 int libc_epfd, rv = 0;
2353 vls_handle_t ep_vlsh;
/* errno receives the negated ldp_init () result; non-zero means init failed. */
2355 if ((errno = -ldp_init ()))
2358 if (PREDICT_FALSE (!events || (timeout < -1)))
/* The worker's own vcl_mq_epfd is a libc epoll fd: pass the call through
 * with the caller's timeout and sigmask. */
2364 if (epfd == ldpw->vcl_mq_epfd)
2365 return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
2367 ep_vlsh = ldp_fd_to_vlsh (epfd);
2368 if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
2370 LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
/* The per-worker clock is initialised on first use. */
2375 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
2376 clib_time_init (&ldpw->clib_time);
/* Milliseconds to seconds; a negative timeout contributes 0. */
2377 time_to_wait = ((timeout >= 0) ? (double) timeout / 1000 : 0);
2378 max_time = clib_time_now (&ldpw->clib_time) + time_to_wait;
/* Companion libc epoll fd of the VLS epoll session; negative is an error. */
2380 libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2381 if (PREDICT_FALSE (libc_epfd < 0))
2388 LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, "
2389 "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh,
2390 libc_epfd, events, maxevents, timeout, sigmask, time_to_wait);
/* epoll_wait_vcl is a per-worker toggle guarding the VLS wait; it is set
 * to 1 and to 0 below.  NOTE(review): presumably this makes the two
 * sources take turns -- confirm against the full loop body. */
2393 if (!ldpw->epoll_wait_vcl)
2395 rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0);
2398 ldpw->epoll_wait_vcl = 1;
2409 ldpw->epoll_wait_vcl = 0;
2413 rv = libc_epoll_pwait (libc_epfd, events, maxevents, 0, sigmask);
/* Keep going while no deadline applies (-1) or the deadline is ahead. */
2418 while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time));
/*
 * epoll_pwait () implementation selected when vls_use_eventfd () is true.
 *
 * epfd:      epoll descriptor supplied by the application.
 * events:    output array; a NULL pointer is rejected by the argument check.
 * maxevents: capacity of events.
 * timeout:   milliseconds, given as-is to libc_epoll_pwait (); values below
 *            -1 are rejected by the argument check.
 * sigmask:   forwarded to libc_epoll_pwait ().
 *
 * The worker's vcl_mq_epfd is registered once in the session's libc epoll
 * fd.  VLS is polled with a zero timeout, then libc_epoll_pwait () is
 * called on the libc epoll fd.  When that reports vcl_mq_epfd, the entry is
 * taken out of events and vls_epoll_wait () fills in VLS events from index
 * rv onward.
 */
2425 ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
2426 int maxevents, int timeout, const sigset_t * sigmask)
2428 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2429 int libc_epfd, rv = 0, num_ev;
2430 vls_handle_t ep_vlsh;
/* errno receives the negated ldp_init () result; non-zero means init failed. */
2432 if ((errno = -ldp_init ()))
2435 if (PREDICT_FALSE (!events || (timeout < -1)))
/* The worker's own vcl_mq_epfd is a libc epoll fd: pass the call through. */
2441 if (epfd == ldpw->vcl_mq_epfd)
2442 return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
2444 ep_vlsh = ldp_fd_to_vlsh (epfd);
2445 if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
2447 LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
/* A value of 0 means no libc epoll fd is stored on the session yet; one is
 * created and saved on the session. */
2452 libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2453 if (PREDICT_FALSE (!libc_epfd))
2455 u32 size = sizeof (epfd);
2457 LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
2458 "EPOLL_CLOEXEC", epfd, ep_vlsh);
2459 libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
2466 rv = vls_attr (ep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size);
2474 if (PREDICT_FALSE (libc_epfd <= 0))
/* One-time registration of vcl_mq_epfd in the libc epoll fd, tracked by
 * the per-worker mq_epfd_added flag. */
2481 if (PREDICT_FALSE (!ldpw->mq_epfd_added))
2483 struct epoll_event e = { 0 };
2485 e.data.fd = ldpw->vcl_mq_epfd;
2486 if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) <
2489 LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d",
2490 epfd, ldpw->vcl_mq_epfd, libc_epfd);
2494 ldpw->mq_epfd_added = 1;
/* Poll VLS first with a zero timeout. */
2497 rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0);
/* The caller's timeout and sigmask are applied on the libc epoll fd. */
2507 rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask);
/* Look for the vcl_mq_epfd entry among the rv events reported by libc. */
2510 for (int i = 0; i < rv; i++)
2512 if (events[i].data.fd == ldpw->vcl_mq_epfd)
2514 /* We should remove mq epoll fd from events. */
/* Slot i is overwritten with the entry at index rv. */
2518 events[i].events = events[rv].events;
2519 events[i].data.u64 = events[rv].data.u64;
/* VLS events are written from index rv, limited to the remaining space. */
2521 num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0);
2522 if (PREDICT_TRUE (num_ev > 0))
/*
 * epoll_pwait () interposer: picks the implementation from
 * vls_use_eventfd () -- ldp_epoll_pwait_eventfd () when it is non-zero,
 * ldp_epoll_pwait () on the other return path.  All arguments are
 * forwarded to ldp_epoll_pwait () unchanged.
 */
2533 epoll_pwait (int epfd, struct epoll_event *events,
2534 int maxevents, int timeout, const sigset_t * sigmask)
2536 if (vls_use_eventfd ())
2537 return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout,
2540 return ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
/*
 * epoll_wait () interposer: same dispatch as epoll_pwait (), with NULL
 * passed as the signal mask to whichever implementation
 * vls_use_eventfd () selects.
 */
2544 epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout)
2546 if (vls_use_eventfd ())
2547 return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL);
2549 return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL);
/*
 * poll () interposer.
 *
 * fds:     caller's pollfd array; revents is written back for every entry
 *          that was handed to either poller.
 * nfds:    number of entries in fds.
 * timeout: milliseconds; a negative value keeps the loop running without a
 *          deadline.
 *
 * Entries whose fd resolves to a VLS handle are collected in
 * ldpw->vcl_poll and polled with vppcom_poll ().  The remaining entries
 * are copied to ldpw->libc_poll (their positions in fds go to
 * ldpw->libc_poll_idxs) and polled with libc_poll ().  Both pollers are
 * called with a zero timeout inside a loop that ends at max_time.  The
 * per-worker vectors are reset before leaving.
 */
2553 poll (struct pollfd *fds, nfds_t nfds, int timeout)
2555 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2556 int rv, i, n_revents = 0;
/* NOTE(review): nfds is an nfds_t printed with %d -- confirm the specifier
 * matches the width of the type. */
2561 LDBG (3, "fds %p, nfds %d, timeout %d", fds, nfds, timeout);
/* The per-worker clock is initialised on first use. */
2563 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
2564 clib_time_init (&ldpw->clib_time);
/* Deadline = now + timeout converted from milliseconds to seconds; a
 * negative timeout contributes 0 (the loop condition handles it). */
2566 max_time = (timeout >= 0) ? (f64) timeout / 1000 : 0;
2567 max_time += clib_time_now (&ldpw->clib_time);
/* Sort each entry into the VLS or the libc working set. */
2569 for (i = 0; i < nfds; i++)
2574 vlsh = ldp_fd_to_vlsh (fds[i].fd);
2575 if (vlsh != VLS_INVALID_HANDLE)
/* The caller's fd is negated while the entry is owned by the VLS set; the
 * write-back loop below negates it again. */
2577 fds[i].fd = -fds[i].fd;
2578 vec_add2 (ldpw->vcl_poll, vp, 1);
2580 vp->sh = vlsh_to_sh (vlsh);
2581 vp->events = fds[i].events;
2582 #ifdef __USE_XOPEN2K
/* POLLRDNORM / POLLWRNORM requests are also expressed as POLLIN / POLLOUT
 * for the VLS poller. */
2583 if (fds[i].events & POLLRDNORM)
2584 vp->events |= POLLIN;
2585 if (fds[i].events & POLLWRNORM)
2586 vp->events |= POLLOUT;
2588 vp->revents = fds[i].revents;
/* Non-VLS entry: keep a copy plus its index in fds for the write-back. */
2592 vec_add1 (ldpw->libc_poll, fds[i]);
2593 vec_add1 (ldpw->libc_poll_idxs, i);
/* Each working set is polled only when it is non-empty. */
2599 if (vec_len (ldpw->vcl_poll))
2601 rv = vppcom_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0);
2612 if (vec_len (ldpw->libc_poll))
2614 rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0);
/* Keep polling while timeout is negative or the deadline is still ahead. */
2627 while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time));
/* Write-back for VLS entries: restore the fd sign and copy revents. */
2631 vec_foreach (vp, ldpw->vcl_poll)
2633 fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd;
2634 fds[vp->fds_ndx].revents = vp->revents;
2635 #ifdef __USE_XOPEN2K
/* Report POLLRDNORM / POLLWRNORM when the caller asked for them and the
 * matching POLLIN / POLLOUT came back. */
2636 if ((fds[vp->fds_ndx].revents & POLLIN) &&
2637 (fds[vp->fds_ndx].events & POLLRDNORM))
2638 fds[vp->fds_ndx].revents |= POLLRDNORM;
2639 if ((fds[vp->fds_ndx].revents & POLLOUT) &&
2640 (fds[vp->fds_ndx].events & POLLWRNORM))
2641 fds[vp->fds_ndx].revents |= POLLWRNORM;
2644 vec_reset_length (ldpw->vcl_poll);
/* Write-back for libc entries, using the saved indices into fds. */
2646 for (i = 0; i < vec_len (ldpw->libc_poll); i++)
2648 fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents;
2650 vec_reset_length (ldpw->libc_poll_idxs);
2651 vec_reset_length (ldpw->libc_poll);
/*
 * ppoll () interposer.  After the ldp_init () check, the visible work is a
 * clib_warning () carrying "LDP-TBD" and the process id; none of fds,
 * nfds, timeout or sigmask is used on these lines.
 * NOTE(review): looks unimplemented -- confirm what callers get back.
 */
2658 ppoll (struct pollfd *fds, nfds_t nfds,
2659 const struct timespec *timeout, const sigset_t * sigmask)
/* errno receives the negated ldp_init () result; non-zero means init failed. */
2661 if ((errno = -ldp_init ()))
2664 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
/*
 * Load/unload hooks.  CONSTRUCTOR_ATTRIBUTE and DESTRUCTOR_ATTRIBUTE expand
 * to __attribute__ ((constructor)) and __attribute__ ((destructor)) when
 * HAVE_CONSTRUCTOR_ATTRIBUTE / HAVE_DESTRUCTOR_ATTRIBUTE are defined, which
 * this file does right before defining them.
 */
2672 void CONSTRUCTOR_ATTRIBUTE ldp_constructor (void);
2674 void DESTRUCTOR_ATTRIBUTE ldp_destructor (void);
2677 * This function is called when the library is loaded
/*
 * Library-load hook (its prototype carries CONSTRUCTOR_ATTRIBUTE).
 * Runs swrap_constructor () first, then ldp_init ().  A non-zero
 * ldp_init () result is reported on stderr; success is announced through
 * clib_warning () only when LDP_DEBUG is greater than 0.
 */
2680 ldp_constructor (void)
2682 swrap_constructor ();
/* fprintf () to stderr is used for the failure report. */
2683 if (ldp_init () != 0)
2685 fprintf (stderr, "\nLDP<%d>: ERROR: ldp_constructor: failed!\n",
2689 else if (LDP_DEBUG > 0)
2690 clib_warning ("LDP<%d>: LDP constructor: done!\n", getpid ());
2694 * This function is called when the library is unloaded
/*
 * Library-unload hook (its prototype carries DESTRUCTOR_ATTRIBUTE).
 * Calls swrap_destructor () and prints a "destructor: done" line on stderr
 * with the function name, source line and process id.  The message goes
 * through fprintf () rather than clib_warning () for the reason given in
 * the comment inside the body.
 */
2697 ldp_destructor (void)
2700 swrap_destructor ();
2705 /* Don't use clib_warning() here because that calls writev()
2706 * which will call ldp_init().
2709 fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n",
2710 __func__, __LINE__, getpid ());
2715 * fd.io coding-style-patch-verification: ON
2718 * eval: (c-set-style "gnu")