2 * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #ifdef HAVE_GNU_SOURCE
27 #include <sys/resource.h>
28 #include <netinet/tcp.h>
29 #include <netinet/udp.h>
31 #include <vcl/ldp_socket_wrapper.h>
35 #include <vcl/vcl_locked.h>
36 #include <vppinfra/time.h>
37 #include <vppinfra/bitmap.h>
38 #include <vppinfra/lock.h>
39 #include <vppinfra/pool.h>
40 #include <vppinfra/hash.h>
/* HAVE_CONSTRUCTOR_ATTRIBUTE is defined unconditionally here, which selects
 * the __attribute__ ((constructor)) expansion of CONSTRUCTOR_ATTRIBUTE; the
 * empty definition is the fallback for when it is not defined. */
42 #define HAVE_CONSTRUCTOR_ATTRIBUTE
43 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
44 #define CONSTRUCTOR_ATTRIBUTE \
45 __attribute__ ((constructor))
47 #define CONSTRUCTOR_ATTRIBUTE
48 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
/* Same pattern for DESTRUCTOR_ATTRIBUTE / __attribute__ ((destructor)). */
50 #define HAVE_DESTRUCTOR_ATTRIBUTE
51 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
52 #define DESTRUCTOR_ATTRIBUTE \
53 __attribute__ ((destructor))
55 #define DESTRUCTOR_ATTRIBUTE
/* Number of per-worker contexts allocated by ldp_alloc_workers (). */
58 #define LDP_MAX_NWORKERS 32
/* SOCKADDR_GET_SA yields the socket address from a sockaddr argument; with
 * HAVE_GNU_SOURCE it selects the __sockaddr__ member.
 * NOTE(review): the other variant expands to the identifier `_addr` instead
 * of its parameter `__addr`, so it only works because every call site in this
 * file passes a variable named `_addr`. Both expansions also end in ';',
 * which leaves an empty statement after each use. */
60 #ifdef HAVE_GNU_SOURCE
61 #define SOCKADDR_GET_SA(__addr) __addr.__sockaddr__;
63 #define SOCKADDR_GET_SA(__addr) _addr;
/* Presumably the SOL_UDP segmentation option number, provided for headers
 * that lack it -- TODO confirm against <netinet/udp.h>. */
67 #define UDP_SEGMENT 103
/* Fallback definition of SO_ORIGINAL_DST for builds where the netfilter
 * header is not included. */
70 #ifndef SO_ORIGINAL_DST
71 /* from <linux/netfilter_ipv4.h> */
72 #define SO_ORIGINAL_DST 80
/* Per-worker LDP state; one entry per worker in ldp->workers, selected by
 * ldp_worker_get_current (). */
74 typedef struct ldp_worker_ctx_
/* Time base used to compute deadlines in ldp_pselect () and recvmmsg (). */
77 clib_time_t clib_time;
/* Working read/write/except sets: ldp_pselect () copies the session or libc
 * sets into these before calling vls_select () / libc_pselect (). */
82 clib_bitmap_t *rd_bitmap;
83 clib_bitmap_t *wr_bitmap;
84 clib_bitmap_t *ex_bitmap;
/* Sets indexed by vcl session index, filled by ldp_select_init_maps (). */
85 clib_bitmap_t *si_rd_bitmap;
86 clib_bitmap_t *si_wr_bitmap;
87 clib_bitmap_t *si_ex_bitmap;
/* Sets of plain libc fds, filled by ldp_select_init_maps (). */
88 clib_bitmap_t *libc_rd_bitmap;
89 clib_bitmap_t *libc_wr_bitmap;
90 clib_bitmap_t *libc_ex_bitmap;
/* pollfd array for libc descriptors -- presumably used by the poll ()
 * wrapper; TODO confirm, no user is visible nearby. */
96 struct pollfd *libc_poll;
108 /* clib_bitmap_t, fd_mask and vcl_si_set are used interchangeably. Make sure
109 * they are the same size */
/* ldp_pselect () relies on this when it casts clib_bitmap_t vectors to
 * fd_set * for libc_pselect (). */
110 STATIC_ASSERT (sizeof (clib_bitmap_t) == sizeof (fd_mask),
111 "ldp bitmap size mismatch");
112 STATIC_ASSERT (sizeof (vcl_si_set) == sizeof (fd_mask),
113 "ldp bitmap size mismatch");
/* Vector of per-worker contexts, created by ldp_alloc_workers (). */
117 ldp_worker_ctx_t *workers;
/* Application name registered with vls_app_create (); filled lazily by
 * ldp_get_app_name () or from the environment. */
119 char app_name[LDP_APP_NAME_MAX];
124 /** vcl needs next epoll_create to go to libc_epoll */
125 u8 vcl_needs_real_epoll;
128 * crypto state used only for testing
/* LDP_DEBUG is the current debug level. LDBG prints a pid-prefixed message
 * to stderr when ldp->debug is greater than _lvl, saving and restoring errno
 * around fprintf so that logging does not disturb the caller's errno. */
134 #define LDP_DEBUG ldp->debug
136 #define LDBG(_lvl, _fmt, _args...) \
137 if (ldp->debug > _lvl) \
139 int errno_saved = errno; \
140 fprintf (stderr, "ldp<%d>: " _fmt "\n", getpid(), ##_args); \
141 errno = errno_saved; \
/* Global LDP state with its defaults: fds at or above
 * (1 << LDP_SID_BIT_MIN) are treated as vls handles (see ldp_fd_to_vlsh),
 * debug starts at LDP_DEBUG_INIT and transparent TLS is off. */
144 static ldp_main_t ldp_main = {
145 .vlsh_bit_val = (1 << LDP_SID_BIT_MIN),
146 .vlsh_bit_mask = (1 << LDP_SID_BIT_MIN) - 1,
147 .debug = LDP_DEBUG_INIT,
148 .transparent_tls = 0,
/* Pointer through which the rest of this code reaches the global state. */
152 static ldp_main_t *ldp = &ldp_main;
/* Return the context of the calling worker, i.e. the element of ldp->workers
 * at index vppcom_worker_index (). */
154 static inline ldp_worker_ctx_t *
155 ldp_worker_get_current (void)
157 return (ldp->workers + vppcom_worker_index ());
161 * RETURN: 0 on success or -1 on error.
/* Store "<app_name>-ldp-<pid>" in ldp->app_name; snprintf bounds the result
 * to LDP_APP_NAME_MAX bytes, truncating longer names. */
164 ldp_set_app_name (char *app_name)
166 snprintf (ldp->app_name, LDP_APP_NAME_MAX, "%s-ldp-%d", app_name, getpid ());
/* Return the application name, first defaulting it from
 * program_invocation_short_name if none has been set yet. */
172 if (ldp->app_name[0] == '\0')
173 ldp_set_app_name (program_invocation_short_name);
175 return ldp->app_name;
/* Convert a vls handle into the fd number handed to the application by
 * offsetting it with ldp->vlsh_bit_val. */
179 ldp_vlsh_to_fd (vls_handle_t vlsh)
181 return (vlsh + ldp->vlsh_bit_val);
/* Inverse of ldp_vlsh_to_fd (): fds below ldp->vlsh_bit_val are not backed by
 * a vls session and yield VLS_INVALID_HANDLE; otherwise the offset is
 * removed to recover the handle. */
184 static inline vls_handle_t
185 ldp_fd_to_vlsh (int fd)
187 if (fd < ldp->vlsh_bit_val)
188 return VLS_INVALID_HANDLE;
190 return (fd - ldp->vlsh_bit_val);
/* Create the ldp->workers vector with LDP_MAX_NWORKERS elements. */
194 ldp_alloc_workers (void)
198 ldp->workers = vec_new (ldp_worker_ctx_t, LDP_MAX_NWORKERS);
/* Read LDP configuration from the environment: debug level (LDP_ENV_DEBUG),
 * application name (LDP_ENV_APP_NAME), the bit that separates vls fds from
 * libc fds (LDP_ENV_SID_BIT) and transparent TLS (LDP_ENV_TLS_TRANS). */
204 char *env_var_str = getenv (LDP_ENV_DEBUG);
208 if (sscanf (env_var_str, "%u", &tmp) != 1)
209 clib_warning ("LDP<%d>: WARNING: Invalid LDP debug level specified in"
210 " the env var " LDP_ENV_DEBUG " (%s)!", getpid (),
215 LDBG (0, "configured LDP debug level (%u) from env var "
216 LDP_ENV_DEBUG "!", ldp->debug);
/* Application name override. */
220 env_var_str = getenv (LDP_ENV_APP_NAME);
223 ldp_set_app_name (env_var_str);
224 LDBG (0, "configured LDP app name (%s) from the env var "
225 LDP_ENV_APP_NAME "!", ldp->app_name);
/* Session-id bit: an unparsable value only logs a warning; values outside
 * [LDP_SID_BIT_MIN, LDP_SID_BIT_MAX] are clamped to the nearest bound.
 * vlsh_bit_mask is always vlsh_bit_val - 1. */
228 env_var_str = getenv (LDP_ENV_SID_BIT);
232 if (sscanf (env_var_str, "%u", &sb) != 1)
234 LDBG (0, "WARNING: Invalid LDP sid bit specified in the env var "
235 LDP_ENV_SID_BIT " (%s)! sid bit value %d (0x%x)", env_var_str,
236 ldp->vlsh_bit_val, ldp->vlsh_bit_val);
238 else if (sb < LDP_SID_BIT_MIN)
240 ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MIN);
241 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
243 LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
244 LDP_ENV_SID_BIT " (%s) is too small. Using LDP_SID_BIT_MIN"
245 " (%d)! sid bit value %d (0x%x)", sb, env_var_str,
246 LDP_SID_BIT_MIN, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
248 else if (sb > LDP_SID_BIT_MAX)
250 ldp->vlsh_bit_val = (1 << LDP_SID_BIT_MAX);
251 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
253 LDBG (0, "WARNING: LDP sid bit (%u) specified in the env var "
254 LDP_ENV_SID_BIT " (%s) is too big. Using LDP_SID_BIT_MAX"
255 " (%d)! sid bit value %d (0x%x)", sb, env_var_str,
256 LDP_SID_BIT_MAX, ldp->vlsh_bit_val, ldp->vlsh_bit_val);
260 ldp->vlsh_bit_val = (1 << sb);
261 ldp->vlsh_bit_mask = ldp->vlsh_bit_val - 1;
263 LDBG (0, "configured LDP sid bit (%u) from "
264 LDP_ENV_SID_BIT "! sid bit value %d (0x%x)", sb,
265 ldp->vlsh_bit_val, ldp->vlsh_bit_val);
268 /* Make sure there are enough bits in the fd set for vcl sessions */
269 if (ldp->vlsh_bit_val > FD_SETSIZE / 2)
271 /* Only valid for select/pselect, so just WARNING and not exit */
273 "WARNING: LDP vlsh bit value %d > FD_SETSIZE/2 %d, "
274 "select/pselect not supported now!",
275 ldp->vlsh_bit_val, FD_SETSIZE / 2);
/* Transparent TLS: socket () then creates TLS sessions (see socket ()). */
278 env_var_str = getenv (LDP_ENV_TLS_TRANS);
281 ldp->transparent_tls = 1;
/* One-time LDP initialisation: registers the application with vls, allocates
 * the worker contexts and zeroes each worker's clib_time so it is initialised
 * lazily on first use. */
288 ldp_worker_ctx_t *ldpw;
293 LDBG (0, "LDP is initialized already");
/* vcl_needs_real_epoll is raised only for the duration of vls_app_create ()
 * and cleared again on both the failure and the success path. */
299 ldp->vcl_needs_real_epoll = 1;
300 rv = vls_app_create (ldp_get_app_name ());
303 ldp->vcl_needs_real_epoll = 0;
304 if (rv == VPPCOM_EEXIST)
307 "\nERROR: ldp_init: vppcom_app_create()"
308 " failed! rv = %d (%s)\n",
309 rv, vppcom_retval_str (rv));
313 ldp->vcl_needs_real_epoll = 0;
314 ldp_alloc_workers ();
316 vec_foreach (ldpw, ldp->workers)
317 clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time));
319 LDBG (0, "LDP initialization: done!");
/* Lazy-initialisation guard used by the wrappers: when ldp->init is not yet
 * set it runs ldp_init () and stores the negated return value in errno; a
 * non-zero errno selects the failure branch. */
324 #define ldp_init_check() \
325 if (PREDICT_FALSE (!ldp->init)) \
327 if ((errno = -ldp_init ())) \
/* close () wrapper. For a vls-backed fd it first queries the libc epoll fd
 * attached to the session (VPPCOM_ATTR_GET_LIBC_EPFD); when one exists the
 * log line indicates it is closed through libc, the worker's mq_epfd_added
 * flag is cleared and the attribute is written back, and finally the session
 * is closed with vls_close (). Any other fd goes straight to libc_close (). */
339 vlsh = ldp_fd_to_vlsh (fd);
340 if (vlsh != VLS_INVALID_HANDLE)
342 epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
345 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
346 u32 size = sizeof (epfd);
348 LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd);
351 ldpw->mq_epfd_added = 0;
354 (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
356 else if (PREDICT_FALSE (epfd < 0))
363 LDBG (0, "fd %d: calling vls_close: vlsh %u", fd, vlsh);
365 rv = vls_close (vlsh);
374 LDBG (0, "fd %d: calling libc_close", fd);
375 rv = libc_close (fd);
/* read () wrapper: vls-backed fds are served by vls_read (), all others by
 * libc_read (). */
383 read (int fd, void *buf, size_t nbytes)
390 vlsh = ldp_fd_to_vlsh (fd);
391 if (vlsh != VLS_INVALID_HANDLE)
393 size = vls_read (vlsh, buf, nbytes);
402 size = libc_read (fd, buf, nbytes);
/* readv () wrapper: for a vls-backed fd each iovec is filled with a separate
 * vls_read () call, and a short read is tested per iovec; an error is only
 * treated specially when nothing has been read so far (rv < 0 && total == 0).
 * Other fds go to libc_readv ().
 * NOTE(review): `rv < iov[i].iov_len` compares an int with a size_t, so rv is
 * converted to unsigned -- confirm negative rv is handled before that test. */
409 readv (int fd, const struct iovec * iov, int iovcnt)
411 int rv = 0, i, total = 0;
417 vlsh = ldp_fd_to_vlsh (fd);
418 if (vlsh != VLS_INVALID_HANDLE)
420 for (i = 0; i < iovcnt; ++i)
422 rv = vls_read (vlsh, iov[i].iov_base, iov[i].iov_len);
428 if (rv < iov[i].iov_len)
432 if (rv < 0 && total == 0)
442 size = libc_readv (fd, iov, iovcnt);
/* write () wrapper: vls-backed fds are written with vls_write_msg (), all
 * others with libc_write (). */
449 write (int fd, const void *buf, size_t nbytes)
456 vlsh = ldp_fd_to_vlsh (fd);
457 if (vlsh != VLS_INVALID_HANDLE)
459 size = vls_write_msg (vlsh, (void *) buf, nbytes);
468 size = libc_write (fd, buf, nbytes);
/* writev () wrapper: for a vls-backed fd every iovec is sent with its own
 * vls_write_msg () call, and a short write is tested per iovec; an error is
 * only treated specially when nothing was written (rv < 0 && total == 0).
 * Other fds go to libc_writev (). */
475 writev (int fd, const struct iovec * iov, int iovcnt)
477 ssize_t size = 0, total = 0;
483 vlsh = ldp_fd_to_vlsh (fd);
484 if (vlsh != VLS_INVALID_HANDLE)
486 for (i = 0; i < iovcnt; ++i)
488 rv = vls_write_msg (vlsh, iov[i].iov_base, iov[i].iov_len);
494 if (rv < iov[i].iov_len)
499 if (rv < 0 && total == 0)
509 size = libc_writev (fd, iov, iovcnt);
/* Shared implementation of fcntl () and fcntl64 (). For a vls-backed fd one
 * int argument is taken from `ap` and the command is mapped onto
 * VPPCOM_ATTR_SET_FLAGS / VPPCOM_ATTR_GET_FLAGS; F_SETFD is logged and
 * ignored. Other fds are forwarded to libc_vfcntl64 () or libc_vfcntl ()
 * (presumably chosen at build time -- TODO confirm). */
516 fcntl_internal (int fd, int cmd, va_list ap)
521 vlsh = ldp_fd_to_vlsh (fd);
522 LDBG (0, "fd %u vlsh %d, cmd %u", fd, vlsh, cmd);
523 if (vlsh != VLS_INVALID_HANDLE)
525 int flags = va_arg (ap, int);
528 size = sizeof (flags);
533 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
537 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &size);
542 /* TODO handle this */
543 LDBG (0, "F_SETFD ignored flags %u", flags);
559 rv = libc_vfcntl64 (fd, cmd, ap);
561 rv = libc_vfcntl (fd, cmd, ap);
/* fcntl () wrapper: hands its variadic arguments to fcntl_internal (). */
569 fcntl (int fd, int cmd, ...)
577 rv = fcntl_internal (fd, cmd, ap);
/* fcntl64 () wrapper: identical dispatch to fcntl (), via fcntl_internal (). */
584 fcntl64 (int fd, int cmd, ...)
592 rv = fcntl_internal (fd, cmd, ap);
/* ioctl () wrapper. For a vls-backed fd the supported requests are mapped to
 * session attributes: bytes readable (VPPCOM_ATTR_GET_NREAD), bytes queued
 * for write (VPPCOM_ATTR_GET_NWRITEQ) and non-blocking mode (an int pointer
 * argument turned into O_NONBLOCK for VPPCOM_ATTR_SET_FLAGS). Other fds are
 * forwarded to libc_vioctl ().
 * NOTE(review): the NWRITEQ path pulls two variadic arguments (buf and
 * buflen); confirm callers of that request really pass both. */
598 ioctl (int fd, unsigned long int cmd, ...)
608 vlsh = ldp_fd_to_vlsh (fd);
609 if (vlsh != VLS_INVALID_HANDLE)
614 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
618 u32 *buf = va_arg (ap, void *);
619 u32 *buflen = va_arg (ap, u32 *);
620 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITEQ, buf, buflen);
625 u32 flags = *(va_arg (ap, int *)) ? O_NONBLOCK : 0;
626 u32 size = sizeof (flags);
628 /* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than
629 * non-blocking, the flags should be read here and merged
632 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_FLAGS, &flags, &size);
648 rv = libc_vioctl (fd, cmd, ap);
/* Split one caller-supplied fd_set into a libc part and a vcl part.
 *
 * original   fd_set from the application; copied into *resultb and then
 *            zeroed so results can be written back into it later
 * resultb    scratch bitmap receiving the raw copy of `original`
 * libcb      receives the fds that are not vls-backed
 * vclb       receives the session index of every vls-backed fd
 * minbits    minimum size all three bitmaps are validated to
 * n_bytes    number of bytes of `original` to copy and clear
 * si_bits    in/out: raised to (highest set session index + 1)
 * libc_bits  in/out: raised to (highest set libc fd + 1) */
656 ldp_select_init_maps (fd_set * __restrict original,
657 clib_bitmap_t ** resultb, clib_bitmap_t ** libcb,
658 clib_bitmap_t ** vclb, int nfds, u32 minbits,
659 u32 n_bytes, uword * si_bits, uword * libc_bits)
661 uword si_bits_set, libc_bits_set;
665 clib_bitmap_validate (*vclb, minbits);
666 clib_bitmap_validate (*libcb, minbits);
667 clib_bitmap_validate (*resultb, minbits);
668 clib_memcpy_fast (*resultb, original, n_bytes);
669 memset (original, 0, n_bytes);
/* Classify every fd that was set by the caller. */
671 clib_bitmap_foreach (fd, *resultb) {
674 vlsh = ldp_fd_to_vlsh (fd);
675 if (vlsh == VLS_INVALID_HANDLE)
676 clib_bitmap_set_no_check (*libcb, fd, 1);
678 *vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1);
/* Keep the running maxima across the read/write/except calls and make sure
 * the result bitmap can hold si_bits entries. */
681 si_bits_set = clib_bitmap_last_set (*vclb) + 1;
682 *si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits;
683 clib_bitmap_validate (*resultb, *si_bits);
685 libc_bits_set = clib_bitmap_last_set (*libcb) + 1;
686 *libc_bits = (libc_bits_set > *libc_bits) ? libc_bits_set : *libc_bits;
/* Translate ready vcl sessions back into application fds: every session
 * index set in `vclb` is converted to its vls handle and then to an fd via
 * ldp_vlsh_to_fd (); a negative fd is treated as an error. The caller treats
 * a non-zero return as failure. */
690 ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
699 clib_bitmap_foreach (si, vclb) {
700 vlsh = vls_session_index_to_vlsh (si);
701 ASSERT (vlsh != VLS_INVALID_HANDLE);
702 fd = ldp_vlsh_to_fd (vlsh);
703 if (PREDICT_FALSE (fd < 0))
/* Merge libc results into the application's fd_set: FD_SET every fd that is
 * set in `result`. */
715 ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb)
722 clib_bitmap_foreach (fd, result)
723 FD_SET ((int)fd, libcb);
/* Common implementation behind select () and pselect ().
 *
 * The caller's fd_sets are split into vcl session sets and libc fd sets
 * (ldp_select_init_maps). The function then alternates between vls_select ()
 * on the session sets and libc_pselect () on the libc sets, translating any
 * ready entries back into the caller's fd_sets, until something is ready or
 * the deadline computed from `timeout` passes (time_out == -1 means no
 * deadline). When nfds does not reach into the vls fd range the call is
 * passed to libc_pselect () unchanged. All per-worker bitmaps are zeroed
 * before returning. */
727 ldp_pselect (int nfds, fd_set * __restrict readfds,
728 fd_set * __restrict writefds,
729 fd_set * __restrict exceptfds,
730 const struct timespec *__restrict timeout,
731 const __sigset_t * __restrict sigmask)
733 u32 minbits = clib_max (nfds, BITS (uword)), n_bytes;
734 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
735 struct timespec libc_tspec = { 0 };
736 f64 time_out, vcl_timeout = 0;
737 uword si_bits, libc_bits;
738 int rv, bits_set = 0;
/* The worker clock is initialised on first use (ldp_init zeroes it). */
746 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
747 clib_time_init (&ldpw->clib_time);
/* Convert the relative timeout to seconds as f64, then to an absolute
 * deadline on the worker clock. */
751 time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ?
752 (f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9;
754 time_out += clib_time_now (&ldpw->clib_time);
756 /* select as fine grained sleep */
759 while (clib_time_now (&ldpw->clib_time) < time_out)
/* No fd in the vls range can be part of the sets: plain libc call. */
772 if (nfds <= ldp->vlsh_bit_val)
774 rv = libc_pselect (nfds, readfds, writefds, exceptfds,
779 si_bits = libc_bits = 0;
/* Bytes needed to hold nfds bits, rounded up. */
780 n_bytes = nfds / 8 + ((nfds % 8) ? 1 : 0);
783 ldp_select_init_maps (readfds, &ldpw->rd_bitmap, &ldpw->libc_rd_bitmap,
784 &ldpw->si_rd_bitmap, nfds, minbits, n_bytes,
785 &si_bits, &libc_bits);
787 ldp_select_init_maps (writefds, &ldpw->wr_bitmap,
788 &ldpw->libc_wr_bitmap, &ldpw->si_wr_bitmap, nfds,
789 minbits, n_bytes, &si_bits, &libc_bits);
791 ldp_select_init_maps (exceptfds, &ldpw->ex_bitmap,
792 &ldpw->libc_ex_bitmap, &ldpw->si_ex_bitmap, nfds,
793 minbits, n_bytes, &si_bits, &libc_bits);
795 if (PREDICT_FALSE (!si_bits && !libc_bits))
803 libc_tspec = timeout ? *timeout : libc_tspec;
/* vcl pass: vls_select () overwrites its sets, so work on copies of the
 * session sets. */
810 clib_memcpy_fast (ldpw->rd_bitmap, ldpw->si_rd_bitmap,
811 vec_len (ldpw->si_rd_bitmap) *
812 sizeof (clib_bitmap_t));
814 clib_memcpy_fast (ldpw->wr_bitmap, ldpw->si_wr_bitmap,
815 vec_len (ldpw->si_wr_bitmap) *
816 sizeof (clib_bitmap_t));
818 clib_memcpy_fast (ldpw->ex_bitmap, ldpw->si_ex_bitmap,
819 vec_len (ldpw->si_ex_bitmap) *
820 sizeof (clib_bitmap_t));
822 rv = vls_select (si_bits, readfds ? ldpw->rd_bitmap : NULL,
823 writefds ? ldpw->wr_bitmap : NULL,
824 exceptfds ? ldpw->ex_bitmap : NULL, vcl_timeout);
833 if (ldp_select_vcl_map_to_libc (ldpw->rd_bitmap, readfds))
839 if (ldp_select_vcl_map_to_libc (ldpw->wr_bitmap, writefds))
845 if (ldp_select_vcl_map_to_libc (ldpw->ex_bitmap, exceptfds))
/* libc pass: same idea with the libc fd sets, which are handed to
 * libc_pselect () as fd_set (sizes are checked by the STATIC_ASSERTs). */
856 clib_memcpy_fast (ldpw->rd_bitmap, ldpw->libc_rd_bitmap,
857 vec_len (ldpw->libc_rd_bitmap) *
858 sizeof (clib_bitmap_t));
860 clib_memcpy_fast (ldpw->wr_bitmap, ldpw->libc_wr_bitmap,
861 vec_len (ldpw->libc_wr_bitmap) *
862 sizeof (clib_bitmap_t));
864 clib_memcpy_fast (ldpw->ex_bitmap, ldpw->libc_ex_bitmap,
865 vec_len (ldpw->libc_ex_bitmap) *
866 sizeof (clib_bitmap_t));
868 rv = libc_pselect (libc_bits,
869 readfds ? (fd_set *) ldpw->rd_bitmap : NULL,
870 writefds ? (fd_set *) ldpw->wr_bitmap : NULL,
871 exceptfds ? (fd_set *) ldpw->ex_bitmap : NULL,
872 &libc_tspec, sigmask);
875 ldp_select_libc_map_merge (ldpw->rd_bitmap, readfds);
876 ldp_select_libc_map_merge (ldpw->wr_bitmap, writefds);
877 ldp_select_libc_map_merge (ldpw->ex_bitmap, exceptfds);
888 while ((time_out == -1) || (clib_time_now (&ldpw->clib_time) < time_out));
892 /* TBD: set timeout to amount of time left */
893 clib_bitmap_zero (ldpw->rd_bitmap);
894 clib_bitmap_zero (ldpw->si_rd_bitmap);
895 clib_bitmap_zero (ldpw->libc_rd_bitmap);
896 clib_bitmap_zero (ldpw->wr_bitmap);
897 clib_bitmap_zero (ldpw->si_wr_bitmap);
898 clib_bitmap_zero (ldpw->libc_wr_bitmap);
899 clib_bitmap_zero (ldpw->ex_bitmap);
900 clib_bitmap_zero (ldpw->si_ex_bitmap);
901 clib_bitmap_zero (ldpw->libc_ex_bitmap);
/* select () wrapper: converts the struct timeval timeout (microseconds) into
 * a struct timespec (nanoseconds) and calls ldp_pselect () without a signal
 * mask; a NULL timeout is passed through as NULL. */
907 select (int nfds, fd_set * __restrict readfds,
908 fd_set * __restrict writefds,
909 fd_set * __restrict exceptfds, struct timeval *__restrict timeout)
911 struct timespec tspec;
915 tspec.tv_sec = timeout->tv_sec;
916 tspec.tv_nsec = timeout->tv_usec * 1000;
918 return ldp_pselect (nfds, readfds, writefds, exceptfds,
919 timeout ? &tspec : NULL, NULL);
/* pselect () wrapper around ldp_pselect ().
 * NOTE(review): the caller's `sigmask` is not forwarded -- 0 is passed
 * instead, so the signal mask is never applied to the underlying
 * libc_pselect () call. Confirm whether this is intentional. */
924 pselect (int nfds, fd_set * __restrict readfds,
925 fd_set * __restrict writefds,
926 fd_set * __restrict exceptfds,
927 const struct timespec *__restrict timeout,
928 const __sigset_t * __restrict sigmask)
930 return ldp_pselect (nfds, readfds, writefds, exceptfds, timeout, 0);
/* load_cert_key_pair: read the certificate and key files named by the
 * LDP_ENV_TLS_CERT and LDP_ENV_TLS_KEY environment variables into stack
 * buffers, register them with vppcom_add_cert_key_pair () and remember the
 * resulting index in ldp->ckpair_index. The caller treats a negative return
 * as failure.
 * NOTE(review): each file is read with a single fread () into a 4096-byte
 * buffer, so larger files are silently truncated; the size_t result of
 * fread () is stored in an int. */
934 /* If transparent TLS mode is turned on, then ldp will load key and cert.
937 load_cert_key_pair (void)
939 char *cert_str = getenv (LDP_ENV_TLS_CERT);
940 char *key_str = getenv (LDP_ENV_TLS_KEY);
941 char cert_buf[4096], key_buf[4096];
942 int cert_size, key_size;
943 vppcom_cert_key_pair_t crypto;
947 if (!cert_str || !key_str)
949 LDBG (0, "ERROR: failed to read LDP environment %s\n",
954 fp = fopen (cert_str, "r");
957 LDBG (0, "ERROR: failed to open cert file %s \n", cert_str);
960 cert_size = fread (cert_buf, sizeof (char), sizeof (cert_buf), fp);
963 fp = fopen (key_str, "r");
966 LDBG (0, "ERROR: failed to open key file %s \n", key_str);
969 key_size = fread (key_buf, sizeof (char), sizeof (key_buf), fp);
972 crypto.cert = cert_buf;
973 crypto.key = key_buf;
974 crypto.cert_len = cert_size;
975 crypto.key_len = key_size;
976 ckp_index = vppcom_add_cert_key_pair (&crypto);
979 LDBG (0, "ERROR: failed to add cert key pair\n");
983 ldp->ckpair_index = ckp_index;
/* Attach the process-wide cert/key pair to a session. The pair is loaded on
 * first use (ldp->ckpair_index == ~0 means "not loaded yet") and then set on
 * `vlsh` through VPPCOM_ATTR_SET_CKPAIR; returns the vls_attr () result. */
989 assign_cert_key_pair (vls_handle_t vlsh)
993 if (ldp->ckpair_index == ~0 && load_cert_key_pair () < 0)
996 ckp_len = sizeof (ldp->ckpair_index);
997 return vls_attr (vlsh, VPPCOM_ATTR_SET_CKPAIR, &ldp->ckpair_index, &ckp_len);
/* socket () wrapper. AF_INET/AF_INET6 stream and datagram sockets become vls
 * sessions: the protocol is TLS when transparent TLS is enabled, otherwise
 * UDP for SOCK_DGRAM and TCP for SOCK_STREAM; SOCK_NONBLOCK from `type` is
 * passed on as is_nonblocking. With transparent TLS the cert/key pair is
 * assigned to the new session. The returned fd is the vls handle mapped
 * through ldp_vlsh_to_fd (). Everything else goes to libc_socket (). */
1001 socket (int domain, int type, int protocol)
1003 int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
1004 u8 is_nonblocking = type & SOCK_NONBLOCK ? 1 : 0;
1009 if (((domain == AF_INET) || (domain == AF_INET6)) &&
1010 ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM)))
1013 if (ldp->transparent_tls)
1015 proto = VPPCOM_PROTO_TLS;
1018 proto = ((sock_type == SOCK_DGRAM) ?
1019 VPPCOM_PROTO_UDP : VPPCOM_PROTO_TCP);
1021 LDBG (0, "calling vls_create: proto %u (%s), is_nonblocking %u",
1022 proto, vppcom_proto_str (proto), is_nonblocking);
1024 vlsh = vls_create (proto, is_nonblocking);
1032 if (ldp->transparent_tls)
1034 if (assign_cert_key_pair (vlsh) < 0)
1037 rv = ldp_vlsh_to_fd (vlsh);
1042 LDBG (0, "calling libc_socket");
1043 rv = libc_socket (domain, type, protocol);
1050 * Create two new sockets, of type TYPE in domain DOMAIN and using
1051 * protocol PROTOCOL, which are connected to each other, and put file
1052 * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
1053 * one will be chosen automatically.
1054 * Returns 0 on success, -1 for errors.
/* socketpair () wrapper: the inet stream/datagram case is not implemented
 * for vcl (it only logs "LDP-TBD"); every other combination is forwarded to
 * libc_socketpair (). */
1057 socketpair (int domain, int type, int protocol, int fds[2])
1059 int rv, sock_type = type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK);
1063 if (((domain == AF_INET) || (domain == AF_INET6)) &&
1064 ((sock_type == SOCK_STREAM) || (sock_type == SOCK_DGRAM)))
1066 LDBG (0, "LDP-TBD");
1072 LDBG (1, "calling libc_socketpair");
1073 rv = libc_socketpair (domain, type, protocol, fds);
/* bind () wrapper. For a vls-backed fd the sockaddr is converted into a vcl
 * endpoint (ip pointer and port are taken from the sockaddr unchanged) after
 * checking that `len` matches the address family's sockaddr size exactly;
 * unsupported families set errno to EAFNOSUPPORT. The endpoint is then bound
 * with vls_bind (). Other fds go to libc_bind (). */
1080 bind (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
1082 const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
1088 vlsh = ldp_fd_to_vlsh (fd);
1089 if (vlsh != VLS_INVALID_HANDLE)
1093 switch (addr->sa_family)
1096 if (len != sizeof (struct sockaddr_in))
1098 LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET addr len %u!",
1104 ep.is_ip4 = VPPCOM_IS_IP4;
1105 ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
1106 ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
1110 if (len != sizeof (struct sockaddr_in6))
1112 LDBG (0, "ERROR: fd %d: vlsh %u: Invalid AF_INET6 addr len %u!",
1118 ep.is_ip4 = VPPCOM_IS_IP6;
1119 ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1120 ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
1124 LDBG (0, "ERROR: fd %d: vlsh %u: Unsupported address family %u!",
1125 fd, vlsh, addr->sa_family);
1126 errno = EAFNOSUPPORT;
1130 LDBG (0, "fd %d: calling vls_bind: vlsh %u, addr %p, len %u", fd, vlsh,
1133 rv = vls_bind (vlsh, &ep);
1134 if (rv != VPPCOM_OK)
1142 LDBG (0, "fd %d: calling libc_bind: addr %p, len %u", fd, addr, len);
1143 rv = libc_bind (fd, addr, len);
1147 LDBG (1, "fd %d: returning %d", fd, rv);
/* Fill a caller-supplied sockaddr from a vcl endpoint. The family is chosen
 * from ep->is_ip4, the port is copied as stored in the endpoint, *len is
 * clamped down to the size of the matching sockaddr structure, and only as
 * many address bytes as fit in *len are copied (copy_len is *len minus the
 * non-address part of the structure). Nothing is written unless addr, len
 * and ep are all non-NULL.
 * NOTE(review): copy_len is negative when *len is smaller than sa_len;
 * confirm the memcpy calls are guarded against that. */
1153 ldp_copy_ep_to_sockaddr (struct sockaddr *addr, socklen_t *__restrict len,
1156 int rv = 0, sa_len, copy_len;
1160 if (addr && len && ep)
1162 addr->sa_family = (ep->is_ip4 == VPPCOM_IS_IP4) ? AF_INET : AF_INET6;
1163 switch (addr->sa_family)
1166 ((struct sockaddr_in *) addr)->sin_port = ep->port;
1167 if (*len > sizeof (struct sockaddr_in))
1168 *len = sizeof (struct sockaddr_in);
1169 sa_len = sizeof (struct sockaddr_in) - sizeof (struct in_addr);
1170 copy_len = *len - sa_len;
1172 memcpy (&((struct sockaddr_in *) addr)->sin_addr, ep->ip,
1177 ((struct sockaddr_in6 *) addr)->sin6_port = ep->port;
1178 if (*len > sizeof (struct sockaddr_in6))
1179 *len = sizeof (struct sockaddr_in6);
1180 sa_len = sizeof (struct sockaddr_in6) - sizeof (struct in6_addr);
1181 copy_len = *len - sa_len;
1183 memcpy (((struct sockaddr_in6 *) addr)->sin6_addr.
1184 __in6_u.__u6_addr8, ep->ip, copy_len);
/* getsockname () wrapper: for a vls-backed fd the local endpoint is fetched
 * with VPPCOM_ATTR_GET_LCL_ADDR (addr_buf is sized for an IPv6 address) and
 * converted with ldp_copy_ep_to_sockaddr (); other fds go to
 * libc_getsockname (). */
1197 getsockname (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
1199 struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
1205 vlsh = ldp_fd_to_vlsh (fd);
1206 if (vlsh != VLS_INVALID_HANDLE)
1209 u8 addr_buf[sizeof (struct in6_addr)];
1210 u32 size = sizeof (ep);
1214 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size);
1215 if (rv != VPPCOM_OK)
1222 rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
1223 if (rv != VPPCOM_OK)
1232 rv = libc_getsockname (fd, _addr, len);
/* connect () wrapper. A NULL address is rejected up front (logged as an
 * error). For a vls-backed fd the sockaddr is converted to a vcl endpoint
 * with the same exact-length and address-family checks as bind (), then
 * vls_connect () is called. Other fds go to libc_connect (). */
1239 connect (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
1241 const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
1249 LDBG (0, "ERROR: fd %d: NULL addr, len %u", fd, len);
1255 vlsh = ldp_fd_to_vlsh (fd);
1256 if (vlsh != VLS_INVALID_HANDLE)
1260 switch (addr->sa_family)
1263 if (len != sizeof (struct sockaddr_in))
1265 LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET addr len %u!",
1271 ep.is_ip4 = VPPCOM_IS_IP4;
1272 ep.ip = (u8 *) & ((const struct sockaddr_in *) addr)->sin_addr;
1273 ep.port = (u16) ((const struct sockaddr_in *) addr)->sin_port;
1277 if (len != sizeof (struct sockaddr_in6))
1279 LDBG (0, "fd %d: ERROR vlsh %u: Invalid AF_INET6 addr len %u!",
1285 ep.is_ip4 = VPPCOM_IS_IP6;
1286 ep.ip = (u8 *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1287 ep.port = (u16) ((const struct sockaddr_in6 *) addr)->sin6_port;
1291 LDBG (0, "fd %d: ERROR vlsh %u: Unsupported address family %u!",
1292 fd, vlsh, addr->sa_family);
1293 errno = EAFNOSUPPORT;
1297 LDBG (0, "fd %d: calling vls_connect(): vlsh %u addr %p len %u", fd,
1300 rv = vls_connect (vlsh, &ep);
1301 if (rv != VPPCOM_OK)
1309 LDBG (0, "fd %d: calling libc_connect(): addr %p, len %u",
1312 rv = libc_connect (fd, addr, len);
1316 LDBG (1, "fd %d: returning %d (0x%x)", fd, rv, rv);
/* getpeername () wrapper: for a vls-backed fd the peer endpoint is fetched
 * with VPPCOM_ATTR_GET_PEER_ADDR and converted with
 * ldp_copy_ep_to_sockaddr (); other fds go to libc_getpeername (). */
1321 getpeername (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
1323 struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
1329 vlsh = ldp_fd_to_vlsh (fd);
1330 if (vlsh != VLS_INVALID_HANDLE)
1333 u8 addr_buf[sizeof (struct in6_addr)];
1334 u32 size = sizeof (ep);
1337 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PEER_ADDR, &ep, &size);
1338 if (rv != VPPCOM_OK)
1345 rv = ldp_copy_ep_to_sockaddr (addr, len, &ep);
1346 if (rv != VPPCOM_OK)
1355 rv = libc_getpeername (fd, addr, len);
/* send () wrapper: vls-backed fds use vls_sendto () with no destination
 * endpoint, and a result below VPPCOM_OK is treated as an error; other fds
 * go to libc_send (). */
1362 send (int fd, const void *buf, size_t n, int flags)
1364 vls_handle_t vlsh = ldp_fd_to_vlsh (fd);
1369 if (vlsh != VLS_INVALID_HANDLE)
1371 size = vls_sendto (vlsh, (void *) buf, n, flags, NULL);
1372 if (size < VPPCOM_OK)
1380 size = libc_send (fd, buf, n, flags);
/* sendfile () wrapper. When out_fd is vls-backed the transfer is emulated:
 * the session flags are read (to honour O_NONBLOCK), in_fd is positioned at
 * *offset with lseek (), and then, in a loop, the writable space of the
 * session (VPPCOM_ATTR_GET_NWRITE) bounds how much is read from in_fd into
 * the worker's io_buffer with libc_read () and written with vls_write (),
 * until `len` bytes have been handled. The io_buffer is reset on every exit
 * path. When out_fd is a regular fd the call goes to libc_sendfile ().
 * NOTE(review): vec_validate (buf, bytes_to_read) makes index bytes_to_read
 * valid, i.e. one byte more than is read -- presumably harmless, confirm.
 * NOTE(review): `*offset += results + 1` advances the offset one byte past
 * the data accounted for in `results`; sendfile(2) specifies the offset of
 * the byte following the last byte read -- looks like an off-by-one,
 * confirm. */
1387 sendfile (int out_fd, int in_fd, off_t * offset, size_t len)
1389 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
1395 vlsh = ldp_fd_to_vlsh (out_fd);
1396 if (vlsh != VLS_INVALID_HANDLE)
1399 ssize_t results = 0;
1400 size_t n_bytes_left = len;
1401 size_t bytes_to_read;
1404 u32 flags, flags_len = sizeof (flags);
1406 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_FLAGS, &flags, &flags_len);
1407 if (PREDICT_FALSE (rv != VPPCOM_OK))
1409 LDBG (0, "ERROR: out fd %d: vls_attr: vlsh %u, returned %d (%s)!",
1410 out_fd, vlsh, rv, vppcom_retval_str (rv));
1412 vec_reset_length (ldpw->io_buffer);
1420 off_t off = lseek (in_fd, *offset, SEEK_SET);
1421 if (PREDICT_FALSE (off == -1))
1427 ASSERT (off == *offset);
1432 size = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITE, 0, 0);
1435 LDBG (0, "ERROR: fd %d: vls_attr: vlsh %u returned %ld (%s)!",
1436 out_fd, vlsh, size, vppcom_retval_str (size));
1437 vec_reset_length (ldpw->io_buffer);
1443 bytes_to_read = size;
1444 if (bytes_to_read == 0)
1446 if (flags & O_NONBLOCK)
1455 bytes_to_read = clib_min (n_bytes_left, bytes_to_read);
1456 vec_validate (ldpw->io_buffer, bytes_to_read);
1457 nbytes = libc_read (in_fd, ldpw->io_buffer, bytes_to_read);
1462 vec_reset_length (ldpw->io_buffer);
1469 size = vls_write (vlsh, ldpw->io_buffer, nbytes);
1472 if (size == VPPCOM_EAGAIN)
1474 if (flags & O_NONBLOCK)
1485 vec_reset_length (ldpw->io_buffer);
1494 ASSERT (n_bytes_left >= nbytes);
1495 n_bytes_left = n_bytes_left - nbytes;
1497 while (n_bytes_left > 0);
1500 vec_reset_length (ldpw->io_buffer);
1503 off_t off = lseek (in_fd, *offset, SEEK_SET);
1504 if (PREDICT_FALSE (off == -1))
1510 ASSERT (off == *offset);
1511 *offset += results + 1;
1523 size = libc_sendfile (out_fd, in_fd, offset, len);
/* sendfile64 () wrapper: simply forwards to sendfile (). */
1531 sendfile64 (int out_fd, int in_fd, off_t * offset, size_t len)
1533 return sendfile (out_fd, in_fd, offset, len);
/* recv () wrapper: vls-backed fds use vls_recvfrom () without a source
 * endpoint; other fds go to libc_recv (). */
1537 recv (int fd, void *buf, size_t n, int flags)
1544 vlsh = ldp_fd_to_vlsh (fd);
1545 if (vlsh != VLS_INVALID_HANDLE)
1547 size = vls_recvfrom (vlsh, buf, n, flags, NULL);
1556 size = libc_recv (fd, buf, n, flags);
/* Fortified entry point for recv (): forwards to recv (). Presumably the
 * n <= buflen check happens before the call -- TODO confirm. */
1563 __recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
1568 return recv (fd, buf, n, flags);
/* Send `n` bytes on a vls session, optionally to an explicit destination.
 * When a destination address is supplied it is converted into a vcl endpoint
 * (carrying `app_tlvs`) and passed to vls_sendto (); otherwise the endpoint
 * pointer stays NULL.
 * NOTE(review): an unsupported address family returns the positive value
 * EAFNOSUPPORT, which callers that test for `< 0` would read as a byte
 * count -- confirm whether a negative value / errno was intended. */
1572 ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n,
1573 vppcom_endpt_tlv_t *app_tlvs, int flags,
1574 __CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
1576 const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
1577 vppcom_endpt_t *ep = 0;
1580 _ep.app_tlvs = app_tlvs;
1585 switch (addr->sa_family)
1588 ep->is_ip4 = VPPCOM_IS_IP4;
1590 (uint8_t *) & ((const struct sockaddr_in *) addr)->sin_addr;
1591 ep->port = (uint16_t) ((const struct sockaddr_in *) addr)->sin_port;
1595 ep->is_ip4 = VPPCOM_IS_IP6;
1597 (uint8_t *) & ((const struct sockaddr_in6 *) addr)->sin6_addr;
1599 (uint16_t) ((const struct sockaddr_in6 *) addr)->sin6_port;
1603 return EAFNOSUPPORT;
1607 return vls_sendto (vlsh, (void *) buf, n, flags, ep);
/* Receive from a vls session. In one path the source endpoint is requested
 * from vls_recvfrom () (src_addr is sized for a sockaddr_in6) and converted
 * into the caller's sockaddr with ldp_copy_ep_to_sockaddr (); in the other
 * path vls_recvfrom () is called with a NULL endpoint. */
1611 ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, int flags,
1612 __SOCKADDR_ARG _addr, socklen_t *__restrict addr_len)
1614 u8 src_addr[sizeof (struct sockaddr_in6)];
1615 struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
1623 size = vls_recvfrom (vlsh, buf, n, flags, &ep);
1627 rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
1633 size = vls_recvfrom (vlsh, buf, n, flags, NULL);
/* sendto () wrapper: vls-backed fds go through ldp_vls_sendo () with no
 * application TLVs; other fds go to libc_sendto (). */
1639 sendto (int fd, const void *buf, size_t n, int flags,
1640 __CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
1642 const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
1648 vlsh = ldp_fd_to_vlsh (fd);
1649 if (vlsh != VLS_INVALID_HANDLE)
1651 size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len);
1660 size = libc_sendto (fd, buf, n, flags, addr, addr_len);
/* recvfrom () wrapper: vls-backed fds go through ldp_vls_recvfrom (); other
 * fds go to libc_recvfrom (). */
1667 recvfrom (int fd, void *__restrict buf, size_t n, int flags,
1668 __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
1675 vlsh = ldp_fd_to_vlsh (fd);
1676 if (vlsh != VLS_INVALID_HANDLE)
1678 size = ldp_vls_recvfrom (vlsh, buf, n, flags, addr, addr_len);
1687 size = libc_recvfrom (fd, buf, n, flags, addr, addr_len);
/* Convert the control messages of `msg` into a vector of vcl endpoint TLVs.
 * Each supported cmsg appends a vppcom_endpt_tlv_t header followed by its
 * payload to the byte vector behind *app_tlvs: a 16-bit value tagged
 * VCL_UDP_SEGMENT, or the ipi_spec_dst address of an in_pktinfo tagged
 * VCL_IP_PKTINFO. Unsupported levels and types are only logged. The possibly
 * reallocated vector is stored back into *app_tlvs. */
1694 ldp_parse_cmsg (vls_handle_t vlsh, const struct msghdr *msg,
1695 vppcom_endpt_tlv_t **app_tlvs)
1697 uint8_t *ad, *at = (uint8_t *) *app_tlvs;
1698 vppcom_endpt_tlv_t *adh;
1699 struct in_pktinfo *pi;
1700 struct cmsghdr *cmsg;
1702 cmsg = CMSG_FIRSTHDR (msg);
1704 while (cmsg != NULL)
1706 switch (cmsg->cmsg_level)
1709 switch (cmsg->cmsg_type)
1712 vec_add2 (at, adh, sizeof (*adh));
1713 adh->data_type = VCL_UDP_SEGMENT;
1714 adh->data_len = sizeof (uint16_t);
1715 vec_add2 (at, ad, sizeof (uint16_t));
1716 *(uint16_t *) ad = *(uint16_t *) CMSG_DATA (cmsg);
1719 LDBG (1, "SOL_UDP cmsg_type %u not supported", cmsg->cmsg_type);
1724 switch (cmsg->cmsg_type)
1727 vec_add2 (at, adh, sizeof (*adh));
1728 adh->data_type = VCL_IP_PKTINFO;
1729 adh->data_len = sizeof (struct in_addr);
1730 vec_add2 (at, ad, sizeof (struct in_addr));
1731 pi = (void *) CMSG_DATA (cmsg);
1732 clib_memcpy_fast (ad, &pi->ipi_spec_dst,
1733 sizeof (struct in_addr));
1736 LDBG (1, "SOL_IP cmsg_type %u not supported", cmsg->cmsg_type);
1741 LDBG (1, "cmsg_level %u not supported", cmsg->cmsg_level);
1744 cmsg = CMSG_NXTHDR ((struct msghdr *) msg, cmsg);
1746 *app_tlvs = (vppcom_endpt_tlv_t *) at;
/* Build the control message returned by recvmsg (): the first cmsg header of
 * `msg` is zeroed and, depending on the session's IP_PKTINFO attribute and
 * local address, filled with a SOL_IP/IP_PKTINFO message whose ipi_addr is
 * the session's local IPv4 address.
 * NOTE(review): CMSG_FIRSTHDR () returns NULL when msg_controllen is smaller
 * than a cmsghdr, and the result is passed to memset () unchecked; the
 * caller only tests msg_controllen for non-zero. */
1751 ldp_make_cmsg (vls_handle_t vlsh, struct msghdr *msg)
1753 u32 optval, optlen = sizeof (optval);
1754 struct cmsghdr *cmsg;
1756 cmsg = CMSG_FIRSTHDR (msg);
1757 memset (cmsg, 0, sizeof (*cmsg));
1759 if (!vls_attr (vlsh, VPPCOM_ATTR_GET_IP_PKTINFO, (void *) &optval, &optlen))
1765 u8 addr_buf[sizeof (struct in_addr)];
1766 u32 size = sizeof (ep);
1770 if (!vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size))
1772 struct in_pktinfo pi = {};
1774 clib_memcpy (&pi.ipi_addr, ep.ip, sizeof (struct in_addr));
1775 cmsg->cmsg_level = SOL_IP;
1776 cmsg->cmsg_type = IP_PKTINFO;
1777 cmsg->cmsg_len = CMSG_LEN (sizeof (pi));
1778 clib_memcpy (CMSG_DATA (cmsg), &pi, sizeof (pi));
/* sendmsg () wrapper. For a vls-backed fd the control messages are turned
 * into endpoint TLVs (ldp_parse_cmsg) and every iovec is sent with its own
 * ldp_vls_sendo () call to msg_name, with a short send tested per iovec; the
 * TLV vector is freed afterwards. An error is only treated specially when
 * nothing was sent (rv < 0 && total == 0). Other fds go to libc_sendmsg (). */
1786 sendmsg (int fd, const struct msghdr * msg, int flags)
1793 vlsh = ldp_fd_to_vlsh (fd);
1794 if (vlsh != VLS_INVALID_HANDLE)
1796 vppcom_endpt_tlv_t *app_tlvs = 0;
1797 struct iovec *iov = msg->msg_iov;
1801 ldp_parse_cmsg (vlsh, msg, &app_tlvs);
1803 for (i = 0; i < msg->msg_iovlen; ++i)
1805 rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, app_tlvs,
1806 flags, msg->msg_name, msg->msg_namelen);
1812 if (rv < iov[i].iov_len)
1817 vec_free (app_tlvs);
1819 if (rv < 0 && total == 0)
1829 size = libc_sendmsg (fd, msg, flags);
/* sendmmsg () wrapper: not implemented for vls-backed fds (only a "LDP-TBD"
 * warning is emitted); other fds are forwarded to libc_sendmmsg () with
 * call, failure and return-value logging through clib_warning (). */
1837 sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
1840 const char *func_str;
1841 u32 sh = ldp_fd_to_vlsh (fd);
1845 if (sh != VLS_INVALID_HANDLE)
1847 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
1853 func_str = "libc_sendmmsg";
1856 clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
1857 "vmessages %p, vlen %u, flags 0x%x",
1858 getpid (), fd, fd, func_str, vmessages, vlen, flags);
1860 size = libc_sendmmsg (fd, vmessages, vlen, flags);
1867 int errno_val = errno;
1868 clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
1869 "rv %d, errno = %d", getpid (), fd, fd,
1870 func_str, size, errno_val);
1874 clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
1875 getpid (), fd, fd, size, size);
/* recvmsg () wrapper. For a vls-backed fd the number of readable bytes is
 * sampled first (VPPCOM_ATTR_GET_NREAD), then each iovec is filled with
 * ldp_vls_recvfrom (); the source address is requested only for the first
 * iovec. A short read and having consumed max_deq bytes are both tested per
 * iovec. When the caller provided control space (msg_controllen != 0) a
 * control message is built with ldp_make_cmsg (). Other fds go to
 * libc_recvmsg (). */
1882 recvmsg (int fd, struct msghdr * msg, int flags)
1889 vlsh = ldp_fd_to_vlsh (fd);
1890 if (vlsh != VLS_INVALID_HANDLE)
1892 struct iovec *iov = msg->msg_iov;
1893 ssize_t max_deq, total = 0;
1896 max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
1900 for (i = 0; i < msg->msg_iovlen; i++)
1902 rv = ldp_vls_recvfrom (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
1903 (i == 0 ? msg->msg_name : NULL),
1904 (i == 0 ? &msg->msg_namelen : NULL));
1910 if (rv < iov[i].iov_len)
1913 if (total >= max_deq)
1917 if (rv < 0 && total == 0)
1924 if (msg->msg_controllen)
1925 ldp_make_cmsg (vlsh, msg);
1931 size = libc_recvmsg (fd, msg, flags);
/* recvmmsg () wrapper. For a vls-backed fd messages are received one at a
 * time with recvmsg () until `vlen` messages were read or the deadline
 * derived from `tmo` passes; returns the number of messages received, or the
 * last recvmsg () result when none were. Other fds go to libc_recvmmsg ().
 * NOTE(review): `(f64) ~0` converts the int value -1, so time_out becomes
 * -1.0 rather than a large "never" value; `clib_time_now () >= time_out` is
 * then always true and the deadline test fires on its first evaluation --
 * confirm the intended no-timeout behaviour. */
1939 recvmmsg (int fd, struct mmsghdr *vmessages,
1940 unsigned int vlen, int flags, struct timespec *tmo)
1942 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
1947 sh = ldp_fd_to_vlsh (fd);
1949 if (sh != VLS_INVALID_HANDLE)
1956 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
1957 clib_time_init (&ldpw->clib_time);
1960 time_out = (f64) tmo->tv_sec + (f64) tmo->tv_nsec / (f64) 1e9;
1961 time_out += clib_time_now (&ldpw->clib_time);
1965 time_out = (f64) ~0;
1968 while (nvecs < vlen)
1970 mh = &vmessages[nvecs];
1971 rv = recvmsg (fd, &mh->msg_hdr, flags);
1979 if (!time_out || clib_time_now (&ldpw->clib_time) >= time_out)
1985 return nvecs > 0 ? nvecs : rv;
1989 return libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
/* getsockopt () wrapper. For a vls-backed fd the supported TCP, IP, IPv6 and
 * socket-level options are answered from session attributes via vls_attr ();
 * unsupported option names are logged as errors. Other fds go to
 * libc_getsockopt (). */
1995 getsockopt (int fd, int level, int optname,
1996 void *__restrict optval, socklen_t * __restrict optlen)
2003 vlsh = ldp_fd_to_vlsh (fd);
2004 if (vlsh != VLS_INVALID_HANDLE)
2014 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_NODELAY,
2018 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_USER_MSS,
2022 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPIDLE,
2026 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TCP_KEEPINTVL,
/* TCP_INFO is not implemented: a correctly sized buffer is just zeroed. */
2030 if (optval && optlen && (*optlen == sizeof (struct tcp_info)))
2032 LDBG (1, "fd %d: vlsh %u SOL_TCP, TCP_INFO, optval %p, "
2033 "optlen %d: #LDP-NOP#", fd, vlsh, optval, *optlen);
2034 memset (optval, 0, *optlen);
/* The congestion algorithm is always reported as "cubic".
 * NOTE(review): *optlen is overwritten before use, so the size of the
 * caller's buffer is never checked and strncpy () writes 6 bytes
 * unconditionally. */
2040 case TCP_CONGESTION:
2041 *optlen = strlen ("cubic");
2042 strncpy (optval, "cubic", *optlen + 1);
2046 LDBG (0, "ERROR: fd %d: getsockopt SOL_TCP: sid %u, "
2047 "optname %d unsupported!", fd, vlsh, optname);
2054 case SO_ORIGINAL_DST:
2056 vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, optval, optlen);
2060 "ERROR: fd %d: getsockopt SOL_IP: vlsh %u "
2061 "optname %d unsupported!",
2070 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_V6ONLY, optval, optlen);
2073 LDBG (0, "ERROR: fd %d: getsockopt SOL_IPV6: vlsh %u "
2074 "optname %d unsupported!", fd, vlsh, optname);
2082 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_LISTEN, optval, optlen);
2085 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_KEEPALIVE, optval, optlen);
/* The vcl protocol value is mapped to a socket type: non-zero becomes
 * SOCK_DGRAM, zero SOCK_STREAM.
 * NOTE(review): optval is rewritten without looking at rv first. */
2088 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_PROTOCOL, optval, optlen);
2089 *(int *) optval = *(int *) optval ? SOCK_DGRAM : SOCK_STREAM;
2092 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_TX_FIFO_LEN,
2096 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_RX_FIFO_LEN,
2100 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEADDR, optval, optlen);
2103 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_REUSEPORT, optval, optlen);
2106 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_BROADCAST, optval, optlen);
2109 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_DOMAIN, optval, optlen);
2112 rv = vls_attr (vlsh, VPPCOM_ATTR_GET_ERROR, optval, optlen);
2114 case SO_BINDTODEVICE:
2118 LDBG (0, "ERROR: fd %d: getsockopt SOL_SOCKET: vlsh %u "
2119 "optname %d unsupported!", fd, vlsh, optname);
2127 if (rv != VPPCOM_OK)
2135 rv = libc_getsockopt (fd, level, optname, optval, optlen);
2142 setsockopt (int fd, int level, int optname,
2143 const void *optval, socklen_t optlen)
2150 vlsh = ldp_fd_to_vlsh (fd);
2151 if (vlsh != VLS_INVALID_HANDLE)
2161 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_NODELAY,
2162 (void *) optval, &optlen);
2165 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_USER_MSS,
2166 (void *) optval, &optlen);
2169 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPIDLE,
2170 (void *) optval, &optlen);
2173 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_TCP_KEEPINTVL,
2174 (void *) optval, &optlen);
2176 case TCP_CONGESTION:
2182 LDBG (0, "ERROR: fd %d: setsockopt() SOL_TCP: vlsh %u"
2183 "optname %d unsupported!", fd, vlsh, optname);
2191 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_V6ONLY,
2192 (void *) optval, &optlen);
2195 LDBG (0, "ERROR: fd %d: setsockopt SOL_IPV6: vlsh %u"
2196 "optname %d unsupported!", fd, vlsh, optname);
2204 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_KEEPALIVE,
2205 (void *) optval, &optlen);
2208 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEADDR,
2209 (void *) optval, &optlen);
2212 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_REUSEPORT, (void *) optval,
2216 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_BROADCAST,
2217 (void *) optval, &optlen);
2223 LDBG (0, "ERROR: fd %d: setsockopt SOL_SOCKET: vlsh %u "
2224 "optname %d unsupported!", fd, vlsh, optname);
2232 rv = vls_attr (vlsh, VPPCOM_ATTR_SET_IP_PKTINFO, (void *) optval,
2237 "ERROR: fd %d: setsockopt SOL_IP: vlsh %u optname %d"
2247 if (rv != VPPCOM_OK)
2255 rv = libc_setsockopt (fd, level, optname, optval, optlen);
/*
 * listen() interposer: uses vls_listen() with backlog n when fd resolves to
 * a valid VLS handle; the libc_listen() call serves other fds. The result is
 * logged before returning.
 */
2262 listen (int fd, int n)
2269 vlsh = ldp_fd_to_vlsh (fd);
2270 if (vlsh != VLS_INVALID_HANDLE)
2272 LDBG (0, "fd %d: calling vls_listen: vlsh %u, n %d", fd, vlsh, n);
2274 rv = vls_listen (vlsh, n);
2275 if (rv != VPPCOM_OK)
2283 LDBG (0, "fd %d: calling libc_listen(): n %d", fd, n);
2284 rv = libc_listen (fd, n);
2287 LDBG (1, "fd %d: returning %d", fd, rv);
/*
 * Shared implementation behind accept() and accept4().
 *
 * For a listener that resolves to a valid VLS handle, a session is accepted
 * with vls_accept(), the peer endpoint is copied into addr/addr_len by
 * ldp_copy_ep_to_sockaddr(), and the fd obtained from ldp_vlsh_to_fd() is
 * the result. Other listeners are served by libc_accept4().
 */
2292 ldp_accept4 (int listen_fd, __SOCKADDR_ARG _addr,
2293 socklen_t *__restrict addr_len, int flags)
2295 struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
2296 vls_handle_t listen_vlsh, accept_vlsh;
2301 listen_vlsh = ldp_fd_to_vlsh (listen_fd);
2302 if (listen_vlsh != VLS_INVALID_HANDLE)
/* Scratch storage sized for the largest address (IPv6) the endpoint may
 * carry -- presumably referenced by ep; the assignment is not shown here. */
2305 u8 src_addr[sizeof (struct sockaddr_in6)];
2306 memset (&ep, 0, sizeof (ep));
2309 LDBG (0, "listen fd %d: calling vppcom_session_accept: listen sid %u,"
2310 " ep %p, flags 0x%x", listen_fd, listen_vlsh, &ep, flags);
2312 accept_vlsh = vls_accept (listen_vlsh, &ep, flags);
/* A negative handle is an error code; its magnitude becomes errno. */
2313 if (accept_vlsh < 0)
2315 errno = -accept_vlsh;
2320 rv = ldp_copy_ep_to_sockaddr (addr, addr_len, &ep);
/* The endpoint copy failed: close the session that was just accepted. */
2321 if (rv != VPPCOM_OK)
2323 (void) vls_close (accept_vlsh);
2329 rv = ldp_vlsh_to_fd (accept_vlsh);
2335 LDBG (0, "listen fd %d: calling libc_accept4(): addr %p, addr_len %p,"
2336 " flags 0x%x", listen_fd, addr, addr_len, flags);
2338 rv = libc_accept4 (listen_fd, addr, addr_len, flags);
2341 LDBG (1, "listen fd %d: accept returning %d", listen_fd, rv);
/* accept4() interposer: forwards all arguments, including flags, to
 * ldp_accept4(). */
2347 accept4 (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len,
2350 return ldp_accept4 (fd, addr, addr_len, flags);
/* accept() interposer: same as accept4() with flags set to 0. */
2354 accept (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict addr_len)
2356 return ldp_accept4 (fd, addr, addr_len, 0);
/*
 * shutdown() interposer: calls vls_shutdown() with the caller's how value
 * when fd resolves to a valid VLS handle; the libc_shutdown() call serves
 * other fds.
 */
2360 shutdown (int fd, int how)
2367 vlsh = ldp_fd_to_vlsh (fd);
2368 if (vlsh != VLS_INVALID_HANDLE)
2370 LDBG (0, "called shutdown: fd %u vlsh %u how %d", fd, vlsh, how);
2371 rv = vls_shutdown (vlsh, how);
2375 LDBG (0, "fd %d: calling libc_shutdown: how %d", fd, how);
2376 rv = libc_shutdown (fd, how);
/*
 * epoll_create1() interposer.
 *
 * When ldp->vcl_needs_real_epoll is set or vls_use_real_epoll() is true, a
 * kernel epoll fd is created with libc_epoll_create1 (flags), recorded as
 * the current worker's vcl_mq_epfd, and the needs-real-epoll flag is
 * cleared. The remaining code creates a VLS epoll session with
 * vls_epoll_create() and converts its handle to an fd with
 * ldp_vlsh_to_fd().
 */
2383 epoll_create1 (int flags)
2385 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2391 if (ldp->vcl_needs_real_epoll || vls_use_real_epoll ())
2393 /* Make sure workers have been allocated */
2396 ldp_alloc_workers ();
/* Fetch the worker context again after the allocation call above. */
2397 ldpw = ldp_worker_get_current ();
/* NOTE(review): the result is stored in vcl_mq_epfd and the flag cleared
 * with no failure check between these statements -- confirm a -1 result
 * is acceptable here. */
2399 rv = libc_epoll_create1 (flags);
2400 ldp->vcl_needs_real_epoll = 0;
2401 ldpw->vcl_mq_epfd = rv;
2402 LDBG (0, "created vcl epfd %u", rv);
2406 vlsh = vls_epoll_create ();
2407 if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE))
2414 rv = ldp_vlsh_to_fd (vlsh);
2416 LDBG (0, "epoll_create epfd %u vlsh %u", rv, vlsh);
/* epoll_create() interposer: size is not used; the call is forwarded to
 * epoll_create1 (0). */
2421 epoll_create (int size)
2423 return epoll_create1 (0);
/*
 * epoll_ctl() interposer.
 *
 * - epfd does not resolve to a VLS handle: the request goes to
 *   libc_epoll_ctl() unchanged.
 * - epfd and fd both resolve to VLS handles: the request goes to
 *   vls_epoll_ctl().
 * - epfd is a VLS epoll session but fd is not a VLS fd: the request is
 *   applied with libc_epoll_ctl() to the kernel epoll fd attached to the
 *   session (VPPCOM_ATTR_GET_LIBC_EPFD). This function can create that fd
 *   with libc_epoll_create1 (EPOLL_CLOEXEC) and record it through
 *   VPPCOM_ATTR_SET_LIBC_EPFD; a negative libc_epfd is treated as an error.
 */
2427 epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
2429 vls_handle_t vep_vlsh, vlsh;
2434 vep_vlsh = ldp_fd_to_vlsh (epfd);
2435 if (PREDICT_FALSE (vep_vlsh == VLS_INVALID_HANDLE))
2437 /* The LDP epoll_create1 always creates VCL epfd's.
2438 * The app should never have a kernel base epoll fd unless it
2439 * was acquired outside of the LD_PRELOAD process context.
2440 * In any case, if we get one, punt it to libc_epoll_ctl.
2443 "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
2445 epfd, op, fd, event ? event->events : 0);
2447 rv = libc_epoll_ctl (epfd, op, fd, event);
2451 vlsh = ldp_fd_to_vlsh (fd);
2453 LDBG (0, "epfd %d ep_vlsh %d, fd %u vlsh %d, op %u", epfd, vep_vlsh, fd,
2456 if (vlsh != VLS_INVALID_HANDLE)
2459 "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
2461 epfd, vep_vlsh, op, vlsh, event ? event->events : 0);
2463 rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
2464 if (rv != VPPCOM_OK)
2473 u32 size = sizeof (epfd);
2475 libc_epfd = vls_attr (vep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2478 LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
2479 "EPOLL_CLOEXEC", epfd, vep_vlsh);
2481 libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
2488 rv = vls_attr (vep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd,
2497 else if (PREDICT_FALSE (libc_epfd < 0))
2504 LDBG (1, "epfd %d: calling libc_epoll_ctl: libc_epfd %d, op %d, fd %d,"
2505 " event %p", epfd, libc_epfd, op, fd, event);
2507 rv = libc_epoll_ctl (libc_epfd, op, fd, event);
/*
 * Polling implementation behind epoll_pwait()/epoll_wait(), selected when
 * vls_use_eventfd() is false.
 *
 * The VLS epoll session (vls_epoll_wait) and the kernel epoll fd attached
 * to it (libc_epoll_pwait) are each polled with a zero timeout inside a
 * loop that repeats while timeout == -1 or the deadline computed from
 * timeout has not been reached. The worker's epoll_wait_vcl flag is set and
 * cleared around those polls -- presumably to alternate between the two
 * sources; confirm against the full loop body.
 */
2515 ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
2516 int timeout, const sigset_t * sigmask)
2518 ldp_worker_ctx_t *ldpw;
2519 double time_to_wait = (double) 0, max_time;
2520 int libc_epfd, rv = 0;
2521 vls_handle_t ep_vlsh;
/* Argument validation: events must be non-NULL and timeout at least -1. */
2525 if (PREDICT_FALSE (!events || (timeout < -1)))
/* Register this thread as a VCL worker if it has no worker index yet. */
2531 if (PREDICT_FALSE (vppcom_worker_index () == ~0))
2532 vls_register_vcl_worker ();
2534 ldpw = ldp_worker_get_current ();
/* The worker's own mq epoll fd is waited on directly through libc. */
2535 if (epfd == ldpw->vcl_mq_epfd)
2536 return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
2538 ep_vlsh = ldp_fd_to_vlsh (epfd);
2539 if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
2541 LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
2546 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
2547 clib_time_init (&ldpw->clib_time);
/* epoll timeouts are in milliseconds; the deadline is kept in seconds. A
 * negative timeout contributes no wait time to max_time. */
2548 time_to_wait = ((timeout >= 0) ? (double) timeout / 1000 : 0);
2549 max_time = clib_time_now (&ldpw->clib_time) + time_to_wait;
2551 libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
2552 if (PREDICT_FALSE (libc_epfd < 0))
2559 LDBG (2, "epfd %d: vep_idx %d, libc_epfd %d, events %p, maxevents %d, "
2560 "timeout %d, sigmask %p: time_to_wait %.02f", epfd, ep_vlsh,
2561 libc_epfd, events, maxevents, timeout, sigmask, time_to_wait);
2564 if (!ldpw->epoll_wait_vcl)
2566 rv = vls_epoll_wait (ep_vlsh, events, maxevents, 0);
2569 ldpw->epoll_wait_vcl = 1;
2580 ldpw->epoll_wait_vcl = 0;
2584 rv = libc_epoll_pwait (libc_epfd, events, maxevents, 0, sigmask);
2589 while ((timeout == -1) || (clib_time_now (&ldpw->clib_time) < max_time));
/*
 * eventfd-based implementation behind epoll_pwait()/epoll_wait(), selected
 * when vls_use_eventfd() is true.
 *
 * Steps visible in this block:
 *  1. Make sure a kernel epoll fd is attached to the VLS epoll session,
 *     creating one with libc_epoll_create1 (EPOLL_CLOEXEC) when
 *     VPPCOM_ATTR_GET_LIBC_EPFD yields 0.
 *  2. Add the worker's vcl_mq_epfd to that kernel epoll fd once; the
 *     mq_epfd_added flag records that this has been done.
 *  3. Collect pending VLS events with vls_epoll_wait (..., -2).
 *  4. Wait in libc_epoll_pwait(), storing kernel events after the VLS ones.
 *  5. If the mq epoll fd is among the kernel events, drop it from the result
 *     and poll the VLS session again to append its events.
 */
2596 ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
2597 int maxevents, int timeout, const sigset_t * sigmask)
2599 int libc_epfd, rv = 0, num_ev, libc_num_ev, vcl_wups = 0;
2600 struct epoll_event *libc_evts;
2601 ldp_worker_ctx_t *ldpw;
2602 vls_handle_t ep_vlsh;
/* Argument validation: events must be non-NULL and timeout at least -1. */
2606 if (PREDICT_FALSE (!events || (timeout < -1)))
2612 /* Make sure the vcl worker is valid. Could be that epoll fd was created on
2613 * one thread but it is now used on another */
2614 if (PREDICT_FALSE (vppcom_worker_index () == ~0))
2615 vls_register_vcl_worker ();
2617 ldpw = ldp_worker_get_current ();
/* The worker's own mq epoll fd is waited on directly through libc. */
2618 if (epfd == ldpw->vcl_mq_epfd)
2619 return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
2621 ep_vlsh = ldp_fd_to_vlsh (epfd);
2622 if (PREDICT_FALSE (ep_vlsh == VLS_INVALID_HANDLE))
2624 LDBG (0, "epfd %d: bad ep_vlsh %d!", epfd, ep_vlsh);
2629 libc_epfd = vls_attr (ep_vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
/* A value of 0 is taken to mean that no kernel epoll fd exists yet. */
2630 if (PREDICT_FALSE (!libc_epfd))
2632 u32 size = sizeof (epfd);
2634 LDBG (1, "epfd %d, vep_vlsh %d calling libc_epoll_create1: "
2635 "EPOLL_CLOEXEC", epfd, ep_vlsh);
2636 libc_epfd = libc_epoll_create1 (EPOLL_CLOEXEC);
2643 rv = vls_attr (ep_vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &libc_epfd, &size);
2651 if (PREDICT_FALSE (libc_epfd <= 0))
2658 if (PREDICT_FALSE (!ldpw->mq_epfd_added))
2660 struct epoll_event e = { 0 };
/* Tag the event with the mq epoll fd so it can be recognised below. */
2662 e.data.fd = ldpw->vcl_mq_epfd;
2663 if (libc_epoll_ctl (libc_epfd, EPOLL_CTL_ADD, ldpw->vcl_mq_epfd, &e) <
2666 LDBG (0, "epfd %d, add libc mq epoll fd %d to libc epoll fd %d",
2667 epfd, ldpw->vcl_mq_epfd, libc_epfd);
2671 ldpw->mq_epfd_added = 1;
2674 /* Request to only drain unhandled to prevent libc_epoll_wait starved */
2675 rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2);
2679 if (rv >= maxevents)
2683 else if (PREDICT_FALSE (rv < 0))
/* Kernel events are stored after the rv VLS events already collected.
 * NOTE(review): maxevents is passed as-is while the buffer starts rv
 * entries into events; confirm it was reduced by rv before this call. */
2692 libc_evts = &events[rv];
2694 libc_epoll_pwait (libc_epfd, libc_evts, maxevents, timeout, sigmask);
2695 if (libc_num_ev <= 0)
2697 rv = rv >= 0 ? rv : -1;
2701 for (int i = 0; i < libc_num_ev; i++)
2703 if (libc_evts[i].data.fd == ldpw->vcl_mq_epfd)
2705 /* We should remove mq epoll fd from events. */
2707 if (i != libc_num_ev)
2709 libc_evts[i].events = libc_evts[libc_num_ev].events;
2710 libc_evts[i].data.u64 = libc_evts[libc_num_ev].data.u64;
2712 num_ev = vls_epoll_wait (ep_vlsh, &libc_evts[libc_num_ev],
2713 maxevents - libc_num_ev, 0);
2714 if (PREDICT_TRUE (num_ev > 0))
2716 /* Woken up by vcl but no events generated. Accept it once */
2717 if (rv == 0 && libc_num_ev == 0 && timeout && vcl_wups++ < 1)
/*
 * epoll_pwait() interposer: dispatches to ldp_epoll_pwait_eventfd() when
 * vls_use_eventfd() is true and to ldp_epoll_pwait() otherwise, passing the
 * caller's sigmask through.
 */
2730 epoll_pwait (int epfd, struct epoll_event *events,
2731 int maxevents, int timeout, const sigset_t * sigmask)
2733 if (vls_use_eventfd ())
2734 return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout,
2737 return ldp_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
/*
 * epoll_wait() interposer: same dispatch as epoll_pwait(), with a NULL
 * signal mask.
 */
2741 epoll_wait (int epfd, struct epoll_event *events, int maxevents, int timeout)
2743 if (vls_use_eventfd ())
2744 return ldp_epoll_pwait_eventfd (epfd, events, maxevents, timeout, NULL);
2746 return ldp_epoll_pwait (epfd, events, maxevents, timeout, NULL);
/*
 * poll() interposer.
 *
 * Each entry of fds is sorted into one of two per-worker scratch vectors:
 * entries whose fd resolves to a VLS handle go to ldpw->vcl_poll, all
 * others are copied to ldpw->libc_poll with their original index kept in
 * ldpw->libc_poll_idxs. Both sets are then polled with a zero timeout
 * (vppcom_poll / libc_poll) in a loop that repeats while timeout is
 * negative or the deadline has not passed. Afterwards revents are written
 * back into fds and the scratch vectors are reset.
 */
2750 poll (struct pollfd *fds, nfds_t nfds, int timeout)
2752 ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
2753 int rv, i, n_revents = 0;
2758 LDBG (3, "fds %p, nfds %ld, timeout %d", fds, nfds, timeout);
2760 if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
2761 clib_time_init (&ldpw->clib_time);
/* timeout is in milliseconds; max_time is an absolute deadline in seconds. */
2763 max_time = (timeout >= 0) ? (f64) timeout / 1000 : 0;
2764 max_time += clib_time_now (&ldpw->clib_time);
2766 for (i = 0; i < nfds; i++)
2771 vlsh = ldp_fd_to_vlsh (fds[i].fd);
2772 if (vlsh != VLS_INVALID_HANDLE)
/* The caller's fd is negated while the entry is handled by VCL; the sign is
 * flipped back when results are copied out after the polling loop. */
2774 fds[i].fd = -fds[i].fd;
2775 vec_add2 (ldpw->vcl_poll, vp, 1);
2777 vp->sh = vlsh_to_sh (vlsh);
2778 vp->events = fds[i].events;
/* Requests for POLLRDNORM/POLLWRNORM are additionally expressed as
 * POLLIN/POLLOUT in the VCL entry. */
2779 #ifdef __USE_XOPEN2K
2780 if (fds[i].events & POLLRDNORM)
2781 vp->events |= POLLIN;
2782 if (fds[i].events & POLLWRNORM)
2783 vp->events |= POLLOUT;
2785 vp->revents = fds[i].revents;
2789 vec_add1 (ldpw->libc_poll, fds[i]);
2790 vec_add1 (ldpw->libc_poll_idxs, i);
2796 if (vec_len (ldpw->vcl_poll))
2798 rv = vppcom_poll (ldpw->vcl_poll, vec_len (ldpw->vcl_poll), 0);
2809 if (vec_len (ldpw->libc_poll))
2811 rv = libc_poll (ldpw->libc_poll, vec_len (ldpw->libc_poll), 0);
2824 while ((timeout < 0) || (clib_time_now (&ldpw->clib_time) < max_time));
/* Copy VCL results back to the caller's array and restore the original fd. */
2828 vec_foreach (vp, ldpw->vcl_poll)
2830 fds[vp->fds_ndx].fd = -fds[vp->fds_ndx].fd;
2831 fds[vp->fds_ndx].revents = vp->revents;
/* Report POLLRDNORM/POLLWRNORM back only where the caller asked for them
 * and the corresponding POLLIN/POLLOUT event fired. */
2832 #ifdef __USE_XOPEN2K
2833 if ((fds[vp->fds_ndx].revents & POLLIN) &&
2834 (fds[vp->fds_ndx].events & POLLRDNORM))
2835 fds[vp->fds_ndx].revents |= POLLRDNORM;
2836 if ((fds[vp->fds_ndx].revents & POLLOUT) &&
2837 (fds[vp->fds_ndx].events & POLLWRNORM))
2838 fds[vp->fds_ndx].revents |= POLLWRNORM;
2841 vec_reset_length (ldpw->vcl_poll);
/* Copy kernel results back using the saved original indices. */
2843 for (i = 0; i < vec_len (ldpw->libc_poll); i++)
2845 fds[ldpw->libc_poll_idxs[i]].revents = ldpw->libc_poll[i].revents;
2847 vec_reset_length (ldpw->libc_poll_idxs);
2848 vec_reset_length (ldpw->libc_poll);
/*
 * ppoll() interposer stub: emits an "LDP-TBD" warning tagged with the
 * process id. No polling logic is present in this block.
 */
2855 ppoll (struct pollfd *fds, nfds_t nfds,
2856 const struct timespec *timeout, const sigset_t * sigmask)
2860 clib_warning ("LDP<%d>: LDP-TBD", getpid ());
/* Prototypes carrying the constructor/destructor attributes (see
 * CONSTRUCTOR_ATTRIBUTE and DESTRUCTOR_ATTRIBUTE), which mark the functions
 * to run at library load and unload respectively. */
2868 void CONSTRUCTOR_ATTRIBUTE ldp_constructor (void);
2870 void DESTRUCTOR_ATTRIBUTE ldp_destructor (void);
2873 * This function is called when the library is loaded
/*
 * Library constructor: runs swrap_constructor() and then ldp_init(). A
 * non-zero ldp_init() result is reported on stderr; on success a "done"
 * warning is emitted when LDP_DEBUG is greater than 0.
 */
2876 ldp_constructor (void)
2878 swrap_constructor ();
2879 if (ldp_init () != 0)
2881 fprintf (stderr, "\nLDP<%d>: ERROR: ldp_constructor: failed!\n",
2885 else if (LDP_DEBUG > 0)
2886 clib_warning ("LDP<%d>: LDP constructor: done!\n", getpid ());
2890 * This function is called when the library is unloaded
/*
 * Library destructor: runs swrap_destructor() and prints a completion
 * message to stderr with fprintf().
 */
2893 ldp_destructor (void)
2896 swrap_destructor ();
2901 /* Don't use clib_warning() here because that calls writev()
2902 * which will call ldp_init().
2905 fprintf (stderr, "%s:%d: LDP<%d>: LDP destructor: done!\n",
2906 __func__, __LINE__, getpid ());
2911 * fd.io coding-style-patch-verification: ON
2914 * eval: (c-set-style "gnu")