2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
22 #include <sys/resource.h>
24 #include <vcl/vcom_socket_wrapper.h>
28 #include <vcl/vppcom.h>
29 #include <vcl/vcom_socket.h>
31 /* GCC has a printf-style format attribute check. */
32 #ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
33 #define PRINTF_ATTRIBUTE(a,b) \
34 __attribute__ ((__format__ (__printf__, a, b)))
36 #define PRINTF_ATTRIBUTE(a,b)
37 #endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
39 #define HAVE_CONSTRUCTOR_ATTRIBUTE
40 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
41 #define CONSTRUCTOR_ATTRIBUTE \
42 __attribute__ ((constructor))
44 #define CONSTRUCTOR_ATTRIBUTE
45 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
47 #define HAVE_DESTRUCTOR_ATTRIBUTE
48 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
49 #define DESTRUCTOR_ATTRIBUTE \
50 __attribute__ ((destructor))
52 #define DESTRUCTOR_ATTRIBUTE
55 #define HAVE_ADDRESS_SANITIZER_ATTRIBUTE
56 #ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
57 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE \
58 __attribute__((no_sanitize_address))
60 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
63 #define VCOM_SOCKET_FD_MAX 0x10000
65 static char vcom_app_name[MAX_VCOM_APP_NAME];
68 * RETURN: 0 on success or -1 on error.
71 vcom_set_app_name (char *__app_name)
/* Formats "vcom-<__app_name>-<pid>" into the file-scope vcom_app_name
 * buffer, bounded by MAX_VCOM_APP_NAME.
 * Only a negative snprintf() result is reported as -1.  A name that does
 * not fit is truncated by snprintf() and this function still returns 0.
 * NOTE(review): __app_name is passed to "%s" without a NULL check in the
 * lines visible here - confirm callers never pass NULL. */
73 return snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-%s-%d",
74 __app_name, getpid ()) < 0 ? -1 : 0;
80 if (vcom_app_name[0] == '\0')
82 snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-app-%d", getpid ());
88 * 1 if init, 0 otherwise
90 static int is_vcom_init;
93 * TBD: Make it thread safe
97 * constructor function called before main is called
98 * RETURN: 0 on success -1 on failure
103 pid_t pid = getpid ();
108 rv = vppcom_app_create (vcom_get_app_name ());
111 printf ("\n[%d] vcom_init...failed!\n", pid);
114 "[%d] vcom_init: vppcom_app_create failed!\n", pid);
117 if (vcom_socket_main_init () != 0)
119 printf ("\n[%d] vcom_init...failed!\n", pid);
122 "[%d] vcom_init: vcom_socket_main_init failed!\n", pid);
127 printf ("\n[%d] vcom_init...done!\n", pid);
135 pid_t pid = getpid ();
139 vcom_socket_main_destroy ();
140 vppcom_app_destroy ();
142 fprintf (stderr, "\n[%d] vcom_destroy...done!\n", pid);
147 is_vcom_socket_fd (int fd)
149 return vcom_socket_is_vcom_fd (fd);
153 is_vcom_epfd (int epfd)
155 return vcom_socket_is_vcom_epfd (epfd);
161 * Generic glibc fd api
165 /* Close the file descriptor FD.
167 This function is a cancellation point and therefore
168 not marked with __THROW. */
170 * PRE: is_vcom_socket_fd(__fd) == 1
171 * RETURN: 0 on success and -1 for errors.
174 vcom_close (int __fd)
176 if (vcom_init () != 0)
181 if (vcom_socket_close (__fd) != 0)
190 * RETURN: 0 on success, or -1 on error
196 pid_t pid = getpid ();
198 if (is_vcom_socket_fd (__fd) || is_vcom_epfd (__fd))
201 fprintf (stderr, "[%d] close: fd %d\n", pid, __fd);
202 rv = vcom_close (__fd);
204 fprintf (stderr, "[%d] close: vcom_close() returned %d\n", pid, rv);
212 return libc_close (__fd);
215 /* Read NBYTES into BUF from FD. Return the
216 number read, -1 for errors or 0 for EOF.
218 This function is a cancellation point and therefore
219 not marked with __THROW. */
221 vcom_read (int __fd, void *__buf, size_t __nbytes)
223 if (vcom_init () != 0)
228 return vcom_socket_read (__fd, __buf, __nbytes);
232 read (int __fd, void *__buf, size_t __nbytes)
/* Interposed read(): a descriptor recognised by is_vcom_socket_fd() is
 * served through vcom_read(); any other descriptor reaches the final
 * libc_read() call.  How the vcom result is handed back to the caller is
 * not visible in this excerpt. */
235 pid_t pid = getpid ();
236 pthread_t tid = pthread_self ();
/* NOTE(review): the "read:1" trace below formats `size` before the
 * vcom_read() call assigns it; what it prints depends on the initializer
 * of `size`, whose declaration is not visible here - confirm it is
 * initialized.  Both traces also narrow `size` and `__nbytes` to int. */
238 if (is_vcom_socket_fd (__fd))
242 "[%d][%lu (0x%lx)] read:1 "
243 "'%04d'='%04d', '%p', '%04d'\n",
244 pid, (unsigned long) tid, (unsigned long) tid,
245 (int) size, __fd, __buf, (int) __nbytes);
246 size = vcom_read (__fd, __buf, __nbytes);
249 "[%d][%lu (0x%lx)] read:2 "
250 "'%04d'='%04d', '%p', '%04d'\n",
251 pid, (unsigned long) tid, (unsigned long) tid,
252 (int) size, __fd, __buf, (int) __nbytes);
/* not a vcom descriptor: defer to the libc implementation */
260 return libc_read (__fd, __buf, __nbytes);
264 vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt)
266 if (vcom_init () != 0)
271 return vcom_socket_readv (__fd, __iov, __iovcnt);
275 readv (int __fd, const struct iovec * __iov, int __iovcnt)
279 if (is_vcom_socket_fd (__fd))
281 size = vcom_readv (__fd, __iov, __iovcnt);
290 return libc_readv (__fd, __iov, __iovcnt);
293 /* Write N bytes of BUF to FD. Return the number written, or -1.
295 This function is a cancellation point and therefore
296 not marked with __THROW. */
298 vcom_write (int __fd, const void *__buf, size_t __n)
/* Write path for vcom descriptors: checks vcom_init() first, then hands
 * the request to vcom_socket_write().  The value returned when
 * vcom_init() fails is not visible in this excerpt. */
300 if (vcom_init () != 0)
/* The cast drops the const qualifier from __buf.
 * NOTE(review): presumably vcom_socket_write() takes a non-const pointer
 * but never writes through it - confirm against its prototype. */
305 return vcom_socket_write (__fd, (void *) __buf, __n);
309 write (int __fd, const void *__buf, size_t __n)
/* Interposed write(): a descriptor recognised by is_vcom_socket_fd() is
 * served through vcom_write(); any other descriptor reaches the final
 * libc_write() call.  How the vcom result is handed back to the caller is
 * not visible in this excerpt. */
312 pid_t pid = getpid ();
313 pthread_t tid = pthread_self ();
/* NOTE(review): the "write:1" trace below formats `size` before the
 * vcom_write() call assigns it; what it prints depends on the initializer
 * of `size`, whose declaration is not visible here - confirm it is
 * initialized.  Both traces also narrow `size` and `__n` to int. */
315 if (is_vcom_socket_fd (__fd))
319 "[%d][%lu (0x%lx)] write:1 "
320 "'%04d'='%04d', '%p', '%04d'\n",
321 pid, (unsigned long) tid, (unsigned long) tid,
322 (int) size, __fd, __buf, (int) __n);
323 size = vcom_write (__fd, __buf, __n);
326 "[%d][%lu (0x%lx)] write:2 "
327 "'%04d'='%04d', '%p', '%04d'\n",
328 pid, (unsigned long) tid, (unsigned long) tid,
329 (int) size, __fd, __buf, (int) __n);
/* not a vcom descriptor: defer to the libc implementation */
337 return libc_write (__fd, __buf, __n);
341 vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt)
343 if (vcom_init () != 0)
348 return vcom_socket_writev (__fd, __iov, __iovcnt);
352 writev (int __fd, const struct iovec * __iov, int __iovcnt)
356 if (is_vcom_socket_fd (__fd))
358 size = vcom_writev (__fd, __iov, __iovcnt);
367 return libc_writev (__fd, __iov, __iovcnt);
370 /* Do the file control operation described by CMD on FD.
371 The remaining arguments are interpreted depending on CMD.
373 This function is a cancellation point and therefore
374 not marked with __THROW. */
376 vcom_fcntl_va (int __fd, int __cmd, va_list __ap)
378 if (vcom_init () != 0)
383 return vcom_socket_fcntl_va (__fd, __cmd, __ap);
387 vcom_fcntl (int __fd, int __cmd, ...)
392 if (is_vcom_socket_fd (__fd))
394 va_start (ap, __cmd);
395 rv = vcom_fcntl_va (__fd, __cmd, ap);
402 fcntl (int __fd, int __cmd, ...)
406 pid_t pid = getpid ();
408 va_start (ap, __cmd);
409 if (is_vcom_socket_fd (__fd))
411 rv = vcom_fcntl_va (__fd, __cmd, ap);
415 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __cmd);
423 rv = libc_vfcntl (__fd, __cmd, ap);
431 vcom_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap)
433 if (vcom_init () != 0)
438 return vcom_socket_ioctl_va (__fd, __cmd, __ap);
442 vcom_ioctl (int __fd, unsigned long int __cmd, ...)
447 if (is_vcom_socket_fd (__fd))
449 va_start (ap, __cmd);
450 rv = vcom_ioctl_va (__fd, __cmd, ap);
457 ioctl (int __fd, unsigned long int __cmd, ...)
461 pid_t pid = getpid ();
463 va_start (ap, __cmd);
464 if (is_vcom_socket_fd (__fd))
466 rv = vcom_ioctl_va (__fd, __cmd, ap);
470 "'%04d'='%04d', '%04ld'\n", pid, rv, __fd, __cmd);
478 rv = libc_vioctl (__fd, __cmd, ap);
486 * Check the first NFDS descriptors each in READFDS (if not NULL) for
487 * read readiness, in WRITEFDS (if not NULL) for write readiness,
488 * and in EXCEPTFDS (if not NULL) for exceptional conditions.
489 * If TIMEOUT is not NULL, time out after waiting the interval
490 * specified therein. Returns the number of ready descriptors,
493 * This function is a cancellation point and therefore not marked
498 * clear all vcom FDs from fd_sets __readfds, __writefds and
499 * __exceptfds and update the new nfds
501 * new nfds is the highest-numbered file descriptor
502 * in any of the three sets, plus 1
504 * Return the number of file descriptors contained in the
505 * three descriptor sets. ie. the total number of the bits
506 * that are set in __readfds, __writefds and __exceptfds
509 vcom_fd_clear (int __nfds,
511 fd_set * __restrict __readfds,
512 fd_set * __restrict __writefds,
513 fd_set * __restrict __exceptfds)
516 /* invalid max_fd is -1 */
521 /* clear all vcom fd from the sets */
522 for (fd = 0; fd < __nfds; fd++)
525 /* clear vcom fd from set */
530 if ((F) && FD_ISSET (fd, (F))) \
532 if (is_vcom_socket_fd (fd)) \
546 * compute nfd and __new_nfds
548 for (fd = 0; fd < __nfds; fd++)
555 if ((F) && FD_ISSET (fd, (F))) \
571 *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
576 * Return the number of file descriptors contained in the
577 * three descriptor sets. ie. the total number of the bits
578 * that are set in __readfds, __writefds and __exceptfds
581 vcom_fd_set (int __nfds,
584 fd_set * __restrict __readfds,
585 fd_set * __restrict __writefds, fd_set * __restrict __exceptfds,
587 fd_set * __restrict __saved_readfds,
588 fd_set * __restrict __saved_writefds,
589 fd_set * __restrict __saved_exceptfds)
592 /* invalid max_fd is -1 */
596 for (fd = 0; fd < __nfds; fd++)
603 if ((F) && (S) && FD_ISSET (fd, (S))) \
605 if (is_vcom_socket_fd (fd)) \
612 _(__saved_readfds, __readfds);
613 _(__saved_writefds, __writefds);
619 * compute nfd and __new_nfds
621 for (fd = 0; fd < __nfds; fd++)
628 if ((F) && FD_ISSET (fd, (F))) \
644 *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
649 * split select sets(src) into
650 * vcom sets(dest1) and libc sets(dest2)
654 /* src, select sets */
656 fd_set * __restrict readfds,
657 fd_set * __restrict writefds,
658 fd_set * __restrict exceptfds,
659 /* dest1, vcom sets */
661 fd_set * __restrict vcom_readfds,
662 fd_set * __restrict vcom_writefds,
663 fd_set * __restrict vcom_exceptfds, int *vcom_nfd,
664 /* dest2, libc sets */
666 fd_set * __restrict libc_readfds,
667 fd_set * __restrict libc_writefds,
668 fd_set * __restrict libc_exceptfds, int *libc_nfd)
673 /* invalid max_fd is -1 */
674 int vcom_max_fd = -1;
678 /* invalid max_fd is -1 */
679 int libc_max_fd = -1;
683 for (fd = 0; fd < nfds; fd++)
691 if ((S) && FD_ISSET (fd, (S))) \
693 if (is_vcom_socket_fd (fd)) \
698 if (fd > vcom_max_fd) \
710 if (fd > libc_max_fd) \
720 _(readfds, vcom_readfds, libc_readfds);
721 _(writefds, vcom_writefds, libc_writefds);
722 _(exceptfds, vcom_exceptfds, libc_exceptfds);
727 *vcom_nfds = vcom_max_fd != -1 ? vcom_max_fd + 1 : 0;
729 *vcom_nfd = vcom_nfd2;
731 *libc_nfds = libc_max_fd != -1 ? libc_max_fd + 1 : 0;
733 *libc_nfd = libc_nfd2;
737 * merge vcom sets(src1) and libc sets(src2)
738 * into select sets(dest)
742 /* dest, select sets */
744 fd_set * __restrict readfds,
745 fd_set * __restrict writefds,
746 fd_set * __restrict exceptfds, int *nfd,
747 /* src1, vcom sets */
749 fd_set * __restrict vcom_readfds,
750 fd_set * __restrict vcom_writefds,
751 fd_set * __restrict vcom_exceptfds, int vcom_nfd,
752 /* src2, libc sets */
754 fd_set * __restrict libc_readfds,
755 fd_set * __restrict libc_writefds,
756 fd_set * __restrict libc_exceptfds, int libc_nfd)
759 /* invalid max_fd is -1 */
766 * dest |= src at current bit index
767 * update MAX and NFD of dest fd set
773 * MAX current max_fd of dest fd sets
774 * NFD current nfd of dest fd sets
775 * N nfds of source fd set
777 #define FD_BIT_OR(FD,FS,BI, \
779 if ((FS) && (FD) && FD_ISSET ((BI), (FS))) \
781 FD_SET ((BI), (FD)); \
792 * SR,SW,SE source RWE fd sets
793 * DR,DW,DE dest RWE fd sets
795 * NFDS nfds of source fd sets
796 * MAX current max_fd of dest fd sets
797 * NFD current nfd of dest fd sets
799 #define FD_RWE_SETS_OR(DR,DW,DE, \
805 for ((BI) = 0; (BI) < (NFDS); (BI)++) \
807 FD_BIT_OR((DR), (SR), (BI), (MAX), (NFD)); \
808 FD_BIT_OR((DW), (SW), (BI), (MAX), (NFD)); \
809 FD_BIT_OR((DE), (SE), (BI), (MAX), (NFD)); \
815 /* source(vcom) to dest(select) rwe fd sets */
816 FD_RWE_SETS_OR (readfds, writefds, exceptfds,
817 vcom_readfds, vcom_writefds, vcom_exceptfds,
818 fd, vcom_nfds, max_fd, nfd2);
820 /* source(libc) to dest(select) rwe fd sets */
821 FD_RWE_SETS_OR (readfds, writefds, exceptfds,
822 libc_readfds, libc_writefds, libc_exceptfds,
823 fd, libc_nfds, max_fd, nfd2);
825 #undef FD_RWE_SETS_OR
829 *nfds = max_fd != -1 ? max_fd + 1 : 0;
835 * RETURN: 1 if fds is NULL or empty, 0 otherwise
838 fd_set_iszero (fd_set * __restrict fds)
846 for (fd = 0; fd < FD_SETSIZE; fd++)
848 if (FD_ISSET (fd, fds))
864 typedef long int s64;
865 typedef unsigned long int u64;
867 typedef long long int __s64;
868 typedef unsigned long long int __u64;
870 typedef __s64 time64_t;
871 typedef __u64 timeu64_t;
873 /* Parameters used to convert the timespec values: */
874 #define MSEC_PER_SEC 1000L
875 #define USEC_PER_MSEC 1000L
876 #define NSEC_PER_USEC 1000L
877 #define NSEC_PER_MSEC 1000000L
878 #define USEC_PER_SEC 1000000L
879 #define NSEC_PER_SEC 1000000000L
880 #define FSEC_PER_SEC 1000000000000000LL
890 #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
892 #ifdef VCOM_USE_TIMESPEC_EQUAL
894 timespec_equal (const struct timespec *a, const struct timespec *b)
896 return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
901 * lhs < rhs: return <0
902 * lhs == rhs: return 0
903 * lhs > rhs: return >0
906 timespec_compare (const struct timespec *lhs, const struct timespec *rhs)
/* Three-way comparison of two timespec values.  The seconds fields are
 * compared first (the values returned by those two branches are not
 * visible in this excerpt); when they are equal, the sign of the
 * nanosecond difference is the result. */
908 if (lhs->tv_sec < rhs->tv_sec)
910 if (lhs->tv_sec > rhs->tv_sec)
/* NOTE(review): the subtraction is done on tv_nsec values; assumes both
 * operands are normalized (0 <= tv_nsec < NSEC_PER_SEC) so the difference
 * fits the function's return type - confirm the return type, which is not
 * visible here. */
912 return lhs->tv_nsec - rhs->tv_nsec;
915 #ifdef VCOM_USE_TIMEVAL_COMPARE
917 timeval_compare (const struct timeval *lhs, const struct timeval *rhs)
919 if (lhs->tv_sec < rhs->tv_sec)
921 if (lhs->tv_sec > rhs->tv_sec)
923 return lhs->tv_usec - rhs->tv_usec;
927 extern void set_normalized_timespec (struct timespec *ts, time_t sec,
930 static inline struct timespec
931 timespec_add (struct timespec lhs, struct timespec rhs)
933 struct timespec ts_delta;
934 set_normalized_timespec (&ts_delta, lhs.tv_sec + rhs.tv_sec,
935 lhs.tv_nsec + rhs.tv_nsec);
940 * sub = lhs - rhs, in normalized form
942 static inline struct timespec
943 timespec_sub (struct timespec lhs, struct timespec rhs)
945 struct timespec ts_delta;
946 set_normalized_timespec (&ts_delta, lhs.tv_sec - rhs.tv_sec,
947 lhs.tv_nsec - rhs.tv_nsec);
959 * set_normalized_timespec - set timespec sec and nsec parts and normalize
961 * @ts: pointer to timespec variable to be set
962 * @sec: seconds to set
963 * @nsec: nanoseconds to set
965 * Set seconds and nanoseconds field of a timespec variable and
966 * normalize to the timespec storage format
968 * Note: The tv_nsec part is always in the range of
969 * 0 <= tv_nsec < NSEC_PER_SEC
970 * For negative values only the tv_sec field is negative !
973 set_normalized_timespec (struct timespec *ts, time_t sec, s64 nsec)
975 while (nsec >= NSEC_PER_SEC)
978 * The following asm() prevents the compiler from
979 * optimising this loop into a modulo operation. See
980 * also __iter_div_u64_rem() in include/linux/time.h
982 asm ("":"+rm" (nsec));
983 nsec -= NSEC_PER_SEC;
988 asm ("":"+rm" (nsec));
989 nsec += NSEC_PER_SEC;
996 #define vcom_timerisvalid(tvp) (!((tvp)->tv_sec < 0 || (tvp)->tv_usec < 0))
998 /* Macros for converting between `struct timeval' and `struct timespec'. */
999 #define VCOM_TIMEVAL_TO_TIMESPEC(tv, ts) { \
1000 (ts)->tv_sec = (tv)->tv_sec; \
1001 (ts)->tv_nsec = (tv)->tv_usec * 1000; \
1003 #define VCOM_TIMESPEC_TO_TIMEVAL(tv, ts) { \
1004 (tv)->tv_sec = (ts)->tv_sec; \
1005 (tv)->tv_usec = (ts)->tv_nsec / 1000; \
1009 vcom_select_impl (int vcom_nfds, fd_set * __restrict vcom_readfds,
1010 fd_set * __restrict vcom_writefds,
1011 fd_set * __restrict vcom_exceptfds,
1012 struct timeval *__restrict timeout)
/* Thin pass-through: forwards all five arguments unchanged to
 * vcom_socket_select() and returns its result. */
1014 return vcom_socket_select (vcom_nfds, vcom_readfds,
1015 vcom_writefds, vcom_exceptfds, timeout);
1019 vcom_select (int __nfds, fd_set * __restrict __readfds,
1020 fd_set * __restrict __writefds,
1021 fd_set * __restrict __exceptfds,
1022 struct timeval *__restrict __timeout)
1026 pid_t pid = getpid ();
1029 /* block indefinitely */
1031 int first_clock_gettime_failed = 0;
1032 /* timeout value in units of timespec */
1033 struct timespec timeout_ts;
1034 struct timespec start_time, now, end_time;
1036 /* select sets attributes - after merge */
1042 fd_set vcom_readfds;
1043 fd_set vcom_writefds;
1044 fd_set vcom_exceptfds;
1049 fd_set libc_readfds;
1050 fd_set libc_writefds;
1051 fd_set libc_exceptfds;
1055 struct timeval tv = {.tv_sec = 0,.tv_usec = 0 };
1057 /* validate __timeout */
1060 /* validate tv_sec */
1062 if (!vcom_timerisvalid (__timeout))
1068 /* validate tv_usec */
1070 /* init timeout_ts */
1071 VCOM_TIMEVAL_TO_TIMESPEC (__timeout, &timeout_ts);
1072 set_normalized_timespec (&timeout_ts,
1073 timeout_ts.tv_sec, timeout_ts.tv_nsec);
1076 rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
1080 first_clock_gettime_failed = 1;
1087 if (timerisset (__timeout))
1089 end_time = timespec_add (start_time, timeout_ts);
1094 * if both fields of the timeout structure are zero,
1095 * then select returns immediately
1097 end_time = start_time;
1102 /* block indefinitely */
1108 if (vcom_init () != 0)
1114 /* validate __nfds */
1115 if (__nfds < 0 || __nfds > FD_SETSIZE)
1123 * usleep(3) emulation
1126 /* call libc_select() with a finite timeout and
1127 * no file descriptors or empty fd sets and
1130 (!__readfds || fd_set_iszero (__readfds)) &&
1131 (!__writefds || fd_set_iszero (__writefds)) &&
1132 (!__exceptfds || fd_set_iszero (__exceptfds)))
1136 rv = libc_select (__nfds,
1137 __readfds, __writefds, __exceptfds, __timeout);
1143 /* TBD: block indefinitely or return -EINVAL */
1149 /* init once before the polling loop */
1151 /* zero vcom and libc fd sets */
1165 _(__readfds, &vcom_readfds, &libc_readfds);
1166 _(__writefds, &vcom_writefds, &libc_writefds);
1167 _(__exceptfds, &vcom_exceptfds, &libc_exceptfds);
1178 /* src, select sets */
1179 __nfds, __readfds, __writefds, __exceptfds,
1180 /* dest1, vcom sets */
1181 __readfds || __writefds || __exceptfds ?
1183 __readfds ? &vcom_readfds : NULL,
1184 __writefds ? &vcom_writefds : NULL,
1185 __exceptfds ? &vcom_exceptfds : NULL,
1186 __readfds || __writefds || __exceptfds ?
1188 /* dest2, libc sets */
1189 __readfds || __writefds || __exceptfds ?
1191 __readfds ? &libc_readfds : NULL,
1192 __writefds ? &libc_writefds : NULL,
1193 __exceptfds ? &libc_exceptfds : NULL,
1194 __readfds || __writefds || __exceptfds ?
1207 * if both fields of timeval structure are zero,
1208 * vcom_select_impl and libc_select returns immediately.
1209 * useful for polling and ensure fairness among
1210 * file descriptors watched.
1217 /* select on vcom fds */
1220 vcom_nfd = vcom_select_impl (vcom_nfds,
1221 __readfds ? &vcom_readfds : NULL,
1222 __writefds ? &vcom_writefds : NULL,
1223 __exceptfds ? &vcom_exceptfds : NULL,
1227 "[%d] select vcom: "
1228 "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds);
1236 /* select on libc fds */
1239 libc_nfd = libc_select (libc_nfds,
1240 __readfds ? &libc_readfds : NULL,
1241 __writefds ? &libc_writefds : NULL,
1242 __exceptfds ? &libc_exceptfds : NULL, &tv);
1245 "[%d] select libc: "
1246 "'%04d'='%04d'\n", pid, libc_nfd, libc_nfds);
1250 /* tv becomes undefined */
1257 /* check if any file descriptors changed status */
1258 if ((vcom_nfds && vcom_nfd > 0) || (libc_nfds && libc_nfd > 0))
1260 /* zero the sets before merge and exit */
1280 * on exit, sets are modified in place to indicate which
1281 * file descriptors actually changed status
1284 /* dest, select sets */
1286 __readfds, __writefds, __exceptfds, &new_nfd,
1287 /* src1, vcom sets */
1289 __readfds ? &vcom_readfds : NULL,
1290 __writefds ? &vcom_writefds : NULL,
1291 __exceptfds ? &vcom_exceptfds : NULL, vcom_nfd,
1292 /* src2, libc sets */
1294 __readfds ? &libc_readfds : NULL,
1295 __writefds ? &libc_writefds : NULL,
1296 __exceptfds ? &libc_exceptfds : NULL, libc_nfd);
1298 * return the number of file descriptors contained in the
1299 * three returned sets
1311 rv = new_nfd == -1 ? 0 : new_nfd;
1315 rv = clock_gettime (CLOCK_MONOTONIC, &now);
1322 while (no_timeout || timespec_compare (&now, &end_time) < 0);
1324 /* timeout expired before anything interesting happened */
1330 fprintf (stderr, "[%d] vselect1: " "'%04d'='%04d'\n", pid, rv, __nfds);
1332 * modify timeout parameter to reflect the amount of time not slept
1336 if (vcom_timerisvalid (__timeout))
1338 /* timeout expired */
1341 timerclear (__timeout);
1343 else if (!first_clock_gettime_failed)
1345 rv2 = clock_gettime (CLOCK_MONOTONIC, &now);
1352 struct timespec ts_delta;
1353 ts_delta = timespec_sub (end_time, now);
1354 VCOM_TIMESPEC_TO_TIMEVAL (__timeout, &ts_delta);
1360 fprintf (stderr, "[%d] vselect2: " "'%04d',='%04d'\n", pid, rv, __nfds);
1366 vcom_select_internal (int __nfds, fd_set * __restrict __readfds,
1367 fd_set * __restrict __writefds,
1368 fd_set * __restrict __exceptfds,
1369 struct timeval *__restrict __timeout)
1374 pid_t pid = getpid ();
1376 fd_set saved_readfds;
1377 fd_set saved_writefds;
1378 fd_set saved_exceptfds;
1380 /* validate __nfds */
1387 /* validate __timeout */
1390 /* validate tv_sec */
1392 if (__timeout->tv_sec < 0 || __timeout->tv_usec < 0)
1398 /* validate tv_usec */
1402 /* init saved_x fds */
1405 saved_readfds = *__readfds;
1407 memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
1412 FD_ZERO (&saved_readfds);
1417 saved_writefds = *__writefds;
1419 memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
1425 FD_ZERO (&saved_writefds);
1430 saved_exceptfds = *__exceptfds;
1432 memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
1438 FD_ZERO (&saved_exceptfds);
1441 /* clear vcom fds */
1442 nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
1444 /* set to an invalid value */
1446 /* have kernel fds */
1448 rv = libc_select (new_nfds, __readfds,
1449 __writefds, __exceptfds, __timeout);
1451 if (new_nfds && rv == -1)
1453 /* on error, the file descriptor sets are unmodified */
1455 *__readfds = saved_readfds;
1457 *__writefds = saved_writefds;
1459 *__exceptfds = saved_exceptfds;
1462 else if ((new_nfds && rv != -1) || (rv == -2))
1464 /* restore vcom fds */
1465 nfd = vcom_fd_set (__nfds,
1470 &saved_readfds, &saved_writefds, &saved_exceptfds);
1475 fprintf (stderr, "[%d] select: " "'%04d'='%04d'\n", pid, rv, __nfds);
1480 select (int __nfds, fd_set * __restrict __readfds,
1481 fd_set * __restrict __writefds,
1482 fd_set * __restrict __exceptfds, struct timeval *__restrict __timeout)
1485 pid_t pid = getpid ();
1488 fprintf (stderr, "[%d] select1: " "'%04d'='%04d'\n", pid, rv, __nfds);
1489 rv = vcom_select (__nfds, __readfds, __writefds, __exceptfds, __timeout);
1491 fprintf (stderr, "[%d] select2: " "'%04d'='%04d'\n", pid, rv, __nfds);
1500 #ifdef __USE_XOPEN2K
1502 * Same as above only that the TIMEOUT value is given with higher
1503 * resolution and a sigmask which is set temporarily. This
1504 * version should be used.
1506 * This function is a cancellation point and therefore not marked
1510 vcom_pselect (int __nfds, fd_set * __restrict __readfds,
1511 fd_set * __restrict __writefds,
1512 fd_set * __restrict __exceptfds,
1513 const struct timespec *__restrict __timeout,
1514 const __sigset_t * __restrict __sigmask)
1519 for (fd = 0; fd < __nfds; fd++)
1521 if (__readfds && FD_ISSET (fd, __readfds))
1523 if (is_vcom_socket_fd (fd))
1529 if (__writefds && FD_ISSET (fd, __writefds))
1531 if (is_vcom_socket_fd (fd))
1536 if (__exceptfds && FD_ISSET (fd, __exceptfds))
1538 if (is_vcom_socket_fd (fd))
1540 FD_CLR (fd, __exceptfds);
1548 pselect (int __nfds, fd_set * __restrict __readfds,
1549 fd_set * __restrict __writefds,
1550 fd_set * __restrict __exceptfds,
1551 const struct timespec *__restrict __timeout,
1552 const __sigset_t * __restrict __sigmask)
/* Interposed pselect().  Visible flow:
 *   1. snapshot the caller's three sets into saved_*fds,
 *   2. strip vcom descriptors from the caller's sets with vcom_fd_clear(),
 *   3. run libc_pselect() on what remains (new_nfds),
 *   4. on a libc error copy the snapshot back, otherwise call
 *      vcom_fd_set() with the snapshot to put vcom descriptors back.
 * NOTE(review): vcom descriptors are not polled in the lines visible
 * here; presumably vcom_fd_set() re-marks every vcom fd from the snapshot
 * as ready - confirm that this is the intended behaviour. */
1557 pid_t pid = getpid ();
1559 fd_set saved_readfds;
1560 fd_set saved_writefds;
1561 fd_set saved_exceptfds;
1563 /* validate __nfds */
1570 /* validate __timeout */
1573 /* validate tv_sec */
1575 if (__timeout->tv_sec < 0 || __timeout->tv_nsec < 0)
1581 /* validate tv_nsec */
1585 /* init saved fds */
/* Each set is saved both by struct assignment and by memcpy();
 * NOTE(review): presumably these are alternative branches of a
 * conditional that is not visible here - confirm.  A set the caller did
 * not supply is saved as an empty set via FD_ZERO(). */
1588 saved_readfds = *__readfds;
1590 memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
1595 FD_ZERO (&saved_readfds);
1600 saved_writefds = *__writefds;
1602 memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
1608 FD_ZERO (&saved_writefds);
1613 saved_exceptfds = *__exceptfds;
1615 memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
1621 FD_ZERO (&saved_exceptfds);
1624 /* clear vcom fds */
1625 nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
1627 /* set to an invalid value */
/* NOTE(review): the branch below tests rv == -2, so -2 is presumably the
 * "invalid value" meaning libc_pselect() was not called - confirm. */
1630 rv = libc_pselect (new_nfds,
1632 __writefds, __exceptfds, __timeout, __sigmask);
1634 if (new_nfds && rv == -1)
1636 /* on error, the file descriptor sets are unmodified */
1638 *__readfds = saved_readfds;
1640 *__writefds = saved_writefds;
1642 *__exceptfds = saved_exceptfds;
1645 else if ((new_nfds && rv != -1) || (rv == -2))
1647 /* restore vcom fds */
1648 nfd = vcom_fd_set (__nfds,
1653 &saved_readfds, &saved_writefds, &saved_exceptfds);
1658 fprintf (stderr, "[%d] pselect: " "'%04d'='%04d'\n", pid, rv, __nfds);
1665 * Socket specific glibc api
1669 /* Create a new socket of type TYPE in domain DOMAIN, using
1670 * protocol PROTOCOL. If PROTOCOL is zero, one is chosen
1671 * automatically. Returns a file descriptor for the new socket,
1673 * RETURN: a valid file descriptor for the new socket,
1678 vcom_socket (int __domain, int __type, int __protocol)
1680 if (vcom_init () != 0)
1685 return vcom_socket_socket (__domain, __type, __protocol);
1689 socket (int __domain, int __type, int __protocol)
1692 pid_t pid = getpid ();
1693 pthread_t tid = pthread_self ();
1695 /* handle domains implemented by vpp */
1700 /* handle types implemented by vpp */
1701 switch (__type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1706 vcom_socket_main_show ();
1707 rv = vcom_socket (__domain, __type, __protocol);
1710 "[%d][%lu (0x%lx)] socket: "
1711 "'%04d'= D='%04d', T='%04d', P='%04d'\n",
1712 pid, (unsigned long) tid, (unsigned long) tid,
1713 rv, __domain, __type, __protocol);
1715 vcom_socket_main_show ();
1725 goto CALL_GLIBC_SOCKET_API;
1732 goto CALL_GLIBC_SOCKET_API;
1736 CALL_GLIBC_SOCKET_API:
1737 return libc_socket (__domain, __type, __protocol);
1741 * Create two new sockets, of type TYPE in domain DOMAIN and using
1742 * protocol PROTOCOL, which are connected to each other, and put file
1743 * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
1744 * one will be chosen automatically.
1745 * Returns 0 on success, -1 for errors.
1748 vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2])
1750 if (vcom_init () != 0)
1755 return vcom_socket_socketpair (__domain, __type, __protocol, __fds);
1759 socketpair (int __domain, int __type, int __protocol, int __fds[2])
1762 pid_t pid = getpid ();
1764 /* handle domains implemented by vpp */
1769 /* handle types implemented by vpp */
1774 rv = vcom_socketpair (__domain, __type, __protocol, __fds);
1778 "'%04d'= D='%04d', T='%04d', P='%04d'\n",
1779 pid, rv, __domain, __type, __protocol);
1789 goto CALL_GLIBC_SOCKET_API;
1796 goto CALL_GLIBC_SOCKET_API;
1800 CALL_GLIBC_SOCKET_API:
1801 return libc_socketpair (__domain, __type, __protocol, __fds);
1805 * Give the socket FD the local address ADDR
1806 * (which is LEN bytes long).
1809 vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
/* Bind path for vcom descriptors: checks vcom_init(), validates __len
 * against the address family, then forwards to vcom_socket_bind() and
 * keeps its result in rv.
 * NOTE(review): __addr is dereferenced by the switches below; no NULL
 * check is visible in this excerpt - confirm callers guarantee a valid
 * pointer. */
1813 if (vcom_init () != 0)
1818 /* validate __len */
/* __len must equal the exact size of the sockaddr structure compared
 * in each branch. */
1819 switch (__addr->sa_family)
1822 if (__len != sizeof (struct sockaddr_in))
1826 if (__len != sizeof (struct sockaddr_in6))
1835 /* handle domains implemented by vpp */
1836 switch (__addr->sa_family)
1840 rv = vcom_socket_bind (__fd, __addr, __len);
1853 bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1856 pid_t pid = getpid ();
1858 if (is_vcom_socket_fd (__fd))
1861 rv = vcom_bind (__fd, __addr, __len);
1865 "'%04d'='%04d', '%p', '%04d'\n",
1866 pid, rv, __fd, __addr, __len);
1874 return libc_bind (__fd, __addr, __len);
1878 * Put the local address of FD into *ADDR and its length in *LEN.
1881 vcom_getsockname (int __fd, __SOCKADDR_ARG __addr,
1882 socklen_t * __restrict __len)
1884 if (vcom_init () != 0)
1889 return vcom_socket_getsockname (__fd, __addr, __len);
1893 getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
1896 pid_t pid = getpid ();
1898 if (is_vcom_socket_fd (__fd))
1900 rv = vcom_getsockname (__fd, __addr, __len);
1903 "[%d] getsockname: "
1904 "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
1912 return libc_getsockname (__fd, __addr, __len);
1916 * Open a connection on socket FD to peer at ADDR
1917 * (which is LEN bytes long). For connectionless socket types, just set
1918 * the default address to send to and the only address from which to
1919 * accept transmissions. Return 0 on success, -1 for errors.
1920 * This function is a cancellation point and therefore not marked
1924 vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
/* Connect path for vcom descriptors: validates __len against the address
 * family of __addr, then forwards to vcom_socket_connect().
 *
 * __fd    vcom socket descriptor
 * __addr  peer address; its sa_family drives both switches below
 * __len   size of *__addr in bytes
 *
 * RETURN: the result of vcom_socket_connect() stored in rv for the
 *         families it is called for; -EAFNOSUPPORT or -EPFNOSUPPORT from
 *         the switches for address families they do not handle.
 */
1934 /* validate __len */
/* __len is a socket address size, so it is compared with the sockaddr
 * structure of the family, exactly as vcom_bind() does.  The previous
 * INET_ADDRSTRLEN / INET6_ADDRSTRLEN constants are text-buffer sizes for
 * inet_ntop(); INET6_ADDRSTRLEN (46) differs from
 * sizeof (struct sockaddr_in6) (28 on Linux), so a correctly sized IPv6
 * address failed this check. */
1935 switch (__addr->sa_family)
1938 if (__len != sizeof (struct sockaddr_in))
1942 if (__len != sizeof (struct sockaddr_in6))
1947 return -EAFNOSUPPORT;
1951 /* handle domains implemented by vpp */
1952 switch (__addr->sa_family)
1956 rv = vcom_socket_connect (__fd, __addr, __len);
1960 return -EPFNOSUPPORT;
1968 connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1971 pid_t pid = getpid ();
1973 if (is_vcom_socket_fd (__fd))
1975 rv = vcom_connect (__fd, __addr, __len);
1979 "'%04d'='%04d', '%p', '%04d'\n",
1980 pid, rv, __fd, __addr, __len);
1989 return libc_connect (__fd, __addr, __len);
1993 * Put the address of the peer connected to socket FD into *ADDR
1994 * (which is *LEN bytes long), and its actual length into *LEN.
1997 vcom_getpeername (int __fd, __SOCKADDR_ARG __addr,
1998 socklen_t * __restrict __len)
2000 if (vcom_init () != 0)
2005 return vcom_socket_getpeername (__fd, __addr, __len);
2009 getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
2012 pid_t pid = getpid ();
2014 if (is_vcom_socket_fd (__fd))
2016 rv = vcom_getpeername (__fd, __addr, __len);
2019 "[%d] getpeername: "
2020 "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
2028 return libc_getpeername (__fd, __addr, __len);
2032 * Send N bytes of BUF to socket FD. Returns the number sent or -1.
2033 * This function is a cancellation point and therefore not marked
2037 vcom_send (int __fd, const void *__buf, size_t __n, int __flags)
/* Send path for vcom descriptors: checks vcom_init() first, then hands
 * the request to vcom_socket_send().  The value returned when
 * vcom_init() fails is not visible in this excerpt. */
2040 if (vcom_init () != 0)
/* The casts drop the const qualifier from __buf and narrow the size_t
 * length to int.
 * NOTE(review): a length above INT_MAX would be altered by the (int)
 * cast; vcom_recv() passes its length uncast - confirm the prototype of
 * vcom_socket_send(). */
2045 return vcom_socket_send (__fd, (void *) __buf, (int) __n, __flags);
2049 send (int __fd, const void *__buf, size_t __n, int __flags)
2052 pid_t pid = getpid ();
2054 if (is_vcom_socket_fd (__fd))
2056 size = vcom_send (__fd, __buf, __n, __flags);
2060 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2061 pid, (int) size, __fd, __buf, (int) __n, __flags);
2069 return libc_send (__fd, __buf, __n, __flags);
2073 sendfile (int __out_fd, int __in_fd, off_t * __offset, size_t __len)
/* Interposed sendfile(): when __out_fd is a vcom descriptor the transfer
 * goes through vcom_socket_sendfile(), otherwise through libc_sendfile().
 * Only __out_fd is tested with is_vcom_socket_fd(); __in_fd is passed
 * along unchanged on both paths.
 * NOTE(review): whether the clib_warning() traces are guarded by a debug
 * condition is not visible in this excerpt. */
/* NOTE(review): __len is a size_t but is formatted with %ld here, while
 * the trace after vcom_socket_sendfile() uses %lu for the same value. */
2078 clib_warning ("[%d] __out_fd %d, __in_fd %d, __offset %p, __len %ld",
2079 getpid (), __out_fd, __in_fd, __offset, __len);
2081 if (is_vcom_socket_fd (__out_fd))
2083 /* TBD: refactor this check to be part of is_vcom_socket_fd() */
2084 if (vcom_init () != 0)
2087 size = vcom_socket_sendfile (__out_fd, __in_fd, __offset, __len);
/* a NULL __offset is traced as -1 instead of being dereferenced */
2089 clib_warning ("[%d] vcom_socket_sendfile (out_fd %d, in_fd %d, "
2090 "offset %p (%ld), len %lu) returned %ld",
2091 getpid (), __out_fd, __in_fd, __offset,
2092 __offset ? *__offset : -1, __len, size);
/* not a vcom descriptor: defer to the libc implementation */
2101 clib_warning ("[%d] calling libc_sendfile!", getpid ());
2102 return libc_sendfile (__out_fd, __in_fd, __offset, __len);
2106 sendfile64 (int __out_fd, int __in_fd, off_t * __offset, size_t __len)
2108 return sendfile (__out_fd, __in_fd, __offset, __len);
2113 * Read N bytes into BUF from socket FD.
2114 * Returns the number read or -1 for errors.
2115 * This function is a cancellation point and therefore not marked
2119 vcom_recv (int __fd, void *__buf, size_t __n, int __flags)
2121 if (vcom_init () != 0)
2126 return vcom_socket_recv (__fd, __buf, __n, __flags);
/*
 * recv: interposed recv() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by vcom_recv ()
 * and the outcome is traced (pid, result, fd, buffer, length, flags);
 * the closing statement hands the call to libc_recv ().
 * NOTE(review): trace guard/label and negative-result handling are on
 * lines absent from this listing - confirm them.
 */
2130 recv (int __fd, void *__buf, size_t __n, int __flags)
2133 pid_t pid = getpid ();
2135 if (is_vcom_socket_fd (__fd))
2137 size = vcom_recv (__fd, __buf, __n, __flags);
2141 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2142 pid, (int) size, __fd, __buf, (int) __n, __flags);
2150 return libc_recv (__fd, __buf, __n, __flags);
/*
 * vcom_sendto - send N bytes of BUF on socket FD to the peer at ADDR
 * (ADDR_LEN bytes long) through the VCL path.
 *
 * A failed vcom_init () yields -1; otherwise the value produced by
 * vcom_socket_sendto () is returned untouched.
 *
 * This function is a cancellation point.
 */
ssize_t
vcom_sendto (int __fd, const void *__buf, size_t __n, int __flags,
             __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
{
  return (vcom_init () != 0)
    ? -1
    : vcom_socket_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
}
/*
 * sendto: interposed sendto() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_sendto () and the outcome is traced; the closing statement hands
 * the call to libc_sendto ().
 * NOTE(review): part of the trace format string, its guard and the
 * negative-result handling are on lines absent from this listing.
 */
2172 sendto (int __fd, const void *__buf, size_t __n, int __flags,
2173 __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
2176 pid_t pid = getpid ();
2178 if (is_vcom_socket_fd (__fd))
2180 size = vcom_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
2184 "'%04d'='%04d', '%p', '%04d', '%04x', "
2186 pid, (int) size, __fd, __buf, (int) __n, __flags,
2187 __addr, __addr_len);
2195 return libc_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
/*
 * vcom_recvfrom - read N bytes into BUF through socket FD via the VCL
 * path.  If ADDR is not NULL, the sender's address is stored there and
 * its actual size in *ADDR_LEN (work done by vcom_socket_recvfrom ()).
 *
 * Returns -1 when vcom_init () fails, otherwise the callee's result
 * (bytes read or an error value).
 *
 * This function is a cancellation point.
 */
ssize_t
vcom_recvfrom (int __fd, void *__restrict __buf, size_t __n,
               int __flags,
               __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
{
  if (vcom_init () == 0)
    return vcom_socket_recvfrom (__fd, __buf, __n, __flags, __addr,
                                 __addr_len);

  return -1;
}
/*
 * recvfrom: interposed recvfrom() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_recvfrom () and the outcome is traced; the closing statement hands
 * the call to libc_recvfrom ().
 * NOTE(review): one parameter line (__flags is used below but not declared
 * in the visible signature), part of the trace format, its guard and the
 * negative-result handling are absent from this listing.
 */
2221 recvfrom (int __fd, void *__restrict __buf, size_t __n,
2223 __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
2226 pid_t pid = getpid ();
2228 if (is_vcom_socket_fd (__fd))
2230 size = vcom_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
2234 "'%04d'='%04d', '%p', '%04d', '%04x', "
2236 pid, (int) size, __fd, __buf, (int) __n, __flags,
2237 __addr, __addr_len);
2245 return libc_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
/*
 * vcom_sendmsg - send the message described by MESSAGE on socket FD
 * through the VCL path.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_sendmsg () (bytes sent or an error value).
 *
 * This function is a cancellation point.
 */
ssize_t
vcom_sendmsg (int __fd, const struct msghdr * __message, int __flags)
{
  return (vcom_init () != 0)
    ? -1 : vcom_socket_sendmsg (__fd, __message, __flags);
}
/*
 * sendmsg: interposed sendmsg() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_sendmsg () and the outcome is traced (pid, result, fd, message
 * pointer, flags); the closing statement hands the call to
 * libc_sendmsg ().
 * NOTE(review): trace guard/label and negative-result handling are on
 * lines absent from this listing - confirm them.
 */
2266 sendmsg (int __fd, const struct msghdr * __message, int __flags)
2269 pid_t pid = getpid ();
2271 if (is_vcom_socket_fd (__fd))
2273 size = vcom_sendmsg (__fd, __message, __flags);
2277 "'%04d'='%04d', '%p', '%04x'\n",
2278 pid, (int) size, __fd, __message, __flags);
2286 return libc_sendmsg (__fd, __message, __flags);
/*
 * vcom_sendmmsg - send VLEN messages, as described by VMESSAGES, to
 * socket FD through the VCL path.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_sendmmsg () (number of datagrams written or an error
 * value).
 *
 * This function is a cancellation point.
 */
int
vcom_sendmmsg (int __fd, struct mmsghdr *__vmessages,
               unsigned int __vlen, int __flags)
{
  if (vcom_init () != 0)
    {
      return -1;
    }

  /* The message vector parameter is __vmessages; the previous code
   * referred to an identifier (__message) that is not declared here. */
  return vcom_socket_sendmmsg (__fd, __vmessages, __vlen, __flags);
}
2310 sendmmsg (int __fd, struct mmsghdr *__vmessages,
2311 unsigned int __vlen, int __flags)
2314 pid_t pid = getpid ();
2316 if (is_vcom_socket_fd (__fd))
2318 size = vcom_sendmmsg (__fd, __message, __vlen, __flags);
2322 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2323 pid, (int) size, __fd, __vmessages, __vlen, __flags);
2331 return libc_sendmmsg (__fd, __message, __vlen, __flags);
/*
 * vcom_recvmsg - receive a message, as described by MESSAGE, from
 * socket FD through the VCL path.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_recvmsg () (bytes read or an error value).
 *
 * This function is a cancellation point.
 */
ssize_t
vcom_recvmsg (int __fd, struct msghdr * __message, int __flags)
{
  if (vcom_init () == 0)
    return vcom_socket_recvmsg (__fd, __message, __flags);

  return -1;
}
/*
 * recvmsg: interposed recvmsg() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_recvmsg () and the outcome is traced (pid, result, fd, message
 * pointer, flags); the closing statement hands the call to
 * libc_recvmsg ().
 * NOTE(review): trace guard/label and negative-result handling are on
 * lines absent from this listing - confirm them.
 */
2354 recvmsg (int __fd, struct msghdr * __message, int __flags)
2357 pid_t pid = getpid ();
2359 if (is_vcom_socket_fd (__fd))
2361 size = vcom_recvmsg (__fd, __message, __flags);
2365 "'%04d'='%04d', '%p', '%04x'\n",
2366 pid, (int) size, __fd, __message, __flags);
2374 return libc_recvmsg (__fd, __message, __flags);
/*
 * vcom_recvmmsg - receive up to VLEN messages, as described by
 * VMESSAGES, from socket FD through the VCL path.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_recvmmsg () (number of messages received or an error
 * value).
 *
 * This function is a cancellation point.
 */
int
vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages,
               unsigned int __vlen, int __flags, struct timespec *__tmo)
{
  if (vcom_init () != 0)
    {
      return -1;
    }

  /* The message vector parameter is __vmessages; the previous code
   * referred to an identifier (__message) that is not declared here. */
  return vcom_socket_recvmmsg (__fd, __vmessages, __vlen, __flags, __tmo);
}
2397 recvmmsg (int __fd, struct mmsghdr *__vmessages,
2398 unsigned int __vlen, int __flags, struct timespec *__tmo)
2401 pid_t pid = getpid ();
2403 if (is_vcom_socket_fd (__fd))
2405 size = vcom_recvmmsg (__fd, __message, __vlen, __flags, __tmo);
2409 "'%04d'='%04d', '%p', "
2410 "'%04d', '%04x', '%p'\n",
2411 pid, (int) size, __fd, __vmessages, __vlen, __flags, __tmo);
2419 return libc_recvmmsg (__fd, __message, __vlen, __flags, __tmo);
/*
 * vcom_getsockopt - fetch the current value of option OPTNAME at
 * protocol level LEVEL for socket FD into OPTVAL (*OPTLEN bytes long)
 * and store the value's actual length in *OPTLEN, via the VCL path.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_getsockopt ().
 */
int
vcom_getsockopt (int __fd, int __level, int __optname,
                 void *__restrict __optval, socklen_t * __restrict __optlen)
{
  return (vcom_init () != 0)
    ? -1
    : vcom_socket_getsockopt (__fd, __level, __optname, __optval, __optlen);
}
/*
 * getsockopt: interposed getsockopt() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_getsockopt () and the outcome is traced; the closing statement
 * hands the call to libc_getsockopt ().
 * NOTE(review): part of the trace format, its guard and the handling of a
 * negative rv are on lines absent from this listing.
 */
2444 getsockopt (int __fd, int __level, int __optname,
2445 void *__restrict __optval, socklen_t * __restrict __optlen)
2448 pid_t pid = getpid ();
2450 if (is_vcom_socket_fd (__fd))
2452 rv = vcom_getsockopt (__fd, __level, __optname, __optval, __optlen);
2456 "'%04d'='%04d', '%04d', '%04d', "
2458 pid, rv, __fd, __level, __optname, __optval, __optlen);
2466 return libc_getsockopt (__fd, __level, __optname, __optval, __optlen);
/*
 * vcom_setsockopt - set option OPTNAME at protocol level LEVEL on
 * socket FD to *OPTVAL (OPTLEN bytes long), via the VCL path.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_setsockopt ().
 */
int
vcom_setsockopt (int __fd, int __level, int __optname,
                 const void *__optval, socklen_t __optlen)
{
  if (vcom_init () == 0)
    return vcom_socket_setsockopt (__fd, __level, __optname,
                                   __optval, __optlen);

  return -1;
}
/*
 * setsockopt: interposed setsockopt() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_setsockopt () and the outcome is traced; the closing statement
 * hands the call to libc_setsockopt ().
 * NOTE(review): part of the trace format, its guard and the handling of a
 * negative rv are on lines absent from this listing.
 */
2488 setsockopt (int __fd, int __level, int __optname,
2489 const void *__optval, socklen_t __optlen)
2492 pid_t pid = getpid ();
2494 if (is_vcom_socket_fd (__fd))
2496 rv = vcom_setsockopt (__fd, __level, __optname, __optval, __optlen);
2500 "'%04d'='%04d', '%04d', '%04d', "
2502 pid, rv, __fd, __level, __optname, __optval, __optlen);
2510 return libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
/*
 * vcom_listen - prepare to accept connections on socket FD via the VCL
 * path.  N connection requests will be queued before further requests
 * are refused.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_listen ().
 */
int
vcom_listen (int __fd, int __n)
{
  return (vcom_init () != 0) ? -1 : vcom_socket_listen (__fd, __n);
}
/*
 * listen: interposed listen() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_listen () and the outcome is traced (pid, rv, fd, backlog); the
 * closing statement hands the call to libc_listen ().
 * NOTE(review): trace guard/label and the handling of a negative rv are
 * on lines absent from this listing.
 */
2531 listen (int __fd, int __n)
2534 pid_t pid = getpid ();
2536 if (is_vcom_socket_fd (__fd))
2538 rv = vcom_listen (__fd, __n);
2542 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __n);
2550 return libc_listen (__fd, __n);
/*
 * vcom_accept - await a connection on socket FD via the VCL path.
 *
 * When a connection arrives, a new socket is opened to communicate with
 * it, *ADDR (which is *ADDR_LEN bytes long) is set to the address of
 * the connecting peer and *ADDR_LEN to the address's actual length;
 * that work is carried out by vcom_socket_accept (), whose result (the
 * new descriptor or an error value) is returned.  A failed
 * vcom_init () yields -1.
 *
 * This function is a cancellation point.
 */
int
vcom_accept (int __fd, __SOCKADDR_ARG __addr,
             socklen_t * __restrict __addr_len)
{
  if (vcom_init () == 0)
    return vcom_socket_accept (__fd, __addr, __addr_len);

  return -1;
}
/*
 * accept: interposed accept() entry point.
 * For descriptors accepted by is_vcom_socket_fd () the call is traced
 * before ("accept1") and after ("accept2") vcom_accept (), each trace
 * carrying pid and thread id, and vcom_socket_main_show () is invoked on
 * both sides of the call.  The closing statement hands the call to
 * libc_accept ().
 * NOTE(review): the "accept1" trace prints rv before the visible
 * assignment from vcom_accept (); rv's declaration/initialiser, the trace
 * guards and the negative-rv handling are absent from this listing.
 */
2575 accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
2578 pid_t pid = getpid ();
2579 pthread_t tid = pthread_self ();
2581 if (is_vcom_socket_fd (__fd))
2584 vcom_socket_main_show ();
2587 "[%d][%lu (0x%lx)] accept1: "
2588 "'%04d'='%04d', '%p', '%p'\n",
2589 pid, (unsigned long) tid, (unsigned long) tid,
2590 rv, __fd, __addr, __addr_len);
2591 rv = vcom_accept (__fd, __addr, __addr_len);
2594 "[%d][%lu (0x%lx)] accept2: "
2595 "'%04d'='%04d', '%p', '%p'\n",
2596 pid, (unsigned long) tid, (unsigned long) tid,
2597 rv, __fd, __addr, __addr_len);
2599 vcom_socket_main_show ();
2607 return libc_accept (__fd, __addr, __addr_len);
/*
 * vcom_accept4 - similar to vcom_accept but takes an additional
 * parameter, __flags, which is passed through to
 * vcom_socket_accept4 ().
 *
 * Returns -1 when vcom_init () fails; otherwise the callee's result.
 *
 * This function is a cancellation point.
 */
int
vcom_accept4 (int __fd, __SOCKADDR_ARG __addr,
              socklen_t * __restrict __addr_len, int __flags)
{
  return (vcom_init () != 0)
    ? -1 : vcom_socket_accept4 (__fd, __addr, __addr_len, __flags);
}
/*
 * accept4: interposed accept4() entry point.
 * Emits an "in the beginning" trace, then for descriptors accepted by
 * is_vcom_socket_fd () calls vcom_accept4 () between two
 * vcom_socket_main_show () calls and emits a "VCL" trace.  A "libc" trace
 * precedes the closing call to libc_accept4 ().
 * NOTE(review): the first and the "libc" traces print rv although no
 * assignment to it is visible before them; rv's declaration, the trace
 * guards and the negative-rv handling are absent from this listing.
 */
2630 accept4 (int __fd, __SOCKADDR_ARG __addr,
2631 socklen_t * __restrict __addr_len, int __flags)
2634 pid_t pid = getpid ();
2637 "[%d] accept4: in the beginning... "
2638 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2639 pid, rv, __fd, __addr, __addr_len, __flags);
2641 if (is_vcom_socket_fd (__fd))
2644 vcom_socket_main_show ();
2645 rv = vcom_accept4 (__fd, __addr, __addr_len, __flags);
2648 "[%d] accept4: VCL "
2649 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2650 pid, rv, __fd, __addr, __addr_len, __flags);
2652 vcom_socket_main_show ();
2661 "[%d] accept4: libc "
2662 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2663 pid, rv, __fd, __addr, __addr_len, __flags);
2665 return libc_accept4 (__fd, __addr, __addr_len, __flags);
/*
 * vcom_shutdown - shut down all or part of the connection open on
 * socket FD via the VCL path.  HOW determines what to shut down:
 *   SHUT_RD   = No more receptions;
 *   SHUT_WR   = No more transmissions;
 *   SHUT_RDWR = No more receptions or transmissions.
 *
 * Returns -1 when vcom_init () fails; otherwise the result of
 * vcom_socket_shutdown ().
 */
int
vcom_shutdown (int __fd, int __how)
{
  if (vcom_init () == 0)
    return vcom_socket_shutdown (__fd, __how);

  return -1;
}
/*
 * shutdown: interposed shutdown() entry point.
 * Descriptors accepted by is_vcom_socket_fd () are served by
 * vcom_shutdown () and the outcome is traced (pid, rv, fd, how); the
 * closing statement hands the call to libc_shutdown ().
 * NOTE(review): trace guard/label and the handling of a negative rv are
 * on lines absent from this listing.
 */
2687 shutdown (int __fd, int __how)
2690 pid_t pid = getpid ();
2692 if (is_vcom_socket_fd (__fd))
2694 rv = vcom_shutdown (__fd, __how);
2698 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __how);
2706 return libc_shutdown (__fd, __how);
/*
 * vcom_epoll_create: VCL-side epoll_create().
 * Checks vcom_init () and finishes by delegating to
 * vcom_epoll_create1 (0); __size is not forwarded.
 * NOTE(review): several statements between the init check and the final
 * return are absent from this listing (presumably validation of __size) -
 * confirm against the complete file.
 */
2710 vcom_epoll_create (int __size)
2713 if (vcom_init () != 0)
2723 /* __size argument is ignored "thereafter" */
2724 return vcom_epoll_create1 (0);
/*
 * epoll_create: interposed epoll_create() entry point.
 * Calls vcom_epoll_create (__size) and traces pid, rv and __size.
 * NOTE(review): no libc fallback is visible; the trace guard and the
 * return/errno handling are on lines absent from this listing.
 */
2728 * __size argument is ignored, but must be greater than zero
2731 epoll_create (int __size)
2734 pid_t pid = getpid ();
2736 rv = vcom_epoll_create (__size);
2739 "[%d] epoll_create: " "'%04d'='%04d'\n", pid, rv, __size);
/*
 * vcom_epoll_create1: VCL-side epoll_create1().
 * Checks vcom_init (), tests __flags for any bit other than
 * EPOLL_CLOEXEC, and finishes by delegating to
 * vcom_socket_epoll_create1 (__flags).
 * NOTE(review): the values returned when either check trips are on lines
 * absent from this listing.
 */
2749 vcom_epoll_create1 (int __flags)
2751 if (vcom_init () != 0)
2760 if (__flags & ~EPOLL_CLOEXEC)
2764 /* __flags can be either zero or EPOLL_CLOEXEC */
2765 /* implementation */
2766 return vcom_socket_epoll_create1 (__flags);
2770 * __flags can be either zero or EPOLL_CLOEXEC
2773 epoll_create1 (int __flags)
2776 pid_t pid = getpid ();
2778 rv = vcom_epoll_create1 (__flags);
2781 "[%d] epoll_create: " "'%04d'='%08x'\n", pid, rv, __flags);
/*
 * ep_op_has_event - tell whether an epoll_ctl operation carries an
 * event argument: 0 for EPOLL_CTL_DEL, 1 for any other op value.
 */
static inline int
ep_op_has_event (int op)
{
  return (op == EPOLL_CTL_DEL) ? 0 : 1;
}
/*
 * vcom_epoll_ctl: VCL-side epoll_ctl().
 * Visible checks, in order: vcom_init (); __op must be one of
 * EPOLL_CTL_ADD / EPOLL_CTL_MOD / EPOLL_CTL_DEL; an op for which
 * ep_op_has_event () is true must come with a non-NULL __event.  The
 * request is finally handed to vcom_socket_epoll_ctl ().
 * NOTE(review): the values returned by each failed check, and the
 * condition behind the "fd is same as epfd" comment, are on lines absent
 * from this listing.
 */
2797 vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
2799 if (vcom_init () != 0)
2805 * the requested operation __op is not supported
2806 * by this interface */
2807 if (!((__op == EPOLL_CTL_ADD) ||
2808 (__op == EPOLL_CTL_MOD) || (__op == EPOLL_CTL_DEL)))
2813 /* op is ADD or MOD but event parameter is NULL */
2814 if ((ep_op_has_event (__op) && !__event))
2819 /* fd is same as epfd */
2820 /* do not permit adding an epoll file descriptor inside itself */
2826 /* implementation */
2827 return vcom_socket_epoll_ctl (__epfd, __op, __fd, __event);
/*
 * epoll_ctl: interposed epoll_ctl() entry point.
 * Calls vcom_epoll_ctl () with the caller's arguments and traces pid, rv,
 * __epfd, __op and __fd.
 * NOTE(review): no libc fallback is visible; the trace guard/label and
 * the return/errno handling are on lines absent from this listing.
 */
2831 * implement the controller interface for epoll
2832 * that enables the insertion/removal/change of
2833 * file descriptors inside the interest set.
2836 epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
2839 pid_t pid = getpid ();
2841 rv = vcom_epoll_ctl (__epfd, __op, __fd, __event);
2845 "'%04d'='%04d', '%04d', '%04d'\n", pid, rv, __epfd, __op, __fd);
/*
 * epoll_wait: interposed epoll_wait() entry point.
 * Rejects __maxevents outside 1..EP_MAX_EVENTS with a message on stderr,
 * then waits through vcom_socket_epoll_pwait () with a NULL signal mask
 * and traces the outcome.
 * NOTE(review): unlike epoll_pwait, no is_vcom_epfd () test is visible
 * here; the value returned for an invalid __maxevents, the trace guard
 * and the return/errno handling are on lines absent from this listing.
 */
2855 epoll_wait (int __epfd, struct epoll_event *__events,
2856 int __maxevents, int __timeout)
2859 pid_t pid = getpid ();
2861 if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
2863 fprintf (stderr, "[%d] ERROR: epoll_wait() invalid maxevents %d\n",
2870 vcom_socket_epoll_pwait (__epfd, __events, __maxevents, __timeout, NULL);
2874 "'%04d'='%04d', '%p', "
2876 pid, rv, __epfd, __events, __maxevents, __timeout);
/*
 * epoll_pwait: interposed epoll_pwait() entry point.
 * Checks __maxevents against the range 1..EP_MAX_EVENTS, and for
 * descriptors accepted by is_vcom_epfd () waits through
 * vcom_socket_epoll_pwait () and traces the outcome, including __ss.
 * NOTE(review): the result of the range check, the branch taken for
 * non-VCL epoll descriptors, the trace guard and the return/errno
 * handling are on lines absent from this listing.
 */
2887 epoll_pwait (int __epfd, struct epoll_event *__events,
2888 int __maxevents, int __timeout, const __sigset_t * __ss)
2891 pid_t pid = getpid ();
2893 if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
2899 if (is_vcom_epfd (__epfd))
2902 vcom_socket_epoll_pwait (__epfd, __events, __maxevents, __timeout,
2906 "[%d] epoll_pwait: "
2907 "'%04d'='%04d', '%p', "
2910 pid, rv, __epfd, __events, __maxevents, __timeout, __ss);
/*
 * vcom_poll: poll a mixed set of libc and VCL descriptors.
 *
 * Outline of the visible statements:
 *  - record start_time with clock_gettime (CLOCK_MONOTONIC); convert the
 *    millisecond __timeout into timeout_ts and derive end_time
 *    (start_time + timeout_ts, or start_time itself);
 *  - require vcom_init (); read RLIMIT_NOFILE and compare __nfds with
 *    rlim_cur and with MAX_POLL_NFDS_DEFAULT (the size of vcom_fds);
 *  - clear every revents field, then memcpy __fds into vcom_fds;
 *  - per entry with a non-negative fd: descriptors matching
 *    is_vcom_socket_fd () or is_vcom_epfd () are negated in __fds;
 *    vcom_fds entries are negated too, with fd 0 replaced by -1
 *    (presumably for the non-VCL descriptors - the else line is absent);
 *  - repeatedly call libc_poll (__fds, __nfds, 0) and
 *    vcom_socket_poll (vcom_fds, __nfds, 0); stop when either reports
 *    events, on error, or once `now` reaches end_time (loops forever for
 *    a negative __timeout);
 *  - at poll_done_update_nfds, for every non-negative vcom_fds entry,
 *    flip the sign of the matching __fds fd back and copy revents across.
 *
 * NOTE(review): `__fds[i].fd = -__fds[i].fd` cannot hide descriptor 0
 * from libc_poll (), since -0 == 0; vcom_fds special-cases 0 but __fds
 * does not.
 * NOTE(review): `__nfds >= MAX_POLL_NFDS_DEFAULT` also rejects a set that
 * would exactly fill vcom_fds - confirm whether `>` was intended.
 * NOTE(review): the loop polls with zero timeouts, so it spins until
 * something is ready; the updates of libc_nfds / vcom_nfds tested in the
 * "changed status" condition are on lines absent from this listing.
 */
2927 /* Poll the file descriptors described by the NFDS structures starting at
2928 FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for
2929 an event to occur; if TIMEOUT is -1, block until an event occurs.
2930 Returns the number of file descriptors with events, zero if timed out,
2933 This function is a cancellation point and therefore not marked with
2937 vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
2940 pid_t pid = getpid ();
2942 struct rlimit nofile_limit;
2943 struct pollfd vcom_fds[MAX_POLL_NFDS_DEFAULT];
2946 /* actual set of file descriptors to be monitored */
2947 nfds_t libc_nfds = 0;
2948 nfds_t vcom_nfds = 0;
2950 /* ready file descriptors
2952 * number of structures which have nonzero revents fields
2953 * in other words, descriptors with events or errors reported.
2955 /* after call to libc_poll () */
2957 /* after call to vcom_socket_poll () */
2961 /* timeout value in units of timespec */
2962 struct timespec timeout_ts;
2963 struct timespec start_time, now, end_time;
2966 /* get start_time */
2967 rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
2974 /* set timeout_ts & end_time */
2977 /* set timeout_ts */
2978 timeout_ts.tv_sec = __timeout / MSEC_PER_SEC;
2979 timeout_ts.tv_nsec = (__timeout % MSEC_PER_SEC) * NSEC_PER_MSEC;
2980 set_normalized_timespec (&timeout_ts,
2981 timeout_ts.tv_sec, timeout_ts.tv_nsec);
2985 end_time = timespec_add (start_time, timeout_ts);
2989 end_time = start_time;
2993 if (vcom_init () != 0)
2999 /* validate __fds */
3006 /* validate __nfds */
3007 /*TBD: call getrlimit once when vcl-ldpreload library is init */
3008 rv = getrlimit (RLIMIT_NOFILE, &nofile_limit);
3014 if (__nfds >= nofile_limit.rlim_cur)
3021 * for the POC, it's fair to assume that nfds is less than 1024
3023 if (__nfds >= MAX_POLL_NFDS_DEFAULT)
3029 /* set revents field (output parameter)
3032 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3034 __fds[fds_idx].revents = 0;
3038 /* set revents field (output parameter)
3039 * to zero for user ignored fds
3041 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3044 * if negative fd, ignore events field
3045 * and set output parameter (revents field) to zero */
3046 if (__fds[fds_idx].fd < 0)
3048 __fds[fds_idx].revents = 0;
3054 * 00. prepare __fds and vcom_fds for polling
3055 * copy __fds to vcom_fds
3056 * 01. negate all except libc fds in __fds,
3057 * ignore user negated fds
3058 * 02. negate all except vcom_fds in vocm fds,
3059 * ignore user negated fds
3060 * ignore fd 0 by setting it to negative number
3062 memcpy (vcom_fds, __fds, sizeof (*__fds) * __nfds);
3065 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3067 /* ignore negative fds */
3068 if (__fds[fds_idx].fd < 0)
3074 * 00. ignore vcom fds in __fds
3075 * 01. ignore libc fds in vcom_fds,
3076 * ignore fd 0 by setting it to negative number.
3077 * as fd 0 cannot be ignored.
3079 if (is_vcom_socket_fd (__fds[fds_idx].fd) ||
3080 is_vcom_epfd (__fds[fds_idx].fd))
3082 __fds[fds_idx].fd = -__fds[fds_idx].fd;
3088 /* ignore fd 0 by setting it to negative number */
3089 if (!vcom_fds[fds_idx].fd)
3091 vcom_fds[fds_idx].fd = -1;
3093 vcom_fds[fds_idx].fd = -vcom_fds[fds_idx].fd;
3100 * poll on libc fds and vcom fds
3102 * specifying a timeout of zero causes libc_poll() and
3103 * vcom_socket_poll() to return immediately, even if no
3104 * file descriptors are ready
3112 * timeout parameter for libc_poll () set to zero
3113 * to poll on libc fds
3116 /* poll on libc fds */
3120 * a timeout of zero causes libc_poll()
3121 * to return immediately
3123 rlibc_nfds = libc_poll (__fds, __nfds, 0);
3127 "'%04d'='%08lu'\n", pid, rlibc_nfds, __nfds);
3132 goto poll_done_update_nfds;
3137 * timeout parameter for vcom_socket_poll () set to zero
3138 * to poll on vcom fds
3141 /* poll on vcom fds */
3145 * a timeout of zero causes vcom_socket_poll()
3146 * to return immediately
3148 rvcom_nfds = vcom_socket_poll (vcom_fds, __nfds, 0);
3152 "'%04d'='%08lu'\n", pid, rvcom_nfds, __nfds);
3156 goto poll_done_update_nfds;
3160 /* check if any file descriptors changed status */
3161 if ((libc_nfds && rlibc_nfds > 0) || (vcom_nfds && rvcom_nfds > 0))
3163 /* something interesting happened */
3164 rv = rlibc_nfds + rvcom_nfds;
3165 goto poll_done_update_nfds;
3168 rv = clock_gettime (CLOCK_MONOTONIC, &now);
3172 goto poll_done_update_nfds;
3176 /* block indefinitely || timeout elapsed */
3177 while ((__timeout < 0) || timespec_compare (&now, &end_time) < 0);
3179 /* timeout expired before anything interesting happened */
3182 poll_done_update_nfds:
3183 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3185 /* ignore negative fds in vcom_fds
3186 * 00. user negated fds
3189 if (vcom_fds[fds_idx].fd < 0)
3194 /* from here on handle positive vcom fds */
3196 * restore vcom fds to positive number in __fds
3197 * and update revents in __fds with the events
3198 * that actually occurred in vcom fds
3200 __fds[fds_idx].fd = -__fds[fds_idx].fd;
3203 __fds[fds_idx].revents = vcom_fds[fds_idx].revents;
3209 fprintf (stderr, "[%d] vpoll: " "'%04d'='%08lu'\n", pid, rv, __nfds);
3214 * 00. The field __fds[i].fd contains a file descriptor for an
3216 * If this field is negative, then the corresponding
3217 * events field is ignored and the revents field returns zero.
3218 * The field __fds[i].events is an input parameter.
3219 * The field __fds[i].revents is an output parameter.
3220 * 01. Specifying a negative value in timeout
3221 * means an infinite timeout.
3222 * Specifying a timeout of zero causes poll() to return
3223 * immediately, even if no file descriptors are ready.
3225 * NOTE: observed __nfds is less than 128 from kubecon strace files
3230 poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
3233 pid_t pid = getpid ();
3237 fprintf (stderr, "[%d] poll1: " "'%04d'='%08lu, %d, 0x%x'\n",
3238 pid, rv, __nfds, __fds[0].fd, __fds[0].events);
3239 rv = vcom_poll (__fds, __nfds, __timeout);
3241 fprintf (stderr, "[%d] poll2: " "'%04d'='%08lu, %d, 0x%x'\n",
3242 pid, rv, __nfds, __fds[0].fd, __fds[0].revents);
/*
 * vcom_ppoll: VCL-side ppoll().
 * Only the vcom_init () check is visible.
 * NOTE(review): the statements that follow the check, including the value
 * this function returns, are on lines absent from this listing.
 */
3252 /* Like poll, but before waiting the threads signal mask is replaced
3253 with that specified in the fourth parameter. For better usability,
3254 the timeout value is specified using a TIMESPEC object.
3256 This function is a cancellation point and therefore not marked with
3259 vcom_ppoll (struct pollfd *__fds, nfds_t __nfds,
3260 const struct timespec *__timeout, const __sigset_t * __ss)
3262 if (vcom_init () != 0)
/*
 * ppoll: interposed ppoll() entry point taking a pollfd array, its
 * length, a timespec timeout and a signal mask.
 * NOTE(review): the whole body is on lines absent from this listing.
 */
3271 ppoll (struct pollfd *__fds, nfds_t __nfds,
3272 const struct timespec *__timeout, const __sigset_t * __ss)
/*
 * Prototypes for the library load/unload hooks.  CONSTRUCTOR_ATTRIBUTE
 * and DESTRUCTOR_ATTRIBUTE expand to __attribute__ ((constructor)) and
 * __attribute__ ((destructor)) when the matching HAVE_* macro is defined.
 */
3282 void CONSTRUCTOR_ATTRIBUTE vcom_constructor (void);
3284 void DESTRUCTOR_ATTRIBUTE vcom_destructor (void);
/*
 * vcom_constructor - library load hook.
 *
 * Runs swrap_constructor (), then vcom_init (), and prints one line on
 * stdout, tagged with the process id, saying whether initialisation
 * failed or completed.
 */
void
vcom_constructor (void)
{
  const pid_t self = getpid ();
  int init_failed;

  swrap_constructor ();
  init_failed = (vcom_init () != 0);

  printf (init_failed
          ? "\n[%d] vcom_constructor...failed!\n"
          : "\n[%d] vcom_constructor...done!\n", self);
}
/*
 * vcom_destructor: library unload hook.
 * Calls swrap_destructor () and prints a completion line on stdout,
 * tagged with the process id.
 * NOTE(review): the numbering gap before swrap_destructor () suggests a
 * statement absent from this listing (presumably VCL teardown) - confirm
 * against the complete file.
 */
3303 * This function is called when the library is unloaded
3306 vcom_destructor (void)
3308 pid_t pid = getpid ();
3311 swrap_destructor ();
3312 printf ("\n[%d] vcom_destructor...done!\n", pid);
3317 * fd.io coding-style-patch-verification: ON
3320 * eval: (c-set-style "gnu")