2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
22 #include <sys/resource.h>
24 #include <vcl/vcom_socket_wrapper.h>
28 #include <vcl/vppcom.h>
29 #include <vcl/vcom_socket.h>
31 /* GCC has a printf-style format attribute check. */
32 #ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
33 #define PRINTF_ATTRIBUTE(a,b) \
34 __attribute__ ((__format__ (__printf__, a, b)))
36 #define PRINTF_ATTRIBUTE(a,b)
37 #endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
39 #define HAVE_CONSTRUCTOR_ATTRIBUTE
40 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
41 #define CONSTRUCTOR_ATTRIBUTE \
42 __attribute__ ((constructor))
44 #define CONSTRUCTOR_ATTRIBUTE
45 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
47 #define HAVE_DESTRUCTOR_ATTRIBUTE
48 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
49 #define DESTRUCTOR_ATTRIBUTE \
50 __attribute__ ((destructor))
52 #define DESTRUCTOR_ATTRIBUTE
55 #define HAVE_ADDRESS_SANITIZER_ATTRIBUTE
56 #ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
57 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE \
58 __attribute__((no_sanitize_address))
60 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
63 #define VCOM_SOCKET_FD_MAX 0x10000
65 static char vcom_app_name[MAX_VCOM_APP_NAME];
68 * RETURN: 0 on success or -1 on error.
71 vcom_set_app_name (char *__app_name)
73 return snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-%s-%d",
74 __app_name, getpid ()) < 0 ? -1 : 0;
80 if (vcom_app_name[0] == '\0')
82 snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-app-%d", getpid ());
88 * 1 if init, 0 otherwise
90 static int is_vcom_init;
93 * TBD: Make it thread safe
97 * constructor function called before main is called
98 * RETURN: 0 on success -1 on failure
103 pid_t pid = getpid ();
107 if (vppcom_app_create (vcom_get_app_name ()) != 0)
109 printf ("\n[%d] vcom_init...failed!\n", pid);
112 "[%d] vcom_init: vppcom_app_create failed!\n", pid);
115 if (vcom_socket_main_init () != 0)
117 printf ("\n[%d] vcom_init...failed!\n", pid);
120 "[%d] vcom_init: vcom_socket_main_init failed!\n", pid);
125 printf ("\n[%d] vcom_init...done!\n", pid);
133 pid_t pid = getpid ();
137 vcom_socket_main_destroy ();
138 vppcom_app_destroy ();
140 fprintf (stderr, "\n[%d] vcom_destroy...done!\n", pid);
145 is_vcom_socket_fd (int fd)
147 return vcom_socket_is_vcom_fd (fd);
151 is_vcom_epfd (int epfd)
153 return vcom_socket_is_vcom_epfd (epfd);
159 * Generic glibc fd api
163 /* Close the file descriptor FD.
165 This function is a cancellation point and therefore
166 not marked with __THROW. */
168 * PRE: is_vcom_socket_fd(__fd) == 1
169 * RETURN: 0 on success and -1 for errors.
172 vcom_close (int __fd)
174 if (vcom_init () != 0)
179 if (vcom_socket_close (__fd) != 0)
188 * RETURN: 0 on success, or -1 on error
194 pid_t pid = getpid ();
196 if (is_vcom_socket_fd (__fd) || is_vcom_epfd (__fd))
199 fprintf (stderr, "[%d] close: fd %d\n", pid, __fd);
200 rv = vcom_close (__fd);
202 fprintf (stderr, "[%d] close: vcom_close() returned %d\n", pid, rv);
210 return libc_close (__fd);
213 /* Read NBYTES into BUF from FD. Return the
214 number read, -1 for errors or 0 for EOF.
216 This function is a cancellation point and therefore
217 not marked with __THROW. */
219 vcom_read (int __fd, void *__buf, size_t __nbytes)
221 if (vcom_init () != 0)
226 return vcom_socket_read (__fd, __buf, __nbytes);
230 read (int __fd, void *__buf, size_t __nbytes)
233 pid_t pid = getpid ();
234 pthread_t tid = pthread_self ();
236 if (is_vcom_socket_fd (__fd))
240 "[%d][%lu (0x%lx)] read:1 "
241 "'%04d'='%04d', '%p', '%04d'\n",
242 pid, (unsigned long) tid, (unsigned long) tid,
243 (int) size, __fd, __buf, (int) __nbytes);
244 size = vcom_read (__fd, __buf, __nbytes);
247 "[%d][%lu (0x%lx)] read:2 "
248 "'%04d'='%04d', '%p', '%04d'\n",
249 pid, (unsigned long) tid, (unsigned long) tid,
250 (int) size, __fd, __buf, (int) __nbytes);
258 return libc_read (__fd, __buf, __nbytes);
262 vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt)
264 if (vcom_init () != 0)
269 return vcom_socket_readv (__fd, __iov, __iovcnt);
273 readv (int __fd, const struct iovec * __iov, int __iovcnt)
277 if (is_vcom_socket_fd (__fd))
279 size = vcom_readv (__fd, __iov, __iovcnt);
288 return libc_readv (__fd, __iov, __iovcnt);
291 /* Write N bytes of BUF to FD. Return the number written, or -1.
293 This function is a cancellation point and therefore
294 not marked with __THROW. */
296 vcom_write (int __fd, const void *__buf, size_t __n)
298 if (vcom_init () != 0)
303 return vcom_socket_write (__fd, (void *) __buf, __n);
307 write (int __fd, const void *__buf, size_t __n)
310 pid_t pid = getpid ();
311 pthread_t tid = pthread_self ();
313 if (is_vcom_socket_fd (__fd))
317 "[%d][%lu (0x%lx)] write:1 "
318 "'%04d'='%04d', '%p', '%04d'\n",
319 pid, (unsigned long) tid, (unsigned long) tid,
320 (int) size, __fd, __buf, (int) __n);
321 size = vcom_write (__fd, __buf, __n);
324 "[%d][%lu (0x%lx)] write:2 "
325 "'%04d'='%04d', '%p', '%04d'\n",
326 pid, (unsigned long) tid, (unsigned long) tid,
327 (int) size, __fd, __buf, (int) __n);
335 return libc_write (__fd, __buf, __n);
339 vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt)
341 if (vcom_init () != 0)
346 return vcom_socket_writev (__fd, __iov, __iovcnt);
350 writev (int __fd, const struct iovec * __iov, int __iovcnt)
354 if (is_vcom_socket_fd (__fd))
356 size = vcom_writev (__fd, __iov, __iovcnt);
365 return libc_writev (__fd, __iov, __iovcnt);
368 /* Do the file control operation described by CMD on FD.
369 The remaining arguments are interpreted depending on CMD.
371 This function is a cancellation point and therefore
372 not marked with __THROW. */
374 vcom_fcntl_va (int __fd, int __cmd, va_list __ap)
376 if (vcom_init () != 0)
381 return vcom_socket_fcntl_va (__fd, __cmd, __ap);
385 vcom_fcntl (int __fd, int __cmd, ...)
390 if (is_vcom_socket_fd (__fd))
392 va_start (ap, __cmd);
393 rv = vcom_fcntl_va (__fd, __cmd, ap);
400 fcntl (int __fd, int __cmd, ...)
404 pid_t pid = getpid ();
406 va_start (ap, __cmd);
407 if (is_vcom_socket_fd (__fd))
409 rv = vcom_fcntl_va (__fd, __cmd, ap);
413 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __cmd);
421 rv = libc_vfcntl (__fd, __cmd, ap);
429 vcom_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap)
431 if (vcom_init () != 0)
436 return vcom_socket_ioctl_va (__fd, __cmd, __ap);
440 vcom_ioctl (int __fd, unsigned long int __cmd, ...)
445 if (is_vcom_socket_fd (__fd))
447 va_start (ap, __cmd);
448 rv = vcom_ioctl_va (__fd, __cmd, ap);
455 ioctl (int __fd, unsigned long int __cmd, ...)
459 pid_t pid = getpid ();
461 va_start (ap, __cmd);
462 if (is_vcom_socket_fd (__fd))
464 rv = vcom_ioctl_va (__fd, __cmd, ap);
468 "'%04d'='%04d', '%04ld'\n", pid, rv, __fd, __cmd);
476 rv = libc_vioctl (__fd, __cmd, ap);
484 * Check the first NFDS descriptors each in READFDS (if not NULL) for
485 * read readiness, in WRITEFDS (if not NULL) for write readiness,
486 * and in EXCEPTFDS (if not NULL) for exceptional conditions.
487 * If TIMEOUT is not NULL, time out after waiting the interval
488 * specified therein. Returns the number of ready descriptors,
491 * This function is a cancellation point and therefore not marked
496 * clear all vcom FDs from fd_sets __readfds, __writefds and
497 * __exceptfds and update the new nfds
499 * new nfds is the highest-numbered file descriptor
500 * in any of the three sets, plus 1
502 * Return the number of file descriptors contained in the
503 * three descriptor sets. ie. the total number of the bits
504 * that are set in __readfds, __writefds and __exceptfds
507 vcom_fd_clear (int __nfds,
509 fd_set * __restrict __readfds,
510 fd_set * __restrict __writefds,
511 fd_set * __restrict __exceptfds)
514 /* invalid max_fd is -1 */
519 /* clear all vcom fd from the sets */
520 for (fd = 0; fd < __nfds; fd++)
523 /* clear vcom fd from set */
528 if ((F) && FD_ISSET (fd, (F))) \
530 if (is_vcom_socket_fd (fd)) \
544 * compute nfd and __new_nfds
546 for (fd = 0; fd < __nfds; fd++)
553 if ((F) && FD_ISSET (fd, (F))) \
569 *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
574 * Return the number of file descriptors contained in the
575 * three descriptor sets. ie. the total number of the bits
576 * that are set in __readfds, __writefds and __exceptfds
579 vcom_fd_set (int __nfds,
582 fd_set * __restrict __readfds,
583 fd_set * __restrict __writefds, fd_set * __restrict __exceptfds,
585 fd_set * __restrict __saved_readfds,
586 fd_set * __restrict __saved_writefds,
587 fd_set * __restrict __saved_exceptfds)
590 /* invalid max_fd is -1 */
594 for (fd = 0; fd < __nfds; fd++)
601 if ((F) && (S) && FD_ISSET (fd, (S))) \
603 if (is_vcom_socket_fd (fd)) \
610 _(__saved_readfds, __readfds);
611 _(__saved_writefds, __writefds);
617 * compute nfd and __new_nfds
619 for (fd = 0; fd < __nfds; fd++)
626 if ((F) && FD_ISSET (fd, (F))) \
642 *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
647 * split select sets(src) into
648 * vcom sets(dest1) and libc sets(dest2)
652 /* src, select sets */
654 fd_set * __restrict readfds,
655 fd_set * __restrict writefds,
656 fd_set * __restrict exceptfds,
657 /* dest1, vcom sets */
659 fd_set * __restrict vcom_readfds,
660 fd_set * __restrict vcom_writefds,
661 fd_set * __restrict vcom_exceptfds, int *vcom_nfd,
662 /* dest2, libc sets */
664 fd_set * __restrict libc_readfds,
665 fd_set * __restrict libc_writefds,
666 fd_set * __restrict libc_exceptfds, int *libc_nfd)
671 /* invalid max_fd is -1 */
672 int vcom_max_fd = -1;
676 /* invalid max_fd is -1 */
677 int libc_max_fd = -1;
681 for (fd = 0; fd < nfds; fd++)
689 if ((S) && FD_ISSET (fd, (S))) \
691 if (is_vcom_socket_fd (fd)) \
696 if (fd > vcom_max_fd) \
708 if (fd > libc_max_fd) \
718 _(readfds, vcom_readfds, libc_readfds);
719 _(writefds, vcom_writefds, libc_writefds);
720 _(exceptfds, vcom_exceptfds, libc_exceptfds);
725 *vcom_nfds = vcom_max_fd != -1 ? vcom_max_fd + 1 : 0;
727 *vcom_nfd = vcom_nfd2;
729 *libc_nfds = libc_max_fd != -1 ? libc_max_fd + 1 : 0;
731 *libc_nfd = libc_nfd2;
735 * merge vcom sets(src1) and libc sets(src2)
736 * into select sets(dest)
740 /* dest, select sets */
742 fd_set * __restrict readfds,
743 fd_set * __restrict writefds,
744 fd_set * __restrict exceptfds, int *nfd,
745 /* src1, vcom sets */
747 fd_set * __restrict vcom_readfds,
748 fd_set * __restrict vcom_writefds,
749 fd_set * __restrict vcom_exceptfds, int vcom_nfd,
750 /* src2, libc sets */
752 fd_set * __restrict libc_readfds,
753 fd_set * __restrict libc_writefds,
754 fd_set * __restrict libc_exceptfds, int libc_nfd)
757 /* invalid max_fd is -1 */
764 * dest |= src at current bit index
765 * update MAX and NFD of dest fd set
771 * MAX current max_fd of dest fd sets
772 * NFD current nfd of dest fd sets
773 * N nfds of source fd set
775 #define FD_BIT_OR(FD,FS,BI, \
777 if ((FS) && (FD) && FD_ISSET ((BI), (FS))) \
779 FD_SET ((BI), (FD)); \
790 * SR,SW,SE source RWE fd sets
791 * DR,DW,DE dest RWE fd sets
793 * NFDS nfds of source fd sets
794 * MAX current max_fd of dest fd sets
795 * NFD current nfd of dest fd sets
797 #define FD_RWE_SETS_OR(DR,DW,DE, \
803 for ((BI) = 0; (BI) < (NFDS); (BI)++) \
805 FD_BIT_OR((DR), (SR), (BI), (MAX), (NFD)); \
806 FD_BIT_OR((DW), (SW), (BI), (MAX), (NFD)); \
807 FD_BIT_OR((DE), (SE), (BI), (MAX), (NFD)); \
813 /* source(vcom) to dest(select) rwe fd sets */
814 FD_RWE_SETS_OR (readfds, writefds, exceptfds,
815 vcom_readfds, vcom_writefds, vcom_exceptfds,
816 fd, vcom_nfds, max_fd, nfd2);
818 /* source(libc) to dest(select) rwe fd sets */
819 FD_RWE_SETS_OR (readfds, writefds, exceptfds,
820 libc_readfds, libc_writefds, libc_exceptfds,
821 fd, libc_nfds, max_fd, nfd2);
823 #undef FD_RWE_SETS_OR
827 *nfds = max_fd != -1 ? max_fd + 1 : 0;
833 * RETURN 1 if fds is NULL or empty. 0 otherwise
836 fd_set_iszero (fd_set * __restrict fds)
844 for (fd = 0; fd < FD_SETSIZE; fd++)
846 if (FD_ISSET (fd, fds))
862 typedef long int s64;
863 typedef unsigned long int u64;
865 typedef long long int __s64;
866 typedef unsigned long long int __u64;
868 typedef __s64 time64_t;
869 typedef __u64 timeu64_t;
871 /* Parameters used to convert the timespec values: */
872 #define MSEC_PER_SEC 1000L
873 #define USEC_PER_MSEC 1000L
874 #define NSEC_PER_USEC 1000L
875 #define NSEC_PER_MSEC 1000000L
876 #define USEC_PER_SEC 1000000L
877 #define NSEC_PER_SEC 1000000000L
878 #define FSEC_PER_SEC 1000000000000000LL
888 #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
890 #ifdef VCOM_USE_TIMESPEC_EQUAL
892 timespec_equal (const struct timespec *a, const struct timespec *b)
894 return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec);
899 * lhs < rhs: return <0
900 * lhs == rhs: return 0
901 * lhs > rhs: return >0
/*
 * Three-way comparison of two timespec values.
 *
 * Returns a negative value if lhs is earlier than rhs, 0 if they are
 * equal, and a positive value if lhs is later than rhs.  Seconds are
 * compared first; nanoseconds only break a tie on seconds.
 *
 * NOTE(review): the return type / storage class line is not visible in
 * this listing; "static inline int" is assumed to match the neighbouring
 * timespec helpers -- confirm against the full file.
 */
static inline int
timespec_compare (const struct timespec *lhs, const struct timespec *rhs)
{
  if (lhs->tv_sec < rhs->tv_sec)
    return -1;
  if (lhs->tv_sec > rhs->tv_sec)
    return 1;

  /*
   * Compare instead of subtracting: tv_nsec is a long, and narrowing
   * the difference to int can flip its sign when the inputs are not
   * normalized.
   */
  if (lhs->tv_nsec < rhs->tv_nsec)
    return -1;
  if (lhs->tv_nsec > rhs->tv_nsec)
    return 1;
  return 0;
}
913 #ifdef VCOM_USE_TIMEVAL_COMPARE
915 timeval_compare (const struct timeval *lhs, const struct timeval *rhs)
917 if (lhs->tv_sec < rhs->tv_sec)
919 if (lhs->tv_sec > rhs->tv_sec)
921 return lhs->tv_usec - rhs->tv_usec;
925 extern void set_normalized_timespec (struct timespec *ts, time_t sec,
928 static inline struct timespec
929 timespec_add (struct timespec lhs, struct timespec rhs)
931 struct timespec ts_delta;
932 set_normalized_timespec (&ts_delta, lhs.tv_sec + rhs.tv_sec,
933 lhs.tv_nsec + rhs.tv_nsec);
938 * sub = lhs - rhs, in normalized form
940 static inline struct timespec
941 timespec_sub (struct timespec lhs, struct timespec rhs)
943 struct timespec ts_delta;
944 set_normalized_timespec (&ts_delta, lhs.tv_sec - rhs.tv_sec,
945 lhs.tv_nsec - rhs.tv_nsec);
957 * set_normalized_timespec - set timespec sec and nsec parts and normalize
959 * @ts: pointer to timespec variable to be set
960 * @sec: seconds to set
961 * @nsec: nanoseconds to set
963 * Set seconds and nanoseconds field of a timespec variable and
964 * normalize to the timespec storage format
966 * Note: The tv_nsec part is always in the range of
967 * 0 <= tv_nsec < NSEC_PER_SEC
968 * For negative values only the tv_sec field is negative !
971 set_normalized_timespec (struct timespec *ts, time_t sec, s64 nsec)
973 while (nsec >= NSEC_PER_SEC)
976 * The following asm() prevents the compiler from
977 * optimising this loop into a modulo operation. See
978 * also __iter_div_u64_rem() in include/linux/time.h
980 asm ("":"+rm" (nsec));
981 nsec -= NSEC_PER_SEC;
986 asm ("":"+rm" (nsec));
987 nsec += NSEC_PER_SEC;
994 #define vcom_timerisvalid(tvp) (!((tvp)->tv_sec < 0 || (tvp)->tv_usec < 0))
996 /* Macros for converting between `struct timeval' and `struct timespec'. */
997 #define VCOM_TIMEVAL_TO_TIMESPEC(tv, ts) { \
998 (ts)->tv_sec = (tv)->tv_sec; \
999 (ts)->tv_nsec = (tv)->tv_usec * 1000; \
1001 #define VCOM_TIMESPEC_TO_TIMEVAL(tv, ts) { \
1002 (tv)->tv_sec = (ts)->tv_sec; \
1003 (tv)->tv_usec = (ts)->tv_nsec / 1000; \
1007 vcom_select_impl (int vcom_nfds, fd_set * __restrict vcom_readfds,
1008 fd_set * __restrict vcom_writefds,
1009 fd_set * __restrict vcom_exceptfds,
1010 struct timeval *__restrict timeout)
1012 return vcom_socket_select (vcom_nfds, vcom_readfds,
1013 vcom_writefds, vcom_exceptfds, timeout);
1017 vcom_select (int __nfds, fd_set * __restrict __readfds,
1018 fd_set * __restrict __writefds,
1019 fd_set * __restrict __exceptfds,
1020 struct timeval *__restrict __timeout)
1024 pid_t pid = getpid ();
1027 /* block indefinitely */
1029 int first_clock_gettime_failed = 0;
1030 /* timeout value in units of timespec */
1031 struct timespec timeout_ts;
1032 struct timespec start_time, now, end_time;
1034 /* select sets attributes - after merge */
1040 fd_set vcom_readfds;
1041 fd_set vcom_writefds;
1042 fd_set vcom_exceptfds;
1047 fd_set libc_readfds;
1048 fd_set libc_writefds;
1049 fd_set libc_exceptfds;
1053 struct timeval tv = {.tv_sec = 0,.tv_usec = 0 };
1055 /* validate __timeout */
1058 /* validate tv_sec */
1060 if (!vcom_timerisvalid (__timeout))
1066 /* validate tv_usec */
1068 /* init timeout_ts */
1069 VCOM_TIMEVAL_TO_TIMESPEC (__timeout, &timeout_ts);
1070 set_normalized_timespec (&timeout_ts,
1071 timeout_ts.tv_sec, timeout_ts.tv_nsec);
1074 rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
1078 first_clock_gettime_failed = 1;
1085 if (timerisset (__timeout))
1087 end_time = timespec_add (start_time, timeout_ts);
1092 * if both fields of the timeout structure are zero,
1093 * then select returns immediately
1095 end_time = start_time;
1100 /* block indefinitely */
1106 if (vcom_init () != 0)
1112 /* validate __nfds */
1113 if (__nfds < 0 || __nfds > FD_SETSIZE)
1121 * usleep(3) emulation
1124 /* call libc_select() with a finite timeout and
1125 * no file descriptors or empty fd sets and
1128 (!__readfds || fd_set_iszero (__readfds)) &&
1129 (!__writefds || fd_set_iszero (__writefds)) &&
1130 (!__exceptfds || fd_set_iszero (__exceptfds)))
1134 rv = libc_select (__nfds,
1135 __readfds, __writefds, __exceptfds, __timeout);
1141 /* TBD: block indefinitely or return -EINVAL */
1147 /* init once before the polling loop */
1149 /* zero vcom and libc fd sets */
1163 _(__readfds, &vcom_readfds, &libc_readfds);
1164 _(__writefds, &vcom_writefds, &libc_writefds);
1165 _(__exceptfds, &vcom_exceptfds, &libc_exceptfds);
1176 /* src, select sets */
1177 __nfds, __readfds, __writefds, __exceptfds,
1178 /* dest1, vcom sets */
1179 __readfds || __writefds || __exceptfds ?
1181 __readfds ? &vcom_readfds : NULL,
1182 __writefds ? &vcom_writefds : NULL,
1183 __exceptfds ? &vcom_exceptfds : NULL,
1184 __readfds || __writefds || __exceptfds ?
1186 /* dest2, libc sets */
1187 __readfds || __writefds || __exceptfds ?
1189 __readfds ? &libc_readfds : NULL,
1190 __writefds ? &libc_writefds : NULL,
1191 __exceptfds ? &libc_exceptfds : NULL,
1192 __readfds || __writefds || __exceptfds ?
1205 * if both fields of timeval structure are zero,
1206 * vcom_select_impl and libc_select returns immediately.
1207 * useful for polling and ensure fairness among
1208 * file descriptors watched.
1215 /* select on vcom fds */
1218 vcom_nfd = vcom_select_impl (vcom_nfds,
1219 __readfds ? &vcom_readfds : NULL,
1220 __writefds ? &vcom_writefds : NULL,
1221 __exceptfds ? &vcom_exceptfds : NULL,
1225 "[%d] select vcom: "
1226 "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds);
1234 /* select on libc fds */
1237 libc_nfd = libc_select (libc_nfds,
1238 __readfds ? &libc_readfds : NULL,
1239 __writefds ? &libc_writefds : NULL,
1240 __exceptfds ? &libc_exceptfds : NULL, &tv);
1243 "[%d] select libc: "
1244 "'%04d'='%04d'\n", pid, libc_nfd, libc_nfds);
1248 /* tv becomes undefined */
1255 /* check if any file descriptors changed status */
1256 if ((vcom_nfds && vcom_nfd > 0) || (libc_nfds && libc_nfd > 0))
1258 /* zero the sets before merge and exit */
1278 * on exit, sets are modified in place to indicate which
1279 * file descriptors actually changed status
1282 /* dest, select sets */
1284 __readfds, __writefds, __exceptfds, &new_nfd,
1285 /* src1, vcom sets */
1287 __readfds ? &vcom_readfds : NULL,
1288 __writefds ? &vcom_writefds : NULL,
1289 __exceptfds ? &vcom_exceptfds : NULL, vcom_nfd,
1290 /* src2, libc sets */
1292 __readfds ? &libc_readfds : NULL,
1293 __writefds ? &libc_writefds : NULL,
1294 __exceptfds ? &libc_exceptfds : NULL, libc_nfd);
1296 * return the number of file descriptors contained in the
1297 * three returned sets
1309 rv = new_nfd == -1 ? 0 : new_nfd;
1313 rv = clock_gettime (CLOCK_MONOTONIC, &now);
1320 while (no_timeout || timespec_compare (&now, &end_time) < 0);
1322 /* timeout expired before anything interesting happened */
1328 fprintf (stderr, "[%d] vselect1: " "'%04d'='%04d'\n", pid, rv, __nfds);
1330 * modify timeout parameter to reflect the amount of time not slept
1334 if (vcom_timerisvalid (__timeout))
1336 /* timeout expired */
1339 timerclear (__timeout);
1341 else if (!first_clock_gettime_failed)
1343 rv2 = clock_gettime (CLOCK_MONOTONIC, &now);
1350 struct timespec ts_delta;
1351 ts_delta = timespec_sub (end_time, now);
1352 VCOM_TIMESPEC_TO_TIMEVAL (__timeout, &ts_delta);
1358 fprintf (stderr, "[%d] vselect2: " "'%04d',='%04d'\n", pid, rv, __nfds);
1364 vcom_select_internal (int __nfds, fd_set * __restrict __readfds,
1365 fd_set * __restrict __writefds,
1366 fd_set * __restrict __exceptfds,
1367 struct timeval *__restrict __timeout)
1372 pid_t pid = getpid ();
1374 fd_set saved_readfds;
1375 fd_set saved_writefds;
1376 fd_set saved_exceptfds;
1378 /* validate __nfds */
1385 /* validate __timeout */
1388 /* validate tv_sec */
1390 if (__timeout->tv_sec < 0 || __timeout->tv_usec < 0)
1396 /* validate tv_usec */
1400 /* init saved_x fds */
1403 saved_readfds = *__readfds;
1405 memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
1410 FD_ZERO (&saved_readfds);
1415 saved_writefds = *__writefds;
1417 memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
1423 FD_ZERO (&saved_writefds);
1428 saved_exceptfds = *__exceptfds;
1430 memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
1436 FD_ZERO (&saved_exceptfds);
1439 /* clear vcom fds */
1440 nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
1442 /* set to an invalid value */
1444 /* have kernel fds */
1446 rv = libc_select (new_nfds, __readfds,
1447 __writefds, __exceptfds, __timeout);
1449 if (new_nfds && rv == -1)
1451 /* on error, the file descriptor sets are unmodified */
1453 *__readfds = saved_readfds;
1455 *__writefds = saved_writefds;
1457 *__exceptfds = saved_exceptfds;
1460 else if ((new_nfds && rv != -1) || (rv == -2))
1462 /* restore vcom fds */
1463 nfd = vcom_fd_set (__nfds,
1468 &saved_readfds, &saved_writefds, &saved_exceptfds);
1473 fprintf (stderr, "[%d] select: " "'%04d'='%04d'\n", pid, rv, __nfds);
1478 select (int __nfds, fd_set * __restrict __readfds,
1479 fd_set * __restrict __writefds,
1480 fd_set * __restrict __exceptfds, struct timeval *__restrict __timeout)
1483 pid_t pid = getpid ();
1486 fprintf (stderr, "[%d] select1: " "'%04d'='%04d'\n", pid, rv, __nfds);
1487 rv = vcom_select (__nfds, __readfds, __writefds, __exceptfds, __timeout);
1489 fprintf (stderr, "[%d] select2: " "'%04d'='%04d'\n", pid, rv, __nfds);
1498 #ifdef __USE_XOPEN2K
1500 * Same as above only that the TIMEOUT value is given with higher
1501 * resolution and a sigmask which is set temporarily. This
1502 * version should be used.
1504 * This function is a cancellation point and therefore not marked
1508 vcom_pselect (int __nfds, fd_set * __restrict __readfds,
1509 fd_set * __restrict __writefds,
1510 fd_set * __restrict __exceptfds,
1511 const struct timespec *__restrict __timeout,
1512 const __sigset_t * __restrict __sigmask)
1517 for (fd = 0; fd < __nfds; fd++)
1519 if (__readfds && FD_ISSET (fd, __readfds))
1521 if (is_vcom_socket_fd (fd))
1527 if (__writefds && FD_ISSET (fd, __writefds))
1529 if (is_vcom_socket_fd (fd))
1534 if (__exceptfds && FD_ISSET (fd, __exceptfds))
1536 if (is_vcom_socket_fd (fd))
1538 FD_CLR (fd, __exceptfds);
1546 pselect (int __nfds, fd_set * __restrict __readfds,
1547 fd_set * __restrict __writefds,
1548 fd_set * __restrict __exceptfds,
1549 const struct timespec *__restrict __timeout,
1550 const __sigset_t * __restrict __sigmask)
1555 pid_t pid = getpid ();
1557 fd_set saved_readfds;
1558 fd_set saved_writefds;
1559 fd_set saved_exceptfds;
1561 /* validate __nfds */
1568 /* validate __timeout */
1571 /* validate tv_sec */
1573 if (__timeout->tv_sec < 0 || __timeout->tv_nsec < 0)
1579 /* validate tv_usec */
1583 /* init saved fds */
1586 saved_readfds = *__readfds;
1588 memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
1593 FD_ZERO (&saved_readfds);
1598 saved_writefds = *__writefds;
1600 memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
1606 FD_ZERO (&saved_writefds);
1611 saved_exceptfds = *__exceptfds;
1613 memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
1619 FD_ZERO (&saved_exceptfds);
1622 /* clear vcom fds */
1623 nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
1625 /* set to an invalid value */
1628 rv = libc_pselect (new_nfds,
1630 __writefds, __exceptfds, __timeout, __sigmask);
1632 if (new_nfds && rv == -1)
1634 /* on error, the file descriptor sets are unmodified */
1636 *__readfds = saved_readfds;
1638 *__writefds = saved_writefds;
1640 *__exceptfds = saved_exceptfds;
1643 else if ((new_nfds && rv != -1) || (rv == -2))
1645 /* restore vcom fds */
1646 nfd = vcom_fd_set (__nfds,
1651 &saved_readfds, &saved_writefds, &saved_exceptfds);
1656 fprintf (stderr, "[%d] pselect: " "'%04d'='%04d'\n", pid, rv, __nfds);
1663 * Socket specific glibc api
1667 /* Create a new socket of type TYPE in domain DOMAIN, using
1668 * protocol PROTOCOL. If PROTOCOL is zero, one is chosen
1669 * automatically. Returns a file descriptor for the new socket,
1671 * RETURN: a valid file descriptor for the new socket,
1676 vcom_socket (int __domain, int __type, int __protocol)
1678 if (vcom_init () != 0)
1683 return vcom_socket_socket (__domain, __type, __protocol);
1687 socket (int __domain, int __type, int __protocol)
1690 pid_t pid = getpid ();
1691 pthread_t tid = pthread_self ();
1693 /* handle domains implemented by vpp */
1698 /* handle types implemented by vpp */
1699 switch (__type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1704 vcom_socket_main_show ();
1705 rv = vcom_socket (__domain, __type, __protocol);
1708 "[%d][%lu (0x%lx)] socket: "
1709 "'%04d'= D='%04d', T='%04d', P='%04d'\n",
1710 pid, (unsigned long) tid, (unsigned long) tid,
1711 rv, __domain, __type, __protocol);
1713 vcom_socket_main_show ();
1723 goto CALL_GLIBC_SOCKET_API;
1730 goto CALL_GLIBC_SOCKET_API;
1734 CALL_GLIBC_SOCKET_API:
1735 return libc_socket (__domain, __type, __protocol);
1739 * Create two new sockets, of type TYPE in domain DOMAIN and using
1740 * protocol PROTOCOL, which are connected to each other, and put file
1741 * descriptors for them in FDS[0] and FDS[1]. If PROTOCOL is zero,
1742 * one will be chosen automatically.
1743 * Returns 0 on success, -1 for errors.
1746 vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2])
1748 if (vcom_init () != 0)
1753 return vcom_socket_socketpair (__domain, __type, __protocol, __fds);
1757 socketpair (int __domain, int __type, int __protocol, int __fds[2])
1760 pid_t pid = getpid ();
1762 /* handle domains implemented by vpp */
1767 /* handle types implemented by vpp */
1772 rv = vcom_socketpair (__domain, __type, __protocol, __fds);
1776 "'%04d'= D='%04d', T='%04d', P='%04d'\n",
1777 pid, rv, __domain, __type, __protocol);
1787 goto CALL_GLIBC_SOCKET_API;
1794 goto CALL_GLIBC_SOCKET_API;
1798 CALL_GLIBC_SOCKET_API:
1799 return libc_socketpair (__domain, __type, __protocol, __fds);
1803 * Give the socket FD the local address ADDR
1804 * (which is LEN bytes long).
1807 vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1811 if (vcom_init () != 0)
1816 /* validate __len */
1817 switch (__addr->sa_family)
1820 if (__len != sizeof (struct sockaddr_in))
1824 if (__len != sizeof (struct sockaddr_in6))
1833 /* handle domains implemented by vpp */
1834 switch (__addr->sa_family)
1838 rv = vcom_socket_bind (__fd, __addr, __len);
1851 bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1854 pid_t pid = getpid ();
1856 if (is_vcom_socket_fd (__fd))
1859 rv = vcom_bind (__fd, __addr, __len);
1863 "'%04d'='%04d', '%p', '%04d'\n",
1864 pid, rv, __fd, __addr, __len);
1872 return libc_bind (__fd, __addr, __len);
1876 * Put the local address of FD into *ADDR and its length in *LEN.
1879 vcom_getsockname (int __fd, __SOCKADDR_ARG __addr,
1880 socklen_t * __restrict __len)
1882 if (vcom_init () != 0)
1887 return vcom_socket_getsockname (__fd, __addr, __len);
1891 getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
1894 pid_t pid = getpid ();
1896 if (is_vcom_socket_fd (__fd))
1898 rv = vcom_getsockname (__fd, __addr, __len);
1901 "[%d] getsockname: "
1902 "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
1910 return libc_getsockname (__fd, __addr, __len);
1914 * Open a connection on socket FD to peer at ADDR
1915 * (which is LEN bytes long). For connectionless socket types, just set
1916 * the default address to send to and the only address from which to
1917 * accept transmissions. Return 0 on success, -1 for errors.
1918 * This function is a cancellation point and therefore not marked
/*
 * Connect the vcom socket __fd to the peer address __addr, which is
 * __len bytes long.
 *
 * Only AF_INET and AF_INET6 addresses are handled.  __len must equal
 * the size of the socket address structure for the address family
 * (struct sockaddr_in / struct sockaddr_in6), the same rule vcom_bind
 * applies.  The previous check compared __len with INET_ADDRSTRLEN and
 * INET6_ADDRSTRLEN, which are the sizes of textual address buffers;
 * that rejected every correctly sized IPv6 address.
 *
 * RETURN: the result of vcom_socket_connect (), or -1 if vcom_init ()
 * fails, the address family is not handled, or __len is wrong.
 *
 * NOTE(review): the return type and the exact error values are on
 * lines not visible in this listing; "int" and -1 are assumed from the
 * function's header comment and its caller -- confirm against the
 * full file.
 */
int
vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
{
  int rv = -1;

  if (vcom_init () != 0)
    {
      return -1;
    }

  /* validate __len against the sockaddr size of the address family */
  switch (__addr->sa_family)
    {
    case AF_INET:
      if (__len != sizeof (struct sockaddr_in))
	return -1;
      break;

    case AF_INET6:
      if (__len != sizeof (struct sockaddr_in6))
	return -1;
      break;

    default:
      return -1;
    }

  /* handle domains implemented by vpp */
  switch (__addr->sa_family)
    {
    case AF_INET:
    case AF_INET6:
      rv = vcom_socket_connect (__fd, __addr, __len);
      break;

    default:
      return -1;
    }

  return rv;
}
1971 connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1974 pid_t pid = getpid ();
1975 pthread_t tid = pthread_self ();
1977 if (is_vcom_socket_fd (__fd))
1979 rv = vcom_connect (__fd, __addr, __len);
1982 "[%d][%lu (0x%lx)] connect: "
1983 "'%04d'='%04d', '%p', '%04d'\n",
1984 pid, (unsigned long) tid, (unsigned long) tid,
1985 rv, __fd, __addr, __len);
1994 return libc_connect (__fd, __addr, __len);
1998 * Put the address of the peer connected to socket FD into *ADDR
1999 * (which is *LEN bytes long), and its actual length into *LEN.
2002 vcom_getpeername (int __fd, __SOCKADDR_ARG __addr,
2003 socklen_t * __restrict __len)
2005 if (vcom_init () != 0)
2010 return vcom_socket_getpeername (__fd, __addr, __len);
2014 getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
2017 pid_t pid = getpid ();
2019 if (is_vcom_socket_fd (__fd))
2021 rv = vcom_getpeername (__fd, __addr, __len);
2024 "[%d] getpeername: "
2025 "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
2033 return libc_getpeername (__fd, __addr, __len);
2037 * Send N bytes of BUF to socket FD. Returns the number sent or -1.
2038 * This function is a cancellation point and therefore not marked
2042 vcom_send (int __fd, const void *__buf, size_t __n, int __flags)
2045 if (vcom_init () != 0)
2050 return vcom_socket_send (__fd, (void *) __buf, (int) __n, __flags);
2054 send (int __fd, const void *__buf, size_t __n, int __flags)
2057 pid_t pid = getpid ();
2059 if (is_vcom_socket_fd (__fd))
2061 size = vcom_send (__fd, __buf, __n, __flags);
2065 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2066 pid, (int) size, __fd, __buf, (int) __n, __flags);
2074 return libc_send (__fd, __buf, __n, __flags);
2078 sendfile (int __out_fd, int __in_fd, off_t * __offset, size_t __len)
2083 clib_warning ("[%d] __out_fd %d, __in_fd %d, __offset %p, __len %ld",
2084 getpid (), __out_fd, __in_fd, __offset, __len);
2086 if (is_vcom_socket_fd (__out_fd))
2088 /* TBD: refactor this check to be part of is_vcom_socket_fd() */
2089 if (vcom_init () != 0)
2092 size = vcom_socket_sendfile (__out_fd, __in_fd, __offset, __len);
2094 clib_warning ("[%d] vcom_socket_sendfile (out_fd %d, in_fd %d, "
2095 "offset %p (%ld), len %lu) returned %ld",
2096 getpid (), __out_fd, __in_fd, __offset,
2097 __offset ? *__offset : -1, __len, size);
2106 clib_warning ("[%d] calling libc_sendfile!", getpid ());
2107 return libc_sendfile (__out_fd, __in_fd, __offset, __len);
2111 sendfile64 (int __out_fd, int __in_fd, off_t * __offset, size_t __len)
2113 return sendfile (__out_fd, __in_fd, __offset, __len);
2118 * Read N bytes into BUF from socket FD.
2119 * Returns the number read or -1 for errors.
2120 * This function is a cancellation point and therefore not marked
/* vcom_recv: VCL-side recv. Checks vcom_init () and forwards the request,
 * arguments unchanged, to vcom_socket_recv (). */
2124 vcom_recv (int __fd, void *__buf, size_t __n, int __flags)
/* Branch taken when vcom_init () reports a non-zero status. */
2126 if (vcom_init () != 0)
2131 return vcom_socket_recv (__fd, __buf, __n, __flags);
/* recv: exported recv () override. Descriptors accepted by
 * is_vcom_socket_fd () are read through vcom_recv (); libc_recv () is the
 * libc path. */
2135 recv (int __fd, void *__buf, size_t __n, int __flags)
2138 pid_t pid = getpid ();
/* VCL-owned descriptor: use the VCL receive path. */
2140 if (is_vcom_socket_fd (__fd))
2142 size = vcom_recv (__fd, __buf, __n, __flags);
2146 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2147 pid, (int) size, __fd, __buf, (int) __n, __flags);
/* Hand the unmodified arguments to libc. */
2155 return libc_recv (__fd, __buf, __n, __flags);
2159 * Send N bytes of BUF on socket FD to peer at address ADDR (which is
2160 * ADDR_LEN bytes long). Returns the number sent, or -1 for errors.
2161 * This function is a cancellation point and therefore not marked
/* vcom_sendto: VCL-side sendto. Checks vcom_init () and forwards buffer,
 * flags and destination address to vcom_socket_sendto (). */
2165 vcom_sendto (int __fd, const void *__buf, size_t __n, int __flags,
2166 __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
/* Branch taken when vcom_init () reports a non-zero status. */
2168 if (vcom_init () != 0)
2173 return vcom_socket_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
/* sendto: exported sendto () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_sendto (); libc_sendto () is the
 * libc path. */
2177 sendto (int __fd, const void *__buf, size_t __n, int __flags,
2178 __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
2181 pid_t pid = getpid ();
/* VCL-owned descriptor: use the VCL sendto path. */
2183 if (is_vcom_socket_fd (__fd))
2185 size = vcom_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
2189 "'%04d'='%04d', '%p', '%04d', '%04x', "
2191 pid, (int) size, __fd, __buf, (int) __n, __flags,
2192 __addr, __addr_len);
/* Hand the unmodified arguments to libc. */
2200 return libc_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
2204 * Read N bytes into BUF through socket FD.
2205 * If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the
2206 * address of the sender, and store the actual size of the address
2208 * Returns the number of bytes read or -1 for errors.
2209 * This function is a cancellation point and therefore not marked
/* vcom_recvfrom: VCL-side recvfrom. Checks vcom_init () and forwards the
 * buffer, flags and sender-address out-parameters to
 * vcom_socket_recvfrom (). */
2213 vcom_recvfrom (int __fd, void *__restrict __buf, size_t __n,
2215 __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
/* Branch taken when vcom_init () reports a non-zero status. */
2217 if (vcom_init () != 0)
2222 return vcom_socket_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
/* recvfrom: exported recvfrom () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_recvfrom (); libc_recvfrom () is the
 * libc path. */
2226 recvfrom (int __fd, void *__restrict __buf, size_t __n,
2228 __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
2231 pid_t pid = getpid ();
/* VCL-owned descriptor: use the VCL recvfrom path. */
2233 if (is_vcom_socket_fd (__fd))
2235 size = vcom_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
2239 "'%04d'='%04d', '%p', '%04d', '%04x', "
2241 pid, (int) size, __fd, __buf, (int) __n, __flags,
2242 __addr, __addr_len);
/* Hand the unmodified arguments to libc. */
2250 return libc_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
2254 * Send a message described MESSAGE on socket FD.
2255 * Returns the number of bytes sent, or -1 for errors.
2256 * This function is a cancellation point and therefore not marked
/* vcom_sendmsg: VCL-side sendmsg. Checks vcom_init () and forwards the
 * message header and flags to vcom_socket_sendmsg (). */
2260 vcom_sendmsg (int __fd, const struct msghdr * __message, int __flags)
/* Branch taken when vcom_init () reports a non-zero status. */
2262 if (vcom_init () != 0)
2267 return vcom_socket_sendmsg (__fd, __message, __flags);
/* sendmsg: exported sendmsg () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_sendmsg (); libc_sendmsg () is the
 * libc path. */
2271 sendmsg (int __fd, const struct msghdr * __message, int __flags)
2274 pid_t pid = getpid ();
/* VCL-owned descriptor: use the VCL sendmsg path. */
2276 if (is_vcom_socket_fd (__fd))
2278 size = vcom_sendmsg (__fd, __message, __flags);
2282 "'%04d'='%04d', '%p', '%04x'\n",
2283 pid, (int) size, __fd, __message, __flags);
/* Hand the unmodified arguments to libc. */
2291 return libc_sendmsg (__fd, __message, __flags);
/*
 * Send a VLEN messages as described by VMESSAGES to socket FD.
 * Returns the number of datagrams successfully written
 * on success, or -1 for errors.
 * This function is a cancellation point and therefore not marked
 * with __THROW.
 *
 * __fd         descriptor handed to vcom_socket_sendmmsg ()
 * __vmessages  array of message headers to send
 * __vlen       number of entries in __vmessages
 * __flags      send flags, passed through unchanged
 *
 * Returns -1 when vcom_init () reports a non-zero status, otherwise the
 * result of vcom_socket_sendmmsg ().
 */
int
vcom_sendmmsg (int __fd, struct mmsghdr *__vmessages,
               unsigned int __vlen, int __flags)
{
  if (vcom_init () != 0)
    {
      return -1;
    }

  /* The message vector parameter is __vmessages; no __message exists in
   * this scope. */
  return vcom_socket_sendmmsg (__fd, __vmessages, __vlen, __flags);
}
2315 sendmmsg (int __fd, struct mmsghdr *__vmessages,
2316 unsigned int __vlen, int __flags)
2319 pid_t pid = getpid ();
2321 if (is_vcom_socket_fd (__fd))
2323 size = vcom_sendmmsg (__fd, __message, __vlen, __flags);
2327 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2328 pid, (int) size, __fd, __vmessages, __vlen, __flags);
2336 return libc_sendmmsg (__fd, __message, __vlen, __flags);
2342 * Receive a message as described by MESSAGE from socket FD.
2343 * Returns the number of bytes read or -1 for errors.
2344 * This function is a cancellation point and therefore not marked
/* vcom_recvmsg: VCL-side recvmsg. Checks vcom_init () and forwards the
 * message header and flags to vcom_socket_recvmsg (). */
2348 vcom_recvmsg (int __fd, struct msghdr * __message, int __flags)
/* Branch taken when vcom_init () reports a non-zero status. */
2350 if (vcom_init () != 0)
2355 return vcom_socket_recvmsg (__fd, __message, __flags);
/* recvmsg: exported recvmsg () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_recvmsg (); libc_recvmsg () is the
 * libc path. */
2359 recvmsg (int __fd, struct msghdr * __message, int __flags)
2362 pid_t pid = getpid ();
/* VCL-owned descriptor: use the VCL recvmsg path. */
2364 if (is_vcom_socket_fd (__fd))
2366 size = vcom_recvmsg (__fd, __message, __flags);
2370 "'%04d'='%04d', '%p', '%04x'\n",
2371 pid, (int) size, __fd, __message, __flags);
/* Hand the unmodified arguments to libc. */
2379 return libc_recvmsg (__fd, __message, __flags);
/*
 * Receive up to VLEN messages as described by VMESSAGES from socket FD.
 * Returns the number of messages received or -1 for errors.
 * This function is a cancellation point and therefore not marked
 * with __THROW.
 *
 * __fd         descriptor handed to vcom_socket_recvmmsg ()
 * __vmessages  array of message headers to fill
 * __vlen       number of entries in __vmessages
 * __flags      receive flags, passed through unchanged
 * __tmo        optional timeout, passed through unchanged
 *
 * Returns -1 when vcom_init () reports a non-zero status, otherwise the
 * result of vcom_socket_recvmmsg ().
 */
int
vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages,
               unsigned int __vlen, int __flags, struct timespec *__tmo)
{
  if (vcom_init () != 0)
    {
      return -1;
    }

  /* The message vector parameter is __vmessages; no __message exists in
   * this scope. */
  return vcom_socket_recvmmsg (__fd, __vmessages, __vlen, __flags, __tmo);
}
2402 recvmmsg (int __fd, struct mmsghdr *__vmessages,
2403 unsigned int __vlen, int __flags, struct timespec *__tmo)
2406 pid_t pid = getpid ();
2408 if (is_vcom_socket_fd (__fd))
2410 size = vcom_recvmmsg (__fd, __message, __vlen, __flags, __tmo);
2414 "'%04d'='%04d', '%p', "
2415 "'%04d', '%04x', '%p'\n",
2416 pid, (int) size, __fd, __vmessages, __vlen, __flags, __tmo);
2424 return libc_recvmmsg (__fd, __message, __vlen, __flags, __tmo);
2430 * Put the current value for socket FD's option OPTNAME
2431 * at protocol level LEVEL into OPTVAL (which is *OPTLEN bytes long),
2432 * and set *OPTLEN to the value's actual length.
2433 * Returns 0 on success, -1 for errors.
/* vcom_getsockopt: VCL-side getsockopt. Checks vcom_init () and forwards
 * level, option name and the value/length out-parameters to
 * vcom_socket_getsockopt (). */
2436 vcom_getsockopt (int __fd, int __level, int __optname,
2437 void *__restrict __optval, socklen_t * __restrict __optlen)
/* Branch taken when vcom_init () reports a non-zero status. */
2439 if (vcom_init () != 0)
2444 return vcom_socket_getsockopt (__fd, __level, __optname,
2445 __optval, __optlen);
/* getsockopt: exported getsockopt () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_getsockopt (); libc_getsockopt () is
 * the libc path. */
2449 getsockopt (int __fd, int __level, int __optname,
2450 void *__restrict __optval, socklen_t * __restrict __optlen)
2453 pid_t pid = getpid ();
/* VCL-owned descriptor: query the option through VCL. */
2455 if (is_vcom_socket_fd (__fd))
2457 rv = vcom_getsockopt (__fd, __level, __optname, __optval, __optlen);
2461 "'%04d'='%04d', '%04d', '%04d', "
2463 pid, rv, __fd, __level, __optname, __optval, __optlen);
/* Hand the unmodified arguments to libc. */
2471 return libc_getsockopt (__fd, __level, __optname, __optval, __optlen);
2475 * Set socket FD's option OPTNAME at protocol level LEVEL
2476 * to *OPTVAL (which is OPTLEN bytes long).
2477 * Returns 0 on success, -1 for errors.
/* vcom_setsockopt: VCL-side setsockopt. Checks vcom_init () and forwards
 * level, option name, value and length to vcom_socket_setsockopt (). */
2480 vcom_setsockopt (int __fd, int __level, int __optname,
2481 const void *__optval, socklen_t __optlen)
/* Branch taken when vcom_init () reports a non-zero status. */
2483 if (vcom_init () != 0)
2488 return vcom_socket_setsockopt (__fd, __level, __optname,
2489 __optval, __optlen);
/* setsockopt: exported setsockopt () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_setsockopt (); libc_setsockopt () is
 * the libc path. */
2493 setsockopt (int __fd, int __level, int __optname,
2494 const void *__optval, socklen_t __optlen)
2497 pid_t pid = getpid ();
/* VCL-owned descriptor: set the option through VCL. */
2499 if (is_vcom_socket_fd (__fd))
2501 rv = vcom_setsockopt (__fd, __level, __optname, __optval, __optlen);
2505 "'%04d'='%04d', '%04d', '%04d', "
2507 pid, rv, __fd, __level, __optname, __optval, __optlen);
/* Hand the unmodified arguments to libc. */
2515 return libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
2519 * Prepare to accept connections on socket FD.
2520 * N connection requests will be queued before further
2521 * requests are refused.
2522 * Returns 0 on success, -1 for errors.
/* vcom_listen: VCL-side listen. Checks vcom_init () and forwards the
 * descriptor and backlog value __n to vcom_socket_listen (). */
2525 vcom_listen (int __fd, int __n)
/* Branch taken when vcom_init () reports a non-zero status. */
2527 if (vcom_init () != 0)
2532 return vcom_socket_listen (__fd, __n);
/* listen: exported listen () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_listen (); libc_listen () is the
 * libc path. */
2536 listen (int __fd, int __n)
2539 pid_t pid = getpid ();
/* VCL-owned descriptor: listen through VCL. */
2541 if (is_vcom_socket_fd (__fd))
2543 rv = vcom_listen (__fd, __n);
2547 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __n);
/* Hand the unmodified arguments to libc. */
2555 return libc_listen (__fd, __n);
2559 * Await a connection on socket FD.
2560 * When a connection arrives, open a new socket to communicate
2561 * with it, set *ADDR (which is *ADDR_LEN bytes long) to the address
2562 * of the connecting peer and *ADDR_LEN to the address's actual
2563 * length, and return the new socket's descriptor, or -1 for errors.
2564 * This function is a cancellation point and therefore not marked
/* vcom_accept: VCL-side accept. Checks vcom_init () and forwards the
 * listening descriptor and peer-address out-parameters to
 * vcom_socket_accept (). */
2568 vcom_accept (int __fd, __SOCKADDR_ARG __addr,
2569 socklen_t * __restrict __addr_len)
/* Branch taken when vcom_init () reports a non-zero status. */
2572 if (vcom_init () != 0)
2576 return vcom_socket_accept (__fd, __addr, __addr_len);
/* accept: exported accept () override. For a descriptor accepted by
 * is_vcom_socket_fd () it dumps state with vcom_socket_main_show () and
 * traces the call (pid and thread id) on either side of vcom_accept ();
 * libc_accept () is the libc path.
 * NOTE(review): the accept1 trace prints rv before vcom_accept () has
 * assigned it, so that value is whatever rv held at entry. */
2580 accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
2583 pid_t pid = getpid ();
2584 pthread_t tid = pthread_self ();
2586 if (is_vcom_socket_fd (__fd))
2589 vcom_socket_main_show ();
2592 "[%d][%lu (0x%lx)] accept1: "
2593 "'%04d'='%04d', '%p', '%p'\n",
2594 pid, (unsigned long) tid, (unsigned long) tid,
2595 rv, __fd, __addr, __addr_len);
/* The VCL accept itself; the accept2 trace below reports its result. */
2596 rv = vcom_accept (__fd, __addr, __addr_len);
2599 "[%d][%lu (0x%lx)] accept2: "
2600 "'%04d'='%04d', '%p', '%p'\n",
2601 pid, (unsigned long) tid, (unsigned long) tid,
2602 rv, __fd, __addr, __addr_len);
2604 vcom_socket_main_show ();
/* Hand the unmodified arguments to libc. */
2612 return libc_accept (__fd, __addr, __addr_len);
2616 * Similar to 'accept' but takes an additional parameter to specify
2618 * This function is a cancellation point and therefore not marked
/* vcom_accept4: VCL-side accept4. Checks vcom_init () and forwards the
 * descriptor, peer-address out-parameters and flags to
 * vcom_socket_accept4 (). */
2622 vcom_accept4 (int __fd, __SOCKADDR_ARG __addr,
2623 socklen_t * __restrict __addr_len, int __flags)
/* Branch taken when vcom_init () reports a non-zero status. */
2626 if (vcom_init () != 0)
2631 return vcom_socket_accept4 (__fd, __addr, __addr_len, __flags);
/* accept4: exported accept4 () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_accept4 (), bracketed by
 * vcom_socket_main_show () dumps; libc_accept4 () is the libc path.
 * NOTE(review): the first trace and the libc trace both print rv although
 * neither follows an assignment to rv; the libc result is returned directly
 * and never traced. */
2635 accept4 (int __fd, __SOCKADDR_ARG __addr,
2636 socklen_t * __restrict __addr_len, int __flags)
2639 pid_t pid = getpid ();
2642 "[%d] accept4: in the beginning... "
2643 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2644 pid, rv, __fd, __addr, __addr_len, __flags);
/* VCL-owned descriptor: accept through VCL. */
2646 if (is_vcom_socket_fd (__fd))
2649 vcom_socket_main_show ();
2650 rv = vcom_accept4 (__fd, __addr, __addr_len, __flags);
2653 "[%d] accept4: VCL "
2654 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2655 pid, rv, __fd, __addr, __addr_len, __flags);
2657 vcom_socket_main_show ();
2666 "[%d] accept4: libc "
2667 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2668 pid, rv, __fd, __addr, __addr_len, __flags);
/* Hand the unmodified arguments to libc. */
2670 return libc_accept4 (__fd, __addr, __addr_len, __flags);
2674 * Shut down all or part of the connection open on socket FD.
2675 * HOW determines what to shut down:
2676 * SHUT_RD = No more receptions;
2677 * SHUT_WR = No more transmissions;
2678 * SHUT_RDWR = No more receptions or transmissions.
2679 * Returns 0 on success, -1 for errors.
/* vcom_shutdown: VCL-side shutdown. Checks vcom_init () and forwards the
 * descriptor and __how to vcom_socket_shutdown (). */
2682 vcom_shutdown (int __fd, int __how)
/* Branch taken when vcom_init () reports a non-zero status. */
2684 if (vcom_init () != 0)
2688 return vcom_socket_shutdown (__fd, __how);
/* shutdown: exported shutdown () override. Descriptors accepted by
 * is_vcom_socket_fd () go through vcom_shutdown (); libc_shutdown () is the
 * libc path. */
2692 shutdown (int __fd, int __how)
2695 pid_t pid = getpid ();
/* VCL-owned descriptor: shut down through VCL. */
2697 if (is_vcom_socket_fd (__fd))
2699 rv = vcom_shutdown (__fd, __how);
2703 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __how);
/* Hand the unmodified arguments to libc. */
2711 return libc_shutdown (__fd, __how);
/* vcom_epoll_create: VCL-side epoll_create. After the vcom_init () check it
 * delegates to vcom_epoll_create1 () with flags 0; __size is not
 * forwarded. */
2715 vcom_epoll_create (int __size)
/* Branch taken when vcom_init () reports a non-zero status. */
2718 if (vcom_init () != 0)
2728 /* __size argument is ignored "thereafter" */
2729 return vcom_epoll_create1 (0);
2733 * __size argument is ignored, but must be greater than zero
/* epoll_create: exported epoll_create () override. Calls
 * vcom_epoll_create () and traces the result together with __size. */
2736 epoll_create (int __size)
2739 pid_t pid = getpid ();
2741 rv = vcom_epoll_create (__size);
2744 "[%d] epoll_create: " "'%04d'='%04d'\n", pid, rv, __size);
/* vcom_epoll_create1: VCL-side epoll_create1. Checks vcom_init (), tests
 * __flags for bits other than EPOLL_CLOEXEC, then delegates to
 * vcom_socket_epoll_create1 (). */
2754 vcom_epoll_create1 (int __flags)
/* Branch taken when vcom_init () reports a non-zero status. */
2756 if (vcom_init () != 0)
/* True when any bit besides EPOLL_CLOEXEC is set. */
2765 if (__flags & ~EPOLL_CLOEXEC)
2769 /* __flags can be either zero or EPOLL_CLOEXEC */
2770 /* implementation */
2771 return vcom_socket_epoll_create1 (__flags);
2775 * __flags can be either zero or EPOLL_CLOEXEC
/* epoll_create1: exported epoll_create1 () override. Calls
 * vcom_epoll_create1 () and traces the result together with __flags.
 * NOTE(review): the trace label reads epoll_create, not epoll_create1 --
 * confirm whether that is intentional. */
2778 epoll_create1 (int __flags)
2781 pid_t pid = getpid ();
2783 rv = vcom_epoll_create1 (__flags);
2786 "[%d] epoll_create: " "'%04d'='%08x'\n", pid, rv, __flags);
/* ep_op_has_event: non-zero for every epoll_ctl operation except
 * EPOLL_CTL_DEL. vcom_epoll_ctl () uses it to decide whether a NULL event
 * pointer is a problem. */
2796 ep_op_has_event (int op)
2798 return op != EPOLL_CTL_DEL;
/* vcom_epoll_ctl: VCL-side epoll_ctl. Checks vcom_init (), tests that __op
 * is one of EPOLL_CTL_ADD, EPOLL_CTL_MOD or EPOLL_CTL_DEL, tests for a NULL
 * __event on operations that need one (see ep_op_has_event ()), then
 * delegates to vcom_socket_epoll_ctl (). */
2802 vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
2804 if (vcom_init () != 0)
2810 * the requested operation __op is not supported
2811 * by this interface */
2812 if (!((__op == EPOLL_CTL_ADD) ||
2813 (__op == EPOLL_CTL_MOD) || (__op == EPOLL_CTL_DEL)))
2818 /* op is ADD or MOD but event parameter is NULL */
2819 if ((ep_op_has_event (__op) && !__event))
2824 /* fd is same as epfd */
2825 /* do not permit adding an epoll file descriptor inside itself */
2831 /* implementation */
2832 return vcom_socket_epoll_ctl (__epfd, __op, __fd, __event);
2836 * implement the controller interface for epoll
2837 * that enables the insertion/removal/change of
2838 * file descriptors inside the interest set.
/* epoll_ctl: exported epoll_ctl () override. Calls vcom_epoll_ctl () and
 * traces the result with __epfd, __op and __fd; __event is not printed. */
2841 epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
2844 pid_t pid = getpid ();
2846 rv = vcom_epoll_ctl (__epfd, __op, __fd, __event);
2850 "'%04d'='%04d', '%04d', '%04d'\n", pid, rv, __epfd, __op, __fd);
/* epoll_wait: exported epoll_wait () override. Reports a __maxevents value
 * outside 1..EP_MAX_EVENTS on stderr, and waits by calling
 * vcom_socket_epoll_pwait () with a NULL signal mask.
 * NOTE(review): unlike epoll_pwait () there is no is_vcom_epfd () test on
 * __epfd here -- confirm that is intended. */
2860 epoll_wait (int __epfd, struct epoll_event *__events,
2861 int __maxevents, int __timeout)
2864 pid_t pid = getpid ();
/* Range check on the caller-supplied event capacity. */
2866 if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
2868 fprintf (stderr, "[%d] ERROR: epoll_wait() invalid maxevents %d\n",
2875 vcom_socket_epoll_pwait (__epfd, __events, __maxevents, __timeout, NULL);
2879 "'%04d'='%04d', '%p', "
2881 pid, rv, __epfd, __events, __maxevents, __timeout);
/* epoll_pwait: exported epoll_pwait () override. Applies the same
 * __maxevents range check as epoll_wait () and, for an epfd accepted by
 * is_vcom_epfd (), waits through vcom_socket_epoll_pwait () and traces the
 * result together with the signal mask pointer. */
2892 epoll_pwait (int __epfd, struct epoll_event *__events,
2893 int __maxevents, int __timeout, const __sigset_t * __ss)
2896 pid_t pid = getpid ();
/* Range check on the caller-supplied event capacity. */
2898 if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
/* Only epoll descriptors known to VCL take the VCL wait path. */
2904 if (is_vcom_epfd (__epfd))
2907 vcom_socket_epoll_pwait (__epfd, __events, __maxevents, __timeout,
2911 "[%d] epoll_pwait: "
2912 "'%04d'='%04d', '%p', "
2915 pid, rv, __epfd, __events, __maxevents, __timeout, __ss);
2932 /* Poll the file descriptors described by the NFDS structures starting at
2933 FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for
2934 an event to occur; if TIMEOUT is -1, block until an event occurs.
2935 Returns the number of file descriptors with events, zero if timed out,
2938 This function is a cancellation point and therefore not marked with
/* vcom_poll: poll () implementation covering both libc and VCL descriptors.
 *
 * Steps, in order:
 *  - read CLOCK_MONOTONIC into start_time and derive end_time from
 *    __timeout (milliseconds) with timespec_add (); end_time can also be
 *    set to start_time itself;
 *  - check vcom_init (), fetch RLIMIT_NOFILE, and compare __nfds against
 *    nofile_limit.rlim_cur and against MAX_POLL_NFDS_DEFAULT, which is the
 *    capacity of the on-stack vcom_fds array;
 *  - clear every revents field and copy __fds into vcom_fds;
 *  - negate fds so that descriptors accepted by is_vcom_socket_fd () or
 *    is_vcom_epfd () are hidden in __fds, and the other descriptors are
 *    hidden in vcom_fds; fd 0 becomes -1 in vcom_fds because negating 0
 *    would leave it unchanged;
 *  - call libc_poll () on __fds and vcom_socket_poll () on vcom_fds, both
 *    with timeout 0, and loop while __timeout is negative or now is
 *    earlier than end_time;
 *  - at poll_done_update_nfds, for each vcom_fds entry with a non-negative
 *    fd, flip the sign of the fd in __fds back and copy revents across.
 *
 * NOTE(review): negating a VCL fd of 0 in __fds leaves it at 0, so
 * libc_poll () would still see it -- confirm VCL never hands out fd 0.
 * NOTE(review): both pollers are always called with timeout 0, so waiting
 * looks like a busy loop rather than a blocking call -- confirm. */
2942 vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
2945 pid_t pid = getpid ();
2947 struct rlimit nofile_limit;
/* Fixed-size stack copy of the caller array, used for the VCL side. */
2948 struct pollfd vcom_fds[MAX_POLL_NFDS_DEFAULT];
2951 /* actual set of file descriptors to be monitored */
2952 nfds_t libc_nfds = 0;
2953 nfds_t vcom_nfds = 0;
2955 /* ready file descriptors
2957 * number of structures which have nonzero revents fields
2958 * in other words, descriptors with events or errors reported.
2960 /* after call to libc_poll () */
2962 /* after call to vcom_socket_poll () */
2966 /* timeout value in units of timespec */
2967 struct timespec timeout_ts;
2968 struct timespec start_time, now, end_time;
2971 /* get start_time */
2972 rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
2979 /* set timeout_ts & end_time */
2982 /* set timeout_ts */
/* Split the millisecond timeout into whole seconds plus nanoseconds. */
2983 timeout_ts.tv_sec = __timeout / MSEC_PER_SEC;
2984 timeout_ts.tv_nsec = (__timeout % MSEC_PER_SEC) * NSEC_PER_MSEC;
2985 set_normalized_timespec (&timeout_ts,
2986 timeout_ts.tv_sec, timeout_ts.tv_nsec);
2990 end_time = timespec_add (start_time, timeout_ts);
2994 end_time = start_time;
2998 if (vcom_init () != 0)
3004 /* validate __fds */
3011 /* validate __nfds */
3012 /*TBD: call getrlimit once when vcl-ldpreload library is init */
3013 rv = getrlimit (RLIMIT_NOFILE, &nofile_limit);
3019 if (__nfds >= nofile_limit.rlim_cur)
3026 * for the POC, it's fair to assume that nfds is less than 1024
3028 if (__nfds >= MAX_POLL_NFDS_DEFAULT)
3034 /* set revents field (output parameter)
3037 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3039 __fds[fds_idx].revents = 0;
3043 /* set revents field (output parameter)
3044 * to zero for user ignored fds
3046 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3049 * if negative fd, ignore events field
3050 * and set output parameter (revents field) to zero */
3051 if (__fds[fds_idx].fd < 0)
3053 __fds[fds_idx].revents = 0;
3059 * 00. prepare __fds and vcom_fds for polling
3060 * copy __fds to vcom_fds
3061 * 01. negate all except libc fds in __fds,
3062 * ignore user negated fds
3063 * 02. negate all except vcom_fds in vocm fds,
3064 * ignore user negated fds
3065 * ignore fd 0 by setting it to negative number
3067 memcpy (vcom_fds, __fds, sizeof (*__fds) * __nfds);
3070 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3072 /* ignore negative fds */
3073 if (__fds[fds_idx].fd < 0)
3079 * 00. ignore vcom fds in __fds
3080 * 01. ignore libc fds in vcom_fds,
3081 * ignore fd 0 by setting it to negative number.
3082 * as fd 0 cannot be ignored.
3084 if (is_vcom_socket_fd (__fds[fds_idx].fd) ||
3085 is_vcom_epfd (__fds[fds_idx].fd))
3087 __fds[fds_idx].fd = -__fds[fds_idx].fd;
3093 /* ignore fd 0 by setting it to negative number */
3094 if (!vcom_fds[fds_idx].fd)
3096 vcom_fds[fds_idx].fd = -1;
3098 vcom_fds[fds_idx].fd = -vcom_fds[fds_idx].fd;
3105 * poll on libc fds and vcom fds
3107 * specifying a timeout of zero causes libc_poll() and
3108 * vcom_socket_poll() to return immediately, even if no
3109 * file descriptors are ready
3117 * timeout parameter for libc_poll () set to zero
3118 * to poll on libc fds
3121 /* poll on libc fds */
3125 * a timeout of zero causes libc_poll()
3126 * to return immediately
3128 rlibc_nfds = libc_poll (__fds, __nfds, 0);
3132 "'%04d'='%08lu'\n", pid, rlibc_nfds, __nfds);
3137 goto poll_done_update_nfds;
3142 * timeout parameter for vcom_socket_poll () set to zero
3143 * to poll on vcom fds
3146 /* poll on vcom fds */
3150 * a timeout of zero causes vcom_socket_poll()
3151 * to return immediately
3153 rvcom_nfds = vcom_socket_poll (vcom_fds, __nfds, 0);
3157 "'%04d'='%08lu'\n", pid, rvcom_nfds, __nfds);
3161 goto poll_done_update_nfds;
3165 /* check if any file descriptors changed status */
3166 if ((libc_nfds && rlibc_nfds > 0) || (vcom_nfds && rvcom_nfds > 0))
3168 /* something interesting happened */
3169 rv = rlibc_nfds + rvcom_nfds;
3170 goto poll_done_update_nfds;
3173 rv = clock_gettime (CLOCK_MONOTONIC, &now);
3177 goto poll_done_update_nfds;
3181 /* block indefinitely || timeout elapsed */
3182 while ((__timeout < 0) || timespec_compare (&now, &end_time) < 0);
3184 /* timeout expired before anything interesting happened */
3187 poll_done_update_nfds:
3188 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3190 /* ignore negative fds in vcom_fds
3191 * 00. user negated fds
3194 if (vcom_fds[fds_idx].fd < 0)
3199 /* from here on handle positive vcom fds */
3201 * restore vcom fds to positive number in __fds
3202 * and update revents in __fds with the events
3203 * that actually occurred in vcom fds
3205 __fds[fds_idx].fd = -__fds[fds_idx].fd;
3208 __fds[fds_idx].revents = vcom_fds[fds_idx].revents;
3214 fprintf (stderr, "[%d] vpoll: " "'%04d'='%08lu'\n", pid, rv, __nfds);
3219 * 00. The field __fds[i].fd contains a file descriptor for an
3221 * If this field is negative, then the corresponding
3222 * events field is ignored and the revents field returns zero.
3223 * The field __fds[i].events is an input parameter.
3224 * The field __fds[i].revents is an output parameter.
3225 * 01. Specifying a negative value in timeout
3226 * means an infinite timeout.
3227 * Specifying a timeout of zero causes poll() to return
3228 * immediately, even if no file descriptors are ready.
3230 * NOTE: observed __nfds is less than 128 from kubecon strace files
/* poll: exported poll () override. Delegates to vcom_poll () and writes a
 * trace line to stderr before and after the call.
 * NOTE(review): both trace calls read __fds[0] without looking at __nfds or
 * testing __fds for NULL; that is invalid for a NULL or empty array --
 * confirm this cannot happen. */
3235 poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
3238 pid_t pid = getpid ();
3242 fprintf (stderr, "[%d] poll1: " "'%04d'='%08lu, %d, 0x%x'\n",
3243 pid, rv, __nfds, __fds[0].fd, __fds[0].events);
/* All polling work happens in vcom_poll (). */
3244 rv = vcom_poll (__fds, __nfds, __timeout);
3246 fprintf (stderr, "[%d] poll2: " "'%04d'='%08lu, %d, 0x%x'\n",
3247 pid, rv, __nfds, __fds[0].fd, __fds[0].revents);
3257 /* Like poll, but before waiting the threads signal mask is replaced
3258 with that specified in the fourth parameter. For better usability,
3259 the timeout value is specified using a TIMESPEC object.
3261 This function is a cancellation point and therefore not marked with
/* vcom_ppoll: VCL-side ppoll taking a timespec timeout and a signal mask.
 * Begins with the same vcom_init () check as the other vcom_ wrappers. */
3264 vcom_ppoll (struct pollfd *__fds, nfds_t __nfds,
3265 const struct timespec *__timeout, const __sigset_t * __ss)
3267 if (vcom_init () != 0)
/* ppoll: exported ppoll () override; same parameter list as vcom_ppoll ():
 * pollfd array, entry count, timespec timeout and signal mask. */
3276 ppoll (struct pollfd *__fds, nfds_t __nfds,
3277 const struct timespec *__timeout, const __sigset_t * __ss)
/* Prototypes for the library load and unload hooks. CONSTRUCTOR_ATTRIBUTE
 * and DESTRUCTOR_ATTRIBUTE expand to the GCC constructor and destructor
 * attributes defined near the top of this file. */
3287 void CONSTRUCTOR_ATTRIBUTE vcom_constructor (void);
3289 void DESTRUCTOR_ATTRIBUTE vcom_destructor (void);
/* vcom_constructor: load hook. Runs swrap_constructor (), then
 * vcom_init (), and prints a failed or done line tagged with the pid to
 * stdout depending on the vcom_init () status. */
3292 vcom_constructor (void)
3294 pid_t pid = getpid ();
3296 swrap_constructor ();
/* Non-zero status from vcom_init () selects the failed message. */
3297 if (vcom_init () != 0)
3299 printf ("\n[%d] vcom_constructor...failed!\n", pid);
3303 printf ("\n[%d] vcom_constructor...done!\n", pid);
3308 * This function is called when the library is unloaded
/* vcom_destructor: unload hook. Calls swrap_destructor () and prints a
 * completion line tagged with the pid to stdout. */
3311 vcom_destructor (void)
3313 pid_t pid = getpid ();
3316 swrap_destructor ();
3317 printf ("\n[%d] vcom_destructor...done!\n", pid);
3322 * fd.io coding-style-patch-verification: ON
3325 * eval: (c-set-style "gnu")