2 * Copyright (c) 2016 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
22 #include <sys/resource.h>
24 #include <vcl/vcom_socket_wrapper.h>
28 #include <vcl/vppcom.h>
29 #include <vcl/vcom_socket.h>
/* GCC has a printf-style format attribute check. */
32 #ifdef HAVE_FUNCTION_ATTRIBUTE_FORMAT
33 #define PRINTF_ATTRIBUTE(a,b) \
34 __attribute__ ((__format__ (__printf__, a, b)))
36 #define PRINTF_ATTRIBUTE(a,b)
37 #endif /* HAVE_FUNCTION_ATTRIBUTE_FORMAT */
39 #define HAVE_CONSTRUCTOR_ATTRIBUTE
40 #ifdef HAVE_CONSTRUCTOR_ATTRIBUTE
41 #define CONSTRUCTOR_ATTRIBUTE \
42 __attribute__ ((constructor))
44 #define CONSTRUCTOR_ATTRIBUTE
45 #endif /* HAVE_CONSTRUCTOR_ATTRIBUTE */
47 #define HAVE_DESTRUCTOR_ATTRIBUTE
48 #ifdef HAVE_DESTRUCTOR_ATTRIBUTE
49 #define DESTRUCTOR_ATTRIBUTE \
50 __attribute__ ((destructor))
52 #define DESTRUCTOR_ATTRIBUTE
55 #define HAVE_ADDRESS_SANITIZER_ATTRIBUTE
56 #ifdef HAVE_ADDRESS_SANITIZER_ATTRIBUTE
57 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE \
58 __attribute__((no_sanitize_address))
60 #define DO_NOT_SANITIZE_ADDRESS_ATTRIBUTE
63 #define VCOM_SOCKET_FD_MAX 0x10000
65 static char vcom_app_name[MAX_VCOM_APP_NAME];
68 * RETURN: 0 on success or -1 on error.
71 vcom_set_app_name (char *__app_name)
73 return snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-%s-%d",
74 __app_name, getpid ()) < 0 ? -1 : 0;
80 if (vcom_app_name[0] == '\0')
82 snprintf (vcom_app_name, MAX_VCOM_APP_NAME, "vcom-app-%d", getpid ());
88 * 1 if init, 0 otherwise
90 static int is_vcom_init;
93 * TBD: Make it thread safe
97 * constructor function called before main is called
98 * RETURN: 0 on success -1 on failure
103 pid_t pid = getpid ();
107 if (vppcom_app_create (vcom_get_app_name ()) != 0)
109 printf ("\n[%d] vcom_init...failed!\n", pid);
112 "[%d] vcom_init: vppcom_app_create failed!\n", pid);
115 if (vcom_socket_main_init () != 0)
117 printf ("\n[%d] vcom_init...failed!\n", pid);
120 "[%d] vcom_init: vcom_socket_main_init failed!\n", pid);
125 printf ("\n[%d] vcom_init...done!\n", pid);
133 pid_t pid = getpid ();
137 vcom_socket_main_destroy ();
138 vppcom_app_destroy ();
140 fprintf (stderr, "\n[%d] vcom_destroy...done!\n", pid);
/*
 * Ask the vcom socket layer whether fd is one of its socket descriptors.
 * RETURN: the value reported by vcom_socket_is_vcom_fd().
 */
int
is_vcom_socket_fd (int fd)
{
  int is_vcom = vcom_socket_is_vcom_fd (fd);

  return is_vcom;
}
/*
 * Ask the vcom socket layer whether epfd is one of its epoll descriptors.
 * RETURN: the value reported by vcom_socket_is_vcom_epfd().
 */
int
is_vcom_epfd (int epfd)
{
  int is_vcom = vcom_socket_is_vcom_epfd (epfd);

  return is_vcom;
}
159 * Generic glibc fd api
/* Close the file descriptor FD.

   This function is a cancellation point and therefore
   not marked with __THROW.  */
/*
 * PRE: is_vcom_socket_fd(__fd) == 1
 * RETURN: 0 on success and -1 for errors.
 */
int
vcom_close (int __fd)
{
  /*
   * A failed init and a failed socket close are both reported as -1;
   * the close is only attempted once init has succeeded.
   */
  if (vcom_init () != 0 || vcom_socket_close (__fd) != 0)
    {
      return -1;
    }

  return 0;
}
188 * RETURN: 0 on success, or -1 on error
194 pid_t pid = getpid ();
196 if (is_vcom_socket_fd (__fd) || is_vcom_epfd (__fd))
199 fprintf (stderr, "[%d] close: fd %d\n", pid, __fd);
200 rv = vcom_close (__fd);
202 fprintf (stderr, "[%d] close: vcom_close() returned %d\n", pid, rv);
210 return libc_close (__fd);
/* Read NBYTES into BUF from FD.  Return the
   number read, -1 for errors or 0 for EOF.

   This function is a cancellation point and therefore
   not marked with __THROW.  */
/*
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_read().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_read().
 */
ssize_t
vcom_read (int __fd, void *__buf, size_t __nbytes)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_read (__fd, __buf, __nbytes);
    }

  return -1;
}
230 read (int __fd, void *__buf, size_t __nbytes)
233 pid_t pid = getpid ();
234 pthread_t tid = pthread_self ();
236 if (is_vcom_socket_fd (__fd))
240 "[%d][%lu (0x%lx)] read:1 "
241 "'%04d'='%04d', '%p', '%04d'\n",
242 pid, (unsigned long) tid, (unsigned long) tid,
243 (int) size, __fd, __buf, (int) __nbytes);
244 size = vcom_read (__fd, __buf, __nbytes);
247 "[%d][%lu (0x%lx)] read:2 "
248 "'%04d'='%04d', '%p', '%04d'\n",
249 pid, (unsigned long) tid, (unsigned long) tid,
250 (int) size, __fd, __buf, (int) __nbytes);
258 return libc_read (__fd, __buf, __nbytes);
/*
 * Scatter-read variant of vcom_read: makes sure vcom is initialised,
 * then forwards the request to vcom_socket_readv().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_readv().
 */
ssize_t
vcom_readv (int __fd, const struct iovec * __iov, int __iovcnt)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_readv (__fd, __iov, __iovcnt);
    }

  return -1;
}
273 readv (int __fd, const struct iovec * __iov, int __iovcnt)
277 if (is_vcom_socket_fd (__fd))
279 size = vcom_readv (__fd, __iov, __iovcnt);
288 return libc_readv (__fd, __iov, __iovcnt);
/* Write N bytes of BUF to FD.  Return the number written, or -1.

   This function is a cancellation point and therefore
   not marked with __THROW.  */
/*
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_write().  The const qualifier on __buf is
 * cast away for that call.
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_write().
 */
ssize_t
vcom_write (int __fd, const void *__buf, size_t __n)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_write (__fd, (void *) __buf, __n);
    }

  return -1;
}
307 write (int __fd, const void *__buf, size_t __n)
310 pid_t pid = getpid ();
311 pthread_t tid = pthread_self ();
313 if (is_vcom_socket_fd (__fd))
317 "[%d][%lu (0x%lx)] write:1 "
318 "'%04d'='%04d', '%p', '%04d'\n",
319 pid, (unsigned long) tid, (unsigned long) tid,
320 (int) size, __fd, __buf, (int) __n);
321 size = vcom_write (__fd, __buf, __n);
324 "[%d][%lu (0x%lx)] write:2 "
325 "'%04d'='%04d', '%p', '%04d'\n",
326 pid, (unsigned long) tid, (unsigned long) tid,
327 (int) size, __fd, __buf, (int) __n);
335 return libc_write (__fd, __buf, __n);
/*
 * Gather-write variant of vcom_write: makes sure vcom is initialised,
 * then forwards the request to vcom_socket_writev().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_writev().
 */
ssize_t
vcom_writev (int __fd, const struct iovec * __iov, int __iovcnt)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_writev (__fd, __iov, __iovcnt);
    }

  return -1;
}
350 writev (int __fd, const struct iovec * __iov, int __iovcnt)
354 if (is_vcom_socket_fd (__fd))
356 size = vcom_writev (__fd, __iov, __iovcnt);
365 return libc_writev (__fd, __iov, __iovcnt);
/* Do the file control operation described by CMD on FD.
   The remaining arguments are interpreted depending on CMD.

   This function is a cancellation point and therefore
   not marked with __THROW.  */
/*
 * va_list flavour: makes sure vcom is initialised, then forwards the
 * request, including the caller's argument list, to
 * vcom_socket_fcntl_va().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_fcntl_va().
 */
int
vcom_fcntl_va (int __fd, int __cmd, va_list __ap)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_fcntl_va (__fd, __cmd, __ap);
    }

  return -1;
}
385 vcom_fcntl (int __fd, int __cmd, ...)
390 if (is_vcom_socket_fd (__fd))
392 va_start (ap, __cmd);
393 rv = vcom_fcntl_va (__fd, __cmd, ap);
400 fcntl (int __fd, int __cmd, ...)
404 pid_t pid = getpid ();
406 va_start (ap, __cmd);
407 if (is_vcom_socket_fd (__fd))
409 rv = vcom_fcntl_va (__fd, __cmd, ap);
413 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __cmd);
421 rv = libc_vfcntl (__fd, __cmd, ap);
/*
 * va_list flavour of ioctl for vcom descriptors: makes sure vcom is
 * initialised, then forwards the request, including the caller's
 * argument list, to vcom_socket_ioctl_va().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_ioctl_va().
 */
int
vcom_ioctl_va (int __fd, unsigned long int __cmd, va_list __ap)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_ioctl_va (__fd, __cmd, __ap);
    }

  return -1;
}
440 vcom_ioctl (int __fd, unsigned long int __cmd, ...)
445 if (is_vcom_socket_fd (__fd))
447 va_start (ap, __cmd);
448 rv = vcom_ioctl_va (__fd, __cmd, ap);
455 ioctl (int __fd, unsigned long int __cmd, ...)
459 pid_t pid = getpid ();
461 va_start (ap, __cmd);
462 if (is_vcom_socket_fd (__fd))
464 rv = vcom_ioctl_va (__fd, __cmd, ap);
468 "'%04d'='%04d', '%04ld'\n", pid, rv, __fd, __cmd);
476 rv = libc_vioctl (__fd, __cmd, ap);
484 * Check the first NFDS descriptors each in READFDS (if not NULL) for
485 * read readiness, in WRITEFDS (if not NULL) for write readiness,
486 * and in EXCEPTFDS (if not NULL) for exceptional conditions.
487 * If TIMEOUT is not NULL, time out after waiting the interval
488 * specified therein. Returns the number of ready descriptors,
491 * This function is a cancellation point and therefore not marked
496 * clear all vcom FDs from fd_sets __readfds, __writefds and
497 * __exceptfds and update the new nfds
499 * new nfds is the highest-numbered file descriptor
500 * in any of the three sets, plus 1
502 * Return the number of file descriptors contained in the
503 * three descriptor sets. ie. the total number of the bits
504 * that are set in __readfds, __writefds and __exceptfds
507 vcom_fd_clear (int __nfds,
509 fd_set * __restrict __readfds,
510 fd_set * __restrict __writefds,
511 fd_set * __restrict __exceptfds)
514 /* invalid max_fd is -1 */
519 /* clear all vcom fd from the sets */
520 for (fd = 0; fd < __nfds; fd++)
523 /* clear vcom fd from set */
528 if ((F) && FD_ISSET (fd, (F))) \
530 if (is_vcom_socket_fd (fd)) \
544 * compute nfd and __new_nfds
546 for (fd = 0; fd < __nfds; fd++)
553 if ((F) && FD_ISSET (fd, (F))) \
569 *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
574 * Return the number of file descriptors contained in the
575 * three descriptor sets. ie. the total number of the bits
576 * that are set in __readfds, __writefds and __exceptfds
579 vcom_fd_set (int __nfds,
582 fd_set * __restrict __readfds,
583 fd_set * __restrict __writefds, fd_set * __restrict __exceptfds,
585 fd_set * __restrict __saved_readfds,
586 fd_set * __restrict __saved_writefds,
587 fd_set * __restrict __saved_exceptfds)
590 /* invalid max_fd is -1 */
594 for (fd = 0; fd < __nfds; fd++)
601 if ((F) && (S) && FD_ISSET (fd, (S))) \
603 if (is_vcom_socket_fd (fd)) \
610 _(__saved_readfds, __readfds);
611 _(__saved_writefds, __writefds);
617 * compute nfd and __new_nfds
619 for (fd = 0; fd < __nfds; fd++)
626 if ((F) && FD_ISSET (fd, (F))) \
642 *__new_nfds = max_fd != -1 ? max_fd + 1 : 0;
647 * split select sets(src) into
648 * vcom sets(dest1) and libc sets(dest2)
652 /* src, select sets */
654 fd_set * __restrict readfds,
655 fd_set * __restrict writefds,
656 fd_set * __restrict exceptfds,
657 /* dest1, vcom sets */
659 fd_set * __restrict vcom_readfds,
660 fd_set * __restrict vcom_writefds,
661 fd_set * __restrict vcom_exceptfds, int *vcom_nfd,
662 /* dest2, libc sets */
664 fd_set * __restrict libc_readfds,
665 fd_set * __restrict libc_writefds,
666 fd_set * __restrict libc_exceptfds, int *libc_nfd)
671 /* invalid max_fd is -1 */
672 int vcom_max_fd = -1;
676 /* invalid max_fd is -1 */
677 int libc_max_fd = -1;
681 for (fd = 0; fd < nfds; fd++)
689 if ((S) && FD_ISSET (fd, (S))) \
691 if (is_vcom_socket_fd (fd)) \
696 if (fd > vcom_max_fd) \
708 if (fd > libc_max_fd) \
718 _(readfds, vcom_readfds, libc_readfds);
719 _(writefds, vcom_writefds, libc_writefds);
720 _(exceptfds, vcom_exceptfds, libc_exceptfds);
725 *vcom_nfds = vcom_max_fd != -1 ? vcom_max_fd + 1 : 0;
727 *vcom_nfd = vcom_nfd2;
729 *libc_nfds = libc_max_fd != -1 ? libc_max_fd + 1 : 0;
731 *libc_nfd = libc_nfd2;
735 * merge vcom sets(src1) and libc sets(src2)
736 * into select sets(dest)
740 /* dest, select sets */
742 fd_set * __restrict readfds,
743 fd_set * __restrict writefds,
744 fd_set * __restrict exceptfds, int *nfd,
745 /* src1, vcom sets */
747 fd_set * __restrict vcom_readfds,
748 fd_set * __restrict vcom_writefds,
749 fd_set * __restrict vcom_exceptfds, int vcom_nfd,
750 /* src2, libc sets */
752 fd_set * __restrict libc_readfds,
753 fd_set * __restrict libc_writefds,
754 fd_set * __restrict libc_exceptfds, int libc_nfd)
757 /* invalid max_fd is -1 */
764 * dest |= src at current bit index
765 * update MAX and NFD of dest fd set
771 * MAX current max_fd of dest fd sets
772 * NFD current nfd of dest fd sets
773 * N nfds of source fd set
775 #define FD_BIT_OR(FD,FS,BI, \
777 if ((FS) && (FD) && FD_ISSET ((BI), (FS))) \
779 FD_SET ((BI), (FD)); \
790 * SR,SW,SE source RWE fd sets
791 * DR,DW,DE dest RWE fd sets
793 * NFDS nfds of source fd sets
794 * MAX current max_fd of dest fd sets
795 * NFD current nfd of dest fd sets
797 #define FD_RWE_SETS_OR(DR,DW,DE, \
803 for ((BI) = 0; (BI) < (NFDS); (BI)++) \
805 FD_BIT_OR((DR), (SR), (BI), (MAX), (NFD)); \
806 FD_BIT_OR((DW), (SW), (BI), (MAX), (NFD)); \
807 FD_BIT_OR((DE), (SE), (BI), (MAX), (NFD)); \
813 /* source(vcom) to dest(select) rwe fd sets */
814 FD_RWE_SETS_OR (readfds, writefds, exceptfds,
815 vcom_readfds, vcom_writefds, vcom_exceptfds,
816 fd, vcom_nfds, max_fd, nfd2);
818 /* source(libc) to dest(select) rwe fd sets */
819 FD_RWE_SETS_OR (readfds, writefds, exceptfds,
820 libc_readfds, libc_writefds, libc_exceptfds,
821 fd, libc_nfds, max_fd, nfd2);
823 #undef FD_RWE_SETS_OR
827 *nfds = max_fd != -1 ? max_fd + 1 : 0;
/*
 * Test whether a descriptor set has no descriptor in it.
 * All FD_SETSIZE positions are examined.
 *
 * RETURN 1 if fds is NULL or empty. 0 otherwise
 */
static inline int
fd_set_iszero (fd_set * __restrict fds)
{
  int fd = 0;

  if (fds)
    {
      /* stop at the first descriptor found in the set */
      while (fd < FD_SETSIZE)
        {
          if (FD_ISSET (fd, fds))
            {
              return 0;
            }
          fd++;
        }
    }

  /* NULL pointer, or no descriptor present */
  return 1;
}
862 typedef long int s64;
863 typedef unsigned long int u64;
865 typedef long long int __s64;
866 typedef unsigned long long int __u64;
868 typedef __s64 time64_t;
869 typedef __u64 timeu64_t;
871 /* Parameters used to convert the timespec values: */
872 #define MSEC_PER_SEC 1000L
873 #define USEC_PER_MSEC 1000L
874 #define NSEC_PER_USEC 1000L
875 #define NSEC_PER_MSEC 1000000L
876 #define USEC_PER_SEC 1000000L
877 #define NSEC_PER_SEC 1000000000L
878 #define FSEC_PER_SEC 1000000000000000LL
888 #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
#ifdef VCOM_USE_TIMESPEC_EQUAL
/*
 * RETURN: 1 when a and b carry the same tv_sec and the same tv_nsec,
 * 0 otherwise.
 */
static inline int
timespec_equal (const struct timespec *a, const struct timespec *b)
{
  if (a->tv_sec != b->tv_sec)
    {
      return 0;
    }

  return a->tv_nsec == b->tv_nsec;
}
#endif
/*
 * Order two timespec values: tv_sec is compared first, tv_nsec breaks
 * the tie.
 *
 * lhs < rhs:  return <0
 * lhs == rhs: return 0
 * lhs > rhs:  return >0
 *
 * The nanosecond fields are compared rather than subtracted: tv_nsec
 * is a long, and returning a long difference through an int can
 * truncate it and produce the wrong sign (or 0 for unequal values).
 */
static inline int
timespec_compare (const struct timespec *lhs, const struct timespec *rhs)
{
  if (lhs->tv_sec < rhs->tv_sec)
    return -1;
  if (lhs->tv_sec > rhs->tv_sec)
    return 1;
  if (lhs->tv_nsec < rhs->tv_nsec)
    return -1;
  return lhs->tv_nsec > rhs->tv_nsec;
}
#ifdef VCOM_USE_TIMEVAL_COMPARE
/*
 * Order two timeval values: tv_sec is compared first, tv_usec breaks
 * the tie.
 *
 * lhs < rhs:  return <0
 * lhs == rhs: return 0
 * lhs > rhs:  return >0
 *
 * The microsecond fields are compared rather than subtracted so that a
 * difference wider than int cannot be truncated into the wrong sign.
 */
static inline int
timeval_compare (const struct timeval *lhs, const struct timeval *rhs)
{
  if (lhs->tv_sec < rhs->tv_sec)
    return -1;
  if (lhs->tv_sec > rhs->tv_sec)
    return 1;
  if (lhs->tv_usec < rhs->tv_usec)
    return -1;
  return lhs->tv_usec > rhs->tv_usec;
}
#endif
925 extern void set_normalized_timespec (struct timespec *ts, time_t sec,
/*
 * add = lhs + rhs, in normalized form
 *
 * The seconds and nanoseconds are summed field by field and handed to
 * set_normalized_timespec(), which produces the returned value.
 */
static inline struct timespec
timespec_add (struct timespec lhs, struct timespec rhs)
{
  struct timespec sum;
  time_t sec_total = lhs.tv_sec + rhs.tv_sec;
  long nsec_total = lhs.tv_nsec + rhs.tv_nsec;

  set_normalized_timespec (&sum, sec_total, nsec_total);
  return sum;
}
/*
 * sub = lhs - rhs, in normalized form
 *
 * The seconds and nanoseconds are subtracted field by field and handed
 * to set_normalized_timespec(), which produces the returned value.
 */
static inline struct timespec
timespec_sub (struct timespec lhs, struct timespec rhs)
{
  struct timespec diff;
  time_t sec_diff = lhs.tv_sec - rhs.tv_sec;
  long nsec_diff = lhs.tv_nsec - rhs.tv_nsec;

  set_normalized_timespec (&diff, sec_diff, nsec_diff);
  return diff;
}
957 * set_normalized_timespec - set timespec sec and nsec parts and normalize
959 * @ts: pointer to timespec variable to be set
960 * @sec: seconds to set
961 * @nsec: nanoseconds to set
963 * Set seconds and nanoseconds field of a timespec variable and
964 * normalize to the timespec storage format
966 * Note: The tv_nsec part is always in the range of
967 * 0 <= tv_nsec < NSEC_PER_SEC
968 * For negative values only the tv_sec field is negative !
971 set_normalized_timespec (struct timespec *ts, time_t sec, s64 nsec)
973 while (nsec >= NSEC_PER_SEC)
976 * The following asm() prevents the compiler from
977 * optimising this loop into a modulo operation. See
978 * also __iter_div_u64_rem() in include/linux/time.h
980 asm ("":"+rm" (nsec));
981 nsec -= NSEC_PER_SEC;
986 asm ("":"+rm" (nsec));
987 nsec += NSEC_PER_SEC;
994 #define vcom_timerisvalid(tvp) (!((tvp)->tv_sec < 0 || (tvp)->tv_usec < 0))
/*
 * Macros for converting between `struct timeval' and `struct timespec'.
 * Both arguments are pointers.
 *
 * Each macro expands to one statement (do { ... } while (0)), so it can
 * be followed by a semicolon wherever a statement is allowed, including
 * an unbraced if/else branch; a bare { ... } body would break there.
 */
#define VCOM_TIMEVAL_TO_TIMESPEC(tv, ts) do {                   \
        (ts)->tv_sec = (tv)->tv_sec;                            \
        (ts)->tv_nsec = (tv)->tv_usec * 1000;                   \
} while (0)
#define VCOM_TIMESPEC_TO_TIMEVAL(tv, ts) do {                   \
        (tv)->tv_sec = (ts)->tv_sec;                            \
        (tv)->tv_usec = (ts)->tv_nsec / 1000;                   \
} while (0)
/*
 * select() over vcom descriptors only: forwards every argument,
 * unchanged, to vcom_socket_select().
 * RETURN: the result of vcom_socket_select().
 */
static inline int
vcom_select_impl (int vcom_nfds, fd_set * __restrict vcom_readfds,
                  fd_set * __restrict vcom_writefds,
                  fd_set * __restrict vcom_exceptfds,
                  struct timeval *__restrict timeout)
{
  int ready = vcom_socket_select (vcom_nfds, vcom_readfds,
                                  vcom_writefds, vcom_exceptfds, timeout);

  return ready;
}
1017 vcom_select (int __nfds, fd_set * __restrict __readfds,
1018 fd_set * __restrict __writefds,
1019 fd_set * __restrict __exceptfds,
1020 struct timeval *__restrict __timeout)
1024 pid_t pid = getpid ();
1027 /* block indefinitely */
1029 int first_clock_gettime_failed = 0;
1030 /* timeout value in units of timespec */
1031 struct timespec timeout_ts;
1032 struct timespec start_time, now, end_time;
1034 /* select sets attributes - after merge */
1040 fd_set vcom_readfds;
1041 fd_set vcom_writefds;
1042 fd_set vcom_exceptfds;
1047 fd_set libc_readfds;
1048 fd_set libc_writefds;
1049 fd_set libc_exceptfds;
1053 struct timeval tv = {.tv_sec = 0,.tv_usec = 0 };
1055 /* validate __timeout */
1058 /* validate tv_sec */
1060 if (!vcom_timerisvalid (__timeout))
1066 /* validate tv_usec */
1068 /* init timeout_ts */
1069 VCOM_TIMEVAL_TO_TIMESPEC (__timeout, &timeout_ts);
1070 set_normalized_timespec (&timeout_ts,
1071 timeout_ts.tv_sec, timeout_ts.tv_nsec);
1074 rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
1078 first_clock_gettime_failed = 1;
1085 if (timerisset (__timeout))
1087 end_time = timespec_add (start_time, timeout_ts);
1092 * if both fields of the timeout structure are zero,
1093 * then select returns immediately
1095 end_time = start_time;
1100 /* block indefinitely */
1106 if (vcom_init () != 0)
1112 /* validate __nfds */
1113 if (__nfds < 0 || __nfds > FD_SETSIZE)
1121 * usleep(3) emulation
1124 /* call libc_select() with a finite timeout and
1125 * no file descriptors or empty fd sets and
1128 (!__readfds || fd_set_iszero (__readfds)) &&
1129 (!__writefds || fd_set_iszero (__writefds)) &&
1130 (!__exceptfds || fd_set_iszero (__exceptfds)))
1134 rv = libc_select (__nfds,
1135 __readfds, __writefds, __exceptfds, __timeout);
1141 /* TBD: block indefinitely or return -EINVAL */
1147 /* init once before the polling loop */
1149 /* zero vcom and libc fd sets */
1163 _(__readfds, &vcom_readfds, &libc_readfds);
1164 _(__writefds, &vcom_writefds, &libc_writefds);
1165 _(__exceptfds, &vcom_exceptfds, &libc_exceptfds);
1176 /* src, select sets */
1177 __nfds, __readfds, __writefds, __exceptfds,
1178 /* dest1, vcom sets */
1179 __readfds || __writefds || __exceptfds ?
1181 __readfds ? &vcom_readfds : NULL,
1182 __writefds ? &vcom_writefds : NULL,
1183 __exceptfds ? &vcom_exceptfds : NULL,
1184 __readfds || __writefds || __exceptfds ?
1186 /* dest2, libc sets */
1187 __readfds || __writefds || __exceptfds ?
1189 __readfds ? &libc_readfds : NULL,
1190 __writefds ? &libc_writefds : NULL,
1191 __exceptfds ? &libc_exceptfds : NULL,
1192 __readfds || __writefds || __exceptfds ?
1205 * if both fields of timeval structure are zero,
1206 * vcom_select_impl and libc_select returns immediately.
1207 * useful for polling and ensure fairness among
1208 * file descriptors watched.
1215 /* select on vcom fds */
1218 vcom_nfd = vcom_select_impl (vcom_nfds,
1219 __readfds ? &vcom_readfds : NULL,
1220 __writefds ? &vcom_writefds : NULL,
1221 __exceptfds ? &vcom_exceptfds : NULL,
1225 "[%d] select vcom: "
1226 "'%04d'='%04d'\n", pid, vcom_nfd, vcom_nfds);
1234 /* select on libc fds */
1237 libc_nfd = libc_select (libc_nfds,
1238 __readfds ? &libc_readfds : NULL,
1239 __writefds ? &libc_writefds : NULL,
1240 __exceptfds ? &libc_exceptfds : NULL, &tv);
1243 "[%d] select libc: "
1244 "'%04d'='%04d'\n", pid, libc_nfd, libc_nfds);
1248 /* tv becomes undefined */
1255 /* check if any file descriptors changed status */
1256 if ((vcom_nfds && vcom_nfd > 0) || (libc_nfds && libc_nfd > 0))
1258 /* zero the sets before merge and exit */
1278 * on exit, sets are modified in place to indicate which
1279 * file descriptors actually changed status
1282 /* dest, select sets */
1284 __readfds, __writefds, __exceptfds, &new_nfd,
1285 /* src1, vcom sets */
1287 __readfds ? &vcom_readfds : NULL,
1288 __writefds ? &vcom_writefds : NULL,
1289 __exceptfds ? &vcom_exceptfds : NULL, vcom_nfd,
1290 /* src2, libc sets */
1292 __readfds ? &libc_readfds : NULL,
1293 __writefds ? &libc_writefds : NULL,
1294 __exceptfds ? &libc_exceptfds : NULL, libc_nfd);
1296 * return the number of file descriptors contained in the
1297 * three returned sets
1309 rv = new_nfd == -1 ? 0 : new_nfd;
1313 rv = clock_gettime (CLOCK_MONOTONIC, &now);
1320 while (no_timeout || timespec_compare (&now, &end_time) < 0);
1322 /* timeout expired before anything interesting happened */
1328 fprintf (stderr, "[%d] vselect1: " "'%04d'='%04d'\n", pid, rv, __nfds);
1330 * modify timeout parameter to reflect the amount of time not slept
1334 if (vcom_timerisvalid (__timeout))
1336 /* timeout expired */
1339 timerclear (__timeout);
1341 else if (!first_clock_gettime_failed)
1343 rv2 = clock_gettime (CLOCK_MONOTONIC, &now);
1350 struct timespec ts_delta;
1351 ts_delta = timespec_sub (end_time, now);
1352 VCOM_TIMESPEC_TO_TIMEVAL (__timeout, &ts_delta);
1358 fprintf (stderr, "[%d] vselect2: " "'%04d',='%04d'\n", pid, rv, __nfds);
1364 vcom_select_internal (int __nfds, fd_set * __restrict __readfds,
1365 fd_set * __restrict __writefds,
1366 fd_set * __restrict __exceptfds,
1367 struct timeval *__restrict __timeout)
1372 pid_t pid = getpid ();
1374 fd_set saved_readfds;
1375 fd_set saved_writefds;
1376 fd_set saved_exceptfds;
1378 /* validate __nfds */
1385 /* validate __timeout */
1388 /* validate tv_sec */
1390 if (__timeout->tv_sec < 0 || __timeout->tv_usec < 0)
1396 /* validate tv_usec */
1400 /* init saved_x fds */
1403 saved_readfds = *__readfds;
1405 memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
1410 FD_ZERO (&saved_readfds);
1415 saved_writefds = *__writefds;
1417 memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
1423 FD_ZERO (&saved_writefds);
1428 saved_exceptfds = *__exceptfds;
1430 memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
1436 FD_ZERO (&saved_exceptfds);
1439 /* clear vcom fds */
1440 nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
1442 /* set to an invalid value */
1444 /* have kernel fds */
1446 rv = libc_select (new_nfds, __readfds,
1447 __writefds, __exceptfds, __timeout);
1449 if (new_nfds && rv == -1)
1451 /* on error, the file descriptor sets are unmodified */
1453 *__readfds = saved_readfds;
1455 *__writefds = saved_writefds;
1457 *__exceptfds = saved_exceptfds;
1460 else if ((new_nfds && rv != -1) || (rv == -2))
1462 /* restore vcom fds */
1463 nfd = vcom_fd_set (__nfds,
1468 &saved_readfds, &saved_writefds, &saved_exceptfds);
1473 fprintf (stderr, "[%d] select: " "'%04d'='%04d'\n", pid, rv, __nfds);
1478 select (int __nfds, fd_set * __restrict __readfds,
1479 fd_set * __restrict __writefds,
1480 fd_set * __restrict __exceptfds, struct timeval *__restrict __timeout)
1483 pid_t pid = getpid ();
1486 fprintf (stderr, "[%d] select1: " "'%04d'='%04d'\n", pid, rv, __nfds);
1487 rv = vcom_select (__nfds, __readfds, __writefds, __exceptfds, __timeout);
1489 fprintf (stderr, "[%d] select2: " "'%04d'='%04d'\n", pid, rv, __nfds);
1498 #ifdef __USE_XOPEN2K
1500 * Same as above only that the TIMEOUT value is given with higher
 * resolution and a sigmask which is set temporarily.  This
1502 * version should be used.
1504 * This function is a cancellation point and therefore not marked
1508 vcom_pselect (int __nfds, fd_set * __restrict __readfds,
1509 fd_set * __restrict __writefds,
1510 fd_set * __restrict __exceptfds,
1511 const struct timespec *__restrict __timeout,
1512 const __sigset_t * __restrict __sigmask)
1517 for (fd = 0; fd < __nfds; fd++)
1519 if (__readfds && FD_ISSET (fd, __readfds))
1521 if (is_vcom_socket_fd (fd))
1527 if (__writefds && FD_ISSET (fd, __writefds))
1529 if (is_vcom_socket_fd (fd))
1534 if (__exceptfds && FD_ISSET (fd, __exceptfds))
1536 if (is_vcom_socket_fd (fd))
1538 FD_CLR (fd, __exceptfds);
1546 pselect (int __nfds, fd_set * __restrict __readfds,
1547 fd_set * __restrict __writefds,
1548 fd_set * __restrict __exceptfds,
1549 const struct timespec *__restrict __timeout,
1550 const __sigset_t * __restrict __sigmask)
1555 pid_t pid = getpid ();
1557 fd_set saved_readfds;
1558 fd_set saved_writefds;
1559 fd_set saved_exceptfds;
1561 /* validate __nfds */
1568 /* validate __timeout */
1571 /* validate tv_sec */
1573 if (__timeout->tv_sec < 0 || __timeout->tv_nsec < 0)
1579 /* validate tv_usec */
1583 /* init saved fds */
1586 saved_readfds = *__readfds;
1588 memcpy (&saved_readfds, __readfds, sizeof (*__readfds));
1593 FD_ZERO (&saved_readfds);
1598 saved_writefds = *__writefds;
1600 memcpy (&saved_writefds, __writefds, sizeof (*__writefds));
1606 FD_ZERO (&saved_writefds);
1611 saved_exceptfds = *__exceptfds;
1613 memcpy (&saved_exceptfds, __exceptfds, sizeof (*__exceptfds));
1619 FD_ZERO (&saved_exceptfds);
1622 /* clear vcom fds */
1623 nfd = vcom_fd_clear (__nfds, &new_nfds, __readfds, __writefds, __exceptfds);
1625 /* set to an invalid value */
1628 rv = libc_pselect (new_nfds,
1630 __writefds, __exceptfds, __timeout, __sigmask);
1632 if (new_nfds && rv == -1)
1634 /* on error, the file descriptor sets are unmodified */
1636 *__readfds = saved_readfds;
1638 *__writefds = saved_writefds;
1640 *__exceptfds = saved_exceptfds;
1643 else if ((new_nfds && rv != -1) || (rv == -2))
1645 /* restore vcom fds */
1646 nfd = vcom_fd_set (__nfds,
1651 &saved_readfds, &saved_writefds, &saved_exceptfds);
1656 fprintf (stderr, "[%d] pselect: " "'%04d'='%04d'\n", pid, rv, __nfds);
1663 * Socket specific glibc api
/* Create a new socket of type TYPE in domain DOMAIN, using
 * protocol PROTOCOL.  If PROTOCOL is zero, one is chosen
 * automatically.  Returns a file descriptor for the new socket,
 * or -1 for errors.
 *
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_socket().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_socket().
 */
int
vcom_socket (int __domain, int __type, int __protocol)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_socket (__domain, __type, __protocol);
    }

  return -1;
}
1687 socket (int __domain, int __type, int __protocol)
1690 pid_t pid = getpid ();
1691 pthread_t tid = pthread_self ();
1693 /* handle domains implemented by vpp */
1698 /* handle types implemented by vpp */
1699 switch (__type & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
1704 vcom_socket_main_show ();
1705 rv = vcom_socket (__domain, __type, __protocol);
1708 "[%d][%lu (0x%lx)] socket: "
1709 "'%04d'= D='%04d', T='%04d', P='%04d'\n",
1710 pid, (unsigned long) tid, (unsigned long) tid,
1711 rv, __domain, __type, __protocol);
1713 vcom_socket_main_show ();
1723 goto CALL_GLIBC_SOCKET_API;
1730 goto CALL_GLIBC_SOCKET_API;
1734 CALL_GLIBC_SOCKET_API:
1735 return libc_socket (__domain, __type, __protocol);
/*
 * Create two new sockets, of type TYPE in domain DOMAIN and using
 * protocol PROTOCOL, which are connected to each other, and put file
 * descriptors for them in FDS[0] and FDS[1].  If PROTOCOL is zero,
 * one will be chosen automatically.
 * Returns 0 on success, -1 for errors.
 *
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_socketpair(); -1 is returned when
 * vcom_init() fails.
 */
int
vcom_socketpair (int __domain, int __type, int __protocol, int __fds[2])
{
  if (vcom_init () == 0)
    {
      return vcom_socket_socketpair (__domain, __type, __protocol, __fds);
    }

  return -1;
}
1757 socketpair (int __domain, int __type, int __protocol, int __fds[2])
1760 pid_t pid = getpid ();
1762 /* handle domains implemented by vpp */
1767 /* handle types implemented by vpp */
1772 rv = vcom_socketpair (__domain, __type, __protocol, __fds);
1776 "'%04d'= D='%04d', T='%04d', P='%04d'\n",
1777 pid, rv, __domain, __type, __protocol);
1787 goto CALL_GLIBC_SOCKET_API;
1794 goto CALL_GLIBC_SOCKET_API;
1798 CALL_GLIBC_SOCKET_API:
1799 return libc_socketpair (__domain, __type, __protocol, __fds);
1803 * Give the socket FD the local address ADDR
1804 * (which is LEN bytes long).
1807 vcom_bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1811 if (vcom_init () != 0)
1816 /* validate __len */
1817 switch (__addr->sa_family)
1820 if (__len != sizeof (struct sockaddr_in))
1824 if (__len != sizeof (struct sockaddr_in6))
1833 /* handle domains implemented by vpp */
1834 switch (__addr->sa_family)
1838 rv = vcom_socket_bind (__fd, __addr, __len);
1851 bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1854 pid_t pid = getpid ();
1856 if (is_vcom_socket_fd (__fd))
1859 rv = vcom_bind (__fd, __addr, __len);
1863 "'%04d'='%04d', '%p', '%04d'\n",
1864 pid, rv, __fd, __addr, __len);
1872 return libc_bind (__fd, __addr, __len);
/*
 * Put the local address of FD into *ADDR and its length in *LEN.
 *
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_getsockname().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_getsockname().
 */
int
vcom_getsockname (int __fd, __SOCKADDR_ARG __addr,
                  socklen_t * __restrict __len)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_getsockname (__fd, __addr, __len);
    }

  return -1;
}
1891 getsockname (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
1894 pid_t pid = getpid ();
1896 if (is_vcom_socket_fd (__fd))
1898 rv = vcom_getsockname (__fd, __addr, __len);
1901 "[%d] getsockname: "
1902 "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
1910 return libc_getsockname (__fd, __addr, __len);
/*
 * Open a connection on socket FD to peer at ADDR
 * (which is LEN bytes long).  For connectionless socket types, just set
 * the default address to send to and the only address from which to
 * accept transmissions.  Return 0 on success, -1 for errors.
 * This function is a cancellation point and therefore not marked
 * with __THROW.
 *
 * vcom variant: makes sure vcom is initialised, validates LEN against
 * the address family of ADDR and, for AF_INET / AF_INET6, forwards the
 * request to vcom_socket_connect().
 * RETURN: the result of vcom_socket_connect(), or -1 when vcom_init()
 * fails, the family is not AF_INET / AF_INET6, or LEN does not match
 * the family.
 */
int
vcom_connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
{
  int rv = -1;

  if (vcom_init () != 0)
    {
      return -1;
    }

  /*
   * validate __len
   *
   * The length must be the size of the socket address structure of the
   * family, exactly as vcom_bind() checks it.  INET_ADDRSTRLEN and
   * INET6_ADDRSTRLEN are sizes of text buffers for printed addresses;
   * comparing against them rejected every correctly sized IPv6 address.
   */
  switch (__addr->sa_family)
    {
    case AF_INET:
      if (__len != sizeof (struct sockaddr_in))
        return -1;
      break;
    case AF_INET6:
      if (__len != sizeof (struct sockaddr_in6))
        return -1;
      break;

    default:
      return -1;
      break;
    }

  /* handle domains implemented by vpp */
  switch (__addr->sa_family)
    {
    case AF_INET:
    case AF_INET6:
      rv = vcom_socket_connect (__fd, __addr, __len);
      break;

    default:
      return -1;
      break;
    }

  return rv;
}
1965 connect (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
1968 pid_t pid = getpid ();
1969 pthread_t tid = pthread_self ();
1971 if (is_vcom_socket_fd (__fd))
1973 rv = vcom_connect (__fd, __addr, __len);
1976 "[%d][%lu (0x%lx)] connect: "
1977 "'%04d'='%04d', '%p', '%04d'\n",
1978 pid, (unsigned long) tid, (unsigned long) tid,
1979 rv, __fd, __addr, __len);
1988 return libc_connect (__fd, __addr, __len);
/*
 * Put the address of the peer connected to socket FD into *ADDR
 * (which is *LEN bytes long), and its actual length into *LEN.
 *
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_getpeername().
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_getpeername().
 */
int
vcom_getpeername (int __fd, __SOCKADDR_ARG __addr,
                  socklen_t * __restrict __len)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_getpeername (__fd, __addr, __len);
    }

  return -1;
}
2008 getpeername (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __len)
2011 pid_t pid = getpid ();
2013 if (is_vcom_socket_fd (__fd))
2015 rv = vcom_getpeername (__fd, __addr, __len);
2018 "[%d] getpeername: "
2019 "'%04d'='%04d', '%p', '%p'\n", pid, rv, __fd, __addr, __len);
2027 return libc_getpeername (__fd, __addr, __len);
/*
 * Send N bytes of BUF to socket FD.  Returns the number sent or -1.
 * This function is a cancellation point and therefore not marked
 * with __THROW.
 *
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_send().  For that call the const qualifier on
 * __buf is cast away and __n is converted to int.
 * RETURN: -1 if vcom_init() fails, otherwise the result of
 * vcom_socket_send().
 */
ssize_t
vcom_send (int __fd, const void *__buf, size_t __n, int __flags)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_send (__fd, (void *) __buf, (int) __n, __flags);
    }

  return -1;
}
2048 send (int __fd, const void *__buf, size_t __n, int __flags)
2051 pid_t pid = getpid ();
2053 if (is_vcom_socket_fd (__fd))
2055 size = vcom_send (__fd, __buf, __n, __flags);
2059 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2060 pid, (int) size, __fd, __buf, (int) __n, __flags);
2068 return libc_send (__fd, __buf, __n, __flags);
2072 sendfile (int __out_fd, int __in_fd, off_t * __offset, size_t __len)
2077 clib_warning ("[%d] __out_fd %d, __in_fd %d, __offset %p, __len %ld",
2078 getpid (), __out_fd, __in_fd, __offset, __len);
2080 if (is_vcom_socket_fd (__out_fd))
2082 /* TBD: refactor this check to be part of is_vcom_socket_fd() */
2083 if (vcom_init () != 0)
2086 size = vcom_socket_sendfile (__out_fd, __in_fd, __offset, __len);
2088 clib_warning ("[%d] vcom_socket_sendfile (out_fd %d, in_fd %d, "
2089 "offset %p (%ld), len %lu) returned %ld",
2090 getpid (), __out_fd, __in_fd, __offset,
2091 __offset ? *__offset : -1, __len, size);
2100 clib_warning ("[%d] calling libc_sendfile!", getpid ());
2101 return libc_sendfile (__out_fd, __in_fd, __offset, __len);
/*
 * 64-bit-named entry point: hands all four arguments, unchanged, to
 * sendfile() and returns its result.
 */
ssize_t
sendfile64 (int __out_fd, int __in_fd, off_t * __offset, size_t __len)
{
  ssize_t sent = sendfile (__out_fd, __in_fd, __offset, __len);

  return sent;
}
/*
 * Read N bytes into BUF from socket FD.
 * Returns the number read or -1 for errors.
 * This function is a cancellation point and therefore not marked
 * with __THROW.
 *
 * vcom variant: makes sure vcom is initialised, then forwards the
 * request to vcom_socket_recv(); -1 is returned when vcom_init()
 * fails.
 */
ssize_t
vcom_recv (int __fd, void *__buf, size_t __n, int __flags)
{
  if (vcom_init () == 0)
    {
      return vcom_socket_recv (__fd, __buf, __n, __flags);
    }

  return -1;
}
/*
 * recv - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to vcom_recv() and
 * the pid, result, fd, buffer pointer, length and flags are supplied to a
 * formatted message (presumably a debug trace; the call consuming the
 * format string is not visible in this listing).  Any other descriptor is
 * forwarded to libc_recv().
 * NOTE(review): the handling of a negative result from vcom_recv() is not
 * visible in this listing.
 */
2129 recv (int __fd, void *__buf, size_t __n, int __flags)
2132 pid_t pid = getpid ();
2134 if (is_vcom_socket_fd (__fd))
2136 size = vcom_recv (__fd, __buf, __n, __flags);
2140 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2141 pid, (int) size, __fd, __buf, (int) __n, __flags);
2149 return libc_recv (__fd, __buf, __n, __flags);
2153 * Send N bytes of BUF on socket FD to peer at address ADDR (which is
2154 * ADDR_LEN bytes long). Returns the number sent, or -1 for errors.
2155 * This function is a cancellation point and therefore not marked
/*
 * vcom_sendto - VCL-side implementation behind sendto().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_sendto() called with the arguments unchanged.
 */
2159 vcom_sendto (int __fd, const void *__buf, size_t __n, int __flags,
2160 __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
2162 if (vcom_init () != 0)
2167 return vcom_socket_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
/*
 * sendto - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to vcom_sendto()
 * and the pid, result, fd, buffer pointer, length, flags, destination
 * address and address length are supplied to a formatted message
 * (presumably a debug trace; parts of the format string are not visible in
 * this listing).  Any other descriptor is forwarded to libc_sendto().
 * NOTE(review): the handling of a negative result from vcom_sendto() is
 * not visible in this listing.
 */
2171 sendto (int __fd, const void *__buf, size_t __n, int __flags,
2172 __CONST_SOCKADDR_ARG __addr, socklen_t __addr_len)
2175 pid_t pid = getpid ();
2177 if (is_vcom_socket_fd (__fd))
2179 size = vcom_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
2183 "'%04d'='%04d', '%p', '%04d', '%04x', "
2185 pid, (int) size, __fd, __buf, (int) __n, __flags,
2186 __addr, __addr_len);
2194 return libc_sendto (__fd, __buf, __n, __flags, __addr, __addr_len);
2198 * Read N bytes into BUF through socket FD.
2199 * If ADDR is not NULL, fill in *ADDR_LEN bytes of it with the
2200 * address of the sender, and store the actual size of the address
2202 * Returns the number of bytes read or -1 for errors.
2203 * This function is a cancellation point and therefore not marked
/*
 * vcom_recvfrom - VCL-side implementation behind recvfrom().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_recvfrom().
 * NOTE(review): the __flags parameter is used in the call but its
 * declaration line is not visible in this listing.
 */
2207 vcom_recvfrom (int __fd, void *__restrict __buf, size_t __n,
2209 __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
2211 if (vcom_init () != 0)
2216 return vcom_socket_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
/*
 * recvfrom - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to
 * vcom_recvfrom() and the pid, result, fd, buffer pointer, length, flags,
 * address pointer and address-length pointer are supplied to a formatted
 * message (presumably a debug trace; parts of the format string are not
 * visible in this listing).  Any other descriptor is forwarded to
 * libc_recvfrom().
 * NOTE(review): the handling of a negative result from vcom_recvfrom() is
 * not visible in this listing.
 */
2220 recvfrom (int __fd, void *__restrict __buf, size_t __n,
2222 __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
2225 pid_t pid = getpid ();
2227 if (is_vcom_socket_fd (__fd))
2229 size = vcom_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
2233 "'%04d'='%04d', '%p', '%04d', '%04x', "
2235 pid, (int) size, __fd, __buf, (int) __n, __flags,
2236 __addr, __addr_len);
2244 return libc_recvfrom (__fd, __buf, __n, __flags, __addr, __addr_len);
2248 * Send a message described by MESSAGE on socket FD.
2249 * Returns the number of bytes sent, or -1 for errors.
2250 * This function is a cancellation point and therefore not marked
/*
 * vcom_sendmsg - VCL-side implementation behind sendmsg().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_sendmsg() called with the arguments unchanged.
 */
2254 vcom_sendmsg (int __fd, const struct msghdr * __message, int __flags)
2256 if (vcom_init () != 0)
2261 return vcom_socket_sendmsg (__fd, __message, __flags);
/*
 * sendmsg - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to vcom_sendmsg()
 * and the pid, result, fd, message pointer and flags are supplied to a
 * formatted message (presumably a debug trace; the call consuming the
 * format string is not visible in this listing).  Any other descriptor is
 * forwarded to libc_sendmsg().
 * NOTE(review): the handling of a negative result from vcom_sendmsg() is
 * not visible in this listing.
 */
2265 sendmsg (int __fd, const struct msghdr * __message, int __flags)
2268 pid_t pid = getpid ();
2270 if (is_vcom_socket_fd (__fd))
2272 size = vcom_sendmsg (__fd, __message, __flags);
2276 "'%04d'='%04d', '%p', '%04x'\n",
2277 pid, (int) size, __fd, __message, __flags);
2285 return libc_sendmsg (__fd, __message, __flags);
2290 * Send VLEN messages as described by VMESSAGES to socket FD.
2291 * Returns the number of datagrams successfully written
2293 * This function is a cancellation point and therefore not marked
/*
 * vcom_sendmmsg - VCL-side implementation behind sendmmsg().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_sendmmsg().
 * The message vector is passed by its parameter name, __vmessages; no
 * identifier named __message is declared in this function.
 */
2297 vcom_sendmmsg (int __fd, struct mmsghdr *__vmessages,
2298 unsigned int __vlen, int __flags)
2300 if (vcom_init () != 0)
2305 return vcom_socket_sendmmsg (__fd, __vmessages, __vlen, __flags);
/*
 * sendmmsg - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to
 * vcom_sendmmsg() and the pid, result, fd, message-vector pointer, vector
 * length and flags are supplied to a formatted message (presumably a debug
 * trace; the call consuming the format string is not visible in this
 * listing).  Any other descriptor is forwarded to libc_sendmmsg().
 * Both calls pass the message vector by its parameter name, __vmessages;
 * no identifier named __message is declared in this function.
 * NOTE(review): the handling of a negative result from vcom_sendmmsg() is
 * not visible in this listing.
 */
2309 sendmmsg (int __fd, struct mmsghdr *__vmessages,
2310 unsigned int __vlen, int __flags)
2313 pid_t pid = getpid ();
2315 if (is_vcom_socket_fd (__fd))
2317 size = vcom_sendmmsg (__fd, __vmessages, __vlen, __flags);
2321 "'%04d'='%04d', '%p', '%04d', '%04x'\n",
2322 pid, (int) size, __fd, __vmessages, __vlen, __flags);
2330 return libc_sendmmsg (__fd, __vmessages, __vlen, __flags);
2336 * Receive a message as described by MESSAGE from socket FD.
2337 * Returns the number of bytes read or -1 for errors.
2338 * This function is a cancellation point and therefore not marked
/*
 * vcom_recvmsg - VCL-side implementation behind recvmsg().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_recvmsg() called with the arguments unchanged.
 */
2342 vcom_recvmsg (int __fd, struct msghdr * __message, int __flags)
2344 if (vcom_init () != 0)
2349 return vcom_socket_recvmsg (__fd, __message, __flags);
/*
 * recvmsg - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to vcom_recvmsg()
 * and the pid, result, fd, message pointer and flags are supplied to a
 * formatted message (presumably a debug trace; the call consuming the
 * format string is not visible in this listing).  Any other descriptor is
 * forwarded to libc_recvmsg().
 * NOTE(review): the handling of a negative result from vcom_recvmsg() is
 * not visible in this listing.
 */
2353 recvmsg (int __fd, struct msghdr * __message, int __flags)
2356 pid_t pid = getpid ();
2358 if (is_vcom_socket_fd (__fd))
2360 size = vcom_recvmsg (__fd, __message, __flags);
2364 "'%04d'='%04d', '%p', '%04x'\n",
2365 pid, (int) size, __fd, __message, __flags);
2373 return libc_recvmsg (__fd, __message, __flags);
2378 * Receive up to VLEN messages as described by VMESSAGES from socket FD.
2379 * Returns the number of messages received or -1 for errors.
2380 * This function is a cancellation point and therefore not marked
/*
 * vcom_recvmmsg - VCL-side implementation behind recvmmsg().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_recvmmsg().
 * The message vector is passed by its parameter name, __vmessages; no
 * identifier named __message is declared in this function.
 */
2384 vcom_recvmmsg (int __fd, struct mmsghdr *__vmessages,
2385 unsigned int __vlen, int __flags, struct timespec *__tmo)
2387 if (vcom_init () != 0)
2392 return vcom_socket_recvmmsg (__fd, __vmessages, __vlen, __flags, __tmo);
/*
 * recvmmsg - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to
 * vcom_recvmmsg() and the pid, result, fd, message-vector pointer, vector
 * length, flags and timeout pointer are supplied to a formatted message
 * (presumably a debug trace; the call consuming the format string is not
 * visible in this listing).  Any other descriptor is forwarded to
 * libc_recvmmsg().
 * Both calls pass the message vector by its parameter name, __vmessages;
 * no identifier named __message is declared in this function.
 * NOTE(review): the handling of a negative result from vcom_recvmmsg() is
 * not visible in this listing.
 */
2396 recvmmsg (int __fd, struct mmsghdr *__vmessages,
2397 unsigned int __vlen, int __flags, struct timespec *__tmo)
2400 pid_t pid = getpid ();
2402 if (is_vcom_socket_fd (__fd))
2404 size = vcom_recvmmsg (__fd, __vmessages, __vlen, __flags, __tmo);
2408 "'%04d'='%04d', '%p', "
2409 "'%04d', '%04x', '%p'\n",
2410 pid, (int) size, __fd, __vmessages, __vlen, __flags, __tmo);
2418 return libc_recvmmsg (__fd, __vmessages, __vlen, __flags, __tmo);
2424 * Put the current value for socket FD's option OPTNAME
2425 * at protocol level LEVEL into OPTVAL (which is *OPTLEN bytes long),
2426 * and set *OPTLEN to the value's actual length.
2427 * Returns 0 on success, -1 for errors.
/*
 * vcom_getsockopt - VCL-side implementation behind getsockopt().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_getsockopt() called with the arguments unchanged.
 */
2430 vcom_getsockopt (int __fd, int __level, int __optname,
2431 void *__restrict __optval, socklen_t * __restrict __optlen)
2433 if (vcom_init () != 0)
2438 return vcom_socket_getsockopt (__fd, __level, __optname,
2439 __optval, __optlen);
/*
 * getsockopt - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to
 * vcom_getsockopt() and the pid, result, fd, level, option name, value
 * pointer and length pointer are supplied to a formatted message
 * (presumably a debug trace; parts of the format string are not visible in
 * this listing).  Any other descriptor is forwarded to libc_getsockopt().
 * NOTE(review): the handling of a negative result from vcom_getsockopt()
 * is not visible in this listing.
 */
2443 getsockopt (int __fd, int __level, int __optname,
2444 void *__restrict __optval, socklen_t * __restrict __optlen)
2447 pid_t pid = getpid ();
2449 if (is_vcom_socket_fd (__fd))
2451 rv = vcom_getsockopt (__fd, __level, __optname, __optval, __optlen);
2455 "'%04d'='%04d', '%04d', '%04d', "
2457 pid, rv, __fd, __level, __optname, __optval, __optlen);
2465 return libc_getsockopt (__fd, __level, __optname, __optval, __optlen);
2469 * Set socket FD's option OPTNAME at protocol level LEVEL
2470 * to *OPTVAL (which is OPTLEN bytes long).
2471 * Returns 0 on success, -1 for errors.
/*
 * vcom_setsockopt - VCL-side implementation behind setsockopt().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_setsockopt() called with the arguments unchanged.
 */
2474 vcom_setsockopt (int __fd, int __level, int __optname,
2475 const void *__optval, socklen_t __optlen)
2477 if (vcom_init () != 0)
2482 return vcom_socket_setsockopt (__fd, __level, __optname,
2483 __optval, __optlen);
/*
 * setsockopt - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to
 * vcom_setsockopt() and the pid, result, fd, level, option name, value
 * pointer and length are supplied to a formatted message (presumably a
 * debug trace; parts of the format string are not visible in this
 * listing).  Any other descriptor is forwarded to libc_setsockopt().
 * NOTE(review): the handling of a negative result from vcom_setsockopt()
 * is not visible in this listing.
 */
2487 setsockopt (int __fd, int __level, int __optname,
2488 const void *__optval, socklen_t __optlen)
2491 pid_t pid = getpid ();
2493 if (is_vcom_socket_fd (__fd))
2495 rv = vcom_setsockopt (__fd, __level, __optname, __optval, __optlen);
2499 "'%04d'='%04d', '%04d', '%04d', "
2501 pid, rv, __fd, __level, __optname, __optval, __optlen);
2509 return libc_setsockopt (__fd, __level, __optname, __optval, __optlen);
2513 * Prepare to accept connections on socket FD.
2514 * N connection requests will be queued before further
2515 * requests are refused.
2516 * Returns 0 on success, -1 for errors.
/*
 * vcom_listen - VCL-side implementation behind listen().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_listen() called with the arguments unchanged.
 */
2519 vcom_listen (int __fd, int __n)
2521 if (vcom_init () != 0)
2526 return vcom_socket_listen (__fd, __n);
/*
 * listen - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to vcom_listen()
 * and the pid, result, fd and backlog are supplied to a formatted message
 * (presumably a debug trace; the call consuming the format string is not
 * visible in this listing).  Any other descriptor is forwarded to
 * libc_listen().
 * NOTE(review): the handling of a negative result from vcom_listen() is
 * not visible in this listing.
 */
2530 listen (int __fd, int __n)
2533 pid_t pid = getpid ();
2535 if (is_vcom_socket_fd (__fd))
2537 rv = vcom_listen (__fd, __n);
2541 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __n);
2549 return libc_listen (__fd, __n);
2553 * Await a connection on socket FD.
2554 * When a connection arrives, open a new socket to communicate
2555 * with it, set *ADDR (which is *ADDR_LEN bytes long) to the address
2556 * of the connecting peer and *ADDR_LEN to the address's actual
2557 * length, and return the new socket's descriptor, or -1 for errors.
2558 * This function is a cancellation point and therefore not marked
/*
 * vcom_accept - VCL-side implementation behind accept().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_accept() called with the arguments unchanged.
 */
2562 vcom_accept (int __fd, __SOCKADDR_ARG __addr,
2563 socklen_t * __restrict __addr_len)
2566 if (vcom_init () != 0)
2570 return vcom_socket_accept (__fd, __addr, __addr_len);
/*
 * accept - wrapper that dispatches on the listening descriptor.
 * For a descriptor accepted by is_vcom_socket_fd(): vcom_socket_main_show()
 * is called and an "accept1" message is formatted before the call, the
 * connection is taken with vcom_accept(), then an "accept2" message is
 * formatted and vcom_socket_main_show() is called again.  Both messages
 * carry the pid, the pthread_self() id (decimal and hex), rv, the fd and
 * the two address pointers.  Any other descriptor is forwarded to
 * libc_accept().
 * NOTE(review): "accept1" prints rv before vcom_accept() assigns it; the
 * initial value of rv is not visible in this listing.
 * NOTE(review): pthread_t is cast to unsigned long for printing; confirm
 * that is valid on every supported platform.
 * NOTE(review): whether the show/trace calls are debug-only, and how a
 * negative result is converted, is not visible in this listing.
 */
2574 accept (int __fd, __SOCKADDR_ARG __addr, socklen_t * __restrict __addr_len)
2577 pid_t pid = getpid ();
2578 pthread_t tid = pthread_self ();
2580 if (is_vcom_socket_fd (__fd))
2583 vcom_socket_main_show ();
2586 "[%d][%lu (0x%lx)] accept1: "
2587 "'%04d'='%04d', '%p', '%p'\n",
2588 pid, (unsigned long) tid, (unsigned long) tid,
2589 rv, __fd, __addr, __addr_len);
2590 rv = vcom_accept (__fd, __addr, __addr_len);
2593 "[%d][%lu (0x%lx)] accept2: "
2594 "'%04d'='%04d', '%p', '%p'\n",
2595 pid, (unsigned long) tid, (unsigned long) tid,
2596 rv, __fd, __addr, __addr_len);
2598 vcom_socket_main_show ();
2606 return libc_accept (__fd, __addr, __addr_len);
2610 * Similar to 'accept' but takes an additional parameter to specify
2612 * This function is a cancellation point and therefore not marked
/*
 * vcom_accept4 - VCL-side implementation behind accept4().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_accept4() called with the arguments unchanged.
 */
2616 vcom_accept4 (int __fd, __SOCKADDR_ARG __addr,
2617 socklen_t * __restrict __addr_len, int __flags)
2620 if (vcom_init () != 0)
2625 return vcom_socket_accept4 (__fd, __addr, __addr_len, __flags);
/*
 * accept4 - wrapper that dispatches on the listening descriptor.
 * Formats an "in the beginning" message on entry.  For a descriptor
 * accepted by is_vcom_socket_fd(), vcom_socket_main_show() is called, the
 * connection is taken with vcom_accept4(), a "VCL" message is formatted
 * and vcom_socket_main_show() is called again.  Otherwise a "libc" message
 * is formatted and the call is forwarded to libc_accept4().  Every message
 * carries the pid, rv, the fd, the two address pointers and the flags.
 * NOTE(review): the entry and "libc" messages print rv without a visible
 * assignment from a call; its initial value is not visible in this
 * listing.
 * NOTE(review): whether the show/trace calls are debug-only, and how a
 * negative result is converted, is not visible in this listing.
 */
2629 accept4 (int __fd, __SOCKADDR_ARG __addr,
2630 socklen_t * __restrict __addr_len, int __flags)
2633 pid_t pid = getpid ();
2636 "[%d] accept4: in the beginning... "
2637 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2638 pid, rv, __fd, __addr, __addr_len, __flags);
2640 if (is_vcom_socket_fd (__fd))
2643 vcom_socket_main_show ();
2644 rv = vcom_accept4 (__fd, __addr, __addr_len, __flags);
2647 "[%d] accept4: VCL "
2648 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2649 pid, rv, __fd, __addr, __addr_len, __flags);
2651 vcom_socket_main_show ();
2660 "[%d] accept4: libc "
2661 "'%04d'='%04d', '%p', '%p', '%04x'\n",
2662 pid, rv, __fd, __addr, __addr_len, __flags);
2664 return libc_accept4 (__fd, __addr, __addr_len, __flags);
2668 * Shut down all or part of the connection open on socket FD.
2669 * HOW determines what to shut down:
2670 * SHUT_RD = No more receptions;
2671 * SHUT_WR = No more transmissions;
2672 * SHUT_RDWR = No more receptions or transmissions.
2673 * Returns 0 on success, -1 for errors.
/*
 * vcom_shutdown - VCL-side implementation behind shutdown().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and otherwise returns the result of
 * vcom_socket_shutdown() called with the arguments unchanged.
 */
2676 vcom_shutdown (int __fd, int __how)
2678 if (vcom_init () != 0)
2682 return vcom_socket_shutdown (__fd, __how);
/*
 * shutdown - wrapper that dispatches on the descriptor type.
 * When is_vcom_socket_fd() accepts __fd the request goes to
 * vcom_shutdown() and the pid, result, fd and __how are supplied to a
 * formatted message (presumably a debug trace; the call consuming the
 * format string is not visible in this listing).  Any other descriptor is
 * forwarded to libc_shutdown().
 * NOTE(review): the handling of a negative result from vcom_shutdown() is
 * not visible in this listing.
 */
2686 shutdown (int __fd, int __how)
2689 pid_t pid = getpid ();
2691 if (is_vcom_socket_fd (__fd))
2693 rv = vcom_shutdown (__fd, __how);
2697 "'%04d'='%04d', '%04d'\n", pid, rv, __fd, __how);
2705 return libc_shutdown (__fd, __how);
/*
 * vcom_epoll_create - VCL-side implementation behind epoll_create().
 * Checks vcom_init() first (the body of the failure branch is not visible
 * in this listing) and then delegates to vcom_epoll_create1() with a flags
 * value of 0; __size is not forwarded.
 * NOTE(review): any validation of __size between the init check and the
 * delegation is not visible in this listing.
 */
2709 vcom_epoll_create (int __size)
2712 if (vcom_init () != 0)
2722 /* __size argument is ignored "thereafter" */
2723 return vcom_epoll_create1 (0);
2727 * __size argument is ignored, but must be greater than zero
/*
 * epoll_create - wrapper that routes to vcom_epoll_create().
 * The pid, result and __size are supplied to an "epoll_create" formatted
 * message (presumably a debug trace; the call consuming the format string
 * is not visible in this listing).  No libc fallback is visible here.
 * NOTE(review): the handling of a negative result is not visible in this
 * listing.
 */
2730 epoll_create (int __size)
2733 pid_t pid = getpid ();
2735 rv = vcom_epoll_create (__size);
2738 "[%d] epoll_create: " "'%04d'='%04d'\n", pid, rv, __size);
/*
 * vcom_epoll_create1 - VCL-side implementation behind epoll_create1().
 * Checks vcom_init(), then tests whether __flags has any bit set other
 * than EPOLL_CLOEXEC (the bodies of both branches are not visible in this
 * listing), and finally returns the result of
 * vcom_socket_epoll_create1() called with __flags.
 */
2748 vcom_epoll_create1 (int __flags)
2750 if (vcom_init () != 0)
2759 if (__flags & ~EPOLL_CLOEXEC)
2763 /* __flags can be either zero or EPOLL_CLOEXEC */
2764 /* implementation */
2765 return vcom_socket_epoll_create1 (__flags);
2769 * __flags can be either zero or EPOLL_CLOEXEC
/*
 * epoll_create1 - wrapper that routes to vcom_epoll_create1().
 * The pid, result and __flags are supplied to a formatted message
 * (presumably a debug trace; the call consuming the format string is not
 * visible in this listing).  The message is labelled "epoll_create1" so it
 * can be told apart from the one produced by epoll_create().
 * No libc fallback is visible here.
 * NOTE(review): the handling of a negative result is not visible in this
 * listing.
 */
2772 epoll_create1 (int __flags)
2775 pid_t pid = getpid ();
2777 rv = vcom_epoll_create1 (__flags);
2780 "[%d] epoll_create1: " "'%04d'='%08x'\n", pid, rv, __flags);
/*
 * ep_op_has_event - tells whether an epoll_ctl operation uses its event
 * argument.  Returns nonzero for every op other than EPOLL_CTL_DEL and
 * zero for EPOLL_CTL_DEL.
 */
2790 ep_op_has_event (int op)
2792 return op != EPOLL_CTL_DEL;
/*
 * vcom_epoll_ctl - VCL-side implementation behind epoll_ctl().
 * Performs, in order: the vcom_init() check; a check that __op is one of
 * EPOLL_CTL_ADD, EPOLL_CTL_MOD or EPOLL_CTL_DEL; a check that __event is
 * non-NULL whenever ep_op_has_event() says the operation needs one.  The
 * bodies of the rejecting branches are not visible in this listing.
 * When no check rejects the call, returns the result of
 * vcom_socket_epoll_ctl().
 * NOTE(review): the comments mention rejecting __fd == __epfd, but the
 * corresponding test is not visible in this listing.
 */
2796 vcom_epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
2798 if (vcom_init () != 0)
2804 * the requested operation __op is not supported
2805 * by this interface */
2806 if (!((__op == EPOLL_CTL_ADD) ||
2807 (__op == EPOLL_CTL_MOD) || (__op == EPOLL_CTL_DEL)))
2812 /* op is ADD or MOD but event parameter is NULL */
2813 if ((ep_op_has_event (__op) && !__event))
2818 /* fd is same as epfd */
2819 /* do not permit adding an epoll file descriptor inside itself */
2825 /* implementation */
2826 return vcom_socket_epoll_ctl (__epfd, __op, __fd, __event);
2830 * implement the controller interface for epoll
2831 * that enables the insertion/removal/change of
2832 * file descriptors inside the interest set.
/*
 * epoll_ctl - wrapper that routes to vcom_epoll_ctl().
 * The pid, result, __epfd, __op and __fd are supplied to a formatted
 * message (presumably a debug trace; the call consuming the format string
 * is not visible in this listing).  No libc fallback is visible here.
 * NOTE(review): the handling of a negative result is not visible in this
 * listing.
 */
2835 epoll_ctl (int __epfd, int __op, int __fd, struct epoll_event *__event)
2838 pid_t pid = getpid ();
2840 rv = vcom_epoll_ctl (__epfd, __op, __fd, __event);
2844 "'%04d'='%04d', '%04d', '%04d'\n", pid, rv, __epfd, __op, __fd);
/*
 * epoll_wait - wrapper implemented on top of vcom_socket_epoll_pwait().
 * Rejects __maxevents values that are <= 0 or greater than EP_MAX_EVENTS,
 * writing an error line to stderr (the rest of that branch is not visible
 * in this listing).  Otherwise calls vcom_socket_epoll_pwait() with a NULL
 * signal mask, and the pid, rv, __epfd, __events, __maxevents and
 * __timeout are supplied to a formatted message (presumably a debug
 * trace).
 * NOTE(review): unlike epoll_pwait(), no is_vcom_epfd() test is visible
 * here; confirm that every epoll descriptor reaching this function is a
 * VCL one.
 */
2854 epoll_wait (int __epfd, struct epoll_event *__events,
2855 int __maxevents, int __timeout)
2858 pid_t pid = getpid ();
2860 if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
2862 fprintf (stderr, "[%d] ERROR: epoll_wait() invalid maxevents %d\n",
2869 vcom_socket_epoll_pwait (__epfd, __events, __maxevents, __timeout, NULL);
2873 "'%04d'='%04d', '%p', "
2875 pid, rv, __epfd, __events, __maxevents, __timeout);
/*
 * epoll_pwait - wrapper implemented on top of vcom_socket_epoll_pwait().
 * Tests __maxevents against the range (0, EP_MAX_EVENTS] (the rejecting
 * branch is not visible in this listing).  When is_vcom_epfd() accepts
 * __epfd, vcom_socket_epoll_pwait() is called and the pid, rv, __epfd,
 * __events, __maxevents, __timeout and __ss are supplied to an
 * "epoll_pwait" formatted message (presumably a debug trace).
 * NOTE(review): the path taken when is_vcom_epfd() rejects __epfd, and the
 * final argument passed to vcom_socket_epoll_pwait(), are not visible in
 * this listing.
 */
2886 epoll_pwait (int __epfd, struct epoll_event *__events,
2887 int __maxevents, int __timeout, const __sigset_t * __ss)
2890 pid_t pid = getpid ();
2892 if (__maxevents <= 0 || __maxevents > EP_MAX_EVENTS)
2898 if (is_vcom_epfd (__epfd))
2901 vcom_socket_epoll_pwait (__epfd, __events, __maxevents, __timeout,
2905 "[%d] epoll_pwait: "
2906 "'%04d'='%04d', '%p', "
2909 pid, rv, __epfd, __events, __maxevents, __timeout, __ss);
2926 /* Poll the file descriptors described by the NFDS structures starting at
2927 FDS. If TIMEOUT is nonzero and not -1, allow TIMEOUT milliseconds for
2928 an event to occur; if TIMEOUT is -1, block until an event occurs.
2929 Returns the number of file descriptors with events, zero if timed out,
2932 This function is a cancellation point and therefore not marked with
/*
 * vcom_poll - poll() over a mixed set of libc and VCL descriptors.
 *
 * Visible steps, in order:
 *  - Read the start time with clock_gettime (CLOCK_MONOTONIC), convert
 *    __timeout (milliseconds) to a timespec and derive end_time from it;
 *    on the alternative path end_time is set to start_time.
 *  - Check vcom_init(), then validate __nfds against the RLIMIT_NOFILE
 *    soft limit from getrlimit() and against MAX_POLL_NFDS_DEFAULT, the
 *    capacity of the on-stack vcom_fds copy.  The bodies of the rejecting
 *    branches are not visible in this listing.
 *  - Zero every revents field, then copy __fds into vcom_fds.
 *  - Split the set: entries accepted by is_vcom_socket_fd() or
 *    is_vcom_epfd() have their fd negated in __fds, so libc_poll() ignores
 *    them; the remaining entries are negated in vcom_fds (fd 0 becomes
 *    -1), so vcom_socket_poll() ignores them.  Entries the caller already
 *    passed as negative are left alone.
 *  - Call libc_poll() and vcom_socket_poll() with a zero timeout, sum
 *    their results when either reports ready descriptors, and otherwise
 *    re-read the clock; the trailing while condition repeats this while
 *    __timeout is negative or end_time has not been reached.
 *  - At poll_done_update_nfds, for every entry whose vcom_fds fd is
 *    non-negative, negate the fd in __fds back and copy revents from
 *    vcom_fds.
 *
 * NOTE(review): both pollers are called with a zero timeout inside the
 * loop, which looks like busy-waiting until the deadline; confirm no
 * sleep/yield exists on lines absent from this listing.
 * NOTE(review): libc_nfds and vcom_nfds are tested in the readiness check,
 * but the statements that update them are not visible in this listing.
 * NOTE(review): the caller's __fds array is modified in place while
 * polling and restored afterwards; confirm every exit path reaches the
 * restore loop.
 */
2936 vcom_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
2939 pid_t pid = getpid ();
2941 struct rlimit nofile_limit;
2942 struct pollfd vcom_fds[MAX_POLL_NFDS_DEFAULT];
2945 /* actual set of file descriptors to be monitored */
2946 nfds_t libc_nfds = 0;
2947 nfds_t vcom_nfds = 0;
2949 /* ready file descriptors
2951 * number of structures which have nonzero revents fields
2952 * in other words, descriptors with events or errors reported.
2954 /* after call to libc_poll () */
2956 /* after call to vcom_socket_poll () */
2960 /* timeout value in units of timespec */
2961 struct timespec timeout_ts;
2962 struct timespec start_time, now, end_time;
2965 /* get start_time */
2966 rv = clock_gettime (CLOCK_MONOTONIC, &start_time);
2973 /* set timeout_ts & end_time */
2976 /* set timeout_ts */
2977 timeout_ts.tv_sec = __timeout / MSEC_PER_SEC;
2978 timeout_ts.tv_nsec = (__timeout % MSEC_PER_SEC) * NSEC_PER_MSEC;
2979 set_normalized_timespec (&timeout_ts,
2980 timeout_ts.tv_sec, timeout_ts.tv_nsec);
2984 end_time = timespec_add (start_time, timeout_ts);
2988 end_time = start_time;
2992 if (vcom_init () != 0)
2998 /* validate __fds */
3005 /* validate __nfds */
3006 /*TBD: call getrlimit once when vcl-ldpreload library is init */
3007 rv = getrlimit (RLIMIT_NOFILE, &nofile_limit);
3013 if (__nfds >= nofile_limit.rlim_cur)
3020 * for the POC, it's fair to assume that nfds is less than 1024
3022 if (__nfds >= MAX_POLL_NFDS_DEFAULT)
3028 /* set revents field (output parameter)
3031 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3033 __fds[fds_idx].revents = 0;
3037 /* set revents field (output parameter)
3038 * to zero for user ignored fds
3040 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3043 * if negative fd, ignore events field
3044 * and set output parameter (revents field) to zero */
3045 if (__fds[fds_idx].fd < 0)
3047 __fds[fds_idx].revents = 0;
3053 * 00. prepare __fds and vcom_fds for polling
3054 * copy __fds to vcom_fds
3055 * 01. negate all except libc fds in __fds,
3056 * ignore user negated fds
3057 * 02. negate all except vcom_fds in vocm fds,
3058 * ignore user negated fds
3059 * ignore fd 0 by setting it to negative number
3061 memcpy (vcom_fds, __fds, sizeof (*__fds) * __nfds);
3064 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3066 /* ignore negative fds */
3067 if (__fds[fds_idx].fd < 0)
3073 * 00. ignore vcom fds in __fds
3074 * 01. ignore libc fds in vcom_fds,
3075 * ignore fd 0 by setting it to negative number.
3076 * as fd 0 cannot be ignored.
3078 if (is_vcom_socket_fd (__fds[fds_idx].fd) ||
3079 is_vcom_epfd (__fds[fds_idx].fd))
3081 __fds[fds_idx].fd = -__fds[fds_idx].fd;
3087 /* ignore fd 0 by setting it to negative number */
3088 if (!vcom_fds[fds_idx].fd)
3090 vcom_fds[fds_idx].fd = -1;
3092 vcom_fds[fds_idx].fd = -vcom_fds[fds_idx].fd;
3099 * poll on libc fds and vcom fds
3101 * specifying a timeout of zero causes libc_poll() and
3102 * vcom_socket_poll() to return immediately, even if no
3103 * file descriptors are ready
3111 * timeout parameter for libc_poll () set to zero
3112 * to poll on libc fds
3115 /* poll on libc fds */
3119 * a timeout of zero causes libc_poll()
3120 * to return immediately
3122 rlibc_nfds = libc_poll (__fds, __nfds, 0);
3126 "'%04d'='%08lu'\n", pid, rlibc_nfds, __nfds);
3131 goto poll_done_update_nfds;
3136 * timeout parameter for vcom_socket_poll () set to zero
3137 * to poll on vcom fds
3140 /* poll on vcom fds */
3144 * a timeout of zero causes vcom_socket_poll()
3145 * to return immediately
3147 rvcom_nfds = vcom_socket_poll (vcom_fds, __nfds, 0);
3151 "'%04d'='%08lu'\n", pid, rvcom_nfds, __nfds);
3155 goto poll_done_update_nfds;
3159 /* check if any file descriptors changed status */
3160 if ((libc_nfds && rlibc_nfds > 0) || (vcom_nfds && rvcom_nfds > 0))
3162 /* something interesting happened */
3163 rv = rlibc_nfds + rvcom_nfds;
3164 goto poll_done_update_nfds;
3167 rv = clock_gettime (CLOCK_MONOTONIC, &now);
3171 goto poll_done_update_nfds;
3175 /* block indefinitely || timeout elapsed */
3176 while ((__timeout < 0) || timespec_compare (&now, &end_time) < 0);
3178 /* timeout expired before anything interesting happened */
3181 poll_done_update_nfds:
3182 for (fds_idx = 0; fds_idx < __nfds; fds_idx++)
3184 /* ignore negative fds in vcom_fds
3185 * 00. user negated fds
3188 if (vcom_fds[fds_idx].fd < 0)
3193 /* from here on handle positive vcom fds */
3195 * restore vcom fds to positive number in __fds
3196 * and update revents in __fds with the events
3197 * that actually occurred in vcom fds
3199 __fds[fds_idx].fd = -__fds[fds_idx].fd;
3202 __fds[fds_idx].revents = vcom_fds[fds_idx].revents;
3208 fprintf (stderr, "[%d] vpoll: " "'%04d'='%08lu'\n", pid, rv, __nfds);
3213 * 00. The field __fds[i].fd contains a file descriptor for an
3215 * If this field is negative, then the corresponding
3216 * events field is ignored and the revents field returns zero.
3217 * The field __fds[i].events is an input parameter.
3218 * The field __fds[i].revents is an output parameter.
3219 * 01. Specifying a negative value in timeout
3220 * means an infinite timeout.
3221 * Specifying a timeout of zero causes poll() to return
3222 * immediately, even if no file descriptors are ready.
3224 * NOTE: observed __nfds is less than 128 from kubecon strace files
/*
 * poll - wrapper that routes every call to vcom_poll().
 * Writes a "poll1" line to stderr before the call and a "poll2" line
 * after it, each carrying the pid, rv, __nfds and the fd plus the
 * events/revents field of the first pollfd entry.
 * NOTE(review): both trace lines read __fds[0]; confirm they cannot run
 * when __fds is NULL or __nfds is 0 (any debug guard around them is not
 * visible in this listing).
 * NOTE(review): the handling of a negative result from vcom_poll() is not
 * visible in this listing.
 */
3229 poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
3232 pid_t pid = getpid ();
3236 fprintf (stderr, "[%d] poll1: " "'%04d'='%08lu, %d, 0x%x'\n",
3237 pid, rv, __nfds, __fds[0].fd, __fds[0].events);
3238 rv = vcom_poll (__fds, __nfds, __timeout);
3240 fprintf (stderr, "[%d] poll2: " "'%04d'='%08lu, %d, 0x%x'\n",
3241 pid, rv, __nfds, __fds[0].fd, __fds[0].revents);
3251 /* Like poll, but before waiting the thread's signal mask is replaced
3252 with that specified in the fourth parameter. For better usability,
3253 the timeout value is specified using a TIMESPEC object.
3255 This function is a cancellation point and therefore not marked with
/*
 * vcom_ppoll - VCL-side implementation behind ppoll().
 * Only the vcom_init() check is visible in this listing; what the function
 * does with __fds, __nfds, __timeout and __ss afterwards cannot be
 * determined from here.
 */
3258 vcom_ppoll (struct pollfd *__fds, nfds_t __nfds,
3259 const struct timespec *__timeout, const __sigset_t * __ss)
3261 if (vcom_init () != 0)
/*
 * ppoll - wrapper with the same parameters as vcom_ppoll().
 * NOTE(review): the body is not visible in this listing; verify against
 * the complete file how (or whether) the call is dispatched.
 */
3270 ppoll (struct pollfd *__fds, nfds_t __nfds,
3271 const struct timespec *__timeout, const __sigset_t * __ss)
/*
 * Forward declarations carrying the load/unload attributes.
 * CONSTRUCTOR_ATTRIBUTE and DESTRUCTOR_ATTRIBUTE expand to
 * __attribute__ ((constructor)) and __attribute__ ((destructor)) when the
 * matching HAVE_*_ATTRIBUTE macro is defined, as it is at the top of this
 * file.
 */
3281 void CONSTRUCTOR_ATTRIBUTE vcom_constructor (void);
3283 void DESTRUCTOR_ATTRIBUTE vcom_destructor (void);
/*
 * vcom_constructor - library load hook (declared with
 * CONSTRUCTOR_ATTRIBUTE).
 * Calls swrap_constructor() and then vcom_init(), and prints a
 * "...failed!" or "...done!" line with the pid to stdout depending on the
 * vcom_init() result.
 */
3286 vcom_constructor (void)
3288 pid_t pid = getpid ();
3290 swrap_constructor ();
3291 if (vcom_init () != 0)
3293 printf ("\n[%d] vcom_constructor...failed!\n", pid);
3297 printf ("\n[%d] vcom_constructor...done!\n", pid);
3302 * This function is called when the library is unloaded
/*
 * vcom_destructor - library unload hook (declared with
 * DESTRUCTOR_ATTRIBUTE).
 * Calls swrap_destructor() and prints a "...done!" line with the pid to
 * stdout.
 * NOTE(review): statements between the pid lookup and swrap_destructor()
 * are not visible in this listing; confirm whether VCL state is torn down
 * there.
 */
3305 vcom_destructor (void)
3307 pid_t pid = getpid ();
3310 swrap_destructor ();
3311 printf ("\n[%d] vcom_destructor...done!\n", pid);
3316 * fd.io coding-style-patch-verification: ON
3319 * eval: (c-set-style "gnu")