2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 #include <string.h> /* strchr */
42 #include <sys/types.h>
43 #include <sys/socket.h>
46 #include <netinet/in.h>
47 #include <arpa/inet.h>
52 #include <vppinfra/mem.h>
53 #include <vppinfra/vec.h>
54 #include <vppinfra/socket.h>
55 #include <vppinfra/linux/netns.h>
56 #include <vppinfra/format.h>
57 #include <vppinfra/error.h>
60 /* IPPORT_USERRESERVED is not part of musl libc. */
61 #define IPPORT_USERRESERVED 5000
/* Variadic convenience wrapper: passes `s`, `fmt` and the address of `va`
   straight through to clib_socket_tx_add_va_formatted().
   NOTE(review): `va` is presumably the va_list built from the trailing
   arguments -- confirm. */
65 clib_socket_tx_add_formatted (clib_socket_t * s, char *fmt, ...)
69 clib_socket_tx_add_va_formatted (s, fmt, &va);
73 /* Return and bind to an unused port. */
/* Probes ports in ascending order starting at IPPORT_USERRESERVED, trying to
   bind `sock` to each one on the IPv4 wildcard address.  The result is the
   port the loop stopped on (host byte order), or -1 when the loop ran off
   the end of the 16-bit port range. */
75 find_free_port (word sock)
79 for (port = IPPORT_USERRESERVED; port < 1 << 16; port++)
83 clib_memset (&a, 0, sizeof (a)); /* Warnings be gone */
85 a.sin_family = PF_INET;
86 a.sin_addr.s_addr = INADDR_ANY;
/* sin_port is stored in network byte order. */
87 a.sin_port = htons (port);
/* A non-negative return from bind() means this port was accepted. */
89 if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
/* port == 1 << 16 here means no candidate was accepted. */
93 return port < 1 << 16 ? port : -1;
/* Default write_func: offers the pending tx_buffer to the socket with a
   single write() call and then trims or discards the buffer according to
   how much was accepted.  Errno values that unix_error_is_fatal() does not
   consider fatal are not reported; a real error yields a unix error and
   frees the whole buffer. */
97 default_socket_write (clib_socket_t * s)
99 clib_error_t *err = 0;
106 /* Map standard input to standard output.
107 Typically, fd is a socket for which read/write both work. */
/* Offer everything that is queued; write() may accept only part of it. */
111 tx_len = vec_len (s->tx_buffer);
112 written = write (fd, s->tx_buffer, tx_len);
114 /* Ignore certain errors. */
115 if (written < 0 && !unix_error_is_fatal (errno))
118 /* A "real" error occurred. */
121 err = clib_error_return_unix (0, "write %wd bytes (fd %d, '%s')",
122 tx_len, s->fd, s->config);
/* The unsent data is discarded together with the buffer. */
123 vec_free (s->tx_buffer);
127 /* Reclaim the transmitted part of the tx buffer on successful writes. */
128 else if (written > 0)
/* Complete write: reset the vector length to zero.  Otherwise the first
   `written` bytes are deleted and the remainder stays queued. */
130 if (written == tx_len)
131 vec_set_len (s->tx_buffer, 0);
133 vec_delete (s->tx_buffer, written, 0);
136 /* If a non-fatal error occurred AND
137 the buffer is full, then we must free it. */
/* "Full" is defined here as more than 64 KiB still pending. */
138 else if (written == 0 && tx_len > 64 * 1024)
140 vec_free (s->tx_buffer);
/* Default read_func: appends the bytes obtained from one read() call to
   sock->rx_buffer, asking for max (n_bytes, 4096) bytes.  When the peer has
   closed the connection rx_end_of_file is set; a fatal errno is returned as
   a unix error. */
147 static clib_error_t *
148 default_socket_read (clib_socket_t * sock, int n_bytes)
153 /* RX side of socket is down once end of file is reached. */
154 if (sock->rx_end_of_file)
/* Always request at least 4096 bytes and grow rx_buffer by that amount;
   `buf` is where read() deposits the data. */
159 n_bytes = clib_max (n_bytes, 4096);
160 vec_add2 (sock->rx_buffer, buf, n_bytes);
162 if ((n_read = read (fd, buf, n_bytes)) < 0)
166 /* Ignore certain errors. */
167 if (!unix_error_is_fatal (errno))
170 return clib_error_return_unix (0, "read %d bytes (fd %d, '%s')",
171 n_bytes, sock->fd, sock->config);
174 /* Other side closed the socket. */
176 sock->rx_end_of_file = 1;
/* n_read - n_bytes is never positive: this shrinks rx_buffer again by the
   part of the reservation that read() did not fill. */
179 vec_inc_len (sock->rx_buffer, n_read - n_bytes);
/* Default close_func: closes s->fd and reports a failing close() as a unix
   error that names the descriptor and the config string. */
184 static clib_error_t *
185 default_socket_close (clib_socket_t * s)
187 if (close (s->fd) < 0)
188 return clib_error_return_unix (0, "close (fd %d, %s)", s->fd, s->config);
/* Default sendmsg_func: sends `msglen` bytes from `msg` with one sendmsg()
   call on s->fd.  The control buffer is laid out to carry the `num_fds`
   descriptors in `fds` as a single SOL_SOCKET / SCM_RIGHTS message. */
192 static clib_error_t *
193 default_socket_sendmsg (clib_socket_t * s, void *msg, int msglen,
194 int fds[], int num_fds)
196 struct msghdr mh = { 0 };
/* Variable-length buffer with room for one control message holding
   num_fds ints. */
198 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
/* The payload is described by a single iovec. */
201 iov[0].iov_base = msg;
202 iov[0].iov_len = msglen;
208 struct cmsghdr *cmsg;
209 clib_memset (&ctl, 0, sizeof (ctl));
210 mh.msg_control = ctl;
211 mh.msg_controllen = sizeof (ctl);
212 cmsg = CMSG_FIRSTHDR (&mh);
/* cmsg_len is computed with CMSG_LEN, whereas the buffer itself was sized
   with CMSG_SPACE. */
213 cmsg->cmsg_len = CMSG_LEN (sizeof (int) * num_fds);
214 cmsg->cmsg_level = SOL_SOCKET;
215 cmsg->cmsg_type = SCM_RIGHTS;
216 memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * num_fds);
218 rv = sendmsg (s->fd, &mh, 0);
/* NOTE(review): presumably reached only when rv signals failure -- confirm. */
220 return clib_error_return_unix (0, "sendmsg");
/* Default recvmsg_func: receives into `msg` (at most `msglen` bytes) with
   one recvmsg() call and then walks the control messages that came with it.
   SOL_SOCKET / SCM_RIGHTS payloads are copied into `fds`; SCM_CREDENTIALS
   payloads are accessed through `cr`. */
225 static clib_error_t *
226 default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
227 int fds[], int num_fds)
/* NOTE(review): two alternative declarations of `ctl` follow; presumably a
   preprocessor conditional selects the larger one where struct ucred is
   available -- confirm. */
230 char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
231 CMSG_SPACE (sizeof (struct ucred))];
232 struct ucred *cr = 0;
234 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
236 struct msghdr mh = { 0 };
239 struct cmsghdr *cmsg;
241 iov[0].iov_base = msg;
242 iov[0].iov_len = msglen;
245 mh.msg_control = ctl;
246 mh.msg_controllen = sizeof (ctl);
248 clib_memset (ctl, 0, sizeof (ctl));
250 /* receive the incoming message */
251 size = recvmsg (s->fd, &mh, 0);
/* A result of 0 is reported as "disconnected"; otherwise a unix error is
   built.  NOTE(review): the condition guarding this return is presumably a
   size check against msglen -- confirm. */
254 return (size == 0) ? clib_error_return (0, "disconnected") :
255 clib_error_return_unix (0, "recvmsg: malformed message (fd %d, '%s')",
/* Iterate over the received control messages, starting with the first. */
259 cmsg = CMSG_FIRSTHDR (&mh);
262 if (cmsg->cmsg_level == SOL_SOCKET)
265 if (cmsg->cmsg_type == SCM_CREDENTIALS)
267 cr = (struct ucred *) CMSG_DATA (cmsg);
274 if (cmsg->cmsg_type == SCM_RIGHTS)
/* The copy length is always num_fds ints; cmsg_len is not consulted in the
   lines shown here. */
276 clib_memcpy_fast (fds, CMSG_DATA (cmsg),
277 num_fds * sizeof (int));
280 cmsg = CMSG_NXTHDR (&mh, cmsg);
/* Points the socket's handler slots at the default_socket_* functions.
   sendmsg_func and recvmsg_func are only filled in when they are still
   unset, so a handler supplied by the caller is kept.
   NOTE(review): write/read/close are presumably guarded the same way --
   confirm. */
286 socket_init_funcs (clib_socket_t * s)
289 s->write_func = default_socket_write;
291 s->read_func = default_socket_read;
293 s->close_func = default_socket_close;
294 if (!s->sendmsg_func)
295 s->sendmsg_func = default_socket_sendmsg;
296 if (!s->recvmsg_func)
297 s->recvmsg_func = default_socket_recvmsg;
/* Socket type selected when an entry's prefix matches the config string. */
304 clib_socket_type_t type;
/* Table of recognised config prefixes.  clib_socket_prefix_is_valid() and
   clib_socket_init() walk it from the first entry and compare each entry's
   `prefix` with the start of the config string via strncmp(). */
307 } clib_socket_type_data[] = {
310 .type = CLIB_SOCKET_TYPE_UNIX,
315 .type = CLIB_SOCKET_TYPE_INET,
317 { .prefix = "abstract:",
319 .type = CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
324 .type = CLIB_SOCKET_TYPE_UNIX,
329 .type = CLIB_SOCKET_TYPE_INET,
334 .type = CLIB_SOCKET_TYPE_UNIX,
/* Helper used by clib_socket_init() to pull the next string out of the
   config text at *p; the result is treated there as a vector (vec_len, %v).
   Callers set is_hostname only for INET sockets.
   NOTE(review): the exact delimiter rules are not documented here --
   confirm against the function body. */
340 _clib_socket_get_string (char **p, int is_hostname)
/* Scans every clib_socket_type_data entry and tests whether `s` starts with
   the prefix of an entry that has skip_prefix set; entries without
   skip_prefix never match.
   NOTE(review): presumably returns non-zero on the first match and zero
   otherwise -- confirm. */
368 clib_socket_prefix_is_valid (char *s)
370 for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
371 d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
372 if (d->skip_prefix && strncmp (s, d->prefix, strlen (d->prefix)) == 0)
/* Create and set up the socket described by s->config.
 *
 * Stages, in order:
 *   1. Match s->config against clib_socket_type_data to pick the socket
 *      type; entries that are not local are passed over when s->local_only
 *      is set.
 *   2. Parse the name and then either a ":port" suffix (INET) or a
 *      "netns_name=" / "netns_pid=" option (Linux abstract sockets).
 *   3. Enter the requested network namespace, if one was given.
 *   4. Fill in a sockaddr_in or sockaddr_un for the chosen type.
 *   5. Create the socket, then take the bind/listen path or the connect
 *      path.
 *   6. On the way out, deal with s->fd if an error occurred and switch the
 *      network namespace back.
 *
 * `err` starts out as 0 and is set to a clib_error_t at each failure site.
 */
377 __clib_export clib_error_t *
378 clib_socket_init (clib_socket_t *s)
380 struct sockaddr_un su = { .sun_family = AF_UNIX };
381 struct sockaddr_in si = { .sin_family = AF_INET };
382 struct sockaddr *sa = 0;
383 typeof (clib_socket_type_data[0]) *data = 0;
384 socklen_t addr_len = 0;
387 clib_error_t *err = 0;
/* Stage 1: find the table entry whose prefix starts the config string. */
399 for (int i = 0; i < ARRAY_LEN (clib_socket_type_data); i++)
401 typeof (clib_socket_type_data[0]) *d = clib_socket_type_data + i;
403 if (d->is_local == 0 && s->local_only)
406 if (strncmp (s->config, d->prefix, strlen (d->prefix)) == 0)
414 return clib_error_return (0, "unsupported socket config '%s'", s->config);
416 s->type = data->type;
/* Stage 2: `p` is the parse cursor; it starts after the prefix only for
   entries that ask for the prefix to be skipped. */
417 p = s->config + (data->skip_prefix ? strlen (data->prefix) : 0);
419 name = _clib_socket_get_string (&p, data->type == CLIB_SOCKET_TYPE_INET);
422 /* parse port type for INET sockets */
423 if (data->type == CLIB_SOCKET_TYPE_INET && p[0] == ':')
426 long long ll = strtoll (old_p, &p, 0);
430 err = clib_error_return (0, "invalid port");
/* An explicit port must lie in 1..CLIB_U16_MAX; 0 is rejected here. */
434 if (ll > CLIB_U16_MAX || ll < 1)
436 err = clib_error_return (0, "port out of range");
/* Abstract sockets may name a network namespace, either by name (resolved
   under /var/run/netns) or by the pid of a process living in it.  The
   trailing %c with 0 NUL-terminates the formatted path for open(). */
448 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
449 strncmp (p, "netns_name=", 11) == 0)
452 u8 *str = _clib_socket_get_string (&p, 0);
453 u8 *pathname = format (0, "/var/run/netns/%v%c", str, 0);
454 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
455 err = clib_error_return_unix (0, "open('%s')", pathname);
461 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
462 strncmp (p, "netns_pid=", 10) == 0)
464 char *old_p = p = p + 10;
465 u32 pid = (u32) strtol (old_p, &p, 0);
468 err = clib_error_return (0, "invalid pid");
471 u8 *pathname = format (0, "/proc/%u/ns/net%c", pid, 0);
472 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
473 err = clib_error_return_unix (0, "open('%s')", pathname);
486 err = clib_error_return (0, "unknown input `%s'", p);
491 /* change netns if requested */
492 if (s->type != CLIB_SOCKET_TYPE_INET && netns_fd != -1)
/* A handle on the current namespace is opened before leaving it.
   NOTE(review): presumably kept so the exit path can switch back --
   confirm. */
494 int fd = open ("/proc/self/ns/net", O_RDONLY);
496 if (setns (netns_fd, CLONE_NEWNET) < 0)
499 err = clib_error_return_unix (0, "setns(%d)", netns_fd);
/* Stage 4: build the socket address for the selected type. */
506 if (s->type == CLIB_SOCKET_TYPE_INET)
508 addr_len = sizeof (si);
509 si.sin_port = htons (port);
513 struct in_addr host_addr;
516 /* Recognize localhost to avoid host lookup in most common case. */
517 if (!strcmp ((char *) name, "localhost"))
518 si.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
/* Dotted-quad literals are taken as they are, without a resolver call. */
520 else if (inet_aton ((char *) name, &host_addr))
521 si.sin_addr = host_addr;
523 else if (strlen ((char *) name) > 0)
525 struct hostent *host = gethostbyname ((char *) name);
527 err = clib_error_return (0, "unknown host `%s'", name);
529 clib_memcpy (&si.sin_addr.s_addr, host->h_addr_list[0],
535 htonl (s->is_server ? INADDR_LOOPBACK : INADDR_ANY);
540 sa = (struct sockaddr *) &si;
542 else if (s->type == CLIB_SOCKET_TYPE_UNIX)
544 struct stat st = { 0 };
545 char *path = (char *) &su.sun_path;
/* One byte of sun_path is left over for the terminating NUL. */
547 if (vec_len (name) > sizeof (su.sun_path) - 1)
549 err = clib_error_return (0, "File path '%v' too long", name);
/* Copy the parsed path, not the raw config string: when a prefix was
   skipped above, s->config still begins with that prefix, so copying
   vec_len (name) bytes from it would produce the wrong sun_path. */
553 clib_memcpy (path, name, vec_len (name));
554 addr_len = sizeof (su);
555 sa = (struct sockaddr *) &su;
557 rv = stat (path, &st);
/* The connect side needs the path to exist already. */
558 if (!s->is_server && rv < 0)
560 err = clib_error_return_unix (0, "stat ('%s')", path);
/* Listening side with the path already present: a socket file is probed
   with a throw-away connect().  A successful connect, or any failure other
   than ECONNREFUSED, is treated as an active listener; otherwise the file is
   unlinked.  A file that is not a socket is reported as already existing. */
564 if (s->is_server && rv == 0)
566 if (S_ISSOCK (st.st_mode))
568 int client_fd = socket (AF_UNIX, SOCK_STREAM, 0);
569 int ret = connect (client_fd, (const struct sockaddr *) &su,
/* errno is captured straight away, before a later call can overwrite it. */
571 typeof (errno) connect_errno = errno;
574 if (ret == 0 || (ret < 0 && connect_errno != ECONNREFUSED))
576 err = clib_error_return (0, "Active listener on '%s'", path);
580 if (unlink (path) < 0)
582 err = clib_error_return_unix (0, "unlink ('%s')", path);
588 err = clib_error_return (0, "File '%s' already exists", path);
594 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT)
596 if (vec_len (name) > sizeof (su.sun_path) - 2)
598 err = clib_error_return (0, "Socket name '%v' too long", name);
/* The name is stored from sun_path[1] on; sun_path[0] keeps the zero it got
   from the initialiser of `su`. */
602 clib_memcpy (&su.sun_path[1], name, vec_len (name));
603 addr_len = sizeof (su.sun_family) + vec_len (name);
604 sa = (struct sockaddr *) &su;
/* Group-write handling is switched off for abstract sockets. */
605 s->allow_group_write = 0;
610 err = clib_error_return_unix (0, "unknown socket family");
/* Stage 5: install the I/O handlers and create the socket itself. */
614 socket_init_funcs (s);
616 if ((s->fd = socket (sa->sa_family,
617 s->is_seqpacket ? SOCK_SEQPACKET : SOCK_STREAM, 0)) < 0)
620 clib_error_return_unix (0, "socket (fd %d, '%s')", s->fd, s->config);
/* No port configured for an INET socket: let find_free_port() pick one. */
628 if (sa->sa_family == AF_INET && si.sin_port == 0)
630 word port = find_free_port (s->fd);
633 err = clib_error_return (0, "no free port (fd %d, '%s')", s->fd,
/* A failing SO_REUSEADDR is only logged as a warning. */
641 if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &((int){ 1 }),
643 clib_unix_warning ("setsockopt SO_REUSEADDR fails");
646 if (sa->sa_family == AF_UNIX && s->passcred)
648 if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &((int){ 1 }),
651 err = clib_error_return_unix (0,
652 "setsockopt (SO_PASSCRED, "
660 if (need_bind && bind (s->fd, sa, addr_len) < 0)
663 clib_error_return_unix (0, "bind (fd %d, '%s')", s->fd, s->config);
667 if (listen (s->fd, 5) < 0)
669 err = clib_error_return_unix (0, "listen (fd %d, '%s')", s->fd,
674 if (s->local_only && s->allow_group_write)
676 if (fchmod (s->fd, S_IWGRP) < 0)
678 err = clib_error_return_unix (
679 0, "fchmod (fd %d, '%s', mode S_IWGRP)", s->fd, s->config);
/* Connect path: O_NONBLOCK is set before connect() only when a
   non-blocking connect was requested. */
686 if (s->non_blocking_connect && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
688 err = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
/* connect() is retried for as long as it fails with EAGAIN. */
693 while ((rv = connect (s->fd, sa, addr_len)) < 0 && errno == EAGAIN)
/* EINPROGRESS is not an error for a non-blocking connect. */
695 if (rv < 0 && !(s->non_blocking_connect && errno == EINPROGRESS))
697 err = clib_error_return_unix (0, "connect (fd %d, '%s')", s->fd,
701 /* Connect was blocking so set fd to non-blocking now unless
702 * blocking mode explicitly requested. */
703 if (!s->non_blocking_connect && !s->is_blocking &&
704 fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
706 err = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
/* Stage 6: exit path. */
713 if (err && s->fd > -1)
/* setns() takes the namespace fd first and the nstype second, exactly as in
   the call that entered the namespace earlier in this function. */
721 setns (netns_fd, CLONE_NEWNET);
/* Like clib_socket_init(), but runs the initialisation inside the network
   namespace named by `namespace`.  A NULL or empty name falls back to a
   plain clib_socket_init().  Otherwise a descriptor for the current
   namespace is obtained, the target namespace is opened and entered, the
   socket is initialised, and finally the original namespace is re-entered
   and its descriptor closed. */
729 __clib_export clib_error_t *
730 clib_socket_init_netns (clib_socket_t *s, u8 *namespace)
732 if (namespace == NULL || namespace[0] == 0)
733 return clib_socket_init (s);
/* nfd starts at -1 so that "never opened" can be told apart later. */
736 int old_netns_fd, nfd = -1;
/* Keep a handle on the current namespace so it can be re-entered. */
738 old_netns_fd = clib_netns_open (NULL /* self */);
739 if (old_netns_fd < 0)
740 return clib_error_return_unix (0, "get current netns failed");
742 if ((nfd = clib_netns_open (namespace)) == -1)
744 error = clib_error_return_unix (0, "clib_netns_open '%s'", namespace);
748 if (clib_setns (nfd) == -1)
750 error = clib_error_return_unix (0, "setns '%s'", namespace);
754 error = clib_socket_init (s);
/* Switching back is best effort: a failure is only logged and does not
   replace `error`. */
757 if (clib_setns (old_netns_fd) == -1)
758 clib_warning ("Cannot set old ns");
760 close (old_netns_fd);
/* Accept one connection on `server` and initialise `client` for it: client
   is zeroed first, the accepted descriptor is made non-blocking, the peer
   address is stored in client->peer, CLIB_SOCKET_F_IS_CLIENT is set and the
   default I/O handlers are installed.  Failures of accept(), fcntl() and
   getpeername() are turned into unix errors. */
768 __clib_export clib_error_t *
769 clib_socket_accept (clib_socket_t * server, clib_socket_t * client)
771 clib_error_t *err = 0;
/* Anything the caller had stored in *client is wiped here. */
774 clib_memset (client, 0, sizeof (client[0]));
776 /* Accept the new socket connection. */
777 client->fd = accept (server->fd, 0, 0);
779 return clib_error_return_unix (0, "accept (fd %d, '%s')",
780 server->fd, server->config);
782 /* Set the new socket to be non-blocking. */
783 if (fcntl (client->fd, F_SETFL, O_NONBLOCK) < 0)
785 err = clib_error_return_unix (0, "fcntl O_NONBLOCK (fd %d)",
/* len is in/out for getpeername(): it goes in as the capacity of
   client->peer. */
791 len = sizeof (client->peer);
792 if (getpeername (client->fd, (struct sockaddr *) &client->peer, &len) < 0)
794 err = clib_error_return_unix (0, "getpeername (fd %d)", client->fd);
798 client->flags = CLIB_SOCKET_F_IS_CLIENT;
800 socket_init_funcs (client);
809 * fd.io coding-style-patch-verification: ON
812 * eval: (c-set-style "gnu")