2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 #include <string.h> /* strchr */
42 #include <sys/types.h>
43 #include <sys/socket.h>
46 #include <netinet/in.h>
47 #include <arpa/inet.h>
52 #include <vppinfra/mem.h>
53 #include <vppinfra/vec.h>
54 #include <vppinfra/socket.h>
55 #include <vppinfra/linux/netns.h>
56 #include <vppinfra/format.h>
57 #include <vppinfra/error.h>
60 /* IPPORT_USERRESERVED is not part of musl libc. */
61 #define IPPORT_USERRESERVED 5000
65 clib_socket_tx_add_formatted (clib_socket_t * s, char *fmt, ...)
69 clib_socket_tx_add_va_formatted (s, fmt, &va);
73 /* Return and bind to an unused port. */
75 find_free_port (word sock)
79 for (port = IPPORT_USERRESERVED; port < 1 << 16; port++)
83 clib_memset (&a, 0, sizeof (a)); /* Warnings be gone */
85 a.sin_family = PF_INET;
86 a.sin_addr.s_addr = INADDR_ANY;
87 a.sin_port = htons (port);
89 if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
93 return port < 1 << 16 ? port : -1;
97 default_socket_write (clib_socket_t * s)
99 clib_error_t *err = 0;
106 /* Map standard input to standard output.
107 Typically, fd is a socket for which read/write both work. */
111 tx_len = vec_len (s->tx_buffer);
112 written = write (fd, s->tx_buffer, tx_len);
114 /* Ignore certain errors. */
115 if (written < 0 && !unix_error_is_fatal (errno))
118 /* A "real" error occurred. */
121 err = clib_error_return_unix (0, "write %wd bytes (fd %d, '%s')",
122 tx_len, s->fd, s->config);
123 vec_free (s->tx_buffer);
127 /* Reclaim the transmitted part of the tx buffer on successful writes. */
128 else if (written > 0)
130 if (written == tx_len)
131 vec_set_len (s->tx_buffer, 0);
133 vec_delete (s->tx_buffer, written, 0);
136 /* If a non-fatal error occurred AND
137 the buffer is full, then we must free it. */
138 else if (written == 0 && tx_len > 64 * 1024)
140 vec_free (s->tx_buffer);
147 static clib_error_t *
148 default_socket_read (clib_socket_t * sock, int n_bytes)
153 /* RX side of socket is down once end of file is reached. */
154 if (sock->rx_end_of_file)
159 n_bytes = clib_max (n_bytes, 4096);
160 vec_add2 (sock->rx_buffer, buf, n_bytes);
162 if ((n_read = read (fd, buf, n_bytes)) < 0)
166 /* Ignore certain errors. */
167 if (!unix_error_is_fatal (errno))
170 return clib_error_return_unix (0, "read %d bytes (fd %d, '%s')",
171 n_bytes, sock->fd, sock->config);
174 /* Other side closed the socket. */
176 sock->rx_end_of_file = 1;
179 vec_inc_len (sock->rx_buffer, n_read - n_bytes);
184 static clib_error_t *
185 default_socket_close (clib_socket_t * s)
187 if (close (s->fd) < 0)
188 return clib_error_return_unix (0, "close (fd %d, %s)", s->fd, s->config);
192 static clib_error_t *
193 default_socket_sendmsg (clib_socket_t * s, void *msg, int msglen,
194 int fds[], int num_fds)
196 struct msghdr mh = { 0 };
198 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
201 iov[0].iov_base = msg;
202 iov[0].iov_len = msglen;
208 struct cmsghdr *cmsg;
209 clib_memset (&ctl, 0, sizeof (ctl));
210 mh.msg_control = ctl;
211 mh.msg_controllen = sizeof (ctl);
212 cmsg = CMSG_FIRSTHDR (&mh);
213 cmsg->cmsg_len = CMSG_LEN (sizeof (int) * num_fds);
214 cmsg->cmsg_level = SOL_SOCKET;
215 cmsg->cmsg_type = SCM_RIGHTS;
216 memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * num_fds);
218 rv = sendmsg (s->fd, &mh, 0);
220 return clib_error_return_unix (0, "sendmsg");
225 static clib_error_t *
226 default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
227 int fds[], int num_fds)
230 char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
231 CMSG_SPACE (sizeof (struct ucred))];
232 struct ucred *cr = 0;
234 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
236 struct msghdr mh = { 0 };
239 struct cmsghdr *cmsg;
241 iov[0].iov_base = msg;
242 iov[0].iov_len = msglen;
245 mh.msg_control = ctl;
246 mh.msg_controllen = sizeof (ctl);
248 clib_memset (ctl, 0, sizeof (ctl));
250 /* receive the incoming message */
251 size = recvmsg (s->fd, &mh, 0);
254 return (size == 0) ? clib_error_return (0, "disconnected") :
255 clib_error_return_unix (0, "recvmsg: malformed message (fd %d, '%s')",
259 cmsg = CMSG_FIRSTHDR (&mh);
262 if (cmsg->cmsg_level == SOL_SOCKET)
265 if (cmsg->cmsg_type == SCM_CREDENTIALS)
267 cr = (struct ucred *) CMSG_DATA (cmsg);
274 if (cmsg->cmsg_type == SCM_RIGHTS)
276 clib_memcpy_fast (fds, CMSG_DATA (cmsg),
277 num_fds * sizeof (int));
280 cmsg = CMSG_NXTHDR (&mh, cmsg);
286 socket_init_funcs (clib_socket_t * s)
289 s->write_func = default_socket_write;
291 s->read_func = default_socket_read;
293 s->close_func = default_socket_close;
294 if (!s->sendmsg_func)
295 s->sendmsg_func = default_socket_sendmsg;
296 if (!s->recvmsg_func)
297 s->recvmsg_func = default_socket_recvmsg;
/* Table of recognised socket config formats.  clib_socket_init walks it in
   order and uses the first entry whose prefix matches the start of the
   config string (entries with is_local == 0 are skipped for local-only
   sockets); the entry's type then selects how the rest of the string is
   interpreted.  When skip_prefix is set the prefix itself is stripped
   before the name is parsed.
   NOTE(review): only part of this definition is visible in this listing;
   the remaining fields of each entry should be checked against the full
   file. */
304 clib_socket_type_t type;
307 } clib_socket_type_data[] = {
310 .type = CLIB_SOCKET_TYPE_UNIX,
315 .type = CLIB_SOCKET_TYPE_INET,
/* "abstract:<name>" selects the Linux abstract socket type. */
317 { .prefix = "abstract:",
319 .type = CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
324 .type = CLIB_SOCKET_TYPE_UNIX,
329 .type = CLIB_SOCKET_TYPE_INET,
334 .type = CLIB_SOCKET_TYPE_UNIX,
/* Extract one name token from a socket config string.  P is the address of
   the parse cursor; callers in this file inspect *P right after the call
   (looking for ':' or ','), so the cursor is expected to be left just past
   the token.  IS_HOSTNAME is non-zero when clib_socket_init parses the name
   of an INET socket.
   NOTE(review): the body is not visible in this listing.  Callers store the
   result in a u8 * and treat it as a vector (vec_len, %v, vec_free), so it
   presumably returns a newly built vector owned by the caller - confirm. */
340 _clib_socket_get_string (char **p, int is_hostname)
368 clib_socket_prefix_is_valid (char *s)
370 for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
371 d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
372 if (d->skip_prefix && strncmp (s, d->prefix, strlen (d->prefix)) == 0)
377 __clib_export clib_error_t *
378 clib_socket_init (clib_socket_t *s)
380 struct sockaddr_un su = { .sun_family = AF_UNIX };
381 struct sockaddr_in si = { .sin_family = AF_INET };
382 struct sockaddr *sa = 0;
383 typeof (clib_socket_type_data[0]) *data = 0;
384 socklen_t addr_len = 0;
387 clib_error_t *err = 0;
399 for (int i = 0; i < ARRAY_LEN (clib_socket_type_data); i++)
401 typeof (clib_socket_type_data[0]) *d = clib_socket_type_data + i;
403 if (d->is_local == 0 && s->local_only)
406 if (strncmp (s->config, d->prefix, strlen (d->prefix)) == 0)
414 return clib_error_return (0, "unsupported socket config '%s'", s->config);
416 s->type = data->type;
417 p = s->config + (data->skip_prefix ? strlen (data->prefix) : 0);
419 name = _clib_socket_get_string (&p, data->type == CLIB_SOCKET_TYPE_INET);
422 /* parse port type for INET sockets */
423 if (data->type == CLIB_SOCKET_TYPE_INET && p[0] == ':')
426 long long ll = strtoll (old_p, &p, 0);
430 err = clib_error_return (0, "invalid port");
434 if (ll > CLIB_U16_MAX || ll < 1)
436 err = clib_error_return (0, "port out of range");
448 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
449 strncmp (p, "netns_name=", 11) == 0)
452 u8 *str = _clib_socket_get_string (&p, 0);
455 pathname = format (0, "%v%c", str, 0);
457 pathname = format (0, "/var/run/netns/%v%c", str, 0);
458 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
459 err = clib_error_return_unix (0, "open('%s')", pathname);
465 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
466 strncmp (p, "netns_pid=", 10) == 0)
468 char *old_p = p = p + 10;
469 u32 pid = (u32) strtol (old_p, &p, 0);
472 err = clib_error_return (0, "invalid pid");
475 u8 *pathname = format (0, "/proc/%u/ns/net%c", pid, 0);
476 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
477 err = clib_error_return_unix (0, "open('%s')", pathname);
490 err = clib_error_return (0, "unknown input `%s'", p);
495 /* change netns if requested */
496 if (s->type != CLIB_SOCKET_TYPE_INET && netns_fd != -1)
498 int fd = open ("/proc/self/ns/net", O_RDONLY);
500 if (setns (netns_fd, CLONE_NEWNET) < 0)
503 err = clib_error_return_unix (0, "setns(%d)", netns_fd);
510 if (s->type == CLIB_SOCKET_TYPE_INET)
512 addr_len = sizeof (si);
513 si.sin_port = htons (port);
517 struct in_addr host_addr;
520 /* Recognize localhost to avoid host lookup in most common cast. */
521 if (!strcmp ((char *) name, "localhost"))
522 si.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
524 else if (inet_aton ((char *) name, &host_addr))
525 si.sin_addr = host_addr;
527 else if (strlen ((char *) name) > 0)
529 struct hostent *host = gethostbyname ((char *) name);
531 err = clib_error_return (0, "unknown host `%s'", name);
533 clib_memcpy (&si.sin_addr.s_addr, host->h_addr_list[0],
539 htonl (s->is_server ? INADDR_LOOPBACK : INADDR_ANY);
544 sa = (struct sockaddr *) &si;
546 else if (s->type == CLIB_SOCKET_TYPE_UNIX)
548 struct stat st = { 0 };
549 char *path = (char *) &su.sun_path;
551 if (vec_len (name) > sizeof (su.sun_path) - 1)
553 err = clib_error_return (0, "File path '%v' too long", name);
557 clib_memcpy (path, s->config, vec_len (name));
558 addr_len = sizeof (su);
559 sa = (struct sockaddr *) &su;
561 rv = stat (path, &st);
562 if (!s->is_server && rv < 0)
564 err = clib_error_return_unix (0, "stat ('%s')", path);
568 if (s->is_server && rv == 0)
570 if (S_ISSOCK (st.st_mode))
572 int client_fd = socket (AF_UNIX, SOCK_STREAM, 0);
573 int ret = connect (client_fd, (const struct sockaddr *) &su,
575 typeof (errno) connect_errno = errno;
578 if (ret == 0 || (ret < 0 && connect_errno != ECONNREFUSED))
580 err = clib_error_return (0, "Active listener on '%s'", path);
584 if (unlink (path) < 0)
586 err = clib_error_return_unix (0, "unlink ('%s')", path);
592 err = clib_error_return (0, "File '%s' already exists", path);
598 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT)
600 if (vec_len (name) > sizeof (su.sun_path) - 2)
602 err = clib_error_return (0, "Socket name '%v' too long", name);
606 clib_memcpy (&su.sun_path[1], name, vec_len (name));
607 addr_len = sizeof (su.sun_family) + vec_len (name);
608 sa = (struct sockaddr *) &su;
609 s->allow_group_write = 0;
614 err = clib_error_return_unix (0, "unknown socket family");
618 socket_init_funcs (s);
620 if ((s->fd = socket (sa->sa_family,
621 s->is_seqpacket ? SOCK_SEQPACKET : SOCK_STREAM, 0)) < 0)
624 clib_error_return_unix (0, "socket (fd %d, '%s')", s->fd, s->config);
632 if (sa->sa_family == AF_INET && si.sin_port == 0)
634 word port = find_free_port (s->fd);
637 err = clib_error_return (0, "no free port (fd %d, '%s')", s->fd,
645 if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &((int){ 1 }),
647 clib_unix_warning ("setsockopt SO_REUSEADDR fails");
650 if (sa->sa_family == AF_UNIX && s->passcred)
652 if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &((int){ 1 }),
655 err = clib_error_return_unix (0,
656 "setsockopt (SO_PASSCRED, "
664 if (need_bind && bind (s->fd, sa, addr_len) < 0)
667 clib_error_return_unix (0, "bind (fd %d, '%s')", s->fd, s->config);
671 if (listen (s->fd, 5) < 0)
673 err = clib_error_return_unix (0, "listen (fd %d, '%s')", s->fd,
678 if (s->local_only && s->allow_group_write)
680 if (fchmod (s->fd, S_IWGRP) < 0)
682 err = clib_error_return_unix (
683 0, "fchmod (fd %d, '%s', mode S_IWGRP)", s->fd, s->config);
690 if (s->non_blocking_connect && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
692 err = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
697 while ((rv = connect (s->fd, sa, addr_len)) < 0 && errno == EAGAIN)
699 if (rv < 0 && !(s->non_blocking_connect && errno == EINPROGRESS))
701 err = clib_error_return_unix (0, "connect (fd %d, '%s')", s->fd,
705 /* Connect was blocking so set fd to non-blocking now unless
706 * blocking mode explicitly requested. */
707 if (!s->non_blocking_connect && !s->is_blocking &&
708 fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
710 err = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
717 if (err && s->fd > -1)
725 setns (CLONE_NEWNET, netns_fd);
733 __clib_export clib_error_t *
734 clib_socket_init_netns (clib_socket_t *s, u8 *namespace)
736 if (namespace == NULL || namespace[0] == 0)
737 return clib_socket_init (s);
740 int old_netns_fd, nfd = -1;
742 old_netns_fd = clib_netns_open (NULL /* self */);
743 if (old_netns_fd < 0)
744 return clib_error_return_unix (0, "get current netns failed");
746 if ((nfd = clib_netns_open (namespace)) == -1)
748 error = clib_error_return_unix (0, "clib_netns_open '%s'", namespace);
752 if (clib_setns (nfd) == -1)
754 error = clib_error_return_unix (0, "setns '%s'", namespace);
758 error = clib_socket_init (s);
761 if (clib_setns (old_netns_fd) == -1)
762 clib_warning ("Cannot set old ns");
764 close (old_netns_fd);
772 __clib_export clib_error_t *
773 clib_socket_accept (clib_socket_t * server, clib_socket_t * client)
775 clib_error_t *err = 0;
778 clib_memset (client, 0, sizeof (client[0]));
780 /* Accept the new socket connection. */
781 client->fd = accept (server->fd, 0, 0);
783 return clib_error_return_unix (0, "accept (fd %d, '%s')",
784 server->fd, server->config);
786 /* Set the new socket to be non-blocking. */
787 if (fcntl (client->fd, F_SETFL, O_NONBLOCK) < 0)
789 err = clib_error_return_unix (0, "fcntl O_NONBLOCK (fd %d)",
795 len = sizeof (client->peer);
796 if (getpeername (client->fd, (struct sockaddr *) &client->peer, &len) < 0)
798 err = clib_error_return_unix (0, "getpeername (fd %d)", client->fd);
802 client->flags = CLIB_SOCKET_F_IS_CLIENT;
804 socket_init_funcs (client);
813 * fd.io coding-style-patch-verification: ON
816 * eval: (c-set-style "gnu")