2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 #include <string.h> /* strchr */
42 #include <sys/types.h>
43 #include <sys/socket.h>
46 #include <netinet/in.h>
47 #include <arpa/inet.h>
52 #include <vppinfra/mem.h>
53 #include <vppinfra/vec.h>
54 #include <vppinfra/socket.h>
55 #include <vppinfra/linux/netns.h>
56 #include <vppinfra/format.h>
57 #include <vppinfra/error.h>
60 /* IPPORT_USERRESERVED is not part of musl libc. */
61 #define IPPORT_USERRESERVED 5000
/* Variadic wrapper: forwards s, fmt and the address of the va_list to
   clib_socket_tx_add_va_formatted.
   NOTE(review): the va_list declaration and its va_start/va_end are not
   visible in this excerpt -- confirm against the full file. */
65 clib_socket_tx_add_formatted (clib_socket_t * s, char *fmt, ...)
69 clib_socket_tx_add_va_formatted (s, fmt, &va);
73 /* Return and bind to an unused port. */
/* Tries each port from IPPORT_USERRESERVED upward, calling bind() on `sock`
   with an IPv4 sockaddr whose address is INADDR_ANY.
   Returns the port value, or -1 when the counter has reached 1 << 16. */
75 find_free_port (word sock)
/* 1 << 16 is one past the largest 16-bit port number. */
79 for (port = IPPORT_USERRESERVED; port < 1 << 16; port++)
83 clib_memset (&a, 0, sizeof (a)); /* Warnings be gone */
85 a.sin_family = PF_INET;
86 a.sin_addr.s_addr = INADDR_ANY;
/* sin_port is stored in network byte order. */
87 a.sin_port = htons (port);
/* A non-negative bind() result means the socket is now bound to `port`.
   NOTE(review): the statement controlled by this test is not visible in
   this excerpt; presumably it leaves the loop -- confirm. */
89 if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
93 return port < 1 << 16 ? port : -1;
/* Default write handler: issues one write() of the whole s->tx_buffer and
   then trims or frees the buffer depending on how many bytes were accepted.
   The error pointer `err` starts at 0 and is set only on the fatal-error
   path below.
   NOTE(review): the declarations of written/fd/tx_len, the assignment of
   fd, and the statements controlled by the first two tests are not visible
   in this excerpt. */
97 default_socket_write (clib_socket_t * s)
99 clib_error_t *err = 0;
106 /* Map standard input to standard output.
107 Typically, fd is a socket for which read/write both work. */
111 tx_len = vec_len (s->tx_buffer);
/* Single attempt; a short write is handled by the trimming code below. */
112 written = write (fd, s->tx_buffer, tx_len);
114 /* Ignore certain errors. */
115 if (written < 0 && !unix_error_is_fatal (errno))
118 /* A "real" error occurred. */
/* The error text reports the requested length, s->fd and s->config;
   the pending TX data is discarded. */
121 err = clib_error_return_unix (0, "write %wd bytes (fd %d, '%s')",
122 tx_len, s->fd, s->config);
123 vec_free (s->tx_buffer);
127 /* Reclaim the transmitted part of the tx buffer on successful writes. */
128 else if (written > 0)
/* Everything went out: set the vector length to 0. */
130 if (written == tx_len)
131 vec_set_len (s->tx_buffer, 0);
/* Partial write: remove the first `written` bytes (index 0 onward). */
133 vec_delete (s->tx_buffer, written, 0);
136 /* If a non-fatal error occurred AND
137 the buffer is full, then we must free it. */
/* "Full" here means more than 64 KiB pending. */
138 else if (written == 0 && tx_len > 64 * 1024)
140 vec_free (s->tx_buffer);
/* Default read handler: grows sock->rx_buffer by at least 4096 bytes, issues
   one read() into the new space, then shrinks the vector back to the number
   of bytes actually received.
   Returns a unix error built from n_bytes, sock->fd and sock->config when
   read() fails with a fatal errno.
   NOTE(review): the declarations and the assignment of fd, plus the
   statements controlled by several tests below, are not visible in this
   excerpt. */
147 static clib_error_t *
148 default_socket_read (clib_socket_t * sock, int n_bytes)
153 /* RX side of socket is down once end of file is reached. */
154 if (sock->rx_end_of_file)
/* Never ask for less than 4096 bytes per call. */
159 n_bytes = clib_max (n_bytes, 4096);
/* Append n_bytes of space to rx_buffer; buf is set to the new region. */
160 vec_add2 (sock->rx_buffer, buf, n_bytes);
162 if ((n_read = read (fd, buf, n_bytes)) < 0)
166 /* Ignore certain errors. */
167 if (!unix_error_is_fatal (errno))
170 return clib_error_return_unix (0, "read %d bytes (fd %d, '%s')",
171 n_bytes, sock->fd, sock->config);
174 /* Other side closed the socket. */
176 sock->rx_end_of_file = 1;
/* n_read - n_bytes is the (non-positive) difference between what was
   received and what was reserved above; adding it gives back the unused
   tail. */
179 vec_inc_len (sock->rx_buffer, n_read - n_bytes);
/* Default close handler: closes s->fd and returns a unix error naming the
   fd and s->config when close() fails.
   NOTE(review): the success-path return is not visible in this excerpt. */
184 static clib_error_t *
185 default_socket_close (clib_socket_t * s)
187 if (close (s->fd) < 0)
188 return clib_error_return_unix (0, "close (fd %d, %s)", s->fd, s->config);
/* Default sendmsg handler: sends `msglen` bytes at `msg` on s->fd with
   sendmsg(), and can attach `num_fds` file descriptors from `fds` as a
   single SOL_SOCKET / SCM_RIGHTS control message.
   Returns a unix error ("sendmsg") on the failure path.
   NOTE(review): the iov declaration, its hookup into mh, the condition
   guarding the control-message setup and the test on rv are not visible in
   this excerpt. */
192 static clib_error_t *
193 default_socket_sendmsg (clib_socket_t * s, void *msg, int msglen,
194 int fds[], int num_fds)
196 struct msghdr mh = { 0 };
/* Variable-length control buffer sized for num_fds ints. */
198 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
/* One I/O vector covering the caller's payload. */
201 iov[0].iov_base = msg;
202 iov[0].iov_len = msglen;
208 struct cmsghdr *cmsg;
/* Zero the control buffer before filling in the header. */
209 clib_memset (&ctl, 0, sizeof (ctl));
210 mh.msg_control = ctl;
211 mh.msg_controllen = sizeof (ctl);
212 cmsg = CMSG_FIRSTHDR (&mh);
/* cmsg_len uses CMSG_LEN (header + payload), whereas the buffer itself was
   sized with CMSG_SPACE. */
213 cmsg->cmsg_len = CMSG_LEN (sizeof (int) * num_fds);
214 cmsg->cmsg_level = SOL_SOCKET;
215 cmsg->cmsg_type = SCM_RIGHTS;
216 memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * num_fds);
218 rv = sendmsg (s->fd, &mh, 0);
220 return clib_error_return_unix (0, "sendmsg");
/* Default recvmsg handler: receives one message of up to `msglen` bytes into
   `msg` with recvmsg(), then walks the attached control messages looking for
   SOL_SOCKET entries: SCM_CREDENTIALS (peer credentials) and SCM_RIGHTS
   (file descriptors copied into `fds`).
   Returns a "disconnected" error when recvmsg() yields 0, or a unix error
   otherwise on the failure path.
   NOTE(review): two alternative declarations of `ctl` appear below; the
   preprocessor conditional that selects between them is not visible in this
   excerpt. The iov declaration, the size test, the loop header and what is
   done with `cr` are likewise not visible. */
225 static clib_error_t *
226 default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
227 int fds[], int num_fds)
/* Variant with room for both the fd array and a struct ucred. */
230 char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
231 CMSG_SPACE (sizeof (struct ucred))];
232 struct ucred *cr = 0;
/* Variant with room for the fd array only. */
234 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
236 struct msghdr mh = { 0 };
239 struct cmsghdr *cmsg;
241 iov[0].iov_base = msg;
242 iov[0].iov_len = msglen;
245 mh.msg_control = ctl;
246 mh.msg_controllen = sizeof (ctl);
248 clib_memset (ctl, 0, sizeof (ctl));
250 /* receive the incoming message */
251 size = recvmsg (s->fd, &mh, 0);
254 return (size == 0) ? clib_error_return (0, "disconnected") :
255 clib_error_return_unix (0, "recvmsg: malformed message (fd %d, '%s')",
/* Start of the control-message walk; CMSG_NXTHDR below advances it. */
259 cmsg = CMSG_FIRSTHDR (&mh);
262 if (cmsg->cmsg_level == SOL_SOCKET)
265 if (cmsg->cmsg_type == SCM_CREDENTIALS)
/* cr points into ctl; it is only valid while ctl is in scope. */
267 cr = (struct ucred *) CMSG_DATA (cmsg);
274 if (cmsg->cmsg_type == SCM_RIGHTS)
/* NOTE(review): the copy length is taken from num_fds, not from
   cmsg->cmsg_len, so a message carrying fewer descriptors still copies
   num_fds ints (the remainder comes from the zeroed ctl buffer) --
   confirm this is what callers expect. */
276 clib_memcpy_fast (fds, CMSG_DATA (cmsg),
277 num_fds * sizeof (int));
280 cmsg = CMSG_NXTHDR (&mh, cmsg);
/* Point the socket's write/read/close/sendmsg/recvmsg function pointers at
   the default_socket_* handlers defined in this file.
   The sendmsg and recvmsg defaults are installed only when the
   corresponding pointer is still unset.
   NOTE(review): any guards on the write/read/close assignments are not
   visible in this excerpt. */
286 socket_init_funcs (clib_socket_t * s)
289 s->write_func = default_socket_write;
291 s->read_func = default_socket_read;
293 s->close_func = default_socket_close;
294 if (!s->sendmsg_func)
295 s->sendmsg_func = default_socket_sendmsg;
296 if (!s->recvmsg_func)
297 s->recvmsg_func = default_socket_recvmsg;
/* Table describing the recognized socket config forms. Each entry carries a
   socket type; other code in this file also reads the entries' .prefix,
   .skip_prefix and .is_local members and matches .prefix against the start
   of a config string with strncmp.
   NOTE(review): most member declarations and initializers of this table
   are not visible in this excerpt. */
304 clib_socket_type_t type;
307 } clib_socket_type_data[] = {
310 .type = CLIB_SOCKET_TYPE_UNIX,
315 .type = CLIB_SOCKET_TYPE_INET,
/* Abstract-namespace entry. The #endif below closes a CLIB_LINUX
   conditional whose opening line is not visible here. */
318 { .prefix = "abstract:",
320 .type = CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
323 #endif /* CLIB_LINUX */
326 .type = CLIB_SOCKET_TYPE_UNIX,
331 .type = CLIB_SOCKET_TYPE_INET,
336 .type = CLIB_SOCKET_TYPE_UNIX,
/* NOTE(review): only the signature is visible in this excerpt. Callers in
   this file pass the address of their parse cursor as `p`, pass a non-zero
   `is_hostname` for INET sockets, and treat the result as a u8 vector
   (printed with %v, measured with vec_len) -- confirm the exact parsing
   rules against the full file. */
342 _clib_socket_get_string (char **p, int is_hostname)
/* Walk clib_socket_type_data looking for an entry that has skip_prefix set
   and whose prefix matches the beginning of `s`.
   NOTE(review): the return statements are not visible in this excerpt. */
370 clib_socket_prefix_is_valid (char *s)
372 for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
373 d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
/* Only the first strlen (d->prefix) characters of s are compared. */
374 if (d->skip_prefix && strncmp (s, d->prefix, strlen (d->prefix)) == 0)
/* Walk clib_socket_type_data looking for an entry whose prefix matches the
   beginning of `s`; unlike clib_socket_prefix_is_valid, skip_prefix is not
   consulted.
   NOTE(review): the return statements are not visible in this excerpt. */
380 clib_socket_prefix_get_type (char *s)
382 for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
383 d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
/* Only the first strlen (d->prefix) characters of s are compared. */
384 if (strncmp (s, d->prefix, strlen (d->prefix)) == 0)
/* Create and set up the socket described by s->config.
   Visible stages, in order:
     1. pick a clib_socket_type_data entry whose prefix matches s->config;
     2. parse the name and trailing options (":port" for INET sockets,
        "netns_name=" / "netns_pid=" for abstract sockets);
     3. optionally switch network namespace with setns();
     4. build the sockaddr for the INET, UNIX or abstract case;
     5. install default handlers, create the fd, set socket options;
     6. bind and listen, or connect.
   Returns a clib_error_t pointer; `err` starts at 0 and is set on each
   failure path shown below.
   NOTE(review): many lines of this function (braces, gotos, some
   declarations, preprocessor conditionals and the server/client branch
   headers) are not visible in this excerpt; comments below describe only
   the visible statements. */
389 __clib_export clib_error_t *
390 clib_socket_init (clib_socket_t *s)
/* Both address structures are pre-initialized with their family; remaining
   members start out zero. */
392 struct sockaddr_un su = { .sun_family = AF_UNIX };
393 struct sockaddr_in si = { .sin_family = AF_INET };
394 struct sockaddr *sa = 0;
395 typeof (clib_socket_type_data[0]) *data = 0;
396 socklen_t addr_len = 0;
399 clib_error_t *err = 0;
/* Look through the type table for an entry matching the config string. */
411 for (int i = 0; i < ARRAY_LEN (clib_socket_type_data); i++)
413 typeof (clib_socket_type_data[0]) *d = clib_socket_type_data + i;
/* Entries not marked local are tested against the local_only request. */
415 if (d->is_local == 0 && s->local_only)
418 if (strncmp (s->config, d->prefix, strlen (d->prefix)) == 0)
426 return clib_error_return (0, "unsupported socket config '%s'", s->config);
428 s->type = data->type;
/* Begin parsing after the prefix when the entry has skip_prefix set. */
429 p = s->config + (data->skip_prefix ? strlen (data->prefix) : 0);
/* Second argument (is_hostname) is non-zero for INET sockets. */
431 name = _clib_socket_get_string (&p, data->type == CLIB_SOCKET_TYPE_INET);
434 /* parse port type for INET sockets */
435 if (data->type == CLIB_SOCKET_TYPE_INET && p[0] == ':')
/* Base 0: strtoll accepts decimal, octal (0...) and hex (0x...) input. */
438 long long ll = strtoll (old_p, &p, 0);
442 err = clib_error_return (0, "invalid port");
/* Port 0 is rejected along with anything above CLIB_U16_MAX. */
446 if (ll > CLIB_U16_MAX || ll < 1)
448 err = clib_error_return (0, "port out of range");
/* "netns_name=" option: considered only for abstract sockets and only while
   netns_fd is still -1. 11 is the length of the option keyword. */
460 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
461 strncmp (p, "netns_name=", 11) == 0)
464 u8 *str = _clib_socket_get_string (&p, 0);
/* Two pathname forms: the string as given, or the string placed under
   /var/run/netns/. %c with 0 appends a NUL terminator for open().
   NOTE(review): the condition choosing between them is not visible. */
467 pathname = format (0, "%v%c", str, 0);
469 pathname = format (0, "/var/run/netns/%v%c", str, 0);
470 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
471 err = clib_error_return_unix (0, "open('%s')", pathname);
/* "netns_pid=" option: same preconditions; 10 is the keyword length. */
477 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
478 strncmp (p, "netns_pid=", 10) == 0)
/* Skip the keyword, then parse the pid (base auto-detected). */
480 char *old_p = p = p + 10;
481 u32 pid = (u32) strtol (old_p, &p, 0);
484 err = clib_error_return (0, "invalid pid");
/* The namespace handle is the net entry under /proc/<pid>/ns. */
487 u8 *pathname = format (0, "/proc/%u/ns/net%c", pid, 0);
488 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
489 err = clib_error_return_unix (0, "open('%s')", pathname);
502 err = clib_error_return (0, "unknown input `%s'", p);
507 /* change netns if requested */
508 if (s->type != CLIB_SOCKET_TYPE_INET && netns_fd != -1)
/* Open a handle on the current network namespace before switching.
   NOTE(review): how `fd` is used afterwards is not visible in this
   excerpt; presumably it is kept so the namespace can be restored. */
510 int fd = open ("/proc/self/ns/net", O_RDONLY);
512 if (setns (netns_fd, CLONE_NEWNET) < 0)
515 err = clib_error_return_unix (0, "setns(%d)", netns_fd);
/* ---- INET: fill in struct sockaddr_in ---- */
522 if (s->type == CLIB_SOCKET_TYPE_INET)
524 addr_len = sizeof (si);
525 si.sin_port = htons (port);
529 struct in_addr host_addr;
532 /* Recognize localhost to avoid host lookup in most common case. */
533 if (!strcmp ((char *) name, "localhost"))
534 si.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
/* Numeric IPv4 address: use it directly. */
536 else if (inet_aton ((char *) name, &host_addr))
537 si.sin_addr = host_addr;
/* Any other non-empty name goes through a resolver lookup. */
539 else if (strlen ((char *) name) > 0)
541 struct hostent *host = gethostbyname ((char *) name);
543 err = clib_error_return (0, "unknown host `%s'", name);
/* Use the first address returned by the lookup. */
545 clib_memcpy (&si.sin_addr.s_addr, host->h_addr_list[0],
/* Fallback value: loopback when is_server is set, INADDR_ANY otherwise.
   NOTE(review): the left-hand side of this assignment is not visible. */
551 htonl (s->is_server ? INADDR_LOOPBACK : INADDR_ANY);
556 sa = (struct sockaddr *) &si;
/* ---- UNIX: filesystem path socket ---- */
558 else if (s->type == CLIB_SOCKET_TYPE_UNIX)
560 struct stat st = { 0 };
561 char *path = (char *) &su.sun_path;
/* Leave room for the terminating NUL inside sun_path. */
563 if (vec_len (name) > sizeof (su.sun_path) - 1)
565 err = clib_error_return (0, "File path '%v' too long", name);
/* NOTE(review): the copy source is s->config while the length is
   vec_len (name). When the matched table entry has skip_prefix set, name
   was parsed from after the prefix, so the two strings differ -- confirm
   that copying from s->config (rather than name) is intended. */
569 clib_memcpy (path, s->config, vec_len (name));
570 addr_len = sizeof (su);
571 sa = (struct sockaddr *) &su;
573 rv = stat (path, &st);
/* A client needs the path to exist already. */
574 if (!s->is_server && rv < 0)
576 err = clib_error_return_unix (0, "stat ('%s')", path);
/* A server finding an existing path must decide whether it is stale. */
580 if (s->is_server && rv == 0)
582 if (S_ISSOCK (st.st_mode))
/* Probe the existing socket file with a throw-away client connection. */
584 int client_fd = socket (AF_UNIX, SOCK_STREAM, 0);
585 int ret = connect (client_fd, (const struct sockaddr *) &su,
/* Capture errno right away so later calls cannot overwrite it. */
587 typeof (errno) connect_errno = errno;
/* Success, or any failure other than ECONNREFUSED, is treated as an
   active listener. */
590 if (ret == 0 || (ret < 0 && connect_errno != ECONNREFUSED))
592 err = clib_error_return (0, "Active listener on '%s'", path);
/* Otherwise the socket file is removed. */
596 if (unlink (path) < 0)
598 err = clib_error_return_unix (0, "unlink ('%s')", path);
604 err = clib_error_return (0, "File '%s' already exists", path);
/* ---- Linux abstract-namespace socket ---- */
610 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT)
/* One byte is reserved for the leading NUL, hence the - 2. */
612 if (vec_len (name) > sizeof (su.sun_path) - 2)
614 err = clib_error_return (0, "Socket name '%v' too long", name);
/* The name is stored from sun_path[1]; sun_path[0] stays zero from the
   initializer above. */
618 clib_memcpy (&su.sun_path[1], name, vec_len (name));
/* NOTE(review): this length covers vec_len (name) bytes of sun_path
   counted from index 0, i.e. the leading NUL plus vec_len (name) - 1 bytes
   of the name unless the vector carries an extra trailing byte -- confirm
   against _clib_socket_get_string and unix(7). */
619 addr_len = sizeof (su.sun_family) + vec_len (name);
620 sa = (struct sockaddr *) &su;
/* allow_group_write is cleared here; it is only consulted for the umask
   handling around bind() further down. */
621 s->allow_group_write = 0;
626 err = clib_error_return_unix (0, "unknown socket family");
/* Install default read/write/close/sendmsg/recvmsg handlers. */
630 socket_init_funcs (s);
/* Socket kind follows is_seqpacket: SOCK_SEQPACKET or SOCK_STREAM. */
632 if ((s->fd = socket (sa->sa_family,
633 s->is_seqpacket ? SOCK_SEQPACKET : SOCK_STREAM, 0)) < 0)
636 clib_error_return_unix (0, "socket (fd %d, '%s')", s->fd, s->config);
/* INET socket without an explicit port: search for a free one, which also
   binds the fd (see find_free_port). */
644 if (sa->sa_family == AF_INET && si.sin_port == 0)
646 word port = find_free_port (s->fd);
649 err = clib_error_return (0, "no free port (fd %d, '%s')", s->fd,
/* Failure to set SO_REUSEADDR only produces a warning. */
657 if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &((int){ 1 }),
659 clib_unix_warning ("setsockopt SO_REUSEADDR fails");
/* Enable SO_PASSCRED on AF_UNIX sockets when s->passcred is set. */
662 if (sa->sa_family == AF_UNIX && s->passcred)
664 if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &((int){ 1 }),
667 err = clib_error_return_unix (0,
668 "setsockopt (SO_PASSCRED, "
/* For AF_UNIX with allow_group_write, bind() runs under a temporary umask
   of S_IWOTH, and the previous umask is restored right after. */
679 if (sa->sa_family == AF_UNIX && s->allow_group_write)
681 mode_t def_restrictions = umask (S_IWOTH);
682 bind_ret = bind (s->fd, sa, addr_len);
683 umask (def_restrictions);
686 bind_ret = bind (s->fd, sa, addr_len);
690 err = clib_error_return_unix (0, "bind (fd %d, '%s')", s->fd,
/* Listen backlog is fixed at 5. */
696 if (listen (s->fd, 5) < 0)
698 err = clib_error_return_unix (0, "listen (fd %d, '%s')", s->fd,
/* With non_blocking_connect, switch to O_NONBLOCK before connecting. */
705 if (s->non_blocking_connect && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
707 err = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
/* Retry connect() for as long as it fails with EAGAIN. */
712 while ((rv = connect (s->fd, sa, addr_len)) < 0 && errno == EAGAIN)
/* EINPROGRESS is acceptable only for a non-blocking connect. */
714 if (rv < 0 && !(s->non_blocking_connect && errno == EINPROGRESS))
716 err = clib_error_return_unix (0, "connect (fd %d, '%s')", s->fd,
720 /* Connect was blocking so set fd to non-blocking now unless
721 * blocking mode explicitly requested. */
722 if (!s->non_blocking_connect && !s->is_blocking &&
723 fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
725 err = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
/* Common exit handling for a failure after the fd was created.
   NOTE(review): the controlled statements are not visible in this
   excerpt. */
732 if (err && s->fd > -1)
/* Second setns() on netns_fd; its return value is not checked.
   NOTE(review): the enclosing condition is not visible in this excerpt. */
740 setns (netns_fd, CLONE_NEWNET);
/* Accept one pending connection on server->fd and fill in `client`:
   the structure is zeroed, the accepted fd is switched to O_NONBLOCK, the
   peer address is stored in client->peer, flags is set to
   CLIB_SOCKET_F_IS_CLIENT and the default handlers are installed.
   Returns a unix error when accept(), fcntl() or getpeername() fails.
   NOTE(review): the test on the accept() result, the declaration of `len`
   and the cleanup performed after the fcntl/getpeername failures are not
   visible in this excerpt. */
748 __clib_export clib_error_t *
749 clib_socket_accept (clib_socket_t * server, clib_socket_t * client)
751 clib_error_t *err = 0;
/* Start from a clean structure; every member of *client is overwritten. */
754 clib_memset (client, 0, sizeof (client[0]));
756 /* Accept the new socket connection. */
/* The peer address is not requested here; getpeername() below fetches it. */
757 client->fd = accept (server->fd, 0, 0);
759 return clib_error_return_unix (0, "accept (fd %d, '%s')",
760 server->fd, server->config);
762 /* Set the new socket to be non-blocking. */
763 if (fcntl (client->fd, F_SETFL, O_NONBLOCK) < 0)
765 err = clib_error_return_unix (0, "fcntl O_NONBLOCK (fd %d)",
/* len is in/out: capacity of client->peer on entry. */
771 len = sizeof (client->peer);
772 if (getpeername (client->fd, (struct sockaddr *) &client->peer, &len) < 0)
774 err = clib_error_return_unix (0, "getpeername (fd %d)", client->fd);
778 client->flags = CLIB_SOCKET_F_IS_CLIENT;
780 socket_init_funcs (client);
789 * fd.io coding-style-patch-verification: ON
792 * eval: (c-set-style "gnu")