2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 Copyright (c) 2001, 2002, 2003, 2005 Eliot Dresselhaus
18 Permission is hereby granted, free of charge, to any person obtaining
19 a copy of this software and associated documentation files (the
20 "Software"), to deal in the Software without restriction, including
21 without limitation the rights to use, copy, modify, merge, publish,
22 distribute, sublicense, and/or sell copies of the Software, and to
23 permit persons to whom the Software is furnished to do so, subject to
24 the following conditions:
26 The above copyright notice and this permission notice shall be
27 included in all copies or substantial portions of the Software.
29 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
30 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
32 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
33 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
34 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
35 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
39 #include <string.h> /* strchr */
42 #include <sys/types.h>
43 #include <sys/socket.h>
46 #include <netinet/in.h>
47 #include <arpa/inet.h>
52 #include <vppinfra/mem.h>
53 #include <vppinfra/vec.h>
54 #include <vppinfra/socket.h>
55 #include <vppinfra/linux/netns.h>
56 #include <vppinfra/format.h>
57 #include <vppinfra/error.h>
60 /* IPPORT_USERRESERVED is not part of musl libc. */
61 #define IPPORT_USERRESERVED 5000
65 clib_socket_tx_add_formatted (clib_socket_t * s, char *fmt, ...)
69 clib_socket_tx_add_va_formatted (s, fmt, &va);
73 /* Return and bind to an unused port. */
75 find_free_port (word sock)
79 for (port = IPPORT_USERRESERVED; port < 1 << 16; port++)
83 clib_memset (&a, 0, sizeof (a)); /* Warnings be gone */
85 a.sin_family = PF_INET;
86 a.sin_addr.s_addr = INADDR_ANY;
87 a.sin_port = htons (port);
89 if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
93 return port < 1 << 16 ? port : -1;
97 default_socket_write (clib_socket_t * s)
99 clib_error_t *err = 0;
106 /* Map standard input to standard output.
107 Typically, fd is a socket for which read/write both work. */
111 tx_len = vec_len (s->tx_buffer);
112 written = write (fd, s->tx_buffer, tx_len);
114 /* Ignore certain errors. */
115 if (written < 0 && !unix_error_is_fatal (errno))
118 /* A "real" error occurred. */
121 err = clib_error_return_unix (0, "write %wd bytes (fd %d, '%s')",
122 tx_len, s->fd, s->config);
123 vec_free (s->tx_buffer);
127 /* Reclaim the transmitted part of the tx buffer on successful writes. */
128 else if (written > 0)
130 if (written == tx_len)
131 vec_set_len (s->tx_buffer, 0);
133 vec_delete (s->tx_buffer, written, 0);
136 /* If a non-fatal error occurred AND
137 the buffer is full, then we must free it. */
138 else if (written == 0 && tx_len > 64 * 1024)
140 vec_free (s->tx_buffer);
147 static clib_error_t *
148 default_socket_read (clib_socket_t * sock, int n_bytes)
153 /* RX side of socket is down once end of file is reached. */
154 if (sock->rx_end_of_file)
159 n_bytes = clib_max (n_bytes, 4096);
160 vec_add2 (sock->rx_buffer, buf, n_bytes);
162 if ((n_read = read (fd, buf, n_bytes)) < 0)
166 /* Ignore certain errors. */
167 if (!unix_error_is_fatal (errno))
170 return clib_error_return_unix (0, "read %d bytes (fd %d, '%s')",
171 n_bytes, sock->fd, sock->config);
174 /* Other side closed the socket. */
176 sock->rx_end_of_file = 1;
179 vec_inc_len (sock->rx_buffer, n_read - n_bytes);
184 static clib_error_t *
185 default_socket_close (clib_socket_t * s)
187 if (close (s->fd) < 0)
188 return clib_error_return_unix (0, "close (fd %d, %s)", s->fd, s->config);
192 static clib_error_t *
193 default_socket_sendmsg (clib_socket_t * s, void *msg, int msglen,
194 int fds[], int num_fds)
196 struct msghdr mh = { 0 };
198 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
201 iov[0].iov_base = msg;
202 iov[0].iov_len = msglen;
208 struct cmsghdr *cmsg;
209 clib_memset (&ctl, 0, sizeof (ctl));
210 mh.msg_control = ctl;
211 mh.msg_controllen = sizeof (ctl);
212 cmsg = CMSG_FIRSTHDR (&mh);
213 cmsg->cmsg_len = CMSG_LEN (sizeof (int) * num_fds);
214 cmsg->cmsg_level = SOL_SOCKET;
215 cmsg->cmsg_type = SCM_RIGHTS;
216 memcpy (CMSG_DATA (cmsg), fds, sizeof (int) * num_fds);
218 rv = sendmsg (s->fd, &mh, 0);
220 return clib_error_return_unix (0, "sendmsg");
225 static clib_error_t *
226 default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
227 int fds[], int num_fds)
230 char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
231 CMSG_SPACE (sizeof (struct ucred))];
232 struct ucred *cr = 0;
234 char ctl[CMSG_SPACE (sizeof (int) * num_fds)];
236 struct msghdr mh = { 0 };
239 struct cmsghdr *cmsg;
241 iov[0].iov_base = msg;
242 iov[0].iov_len = msglen;
245 mh.msg_control = ctl;
246 mh.msg_controllen = sizeof (ctl);
248 clib_memset (ctl, 0, sizeof (ctl));
250 /* receive the incoming message */
251 size = recvmsg (s->fd, &mh, 0);
254 return (size == 0) ? clib_error_return (0, "disconnected") :
255 clib_error_return_unix (0, "recvmsg: malformed message (fd %d, '%s')",
259 cmsg = CMSG_FIRSTHDR (&mh);
262 if (cmsg->cmsg_level == SOL_SOCKET)
265 if (cmsg->cmsg_type == SCM_CREDENTIALS)
267 cr = (struct ucred *) CMSG_DATA (cmsg);
274 if (cmsg->cmsg_type == SCM_RIGHTS)
276 clib_memcpy_fast (fds, CMSG_DATA (cmsg),
277 num_fds * sizeof (int));
280 cmsg = CMSG_NXTHDR (&mh, cmsg);
286 socket_init_funcs (clib_socket_t * s)
289 s->write_func = default_socket_write;
291 s->read_func = default_socket_read;
293 s->close_func = default_socket_close;
294 if (!s->sendmsg_func)
295 s->sendmsg_func = default_socket_sendmsg;
296 if (!s->recvmsg_func)
297 s->recvmsg_func = default_socket_recvmsg;
304 clib_socket_type_t type;
307 } clib_socket_type_data[] = {
310 .type = CLIB_SOCKET_TYPE_UNIX,
315 .type = CLIB_SOCKET_TYPE_INET,
317 { .prefix = "abstract:",
319 .type = CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
324 .type = CLIB_SOCKET_TYPE_UNIX,
329 .type = CLIB_SOCKET_TYPE_INET,
334 .type = CLIB_SOCKET_TYPE_UNIX,
340 _clib_socket_get_string (char **p, int is_hostname)
368 clib_socket_prefix_is_valid (char *s)
370 for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
371 d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
372 if (d->skip_prefix && strncmp (s, d->prefix, strlen (d->prefix)) == 0)
378 clib_socket_prefix_get_type (char *s)
380 for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
381 d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
382 if (strncmp (s, d->prefix, strlen (d->prefix)) == 0)
387 __clib_export clib_error_t *
388 clib_socket_init (clib_socket_t *s)
390 struct sockaddr_un su = { .sun_family = AF_UNIX };
391 struct sockaddr_in si = { .sin_family = AF_INET };
392 struct sockaddr *sa = 0;
393 typeof (clib_socket_type_data[0]) *data = 0;
394 socklen_t addr_len = 0;
397 clib_error_t *err = 0;
409 for (int i = 0; i < ARRAY_LEN (clib_socket_type_data); i++)
411 typeof (clib_socket_type_data[0]) *d = clib_socket_type_data + i;
413 if (d->is_local == 0 && s->local_only)
416 if (strncmp (s->config, d->prefix, strlen (d->prefix)) == 0)
424 return clib_error_return (0, "unsupported socket config '%s'", s->config);
426 s->type = data->type;
427 p = s->config + (data->skip_prefix ? strlen (data->prefix) : 0);
429 name = _clib_socket_get_string (&p, data->type == CLIB_SOCKET_TYPE_INET);
432 /* parse port type for INET sockets */
433 if (data->type == CLIB_SOCKET_TYPE_INET && p[0] == ':')
436 long long ll = strtoll (old_p, &p, 0);
440 err = clib_error_return (0, "invalid port");
444 if (ll > CLIB_U16_MAX || ll < 1)
446 err = clib_error_return (0, "port out of range");
458 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
459 strncmp (p, "netns_name=", 11) == 0)
462 u8 *str = _clib_socket_get_string (&p, 0);
465 pathname = format (0, "%v%c", str, 0);
467 pathname = format (0, "/var/run/netns/%v%c", str, 0);
468 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
469 err = clib_error_return_unix (0, "open('%s')", pathname);
475 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
476 strncmp (p, "netns_pid=", 10) == 0)
478 char *old_p = p = p + 10;
479 u32 pid = (u32) strtol (old_p, &p, 0);
482 err = clib_error_return (0, "invalid pid");
485 u8 *pathname = format (0, "/proc/%u/ns/net%c", pid, 0);
486 if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
487 err = clib_error_return_unix (0, "open('%s')", pathname);
500 err = clib_error_return (0, "unknown input `%s'", p);
505 /* change netns if requested */
506 if (s->type != CLIB_SOCKET_TYPE_INET && netns_fd != -1)
508 int fd = open ("/proc/self/ns/net", O_RDONLY);
510 if (setns (netns_fd, CLONE_NEWNET) < 0)
513 err = clib_error_return_unix (0, "setns(%d)", netns_fd);
520 if (s->type == CLIB_SOCKET_TYPE_INET)
522 addr_len = sizeof (si);
523 si.sin_port = htons (port);
527 struct in_addr host_addr;
530 /* Recognize localhost to avoid host lookup in most common cast. */
531 if (!strcmp ((char *) name, "localhost"))
532 si.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
534 else if (inet_aton ((char *) name, &host_addr))
535 si.sin_addr = host_addr;
537 else if (strlen ((char *) name) > 0)
539 struct hostent *host = gethostbyname ((char *) name);
541 err = clib_error_return (0, "unknown host `%s'", name);
543 clib_memcpy (&si.sin_addr.s_addr, host->h_addr_list[0],
549 htonl (s->is_server ? INADDR_LOOPBACK : INADDR_ANY);
554 sa = (struct sockaddr *) &si;
556 else if (s->type == CLIB_SOCKET_TYPE_UNIX)
558 struct stat st = { 0 };
559 char *path = (char *) &su.sun_path;
561 if (vec_len (name) > sizeof (su.sun_path) - 1)
563 err = clib_error_return (0, "File path '%v' too long", name);
567 clib_memcpy (path, s->config, vec_len (name));
568 addr_len = sizeof (su);
569 sa = (struct sockaddr *) &su;
571 rv = stat (path, &st);
572 if (!s->is_server && rv < 0)
574 err = clib_error_return_unix (0, "stat ('%s')", path);
578 if (s->is_server && rv == 0)
580 if (S_ISSOCK (st.st_mode))
582 int client_fd = socket (AF_UNIX, SOCK_STREAM, 0);
583 int ret = connect (client_fd, (const struct sockaddr *) &su,
585 typeof (errno) connect_errno = errno;
588 if (ret == 0 || (ret < 0 && connect_errno != ECONNREFUSED))
590 err = clib_error_return (0, "Active listener on '%s'", path);
594 if (unlink (path) < 0)
596 err = clib_error_return_unix (0, "unlink ('%s')", path);
602 err = clib_error_return (0, "File '%s' already exists", path);
608 else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT)
610 if (vec_len (name) > sizeof (su.sun_path) - 2)
612 err = clib_error_return (0, "Socket name '%v' too long", name);
616 clib_memcpy (&su.sun_path[1], name, vec_len (name));
617 addr_len = sizeof (su.sun_family) + vec_len (name);
618 sa = (struct sockaddr *) &su;
619 s->allow_group_write = 0;
624 err = clib_error_return_unix (0, "unknown socket family");
628 socket_init_funcs (s);
630 if ((s->fd = socket (sa->sa_family,
631 s->is_seqpacket ? SOCK_SEQPACKET : SOCK_STREAM, 0)) < 0)
634 clib_error_return_unix (0, "socket (fd %d, '%s')", s->fd, s->config);
642 if (sa->sa_family == AF_INET && si.sin_port == 0)
644 word port = find_free_port (s->fd);
647 err = clib_error_return (0, "no free port (fd %d, '%s')", s->fd,
655 if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &((int){ 1 }),
657 clib_unix_warning ("setsockopt SO_REUSEADDR fails");
660 if (sa->sa_family == AF_UNIX && s->passcred)
662 if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &((int){ 1 }),
665 err = clib_error_return_unix (0,
666 "setsockopt (SO_PASSCRED, "
674 if (need_bind && bind (s->fd, sa, addr_len) < 0)
677 clib_error_return_unix (0, "bind (fd %d, '%s')", s->fd, s->config);
681 if (listen (s->fd, 5) < 0)
683 err = clib_error_return_unix (0, "listen (fd %d, '%s')", s->fd,
688 if (s->local_only && s->allow_group_write)
690 if (fchmod (s->fd, S_IWGRP) < 0)
692 err = clib_error_return_unix (
693 0, "fchmod (fd %d, '%s', mode S_IWGRP)", s->fd, s->config);
700 if (s->non_blocking_connect && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
702 err = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
707 while ((rv = connect (s->fd, sa, addr_len)) < 0 && errno == EAGAIN)
709 if (rv < 0 && !(s->non_blocking_connect && errno == EINPROGRESS))
711 err = clib_error_return_unix (0, "connect (fd %d, '%s')", s->fd,
715 /* Connect was blocking so set fd to non-blocking now unless
716 * blocking mode explicitly requested. */
717 if (!s->non_blocking_connect && !s->is_blocking &&
718 fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
720 err = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
727 if (err && s->fd > -1)
735 setns (CLONE_NEWNET, netns_fd);
743 __clib_export clib_error_t *
744 clib_socket_accept (clib_socket_t * server, clib_socket_t * client)
746 clib_error_t *err = 0;
749 clib_memset (client, 0, sizeof (client[0]));
751 /* Accept the new socket connection. */
752 client->fd = accept (server->fd, 0, 0);
754 return clib_error_return_unix (0, "accept (fd %d, '%s')",
755 server->fd, server->config);
757 /* Set the new socket to be non-blocking. */
758 if (fcntl (client->fd, F_SETFL, O_NONBLOCK) < 0)
760 err = clib_error_return_unix (0, "fcntl O_NONBLOCK (fd %d)",
766 len = sizeof (client->peer);
767 if (getpeername (client->fd, (struct sockaddr *) &client->peer, &len) < 0)
769 err = clib_error_return_unix (0, "getpeername (fd %d)", client->fd);
773 client->flags = CLIB_SOCKET_F_IS_CLIENT;
775 socket_init_funcs (client);
784 * fd.io coding-style-patch-verification: ON
787 * eval: (c-set-style "gnu")