#include <netinet/in.h>
#include <sys/ioctl.h>
#include <vppinfra/byte_order.h>
+#include <svm/memfd.h>
#include <fcntl.h>
#include <sys/stat.h>
-#include <vlibsocket/api.h>
#include <vlibmemory/api.h>
-#include <vlibsocket/vl_socket_msg_enum.h> /* enumerate all vlib messages */
+#include <vlibmemory/vl_memory_msg_enum.h>
#define vl_typedefs /* define message structures */
-#include <vlibsocket/vl_socket_api_h.h>
+#include <vlibmemory/vl_memory_api_h.h>
#undef vl_typedefs
/* instantiate all the print functions we know about */
#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
-#include <vlibsocket/vl_socket_api_h.h>
+#include <vlibmemory/vl_memory_api_h.h>
#undef vl_printfun
/* instantiate all the endian swap functions we know about */
#define vl_endianfun
-#include <vlibsocket/vl_socket_api_h.h>
+#include <vlibmemory/vl_memory_api_h.h>
#undef vl_endianfun
-socket_main_t socket_main;
-
void
dump_socket_clients (vlib_main_t * vm, api_main_t * am)
{
if (pool_elts (sm->registration_pool) < 2)
return;
- vlib_cli_output (vm, "TCP socket clients");
+ vlib_cli_output (vm, "Socket clients");
vlib_cli_output (vm, "%16s %8s", "Name", "Fildesc");
/* *INDENT-OFF* */
pool_foreach (reg, sm->registration_pool,
void
vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
{
- u32 nbytes = 4; /* for the length... */
u16 msg_id = ntohs (*(u16 *) elem);
- u32 msg_length;
- u32 tmp;
api_main_t *am = &api_main;
+ msgbuf_t *mb = (msgbuf_t *) (elem - offsetof (msgbuf_t, data));
+#if CLIB_DEBUG > 1
+ u32 output_length;
+#endif
+ clib_file_t *cf = rp->clib_file_index + file_main.file_pool;
ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
return;
}
- msg_length = am->api_trace_cfg[msg_id].size;
- nbytes += msg_length;
- tmp = clib_host_to_net_u32 (nbytes);
-
- vl_socket_add_pending_output (rp->clib_file_index
- + file_main.file_pool,
- rp->vl_api_registration_pool_index
- + socket_main.registration_pool,
- (u8 *) & tmp, sizeof (tmp));
- vl_socket_add_pending_output (rp->clib_file_index
- + file_main.file_pool,
+ /* Add the msgbuf_t to the output vector */
+ vl_socket_add_pending_output_no_flush (cf,
+ rp->vl_api_registration_pool_index +
+ socket_main.registration_pool,
+ (u8 *) mb, sizeof (*mb));
+ /* Send the message */
+ vl_socket_add_pending_output (cf,
rp->vl_api_registration_pool_index
+ socket_main.registration_pool,
- elem, msg_length);
- vl_msg_api_free ((void *) elem);
-}
-
-void
-vl_socket_api_send_with_data (vl_api_registration_t * rp,
- u8 * elem, u8 * data_vector)
-{
- u32 nbytes = 4; /* for the length... */
- u16 msg_id = ntohs (*(u16 *) elem);
- u32 msg_length;
- u32 tmp;
- api_main_t *am = &api_main;
-
- ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
-
- if (msg_id >= vec_len (am->api_trace_cfg))
- {
- clib_warning ("id out of range: %d", msg_id);
- vec_free (data_vector);
- vl_msg_api_free ((void *) elem);
- return;
- }
-
- msg_length = am->api_trace_cfg[msg_id].size;
- nbytes += msg_length;
- nbytes += vec_len (data_vector);
+ elem, ntohl (mb->data_len));
- /* Length in network byte order */
- tmp = clib_host_to_net_u32 (nbytes);
+#if CLIB_DEBUG > 1
+ output_length = sizeof (*mb) + ntohl (mb->data_len);
+ clib_warning ("wrote %u bytes to fd %d", output_length,
+ cf->file_descriptor);
+#endif
- vl_socket_add_pending_output (rp->clib_file_index
- + file_main.file_pool,
- rp->vl_api_registration_pool_index
- + socket_main.registration_pool,
- (u8 *) & tmp, sizeof (tmp));
- vl_socket_add_pending_output (rp->clib_file_index
- + file_main.file_pool,
- rp->vl_api_registration_pool_index
- + socket_main.registration_pool,
- elem, msg_length);
- vl_socket_add_pending_output (rp->clib_file_index
- + file_main.file_pool,
- rp->vl_api_registration_pool_index
- + socket_main.registration_pool,
- data_vector, vec_len (data_vector));
vl_msg_api_free ((void *) elem);
}
-static inline void
-vl_socket_api_send_with_length_internal (vl_api_registration_t * rp,
- u8 * elem, u32 msg_length, int free)
-{
- u32 nbytes = 4; /* for the length... */
- u16 msg_id = ntohs (*(u16 *) elem);
- u32 tmp;
- api_main_t *am = &api_main;
-
- ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
-
- if (msg_id >= vec_len (am->api_trace_cfg))
- {
- clib_warning ("id out of range: %d", msg_id);
- if (free)
- vl_msg_api_free ((void *) elem);
- return;
- }
-
- nbytes += msg_length;
-
- /* Length in network byte order */
- tmp = clib_host_to_net_u32 (nbytes);
-
- vl_socket_add_pending_output (rp->clib_file_index
- + file_main.file_pool,
- rp->vl_api_registration_pool_index
- + socket_main.registration_pool,
- (u8 *) & tmp, sizeof (tmp));
- vl_socket_add_pending_output (rp->clib_file_index
- + file_main.file_pool,
- rp->vl_api_registration_pool_index
- + socket_main.registration_pool,
- elem, msg_length);
- if (free)
- vl_msg_api_free ((void *) elem);
-}
-
-void
-vl_socket_api_send_with_length (vl_api_registration_t * rp,
- u8 * elem, u32 msg_length)
-{
- vl_socket_api_send_with_length_internal (rp, elem, msg_length,
- 1 /* free */ );
-}
-
-void
-vl_socket_api_send_with_length_no_free (vl_api_registration_t * rp,
- u8 * elem, u32 msg_length)
-{
- vl_socket_api_send_with_length_internal (rp, elem, msg_length,
- 0 /* free */ );
-}
-
void
vl_free_socket_registration_index (u32 pool_index)
{
+ int i;
vl_api_registration_t *rp;
if (pool_is_free_index (socket_main.registration_pool, pool_index))
{
rp = pool_elt_at_index (socket_main.registration_pool, pool_index);
ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE);
+ for (i = 0; i < vec_len (rp->additional_fds_to_close); i++)
+ if (close (rp->additional_fds_to_close[i]) < 0)
+ clib_unix_warning ("close");
+ vec_free (rp->additional_fds_to_close);
vec_free (rp->name);
vec_free (rp->unprocessed_input);
vec_free (rp->output_vector);
pool_put (socket_main.registration_pool, rp);
}
-static inline void
-socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp,
- i8 * input_v)
+void
+vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp,
+ i8 * input_v)
{
- u8 *the_msg = (u8 *) (input_v + sizeof (u32));
+ msgbuf_t *mbp = (msgbuf_t *) input_v;
+
+ u8 *the_msg = (u8 *) (mbp->data);
socket_main.current_uf = uf;
socket_main.current_rp = rp;
vl_msg_api_socket_handler (the_msg);
vl_socket_read_ready (clib_file_t * uf)
{
clib_file_main_t *fm = &file_main;
+ vlib_main_t *vm = vlib_get_main ();
vl_api_registration_t *rp;
int n;
i8 *msg_buffer = 0;
+ u8 *data_for_process;
u32 msg_len;
u32 save_input_buffer_length = vec_len (socket_main.input_buffer);
+ vl_socket_args_for_process_t *a;
+ msgbuf_t *mbp;
+ int mbp_set = 0;
rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
/*
* Look for bugs here. This code is tricky because
- * data read from a stream socket does honor message
+ * data read from a stream socket does not honor message
* boundaries. In the case of a long message (>4K bytes)
* we have to do (at least) 2 reads, etc.
*/
{
vec_append (rp->unprocessed_input, socket_main.input_buffer);
msg_buffer = rp->unprocessed_input;
- msg_len = rp->unprocessed_msg_length;
}
else
{
msg_buffer = socket_main.input_buffer;
- msg_len = 0;
+ mbp_set = 0;
}
- if (msg_len == 0)
+ if (mbp_set == 0)
{
- /* Length may be split across two reads */
- if (vec_len (msg_buffer) < sizeof (u32))
+ /* Any chance that we have a complete message? */
+ if (vec_len (msg_buffer) <= sizeof (msgbuf_t))
goto save_and_split;
- /* total length, including msg_len itself, in network byte order */
- msg_len = clib_net_to_host_u32 (*((u32 *) msg_buffer));
- }
-
- /* Happens if the client sent msg_len == 0 */
- if (msg_len == 0)
- {
- clib_warning ("msg_len == 0");
- goto turf_it;
+ mbp = (msgbuf_t *) msg_buffer;
+ msg_len = ntohl (mbp->data_len);
+ mbp_set = 1;
}
/* We don't have the entire message yet. */
- if (msg_len > vec_len (msg_buffer))
+ if (mbp_set == 0
+ || (msg_len + sizeof (msgbuf_t)) > vec_len (msg_buffer))
{
save_and_split:
- /*
- * if we were using the shared input buffer,
- * save the fragment.
- */
+ /* if we were using the input buffer save the fragment */
if (msg_buffer == socket_main.input_buffer)
{
ASSERT (vec_len (rp->unprocessed_input) == 0);
_vec_len (rp->unprocessed_input) = vec_len (msg_buffer);
}
_vec_len (socket_main.input_buffer) = save_input_buffer_length;
- rp->unprocessed_msg_length = msg_len;
return 0;
}
- socket_process_msg (uf, rp, msg_buffer);
- if (n > msg_len)
- vec_delete (msg_buffer, msg_len, 0);
+ data_for_process = (u8 *) vec_dup (msg_buffer);
+ _vec_len (data_for_process) = (msg_len + sizeof (msgbuf_t));
+ pool_get (socket_main.process_args, a);
+ a->clib_file = uf;
+ a->regp = rp;
+ a->data = data_for_process;
+
+ vlib_process_signal_event (vm, memclnt_node.index,
+ SOCKET_READ_EVENT,
+ a - socket_main.process_args);
+ if (n > (msg_len + sizeof (*mbp)))
+ vec_delete (msg_buffer, msg_len + sizeof (*mbp), 0);
else
_vec_len (msg_buffer) = 0;
- n -= msg_len;
+ n -= msg_len + sizeof (msgbuf_t);
msg_len = 0;
- rp->unprocessed_msg_length = 0;
+ mbp_set = 0;
}
while (n > 0);
-turf_it:
_vec_len (socket_main.input_buffer) = save_input_buffer_length;
return 0;
}
}
+/*
+ * Append buffer_bytes bytes, starting at buffer, to the end of
+ * rp->output_vector.  That is the only thing this function does;
+ * the uf argument is accepted but not used here.
+ */
+void
+vl_socket_add_pending_output_no_flush (clib_file_t * uf,
+				       vl_api_registration_t * rp,
+				       u8 * buffer, uword buffer_bytes)
+{
+  (void) uf;
+  vec_add (rp->output_vector, buffer, buffer_bytes);
+}
+
static void
socket_del_pending_output (clib_file_t * uf,
vl_api_registration_t * rp, uword n_bytes)
socksvr_accept_ready (clib_file_t * uf)
{
clib_file_main_t *fm = &file_main;
- struct sockaddr_in client_addr;
- int client_fd;
- int client_len;
+ socket_main_t *sm = &socket_main;
+ clib_socket_t *sock = &sm->socksvr_listen_socket;
+ clib_socket_t client;
+ clib_error_t *error;
- client_len = sizeof (client_addr);
+ error = clib_socket_accept (sock, &client);
- /*
- * Supposedly acquires the non-blocking attrib from the
- * server socket.
- */
- client_fd = accept (uf->file_descriptor,
- (struct sockaddr *) &client_addr,
- (socklen_t *) & client_len);
+ if (error)
+ return error;
- if (client_fd < 0)
- return clib_error_return_unix (0, "socksvr_accept_ready: accept");
-
- socksvr_file_add (fm, client_fd);
+ socksvr_file_add (fm, client.fd);
return 0;
}
}
}
-#define foreach_vlib_api_msg \
-_(SOCKCLNT_CREATE, sockclnt_create) \
-_(SOCKCLNT_DELETE, sockclnt_delete)
-
static clib_error_t *
-socksvr_api_init (vlib_main_t * vm)
+send_fd_msg (int socket_fd, int fd_to_share)
{
- clib_file_main_t *fm = &file_main;
- clib_file_t template = { 0 };
- int sockfd;
- int one = 1;
+ struct msghdr mh = { 0 };
+ struct iovec iov[1];
+ char ctl[CMSG_SPACE (sizeof (int))];
+ char *msg = "memfd";
int rv;
- struct sockaddr_in serv_addr;
- vl_api_registration_t *rp;
- u16 portno;
- u32 bind_address;
-
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
- foreach_vlib_api_msg;
-#undef _
- vec_resize (socket_main.input_buffer, 4096);
+ iov[0].iov_base = msg;
+ iov[0].iov_len = strlen (msg);
+ mh.msg_iov = iov;
+ mh.msg_iovlen = 1;
+
+ struct cmsghdr *cmsg;
+ memset (&ctl, 0, sizeof (ctl));
+ mh.msg_control = ctl;
+ mh.msg_controllen = sizeof (ctl);
+ cmsg = CMSG_FIRSTHDR (&mh);
+ cmsg->cmsg_len = CMSG_LEN (sizeof (int));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy (CMSG_DATA (cmsg), &fd_to_share, sizeof (int));
+
+ rv = sendmsg (socket_fd, &mh, 0);
+ if (rv < 0)
+ return clib_error_return_unix (0, "sendmsg");
+ return 0;
+}
- /* Set up non-blocking server socket on CLIENT_API_SERVER_PORT */
- sockfd = socket (AF_INET, SOCK_STREAM, 0);
+/*
+ * Handler for the memfd_segment_create API message.
+ *
+ * Looks up the requesting client's registration, creates a memfd
+ * segment of mp->requested_size bytes, builds an svm region (with the
+ * API shared-memory header and allocation rings) one page into it,
+ * appends that region to am->vlib_private_rps, sends the reply message,
+ * and finally hands the memfd file descriptor to the client through
+ * send_fd_msg() on the client's socket.
+ *
+ * Outcomes:
+ *  - client index no longer maps to a registration: warn, no reply.
+ *  - registration is not REGISTRATION_TYPE_SOCKET_SERVER: reply with
+ *    retval -31.
+ *  - memfd_master_init() fails: reply with its return value.
+ *  - success: reply with retval 0, then send the fd; a failure to send
+ *    the fd is reported via clib_error_report().
+ *
+ * NOTE(review): this calls vlib_process_suspend(), so it presumably has
+ * to run in a vlib process context -- confirm against the dispatcher.
+ */
+void
+vl_api_memfd_segment_create_t_handler (vl_api_memfd_segment_create_t * mp)
+{
+  vl_api_memfd_segment_create_reply_t *rmp;
+  api_main_t *am = &api_main;
+  clib_file_t *cf;
+  memfd_private_t _memfd_private, *memfd = &_memfd_private;
+  vl_api_registration_t *regp;
+  vlib_main_t *vm = vlib_get_main ();
+  svm_map_region_args_t _args, *a = &_args;
+  svm_region_t *vlib_rp;
+  clib_error_t *error;
+  int rv;
- if (sockfd < 0)
- {
- return clib_error_return_unix (0, "socket");
- }
+  regp = vl_api_client_index_to_registration (mp->client_index);
- rv = ioctl (sockfd, FIONBIO, &one);
- if (rv < 0)
+  if (regp == 0)
{
- close (sockfd);
- return clib_error_return_unix (0, "FIONBIO");
+      clib_warning ("API client disconnected");
+      return;
}
- rv = setsockopt (sockfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one));
- if (rv < 0)
+  if (regp->registration_type != REGISTRATION_TYPE_SOCKET_SERVER)
{
- close (sockfd);
- return clib_error_return_unix (0, "SO_REUSEADDR");
+      rv = -31;			/* VNET_API_ERROR_INVALID_REGISTRATION */
+      goto reply;
}
- bzero ((char *) &serv_addr, sizeof (serv_addr));
- serv_addr.sin_family = AF_INET;
+  memset (memfd, 0, sizeof (*memfd));
- if (socket_main.bind_address)
- bind_address = socket_main.bind_address;
- else
- bind_address = INADDR_LOOPBACK;
+  /* Embed in api_main_t */
+  memfd->memfd_size = mp->requested_size;
+  memfd->requested_va = 0ULL;
+  memfd->i_am_master = 1;
+  memfd->name = format (0, "%s%c", regp->name, 0);
- if (socket_main.portno)
- portno = socket_main.portno;
- else
- portno = SOCKSVR_DEFAULT_PORT;
+  /* Set up a memfd segment of the requested size */
+  rv = memfd_master_init (memfd, mp->client_index);
- serv_addr.sin_port = clib_host_to_net_u16 (portno);
- serv_addr.sin_addr.s_addr = clib_host_to_net_u32 (bind_address);
+  if (rv)
+    goto reply;
- if (bind (sockfd, (struct sockaddr *) &serv_addr, sizeof (serv_addr)) < 0)
- {
- close (sockfd);
- return clib_error_return_unix (0, "bind");
- }
+  /* Remember to close this fd when the socket connection goes away */
+  vec_add1 (regp->additional_fds_to_close, memfd->fd);
- rv = listen (sockfd, 5);
- if (rv < 0)
+  /* And create a plausible svm_region in it, one page past the base */
+  memset (a, 0, sizeof (*a));
+  a->baseva = memfd->sh->memfd_va + MMAP_PAGESIZE;
+  a->size = memfd->memfd_size - MMAP_PAGESIZE;
+  /* $$$$ might want a different config parameter */
+  a->pvt_heap_size = am->api_pvt_heap_size;
+  a->flags = SVM_FLAGS_MHEAP;
+  svm_region_init_mapped_region (a, (svm_region_t *) a->baseva);
+
+  vlib_rp = (svm_region_t *) a->baseva;
+
+  /*
+   * Part deux, initialize the svm_region_t shared-memory header
+   * api allocation rings, and so on.
+   */
+  vl_init_shmem (vlib_rp, 1 /* is_vlib (don't care) */ ,
+		 1 /* is_private */ );
+
+  vec_add1 (am->vlib_private_rps, vlib_rp);
+
+  memfd->sh->ready = 1;
+
+  /* Recompute the set of input queues to poll in memclnt_process */
+  vec_reset_length (vl_api_queue_cursizes);
+
+reply:
+
+  /* send the reply message */
+
+  rmp = vl_msg_api_alloc (sizeof (*rmp));
+  rmp->_vl_msg_id = htons (VL_API_MEMFD_SEGMENT_CREATE_REPLY);
+  rmp->context = mp->context;
+  rmp->retval = htonl (rv);
+
+  vl_msg_api_send (regp, (u8 *) rmp);
+
+  /* Nothing to share with the client unless segment setup succeeded */
+  if (rv != 0)
+    return;
+
+  /*
+   * We need the reply message to make it out the back door
+   * before we send the magic fd message.
+   */
+  vlib_process_suspend (vm, 11e-6);
+
+  /*
+   * NOTE(review): regp was looked up before the suspend above; if the
+   * client can disconnect while we are suspended, regp may be stale
+   * here -- confirm and re-validate if so.
+   */
+  cf = file_main.file_pool + regp->clib_file_index;
+
+  /*
+   * Send the magic "here's your sign (aka fd)" socket message.
+   * Report a failure instead of dropping the returned error object.
+   */
+  error = send_fd_msg (cf->file_descriptor, memfd->fd);
+  if (error)
+    clib_error_report (error);
+}
+
/*
 * X-macro list of (message id suffix, handler base name) pairs.
 * socksvr_api_init defines _(N,n) and expands this list once to
 * configure each message via vl_msg_api_config.
 */
+#define foreach_vlib_api_msg \
+_(SOCKCLNT_CREATE, sockclnt_create) \
+_(SOCKCLNT_DELETE, sockclnt_delete) \
+_(MEMFD_SEGMENT_CREATE, memfd_segment_create)
+
+clib_error_t *
+socksvr_api_init (vlib_main_t * vm)
+{
+ clib_file_main_t *fm = &file_main;
+ clib_file_t template = { 0 };
+ vl_api_registration_t *rp;
+ vl_msg_api_msg_config_t cfg;
+ vl_msg_api_msg_config_t *c = &cfg;
+ socket_main_t *sm = &socket_main;
+ clib_socket_t *sock = &sm->socksvr_listen_socket;
+ clib_error_t *error;
+
+ /* If not explicitly configured, do not bind/enable, etc. */
+ if (sm->socket_name == 0)
+ return 0;
+
+#define _(N,n) do { \
+ c->id = VL_API_##N; \
+ c->name = #n; \
+ c->handler = vl_api_##n##_t_handler; \
+ c->cleanup = vl_noop_handler; \
+ c->endian = vl_api_##n##_t_endian; \
+ c->print = vl_api_##n##_t_print; \
+ c->size = sizeof(vl_api_##n##_t); \
+ c->traced = 1; /* trace, so these msgs print */ \
+ c->replay = 0; /* don't replay client create/delete msgs */ \
+ c->message_bounce = 0; /* don't bounce this message */ \
+ vl_msg_api_config(c);} while (0);
+
+ foreach_vlib_api_msg;
+#undef _
+
+ vec_resize (sm->input_buffer, 4096);
+
+ sock->config = (char *) sm->socket_name;
+
+ /* mkdir of file socket, only under /run */
+ if (strncmp (sock->config, "/run", 4) == 0)
{
- close (sockfd);
- return clib_error_return_unix (0, "listen");
+ u8 *tmp = format (0, "%s", sock->config);
+ int i = vec_len (tmp);
+ while (i && tmp[--i] != '/')
+ ;
+
+ tmp[i] = 0;
+
+ if (i)
+ vlib_unix_recursive_mkdir ((char *) tmp);
+ vec_free (tmp);
}
- pool_get (socket_main.registration_pool, rp);
+ sock->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET |
+ CLIB_SOCKET_F_ALLOW_GROUP_WRITE;
+ error = clib_socket_init (sock);
+ if (error)
+ return error;
+
+ pool_get (sm->registration_pool, rp);
memset (rp, 0, sizeof (*rp));
rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN;
template.read_function = socksvr_accept_ready;
template.write_function = socksvr_bogus_write;
- template.file_descriptor = sockfd;
- template.private_data = rp - socket_main.registration_pool;
+ template.file_descriptor = sock->fd;
+ template.private_data = rp - sm->registration_pool;
rp->clib_file_index = clib_file_add (fm, &template);
return 0;
socket_exit (vlib_main_t * vm)
{
clib_file_main_t *fm = &file_main;
+ socket_main_t *sm = &socket_main;
vl_api_registration_t *rp;
/* Defensive driving in case something wipes out early */
- if (socket_main.registration_pool)
+ if (sm->registration_pool)
{
u32 index;
/* *INDENT-OFF* */
- pool_foreach (rp, socket_main.registration_pool, ({
+ pool_foreach (rp, sm->registration_pool, ({
clib_file_del (fm, fm->file_pool + rp->clib_file_index);
index = rp->vl_api_registration_pool_index;
vl_free_socket_registration_index (index);
static clib_error_t *
socksvr_config (vlib_main_t * vm, unformat_input_t * input)
{
- int portno;
+ socket_main_t *sm = &socket_main;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "port %d", &portno))
+ if (unformat (input, "socket-name %s", &sm->socket_name))
+ ;
+ else if (unformat (input, "default"))
{
- socket_main.portno = portno;
+ sm->socket_name = format (0, "%s%c", API_SOCKET_FILE, 0);
}
else
{
format_unformat_error, input);
}
}
- return socksvr_api_init (vm);
+ return 0;
}
VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr");
-/* argument in host byte order */
-void
-socksvr_set_port (u16 port)
-{
- socket_main.portno = port;
-}
-
-/* argument in host byte order */
-void
-socksvr_set_bind_address (u32 bind_address)
-{
- socket_main.bind_address = bind_address;
-}
-
clib_error_t *
vlibsocket_init (vlib_main_t * vm)
{