/*
*------------------------------------------------------------------
- * socksvr_vlib.c
+ * socket_api.c
*
* Copyright (c) 2009 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
#include <sys/stat.h>
#include <vppinfra/byte_order.h>
-#include <svm/memfd.h>
+#include <svm/ssvm.h>
#include <vlibmemory/api.h>
#include <vlibmemory/vl_memory_msg_enum.h>
socket_main_t socket_main;
+/*
+ * Marker bit ORed into a registration pool index by
+ * sock_api_registration_handle() to form a socket client handle.
+ * Written as an unsigned constant: shifting a signed 1 into bit 31 is
+ * undefined behaviour when int is 32 bits wide.
+ */
+#define SOCK_API_REG_HANDLE_BIT (1U << 31)
+
+/*
+ * Build the client-visible handle for a socket registration: the
+ * registration's pool index with SOCK_API_REG_HANDLE_BIT ORed in.
+ * The pool index is asserted to be below the marker bit.
+ */
+static u32
+sock_api_registration_handle (vl_api_registration_t * regp)
+{
+  u32 handle;
+
+  ASSERT (regp->vl_api_registration_pool_index < SOCK_API_REG_HANDLE_BIT);
+  handle = SOCK_API_REG_HANDLE_BIT | regp->vl_api_registration_pool_index;
+  return handle;
+}
+
+/*
+ * Recover the registration pool index from a socket client handle by
+ * clearing SOCK_API_REG_HANDLE_BIT; all other bits pass through.
+ */
+static u32
+socket_api_registration_handle_to_index (u32 reg_index)
+{
+  u32 index_mask = ~SOCK_API_REG_HANDLE_BIT;
+
+  return reg_index & index_mask;
+}
+
+/*
+ * Report whether a handle carries the socket-API marker bit.
+ * Returns 1 when SOCK_API_REG_HANDLE_BIT is set in reg_handle, else 0.
+ * Only the marker bit is examined; the index bits are not checked.
+ */
+u8
+vl_socket_api_registration_handle_is_valid (u32 reg_handle)
+{
+  if (reg_handle & SOCK_API_REG_HANDLE_BIT)
+    return 1;
+  return 0;
+}
+
void
vl_sock_api_dump_clients (vlib_main_t * vm, api_main_t * am)
{
vl_api_registration_t *reg;
socket_main_t *sm = &socket_main;
- clib_file_main_t *fm = &file_main;
clib_file_t *f;
/*
pool_foreach (reg, sm->registration_pool,
({
if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) {
- f = pool_elt_at_index (fm->file_pool, reg->clib_file_index);
- vlib_cli_output (vm, "%20s %8d",
- reg->name, f->file_descriptor);
+ f = vl_api_registration_file (reg);
+ vlib_cli_output (vm, "%20s %8d", reg->name, f->file_descriptor);
}
}));
/* *INDENT-ON* */
}
+/*
+ * Map a socket client handle to its registration.
+ *
+ * The handle is converted to a pool index with
+ * socket_api_registration_handle_to_index(); the marker bit itself is
+ * not verified here. Returns the registration at that index, or 0 when
+ * the pool reports the index as free.
+ */
+vl_api_registration_t *
+vl_socket_api_client_handle_to_registration (u32 handle)
+{
+  u32 pool_index = socket_api_registration_handle_to_index (handle);
+  socket_main_t *sm = &socket_main;
+
+  if (!pool_is_free_index (sm->registration_pool, pool_index))
+    return pool_elt_at_index (sm->registration_pool, pool_index);
+
+#if DEBUG > 2
+  clib_warning ("Invalid index %d\n", pool_index);
+#endif
+  return 0;
+}
+
void
vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
{
u16 msg_id = ntohs (*(u16 *) elem);
api_main_t *am = &api_main;
msgbuf_t *mb = (msgbuf_t *) (elem - offsetof (msgbuf_t, data));
- clib_file_t *cf = clib_file_get (&file_main, rp->clib_file_index);
vl_api_registration_t *sock_rp;
+ clib_file_main_t *fm = &file_main;
+ clib_error_t *error;
+ clib_file_t *cf;
+ cf = vl_api_registration_file (rp);
ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
if (msg_id >= vec_len (am->api_trace_cfg))
ASSERT (sock_rp);
/* Add the msgbuf_t to the output vector */
- vl_socket_add_pending_output_no_flush (cf, sock_rp, (u8 *) mb,
- sizeof (*mb));
- /* Send the message */
- vl_socket_add_pending_output (cf, sock_rp, elem, ntohl (mb->data_len));
+ vec_add (sock_rp->output_vector, (u8 *) mb, sizeof (*mb));
+
+ /* Try to send the message and save any error like
+ * we do in the input epoll loop */
+ vec_add (sock_rp->output_vector, elem, ntohl (mb->data_len));
+ error = clib_file_write (cf);
+ unix_save_error (&unix_main, error);
+
+ /* If we didn't finish sending everything, wait for tx space */
+ if (vec_len (sock_rp->output_vector) > 0
+ && !(cf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE))
+ {
+ cf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ fm->file_update (cf, UNIX_FILE_UPDATE_MODIFY);
+ }
#if CLIB_DEBUG > 1
output_length = sizeof (*mb) + ntohl (mb->data_len);
{
ASSERT (vec_len (rp->unprocessed_input) == 0);
vec_validate (rp->unprocessed_input, vec_len (msg_buffer) - 1);
- clib_memcpy (rp->unprocessed_input, msg_buffer,
- vec_len (msg_buffer));
+ clib_memcpy_fast (rp->unprocessed_input, msg_buffer,
+ vec_len (msg_buffer));
_vec_len (rp->unprocessed_input) = vec_len (msg_buffer);
}
_vec_len (socket_main.input_buffer) = save_input_buffer_length;
return 0;
}
-void
-vl_socket_add_pending_output (clib_file_t * uf,
- vl_api_registration_t * rp,
- u8 * buffer, uword buffer_bytes)
-{
- clib_file_main_t *fm = &file_main;
-
- vec_add (rp->output_vector, buffer, buffer_bytes);
- if (vec_len (rp->output_vector) > 0)
- {
- int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
- uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
- if (!skip_update)
- fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
- }
-}
-
-void
-vl_socket_add_pending_output_no_flush (clib_file_t * uf,
- vl_api_registration_t * rp,
- u8 * buffer, uword buffer_bytes)
-{
- vec_add (rp->output_vector, buffer, buffer_bytes);
-}
-
-static void
-socket_del_pending_output (clib_file_t * uf,
- vl_api_registration_t * rp, uword n_bytes)
-{
- clib_file_main_t *fm = &file_main;
-
- vec_delete (rp->output_vector, n_bytes, 0);
- if (vec_len (rp->output_vector) <= 0)
- {
- int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
- uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
- if (!skip_update)
- fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
- }
-}
-
+/*
+ * File write-ready callback for an API socket.
+ *
+ * Drains rp->output_vector to uf->file_descriptor in chunks of at most
+ * 4096 bytes. EAGAIN stops the loop and leaves the unsent tail queued;
+ * any other write error deletes the file and frees the registration.
+ * Bytes accepted by write() are removed from the vector, and once the
+ * vector is empty UNIX_FILE_DATA_AVAILABLE_TO_WRITE is cleared.
+ * Every path visible here returns 0.
+ */
clib_error_t *
vl_socket_write_ready (clib_file_t * uf)
{
rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
/* Flush output vector. */
- n = write (uf->file_descriptor,
- rp->output_vector, vec_len (rp->output_vector));
- if (n < 0)
+ size_t total_bytes = vec_len (rp->output_vector);
+ size_t bytes_to_send, remaining_bytes = total_bytes;
+ /* Byte pointer: arithmetic on void * is only a GNU extension */
+ u8 *p = (u8 *) rp->output_vector;
+ while (remaining_bytes > 0)
{
+ bytes_to_send = remaining_bytes > 4096 ? 4096 : remaining_bytes;
+ n = write (uf->file_descriptor, p, bytes_to_send);
+ if (n < 0)
+ {
+ if (errno == EAGAIN)
+ {
+ break;
+ }
#if DEBUG > 2
- clib_warning ("write error, close the file...\n");
+ clib_warning ("write error, close the file...\n");
#endif
- clib_file_del (fm, uf);
-
- vl_socket_free_registration_index (rp - socket_main.registration_pool);
- return 0;
+ clib_file_del (fm, uf);
+ vl_socket_free_registration_index (rp -
+ socket_main.registration_pool);
+ return 0;
+ }
+ /* Nothing accepted: stop rather than spin on the same chunk */
+ if (n == 0)
+ break;
+ /* write() may accept fewer bytes than requested; advance by the
+ * count it returned so the unsent tail stays in output_vector */
+ remaining_bytes -= (size_t) n;
+ p += n;
}
- else if (n > 0)
- socket_del_pending_output (uf, rp, n);
+ vec_delete (rp->output_vector, total_bytes - remaining_bytes, 0);
+ if (vec_len (rp->output_vector) <= 0
+ && (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE))
+ {
+ uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
return 0;
}
clib_file_t template = { 0 };
pool_get (socket_main.registration_pool, rp);
- memset (rp, 0, sizeof (*rp));
+ clib_memset (rp, 0, sizeof (*rp));
template.read_function = vl_socket_read_ready;
template.write_function = vl_socket_write_ready;
clib_error_t *error;
error = clib_socket_accept (sock, &client);
-
if (error)
return error;
{
vl_api_registration_t *regp;
vl_api_sockclnt_create_reply_t *rp;
+ api_main_t *am = &api_main;
+ hash_pair_t *hp;
int rv = 0;
+ u32 nmsg = hash_elts (am->msg_index_by_name_and_crc);
+ u32 i = 0;
regp = socket_main.current_rp;
regp->name = format (0, "%s%c", mp->name, 0);
- rp = vl_msg_api_alloc (sizeof (*rp));
+ /* The reply carries one message table entry per element of
+ * am->msg_index_by_name_and_crc, so size the allocation for all of them */
+ u32 size = sizeof (*rp) + (nmsg * sizeof (vl_api_message_table_entry_t));
+ rp = vl_msg_api_alloc (size);
rp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE_REPLY);
- rp->handle = (uword) regp;
- rp->index = (uword) regp->vl_api_registration_pool_index;
+ /* Clients get the marked handle, not the raw pool index */
+ rp->index = htonl (sock_api_registration_handle (regp));
rp->context = mp->context;
rp->response = htonl (rv);
-
+ rp->count = htons (nmsg);
+
+ /* *INDENT-OFF* */
+ hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
+ ({
+ rp->message_table[i].index = htons(hp->value[0]);
+ strncpy((char *)rp->message_table[i].name, (char *)hp->key, 64-1);
+ /* strncpy never writes the byte after its bound, so terminate
+ * explicitly (presumes name is a 64-byte field, as the 64-1
+ * bound implies - TODO confirm against the message definition) */
+ rp->message_table[i].name[64-1] = 0;
+ i++;
+ }));
+ /* *INDENT-ON* */
vl_api_send_msg (regp, (u8 *) rp);
}
vl_api_registration_t *regp;
vl_api_sockclnt_delete_reply_t *rp;
- if (!pool_is_free_index (socket_main.registration_pool, mp->index))
- {
- regp = pool_elt_at_index (socket_main.registration_pool, mp->index);
+ /* The reply goes to the registration behind mp->client_index; with
+ * no such registration there is nobody to answer */
+ regp = vl_api_client_index_to_registration (mp->client_index);
+ if (!regp)
+ return;
- rp = vl_msg_api_alloc (sizeof (*rp));
- rp->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY);
- rp->handle = mp->handle;
- rp->response = htonl (1);
+ /* The index given out in the create reply is a handle with
+ * SOCK_API_REG_HANDLE_BIT set; strip the marker before using the
+ * value as a registration pool index */
+ u32 reg_index = socket_api_registration_handle_to_index (ntohl (mp->index));
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY);
+ rp->context = mp->context;
+ if (!pool_is_free_index (socket_main.registration_pool, reg_index))
+ {
+ rp->response = htonl (1);
vl_api_send_msg (regp, (u8 *) rp);
- clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index);
-
- vl_socket_free_registration_index (mp->index);
+ vl_api_registration_del_file (regp);
+ vl_socket_free_registration_index (reg_index);
}
else
{
- clib_warning ("unknown client ID %d", mp->index);
+ clib_warning ("unknown client ID %d", reg_index);
+ /* Still answer, so the requester sees the failure */
+ rp->response = htonl (-1);
+ vl_api_send_msg (regp, (u8 *) rp);
}
}
-static clib_error_t *
-send_fd_msg (int socket_fd, int fd_to_share)
+clib_error_t *
+vl_sock_api_send_fd_msg (int socket_fd, int fds[], int n_fds)
{
struct msghdr mh = { 0 };
struct iovec iov[1];
- char ctl[CMSG_SPACE (sizeof (int))];
- char *msg = "memfd";
+ char ctl[CMSG_SPACE (sizeof (int) * n_fds)];
+ struct cmsghdr *cmsg;
+ char *msg = "fdmsg";
int rv;
iov[0].iov_base = msg;
mh.msg_iov = iov;
mh.msg_iovlen = 1;
- struct cmsghdr *cmsg;
- memset (&ctl, 0, sizeof (ctl));
+ clib_memset (&ctl, 0, sizeof (ctl));
mh.msg_control = ctl;
mh.msg_controllen = sizeof (ctl);
cmsg = CMSG_FIRSTHDR (&mh);
- cmsg->cmsg_len = CMSG_LEN (sizeof (int));
+ cmsg->cmsg_len = CMSG_LEN (sizeof (int) * n_fds);
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
- memcpy (CMSG_DATA (cmsg), &fd_to_share, sizeof (int));
+ clib_memcpy_fast (CMSG_DATA (cmsg), fds, sizeof (int) * n_fds);
rv = sendmsg (socket_fd, &mh, 0);
if (rv < 0)
if (!mp->nitems)
{
- vec_validate (config, 3);
+ vec_validate (config, 6);
config[0].type = VL_API_VLIB_RING;
- config[0].count = 128;
config[0].size = 256;
- config[1].type = VL_API_CLIENT_RING;
- config[1].count = 128;
+ config[0].count = 32;
+
+ config[1].type = VL_API_VLIB_RING;
config[1].size = 1024;
- config[2].type = VL_API_CLIENT_RING;
- config[2].count = 8;
+ config[1].count = 16;
+
+ config[2].type = VL_API_VLIB_RING;
config[2].size = 4096;
- config[3].type = VL_API_QUEUE;
- config[3].count = 128;
- config[3].size = sizeof (uword);
+ config[2].count = 2;
+
+ config[3].type = VL_API_CLIENT_RING;
+ config[3].size = 256;
+ config[3].count = 32;
+
+ config[4].type = VL_API_CLIENT_RING;
+ config[4].size = 1024;
+ config[4].count = 16;
+
+ config[5].type = VL_API_CLIENT_RING;
+ config[5].size = 4096;
+ config[5].count = 2;
+
+ config[6].type = VL_API_QUEUE;
+ config[6].count = 128;
+ config[6].size = sizeof (uword);
}
else
{
vl_api_sock_init_shm_t_handler (vl_api_sock_init_shm_t * mp)
{
vl_api_sock_init_shm_reply_t *rmp;
- memfd_private_t _memfd_private, *memfd = &_memfd_private;
+ ssvm_private_t _memfd_private, *memfd = &_memfd_private;
svm_map_region_args_t _args, *a = &_args;
vl_api_registration_t *regp;
api_main_t *am = &api_main;
svm_region_t *vlib_rp;
clib_file_t *cf;
vl_api_shm_elem_config_t *config = 0;
+ vl_shmem_hdr_t *shmem_hdr;
int rv;
regp = vl_api_client_index_to_registration (mp->client_index);
* Set up a memfd segment of the requested size wherein the
* shmem data structures will be initialized
*/
- memset (memfd, 0, sizeof (*memfd));
- memfd->memfd_size = mp->requested_size;
+ clib_memset (memfd, 0, sizeof (*memfd));
+ memfd->ssvm_size = mp->requested_size;
memfd->requested_va = 0ULL;
memfd->i_am_master = 1;
memfd->name = format (0, "%s%c", regp->name, 0);
- if ((rv = memfd_master_init (memfd, mp->client_index)))
+ if ((rv = ssvm_master_init_memfd (memfd)))
goto reply;
/* Remember to close this fd when the socket connection goes away */
/*
* Create a plausible svm_region in the memfd backed segment
*/
- memset (a, 0, sizeof (*a));
- a->baseva = memfd->sh->memfd_va + MMAP_PAGESIZE;
- a->size = memfd->memfd_size - MMAP_PAGESIZE;
+ clib_memset (a, 0, sizeof (*a));
+ a->baseva = memfd->sh->ssvm_va + MMAP_PAGESIZE;
+ a->size = memfd->ssvm_size - MMAP_PAGESIZE;
/* $$$$ might want a different config parameter */
a->pvt_heap_size = am->api_pvt_heap_size;
a->flags = SVM_FLAGS_MHEAP;
vlib_rp = (svm_region_t *) a->baseva;
vl_init_shmem (vlib_rp, config, 1 /* is_vlib (dont-care) */ ,
1 /* is_private */ );
+
+ /* Remember who created this. Needs to be post vl_init_shmem */
+ shmem_hdr = (vl_shmem_hdr_t *) vlib_rp->user_ctx;
+ shmem_hdr->clib_file_index = vl_api_registration_file_index (regp);
+
vec_add1 (am->vlib_private_rps, vlib_rp);
memfd->sh->ready = 1;
vec_free (config);
rmp->context = mp->context;
rmp->retval = htonl (rv);
- vl_api_send_msg (regp, (u8 *) rmp);
-
- if (rv != 0)
- return;
-
/*
- * We need the reply message to make it out the back door
- * before we send the magic fd message so force a flush
+ * Note: The reply message needs to make it out the back door
+ * before we send the magic fd message. That's taken care of by
+ * the send function.
*/
- cf = clib_file_get (&file_main, regp->clib_file_index);
- cf->write_function (cf);
-
- /* Send the magic "here's your sign (aka fd)" socket message */
- send_fd_msg (cf->file_descriptor, memfd->fd);
-}
-
-/*
- * Create a memory-fd segment.
- */
-void
-vl_api_memfd_segment_create_t_handler (vl_api_memfd_segment_create_t * mp)
-{
- vl_api_memfd_segment_create_reply_t *rmp;
- clib_file_t *cf;
- memfd_private_t _memfd_private, *memfd = &_memfd_private;
- vl_api_registration_t *regp;
- int rv;
-
- regp = vl_api_client_index_to_registration (mp->client_index);
- if (regp == 0)
- {
- clib_warning ("API client disconnected");
- return;
- }
-
- memset (memfd, 0, sizeof (*memfd));
- memfd->memfd_size = mp->requested_size;
- memfd->requested_va = 0ULL;
- memfd->i_am_master = 1;
- memfd->name = format (0, "%s%c", regp->name, 0);
-
- /* Set up a memfd segment of the requested size */
- if ((rv = memfd_master_init (memfd, mp->client_index)))
- goto reply;
-
- /* Remember to close this fd when the socket connection goes away */
- vec_add1 (regp->additional_fds_to_close, memfd->fd);
-
-reply:
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- rmp->_vl_msg_id = htons (VL_API_MEMFD_SEGMENT_CREATE_REPLY);
- rmp->context = mp->context;
- rmp->retval = htonl (rv);
-
- vl_api_send_msg (regp, (u8 *) rmp);
+ vl_socket_api_send (regp, (u8 *) rmp);
if (rv != 0)
return;
- /*
- * We need the reply message to make it out the back door
- * before we send the magic fd message.
- */
- cf = file_main.file_pool + regp->clib_file_index;
- cf->write_function (cf);
-
- /* send the magic "here's your sign (aka fd)" socket message */
- send_fd_msg (cf->file_descriptor, memfd->fd);
+ /* Send the magic "here's your sign (aka fd)" socket message */
+ cf = vl_api_registration_file (regp);
+ vl_sock_api_send_fd_msg (cf->file_descriptor, &memfd->fd, 1);
}
#define foreach_vlib_api_msg \
_(SOCKCLNT_CREATE, sockclnt_create) \
_(SOCKCLNT_DELETE, sockclnt_delete) \
-_(SOCK_INIT_SHM, sock_init_shm) \
-_(MEMFD_SEGMENT_CREATE, memfd_segment_create)
+_(SOCK_INIT_SHM, sock_init_shm)
clib_error_t *
vl_sock_api_init (vlib_main_t * vm)
return error;
pool_get (sm->registration_pool, rp);
- memset (rp, 0, sizeof (*rp));
+ clib_memset (rp, 0, sizeof (*rp));
rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN;
static clib_error_t *
socket_exit (vlib_main_t * vm)
{
- clib_file_main_t *fm = &file_main;
socket_main_t *sm = &socket_main;
vl_api_registration_t *rp;
u32 index;
/* *INDENT-OFF* */
pool_foreach (rp, sm->registration_pool, ({
- clib_file_del (fm, fm->file_pool + rp->clib_file_index);
- index = rp->vl_api_registration_pool_index;
- vl_socket_free_registration_index (index);
+ vl_api_registration_del_file (rp);
+ index = rp->vl_api_registration_pool_index;
+ vl_socket_free_registration_index (index);
}));
/* *INDENT-ON* */
}