X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlibmemory%2Fsocksvr_vlib.c;fp=src%2Fvlibsocket%2Fsocksvr_vlib.c;h=1a263e7bf3780a484b1ab4e080dfab7502efc204;hb=59b2565cd91a67ced650739f36129650830211ac;hp=31b33df539326f5210337abe2f6021892b97390e;hpb=35ffa3e8f6b032f6e324234d495f769049d8feea;p=vpp.git diff --git a/src/vlibsocket/socksvr_vlib.c b/src/vlibmemory/socksvr_vlib.c similarity index 58% rename from src/vlibsocket/socksvr_vlib.c rename to src/vlibmemory/socksvr_vlib.c index 31b33df5393..1a263e7bf37 100644 --- a/src/vlibsocket/socksvr_vlib.c +++ b/src/vlibmemory/socksvr_vlib.c @@ -22,32 +22,30 @@ #include #include #include +#include #include #include -#include #include -#include /* enumerate all vlib messages */ +#include #define vl_typedefs /* define message structures */ -#include +#include #undef vl_typedefs /* instantiate all the print functions we know about */ #define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) #define vl_printfun -#include +#include #undef vl_printfun /* instantiate all the endian swap functions we know about */ #define vl_endianfun -#include +#include #undef vl_endianfun -socket_main_t socket_main; - void dump_socket_clients (vlib_main_t * vm, api_main_t * am) { @@ -63,7 +61,7 @@ dump_socket_clients (vlib_main_t * vm, api_main_t * am) if (pool_elts (sm->registration_pool) < 2) return; - vlib_cli_output (vm, "TCP socket clients"); + vlib_cli_output (vm, "Socket clients"); vlib_cli_output (vm, "%16s %8s", "Name", "Fildesc"); /* *INDENT-OFF* */ pool_foreach (reg, sm->registration_pool, @@ -80,11 +78,13 @@ dump_socket_clients (vlib_main_t * vm, api_main_t * am) void vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) { - u32 nbytes = 4; /* for the length... */ u16 msg_id = ntohs (*(u16 *) elem); - u32 msg_length; - u32 tmp; api_main_t *am = &api_main; + msgbuf_t *mb = (msgbuf_t *) (elem - offsetof (msgbuf_t, data)); +#if CLIB_DEBUG > 1 + u32 output_length; +#endif + clib_file_t *cf = rp->clib_file_index + file_main.file_pool; ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); @@ -95,125 +95,30 @@ vl_socket_api_send (vl_api_registration_t * rp, u8 * elem) return; } - msg_length = am->api_trace_cfg[msg_id].size; - nbytes += msg_length; - tmp = clib_host_to_net_u32 (nbytes); - - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, + /* Add the msgbuf_t to the output vector */ + vl_socket_add_pending_output_no_flush (cf, + rp->vl_api_registration_pool_index + + socket_main.registration_pool, + (u8 *) mb, sizeof (*mb)); + /* Send the message */ + vl_socket_add_pending_output (cf, rp->vl_api_registration_pool_index + socket_main.registration_pool, - elem, msg_length); - vl_msg_api_free ((void *) elem); -} - -void -vl_socket_api_send_with_data (vl_api_registration_t * rp, - u8 * elem, u8 * data_vector) -{ - u32 nbytes = 4; /* for the length... */ - u16 msg_id = ntohs (*(u16 *) elem); - u32 msg_length; - u32 tmp; - api_main_t *am = &api_main; - - ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); - - if (msg_id >= vec_len (am->api_trace_cfg)) - { - clib_warning ("id out of range: %d", msg_id); - vec_free (data_vector); - vl_msg_api_free ((void *) elem); - return; - } - - msg_length = am->api_trace_cfg[msg_id].size; - nbytes += msg_length; - nbytes += vec_len (data_vector); + elem, ntohl (mb->data_len)); - /* Length in network byte order */ - tmp = clib_host_to_net_u32 (nbytes); +#if CLIB_DEBUG > 1 + output_length = sizeof (*mb) + ntohl (mb->data_len); + clib_warning ("wrote %u bytes to fd %d", output_length, + cf->file_descriptor); +#endif - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - elem, msg_length); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - data_vector, vec_len (data_vector)); vl_msg_api_free ((void *) elem); } -static inline void -vl_socket_api_send_with_length_internal (vl_api_registration_t * rp, - u8 * elem, u32 msg_length, int free) -{ - u32 nbytes = 4; /* for the length... */ - u16 msg_id = ntohs (*(u16 *) elem); - u32 tmp; - api_main_t *am = &api_main; - - ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM); - - if (msg_id >= vec_len (am->api_trace_cfg)) - { - clib_warning ("id out of range: %d", msg_id); - if (free) - vl_msg_api_free ((void *) elem); - return; - } - - nbytes += msg_length; - - /* Length in network byte order */ - tmp = clib_host_to_net_u32 (nbytes); - - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - (u8 *) & tmp, sizeof (tmp)); - vl_socket_add_pending_output (rp->clib_file_index - + file_main.file_pool, - rp->vl_api_registration_pool_index - + socket_main.registration_pool, - elem, msg_length); - if (free) - vl_msg_api_free ((void *) elem); -} - -void -vl_socket_api_send_with_length (vl_api_registration_t * rp, - u8 * elem, u32 msg_length) -{ - vl_socket_api_send_with_length_internal (rp, elem, msg_length, - 1 /* free */ ); -} - -void -vl_socket_api_send_with_length_no_free (vl_api_registration_t * rp, - u8 * elem, u32 msg_length) -{ - vl_socket_api_send_with_length_internal (rp, elem, msg_length, - 0 /* free */ ); -} - void vl_free_socket_registration_index (u32 pool_index) { + int i; vl_api_registration_t *rp; if (pool_is_free_index (socket_main.registration_pool, pool_index)) { @@ -223,6 +128,10 @@ vl_free_socket_registration_index (u32 pool_index) rp = pool_elt_at_index (socket_main.registration_pool, pool_index); ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE); + for (i = 0; i < vec_len (rp->additional_fds_to_close); i++) + if (close (rp->additional_fds_to_close[i]) < 0) + clib_unix_warning ("close"); + vec_free (rp->additional_fds_to_close); vec_free (rp->name); vec_free (rp->unprocessed_input); vec_free (rp->output_vector); @@ -230,11 +139,13 @@ vl_free_socket_registration_index (u32 pool_index) pool_put (socket_main.registration_pool, rp); } -static inline void -socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, - i8 * input_v) +void +vl_api_socket_process_msg (clib_file_t * uf, vl_api_registration_t * rp, + i8 * input_v) { - u8 *the_msg = (u8 *) (input_v + sizeof (u32)); + msgbuf_t *mbp = (msgbuf_t *) input_v; + + u8 *the_msg = (u8 *) (mbp->data); socket_main.current_uf = uf; socket_main.current_rp = rp; vl_msg_api_socket_handler (the_msg); @@ -246,11 +157,16 @@ clib_error_t * vl_socket_read_ready (clib_file_t * uf) { clib_file_main_t *fm = &file_main; + vlib_main_t *vm = vlib_get_main (); vl_api_registration_t *rp; int n; i8 *msg_buffer = 0; + u8 *data_for_process; u32 msg_len; u32 save_input_buffer_length = vec_len (socket_main.input_buffer); + vl_socket_args_for_process_t *a; + msgbuf_t *mbp; + int mbp_set = 0; rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data); @@ -278,7 +194,7 @@ vl_socket_read_ready (clib_file_t * uf) /* * Look for bugs here. This code is tricky because - * data read from a stream socket does honor message + * data read from a stream socket does not honor message * boundaries. In the case of a long message (>4K bytes) * we have to do (at least) 2 reads, etc. */ @@ -288,39 +204,30 @@ vl_socket_read_ready (clib_file_t * uf) { vec_append (rp->unprocessed_input, socket_main.input_buffer); msg_buffer = rp->unprocessed_input; - msg_len = rp->unprocessed_msg_length; } else { msg_buffer = socket_main.input_buffer; - msg_len = 0; + mbp_set = 0; } - if (msg_len == 0) + if (mbp_set == 0) { - /* Length may be split across two reads */ - if (vec_len (msg_buffer) < sizeof (u32)) + /* Any chance that we have a complete message? */ + if (vec_len (msg_buffer) <= sizeof (msgbuf_t)) goto save_and_split; - /* total length, including msg_len itself, in network byte order */ - msg_len = clib_net_to_host_u32 (*((u32 *) msg_buffer)); - } - - /* Happens if the client sent msg_len == 0 */ - if (msg_len == 0) - { - clib_warning ("msg_len == 0"); - goto turf_it; + mbp = (msgbuf_t *) msg_buffer; + msg_len = ntohl (mbp->data_len); + mbp_set = 1; } /* We don't have the entire message yet. */ - if (msg_len > vec_len (msg_buffer)) + if (mbp_set == 0 + || (msg_len + sizeof (msgbuf_t)) > vec_len (msg_buffer)) { save_and_split: - /* - * if we were using the shared input buffer, - * save the fragment. - */ + /* if we were using the input buffer save the fragment */ if (msg_buffer == socket_main.input_buffer) { ASSERT (vec_len (rp->unprocessed_input) == 0); @@ -330,22 +237,29 @@ vl_socket_read_ready (clib_file_t * uf) _vec_len (rp->unprocessed_input) = vec_len (msg_buffer); } _vec_len (socket_main.input_buffer) = save_input_buffer_length; - rp->unprocessed_msg_length = msg_len; return 0; } - socket_process_msg (uf, rp, msg_buffer); - if (n > msg_len) - vec_delete (msg_buffer, msg_len, 0); + data_for_process = (u8 *) vec_dup (msg_buffer); + _vec_len (data_for_process) = (msg_len + sizeof (msgbuf_t)); + pool_get (socket_main.process_args, a); + a->clib_file = uf; + a->regp = rp; + a->data = data_for_process; + + vlib_process_signal_event (vm, memclnt_node.index, + SOCKET_READ_EVENT, + a - socket_main.process_args); + if (n > (msg_len + sizeof (*mbp))) + vec_delete (msg_buffer, msg_len + sizeof (*mbp), 0); else _vec_len (msg_buffer) = 0; - n -= msg_len; + n -= msg_len + sizeof (msgbuf_t); msg_len = 0; - rp->unprocessed_msg_length = 0; + mbp_set = 0; } while (n > 0); -turf_it: _vec_len (socket_main.input_buffer) = save_input_buffer_length; return 0; @@ -368,6 +282,14 @@ vl_socket_add_pending_output (clib_file_t * uf, } } +void +vl_socket_add_pending_output_no_flush (clib_file_t * uf, + vl_api_registration_t * rp, + u8 * buffer, uword buffer_bytes) +{ + vec_add (rp->output_vector, buffer, buffer_bytes); +} + static void socket_del_pending_output (clib_file_t * uf, vl_api_registration_t * rp, uword n_bytes) @@ -451,24 +373,17 @@ static clib_error_t * socksvr_accept_ready (clib_file_t * uf) { clib_file_main_t *fm = &file_main; - struct sockaddr_in client_addr; - int client_fd; - int client_len; + socket_main_t *sm = &socket_main; + clib_socket_t *sock = &sm->socksvr_listen_socket; + clib_socket_t client; + clib_error_t *error; - client_len = sizeof (client_addr); + error = clib_socket_accept (sock, &client); - /* - * Supposedly acquires the non-blocking attrib from the - * server socket. - */ - client_fd = accept (uf->file_descriptor, - (struct sockaddr *) &client_addr, - (socklen_t *) & client_len); + if (error) + return error; - if (client_fd < 0) - return clib_error_return_unix (0, "socksvr_accept_ready: accept"); - - socksvr_file_add (fm, client_fd); + socksvr_file_add (fm, client.fd); return 0; } @@ -535,95 +450,204 @@ vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp) } } -#define foreach_vlib_api_msg \ -_(SOCKCLNT_CREATE, sockclnt_create) \ -_(SOCKCLNT_DELETE, sockclnt_delete) - static clib_error_t * -socksvr_api_init (vlib_main_t * vm) +send_fd_msg (int socket_fd, int fd_to_share) { - clib_file_main_t *fm = &file_main; - clib_file_t template = { 0 }; - int sockfd; - int one = 1; + struct msghdr mh = { 0 }; + struct iovec iov[1]; + char ctl[CMSG_SPACE (sizeof (int))]; + char *msg = "memfd"; int rv; - struct sockaddr_in serv_addr; - vl_api_registration_t *rp; - u16 portno; - u32 bind_address; - -#define _(N,n) \ - vl_msg_api_set_handlers(VL_API_##N, #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_vlib_api_msg; -#undef _ - vec_resize (socket_main.input_buffer, 4096); + iov[0].iov_base = msg; + iov[0].iov_len = strlen (msg); + mh.msg_iov = iov; + mh.msg_iovlen = 1; + + struct cmsghdr *cmsg; + memset (&ctl, 0, sizeof (ctl)); + mh.msg_control = ctl; + mh.msg_controllen = sizeof (ctl); + cmsg = CMSG_FIRSTHDR (&mh); + cmsg->cmsg_len = CMSG_LEN (sizeof (int)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy (CMSG_DATA (cmsg), &fd_to_share, sizeof (int)); + + rv = sendmsg (socket_fd, &mh, 0); + if (rv < 0) + return clib_error_return_unix (0, "sendmsg"); + return 0; +} - /* Set up non-blocking server socket on CLIENT_API_SERVER_PORT */ - sockfd = socket (AF_INET, SOCK_STREAM, 0); +/* + * Create a memory-fd segment. + */ +void +vl_api_memfd_segment_create_t_handler (vl_api_memfd_segment_create_t * mp) +{ + vl_api_memfd_segment_create_reply_t *rmp; + api_main_t *am = &api_main; + clib_file_t *cf; + memfd_private_t _memfd_private, *memfd = &_memfd_private; + vl_api_registration_t *regp; + vlib_main_t *vm = vlib_get_main (); + svm_map_region_args_t _args, *a = &_args; + svm_region_t *vlib_rp; + int rv; - if (sockfd < 0) - { - return clib_error_return_unix (0, "socket"); - } + regp = vl_api_client_index_to_registration (mp->client_index); - rv = ioctl (sockfd, FIONBIO, &one); - if (rv < 0) + if (regp == 0) { - close (sockfd); - return clib_error_return_unix (0, "FIONBIO"); + clib_warning ("API client disconnected"); + return; } - rv = setsockopt (sockfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof (one)); - if (rv < 0) + if (regp->registration_type != REGISTRATION_TYPE_SOCKET_SERVER) { - close (sockfd); - return clib_error_return_unix (0, "SO_REUSEADDR"); + rv = -31; /* VNET_API_ERROR_INVALID_REGISTRATION */ + goto reply; } - bzero ((char *) &serv_addr, sizeof (serv_addr)); - serv_addr.sin_family = AF_INET; + memset (memfd, 0, sizeof (*memfd)); - if (socket_main.bind_address) - bind_address = socket_main.bind_address; - else - bind_address = INADDR_LOOPBACK; + /* Embed in api_main_t */ + memfd->memfd_size = mp->requested_size; + memfd->requested_va = 0ULL; + memfd->i_am_master = 1; + memfd->name = format (0, "%s%c", regp->name, 0); - if (socket_main.portno) - portno = socket_main.portno; - else - portno = SOCKSVR_DEFAULT_PORT; + /* Set up a memfd segment of the requested size */ + rv = memfd_master_init (memfd, mp->client_index); - serv_addr.sin_port = clib_host_to_net_u16 (portno); - serv_addr.sin_addr.s_addr = clib_host_to_net_u32 (bind_address); + if (rv) + goto reply; - if (bind (sockfd, (struct sockaddr *) &serv_addr, sizeof (serv_addr)) < 0) - { - close (sockfd); - return clib_error_return_unix (0, "bind"); - } + /* Remember to close this fd when the socket connection goes away */ + vec_add1 (regp->additional_fds_to_close, memfd->fd); - rv = listen (sockfd, 5); - if (rv < 0) + /* And create a plausible svm_region in it */ + memset (a, 0, sizeof (*a)); + a->baseva = memfd->sh->memfd_va + MMAP_PAGESIZE; + a->size = memfd->memfd_size - MMAP_PAGESIZE; + /* $$$$ might want a different config parameter */ + a->pvt_heap_size = am->api_pvt_heap_size; + a->flags = SVM_FLAGS_MHEAP; + svm_region_init_mapped_region (a, (svm_region_t *) a->baseva); + + vlib_rp = (svm_region_t *) a->baseva; + + /* + * Part deux, initialize the svm_region_t shared-memory header + * api allocation rings, and so on. + */ + vl_init_shmem (vlib_rp, 1 /* is_vlib (dont-care) */ , 1 /* is_private */ ); + + vec_add1 (am->vlib_private_rps, vlib_rp); + + memfd->sh->ready = 1; + + /* Recompute the set of input queues to poll in memclnt_process */ + vec_reset_length (vl_api_queue_cursizes); + +reply: + + /* send the reply message */ + + rmp = vl_msg_api_alloc (sizeof (*rmp)); + rmp->_vl_msg_id = htons (VL_API_MEMFD_SEGMENT_CREATE_REPLY); + rmp->context = mp->context; + rmp->retval = htonl (rv); + + vl_msg_api_send (regp, (u8 *) rmp); + + if (rv != 0) + return; + + /* + * We need the reply message to make it out the back door + * before we send the magic fd message. + */ + vlib_process_suspend (vm, 11e-6); + + cf = file_main.file_pool + regp->clib_file_index; + + /* send the magic "here's your sign (aka fd)" socket message */ + send_fd_msg (cf->file_descriptor, memfd->fd); +} + +#define foreach_vlib_api_msg \ +_(SOCKCLNT_CREATE, sockclnt_create) \ +_(SOCKCLNT_DELETE, sockclnt_delete) \ +_(MEMFD_SEGMENT_CREATE, memfd_segment_create) + +clib_error_t * +socksvr_api_init (vlib_main_t * vm) +{ + clib_file_main_t *fm = &file_main; + clib_file_t template = { 0 }; + vl_api_registration_t *rp; + vl_msg_api_msg_config_t cfg; + vl_msg_api_msg_config_t *c = &cfg; + socket_main_t *sm = &socket_main; + clib_socket_t *sock = &sm->socksvr_listen_socket; + clib_error_t *error; + + /* If not explicitly configured, do not bind/enable, etc. */ + if (sm->socket_name == 0) + return 0; + +#define _(N,n) do { \ + c->id = VL_API_##N; \ + c->name = #n; \ + c->handler = vl_api_##n##_t_handler; \ + c->cleanup = vl_noop_handler; \ + c->endian = vl_api_##n##_t_endian; \ + c->print = vl_api_##n##_t_print; \ + c->size = sizeof(vl_api_##n##_t); \ + c->traced = 1; /* trace, so these msgs print */ \ + c->replay = 0; /* don't replay client create/delete msgs */ \ + c->message_bounce = 0; /* don't bounce this message */ \ + vl_msg_api_config(c);} while (0); + + foreach_vlib_api_msg; +#undef _ + + vec_resize (sm->input_buffer, 4096); + + sock->config = (char *) sm->socket_name; + + /* mkdir of file socket, only under /run */ + if (strncmp (sock->config, "/run", 4) == 0) { - close (sockfd); - return clib_error_return_unix (0, "listen"); + u8 *tmp = format (0, "%s", sock->config); + int i = vec_len (tmp); + while (i && tmp[--i] != '/') + ; + + tmp[i] = 0; + + if (i) + vlib_unix_recursive_mkdir ((char *) tmp); + vec_free (tmp); } - pool_get (socket_main.registration_pool, rp); + sock->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET | + CLIB_SOCKET_F_ALLOW_GROUP_WRITE; + error = clib_socket_init (sock); + if (error) + return error; + + pool_get (sm->registration_pool, rp); memset (rp, 0, sizeof (*rp)); rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN; template.read_function = socksvr_accept_ready; template.write_function = socksvr_bogus_write; - template.file_descriptor = sockfd; - template.private_data = rp - socket_main.registration_pool; + template.file_descriptor = sock->fd; + template.private_data = rp - sm->registration_pool; rp->clib_file_index = clib_file_add (fm, &template); return 0; @@ -633,14 +657,15 @@ static clib_error_t * socket_exit (vlib_main_t * vm) { clib_file_main_t *fm = &file_main; + socket_main_t *sm = &socket_main; vl_api_registration_t *rp; /* Defensive driving in case something wipes out early */ - if (socket_main.registration_pool) + if (sm->registration_pool) { u32 index; /* *INDENT-OFF* */ - pool_foreach (rp, socket_main.registration_pool, ({ + pool_foreach (rp, sm->registration_pool, ({ clib_file_del (fm, fm->file_pool + rp->clib_file_index); index = rp->vl_api_registration_pool_index; vl_free_socket_registration_index (index); @@ -656,13 +681,15 @@ VLIB_MAIN_LOOP_EXIT_FUNCTION (socket_exit); static clib_error_t * socksvr_config (vlib_main_t * vm, unformat_input_t * input) { - int portno; + socket_main_t *sm = &socket_main; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "port %d", &portno)) + if (unformat (input, "socket-name %s", &sm->socket_name)) + ; + else if (unformat (input, "default")) { - socket_main.portno = portno; + sm->socket_name = format (0, "%s%c", API_SOCKET_FILE, 0); } else { @@ -670,25 +697,11 @@ socksvr_config (vlib_main_t * vm, unformat_input_t * input) format_unformat_error, input); } } - return socksvr_api_init (vm); + return 0; } VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr"); -/* argument in host byte order */ -void -socksvr_set_port (u16 port) -{ - socket_main.portno = port; -} - -/* argument in host byte order */ -void -socksvr_set_bind_address (u32 bind_address) -{ - socket_main.bind_address = bind_address; -} - clib_error_t * vlibsocket_init (vlib_main_t * vm) {