2 *------------------------------------------------------------------
5 * Copyright (c) 2009 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
20 #include <sys/types.h>
21 #include <sys/socket.h>
22 #include <netinet/in.h>
23 #include <sys/ioctl.h>
27 #include <vppinfra/byte_order.h>
28 #include <svm/memfd.h>
29 #include <vlibmemory/api.h>
31 #include <vlibmemory/vl_memory_msg_enum.h>
33 #define vl_typedefs /* define message structures */
34 #include <vlibmemory/vl_memory_api_h.h>
37 /* instantiate all the print functions we know about */
38 #define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
40 #include <vlibmemory/vl_memory_api_h.h>
43 /* instantiate all the endian swap functions we know about */
45 #include <vlibmemory/vl_memory_api_h.h>
/*
 * Module-wide state of the socket API server. The functions in this file
 * reach the following members through this object: registration_pool,
 * input_buffer, process_args, current_uf, current_rp, socket_name and
 * socksvr_listen_socket.
 */
48 socket_main_t socket_main;
/*
 * Print a table of connected socket API clients to the CLI.
 *
 * One row (name, file descriptor) is emitted for every registration in
 * socket_main.registration_pool whose type is
 * REGISTRATION_TYPE_SOCKET_SERVER. Nothing is printed unless the pool
 * holds at least two elements, since one of them is the listen socket.
 *
 * vm - CLI output handle passed to vlib_cli_output.
 * am - not referenced in the lines visible here.
 *
 * NOTE(review): the return type, the declaration of f and the statement
 * guarded by the pool_elts test are not visible in this listing;
 * presumably the guard is an early return - confirm.
 */
51 vl_sock_api_dump_clients (vlib_main_t * vm, api_main_t * am)
53 vl_api_registration_t *reg;
54 socket_main_t *sm = &socket_main;
55 clib_file_main_t *fm = &file_main;
59 * Must have at least one active client, not counting the
60 * REGISTRATION_TYPE_SOCKET_LISTEN bind/accept socket
62 if (pool_elts (sm->registration_pool) < 2)
65 vlib_cli_output (vm, "Socket clients");
66 vlib_cli_output (vm, "%20s %8s", "Name", "Fildesc");
68 pool_foreach (reg, sm->registration_pool,
70 if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) {
71 f = pool_elt_at_index (fm->file_pool, reg->clib_file_index);
72 vlib_cli_output (vm, "%20s %8d",
73 reg->name, f->file_descriptor);
/*
 * Queue one API message for transmission to a socket client.
 *
 * rp   - registration of the destination client; asserted to have a
 *        registration type greater than REGISTRATION_TYPE_SHMEM.
 * elem - pointer to the message payload. The first two bytes are read
 *        as the message id in network byte order, and the msgbuf_t
 *        header is located by stepping back offsetof (msgbuf_t, data)
 *        bytes from elem.
 *
 * A message id at or beyond vec_len (am->api_trace_cfg) is reported and
 * the message is freed. Otherwise the header and then the payload
 * (ntohl (mb->data_len) bytes) are appended to the client output
 * vector, and elem is freed with vl_msg_api_free.
 *
 * NOTE(review): the continuation of the no_flush call, the declaration
 * of output_length, any return after the out-of-range branch and any
 * conditional around the wrote-bytes diagnostic are not visible in this
 * listing - confirm against the full file.
 */
80 vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
85 socket_main_t *sm = &socket_main;
86 u16 msg_id = ntohs (*(u16 *) elem);
87 api_main_t *am = &api_main;
88 msgbuf_t *mb = (msgbuf_t *) (elem - offsetof (msgbuf_t, data));
89 clib_file_t *cf = clib_file_get (&file_main, rp->clib_file_index);
90 vl_api_registration_t *sock_rp;
92 ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
94 if (msg_id >= vec_len (am->api_trace_cfg))
96 clib_warning ("id out of range: %d", msg_id);
97 vl_msg_api_free ((void *) elem);
/* Re-resolve the registration through the pool using its stored index. */
101 sock_rp = pool_elt_at_index (sm->registration_pool,
102 rp->vl_api_registration_pool_index);
105 /* Add the msgbuf_t to the output vector */
106 vl_socket_add_pending_output_no_flush (cf, sock_rp, (u8 *) mb,
108 /* Send the message */
109 vl_socket_add_pending_output (cf, sock_rp, elem, ntohl (mb->data_len));
112 output_length = sizeof (*mb) + ntohl (mb->data_len);
113 clib_warning ("wrote %u bytes to fd %d", output_length,
114 cf->file_descriptor);
117 vl_msg_api_free ((void *) elem);
/*
 * Release the socket registration stored at pool_index.
 *
 * If the index is already free a warning is logged. Otherwise every
 * descriptor in additional_fds_to_close is closed (a failed close is
 * reported, not fatal), the per-client vectors are freed, the type is
 * set to REGISTRATION_TYPE_FREE and the element is returned to
 * socket_main.registration_pool.
 *
 * NOTE(review): the declaration of i and any return following the
 * already-free warning are not visible in this listing.
 * NOTE(review): rp->name, which the sockclnt_create handler allocates
 * with format, is not freed in the lines shown - possible leak, confirm.
 */
121 vl_socket_free_registration_index (u32 pool_index)
124 vl_api_registration_t *rp;
125 if (pool_is_free_index (socket_main.registration_pool, pool_index))
127 clib_warning ("main pool index %d already free", pool_index);
130 rp = pool_elt_at_index (socket_main.registration_pool, pool_index);
132 ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE);
133 for (i = 0; i < vec_len (rp->additional_fds_to_close); i++)
134 if (close (rp->additional_fds_to_close[i]) < 0)
135 clib_unix_warning ("close");
136 vec_free (rp->additional_fds_to_close);
138 vec_free (rp->unprocessed_input);
139 vec_free (rp->output_vector);
140 rp->registration_type = REGISTRATION_TYPE_FREE;
141 pool_put (socket_main.registration_pool, rp);
/*
 * Dispatch one complete message received from a socket client.
 *
 * The buffer input_v is interpreted as a msgbuf_t and its data member is
 * handed to vl_msg_api_socket_handler. While the handler runs,
 * socket_main.current_uf and socket_main.current_rp identify the
 * originating file and registration; both are reset to 0 afterwards.
 *
 * NOTE(review): the remainder of the parameter list (which declares
 * input_v) is not visible in this listing.
 */
145 vl_socket_process_api_msg (clib_file_t * uf, vl_api_registration_t * rp,
148 msgbuf_t *mbp = (msgbuf_t *) input_v;
150 u8 *the_msg = (u8 *) (mbp->data);
151 socket_main.current_uf = uf;
152 socket_main.current_rp = rp;
153 vl_msg_api_socket_handler (the_msg);
154 socket_main.current_uf = 0;
155 socket_main.current_rp = 0;
/*
 * Read-ready callback for a client socket.
 *
 * Reads up to vec_len (socket_main.input_buffer) bytes into the shared
 * input buffer. If the registration already holds unprocessed_input
 * from an earlier read, the new bytes are appended to it and parsing
 * continues on the combined vector; otherwise parsing runs on the
 * shared input buffer directly.
 *
 * When the buffer does not yet hold a whole message (msgbuf_t header
 * plus ntohl (data_len) payload bytes), the fragment is saved in
 * rp->unprocessed_input and the shared buffer length is restored.
 * When a whole message is present, a copy trimmed to the message
 * length is stored in a socket_main.process_args element and
 * vl_api_clnt_node is signalled with the index of that element.
 *
 * On a read failure the file is deleted and the registration freed.
 *
 * NOTE(review): this listing has lost lines (declarations of n,
 * msg_buffer, msg_len and mbp, branch keywords, returns and any loop
 * construct), so the exact control flow must be confirmed against the
 * full file.
 */
159 vl_socket_read_ready (clib_file_t * uf)
161 clib_file_main_t *fm = &file_main;
162 vlib_main_t *vm = vlib_get_main ();
163 vl_api_registration_t *rp;
166 u8 *data_for_process;
/* Remember the buffer capacity so it can be restored before returning. */
168 u32 save_input_buffer_length = vec_len (socket_main.input_buffer);
169 vl_socket_args_for_process_t *a;
173 rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
175 n = read (uf->file_descriptor, socket_main.input_buffer,
176 vec_len (socket_main.input_buffer));
/*
 * NOTE(review): read does not set errno when it returns 0, so a stale
 * EAGAIN could make this test skip the teardown on end-of-file; and a
 * negative n with EAGAIN appears to reach the _vec_len assignment
 * below - confirm and consider testing n == 0 separately.
 */
178 if (n <= 0 && errno != EAGAIN)
180 clib_file_del (fm, uf);
182 if (!pool_is_free (socket_main.registration_pool, rp))
184 u32 index = rp - socket_main.registration_pool;
185 vl_socket_free_registration_index (index);
189 clib_warning ("client index %d already free?",
190 rp->vl_api_registration_pool_index);
195 _vec_len (socket_main.input_buffer) = n;
198 * Look for bugs here. This code is tricky because
199 * data read from a stream socket does not honor message
200 * boundaries. In the case of a long message (>4K bytes)
201 * we have to do (at least) 2 reads, etc.
205 if (vec_len (rp->unprocessed_input))
207 vec_append (rp->unprocessed_input, socket_main.input_buffer);
208 msg_buffer = rp->unprocessed_input;
212 msg_buffer = socket_main.input_buffer;
218 /* Any chance that we have a complete message? */
219 if (vec_len (msg_buffer) <= sizeof (msgbuf_t))
222 mbp = (msgbuf_t *) msg_buffer;
223 msg_len = ntohl (mbp->data_len);
227 /* We don't have the entire message yet. */
229 || (msg_len + sizeof (msgbuf_t)) > vec_len (msg_buffer))
232 /* if we were using the input buffer save the fragment */
233 if (msg_buffer == socket_main.input_buffer)
235 ASSERT (vec_len (rp->unprocessed_input) == 0);
236 vec_validate (rp->unprocessed_input, vec_len (msg_buffer) - 1);
237 clib_memcpy (rp->unprocessed_input, msg_buffer,
238 vec_len (msg_buffer));
239 _vec_len (rp->unprocessed_input) = vec_len (msg_buffer);
241 _vec_len (socket_main.input_buffer) = save_input_buffer_length;
/* Hand a private copy, cut to exactly one message, to the API process. */
245 data_for_process = (u8 *) vec_dup (msg_buffer);
246 _vec_len (data_for_process) = (msg_len + sizeof (msgbuf_t));
247 pool_get (socket_main.process_args, a);
250 a->data = data_for_process;
252 vlib_process_signal_event (vm, vl_api_clnt_node.index,
254 a - socket_main.process_args);
/*
 * NOTE(review): n counts only the bytes of this read, while msg_buffer
 * may also contain earlier bytes from rp->unprocessed_input; verify
 * that this comparison and the subtraction below stay correct then.
 */
255 if (n > (msg_len + sizeof (*mbp)))
256 vec_delete (msg_buffer, msg_len + sizeof (*mbp), 0);
258 _vec_len (msg_buffer) = 0;
259 n -= msg_len + sizeof (msgbuf_t);
265 _vec_len (socket_main.input_buffer) = save_input_buffer_length;
/*
 * Append buffer_bytes bytes from buffer to the client output vector and
 * request write-ready notification for the file.
 *
 * uf           - file associated with the client socket.
 * rp           - registration owning output_vector.
 * buffer       - bytes to queue.
 * buffer_bytes - number of bytes to queue.
 *
 * If the output vector is non-empty after the append, the
 * UNIX_FILE_DATA_AVAILABLE_TO_WRITE flag is set on uf and
 * fm->file_update is called with UNIX_FILE_UPDATE_MODIFY.
 *
 * NOTE(review): skip_update records whether the flag was already set;
 * the statement that consults it is not visible in this listing -
 * presumably file_update is skipped in that case, confirm.
 */
271 vl_socket_add_pending_output (clib_file_t * uf,
272 vl_api_registration_t * rp,
273 u8 * buffer, uword buffer_bytes)
275 clib_file_main_t *fm = &file_main;
277 vec_add (rp->output_vector, buffer, buffer_bytes);
278 if (vec_len (rp->output_vector) > 0)
280 int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
281 uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
283 fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
/*
 * Append buffer_bytes bytes from buffer to rp->output_vector without
 * touching the file flags or calling file_update. The parameter uf is
 * not referenced in the lines visible here.
 */
288 vl_socket_add_pending_output_no_flush (clib_file_t * uf,
289 vl_api_registration_t * rp,
290 u8 * buffer, uword buffer_bytes)
292 vec_add (rp->output_vector, buffer, buffer_bytes);
/*
 * Remove the first n_bytes bytes from rp->output_vector.
 *
 * Once the vector is empty, the UNIX_FILE_DATA_AVAILABLE_TO_WRITE flag
 * is cleared on uf and fm->file_update is called with
 * UNIX_FILE_UPDATE_MODIFY.
 *
 * NOTE(review): skip_update records whether the flag was already clear;
 * the statement that consults it is not visible in this listing.
 */
296 socket_del_pending_output (clib_file_t * uf,
297 vl_api_registration_t * rp, uword n_bytes)
299 clib_file_main_t *fm = &file_main;
301 vec_delete (rp->output_vector, n_bytes, 0);
302 if (vec_len (rp->output_vector) <= 0)
304 int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
305 uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
307 fm->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
/*
 * Write-ready callback for a client socket.
 *
 * Attempts to write the whole of rp->output_vector to the descriptor in
 * a single write call. On the error path a warning is logged, the file
 * is deleted and the registration is freed. Otherwise the n bytes
 * actually written are dropped from the output vector through
 * socket_del_pending_output.
 *
 * NOTE(review): the declaration of n, the condition selecting the error
 * path and the return statements are not visible in this listing.
 */
312 vl_socket_write_ready (clib_file_t * uf)
314 clib_file_main_t *fm = &file_main;
315 vl_api_registration_t *rp;
318 rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
320 /* Flush output vector. */
321 n = write (uf->file_descriptor,
322 rp->output_vector, vec_len (rp->output_vector));
326 clib_warning ("write error, close the file...\n");
328 clib_file_del (fm, uf);
330 vl_socket_free_registration_index (rp - socket_main.registration_pool);
335 socket_del_pending_output (uf, rp, n);
/*
 * Error callback for a client socket: looks up the registration from
 * uf->private_data, deletes the file and frees the registration.
 */
341 vl_socket_error_ready (clib_file_t * uf)
343 vl_api_registration_t *rp;
344 clib_file_main_t *fm = &file_main;
346 rp = pool_elt_at_index (socket_main.registration_pool, uf->private_data);
347 clib_file_del (fm, uf);
348 vl_socket_free_registration_index (rp - socket_main.registration_pool);
/*
 * Create a registration for a newly accepted client descriptor.
 *
 * A zeroed element is taken from socket_main.registration_pool and a
 * clib_file_t is registered for fd with the read, write and error
 * callbacks defined in this file. The pool index of the registration
 * is stored both as the file private_data and as
 * rp->vl_api_registration_pool_index, so that each can find the other.
 *
 * fm - file main used for clib_file_add.
 * fd - connected client socket descriptor.
 */
354 socksvr_file_add (clib_file_main_t * fm, int fd)
356 vl_api_registration_t *rp;
357 clib_file_t template = { 0 };
359 pool_get (socket_main.registration_pool, rp);
360 memset (rp, 0, sizeof (*rp));
362 template.read_function = vl_socket_read_ready;
363 template.write_function = vl_socket_write_ready;
364 template.error_function = vl_socket_error_ready;
365 template.file_descriptor = fd;
366 template.private_data = rp - socket_main.registration_pool;
368 rp->registration_type = REGISTRATION_TYPE_SOCKET_SERVER;
369 rp->vl_api_registration_pool_index = rp - socket_main.registration_pool;
370 rp->clib_file_index = clib_file_add (fm, &template);
/*
 * Read-ready callback of the listen socket: accepts one connection on
 * socket_main.socksvr_listen_socket and registers the new descriptor
 * through socksvr_file_add.
 *
 * NOTE(review): the declaration of error and the handling of a failed
 * clib_socket_accept are not visible in this listing.
 */
373 static clib_error_t *
374 socksvr_accept_ready (clib_file_t * uf)
376 clib_file_main_t *fm = &file_main;
377 socket_main_t *sm = &socket_main;
378 clib_socket_t *sock = &sm->socksvr_listen_socket;
379 clib_socket_t client;
382 error = clib_socket_accept (sock, &client);
387 socksvr_file_add (fm, client.fd);
/*
 * Write callback installed on the listen socket by vl_sock_api_init.
 * It only logs a warning; the return statement is not visible here.
 */
391 static clib_error_t *
392 socksvr_bogus_write (clib_file_t * uf)
394 clib_warning ("why am I here?");
399 * vl_api_sockclnt_create_t_handler
/*
 * Handle a SOCKCLNT_CREATE request.
 *
 * The registration is taken from socket_main.current_rp, which
 * vl_socket_process_api_msg sets while a message is being dispatched.
 * The client name from the request is copied into regp->name as a
 * NUL-terminated vector, and a SOCKCLNT_CREATE_REPLY carrying the
 * handle, the registration pool index, the request context and the
 * response code is sent back.
 *
 * NOTE(review): the declaration and value of rv are not visible in
 * this listing.
 * NOTE(review): the handle returned to the client is the address of
 * the registration cast to uword - confirm that exposing it is intended.
 * NOTE(review): regp->name is assigned without freeing any earlier
 * value, so a repeated create on one connection may leak - confirm.
 */
402 vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp)
404 vl_api_registration_t *regp;
405 vl_api_sockclnt_create_reply_t *rp;
408 regp = socket_main.current_rp;
410 ASSERT (regp->registration_type == REGISTRATION_TYPE_SOCKET_SERVER);
412 regp->name = format (0, "%s%c", mp->name, 0);
414 rp = vl_msg_api_alloc (sizeof (*rp));
415 rp->_vl_msg_id = htons (VL_API_SOCKCLNT_CREATE_REPLY);
416 rp->handle = (uword) regp;
417 rp->index = (uword) regp->vl_api_registration_pool_index;
418 rp->context = mp->context;
419 rp->response = htonl (rv);
421 vl_api_send_msg (regp, (u8 *) rp);
425 * vl_api_sockclnt_delete_t_handler
/*
 * Handle a SOCKCLNT_DELETE request.
 *
 * If mp->index names a live element of socket_main.registration_pool,
 * a SOCKCLNT_DELETE_REPLY echoing mp->handle is sent, the client file
 * is deleted and the registration is freed. Otherwise a warning about
 * the unknown client id is logged.
 *
 * NOTE(review): mp->index comes from the client and is used as given;
 * nothing visible here restricts it to the registration of the caller.
 */
428 vl_api_sockclnt_delete_t_handler (vl_api_sockclnt_delete_t * mp)
430 vl_api_registration_t *regp;
431 vl_api_sockclnt_delete_reply_t *rp;
433 if (!pool_is_free_index (socket_main.registration_pool, mp->index))
435 regp = pool_elt_at_index (socket_main.registration_pool, mp->index);
437 rp = vl_msg_api_alloc (sizeof (*rp));
438 rp->_vl_msg_id = htons (VL_API_SOCKCLNT_DELETE_REPLY);
439 rp->handle = mp->handle;
440 rp->response = htonl (1);
442 vl_api_send_msg (regp, (u8 *) rp);
/*
 * NOTE(review): the file and the registration (including its output
 * vector) are torn down right after the reply is handed to
 * vl_api_send_msg; if that call only queues the reply, it may never
 * reach the client - confirm.
 */
444 clib_file_del (&file_main, file_main.file_pool + regp->clib_file_index);
446 vl_socket_free_registration_index (mp->index);
450 clib_warning ("unknown client ID %d", mp->index);
/*
 * Pass the file descriptor fd_to_share to the peer of socket_fd.
 *
 * A single SOL_SOCKET / SCM_RIGHTS control message holding one int is
 * built in ctl and sent with sendmsg, together with a small data
 * payload taken from msg. A unix error labelled sendmsg is returned on
 * the failure path.
 *
 * NOTE(review): the declarations of iov, msg and rv, the assignment of
 * iov to mh, the failure condition and the success return are not
 * visible in this listing.
 */
454 static clib_error_t *
455 send_fd_msg (int socket_fd, int fd_to_share)
457 struct msghdr mh = { 0 };
459 char ctl[CMSG_SPACE (sizeof (int))];
463 iov[0].iov_base = msg;
464 iov[0].iov_len = strlen (msg);
468 struct cmsghdr *cmsg;
469 memset (&ctl, 0, sizeof (ctl));
470 mh.msg_control = ctl;
471 mh.msg_controllen = sizeof (ctl);
472 cmsg = CMSG_FIRSTHDR (&mh);
473 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
474 cmsg->cmsg_level = SOL_SOCKET;
475 cmsg->cmsg_type = SCM_RIGHTS;
/* The descriptor travels as the payload of the control message. */
476 memcpy (CMSG_DATA (cmsg), &fd_to_share, sizeof (int));
478 rv = sendmsg (socket_fd, &mh, 0);
480 return clib_error_return_unix (0, "sendmsg");
/*
 * Build the vector of shared-memory ring and queue descriptions used
 * when a socket client bootstraps the shared-memory API.
 *
 * Two paths are visible. One fills a fixed four-entry default: a
 * VL_API_VLIB_RING, two VL_API_CLIENT_RING entries and a VL_API_QUEUE.
 * The other sizes the vector to mp->nitems and copies type, count and
 * size from each element of mp->configs, reinterpreting the element
 * storage as a vl_api_shm_elem_config_t.
 *
 * NOTE(review): the condition choosing between the two paths, the
 * declarations of i and cfg, the count of default entry 2 and the
 * return statement are not visible in this listing.
 * NOTE(review): the caller visible in this file does not free the
 * returned vector in the lines shown - confirm ownership.
 */
484 vl_api_shm_elem_config_t *
485 vl_api_make_shm_config (vl_api_sock_init_shm_t * mp)
487 vl_api_shm_elem_config_t *config = 0, *c;
493 vec_validate (config, 3);
494 config[0].type = VL_API_VLIB_RING;
495 config[0].count = 128;
496 config[0].size = 256;
497 config[1].type = VL_API_CLIENT_RING;
498 config[1].count = 128;
499 config[1].size = 1024;
500 config[2].type = VL_API_CLIENT_RING;
502 config[2].size = 4096;
503 config[3].type = VL_API_QUEUE;
504 config[3].count = 128;
505 config[3].size = sizeof (uword);
509 vec_validate (config, mp->nitems - 1);
510 for (i = 0; i < mp->nitems; i++)
512 cfg = mp->configs[i];
513 /* Pretty much a hack but it avoids defining our own api type
515 c = (vl_api_shm_elem_config_t *) & cfg;
516 config[i].type = c->type;
517 config[i].count = c->count;
518 config[i].size = c->size;
525 * Bootstrap shm api using the socket api
/*
 * Handle a SOCK_INIT_SHM request: create a memfd-backed segment, set up
 * a private shared-memory API region inside it, and pass the segment
 * descriptor to the requesting socket client.
 *
 * Steps visible in this listing:
 *  - resolve the registration from mp->client_index and require type
 *    REGISTRATION_TYPE_SOCKET_SERVER (otherwise rv is set to -31);
 *  - initialise a memfd segment of mp->requested_size bytes named after
 *    the client, and record its fd in regp->additional_fds_to_close;
 *  - initialise an svm region one MMAP_PAGESIZE into the segment and run
 *    vl_init_shmem on it with the configuration returned by
 *    vl_api_make_shm_config;
 *  - append the region to am->vlib_private_rps and reset
 *    vl_api_queue_cursizes;
 *  - send SOCK_INIT_SHM_REPLY, call the file write_function directly to
 *    flush it, then send the fd with send_fd_msg.
 *
 * NOTE(review): declarations of rv and cf, the disconnected-client
 * condition, the labels or jumps of the error paths and the returns are
 * not visible in this listing.
 * NOTE(review): memfd->name and config are not freed in the lines
 * shown - possible leaks, confirm.
 */
528 vl_api_sock_init_shm_t_handler (vl_api_sock_init_shm_t * mp)
530 vl_api_sock_init_shm_reply_t *rmp;
531 memfd_private_t _memfd_private, *memfd = &_memfd_private;
532 svm_map_region_args_t _args, *a = &_args;
533 vl_api_registration_t *regp;
534 api_main_t *am = &api_main;
535 svm_region_t *vlib_rp;
537 vl_api_shm_elem_config_t *config = 0;
540 regp = vl_api_client_index_to_registration (mp->client_index);
543 clib_warning ("API client disconnected");
546 if (regp->registration_type != REGISTRATION_TYPE_SOCKET_SERVER)
548 rv = -31; /* VNET_API_ERROR_INVALID_REGISTRATION */
553 * Set up a memfd segment of the requested size wherein the
554 * shmem data structures will be initialized
556 memset (memfd, 0, sizeof (*memfd));
557 memfd->memfd_size = mp->requested_size;
558 memfd->requested_va = 0ULL;
559 memfd->i_am_master = 1;
560 memfd->name = format (0, "%s%c", regp->name, 0);
562 if ((rv = memfd_master_init (memfd, mp->client_index)))
565 /* Remember to close this fd when the socket connection goes away */
566 vec_add1 (regp->additional_fds_to_close, memfd->fd);
569 * Create a plausible svm_region in the memfd backed segment
571 memset (a, 0, sizeof (*a));
572 a->baseva = memfd->sh->memfd_va + MMAP_PAGESIZE;
573 a->size = memfd->memfd_size - MMAP_PAGESIZE;
574 /* $$$$ might want a different config parameter */
575 a->pvt_heap_size = am->api_pvt_heap_size;
576 a->flags = SVM_FLAGS_MHEAP;
577 svm_region_init_mapped_region (a, (svm_region_t *) a->baseva);
580 * Part deux, initialize the svm_region_t shared-memory header
581 * api allocation rings, and so on.
583 config = vl_api_make_shm_config (mp);
584 vlib_rp = (svm_region_t *) a->baseva;
585 vl_init_shmem (vlib_rp, config, 1 /* is_vlib (dont-care) */ ,
586 1 /* is_private */ );
587 vec_add1 (am->vlib_private_rps, vlib_rp);
588 memfd->sh->ready = 1;
591 /* Recompute the set of input queues to poll in memclnt_process */
592 vec_reset_length (vl_api_queue_cursizes);
596 rmp = vl_msg_api_alloc (sizeof (*rmp));
597 rmp->_vl_msg_id = htons (VL_API_SOCK_INIT_SHM_REPLY);
598 rmp->context = mp->context;
599 rmp->retval = htonl (rv);
601 vl_api_send_msg (regp, (u8 *) rmp);
607 * We need the reply message to make it out the back door
608 * before we send the magic fd message so force a flush
610 cf = clib_file_get (&file_main, regp->clib_file_index);
611 cf->write_function (cf);
613 /* Send the magic "here's your sign (aka fd)" socket message */
/* NOTE(review): the clib_error_t result of send_fd_msg is discarded. */
614 send_fd_msg (cf->file_descriptor, memfd->fd);
618 * Create a memory-fd segment.
/*
 * Handle a MEMFD_SEGMENT_CREATE request: create a memfd segment of
 * mp->requested_size bytes named after the client, remember its fd in
 * regp->additional_fds_to_close, send MEMFD_SEGMENT_CREATE_REPLY with
 * the result code, flush it by calling the file write_function
 * directly, and then send the descriptor with send_fd_msg.
 *
 * NOTE(review): declarations of rv and cf, the disconnected-client
 * condition, the error-path jumps and the returns are not visible in
 * this listing.
 * NOTE(review): memfd->name is not freed in the lines shown - confirm.
 */
621 vl_api_memfd_segment_create_t_handler (vl_api_memfd_segment_create_t * mp)
623 vl_api_memfd_segment_create_reply_t *rmp;
625 memfd_private_t _memfd_private, *memfd = &_memfd_private;
626 vl_api_registration_t *regp;
629 regp = vl_api_client_index_to_registration (mp->client_index);
632 clib_warning ("API client disconnected");
636 memset (memfd, 0, sizeof (*memfd));
637 memfd->memfd_size = mp->requested_size;
638 memfd->requested_va = 0ULL;
639 memfd->i_am_master = 1;
640 memfd->name = format (0, "%s%c", regp->name, 0);
642 /* Set up a memfd segment of the requested size */
643 if ((rv = memfd_master_init (memfd, mp->client_index)))
646 /* Remember to close this fd when the socket connection goes away */
647 vec_add1 (regp->additional_fds_to_close, memfd->fd);
651 rmp = vl_msg_api_alloc (sizeof (*rmp));
652 rmp->_vl_msg_id = htons (VL_API_MEMFD_SEGMENT_CREATE_REPLY);
653 rmp->context = mp->context;
654 rmp->retval = htonl (rv);
656 vl_api_send_msg (regp, (u8 *) rmp);
662 * We need the reply message to make it out the back door
663 * before we send the magic fd message.
/*
 * NOTE(review): the file is located by indexing file_pool directly,
 * whereas the sock_init_shm handler uses clib_file_get for the same
 * purpose - consider using one form in both places.
 */
665 cf = file_main.file_pool + regp->clib_file_index;
666 cf->write_function (cf);
668 /* send the magic "here's your sign (aka fd)" socket message */
/* NOTE(review): the clib_error_t result of send_fd_msg is discarded. */
669 send_fd_msg (cf->file_descriptor, memfd->fd);
/*
 * List of the API messages handled in this file, as (ID, name) pairs.
 * The user defines a two-argument macro named _ before expanding this
 * list; vl_sock_api_init expands it to register one handler per entry.
 */
672 #define foreach_vlib_api_msg \
673 _(SOCKCLNT_CREATE, sockclnt_create) \
674 _(SOCKCLNT_DELETE, sockclnt_delete) \
675 _(SOCK_INIT_SHM, sock_init_shm) \
676 _(MEMFD_SEGMENT_CREATE, memfd_segment_create)
/*
 * Bring up the socket API server.
 *
 * Nothing is bound unless sm->socket_name has been configured. When it
 * has, the handlers listed in foreach_vlib_api_msg are registered, the
 * shared input buffer is grown by 4096 bytes, the directory part of a
 * socket path that starts with /run is created, and the listen socket
 * is initialised with the server, seqpacket and group-write flags. A
 * registration of type REGISTRATION_TYPE_SOCKET_LISTEN is then created
 * and the listen descriptor is added to the file main with
 * socksvr_accept_ready as its read callback.
 *
 * NOTE(review): the return type, the declaration of error, the macro
 * definition line preceding the vl_msg_api_set_handlers call, the body
 * of the path-trimming loop and the error and return statements are not
 * visible in this listing.
 * NOTE(review): tmp is built with format and the pattern %s alone, so
 * it carries no terminating NUL of its own; presumably the trimming
 * loop writes one before the mkdir call - confirm. tmp is also not
 * freed in the lines shown.
 */
679 vl_sock_api_init (vlib_main_t * vm)
681 clib_file_main_t *fm = &file_main;
682 clib_file_t template = { 0 };
683 vl_api_registration_t *rp;
684 socket_main_t *sm = &socket_main;
685 clib_socket_t *sock = &sm->socksvr_listen_socket;
688 /* If not explicitly configured, do not bind/enable, etc. */
689 if (sm->socket_name == 0)
693 vl_msg_api_set_handlers(VL_API_##N, #n, \
694 vl_api_##n##_t_handler, \
696 vl_api_##n##_t_endian, \
697 vl_api_##n##_t_print, \
698 sizeof(vl_api_##n##_t), 1);
699 foreach_vlib_api_msg;
702 vec_resize (sm->input_buffer, 4096);
704 sock->config = (char *) sm->socket_name;
706 /* mkdir of file socket, only under /run */
707 if (strncmp (sock->config, "/run", 4) == 0)
709 u8 *tmp = format (0, "%s", sock->config);
710 int i = vec_len (tmp);
711 while (i && tmp[--i] != '/')
717 vlib_unix_recursive_mkdir ((char *) tmp);
721 sock->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET |
722 CLIB_SOCKET_F_ALLOW_GROUP_WRITE;
723 error = clib_socket_init (sock);
727 pool_get (sm->registration_pool, rp);
728 memset (rp, 0, sizeof (*rp));
730 rp->registration_type = REGISTRATION_TYPE_SOCKET_LISTEN;
732 template.read_function = socksvr_accept_ready;
733 template.write_function = socksvr_bogus_write;
734 template.file_descriptor = sock->fd;
735 template.private_data = rp - sm->registration_pool;
737 rp->clib_file_index = clib_file_add (fm, &template);
/*
 * Main-loop exit hook: for every element of sm->registration_pool,
 * delete its file and free the registration through
 * vl_socket_free_registration_index. Skipped entirely when the pool was
 * never allocated.
 *
 * NOTE(review): the declaration of index and the return statement are
 * not visible in this listing.
 * NOTE(review): elements are returned to the pool while pool_foreach is
 * walking it - confirm that the iteration tolerates this.
 * NOTE(review): the listen registration created in vl_sock_api_init is
 * zeroed and its vl_api_registration_pool_index is not assigned in the
 * lines shown, so the index read here for it would be 0 - confirm.
 */
741 static clib_error_t *
742 socket_exit (vlib_main_t * vm)
744 clib_file_main_t *fm = &file_main;
745 socket_main_t *sm = &socket_main;
746 vl_api_registration_t *rp;
748 /* Defensive driving in case something wipes out early */
749 if (sm->registration_pool)
753 pool_foreach (rp, sm->registration_pool, ({
754 clib_file_del (fm, fm->file_pool + rp->clib_file_index);
755 index = rp->vl_api_registration_pool_index;
756 vl_socket_free_registration_index (index);
764 VLIB_MAIN_LOOP_EXIT_FUNCTION (socket_exit);
/*
 * Parse the socksvr configuration section.
 *
 * Accepted input, repeated until end of input:
 *   socket-name <string>  - stored in sm->socket_name;
 *   default               - sets sm->socket_name to API_SOCKET_FILE as
 *                           a NUL-terminated vector.
 * Anything else yields an error describing the unknown input.
 *
 * NOTE(review): the success return is not visible in this listing.
 */
766 static clib_error_t *
767 socksvr_config (vlib_main_t * vm, unformat_input_t * input)
769 socket_main_t *sm = &socket_main;
771 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
773 if (unformat (input, "socket-name %s", &sm->socket_name))
775 else if (unformat (input, "default"))
777 sm->socket_name = format (0, "%s%c", API_SOCKET_FILE, 0);
781 return clib_error_return (0, "unknown input '%U'",
782 format_unformat_error, input);
788 VLIB_CONFIG_FUNCTION (socksvr_config, "socksvr");
/*
 * Init function registered through VLIB_INIT_FUNCTION.
 * NOTE(review): neither the return type nor the body is visible in this
 * listing, so its behaviour cannot be described from here.
 */
791 vlibsocket_init (vlib_main_t * vm)
796 VLIB_INIT_FUNCTION (vlibsocket_init);
799 * fd.io coding-style-patch-verification: ON
802 * eval: (c-set-style "gnu")