2 * Copyright (c) 2018-2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vcl/vcl_private.h>
/* File-scope pthread key. It is created in vcl_worker_register_with_vpp ()
 * with vcl_worker_cleanup_cb () as its destructor. */
18 static pthread_key_t vcl_worker_stop_key;
/* Allocate an mq event connection from wrk->mq_evt_conns and zero it. */
21 vcl_mq_evt_conn_alloc (vcl_worker_t * wrk)
23 vcl_mq_evt_conn_t *mqc;
24 pool_get (wrk->mq_evt_conns, mqc);
25 memset (mqc, 0, sizeof (*mqc));
/* Index of mqc within wrk->mq_evt_conns, obtained by subtracting the pool
 * base pointer from mqc. */
30 vcl_mq_evt_conn_index (vcl_worker_t * wrk, vcl_mq_evt_conn_t * mqc)
32 return (mqc - wrk->mq_evt_conns);
/* Element of wrk->mq_evt_conns at index mq_conn_idx. */
36 vcl_mq_evt_conn_get (vcl_worker_t * wrk, u32 mq_conn_idx)
38 return pool_elt_at_index (wrk->mq_evt_conns, mq_conn_idx);
41 /* Add unix socket to epoll.
42 * Used only to get a notification on socket close
43 * We can't use eventfd because we don't get notifications on that fds
46 vcl_mq_epoll_add_api_sock (vcl_worker_t *wrk)
48 clib_socket_t *cs = &wrk->app_api_sock;
49 struct epoll_event e = { 0 };
53 rv = epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, cs->fd, &e);
54 if (rv != EEXIST && rv < 0)
/* Register the eventfd of mq with the worker's mq epoll fd.
 * A new mq event connection is allocated for it and its pool index is stored
 * as the epoll user data. The app api socket is then added to the same epoll
 * fd through vcl_mq_epoll_add_api_sock (). */
61 vcl_mq_epoll_add_evfd (vcl_worker_t * wrk, svm_msg_q_t * mq)
63 struct epoll_event e = { 0 };
64 vcl_mq_evt_conn_t *mqc;
68 mq_fd = svm_msg_q_get_eventfd (mq);
/* Both a valid epoll fd and a valid eventfd are required */
70 if (wrk->mqs_epfd < 0 || mq_fd == -1)
73 mqc = vcl_mq_evt_conn_alloc (wrk);
74 mqc_index = vcl_mq_evt_conn_index (wrk, mqc);
/* Events for this fd are tagged with the connection's pool index */
79 e.data.u32 = mqc_index;
80 if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, mq_fd, &e) < 0)
82 VDBG (0, "failed to add mq eventfd to mq epoll fd");
86 if (vcl_mq_epoll_add_api_sock (wrk))
88 VDBG (0, "failed to add mq socket to mq epoll fd");
96 vcl_mq_epoll_del_evfd (vcl_worker_t * wrk, u32 mqc_index)
98 vcl_mq_evt_conn_t *mqc;
100 if (wrk->mqs_epfd || mqc_index == ~0)
103 mqc = vcl_mq_evt_conn_get (wrk, mqc_index);
104 if (epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_DEL, mqc->mq_fd, 0) < 0)
106 VDBG (0, "failed to del mq eventfd to mq epoll fd");
/* Allocate a worker from vcm->workers, zero it and record its pool index in
 * wrk_index. forked_child starts out as ~0. */
112 static vcl_worker_t *
113 vcl_worker_alloc (void)
116 pool_get (vcm->workers, wrk);
117 memset (wrk, 0, sizeof (*wrk));
118 wrk->wrk_index = wrk - vcm->workers;
119 wrk->forked_child = ~0;
/* Return wrk to the vcm->workers pool. */
124 vcl_worker_free (vcl_worker_t * wrk)
126 pool_put (vcm->workers, wrk);
/* Add an app worker through vcl_sapi_app_worker_add () when
 * vcm->cfg.vpp_app_socket_api is set and through vcl_bapi_app_worker_add ()
 * otherwise. The selected function's return value is passed on. */
130 vcl_api_app_worker_add (void)
132 if (vcm->cfg.vpp_app_socket_api)
133 return vcl_sapi_app_worker_add ();
135 return vcl_bapi_app_worker_add ();
/* Delete the app worker for wrk through vcl_sapi_app_worker_del () when
 * vcm->cfg.vpp_app_socket_api is set and through vcl_bapi_app_worker_del ()
 * otherwise. */
139 vcl_api_app_worker_del (vcl_worker_t * wrk)
141 if (vcm->cfg.vpp_app_socket_api)
142 return vcl_sapi_app_worker_del (wrk);
144 vcl_bapi_app_worker_del (wrk);
/* Release everything owned by wrk and return it to vcm->workers. The whole
 * sequence runs with vcm->workers_lock held.
 * NOTE(review): notify_vpp presumably decides whether
 * vcl_api_app_worker_del () is called - confirm. */
148 vcl_worker_cleanup (vcl_worker_t * wrk, u8 notify_vpp)
150 clib_spinlock_lock (&vcm->workers_lock);
152 vcl_api_app_worker_del (wrk);
/* Only a strictly positive mq epoll fd is closed */
154 if (wrk->mqs_epfd > 0)
155 close (wrk->mqs_epfd);
/* Free the per-worker pools, hash, vectors and rd/wr/ex bitmaps */
156 pool_free (wrk->sessions);
157 pool_free (wrk->mq_evt_conns);
158 hash_free (wrk->session_index_by_vpp_handles);
159 vec_free (wrk->mq_events);
160 vec_free (wrk->mq_msg_vector);
161 vec_free (wrk->unhandled_evts_vector);
162 vec_free (wrk->pending_session_wrk_updates);
163 clib_bitmap_free (wrk->rd_bitmap);
164 clib_bitmap_free (wrk->wr_bitmap);
165 clib_bitmap_free (wrk->ex_bitmap);
166 vcl_worker_free (wrk);
167 clib_spinlock_unlock (&vcm->workers_lock);
/* Destructor installed for vcl_worker_stop_key. Cleans up the worker
 * returned by vcl_get_worker_index (), asking for vpp to be notified, and
 * then resets the worker index to ~0. */
171 vcl_worker_cleanup_cb (void *arg)
176 wrk_index = vcl_get_worker_index ();
177 wrk = vcl_worker_get_if_valid (wrk_index);
181 vcl_worker_cleanup (wrk, 1 /* notify vpp */ );
182 vcl_set_worker_index (~0);
183 VDBG (0, "cleaned up worker %u", wrk_index);
/* Close the worker's app api socket, update the state of its sessions and
 * detach all fifo segments. */
187 vcl_worker_detach_sessions (vcl_worker_t *wrk)
192 close (wrk->app_api_sock.fd);
193 pool_foreach (s, wrk->sessions)
/* Listeners are moved to the LISTEN_NO_MQ state */
195 if (s->session_state == VCL_STATE_LISTEN)
197 s->session_state = VCL_STATE_LISTEN_NO_MQ;
/* NOTE(review): epoll (VEP) sessions look like they are skipped - confirm */
200 if (s->flags & VCL_SESSION_F_IS_VEP)
/* Sessions reaching this point are marked detached and a DISCONNECTED event
 * carrying their session index is appended to wrk->unhandled_evts_vector */
203 s->session_state = VCL_STATE_DETACHED;
204 vec_add2 (wrk->unhandled_evts_vector, e, 1);
205 e->event_type = SESSION_CTRL_EVT_DISCONNECTED;
206 e->session_index = s->session_index;
210 vcl_segment_detach_all ();
/* Allocate a worker for the calling thread and initialize its state.
 * Allocation and initialization are done under vcm->workers_lock. */
214 vcl_worker_alloc_and_init ()
218 /* This was initialized already */
219 if (vcl_get_worker_index () != ~0)
222 /* Grab lock before selecting mem thread index */
223 clib_spinlock_lock (&vcm->workers_lock);
225 /* Use separate heap map entry for worker */
226 clib_mem_set_thread_index ();
/* The worker pool is capped at vcm->cfg.max_workers elements */
228 if (pool_elts (vcm->workers) == vcm->cfg.max_workers)
230 VDBG (0, "max-workers %u limit reached", vcm->cfg.max_workers);
235 wrk = vcl_worker_alloc ();
236 vcl_set_worker_index (wrk->wrk_index);
237 wrk->thread_id = pthread_self ();
238 wrk->current_pid = getpid ();
/* An mq epoll fd is created only when eventfd based mqs are configured.
 * vcl_needs_real_epoll is raised just for the epoll_create () call */
241 if (vcm->cfg.use_mq_eventfd)
243 wrk->vcl_needs_real_epoll = 1;
244 wrk->mqs_epfd = epoll_create (1);
245 wrk->vcl_needs_real_epoll = 0;
246 if (wrk->mqs_epfd < 0)
248 clib_unix_warning ("epoll_create() returned");
253 wrk->ep_lt_current = VCL_INVALID_SESSION_INDEX;
254 wrk->session_index_by_vpp_handles = hash_create (0, sizeof (uword));
255 clib_time_init (&wrk->clib_time);
/* mq_msg_vector and unhandled_evts_vector are sized first and then have
 * their length reset */
256 vec_validate (wrk->mq_events, 64);
257 vec_validate (wrk->mq_msg_vector, 128);
258 vec_reset_length (wrk->mq_msg_vector);
259 vec_validate (wrk->unhandled_evts_vector, 128);
260 vec_reset_length (wrk->unhandled_evts_vector);
263 clib_spinlock_unlock (&vcm->workers_lock);
/* Add the current worker to vpp and install vcl_worker_cleanup_cb () as the
 * destructor of vcl_worker_stop_key.
 * Runs with vcm->workers_lock held; the lock is also released when adding
 * the worker fails. */
268 vcl_worker_register_with_vpp (void)
270 vcl_worker_t *wrk = vcl_worker_get_current ();
272 clib_spinlock_lock (&vcm->workers_lock);
274 if (vcl_api_app_worker_add ())
276 VDBG (0, "failed to add worker to vpp");
277 clib_spinlock_unlock (&vcm->workers_lock);
/* Failures to set up the cleanup key are only logged */
280 if (pthread_key_create (&vcl_worker_stop_key, vcl_worker_cleanup_cb))
281 VDBG (0, "failed to add pthread cleanup function");
282 if (pthread_setspecific (vcl_worker_stop_key, &wrk->thread_id))
283 VDBG (0, "failed to setup key value");
285 clib_spinlock_unlock (&vcm->workers_lock);
287 VDBG (0, "added worker %u", wrk->wrk_index);
/* NOTE(review): judging by the name this returns the control message queue
 * of wrk - confirm against the body. */
292 vcl_worker_ctrl_mq (vcl_worker_t * wrk)
/* Report how much can be read from session s.
 * Epoll (VEP) sessions yield VPPCOM_EBADFD. Open sessions yield a value
 * derived from the rx fifo (ct_rx_fifo for ct sessions). Listeners yield the
 * number of elements in accept_evts_fifo. Anything else yields
 * VPPCOM_ECONNRESET when the state is VCL_STATE_DISCONNECT and
 * VPPCOM_ENOTCONN otherwise. */
298 vcl_session_read_ready (vcl_session_t * s)
300 if (PREDICT_FALSE (s->flags & VCL_SESSION_F_IS_VEP))
302 VDBG (0, "ERROR: session %u: cannot read from an epoll session!",
304 return VPPCOM_EBADFD;
307 if (vcl_session_is_open (s))
309 if (vcl_session_is_ct (s))
310 return svm_fifo_max_dequeue_cons (s->ct_rx_fifo);
/* Datagram header path: the pre-header is peeked (not dequeued) and its
 * data_length plus SESSION_CONN_HDR_LEN is compared with what the fifo
 * currently holds before data_length is reported */
314 session_dgram_pre_hdr_t ph;
317 max_deq = svm_fifo_max_dequeue_cons (s->rx_fifo);
318 if (max_deq <= SESSION_CONN_HDR_LEN)
320 if (svm_fifo_peek (s->rx_fifo, 0, sizeof (ph), (u8 *) & ph) < 0)
322 if (ph.data_length + SESSION_CONN_HDR_LEN > max_deq)
325 return ph.data_length;
328 return svm_fifo_max_dequeue_cons (s->rx_fifo);
330 else if (s->session_state == VCL_STATE_LISTEN)
332 return clib_fifo_elts (s->accept_evts_fifo);
336 return (s->session_state == VCL_STATE_DISCONNECT) ?
337 VPPCOM_ECONNRESET : VPPCOM_ENOTCONN;
/* Report how much can be written to session s.
 * Epoll (VEP) sessions yield VPPCOM_EBADFD. Open sessions yield the space
 * available in the tx fifo (ct_tx_fifo for ct sessions). Anything not
 * handled by an earlier branch yields VPPCOM_ECONNRESET when the state is
 * VCL_STATE_DISCONNECT and VPPCOM_ENOTCONN otherwise. */
342 vcl_session_write_ready (vcl_session_t * s)
344 if (PREDICT_FALSE (s->flags & VCL_SESSION_F_IS_VEP))
346 VDBG (0, "session %u [0x%llx]: cannot write to an epoll session!",
347 s->session_index, s->vpp_handle);
348 return VPPCOM_EBADFD;
351 if (vcl_session_is_open (s))
353 if (vcl_session_is_ct (s))
354 return svm_fifo_max_enqueue_prod (s->ct_tx_fifo);
/* Datagram header path: room for a session_dgram_hdr_t is subtracted from
 * the space reported */
358 u32 max_enq = svm_fifo_max_enqueue_prod (s->tx_fifo);
360 if (max_enq <= sizeof (session_dgram_hdr_t))
362 return max_enq - sizeof (session_dgram_hdr_t);
365 return svm_fifo_max_enqueue_prod (s->tx_fifo);
/* Listener branch: reports tx fifo space or VPPCOM_EBADFD */
367 else if (s->session_state == VCL_STATE_LISTEN)
370 return svm_fifo_max_enqueue_prod (s->tx_fifo);
372 return VPPCOM_EBADFD;
374 else if (s->session_state == VCL_STATE_UPDATED)
380 return (s->session_state == VCL_STATE_DISCONNECT) ?
381 VPPCOM_ECONNRESET : VPPCOM_ENOTCONN;
/* Allocate len bytes for s->ext_config, zero them and record len and type in
 * the new config. */
386 vcl_session_alloc_ext_cfg (vcl_session_t *s,
387 transport_endpt_ext_cfg_type_t type, u32 len)
392 s->ext_config = clib_mem_alloc (len);
393 clib_memset (s->ext_config, 0, len);
394 s->ext_config->len = len;
395 s->ext_config->type = type;
/* Attach the fifo segment called name and map segment_handle to the new
 * segment's index in vcm->segment_table. The attach and the table update
 * are done under the segment table writer lock. */
401 vcl_segment_attach (u64 segment_handle, char *name, ssvm_segment_type_t type,
404 fifo_segment_create_args_t _a, *a = &_a;
407 memset (a, 0, sizeof (*a));
408 a->segment_name = name;
409 a->segment_type = type;
411 if (type == SSVM_SEGMENT_MEMFD)
414 clib_rwlock_writer_lock (&vcm->segment_table_lock);
/* NOTE(review): confirm segment_table_lock is released if this failure
 * branch leaves the function early */
416 if ((rv = fifo_segment_attach (&vcm->segment_main, a)))
418 clib_warning ("svm_fifo_segment_attach ('%s') failed", name);
421 hash_set (vcm->segment_table, segment_handle, a->new_segment_indices[0]);
423 clib_rwlock_writer_unlock (&vcm->segment_table_lock);
425 vec_free (a->new_segment_indices);
430 vcl_segment_table_lookup (u64 segment_handle)
434 clib_rwlock_reader_lock (&vcm->segment_table_lock);
435 seg_indexp = hash_get (vcm->segment_table, segment_handle);
436 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
439 return VCL_INVALID_SEGMENT_INDEX;
440 return ((u32) * seg_indexp);
444 vcl_segment_detach (u64 segment_handle)
446 fifo_segment_main_t *sm = &vcm->segment_main;
447 fifo_segment_t *segment;
450 segment_index = vcl_segment_table_lookup (segment_handle);
451 if (segment_index == (u32) ~ 0)
454 clib_rwlock_writer_lock (&vcm->segment_table_lock);
456 segment = fifo_segment_get_segment (sm, segment_index);
457 fifo_segment_delete (sm, segment);
458 hash_unset (vcm->segment_table, segment_handle);
460 clib_rwlock_writer_unlock (&vcm->segment_table_lock);
462 VDBG (0, "detached segment %u handle %u", segment_index, segment_handle);
/* Detach every segment found in vcm->segment_table.
 * The handles are first copied into a temporary vector under the reader
 * lock and detached only after it is released, since vcl_segment_detach ()
 * takes the segment table lock itself. */
466 vcl_segment_detach_all ()
468 u64 *segs = 0, *seg, key;
471 clib_rwlock_reader_lock (&vcm->segment_table_lock);
473 hash_foreach (key, val, vcm->segment_table, ({ vec_add1 (segs, key); }));
475 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
477 vec_foreach (seg, segs)
478 vcl_segment_detach (seg[0]);
/* Attach session s to the rx/tx fifos found at rxf_offset/txf_offset in the
 * segment identified by segment_handle and, for non-ct sessions with a valid
 * mq_offset, to a vpp event message queue. */
484 vcl_segment_attach_session (uword segment_handle, uword rxf_offset,
485 uword txf_offset, uword mq_offset, u32 mq_index,
486 u8 is_ct, vcl_session_t *s)
488 u32 fs_index, eqs_index;
489 svm_fifo_t *rxf, *txf;
493 fs_index = vcl_segment_table_lookup (segment_handle);
494 if (fs_index == VCL_INVALID_SEGMENT_INDEX)
496 VDBG (0, "ERROR: segment for session %u is not mounted!",
/* The message queue is looked up in the segment whose handle is returned by
 * vcl_vpp_worker_segment_handle (0); (uword) ~0 means "no mq offset" */
501 if (!is_ct && mq_offset != (uword) ~0)
503 eqs_handle = vcl_vpp_worker_segment_handle (0);
504 eqs_index = vcl_segment_table_lookup (eqs_handle);
505 ASSERT (eqs_index != VCL_INVALID_SEGMENT_INDEX);
/* Segment pointers are only used while the reader lock is held */
508 clib_rwlock_reader_lock (&vcm->segment_table_lock);
510 fs = fifo_segment_get_segment (&vcm->segment_main, fs_index);
511 rxf = fifo_segment_alloc_fifo_w_offset (fs, rxf_offset);
512 txf = fifo_segment_alloc_fifo_w_offset (fs, txf_offset);
513 rxf->segment_index = fs_index;
514 txf->segment_index = fs_index;
516 if (!is_ct && mq_offset != (uword) ~0)
518 fs = fifo_segment_get_segment (&vcm->segment_main, eqs_index);
519 s->vpp_evt_q = fifo_segment_msg_q_attach (fs, mq_offset, mq_index);
522 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
/* Record the owning session and worker in both fifos */
526 rxf->shr->client_session_index = s->session_index;
527 txf->shr->client_session_index = s->session_index;
528 rxf->client_thread_index = vcl_get_worker_index ();
529 txf->client_thread_index = vcl_get_worker_index ();
/* Free the client side of the fifos of session s. The rx/tx pair and the
 * ct_rx/ct_tx pair are each freed in the segment their rx fifo's
 * segment_index points to. All of it runs under the segment table reader
 * lock. */
543 vcl_session_detach_fifos (vcl_session_t *s)
550 clib_rwlock_reader_lock (&vcm->segment_table_lock);
552 fs = fifo_segment_get_segment_if_valid (&vcm->segment_main,
553 s->rx_fifo->segment_index);
557 fifo_segment_free_client_fifo (fs, s->rx_fifo);
558 fifo_segment_free_client_fifo (fs, s->tx_fifo);
561 fs = fifo_segment_get_segment_if_valid (&vcm->segment_main,
562 s->ct_rx_fifo->segment_index);
566 fifo_segment_free_client_fifo (fs, s->ct_rx_fifo);
567 fifo_segment_free_client_fifo (fs, s->ct_tx_fifo);
571 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
/* Attach to the message queue at mq_offset/mq_index in the segment
 * identified by segment_handle and store it in *mq. An unknown handle is
 * logged as an error. */
575 vcl_segment_attach_mq (uword segment_handle, uword mq_offset, u32 mq_index,
581 fs_index = vcl_segment_table_lookup (segment_handle);
582 if (fs_index == VCL_INVALID_SEGMENT_INDEX)
584 VDBG (0, "ERROR: mq segment %lx for is not attached!", segment_handle);
588 clib_rwlock_reader_lock (&vcm->segment_table_lock);
590 fs = fifo_segment_get_segment (&vcm->segment_main, fs_index);
591 *mq = fifo_segment_msg_q_attach (fs, mq_offset, mq_index);
593 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
/* Run fifo_segment_msg_qs_discover () with the n_fds descriptors in fds on
 * the segment identified by segment_handle. An unknown handle is logged as
 * an error. */
599 vcl_segment_discover_mqs (uword segment_handle, int *fds, u32 n_fds)
604 fs_index = vcl_segment_table_lookup (segment_handle);
605 if (fs_index == VCL_INVALID_SEGMENT_INDEX)
607 VDBG (0, "ERROR: mq segment %lx for is not attached!", segment_handle);
611 clib_rwlock_reader_lock (&vcm->segment_table_lock);
613 fs = fifo_segment_get_segment (&vcm->segment_main, fs_index);
614 fifo_segment_msg_qs_discover (fs, fds, n_fds);
616 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
/* Allocate a chunk of size bytes from slice slice_index of the segment
 * identified by segment_handle and store the chunk's offset within the
 * segment in *offset. An unknown handle is logged as an error. */
622 vcl_segment_alloc_chunk (uword segment_handle, u32 slice_index, u32 size,
629 fs_index = vcl_segment_table_lookup (segment_handle);
630 if (fs_index == VCL_INVALID_SEGMENT_INDEX)
632 VDBG (0, "ERROR: mq segment %lx for is not attached!", segment_handle);
636 clib_rwlock_reader_lock (&vcm->segment_table_lock);
638 fs = fifo_segment_get_segment (&vcm->segment_main, fs_index);
639 c = fifo_segment_alloc_chunk_w_slice (fs, slice_index, size);
640 *offset = fifo_segment_chunk_offset (fs, c);
642 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
/* Give session s its own duplicates of rxf and txf and subscribe the
 * current worker's vpp_wrk_index to both. Both duplicates are created in
 * the segment rxf->segment_index refers to. */
648 vcl_session_share_fifos (vcl_session_t *s, svm_fifo_t *rxf, svm_fifo_t *txf)
650 vcl_worker_t *wrk = vcl_worker_get_current ();
653 clib_rwlock_reader_lock (&vcm->segment_table_lock);
655 fs = fifo_segment_get_segment (&vcm->segment_main, rxf->segment_index);
656 s->rx_fifo = fifo_segment_duplicate_fifo (fs, rxf);
657 s->tx_fifo = fifo_segment_duplicate_fifo (fs, txf);
659 clib_rwlock_reader_unlock (&vcm->segment_table_lock);
661 svm_fifo_add_subscriber (s->rx_fifo, wrk->vpp_wrk_index);
662 svm_fifo_add_subscriber (s->tx_fifo, wrk->vpp_wrk_index);
/* Map a session state to a printable name.
 * NOTE(review): states without a case of their own presumably end up as
 * "UNKNOWN_STATE" - confirm. */
668 vcl_session_state_str (vcl_session_state_t state)
674 case VCL_STATE_CLOSED:
677 case VCL_STATE_LISTEN:
680 case VCL_STATE_READY:
683 case VCL_STATE_VPP_CLOSING:
684 st = "STATE_VPP_CLOSING";
686 case VCL_STATE_DISCONNECT:
687 st = "STATE_DISCONNECT";
689 case VCL_STATE_DETACHED:
690 st = "STATE_DETACHED";
692 case VCL_STATE_UPDATED:
693 st = "STATE_UPDATED";
695 case VCL_STATE_LISTEN_NO_MQ:
696 st = "STATE_LISTEN_NO_MQ";
699 st = "UNKNOWN_STATE";
/* format () helper: takes a u8 * from args and appends the four bytes it
 * points to as a dotted decimal address. */
707 vcl_format_ip4_address (u8 *s, va_list *args)
709 u8 *a = va_arg (*args, u8 *);
710 return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
/* format () helper: takes an ip6_address_t * from args and appends it as
 * colon separated hex groups. A run of more than one zero group is
 * abbreviated as "::". */
714 vcl_format_ip6_address (u8 *s, va_list *args)
716 ip6_address_t *a = va_arg (*args, ip6_address_t *);
717 u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
/* First pass: find the longest run of zero groups. ARRAY_LEN (a->as_u16)
 * doubles as the "no run" marker for the index variables */
719 i_max_n_zero = ARRAY_LEN (a->as_u16);
721 i_first_zero = i_max_n_zero;
723 for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
725 u32 is_zero = a->as_u16[i] == 0;
726 if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
/* A run ends at a non-zero group or at the last group; it replaces the
 * current best only if strictly longer */
732 if ((!is_zero && n_zeros > max_n_zeros) ||
733 (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
735 i_max_n_zero = i_first_zero;
736 max_n_zeros = n_zeros;
737 i_first_zero = ARRAY_LEN (a->as_u16);
/* Second pass: emit the groups, substituting "::" for the selected run */
742 last_double_colon = 0;
743 for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
745 if (i == i_max_n_zero && max_n_zeros > 1)
747 s = format (s, "::");
748 i += max_n_zeros - 1;
749 last_double_colon = 1;
/* No ':' separator before the first group or right after "::" */
753 s = format (s, "%s%x", (last_double_colon || i == 0) ? "" : ":",
754 clib_net_to_host_u16 (a->as_u16[i]));
755 last_double_colon = 0;
762 /* Format an IP46 address. */
/* Takes an ip46_address_t * and an ip46_type_t from args and appends the
 * address through the ip4 or the ip6 formatter, depending on is_ip4. */
764 vcl_format_ip46_address (u8 *s, va_list *args)
766 ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
767 ip46_type_t type = va_arg (*args, ip46_type_t);
/* The address family is derived from the address itself here */
773 is_ip4 = ip46_address_is_ip4 (ip46);
783 return is_ip4 ? format (s, "%U", vcl_format_ip4_address, &ip46->ip4) :
784 format (s, "%U", vcl_format_ip6_address, &ip46->ip6);
788 * fd.io coding-style-patch-verification: ON
791 * eval: (c-set-style "gnu")