2 * Copyright (c) 2019 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 #include <vcl/vcl_locked.h>
17 #include <vcl/vcl_private.h>
/* Per-session record used by the locked-session (vls) layer.  Only the last
 * member is visible in this excerpt; code in this file also accesses
 * session_index, worker_index, vls_index and lock through pointers of this
 * type. */
19 typedef struct vcl_locked_session_
/* Vector of worker indices; measured with vec_len () and edited with
 * vec_add1 () / vec_del1 () elsewhere in this file. */
26 u32 *workers_subscribed;
27 } vcl_locked_session_t;
/* Module-wide vls state.  The closing line of this typedef is not visible in
 * this excerpt; the type is used below under the name vls_main_t. */
29 typedef struct vcl_main_
/* Pool of locked sessions; a vls handle is used as an index into it. */
31 vcl_locked_session_t *vls_pool;
/* Reader/writer lock taken by the vls_table_* helpers. */
32 clib_rwlock_t vls_table_lock;
/* Hash keyed by session index; vls_si_to_vlsh () reads a vls handle from it. */
33 uword *session_index_to_vlsh_table;
/* Pointer through which the rest of this file reaches the module state.
 * The vls_main object it points at is defined on a line not visible here. */
37 vls_main_t *vlsm = &vls_main;
/* Acquire the vls table lock as a reader. */
40 vls_table_rlock (void)
42 clib_rwlock_reader_lock (&vlsm->vls_table_lock);
/* Release a reader hold on the vls table lock. */
46 vls_table_runlock (void)
48 clib_rwlock_reader_unlock (&vlsm->vls_table_lock);
/* Acquire the vls table lock as a writer. */
52 vls_table_wlock (void)
54 clib_rwlock_writer_lock (&vlsm->vls_table_lock);
/* Release the writer hold on the vls table lock. */
58 vls_table_wunlock (void)
60 clib_rwlock_writer_unlock (&vlsm->vls_table_lock);
/* Bit flags OR-ed into the locks_acq mask by vls_mt_acq_locks () and tested
 * by vls_mt_rel_locks ().  The enclosing declaration (presumably an enum) is
 * not visible in this excerpt. */
73 VLS_MT_LOCK_MQ = 1 << 0,
74 VLS_MT_LOCK_SPOOL = 1 << 1
/* Worker index saved from vcl_get_worker_index () in vls_app_create () and in
 * the fork child handler; ~0 until then. */
77 static int vls_wrk_index = ~0;
/* Thread counter: incremented by the registration fragment below, reset to 0
 * in the fork child handler, and compared against 1 by vls_mt_guard. */
78 static volatile int vls_mt_n_threads;
/* Statically initialised mutexes wrapped by the vls_mt_mq_* and
 * vls_mt_spool_lock () / vls_mt_create_unlock () helpers. */
79 static pthread_mutex_t vls_mt_mq_mlock = PTHREAD_MUTEX_INITIALIZER;
80 static pthread_mutex_t vls_mt_spool_mlock = PTHREAD_MUTEX_INITIALIZER;
/* Fragment of a helper whose signature line is not visible in this excerpt:
 * it increments the thread counter and then calls vcl_set_worker_index ()
 * with the saved vls_wrk_index.
 * NOTE(review): the increment is a plain read-modify-write on a volatile
 * int; confirm callers cannot race here. */
85 vls_mt_n_threads += 1;
86 vcl_set_worker_index (vls_wrk_index);
/* Fragment: locks vls_mt_mq_mlock.  The signature line is not visible here;
 * presumably this is the lock counterpart of vls_mt_mq_unlock (). */
92 pthread_mutex_lock (&vls_mt_mq_mlock);
/* Unlock vls_mt_mq_mlock. */
96 vls_mt_mq_unlock (void)
98 pthread_mutex_unlock (&vls_mt_mq_mlock);
/* Lock vls_mt_spool_mlock; released by vls_mt_create_unlock (). */
102 vls_mt_spool_lock (void)
104 pthread_mutex_lock (&vls_mt_spool_mlock);
/* Unlock vls_mt_spool_mlock, i.e. the mutex taken by vls_mt_spool_lock ().
 * NOTE(review): the name says "create" while the matching lock helper says
 * "spool"; consider aligning the two names. */
108 vls_mt_create_unlock (void)
110 pthread_mutex_unlock (&vls_mt_spool_mlock);
/* Return the VCL session handle built from the locked session's
 * session_index. */
113 static inline vcl_session_handle_t
114 vls_to_sh (vcl_locked_session_t * vls)
116 return vcl_session_handle_from_index (vls->session_index);
/* Compute the session handle for vls and then release the table reader
 * lock.  The return statement is not visible in this excerpt; presumably the
 * computed handle is returned. */
119 static inline vcl_session_handle_t
120 vls_to_sh_tu (vcl_locked_session_t * vls)
122 vcl_session_handle_t sh;
123 sh = vls_to_sh (vls);
124 vls_table_runlock ();
/* Allocate a locked session for session handle sh and return its vls index.
 * The new pool entry records the session index and worker extracted from sh
 * plus its own position in the pool, is registered in the session-index
 * hash, and gets its spinlock initialised.  The table writer lock is released
 * before returning; the matching acquisition and the value stored in the
 * hash are on lines not visible in this excerpt. */
129 vls_alloc (vcl_session_handle_t sh)
131 vcl_locked_session_t *vls;
134 pool_get (vlsm->vls_pool, vls);
135 vls->session_index = vppcom_session_index (sh);
136 vls->worker_index = vppcom_session_worker (sh);
/* The vls index is the element's offset within the pool. */
137 vls->vls_index = vls - vlsm->vls_pool;
138 hash_set (vlsm->session_index_to_vlsh_table, vls->session_index,
140 clib_spinlock_init (&vls->lock);
141 vls_table_wunlock ();
142 return vls->vls_index;
/* Look up the locked session for handle vlsh without taking any lock.
 * Returns the pool element when the index is in use; the statement executed
 * for a free index is elided here (callers test the result for null). */
145 static vcl_locked_session_t *
146 vls_get (vls_handle_t vlsh)
148 if (pool_is_free_index (vlsm->vls_pool, vlsh))
150 return pool_elt_at_index (vlsm->vls_pool, vlsh);
/* Release a locked session: remove its session-index hash entry, free its
 * spinlock and return the element to the pool. */
154 vls_free (vcl_locked_session_t * vls)
157 hash_unset (vlsm->session_index_to_vlsh_table, vls->session_index);
158 clib_spinlock_free (&vls->lock);
159 pool_put (vlsm->vls_pool, vls);
/* Look up the locked session for vlsh and take its spinlock.
 * The statement executed for a free index and the final return are on lines
 * not visible in this excerpt. */
162 static vcl_locked_session_t *
163 vls_get_and_lock (vls_handle_t vlsh)
165 vcl_locked_session_t *vls;
166 if (pool_is_free_index (vlsm->vls_pool, vlsh))
168 vls = pool_elt_at_index (vlsm->vls_pool, vlsh);
169 clib_spinlock_lock (&vls->lock);
/* Fetch and lock the session for vlsh via vls_get_and_lock ().
 * A vls_table_runlock () call is visible after the lookup; the condition it
 * runs under and the table-lock acquisition are elided, so presumably the
 * reader lock is taken first and dropped again only when the lookup fails.
 * Callers treat a null result as a bad handle. */
173 static vcl_locked_session_t *
174 vls_get_w_dlock (vls_handle_t vlsh)
176 vcl_locked_session_t *vls;
178 vls = vls_get_and_lock (vlsh);
180 vls_table_runlock ();
/* Release the per-session spinlock of vls. */
185 vls_unlock (vcl_locked_session_t * vls)
187 clib_spinlock_unlock (&vls->lock);
/* Look up the session for vlsh and release the table reader lock.
 * Presumably the session's spinlock is released in between; that statement
 * and the table-lock acquisition are not visible in this excerpt. */
191 vls_get_and_unlock (vls_handle_t vlsh)
193 vcl_locked_session_t *vls;
195 vls = vls_get (vlsh);
197 vls_table_runlock ();
/* Release the table reader lock for a session obtained with
 * vls_get_w_dlock ().  Any use of vls itself (presumably unlocking it) is on
 * a line not visible in this excerpt. */
201 vls_dunlock (vcl_locked_session_t * vls)
204 vls_table_runlock ();
/* Return the number of subscribed workers; non-zero means the session is
 * shared. */
208 vls_is_shared (vcl_locked_session_t * vls)
210 return vec_len (vls->workers_subscribed);
/* Scan workers_subscribed for wrk_index.  The return statements are not
 * visible in this excerpt; presumably non-zero on a match and zero
 * otherwise. */
214 vls_is_shared_by_wrk (vcl_locked_session_t * vls, u32 wrk_index)
217 for (i = 0; i < vec_len (vls->workers_subscribed); i++)
218 if (vls->workers_subscribed[i] == wrk_index)
/* Detach worker wrk from the locked session vls.
 *
 * Subscriber pass: workers_subscribed is scanned for wrk->wrk_index; on a
 * match the worker is removed as subscriber of the session's rx and tx
 * fifos, deleted from the vector, and its session is cleaned up without a
 * disconnect.
 * Owner part: reached past the check against vls->worker_index; when other
 * subscribers remain, ownership moves to the first of them, that entry is
 * removed and a session worker update is sent; the final cleanup is done
 * with do_disconnect set.
 *
 * Braces, return statements and some guards are elided from this excerpt, so
 * the exact control flow between the visible statements should be confirmed
 * against the full source. */
224 vls_unshare_session (vcl_locked_session_t * vls, vcl_worker_t * wrk)
229 for (i = 0; i < vec_len (vls->workers_subscribed); i++)
231 if (vls->workers_subscribed[i] != wrk->wrk_index)
234 s = vcl_session_get (wrk, vls->session_index);
237 svm_fifo_del_subscriber (s->rx_fifo, wrk->vpp_wrk_index);
238 svm_fifo_del_subscriber (s->tx_fifo, wrk->vpp_wrk_index);
240 vec_del1 (vls->workers_subscribed, i);
241 vcl_session_cleanup (wrk, s, vcl_session_handle (s),
242 0 /* do_disconnect */ );
246 /* Return, if this is not the owning worker */
247 if (vls->worker_index != wrk->wrk_index)
250 s = vcl_session_get (wrk, vls->session_index);
252 /* Check if we can change owner or close */
253 if (vec_len (vls->workers_subscribed))
255 vls->worker_index = vls->workers_subscribed[0];
256 vec_del1 (vls->workers_subscribed, 0);
257 vcl_send_session_worker_update (wrk, s, vls->worker_index);
/* Only the new owner is notified; remaining subscribers are just logged. */
258 if (vec_len (vls->workers_subscribed))
259 clib_warning ("more workers need to be updated");
263 vcl_session_cleanup (wrk, s, vcl_session_handle (s),
264 1 /* do_disconnect */ );
/* Register worker wrk as a subscriber of session s: resolve the locked
 * session from s->session_index, append wrk->wrk_index to
 * workers_subscribed, and add the worker as subscriber on the session's rx
 * and tx fifos.  Null handling and the release of the lock taken by
 * vls_get_w_dlock () are on lines not visible in this excerpt. */
271 vls_share_vcl_session (vcl_worker_t * wrk, vcl_session_t * s)
273 vcl_locked_session_t *vls;
275 vls = vls_get_w_dlock (vls_session_index_to_vlsh (s->session_index));
278 vec_add1 (vls->workers_subscribed, wrk->wrk_index);
281 svm_fifo_add_subscriber (s->rx_fifo, wrk->vpp_wrk_index);
282 svm_fifo_add_subscriber (s->tx_fifo, wrk->vpp_wrk_index);
/* Populate the current worker from parent_wrk after a fork: duplicate the
 * parent's event-queue vector, session pool and vpp-handle hash, then call
 * vls_share_vcl_session () for every session in the copied pool.  The end of
 * the pool_foreach body is not visible in this excerpt. */
288 vls_worker_copy_on_fork (vcl_worker_t * parent_wrk)
290 vcl_worker_t *wrk = vcl_worker_get_current ();
293 wrk->vpp_event_queues = vec_dup (parent_wrk->vpp_event_queues);
294 wrk->sessions = pool_dup (parent_wrk->sessions);
295 wrk->session_index_by_vpp_handles =
296 hash_dup (parent_wrk->session_index_by_vpp_handles);
299 pool_foreach (s, wrk->sessions, ({
300 vls_share_vcl_session (wrk, s);
/* Acquire the multi-thread locks needed for operation op and record them as
 * VLS_MT_LOCK_* bits in *locks_acq.
 *
 * Visible behaviour: the session for vls is looked up on the current worker
 * and its non-blocking attribute is read; inside the switch on op (its header
 * and first case label are elided) is_nonblk is recomputed from read or
 * write readiness and the MQ bit is set for the read, write and xpoll style
 * cases; the SPOOL case takes the spool mutex and sets the SPOOL bit.
 * The conditions guarding the MQ lock, the MQ lock calls themselves, and
 * the handling of a null vls (callers do pass 0) are on elided lines. */
306 vls_mt_acq_locks (vcl_locked_session_t * vls, vls_mt_ops_t op, int *locks_acq)
308 vcl_worker_t *wrk = vcl_worker_get_current ();
309 vcl_session_t *s = 0;
314 s = vcl_session_get (wrk, vls->session_index);
315 if (PREDICT_FALSE (!s))
317 is_nonblk = VCL_SESS_ATTR_TEST (s->attr, VCL_SESS_ATTR_NONBLOCK);
324 is_nonblk = vcl_session_read_ready (s) != 0;
328 *locks_acq |= VLS_MT_LOCK_MQ;
331 case VLS_MT_OP_WRITE:
333 is_nonblk = vcl_session_write_ready (s) != 0;
337 *locks_acq |= VLS_MT_LOCK_MQ;
340 case VLS_MT_OP_XPOLL:
342 *locks_acq |= VLS_MT_LOCK_MQ;
344 case VLS_MT_OP_SPOOL:
345 vls_mt_spool_lock ();
346 *locks_acq |= VLS_MT_LOCK_SPOOL;
/* Release the locks named by the locks_acq bit mask.  The SPOOL bit maps to
 * vls_mt_create_unlock (); the statement run for the MQ bit is on a line not
 * visible here (presumably vls_mt_mq_unlock ()). */
354 vls_mt_rel_locks (int locks_acq)
356 if (locks_acq & VLS_MT_LOCK_MQ)
358 if (locks_acq & VLS_MT_LOCK_SPOOL)
359 vls_mt_create_unlock ();
/* Declares _locks_acq in the caller's scope and, when more than one thread
 * has been counted in vls_mt_n_threads, calls vls_mt_acq_locks () for
 * (_vls, _op).  It expands to a declaration plus several statements, so it
 * can appear at most once per scope and must be paired with
 * vls_mt_unguard () in that same scope.
 * NOTE(review): the first `if (...)` ends in `;`, which gives it an empty
 * body; the statement on the following (elided) line therefore does not
 * depend on the worker-index check.  This looks unintended - confirm and
 * drop the stray semicolon. */
362 #define vls_mt_guard(_vls, _op) \
363 int _locks_acq = 0; \
364 if (PREDICT_FALSE (vcl_get_worker_index () == ~0)); \
366 if (PREDICT_FALSE (vls_mt_n_threads > 1)) \
367 vls_mt_acq_locks (_vls, _op, &_locks_acq); \
/* Counterpart of vls_mt_guard: releases whatever locks were recorded in the
 * _locks_acq variable that the guard declared in the same scope.  Expands to
 * a bare `if` without braces or a trailing semicolon. */
369 #define vls_mt_unguard() \
370 if (PREDICT_FALSE (_locks_acq)) \
371 vls_mt_rel_locks (_locks_acq)
/* Write nbytes from buf to the session behind vlsh.
 * Returns VPPCOM_EBADFD when the handle cannot be resolved.  Otherwise the
 * multi-thread guard is taken for a write operation and
 * vppcom_session_write () is called on the handle produced by
 * vls_to_sh_tu (), which also releases the table reader lock;
 * vls_get_and_unlock () runs afterwards.  The guard release and the final
 * return are on lines not visible in this excerpt. */
374 vls_write (vls_handle_t vlsh, void *buf, size_t nbytes)
376 vcl_locked_session_t *vls;
379 if (!(vls = vls_get_w_dlock (vlsh)))
380 return VPPCOM_EBADFD;
382 vls_mt_guard (vls, VLS_MT_OP_WRITE);
383 rv = vppcom_session_write (vls_to_sh_tu (vls), buf, nbytes);
385 vls_get_and_unlock (vlsh);
/* Same locking sequence as vls_write (), but forwards to
 * vppcom_session_write_msg ().  Returns VPPCOM_EBADFD for an unresolvable
 * handle; the guard release and final return are elided here. */
390 vls_write_msg (vls_handle_t vlsh, void *buf, size_t nbytes)
392 vcl_locked_session_t *vls;
395 if (!(vls = vls_get_w_dlock (vlsh)))
396 return VPPCOM_EBADFD;
397 vls_mt_guard (vls, VLS_MT_OP_WRITE);
398 rv = vppcom_session_write_msg (vls_to_sh_tu (vls), buf, nbytes);
400 vls_get_and_unlock (vlsh);
/* Locked wrapper around vppcom_session_sendto (), guarded as a write
 * operation.  Returns VPPCOM_EBADFD for an unresolvable handle.  The rest of
 * the parameter list (which declares ep), the guard release and the final
 * return are on lines not visible in this excerpt. */
405 vls_sendto (vls_handle_t vlsh, void *buf, int buflen, int flags,
408 vcl_locked_session_t *vls;
411 if (!(vls = vls_get_w_dlock (vlsh)))
412 return VPPCOM_EBADFD;
413 vls_mt_guard (vls, VLS_MT_OP_WRITE);
414 rv = vppcom_session_sendto (vls_to_sh_tu (vls), buf, buflen, flags, ep);
416 vls_get_and_unlock (vlsh);
/* Read up to nbytes into buf from the session behind vlsh.
 * Returns VPPCOM_EBADFD for an unresolvable handle.  Otherwise the
 * multi-thread guard is taken for a read operation,
 * vppcom_session_read () is called on the handle from vls_to_sh_tu (), and
 * vls_get_and_unlock () runs afterwards.  The guard release and final return
 * are elided here. */
421 vls_read (vls_handle_t vlsh, void *buf, size_t nbytes)
423 vcl_locked_session_t *vls;
426 if (!(vls = vls_get_w_dlock (vlsh)))
427 return VPPCOM_EBADFD;
428 vls_mt_guard (vls, VLS_MT_OP_READ);
429 rv = vppcom_session_read (vls_to_sh_tu (vls), buf, nbytes);
431 vls_get_and_unlock (vlsh);
/* Locked wrapper around vppcom_session_recvfrom (), guarded as a read
 * operation.  Returns VPPCOM_EBADFD for an unresolvable handle.  The tail of
 * the parameter list, the last argument of the forwarded call, the guard
 * release and the final return are on lines not visible in this excerpt. */
436 vls_recvfrom (vls_handle_t vlsh, void *buffer, uint32_t buflen, int flags,
439 vcl_locked_session_t *vls;
442 if (!(vls = vls_get_w_dlock (vlsh)))
443 return VPPCOM_EBADFD;
444 vls_mt_guard (vls, VLS_MT_OP_READ);
445 rv = vppcom_session_recvfrom (vls_to_sh_tu (vls), buffer, buflen, flags,
448 vls_get_and_unlock (vlsh);
/* Locked wrapper around vppcom_session_attr ().  Returns VPPCOM_EBADFD for
 * an unresolvable handle.  Unlike the read/write wrappers, no multi-thread
 * guard is taken in the visible lines. */
453 vls_attr (vls_handle_t vlsh, uint32_t op, void *buffer, uint32_t * buflen)
455 vcl_locked_session_t *vls;
458 if (!(vls = vls_get_w_dlock (vlsh)))
459 return VPPCOM_EBADFD;
460 rv = vppcom_session_attr (vls_to_sh_tu (vls), op, buffer, buflen);
461 vls_get_and_unlock (vlsh);
/* Locked wrapper around vppcom_session_bind ().  Returns VPPCOM_EBADFD for
 * an unresolvable handle.  No multi-thread guard is taken in the visible
 * lines. */
466 vls_bind (vls_handle_t vlsh, vppcom_endpt_t * ep)
468 vcl_locked_session_t *vls;
471 if (!(vls = vls_get_w_dlock (vlsh)))
472 return VPPCOM_EBADFD;
473 rv = vppcom_session_bind (vls_to_sh_tu (vls), ep);
474 vls_get_and_unlock (vlsh);
/* Locked wrapper around vppcom_session_listen (), guarded with
 * VLS_MT_OP_XPOLL.  Returns VPPCOM_EBADFD for an unresolvable handle; the
 * guard release and final return are elided here. */
479 vls_listen (vls_handle_t vlsh, int q_len)
481 vcl_locked_session_t *vls;
484 if (!(vls = vls_get_w_dlock (vlsh)))
485 return VPPCOM_EBADFD;
486 vls_mt_guard (vls, VLS_MT_OP_XPOLL);
487 rv = vppcom_session_listen (vls_to_sh_tu (vls), q_len);
489 vls_get_and_unlock (vlsh);
/* Locked wrapper around vppcom_session_connect (), guarded with
 * VLS_MT_OP_XPOLL.  Returns VPPCOM_EBADFD for an unresolvable handle; the
 * guard release and final return are elided here. */
494 vls_connect (vls_handle_t vlsh, vppcom_endpt_t * server_ep)
496 vcl_locked_session_t *vls;
499 if (!(vls = vls_get_w_dlock (vlsh)))
500 return VPPCOM_EBADFD;
501 vls_mt_guard (vls, VLS_MT_OP_XPOLL);
502 rv = vppcom_session_connect (vls_to_sh_tu (vls), server_ep);
504 vls_get_and_unlock (vlsh);
/* Accept a connection on listener_vlsh and wrap it in a new locked session.
 * Returns VPPCOM_EBADFD when the listener handle cannot be resolved.  The
 * accept call runs under the VLS_MT_OP_SPOOL guard; the accepted session
 * handle is passed to vls_alloc (), and if that yields VLS_INVALID_HANDLE the
 * accepted session is closed again.  Returns the new vls handle.
 * Handling of a failed accept and the guard release are on lines not visible
 * in this excerpt. */
509 vls_accept (vls_handle_t listener_vlsh, vppcom_endpt_t * ep, int flags)
511 vls_handle_t accepted_vlsh;
512 vcl_locked_session_t *vls;
515 if (!(vls = vls_get_w_dlock (listener_vlsh)))
516 return VPPCOM_EBADFD;
517 vls_mt_guard (vls, VLS_MT_OP_SPOOL);
518 sh = vppcom_session_accept (vls_to_sh_tu (vls), ep, flags);
520 vls_get_and_unlock (listener_vlsh);
523 accepted_vlsh = vls_alloc (sh);
524 if (PREDICT_FALSE (accepted_vlsh == VLS_INVALID_HANDLE))
525 vppcom_session_close (sh);
526 return accepted_vlsh;
/* Create a VCL session for proto and wrap it in a locked session.
 * The create call runs under the VLS_MT_OP_SPOOL guard (no vls exists yet, so
 * 0 is passed).  Returns VLS_INVALID_HANDLE when session creation fails; if
 * vls_alloc () fails the freshly created session is closed.  The guard
 * release and final return are on lines not visible in this excerpt. */
530 vls_create (uint8_t proto, uint8_t is_nonblocking)
532 vcl_session_handle_t sh;
535 vls_mt_guard (0, VLS_MT_OP_SPOOL);
536 sh = vppcom_session_create (proto, is_nonblocking);
538 if (sh == INVALID_SESSION_ID)
539 return VLS_INVALID_HANDLE;
541 vlsh = vls_alloc (sh);
542 if (PREDICT_FALSE (vlsh == VLS_INVALID_HANDLE))
543 vppcom_session_close (sh);
/* Close the locked session behind vlsh.
 * Every visible exit path releases the table writer lock, so the lock is
 * presumably taken as a writer on an elided line near the top.  Visible
 * paths:
 *  - lookup failure: returns VPPCOM_EBADFD;
 *  - shared session: the current worker is unshared from it and the vls
 *    entry is kept;
 *  - otherwise: vppcom_session_close () is called on the session handle.
 * The guard is taken with VLS_MT_OP_SPOOL and a null vls.  Guard releases,
 * freeing of the vls entry and the return statements of the last two paths
 * are on lines not visible in this excerpt. */
549 vls_close (vls_handle_t vlsh)
551 vcl_locked_session_t *vls;
556 vls = vls_get_and_lock (vlsh);
559 vls_table_wunlock ();
560 return VPPCOM_EBADFD;
563 vls_mt_guard (0, VLS_MT_OP_SPOOL);
564 if (vls_is_shared (vls))
566 /* At least two workers share the session so vls won't be freed */
567 vls_unshare_session (vls, vcl_worker_get_current ());
570 vls_table_wunlock ();
574 rv = vppcom_session_close (vls_to_sh (vls));
578 vls_table_wunlock ();
/* Create an epoll session and wrap it in a locked session.
 * Returns VLS_INVALID_HANDLE when vppcom_epoll_create () fails; if
 * vls_alloc () fails the epoll session is closed.  The final return is on a
 * line not visible in this excerpt. */
584 vls_epoll_create (void)
586 vcl_session_handle_t sh;
589 sh = vppcom_epoll_create ();
590 if (sh == INVALID_SESSION_ID)
591 return VLS_INVALID_HANDLE;
593 vlsh = vls_alloc (sh);
594 if (vlsh == VLS_INVALID_HANDLE)
595 vppcom_session_close (sh);
/* Apply epoll operation op for session vlsh on the epoll session ep_vlsh.
 * Both locked sessions are fetched with vls_get_and_lock () and converted to
 * session handles, then the table reader lock is released before
 * vppcom_epoll_ctl () runs.  Afterwards both are looked up again with
 * vls_get () and the table reader lock is released once more; the lock
 * acquisitions and per-session unlocks are on lines not visible here.
 * NOTE(review): neither vls_get_and_lock () result is checked before
 * vls_to_sh () dereferences it.  If that helper can return a null pointer for
 * a free index, an invalid handle would crash here - confirm. */
601 vls_epoll_ctl (vls_handle_t ep_vlsh, int op, vls_handle_t vlsh,
602 struct epoll_event *event)
604 vcl_locked_session_t *ep_vls, *vls;
605 vcl_session_handle_t ep_sh, sh;
609 ep_vls = vls_get_and_lock (ep_vlsh);
610 vls = vls_get_and_lock (vlsh);
611 ep_sh = vls_to_sh (ep_vls);
612 sh = vls_to_sh (vls);
613 vls_table_runlock ();
615 rv = vppcom_epoll_ctl (ep_sh, op, sh, event);
618 ep_vls = vls_get (ep_vlsh);
619 vls = vls_get (vlsh);
622 vls_table_runlock ();
/* Locked wrapper around vppcom_epoll_wait () for the epoll session ep_vlsh.
 * Returns VPPCOM_EBADFD for an unresolvable handle.  The guard is taken with
 * VLS_MT_OP_XPOLL and a null vls.  The last argument of the forwarded call,
 * the guard release and the final return are on lines not visible in this
 * excerpt. */
627 vls_epoll_wait (vls_handle_t ep_vlsh, struct epoll_event *events,
628 int maxevents, double wait_for_time)
630 vcl_locked_session_t *vls;
633 if (!(vls = vls_get_w_dlock (ep_vlsh)))
634 return VPPCOM_EBADFD;
635 vls_mt_guard (0, VLS_MT_OP_XPOLL);
636 rv = vppcom_epoll_wait (vls_to_sh_tu (vls), events, maxevents,
639 vls_get_and_unlock (ep_vlsh);
/* Forward to vppcom_select () under the VLS_MT_OP_XPOLL guard.  No locked
 * session is looked up; the guard release and final return are on lines not
 * visible in this excerpt. */
644 vls_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map,
645 vcl_si_set * except_map, double wait_for_time)
648 vls_mt_guard (0, VLS_MT_OP_XPOLL);
649 rv = vppcom_select (n_bits, read_map, write_map, except_map, wait_for_time);
/* Translate a vls handle into the underlying VCL session handle.
 * INVALID_SESSION_ID is returned on the failure path (its condition is
 * elided; presumably a null lookup result).  The unlock after reading the
 * handle and the final return are on lines not visible in this excerpt. */
655 vlsh_to_sh (vls_handle_t vlsh)
657 vcl_locked_session_t *vls;
660 vls = vls_get_w_dlock (vlsh);
662 return INVALID_SESSION_ID;
663 rv = vls_to_sh (vls);
/* Translate a vls handle into a session index by way of vlsh_to_sh ().
 * No check for INVALID_SESSION_ID is visible between the two calls. */
669 vlsh_to_session_index (vls_handle_t vlsh)
671 vcl_session_handle_t sh;
672 sh = vlsh_to_sh (vlsh);
673 return vppcom_session_index (sh);
/* Look up the vls handle registered for session_index in the hash table.
 * Returns VLS_INVALID_HANDLE when there is no entry.  Takes no lock itself. */
677 vls_si_to_vlsh (u32 session_index)
680 vlshp = hash_get (vlsm->session_index_to_vlsh_table, session_index);
681 return vlshp ? *vlshp : VLS_INVALID_HANDLE;
/* Variant of vls_si_to_vlsh () that releases the table reader lock after the
 * lookup; the matching acquisition and the return statement are on lines not
 * visible in this excerpt. */
685 vls_session_index_to_vlsh (uint32_t session_index)
690 vlsh = vls_si_to_vlsh (session_index);
691 vls_table_runlock ();
/* Unshare every session of worker wrk.
 * For each session in wrk's pool the locked session is resolved, and
 * vls_unshare_session () is called when wrk is the calling worker or when
 * the calling worker is itself subscribed to that session.  The table writer
 * lock is released at the end; its acquisition and the end of the
 * pool_foreach body are on lines not visible in this excerpt. */
697 vls_unshare_vcl_worker_sessions (vcl_worker_t * wrk)
699 u32 current_wrk, is_current;
700 vcl_locked_session_t *vls;
703 current_wrk = vcl_get_worker_index ();
704 is_current = current_wrk == wrk->wrk_index;
708 pool_foreach (s, wrk->sessions, ({
709 vls = vls_get (vls_si_to_vlsh (s->session_index));
710 if (vls && (is_current || vls_is_shared_by_wrk (vls, current_wrk)))
711 vls_unshare_session (vls, wrk);
715 vls_table_wunlock ();
/* Tear down worker wrk on behalf of another process: unshare its sessions,
 * then run the VCL worker cleanup with the notify-vpp flag set. */
719 vls_cleanup_vcl_worker (vcl_worker_t * wrk)
721 /* Unshare sessions and also cleanup worker since child may have
722 * called _exit () and therefore vcl may not catch the event */
723 vls_unshare_vcl_worker_sessions (wrk);
724 vcl_worker_cleanup (wrk, 1 /* notify vpp */ );
/* Clean up the worker of a forked child (child_wrk) and clear the parent's
 * forked_child link.
 * When child_wrk itself recorded a forked child, that worker is looked up
 * and probed with kill (pid, 0) for up to 50 loop iterations (the loop body
 * is elided; presumably a short sleep).  If the probe then fails, i.e. the
 * process is gone, this function recurses to clean up that grandchild first.
 * NOTE(review): no null check on sub_child is visible before
 * sub_child->current_pid is read; a guard may sit on the elided lines -
 * confirm. */
728 vls_cleanup_forked_child (vcl_worker_t * wrk, vcl_worker_t * child_wrk)
730 vcl_worker_t *sub_child;
733 if (child_wrk->forked_child != ~0)
735 sub_child = vcl_worker_get_if_valid (child_wrk->forked_child);
738 /* Wait a bit, maybe the process is going away */
739 while (kill (sub_child->current_pid, 0) >= 0 && tries++ < 50)
741 if (kill (sub_child->current_pid, 0) < 0)
742 vls_cleanup_forked_child (child_wrk, sub_child);
745 vls_cleanup_vcl_worker (child_wrk);
746 VDBG (0, "Cleaned up forked child wrk %u", child_wrk->wrk_index);
747 wrk->forked_child = ~0;
/* SIGCHLD disposition that was in place before vls installed its own
 * handler; restored and chained to from the handler below. */
750 static struct sigaction old_sa;
/* SIGCHLD handler installed by vls_incercept_sigchld ().
 * Visible sequence: bail-out check for a thread without a worker index;
 * restore the saved disposition with sigaction () and log on failure; fetch
 * the current worker and check that it recorded a forked child; resolve
 * that child's worker; log when the signalling pid differs from the child's
 * pid; clean up the forked child; finally select the previous handler's
 * function pointer according to SA_SIGINFO.
 * The statements run on each early-exit condition and the actual calls
 * through fn are on lines not visible in this excerpt. */
753 vls_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc)
755 vcl_worker_t *wrk, *child_wrk;
757 if (vcl_get_worker_index () == ~0)
760 if (sigaction (SIGCHLD, &old_sa, 0))
762 VERR ("couldn't restore sigchld");
766 wrk = vcl_worker_get_current ();
767 if (wrk->forked_child == ~0)
770 child_wrk = vcl_worker_get_if_valid (wrk->forked_child);
774 if (si && si->si_pid != child_wrk->current_pid)
776 VDBG (0, "unexpected child pid %u", si->si_pid);
779 vls_cleanup_forked_child (wrk, child_wrk);
/* Chain to whichever style of handler was installed before ours. */
782 if (old_sa.sa_flags & SA_SIGINFO)
784 void (*fn) (int, siginfo_t *, void *) = old_sa.sa_sigaction;
789 void (*fn) (int) = old_sa.sa_handler;
/* Install vls_intercept_sigchld_handler () for SIGCHLD with SA_SIGINFO and
 * save the previous disposition in old_sa; logs an error when sigaction ()
 * fails.
 * NOTE(review): "incercept" looks like a typo for "intercept"; renaming
 * would also require updating the call in vls_app_pre_fork (). */
796 vls_incercept_sigchld ()
799 clib_memset (&sa, 0, sizeof (sa));
800 sa.sa_sigaction = vls_intercept_sigchld_handler;
801 sa.sa_flags = SA_SIGINFO;
802 if (sigaction (SIGCHLD, &sa, &old_sa))
804 VERR ("couldn't intercept sigchld");
/* pthread_atfork () prepare hook (registered in vls_app_create ()): installs
 * the SIGCHLD interceptor and flushes pending message-queue events. */
810 vls_app_pre_fork (void)
812 vls_incercept_sigchld ();
813 vcl_flush_mq_events ();
/* pthread_atfork () child hook (registered in vls_app_create ()).
 * Visible sequence: remember the parent's worker index; clear this thread's
 * worker index and allocate a new worker; connect to VPP under the name
 * "<app_name>-child-<pid>"; register the new worker with VPP; copy and share
 * the parent worker's sessions; record the new worker in the parent's
 * forked_child field; reset the thread counter and save the new worker index
 * in vls_wrk_index.
 * What happens after each logged error, and the condition on rv, are on
 * lines not visible in this excerpt. */
817 vls_app_fork_child_handler (void)
819 vcl_worker_t *parent_wrk;
820 int rv, parent_wrk_index;
823 parent_wrk_index = vcl_get_worker_index ();
824 VDBG (0, "initializing forked child %u with parent wrk %u", getpid (),
830 vcl_set_worker_index (~0);
831 if (!vcl_worker_alloc_and_init ())
832 VERR ("couldn't allocate new worker");
835 * Attach to binary api
837 child_name = format (0, "%v-child-%u%c", vcm->app_name, getpid (), 0);
839 vppcom_api_hookup ();
840 vcm->app_state = STATE_APP_START;
841 rv = vppcom_connect_to_vpp ((char *) child_name);
842 vec_free (child_name);
845 VERR ("couldn't connect to VPP!");
850 * Register worker with vpp and share sessions
852 vcl_worker_register_with_vpp ();
853 parent_wrk = vcl_worker_get (parent_wrk_index);
854 vls_worker_copy_on_fork (parent_wrk);
855 parent_wrk->forked_child = vcl_get_worker_index ();
857 /* Reset number of threads and set wrk index */
858 vls_mt_n_threads = 0;
859 vls_wrk_index = vcl_get_worker_index ();
861 VDBG (0, "forked child main worker initialized");
/* pthread_atfork () parent hook (registered in vls_app_create ()).
 * NOTE(review): the embedded line numbers jump from 866 to 876 here, which
 * leaves room for a function boundary.  The two lines below may therefore
 * belong to a different function, e.g. vls_app_exit, which is registered
 * with atexit () but whose definition is not visible.  Confirm against the
 * full source before relying on this grouping. */
866 vls_app_fork_parent_handler (void)
876 /* Unshare the sessions. VCL will clean up the worker */
877 vls_unshare_vcl_worker_sessions (vcl_worker_get_current ());
/* Create the VCL application and initialise the vls layer on top of it.
 * After vppcom_app_create () this initialises the table rwlock, registers
 * the fork hooks (prepare, parent, child) and the exit hook, and saves the
 * caller's worker index in vls_wrk_index.  The statement run when
 * vppcom_app_create () returns non-zero and the final return are on lines
 * not visible in this excerpt. */
881 vls_app_create (char *app_name)
885 if ((rv = vppcom_app_create (app_name)))
888 clib_rwlock_init (&vlsm->vls_table_lock);
889 pthread_atfork (vls_app_pre_fork, vls_app_fork_parent_handler,
890 vls_app_fork_child_handler);
891 atexit (vls_app_exit);
892 vls_wrk_index = vcl_get_worker_index ();
897 * fd.io coding-style-patch-verification: ON
900 * eval: (c-set-style "gnu")