From 01f3f894fc180060ef8ee1c8b4acb4421d12ebe3 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Sun, 2 Dec 2018 12:45:53 -0800 Subject: [PATCH] vcl: cleanup children that use _exit() Change-Id: Ia56c2698adb0ea7811203844dc4db10e121fbc42 Signed-off-by: Florin Coras --- src/vcl/vcl_bapi.c | 37 ++++++++++---- src/vcl/vcl_private.c | 25 +++++----- src/vcl/vcl_private.h | 16 ++++++- src/vcl/vppcom.c | 106 ++++++++++++++++++++++++++++++++++++----- src/vnet/session/application.c | 2 + 5 files changed, 154 insertions(+), 32 deletions(-) diff --git a/src/vcl/vcl_bapi.c b/src/vcl/vcl_bapi.c index f66c7f96bac..457fc18b1c2 100644 --- a/src/vcl/vcl_bapi.c +++ b/src/vcl/vcl_bapi.c @@ -187,7 +187,10 @@ vl_api_app_worker_add_del_reply_t_handler (vl_api_app_worker_add_del_reply_t * return; wrk_index = mp->context; - wrk = vcl_worker_get (wrk_index); + wrk = vcl_worker_get_if_valid (wrk_index); + if (!wrk) + return; + wrk->vpp_wrk_index = clib_net_to_host_u32 (mp->wrk_index); wrk->app_event_queue = uword_to_pointer (mp->app_event_queue_address, svm_msg_q_t *); @@ -196,7 +199,7 @@ vl_api_app_worker_add_del_reply_t_handler (vl_api_app_worker_add_del_reply_t * if (segment_handle == VCL_INVALID_SEGMENT_HANDLE) { clib_warning ("invalid segment handle"); - return; + goto failed; } if (mp->n_fds) @@ -205,9 +208,9 @@ vl_api_app_worker_add_del_reply_t_handler (vl_api_app_worker_add_del_reply_t * vl_socket_client_recv_fd_msg (fds, mp->n_fds, 5); if (mp->fd_flags & SESSION_FD_F_VPP_MQ_SEGMENT) - if (vcl_segment_attach - (vcl_vpp_worker_segment_handle (wrk->wrk_index), "vpp-worker-seg", - SSVM_SEGMENT_MEMFD, fds[n_fds++])) + if (vcl_segment_attach (vcl_vpp_worker_segment_handle (wrk_index), + "vpp-worker-seg", SSVM_SEGMENT_MEMFD, + fds[n_fds++])) goto failed; if (mp->fd_flags & SESSION_FD_F_MEMFD_SEGMENT) @@ -468,7 +471,6 @@ vcl_send_app_worker_add_del (u8 is_add) { vcl_worker_t *wrk = vcl_worker_get_current (); vl_api_app_worker_add_del_t *mp; - u32 wrk_index = wrk->wrk_index; mp = vl_msg_api_alloc (sizeof (*mp)); memset (mp, 0, sizeof (*mp)); @@ -476,10 +478,29 @@ vcl_send_app_worker_add_del (u8 is_add) mp->_vl_msg_id = ntohs (VL_API_APP_WORKER_ADD_DEL); mp->client_index = wrk->my_client_index; mp->app_index = clib_host_to_net_u32 (vcm->app_index); - mp->context = wrk_index; + mp->context = wrk->wrk_index; mp->is_add = is_add; if (!is_add) - mp->wrk_index = clib_host_to_net_u32 (wrk_index); + mp->wrk_index = clib_host_to_net_u32 (wrk->vpp_wrk_index); + + vl_msg_api_send_shmem (wrk->vl_input_queue, (u8 *) & mp); +} + +void +vcl_send_child_worker_del (vcl_worker_t * child_wrk) +{ + vcl_worker_t *wrk = vcl_worker_get_current (); + vl_api_app_worker_add_del_t *mp; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + + mp->_vl_msg_id = ntohs (VL_API_APP_WORKER_ADD_DEL); + mp->client_index = wrk->my_client_index; + mp->app_index = clib_host_to_net_u32 (vcm->app_index); + mp->context = wrk->wrk_index; + mp->is_add = 0; + mp->wrk_index = clib_host_to_net_u32 (child_wrk->vpp_wrk_index); vl_msg_api_send_shmem (wrk->vl_input_queue, (u8 *) & mp); } diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c index 673b91d4576..d82a7ff3355 100644 --- a/src/vcl/vcl_private.c +++ b/src/vcl/vcl_private.c @@ -211,6 +211,7 @@ vcl_worker_alloc (void) pool_get (vcm->workers, wrk); memset (wrk, 0, sizeof (*wrk)); wrk->wrk_index = wrk - vcm->workers; + wrk->forked_child = ~0; return wrk; } @@ -221,13 +222,16 @@ vcl_worker_free (vcl_worker_t * wrk) } void -vcl_worker_cleanup (u8 notify_vpp) +vcl_worker_cleanup (vcl_worker_t * wrk, u8 notify_vpp) { - vcl_worker_t *wrk = vcl_worker_get_current (); - clib_spinlock_lock (&vcm->workers_lock); if (notify_vpp) - vcl_send_app_worker_add_del (0 /* is_add */ ); + { + if (wrk->wrk_index == vcl_get_worker_index ()) + vcl_send_app_worker_add_del (0 /* is_add */ ); + else + vcl_send_child_worker_del (wrk); + } if (wrk->mqs_epfd > 0) close (wrk->mqs_epfd); hash_free (wrk->session_index_by_vpp_handles); @@ -235,7 +239,6 @@ vcl_worker_cleanup (u8 notify_vpp) clib_spinlock_free (&wrk->ct_registration_lock); vec_free (wrk->mq_events); vec_free (wrk->mq_msg_vector); - vcl_set_worker_index (~0); vcl_worker_free (wrk); clib_spinlock_unlock (&vcm->workers_lock); } @@ -243,8 +246,10 @@ vcl_worker_cleanup (u8 notify_vpp) static void vcl_worker_cleanup_cb (void *arg) { - u32 wrk_index = vcl_get_worker_index (); - vcl_worker_cleanup (1 /* notify vpp */ ); + vcl_worker_t *wrk = vcl_worker_get_current (); + u32 wrk_index = wrk->wrk_index; + vcl_worker_cleanup (wrk, 1 /* notify vpp */ ); + vcl_set_worker_index (~0); VDBG (0, "cleaned up worker %u", wrk_index); } @@ -309,7 +314,6 @@ vcl_worker_register_with_vpp (void) clib_warning ("failed to add worker to vpp"); return -1; } - if (pthread_key_create (&vcl_worker_stop_key, vcl_worker_cleanup_cb)) clib_warning ("failed to add pthread cleanup function"); if (pthread_setspecific (vcl_worker_stop_key, &wrk->thread_id)) @@ -414,12 +418,11 @@ vcl_worker_unshare_session (vcl_worker_t * wrk, vcl_session_t * s) } void -vcl_worker_share_sessions (u32 parent_wrk_index) +vcl_worker_share_sessions (vcl_worker_t * parent_wrk) { - vcl_worker_t *parent_wrk, *wrk; vcl_session_t *new_s; + vcl_worker_t *wrk; - parent_wrk = vcl_worker_get (parent_wrk_index); if (!parent_wrk->sessions) return; diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h index 7d6af6d6599..44c6520703e 100644 --- a/src/vcl/vcl_private.h +++ b/src/vcl/vcl_private.h @@ -287,6 +287,9 @@ typedef struct vcl_worker_ /** Current pid, may be different from main_pid if forked child */ pid_t current_pid; + + u32 forked_child; + } vcl_worker_t; typedef struct vppcom_main_t_ @@ -504,10 +507,10 @@ int vcl_mq_epoll_add_evfd (vcl_worker_t * wrk, svm_msg_q_t * mq); int vcl_mq_epoll_del_evfd (vcl_worker_t * wrk, u32 mqc_index); vcl_worker_t *vcl_worker_alloc_and_init (void); -void vcl_worker_cleanup (u8 notify_vpp); +void vcl_worker_cleanup (vcl_worker_t * wrk, u8 notify_vpp); int vcl_worker_register_with_vpp (void); int vcl_worker_set_bapi (void); -void vcl_worker_share_sessions (u32 parent_wrk_index); +void vcl_worker_share_sessions (vcl_worker_t * parent_wrk); int vcl_worker_unshare_session (vcl_worker_t * wrk, vcl_session_t * s); int vcl_session_get_refcnt (vcl_session_t * s); @@ -521,6 +524,14 @@ vcl_worker_get (u32 wrk_index) return pool_elt_at_index (vcm->workers, wrk_index); } +static inline vcl_worker_t * +vcl_worker_get_if_valid (u32 wrk_index) +{ + if (pool_is_free_index (vcm->workers, wrk_index)) + return 0; + return pool_elt_at_index (vcm->workers, wrk_index); +} + static inline vcl_worker_t * vcl_worker_get_current (void) { @@ -542,6 +553,7 @@ void vppcom_send_unbind_sock (u64 vpp_handle); void vppcom_api_hookup (void); void vppcom_send_accept_session_reply (u64 vpp_handle, u32 context, int rv); void vcl_send_app_worker_add_del (u8 is_add); +void vcl_send_child_worker_del (vcl_worker_t * wrk); u32 vcl_max_nsid_len (void); diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c index bf21f5204b4..07136e9c6c8 100644 --- a/src/vcl/vppcom.c +++ b/src/vcl/vppcom.c @@ -697,14 +697,96 @@ vcl_cleanup_bapi (void) vl_client_api_unmap (); } -void +static void +vcl_cleanup_forked_child (vcl_worker_t * wrk, vcl_worker_t * child_wrk) +{ + vcl_worker_t *sub_child; + int tries = 0; + + if (child_wrk->forked_child != ~0) + { + sub_child = vcl_worker_get_if_valid (child_wrk->forked_child); + if (sub_child) + { + /* Wait a bit, maybe the process is going away */ + while (kill (sub_child->current_pid, 0) >= 0 && tries++ < 50) + usleep (1e3); + if (kill (sub_child->current_pid, 0) < 0) + vcl_cleanup_forked_child (child_wrk, sub_child); + } + } + vcl_worker_cleanup (child_wrk, 1 /* notify vpp */ ); + VDBG (0, "Cleaned up wrk %u", child_wrk->wrk_index); + wrk->forked_child = ~0; +} + +static struct sigaction old_sa; + +static void +vcl_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc) +{ + vcl_worker_t *wrk, *child_wrk; + + if (vcl_get_worker_index () == ~0) + return; + + sigaction (SIGCHLD, &old_sa, 0); + + wrk = vcl_worker_get_current (); + if (wrk->forked_child == ~0) + return; + + child_wrk = vcl_worker_get_if_valid (wrk->forked_child); + if (si->si_pid != child_wrk->current_pid) + { + VDBG (0, "unexpected child pid %u", si->si_pid); + return; + } + if (child_wrk) + vcl_cleanup_forked_child (wrk, child_wrk); + + if (old_sa.sa_flags & SA_SIGINFO) + { + void (*fn) (int, siginfo_t *, void *) = old_sa.sa_sigaction; + fn (signum, si, uc); + } + else + { + void (*fn) (int) = old_sa.sa_handler; + if (fn) + fn (signum); + } +} + +static void +vcl_incercept_sigchld () +{ + struct sigaction sa; + clib_memset (&sa, 0, sizeof (sa)); + sa.sa_sigaction = vcl_intercept_sigchld_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction (SIGCHLD, &sa, &old_sa)) + { + VERR ("couldn't intercept sigchld"); + exit (-1); + } +} + +static void +vcl_app_pre_fork (void) +{ + vcl_incercept_sigchld (); +} + +static void vcl_app_fork_child_handler (void) { + int rv, parent_wrk_index; + vcl_worker_t *parent_wrk; u8 *child_name; - int rv, parent_wrk; - parent_wrk = vcl_get_worker_index (); - VDBG (0, "initializing forked child with parent wrk %u", parent_wrk); + parent_wrk_index = vcl_get_worker_index (); + VDBG (0, "initializing forked child with parent wrk %u", parent_wrk_index); /* * Allocate worker @@ -732,17 +814,18 @@ vcl_app_fork_child_handler (void) * Register worker with vpp and share sessions */ vcl_worker_register_with_vpp (); + parent_wrk = vcl_worker_get (parent_wrk_index); vcl_worker_share_sessions (parent_wrk); + parent_wrk->forked_child = vcl_get_worker_index (); VDBG (0, "forked child main worker initialized"); vcm->forking = 0; } -void +static void vcl_app_fork_parent_handler (void) { vcm->forking = 1; - while (vcm->forking) ; } @@ -759,8 +842,8 @@ vppcom_app_exit (void) { if (!pool_elts (vcm->workers)) return; - - vcl_worker_cleanup (1 /* notify vpp */ ); + vcl_worker_cleanup (vcl_worker_get_current (), 1 /* notify vpp */ ); + vcl_set_worker_index (~0); vcl_elog_stop (vcm); if (vec_len (vcm->workers) == 1) vl_client_disconnect_from_vlib (); @@ -796,7 +879,7 @@ vppcom_app_create (char *app_name) pool_alloc (vcm->workers, vcl_cfg->max_workers); clib_spinlock_init (&vcm->workers_lock); clib_rwlock_init (&vcm->segment_table_lock); - pthread_atfork (NULL, vcl_app_fork_parent_handler, + pthread_atfork (vcl_app_pre_fork, vcl_app_fork_parent_handler, vcl_app_fork_child_handler); atexit (vppcom_app_exit); @@ -857,13 +940,14 @@ vppcom_app_destroy (void) VDBG (0, "application detach timed out! returning %d (%s)", rv, vppcom_retval_str (rv)); vec_free (vcm->app_name); - vcl_worker_cleanup (0 /* notify vpp */ ); + vcl_worker_cleanup (vcl_worker_get_current (), 0 /* notify vpp */ ); } else { - vcl_worker_cleanup (1 /* notify vpp */ ); + vcl_worker_cleanup (vcl_worker_get_current (), 1 /* notify vpp */ ); } + vcl_set_worker_index (~0); vcl_elog_stop (vcm); vl_client_disconnect_from_vlib (); } diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index f3628966ae5..01dc818216d 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -94,6 +94,8 @@ app_worker_map_free (application_t * app, app_worker_map_t * map) static app_worker_map_t * app_worker_map_get (application_t * app, u32 map_index) { + if (pool_is_free_index (app->worker_maps, map_index)) + return 0; return pool_elt_at_index (app->worker_maps, map_index); } -- 2.16.6