X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fsession%2Fsession.c;h=f1d1a4e2cfefeb15cd6ce3ee65e5dfbd5ddcdf40;hb=6bd8d3fbba74f8f80a0c09f87c6cbfddd054042f;hp=caa362e1f1d091f74a543d88be8e9468626dd4c8;hpb=7357043d2abc8d3811362f256e7c9086e7b0d378;p=vpp.git diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index caa362e1f1d..f1d1a4e2cfe 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -18,26 +18,22 @@ */ #include -#include #include #include #include session_main_t session_main; -#if SESSION_DEBUG -session_dbg_main_t session_dbg_main; -#endif - static inline int session_send_evt_to_thread (void *data, void *args, u32 thread_index, session_evt_type_t evt_type) { + session_worker_t *wrk = session_main_get_worker (thread_index); session_event_t *evt; svm_msg_q_msg_t msg; svm_msg_q_t *mq; - mq = session_main_get_vpp_event_queue (thread_index); + mq = wrk->vpp_event_queue; if (PREDICT_FALSE (svm_msg_q_lock (mq))) return -1; if (PREDICT_FALSE (svm_msg_q_is_full (mq) @@ -77,13 +73,17 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, evt->event_type = evt_type; svm_msg_q_add_and_unlock (mq, &msg); + + if (PREDICT_FALSE (wrk->state == SESSION_WRK_INTERRUPT)) + vlib_node_set_interrupt_pending (wrk->vm, session_queue_node.index); + return 0; } int session_send_io_evt_to_thread (svm_fifo_t * f, session_evt_type_t evt_type) { - return session_send_evt_to_thread (&f->master_session_index, 0, + return session_send_evt_to_thread (&f->shr->master_session_index, 0, f->master_thread_index, evt_type); } @@ -97,9 +97,10 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index, int session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type) { - /* only events supported are disconnect and reset */ - ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE - || evt_type == SESSION_CTRL_EVT_RESET); + /* only events supported are disconnect, shutdown and reset */ + ASSERT (evt_type == SESSION_CTRL_EVT_CLOSE || + evt_type == SESSION_CTRL_EVT_HALF_CLOSE || + evt_type == SESSION_CTRL_EVT_RESET); return session_send_evt_to_thread (s, 0, s->thread_index, evt_type); } @@ -126,19 +127,20 @@ session_send_rpc_evt_to_thread (u32 thread_index, void *fp, void *rpc_args) void session_add_self_custom_tx_evt (transport_connection_t * tc, u8 has_prio) { - session_t *s; + session_t *s = session_get (tc->s_index, tc->thread_index); - s = session_get (tc->s_index, tc->thread_index); ASSERT (s->thread_index == vlib_get_thread_index ()); ASSERT (s->session_state != SESSION_STATE_TRANSPORT_DELETED); + if (!(s->flags & SESSION_F_CUSTOM_TX)) { s->flags |= SESSION_F_CUSTOM_TX; if (svm_fifo_set_event (s->tx_fifo) || transport_connection_is_descheduled (tc)) { - session_worker_t *wrk; session_evt_elt_t *elt; + session_worker_t *wrk; + wrk = session_main_get_worker (tc->thread_index); if (has_prio) elt = session_evt_alloc_new (wrk); @@ -147,6 +149,10 @@ session_add_self_custom_tx_evt (transport_connection_t * tc, u8 has_prio) elt->evt.session_index = tc->s_index; elt->evt.event_type = SESSION_IO_EVT_TX; tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED; + + if (PREDICT_FALSE (wrk->state == SESSION_WRK_INTERRUPT)) + vlib_node_set_interrupt_pending (wrk->vm, + session_queue_node.index); } } } @@ -162,6 +168,9 @@ sesssion_reschedule_tx (transport_connection_t * tc) elt = session_evt_alloc_new (wrk); elt->evt.session_index = tc->s_index; elt->evt.event_type = SESSION_IO_EVT_TX; + + if (PREDICT_FALSE (wrk->state == SESSION_WRK_INTERRUPT)) + vlib_node_set_interrupt_pending (wrk->vm, session_queue_node.index); } static void @@ -180,34 +189,39 @@ session_program_transport_ctrl_evt (session_t * s, session_evt_type_t evt) clib_memset (&elt->evt, 0, sizeof (session_event_t)); elt->evt.session_handle = session_handle (s); elt->evt.event_type = evt; + + if (PREDICT_FALSE (wrk->state == SESSION_WRK_INTERRUPT)) + vlib_node_set_interrupt_pending (wrk->vm, session_queue_node.index); } else session_send_ctrl_evt_to_thread (s, evt); } +static void +session_pool_realloc_rpc (void *rpc_args) +{ + session_worker_t *wrk; + u32 thread_index; + + thread_index = pointer_to_uword (rpc_args); + wrk = &session_main.wrk[thread_index]; + + pool_realloc_safe_aligned (wrk->sessions, CLIB_CACHE_LINE_BYTES); +} + session_t * session_alloc (u32 thread_index) { session_worker_t *wrk = &session_main.wrk[thread_index]; session_t *s; - u8 will_expand = 0; - pool_get_aligned_will_expand (wrk->sessions, will_expand, - CLIB_CACHE_LINE_BYTES); - /* If we have peekers, let them finish */ - if (PREDICT_FALSE (will_expand && vlib_num_workers ())) - { - clib_rwlock_writer_lock (&wrk->peekers_rw_locks); - pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); - clib_rwlock_writer_unlock (&wrk->peekers_rw_locks); - } - else - { - pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); - } + + pool_get_aligned_safe (wrk->sessions, s, thread_index, + session_pool_realloc_rpc, CLIB_CACHE_LINE_BYTES); clib_memset (s, 0, sizeof (*s)); s->session_index = s - wrk->sessions; s->thread_index = thread_index; s->app_index = APP_INVALID_INDEX; + return s; } @@ -233,9 +247,6 @@ session_is_valid (u32 si, u8 thread_index) s = pool_elt_at_index (session_main.wrk[thread_index].sessions, si); - if (!s) - return 1; - if (s->thread_index != thread_index || s->session_index != si) return 0; @@ -243,6 +254,10 @@ session_is_valid (u32 si, u8 thread_index) || s->session_state <= SESSION_STATE_LISTENING) return 1; + if (s->session_state == SESSION_STATE_CONNECTING && + (s->flags & SESSION_F_HALF_OPEN)) + return 1; + tc = session_get_transport (s); if (s->connection_index != tc->c_index || s->thread_index != tc->thread_index || tc->s_index != si) @@ -287,6 +302,94 @@ session_delete (session_t * s) session_free_w_fifos (s); } +void +session_cleanup_half_open (session_handle_t ho_handle) +{ + session_t *ho = session_get_from_handle (ho_handle); + + /* App transports can migrate their half-opens */ + if (ho->flags & SESSION_F_IS_MIGRATING) + { + /* Session still migrating, move to closed state to signal that the + * session should be removed. */ + if (ho->connection_index == ~0) + { + ho->session_state = SESSION_STATE_CLOSED; + return; + } + /* Migrated transports are no longer half-opens */ + transport_cleanup (session_get_transport_proto (ho), + ho->connection_index, ho->app_index /* overloaded */); + } + else + transport_cleanup_half_open (session_get_transport_proto (ho), + ho->connection_index); + session_free (ho); +} + +static void +session_half_open_free (session_t *ho) +{ + app_worker_t *app_wrk; + + ASSERT (vlib_get_thread_index () <= 1); + app_wrk = app_worker_get (ho->app_wrk_index); + app_worker_del_half_open (app_wrk, ho); + session_free (ho); +} + +static void +session_half_open_free_rpc (void *args) +{ + session_t *ho = ho_session_get (pointer_to_uword (args)); + session_half_open_free (ho); +} + +void +session_half_open_delete_notify (transport_connection_t *tc) +{ + /* Notification from ctrl thread accepted without rpc */ + if (!tc->thread_index) + { + session_half_open_free (ho_session_get (tc->s_index)); + } + else + { + void *args = uword_to_pointer ((uword) tc->s_index, void *); + session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc, + args); + } +} + +void +session_half_open_migrate_notify (transport_connection_t *tc) +{ + session_t *ho; + + ho = ho_session_get (tc->s_index); + ho->flags |= SESSION_F_IS_MIGRATING; + ho->connection_index = ~0; +} + +int +session_half_open_migrated_notify (transport_connection_t *tc) +{ + session_t *ho; + + ho = ho_session_get (tc->s_index); + + /* App probably detached so the half-open must be cleaned up */ + if (ho->session_state == SESSION_STATE_CLOSED) + { + session_half_open_delete_notify (tc); + return -1; + } + ho->connection_index = tc->c_index; + /* Overload app index for half-open with new thread */ + ho->app_index = tc->thread_index; + return 0; +} + session_t * session_alloc_for_connection (transport_connection_t * tc) { @@ -306,6 +409,18 @@ session_alloc_for_connection (transport_connection_t * tc) return s; } +session_t * +session_alloc_for_half_open (transport_connection_t *tc) +{ + session_t *s; + + s = ho_session_alloc (); + s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4); + s->connection_index = tc->c_index; + tc->s_index = s->session_index; + return s; +} + /** * Discards bytes from buffer chain * @@ -427,8 +542,8 @@ session_fifo_tuning (session_t * s, svm_fifo_t * f, { segment_manager_t *sm; sm = segment_manager_get (f->segment_manager); - ASSERT (f->size >= 4096); - ASSERT (f->size <= sm->max_fifo_size); + ASSERT (f->shr->size >= 4096); + ASSERT (f->shr->size <= sm->max_fifo_size); } } } @@ -507,22 +622,49 @@ session_enqueue_dgram_connection (session_t * s, session_dgram_hdr_t * hdr, vlib_buffer_t * b, u8 proto, u8 queue_event) { - int enqueued = 0, rv, in_order_off; + int rv; ASSERT (svm_fifo_max_enqueue_prod (s->rx_fifo) >= b->current_length + sizeof (*hdr)); - svm_fifo_enqueue (s->rx_fifo, sizeof (session_dgram_hdr_t), (u8 *) hdr); - enqueued = svm_fifo_enqueue (s->rx_fifo, b->current_length, - vlib_buffer_get_current (b)); - if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && enqueued >= 0)) + if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))) { - in_order_off = enqueued > b->current_length ? enqueued : 0; - rv = session_enqueue_chain_tail (s, b, in_order_off, 1); - if (rv > 0) - enqueued += rv; + /* *INDENT-OFF* */ + svm_fifo_seg_t segs[2] = { + { (u8 *) hdr, sizeof (*hdr) }, + { vlib_buffer_get_current (b), b->current_length } + }; + /* *INDENT-ON* */ + + rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2, + 0 /* allow_partial */ ); } - if (queue_event) + else + { + vlib_main_t *vm = vlib_get_main (); + svm_fifo_seg_t *segs = 0, *seg; + vlib_buffer_t *it = b; + u32 n_segs = 1; + + vec_add2 (segs, seg, 1); + seg->data = (u8 *) hdr; + seg->len = sizeof (*hdr); + while (it) + { + vec_add2 (segs, seg, 1); + seg->data = vlib_buffer_get_current (it); + seg->len = it->current_length; + n_segs++; + if (!(it->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + it = vlib_get_buffer (vm, it->next_buffer); + } + rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, n_segs, + 0 /* allow partial */ ); + vec_free (segs); + } + + if (queue_event && rv > 0) { /* Queue RX event on this fifo. Eventually these will need to be flushed * by calling stream_server_flush_enqueue_events () */ @@ -537,7 +679,7 @@ session_enqueue_dgram_connection (session_t * s, session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0); } - return enqueued; + return rv > 0 ? rv : 0; } int @@ -575,9 +717,9 @@ session_notify_subscribers (u32 app_index, session_t * s, if (!app) return -1; - for (i = 0; i < f->n_subscribers; i++) + for (i = 0; i < f->shr->n_subscribers; i++) { - app_wrk = application_get_worker (app, f->subscribers[i]); + app_wrk = application_get_worker (app, f->shr->subscribers[i]); if (!app_wrk) continue; if (app_worker_lock_and_send_event (app_wrk, s, evt_type)) @@ -615,6 +757,11 @@ session_enqueue_notify_inline (session_t * s) SESSION_EVT (SESSION_EVT_ENQ, s, svm_fifo_max_dequeue_prod (s->rx_fifo)); s->flags &= ~SESSION_F_RX_EVT; + + /* Application didn't confirm accept yet */ + if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING)) + return 0; + if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX))) return -1; @@ -682,7 +829,7 @@ session_dequeue_notify (session_t * s) SESSION_IO_EVT_TX))) return -1; - if (PREDICT_FALSE (s->tx_fifo->n_subscribers)) + if (PREDICT_FALSE (s->tx_fifo->shr->n_subscribers)) return session_notify_subscribers (app_wrk->app_index, s, s->tx_fifo, SESSION_IO_EVT_TX); @@ -739,36 +886,25 @@ session_main_flush_all_enqueue_events (u8 transport_proto) return errors; } -static inline int -session_stream_connect_notify_inline (transport_connection_t * tc, - session_error_t err, - session_state_t opened_state) +int +session_stream_connect_notify (transport_connection_t * tc, + session_error_t err) { u32 opaque = 0, new_ti, new_si; app_worker_t *app_wrk; - session_t *s = 0; - u64 ho_handle; + session_t *s = 0, *ho; /* - * Find connection handle and cleanup half-open table + * Cleanup half-open table */ - ho_handle = session_lookup_half_open_handle (tc); - if (ho_handle == HALF_OPEN_LOOKUP_INVALID_VALUE) - { - SESSION_DBG ("half-open was removed!"); - return -1; - } session_lookup_del_half_open (tc); - /* Get the app's index from the handle we stored when opening connection - * and the opaque (api_context for external apps) from transport session - * index */ - app_wrk = app_worker_get_if_valid (ho_handle >> 32); + ho = ho_session_get (tc->s_index); + opaque = ho->opaque; + app_wrk = app_worker_get_if_valid (ho->app_wrk_index); if (!app_wrk) return -1; - opaque = tc->s_index; - if (err) return app_worker_connect_notify (app_wrk, s, err, opaque); @@ -786,53 +922,56 @@ session_stream_connect_notify_inline (transport_connection_t * tc, } s = session_get (new_si, new_ti); - s->session_state = opened_state; + s->session_state = SESSION_STATE_READY; session_lookup_add_connection (tc, session_handle (s)); if (app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque)) { + session_lookup_del_connection (tc); + /* Avoid notifying app about rejected session cleanup */ s = session_get (new_si, new_ti); - session_free_w_fifos (s); + segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); + session_free (s); return -1; } return 0; } -int -session_stream_connect_notify (transport_connection_t * tc, - session_error_t err) +typedef union session_switch_pool_reply_args_ { - return session_stream_connect_notify_inline (tc, err, SESSION_STATE_READY); -} + struct + { + u32 session_index; + u16 thread_index; + u8 is_closed; + }; + u64 as_u64; +} session_switch_pool_reply_args_t; -int -session_ho_stream_connect_notify (transport_connection_t * tc, - session_error_t err) -{ - return session_stream_connect_notify_inline (tc, err, SESSION_STATE_OPENED); -} +STATIC_ASSERT (sizeof (session_switch_pool_reply_args_t) <= sizeof (uword), + "switch pool reply args size"); static void session_switch_pool_reply (void *arg) { - u32 session_index = pointer_to_uword (arg); - segment_manager_t *sm; - app_worker_t *app_wrk; + session_switch_pool_reply_args_t rargs; session_t *s; - s = session_get_if_valid (session_index, vlib_get_thread_index ()); + rargs.as_u64 = pointer_to_uword (arg); + s = session_get_if_valid (rargs.session_index, rargs.thread_index); if (!s) return; - app_wrk = app_worker_get_if_valid (s->app_wrk_index); - if (!app_wrk) - return; - - /* Attach fifos to the right session and segment slice */ - sm = app_worker_get_connect_segment_manager (app_wrk); - segment_manager_attach_fifo (sm, s->rx_fifo, s); - segment_manager_attach_fifo (sm, s->tx_fifo, s); + /* Session closed during migration. Clean everything up */ + if (rargs.is_closed) + { + transport_cleanup (session_get_transport_proto (s), s->connection_index, + s->thread_index); + segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); + session_free (s); + return; + } /* Notify app that it has data on the new session */ session_enqueue_notify (s); @@ -853,37 +992,44 @@ static void session_switch_pool (void *cb_args) { session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args; + session_switch_pool_reply_args_t rargs; session_handle_t new_sh; segment_manager_t *sm; app_worker_t *app_wrk; session_t *s; - void *rargs; ASSERT (args->thread_index == vlib_get_thread_index ()); s = session_get (args->session_index, args->thread_index); + /* Check if session closed during migration */ + rargs.is_closed = s->session_state >= SESSION_STATE_TRANSPORT_CLOSING; + transport_cleanup (session_get_transport_proto (s), s->connection_index, s->thread_index); - new_sh = session_make_handle (args->new_session_index, - args->new_thread_index); - app_wrk = app_worker_get_if_valid (s->app_wrk_index); if (app_wrk) { /* Cleanup fifo segment slice state for fifos */ sm = app_worker_get_connect_segment_manager (app_wrk); - segment_manager_detach_fifo (sm, s->rx_fifo); - segment_manager_detach_fifo (sm, s->tx_fifo); + segment_manager_detach_fifo (sm, &s->rx_fifo); + segment_manager_detach_fifo (sm, &s->tx_fifo); /* Notify app, using old session, about the migration event */ - app_worker_migrate_notify (app_wrk, s, new_sh); + if (!rargs.is_closed) + { + new_sh = session_make_handle (args->new_session_index, + args->new_thread_index); + app_worker_migrate_notify (app_wrk, s, new_sh); + } } /* Trigger app read and fifo updates on the new thread */ - rargs = uword_to_pointer (args->new_session_index, void *); + rargs.session_index = args->new_session_index; + rargs.thread_index = args->new_thread_index; session_send_rpc_evt_to_thread (args->new_thread_index, - session_switch_pool_reply, rargs); + session_switch_pool_reply, + uword_to_pointer (rargs.as_u64, void *)); session_free (s); clib_mem_free (cb_args); @@ -898,6 +1044,8 @@ session_dgram_connect_notify (transport_connection_t * tc, { session_t *new_s; session_switch_pool_args_t *rpc_args; + segment_manager_t *sm; + app_worker_t *app_wrk; /* * Clone half-open session to the right thread. @@ -907,7 +1055,17 @@ session_dgram_connect_notify (transport_connection_t * tc, new_s->session_state = SESSION_STATE_READY; new_s->flags |= SESSION_F_IS_MIGRATING; - session_lookup_add_connection (tc, session_handle (new_s)); + if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) + session_lookup_add_connection (tc, session_handle (new_s)); + + app_wrk = app_worker_get_if_valid (new_s->app_wrk_index); + if (app_wrk) + { + /* New set of fifos attached to the same shared memory */ + sm = app_worker_get_connect_segment_manager (app_wrk); + segment_manager_attach_fifo (sm, &new_s->rx_fifo, new_s); + segment_manager_attach_fifo (sm, &new_s->tx_fifo, new_s); + } /* * Ask thread owning the old session to clean it up and make us the tx @@ -943,6 +1101,15 @@ session_transport_closing_notify (transport_connection_t * tc) s = session_get (tc->s_index, tc->thread_index); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return; + + /* Wait for reply from app before sending notification as the + * accept might be rejected */ + if (s->session_state == SESSION_STATE_ACCEPTING) + { + s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + return; + } + s->session_state = SESSION_STATE_TRANSPORT_CLOSING; app_wrk = app_worker_get (s->app_wrk_index); app_worker_close_notify (app_wrk, s); @@ -1032,7 +1199,9 @@ session_transport_closed_notify (transport_connection_t * tc) return; /* Transport thinks that app requested close but it actually didn't. - * Can happen for tcp if fin and rst are received in close succession. */ + * Can happen for tcp: + * 1)if fin and rst are received in close succession. + * 2)if app shutdown the connection. */ if (s->session_state == SESSION_STATE_READY) { session_transport_closing_notify (tc); @@ -1067,6 +1236,11 @@ session_transport_reset_notify (transport_connection_t * tc) svm_fifo_dequeue_drop_all (s->tx_fifo); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return; + if (s->session_state == SESSION_STATE_ACCEPTING) + { + s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + return; + } s->session_state = SESSION_STATE_TRANSPORT_CLOSING; app_wrk = app_worker_get (s->app_wrk_index); app_worker_reset_notify (app_wrk, s); @@ -1082,6 +1256,8 @@ session_stream_accept_notify (transport_connection_t * tc) app_wrk = app_worker_get_if_valid (s->app_wrk_index); if (!app_wrk) return -1; + if (s->session_state != SESSION_STATE_CREATED) + return 0; s->session_state = SESSION_STATE_ACCEPTING; if (app_worker_accept_notify (app_wrk, s)) { @@ -1148,22 +1324,22 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index, return rv; } + session_lookup_add_connection (tc, session_handle (s)); + app_wrk = app_worker_get (s->app_wrk_index); if ((rv = app_worker_accept_notify (app_wrk, s))) { + session_lookup_del_session (s); segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); session_free (s); return rv; } - s->session_state = SESSION_STATE_READY; - session_lookup_add_connection (tc, session_handle (s)); - return 0; } int -session_open_cl (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) +session_open_cl (session_endpoint_cfg_t *rmt, session_handle_t *rsh) { transport_connection_t *tc; transport_endpoint_cfg_t *tep; @@ -1183,7 +1359,7 @@ session_open_cl (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) tc = transport_get_half_open (rmt->transport_proto, (u32) rv); /* For dgram type of service, allocate session and fifos now */ - app_wrk = app_worker_get (app_wrk_index); + app_wrk = app_worker_get (rmt->app_wrk_index); s = session_alloc_for_connection (tc); s->app_wrk_index = app_wrk->wrk_index; s->session_state = SESSION_STATE_OPENED; @@ -1194,16 +1370,19 @@ session_open_cl (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) } sh = session_handle (s); + *rsh = sh; + session_lookup_add_connection (tc, sh); - return app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque); + return app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, rmt->opaque); } int -session_open_vc (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) +session_open_vc (session_endpoint_cfg_t *rmt, session_handle_t *rsh) { transport_connection_t *tc; transport_endpoint_cfg_t *tep; - u64 handle; + app_worker_t *app_wrk; + session_t *ho; int rv; tep = session_endpoint_to_transport_cfg (rmt); @@ -1216,38 +1395,40 @@ session_open_vc (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) tc = transport_get_half_open (rmt->transport_proto, (u32) rv); - /* If transport offers a stream service, only allocate session once the - * connection has been established. - * Add connection to half-open table and save app and tc index. The - * latter is needed to help establish the connection while the former - * is needed when the connect notify comes and we have to notify the - * external app - */ - handle = (((u64) app_wrk_index) << 32) | (u64) tc->c_index; - session_lookup_add_half_open (tc, handle); + app_wrk = app_worker_get (rmt->app_wrk_index); - /* Store api_context (opaque) for when the reply comes. Not the nicest - * thing but better than allocating a separate half-open pool. + /* If transport offers a vc service, only allocate established + * session once the connection has been established. + * In the meantime allocate half-open session for tracking purposes + * associate half-open connection to it and add session to app-worker + * half-open table. These are needed to allocate the established + * session on transport notification, and to cleanup the half-open + * session if the app detaches before connection establishment. */ - tc->s_index = opaque; - if (transport_half_open_has_fifos (rmt->transport_proto)) - return session_ho_stream_connect_notify (tc, 0 /* is_fail */ ); + ho = session_alloc_for_half_open (tc); + ho->app_wrk_index = app_wrk->wrk_index; + ho->ho_index = app_worker_add_half_open (app_wrk, session_handle (ho)); + ho->opaque = rmt->opaque; + *rsh = session_handle (ho); + + if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP)) + session_lookup_add_half_open (tc, tc->c_index); + return 0; } int -session_open_app (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) +session_open_app (session_endpoint_cfg_t *rmt, session_handle_t *rsh) { - session_endpoint_cfg_t *sep = (session_endpoint_cfg_t *) rmt; - transport_endpoint_cfg_t *tep_cfg = session_endpoint_to_transport_cfg (sep); - - sep->app_wrk_index = app_wrk_index; - sep->opaque = opaque; + transport_endpoint_cfg_t *tep_cfg = session_endpoint_to_transport_cfg (rmt); + /* Not supported for now */ + *rsh = SESSION_INVALID_HANDLE; return transport_connect (rmt->transport_proto, tep_cfg); } -typedef int (*session_open_service_fn) (u32, session_endpoint_t *, u32); +typedef int (*session_open_service_fn) (session_endpoint_cfg_t *, + session_handle_t *); /* *INDENT-OFF* */ static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = { @@ -1271,11 +1452,11 @@ static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = { * on open completion. */ int -session_open (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) +session_open (session_endpoint_cfg_t *rmt, session_handle_t *rsh) { transport_service_type_t tst; tst = transport_protocol_service_type (rmt->transport_proto); - return session_open_srv_fns[tst] (app_wrk_index, rmt, opaque); + return session_open_srv_fns[tst](rmt, rsh); } /** @@ -1289,12 +1470,12 @@ session_open (u32 app_wrk_index, session_endpoint_t * rmt, u32 opaque) int session_listen (session_t * ls, session_endpoint_cfg_t * sep) { - transport_endpoint_t *tep; + transport_endpoint_cfg_t *tep; int tc_index; u32 s_index; /* Transport bind/listen */ - tep = session_endpoint_to_transport (sep); + tep = session_endpoint_to_transport_cfg (sep); s_index = ls->session_index; tc_index = transport_start_listen (session_get_transport_proto (ls), s_index, tep); @@ -1337,6 +1518,20 @@ session_stop_listen (session_t * s) return 0; } +/** + * Initialize session half-closing procedure. + * + * Note that half-closing will not change the state of the session. + */ +void +session_half_close (session_t *s) +{ + if (!s) + return; + + session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_HALF_CLOSE); +} + /** * Initialize session closing procedure. * @@ -1359,6 +1554,8 @@ session_close (session_t * s) return; } + /* App closed so stop propagating dequeue notifications */ + svm_fifo_clear_deq_ntf (s->tx_fifo); s->session_state = SESSION_STATE_CLOSING; session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE); } @@ -1377,6 +1574,24 @@ session_reset (session_t * s) session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_RESET); } +/** + * Notify transport the session can be half-disconnected. + * + * Must be called from the session's thread. + */ +void +session_transport_half_close (session_t *s) +{ + /* Only READY session can be half-closed */ + if (s->session_state != SESSION_STATE_READY) + { + return; + } + + transport_half_close (session_get_transport_proto (s), s->connection_index, + s->thread_index); +} + /** * Notify transport the session can be disconnected. This should eventually * result in a delete notification that allows us to cleanup session state. @@ -1451,83 +1666,63 @@ session_transport_cleanup (session_t * s) } /** - * Allocate event queues in the shared-memory segment + * Allocate worker mqs in share-able segment * - * That can either be a newly created memfd segment, that will need to be - * mapped by all stack users, or the binary api's svm region. The latter is - * assumed to be already mapped. NOTE that this assumption DOES NOT hold if - * api clients bootstrap shm api over sockets (i.e. use memfd segments) and - * vpp uses api svm region for event queues. + * That can only be a newly created memfd segment, that must be mapped + * by all apps/stack users unless private rx mqs are enabled. */ void -session_vpp_event_queues_allocate (session_main_t * smm) +session_vpp_wrk_mqs_alloc (session_main_t *smm) { - u32 evt_q_length = 2048, evt_size = sizeof (session_event_t); - ssvm_private_t *eqs = &smm->evt_qs_segment; - uword eqs_size = 64 << 20; - pid_t vpp_pid = getpid (); - void *oldheap; + u32 mq_q_length = 2048, evt_size = sizeof (session_event_t); + fifo_segment_t *mqs_seg = &smm->wrk_mqs_segment; + svm_msg_q_cfg_t _cfg, *cfg = &_cfg; + uword mqs_seg_size; int i; - if (smm->configured_event_queue_length) - evt_q_length = smm->configured_event_queue_length; + mq_q_length = clib_max (mq_q_length, smm->configured_wrk_mq_length); - if (smm->evt_qs_use_memfd_seg) - { - if (smm->evt_qs_segment_size) - eqs_size = smm->evt_qs_segment_size; + svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { + { mq_q_length, evt_size, 0 }, { mq_q_length >> 1, 256, 0 } + }; + cfg->consumer_pid = 0; + cfg->n_rings = 2; + cfg->q_nitems = mq_q_length; + cfg->ring_cfgs = rc; - eqs->ssvm_size = eqs_size; - eqs->i_am_master = 1; - eqs->my_pid = vpp_pid; - eqs->name = format (0, "%s%c", "evt-qs-segment", 0); - eqs->requested_va = smm->session_baseva; - - if (ssvm_master_init (eqs, SSVM_SEGMENT_MEMFD)) - { - clib_warning ("failed to initialize queue segment"); - return; - } - } + /* + * Compute mqs segment size based on rings config and leave space + * for passing extended configuration messages, i.e., data allocated + * outside of the rings. If provided with a config value, accept it + * if larger than minimum size. + */ + mqs_seg_size = svm_msg_q_size_to_alloc (cfg) * vec_len (smm->wrk); + mqs_seg_size = mqs_seg_size + (32 << 10); + mqs_seg_size = clib_max (mqs_seg_size, smm->wrk_mqs_segment_size); - if (smm->evt_qs_use_memfd_seg) - oldheap = ssvm_push_heap (eqs->sh); - else - oldheap = vl_msg_push_heap (); + mqs_seg->ssvm.ssvm_size = mqs_seg_size; + mqs_seg->ssvm.my_pid = getpid (); + mqs_seg->ssvm.name = format (0, "%s%c", "session: wrk-mqs-segment", 0); - for (i = 0; i < vec_len (smm->wrk); i++) + if (ssvm_server_init (&mqs_seg->ssvm, SSVM_SEGMENT_MEMFD)) { - svm_msg_q_cfg_t _cfg, *cfg = &_cfg; - svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { - {evt_q_length, evt_size, 0} - , - {evt_q_length >> 1, 256, 0} - }; - cfg->consumer_pid = 0; - cfg->n_rings = 2; - cfg->q_nitems = evt_q_length; - cfg->ring_cfgs = rc; - smm->wrk[i].vpp_event_queue = svm_msg_q_alloc (cfg); - if (smm->evt_qs_use_memfd_seg) - { - if (svm_msg_q_alloc_consumer_eventfd (smm->wrk[i].vpp_event_queue)) - clib_warning ("eventfd returned"); - } + clib_warning ("failed to initialize queue segment"); + return; } - if (smm->evt_qs_use_memfd_seg) - ssvm_pop_heap (oldheap); - else - vl_msg_pop_heap (oldheap); + fifo_segment_init (mqs_seg); + + /* Special fifo segment that's filled only with mqs */ + mqs_seg->h->n_mqs = vec_len (smm->wrk); + + for (i = 0; i < vec_len (smm->wrk); i++) + smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (mqs_seg, i, cfg); } -ssvm_private_t * -session_main_get_evt_q_segment (void) +fifo_segment_t * +session_main_get_wrk_mqs_segment (void) { - session_main_t *smm = &session_main; - if (smm->evt_qs_use_memfd_seg) - return &smm->evt_qs_segment; - return 0; + return &session_main.wrk_mqs_segment; } u64 @@ -1566,22 +1761,48 @@ session_register_transport (transport_proto_t transport_proto, vec_validate (smm->session_type_to_next, session_type); vec_validate (smm->session_tx_fns, session_type); - /* *INDENT-OFF* */ if (output_node != ~0) - { - foreach_vlib_main (({ - next_index = vlib_node_add_next (this_vlib_main, - session_queue_node.index, - output_node); - })); - } - /* *INDENT-ON* */ + next_index = vlib_node_add_next (vlib_get_main (), + session_queue_node.index, output_node); smm->session_type_to_next[session_type] = next_index; smm->session_tx_fns[session_type] = session_tx_fns[vft->transport_options.tx_type]; } +void +session_register_update_time_fn (session_update_time_fn fn, u8 is_add) +{ + session_main_t *smm = &session_main; + session_update_time_fn *fi; + u32 fi_pos = ~0; + u8 found = 0; + + vec_foreach (fi, smm->update_time_fns) + { + if (*fi == fn) + { + fi_pos = fi - smm->update_time_fns; + found = 1; + break; + } + } + + if (is_add) + { + if (found) + { + clib_warning ("update time fn %p already registered", fn); + return; + } + vec_add1 (smm->update_time_fns, fn); + } + else + { + vec_del1 (smm->update_time_fns, fi_pos); + } +} + transport_proto_t session_add_transport_proto (void) { @@ -1623,6 +1844,18 @@ session_get_endpoint (session_t * s, transport_endpoint_t * tep, u8 is_lcl) s->connection_index, tep, is_lcl); } +int +session_transport_attribute (session_t *s, u8 is_get, + transport_endpt_attr_t *attr) +{ + if (s->session_state < SESSION_STATE_READY) + return -1; + + return transport_connection_attribute (session_get_transport_proto (s), + s->connection_index, s->thread_index, + is_get, attr); +} + transport_connection_t * listen_session_get_transport (session_t * s) { @@ -1634,20 +1867,22 @@ void session_queue_run_on_main_thread (vlib_main_t * vm) { ASSERT (vlib_get_thread_index () == 0); - vlib_process_signal_event_mt (vm, session_queue_process_node.index, - SESSION_Q_PROCESS_RUN_ON_MAIN, 0); + vlib_node_set_interrupt_pending (vm, session_queue_node.index); } static clib_error_t * session_manager_main_enable (vlib_main_t * vm) { - segment_manager_main_init_args_t _sm_args = { 0 }, *sm_args = &_sm_args; session_main_t *smm = &session_main; vlib_thread_main_t *vtm = vlib_get_thread_main (); u32 num_threads, preallocated_sessions_per_worker; session_worker_t *wrk; int i; + /* We only initialize once and do not de-initialized on disable */ + if (smm->is_initialized) + goto done; + num_threads = 1 /* main thread */ + vtm->n_threads; if (num_threads < 1) @@ -1655,6 +1890,7 @@ session_manager_main_enable (vlib_main_t * vm) /* Allocate cache line aligned worker contexts */ vec_validate_aligned (smm->wrk, num_threads - 1, CLIB_CACHE_LINE_BYTES); + clib_spinlock_init (&session_main.pool_realloc_lock); for (i = 0; i < num_threads; i++) { @@ -1662,22 +1898,24 @@ session_manager_main_enable (vlib_main_t * vm) wrk->ctrl_head = clib_llist_make_head (wrk->event_elts, evt_list); wrk->new_head = clib_llist_make_head (wrk->event_elts, evt_list); wrk->old_head = clib_llist_make_head (wrk->event_elts, evt_list); - wrk->vm = vlib_mains[i]; - wrk->last_vlib_time = vlib_time_now (vlib_mains[i]); + wrk->pending_connects = clib_llist_make_head (wrk->event_elts, evt_list); + wrk->evts_pending_main = + clib_llist_make_head (wrk->event_elts, evt_list); + wrk->vm = vlib_get_main_by_index (i); + wrk->last_vlib_time = vlib_time_now (vm); wrk->last_vlib_us_time = wrk->last_vlib_time * CLIB_US_TIME_FREQ; + wrk->timerfd = -1; vec_validate (wrk->session_to_enqueue, smm->last_transport_proto_type); - if (num_threads > 1) - clib_rwlock_init (&smm->wrk[i].peekers_rw_locks); + if (!smm->no_adaptive && smm->use_private_rx_mqs) + session_wrk_enable_adaptive_mode (wrk); } /* Allocate vpp event queues segment and queue */ - session_vpp_event_queues_allocate (smm); + session_vpp_wrk_mqs_alloc (smm); - /* Initialize fifo segment main baseva and timeout */ - sm_args->baseva = smm->session_baseva + smm->evt_qs_segment_size; - sm_args->size = smm->session_va_space_size; - segment_manager_main_init (sm_args); + /* Initialize segment manager properties */ + segment_manager_main_init (); /* Preallocate sessions */ if (smm->preallocated_sessions) @@ -1704,57 +1942,65 @@ session_manager_main_enable (vlib_main_t * vm) session_lookup_init (); app_namespaces_init (); transport_init (); + smm->is_initialized = 1; + +done: smm->is_enabled = 1; /* Enable transports */ transport_enable_disable (vm, 1); - -#if SESSION_DEBUG - session_dbg_main_t *sdm = &session_dbg_main; - vec_validate_aligned (sdm->wrk, num_threads - 1, CLIB_CACHE_LINE_BYTES); - int thread; - for (thread = 0; thread < num_threads; thread++) - { - clib_memset (&sdm->wrk[thread], 0, sizeof (session_dbg_evts_t)); - } -#endif /* SESSION_DEBUG */ + session_debug_init (); return 0; } +static void +session_manager_main_disable (vlib_main_t * vm) +{ + transport_enable_disable (vm, 0 /* is_en */ ); +} + void session_node_enable_disable (u8 is_en) { + u8 mstate = is_en ? VLIB_NODE_STATE_INTERRUPT : VLIB_NODE_STATE_DISABLED; u8 state = is_en ? VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_DISABLED; - vlib_thread_main_t *vtm = vlib_get_thread_main (); - u8 have_workers = vtm->n_threads != 0; - - /* *INDENT-OFF* */ - foreach_vlib_main (({ - if (have_workers && ii == 0) - { - vlib_node_set_state (this_vlib_main, session_queue_process_node.index, - state); - if (is_en) - { - vlib_node_t *n = vlib_get_node (this_vlib_main, - session_queue_process_node.index); - vlib_start_process (this_vlib_main, n->runtime_index); - } - else - { - vlib_process_signal_event_mt (this_vlib_main, - session_queue_process_node.index, - SESSION_Q_PROCESS_STOP, 0); - } + session_main_t *sm = &session_main; + vlib_main_t *vm; + vlib_node_t *n; + int n_vlibs, i; - continue; - } - vlib_node_set_state (this_vlib_main, session_queue_node.index, - state); - })); - /* *INDENT-ON* */ + n_vlibs = vlib_get_n_threads (); + for (i = 0; i < n_vlibs; i++) + { + vm = vlib_get_main_by_index (i); + /* main thread with workers and not polling */ + if (i == 0 && n_vlibs > 1) + { + vlib_node_set_state (vm, session_queue_node.index, mstate); + if (is_en) + { + session_main_get_worker (0)->state = SESSION_WRK_INTERRUPT; + vlib_node_set_state (vm, session_queue_process_node.index, + state); + n = vlib_get_node (vm, session_queue_process_node.index); + vlib_start_process (vm, n->runtime_index); + } + else + { + vlib_process_signal_event_mt (vm, + session_queue_process_node.index, + SESSION_Q_PROCESS_STOP, 0); + } + if (!sm->poll_main) + continue; + } + vlib_node_set_state (vm, session_queue_node.index, state); + } + + if (sm->use_private_rx_mqs) + application_enable_rx_mqs_nodes (is_en); } clib_error_t * @@ -1772,6 +2018,7 @@ vnet_session_enable_disable (vlib_main_t * vm, u8 is_en) else { session_main.is_enabled = 0; + session_manager_main_disable (vm); session_node_enable_disable (is_en); } @@ -1785,17 +2032,10 @@ session_main_init (vlib_main_t * vm) smm->is_enabled = 0; smm->session_enable_asap = 0; - smm->session_baseva = HIGH_SEGMENT_BASEVA; - -#if (HIGH_SEGMENT_BASEVA > (4ULL << 30)) - smm->session_va_space_size = 128ULL << 30; - smm->evt_qs_segment_size = 64 << 20; -#else - smm->session_va_space_size = 128 << 20; - smm->evt_qs_segment_size = 1 << 20; -#endif - - smm->last_transport_proto_type = TRANSPORT_PROTO_QUIC; + smm->poll_main = 0; + smm->use_private_rx_mqs = 0; + smm->no_adaptive = 0; + smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP; return 0; } @@ -1825,10 +2065,10 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "event-queue-length %d", &nitems)) + if (unformat (input, "wrk-mq-length %d", &nitems)) { if (nitems >= 2048) - smm->configured_event_queue_length = nitems; + smm->configured_wrk_mq_length = nitems; else clib_warning ("event queue length %d too small, ignored", nitems); } @@ -1890,13 +2130,33 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "local-endpoints-table-buckets %d", &smm->local_endpoints_table_buckets)) ; + else if (unformat (input, "enable")) + smm->session_enable_asap = 1; + else if (unformat (input, "use-app-socket-api")) + (void) appns_sapi_enable_disable (1 /* is_enable */); + else if (unformat (input, "poll-main")) + smm->poll_main = 1; + else if (unformat (input, "use-private-rx-mqs")) + smm->use_private_rx_mqs = 1; + else if (unformat (input, "no-adaptive")) + smm->no_adaptive = 1; + /* + * Deprecated but maintained for compatibility + */ else if (unformat (input, "evt_qs_memfd_seg")) - smm->evt_qs_use_memfd_seg = 1; + ; + else if (unformat (input, "segment-baseva 0x%lx", &tmp)) + ; else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size, - &smm->evt_qs_segment_size)) + &tmp)) ; - else if (unformat (input, "enable")) - smm->session_enable_asap = 1; + else if (unformat (input, "event-queue-length %d", &nitems)) + { + if (nitems >= 2048) + smm->configured_wrk_mq_length = nitems; + else + clib_warning ("event queue length %d too small, ignored", nitems); + } else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input);