X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fsession%2Fsession.c;h=4c57b1ac9bb832706d3f8bdb87313c694ce2004c;hb=20de85b6da7e0b6dd0d73bc8358e7c67564d0c36;hp=d6a531e3f8491a1ec1a2cfea3440ff82db41fe4b;hpb=a48dffeed3d5182d1faa41f2ba64beb20bf10ea0;p=vpp.git diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index d6a531e3f84..4c57b1ac9bb 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include session_main_t session_main; @@ -36,8 +38,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index, mq = wrk->vpp_event_queue; if (PREDICT_FALSE (svm_msg_q_lock (mq))) return -1; - if (PREDICT_FALSE (svm_msg_q_is_full (mq) - || svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) + if (PREDICT_FALSE (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))) { svm_msg_q_unlock (mq); return -2; @@ -202,39 +203,25 @@ session_alloc (u32 thread_index) { session_worker_t *wrk = &session_main.wrk[thread_index]; session_t *s; - u8 will_expand = 0; - pool_get_aligned_will_expand (wrk->sessions, will_expand, - CLIB_CACHE_LINE_BYTES); - /* If we have peekers, let them finish */ - if (PREDICT_FALSE (will_expand && vlib_num_workers ())) - { - clib_rwlock_writer_lock (&wrk->peekers_rw_locks); - pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); - clib_rwlock_writer_unlock (&wrk->peekers_rw_locks); - } - else - { - pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); - } + + pool_get_aligned_safe (wrk->sessions, s, CLIB_CACHE_LINE_BYTES); clib_memset (s, 0, sizeof (*s)); s->session_index = s - wrk->sessions; s->thread_index = thread_index; s->app_index = APP_INVALID_INDEX; + return s; } void session_free (session_t * s) { - if (CLIB_DEBUG) - { - u8 thread_index = s->thread_index; - clib_memset (s, 0xFA, sizeof (*s)); - pool_put (session_main.wrk[thread_index].sessions, s); - return; - } + session_worker_t *wrk = 
&session_main.wrk[s->thread_index]; + SESSION_EVT (SESSION_EVT_FREE, s); - pool_put (session_main.wrk[s->thread_index].sessions, s); + if (CLIB_DEBUG) + clib_memset (s, 0xFA, sizeof (*s)); + pool_put (wrk->sessions, s); } u8 @@ -347,16 +334,15 @@ void session_half_open_delete_notify (transport_connection_t *tc) { /* Notification from ctrl thread accepted without rpc */ - if (tc->thread_index <= 1) + if (!tc->thread_index) { session_half_open_free (ho_session_get (tc->s_index)); } else { void *args = uword_to_pointer ((uword) tc->s_index, void *); - u32 ctrl_thread = vlib_num_workers () ? 1 : 0; - session_send_rpc_evt_to_thread (ctrl_thread, session_half_open_free_rpc, - args); + session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc, + args); } } @@ -937,16 +923,41 @@ session_stream_connect_notify (transport_connection_t * tc, return 0; } +typedef union session_switch_pool_reply_args_ +{ + struct + { + u32 session_index; + u16 thread_index; + u8 is_closed; + }; + u64 as_u64; +} session_switch_pool_reply_args_t; + +STATIC_ASSERT (sizeof (session_switch_pool_reply_args_t) <= sizeof (uword), + "switch pool reply args size"); + static void session_switch_pool_reply (void *arg) { - u32 session_index = pointer_to_uword (arg); + session_switch_pool_reply_args_t rargs; session_t *s; - s = session_get_if_valid (session_index, vlib_get_thread_index ()); + rargs.as_u64 = pointer_to_uword (arg); + s = session_get_if_valid (rargs.session_index, rargs.thread_index); if (!s) return; + /* Session closed during migration. 
Clean everything up */ + if (rargs.is_closed) + { + transport_cleanup (session_get_transport_proto (s), s->connection_index, + s->thread_index); + segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo); + session_free (s); + return; + } + /* Notify app that it has data on the new session */ session_enqueue_notify (s); } @@ -966,21 +977,21 @@ static void session_switch_pool (void *cb_args) { session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args; + session_switch_pool_reply_args_t rargs; session_handle_t new_sh; segment_manager_t *sm; app_worker_t *app_wrk; session_t *s; - void *rargs; ASSERT (args->thread_index == vlib_get_thread_index ()); s = session_get (args->session_index, args->thread_index); + /* Check if session closed during migration */ + rargs.is_closed = s->session_state >= SESSION_STATE_TRANSPORT_CLOSING; + transport_cleanup (session_get_transport_proto (s), s->connection_index, s->thread_index); - new_sh = session_make_handle (args->new_session_index, - args->new_thread_index); - app_wrk = app_worker_get_if_valid (s->app_wrk_index); if (app_wrk) { @@ -990,13 +1001,20 @@ session_switch_pool (void *cb_args) segment_manager_detach_fifo (sm, &s->tx_fifo); /* Notify app, using old session, about the migration event */ - app_worker_migrate_notify (app_wrk, s, new_sh); + if (!rargs.is_closed) + { + new_sh = session_make_handle (args->new_session_index, + args->new_thread_index); + app_worker_migrate_notify (app_wrk, s, new_sh); + } } /* Trigger app read and fifo updates on the new thread */ - rargs = uword_to_pointer (args->new_session_index, void *); + rargs.session_index = args->new_session_index; + rargs.thread_index = args->new_thread_index; session_send_rpc_evt_to_thread (args->new_thread_index, - session_switch_pool_reply, rargs); + session_switch_pool_reply, + uword_to_pointer (rargs.as_u64, void *)); session_free (s); clib_mem_free (cb_args); @@ -1068,6 +1086,15 @@ session_transport_closing_notify (transport_connection_t * tc) s = 
session_get (tc->s_index, tc->thread_index); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return; + + /* Wait for reply from app before sending notification as the + * accept might be rejected */ + if (s->session_state == SESSION_STATE_ACCEPTING) + { + s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + return; + } + s->session_state = SESSION_STATE_TRANSPORT_CLOSING; app_wrk = app_worker_get (s->app_wrk_index); app_worker_close_notify (app_wrk, s); @@ -1194,6 +1221,11 @@ session_transport_reset_notify (transport_connection_t * tc) svm_fifo_dequeue_drop_all (s->tx_fifo); if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING) return; + if (s->session_state == SESSION_STATE_ACCEPTING) + { + s->session_state = SESSION_STATE_TRANSPORT_CLOSING; + return; + } s->session_state = SESSION_STATE_TRANSPORT_CLOSING; app_wrk = app_worker_get (s->app_wrk_index); app_worker_reset_notify (app_wrk, s); @@ -1278,6 +1310,7 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index, } session_lookup_add_connection (tc, session_handle (s)); + s->session_state = SESSION_STATE_ACCEPTING; app_wrk = app_worker_get (s->app_wrk_index); if ((rv = app_worker_accept_notify (app_wrk, s))) @@ -1288,8 +1321,6 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index, return rv; } - s->session_state = SESSION_STATE_READY; - return 0; } @@ -1425,12 +1456,12 @@ session_open (session_endpoint_cfg_t *rmt, session_handle_t *rsh) int session_listen (session_t * ls, session_endpoint_cfg_t * sep) { - transport_endpoint_t *tep; + transport_endpoint_cfg_t *tep; int tc_index; u32 s_index; /* Transport bind/listen */ - tep = session_endpoint_to_transport (sep); + tep = session_endpoint_to_transport_cfg (sep); s_index = ls->session_index; tc_index = transport_start_listen (session_get_transport_proto (ls), s_index, tep); @@ -1442,6 +1473,7 @@ session_listen (session_t * ls, session_endpoint_cfg_t * sep) * worker because local tables (for ct sessions) are not backed 
by a fib */ ls = listen_session_get (s_index); ls->connection_index = tc_index; + ls->opaque = sep->opaque; return 0; } @@ -1509,6 +1541,8 @@ session_close (session_t * s) return; } + /* App closed so stop propagating dequeue notifications */ + svm_fifo_clear_deq_ntf (s->tx_fifo); s->session_state = SESSION_STATE_CLOSING; session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE); } @@ -1619,64 +1653,63 @@ session_transport_cleanup (session_t * s) } /** - * Allocate event queues in the shared-memory segment + * Allocate worker mqs in share-able segment * - * That can only be a newly created memfd segment, that must be - * mapped by all apps/stack users. + * That can only be a newly created memfd segment, that must be mapped + * by all apps/stack users unless private rx mqs are enabled. */ void -session_vpp_event_queues_allocate (session_main_t * smm) +session_vpp_wrk_mqs_alloc (session_main_t *smm) { - u32 evt_q_length = 2048, evt_size = sizeof (session_event_t); - fifo_segment_t *eqs = &smm->evt_qs_segment; - uword eqs_size = 64 << 20; - pid_t vpp_pid = getpid (); + u32 mq_q_length = 2048, evt_size = sizeof (session_event_t); + fifo_segment_t *mqs_seg = &smm->wrk_mqs_segment; + svm_msg_q_cfg_t _cfg, *cfg = &_cfg; + uword mqs_seg_size; int i; - if (smm->configured_event_queue_length) - evt_q_length = smm->configured_event_queue_length; + mq_q_length = clib_max (mq_q_length, smm->configured_wrk_mq_length); + + svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { + { mq_q_length, evt_size, 0 }, { mq_q_length >> 1, 256, 0 } + }; + cfg->consumer_pid = 0; + cfg->n_rings = 2; + cfg->q_nitems = mq_q_length; + cfg->ring_cfgs = rc; - if (smm->evt_qs_segment_size) - eqs_size = smm->evt_qs_segment_size; + /* + * Compute mqs segment size based on rings config and leave space + * for passing extended configuration messages, i.e., data allocated + * outside of the rings. If provided with a config value, accept it + * if larger than minimum size. 
+ */ + mqs_seg_size = svm_msg_q_size_to_alloc (cfg) * vec_len (smm->wrk); + mqs_seg_size = mqs_seg_size + (1 << 20); + mqs_seg_size = clib_max (mqs_seg_size, smm->wrk_mqs_segment_size); - eqs->ssvm.ssvm_size = eqs_size; - eqs->ssvm.my_pid = vpp_pid; - eqs->ssvm.name = format (0, "%s%c", "session: evt-qs-segment", 0); - /* clib_mem_vm_map_shared consumes first page before requested_va */ - eqs->ssvm.requested_va = smm->session_baseva + clib_mem_get_page_size (); + mqs_seg->ssvm.ssvm_size = mqs_seg_size; + mqs_seg->ssvm.my_pid = getpid (); + mqs_seg->ssvm.name = format (0, "%s%c", "session: wrk-mqs-segment", 0); - if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD)) + if (ssvm_server_init (&mqs_seg->ssvm, SSVM_SEGMENT_MEMFD)) { clib_warning ("failed to initialize queue segment"); return; } - fifo_segment_init (eqs); + fifo_segment_init (mqs_seg); /* Special fifo segment that's filled only with mqs */ - eqs->h->n_mqs = vec_len (smm->wrk); + mqs_seg->h->n_mqs = vec_len (smm->wrk); for (i = 0; i < vec_len (smm->wrk); i++) - { - svm_msg_q_cfg_t _cfg, *cfg = &_cfg; - svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = { - {evt_q_length, evt_size, 0} - , - {evt_q_length >> 1, 256, 0} - }; - cfg->consumer_pid = 0; - cfg->n_rings = 2; - cfg->q_nitems = evt_q_length; - cfg->ring_cfgs = rc; - - smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (eqs, i, cfg); - } + smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (mqs_seg, i, cfg); } fifo_segment_t * -session_main_get_evt_q_segment (void) +session_main_get_wrk_mqs_segment (void) { - return &session_main.evt_qs_segment; + return &session_main.wrk_mqs_segment; } u64 @@ -1724,6 +1757,39 @@ session_register_transport (transport_proto_t transport_proto, session_tx_fns[vft->transport_options.tx_type]; } +void +session_register_update_time_fn (session_update_time_fn fn, u8 is_add) +{ + session_main_t *smm = &session_main; + session_update_time_fn *fi; + u32 fi_pos = ~0; + u8 found = 0; + + vec_foreach (fi, smm->update_time_fns) 
+ { + if (*fi == fn) + { + fi_pos = fi - smm->update_time_fns; + found = 1; + break; + } + } + + if (is_add) + { + if (found) + { + clib_warning ("update time fn %p already registered", fn); + return; + } + vec_add1 (smm->update_time_fns, fn); + } + else + { + vec_del1 (smm->update_time_fns, fi_pos); + } +} + transport_proto_t session_add_transport_proto (void) { @@ -1791,6 +1857,44 @@ session_queue_run_on_main_thread (vlib_main_t * vm) vlib_node_set_interrupt_pending (vm, session_queue_node.index); } +static void +session_stats_collector_fn (vlib_stats_collector_data_t *d) +{ + u32 i, n_workers, n_wrk_sessions, n_sessions = 0; + session_main_t *smm = &session_main; + session_worker_t *wrk; + counter_t **counters; + counter_t *cb; + + n_workers = vec_len (smm->wrk); + vlib_stats_validate (d->entry_index, 0, n_workers - 1); + counters = d->entry->data; + cb = counters[0]; + + for (i = 0; i < vec_len (smm->wrk); i++) + { + wrk = session_main_get_worker (i); + n_wrk_sessions = pool_elts (wrk->sessions); + cb[i] = n_wrk_sessions; + n_sessions += n_wrk_sessions; + } + + vlib_stats_set_gauge (d->private_data, n_sessions); +} + +static void +session_stats_collector_init (void) +{ + vlib_stats_collector_reg_t reg = {}; + + reg.entry_index = + vlib_stats_add_counter_vector ("/sys/session/sessions_per_worker"); + reg.private_data = vlib_stats_add_gauge ("/sys/session/sessions_total"); + reg.collect_fn = session_stats_collector_fn; + vlib_stats_register_collector_fn (&reg); + vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ()); +} + static clib_error_t * session_manager_main_enable (vlib_main_t * vm) { @@ -1811,6 +1915,7 @@ session_manager_main_enable (vlib_main_t * vm) /* Allocate cache line aligned worker contexts */ vec_validate_aligned (smm->wrk, num_threads - 1, CLIB_CACHE_LINE_BYTES); + clib_spinlock_init (&session_main.pool_realloc_lock); for (i = 0; i < num_threads; i++) { @@ -1819,21 +1924,20 @@ session_manager_main_enable (vlib_main_t * vm) wrk->new_head = 
clib_llist_make_head (wrk->event_elts, evt_list); wrk->old_head = clib_llist_make_head (wrk->event_elts, evt_list); wrk->pending_connects = clib_llist_make_head (wrk->event_elts, evt_list); + wrk->evts_pending_main = + clib_llist_make_head (wrk->event_elts, evt_list); wrk->vm = vlib_get_main_by_index (i); wrk->last_vlib_time = vlib_time_now (vm); wrk->last_vlib_us_time = wrk->last_vlib_time * CLIB_US_TIME_FREQ; wrk->timerfd = -1; vec_validate (wrk->session_to_enqueue, smm->last_transport_proto_type); - if (num_threads > 1) - clib_rwlock_init (&smm->wrk[i].peekers_rw_locks); - if (!smm->no_adaptive && smm->use_private_rx_mqs) session_wrk_enable_adaptive_mode (wrk); } /* Allocate vpp event queues segment and queue */ - session_vpp_event_queues_allocate (smm); + session_vpp_wrk_mqs_alloc (smm); /* Initialize segment manager properties */ segment_manager_main_init (); @@ -1863,6 +1967,7 @@ session_manager_main_enable (vlib_main_t * vm) session_lookup_init (); app_namespaces_init (); transport_init (); + session_stats_collector_init (); smm->is_initialized = 1; done: @@ -1882,6 +1987,86 @@ session_manager_main_disable (vlib_main_t * vm) transport_enable_disable (vm, 0 /* is_en */ ); } +/* in this new callback, cookie hint the index */ +void +session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch) +{ + session_worker_t *wrk; + wrk = session_main_get_worker (vm->thread_index); + session_dma_transfer *dma_transfer; + + dma_transfer = &wrk->dma_trans[wrk->trans_head]; + vec_add (wrk->pending_tx_buffers, dma_transfer->pending_tx_buffers, + vec_len (dma_transfer->pending_tx_buffers)); + vec_add (wrk->pending_tx_nexts, dma_transfer->pending_tx_nexts, + vec_len (dma_transfer->pending_tx_nexts)); + vec_reset_length (dma_transfer->pending_tx_buffers); + vec_reset_length (dma_transfer->pending_tx_nexts); + wrk->trans_head++; + if (wrk->trans_head == wrk->trans_size) + wrk->trans_head = 0; + return; +} + +static void +session_prepare_dma_args (vlib_dma_config_t 
*args) +{ + args->max_transfers = DMA_TRANS_SIZE; + args->max_transfer_size = 65536; + args->features = 0; + args->sw_fallback = 1; + args->barrier_before_last = 1; + args->callback_fn = session_dma_completion_cb; +} + +static void +session_node_enable_dma (u8 is_en, int n_vlibs) +{ + vlib_dma_config_t args; + session_prepare_dma_args (&args); + session_worker_t *wrk; + vlib_main_t *vm; + + int config_index = -1; + + if (is_en) + { + vm = vlib_get_main_by_index (0); + config_index = vlib_dma_config_add (vm, &args); + } + else + { + vm = vlib_get_main_by_index (0); + wrk = session_main_get_worker (0); + if (wrk->config_index >= 0) + vlib_dma_config_del (vm, wrk->config_index); + } + int i; + for (i = 0; i < n_vlibs; i++) + { + vm = vlib_get_main_by_index (i); + wrk = session_main_get_worker (vm->thread_index); + wrk->config_index = config_index; + if (is_en) + { + if (config_index >= 0) + wrk->dma_enabled = true; + wrk->dma_trans = (session_dma_transfer *) clib_mem_alloc ( + sizeof (session_dma_transfer) * DMA_TRANS_SIZE); + bzero (wrk->dma_trans, + sizeof (session_dma_transfer) * DMA_TRANS_SIZE); + } + else + { + if (wrk->dma_trans) + clib_mem_free (wrk->dma_trans); + } + wrk->trans_head = 0; + wrk->trans_tail = 0; + wrk->trans_size = DMA_TRANS_SIZE; + } +} + void session_node_enable_disable (u8 is_en) { @@ -1900,9 +2085,9 @@ session_node_enable_disable (u8 is_en) if (i == 0 && n_vlibs > 1) { vlib_node_set_state (vm, session_queue_node.index, mstate); - session_main_get_worker (0)->state = SESSION_WRK_INTERRUPT; if (is_en) { + session_main_get_worker (0)->state = SESSION_WRK_INTERRUPT; vlib_node_set_state (vm, session_queue_process_node.index, state); n = vlib_get_node (vm, session_queue_process_node.index); @@ -1922,6 +2107,9 @@ session_node_enable_disable (u8 is_en) if (sm->use_private_rx_mqs) application_enable_rx_mqs_nodes (is_en); + + if (sm->dma_enabled) + session_node_enable_dma (is_en, n_vlibs); } clib_error_t * @@ -1956,17 +2144,7 @@ session_main_init 
(vlib_main_t * vm) smm->poll_main = 0; smm->use_private_rx_mqs = 0; smm->no_adaptive = 0; - smm->session_baseva = HIGH_SEGMENT_BASEVA; - -#if (HIGH_SEGMENT_BASEVA > (4ULL << 30)) - smm->session_va_space_size = 128ULL << 30; - smm->evt_qs_segment_size = 64 << 20; -#else - smm->session_va_space_size = 128 << 20; - smm->evt_qs_segment_size = 1 << 20; -#endif - - smm->last_transport_proto_type = TRANSPORT_PROTO_DTLS; + smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP; return 0; } @@ -1996,13 +2174,16 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - if (unformat (input, "event-queue-length %d", &nitems)) + if (unformat (input, "wrk-mq-length %d", &nitems)) { if (nitems >= 2048) - smm->configured_event_queue_length = nitems; + smm->configured_wrk_mq_length = nitems; else clib_warning ("event queue length %d too small, ignored", nitems); } + else if (unformat (input, "wrk-mqs-segment-size %U", + unformat_memory_size, &smm->wrk_mqs_segment_size)) + ; else if (unformat (input, "preallocated-sessions %d", &smm->preallocated_sessions)) ; @@ -2061,24 +2242,35 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "local-endpoints-table-buckets %d", &smm->local_endpoints_table_buckets)) ; - /* Deprecated but maintained for compatibility */ - else if (unformat (input, "evt_qs_memfd_seg")) - ; - else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size, - &smm->evt_qs_segment_size)) - ; else if (unformat (input, "enable")) smm->session_enable_asap = 1; - else if (unformat (input, "segment-baseva 0x%lx", &smm->session_baseva)) - ; else if (unformat (input, "use-app-socket-api")) - appns_sapi_enable (); + (void) appns_sapi_enable_disable (1 /* is_enable */); else if (unformat (input, "poll-main")) smm->poll_main = 1; else if (unformat (input, "use-private-rx-mqs")) smm->use_private_rx_mqs = 1; else if (unformat (input, "no-adaptive")) 
smm->no_adaptive = 1; + else if (unformat (input, "use-dma")) + smm->dma_enabled = 1; + /* + * Deprecated but maintained for compatibility + */ + else if (unformat (input, "evt_qs_memfd_seg")) + ; + else if (unformat (input, "segment-baseva 0x%lx", &tmp)) + ; + else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size, + &smm->wrk_mqs_segment_size)) + ; + else if (unformat (input, "event-queue-length %d", &nitems)) + { + if (nitems >= 2048) + smm->configured_wrk_mq_length = nitems; + else + clib_warning ("event queue length %d too small, ignored", nitems); + } else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input);