X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Funix%2Finput.c;h=321e443dee9ab5cf817d2343a7ad08f5ce18b2c3;hb=eb987d3a09f669787014b1553f032219522149e1;hp=07096ed27dc25b3da7712380188570a44c46ac4d;hpb=7cd468a3d7dee7d6c92f69a0bb7061ae208ec727;p=vpp.git diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 07096ed27dc..321e443dee9 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -40,6 +40,8 @@ #include #include #include +#include +#include /* FIXME autoconf */ #define HAVE_LINUX_EPOLL @@ -50,119 +52,166 @@ typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); int epoll_fd; struct epoll_event *epoll_events; + int n_epoll_fds; /* Statistics. */ u64 epoll_files_ready; u64 epoll_waits; } linux_epoll_main_t; -static linux_epoll_main_t linux_epoll_main; +static linux_epoll_main_t *linux_epoll_mains = 0; static void -linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) +linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) { - unix_main_t *um = &unix_main; - linux_epoll_main_t *em = &linux_epoll_main; - struct epoll_event e; - - memset (&e, 0, sizeof (e)); + clib_file_main_t *fm = &file_main; + linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, + f->polling_thread_index); + struct epoll_event e = { 0 }; + int op, add_del = 0; e.events = EPOLLIN; if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) e.events |= EPOLLOUT; if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED) e.events |= EPOLLET; - e.data.u32 = f - um->file_pool; - - if (epoll_ctl (em->epoll_fd, - (update_type == UNIX_FILE_UPDATE_ADD - ? EPOLL_CTL_ADD - : (update_type == UNIX_FILE_UPDATE_MODIFY - ? EPOLL_CTL_MOD - : EPOLL_CTL_DEL)), f->file_descriptor, &e) < 0) - clib_warning ("epoll_ctl"); + e.data.u32 = f - fm->file_pool; + + op = -1; + + switch (update_type) + { + case UNIX_FILE_UPDATE_ADD: + op = EPOLL_CTL_ADD; + add_del = 1; + break; + + case UNIX_FILE_UPDATE_MODIFY: + op = EPOLL_CTL_MOD; + break; + + case UNIX_FILE_UPDATE_DELETE: + op = EPOLL_CTL_DEL; + add_del = -1; + break; + + default: + clib_warning ("unknown update_type %d", update_type); + return; + } + + /* worker threads open epoll fd only if needed */ + if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1) + { + em->epoll_fd = epoll_create (1); + if (em->epoll_fd < 0) + { + clib_unix_warning ("epoll_create"); + return; + } + em->n_epoll_fds = 0; + } + + if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0) + { + clib_unix_warning ("epoll_ctl"); + return; + } + + em->n_epoll_fds += add_del; + + if (em->n_epoll_fds == 0) + { + close (em->epoll_fd); + em->epoll_fd = -1; + } } -static uword -linux_epoll_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +static_always_inline uword +linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, u32 thread_index) { unix_main_t *um = &unix_main; - linux_epoll_main_t *em = &linux_epoll_main; + clib_file_main_t *fm = &file_main; + linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index); struct epoll_event *e; int n_fds_ready; + int is_main = (thread_index == 0); { vlib_node_main_t *nm = &vm->node_main; - u64 t = nm->cpu_time_next_process_ready; + u32 ticks_until_expiration; f64 timeout; - int timeout_ms, max_timeout_ms = 10; + int timeout_ms = 0, max_timeout_ms = 10; f64 vector_rate = vlib_last_vectors_per_main_loop (vm); - if (t == ~0ULL) + /* If we're not working very hard, decide how long to sleep */ + if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0 + && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) { - timeout = 10e-3; - timeout_ms = max_timeout_ms; - } - else - { - timeout = - (((i64) t - (i64) clib_cpu_time_now ()) - * vm->clib_time.seconds_per_clock) - /* subtract off some slop time */ - 50e-6; + ticks_until_expiration = TW (tw_timer_first_expires_in_ticks) + ((TWT (tw_timer_wheel) *) nm->timing_wheel); - if (timeout < 1e3) + /* Nothing on the fast wheel, sleep 10ms */ + if (ticks_until_expiration == TW_SLOTS_PER_RING) { - /* We have event happenning in less than 1 ms so - don't allow epoll to wait */ - timeout_ms = 0; + timeout = 10e-3; + timeout_ms = max_timeout_ms; } else { - timeout_ms = timeout * 1e3; - - /* Must be between 1 and 10 ms. */ - timeout_ms = clib_max (1, timeout_ms); - timeout_ms = clib_min (max_timeout_ms, timeout_ms); + timeout = (f64) ticks_until_expiration *1e-5; + if (timeout < 1e-3) + timeout_ms = 0; + else + { + timeout_ms = timeout * 1e3; + /* Must be between 1 and 10 ms. */ + timeout_ms = clib_max (1, timeout_ms); + timeout_ms = clib_min (max_timeout_ms, timeout_ms); + } } + node->input_main_loops_per_call = 0; } - - /* If we still have input nodes polling (e.g. vnet packet generator) - don't sleep. */ - if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] > 0) - timeout_ms = 0; - - /* - * When busy: don't wait & only epoll for input - * every 1024 times through main loop. - */ - if (vector_rate > 1 || vm->api_queue_nonempty) + else if (is_main == 0 && vector_rate < 2 && + nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) + { + timeout = 10e-3; + timeout_ms = max_timeout_ms; + node->input_main_loops_per_call = 0; + } + else /* busy */ { - timeout_ms = 0; + /* Don't come back for a respectable number of dispatch cycles */ node->input_main_loops_per_call = 1024; } - else - /* We're not busy; go to sleep for a while. */ - node->input_main_loops_per_call = 0; /* Allow any signal to wakeup our sleep. */ - { - static sigset_t unblock_all_signals; - n_fds_ready = epoll_pwait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), - timeout_ms, &unblock_all_signals); - - /* This kludge is necessary to run over absurdly old kernels */ - if (n_fds_ready < 0 && errno == ENOSYS) - { - n_fds_ready = epoll_wait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), timeout_ms); - } - } + if (is_main || em->epoll_fd != -1) + { + static sigset_t unblock_all_signals; + n_fds_ready = epoll_pwait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), + timeout_ms, &unblock_all_signals); + + /* This kludge is necessary to run over absurdly old kernels */ + if (n_fds_ready < 0 && errno == ENOSYS) + { + n_fds_ready = epoll_wait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), timeout_ms); + } + } + else + { + if (timeout_ms) + usleep (timeout_ms * 1000); + return 0; + } } if (n_fds_ready < 0) @@ -180,7 +229,7 @@ linux_epoll_input (vlib_main_t * vm, for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++) { u32 i = e->data.u32; - unix_file_t *f = pool_elt_at_index (um->file_pool, i); + clib_file_t *f = pool_elt_at_index (fm->file_pool, i); clib_error_t *errors[4]; int n_errors = 0; @@ -189,11 +238,13 @@ linux_epoll_input (vlib_main_t * vm, if (e->events & EPOLLIN) { errors[n_errors] = f->read_function (f); + f->read_events++; n_errors += errors[n_errors] != 0; } if (e->events & EPOLLOUT) { errors[n_errors] = f->write_function (f); + f->write_events++; n_errors += errors[n_errors] != 0; } } @@ -202,6 +253,7 @@ linux_epoll_input (vlib_main_t * vm, if (f->error_function) { errors[n_errors] = f->error_function (f); + f->error_events++; n_errors += errors[n_errors] != 0; } else @@ -218,6 +270,18 @@ linux_epoll_input (vlib_main_t * vm, return 0; } +static uword +linux_epoll_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 thread_index = vlib_get_thread_index (); + + if (thread_index == 0) + return linux_epoll_input_inline (vm, node, frame, 0); + else + return linux_epoll_input_inline (vm, node, frame, thread_index); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { .function = linux_epoll_input, @@ -229,17 +293,30 @@ VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { clib_error_t * linux_epoll_input_init (vlib_main_t * vm) { - linux_epoll_main_t *em = &linux_epoll_main; - unix_main_t *um = &unix_main; + linux_epoll_main_t *em; + clib_file_main_t *fm = &file_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); - /* Allocate some events. */ - vec_resize (em->epoll_events, VLIB_FRAME_SIZE); - em->epoll_fd = epoll_create (vec_len (em->epoll_events)); - if (em->epoll_fd < 0) - return clib_error_return_unix (0, "epoll_create"); + vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + vec_foreach (em, linux_epoll_mains) + { + /* Allocate some events. */ + vec_resize (em->epoll_events, VLIB_FRAME_SIZE); + + if (linux_epoll_mains == em) + { + em->epoll_fd = epoll_create (1); + if (em->epoll_fd < 0) + return clib_error_return_unix (0, "epoll_create"); + } + else + em->epoll_fd = -1; + } - um->file_update = linux_epoll_file_update; + fm->file_update = linux_epoll_file_update; return 0; }