X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvlib%2Funix%2Finput.c;h=321e443dee9ab5cf817d2343a7ad08f5ce18b2c3;hb=eb987d3a09f669787014b1553f032219522149e1;hp=515dae94811c61f19601fff3a2539fe9fb48ea0c;hpb=5c20a0131a6a2516c14d5ccfc6db90fd13ec8a33;p=vpp.git diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 515dae94811..321e443dee9 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -40,6 +40,7 @@ #include #include #include +#include #include /* FIXME autoconf */ @@ -51,32 +52,33 @@ typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); int epoll_fd; struct epoll_event *epoll_events; + int n_epoll_fds; /* Statistics. */ u64 epoll_files_ready; u64 epoll_waits; } linux_epoll_main_t; -static linux_epoll_main_t linux_epoll_main; +static linux_epoll_main_t *linux_epoll_mains = 0; static void -linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) +linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) { - unix_main_t *um = &unix_main; - linux_epoll_main_t *em = &linux_epoll_main; - struct epoll_event e; - int op; - - memset (&e, 0, sizeof (e)); + clib_file_main_t *fm = &file_main; + linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, + f->polling_thread_index); + struct epoll_event e = { 0 }; + int op, add_del = 0; e.events = EPOLLIN; if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) e.events |= EPOLLOUT; if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED) e.events |= EPOLLET; - e.data.u32 = f - um->file_pool; + e.data.u32 = f - fm->file_pool; op = -1; @@ -84,6 +86,7 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) { case UNIX_FILE_UPDATE_ADD: op = EPOLL_CTL_ADD; + add_del = 1; break; case UNIX_FILE_UPDATE_MODIFY: @@ -92,6 +95,7 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) case UNIX_FILE_UPDATE_DELETE: op = EPOLL_CTL_DEL; + add_del = -1; break; default: @@ -99,18 +103,43 @@ linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type) return; } + /* worker threads open epoll fd only if needed */ + if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1) + { + em->epoll_fd = epoll_create (1); + if (em->epoll_fd < 0) + { + clib_unix_warning ("epoll_create"); + return; + } + em->n_epoll_fds = 0; + } + if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0) - clib_unix_warning ("epoll_ctl"); + { + clib_unix_warning ("epoll_ctl"); + return; + } + + em->n_epoll_fds += add_del; + + if (em->n_epoll_fds == 0) + { + close (em->epoll_fd); + em->epoll_fd = -1; + } } -static uword -linux_epoll_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +static_always_inline uword +linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, u32 thread_index) { unix_main_t *um = &unix_main; - linux_epoll_main_t *em = &linux_epoll_main; + clib_file_main_t *fm = &file_main; + linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index); struct epoll_event *e; int n_fds_ready; + int is_main = (thread_index == 0); { vlib_node_main_t *nm = &vm->node_main; @@ -120,7 +149,7 @@ linux_epoll_input (vlib_main_t * vm, f64 vector_rate = vlib_last_vectors_per_main_loop (vm); /* If we're not working very hard, decide how long to sleep */ - if (vector_rate < 2 && vm->api_queue_nonempty == 0 + if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0 && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) { ticks_until_expiration = TW (tw_timer_first_expires_in_ticks) @@ -147,6 +176,13 @@ linux_epoll_input (vlib_main_t * vm, } node->input_main_loops_per_call = 0; } + else if (is_main == 0 && vector_rate < 2 && + nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) + { + timeout = 10e-3; + timeout_ms = max_timeout_ms; + node->input_main_loops_per_call = 0; + } else /* busy */ { /* Don't come back for a respectable number of dispatch cycles */ @@ -154,21 +190,28 @@ linux_epoll_input (vlib_main_t * vm, } /* Allow any signal to wakeup our sleep. */ - { - static sigset_t unblock_all_signals; - n_fds_ready = epoll_pwait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), - timeout_ms, &unblock_all_signals); - - /* This kludge is necessary to run over absurdly old kernels */ - if (n_fds_ready < 0 && errno == ENOSYS) - { - n_fds_ready = epoll_wait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), timeout_ms); - } - } + if (is_main || em->epoll_fd != -1) + { + static sigset_t unblock_all_signals; + n_fds_ready = epoll_pwait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), + timeout_ms, &unblock_all_signals); + + /* This kludge is necessary to run over absurdly old kernels */ + if (n_fds_ready < 0 && errno == ENOSYS) + { + n_fds_ready = epoll_wait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), timeout_ms); + } + } + else + { + if (timeout_ms) + usleep (timeout_ms * 1000); + return 0; + } } if (n_fds_ready < 0) @@ -186,7 +229,7 @@ linux_epoll_input (vlib_main_t * vm, for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++) { u32 i = e->data.u32; - unix_file_t *f = pool_elt_at_index (um->file_pool, i); + clib_file_t *f = pool_elt_at_index (fm->file_pool, i); clib_error_t *errors[4]; int n_errors = 0; @@ -195,11 +238,13 @@ linux_epoll_input (vlib_main_t * vm, if (e->events & EPOLLIN) { errors[n_errors] = f->read_function (f); + f->read_events++; n_errors += errors[n_errors] != 0; } if (e->events & EPOLLOUT) { errors[n_errors] = f->write_function (f); + f->write_events++; n_errors += errors[n_errors] != 0; } } @@ -208,6 +253,7 @@ linux_epoll_input (vlib_main_t * vm, if (f->error_function) { errors[n_errors] = f->error_function (f); + f->error_events++; n_errors += errors[n_errors] != 0; } else @@ -224,6 +270,18 @@ linux_epoll_input (vlib_main_t * vm, return 0; } +static uword +linux_epoll_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 thread_index = vlib_get_thread_index (); + + if (thread_index == 0) + return linux_epoll_input_inline (vm, node, frame, 0); + else + return linux_epoll_input_inline (vm, node, frame, thread_index); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { .function = linux_epoll_input, @@ -235,17 +293,30 @@ VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { clib_error_t * linux_epoll_input_init (vlib_main_t * vm) { - linux_epoll_main_t *em = &linux_epoll_main; - unix_main_t *um = &unix_main; + linux_epoll_main_t *em; + clib_file_main_t *fm = &file_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + - /* Allocate some events. */ - vec_resize (em->epoll_events, VLIB_FRAME_SIZE); + vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); - em->epoll_fd = epoll_create (vec_len (em->epoll_events)); - if (em->epoll_fd < 0) - return clib_error_return_unix (0, "epoll_create"); + vec_foreach (em, linux_epoll_mains) + { + /* Allocate some events. */ + vec_resize (em->epoll_events, VLIB_FRAME_SIZE); + + if (linux_epoll_mains == em) + { + em->epoll_fd = epoll_create (1); + if (em->epoll_fd < 0) + return clib_error_return_unix (0, "epoll_create"); + } + else + em->epoll_fd = -1; + } - um->file_update = linux_epoll_file_update; + fm->file_update = linux_epoll_file_update; return 0; }