2 * Copyright (c) 2015 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
16 * input.c: Unix file input
18 * Copyright (c) 2008 Eliot Dresselhaus
20 * Permission is hereby granted, free of charge, to any person obtaining
21 * a copy of this software and associated documentation files (the
22 * "Software"), to deal in the Software without restriction, including
23 * without limitation the rights to use, copy, modify, merge, publish,
24 * distribute, sublicense, and/or sell copies of the Software, and to
25 * permit persons to whom the Software is furnished to do so, subject to
26 * the following conditions:
28 * The above copyright notice and this permission notice shall be
29 * included in all copies or substantial portions of the Software.
31 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
35 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
36 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
37 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
40 #include <vlib/vlib.h>
41 #include <vlib/unix/unix.h>
44 #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
47 #define HAVE_LINUX_EPOLL
49 #ifdef HAVE_LINUX_EPOLL
51 #include <sys/epoll.h>
/*
 * Fragment of the per-thread epoll state record.  The global vector below
 * names its element type linux_epoll_main_t.
 * NOTE(review): the opening and closing lines of the record, and the members
 * epoll_fd, n_epoll_fds and epoll_waits that other functions in this file
 * access, are not visible in this excerpt -- confirm against the full file.
 */
55 CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
/* Event buffer; its vec_len is passed as the capacity of each epoll wait
 * and the input node walks it from the start up to the ready count. */
57 struct epoll_event *epoll_events;
/* Running total of the ready counts returned by the epoll waits. */
61 u64 epoll_files_ready;
/* One record per thread, indexed by thread index; sized by
 * linux_epoll_input_init. */
65 static linux_epoll_main_t *linux_epoll_mains = 0;
/*
 * File-update hook (installed as fm->file_update by linux_epoll_input_init).
 * Applies an add / modify / delete of a file to the epoll set that belongs
 * to the thread selected by f->polling_thread_index.
 *
 * f           - file whose registration changed.
 * update_type - UNIX_FILE_UPDATE_ADD, UNIX_FILE_UPDATE_MODIFY or
 *               UNIX_FILE_UPDATE_DELETE; any other value is reported with
 *               clib_warning.
 *
 * NOTE(review): the return type, braces, the declarations of op and add_del,
 * the statements guarded by the flag tests and the bodies of the switch
 * cases are not visible in this excerpt.  The comments below describe only
 * the lines that are shown.
 */
68 linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
70 clib_file_main_t *fm = &file_main;
71 linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains,
72 f->polling_thread_index);
73 struct epoll_event e = { 0 };
/* File flags tested here presumably select extra bits in the event mask
 * (the guarded statements are not shown) -- TODO confirm. */
77 if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
79 if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
/* Event payload is the index of the file within fm->file_pool. */
81 e.data.u32 = f - fm->file_pool;
87 case UNIX_FILE_UPDATE_ADD:
92 case UNIX_FILE_UPDATE_MODIFY:
96 case UNIX_FILE_UPDATE_DELETE:
102 clib_warning ("unknown update_type %d", update_type);
/* An epoll_fd of -1 marks a thread that has no epoll instance yet; the
 * first ADD for such a thread creates one. */
106 /* worker threads open epoll fd only if needed */
107 if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1)
109 em->epoll_fd = epoll_create (1);
110 if (em->epoll_fd < 0)
112 clib_unix_warning ("epoll_create");
/* Push the change into the kernel epoll set; a failure is reported through
 * clib_unix_warning. */
118 if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
120 clib_unix_warning ("epoll_ctl");
/* Keep the per-thread count of registered descriptors up to date. */
124 em->n_epoll_fds += add_del;
/* Nothing left registered on this thread: release its epoll descriptor. */
126 if (em->n_epoll_fds == 0)
128 close (em->epoll_fd);
/*
 * Per-thread body of the epoll input node.
 *
 * Visible steps, in order:
 *   1. Pick a wait timeout in milliseconds.
 *      - Main thread with um->poll_sleep_usec set: nanosleep for that many
 *        microseconds instead of using an epoll timeout.
 *      - Main thread with a low vector rate, an empty API queue and no
 *        polling input nodes: derive the timeout from the timer wheel
 *        (ticks scaled by 1e-5 to seconds, then by 1e3 to milliseconds),
 *        clamped to the range 1..max_timeout_ms; an empty fast wheel
 *        selects max_timeout_ms.
 *      - Worker thread with a low vector rate, no polling input nodes and
 *        more than 0.5 s since the last barrier release: max_timeout_ms.
 *      - Otherwise: ask not to be called again for 1024 main loops.
 *   2. Wait.  Threads with an epoll descriptor call epoll_pwait, falling
 *      back to epoll_wait when the kernel reports ENOSYS.  A worker without
 *      an epoll descriptor sleeps in 100 us slices until the timeout passes
 *      and checks the wait_at_barrier flag after each slice.
 *   3. A negative ready count with a fatal errno panics through
 *      vlib_panic_with_error; other errors (EINTR for example) are treated
 *      as non fatal.
 *   4. Update the epoll_waits and epoll_files_ready counters.
 *   5. For every ready event: if the pool slot is free, build a dropped-event
 *      error per event bit; else, without EPOLLERR, call the read function
 *      on EPOLLIN and the write function on EPOLLOUT; else call the error
 *      function when one is set.  A close of the descriptor is also visible
 *      in that last branch, presumably for files without an error function
 *      -- TODO confirm.  Collected errors go to unix_save_error.
 *   6. Refresh vm->cpu_id and vm->numa_node when the thread is found running
 *      on a different cpu than recorded.
 *
 * vm           - vlib main of the calling thread.
 * node         - runtime of this node; input_main_loops_per_call is written
 *                on every visible timeout path.
 * frame        - not referenced in the visible lines.
 * thread_index - selects the linux_epoll_mains entry; 0 means main thread.
 *
 * NOTE(review): many short lines of this function (braces, else keywords,
 * several declarations and assignments, goto and return statements) are not
 * visible in this excerpt, so control flow between the steps above should be
 * confirmed against the full file.
 */
133 static_always_inline uword
134 linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
135 vlib_frame_t * frame, u32 thread_index)
137 unix_main_t *um = &unix_main;
138 clib_file_main_t *fm = &file_main;
139 linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index);
140 struct epoll_event *e;
/* Thread index 0 is the main thread; it gets its own timeout policy. */
142 int is_main = (thread_index == 0);
145 vlib_node_main_t *nm = &vm->node_main;
146 u32 ticks_until_expiration;
/* timeout_ms is what the wait call receives; max_timeout_ms is its cap. */
149 int timeout_ms = 0, max_timeout_ms = 10;
150 f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
153 now = vlib_time_now (vm);
156 * If we've been asked for a fixed-sleep between main loop polls,
159 if (PREDICT_FALSE (is_main && um->poll_sleep_usec))
161 struct timespec ts, tsrem;
164 node->input_main_loops_per_call = 0;
166 ts.tv_nsec = 1000 * um->poll_sleep_usec;
168 while (nanosleep (&ts, &tsrem) < 0)
173 /* If we're not working very hard, decide how long to sleep */
174 else if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0
175 && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
/* Ask the timer wheel how many ticks remain until its first expiry. */
177 ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
178 ((TWT (tw_timer_wheel) *) nm->timing_wheel);
180 /* Nothing on the fast wheel, sleep 10ms */
181 if (ticks_until_expiration == TW_SLOTS_PER_RING)
184 timeout_ms = max_timeout_ms;
/* Ticks to seconds; converted to milliseconds further down. */
188 timeout = (f64) ticks_until_expiration *1e-5;
193 timeout_ms = timeout * 1e3;
194 /* Must be between 1 and 10 ms. */
195 timeout_ms = clib_max (1, timeout_ms);
196 timeout_ms = clib_min (max_timeout_ms, timeout_ms);
199 node->input_main_loops_per_call = 0;
/* Idle worker: only sleeps once 0.5 s have passed since the last barrier
 * release. */
201 else if (is_main == 0 && vector_rate < 2
202 && (vlib_global_main.time_last_barrier_release + 0.5 < now)
203 && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
206 timeout_ms = max_timeout_ms;
207 node->input_main_loops_per_call = 0;
211 /* Don't come back for a respectable number of dispatch cycles */
212 node->input_main_loops_per_call = 1024;
/* The main thread always waits on its epoll descriptor; a worker does so
 * only when it has one (epoll_fd other than -1). */
215 /* Allow any signal to wakeup our sleep. */
216 if (is_main || em->epoll_fd != -1)
/* Static storage, so the signal mask passed to epoll_pwait is all zero
 * unless something else writes to it. */
218 static sigset_t unblock_all_signals;
219 n_fds_ready = epoll_pwait (em->epoll_fd,
221 vec_len (em->epoll_events),
222 timeout_ms, &unblock_all_signals);
224 /* This kludge is necessary to run over absurdly old kernels */
225 if (n_fds_ready < 0 && errno == ENOSYS)
227 n_fds_ready = epoll_wait (em->epoll_fd,
229 vec_len (em->epoll_events), timeout_ms);
236 * Worker thread, no epoll fd's, sleep for 100us at a time
237 * and check for a barrier sync request
241 struct timespec ts, tsrem;
242 f64 limit = now + (f64) timeout_ms * 1e-3;
244 while (vlib_time_now (vm) < limit)
246 /* Sleep for 100us at a time */
248 ts.tv_nsec = 1000 * 100;
250 while (nanosleep (&ts, &tsrem) < 0)
252 if (*vlib_worker_threads->wait_at_barrier)
262 if (unix_error_is_fatal (errno))
263 vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
265 /* non fatal error (e.g. EINTR). */
269 em->epoll_waits += 1;
270 em->epoll_files_ready += n_fds_ready;
272 for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
276 clib_error_t *errors[4];
280 * Under rare scenarios, epoll may still post us events for the
281 * deleted file descriptor. We just deal with it and throw away the
282 * events for the corresponding file descriptor.
284 f = fm->file_pool + i;
285 if (PREDICT_FALSE (pool_is_free (fm->file_pool, f)))
287 if (e->events & EPOLLIN)
290 clib_error_return (0, "epoll event EPOLLIN dropped due "
291 "to free index %u", i);
294 if (e->events & EPOLLOUT)
297 clib_error_return (0, "epoll event EPOLLOUT dropped due "
298 "to free index %u", i);
301 if (e->events & EPOLLERR)
304 clib_error_return (0, "epoll event EPOLLERR dropped due "
305 "to free index %u", i);
309 else if (PREDICT_TRUE (!(e->events & EPOLLERR)))
311 if (e->events & EPOLLIN)
314 errors[n_errors] = f->read_function (f);
315 /* Make sure f is valid if the file pool moves */
316 if (pool_is_free_index (fm->file_pool, i))
318 f = pool_elt_at_index (fm->file_pool, i);
319 n_errors += errors[n_errors] != 0;
321 if (e->events & EPOLLOUT)
324 errors[n_errors] = f->write_function (f);
325 n_errors += errors[n_errors] != 0;
330 if (f->error_function)
333 errors[n_errors] = f->error_function (f);
334 n_errors += errors[n_errors] != 0;
337 close (f->file_descriptor);
340 ASSERT (n_errors < ARRAY_LEN (errors));
341 for (i = 0; i < n_errors; i++)
343 unix_save_error (um, errors[i]);
348 if (PREDICT_FALSE (vm->cpu_id != clib_get_current_cpu_id ()))
350 vm->cpu_id = clib_get_current_cpu_id ();
351 vm->numa_node = clib_get_current_numa_node ();
/*
 * Node function registered for the epoll input node.  Looks up the calling
 * thread index and forwards to linux_epoll_input_inline.
 *
 * Thread 0 is forwarded with a literal 0 rather than the variable,
 * presumably so the always-inline body can be specialized for the main
 * thread at compile time -- TODO confirm intent.
 *
 * NOTE(review): the return type line and the line between the two return
 * statements are not visible in this excerpt.
 */
358 linux_epoll_input (vlib_main_t * vm,
359 vlib_node_runtime_t * node, vlib_frame_t * frame)
361 u32 thread_index = vlib_get_thread_index ();
363 if (thread_index == 0)
364 return linux_epoll_input_inline (vm, node, frame, 0);
366 return linux_epoll_input_inline (vm, node, frame, thread_index);
/*
 * Registers linux_epoll_input as a node of type VLIB_NODE_TYPE_PRE_INPUT
 * under the name given below.
 * NOTE(review): the remainder of the initializer is not visible in this
 * excerpt.
 */
370 VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
371 .function = linux_epoll_input,
372 .type = VLIB_NODE_TYPE_PRE_INPUT,
373 .name = "unix-epoll-input",
/*
 * Init function for the epoll input node.
 *
 * Sizes linux_epoll_mains from tm->n_vlib_mains, gives every entry an event
 * buffer of VLIB_FRAME_SIZE elements, creates the epoll descriptor for the
 * first entry (thread index 0, the main thread) and installs
 * linux_epoll_file_update as the file-update hook in file_main.
 *
 * Returns a unix error built by clib_error_return_unix when epoll_create
 * fails for the first entry.
 *
 * NOTE(review): if vec_validate_aligned takes the highest valid index rather
 * than an element count, the call below allocates n_vlib_mains + 1 entries
 * -- confirm whether the extra entry is intended.
 * NOTE(review): the return type line, the handling of the entries other
 * than the first and the final return are not visible in this excerpt.
 */
378 linux_epoll_input_init (vlib_main_t * vm)
380 linux_epoll_main_t *em;
381 clib_file_main_t *fm = &file_main;
382 vlib_thread_main_t *tm = vlib_get_thread_main ();
385 vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains,
386 CLIB_CACHE_LINE_BYTES);
388 vec_foreach (em, linux_epoll_mains)
390 /* Allocate some events. */
391 vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
/* Only the first entry gets an epoll descriptor at init time. */
393 if (linux_epoll_mains == em)
395 em->epoll_fd = epoll_create (1);
396 if (em->epoll_fd < 0)
397 return clib_error_return_unix (0, "epoll_create");
/* From here on, file add / modify / delete requests reach epoll. */
403 fm->file_update = linux_epoll_file_update;
408 VLIB_INIT_FUNCTION (linux_epoll_input_init);
410 #endif /* HAVE_LINUX_EPOLL */
/*
 * Init function registered with a runs_before constraint naming
 * linux_epoll_input_init, so it is ordered ahead of the epoll setup.
 * NOTE(review): the body of this function is not visible in this excerpt.
 */
412 static clib_error_t *
413 unix_input_init (vlib_main_t * vm)
419 VLIB_INIT_FUNCTION (unix_input_init) =
421 .runs_before = VLIB_INITS ("linux_epoll_input_init"),
426 * fd.io coding-style-patch-verification: ON
429 * eval: (c-set-style "gnu")