diff options
author | Damjan Marion <damarion@cisco.com> | 2018-01-19 20:56:12 +0100 |
---|---|---|
committer | Dave Barach <openvpp@barachs.net> | 2018-02-06 18:17:46 +0000 |
commit | ceab7882f8016c2407a4383f87277bad069885b1 (patch) | |
tree | e90e247184d9dc3086b6f11f25dff8bd6e7ce51e /src/vlib | |
parent | df5a99cef13ff6a22c195091be45152dc65f5d71 (diff) |
vlib: epoll on worker threads
This patch teaches worker threads to sleep and to be woken up by the
kernel if there is activity on file descriptors assigned to that thread.
It also adds counters to epoll file descriptors and a new
debug CLI command 'show unix files'.
Change-Id: Iaf67869f4aa88ff5b0a08982e1c08474013107c4
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vlib')
-rw-r--r-- | src/vlib/main.c | 13 | ||||
-rw-r--r-- | src/vlib/unix/cli.c | 51 | ||||
-rw-r--r-- | src/vlib/unix/input.c | 135 |
3 files changed, 158 insertions, 41 deletions
diff --git a/src/vlib/main.c b/src/vlib/main.c index f915aa41b3b..5efeac26f79 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -1493,13 +1493,12 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) } /* Process pre-input nodes. */ - if (is_main) - vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) - cpu_time_now = dispatch_node (vm, n, - VLIB_NODE_TYPE_PRE_INPUT, - VLIB_NODE_STATE_POLLING, - /* frame */ 0, - cpu_time_now); + vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) + cpu_time_now = dispatch_node (vm, n, + VLIB_NODE_TYPE_PRE_INPUT, + VLIB_NODE_STATE_POLLING, + /* frame */ 0, + cpu_time_now); /* Next process input nodes. */ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]) diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index 0cf4ed38fe3..5838bbbc7a2 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -59,6 +59,7 @@ #include <sys/ioctl.h> #include <sys/types.h> #include <unistd.h> +#include <limits.h> /** ANSI escape code. */ #define ESC "\x1b" @@ -2556,6 +2557,9 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) clib_file_t template = { 0 }; vlib_main_t *vm = um->vlib_main; vlib_node_t *n; + u8 *file_desc = 0; + + file_desc = format (0, "%s", name); name = (char *) format (0, "unix-cli-%s", name); @@ -2595,6 +2599,7 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) template.error_function = unix_cli_error_detected; template.file_descriptor = fd; template.private_data = cf - cm->cli_file_pool; + template.description = file_desc; cf->process_node_index = n->index; cf->clib_file_index = clib_file_add (fm, &template); @@ -2867,6 +2872,7 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input) template.read_function = unix_cli_listen_read_ready; template.file_descriptor = s->fd; + template.description = format (0, "cli listener %s", s->config); clib_file_add (fm, &template); } @@ -3111,12 +3117,55 @@ done: /* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_show_errors, static) 
= { - .path = "show unix-errors", + .path = "show unix errors", .short_help = "Show Unix system call error history", .function = unix_show_errors, }; /* *INDENT-ON* */ +/** CLI command to show various unix error statistics. */ +static clib_error_t * +unix_show_files (vlib_main_t * vm, + unformat_input_t * input, vlib_cli_command_t * cmd) +{ + clib_error_t *error = 0; + clib_file_main_t *fm = &file_main; + clib_file_t *f; + char path[PATH_MAX]; + u8 *s = 0; + + vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread", + "Read", "Write", "Error", "File Name", "Description"); + + /* *INDENT-OFF* */ + pool_foreach (f, fm->file_pool,( + { + int rv; + s = format (s, "/proc/self/fd/%d%c", f->file_descriptor, 0); + rv = readlink((char *) s, path, PATH_MAX - 1); + + path[rv < 0 ? 0 : rv] = 0; + + vlib_cli_output (vm, "%3d %6d %12d %12d %12d %-32s %v", + f->file_descriptor, f->polling_thread_index, + f->read_events, f->write_events, f->error_events, + path, f->description); + vec_reset_length (s); + })); + /* *INDENT-ON* */ + vec_free (s); + + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (cli_unix_show_files, static) = { + .path = "show unix files", + .short_help = "Show Unix files in use", + .function = unix_show_files, +}; +/* *INDENT-ON* */ + /** CLI command to show session command history. */ static clib_error_t * unix_cli_show_history (vlib_main_t * vm, diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index ecf659bc824..0c298446636 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -40,6 +40,7 @@ #include <vlib/vlib.h> #include <vlib/unix/unix.h> #include <signal.h> +#include <unistd.h> #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> /* FIXME autoconf */ @@ -53,23 +54,23 @@ typedef struct { int epoll_fd; struct epoll_event *epoll_events; + int n_epoll_fds; /* Statistics. 
*/ u64 epoll_files_ready; u64 epoll_waits; } linux_epoll_main_t; -static linux_epoll_main_t linux_epoll_main; +static linux_epoll_main_t *linux_epoll_mains = 0; static void linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) { clib_file_main_t *fm = &file_main; - linux_epoll_main_t *em = &linux_epoll_main; - struct epoll_event e; - int op; - - memset (&e, 0, sizeof (e)); + linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, + f->polling_thread_index); + struct epoll_event e = { 0 }; + int op, add_del = 0; e.events = EPOLLIN; if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) @@ -84,6 +85,7 @@ linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) { case UNIX_FILE_UPDATE_ADD: op = EPOLL_CTL_ADD; + add_del = 1; break; case UNIX_FILE_UPDATE_MODIFY: @@ -92,6 +94,7 @@ linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) case UNIX_FILE_UPDATE_DELETE: op = EPOLL_CTL_DEL; + add_del = -1; break; default: @@ -99,19 +102,43 @@ linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type) return; } + /* worker threads open epoll fd only if needed */ + if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1) + { + em->epoll_fd = epoll_create (1); + if (em->epoll_fd < 0) + { + clib_unix_warning ("epoll_create"); + return; + } + em->n_epoll_fds = 0; + } + if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0) - clib_unix_warning ("epoll_ctl"); + { + clib_unix_warning ("epoll_ctl"); + return; + } + + em->n_epoll_fds += add_del; + + if (em->n_epoll_fds == 0) + { + close (em->epoll_fd); + em->epoll_fd = -1; + } } -static uword -linux_epoll_input (vlib_main_t * vm, - vlib_node_runtime_t * node, vlib_frame_t * frame) +static_always_inline uword +linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, u32 thread_index) { unix_main_t *um = &unix_main; clib_file_main_t *fm = &file_main; - linux_epoll_main_t *em = &linux_epoll_main; + 
linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index); struct epoll_event *e; int n_fds_ready; + int is_main = (thread_index == 0); { vlib_node_main_t *nm = &vm->node_main; @@ -121,7 +148,7 @@ linux_epoll_input (vlib_main_t * vm, f64 vector_rate = vlib_last_vectors_per_main_loop (vm); /* If we're not working very hard, decide how long to sleep */ - if (vector_rate < 2 && vm->api_queue_nonempty == 0 + if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0 && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) { ticks_until_expiration = TW (tw_timer_first_expires_in_ticks) @@ -148,6 +175,13 @@ linux_epoll_input (vlib_main_t * vm, } node->input_main_loops_per_call = 0; } + else if (is_main == 0 && vector_rate < 2 && + nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) + { + timeout = 10e-3; + timeout_ms = max_timeout_ms; + node->input_main_loops_per_call = 0; + } else /* busy */ { /* Don't come back for a respectable number of dispatch cycles */ @@ -155,21 +189,28 @@ linux_epoll_input (vlib_main_t * vm, } /* Allow any signal to wakeup our sleep. 
*/ - { - static sigset_t unblock_all_signals; - n_fds_ready = epoll_pwait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), - timeout_ms, &unblock_all_signals); - - /* This kludge is necessary to run over absurdly old kernels */ - if (n_fds_ready < 0 && errno == ENOSYS) - { - n_fds_ready = epoll_wait (em->epoll_fd, - em->epoll_events, - vec_len (em->epoll_events), timeout_ms); - } - } + if (is_main || em->epoll_fd != -1) + { + static sigset_t unblock_all_signals; + n_fds_ready = epoll_pwait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), + timeout_ms, &unblock_all_signals); + + /* This kludge is necessary to run over absurdly old kernels */ + if (n_fds_ready < 0 && errno == ENOSYS) + { + n_fds_ready = epoll_wait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), timeout_ms); + } + } + else + { + if (timeout_ms) + usleep (timeout_ms * 1000); + return 0; + } } if (n_fds_ready < 0) @@ -196,11 +237,13 @@ linux_epoll_input (vlib_main_t * vm, if (e->events & EPOLLIN) { errors[n_errors] = f->read_function (f); + f->read_events++; n_errors += errors[n_errors] != 0; } if (e->events & EPOLLOUT) { errors[n_errors] = f->write_function (f); + f->write_events++; n_errors += errors[n_errors] != 0; } } @@ -209,6 +252,7 @@ linux_epoll_input (vlib_main_t * vm, if (f->error_function) { errors[n_errors] = f->error_function (f); + f->error_events++; n_errors += errors[n_errors] != 0; } else @@ -225,6 +269,18 @@ linux_epoll_input (vlib_main_t * vm, return 0; } +static uword +linux_epoll_input (vlib_main_t * vm, + vlib_node_runtime_t * node, vlib_frame_t * frame) +{ + u32 thread_index = vlib_get_thread_index (); + + if (thread_index == 0) + return linux_epoll_input_inline (vm, node, frame, 0); + else + return linux_epoll_input_inline (vm, node, frame, thread_index); +} + /* *INDENT-OFF* */ VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { .function = linux_epoll_input, @@ -236,15 +292,28 @@ VLIB_REGISTER_NODE 
(linux_epoll_input_node,static) = { clib_error_t * linux_epoll_input_init (vlib_main_t * vm) { - linux_epoll_main_t *em = &linux_epoll_main; + linux_epoll_main_t *em; clib_file_main_t *fm = &file_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + - /* Allocate some events. */ - vec_resize (em->epoll_events, VLIB_FRAME_SIZE); + vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); - em->epoll_fd = epoll_create (vec_len (em->epoll_events)); - if (em->epoll_fd < 0) - return clib_error_return_unix (0, "epoll_create"); + vec_foreach (em, linux_epoll_mains) + { + /* Allocate some events. */ + vec_resize (em->epoll_events, VLIB_FRAME_SIZE); + + if (linux_epoll_mains == em) + { + em->epoll_fd = epoll_create (1); + if (em->epoll_fd < 0) + return clib_error_return_unix (0, "epoll_create"); + } + else + em->epoll_fd = -1; + } fm->file_update = linux_epoll_file_update; |