aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2018-01-19 20:56:12 +0100
committerDave Barach <openvpp@barachs.net>2018-02-06 18:17:46 +0000
commitceab7882f8016c2407a4383f87277bad069885b1 (patch)
treee90e247184d9dc3086b6f11f25dff8bd6e7ce51e
parentdf5a99cef13ff6a22c195091be45152dc65f5d71 (diff)
vlib: epoll on worker threads
This patch teaches worker threads to sleep and to be woken up by the kernel when there is activity on file descriptors assigned to that thread. It also adds read/write/error event counters to epoll file descriptors and a new debug CLI command, 'show unix files'. Change-Id: Iaf67869f4aa88ff5b0a08982e1c08474013107c4 Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r--src/plugins/memif/memif.c7
-rw-r--r--src/plugins/memif/socket.c4
-rw-r--r--src/vlib/main.c13
-rw-r--r--src/vlib/unix/cli.c51
-rw-r--r--src/vlib/unix/input.c135
-rw-r--r--src/vnet/devices/af_packet/af_packet.c2
-rw-r--r--src/vnet/devices/af_packet/af_packet.h2
-rw-r--r--src/vnet/devices/af_packet/device.c2
-rw-r--r--src/vnet/devices/virtio/virtio.c2
-rw-r--r--src/vnet/devices/virtio/virtio.h2
-rw-r--r--src/vnet/ip/punt.c1
-rw-r--r--src/vppinfra/file.h25
12 files changed, 204 insertions, 42 deletions
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c
index dc92e7e24ad..7267ef2e000 100644
--- a/src/plugins/memif/memif.c
+++ b/src/plugins/memif/memif.c
@@ -221,6 +221,9 @@ memif_connect (memif_if_t * mif)
{
template.file_descriptor = mq->int_fd;
template.private_data = (mif->dev_instance << 16) | (i & 0xFFFF);
+ template.description = format (0, "%U rx %u int",
+ format_memif_device_name,
+ mif->dev_instance, i);
memif_file_add (&mq->int_clib_file_index, &template);
}
vnet_hw_interface_assign_rx_thread (vnm, mif->hw_if_index, i, ~0);
@@ -440,6 +443,9 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
t.file_descriptor = sock->fd;
t.private_data = mif->dev_instance;
memif_file_add (&sock->private_data, &t);
+ t.description = format (0, "%U ctl",
+ format_memif_device_name,
+ mif->dev_instance);
hash_set (msf->dev_instance_by_fd, sock->fd, mif->dev_instance);
mif->flags |= MEMIF_IF_FLAG_CONNECTING;
@@ -805,6 +811,7 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
template.read_function = memif_conn_fd_accept_ready;
template.file_descriptor = msf->sock->fd;
template.private_data = mif->socket_file_index;
+ template.description = format (0, "memif listener %s", msf->filename);
memif_file_add (&msf->sock->private_data, &template);
}
diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c
index 9aa4a2e2e90..5e14f08ac93 100644
--- a/src/plugins/memif/socket.c
+++ b/src/plugins/memif/socket.c
@@ -440,6 +440,9 @@ memif_msg_receive (memif_if_t ** mifp, clib_socket_t * sock, clib_file_t * uf)
if ((err = memif_msg_receive_init (mifp, &msg, sock, uf->private_data)))
return err;
mif = *mifp;
+ vec_reset_length (uf->description);
+ uf->description = format (uf->description, "%U ctl",
+ format_memif_device_name, mif->dev_instance);
memif_msg_enq_ack (mif);
break;
@@ -645,6 +648,7 @@ memif_conn_fd_accept_ready (clib_file_t * uf)
template.error_function = memif_master_conn_fd_error;
template.file_descriptor = client->fd;
template.private_data = uf->private_data;
+ template.description = format (0, "memif in conn on %s", msf->filename);
memif_file_add (&client->private_data, &template);
diff --git a/src/vlib/main.c b/src/vlib/main.c
index f915aa41b3b..5efeac26f79 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1493,13 +1493,12 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
}
/* Process pre-input nodes. */
- if (is_main)
- vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
- cpu_time_now = dispatch_node (vm, n,
- VLIB_NODE_TYPE_PRE_INPUT,
- VLIB_NODE_STATE_POLLING,
- /* frame */ 0,
- cpu_time_now);
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_PRE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
/* Next process input nodes. */
vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c
index 0cf4ed38fe3..5838bbbc7a2 100644
--- a/src/vlib/unix/cli.c
+++ b/src/vlib/unix/cli.c
@@ -59,6 +59,7 @@
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>
+#include <limits.h>
/** ANSI escape code. */
#define ESC "\x1b"
@@ -2556,6 +2557,9 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
clib_file_t template = { 0 };
vlib_main_t *vm = um->vlib_main;
vlib_node_t *n;
+ u8 *file_desc = 0;
+
+ file_desc = format (0, "%s", name);
name = (char *) format (0, "unix-cli-%s", name);
@@ -2595,6 +2599,7 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
template.error_function = unix_cli_error_detected;
template.file_descriptor = fd;
template.private_data = cf - cm->cli_file_pool;
+ template.description = file_desc;
cf->process_node_index = n->index;
cf->clib_file_index = clib_file_add (fm, &template);
@@ -2867,6 +2872,7 @@ unix_cli_config (vlib_main_t * vm, unformat_input_t * input)
template.read_function = unix_cli_listen_read_ready;
template.file_descriptor = s->fd;
+ template.description = format (0, "cli listener %s", s->config);
clib_file_add (fm, &template);
}
@@ -3111,12 +3117,55 @@ done:
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_show_errors, static) = {
- .path = "show unix-errors",
+ .path = "show unix errors",
.short_help = "Show Unix system call error history",
.function = unix_show_errors,
};
/* *INDENT-ON* */
+/** CLI command to list open unix files with their polling thread, event counters, path and description. */
+static clib_error_t *
+unix_show_files (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f;
+ char path[PATH_MAX];
+ u8 *s = 0;
+
+ vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread",
+ "Read", "Write", "Error", "File Name", "Description");
+
+ /* *INDENT-OFF* */
+ pool_foreach (f, fm->file_pool,(
+ {
+ int rv;
+ s = format (s, "/proc/self/fd/%d%c", f->file_descriptor, 0);
+ rv = readlink((char *) s, path, PATH_MAX - 1);
+
+ path[rv < 0 ? 0 : rv] = 0;
+
+ vlib_cli_output (vm, "%3d %6d %12d %12d %12d %-32s %v",
+ f->file_descriptor, f->polling_thread_index,
+ f->read_events, f->write_events, f->error_events,
+ path, f->description);
+ vec_reset_length (s);
+ }));
+ /* *INDENT-ON* */
+ vec_free (s);
+
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_show_files, static) = {
+ .path = "show unix files",
+ .short_help = "Show Unix files in use",
+ .function = unix_show_files,
+};
+/* *INDENT-ON* */
+
/** CLI command to show session command history. */
static clib_error_t *
unix_cli_show_history (vlib_main_t * vm,
diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c
index ecf659bc824..0c298446636 100644
--- a/src/vlib/unix/input.c
+++ b/src/vlib/unix/input.c
@@ -40,6 +40,7 @@
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <signal.h>
+#include <unistd.h>
#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
/* FIXME autoconf */
@@ -53,23 +54,23 @@ typedef struct
{
int epoll_fd;
struct epoll_event *epoll_events;
+ int n_epoll_fds;
/* Statistics. */
u64 epoll_files_ready;
u64 epoll_waits;
} linux_epoll_main_t;
-static linux_epoll_main_t linux_epoll_main;
+static linux_epoll_main_t *linux_epoll_mains = 0;
static void
linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
{
clib_file_main_t *fm = &file_main;
- linux_epoll_main_t *em = &linux_epoll_main;
- struct epoll_event e;
- int op;
-
- memset (&e, 0, sizeof (e));
+ linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains,
+ f->polling_thread_index);
+ struct epoll_event e = { 0 };
+ int op, add_del = 0;
e.events = EPOLLIN;
if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
@@ -84,6 +85,7 @@ linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
{
case UNIX_FILE_UPDATE_ADD:
op = EPOLL_CTL_ADD;
+ add_del = 1;
break;
case UNIX_FILE_UPDATE_MODIFY:
@@ -92,6 +94,7 @@ linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
case UNIX_FILE_UPDATE_DELETE:
op = EPOLL_CTL_DEL;
+ add_del = -1;
break;
default:
@@ -99,19 +102,43 @@ linux_epoll_file_update (clib_file_t * f, clib_file_update_type_t update_type)
return;
}
+ /* worker threads open epoll fd only if needed */
+ if (update_type == UNIX_FILE_UPDATE_ADD && em->epoll_fd == -1)
+ {
+ em->epoll_fd = epoll_create (1);
+ if (em->epoll_fd < 0)
+ {
+ clib_unix_warning ("epoll_create");
+ return;
+ }
+ em->n_epoll_fds = 0;
+ }
+
if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
- clib_unix_warning ("epoll_ctl");
+ {
+ clib_unix_warning ("epoll_ctl");
+ return;
+ }
+
+ em->n_epoll_fds += add_del;
+
+ if (em->n_epoll_fds == 0)
+ {
+ close (em->epoll_fd);
+ em->epoll_fd = -1;
+ }
}
-static uword
-linux_epoll_input (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+static_always_inline uword
+linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, u32 thread_index)
{
unix_main_t *um = &unix_main;
clib_file_main_t *fm = &file_main;
- linux_epoll_main_t *em = &linux_epoll_main;
+ linux_epoll_main_t *em = vec_elt_at_index (linux_epoll_mains, thread_index);
struct epoll_event *e;
int n_fds_ready;
+ int is_main = (thread_index == 0);
{
vlib_node_main_t *nm = &vm->node_main;
@@ -121,7 +148,7 @@ linux_epoll_input (vlib_main_t * vm,
f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
/* If we're not working very hard, decide how long to sleep */
- if (vector_rate < 2 && vm->api_queue_nonempty == 0
+ if (is_main && vector_rate < 2 && vm->api_queue_nonempty == 0
&& nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
{
ticks_until_expiration = TW (tw_timer_first_expires_in_ticks)
@@ -148,6 +175,13 @@ linux_epoll_input (vlib_main_t * vm,
}
node->input_main_loops_per_call = 0;
}
+ else if (is_main == 0 && vector_rate < 2 &&
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0)
+ {
+ timeout = 10e-3;
+ timeout_ms = max_timeout_ms;
+ node->input_main_loops_per_call = 0;
+ }
else /* busy */
{
/* Don't come back for a respectable number of dispatch cycles */
@@ -155,21 +189,28 @@ linux_epoll_input (vlib_main_t * vm,
}
/* Allow any signal to wakeup our sleep. */
- {
- static sigset_t unblock_all_signals;
- n_fds_ready = epoll_pwait (em->epoll_fd,
- em->epoll_events,
- vec_len (em->epoll_events),
- timeout_ms, &unblock_all_signals);
-
- /* This kludge is necessary to run over absurdly old kernels */
- if (n_fds_ready < 0 && errno == ENOSYS)
- {
- n_fds_ready = epoll_wait (em->epoll_fd,
- em->epoll_events,
- vec_len (em->epoll_events), timeout_ms);
- }
- }
+ if (is_main || em->epoll_fd != -1)
+ {
+ static sigset_t unblock_all_signals;
+ n_fds_ready = epoll_pwait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events),
+ timeout_ms, &unblock_all_signals);
+
+ /* This kludge is necessary to run over absurdly old kernels */
+ if (n_fds_ready < 0 && errno == ENOSYS)
+ {
+ n_fds_ready = epoll_wait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events), timeout_ms);
+ }
+ }
+ else
+ {
+ if (timeout_ms)
+ usleep (timeout_ms * 1000);
+ return 0;
+ }
}
if (n_fds_ready < 0)
@@ -196,11 +237,13 @@ linux_epoll_input (vlib_main_t * vm,
if (e->events & EPOLLIN)
{
errors[n_errors] = f->read_function (f);
+ f->read_events++;
n_errors += errors[n_errors] != 0;
}
if (e->events & EPOLLOUT)
{
errors[n_errors] = f->write_function (f);
+ f->write_events++;
n_errors += errors[n_errors] != 0;
}
}
@@ -209,6 +252,7 @@ linux_epoll_input (vlib_main_t * vm,
if (f->error_function)
{
errors[n_errors] = f->error_function (f);
+ f->error_events++;
n_errors += errors[n_errors] != 0;
}
else
@@ -225,6 +269,18 @@ linux_epoll_input (vlib_main_t * vm,
return 0;
}
+static uword
+linux_epoll_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ u32 thread_index = vlib_get_thread_index ();
+
+ if (thread_index == 0)
+ return linux_epoll_input_inline (vm, node, frame, 0);
+ else
+ return linux_epoll_input_inline (vm, node, frame, thread_index);
+}
+
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
.function = linux_epoll_input,
@@ -236,15 +292,28 @@ VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
clib_error_t *
linux_epoll_input_init (vlib_main_t * vm)
{
- linux_epoll_main_t *em = &linux_epoll_main;
+ linux_epoll_main_t *em;
clib_file_main_t *fm = &file_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
- /* Allocate some events. */
- vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
+ vec_validate_aligned (linux_epoll_mains, tm->n_vlib_mains,
+ CLIB_CACHE_LINE_BYTES);
- em->epoll_fd = epoll_create (vec_len (em->epoll_events));
- if (em->epoll_fd < 0)
- return clib_error_return_unix (0, "epoll_create");
+ vec_foreach (em, linux_epoll_mains)
+ {
+ /* Allocate some events. */
+ vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
+
+ if (linux_epoll_mains == em)
+ {
+ em->epoll_fd = epoll_create (1);
+ if (em->epoll_fd < 0)
+ return clib_error_return_unix (0, "epoll_create");
+ }
+ else
+ em->epoll_fd = -1;
+ }
fm->file_update = linux_epoll_file_update;
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
index c51fded00c9..2a135105721 100644
--- a/src/vnet/devices/af_packet/af_packet.c
+++ b/src/vnet/devices/af_packet/af_packet.c
@@ -288,6 +288,8 @@ af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
template.file_descriptor = fd;
template.private_data = if_index;
template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
+ template.description = format (0, "%U", format_af_packet_device_name,
+ if_index);
apif->clib_file_index = clib_file_add (&file_main, &template);
}
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
index b095e7ffcd1..18822f838db 100644
--- a/src/vnet/devices/af_packet/af_packet.h
+++ b/src/vnet/devices/af_packet/af_packet.h
@@ -66,6 +66,8 @@ int af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name);
int af_packet_set_l4_cksum_offload (vlib_main_t * vm, u32 sw_if_index,
u8 set);
+format_function_t format_af_packet_device_name;
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c
index b9492b36a69..2c2489b55ab 100644
--- a/src/vnet/devices/af_packet/device.c
+++ b/src/vnet/devices/af_packet/device.c
@@ -51,7 +51,7 @@ static char *af_packet_tx_func_error_strings[] = {
};
-static u8 *
+u8 *
format_af_packet_device_name (u8 * s, va_list * args)
{
u32 i = va_arg (*args, u32);
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index 02ded3f5f52..aa9db3a0ef3 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -109,6 +109,8 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
t.read_function = call_read_ready;
t.file_descriptor = vring->call_fd;
t.private_data = vif->dev_instance << 16 | idx;
+ t.description = format (0, "%U vring %u", format_virtio_device_name,
+ vif->dev_instance, idx);
vring->call_file_index = clib_file_add (&file_main, &t);
state.index = idx;
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index 90eeb536ab1..7ffb2ea3dce 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -132,6 +132,8 @@ clib_error_t *virtio_vring_free (vlib_main_t * vm, virtio_if_t * vif,
u32 idx);
extern void virtio_free_used_desc (vlib_main_t * vm, virtio_vring_t * vring);
+format_function_t format_virtio_device_name;
+
#endif /* _VNET_DEVICES_VIRTIO_VIRTIO_H_ */
/*
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
index 4a027bfdadb..f24a43fdece 100644
--- a/src/vnet/ip/punt.c
+++ b/src/vnet/ip/punt.c
@@ -890,6 +890,7 @@ punt_config (vlib_main_t * vm, unformat_input_t * input)
clib_file_t template = { 0 };
template.read_function = punt_socket_read_ready;
template.file_descriptor = pm->socket_fd;
+ template.description = format (0, "%s", socket_path);
pm->clib_file_index = clib_file_add (fm, &template);
pm->is_configured = true;
diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h
index f9349721a7c..b5a0507c3b8 100644
--- a/src/vppinfra/file.h
+++ b/src/vppinfra/file.h
@@ -57,11 +57,22 @@ typedef struct clib_file
#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0)
#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1)
+ /* polling thread index */
+ u32 polling_thread_index;
+
/* Data available for function's use. */
uword private_data;
/* Functions to be called when read/write data becomes ready. */
clib_file_function_t *read_function, *write_function, *error_function;
+
+ /* Description */
+ u8 *description;
+
+ /* Stats */
+ u64 read_events;
+ u64 write_events;
+ u64 error_events;
} clib_file_t;
typedef enum
@@ -87,6 +98,9 @@ clib_file_add (clib_file_main_t * um, clib_file_t * template)
clib_file_t *f;
pool_get (um->file_pool, f);
f[0] = template[0];
+ f->read_events = 0;
+ f->write_events = 0;
+ f->error_events = 0;
um->file_update (f, UNIX_FILE_UPDATE_ADD);
return f - um->file_pool;
}
@@ -97,6 +111,7 @@ clib_file_del (clib_file_main_t * um, clib_file_t * f)
um->file_update (f, UNIX_FILE_UPDATE_DELETE);
close (f->file_descriptor);
f->file_descriptor = ~0;
+ vec_free (f->description);
pool_put (um->file_pool, f);
}
@@ -108,6 +123,16 @@ clib_file_del_by_index (clib_file_main_t * um, uword index)
clib_file_del (um, uf);
}
+always_inline void
+clib_file_set_polling_thread (clib_file_main_t * um, uword index,
+ u32 thread_index)
+{
+ clib_file_t *f = pool_elt_at_index (um->file_pool, index);
+ um->file_update (f, UNIX_FILE_UPDATE_DELETE);
+ f->polling_thread_index = thread_index;
+ um->file_update (f, UNIX_FILE_UPDATE_ADD);
+}
+
always_inline uword
clib_file_set_data_available_to_write (clib_file_main_t * um,
u32 clib_file_index,