author    Steven <sluong@cisco.com>    2017-03-15 21:18:55 -0700
committer Damjan Marion <dmarion.lists@gmail.com>    2017-03-22 08:54:33 +0000
commit    7312cc7785a9d1198519e1091a645fecc019a6b8 (patch)
tree      7f46d3e5b36c766949ddfc37329cffeeebcc7323 /src/vnet/devices/virtio
parent    b616e9fdc270e786c31b9ee9de5464497f0b1f6d (diff)
vhost: support interrupt mode
vhost currently supports only polling mode. This patch adds interrupt mode. When an interface is configured for interrupt mode, the input node is not called unless there is a packet in the vring. If a particular CPU has one interface configured for polling mode and another in interrupt mode, the input node is set to polling for that CPU.

This diff also relates to two crashes in vlib's dispatch_node. The fix for one is included in https://gerrit.fd.io/r/#/c/5516. The other crash is in the ASSERT, which can fire when the caller of dispatch_node is in a loop: the first call converts the node to polling, and the second call thereafter hits the ASSERT.

Change-Id: If17b6d48b20d7d8605c6a161459828637173cd32
Signed-off-by: Steven <sluong@cisco.com>
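The per-CPU placement rule described above (a single polling interface pins the whole CPU's input node to polling) can be summarized with a minimal standalone sketch. The identifiers below are illustrative only and are not VPP symbols; in the patch itself this decision is made in vhost_user_rx_thread_placement () in the diff that follows.

#include <stdio.h>

typedef enum { MODE_POLLING = 0, MODE_INTERRUPT = 1 } rx_mode_t;

/* A CPU's input node may run in interrupt mode only when every interface
 * queue placed on that CPU is configured for interrupt mode; one polling
 * interface forces the whole CPU back to polling. */
static rx_mode_t
resolve_cpu_mode (const rx_mode_t * iface_modes, int n_ifaces)
{
  rx_mode_t cpu_mode = MODE_INTERRUPT;
  int i;

  for (i = 0; i < n_ifaces; i++)
    if (iface_modes[i] == MODE_POLLING)
      {
        cpu_mode = MODE_POLLING;
        break;
      }
  return cpu_mode;
}

int
main (void)
{
  rx_mode_t mixed[] = { MODE_INTERRUPT, MODE_POLLING };
  rx_mode_t all_intr[] = { MODE_INTERRUPT, MODE_INTERRUPT };

  printf ("mixed placement -> %s\n",
          resolve_cpu_mode (mixed, 2) == MODE_POLLING ? "polling" : "interrupt");
  printf ("interrupt-only  -> %s\n",
          resolve_cpu_mode (all_intr, 2) == MODE_POLLING ? "polling" : "interrupt");
  return 0;
}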
Diffstat (limited to 'src/vnet/devices/virtio')
-rw-r--r--  src/vnet/devices/virtio/vhost-user.c  333
-rw-r--r--  src/vnet/devices/virtio/vhost-user.h  11
2 files changed, 322 insertions, 22 deletions
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 3cbeca9b3fc..5a5beb1554d 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -368,6 +368,8 @@ vhost_user_rx_thread_placement ()
vhost_user_intf_t *vui;
vhost_cpu_t *vhc;
u32 *workers = 0;
+ u32 cpu_index;
+ vlib_main_t *vm;
//Let's list all workers cpu indexes
u32 i;
@@ -398,19 +400,59 @@ vhost_user_rx_thread_placement ()
continue;
i %= vec_len (vui_workers);
- u32 cpu_index = vui_workers[i];
+ cpu_index = vui_workers[i];
i++;
vhc = &vum->cpus[cpu_index];
iaq.qid = qid;
iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
vec_add1 (vhc->rx_queues, iaq);
- vlib_node_set_state (vlib_mains[cpu_index],
- vhost_user_input_node.index,
- VLIB_NODE_STATE_POLLING);
}
});
/* *INDENT-ON* */
+
+ vec_foreach (vhc, vum->cpus)
+ {
+ vhost_iface_and_queue_t *vhiq;
+ u8 mode = VHOST_USER_INTERRUPT_MODE;
+
+ vec_foreach (vhiq, vhc->rx_queues)
+ {
+ vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
+ if (vui->operation_mode == VHOST_USER_POLLING_MODE)
+ {
+ /* At least one interface is polling, cpu is set to polling */
+ mode = VHOST_USER_POLLING_MODE;
+ break;
+ }
+ }
+ vhc->operation_mode = mode;
+ }
+
+ for (cpu_index = vum->input_cpu_first_index;
+ cpu_index < vum->input_cpu_first_index + vum->input_cpu_count;
+ cpu_index++)
+ {
+ vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
+
+ vhc = &vum->cpus[cpu_index];
+ vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+ switch (vhc->operation_mode)
+ {
+ case VHOST_USER_INTERRUPT_MODE:
+ state = VLIB_NODE_STATE_INTERRUPT;
+ break;
+ case VHOST_USER_POLLING_MODE:
+ state = VLIB_NODE_STATE_POLLING;
+ break;
+ default:
+ clib_warning ("BUG: bad operation mode %d", vhc->operation_mode);
+ break;
+ }
+ vlib_node_set_state (vm, vhost_user_input_node.index, state);
+ }
+
+ vec_free (workers);
}
static int
@@ -485,12 +527,68 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui)
vhost_user_tx_thread_placement (vui);
}
+static void
+vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
+{
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_cpu_t *vhc;
+ u32 cpu_index;
+ vhost_iface_and_queue_t *vhiq;
+ vlib_main_t *vm;
+ u32 ifq2;
+ u8 done = 0;
+
+ if (vhost_user_intf_ready (vui))
+ {
+ vec_foreach (vhc, vum->cpus)
+ {
+ if (vhc->operation_mode == VHOST_USER_POLLING_MODE)
+ continue;
+
+ vec_foreach (vhiq, vhc->rx_queues)
+ {
+ /*
+ * Match the interface and the virtqueue number
+ */
+ if ((vhiq->vhost_iface_index == (ifq >> 8)) &&
+ (VHOST_VRING_IDX_TX (vhiq->qid) == (ifq & 0xff)))
+ {
+ cpu_index = vhc - vum->cpus;
+ vm = vlib_mains ? vlib_mains[cpu_index] : &vlib_global_main;
+ /*
+ * Convert RX virtqueue number in the lower byte to vring
+ * queue index for the input node process. Top bytes contain
+ * the interface, lower byte contains the queue index.
+ */
+ ifq2 = ((ifq >> 8) << 8) | vhiq->qid;
+ vhc->pending_input_bitmap =
+ clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1);
+ vlib_node_set_interrupt_pending (vm,
+ vhost_user_input_node.index);
+ done = 1;
+ break;
+ }
+ }
+ if (done)
+ break;
+ }
+ }
+}
+
static clib_error_t *
vhost_user_callfd_read_ready (unix_file_t * uf)
{
__attribute__ ((unused)) int n;
u8 buff[8];
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
+ uf->private_data >> 8);
+
n = read (uf->file_descriptor, ((char *) &buff), 8);
+ DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8,
+ uf->private_data & 0xff);
+ vhost_user_set_interrupt_pending (vui, uf->private_data);
+
return 0;
}
@@ -503,13 +601,20 @@ vhost_user_kickfd_read_ready (unix_file_t * uf)
pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
uf->private_data >> 8);
u32 qid = uf->private_data & 0xff;
+
n = read (uf->file_descriptor, ((char *) &buff), 8);
DBG_SOCK ("if %d KICK queue %d", uf->private_data >> 8, qid);
vlib_worker_thread_barrier_sync (vlib_get_main ());
- vui->vrings[qid].started = 1;
- vhost_user_update_iface_state (vui);
+ if (!vui->vrings[qid].started ||
+ (vhost_user_intf_ready (vui) != vui->is_up))
+ {
+ vui->vrings[qid].started = 1;
+ vhost_user_update_iface_state (vui);
+ }
vlib_worker_thread_barrier_release (vlib_get_main ());
+
+ vhost_user_set_interrupt_pending (vui, uf->private_data);
return 0;
}
@@ -907,8 +1012,12 @@ vhost_user_socket_read (unix_file_t * uf)
vui->vrings[msg.state.index].last_avail_idx =
vui->vrings[msg.state.index].used->idx;
- /* tell driver that we don't want interrupts */
- vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
+ if (vui->operation_mode == VHOST_USER_POLLING_MODE)
+ /* tell driver that we don't want interrupts */
+ vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
+ else
+ /* tell driver that we want interrupts */
+ vui->vrings[msg.state.index].used->flags = 0;
break;
case VHOST_USER_SET_OWNER:
@@ -1811,7 +1920,8 @@ vhost_user_if_input (vlib_main_t * vm,
vhost_user_log_dirty_ring (vui, txvq, idx);
/* interrupt (call) handling */
- if ((txvq->callfd_idx != ~0) && !(txvq->avail->flags & 1))
+ if ((txvq->callfd_idx != ~0) &&
+ !(txvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
{
txvq->n_since_last_int += n_rx_packets;
@@ -1837,16 +1947,33 @@ vhost_user_input (vlib_main_t * vm,
vhost_user_main_t *vum = &vhost_user_main;
uword n_rx_packets = 0;
u32 cpu_index = os_get_cpu_number ();
+ vhost_iface_and_queue_t *vhiq;
+ vhost_user_intf_t *vui;
+ vhost_cpu_t *vhc;
+ vhc = &vum->cpus[cpu_index];
+ if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
+ {
+ vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+ {
+ vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
+ n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
+ }
+ }
+ else
+ {
+ int i;
- vhost_iface_and_queue_t *vhiq;
- vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
- {
- vhost_user_intf_t *vui =
- &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
- n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
- }
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({
+ int qid = i & 0xff;
+ clib_bitmap_set (vhc->pending_input_bitmap, i, 0);
+ vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8);
+ n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node);
+ }));
+ /* *INDENT-ON* */
+ }
return n_rx_packets;
}
@@ -2241,7 +2368,8 @@ done:
}
/* interrupt (call) handling */
- if ((rxvq->callfd_idx != ~0) && !(rxvq->avail->flags & 1))
+ if ((rxvq->callfd_idx != ~0) &&
+ !(rxvq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
{
rxvq->n_since_last_int += frame->n_vectors - n_left;
@@ -2595,6 +2723,95 @@ vhost_user_vui_init (vnet_main_t * vnm,
vhost_user_tx_thread_placement (vui);
}
+static uword
+vhost_user_send_interrupt_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vhost_user_intf_t *vui;
+ f64 timeout = 3153600000.0 /* 100 years */ ;
+ uword event_type, *event_data = 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_iface_and_queue_t *vhiq;
+ vhost_cpu_t *vhc;
+ f64 now, poll_time_remaining;
+
+ while (1)
+ {
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ /*
+ * Use the remaining timeout if it is less than coalesce time to avoid
+ * resetting the existing timer in the middle of expiration
+ */
+ timeout = poll_time_remaining;
+ if (vlib_process_suspend_time_is_zero (timeout) ||
+ (timeout > vum->coalesce_time))
+ timeout = vum->coalesce_time;
+
+ now = vlib_time_now (vm);
+ switch (event_type)
+ {
+ case VHOST_USER_EVENT_START_TIMER:
+ if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
+ break;
+ /* fall through */
+
+ case ~0:
+ vec_foreach (vhc, vum->cpus)
+ {
+ u32 cpu_index = vhc - vum->cpus;
+ f64 next_timeout;
+
+ next_timeout = timeout;
+ vec_foreach (vhiq, vum->cpus[cpu_index].rx_queues)
+ {
+ vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
+ vhost_user_vring_t *rxvq =
+ &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)];
+ vhost_user_vring_t *txvq =
+ &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)];
+
+ if (txvq->n_since_last_int)
+ {
+ if (now >= txvq->int_deadline)
+ vhost_user_send_call (vm, txvq);
+ else
+ next_timeout = txvq->int_deadline - now;
+ }
+
+ if (rxvq->n_since_last_int)
+ {
+ if (now >= rxvq->int_deadline)
+ vhost_user_send_call (vm, rxvq);
+ else
+ next_timeout = rxvq->int_deadline - now;
+ }
+
+ if ((next_timeout < timeout) && (next_timeout > 0.0))
+ timeout = next_timeout;
+ }
+ }
+ break;
+
+ default:
+ clib_warning ("BUG: unhandled event type %d", event_type);
+ break;
+ }
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
+ .function = vhost_user_send_interrupt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vhost-user-send-interrupt-process",
+};
+/* *INDENT-ON* */
+
int
vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename,
@@ -2608,8 +2825,10 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
u32 sw_if_idx = ~0;
int rv = 0;
int server_sock_fd = -1;
+ vhost_user_main_t *vum = &vhost_user_main;
- if (operation_mode != VHOST_USER_POLLING_MODE)
+ if ((operation_mode != VHOST_USER_POLLING_MODE) &&
+ (operation_mode != VHOST_USER_INTERRUPT_MODE))
return VNET_API_ERROR_UNIMPLEMENTED;
if (sock_filename == NULL || !(strlen (sock_filename) > 0))
@@ -2640,6 +2859,15 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
// Process node must connect
vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+ if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
+ !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
+ (vum->coalesce_frames > 0))
+ {
+ vum->interrupt_mode = 1;
+ vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_START_TIMER, 0);
+ }
return rv;
}
@@ -2658,7 +2886,8 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
int rv = 0;
vnet_hw_interface_t *hwif;
- if (operation_mode != VHOST_USER_POLLING_MODE)
+ if ((operation_mode != VHOST_USER_POLLING_MODE) &&
+ (operation_mode != VHOST_USER_INTERRUPT_MODE))
return VNET_API_ERROR_UNIMPLEMENTED;
if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
hwif->dev_class_index != vhost_user_dev_class.index)
@@ -2682,9 +2911,34 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
// Process node must connect
vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
+
+ if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
+ !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
+ (vum->coalesce_frames > 0))
+ {
+ vum->interrupt_mode = 1;
+ vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_START_TIMER, 0);
+ }
return rv;
}
+static uword
+unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args)
+{
+ u8 *operation_mode = va_arg (*args, u8 *);
+ uword rc = 1;
+
+ if (unformat (input, "interrupt"))
+ *operation_mode = VHOST_USER_INTERRUPT_MODE;
+ else if (unformat (input, "polling"))
+ *operation_mode = VHOST_USER_POLLING_MODE;
+ else
+ rc = 0;
+
+ return rc;
+}
+
clib_error_t *
vhost_user_connect_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -2722,6 +2976,9 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
{
renumber = 1;
}
+ else if (unformat (line_input, "mode %U",
+ unformat_vhost_user_operation_mode, &operation_mode))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -2851,6 +3108,25 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
return rv;
}
+static u8 *
+format_vhost_user_operation_mode (u8 * s, va_list * va)
+{
+ int operation_mode = va_arg (*va, int);
+
+ switch (operation_mode)
+ {
+ case VHOST_USER_POLLING_MODE:
+ s = format (s, "%s", "polling");
+ break;
+ case VHOST_USER_INTERRUPT_MODE:
+ s = format (s, "%s", "interrupt");
+ break;
+ default:
+ s = format (s, "%s", "invalid");
+ }
+ return s;
+}
+
clib_error_t *
show_vhost_user_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -2959,14 +3235,22 @@ show_vhost_user_command_fn (vlib_main_t * vm,
(vui->unix_server_index != ~0) ? "server" : "client",
strerror (vui->sock_errno));
+ vlib_cli_output (vm, " configured mode: %U\n",
+ format_vhost_user_operation_mode, vui->operation_mode);
vlib_cli_output (vm, " rx placement: ");
vec_foreach (vhc, vum->cpus)
{
vec_foreach (vhiq, vhc->rx_queues)
{
if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces)
- vlib_cli_output (vm, " thread %d on vring %d\n",
- vhc - vum->cpus, VHOST_VRING_IDX_TX (vhiq->qid));
+ {
+ vlib_cli_output (vm, " thread %d on vring %d\n",
+ vhc - vum->cpus,
+ VHOST_VRING_IDX_TX (vhiq->qid));
+ vlib_cli_output (vm, " mode: %U\n",
+ format_vhost_user_operation_mode,
+ vhc->operation_mode);
+ }
}
}
@@ -3096,6 +3380,9 @@ done:
* in the name to be specified. If instance already exists, name will be used
* anyway and multiple instances will have the same name. Use with caution.
*
+ * - <b>mode [interrupt | polling]</b> - Optional parameter specifying
+ * the input thread polling policy.
+ *
* @cliexpar
* Example of how to create a vhost interface with VPP as the client and all features enabled:
* @cliexstart{create vhost-user socket /tmp/vhost1.sock}
@@ -3112,7 +3399,9 @@ done:
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
.path = "create vhost-user",
- .short_help = "create vhost-user socket <socket-filename> [server] [feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>]",
+ .short_help = "create vhost-user socket <socket-filename> [server] "
+ "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] "
+ "[mode {interrupt | polling}]",
.function = vhost_user_connect_command_fn,
};
/* *INDENT-ON* */
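A hedged usage sketch of the CLI option added above: the socket path mirrors the @cliexpar example in the doxygen comment, while the returned interface name and the show output are illustrative, assuming a VPP image built with this patch.

vpp# create vhost-user socket /tmp/vhost1.sock mode interrupt
VirtualEthernet0/0/0
vpp# show vhost-user
...
  configured mode: interrupt
...

The first command follows the updated short_help string, and "show vhost-user" reports the per-interface and per-thread mode through the new format_vhost_user_operation_mode helper.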
diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h
index 6b928f05e28..67f18b8ed8e 100644
--- a/src/vnet/devices/virtio/vhost-user.h
+++ b/src/vnet/devices/virtio/vhost-user.h
@@ -216,6 +216,8 @@ typedef struct
#define VHOST_USER_INTERRUPT_MODE 1
#define VHOST_USER_ADAPTIVE_MODE 2
+#define VHOST_USER_EVENT_START_TIMER 1
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -298,6 +300,12 @@ typedef struct
/* This is here so it doesn't end-up
* using stack or registers. */
vhost_trace_t *current_trace;
+
+ /* bitmap of pending rx interfaces */
+ uword *pending_input_bitmap;
+
+ /* The operation mode computed per cpu based on interface setting */
+ u8 operation_mode;
} vhost_cpu_t;
typedef struct
@@ -320,6 +328,9 @@ typedef struct
/** Pseudo random iterator */
u32 random;
+
+ /* Node is in interrupt mode */
+ u8 interrupt_mode;
} vhost_user_main_t;
typedef struct