aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet/devices/virtio
diff options
context:
space:
mode:
authorSteven <sluong@cisco.com>2017-05-01 14:03:02 -0700
committerSteven <sluong@cisco.com>2017-05-22 16:15:38 -0700
commitf3b53643e87e7521c57cccc157385d2fa4bd0d80 (patch)
tree19b0716291d3d69c2a787b2b45661916a564f585 /src/vnet/devices/virtio
parent10980465ce97eceff05ac94a69a13d63d3cfa70a (diff)
vhost: migrate to use device infra for worker thread assignment, rx-mode.
and add adaptive mode support to receive queue - Migrate vhost to use device infra which does the interface/queue to worker thread assignment. - Retire vhost thread CLI and corresponding code which assigns interface/queue to worker thread. set interface placement should be used instead to customize the interface/queue to worker thread assignment. - Retire vhost interrupt/polling option when creating vhost-user interface. Instead, set interface rx-mode should be used. - Add code in vnet_device_input_unassign_thread to change the node state to interrupt if the last polling interface has left the worker thread for the device of the corresponding interface/queue. - Add adaptive mode support. The node state is set to interrupt initially. When the scheduler detects a burst of traffic, it switches the input node to polling. Then we inform the device that we don't need interrupt notification. When the traffic subsides, the scheduler switches the input node back to interrupt. Then we immediately tell the driver that we want interrupt notification again. - Remove some duplicate code in vlib/main.c Change-Id: Id19bb1b9e50e6521c6464f470f5825c26924d3a8 Signed-off-by: Steven <sluong@cisco.com>
Diffstat (limited to 'src/vnet/devices/virtio')
-rw-r--r--src/vnet/devices/virtio/vhost-user.c748
-rw-r--r--src/vnet/devices/virtio/vhost-user.h45
-rw-r--r--src/vnet/devices/virtio/vhost_user_api.c7
3 files changed, 316 insertions, 484 deletions
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 451ae4342b6..231889342e8 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -362,140 +362,71 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
}
}
+/**
+ * @brief Unassign existing interface/queue to thread mappings and re-assign
+ * new interface/queue to thread mappings
+ */
static void
vhost_user_rx_thread_placement ()
{
vhost_user_main_t *vum = &vhost_user_main;
vhost_user_intf_t *vui;
- vhost_cpu_t *vhc;
- u32 *workers = 0;
- u32 thread_index;
- vlib_main_t *vm;
-
- //Let's list all workers cpu indexes
- u32 i;
- for (i = vum->input_cpu_first_index;
- i < vum->input_cpu_first_index + vum->input_cpu_count; i++)
- {
- vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index,
- VLIB_NODE_STATE_DISABLED);
- vec_add1 (workers, i);
- }
-
- vec_foreach (vhc, vum->cpus)
- {
- vec_reset_length (vhc->rx_queues);
- }
+ vhost_user_vring_t *txvq;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 qid;
+ int rv;
+ u16 *queue;
- i = 0;
- vhost_iface_and_queue_t iaq;
+ // Scrap all existing mappings for all interfaces/queues
/* *INDENT-OFF* */
pool_foreach (vui, vum->vhost_user_interfaces, {
- u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers;
- u32 qid;
- for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
+ vec_foreach (queue, vui->rx_queues)
{
- vhost_user_vring_t *txvq =
- &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- if (!txvq->started)
- continue;
-
- i %= vec_len (vui_workers);
- thread_index = vui_workers[i];
- i++;
- vhc = &vum->cpus[thread_index];
- txvq->interrupt_thread_index = thread_index;
-
- iaq.qid = qid;
- iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
- vec_add1 (vhc->rx_queues, iaq);
+ rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index,
+ *queue);
+ if (rv)
+ clib_warning ("Warning: unable to unassign interface %d, "
+ "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
}
+ vec_reset_length (vui->rx_queues);
});
/* *INDENT-ON* */
- vec_foreach (vhc, vum->cpus)
- {
- vhost_iface_and_queue_t *vhiq;
- u8 mode = VHOST_USER_INTERRUPT_MODE;
-
- vec_foreach (vhiq, vhc->rx_queues)
- {
- vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
- if (vui->operation_mode == VHOST_USER_POLLING_MODE)
+ // Create the rx_queues for all interfaces
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
{
- /* At least one interface is polling, cpu is set to polling */
- mode = VHOST_USER_POLLING_MODE;
- break;
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ if (txvq->started)
+ {
+ if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN)
+ /* Set polling as the default */
+ txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+ vec_add1 (vui->rx_queues, qid);
+ }
}
- }
- vhc->operation_mode = mode;
- }
-
- for (thread_index = vum->input_cpu_first_index;
- thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
- thread_index++)
- {
- vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
+ });
+ /* *INDENT-ON* */
- vhc = &vum->cpus[thread_index];
- vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
- switch (vhc->operation_mode)
+ // Assign new mappings for all interfaces/queues
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ vnet_hw_interface_set_input_node (vnm, vui->hw_if_index,
+ vhost_user_input_node.index);
+ vec_foreach (queue, vui->rx_queues)
{
- case VHOST_USER_INTERRUPT_MODE:
- state = VLIB_NODE_STATE_INTERRUPT;
- break;
- case VHOST_USER_POLLING_MODE:
- state = VLIB_NODE_STATE_POLLING;
- break;
- default:
- clib_warning ("BUG: bad operation mode %d", vhc->operation_mode);
- break;
+ vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue,
+ ~0);
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+ rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue,
+ txvq->mode);
+ if (rv)
+ clib_warning ("Warning: unable to set rx mode for interface %d, "
+ "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
}
- vlib_node_set_state (vm, vhost_user_input_node.index, state);
- }
-
- vec_free (workers);
-}
-
-static int
-vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- vnet_hw_interface_t *hw;
-
- if (worker_thread_index < vum->input_cpu_first_index ||
- worker_thread_index >=
- vum->input_cpu_first_index + vum->input_cpu_count)
- return -1;
-
- if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index)))
- return -2;
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance);
- u32 found = ~0, *w;
- vec_foreach (w, vui->workers)
- {
- if (*w == worker_thread_index)
- {
- found = w - vui->workers;
- break;
- }
- }
-
- if (del)
- {
- if (found == ~0)
- return -3;
- vec_del1 (vui->workers, found);
- }
- else if (found == ~0)
- {
- vec_add1 (vui->workers, worker_thread_index);
- }
-
- vhost_user_rx_thread_placement ();
- return 0;
+ });
+ /* *INDENT-ON* */
}
/** @brief Returns whether at least one TX and one RX vring are enabled */
@@ -532,37 +463,17 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui)
static void
vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_cpu_t *vhc;
- u32 thread_index;
- vlib_main_t *vm;
- u32 ifq2, qid;
- vhost_user_vring_t *txvq;
+ u32 qid;
+ vnet_main_t *vnm = vnet_get_main ();
qid = ifq & 0xff;
- if ((qid % 2) == 0)
- /* Only care about the odd number virtqueue which is TX */
+ if ((qid & 1) == 0)
+ /* Only care about the odd number, or TX, virtqueue */
return;
if (vhost_user_intf_ready (vui))
- {
- txvq = &vui->vrings[qid];
- thread_index = txvq->interrupt_thread_index;
- vhc = &vum->cpus[thread_index];
- if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE)
- {
- vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
- /*
- * Convert virtqueue number in the lower byte to vring
- * queue index for the input node process. Top bytes contain
- * the interface, lower byte contains the queue index.
- */
- ifq2 = ((ifq >> 8) << 8) | qid / 2;
- vhc->pending_input_bitmap =
- clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1);
- vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index);
- }
- }
+ // qid >> 1 is to convert virtqueue number to vring queue index
+ vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1);
}
static clib_error_t *
@@ -570,14 +481,10 @@ vhost_user_callfd_read_ready (unix_file_t * uf)
{
__attribute__ ((unused)) int n;
u8 buff[8];
- vhost_user_intf_t *vui =
- pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
- uf->private_data >> 8);
n = read (uf->file_descriptor, ((char *) &buff), 8);
DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8,
uf->private_data & 0xff);
- vhost_user_set_interrupt_pending (vui, uf->private_data);
return 0;
}
@@ -1001,12 +908,8 @@ vhost_user_socket_read (unix_file_t * uf)
vui->vrings[msg.state.index].last_avail_idx =
vui->vrings[msg.state.index].used->idx;
- if (vui->operation_mode == VHOST_USER_POLLING_MODE)
- /* tell driver that we don't want interrupts */
- vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
- else
- /* tell driver that we want interrupts */
- vui->vrings[msg.state.index].used->flags = 0;
+ /* tell driver that we don't want interrupts */
+ vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
break;
case VHOST_USER_SET_OWNER:
@@ -1315,8 +1218,6 @@ vhost_user_init (vlib_main_t * vm)
clib_error_t *error;
vhost_user_main_t *vum = &vhost_user_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_thread_registration_t *tr;
- uword *p;
error = vlib_call_init_function (vm, ip4_init);
if (error)
@@ -1335,18 +1236,6 @@ vhost_user_init (vlib_main_t * vm)
cpu->rx_buffers_len = 0;
}
- /* find out which cpus will be used for input */
- vum->input_cpu_first_index = 0;
- vum->input_cpu_count = 1;
- p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- tr = p ? (vlib_thread_registration_t *) p[0] : 0;
-
- if (tr && tr->count > 0)
- {
- vum->input_cpu_first_index = tr->first_index;
- vum->input_cpu_count = tr->count;
- }
-
vum->random = random_default_seed ();
mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
@@ -1447,9 +1336,16 @@ vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq)
vhost_user_main_t *vum = &vhost_user_main;
u64 x = 1;
int fd = UNIX_GET_FD (vq->callfd_idx);
- int rv __attribute__ ((unused));
- /* TODO: pay attention to rv */
+ int rv;
+
rv = write (fd, &x, sizeof (x));
+ if (rv <= 0)
+ {
+ clib_unix_warning
+ ("Error: Could not write to unix socket for callfd %d", fd);
+ return;
+ }
+
vq->n_since_last_int = 0;
vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
}
@@ -1564,7 +1460,8 @@ static u32
vhost_user_if_input (vlib_main_t * vm,
vhost_user_main_t * vum,
vhost_user_intf_t * vui,
- u16 qid, vlib_node_runtime_t * node)
+ u16 qid, vlib_node_runtime_t * node,
+ vnet_hw_interface_rx_mode mode)
{
vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
u16 n_rx_packets = 0;
@@ -1590,6 +1487,26 @@ vhost_user_if_input (vlib_main_t * vm,
vhost_user_send_call (vm, rxvq);
}
+ /*
+ * For adaptive mode, it is optimized to reduce interrupts.
+ * If the scheduler switches the input node to polling due
+ * to burst of traffic, we tell the driver no interrupt.
+ * When the traffic subsides, the scheduler switches the node back to
+ * interrupt mode. We must tell the driver we want interrupt.
+ */
+ if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+ {
+ if ((node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+ /* Tell driver we want notification */
+ txvq->used->flags = 0;
+ else
+ /* Tell driver we don't want notification */
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ }
+
if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
return 0;
@@ -1926,34 +1843,23 @@ vhost_user_input (vlib_main_t * vm,
{
vhost_user_main_t *vum = &vhost_user_main;
uword n_rx_packets = 0;
- u32 thread_index = vlib_get_thread_index ();
- vhost_iface_and_queue_t *vhiq;
vhost_user_intf_t *vui;
- vhost_cpu_t *vhc;
+ vnet_device_input_runtime_t *rt =
+ (vnet_device_input_runtime_t *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
- vhc = &vum->cpus[thread_index];
- if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
- {
- vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ if (clib_smp_swap (&dq->interrupt_pending, 0) ||
+ (node->state == VLIB_NODE_STATE_POLLING))
{
- vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
- n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
+ vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
+ n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
+ dq->mode);
}
- }
- else
- {
- int i;
-
- /* *INDENT-OFF* */
- clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({
- int qid = i & 0xff;
+ }
- clib_bitmap_set (vhc->pending_input_bitmap, i, 0);
- vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8);
- n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node);
- }));
- /* *INDENT-ON* */
- }
return n_rx_packets;
}
@@ -2371,6 +2277,161 @@ done3:
return frame->n_vectors;
}
+static uword
+vhost_user_send_interrupt_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vhost_user_intf_t *vui;
+ f64 timeout = 3153600000.0 /* 100 years */ ;
+ uword event_type, *event_data = 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ u16 *queue;
+ f64 now, poll_time_remaining;
+ f64 next_timeout;
+ u8 stop_timer = 0;
+
+ while (1)
+ {
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ /*
+ * Use the remaining timeout if it is less than coalesce time to avoid
+ * resetting the existing timer in the middle of expiration
+ */
+ timeout = poll_time_remaining;
+ if (vlib_process_suspend_time_is_zero (timeout) ||
+ (timeout > vum->coalesce_time))
+ timeout = vum->coalesce_time;
+
+ now = vlib_time_now (vm);
+ switch (event_type)
+ {
+ case VHOST_USER_EVENT_STOP_TIMER:
+ stop_timer = 1;
+ break;
+
+ case VHOST_USER_EVENT_START_TIMER:
+ stop_timer = 0;
+ if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
+ break;
+ /* fall through */
+
+ case ~0:
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ next_timeout = timeout;
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vhost_user_vring_t *rxvq =
+ &vui->vrings[VHOST_VRING_IDX_RX (*queue)];
+ vhost_user_vring_t *txvq =
+ &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+
+ if (txvq->n_since_last_int)
+ {
+ if (now >= txvq->int_deadline)
+ vhost_user_send_call (vm, txvq);
+ else
+ next_timeout = txvq->int_deadline - now;
+ }
+
+ if (rxvq->n_since_last_int)
+ {
+ if (now >= rxvq->int_deadline)
+ vhost_user_send_call (vm, rxvq);
+ else
+ next_timeout = rxvq->int_deadline - now;
+ }
+
+ if ((next_timeout < timeout) && (next_timeout > 0.0))
+ timeout = next_timeout;
+ }
+ });
+ /* *INDENT-ON* */
+ break;
+
+ default:
+ clib_warning ("BUG: unhandled event type %d", event_type);
+ break;
+ }
+ /* No less than 1 millisecond */
+ if (timeout < 1e-3)
+ timeout = 1e-3;
+ if (stop_timer)
+ timeout = 3153600000.0;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
+ .function = vhost_user_send_interrupt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vhost-user-send-interrupt-process",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
+ u32 qid, vnet_hw_interface_rx_mode mode)
+{
+ vlib_main_t *vm = vnm->vlib_main;
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+ vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+
+ if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+ {
+ if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ vum->ifq_count++;
+ // Start the timer if this is the first encounter on interrupt
+ // interface/queue
+ if ((vum->ifq_count == 1) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_START_TIMER, 0);
+ }
+ }
+ else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) &&
+ vum->ifq_count)
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if ((vum->ifq_count == 0) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ }
+ }
+
+ txvq->mode = mode;
+ if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) ||
+ (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT))
+ txvq->used->flags = 0;
+ else
+ {
+ clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode,
+ hw_if_index, qid);
+ return clib_error_return (0, "unsupported");
+ }
+
+ return 0;
+}
+
static clib_error_t *
vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
u32 flags)
@@ -2399,6 +2460,7 @@ VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
.format_device_name = format_vhost_user_interface_name,
.name_renumber = vhost_user_name_renumber,
.admin_up_down_function = vhost_user_interface_admin_up_down,
+ .rx_mode_change_function = vhost_user_interface_rx_mode_change,
.format_tx_trace = format_vhost_trace,
};
@@ -2523,8 +2585,6 @@ vhost_user_term_if (vhost_user_intf_t * vui)
int q;
vhost_user_main_t *vum = &vhost_user_main;
- // Delete configured thread pinning
- vec_reset_length (vui->workers);
// disconnect interface sockets
vhost_user_if_disconnect (vui);
vhost_user_update_iface_state (vui);
@@ -2555,6 +2615,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
vhost_user_intf_t *vui;
int rv = 0;
vnet_hw_interface_t *hwif;
+ u16 *queue;
if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
hwif->dev_class_index != vhost_user_dev_class.index)
@@ -2565,6 +2626,28 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vhost_user_vring_t *txvq;
+
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+ if ((vum->ifq_count > 0) &&
+ ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)))
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if ((vum->ifq_count == 0) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ {
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ break;
+ }
+ }
+ }
+
// Disable and reset interface
vhost_user_term_if (vui);
@@ -2687,13 +2770,15 @@ vhost_user_vui_init (vnet_main_t * vnm,
vhost_user_intf_t * vui,
int server_sock_fd,
const char *sock_filename,
- u64 feature_mask, u32 * sw_if_index, u8 operation_mode)
+ u64 feature_mask, u32 * sw_if_index)
{
vnet_sw_interface_t *sw;
- sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
int q;
vhost_user_main_t *vum = &vhost_user_main;
+ vnet_hw_interface_t *hw;
+ hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
+ sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
if (server_sock_fd != -1)
{
unix_file_t template = { 0 };
@@ -2715,7 +2800,6 @@ vhost_user_vui_init (vnet_main_t * vnm,
vui->feature_mask = feature_mask;
vui->unix_file_index = ~0;
vui->log_base_addr = 0;
- vui->operation_mode = operation_mode;
vui->if_index = vui - vum->vhost_user_interfaces;
mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
&vui->if_index, 0);
@@ -2723,6 +2807,7 @@ vhost_user_vui_init (vnet_main_t * vnm,
for (q = 0; q < VHOST_VRING_MAX_N; q++)
vhost_user_vring_init (vui, q);
+ hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
if (sw_if_index)
@@ -2740,106 +2825,13 @@ vhost_user_vui_init (vnet_main_t * vnm,
vhost_user_tx_thread_placement (vui);
}
-static uword
-vhost_user_send_interrupt_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- vhost_user_intf_t *vui;
- f64 timeout = 3153600000.0 /* 100 years */ ;
- uword event_type, *event_data = 0;
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_iface_and_queue_t *vhiq;
- vhost_cpu_t *vhc;
- f64 now, poll_time_remaining;
-
- while (1)
- {
- poll_time_remaining =
- vlib_process_wait_for_event_or_clock (vm, timeout);
- event_type = vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- /*
- * Use the remaining timeout if it is less than coalesce time to avoid
- * resetting the existing timer in the middle of expiration
- */
- timeout = poll_time_remaining;
- if (vlib_process_suspend_time_is_zero (timeout) ||
- (timeout > vum->coalesce_time))
- timeout = vum->coalesce_time;
-
- now = vlib_time_now (vm);
- switch (event_type)
- {
- case VHOST_USER_EVENT_START_TIMER:
- if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
- break;
- /* fall through */
-
- case ~0:
- vec_foreach (vhc, vum->cpus)
- {
- u32 thread_index = vhc - vum->cpus;
- f64 next_timeout;
-
- next_timeout = timeout;
- vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
- {
- vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
- vhost_user_vring_t *rxvq =
- &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)];
- vhost_user_vring_t *txvq =
- &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)];
-
- if (txvq->n_since_last_int)
- {
- if (now >= txvq->int_deadline)
- vhost_user_send_call (vm, txvq);
- else
- next_timeout = txvq->int_deadline - now;
- }
-
- if (rxvq->n_since_last_int)
- {
- if (now >= rxvq->int_deadline)
- vhost_user_send_call (vm, rxvq);
- else
- next_timeout = rxvq->int_deadline - now;
- }
-
- if ((next_timeout < timeout) && (next_timeout > 0.0))
- timeout = next_timeout;
- }
- }
- break;
-
- default:
- clib_warning ("BUG: unhandled event type %d", event_type);
- break;
- }
- /* No less than 1 millisecond */
- if (timeout < 1e-3)
- timeout = 1e-3;
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
- .function = vhost_user_send_interrupt_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "vhost-user-send-interrupt-process",
-};
-/* *INDENT-ON* */
-
int
vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename,
u8 is_server,
u32 * sw_if_index,
u64 feature_mask,
- u8 renumber, u32 custom_dev_instance, u8 * hwaddr,
- u8 operation_mode)
+ u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
{
vhost_user_intf_t *vui = NULL;
u32 sw_if_idx = ~0;
@@ -2848,10 +2840,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_main_t *vum = &vhost_user_main;
uword *if_index;
- if ((operation_mode != VHOST_USER_POLLING_MODE) &&
- (operation_mode != VHOST_USER_INTERRUPT_MODE))
- return VNET_API_ERROR_UNIMPLEMENTED;
-
if (sock_filename == NULL || !(strlen (sock_filename) > 0))
{
return VNET_API_ERROR_INVALID_ARGUMENT;
@@ -2881,7 +2869,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
- feature_mask, &sw_if_idx, operation_mode);
+ feature_mask, &sw_if_idx);
if (renumber)
vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
@@ -2892,14 +2880,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
// Process node must connect
vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
- if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
- !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
- (vum->coalesce_frames > 0))
- {
- vum->interrupt_mode = 1;
- vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_START_TIMER, 0);
- }
return rv;
}
@@ -2908,8 +2888,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename,
u8 is_server,
u32 sw_if_index,
- u64 feature_mask, u8 renumber, u32 custom_dev_instance,
- u8 operation_mode)
+ u64 feature_mask, u8 renumber, u32 custom_dev_instance)
{
vhost_user_main_t *vum = &vhost_user_main;
vhost_user_intf_t *vui = NULL;
@@ -2919,9 +2898,6 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
vnet_hw_interface_t *hwif;
uword *if_index;
- if ((operation_mode != VHOST_USER_POLLING_MODE) &&
- (operation_mode != VHOST_USER_INTERRUPT_MODE))
- return VNET_API_ERROR_UNIMPLEMENTED;
if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
hwif->dev_class_index != vhost_user_dev_class.index)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
@@ -2947,8 +2923,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_term_if (vui);
vhost_user_vui_init (vnm, vui, server_sock_fd,
- sock_filename, feature_mask, &sw_if_idx,
- operation_mode);
+ sock_filename, feature_mask, &sw_if_idx);
if (renumber)
vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
@@ -2956,33 +2931,9 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
// Process node must connect
vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
- if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
- !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
- (vum->coalesce_frames > 0))
- {
- vum->interrupt_mode = 1;
- vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_START_TIMER, 0);
- }
return rv;
}
-static uword
-unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args)
-{
- u8 *operation_mode = va_arg (*args, u8 *);
- uword rc = 1;
-
- if (unformat (input, "interrupt"))
- *operation_mode = VHOST_USER_INTERRUPT_MODE;
- else if (unformat (input, "polling"))
- *operation_mode = VHOST_USER_POLLING_MODE;
- else
- rc = 0;
-
- return rc;
-}
-
clib_error_t *
vhost_user_connect_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -2998,7 +2949,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
u8 hwaddr[6];
u8 *hw = NULL;
clib_error_t *error = NULL;
- u8 operation_mode = VHOST_USER_POLLING_MODE;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -3020,9 +2970,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
{
renumber = 1;
}
- else if (unformat (line_input, "mode %U",
- unformat_vhost_user_operation_mode, &operation_mode))
- ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -3036,8 +2983,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
int rv;
if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
is_server, &sw_if_index, feature_mask,
- renumber, custom_dev_instance, hw,
- operation_mode)))
+ renumber, custom_dev_instance, hw)))
{
error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
goto done;
@@ -3127,7 +3073,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
vec_add2 (r_vuids, vuid, 1);
- vuid->operation_mode = vui->operation_mode;
vuid->sw_if_index = vui->sw_if_index;
vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
vuid->features = vui->features;
@@ -3152,25 +3097,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
return rv;
}
-static u8 *
-format_vhost_user_operation_mode (u8 * s, va_list * va)
-{
- int operation_mode = va_arg (*va, int);
-
- switch (operation_mode)
- {
- case VHOST_USER_POLLING_MODE:
- s = format (s, "%s", "polling");
- break;
- case VHOST_USER_INTERRUPT_MODE:
- s = format (s, "%s", "interrupt");
- break;
- default:
- s = format (s, "%s", "invalid");
- }
- return s;
-}
-
clib_error_t *
show_vhost_user_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -3182,10 +3108,8 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vhost_user_intf_t *vui;
u32 hw_if_index, *hw_if_indices = 0;
vnet_hw_interface_t *hi;
- vhost_cpu_t *vhc;
- vhost_iface_and_queue_t *vhiq;
+ u16 *queue;
u32 ci;
-
int i, j, q;
int show_descr = 0;
struct feat_struct
@@ -3238,6 +3162,8 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "Virtio vhost-user interfaces");
vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
vum->coalesce_frames, vum->coalesce_time);
+ vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d",
+ vum->ifq_count);
for (i = 0; i < vec_len (hw_if_indices); i++)
{
@@ -3279,23 +3205,21 @@ show_vhost_user_command_fn (vlib_main_t * vm,
(vui->unix_server_index != ~0) ? "server" : "client",
strerror (vui->sock_errno));
- vlib_cli_output (vm, " configured mode: %U\n",
- format_vhost_user_operation_mode, vui->operation_mode);
vlib_cli_output (vm, " rx placement: ");
- vec_foreach (vhc, vum->cpus)
+
+ vec_foreach (queue, vui->rx_queues)
{
- vec_foreach (vhiq, vhc->rx_queues)
- {
- if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces)
- {
- vlib_cli_output (vm, " thread %d on vring %d\n",
- vhc - vum->cpus,
- VHOST_VRING_IDX_TX (vhiq->qid));
- vlib_cli_output (vm, " mode: %U\n",
- format_vhost_user_operation_mode,
- vhc->operation_mode);
- }
- }
+ vnet_main_t *vnm = vnet_get_main ();
+ uword thread_index;
+ vnet_hw_interface_rx_mode mode;
+
+ thread_index = vnet_get_device_input_thread_index (vnm,
+ vui->hw_if_index,
+ *queue);
+ vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode);
+ vlib_cli_output (vm, " thread %d on vring %d, %U\n",
+ thread_index, VHOST_VRING_IDX_TX (*queue),
+ format_vnet_hw_interface_rx_mode, mode);
}
vlib_cli_output (vm, " tx placement: %s\n",
@@ -3444,8 +3368,7 @@ done:
VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
.path = "create vhost-user",
.short_help = "create vhost-user socket <socket-filename> [server] "
- "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] "
- "[mode {interrupt | polling}]",
+ "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] ",
.function = vhost_user_connect_command_fn,
};
/* *INDENT-ON* */
@@ -3648,69 +3571,6 @@ vhost_user_unmap_all (void)
}
}
-static clib_error_t *
-vhost_thread_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u32 worker_thread_index;
- u32 sw_if_index;
- u8 del = 0;
- int rv;
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- if (!unformat
- (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (),
- &sw_if_index, &worker_thread_index))
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
-
- if (unformat (line_input, "del"))
- del = 1;
-
- if ((rv =
- vhost_user_thread_placement (sw_if_index, worker_thread_index, del)))
- {
- error = clib_error_return (0, "vhost_user_thread_placement returned %d",
- rv);
- goto done;
- }
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-
-/*?
- * This command is used to move the RX processing for the given
- * interfaces to the provided thread. If the '<em>del</em>' option is used,
- * the forced thread assignment is removed and the thread assigment is
- * reassigned automatically. Use '<em>show vhost-user <interface></em>'
- * to see the thread assignment.
- *
- * @cliexpar
- * Example of how to move the RX processing for a given interface to a given thread:
- * @cliexcmd{vhost thread VirtualEthernet0/0/0 1}
- * Example of how to remove the forced thread assignment for a given interface:
- * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vhost_user_thread_command, static) = {
- .path = "vhost thread",
- .short_help = "vhost thread <iface> <worker-index> [del]",
- .function = vhost_thread_command_fn,
-};
-/* *INDENT-ON* */
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h
index 56b65477b19..ceced342c92 100644
--- a/src/vnet/devices/virtio/vhost-user.h
+++ b/src/vnet/devices/virtio/vhost-user.h
@@ -66,13 +66,11 @@ typedef enum
int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename, u8 is_server,
u32 * sw_if_index, u64 feature_mask,
- u8 renumber, u32 custom_dev_instance, u8 * hwaddr,
- u8 operation_mode);
+ u8 renumber, u32 custom_dev_instance, u8 * hwaddr);
int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename, u8 is_server,
u32 sw_if_index, u64 feature_mask,
- u8 renumber, u32 custom_dev_instance,
- u8 operation_mode);
+ u8 renumber, u32 custom_dev_instance);
int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
u32 sw_if_index);
@@ -210,14 +208,13 @@ typedef struct
u32 callfd_idx;
u32 kickfd_idx;
u64 log_guest_addr;
- u32 interrupt_thread_index;
-} vhost_user_vring_t;
-#define VHOST_USER_POLLING_MODE 0
-#define VHOST_USER_INTERRUPT_MODE 1
-#define VHOST_USER_ADAPTIVE_MODE 2
+ /* The rx queue policy (interrupt/adaptive/polling) for this queue */
+ u32 mode;
+} vhost_user_vring_t;
#define VHOST_USER_EVENT_START_TIMER 1
+#define VHOST_USER_EVENT_STOP_TIMER 2
typedef struct
{
@@ -258,20 +255,12 @@ typedef struct
u8 use_tx_spinlock;
u16 *per_cpu_tx_qid;
- /* Vector of workers for this interface */
- u32 *workers;
-
- u8 operation_mode;
+ /* Vector of active rx queues for this interface */
+ u16 *rx_queues;
} vhost_user_intf_t;
typedef struct
{
- u16 vhost_iface_index;
- u16 qid;
-} vhost_iface_and_queue_t;
-
-typedef struct
-{
uword dst;
uword src;
u32 len;
@@ -292,7 +281,6 @@ typedef struct
typedef struct
{
- vhost_iface_and_queue_t *rx_queues;
u32 rx_buffers_len;
u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
@@ -302,12 +290,6 @@ typedef struct
/* This is here so it doesn't end-up
* using stack or registers. */
vhost_trace_t *current_trace;
-
- /* bitmap of pending rx interfaces */
- uword *pending_input_bitmap;
-
- /* The operation mode computed per cpu based on interface setting */
- u8 operation_mode;
} vhost_cpu_t;
typedef struct
@@ -320,20 +302,14 @@ typedef struct
f64 coalesce_time;
int dont_dump_vhost_user_memory;
- /** first cpu index */
- u32 input_cpu_first_index;
-
- /** total cpu count */
- u32 input_cpu_count;
-
/** Per-CPU data for vhost-user */
vhost_cpu_t *cpus;
/** Pseudo random iterator */
u32 random;
- /* Node is in interrupt mode */
- u8 interrupt_mode;
+ /* The number of rx interface/queue pairs in interrupt mode */
+ u32 ifq_count;
} vhost_user_main_t;
typedef struct
@@ -346,7 +322,6 @@ typedef struct
u8 sock_filename[256];
u32 num_regions;
int sock_errno;
- u8 operation_mode;
} vhost_user_intf_details_t;
int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c
index ac7afa611b6..8dbd032b117 100644
--- a/src/vnet/devices/virtio/vhost_user_api.c
+++ b/src/vnet/devices/virtio/vhost_user_api.c
@@ -81,8 +81,7 @@ vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp)
rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename,
mp->is_server, &sw_if_index, (u64) ~ 0,
mp->renumber, ntohl (mp->custom_dev_instance),
- (mp->use_custom_mac) ? mp->mac_address : NULL,
- mp->operation_mode);
+ (mp->use_custom_mac) ? mp->mac_address : NULL);
/* Remember an interface tag for the new interface */
if (rv == 0)
@@ -117,8 +116,7 @@ vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp)
rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename,
mp->is_server, sw_if_index, (u64) ~ 0,
- mp->renumber, ntohl (mp->custom_dev_instance),
- mp->operation_mode);
+ mp->renumber, ntohl (mp->custom_dev_instance));
REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY);
}
@@ -164,7 +162,6 @@ send_sw_interface_vhost_user_details (vpe_api_main_t * am,
mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
mp->features = clib_net_to_host_u64 (vui->features);
mp->is_server = vui->is_server;
- mp->operation_mode = vui->operation_mode;
mp->num_regions = ntohl (vui->num_regions);
mp->sock_errno = ntohl (vui->sock_errno);
mp->context = context;