aboutsummaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/devices/devices.c20
-rw-r--r--src/vnet/devices/virtio/vhost-user.c748
-rw-r--r--src/vnet/devices/virtio/vhost-user.h45
-rw-r--r--src/vnet/devices/virtio/vhost_user_api.c7
4 files changed, 336 insertions, 484 deletions
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index e71be602237..58c72077f24 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -150,6 +150,7 @@ vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
dq->dev_instance = hw->dev_instance;
dq->queue_id = queue_id;
dq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+ rt->enabled_node_state = VLIB_NODE_STATE_POLLING;
vnet_device_queue_update (vnm, rt);
vec_validate (hw->input_node_thread_index_by_queue, queue_id);
@@ -168,6 +169,7 @@ vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
vnet_device_input_runtime_t *rt;
vnet_device_and_queue_t *dq;
uword old_thread_index;
+ vnet_hw_interface_rx_mode mode;
if (hw->input_node_thread_index_by_queue == 0)
return VNET_API_ERROR_INVALID_INTERFACE;
@@ -184,6 +186,7 @@ vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index,
vec_foreach (dq, rt->devices_and_queues)
if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id)
{
+ mode = dq->mode;
vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues);
goto deleted;
}
@@ -197,6 +200,23 @@ deleted:
if (vec_len (rt->devices_and_queues) == 0)
vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED);
+ else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ /*
+ * if the deleted interface is polling, we may need to set the node state
+ * to interrupt if there is no more polling interface for this device's
+ * corresponding thread. This is because mixed interfaces
+ * (polling and interrupt), assigned to the same thread, set the
+ * thread to polling prior to the deletion.
+ */
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ if (dq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ return 0;
+ }
+ rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT;
+ vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state);
+ }
return 0;
}
diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c
index 451ae4342b6..231889342e8 100644
--- a/src/vnet/devices/virtio/vhost-user.c
+++ b/src/vnet/devices/virtio/vhost-user.c
@@ -362,140 +362,71 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
}
}
+/**
+ * @brief Unassign existing interface/queue to thread mappings and re-assign
+ * new interface/queue to thread mappings
+ */
static void
vhost_user_rx_thread_placement ()
{
vhost_user_main_t *vum = &vhost_user_main;
vhost_user_intf_t *vui;
- vhost_cpu_t *vhc;
- u32 *workers = 0;
- u32 thread_index;
- vlib_main_t *vm;
-
- //Let's list all workers cpu indexes
- u32 i;
- for (i = vum->input_cpu_first_index;
- i < vum->input_cpu_first_index + vum->input_cpu_count; i++)
- {
- vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index,
- VLIB_NODE_STATE_DISABLED);
- vec_add1 (workers, i);
- }
-
- vec_foreach (vhc, vum->cpus)
- {
- vec_reset_length (vhc->rx_queues);
- }
+ vhost_user_vring_t *txvq;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 qid;
+ int rv;
+ u16 *queue;
- i = 0;
- vhost_iface_and_queue_t iaq;
+ // Scrap all existing mappings for all interfaces/queues
/* *INDENT-OFF* */
pool_foreach (vui, vum->vhost_user_interfaces, {
- u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers;
- u32 qid;
- for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
+ vec_foreach (queue, vui->rx_queues)
{
- vhost_user_vring_t *txvq =
- &vui->vrings[VHOST_VRING_IDX_TX (qid)];
- if (!txvq->started)
- continue;
-
- i %= vec_len (vui_workers);
- thread_index = vui_workers[i];
- i++;
- vhc = &vum->cpus[thread_index];
- txvq->interrupt_thread_index = thread_index;
-
- iaq.qid = qid;
- iaq.vhost_iface_index = vui - vum->vhost_user_interfaces;
- vec_add1 (vhc->rx_queues, iaq);
+ rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index,
+ *queue);
+ if (rv)
+ clib_warning ("Warning: unable to unassign interface %d, "
+ "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
}
+ vec_reset_length (vui->rx_queues);
});
/* *INDENT-ON* */
- vec_foreach (vhc, vum->cpus)
- {
- vhost_iface_and_queue_t *vhiq;
- u8 mode = VHOST_USER_INTERRUPT_MODE;
-
- vec_foreach (vhiq, vhc->rx_queues)
- {
- vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
- if (vui->operation_mode == VHOST_USER_POLLING_MODE)
+ // Create the rx_queues for all interfaces
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++)
{
- /* At least one interface is polling, cpu is set to polling */
- mode = VHOST_USER_POLLING_MODE;
- break;
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ if (txvq->started)
+ {
+ if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN)
+ /* Set polling as the default */
+ txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING;
+ vec_add1 (vui->rx_queues, qid);
+ }
}
- }
- vhc->operation_mode = mode;
- }
-
- for (thread_index = vum->input_cpu_first_index;
- thread_index < vum->input_cpu_first_index + vum->input_cpu_count;
- thread_index++)
- {
- vlib_node_state_t state = VLIB_NODE_STATE_POLLING;
+ });
+ /* *INDENT-ON* */
- vhc = &vum->cpus[thread_index];
- vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
- switch (vhc->operation_mode)
+ // Assign new mappings for all interfaces/queues
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ vnet_hw_interface_set_input_node (vnm, vui->hw_if_index,
+ vhost_user_input_node.index);
+ vec_foreach (queue, vui->rx_queues)
{
- case VHOST_USER_INTERRUPT_MODE:
- state = VLIB_NODE_STATE_INTERRUPT;
- break;
- case VHOST_USER_POLLING_MODE:
- state = VLIB_NODE_STATE_POLLING;
- break;
- default:
- clib_warning ("BUG: bad operation mode %d", vhc->operation_mode);
- break;
+ vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue,
+ ~0);
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+ rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue,
+ txvq->mode);
+ if (rv)
+ clib_warning ("Warning: unable to set rx mode for interface %d, "
+ "queue %d: rc=%d", vui->hw_if_index, *queue, rv);
}
- vlib_node_set_state (vm, vhost_user_input_node.index, state);
- }
-
- vec_free (workers);
-}
-
-static int
-vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del)
-{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_user_intf_t *vui;
- vnet_hw_interface_t *hw;
-
- if (worker_thread_index < vum->input_cpu_first_index ||
- worker_thread_index >=
- vum->input_cpu_first_index + vum->input_cpu_count)
- return -1;
-
- if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index)))
- return -2;
-
- vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance);
- u32 found = ~0, *w;
- vec_foreach (w, vui->workers)
- {
- if (*w == worker_thread_index)
- {
- found = w - vui->workers;
- break;
- }
- }
-
- if (del)
- {
- if (found == ~0)
- return -3;
- vec_del1 (vui->workers, found);
- }
- else if (found == ~0)
- {
- vec_add1 (vui->workers, worker_thread_index);
- }
-
- vhost_user_rx_thread_placement ();
- return 0;
+ });
+ /* *INDENT-ON* */
}
/** @brief Returns whether at least one TX and one RX vring are enabled */
@@ -532,37 +463,17 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui)
static void
vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq)
{
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_cpu_t *vhc;
- u32 thread_index;
- vlib_main_t *vm;
- u32 ifq2, qid;
- vhost_user_vring_t *txvq;
+ u32 qid;
+ vnet_main_t *vnm = vnet_get_main ();
qid = ifq & 0xff;
- if ((qid % 2) == 0)
- /* Only care about the odd number virtqueue which is TX */
+ if ((qid & 1) == 0)
+ /* Only care about the odd number, or TX, virtqueue */
return;
if (vhost_user_intf_ready (vui))
- {
- txvq = &vui->vrings[qid];
- thread_index = txvq->interrupt_thread_index;
- vhc = &vum->cpus[thread_index];
- if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE)
- {
- vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main;
- /*
- * Convert virtqueue number in the lower byte to vring
- * queue index for the input node process. Top bytes contain
- * the interface, lower byte contains the queue index.
- */
- ifq2 = ((ifq >> 8) << 8) | qid / 2;
- vhc->pending_input_bitmap =
- clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1);
- vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index);
- }
- }
+ // qid >> 1 is to convert virtqueue number to vring queue index
+ vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1);
}
static clib_error_t *
@@ -570,14 +481,10 @@ vhost_user_callfd_read_ready (unix_file_t * uf)
{
__attribute__ ((unused)) int n;
u8 buff[8];
- vhost_user_intf_t *vui =
- pool_elt_at_index (vhost_user_main.vhost_user_interfaces,
- uf->private_data >> 8);
n = read (uf->file_descriptor, ((char *) &buff), 8);
DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8,
uf->private_data & 0xff);
- vhost_user_set_interrupt_pending (vui, uf->private_data);
return 0;
}
@@ -1001,12 +908,8 @@ vhost_user_socket_read (unix_file_t * uf)
vui->vrings[msg.state.index].last_avail_idx =
vui->vrings[msg.state.index].used->idx;
- if (vui->operation_mode == VHOST_USER_POLLING_MODE)
- /* tell driver that we don't want interrupts */
- vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
- else
- /* tell driver that we want interrupts */
- vui->vrings[msg.state.index].used->flags = 0;
+ /* tell driver that we don't want interrupts */
+ vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
break;
case VHOST_USER_SET_OWNER:
@@ -1315,8 +1218,6 @@ vhost_user_init (vlib_main_t * vm)
clib_error_t *error;
vhost_user_main_t *vum = &vhost_user_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_thread_registration_t *tr;
- uword *p;
error = vlib_call_init_function (vm, ip4_init);
if (error)
@@ -1335,18 +1236,6 @@ vhost_user_init (vlib_main_t * vm)
cpu->rx_buffers_len = 0;
}
- /* find out which cpus will be used for input */
- vum->input_cpu_first_index = 0;
- vum->input_cpu_count = 1;
- p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- tr = p ? (vlib_thread_registration_t *) p[0] : 0;
-
- if (tr && tr->count > 0)
- {
- vum->input_cpu_first_index = tr->first_index;
- vum->input_cpu_count = tr->count;
- }
-
vum->random = random_default_seed ();
mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword));
@@ -1447,9 +1336,16 @@ vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq)
vhost_user_main_t *vum = &vhost_user_main;
u64 x = 1;
int fd = UNIX_GET_FD (vq->callfd_idx);
- int rv __attribute__ ((unused));
- /* TODO: pay attention to rv */
+ int rv;
+
rv = write (fd, &x, sizeof (x));
+ if (rv <= 0)
+ {
+ clib_unix_warning
+ ("Error: Could not write to unix socket for callfd %d", fd);
+ return;
+ }
+
vq->n_since_last_int = 0;
vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time;
}
@@ -1564,7 +1460,8 @@ static u32
vhost_user_if_input (vlib_main_t * vm,
vhost_user_main_t * vum,
vhost_user_intf_t * vui,
- u16 qid, vlib_node_runtime_t * node)
+ u16 qid, vlib_node_runtime_t * node,
+ vnet_hw_interface_rx_mode mode)
{
vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
u16 n_rx_packets = 0;
@@ -1590,6 +1487,26 @@ vhost_user_if_input (vlib_main_t * vm,
vhost_user_send_call (vm, rxvq);
}
+ /*
+ * For adaptive mode, it is optimized to reduce interrupts.
+ * If the scheduler switches the input node to polling due
+ * to burst of traffic, we tell the driver no interrupt.
+ * When the traffic subsides, the scheduler switches the node back to
+ * interrupt mode. We must tell the driver we want interrupt.
+ */
+ if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+ {
+ if ((node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) ||
+ !(node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))
+ /* Tell driver we want notification */
+ txvq->used->flags = 0;
+ else
+ /* Tell driver we don't want notification */
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ }
+
if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
return 0;
@@ -1926,34 +1843,23 @@ vhost_user_input (vlib_main_t * vm,
{
vhost_user_main_t *vum = &vhost_user_main;
uword n_rx_packets = 0;
- u32 thread_index = vlib_get_thread_index ();
- vhost_iface_and_queue_t *vhiq;
vhost_user_intf_t *vui;
- vhost_cpu_t *vhc;
+ vnet_device_input_runtime_t *rt =
+ (vnet_device_input_runtime_t *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
- vhc = &vum->cpus[thread_index];
- if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE))
- {
- vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
+ vec_foreach (dq, rt->devices_and_queues)
+ {
+ if (clib_smp_swap (&dq->interrupt_pending, 0) ||
+ (node->state == VLIB_NODE_STATE_POLLING))
{
- vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
- n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node);
+ vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance);
+ n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node,
+ dq->mode);
}
- }
- else
- {
- int i;
-
- /* *INDENT-OFF* */
- clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({
- int qid = i & 0xff;
+ }
- clib_bitmap_set (vhc->pending_input_bitmap, i, 0);
- vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8);
- n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node);
- }));
- /* *INDENT-ON* */
- }
return n_rx_packets;
}
@@ -2371,6 +2277,161 @@ done3:
return frame->n_vectors;
}
+static uword
+vhost_user_send_interrupt_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+ vhost_user_intf_t *vui;
+ f64 timeout = 3153600000.0 /* 100 years */ ;
+ uword event_type, *event_data = 0;
+ vhost_user_main_t *vum = &vhost_user_main;
+ u16 *queue;
+ f64 now, poll_time_remaining;
+ f64 next_timeout;
+ u8 stop_timer = 0;
+
+ while (1)
+ {
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ /*
+ * Use the remaining timeout if it is less than coalesce time to avoid
+ * resetting the existing timer in the middle of expiration
+ */
+ timeout = poll_time_remaining;
+ if (vlib_process_suspend_time_is_zero (timeout) ||
+ (timeout > vum->coalesce_time))
+ timeout = vum->coalesce_time;
+
+ now = vlib_time_now (vm);
+ switch (event_type)
+ {
+ case VHOST_USER_EVENT_STOP_TIMER:
+ stop_timer = 1;
+ break;
+
+ case VHOST_USER_EVENT_START_TIMER:
+ stop_timer = 0;
+ if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
+ break;
+ /* fall through */
+
+ case ~0:
+ /* *INDENT-OFF* */
+ pool_foreach (vui, vum->vhost_user_interfaces, {
+ next_timeout = timeout;
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vhost_user_vring_t *rxvq =
+ &vui->vrings[VHOST_VRING_IDX_RX (*queue)];
+ vhost_user_vring_t *txvq =
+ &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+
+ if (txvq->n_since_last_int)
+ {
+ if (now >= txvq->int_deadline)
+ vhost_user_send_call (vm, txvq);
+ else
+ next_timeout = txvq->int_deadline - now;
+ }
+
+ if (rxvq->n_since_last_int)
+ {
+ if (now >= rxvq->int_deadline)
+ vhost_user_send_call (vm, rxvq);
+ else
+ next_timeout = rxvq->int_deadline - now;
+ }
+
+ if ((next_timeout < timeout) && (next_timeout > 0.0))
+ timeout = next_timeout;
+ }
+ });
+ /* *INDENT-ON* */
+ break;
+
+ default:
+ clib_warning ("BUG: unhandled event type %d", event_type);
+ break;
+ }
+ /* No less than 1 millisecond */
+ if (timeout < 1e-3)
+ timeout = 1e-3;
+ if (stop_timer)
+ timeout = 3153600000.0;
+ }
+ return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
+ .function = vhost_user_send_interrupt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "vhost-user-send-interrupt-process",
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
+ u32 qid, vnet_hw_interface_rx_mode mode)
+{
+ vlib_main_t *vm = vnm->vlib_main;
+ vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
+ vhost_user_main_t *vum = &vhost_user_main;
+ vhost_user_intf_t *vui =
+ pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
+ vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+
+ if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))
+ {
+ if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ vum->ifq_count++;
+ // Start the timer if this is the first encounter on interrupt
+ // interface/queue
+ if ((vum->ifq_count == 1) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_START_TIMER, 0);
+ }
+ }
+ else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ {
+ if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) &&
+ vum->ifq_count)
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if ((vum->ifq_count == 0) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ }
+ }
+
+ txvq->mode = mode;
+ if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING)
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) ||
+ (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT))
+ txvq->used->flags = 0;
+ else
+ {
+ clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode,
+ hw_if_index, qid);
+ return clib_error_return (0, "unsupported");
+ }
+
+ return 0;
+}
+
static clib_error_t *
vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
u32 flags)
@@ -2399,6 +2460,7 @@ VNET_DEVICE_CLASS (vhost_user_dev_class,static) = {
.format_device_name = format_vhost_user_interface_name,
.name_renumber = vhost_user_name_renumber,
.admin_up_down_function = vhost_user_interface_admin_up_down,
+ .rx_mode_change_function = vhost_user_interface_rx_mode_change,
.format_tx_trace = format_vhost_trace,
};
@@ -2523,8 +2585,6 @@ vhost_user_term_if (vhost_user_intf_t * vui)
int q;
vhost_user_main_t *vum = &vhost_user_main;
- // Delete configured thread pinning
- vec_reset_length (vui->workers);
// disconnect interface sockets
vhost_user_if_disconnect (vui);
vhost_user_update_iface_state (vui);
@@ -2555,6 +2615,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
vhost_user_intf_t *vui;
int rv = 0;
vnet_hw_interface_t *hwif;
+ u16 *queue;
if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
hwif->dev_class_index != vhost_user_dev_class.index)
@@ -2565,6 +2626,28 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance);
+ vec_foreach (queue, vui->rx_queues)
+ {
+ vhost_user_vring_t *txvq;
+
+ txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)];
+ if ((vum->ifq_count > 0) &&
+ ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)))
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if ((vum->ifq_count == 0) &&
+ (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ {
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ break;
+ }
+ }
+ }
+
// Disable and reset interface
vhost_user_term_if (vui);
@@ -2687,13 +2770,15 @@ vhost_user_vui_init (vnet_main_t * vnm,
vhost_user_intf_t * vui,
int server_sock_fd,
const char *sock_filename,
- u64 feature_mask, u32 * sw_if_index, u8 operation_mode)
+ u64 feature_mask, u32 * sw_if_index)
{
vnet_sw_interface_t *sw;
- sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
int q;
vhost_user_main_t *vum = &vhost_user_main;
+ vnet_hw_interface_t *hw;
+ hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
+ sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
if (server_sock_fd != -1)
{
unix_file_t template = { 0 };
@@ -2715,7 +2800,6 @@ vhost_user_vui_init (vnet_main_t * vnm,
vui->feature_mask = feature_mask;
vui->unix_file_index = ~0;
vui->log_base_addr = 0;
- vui->operation_mode = operation_mode;
vui->if_index = vui - vum->vhost_user_interfaces;
mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename,
&vui->if_index, 0);
@@ -2723,6 +2807,7 @@ vhost_user_vui_init (vnet_main_t * vnm,
for (q = 0; q < VHOST_VRING_MAX_N; q++)
vhost_user_vring_init (vui, q);
+ hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE;
vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
if (sw_if_index)
@@ -2740,106 +2825,13 @@ vhost_user_vui_init (vnet_main_t * vnm,
vhost_user_tx_thread_placement (vui);
}
-static uword
-vhost_user_send_interrupt_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- vhost_user_intf_t *vui;
- f64 timeout = 3153600000.0 /* 100 years */ ;
- uword event_type, *event_data = 0;
- vhost_user_main_t *vum = &vhost_user_main;
- vhost_iface_and_queue_t *vhiq;
- vhost_cpu_t *vhc;
- f64 now, poll_time_remaining;
-
- while (1)
- {
- poll_time_remaining =
- vlib_process_wait_for_event_or_clock (vm, timeout);
- event_type = vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- /*
- * Use the remaining timeout if it is less than coalesce time to avoid
- * resetting the existing timer in the middle of expiration
- */
- timeout = poll_time_remaining;
- if (vlib_process_suspend_time_is_zero (timeout) ||
- (timeout > vum->coalesce_time))
- timeout = vum->coalesce_time;
-
- now = vlib_time_now (vm);
- switch (event_type)
- {
- case VHOST_USER_EVENT_START_TIMER:
- if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
- break;
- /* fall through */
-
- case ~0:
- vec_foreach (vhc, vum->cpus)
- {
- u32 thread_index = vhc - vum->cpus;
- f64 next_timeout;
-
- next_timeout = timeout;
- vec_foreach (vhiq, vum->cpus[thread_index].rx_queues)
- {
- vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index];
- vhost_user_vring_t *rxvq =
- &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)];
- vhost_user_vring_t *txvq =
- &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)];
-
- if (txvq->n_since_last_int)
- {
- if (now >= txvq->int_deadline)
- vhost_user_send_call (vm, txvq);
- else
- next_timeout = txvq->int_deadline - now;
- }
-
- if (rxvq->n_since_last_int)
- {
- if (now >= rxvq->int_deadline)
- vhost_user_send_call (vm, rxvq);
- else
- next_timeout = rxvq->int_deadline - now;
- }
-
- if ((next_timeout < timeout) && (next_timeout > 0.0))
- timeout = next_timeout;
- }
- }
- break;
-
- default:
- clib_warning ("BUG: unhandled event type %d", event_type);
- break;
- }
- /* No less than 1 millisecond */
- if (timeout < 1e-3)
- timeout = 1e-3;
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = {
- .function = vhost_user_send_interrupt_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "vhost-user-send-interrupt-process",
-};
-/* *INDENT-ON* */
-
int
vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename,
u8 is_server,
u32 * sw_if_index,
u64 feature_mask,
- u8 renumber, u32 custom_dev_instance, u8 * hwaddr,
- u8 operation_mode)
+ u8 renumber, u32 custom_dev_instance, u8 * hwaddr)
{
vhost_user_intf_t *vui = NULL;
u32 sw_if_idx = ~0;
@@ -2848,10 +2840,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_main_t *vum = &vhost_user_main;
uword *if_index;
- if ((operation_mode != VHOST_USER_POLLING_MODE) &&
- (operation_mode != VHOST_USER_INTERRUPT_MODE))
- return VNET_API_ERROR_UNIMPLEMENTED;
-
if (sock_filename == NULL || !(strlen (sock_filename) > 0))
{
return VNET_API_ERROR_INVALID_ARGUMENT;
@@ -2881,7 +2869,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_create_ethernet (vnm, vm, vui, hwaddr);
vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
- feature_mask, &sw_if_idx, operation_mode);
+ feature_mask, &sw_if_idx);
if (renumber)
vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
@@ -2892,14 +2880,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
// Process node must connect
vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
- if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
- !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
- (vum->coalesce_frames > 0))
- {
- vum->interrupt_mode = 1;
- vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_START_TIMER, 0);
- }
return rv;
}
@@ -2908,8 +2888,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename,
u8 is_server,
u32 sw_if_index,
- u64 feature_mask, u8 renumber, u32 custom_dev_instance,
- u8 operation_mode)
+ u64 feature_mask, u8 renumber, u32 custom_dev_instance)
{
vhost_user_main_t *vum = &vhost_user_main;
vhost_user_intf_t *vui = NULL;
@@ -2919,9 +2898,6 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
vnet_hw_interface_t *hwif;
uword *if_index;
- if ((operation_mode != VHOST_USER_POLLING_MODE) &&
- (operation_mode != VHOST_USER_INTERRUPT_MODE))
- return VNET_API_ERROR_UNIMPLEMENTED;
if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) ||
hwif->dev_class_index != vhost_user_dev_class.index)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
@@ -2947,8 +2923,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_term_if (vui);
vhost_user_vui_init (vnm, vui, server_sock_fd,
- sock_filename, feature_mask, &sw_if_idx,
- operation_mode);
+ sock_filename, feature_mask, &sw_if_idx);
if (renumber)
vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
@@ -2956,33 +2931,9 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
// Process node must connect
vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0);
- if ((operation_mode == VHOST_USER_INTERRUPT_MODE) &&
- !vum->interrupt_mode && (vum->coalesce_time > 0.0) &&
- (vum->coalesce_frames > 0))
- {
- vum->interrupt_mode = 1;
- vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_START_TIMER, 0);
- }
return rv;
}
-static uword
-unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args)
-{
- u8 *operation_mode = va_arg (*args, u8 *);
- uword rc = 1;
-
- if (unformat (input, "interrupt"))
- *operation_mode = VHOST_USER_INTERRUPT_MODE;
- else if (unformat (input, "polling"))
- *operation_mode = VHOST_USER_POLLING_MODE;
- else
- rc = 0;
-
- return rc;
-}
-
clib_error_t *
vhost_user_connect_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -2998,7 +2949,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
u8 hwaddr[6];
u8 *hw = NULL;
clib_error_t *error = NULL;
- u8 operation_mode = VHOST_USER_POLLING_MODE;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -3020,9 +2970,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
{
renumber = 1;
}
- else if (unformat (line_input, "mode %U",
- unformat_vhost_user_operation_mode, &operation_mode))
- ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -3036,8 +2983,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
int rv;
if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
is_server, &sw_if_index, feature_mask,
- renumber, custom_dev_instance, hw,
- operation_mode)))
+ renumber, custom_dev_instance, hw)))
{
error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
goto done;
@@ -3127,7 +3073,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance);
vec_add2 (r_vuids, vuid, 1);
- vuid->operation_mode = vui->operation_mode;
vuid->sw_if_index = vui->sw_if_index;
vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz;
vuid->features = vui->features;
@@ -3152,25 +3097,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
return rv;
}
-static u8 *
-format_vhost_user_operation_mode (u8 * s, va_list * va)
-{
- int operation_mode = va_arg (*va, int);
-
- switch (operation_mode)
- {
- case VHOST_USER_POLLING_MODE:
- s = format (s, "%s", "polling");
- break;
- case VHOST_USER_INTERRUPT_MODE:
- s = format (s, "%s", "interrupt");
- break;
- default:
- s = format (s, "%s", "invalid");
- }
- return s;
-}
-
clib_error_t *
show_vhost_user_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -3182,10 +3108,8 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vhost_user_intf_t *vui;
u32 hw_if_index, *hw_if_indices = 0;
vnet_hw_interface_t *hi;
- vhost_cpu_t *vhc;
- vhost_iface_and_queue_t *vhiq;
+ u16 *queue;
u32 ci;
-
int i, j, q;
int show_descr = 0;
struct feat_struct
@@ -3238,6 +3162,8 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "Virtio vhost-user interfaces");
vlib_cli_output (vm, "Global:\n coalesce frames %d time %e",
vum->coalesce_frames, vum->coalesce_time);
+ vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d",
+ vum->ifq_count);
for (i = 0; i < vec_len (hw_if_indices); i++)
{
@@ -3279,23 +3205,21 @@ show_vhost_user_command_fn (vlib_main_t * vm,
(vui->unix_server_index != ~0) ? "server" : "client",
strerror (vui->sock_errno));
- vlib_cli_output (vm, " configured mode: %U\n",
- format_vhost_user_operation_mode, vui->operation_mode);
vlib_cli_output (vm, " rx placement: ");
- vec_foreach (vhc, vum->cpus)
+
+ vec_foreach (queue, vui->rx_queues)
{
- vec_foreach (vhiq, vhc->rx_queues)
- {
- if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces)
- {
- vlib_cli_output (vm, " thread %d on vring %d\n",
- vhc - vum->cpus,
- VHOST_VRING_IDX_TX (vhiq->qid));
- vlib_cli_output (vm, " mode: %U\n",
- format_vhost_user_operation_mode,
- vhc->operation_mode);
- }
- }
+ vnet_main_t *vnm = vnet_get_main ();
+ uword thread_index;
+ vnet_hw_interface_rx_mode mode;
+
+ thread_index = vnet_get_device_input_thread_index (vnm,
+ vui->hw_if_index,
+ *queue);
+ vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode);
+ vlib_cli_output (vm, " thread %d on vring %d, %U\n",
+ thread_index, VHOST_VRING_IDX_TX (*queue),
+ format_vnet_hw_interface_rx_mode, mode);
}
vlib_cli_output (vm, " tx placement: %s\n",
@@ -3444,8 +3368,7 @@ done:
VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
.path = "create vhost-user",
.short_help = "create vhost-user socket <socket-filename> [server] "
- "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] "
- "[mode {interrupt | polling}]",
+ "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] ",
.function = vhost_user_connect_command_fn,
};
/* *INDENT-ON* */
@@ -3648,69 +3571,6 @@ vhost_user_unmap_all (void)
}
}
-static clib_error_t *
-vhost_thread_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u32 worker_thread_index;
- u32 sw_if_index;
- u8 del = 0;
- int rv;
- clib_error_t *error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- if (!unformat
- (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (),
- &sw_if_index, &worker_thread_index))
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- goto done;
- }
-
- if (unformat (line_input, "del"))
- del = 1;
-
- if ((rv =
- vhost_user_thread_placement (sw_if_index, worker_thread_index, del)))
- {
- error = clib_error_return (0, "vhost_user_thread_placement returned %d",
- rv);
- goto done;
- }
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-
-/*?
- * This command is used to move the RX processing for the given
- * interfaces to the provided thread. If the '<em>del</em>' option is used,
- * the forced thread assignment is removed and the thread assigment is
- * reassigned automatically. Use '<em>show vhost-user <interface></em>'
- * to see the thread assignment.
- *
- * @cliexpar
- * Example of how to move the RX processing for a given interface to a given thread:
- * @cliexcmd{vhost thread VirtualEthernet0/0/0 1}
- * Example of how to remove the forced thread assignment for a given interface:
- * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del}
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (vhost_user_thread_command, static) = {
- .path = "vhost thread",
- .short_help = "vhost thread <iface> <worker-index> [del]",
- .function = vhost_thread_command_fn,
-};
-/* *INDENT-ON* */
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h
index 56b65477b19..ceced342c92 100644
--- a/src/vnet/devices/virtio/vhost-user.h
+++ b/src/vnet/devices/virtio/vhost-user.h
@@ -66,13 +66,11 @@ typedef enum
int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename, u8 is_server,
u32 * sw_if_index, u64 feature_mask,
- u8 renumber, u32 custom_dev_instance, u8 * hwaddr,
- u8 operation_mode);
+ u8 renumber, u32 custom_dev_instance, u8 * hwaddr);
int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
const char *sock_filename, u8 is_server,
u32 sw_if_index, u64 feature_mask,
- u8 renumber, u32 custom_dev_instance,
- u8 operation_mode);
+ u8 renumber, u32 custom_dev_instance);
int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
u32 sw_if_index);
@@ -210,14 +208,13 @@ typedef struct
u32 callfd_idx;
u32 kickfd_idx;
u64 log_guest_addr;
- u32 interrupt_thread_index;
-} vhost_user_vring_t;
-#define VHOST_USER_POLLING_MODE 0
-#define VHOST_USER_INTERRUPT_MODE 1
-#define VHOST_USER_ADAPTIVE_MODE 2
+ /* The rx queue policy (interrupt/adaptive/polling) for this queue */
+ u32 mode;
+} vhost_user_vring_t;
#define VHOST_USER_EVENT_START_TIMER 1
+#define VHOST_USER_EVENT_STOP_TIMER 2
typedef struct
{
@@ -258,20 +255,12 @@ typedef struct
u8 use_tx_spinlock;
u16 *per_cpu_tx_qid;
- /* Vector of workers for this interface */
- u32 *workers;
-
- u8 operation_mode;
+ /* Vector of active rx queues for this interface */
+ u16 *rx_queues;
} vhost_user_intf_t;
typedef struct
{
- u16 vhost_iface_index;
- u16 qid;
-} vhost_iface_and_queue_t;
-
-typedef struct
-{
uword dst;
uword src;
u32 len;
@@ -292,7 +281,6 @@ typedef struct
typedef struct
{
- vhost_iface_and_queue_t *rx_queues;
u32 rx_buffers_len;
u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
@@ -302,12 +290,6 @@ typedef struct
/* This is here so it doesn't end-up
* using stack or registers. */
vhost_trace_t *current_trace;
-
- /* bitmap of pending rx interfaces */
- uword *pending_input_bitmap;
-
- /* The operation mode computed per cpu based on interface setting */
- u8 operation_mode;
} vhost_cpu_t;
typedef struct
@@ -320,20 +302,14 @@ typedef struct
f64 coalesce_time;
int dont_dump_vhost_user_memory;
- /** first cpu index */
- u32 input_cpu_first_index;
-
- /** total cpu count */
- u32 input_cpu_count;
-
/** Per-CPU data for vhost-user */
vhost_cpu_t *cpus;
/** Pseudo random iterator */
u32 random;
- /* Node is in interrupt mode */
- u8 interrupt_mode;
+ /* The number of rx interface/queue pairs in interrupt mode */
+ u32 ifq_count;
} vhost_user_main_t;
typedef struct
@@ -346,7 +322,6 @@ typedef struct
u8 sock_filename[256];
u32 num_regions;
int sock_errno;
- u8 operation_mode;
} vhost_user_intf_details_t;
int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c
index ac7afa611b6..8dbd032b117 100644
--- a/src/vnet/devices/virtio/vhost_user_api.c
+++ b/src/vnet/devices/virtio/vhost_user_api.c
@@ -81,8 +81,7 @@ vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp)
rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename,
mp->is_server, &sw_if_index, (u64) ~ 0,
mp->renumber, ntohl (mp->custom_dev_instance),
- (mp->use_custom_mac) ? mp->mac_address : NULL,
- mp->operation_mode);
+ (mp->use_custom_mac) ? mp->mac_address : NULL);
/* Remember an interface tag for the new interface */
if (rv == 0)
@@ -117,8 +116,7 @@ vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp)
rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename,
mp->is_server, sw_if_index, (u64) ~ 0,
- mp->renumber, ntohl (mp->custom_dev_instance),
- mp->operation_mode);
+ mp->renumber, ntohl (mp->custom_dev_instance));
REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY);
}
@@ -164,7 +162,6 @@ send_sw_interface_vhost_user_details (vpe_api_main_t * am,
mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
mp->features = clib_net_to_host_u64 (vui->features);
mp->is_server = vui->is_server;
- mp->operation_mode = vui->operation_mode;
mp->num_regions = ntohl (vui->num_regions);
mp->sock_errno = ntohl (vui->sock_errno);
mp->context = context;