diff options
Diffstat (limited to 'src/vnet/devices')
-rw-r--r-- | src/vnet/devices/devices.c | 20 | ||||
-rw-r--r-- | src/vnet/devices/virtio/vhost-user.c | 748 | ||||
-rw-r--r-- | src/vnet/devices/virtio/vhost-user.h | 45 | ||||
-rw-r--r-- | src/vnet/devices/virtio/vhost_user_api.c | 7 |
4 files changed, 336 insertions, 484 deletions
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c index e71be602237..58c72077f24 100644 --- a/src/vnet/devices/devices.c +++ b/src/vnet/devices/devices.c @@ -150,6 +150,7 @@ vnet_hw_interface_assign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, dq->dev_instance = hw->dev_instance; dq->queue_id = queue_id; dq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; + rt->enabled_node_state = VLIB_NODE_STATE_POLLING; vnet_device_queue_update (vnm, rt); vec_validate (hw->input_node_thread_index_by_queue, queue_id); @@ -168,6 +169,7 @@ vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, vnet_device_input_runtime_t *rt; vnet_device_and_queue_t *dq; uword old_thread_index; + vnet_hw_interface_rx_mode mode; if (hw->input_node_thread_index_by_queue == 0) return VNET_API_ERROR_INVALID_INTERFACE; @@ -184,6 +186,7 @@ vnet_hw_interface_unassign_rx_thread (vnet_main_t * vnm, u32 hw_if_index, vec_foreach (dq, rt->devices_and_queues) if (dq->hw_if_index == hw_if_index && dq->queue_id == queue_id) { + mode = dq->mode; vec_del1 (rt->devices_and_queues, dq - rt->devices_and_queues); goto deleted; } @@ -197,6 +200,23 @@ deleted: if (vec_len (rt->devices_and_queues) == 0) vlib_node_set_state (vm, hw->input_node_index, VLIB_NODE_STATE_DISABLED); + else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + /* + * if the deleted interface is polling, we may need to set the node state + * to interrupt if there is no more polling interface for this device's + * corresponding thread. This is because mixed interfaces + * (polling and interrupt), assigned to the same thread, set the + * thread to polling prior to the deletion. + */ + vec_foreach (dq, rt->devices_and_queues) + { + if (dq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + return 0; + } + rt->enabled_node_state = VLIB_NODE_STATE_INTERRUPT; + vlib_node_set_state (vm, hw->input_node_index, rt->enabled_node_state); + } return 0; } diff --git a/src/vnet/devices/virtio/vhost-user.c b/src/vnet/devices/virtio/vhost-user.c index 451ae4342b6..231889342e8 100644 --- a/src/vnet/devices/virtio/vhost-user.c +++ b/src/vnet/devices/virtio/vhost-user.c @@ -362,140 +362,71 @@ vhost_user_tx_thread_placement (vhost_user_intf_t * vui) } } +/** + * @brief Unassign existing interface/queue to thread mappings and re-assign + * new interface/queue to thread mappings + */ static void vhost_user_rx_thread_placement () { vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui; - vhost_cpu_t *vhc; - u32 *workers = 0; - u32 thread_index; - vlib_main_t *vm; - - //Let's list all workers cpu indexes - u32 i; - for (i = vum->input_cpu_first_index; - i < vum->input_cpu_first_index + vum->input_cpu_count; i++) - { - vlib_node_set_state (vlib_mains[i], vhost_user_input_node.index, - VLIB_NODE_STATE_DISABLED); - vec_add1 (workers, i); - } - - vec_foreach (vhc, vum->cpus) - { - vec_reset_length (vhc->rx_queues); - } + vhost_user_vring_t *txvq; + vnet_main_t *vnm = vnet_get_main (); + u32 qid; + int rv; + u16 *queue; - i = 0; - vhost_iface_and_queue_t iaq; + // Scrap all existing mappings for all interfaces/queues /* *INDENT-OFF* */ pool_foreach (vui, vum->vhost_user_interfaces, { - u32 *vui_workers = vec_len (vui->workers) ? vui->workers : workers; - u32 qid; - for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) + vec_foreach (queue, vui->rx_queues) { - vhost_user_vring_t *txvq = - &vui->vrings[VHOST_VRING_IDX_TX (qid)]; - if (!txvq->started) - continue; - - i %= vec_len (vui_workers); - thread_index = vui_workers[i]; - i++; - vhc = &vum->cpus[thread_index]; - txvq->interrupt_thread_index = thread_index; - - iaq.qid = qid; - iaq.vhost_iface_index = vui - vum->vhost_user_interfaces; - vec_add1 (vhc->rx_queues, iaq); + rv = vnet_hw_interface_unassign_rx_thread (vnm, vui->hw_if_index, + *queue); + if (rv) + clib_warning ("Warning: unable to unassign interface %d, " + "queue %d: rc=%d", vui->hw_if_index, *queue, rv); } + vec_reset_length (vui->rx_queues); }); /* *INDENT-ON* */ - vec_foreach (vhc, vum->cpus) - { - vhost_iface_and_queue_t *vhiq; - u8 mode = VHOST_USER_INTERRUPT_MODE; - - vec_foreach (vhiq, vhc->rx_queues) - { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - if (vui->operation_mode == VHOST_USER_POLLING_MODE) + // Create the rx_queues for all interfaces + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + for (qid = 0; qid < VHOST_VRING_MAX_N / 2; qid++) { - /* At least one interface is polling, cpu is set to polling */ - mode = VHOST_USER_POLLING_MODE; - break; + txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + if (txvq->started) + { + if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_UNKNOWN) + /* Set polling as the default */ + txvq->mode = VNET_HW_INTERFACE_RX_MODE_POLLING; + vec_add1 (vui->rx_queues, qid); + } } - } - vhc->operation_mode = mode; - } - - for (thread_index = vum->input_cpu_first_index; - thread_index < vum->input_cpu_first_index + vum->input_cpu_count; - thread_index++) - { - vlib_node_state_t state = VLIB_NODE_STATE_POLLING; + }); + /* *INDENT-ON* */ - vhc = &vum->cpus[thread_index]; - vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; - switch (vhc->operation_mode) + // Assign new mappings for all interfaces/queues + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + vnet_hw_interface_set_input_node (vnm, vui->hw_if_index, + vhost_user_input_node.index); + vec_foreach (queue, vui->rx_queues) { - case VHOST_USER_INTERRUPT_MODE: - state = VLIB_NODE_STATE_INTERRUPT; - break; - case VHOST_USER_POLLING_MODE: - state = VLIB_NODE_STATE_POLLING; - break; - default: - clib_warning ("BUG: bad operation mode %d", vhc->operation_mode); - break; + vnet_hw_interface_assign_rx_thread (vnm, vui->hw_if_index, *queue, + ~0); + txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + rv = vnet_hw_interface_set_rx_mode (vnm, vui->hw_if_index, *queue, + txvq->mode); + if (rv) + clib_warning ("Warning: unable to set rx mode for interface %d, " + "queue %d: rc=%d", vui->hw_if_index, *queue, rv); } - vlib_node_set_state (vm, vhost_user_input_node.index, state); - } - - vec_free (workers); -} - -static int -vhost_user_thread_placement (u32 sw_if_index, u32 worker_thread_index, u8 del) -{ - vhost_user_main_t *vum = &vhost_user_main; - vhost_user_intf_t *vui; - vnet_hw_interface_t *hw; - - if (worker_thread_index < vum->input_cpu_first_index || - worker_thread_index >= - vum->input_cpu_first_index + vum->input_cpu_count) - return -1; - - if (!(hw = vnet_get_sup_hw_interface (vnet_get_main (), sw_if_index))) - return -2; - - vui = pool_elt_at_index (vum->vhost_user_interfaces, hw->dev_instance); - u32 found = ~0, *w; - vec_foreach (w, vui->workers) - { - if (*w == worker_thread_index) - { - found = w - vui->workers; - break; - } - } - - if (del) - { - if (found == ~0) - return -3; - vec_del1 (vui->workers, found); - } - else if (found == ~0) - { - vec_add1 (vui->workers, worker_thread_index); - } - - vhost_user_rx_thread_placement (); - return 0; + }); + /* *INDENT-ON* */ } /** @brief Returns whether at least one TX and one RX vring are enabled */ @@ -532,37 +463,17 @@ vhost_user_update_iface_state (vhost_user_intf_t * vui) static void vhost_user_set_interrupt_pending (vhost_user_intf_t * vui, u32 ifq) { - vhost_user_main_t *vum = &vhost_user_main; - vhost_cpu_t *vhc; - u32 thread_index; - vlib_main_t *vm; - u32 ifq2, qid; - vhost_user_vring_t *txvq; + u32 qid; + vnet_main_t *vnm = vnet_get_main (); qid = ifq & 0xff; - if ((qid % 2) == 0) - /* Only care about the odd number virtqueue which is TX */ + if ((qid & 1) == 0) + /* Only care about the odd number, or TX, virtqueue */ return; if (vhost_user_intf_ready (vui)) - { - txvq = &vui->vrings[qid]; - thread_index = txvq->interrupt_thread_index; - vhc = &vum->cpus[thread_index]; - if (vhc->operation_mode == VHOST_USER_INTERRUPT_MODE) - { - vm = vlib_mains ? vlib_mains[thread_index] : &vlib_global_main; - /* - * Convert virtqueue number in the lower byte to vring - * queue index for the input node process. Top bytes contain - * the interface, lower byte contains the queue index. - */ - ifq2 = ((ifq >> 8) << 8) | qid / 2; - vhc->pending_input_bitmap = - clib_bitmap_set (vhc->pending_input_bitmap, ifq2, 1); - vlib_node_set_interrupt_pending (vm, vhost_user_input_node.index); - } - } + // qid >> 1 is to convert virtqueue number to vring queue index + vnet_device_input_set_interrupt_pending (vnm, vui->hw_if_index, qid >> 1); } static clib_error_t * @@ -570,14 +481,10 @@ vhost_user_callfd_read_ready (unix_file_t * uf) { __attribute__ ((unused)) int n; u8 buff[8]; - vhost_user_intf_t *vui = - pool_elt_at_index (vhost_user_main.vhost_user_interfaces, - uf->private_data >> 8); n = read (uf->file_descriptor, ((char *) &buff), 8); DBG_SOCK ("if %d CALL queue %d", uf->private_data >> 8, uf->private_data & 0xff); - vhost_user_set_interrupt_pending (vui, uf->private_data); return 0; } @@ -1001,12 +908,8 @@ vhost_user_socket_read (unix_file_t * uf) vui->vrings[msg.state.index].last_avail_idx = vui->vrings[msg.state.index].used->idx; - if (vui->operation_mode == VHOST_USER_POLLING_MODE) - /* tell driver that we don't want interrupts */ - vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; - else - /* tell driver that we want interrupts */ - vui->vrings[msg.state.index].used->flags = 0; + /* tell driver that we don't want interrupts */ + vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY; break; case VHOST_USER_SET_OWNER: @@ -1315,8 +1218,6 @@ vhost_user_init (vlib_main_t * vm) clib_error_t *error; vhost_user_main_t *vum = &vhost_user_main; vlib_thread_main_t *tm = vlib_get_thread_main (); - vlib_thread_registration_t *tr; - uword *p; error = vlib_call_init_function (vm, ip4_init); if (error) @@ -1335,18 +1236,6 @@ vhost_user_init (vlib_main_t * vm) cpu->rx_buffers_len = 0; } - /* find out which cpus will be used for input */ - vum->input_cpu_first_index = 0; - vum->input_cpu_count = 1; - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - vum->input_cpu_first_index = tr->first_index; - vum->input_cpu_count = tr->count; - } - vum->random = random_default_seed (); mhash_init_c_string (&vum->if_index_by_sock_name, sizeof (uword)); @@ -1447,9 +1336,16 @@ vhost_user_send_call (vlib_main_t * vm, vhost_user_vring_t * vq) vhost_user_main_t *vum = &vhost_user_main; u64 x = 1; int fd = UNIX_GET_FD (vq->callfd_idx); - int rv __attribute__ ((unused)); - /* TODO: pay attention to rv */ + int rv; + rv = write (fd, &x, sizeof (x)); + if (rv <= 0) + { + clib_unix_warning + ("Error: Could not write to unix socket for callfd %d", fd); + return; + } + vq->n_since_last_int = 0; vq->int_deadline = vlib_time_now (vm) + vum->coalesce_time; } @@ -1564,7 +1460,8 @@ static u32 vhost_user_if_input (vlib_main_t * vm, vhost_user_main_t * vum, vhost_user_intf_t * vui, - u16 qid, vlib_node_runtime_t * node) + u16 qid, vlib_node_runtime_t * node, + vnet_hw_interface_rx_mode mode) { vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; u16 n_rx_packets = 0; @@ -1590,6 +1487,26 @@ vhost_user_if_input (vlib_main_t * vm, vhost_user_send_call (vm, rxvq); } + /* + * For adaptive mode, it is optimized to reduce interrupts. + * If the scheduler switches the input node to polling due + * to burst of traffic, we tell the driver no interrupt. + * When the traffic subsides, the scheduler switches the node back to + * interrupt mode. We must tell the driver we want interrupt. + */ + if (PREDICT_FALSE (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) + { + if ((node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) || + !(node->flags & + VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)) + /* Tell driver we want notification */ + txvq->used->flags = 0; + else + /* Tell driver we don't want notification */ + txvq->used->flags = VRING_USED_F_NO_NOTIFY; + } + if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE)) return 0; @@ -1926,34 +1843,23 @@ vhost_user_input (vlib_main_t * vm, { vhost_user_main_t *vum = &vhost_user_main; uword n_rx_packets = 0; - u32 thread_index = vlib_get_thread_index (); - vhost_iface_and_queue_t *vhiq; vhost_user_intf_t *vui; - vhost_cpu_t *vhc; + vnet_device_input_runtime_t *rt = + (vnet_device_input_runtime_t *) node->runtime_data; + vnet_device_and_queue_t *dq; - vhc = &vum->cpus[thread_index]; - if (PREDICT_TRUE (vhc->operation_mode == VHOST_USER_POLLING_MODE)) - { - vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) + vec_foreach (dq, rt->devices_and_queues) + { + if (clib_smp_swap (&dq->interrupt_pending, 0) || + (node->state == VLIB_NODE_STATE_POLLING)) { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - n_rx_packets += vhost_user_if_input (vm, vum, vui, vhiq->qid, node); + vui = + pool_elt_at_index (vum->vhost_user_interfaces, dq->dev_instance); + n_rx_packets = vhost_user_if_input (vm, vum, vui, dq->queue_id, node, + dq->mode); } - } - else - { - int i; - - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, vhc->pending_input_bitmap, ({ - int qid = i & 0xff; + } - clib_bitmap_set (vhc->pending_input_bitmap, i, 0); - vui = pool_elt_at_index (vum->vhost_user_interfaces, i >> 8); - n_rx_packets += vhost_user_if_input (vm, vum, vui, qid, node); - })); - /* *INDENT-ON* */ - } return n_rx_packets; } @@ -2371,6 +2277,161 @@ done3: return frame->n_vectors; } +static uword +vhost_user_send_interrupt_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + vhost_user_intf_t *vui; + f64 timeout = 3153600000.0 /* 100 years */ ; + uword event_type, *event_data = 0; + vhost_user_main_t *vum = &vhost_user_main; + u16 *queue; + f64 now, poll_time_remaining; + f64 next_timeout; + u8 stop_timer = 0; + + while (1) + { + poll_time_remaining = + vlib_process_wait_for_event_or_clock (vm, timeout); + event_type = vlib_process_get_events (vm, &event_data); + vec_reset_length (event_data); + + /* + * Use the remaining timeout if it is less than coalesce time to avoid + * resetting the existing timer in the middle of expiration + */ + timeout = poll_time_remaining; + if (vlib_process_suspend_time_is_zero (timeout) || + (timeout > vum->coalesce_time)) + timeout = vum->coalesce_time; + + now = vlib_time_now (vm); + switch (event_type) + { + case VHOST_USER_EVENT_STOP_TIMER: + stop_timer = 1; + break; + + case VHOST_USER_EVENT_START_TIMER: + stop_timer = 0; + if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) + break; + /* fall through */ + + case ~0: + /* *INDENT-OFF* */ + pool_foreach (vui, vum->vhost_user_interfaces, { + next_timeout = timeout; + vec_foreach (queue, vui->rx_queues) + { + vhost_user_vring_t *rxvq = + &vui->vrings[VHOST_VRING_IDX_RX (*queue)]; + vhost_user_vring_t *txvq = + &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + + if (txvq->n_since_last_int) + { + if (now >= txvq->int_deadline) + vhost_user_send_call (vm, txvq); + else + next_timeout = txvq->int_deadline - now; + } + + if (rxvq->n_since_last_int) + { + if (now >= rxvq->int_deadline) + vhost_user_send_call (vm, rxvq); + else + next_timeout = rxvq->int_deadline - now; + } + + if ((next_timeout < timeout) && (next_timeout > 0.0)) + timeout = next_timeout; + } + }); + /* *INDENT-ON* */ + break; + + default: + clib_warning ("BUG: unhandled event type %d", event_type); + break; + } + /* No less than 1 millisecond */ + if (timeout < 1e-3) + timeout = 1e-3; + if (stop_timer) + timeout = 3153600000.0; + } + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = { + .function = vhost_user_send_interrupt_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "vhost-user-send-interrupt-process", +}; +/* *INDENT-ON* */ + +static clib_error_t * +vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, + u32 qid, vnet_hw_interface_rx_mode mode) +{ + vlib_main_t *vm = vnm->vlib_main; + vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index); + vhost_user_main_t *vum = &vhost_user_main; + vhost_user_intf_t *vui = + pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance); + vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)]; + + if ((mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) + { + if (txvq->mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + vum->ifq_count++; + // Start the timer if this is the first encounter on interrupt + // interface/queue + if ((vum->ifq_count == 1) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_START_TIMER, 0); + } + } + else if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + { + if (((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE)) && + vum->ifq_count) + { + vum->ifq_count--; + // Stop the timer if there is no more interrupt interface/queue + if ((vum->ifq_count == 0) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_STOP_TIMER, 0); + } + } + + txvq->mode = mode; + if (mode == VNET_HW_INTERFACE_RX_MODE_POLLING) + txvq->used->flags = VRING_USED_F_NO_NOTIFY; + else if ((mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE) || + (mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT)) + txvq->used->flags = 0; + else + { + clib_warning ("BUG: unhandled mode %d changed for if %d queue %d", mode, + hw_if_index, qid); + return clib_error_return (0, "unsupported"); + } + + return 0; +} + static clib_error_t * vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags) @@ -2399,6 +2460,7 @@ VNET_DEVICE_CLASS (vhost_user_dev_class,static) = { .format_device_name = format_vhost_user_interface_name, .name_renumber = vhost_user_name_renumber, .admin_up_down_function = vhost_user_interface_admin_up_down, + .rx_mode_change_function = vhost_user_interface_rx_mode_change, .format_tx_trace = format_vhost_trace, }; @@ -2523,8 +2585,6 @@ vhost_user_term_if (vhost_user_intf_t * vui) int q; vhost_user_main_t *vum = &vhost_user_main; - // Delete configured thread pinning - vec_reset_length (vui->workers); // disconnect interface sockets vhost_user_if_disconnect (vui); vhost_user_update_iface_state (vui); @@ -2555,6 +2615,7 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vhost_user_intf_t *vui; int rv = 0; vnet_hw_interface_t *hwif; + u16 *queue; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) @@ -2565,6 +2626,28 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index) vui = pool_elt_at_index (vum->vhost_user_interfaces, hwif->dev_instance); + vec_foreach (queue, vui->rx_queues) + { + vhost_user_vring_t *txvq; + + txvq = &vui->vrings[VHOST_VRING_IDX_TX (*queue)]; + if ((vum->ifq_count > 0) && + ((txvq->mode == VNET_HW_INTERFACE_RX_MODE_INTERRUPT) || + (txvq->mode == VNET_HW_INTERFACE_RX_MODE_ADAPTIVE))) + { + vum->ifq_count--; + // Stop the timer if there is no more interrupt interface/queue + if ((vum->ifq_count == 0) && + (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0)) + { + vlib_process_signal_event (vm, + vhost_user_send_interrupt_node.index, + VHOST_USER_EVENT_STOP_TIMER, 0); + break; + } + } + } + // Disable and reset interface vhost_user_term_if (vui); @@ -2687,13 +2770,15 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui, int server_sock_fd, const char *sock_filename, - u64 feature_mask, u32 * sw_if_index, u8 operation_mode) + u64 feature_mask, u32 * sw_if_index) { vnet_sw_interface_t *sw; - sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); int q; vhost_user_main_t *vum = &vhost_user_main; + vnet_hw_interface_t *hw; + hw = vnet_get_hw_interface (vnm, vui->hw_if_index); + sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index); if (server_sock_fd != -1) { unix_file_t template = { 0 }; @@ -2715,7 +2800,6 @@ vhost_user_vui_init (vnet_main_t * vnm, vui->feature_mask = feature_mask; vui->unix_file_index = ~0; vui->log_base_addr = 0; - vui->operation_mode = operation_mode; vui->if_index = vui - vum->vhost_user_interfaces; mhash_set_mem (&vum->if_index_by_sock_name, vui->sock_filename, &vui->if_index, 0); @@ -2723,6 +2807,7 @@ vhost_user_vui_init (vnet_main_t * vnm, for (q = 0; q < VHOST_VRING_MAX_N; q++) vhost_user_vring_init (vui, q); + hw->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_INT_MODE; vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0); if (sw_if_index) @@ -2740,106 +2825,13 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_tx_thread_placement (vui); } -static uword -vhost_user_send_interrupt_process (vlib_main_t * vm, - vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - vhost_user_intf_t *vui; - f64 timeout = 3153600000.0 /* 100 years */ ; - uword event_type, *event_data = 0; - vhost_user_main_t *vum = &vhost_user_main; - vhost_iface_and_queue_t *vhiq; - vhost_cpu_t *vhc; - f64 now, poll_time_remaining; - - while (1) - { - poll_time_remaining = - vlib_process_wait_for_event_or_clock (vm, timeout); - event_type = vlib_process_get_events (vm, &event_data); - vec_reset_length (event_data); - - /* - * Use the remaining timeout if it is less than coalesce time to avoid - * resetting the existing timer in the middle of expiration - */ - timeout = poll_time_remaining; - if (vlib_process_suspend_time_is_zero (timeout) || - (timeout > vum->coalesce_time)) - timeout = vum->coalesce_time; - - now = vlib_time_now (vm); - switch (event_type) - { - case VHOST_USER_EVENT_START_TIMER: - if (!vlib_process_suspend_time_is_zero (poll_time_remaining)) - break; - /* fall through */ - - case ~0: - vec_foreach (vhc, vum->cpus) - { - u32 thread_index = vhc - vum->cpus; - f64 next_timeout; - - next_timeout = timeout; - vec_foreach (vhiq, vum->cpus[thread_index].rx_queues) - { - vui = &vum->vhost_user_interfaces[vhiq->vhost_iface_index]; - vhost_user_vring_t *rxvq = - &vui->vrings[VHOST_VRING_IDX_RX (vhiq->qid)]; - vhost_user_vring_t *txvq = - &vui->vrings[VHOST_VRING_IDX_TX (vhiq->qid)]; - - if (txvq->n_since_last_int) - { - if (now >= txvq->int_deadline) - vhost_user_send_call (vm, txvq); - else - next_timeout = txvq->int_deadline - now; - } - - if (rxvq->n_since_last_int) - { - if (now >= rxvq->int_deadline) - vhost_user_send_call (vm, rxvq); - else - next_timeout = rxvq->int_deadline - now; - } - - if ((next_timeout < timeout) && (next_timeout > 0.0)) - timeout = next_timeout; - } - } - break; - - default: - clib_warning ("BUG: unhandled event type %d", event_type); - break; - } - /* No less than 1 millisecond */ - if (timeout < 1e-3) - timeout = 1e-3; - } - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (vhost_user_send_interrupt_node,static) = { - .function = vhost_user_send_interrupt_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "vhost-user-send-interrupt-process", -}; -/* *INDENT-ON* */ - int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 * sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, u8 * hwaddr, - u8 operation_mode) + u8 renumber, u32 custom_dev_instance, u8 * hwaddr) { vhost_user_intf_t *vui = NULL; u32 sw_if_idx = ~0; @@ -2848,10 +2840,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_main_t *vum = &vhost_user_main; uword *if_index; - if ((operation_mode != VHOST_USER_POLLING_MODE) && - (operation_mode != VHOST_USER_INTERRUPT_MODE)) - return VNET_API_ERROR_UNIMPLEMENTED; - if (sock_filename == NULL || !(strlen (sock_filename) > 0)) { return VNET_API_ERROR_INVALID_ARGUMENT; @@ -2881,7 +2869,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_create_ethernet (vnm, vm, vui, hwaddr); vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename, - feature_mask, &sw_if_idx, operation_mode); + feature_mask, &sw_if_idx); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2892,14 +2880,6 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); - if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && - !vum->interrupt_mode && (vum->coalesce_time > 0.0) && - (vum->coalesce_frames > 0)) - { - vum->interrupt_mode = 1; - vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_START_TIMER, 0); - } return rv; } @@ -2908,8 +2888,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 sw_if_index, - u64 feature_mask, u8 renumber, u32 custom_dev_instance, - u8 operation_mode) + u64 feature_mask, u8 renumber, u32 custom_dev_instance) { vhost_user_main_t *vum = &vhost_user_main; vhost_user_intf_t *vui = NULL; @@ -2919,9 +2898,6 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, vnet_hw_interface_t *hwif; uword *if_index; - if ((operation_mode != VHOST_USER_POLLING_MODE) && - (operation_mode != VHOST_USER_INTERRUPT_MODE)) - return VNET_API_ERROR_UNIMPLEMENTED; if (!(hwif = vnet_get_sup_hw_interface (vnm, sw_if_index)) || hwif->dev_class_index != vhost_user_dev_class.index) return VNET_API_ERROR_INVALID_SW_IF_INDEX; @@ -2947,8 +2923,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, vhost_user_term_if (vui); vhost_user_vui_init (vnm, vui, server_sock_fd, - sock_filename, feature_mask, &sw_if_idx, - operation_mode); + sock_filename, feature_mask, &sw_if_idx); if (renumber) vnet_interface_name_renumber (sw_if_idx, custom_dev_instance); @@ -2956,33 +2931,9 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, // Process node must connect vlib_process_signal_event (vm, vhost_user_process_node.index, 0, 0); - if ((operation_mode == VHOST_USER_INTERRUPT_MODE) && - !vum->interrupt_mode && (vum->coalesce_time > 0.0) && - (vum->coalesce_frames > 0)) - { - vum->interrupt_mode = 1; - vlib_process_signal_event (vm, vhost_user_send_interrupt_node.index, - VHOST_USER_EVENT_START_TIMER, 0); - } return rv; } -static uword -unformat_vhost_user_operation_mode (unformat_input_t * input, va_list * args) -{ - u8 *operation_mode = va_arg (*args, u8 *); - uword rc = 1; - - if (unformat (input, "interrupt")) - *operation_mode = VHOST_USER_INTERRUPT_MODE; - else if (unformat (input, "polling")) - *operation_mode = VHOST_USER_POLLING_MODE; - else - rc = 0; - - return rc; -} - clib_error_t * vhost_user_connect_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -2998,7 +2949,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm, u8 hwaddr[6]; u8 *hw = NULL; clib_error_t *error = NULL; - u8 operation_mode = VHOST_USER_POLLING_MODE; /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) @@ -3020,9 +2970,6 @@ vhost_user_connect_command_fn (vlib_main_t * vm, { renumber = 1; } - else if (unformat (line_input, "mode %U", - unformat_vhost_user_operation_mode, &operation_mode)) - ; else { error = clib_error_return (0, "unknown input `%U'", @@ -3036,8 +2983,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm, int rv; if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename, is_server, &sw_if_index, feature_mask, - renumber, custom_dev_instance, hw, - operation_mode))) + renumber, custom_dev_instance, hw))) { error = clib_error_return (0, "vhost_user_create_if returned %d", rv); goto done; @@ -3127,7 +3073,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, vui = pool_elt_at_index (vum->vhost_user_interfaces, hi->dev_instance); vec_add2 (r_vuids, vuid, 1); - vuid->operation_mode = vui->operation_mode; vuid->sw_if_index = vui->sw_if_index; vuid->virtio_net_hdr_sz = vui->virtio_net_hdr_sz; vuid->features = vui->features; @@ -3152,25 +3097,6 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, return rv; } -static u8 * -format_vhost_user_operation_mode (u8 * s, va_list * va) -{ - int operation_mode = va_arg (*va, int); - - switch (operation_mode) - { - case VHOST_USER_POLLING_MODE: - s = format (s, "%s", "polling"); - break; - case VHOST_USER_INTERRUPT_MODE: - s = format (s, "%s", "interrupt"); - break; - default: - s = format (s, "%s", "invalid"); - } - return s; -} - clib_error_t * show_vhost_user_command_fn (vlib_main_t * vm, unformat_input_t * input, @@ -3182,10 +3108,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vhost_user_intf_t *vui; u32 hw_if_index, *hw_if_indices = 0; vnet_hw_interface_t *hi; - vhost_cpu_t *vhc; - vhost_iface_and_queue_t *vhiq; + u16 *queue; u32 ci; - int i, j, q; int show_descr = 0; struct feat_struct @@ -3238,6 +3162,8 @@ show_vhost_user_command_fn (vlib_main_t * vm, vlib_cli_output (vm, "Virtio vhost-user interfaces"); vlib_cli_output (vm, "Global:\n coalesce frames %d time %e", vum->coalesce_frames, vum->coalesce_time); + vlib_cli_output (vm, " number of rx virtqueues in interrupt mode: %d", + vum->ifq_count); for (i = 0; i < vec_len (hw_if_indices); i++) { @@ -3279,23 +3205,21 @@ show_vhost_user_command_fn (vlib_main_t * vm, (vui->unix_server_index != ~0) ? "server" : "client", strerror (vui->sock_errno)); - vlib_cli_output (vm, " configured mode: %U\n", - format_vhost_user_operation_mode, vui->operation_mode); vlib_cli_output (vm, " rx placement: "); - vec_foreach (vhc, vum->cpus) + + vec_foreach (queue, vui->rx_queues) { - vec_foreach (vhiq, vhc->rx_queues) - { - if (vhiq->vhost_iface_index == vui - vum->vhost_user_interfaces) - { - vlib_cli_output (vm, " thread %d on vring %d\n", - vhc - vum->cpus, - VHOST_VRING_IDX_TX (vhiq->qid)); - vlib_cli_output (vm, " mode: %U\n", - format_vhost_user_operation_mode, - vhc->operation_mode); - } - } + vnet_main_t *vnm = vnet_get_main (); + uword thread_index; + vnet_hw_interface_rx_mode mode; + + thread_index = vnet_get_device_input_thread_index (vnm, + vui->hw_if_index, + *queue); + vnet_hw_interface_get_rx_mode (vnm, vui->hw_if_index, *queue, &mode); + vlib_cli_output (vm, " thread %d on vring %d, %U\n", + thread_index, VHOST_VRING_IDX_TX (*queue), + format_vnet_hw_interface_rx_mode, mode); } vlib_cli_output (vm, " tx placement: %s\n", @@ -3444,8 +3368,7 @@ done: VLIB_CLI_COMMAND (vhost_user_connect_command, static) = { .path = "create vhost-user", .short_help = "create vhost-user socket <socket-filename> [server] " - "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] " - "[mode {interrupt | polling}]", + "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] ", .function = vhost_user_connect_command_fn, }; /* *INDENT-ON* */ @@ -3648,69 +3571,6 @@ vhost_user_unmap_all (void) } } -static clib_error_t * -vhost_thread_command_fn (vlib_main_t * vm, - unformat_input_t * input, vlib_cli_command_t * cmd) -{ - unformat_input_t _line_input, *line_input = &_line_input; - u32 worker_thread_index; - u32 sw_if_index; - u8 del = 0; - int rv; - clib_error_t *error = NULL; - - /* Get a line of input. */ - if (!unformat_user (input, unformat_line_input, line_input)) - return 0; - - if (!unformat - (line_input, "%U %d", unformat_vnet_sw_interface, vnet_get_main (), - &sw_if_index, &worker_thread_index)) - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, line_input); - goto done; - } - - if (unformat (line_input, "del")) - del = 1; - - if ((rv = - vhost_user_thread_placement (sw_if_index, worker_thread_index, del))) - { - error = clib_error_return (0, "vhost_user_thread_placement returned %d", - rv); - goto done; - } - -done: - unformat_free (line_input); - - return error; -} - - -/*? - * This command is used to move the RX processing for the given - * interfaces to the provided thread. If the '<em>del</em>' option is used, - * the forced thread assignment is removed and the thread assigment is - * reassigned automatically. Use '<em>show vhost-user <interface></em>' - * to see the thread assignment. - * - * @cliexpar - * Example of how to move the RX processing for a given interface to a given thread: - * @cliexcmd{vhost thread VirtualEthernet0/0/0 1} - * Example of how to remove the forced thread assignment for a given interface: - * @cliexcmd{vhost thread VirtualEthernet0/0/0 1 del} -?*/ -/* *INDENT-OFF* */ -VLIB_CLI_COMMAND (vhost_user_thread_command, static) = { - .path = "vhost thread", - .short_help = "vhost thread <iface> <worker-index> [del]", - .function = vhost_thread_command_fn, -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/devices/virtio/vhost-user.h b/src/vnet/devices/virtio/vhost-user.h index 56b65477b19..ceced342c92 100644 --- a/src/vnet/devices/virtio/vhost-user.h +++ b/src/vnet/devices/virtio/vhost-user.h @@ -66,13 +66,11 @@ typedef enum int vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 * sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, u8 * hwaddr, - u8 operation_mode); + u8 renumber, u32 custom_dev_instance, u8 * hwaddr); int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm, const char *sock_filename, u8 is_server, u32 sw_if_index, u64 feature_mask, - u8 renumber, u32 custom_dev_instance, - u8 operation_mode); + u8 renumber, u32 custom_dev_instance); int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index); @@ -210,14 +208,13 @@ typedef struct u32 callfd_idx; u32 kickfd_idx; u64 log_guest_addr; - u32 interrupt_thread_index; -} vhost_user_vring_t; -#define VHOST_USER_POLLING_MODE 0 -#define VHOST_USER_INTERRUPT_MODE 1 -#define VHOST_USER_ADAPTIVE_MODE 2 + /* The rx queue policy (interrupt/adaptive/polling) for this queue */ + u32 mode; +} vhost_user_vring_t; #define VHOST_USER_EVENT_START_TIMER 1 +#define VHOST_USER_EVENT_STOP_TIMER 2 typedef struct { @@ -258,20 +255,12 @@ typedef struct u8 use_tx_spinlock; u16 *per_cpu_tx_qid; - /* Vector of workers for this interface */ - u32 *workers; - - u8 operation_mode; + /* Vector of active rx queues for this interface */ + u16 *rx_queues; } vhost_user_intf_t; typedef struct { - u16 vhost_iface_index; - u16 qid; -} vhost_iface_and_queue_t; - -typedef struct -{ uword dst; uword src; u32 len; @@ -292,7 +281,6 @@ typedef struct typedef struct { - vhost_iface_and_queue_t *rx_queues; u32 rx_buffers_len; u32 rx_buffers[VHOST_USER_RX_BUFFERS_N]; @@ -302,12 +290,6 @@ typedef struct /* This is here so it doesn't end-up * using stack or registers. */ vhost_trace_t *current_trace; - - /* bitmap of pending rx interfaces */ - uword *pending_input_bitmap; - - /* The operation mode computed per cpu based on interface setting */ - u8 operation_mode; } vhost_cpu_t; typedef struct @@ -320,20 +302,14 @@ typedef struct f64 coalesce_time; int dont_dump_vhost_user_memory; - /** first cpu index */ - u32 input_cpu_first_index; - - /** total cpu count */ - u32 input_cpu_count; - /** Per-CPU data for vhost-user */ vhost_cpu_t *cpus; /** Pseudo random iterator */ u32 random; - /* Node is in interrupt mode */ - u8 interrupt_mode; + /* The number of rx interface/queue pairs in interrupt mode */ + u32 ifq_count; } vhost_user_main_t; typedef struct @@ -346,7 +322,6 @@ typedef struct u8 sock_filename[256]; u32 num_regions; int sock_errno; - u8 operation_mode; } vhost_user_intf_details_t; int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm, diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/vnet/devices/virtio/vhost_user_api.c index ac7afa611b6..8dbd032b117 100644 --- a/src/vnet/devices/virtio/vhost_user_api.c +++ b/src/vnet/devices/virtio/vhost_user_api.c @@ -81,8 +81,7 @@ vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp) rv = vhost_user_create_if (vnm, vm, (char *) mp->sock_filename, mp->is_server, &sw_if_index, (u64) ~ 0, mp->renumber, ntohl (mp->custom_dev_instance), - (mp->use_custom_mac) ? mp->mac_address : NULL, - mp->operation_mode); + (mp->use_custom_mac) ? mp->mac_address : NULL); /* Remember an interface tag for the new interface */ if (rv == 0) @@ -117,8 +116,7 @@ vl_api_modify_vhost_user_if_t_handler (vl_api_modify_vhost_user_if_t * mp) rv = vhost_user_modify_if (vnm, vm, (char *) mp->sock_filename, mp->is_server, sw_if_index, (u64) ~ 0, - mp->renumber, ntohl (mp->custom_dev_instance), - mp->operation_mode); + mp->renumber, ntohl (mp->custom_dev_instance)); REPLY_MACRO (VL_API_MODIFY_VHOST_USER_IF_REPLY); } @@ -164,7 +162,6 @@ send_sw_interface_vhost_user_details (vpe_api_main_t * am, mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz); mp->features = clib_net_to_host_u64 (vui->features); mp->is_server = vui->is_server; - mp->operation_mode = vui->operation_mode; mp->num_regions = ntohl (vui->num_regions); mp->sock_errno = ntohl (vui->sock_errno); mp->context = context; |