Diffstat (limited to 'src/vnet/devices/virtio/vhost_user.c')
-rw-r--r--  src/vnet/devices/virtio/vhost_user.c  311
1 file changed, 264 insertions, 47 deletions
diff --git a/src/vnet/devices/virtio/vhost_user.c b/src/vnet/devices/virtio/vhost_user.c
index 7094a00fb33..d24e516a93c 100644
--- a/src/vnet/devices/virtio/vhost_user.c
+++ b/src/vnet/devices/virtio/vhost_user.c
@@ -466,6 +466,8 @@ vhost_user_socket_read (clib_file_t * uf)
if (vui->enable_gso)
msg.u64 |= FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
+ if (vui->enable_packed)
+ msg.u64 |= (1ULL << FEAT_VIRTIO_F_RING_PACKED);
msg.size = sizeof (msg.u64);
vu_log_debug (vui, "if %d msg VHOST_USER_GET_FEATURES - reply "
@@ -655,7 +657,11 @@ vhost_user_socket_read (clib_file_t * uf)
vui->vrings[msg.state.index].used->idx;
/* tell driver that we don't want interrupts */
- vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
+ if (vhost_user_is_packed_ring_supported (vui))
+ vui->vrings[msg.state.index].used_event->flags =
+ VRING_EVENT_F_DISABLE;
+ else
+ vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
vlib_worker_thread_barrier_release (vm);
vhost_user_update_iface_state (vui);
break;
@@ -762,10 +768,47 @@ vhost_user_socket_read (clib_file_t * uf)
break;
case VHOST_USER_SET_VRING_BASE:
- vu_log_debug (vui, "if %d msg VHOST_USER_SET_VRING_BASE idx %d num %d",
+ vu_log_debug (vui,
+ "if %d msg VHOST_USER_SET_VRING_BASE idx %d num 0x%x",
vui->hw_if_index, msg.state.index, msg.state.num);
vlib_worker_thread_barrier_sync (vm);
vui->vrings[msg.state.index].last_avail_idx = msg.state.num;
+ if (vhost_user_is_packed_ring_supported (vui))
+ {
+ /*
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | last avail idx | | last used idx | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * ^ ^
+ * | |
+ * avail wrap counter used wrap counter
+ */
+ /* last avail idx at bit 0-14. */
+ vui->vrings[msg.state.index].last_avail_idx =
+ msg.state.num & 0x7fff;
+ /* avail wrap counter at bit 15 */
+ vui->vrings[msg.state.index].avail_wrap_counter =
+ ! !(msg.state.num & (1 << 15));
+
+ /*
+ * Although last_used_idx is passed in the upper 16 bits in qemu
+ * implementation, in practice, last_avail_idx and last_used_idx are
+ * usually the same. As a result, DPDK does not bother to pass us
+ * last_used_idx. The spec is not clear on the coding. I figured it
+ * out by reading the qemu code. So let's just read last_avail_idx
+ * and set last_used_idx equal to last_avail_idx.
+ */
+ vui->vrings[msg.state.index].last_used_idx =
+ vui->vrings[msg.state.index].last_avail_idx;
+ vui->vrings[msg.state.index].used_wrap_counter =
+ vui->vrings[msg.state.index].avail_wrap_counter;
+
+ if (vui->vrings[msg.state.index].avail_wrap_counter == 1)
+ vui->vrings[msg.state.index].avail_wrap_counter =
+ VIRTQ_DESC_F_AVAIL;
+ }
vlib_worker_thread_barrier_release (vm);
break;
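A minimal standalone sketch of the packed-ring VHOST_USER_SET_VRING_BASE decode described in the comment above, using a hypothetical ring_state_t in place of the vui->vrings[] state (illustration only, not part of the patch):

    #include <stdint.h>

    /* Hypothetical container for the per-vring fields touched above. */
    typedef struct
    {
      uint16_t last_avail_idx;
      uint16_t last_used_idx;
      uint16_t avail_wrap_counter;
      uint16_t used_wrap_counter;
    } ring_state_t;

    /* Decode msg.state.num for a packed ring: bits 0-14 carry the last
       avail index and bit 15 the avail wrap counter; the used side mirrors
       the avail side because last_used_idx may not be supplied. */
    static void
    decode_packed_vring_base (uint32_t num, ring_state_t * r)
    {
      r->last_avail_idx = num & 0x7fff;
      r->avail_wrap_counter = !!(num & (1 << 15));
      r->last_used_idx = r->last_avail_idx;
      r->used_wrap_counter = r->avail_wrap_counter;
    }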
@@ -784,6 +827,15 @@ vhost_user_socket_read (clib_file_t * uf)
* closing the vring also initializes the vring last_avail_idx
*/
msg.state.num = vui->vrings[msg.state.index].last_avail_idx;
+ if (vhost_user_is_packed_ring_supported (vui))
+ {
+ msg.state.num =
+ (vui->vrings[msg.state.index].last_avail_idx & 0x7fff) |
+ (! !vui->vrings[msg.state.index].avail_wrap_counter << 15);
+ msg.state.num |=
+ ((vui->vrings[msg.state.index].last_used_idx & 0x7fff) |
+ (! !vui->vrings[msg.state.index].used_wrap_counter << 15)) << 16;
+ }
msg.flags |= 4;
msg.size = sizeof (msg.state);
@@ -793,7 +845,8 @@ vhost_user_socket_read (clib_file_t * uf)
*/
vhost_user_vring_close (vui, msg.state.index);
vlib_worker_thread_barrier_release (vm);
- vu_log_debug (vui, "if %d msg VHOST_USER_GET_VRING_BASE idx %d num %d",
+ vu_log_debug (vui,
+ "if %d msg VHOST_USER_GET_VRING_BASE idx %d num 0x%x",
vui->hw_if_index, msg.state.index, msg.state.num);
n =
send (uf->file_descriptor, &msg, VHOST_USER_MSG_HDR_SZ + msg.size, 0);
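The encoding added to the VHOST_USER_GET_VRING_BASE reply is the inverse operation; a sketch under the same assumptions (the hypothetical ring_state_t above, not the actual vui state):

    /* Pack the avail/used index and wrap-counter pairs into the 32-bit
       msg.state.num layout shown in the earlier diagram. */
    static uint32_t
    encode_packed_vring_base (const ring_state_t * r)
    {
      uint32_t num;

      /* bits 0-14: last avail idx, bit 15: avail wrap counter */
      num = (r->last_avail_idx & 0x7fff) |
	((uint32_t) !!r->avail_wrap_counter << 15);
      /* bits 16-30: last used idx, bit 31: used wrap counter */
      num |= ((uint32_t) ((r->last_used_idx & 0x7fff) |
			  (!!r->used_wrap_counter << 15))) << 16;
      return num;
    }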
@@ -1440,7 +1493,8 @@ vhost_user_vui_init (vnet_main_t * vnm,
vhost_user_intf_t * vui,
int server_sock_fd,
const char *sock_filename,
- u64 feature_mask, u32 * sw_if_index, u8 enable_gso)
+ u64 feature_mask, u32 * sw_if_index, u8 enable_gso,
+ u8 enable_packed)
{
vnet_sw_interface_t *sw;
int q;
@@ -1472,6 +1526,7 @@ vhost_user_vui_init (vnet_main_t * vnm,
vui->log_base_addr = 0;
vui->if_index = vui - vum->vhost_user_interfaces;
vui->enable_gso = enable_gso;
+ vui->enable_packed = enable_packed;
/*
* enable_gso takes precedence over configurable feature mask if there
* is a clash.
@@ -1519,7 +1574,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
u32 * sw_if_index,
u64 feature_mask,
u8 renumber, u32 custom_dev_instance, u8 * hwaddr,
- u8 enable_gso)
+ u8 enable_gso, u8 enable_packed)
{
vhost_user_intf_t *vui = NULL;
u32 sw_if_idx = ~0;
@@ -1560,7 +1615,7 @@ vhost_user_create_if (vnet_main_t * vnm, vlib_main_t * vm,
vlib_worker_thread_barrier_release (vm);
vhost_user_vui_init (vnm, vui, server_sock_fd, sock_filename,
- feature_mask, &sw_if_idx, enable_gso);
+ feature_mask, &sw_if_idx, enable_gso, enable_packed);
vnet_sw_interface_set_mtu (vnm, vui->sw_if_index, 9000);
vhost_user_rx_thread_placement (vui, 1);
@@ -1582,7 +1637,7 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
u8 is_server,
u32 sw_if_index,
u64 feature_mask, u8 renumber, u32 custom_dev_instance,
- u8 enable_gso)
+ u8 enable_gso, u8 enable_packed)
{
vhost_user_main_t *vum = &vhost_user_main;
vhost_user_intf_t *vui = NULL;
@@ -1619,7 +1674,8 @@ vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_term_if (vui);
vhost_user_vui_init (vnm, vui, server_sock_fd,
- sock_filename, feature_mask, &sw_if_idx, enable_gso);
+ sock_filename, feature_mask, &sw_if_idx, enable_gso,
+ enable_packed);
if (renumber)
vnet_interface_name_renumber (sw_if_idx, custom_dev_instance);
@@ -1645,7 +1701,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
u8 hwaddr[6];
u8 *hw = NULL;
clib_error_t *error = NULL;
- u8 enable_gso = 0;
+ u8 enable_gso = 0, enable_packed = 0;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -1653,6 +1709,8 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
/* GSO feature is disabled by default */
feature_mask &= ~FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS;
+ /* packed-ring feature is disabled by default */
+ feature_mask &= ~(1ULL << FEAT_VIRTIO_F_RING_PACKED);
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (line_input, "socket %s", &sock_filename))
@@ -1661,6 +1719,8 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
is_server = 1;
else if (unformat (line_input, "gso"))
enable_gso = 1;
+ else if (unformat (line_input, "packed"))
+ enable_packed = 1;
else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
;
else
@@ -1685,7 +1745,7 @@ vhost_user_connect_command_fn (vlib_main_t * vm,
if ((rv = vhost_user_create_if (vnm, vm, (char *) sock_filename,
is_server, &sw_if_index, feature_mask,
renumber, custom_dev_instance, hw,
- enable_gso)))
+ enable_gso, enable_packed)))
{
error = clib_error_return (0, "vhost_user_create_if returned %d", rv);
goto done;
@@ -1799,6 +1859,186 @@ vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
return rv;
}
+static u8 *
+format_vhost_user_desc (u8 * s, va_list * args)
+{
+ char *fmt = va_arg (*args, char *);
+ vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
+ vring_desc_t *desc_table = va_arg (*args, vring_desc_t *);
+ int idx = va_arg (*args, int);
+ u32 *mem_hint = va_arg (*args, u32 *);
+
+ s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
+ desc_table[idx].flags, desc_table[idx].next,
+ pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
+ mem_hint)));
+ return s;
+}
+
+static u8 *
+format_vhost_user_vring (u8 * s, va_list * args)
+{
+ char *fmt = va_arg (*args, char *);
+ vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
+ int q = va_arg (*args, int);
+
+ s = format (s, fmt, vui->vrings[q].avail->flags, vui->vrings[q].avail->idx,
+ vui->vrings[q].used->flags, vui->vrings[q].used->idx);
+ return s;
+}
+
+static void
+vhost_user_show_fds (vlib_main_t * vm, vhost_user_intf_t * vui, int q)
+{
+ int kickfd = UNIX_GET_FD (vui->vrings[q].kickfd_idx);
+ int callfd = UNIX_GET_FD (vui->vrings[q].callfd_idx);
+
+ vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n", kickfd, callfd,
+ vui->vrings[q].errfd);
+}
+
+static void
+vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
+ int show_descr, int show_verbose)
+{
+ int j;
+ u32 mem_hint = 0;
+ u32 idx;
+ u32 n_entries;
+ vring_desc_t *desc_table;
+
+ if (vui->vrings[q].avail && vui->vrings[q].used)
+ vlib_cli_output (vm, "%U", format_vhost_user_vring,
+ " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
+ vui, q);
+
+ vhost_user_show_fds (vm, vui, q);
+
+ if (show_descr)
+ {
+ vlib_cli_output (vm, "\n descriptor table:\n");
+ vlib_cli_output (vm,
+ " slot addr len flags next "
+ "user_addr\n");
+ vlib_cli_output (vm,
+ " ===== ================== ===== ====== ===== "
+ "==================\n");
+ for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++)
+ {
+ desc_table = vui->vrings[q].desc;
+ vlib_cli_output (vm, "%U", format_vhost_user_desc,
+ " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n", vui,
+ desc_table, j, &mem_hint);
+ if (show_verbose && (desc_table[j].flags & VIRTQ_DESC_F_INDIRECT))
+ {
+ n_entries = desc_table[j].len / sizeof (vring_desc_t);
+ desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
+ if (desc_table)
+ {
+ for (idx = 0; idx < clib_min (20, n_entries); idx++)
+ {
+ vlib_cli_output
+ (vm, "%U", format_vhost_user_desc,
+ "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
+ desc_table, idx, &mem_hint);
+ }
+ if (n_entries >= 20)
+ vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
+ n_entries);
+ }
+ }
+ }
+ }
+}
+
+static u8 *
+format_vhost_user_packed_desc (u8 * s, va_list * args)
+{
+ char *fmt = va_arg (*args, char *);
+ vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
+ vring_packed_desc_t *desc_table = va_arg (*args, vring_packed_desc_t *);
+ int idx = va_arg (*args, int);
+ u32 *mem_hint = va_arg (*args, u32 *);
+
+ s = format (s, fmt, idx, desc_table[idx].addr, desc_table[idx].len,
+ desc_table[idx].flags, desc_table[idx].id,
+ pointer_to_uword (map_guest_mem (vui, desc_table[idx].addr,
+ mem_hint)));
+ return s;
+}
+
+static u8 *
+format_vhost_user_vring_packed (u8 * s, va_list * args)
+{
+ char *fmt = va_arg (*args, char *);
+ vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
+ int q = va_arg (*args, int);
+
+ s = format (s, fmt, vui->vrings[q].avail_event->flags,
+ vui->vrings[q].avail_event->off_wrap,
+ vui->vrings[q].used_event->flags,
+ vui->vrings[q].used_event->off_wrap,
+ vui->vrings[q].avail_wrap_counter,
+ vui->vrings[q].used_wrap_counter);
+ return s;
+}
+
+static void
+vhost_user_show_desc_packed (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
+ int show_descr, int show_verbose)
+{
+ int j;
+ u32 mem_hint = 0;
+ u32 idx;
+ u32 n_entries;
+ vring_packed_desc_t *desc_table;
+
+ if (vui->vrings[q].avail_event && vui->vrings[q].used_event)
+ vlib_cli_output (vm, "%U", format_vhost_user_vring_packed,
+ " avail_event.flags %x avail_event.off_wrap %u "
+ "used_event.flags %x used_event.off_wrap %u\n"
+ " avail wrap counter %u, used wrap counter %u\n",
+ vui, q);
+
+ vhost_user_show_fds (vm, vui, q);
+
+ if (show_descr)
+ {
+ vlib_cli_output (vm, "\n descriptor table:\n");
+ vlib_cli_output (vm,
+ " slot addr len flags id "
+ "user_addr\n");
+ vlib_cli_output (vm,
+ " ===== ================== ===== ====== ===== "
+ "==================\n");
+ for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++)
+ {
+ desc_table = vui->vrings[q].packed_desc;
+ vlib_cli_output (vm, "%U", format_vhost_user_packed_desc,
+ " %-5u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
+ desc_table, j, &mem_hint);
+ if (show_verbose && (desc_table[j].flags & VIRTQ_DESC_F_INDIRECT))
+ {
+ n_entries = desc_table[j].len >> 4;
+ desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
+ if (desc_table)
+ {
+ for (idx = 0; idx < clib_min (20, n_entries); idx++)
+ {
+ vlib_cli_output
+ (vm, "%U", format_vhost_user_packed_desc,
+ "> %-4u 0x%016lx %-5u 0x%04x %-5u 0x%016lx\n", vui,
+ desc_table, idx, &mem_hint);
+ }
+ if (n_entries >= 20)
+ vlib_cli_output (vm, "Skip displaying entries 20...%u\n",
+ n_entries);
+ }
+ }
+ }
+ }
+}
+
clib_error_t *
show_vhost_user_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -1814,6 +2054,7 @@ show_vhost_user_command_fn (vlib_main_t * vm,
u32 ci;
int i, j, q;
int show_descr = 0;
+ int show_verbose = 0;
struct feat_struct
{
u8 bit;
@@ -1855,6 +2096,8 @@ show_vhost_user_command_fn (vlib_main_t * vm,
}
else if (unformat (input, "descriptors") || unformat (input, "desc"))
show_descr = 1;
+ else if (unformat (input, "verbose"))
+ show_verbose = 1;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -1884,6 +2127,8 @@ show_vhost_user_command_fn (vlib_main_t * vm,
hw_if_indices[i]);
if (vui->enable_gso)
vlib_cli_output (vm, " GSO enable");
+ if (vui->enable_packed)
+ vlib_cli_output (vm, " Packed ring enable");
vlib_cli_output (vm, "virtio_net_hdr_sz %d\n"
" features mask (0x%llx): \n"
@@ -1985,41 +2230,11 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vui->vrings[q].last_avail_idx,
vui->vrings[q].last_used_idx);
- if (vui->vrings[q].avail && vui->vrings[q].used)
- vlib_cli_output (vm,
- " avail.flags %x avail.idx %d used.flags %x used.idx %d\n",
- vui->vrings[q].avail->flags,
- vui->vrings[q].avail->idx,
- vui->vrings[q].used->flags,
- vui->vrings[q].used->idx);
-
- int kickfd = UNIX_GET_FD (vui->vrings[q].kickfd_idx);
- int callfd = UNIX_GET_FD (vui->vrings[q].callfd_idx);
- vlib_cli_output (vm, " kickfd %d callfd %d errfd %d\n",
- kickfd, callfd, vui->vrings[q].errfd);
-
- if (show_descr)
- {
- vlib_cli_output (vm, "\n descriptor table:\n");
- vlib_cli_output (vm,
- " id addr len flags next user_addr\n");
- vlib_cli_output (vm,
- " ===== ================== ===== ====== ===== ==================\n");
- for (j = 0; j < vui->vrings[q].qsz_mask + 1; j++)
- {
- u32 mem_hint = 0;
- vlib_cli_output (vm,
- " %-5d 0x%016lx %-5d 0x%04x %-5d 0x%016lx\n",
- j, vui->vrings[q].desc[j].addr,
- vui->vrings[q].desc[j].len,
- vui->vrings[q].desc[j].flags,
- vui->vrings[q].desc[j].next,
- pointer_to_uword (map_guest_mem
- (vui,
- vui->vrings[q].desc[j].
- addr, &mem_hint)));
- }
- }
+ if (vhost_user_is_packed_ring_supported (vui))
+ vhost_user_show_desc_packed (vm, vui, q, show_descr,
+ show_verbose);
+ else
+ vhost_user_show_desc (vm, vui, q, show_descr, show_verbose);
}
vlib_cli_output (vm, "\n");
}
@@ -2090,7 +2305,8 @@ done:
VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
.path = "create vhost-user",
.short_help = "create vhost-user socket <socket-filename> [server] "
- "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] [gso]",
+ "[feature-mask <hex>] [hwaddr <mac-addr>] [renumber <dev_instance>] [gso] "
+ "[packed]",
.function = vhost_user_connect_command_fn,
.is_mp_safe = 1,
};
@@ -2251,7 +2467,8 @@ VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
.path = "show vhost-user",
- .short_help = "show vhost-user [<interface> [<interface> [..]]] [descriptors]",
+ .short_help = "show vhost-user [<interface> [<interface> [..]]] "
+ "[[descriptors] [verbose]]",
.function = show_vhost_user_command_fn,
};
/* *INDENT-ON* */
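Example CLI usage exercising the new "packed" and "verbose" keywords (the socket path and interface name below are placeholders):

    create vhost-user socket /var/run/vpp/sock1.sock server gso packed
    show vhost-user VirtualEthernet0/0/0 descriptors verbose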