summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- vnet/vnet/devices/virtio/vhost-user.c 274
-rw-r--r-- vnet/vnet/devices/virtio/vhost-user.h 2
2 files changed, 182 insertions, 94 deletions
diff --git a/vnet/vnet/devices/virtio/vhost-user.c b/vnet/vnet/devices/virtio/vhost-user.c
index 6fa1c652c8f..59daf871200 100644
--- a/vnet/vnet/devices/virtio/vhost-user.c
+++ b/vnet/vnet/devices/virtio/vhost-user.c
@@ -64,7 +64,9 @@ vlib_node_registration_t vhost_user_input_node;
_(NONE, "no error") \
_(NOT_READY, "vhost user state error") \
_(PKT_DROP_NOBUF, "tx packet drops (no available descriptors)") \
- _(MMAP_FAIL, "mmap failure")
+ _(PKT_DROP_NOMRG, "tx packet drops (cannot merge descriptors)") \
+ _(MMAP_FAIL, "mmap failure") \
+ _(INDIRECT_OVERFLOW, "indirect descriptor table overflow")
typedef enum
{
@@ -84,7 +86,9 @@ static char *vhost_user_tx_func_error_strings[] = {
_(NO_ERROR, "no error") \
_(NO_BUFFER, "no available buffer") \
_(MMAP_FAIL, "mmap failure") \
- _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)")
+ _(INDIRECT_OVERFLOW, "indirect descriptor overflows table") \
+ _(UNDERSIZED_FRAME, "undersized ethernet frame received (< 14 bytes)") \
+ _(FULL_RX_QUEUE, "full rx queue (possible driver tx drop)")
typedef enum
{
@@ -383,6 +387,7 @@ vhost_user_socket_read (unix_file_t * uf)
msg.flags |= 4;
msg.u64 = (1 << FEAT_VIRTIO_NET_F_MRG_RXBUF) |
(1 << FEAT_VIRTIO_F_ANY_LAYOUT) |
+ (1 << FEAT_VIRTIO_F_INDIRECT_DESC) |
(1 << FEAT_VHOST_F_LOG_ALL) |
(1 << FEAT_VIRTIO_NET_F_GUEST_ANNOUNCE) |
(1 << FEAT_VHOST_USER_F_PROTOCOL_FEATURES);
@@ -957,14 +962,18 @@ vhost_user_if_input (vlib_main_t * vm,
if (PREDICT_FALSE (txvq->avail->flags & 0xFFFE))
return 0;
+ n_left = (u16) (txvq->avail->idx - txvq->last_avail_idx);
+
/* nothing to do */
- if (txvq->avail->idx == txvq->last_avail_idx)
+ if (PREDICT_FALSE (n_left == 0))
return 0;
- if (PREDICT_TRUE (txvq->avail->idx > txvq->last_avail_idx))
- n_left = txvq->avail->idx - txvq->last_avail_idx;
- else /* wrapped */
- n_left = (u16) - 1 - txvq->last_avail_idx + txvq->avail->idx;
+ if (PREDICT_FALSE (n_left == txvq->qsz))
+ {
+ //Informational error logging when VPP is not receiving packets fast enough
+ vlib_error_count (vm, node->node_index,
+ VHOST_USER_INPUT_FUNC_ERROR_FULL_RX_QUEUE, 1);
+ }
if (PREDICT_FALSE (!vui->admin_up))
{
@@ -977,9 +986,6 @@ vhost_user_if_input (vlib_main_t * vm,
return 0;
}
- if (PREDICT_FALSE (n_left > txvq->qsz))
- return 0;
-
qsz_mask = txvq->qsz - 1;
cpu_index = os_get_cpu_number ();
drops = 0;
@@ -997,7 +1003,7 @@ vhost_user_if_input (vlib_main_t * vm,
*/
if (PREDICT_FALSE (!vum->rx_buffers[cpu_index]))
{
- vec_alloc (vum->rx_buffers[cpu_index], VLIB_FRAME_SIZE);
+ vec_alloc (vum->rx_buffers[cpu_index], 2 * VLIB_FRAME_SIZE);
if (PREDICT_FALSE (!vum->rx_buffers[cpu_index]))
flush = n_left; //Drop all input
@@ -1005,14 +1011,12 @@ vhost_user_if_input (vlib_main_t * vm,
if (PREDICT_FALSE (_vec_len (vum->rx_buffers[cpu_index]) < n_left))
{
+ u32 curr_len = _vec_len (vum->rx_buffers[cpu_index]);
_vec_len (vum->rx_buffers[cpu_index]) +=
vlib_buffer_alloc_from_free_list (vm,
vum->rx_buffers[cpu_index] +
- _vec_len (vum->rx_buffers
- [cpu_index]),
- VLIB_FRAME_SIZE -
- _vec_len (vum->rx_buffers
- [cpu_index]),
+ curr_len,
+ 2 * VLIB_FRAME_SIZE - curr_len,
VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
if (PREDICT_FALSE (n_left > _vec_len (vum->rx_buffers[cpu_index])))
@@ -1053,6 +1057,20 @@ vhost_user_if_input (vlib_main_t * vm,
u16 desc_chain_head, desc_current;
u8 error = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
+ if (PREDICT_TRUE (n_left > 1))
+ {
+ u32 next_desc =
+ txvq->avail->ring[(txvq->last_avail_idx + 1) & qsz_mask];
+ void *buffer_addr =
+ map_guest_mem (vui, txvq->desc[next_desc].addr);
+ if (PREDICT_TRUE (buffer_addr != 0))
+ CLIB_PREFETCH (buffer_addr, 64, STORE);
+
+ u32 bi = vum->rx_buffers[cpu_index][rx_len - 2];
+ vlib_prefetch_buffer_with_index (vm, bi, STORE);
+ CLIB_PREFETCH (vlib_get_buffer (vm, bi)->data, 128, STORE);
+ }
+
desc_chain_head = desc_current =
txvq->avail->ring[txvq->last_avail_idx & qsz_mask];
bi_head = bi_current = vum->rx_buffers[cpu_index][--rx_len];
@@ -1061,7 +1079,8 @@ vhost_user_if_input (vlib_main_t * vm,
uword offset;
if (PREDICT_TRUE (vui->is_any_layout) ||
- !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT))
+ (!(txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT) &&
+ !(txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)))
{
/* ANYLAYOUT or single buffer */
offset = vui->virtio_net_hdr_sz;
@@ -1072,14 +1091,35 @@ vhost_user_if_input (vlib_main_t * vm,
offset = txvq->desc[desc_current].len;
}
+ vring_desc_t *desc_table = txvq->desc;
+ u32 desc_index = desc_current;
+
+ if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_INDIRECT)
+ {
+ desc_table = map_guest_mem (vui, txvq->desc[desc_current].addr);
+ desc_index = 0;
+ if (PREDICT_FALSE (desc_table == 0))
+ {
+ error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
+ goto out;
+ }
+ }
+
while (1)
{
void *buffer_addr =
- map_guest_mem (vui, txvq->desc[desc_current].addr);
+ map_guest_mem (vui, desc_table[desc_index].addr);
if (PREDICT_FALSE (buffer_addr == 0))
{
error = VHOST_USER_INPUT_FUNC_ERROR_MMAP_FAIL;
- break;
+ goto out;
+ }
+
+ if (PREDICT_TRUE
+ (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT))
+ {
+ CLIB_PREFETCH (&desc_table[desc_table[desc_index].next],
+ sizeof (vring_desc_t), STORE);
}
#if VHOST_USER_COPY_TX_HDR == 1
@@ -1087,9 +1127,9 @@ vhost_user_if_input (vlib_main_t * vm,
clib_memcpy (b->pre_data, buffer_addr, sizeof (virtio_net_hdr_t)); /* 12 byte hdr is not used on tx */
#endif
- if (txvq->desc[desc_current].len > offset)
+ if (desc_table[desc_index].len > offset)
{
- u16 len = txvq->desc[desc_current].len - offset;
+ u16 len = desc_table[desc_index].len - offset;
u16 copied = vlib_buffer_chain_append_data_with_alloc (vm,
VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
b_head,
@@ -1098,7 +1138,6 @@ vhost_user_if_input (vlib_main_t * vm,
+
offset,
len);
-
if (copied != len)
{
error = VHOST_USER_INPUT_FUNC_ERROR_NO_BUFFER;
@@ -1108,11 +1147,12 @@ vhost_user_if_input (vlib_main_t * vm,
offset = 0;
/* if next flag is set, take next desc in the chain */
- if (txvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
- desc_current = txvq->desc[desc_current].next;
+ if ((desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT))
+ desc_index = desc_table[desc_index].next;
else
- break;
+ goto out;
}
+ out:
/* consume the descriptor and return it as used */
txvq->last_avail_idx++;
@@ -1123,11 +1163,14 @@ vhost_user_if_input (vlib_main_t * vm,
ring[txvq->last_used_idx & qsz_mask]);
txvq->last_used_idx++;
+ //It is important to free RX as fast as possible such that the TX
+ //process does not drop packets
+ if ((txvq->last_used_idx & 0x3f) == 0) // Every 64 packets
+ txvq->used->idx = txvq->last_used_idx;
+
if (PREDICT_FALSE (b_head->current_length < 14 &&
error == VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR))
- {
- error = VHOST_USER_INPUT_FUNC_ERROR_UNDERSIZED_FRAME;
- }
+ error = VHOST_USER_INPUT_FUNC_ERROR_UNDERSIZED_FRAME;
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b_head);
@@ -1162,6 +1205,7 @@ vhost_user_if_input (vlib_main_t * vm,
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
}
if (PREDICT_TRUE (vum->rx_buffers[cpu_index] != 0))
@@ -1264,7 +1308,6 @@ vhost_user_intfc_tx (vlib_main_t * vm,
{
u32 *buffers = vlib_frame_args (frame);
u32 n_left = 0;
- u16 used_index;
vhost_user_main_t *vum = &vhost_user_main;
uword n_packets = 0;
vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
@@ -1274,6 +1317,8 @@ vhost_user_intfc_tx (vlib_main_t * vm,
u16 qsz_mask;
u8 error = VHOST_USER_TX_FUNC_ERROR_NONE;
+ n_left = n_packets = frame->n_vectors;
+
if (PREDICT_FALSE (!vui->is_up))
goto done2;
@@ -1304,23 +1349,18 @@ vhost_user_intfc_tx (vlib_main_t * vm,
goto done2;
}
- n_left = n_packets = frame->n_vectors;
- used_index = rxvq->used->idx;
qsz_mask = rxvq->qsz - 1; /* qsz is always power of 2 */
while (n_left > 0)
{
vlib_buffer_t *b0, *current_b0;
- u16 desc_chain_head, desc_current, desc_len;
+ u16 desc_head, desc_index, desc_len;
+ vring_desc_t *desc_table;
void *buffer_addr;
- uword offset;
-
- if (n_left >= 2)
- vlib_prefetch_buffer_with_index (vm, buffers[1], LOAD);
+ u32 buffer_len;
b0 = vlib_get_buffer (vm, buffers[0]);
buffers++;
- n_left--;
if (PREDICT_FALSE (rxvq->last_avail_idx == rxvq->avail->idx))
{
@@ -1328,41 +1368,54 @@ vhost_user_intfc_tx (vlib_main_t * vm,
goto done;
}
- desc_current = desc_chain_head =
+ desc_table = rxvq->desc;
+ desc_head = desc_index =
rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
- offset = vui->virtio_net_hdr_sz;
- desc_len = offset;
+ if (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT)
+ {
+ if (PREDICT_FALSE
+ (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ if (PREDICT_FALSE
+ (!(desc_table =
+ map_guest_mem (vui, rxvq->desc[desc_index].addr))))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done;
+ }
+ desc_index = 0;
+ }
+
+ desc_len = vui->virtio_net_hdr_sz;
+
if (PREDICT_FALSE
- (!(buffer_addr =
- map_guest_mem (vui, rxvq->desc[desc_current].addr))))
+ (!(buffer_addr = map_guest_mem (vui, desc_table[desc_index].addr))))
{
error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
goto done;
}
- CLIB_PREFETCH (buffer_addr, clib_min (rxvq->desc[desc_current].len,
- 4 * CLIB_CACHE_LINE_BYTES),
- STORE);
+ buffer_len = desc_table[desc_index].len;
+
+ CLIB_PREFETCH (buffer_addr,
+ clib_min (buffer_len, 2 * CLIB_CACHE_LINE_BYTES), STORE);
virtio_net_hdr_mrg_rxbuf_t *hdr =
(virtio_net_hdr_mrg_rxbuf_t *) buffer_addr;
hdr->hdr.flags = 0;
hdr->hdr.gso_type = 0;
-
- vhost_user_log_dirty_pages (vui, rxvq->desc[desc_current].addr,
- vui->virtio_net_hdr_sz);
-
if (vui->virtio_net_hdr_sz == 12)
hdr->num_buffers = 1;
+ vhost_user_log_dirty_pages (vui, desc_table[desc_index].addr,
+ vui->virtio_net_hdr_sz);
+
u16 bytes_left = b0->current_length;
- buffer_addr += offset;
+ buffer_addr += vui->virtio_net_hdr_sz;
+ buffer_len -= vui->virtio_net_hdr_sz;
current_b0 = b0;
-
- //FIXME: This was in the code but I don't think it is valid
- /*if (PREDICT_FALSE(!vui->is_any_layout && (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT))) {
- rxvq->desc[desc_current].len = vui->virtio_net_hdr_sz;
- } */
-
while (1)
{
if (!bytes_left)
@@ -1379,99 +1432,132 @@ vhost_user_intfc_tx (vlib_main_t * vm,
}
}
- if (rxvq->desc[desc_current].len <= offset)
+ if (buffer_len == 0)
{ //Get new output
- if (rxvq->desc[desc_current].flags & VIRTQ_DESC_F_NEXT)
+ if (desc_table[desc_index].flags & VIRTQ_DESC_F_NEXT)
{
- offset = 0;
- desc_current = rxvq->desc[desc_current].next;
+ //Next one is chained
+ desc_index = desc_table[desc_index].next;
if (PREDICT_FALSE
(!(buffer_addr =
- map_guest_mem (vui, rxvq->desc[desc_current].addr))))
+ map_guest_mem (vui, desc_table[desc_index].addr))))
{
- used_index -= hdr->num_buffers - 1;
+ rxvq->last_used_idx -= hdr->num_buffers - 1;
rxvq->last_avail_idx -= hdr->num_buffers - 1;
error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
goto done;
}
+ buffer_len = desc_table[desc_index].len;
}
- else if (vui->virtio_net_hdr_sz == 12)
- { //MRG is available
-
+ else if (vui->virtio_net_hdr_sz == 12) //MRG is available
+ {
//Move from available to used buffer
- rxvq->used->ring[used_index & qsz_mask].id =
- desc_chain_head;
- rxvq->used->ring[used_index & qsz_mask].len = desc_len;
+ rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id =
+ desc_head;
+ rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len =
+ desc_len;
vhost_user_log_dirty_ring (vui, rxvq,
- ring[used_index & qsz_mask]);
+ ring[rxvq->last_used_idx &
+ qsz_mask]);
rxvq->last_avail_idx++;
- used_index++;
+ rxvq->last_used_idx++;
hdr->num_buffers++;
if (PREDICT_FALSE
(rxvq->last_avail_idx == rxvq->avail->idx))
{
//Dequeue queued descriptors for this packet
- used_index -= hdr->num_buffers - 1;
+ rxvq->last_used_idx -= hdr->num_buffers - 1;
rxvq->last_avail_idx -= hdr->num_buffers - 1;
error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
goto done;
}
- //Look at next one
- desc_chain_head =
+ desc_table = rxvq->desc;
+ desc_head = desc_index =
rxvq->avail->ring[rxvq->last_avail_idx & qsz_mask];
- desc_current = desc_chain_head;
- desc_len = 0;
- offset = 0;
+ if (PREDICT_FALSE
+ (rxvq->desc[desc_head].flags & VIRTQ_DESC_F_INDIRECT))
+ {
+ //It is seriously unlikely that a driver will put indirect descriptor
+ //after non-indirect descriptor.
+ if (PREDICT_FALSE
+ (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
+ goto done;
+ }
+ if (PREDICT_FALSE
+ (!(desc_table =
+ map_guest_mem (vui,
+ rxvq->desc[desc_index].addr))))
+ {
+ error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
+ goto done;
+ }
+ desc_index = 0;
+ }
+
if (PREDICT_FALSE
(!(buffer_addr =
- map_guest_mem (vui, rxvq->desc[desc_current].addr))))
+ map_guest_mem (vui, desc_table[desc_index].addr))))
{
- //Dequeue queued descriptors for this packet
- used_index -= hdr->num_buffers - 1;
- rxvq->last_avail_idx -= hdr->num_buffers - 1;
error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
goto done;
}
+ buffer_len = desc_table[desc_index].len;
+ CLIB_PREFETCH (buffer_addr,
+ clib_min (buffer_len,
+ 2 * CLIB_CACHE_LINE_BYTES), STORE);
}
else
{
- error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOBUF;
+ error = VHOST_USER_TX_FUNC_ERROR_PKT_DROP_NOMRG;
goto done;
}
}
- u16 bytes_to_copy =
- bytes_left >
- (rxvq->desc[desc_current].len -
- offset) ? (rxvq->desc[desc_current].len - offset) : bytes_left;
+ u16 bytes_to_copy = bytes_left;
+ bytes_to_copy =
+ (bytes_to_copy > buffer_len) ? buffer_len : bytes_to_copy;
clib_memcpy (buffer_addr,
vlib_buffer_get_current (current_b0) +
current_b0->current_length - bytes_left,
bytes_to_copy);
vhost_user_log_dirty_pages (vui,
- rxvq->desc[desc_current].addr + offset,
+ desc_table[desc_index].addr +
+ desc_table[desc_index].len -
+ bytes_left - bytes_to_copy,
bytes_to_copy);
+
bytes_left -= bytes_to_copy;
- offset += bytes_to_copy;
+ buffer_len -= bytes_to_copy;
buffer_addr += bytes_to_copy;
desc_len += bytes_to_copy;
}
+ if (PREDICT_TRUE (n_left >= 2))
+ {
+ vlib_prefetch_buffer_with_index (vm, buffers[1], STORE);
+ CLIB_PREFETCH (&n_left, sizeof (n_left), STORE);
+ }
+
//Move from available to used ring
- rxvq->used->ring[used_index & qsz_mask].id = desc_chain_head;
- rxvq->used->ring[used_index & qsz_mask].len = desc_len;
- vhost_user_log_dirty_ring (vui, rxvq, ring[used_index & qsz_mask]);
+ rxvq->used->ring[rxvq->last_used_idx & qsz_mask].id = desc_head;
+ rxvq->used->ring[rxvq->last_used_idx & qsz_mask].len = desc_len;
+ vhost_user_log_dirty_ring (vui, rxvq,
+ ring[rxvq->last_used_idx & qsz_mask]);
rxvq->last_avail_idx++;
- used_index++;
+ rxvq->last_used_idx++;
+
+ n_left--; //At the end for error counting when 'goto done' is invoked
}
done:
CLIB_MEMORY_BARRIER ();
- rxvq->used->idx = used_index;
+ rxvq->used->idx = rxvq->last_used_idx;
vhost_user_log_dirty_ring (vui, rxvq, idx);
/* interrupt (call) handling */
@@ -2221,8 +2307,8 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vui->vrings[q].desc[j].next,
pointer_to_uword (map_guest_mem
(vui,
- vui->vrings[q].
- desc[j].addr)));
+ vui->vrings[q].desc[j].
+ addr)));
}
}
}
diff --git a/vnet/vnet/devices/virtio/vhost-user.h b/vnet/vnet/devices/virtio/vhost-user.h
index 4934a8c0c1e..fa41403874b 100644
--- a/vnet/vnet/devices/virtio/vhost-user.h
+++ b/vnet/vnet/devices/virtio/vhost-user.h
@@ -24,6 +24,7 @@
#define VHOST_NET_VRING_NUM 2
#define VIRTQ_DESC_F_NEXT 1
+#define VIRTQ_DESC_F_INDIRECT 4
#define VHOST_USER_REPLY_MASK (0x1 << 2)
#define VHOST_USER_PROTOCOL_F_MQ 0
@@ -51,6 +52,7 @@
#define foreach_virtio_net_feature \
_ (VIRTIO_NET_F_MRG_RXBUF, 15) \
_ (VIRTIO_F_ANY_LAYOUT, 27) \
+ _ (VIRTIO_F_INDIRECT_DESC, 28) \
_ (VHOST_F_LOG_ALL, 26) \
_ (VIRTIO_NET_F_GUEST_ANNOUNCE, 21) \
_ (VHOST_USER_F_PROTOCOL_FEATURES, 30)