summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLuca Boccassi <luca.boccassi@gmail.com>2018-04-23 14:17:55 +0100
committerLuca Boccassi <luca.boccassi@gmail.com>2018-04-23 14:18:31 +0100
commit18af4227fa5eee002b1a79207935620f6112803e (patch)
tree2478ed36888eb88a9bcd49fd025f86ee9cab2bef
parent39157ec04095ab012d11db23c462844634bfbb8f (diff)
New upstream version 16.11.6upstream/16.11.6
Change-Id: I7c0e5e32dc051256867f1db5600e269b4b917106 Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
-rw-r--r--doc/guides/rel_notes/release_16_11.rst11
-rw-r--r--lib/librte_eal/common/include/rte_version.h2
-rw-r--r--lib/librte_vhost/vhost.h19
-rw-r--r--lib/librte_vhost/vhost_user.c49
-rw-r--r--lib/librte_vhost/virtio_net.c355
-rw-r--r--pkg/dpdk.spec2
6 files changed, 378 insertions, 60 deletions
diff --git a/doc/guides/rel_notes/release_16_11.rst b/doc/guides/rel_notes/release_16_11.rst
index 3289c3c9..e398e291 100644
--- a/doc/guides/rel_notes/release_16_11.rst
+++ b/doc/guides/rel_notes/release_16_11.rst
@@ -1160,3 +1160,14 @@ Fixes in 16.11 LTS Release
* vhost: fix error code check when creating thread
* vhost: fix mbuf free
* vhost: protect active rings from async ring changes
+
+16.11.6
+~~~~~~~
+
+* vhost: add support for non-contiguous indirect descs tables (fixes CVE-2018-1059)
+* vhost: check all range is mapped when translating GPAs (fixes CVE-2018-1059)
+* vhost: ensure all range is mapped when translating QVAs (fixes CVE-2018-1059)
+* vhost: handle virtually non-contiguous buffers in Rx (fixes CVE-2018-1059)
+* vhost: handle virtually non-contiguous buffers in Rx-mrg (fixes CVE-2018-1059)
+* vhost: handle virtually non-contiguous buffers in Tx (fixes CVE-2018-1059)
+* vhost-user: fix deadlock in case of NUMA realloc
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 4a9f4821..17a00538 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -66,7 +66,7 @@ extern "C" {
/**
* Patch level number i.e. the z in yy.mm.z
*/
-#define RTE_VER_MINOR 5
+#define RTE_VER_MINOR 6
/**
* Extra string to be appended to version number
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 9f60ff81..c49db0c0 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -225,19 +225,24 @@ extern struct virtio_net *vhost_devices[MAX_VHOST_DEVICE];
/* Convert guest physical Address to host virtual address */
static inline uint64_t __attribute__((always_inline))
-gpa_to_vva(struct virtio_net *dev, uint64_t gpa)
+gpa_to_vva(struct virtio_net *dev, uint64_t gpa, uint64_t *len)
{
- struct virtio_memory_region *reg;
+ struct virtio_memory_region *r;
uint32_t i;
for (i = 0; i < dev->mem->nregions; i++) {
- reg = &dev->mem->regions[i];
- if (gpa >= reg->guest_phys_addr &&
- gpa < reg->guest_phys_addr + reg->size) {
- return gpa - reg->guest_phys_addr +
- reg->host_user_addr;
+ r = &dev->mem->regions[i];
+ if (gpa >= r->guest_phys_addr &&
+ gpa < r->guest_phys_addr + r->size) {
+
+ if (unlikely(*len > r->guest_phys_addr + r->size - gpa))
+ *len = r->guest_phys_addr + r->size - gpa;
+
+ return gpa - r->guest_phys_addr +
+ r->host_user_addr;
}
}
+ *len = 0;
return 0;
}
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index 80348dbf..550a1329 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -303,21 +303,26 @@ numa_realloc(struct virtio_net *dev, int index __rte_unused)
* used to convert the ring addresses to our address space.
*/
static uint64_t
-qva_to_vva(struct virtio_net *dev, uint64_t qva)
+qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len)
{
- struct virtio_memory_region *reg;
+ struct virtio_memory_region *r;
uint32_t i;
/* Find the region where the address lives. */
for (i = 0; i < dev->mem->nregions; i++) {
- reg = &dev->mem->regions[i];
+ r = &dev->mem->regions[i];
+
+ if (qva >= r->guest_user_addr &&
+ qva < r->guest_user_addr + r->size) {
- if (qva >= reg->guest_user_addr &&
- qva < reg->guest_user_addr + reg->size) {
- return qva - reg->guest_user_addr +
- reg->host_user_addr;
+ if (unlikely(*len > r->guest_user_addr + r->size - qva))
+ *len = r->guest_user_addr + r->size - qva;
+
+ return qva - r->guest_user_addr +
+ r->host_user_addr;
}
}
+ *len = 0;
return 0;
}
@@ -327,9 +332,12 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva)
* This function then converts these to our address space.
*/
static int
-vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
+vhost_user_set_vring_addr(struct virtio_net **pdev,
+ struct vhost_vring_addr *addr)
{
struct vhost_virtqueue *vq;
+ struct virtio_net *dev = *pdev;
+ uint64_t size, req_size;
if (dev->mem == NULL)
return -1;
@@ -338,30 +346,39 @@ vhost_user_set_vring_addr(struct virtio_net *dev, struct vhost_vring_addr *addr)
vq = dev->virtqueue[addr->index];
/* The addresses are converted from QEMU virtual to Vhost virtual. */
+ req_size = sizeof(struct vring_desc) * vq->size;
+ size = req_size;
vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev,
- addr->desc_user_addr);
- if (vq->desc == 0) {
+ addr->desc_user_addr, &size);
+ if (vq->desc == 0 || size != req_size) {
RTE_LOG(ERR, VHOST_CONFIG,
- "(%d) failed to find desc ring address.\n",
+ "(%d) failed to map desc ring address.\n",
dev->vid);
return -1;
}
dev = numa_realloc(dev, addr->index);
+ *pdev = dev;
+
vq = dev->virtqueue[addr->index];
+ req_size = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size;
+ size = req_size;
vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev,
- addr->avail_user_addr);
- if (vq->avail == 0) {
+ addr->avail_user_addr, &size);
+ if (vq->avail == 0 || size != req_size) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find avail ring address.\n",
dev->vid);
return -1;
}
+ req_size = sizeof(struct vring_used);
+ req_size += sizeof(struct vring_used_elem) * vq->size;
+ size = req_size;
vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev,
- addr->used_user_addr);
- if (vq->used == 0) {
+ addr->used_user_addr, &size);
+ if (vq->used == 0 || size != req_size) {
RTE_LOG(ERR, VHOST_CONFIG,
"(%d) failed to find used ring address.\n",
dev->vid);
@@ -1092,7 +1109,7 @@ vhost_user_msg_handler(int vid, int fd)
vhost_user_set_vring_num(dev, &msg.payload.state);
break;
case VHOST_USER_SET_VRING_ADDR:
- vhost_user_set_vring_addr(dev, &msg.payload.addr);
+ vhost_user_set_vring_addr(&dev, &msg.payload.addr);
break;
case VHOST_USER_SET_VRING_BASE:
vhost_user_set_vring_base(dev, &msg.payload.state);
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 0024f729..745cc53f 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -45,6 +45,7 @@
#include <rte_sctp.h>
#include <rte_arp.h>
#include <rte_spinlock.h>
+#include <rte_malloc.h>
#include "vhost.h"
@@ -101,6 +102,44 @@ is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb)
return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM;
}
+static inline struct vring_desc *__attribute__((always_inline))
+alloc_copy_ind_table(struct virtio_net *dev, struct vring_desc *desc)
+{
+ struct vring_desc *idesc;
+ uint64_t src, dst;
+ uint64_t len, remain = desc->len;
+ uint64_t desc_addr = desc->addr;
+
+ idesc = rte_malloc(__func__, desc->len, 0);
+ if (unlikely(!idesc))
+ return 0;
+
+ dst = (uint64_t)(uintptr_t)idesc;
+
+ while (remain) {
+ len = remain;
+ src = gpa_to_vva(dev, desc_addr, &len);
+ if (unlikely(!src || !len)) {
+ rte_free(idesc);
+ return 0;
+ }
+
+ rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len);
+
+ remain -= len;
+ dst += len;
+ desc_addr += len;
+ }
+
+ return idesc;
+}
+
+static inline void __attribute__((always_inline))
+free_ind_table(struct vring_desc *idesc)
+{
+ rte_free(idesc);
+}
+
static inline void __attribute__((always_inline))
do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t to, uint16_t from, uint16_t size)
@@ -216,14 +255,17 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
+ uint64_t desc_chunck_len;
struct vring_desc *desc;
- uint64_t desc_addr;
+ uint64_t desc_addr, desc_gaddr;
struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
/* A counter to avoid desc dead loop chain */
uint16_t nr_desc = 1;
desc = &descs[desc_idx];
- desc_addr = gpa_to_vva(dev, desc->addr);
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
+ desc_addr = gpa_to_vva(dev, desc_gaddr, &desc_chunck_len);
/*
* Checking of 'desc_addr' placed outside of 'unlikely' macro to avoid
* performance issue with some versions of gcc (4.8.4 and 5.3.0) which
@@ -235,12 +277,51 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
rte_prefetch0((void *)(uintptr_t)desc_addr);
virtio_enqueue_offload(m, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
- vhost_log_write(dev, desc->addr, dev->vhost_hlen);
- PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+ if (likely(desc_chunck_len >= dev->vhost_hlen)) {
+ copy_virtio_net_hdr(dev, desc_addr, virtio_hdr);
+
+ virtio_enqueue_offload(m,
+ (struct virtio_net_hdr *)(uintptr_t)desc_addr);
+ PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
+ } else {
+ uint64_t remain = dev->vhost_hlen;
+ uint64_t len;
+ uint64_t src = (uint64_t)(uintptr_t)&virtio_hdr, dst;
+ uint64_t guest_addr = desc_gaddr;
+
+ while (remain) {
+ len = remain;
+ dst = gpa_to_vva(dev, guest_addr, &len);
+ if (unlikely(!dst || !len))
+ return -1;
+
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src, len);
+
+ PRINT_PACKET(dev, (uintptr_t)dst, len, 0);
+ remain -= len;
+ guest_addr += len;
+ dst += len;
+ }
+ }
+
+ vhost_log_write(dev, desc_gaddr, dev->vhost_hlen);
- desc_offset = dev->vhost_hlen;
desc_avail = desc->len - dev->vhost_hlen;
+ if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += dev->vhost_hlen;
+ desc_addr = gpa_to_vva(dev,
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ desc_offset = 0;
+ } else {
+ desc_offset = dev->vhost_hlen;
+ desc_chunck_len -= dev->vhost_hlen;
+ }
mbuf_avail = rte_pktmbuf_data_len(m);
mbuf_offset = 0;
@@ -263,19 +344,31 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
return -1;
desc = &descs[desc->next];
- desc_addr = gpa_to_vva(dev, desc->addr);
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
+ desc_addr = gpa_to_vva(dev,
+ desc_gaddr, &desc_chunck_len);
if (unlikely(!desc_addr))
return -1;
desc_offset = 0;
desc_avail = desc->len;
+ } else if (unlikely(desc_chunck_len == 0)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += desc_offset;
+ desc_addr = gpa_to_vva(dev,
+ desc_gaddr, &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ desc_offset = 0;
}
cpy_len = RTE_MIN(desc_avail, mbuf_avail);
rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_write(dev, desc->addr + desc_offset, cpy_len);
+ vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
cpy_len, 0);
@@ -283,6 +376,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs,
mbuf_offset += cpy_len;
desc_avail -= cpy_len;
desc_offset += cpy_len;
+ desc_chunck_len -= cpy_len;
}
return 0;
@@ -305,6 +399,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
struct vring_desc *descs;
uint16_t used_idx;
uint32_t i, sz;
+ uint64_t dlen;
LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) {
@@ -346,17 +441,32 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
rte_prefetch0(&vq->desc[desc_indexes[0]]);
for (i = 0; i < count; i++) {
+ struct vring_desc *idesc = NULL;
uint16_t desc_idx = desc_indexes[i];
int err;
if (vq->desc[desc_idx].flags & VRING_DESC_F_INDIRECT) {
+ dlen = vq->desc[desc_idx].len;
descs = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
- vq->desc[desc_idx].addr);
+ vq->desc[desc_idx].addr, &dlen);
if (unlikely(!descs)) {
count = i;
break;
}
+ if (unlikely(dlen < vq->desc[desc_idx].len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idesc = alloc_copy_ind_table(dev,
+ &vq->desc[desc_idx]);
+ if (unlikely(!idesc))
+ break;
+
+ descs = idesc;
+ }
+
desc_idx = 0;
sz = vq->desc[desc_idx].len / sizeof(*descs);
} else {
@@ -375,6 +485,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
if (i + 1 < count)
rte_prefetch0(&vq->desc[desc_indexes[i+1]]);
+
+ if (unlikely(!!idesc))
+ free_ind_table(idesc);
}
rte_smp_wmb();
@@ -408,22 +521,40 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
uint32_t vec_id = *vec_idx;
uint32_t len = 0;
+ uint64_t dlen;
struct vring_desc *descs = vq->desc;
+ struct vring_desc *idesc = NULL;
*desc_chain_head = idx;
if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) {
+ dlen = vq->desc[idx].len;
descs = (struct vring_desc *)(uintptr_t)
- gpa_to_vva(dev, vq->desc[idx].addr);
+ gpa_to_vva(dev, vq->desc[idx].addr,
+ &dlen);
if (unlikely(!descs))
return -1;
+ if (unlikely(dlen < vq->desc[idx].len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idesc = alloc_copy_ind_table(dev, &vq->desc[idx]);
+ if (unlikely(!idesc))
+ return -1;
+
+ descs = idesc;
+ }
+
idx = 0;
}
while (1) {
- if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size))
+ if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) {
+ free_ind_table(idesc);
return -1;
+ }
len += descs[idx].len;
buf_vec[vec_id].buf_addr = descs[idx].addr;
@@ -440,6 +571,9 @@ fill_vec_buf(struct virtio_net *dev, struct vhost_virtqueue *vq,
*desc_chain_len = len;
*vec_idx = vec_id;
+ if (unlikely(!!idesc))
+ free_ind_table(idesc);
+
return 0;
}
@@ -493,8 +627,10 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
struct buf_vector *buf_vec, uint16_t num_buffers)
{
struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0};
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
uint32_t vec_idx = 0;
- uint64_t desc_addr;
+ uint64_t desc_addr, desc_gaddr;
+ uint64_t desc_chunck_len;
uint32_t mbuf_offset, mbuf_avail;
uint32_t desc_offset, desc_avail;
uint32_t cpy_len;
@@ -504,12 +640,19 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
if (unlikely(m == NULL))
return -1;
- desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
- if (buf_vec[vec_idx].buf_len < dev->vhost_hlen || !desc_addr)
+ desc_chunck_len = buf_vec[vec_idx].buf_len;
+ desc_gaddr = buf_vec[vec_idx].buf_addr;
+ desc_addr = gpa_to_vva(dev, desc_gaddr, &desc_chunck_len);
+ if (buf_vec[vec_idx].buf_len < dev->vhost_hlen ||
+ !desc_addr)
return -1;
hdr_mbuf = m;
hdr_addr = desc_addr;
+ if (unlikely(desc_chunck_len < dev->vhost_hlen))
+ hdr = &virtio_hdr;
+ else
+ hdr = (struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)hdr_addr;
hdr_phys_addr = buf_vec[vec_idx].buf_addr;
rte_prefetch0((void *)(uintptr_t)hdr_addr);
@@ -518,7 +661,21 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
dev->vid, num_buffers);
desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen;
- desc_offset = dev->vhost_hlen;
+ if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += dev->vhost_hlen;
+ desc_addr = gpa_to_vva(dev,
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ desc_offset = 0;
+ } else {
+ desc_offset = dev->vhost_hlen;
+ desc_chunck_len -= dev->vhost_hlen;
+ }
+
mbuf_avail = rte_pktmbuf_data_len(m);
mbuf_offset = 0;
@@ -526,7 +683,10 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
/* done with current desc buf, get the next one */
if (desc_avail == 0) {
vec_idx++;
- desc_addr = gpa_to_vva(dev, buf_vec[vec_idx].buf_addr);
+ desc_gaddr = buf_vec[vec_idx].buf_addr;
+ desc_chunck_len = buf_vec[vec_idx].buf_len;
+ desc_addr = gpa_to_vva(dev, desc_gaddr,
+ &desc_chunck_len);
if (unlikely(!desc_addr))
return -1;
@@ -534,6 +694,16 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
rte_prefetch0((void *)(uintptr_t)desc_addr);
desc_offset = 0;
desc_avail = buf_vec[vec_idx].buf_len;
+ } else if (unlikely(desc_chunck_len == 0)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += desc_offset;
+ desc_addr = gpa_to_vva(dev,
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ desc_offset = 0;
}
/* done with current mbuf, get the next one */
@@ -546,7 +716,33 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
if (hdr_addr) {
virtio_enqueue_offload(hdr_mbuf, &virtio_hdr.hdr);
- copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr);
+ if (likely(hdr != &virtio_hdr)) {
+ copy_virtio_net_hdr(dev, hdr_addr, virtio_hdr);
+ } else {
+ uint64_t len;
+ uint64_t remain = dev->vhost_hlen;
+ uint64_t src = (uint64_t)(uintptr_t)&virtio_hdr;
+ uint64_t dst;
+ uint64_t guest_addr = hdr_phys_addr;
+
+ while (remain) {
+ len = remain;
+ dst = gpa_to_vva(dev, guest_addr, &len);
+ if (unlikely(!dst || !len))
+ return -1;
+
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src,
+ len);
+
+ PRINT_PACKET(dev, (uintptr_t)dst,
+ len, 0);
+
+ remain -= len;
+ guest_addr += len;
+ dst += len;
+ }
+ }
vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen);
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
@@ -554,12 +750,11 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
hdr_addr = 0;
}
- cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_write(dev, buf_vec[vec_idx].buf_addr + desc_offset,
- cpy_len);
+ vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
cpy_len, 0);
@@ -567,6 +762,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m,
mbuf_offset += cpy_len;
desc_avail -= cpy_len;
desc_offset += cpy_len;
+ desc_chunck_len -= cpy_len;
}
return 0;
@@ -823,11 +1019,13 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
struct rte_mempool *mbuf_pool)
{
struct vring_desc *desc;
- uint64_t desc_addr;
+ uint64_t desc_addr, desc_gaddr;
uint32_t desc_avail, desc_offset;
uint32_t mbuf_avail, mbuf_offset;
uint32_t cpy_len;
+ uint64_t desc_chunck_len;
struct rte_mbuf *cur = m, *prev = m;
+ struct virtio_net_hdr tmp_hdr;
struct virtio_net_hdr *hdr = NULL;
/* A counter to avoid desc dead loop chain */
uint32_t nr_desc = 1;
@@ -837,13 +1035,43 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- desc_addr = gpa_to_vva(dev, desc->addr);
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
+ desc_addr = gpa_to_vva(dev, desc_gaddr, &desc_chunck_len);
if (unlikely(!desc_addr))
return -1;
if (virtio_net_with_host_offload(dev)) {
- hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
- rte_prefetch0(hdr);
+ if (unlikely(desc_chunck_len < sizeof(struct virtio_net_hdr))) {
+ uint64_t len = desc_chunck_len;
+ uint64_t remain = sizeof(struct virtio_net_hdr);
+ uint64_t src = desc_addr;
+ uint64_t dst = (uint64_t)(uintptr_t)&tmp_hdr;
+ uint64_t guest_addr = desc_gaddr;
+
+ /*
+ * No luck, the virtio-net header doesn't fit
+ * in a contiguous virtual area.
+ */
+ while (remain) {
+ len = remain;
+ src = gpa_to_vva(dev, guest_addr, &len);
+ if (unlikely(!src || !len))
+ return -1;
+
+ rte_memcpy((void *)(uintptr_t)dst,
+ (void *)(uintptr_t)src, len);
+
+ guest_addr += len;
+ remain -= len;
+ dst += len;
+ }
+
+ hdr = &tmp_hdr;
+ } else {
+ hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr);
+ rte_prefetch0(hdr);
+ }
}
/*
@@ -857,7 +1085,9 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- desc_addr = gpa_to_vva(dev, desc->addr);
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
+ desc_addr = gpa_to_vva(dev, desc_gaddr, &desc_chunck_len);
if (unlikely(!desc_addr))
return -1;
@@ -866,19 +1096,34 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
nr_desc += 1;
} else {
desc_avail = desc->len - dev->vhost_hlen;
- desc_offset = dev->vhost_hlen;
+
+ if (unlikely(desc_chunck_len < dev->vhost_hlen)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += dev->vhost_hlen;
+ desc_addr = gpa_to_vva(dev,
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ desc_offset = 0;
+ } else {
+ desc_offset = dev->vhost_hlen;
+ desc_chunck_len -= dev->vhost_hlen;
+ }
}
rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset));
- PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), desc_avail, 0);
+ PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
+ desc_chunck_len, 0);
mbuf_offset = 0;
mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM;
while (1) {
uint64_t hpa;
- cpy_len = RTE_MIN(desc_avail, mbuf_avail);
+ cpy_len = RTE_MIN(desc_chunck_len, mbuf_avail);
/*
* A desc buf might across two host physical pages that are
@@ -886,11 +1131,11 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
* will be copied even though zero copy is enabled.
*/
if (unlikely(dev->dequeue_zero_copy && (hpa = gpa_to_hpa(dev,
- desc->addr + desc_offset, cpy_len)))) {
+ desc_gaddr + desc_offset, cpy_len)))) {
cur->data_len = cpy_len;
cur->data_off = 0;
- cur->buf_addr = (void *)(uintptr_t)(desc_addr
- + desc_offset);
+ cur->buf_addr = (void *)(uintptr_t)(desc_gaddr
+ + desc_offset);
cur->buf_physaddr = hpa;
/*
@@ -908,6 +1153,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
mbuf_avail -= cpy_len;
mbuf_offset += cpy_len;
desc_avail -= cpy_len;
+ desc_chunck_len -= cpy_len;
desc_offset += cpy_len;
/* This desc reaches to its end, get the next one */
@@ -922,7 +1168,10 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
if (unlikely(desc->flags & VRING_DESC_F_INDIRECT))
return -1;
- desc_addr = gpa_to_vva(dev, desc->addr);
+ desc_chunck_len = desc->len;
+ desc_gaddr = desc->addr;
+ desc_addr = gpa_to_vva(dev, desc_gaddr,
+ &desc_chunck_len);
if (unlikely(!desc_addr))
return -1;
@@ -931,7 +1180,21 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
desc_offset = 0;
desc_avail = desc->len;
- PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0);
+ PRINT_PACKET(dev, (uintptr_t)desc_addr,
+ desc_chunck_len, 0);
+ } else if (unlikely(desc_chunck_len == 0)) {
+ desc_chunck_len = desc_avail;
+ desc_gaddr += desc_offset;
+ desc_addr = gpa_to_vva(dev,
+ desc_gaddr,
+ &desc_chunck_len);
+ if (unlikely(!desc_addr))
+ return -1;
+
+ desc_offset = 0;
+
+ PRINT_PACKET(dev, (uintptr_t)desc_addr,
+ desc_chunck_len, 0);
}
/*
@@ -1180,19 +1443,35 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
/* Prefetch descriptor index. */
rte_prefetch0(&vq->desc[desc_indexes[0]]);
for (i = 0; i < count; i++) {
- struct vring_desc *desc;
+ struct vring_desc *desc, *idesc = NULL;
uint16_t sz, idx;
+ uint64_t dlen;
int err;
if (likely(i + 1 < count))
rte_prefetch0(&vq->desc[desc_indexes[i + 1]]);
if (vq->desc[desc_indexes[i]].flags & VRING_DESC_F_INDIRECT) {
+ dlen = vq->desc[desc_indexes[i]].len;
desc = (struct vring_desc *)(uintptr_t)gpa_to_vva(dev,
- vq->desc[desc_indexes[i]].addr);
+ vq->desc[desc_indexes[i]].addr,
+ &dlen);
if (unlikely(!desc))
break;
+ if (unlikely(dlen < vq->desc[desc_indexes[i]].len)) {
+ /*
+ * The indirect desc table is not contiguous
+ * in process VA space, we have to copy it.
+ */
+ idesc = alloc_copy_ind_table(dev,
+ &vq->desc[desc_indexes[i]]);
+ if (unlikely(!idesc))
+ break;
+
+ desc = idesc;
+ }
+
rte_prefetch0(desc);
sz = vq->desc[desc_indexes[i]].len / sizeof(*desc);
idx = 0;
@@ -1206,12 +1485,14 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (unlikely(pkts[i] == NULL)) {
RTE_LOG(ERR, VHOST_DATA,
"Failed to allocate memory for mbuf.\n");
+ free_ind_table(idesc);
break;
}
err = copy_desc_to_mbuf(dev, desc, sz, pkts[i], idx, mbuf_pool);
if (unlikely(err)) {
rte_pktmbuf_free(pkts[i]);
+ free_ind_table(idesc);
break;
}
@@ -1221,6 +1502,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
zmbuf = get_zmbuf(vq);
if (!zmbuf) {
rte_pktmbuf_free(pkts[i]);
+ free_ind_table(idesc);
break;
}
zmbuf->mbuf = pkts[i];
@@ -1237,6 +1519,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
vq->nr_zmbuf += 1;
TAILQ_INSERT_TAIL(&vq->zmbuf_list, zmbuf, next);
}
+
+ if (unlikely(!!idesc))
+ free_ind_table(idesc);
}
vq->last_avail_idx += i;
diff --git a/pkg/dpdk.spec b/pkg/dpdk.spec
index 76bab303..3582066b 100644
--- a/pkg/dpdk.spec
+++ b/pkg/dpdk.spec
@@ -30,7 +30,7 @@
# OF THE POSSIBILITY OF SUCH DAMAGE.
Name: dpdk
-Version: 16.11.5
+Version: 16.11.6
Release: 1
Packager: packaging@6wind.com
URL: http://dpdk.org