summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/librte_cryptodev/rte_crypto_sym.h11
-rw-r--r--lib/librte_cryptodev/rte_cryptodev_pmd.h8
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_pci.c6
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/nic_uio.c44
-rw-r--r--lib/librte_eal/common/include/rte_dev.h14
-rw-r--r--lib/librte_eal/common/include/rte_version.h2
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_interrupts.c4
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_uio.c1
-rw-r--r--lib/librte_eal/linuxapp/eal/eal_pci_vfio.c3
-rw-r--r--lib/librte_eal/linuxapp/kni/compat.h6
-rw-r--r--lib/librte_eal/linuxapp/kni/kni_dev.h6
-rw-r--r--lib/librte_ether/rte_ethdev.c2
-rw-r--r--lib/librte_ether/rte_ethdev.h9
-rw-r--r--lib/librte_kni/rte_kni.c46
-rw-r--r--lib/librte_kni/rte_kni_fifo.h9
-rw-r--r--lib/librte_mbuf/rte_mbuf_ptype.h3
-rw-r--r--lib/librte_net/rte_ether.h4
-rw-r--r--lib/librte_vhost/socket.c48
-rw-r--r--lib/librte_vhost/vhost.c2
-rw-r--r--lib/librte_vhost/vhost.h18
-rw-r--r--lib/librte_vhost/virtio_net.c18
21 files changed, 192 insertions, 72 deletions
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h
index d3d38e4f..8f81d254 100644
--- a/lib/librte_cryptodev/rte_crypto_sym.h
+++ b/lib/librte_cryptodev/rte_crypto_sym.h
@@ -276,11 +276,10 @@ struct rte_crypto_auth_xform {
* this specifies the length of the digest to be compared for the
* session.
*
+ * It is the caller's responsibility to ensure that the
+ * digest length is compliant with the hash algorithm being used.
* If the value is less than the maximum length allowed by the hash,
- * the result shall be truncated. If the value is greater than the
- * maximum length allowed by the hash then an error will be generated
- * by *rte_cryptodev_sym_session_create* or by the
- * *rte_cryptodev_sym_enqueue_burst* if using session-less APIs.
+ * the result shall be truncated.
*/
uint32_t add_auth_data_length;
@@ -571,7 +570,9 @@ struct rte_crypto_sym_op {
phys_addr_t phys_addr;
/**< Physical address of digest */
uint16_t length;
- /**< Length of digest */
+ /**< Length of digest. This must be the same value as
+ * @ref rte_crypto_auth_xform.digest_length.
+ */
} digest; /**< Digest parameters */
struct {
diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h
index c6a57945..e82dcf07 100644
--- a/lib/librte_cryptodev/rte_cryptodev_pmd.h
+++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h
@@ -57,14 +57,6 @@ extern "C" {
#include "rte_crypto.h"
#include "rte_cryptodev.h"
-
-#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG
-#define RTE_PMD_DEBUG_TRACE(...) \
- rte_pmd_debug_trace(__func__, __VA_ARGS__)
-#else
-#define RTE_PMD_DEBUG_TRACE(...)
-#endif
-
struct rte_cryptodev_session {
RTE_STD_C11
struct {
diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 8b3ed881..f1de16ea 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -323,6 +323,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
/* device is valid, add in list (sorted) */
if (TAILQ_EMPTY(&pci_device_list)) {
+ rte_eal_device_insert(&dev->device);
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
}
else {
@@ -335,7 +336,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
continue;
else if (ret < 0) {
TAILQ_INSERT_BEFORE(dev2, dev, next);
- return 0;
+ rte_eal_device_insert(&dev->device);
} else { /* already registered */
dev2->kdrv = dev->kdrv;
dev2->max_vfs = dev->max_vfs;
@@ -343,9 +344,10 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
dev->mem_resource,
sizeof(dev->mem_resource));
free(dev);
- return 0;
}
+ return 0;
}
+ rte_eal_device_insert(&dev->device);
TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
}
diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
index 99a4975c..4bd7545a 100644
--- a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
+++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
@@ -180,6 +180,10 @@ nic_uio_probe (device_t dev)
unsigned int device = pci_get_slot(dev);
unsigned int function = pci_get_function(dev);
+ char bdf_str[256];
+ char *token, *remaining;
+
+ /* First check if we found this on load */
for (i = 0; i < num_detached; i++)
if (bus == pci_get_bus(detached_devices[i]) &&
device == pci_get_slot(detached_devices[i]) &&
@@ -188,6 +192,45 @@ nic_uio_probe (device_t dev)
return BUS_PROBE_SPECIFIC;
}
+ /* otherwise check if it's a new device and if it matches the BDF */
+ memset(bdf_str, 0, sizeof(bdf_str));
+ TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
+ remaining = bdf_str;
+ while (1) {
+ if (remaining == NULL || remaining[0] == '\0')
+ break;
+ token = strsep(&remaining, ",:");
+ if (token == NULL)
+ break;
+ bus = strtol(token, NULL, 10);
+ token = strsep(&remaining, ",:");
+ if (token == NULL)
+ break;
+ device = strtol(token, NULL, 10);
+ token = strsep(&remaining, ",:");
+ if (token == NULL)
+ break;
+ function = strtol(token, NULL, 10);
+
+ if (bus == pci_get_bus(dev) &&
+ device == pci_get_slot(dev) &&
+ function == pci_get_function(dev)) {
+
+ if (num_detached < MAX_DETACHED_DEVICES) {
+ printf("%s: probed dev=%p\n",
+ __func__, dev);
+ detached_devices[num_detached++] = dev;
+ device_set_desc(dev, "DPDK PCI Device");
+ return BUS_PROBE_SPECIFIC;
+ } else {
+ printf("%s: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
+ __func__, MAX_DETACHED_DEVICES,
+ dev);
+ break;
+ }
+ }
+ }
+
return ENXIO;
}
@@ -248,6 +291,7 @@ nic_uio_load(void)
memset(bdf_str, 0, sizeof(bdf_str));
TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
remaining = bdf_str;
+ printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str);
/*
* Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f".
* But the code below does not try differentiate between : and ,
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 8840380d..dcf33d0e 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -49,6 +49,7 @@ extern "C" {
#include <stdio.h>
#include <sys/queue.h>
+#include <rte_config.h>
#include <rte_log.h>
__attribute__((format(printf, 2, 0)))
@@ -70,6 +71,19 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer);
}
+/*
+ * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the
+ * RTE_*_RET() macros defined below is compiled in debug mode.
+ */
+#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \
+ defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \
+ defined(RTE_LIBRTE_EVENTDEV_DEBUG)
+#define RTE_PMD_DEBUG_TRACE(...) \
+ rte_pmd_debug_trace(__func__, __VA_ARGS__)
+#else
+#define RTE_PMD_DEBUG_TRACE(...) (void)0
+#endif
+
/* Macros for checking for restricting functions to primary instance only */
#define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \
if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 0de35fb7..5bbe906b 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -66,7 +66,7 @@ extern "C" {
/**
* Patch level number i.e. the z in yy.mm.z
*/
-#define RTE_VER_MINOR 1
+#define RTE_VER_MINOR 2
/**
* Extra string to be appended to version number
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 47a3b20a..368863f9 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -194,14 +194,14 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
irq_set->start = 0;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
if (ret) {
- RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+ RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
intr_handle->fd);
return -1;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index 3e4ffb57..aac05d7a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -38,6 +38,7 @@
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/mman.h>
+#include <sys/sysmacros.h>
#include <linux/pci_regs.h>
#if defined(RTE_ARCH_X86)
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 5f478c59..7d8b9fb2 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -355,7 +355,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
} else {
/* if we're in a secondary process, just find our tailq entry */
TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+ if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
+ &dev->addr))
continue;
break;
}
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
index 78da08e5..d96275af 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -2,6 +2,8 @@
* Minimal wrappers to allow compiling kni on older kernels.
*/
+#include <linux/version.h>
+
#ifndef RHEL_RELEASE_VERSION
#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
#endif
@@ -67,3 +69,7 @@
(LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
#undef NET_NAME_UNKNOWN
#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h
index 58cbadd3..f0f6e61f 100644
--- a/lib/librte_eal/linuxapp/kni/kni_dev.h
+++ b/lib/librte_eal/linuxapp/kni/kni_dev.h
@@ -30,9 +30,15 @@
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include "compat.h"
+
#include <linux/if.h>
#include <linux/wait.h>
+#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#include <linux/sched/signal.h>
+#else
#include <linux/sched.h>
+#endif
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/list.h>
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index 5a317594..ea545250 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -1051,8 +1051,10 @@ rte_eth_dev_close(uint8_t port_id)
dev->data->dev_started = 0;
(*dev->dev_ops->dev_close)(dev);
+ dev->data->nb_rx_queues = 0;
rte_free(dev->data->rx_queues);
dev->data->rx_queues = NULL;
+ dev->data->nb_tx_queues = 0;
rte_free(dev->data->tx_queues);
dev->data->tx_queues = NULL;
}
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 96781792..11ec1fa8 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1001,15 +1001,6 @@ struct rte_eth_dev_callback;
/** @internal Structure to keep track of registered callbacks */
TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback);
-
-#ifdef RTE_LIBRTE_ETHDEV_DEBUG
-#define RTE_PMD_DEBUG_TRACE(...) \
- rte_pmd_debug_trace(__func__, __VA_ARGS__)
-#else
-#define RTE_PMD_DEBUG_TRACE(...)
-#endif
-
-
/* Macros to check for valid port */
#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \
if (!rte_eth_dev_is_valid_port(port_id)) { \
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index a80cefd2..c3f9208c 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -451,17 +451,35 @@ kni_free_fifo(struct rte_kni_fifo *fifo)
} while (ret);
}
+static void *
+va2pa(struct rte_mbuf *m)
+{
+ return (void *)((unsigned long)m -
+ ((unsigned long)m->buf_addr -
+ (unsigned long)m->buf_physaddr));
+}
+
+static void
+obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj,
+ unsigned obj_idx __rte_unused)
+{
+ struct rte_mbuf *m = obj;
+ void *mbuf_phys = opaque;
+
+ if (va2pa(m) == mbuf_phys)
+ rte_pktmbuf_free(m);
+}
+
static void
-kni_free_fifo_phy(struct rte_kni_fifo *fifo)
+kni_free_fifo_phy(struct rte_mempool *mp, struct rte_kni_fifo *fifo)
{
void *mbuf_phys;
int ret;
do {
ret = kni_fifo_get(fifo, &mbuf_phys, 1);
- /*
- * TODO: free mbufs
- */
+ if (ret)
+ rte_mempool_obj_iter(mp, obj_free, mbuf_phys);
} while (ret);
}
@@ -470,6 +488,7 @@ rte_kni_release(struct rte_kni *kni)
{
struct rte_kni_device_info dev_info;
uint32_t slot_id;
+ uint32_t retry = 5;
if (!kni || !kni->in_use)
return -1;
@@ -481,9 +500,16 @@ rte_kni_release(struct rte_kni *kni)
}
/* mbufs in all fifo should be released, except request/response */
+
+ /* wait until all rxq packets processed by kernel */
+ while (kni_fifo_count(kni->rx_q) && retry--)
+ usleep(1000);
+
+ if (kni_fifo_count(kni->rx_q))
+ RTE_LOG(ERR, KNI, "Fail to free all Rx-q items\n");
+
+ kni_free_fifo_phy(kni->pktmbuf_pool, kni->alloc_q);
kni_free_fifo(kni->tx_q);
- kni_free_fifo_phy(kni->rx_q);
- kni_free_fifo_phy(kni->alloc_q);
kni_free_fifo(kni->free_q);
slot_id = kni->slot_id;
@@ -549,14 +575,6 @@ rte_kni_handle_request(struct rte_kni *kni)
return 0;
}
-static void *
-va2pa(struct rte_mbuf *m)
-{
- return (void *)((unsigned long)m -
- ((unsigned long)m->buf_addr -
- (unsigned long)m->buf_physaddr));
-}
-
unsigned
rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num)
{
diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h
index 8cb85873..c7cd5c26 100644
--- a/lib/librte_kni/rte_kni_fifo.h
+++ b/lib/librte_kni/rte_kni_fifo.h
@@ -91,3 +91,12 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
fifo->read = new_read;
return i;
}
+
+/**
+ * Get the num of elements in the fifo
+ */
+static inline uint32_t
+kni_fifo_count(struct rte_kni_fifo *fifo)
+{
+ return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
+}
diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h
index ff6de9d1..a3269c4c 100644
--- a/lib/librte_mbuf/rte_mbuf_ptype.h
+++ b/lib/librte_mbuf/rte_mbuf_ptype.h
@@ -91,6 +91,9 @@
* RTE_PTYPE_INNER_L4_UDP.
*/
+#include <stddef.h>
+#include <stdint.h>
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h
index ff3d0654..5edf66c3 100644
--- a/lib/librte_net/rte_ether.h
+++ b/lib/librte_net/rte_ether.h
@@ -357,7 +357,7 @@ static inline int rte_vlan_strip(struct rte_mbuf *m)
return -1;
struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1);
- m->ol_flags |= PKT_RX_VLAN_PKT;
+ m->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED;
m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci);
/* Copy ether header over rather than moving whole packet */
@@ -407,6 +407,8 @@ static inline int rte_vlan_insert(struct rte_mbuf **m)
vh = (struct vlan_hdr *) (nh + 1);
vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci);
+ (*m)->ol_flags &= ~PKT_RX_VLAN_STRIPPED;
+
return 0;
}
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index aaa9c270..84e05951 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -52,14 +52,18 @@
#include "vhost.h"
#include "vhost_user.h"
+
+TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection);
+
/*
* Every time rte_vhost_driver_register() is invoked, an associated
* vhost_user_socket struct will be created.
*/
struct vhost_user_socket {
+ struct vhost_user_connection_list conn_list;
+ pthread_mutex_t conn_mutex;
char *path;
int listenfd;
- int connfd;
bool is_server;
bool reconnect;
bool dequeue_zero_copy;
@@ -67,7 +71,10 @@ struct vhost_user_socket {
struct vhost_user_connection {
struct vhost_user_socket *vsocket;
+ int connfd;
int vid;
+
+ TAILQ_ENTRY(vhost_user_connection) next;
};
#define MAX_VHOST_SOCKET 1024
@@ -209,19 +216,24 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket)
RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid);
- vsocket->connfd = fd;
+ conn->connfd = fd;
conn->vsocket = vsocket;
conn->vid = vid;
ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb,
NULL, conn);
if (ret < 0) {
- vsocket->connfd = -1;
+ conn->connfd = -1;
free(conn);
close(fd);
RTE_LOG(ERR, VHOST_CONFIG,
"failed to add fd %d into vhost server fdset\n",
fd);
+ return;
}
+
+ pthread_mutex_lock(&vsocket->conn_mutex);
+ TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
+ pthread_mutex_unlock(&vsocket->conn_mutex);
}
/* call back when there is new vhost-user connection from client */
@@ -247,10 +259,14 @@ vhost_user_read_cb(int connfd, void *dat, int *remove)
ret = vhost_user_msg_handler(conn->vid, connfd);
if (ret < 0) {
- vsocket->connfd = -1;
close(connfd);
*remove = 1;
vhost_destroy_device(conn->vid);
+
+ pthread_mutex_lock(&vsocket->conn_mutex);
+ TAILQ_REMOVE(&vsocket->conn_list, conn, next);
+ pthread_mutex_unlock(&vsocket->conn_mutex);
+
free(conn);
if (vsocket->reconnect)
@@ -448,7 +464,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
return 0;
}
- RTE_LOG(ERR, VHOST_CONFIG,
+ RTE_LOG(WARNING, VHOST_CONFIG,
"failed to connect to %s: %s\n",
path, strerror(errno));
@@ -457,7 +473,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket)
return -1;
}
- RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path);
+ RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path);
reconn = malloc(sizeof(*reconn));
if (reconn == NULL) {
RTE_LOG(ERR, VHOST_CONFIG,
@@ -502,7 +518,8 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
goto out;
memset(vsocket, 0, sizeof(struct vhost_user_socket));
vsocket->path = strdup(path);
- vsocket->connfd = -1;
+ TAILQ_INIT(&vsocket->conn_list);
+ pthread_mutex_init(&vsocket->conn_mutex, NULL);
vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
@@ -565,7 +582,7 @@ rte_vhost_driver_unregister(const char *path)
{
int i;
int count;
- struct vhost_user_connection *conn;
+ struct vhost_user_connection *conn, *next;
pthread_mutex_lock(&vhost_user.mutex);
@@ -581,15 +598,22 @@ rte_vhost_driver_unregister(const char *path)
vhost_user_remove_reconnect(vsocket);
}
- conn = fdset_del(&vhost_user.fdset, vsocket->connfd);
- if (conn) {
+ pthread_mutex_lock(&vsocket->conn_mutex);
+ for (conn = TAILQ_FIRST(&vsocket->conn_list);
+ conn != NULL;
+ conn = next) {
+ next = TAILQ_NEXT(conn, next);
+
+ fdset_del(&vhost_user.fdset, conn->connfd);
RTE_LOG(INFO, VHOST_CONFIG,
"free connfd = %d for device '%s'\n",
- vsocket->connfd, path);
- close(vsocket->connfd);
+ conn->connfd, path);
+ close(conn->connfd);
vhost_destroy_device(conn->vid);
+ TAILQ_REMOVE(&vsocket->conn_list, conn, next);
free(conn);
}
+ pthread_mutex_unlock(&vsocket->conn_mutex);
free(vsocket->path);
free(vsocket);
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index e4150934..3c3f6a42 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -56,7 +56,7 @@
(1ULL << VIRTIO_NET_F_CTRL_VQ) | \
(1ULL << VIRTIO_NET_F_CTRL_RX) | \
(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \
- (VHOST_SUPPORTS_MQ) | \
+ (1ULL << VIRTIO_NET_F_MQ) | \
(1ULL << VIRTIO_F_VERSION_1) | \
(1ULL << VHOST_F_LOG_ALL) | \
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 22564f1c..d97df1d8 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -110,25 +110,15 @@ struct vhost_virtqueue {
uint16_t shadow_used_idx;
} __rte_cache_aligned;
-/* Old kernels have no such macro defined */
+/* Old kernels have no such macros defined */
#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE
#define VIRTIO_NET_F_GUEST_ANNOUNCE 21
#endif
-
-/*
- * Make an extra wrapper for VIRTIO_NET_F_MQ and
- * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are
- * introduced since kernel v3.8. This makes our
- * code buildable for older kernel.
- */
-#ifdef VIRTIO_NET_F_MQ
- #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX
- #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ)
-#else
- #define VHOST_MAX_QUEUE_PAIRS 1
- #define VHOST_SUPPORTS_MQ 0
+#ifndef VIRTIO_NET_F_MQ
+ #define VIRTIO_NET_F_MQ 22
#endif
+#define VHOST_MAX_QUEUE_PAIRS 0x80
/*
* Define virtio 1.0 for older kernels
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 337470d6..ea027f14 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -905,6 +905,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
"allocate memory for mbuf.\n");
return -1;
}
+ if (unlikely(dev->dequeue_zero_copy))
+ rte_mbuf_refcnt_update(cur, 1);
prev->next = cur;
prev->data_len = mbuf_offset;
@@ -1056,9 +1058,21 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
* array, to looks like that guest actually send such packet.
*
* Check user_send_rarp() for more information.
+ *
+ * broadcast_rarp shares a cacheline in the virtio_net structure
+ * with some fields that are accessed during enqueue and
+ * rte_atomic16_cmpset() causes a write if using cmpxchg. This could
+ * result in false sharing between enqueue and dequeue.
+ *
+ * Prevent unnecessary false sharing by reading broadcast_rarp first
+ * and only performing cmpset if the read indicates it is likely to
+ * be set.
*/
- if (unlikely(rte_atomic16_cmpset((volatile uint16_t *)
- &dev->broadcast_rarp.cnt, 1, 0))) {
+
+ if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) &&
+ rte_atomic16_cmpset((volatile uint16_t *)
+ &dev->broadcast_rarp.cnt, 1, 0))) {
+
rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool);
if (rarp_mbuf == NULL) {
RTE_LOG(ERR, VHOST_DATA,