diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/librte_cryptodev/rte_crypto_sym.h | 11 | ||||
-rw-r--r-- | lib/librte_cryptodev/rte_cryptodev_pmd.h | 8 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/eal/eal_pci.c | 6 | ||||
-rw-r--r-- | lib/librte_eal/bsdapp/nic_uio/nic_uio.c | 44 | ||||
-rw-r--r-- | lib/librte_eal/common/include/rte_dev.h | 14 | ||||
-rw-r--r-- | lib/librte_eal/common/include/rte_version.h | 2 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_interrupts.c | 4 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 1 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 3 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/kni/compat.h | 6 | ||||
-rw-r--r-- | lib/librte_eal/linuxapp/kni/kni_dev.h | 6 | ||||
-rw-r--r-- | lib/librte_ether/rte_ethdev.c | 2 | ||||
-rw-r--r-- | lib/librte_ether/rte_ethdev.h | 9 | ||||
-rw-r--r-- | lib/librte_kni/rte_kni.c | 46 | ||||
-rw-r--r-- | lib/librte_kni/rte_kni_fifo.h | 9 | ||||
-rw-r--r-- | lib/librte_mbuf/rte_mbuf_ptype.h | 3 | ||||
-rw-r--r-- | lib/librte_net/rte_ether.h | 4 | ||||
-rw-r--r-- | lib/librte_vhost/socket.c | 48 | ||||
-rw-r--r-- | lib/librte_vhost/vhost.c | 2 | ||||
-rw-r--r-- | lib/librte_vhost/vhost.h | 18 | ||||
-rw-r--r-- | lib/librte_vhost/virtio_net.c | 18 |
21 files changed, 192 insertions, 72 deletions
diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h index d3d38e4f..8f81d254 100644 --- a/lib/librte_cryptodev/rte_crypto_sym.h +++ b/lib/librte_cryptodev/rte_crypto_sym.h @@ -276,11 +276,10 @@ struct rte_crypto_auth_xform { * this specifies the length of the digest to be compared for the * session. * + * It is the caller's responsibility to ensure that the + * digest length is compliant with the hash algorithm being used. * If the value is less than the maximum length allowed by the hash, - * the result shall be truncated. If the value is greater than the - * maximum length allowed by the hash then an error will be generated - * by *rte_cryptodev_sym_session_create* or by the - * *rte_cryptodev_sym_enqueue_burst* if using session-less APIs. + * the result shall be truncated. */ uint32_t add_auth_data_length; @@ -571,7 +570,9 @@ struct rte_crypto_sym_op { phys_addr_t phys_addr; /**< Physical address of digest */ uint16_t length; - /**< Length of digest */ + /**< Length of digest. This must be the same value as + * @ref rte_crypto_auth_xform.digest_length. + */ } digest; /**< Digest parameters */ struct { diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h index c6a57945..e82dcf07 100644 --- a/lib/librte_cryptodev/rte_cryptodev_pmd.h +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -57,14 +57,6 @@ extern "C" { #include "rte_crypto.h" #include "rte_cryptodev.h" - -#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG -#define RTE_PMD_DEBUG_TRACE(...) \ - rte_pmd_debug_trace(__func__, __VA_ARGS__) -#else -#define RTE_PMD_DEBUG_TRACE(...) -#endif - struct rte_cryptodev_session { RTE_STD_C11 struct { diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c index 8b3ed881..f1de16ea 100644 --- a/lib/librte_eal/bsdapp/eal/eal_pci.c +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -323,6 +323,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) /* device is valid, add in list (sorted) */ if (TAILQ_EMPTY(&pci_device_list)) { + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } else { @@ -335,7 +336,7 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) continue; else if (ret < 0) { TAILQ_INSERT_BEFORE(dev2, dev, next); - return 0; + rte_eal_device_insert(&dev->device); } else { /* already registered */ dev2->kdrv = dev->kdrv; dev2->max_vfs = dev->max_vfs; @@ -343,9 +344,10 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf) dev->mem_resource, sizeof(dev->mem_resource)); free(dev); - return 0; } + return 0; } + rte_eal_device_insert(&dev->device); TAILQ_INSERT_TAIL(&pci_device_list, dev, next); } diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c index 99a4975c..4bd7545a 100644 --- a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c +++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c @@ -180,6 +180,10 @@ nic_uio_probe (device_t dev) unsigned int device = pci_get_slot(dev); unsigned int function = pci_get_function(dev); + char bdf_str[256]; + char *token, *remaining; + + /* First check if we found this on load */ for (i = 0; i < num_detached; i++) if (bus == pci_get_bus(detached_devices[i]) && device == pci_get_slot(detached_devices[i]) && @@ -188,6 +192,45 @@ nic_uio_probe (device_t dev) return BUS_PROBE_SPECIFIC; } + /* otherwise check if it's a new device and if it matches the BDF */ + memset(bdf_str, 0, sizeof(bdf_str)); + TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); + remaining = bdf_str; + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + bus = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + device = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + function = strtol(token, NULL, 10); + + if (bus == pci_get_bus(dev) && + device == pci_get_slot(dev) && + function == pci_get_function(dev)) { + + if (num_detached < MAX_DETACHED_DEVICES) { + printf("%s: probed dev=%p\n", + __func__, dev); + detached_devices[num_detached++] = dev; + device_set_desc(dev, "DPDK PCI Device"); + return BUS_PROBE_SPECIFIC; + } else { + printf("%s: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n", + __func__, MAX_DETACHED_DEVICES, + dev); + break; + } + } + } + return ENXIO; } @@ -248,6 +291,7 @@ nic_uio_load(void) memset(bdf_str, 0, sizeof(bdf_str)); TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); remaining = bdf_str; + printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str); /* * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f". * But the code below does not try differentiate between : and , diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 8840380d..dcf33d0e 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -49,6 +49,7 @@ extern "C" { #include <stdio.h> #include <sys/queue.h> +#include <rte_config.h> #include <rte_log.h> __attribute__((format(printf, 2, 0))) @@ -70,6 +71,19 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); } +/* + * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the + * RTE_*_RET() macros defined below is compiled in debug mode. + */ +#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \ + defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \ + defined(RTE_LIBRTE_EVENTDEV_DEBUG) +#define RTE_PMD_DEBUG_TRACE(...) \ + rte_pmd_debug_trace(__func__, __VA_ARGS__) +#else +#define RTE_PMD_DEBUG_TRACE(...) (void)0 +#endif + /* Macros for checking for restricting functions to primary instance only */ #define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \ if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 0de35fb7..5bbe906b 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -66,7 +66,7 @@ extern "C" { /** * Patch level number i.e. the z in yy.mm.z */ -#define RTE_VER_MINOR 1 +#define RTE_VER_MINOR 2 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 47a3b20a..368863f9 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -194,14 +194,14 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) { irq_set = (struct vfio_irq_set *) irq_set_buf; irq_set->argsz = len; irq_set->count = 1; - irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK; irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; irq_set->start = 0; ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); if (ret) { - RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n", intr_handle->fd); return -1; } diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c index 3e4ffb57..aac05d7a 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -38,6 +38,7 @@ #include <inttypes.h> #include <sys/stat.h> #include <sys/mman.h> +#include <sys/sysmacros.h> #include <linux/pci_regs.h> #if defined(RTE_ARCH_X86) diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c index 5f478c59..7d8b9fb2 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -355,7 +355,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev) } else { /* if we're in a secondary process, just find our tailq entry */ TAILQ_FOREACH(vfio_res, vfio_res_list, next) { - if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + if (rte_eal_compare_pci_addr(&vfio_res->pci_addr, + &dev->addr)) continue; break; } diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 78da08e5..d96275af 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -2,6 +2,8 @@ * Minimal wrappers to allow compiling kni on older kernels. */ +#include <linux/version.h> + #ifndef RHEL_RELEASE_VERSION #define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) #endif @@ -67,3 +69,7 @@ (LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34))) #undef NET_NAME_UNKNOWN #endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) +#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER +#endif diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h index 58cbadd3..f0f6e61f 100644 --- a/lib/librte_eal/linuxapp/kni/kni_dev.h +++ b/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -30,9 +30,15 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include "compat.h" + #include <linux/if.h> #include <linux/wait.h> +#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER +#include <linux/sched/signal.h> +#else #include <linux/sched.h> +#endif #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/list.h> diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 5a317594..ea545250 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -1051,8 +1051,10 @@ rte_eth_dev_close(uint8_t port_id) dev->data->dev_started = 0; (*dev->dev_ops->dev_close)(dev); + dev->data->nb_rx_queues = 0; rte_free(dev->data->rx_queues); dev->data->rx_queues = NULL; + dev->data->nb_tx_queues = 0; rte_free(dev->data->tx_queues); dev->data->tx_queues = NULL; } diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 96781792..11ec1fa8 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -1001,15 +1001,6 @@ struct rte_eth_dev_callback; /** @internal Structure to keep track of registered callbacks */ TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); - -#ifdef RTE_LIBRTE_ETHDEV_DEBUG -#define RTE_PMD_DEBUG_TRACE(...) \ - rte_pmd_debug_trace(__func__, __VA_ARGS__) -#else -#define RTE_PMD_DEBUG_TRACE(...) -#endif - - /* Macros to check for valid port */ #define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \ if (!rte_eth_dev_is_valid_port(port_id)) { \ diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index a80cefd2..c3f9208c 100644 --- a/lib/librte_kni/rte_kni.c +++ b/lib/librte_kni/rte_kni.c @@ -451,17 +451,35 @@ kni_free_fifo(struct rte_kni_fifo *fifo) } while (ret); } +static void * +va2pa(struct rte_mbuf *m) +{ + return (void *)((unsigned long)m - + ((unsigned long)m->buf_addr - + (unsigned long)m->buf_physaddr)); +} + +static void +obj_free(struct rte_mempool *mp __rte_unused, void *opaque, void *obj, + unsigned obj_idx __rte_unused) +{ + struct rte_mbuf *m = obj; + void *mbuf_phys = opaque; + + if (va2pa(m) == mbuf_phys) + rte_pktmbuf_free(m); +} + static void -kni_free_fifo_phy(struct rte_kni_fifo *fifo) +kni_free_fifo_phy(struct rte_mempool *mp, struct rte_kni_fifo *fifo) { void *mbuf_phys; int ret; do { ret = kni_fifo_get(fifo, &mbuf_phys, 1); - /* - * TODO: free mbufs - */ + if (ret) + rte_mempool_obj_iter(mp, obj_free, mbuf_phys); } while (ret); } @@ -470,6 +488,7 @@ rte_kni_release(struct rte_kni *kni) { struct rte_kni_device_info dev_info; uint32_t slot_id; + uint32_t retry = 5; if (!kni || !kni->in_use) return -1; @@ -481,9 +500,16 @@ rte_kni_release(struct rte_kni *kni) } /* mbufs in all fifo should be released, except request/response */ + + /* wait until all rxq packets processed by kernel */ + while (kni_fifo_count(kni->rx_q) && retry--) + usleep(1000); + + if (kni_fifo_count(kni->rx_q)) + RTE_LOG(ERR, KNI, "Fail to free all Rx-q items\n"); + + kni_free_fifo_phy(kni->pktmbuf_pool, kni->alloc_q); kni_free_fifo(kni->tx_q); - kni_free_fifo_phy(kni->rx_q); - kni_free_fifo_phy(kni->alloc_q); kni_free_fifo(kni->free_q); slot_id = kni->slot_id; @@ -549,14 +575,6 @@ rte_kni_handle_request(struct rte_kni *kni) return 0; } -static void * -va2pa(struct rte_mbuf *m) -{ - return (void *)((unsigned long)m - - ((unsigned long)m->buf_addr - - (unsigned long)m->buf_physaddr)); -} - unsigned rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num) { diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h index 8cb85873..c7cd5c26 100644 --- a/lib/librte_kni/rte_kni_fifo.h +++ b/lib/librte_kni/rte_kni_fifo.h @@ -91,3 +91,12 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) fifo->read = new_read; return i; } + +/** + * Get the num of elements in the fifo + */ +static inline uint32_t +kni_fifo_count(struct rte_kni_fifo *fifo) +{ + return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1); +} diff --git a/lib/librte_mbuf/rte_mbuf_ptype.h b/lib/librte_mbuf/rte_mbuf_ptype.h index ff6de9d1..a3269c4c 100644 --- a/lib/librte_mbuf/rte_mbuf_ptype.h +++ b/lib/librte_mbuf/rte_mbuf_ptype.h @@ -91,6 +91,9 @@ * RTE_PTYPE_INNER_L4_UDP. */ +#include <stddef.h> +#include <stdint.h> + #ifdef __cplusplus extern "C" { #endif diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h index ff3d0654..5edf66c3 100644 --- a/lib/librte_net/rte_ether.h +++ b/lib/librte_net/rte_ether.h @@ -357,7 +357,7 @@ static inline int rte_vlan_strip(struct rte_mbuf *m) return -1; struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); - m->ol_flags |= PKT_RX_VLAN_PKT; + m->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); /* Copy ether header over rather than moving whole packet */ @@ -407,6 +407,8 @@ static inline int rte_vlan_insert(struct rte_mbuf **m) vh = (struct vlan_hdr *) (nh + 1); vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci); + (*m)->ol_flags &= ~PKT_RX_VLAN_STRIPPED; + return 0; } diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index aaa9c270..84e05951 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -52,14 +52,18 @@ #include "vhost.h" #include "vhost_user.h" + +TAILQ_HEAD(vhost_user_connection_list, vhost_user_connection); + /* * Every time rte_vhost_driver_register() is invoked, an associated * vhost_user_socket struct will be created. */ struct vhost_user_socket { + struct vhost_user_connection_list conn_list; + pthread_mutex_t conn_mutex; char *path; int listenfd; - int connfd; bool is_server; bool reconnect; bool dequeue_zero_copy; @@ -67,7 +71,10 @@ struct vhost_user_socket { struct vhost_user_connection { struct vhost_user_socket *vsocket; + int connfd; int vid; + + TAILQ_ENTRY(vhost_user_connection) next; }; #define MAX_VHOST_SOCKET 1024 @@ -209,19 +216,24 @@ vhost_user_add_connection(int fd, struct vhost_user_socket *vsocket) RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", vid); - vsocket->connfd = fd; + conn->connfd = fd; conn->vsocket = vsocket; conn->vid = vid; ret = fdset_add(&vhost_user.fdset, fd, vhost_user_read_cb, NULL, conn); if (ret < 0) { - vsocket->connfd = -1; + conn->connfd = -1; free(conn); close(fd); RTE_LOG(ERR, VHOST_CONFIG, "failed to add fd %d into vhost server fdset\n", fd); + return; } + + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next); + pthread_mutex_unlock(&vsocket->conn_mutex); } /* call back when there is new vhost-user connection from client */ @@ -247,10 +259,14 @@ vhost_user_read_cb(int connfd, void *dat, int *remove) ret = vhost_user_msg_handler(conn->vid, connfd); if (ret < 0) { - vsocket->connfd = -1; close(connfd); *remove = 1; vhost_destroy_device(conn->vid); + + pthread_mutex_lock(&vsocket->conn_mutex); + TAILQ_REMOVE(&vsocket->conn_list, conn, next); + pthread_mutex_unlock(&vsocket->conn_mutex); + free(conn); if (vsocket->reconnect) @@ -448,7 +464,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket) return 0; } - RTE_LOG(ERR, VHOST_CONFIG, + RTE_LOG(WARNING, VHOST_CONFIG, "failed to connect to %s: %s\n", path, strerror(errno)); @@ -457,7 +473,7 @@ vhost_user_create_client(struct vhost_user_socket *vsocket) return -1; } - RTE_LOG(ERR, VHOST_CONFIG, "%s: reconnecting...\n", path); + RTE_LOG(INFO, VHOST_CONFIG, "%s: reconnecting...\n", path); reconn = malloc(sizeof(*reconn)); if (reconn == NULL) { RTE_LOG(ERR, VHOST_CONFIG, @@ -502,7 +518,8 @@ rte_vhost_driver_register(const char *path, uint64_t flags) goto out; memset(vsocket, 0, sizeof(struct vhost_user_socket)); vsocket->path = strdup(path); - vsocket->connfd = -1; + TAILQ_INIT(&vsocket->conn_list); + pthread_mutex_init(&vsocket->conn_mutex, NULL); vsocket->dequeue_zero_copy = flags & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; if ((flags & RTE_VHOST_USER_CLIENT) != 0) { @@ -565,7 +582,7 @@ rte_vhost_driver_unregister(const char *path) { int i; int count; - struct vhost_user_connection *conn; + struct vhost_user_connection *conn, *next; pthread_mutex_lock(&vhost_user.mutex); @@ -581,15 +598,22 @@ rte_vhost_driver_unregister(const char *path) vhost_user_remove_reconnect(vsocket); } - conn = fdset_del(&vhost_user.fdset, vsocket->connfd); - if (conn) { + pthread_mutex_lock(&vsocket->conn_mutex); + for (conn = TAILQ_FIRST(&vsocket->conn_list); + conn != NULL; + conn = next) { + next = TAILQ_NEXT(conn, next); + + fdset_del(&vhost_user.fdset, conn->connfd); RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", - vsocket->connfd, path); - close(vsocket->connfd); + conn->connfd, path); + close(conn->connfd); vhost_destroy_device(conn->vid); + TAILQ_REMOVE(&vsocket->conn_list, conn, next); free(conn); } + pthread_mutex_unlock(&vsocket->conn_mutex); free(vsocket->path); free(vsocket); diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c index e4150934..3c3f6a42 100644 --- a/lib/librte_vhost/vhost.c +++ b/lib/librte_vhost/vhost.c @@ -56,7 +56,7 @@ (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ (1ULL << VIRTIO_NET_F_CTRL_RX) | \ (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ - (VHOST_SUPPORTS_MQ) | \ + (1ULL << VIRTIO_NET_F_MQ) | \ (1ULL << VIRTIO_F_VERSION_1) | \ (1ULL << VHOST_F_LOG_ALL) | \ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 22564f1c..d97df1d8 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -110,25 +110,15 @@ struct vhost_virtqueue { uint16_t shadow_used_idx; } __rte_cache_aligned; -/* Old kernels have no such macro defined */ +/* Old kernels have no such macros defined */ #ifndef VIRTIO_NET_F_GUEST_ANNOUNCE #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 #endif - -/* - * Make an extra wrapper for VIRTIO_NET_F_MQ and - * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are - * introduced since kernel v3.8. This makes our - * code buildable for older kernel. - */ -#ifdef VIRTIO_NET_F_MQ - #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX - #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) -#else - #define VHOST_MAX_QUEUE_PAIRS 1 - #define VHOST_SUPPORTS_MQ 0 +#ifndef VIRTIO_NET_F_MQ + #define VIRTIO_NET_F_MQ 22 #endif +#define VHOST_MAX_QUEUE_PAIRS 0x80 /* * Define virtio 1.0 for older kernels diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 337470d6..ea027f14 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -905,6 +905,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, "allocate memory for mbuf.\n"); return -1; } + if (unlikely(dev->dequeue_zero_copy)) + rte_mbuf_refcnt_update(cur, 1); prev->next = cur; prev->data_len = mbuf_offset; @@ -1056,9 +1058,21 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, * array, to looks like that guest actually send such packet. * * Check user_send_rarp() for more information. + * + * broadcast_rarp shares a cacheline in the virtio_net structure + * with some fields that are accessed during enqueue and + * rte_atomic16_cmpset() causes a write if using cmpxchg. This could + * result in false sharing between enqueue and dequeue. + * + * Prevent unnecessary false sharing by reading broadcast_rarp first + * and only performing cmpset if the read indicates it is likely to + * be set. */ - if (unlikely(rte_atomic16_cmpset((volatile uint16_t *) - &dev->broadcast_rarp.cnt, 1, 0))) { + + if (unlikely(rte_atomic16_read(&dev->broadcast_rarp) && + rte_atomic16_cmpset((volatile uint16_t *) + &dev->broadcast_rarp.cnt, 1, 0))) { + rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool); if (rarp_mbuf == NULL) { RTE_LOG(ERR, VHOST_DATA, |