author    | Luca Boccassi <luca.boccassi@gmail.com> | 2018-04-23 14:16:57 +0100
committer | Luca Boccassi <luca.boccassi@gmail.com> | 2018-04-23 14:17:34 +0100
commit    | 39157ec04095ab012d11db23c462844634bfbb8f (patch)
tree      | 643f83dc46445aa7834fe271ce2c21a5cb278cee /lib
parent    | 47d9763a1dd3103d732da9eec350cfc1cd784717 (diff)
New upstream version 16.11.5 (tag: upstream/16.11.5)
Change-Id: I47171042629a57c6958d50251351e668ca5f3d8b
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib')
29 files changed, 443 insertions, 58 deletions
diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c
index e8fb9087..9676d8b3 100644
--- a/lib/librte_eal/bsdapp/contigmem/contigmem.c
+++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/rwlock.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
+#include <sys/vmmeter.h>
 
 #include <machine/bus.h>
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index 3614da8d..248312d9 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -150,7 +150,7 @@ rte_eal_hugepage_attach(void)
 	/* Map the shared hugepage_info into the process address spaces */
 	hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ,
 			MAP_PRIVATE, fd_hugepage_info, 0);
-	if (hpi == NULL) {
+	if (hpi == MAP_FAILED) {
 		RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
 		goto error;
 	}
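For context: mmap(2) reports failure by returning MAP_FAILED ((void *)-1), never NULL, so the old check in rte_eal_hugepage_attach() could not catch a failed mapping. A minimal standalone illustration of the corrected pattern (not part of the patch):

    #include <stdio.h>
    #include <string.h>
    #include <errno.h>
    #include <sys/mman.h>

    int main(void)
    {
        /* Map one anonymous page; test against MAP_FAILED, not NULL. */
        void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            fprintf(stderr, "mmap: %s\n", strerror(errno));
            return 1;
        }
        munmap(p, 4096);
        return 0;
    }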
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 64f4e0ad..b58d85b7 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -236,7 +236,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 		return NULL;
 	}
 
-	const struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
+	struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
 
 	/* fill the zone in config */
 	mz = get_next_free_memzone();
@@ -244,6 +244,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
 	if (mz == NULL) {
 		RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
 				"in config!\n", __func__);
+		malloc_elem_free(elem);
 		rte_errno = ENOSPC;
 		return NULL;
 	}
diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c
index 367a6816..6f91ff9c 100644
--- a/lib/librte_eal/common/eal_common_pci_uio.c
+++ b/lib/librte_eal/common/eal_common_pci_uio.c
@@ -117,7 +117,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
 	dev->intr_handle.fd = -1;
 	dev->intr_handle.uio_cfg_fd = -1;
-	dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
 
 	/* secondary processes - use already recorded details */
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index fb4fccb4..37f5eff2 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -64,9 +64,9 @@ extern "C" {
  * occur before the STORE operations generated after.
  */
 #ifdef RTE_ARCH_64
-#define rte_wmb() {asm volatile("lwsync" : : : "memory"); }
+#define rte_wmb() asm volatile("lwsync" : : : "memory")
 #else
-#define rte_wmb() {asm volatile("sync" : : : "memory"); }
+#define rte_wmb() asm volatile("sync" : : : "memory")
 #endif
 
 /**
@@ -76,9 +76,9 @@ extern "C" {
  * occur before the LOAD operations generated after.
 */
 #ifdef RTE_ARCH_64
-#define rte_rmb() {asm volatile("lwsync" : : : "memory"); }
+#define rte_rmb() asm volatile("lwsync" : : : "memory")
 #else
-#define rte_rmb() {asm volatile("sync" : : : "memory"); }
+#define rte_rmb() asm volatile("sync" : : : "memory")
 #endif
 
 #define rte_smp_mb() rte_mb()
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
index 00b1cdf5..d12b679a 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -55,12 +55,52 @@ extern "C" {
 
 #define rte_rmb() _mm_lfence()
 
-#define rte_smp_mb() rte_mb()
-
 #define rte_smp_wmb() rte_compiler_barrier()
 
 #define rte_smp_rmb() rte_compiler_barrier()
 
+/*
+ * From Intel Software Development Manual; Vol 3;
+ * 8.2.2 Memory Ordering in P6 and More Recent Processor Families:
+ * ...
+ * . Reads are not reordered with other reads.
+ * . Writes are not reordered with older reads.
+ * . Writes to memory are not reordered with other writes,
+ *   with the following exceptions:
+ *   . streaming stores (writes) executed with the non-temporal move
+ *     instructions (MOVNTI, MOVNTQ, MOVNTDQ, MOVNTPS, and MOVNTPD); and
+ *   . string operations (see Section 8.2.4.1).
+ * ...
+ * . Reads may be reordered with older writes to different locations but not
+ * with older writes to the same location.
+ * . Reads or writes cannot be reordered with I/O instructions,
+ * locked instructions, or serializing instructions.
+ * . Reads cannot pass earlier LFENCE and MFENCE instructions.
+ * . Writes ... cannot pass earlier LFENCE, SFENCE, and MFENCE instructions.
+ * . LFENCE instructions cannot pass earlier reads.
+ * . SFENCE instructions cannot pass earlier writes ...
+ * . MFENCE instructions cannot pass earlier reads, writes ...
+ *
+ * As pointed by Java guys, that makes possible to use lock-prefixed
+ * instructions to get the same effect as mfence and on most modern HW
+ * that gives a better perfomance then using mfence:
+ * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
+ * Basic idea is to use lock prefixed add with some dummy memory location
+ * as the destination. From their experiments 128B(2 cache lines) below
+ * current stack pointer looks like a good candidate.
+ * So below we use that techinque for rte_smp_mb() implementation.
+ */
+
+static inline void __attribute__((always_inline))
+rte_smp_mb(void)
+{
+#ifdef RTE_ARCH_I686
+	asm volatile("lock addl $0, -128(%%esp); " ::: "memory");
+#else
+	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
+#endif
+}
+
 /*------------------------- 16 bit atomic operations -------------------------*/
 
 #ifndef RTE_FORCE_INTRINSICS
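For context: the comment block above is the rationale for the new rte_smp_mb(): a lock-prefixed add to a dummy stack slot acts as a full barrier like mfence, but cheaper on most current CPUs. The one reordering x86 does allow, stores passing later loads, is exactly what breaks flag-handshake code; a fragment showing where the full barrier is required (illustrative only; the flag layout is not DPDK API):

    /* Each thread raises its own flag, then checks the peer's flag.
     * Without a full barrier the store may be ordered after the load
     * and both threads can read 0, entering the section together. */
    volatile int flag[2];

    static int try_enter(int self)
    {
        flag[self] = 1;
        rte_smp_mb();                /* order the store before the load */
        return flag[1 - self] == 0;
    }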
diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h
index cab6fb4c..ec1dce03 100644
--- a/lib/librte_eal/common/include/rte_debug.h
+++ b/lib/librte_eal/common/include/rte_debug.h
@@ -86,7 +86,7 @@ void rte_dump_registers(void);
 #endif
 #define RTE_VERIFY(exp) do {                                                  \
 	if (unlikely(!(exp)))                                                 \
-		rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \
+		rte_panic("line %d\tassert \"%s\" failed\n", __LINE__, #exp); \
 } while (0)
 
 /*
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index e92737d2..4a9f4821 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -66,7 +66,7 @@ extern "C" {
 /**
  * Patch level number i.e. the z in yy.mm.z
  */
-#define RTE_VER_MINOR 4
+#define RTE_VER_MINOR 5
 
 /**
  * Extra string to be appended to version number
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 77a86151..e2bd3ac2 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -98,6 +98,7 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
 	if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
 		end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
 		new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
+		end_pt = new_data_start + size;
 		if (((end_pt - 1) & bmask) != (new_data_start & bmask))
 			return NULL;
 	}
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 267a4c6c..c731f1cd 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -178,12 +178,14 @@ malloc_heap_alloc(struct malloc_heap *heap,
  * Function to retrieve data for heap on given socket
  */
 int
-malloc_heap_get_stats(const struct malloc_heap *heap,
+malloc_heap_get_stats(struct malloc_heap *heap,
 		struct rte_malloc_socket_stats *socket_stats)
 {
 	size_t idx;
 	struct malloc_elem *elem;
 
+	rte_spinlock_lock(&heap->lock);
+
 	/* Initialise variables for heap */
 	socket_stats->free_count = 0;
 	socket_stats->heap_freesz_bytes = 0;
@@ -205,6 +207,8 @@ malloc_heap_get_stats(const struct malloc_heap *heap,
 	socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes -
 			socket_stats->heap_freesz_bytes);
 	socket_stats->alloc_count = heap->alloc_count;
+
+	rte_spinlock_unlock(&heap->lock);
 	return 0;
 }
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index 3ccbef0f..3b1166f0 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -57,7 +57,7 @@ malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size,
 		unsigned flags, size_t align, size_t bound);
 
 int
-malloc_heap_get_stats(const struct malloc_heap *heap,
+malloc_heap_get_stats(struct malloc_heap *heap,
 		struct rte_malloc_socket_stats *socket_stats);
 
 int
diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c
index 9765d1bd..4625fab0 100644
--- a/lib/librte_eal/common/rte_keepalive.c
+++ b/lib/librte_eal/common/rte_keepalive.c
@@ -42,8 +42,12 @@
 struct rte_keepalive {
 	/** Core Liveness. */
-	enum rte_keepalive_state __rte_cache_aligned state_flags[
-		RTE_KEEPALIVE_MAXCORES];
+	struct {
+		/*
+		 * Each element must be cache aligned to prevent false sharing.
+		 */
+		enum rte_keepalive_state core_state __rte_cache_aligned;
+	} live_data[RTE_KEEPALIVE_MAXCORES];
 
 	/** Last-seen-alive timestamps */
 	uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];
@@ -96,19 +100,22 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
 		if (keepcfg->active_cores[idx_core] == 0)
 			continue;
 
-		switch (keepcfg->state_flags[idx_core]) {
+		switch (keepcfg->live_data[idx_core].core_state) {
 		case RTE_KA_STATE_UNUSED:
 			break;
 		case RTE_KA_STATE_ALIVE: /* Alive */
-			keepcfg->state_flags[idx_core] = RTE_KA_STATE_MISSING;
+			keepcfg->live_data[idx_core].core_state =
+			    RTE_KA_STATE_MISSING;
 			keepcfg->last_alive[idx_core] = rte_rdtsc();
 			break;
 		case RTE_KA_STATE_MISSING: /* MIA */
 			print_trace("Core MIA. ", keepcfg, idx_core);
-			keepcfg->state_flags[idx_core] = RTE_KA_STATE_DEAD;
+			keepcfg->live_data[idx_core].core_state =
+			    RTE_KA_STATE_DEAD;
 			break;
 		case RTE_KA_STATE_DEAD: /* Dead */
-			keepcfg->state_flags[idx_core] = RTE_KA_STATE_GONE;
+			keepcfg->live_data[idx_core].core_state =
+			    RTE_KA_STATE_GONE;
 			print_trace("Core died. ", keepcfg, idx_core);
 			if (keepcfg->callback)
 				keepcfg->callback(
@@ -119,7 +126,8 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
 		case RTE_KA_STATE_GONE: /* Buried */
 			break;
 		case RTE_KA_STATE_DOZING: /* Core going idle */
-			keepcfg->state_flags[idx_core] = RTE_KA_STATE_SLEEP;
+			keepcfg->live_data[idx_core].core_state =
+			    RTE_KA_STATE_SLEEP;
 			keepcfg->last_alive[idx_core] = rte_rdtsc();
 			break;
 		case RTE_KA_STATE_SLEEP: /* Idled core */
@@ -129,7 +137,7 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
 			keepcfg->relay_callback(
 				keepcfg->relay_callback_data,
 				idx_core,
-				keepcfg->state_flags[idx_core],
+				keepcfg->live_data[idx_core].core_state,
 				keepcfg->last_alive[idx_core]
 				);
 	}
@@ -173,11 +181,11 @@ rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
 void
 rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
 {
-	keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_ALIVE;
+	keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_ALIVE;
 }
 
 void
 rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg)
 {
-	keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_DOZING;
+	keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_DOZING;
 }
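For context: the wrapper struct introduced in rte_keepalive is what makes the alignment effective. Applied to the array as a whole, __rte_cache_aligned only aligned the first element; applied to the element type, it pads each core's state to its own cache line, so cores marking themselves alive no longer invalidate each other's lines. A standalone sketch of the sizing effect (assumes a 64-byte line; the names are illustrative):

    #include <stdio.h>

    enum state { ALIVE, MISSING, DEAD };

    struct per_core {
        enum state s __attribute__((aligned(64))); /* one line each */
    };

    int main(void)
    {
        /* The 4-byte enum is padded to a full cache line. */
        printf("sizeof(struct per_core) = %zu\n", sizeof(struct per_core));
        return 0;
    }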
", keepcfg, idx_core); - keepcfg->state_flags[idx_core] = RTE_KA_STATE_DEAD; + keepcfg->live_data[idx_core].core_state = + RTE_KA_STATE_DEAD; break; case RTE_KA_STATE_DEAD: /* Dead */ - keepcfg->state_flags[idx_core] = RTE_KA_STATE_GONE; + keepcfg->live_data[idx_core].core_state = + RTE_KA_STATE_GONE; print_trace("Core died. ", keepcfg, idx_core); if (keepcfg->callback) keepcfg->callback( @@ -119,7 +126,8 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer, case RTE_KA_STATE_GONE: /* Buried */ break; case RTE_KA_STATE_DOZING: /* Core going idle */ - keepcfg->state_flags[idx_core] = RTE_KA_STATE_SLEEP; + keepcfg->live_data[idx_core].core_state = + RTE_KA_STATE_SLEEP; keepcfg->last_alive[idx_core] = rte_rdtsc(); break; case RTE_KA_STATE_SLEEP: /* Idled core */ @@ -129,7 +137,7 @@ rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer, keepcfg->relay_callback( keepcfg->relay_callback_data, idx_core, - keepcfg->state_flags[idx_core], + keepcfg->live_data[idx_core].core_state, keepcfg->last_alive[idx_core] ); } @@ -173,11 +181,11 @@ rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core) void rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) { - keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_ALIVE; + keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_ALIVE; } void rte_keepalive_mark_sleep(struct rte_keepalive *keepcfg) { - keepcfg->state_flags[rte_lcore_id()] = RTE_KA_STATE_DOZING; + keepcfg->live_data[rte_lcore_id()].core_state = RTE_KA_STATE_DOZING; } diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index 876ba381..b0d0c3c6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -623,7 +623,6 @@ pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, if (!found) return -1; - dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; p->base = start; RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 702f7a2e..dd451071 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -50,12 +50,15 @@ static struct vfio_config vfio_cfg; static int vfio_type1_dma_map(int); +static int vfio_spapr_dma_map(int); static int vfio_noiommu_dma_map(int); /* IOMMU types we support */ static const struct vfio_iommu_type iommu_types[] = { /* x86 IOMMU, otherwise known as type 1 */ { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + /* ppc64 IOMMU, otherwise known as spapr */ + { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map}, /* IOMMU-less mode */ { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, }; @@ -339,7 +342,7 @@ vfio_enable(const char *modname) int vfio_is_enabled(const char *modname) { - const int mod_available = rte_eal_check_module(modname); + const int mod_available = rte_eal_check_module(modname) > 0; return vfio_cfg.vfio_enabled && mod_available; } @@ -540,6 +543,93 @@ vfio_type1_dma_map(int vfio_container_fd) } static int +vfio_spapr_dma_map(int vfio_container_fd) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + int i, ret; + + struct vfio_iommu_spapr_register_memory reg = { + .argsz = sizeof(reg), + .flags = 0 + }; + struct vfio_iommu_spapr_tce_info info = { + .argsz = sizeof(info), + }; + struct vfio_iommu_spapr_tce_create create = { + .argsz = sizeof(create), + }; + struct vfio_iommu_spapr_tce_remove remove = { + .argsz = sizeof(remove), + }; + + /* query spapr iommu info */ + ret = 
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 29f7f3ec..ac31a4fc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -54,6 +54,31 @@
 
 #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
 
+#ifndef VFIO_SPAPR_TCE_v2_IOMMU
+#define RTE_VFIO_SPAPR 7
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)
+#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
+struct vfio_iommu_spapr_register_memory {
+	uint32_t argsz;
+	uint32_t flags;
+	uint64_t vaddr;
+	uint64_t size;
+};
+struct vfio_iommu_spapr_tce_create {
+	uint32_t argsz;
+	uint32_t page_shift;
+	uint64_t window_size;
+	uint32_t levels;
+};
+struct vfio_iommu_spapr_tce_remove {
+	uint32_t argsz;
+	uint64_t start_addr;
+};
+#else
+#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
 #define RTE_VFIO_NOIOMMU 8
 #else
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
index 0d781e48..38259330 100644
--- a/lib/librte_eal/linuxapp/igb_uio/compat.h
+++ b/lib/librte_eal/linuxapp/igb_uio/compat.h
@@ -123,3 +123,7 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev)
 }
 
 #endif /* < 3.3.0 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
+#define HAVE_ALLOC_IRQ_VECTORS 1
+#endif
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index df41e457..9f00f07a 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -325,7 +325,9 @@ static int
 igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	struct rte_uio_pci_dev *udev;
+#ifndef HAVE_ALLOC_IRQ_VECTORS
 	struct msix_entry msix_entry;
+#endif
 	int err;
 
 	udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
@@ -379,6 +381,7 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	switch (igbuio_intr_mode_preferred) {
 	case RTE_INTR_MODE_MSIX:
 		/* Only 1 msi-x vector needed */
+#ifndef HAVE_ALLOC_IRQ_VECTORS
 		msix_entry.entry = 0;
 		if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
 			dev_dbg(&dev->dev, "using MSI-X");
@@ -386,6 +389,15 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			udev->mode = RTE_INTR_MODE_MSIX;
 			break;
 		}
+#else
+		if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) {
+			dev_dbg(&dev->dev, "using MSI-X");
+			udev->info.irq_flags = IRQF_NO_THREAD;
+			udev->info.irq = pci_irq_vector(dev, 0);
+			udev->mode = RTE_INTR_MODE_MSIX;
+			break;
+		}
+#endif
 		/* fall back to INTX */
 	case RTE_INTR_MODE_LEGACY:
 		if (pci_intx_mask_supported(dev)) {
@@ -429,8 +441,13 @@ fail_remove_group:
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
 fail_release_iomem:
 	igbuio_pci_release_iomem(&udev->info);
+#ifndef HAVE_ALLOC_IRQ_VECTORS
 	if (udev->mode == RTE_INTR_MODE_MSIX)
 		pci_disable_msix(udev->pdev);
+#else
+	if (udev->mode == RTE_INTR_MODE_MSIX)
+		pci_free_irq_vectors(udev->pdev);
+#endif
 	pci_disable_device(dev);
 fail_free:
 	kfree(udev);
@@ -446,8 +463,13 @@ igbuio_pci_remove(struct pci_dev *dev)
 	sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
 	uio_unregister_device(&udev->info);
 	igbuio_pci_release_iomem(&udev->info);
+#ifndef HAVE_ALLOC_IRQ_VECTORS
 	if (udev->mode == RTE_INTR_MODE_MSIX)
 		pci_disable_msix(dev);
+#else
+	if (udev->mode == RTE_INTR_MODE_MSIX)
+		pci_free_irq_vectors(dev);
+#endif
 	pci_disable_device(dev);
 	pci_set_drvdata(dev, NULL);
 	kfree(udev);
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
index acb1a69b..3c683e17 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
@@ -137,11 +137,20 @@ static void igb_clean_all_tx_rings(struct igb_adapter *);
 static void igb_clean_all_rx_rings(struct igb_adapter *);
 static void igb_clean_tx_ring(struct igb_ring *);
 static void igb_set_rx_mode(struct net_device *);
+#ifdef HAVE_TIMER_SETUP
+static void igb_update_phy_info(struct timer_list *);
+static void igb_watchdog(struct timer_list *);
+#else
 static void igb_update_phy_info(unsigned long);
 static void igb_watchdog(unsigned long);
+#endif
 static void igb_watchdog_task(struct work_struct *);
 static void igb_dma_err_task(struct work_struct *);
+#ifdef HAVE_TIMER_SETUP
+static void igb_dma_err_timer(struct timer_list *);
+#else
 static void igb_dma_err_timer(unsigned long data);
+#endif
 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
 static struct net_device_stats *igb_get_stats(struct net_device *);
 static int igb_change_mtu(struct net_device *, int);
@@ -2806,6 +2815,12 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 	/* Check if Media Autosense is enabled */
 	if (hw->mac.type == e1000_82580)
 		igb_init_mas(adapter);
+#ifdef HAVE_TIMER_SETUP
+	timer_setup(&adapter->watchdog_timer, &igb_watchdog, 0);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		timer_setup(&adapter->dma_err_timer, &igb_dma_err_timer, 0);
+	timer_setup(&adapter->phy_info_timer, &igb_update_phy_info, 0);
+#else
 	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
 		    (unsigned long) adapter);
 	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
@@ -2813,6 +2828,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 		    (unsigned long) adapter);
 	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
 		    (unsigned long) adapter);
+#endif
 
 	INIT_WORK(&adapter->reset_task, igb_reset_task);
 	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
@@ -4543,9 +4559,15 @@ static void igb_spoof_check(struct igb_adapter *adapter)
 /* Need to wait a few seconds after link up to get diagnostic information from
  * the phy */
+#ifdef HAVE_TIMER_SETUP
+static void igb_update_phy_info(struct timer_list *t)
+{
+	struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
+#else
 static void igb_update_phy_info(unsigned long data)
 {
 	struct igb_adapter *adapter = (struct igb_adapter *) data;
+#endif
 	e1000_get_phy_info(&adapter->hw);
 }
 
@@ -4594,9 +4616,15 @@ bool igb_has_link(struct igb_adapter *adapter)
  * igb_watchdog - Timer Call-back
  * @data: pointer to adapter cast into an unsigned long
 **/
+#ifdef HAVE_TIMER_SETUP
+static void igb_watchdog(struct timer_list *t)
+{
+	struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
+#else
 static void igb_watchdog(unsigned long data)
 {
 	struct igb_adapter *adapter = (struct igb_adapter *)data;
+#endif
 	/* Do the rest outside of interrupt context */
 	schedule_work(&adapter->watchdog_task);
 }
@@ -4854,9 +4882,15 @@ dma_timer_reset:
 * igb_dma_err_timer - Timer Call-back
 * @data: pointer to adapter cast into an unsigned long
 **/
+#ifdef HAVE_TIMER_SETUP
+static void igb_dma_err_timer(struct timer_list *t)
+{
+	struct igb_adapter *adapter = from_timer(adapter, t, dma_err_timer);
+#else
 static void igb_dma_err_timer(unsigned long data)
 {
 	struct igb_adapter *adapter = (struct igb_adapter *)data;
+#endif
 	/* Do the rest outside of interrupt context */
 	schedule_work(&adapter->dma_err_task);
 }
@@ -10051,6 +10085,12 @@ int igb_kni_probe(struct pci_dev *pdev,
 		igb_init_mas(adapter);
 
 #ifdef NO_KNI
+#ifdef HAVE_TIMER_SETUP
+	timer_setup(&adapter->watchdog_timer, &igb_watchdog, 0);
+	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
+		timer_setup(&adapter->dma_err_timer, &igb_dma_err_timer, 0);
+	timer_setup(&adapter->phy_info_timer, &igb_update_phy_info, 0);
+#else
 	setup_timer(&adapter->watchdog_timer, &igb_watchdog,
 		    (unsigned long) adapter);
 	if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA)
@@ -10058,6 +10098,7 @@ int igb_kni_probe(struct pci_dev *pdev,
 		    (unsigned long) adapter);
 	setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
 		    (unsigned long) adapter);
+#endif
 
 	INIT_WORK(&adapter->reset_task, igb_reset_task);
 	INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index aea253b1..88bd18ec 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -3937,4 +3937,8 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #define HAVE_PCI_ENABLE_MSIX
 #endif
 
+#if defined(timer_setup) && defined(from_timer)
+#define HAVE_TIMER_SETUP
+#endif
+
 #endif /* _KCOMPAT_H_ */
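For context: HAVE_TIMER_SETUP is keyed on the presence of the timer_setup()/from_timer() macros rather than on a kernel version, so the igb timer hunks above compile against both callback conventions. The two styles being bridged, condensed into a kernel-style sketch (my_adapter is an illustrative name, not driver code):

    struct my_adapter {
        struct timer_list timer;
        /* ... driver state ... */
    };

    /* Old convention: context passed as an unsigned long. */
    static void cb_old(unsigned long data)
    {
        struct my_adapter *ad = (struct my_adapter *)data;
        /* ... */
    }
    /* registered with: setup_timer(&ad->timer, cb_old, (unsigned long)ad); */

    /* New convention: the timer itself is passed in; the context is
     * recovered by container_of() via from_timer(). */
    static void cb_new(struct timer_list *t)
    {
        struct my_adapter *ad = from_timer(ad, t, timer);
        /* ... */
    }
    /* registered with: timer_setup(&ad->timer, cb_new, 0); */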
diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c
index ea545250..4679dc69 100644
--- a/lib/librte_ether/rte_ethdev.c
+++ b/lib/librte_ether/rte_ethdev.c
@@ -94,6 +94,7 @@ static const struct rte_eth_xstats_name_off rte_stats_strings[] = {
 	{"tx_good_packets", offsetof(struct rte_eth_stats, opackets)},
 	{"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)},
 	{"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)},
+	{"rx_missed_errors", offsetof(struct rte_eth_stats, imissed)},
 	{"rx_errors", offsetof(struct rte_eth_stats, ierrors)},
 	{"tx_errors", offsetof(struct rte_eth_stats, oerrors)},
 	{"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats,
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index 11ec1fa8..dc6d0cc9 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -262,17 +262,17 @@ __extension__
 struct rte_eth_link {
 	uint32_t link_speed;      /**< ETH_SPEED_NUM_ */
 	uint16_t link_duplex  : 1;  /**< ETH_LINK_[HALF/FULL]_DUPLEX */
-	uint16_t link_autoneg : 1;  /**< ETH_LINK_SPEED_[AUTONEG/FIXED] */
+	uint16_t link_autoneg : 1;  /**< ETH_LINK_[AUTONEG/FIXED] */
 	uint16_t link_status  : 1;  /**< ETH_LINK_[DOWN/UP] */
 } __attribute__((aligned(8)));      /**< aligned for atomic64 read/write */
 
 /* Utility constants */
-#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection. */
-#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection. */
-#define ETH_LINK_DOWN        0 /**< Link is down. */
-#define ETH_LINK_UP          1 /**< Link is up. */
-#define ETH_LINK_FIXED       0 /**< No autonegotiation. */
-#define ETH_LINK_AUTONEG     1 /**< Autonegotiated. */
+#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection (see link_duplex). */
+#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection (see link_duplex). */
+#define ETH_LINK_DOWN        0 /**< Link is down (see link_status). */
+#define ETH_LINK_UP          1 /**< Link is up (see link_status). */
+#define ETH_LINK_FIXED       0 /**< No autonegotiation (see link_autoneg). */
+#define ETH_LINK_AUTONEG     1 /**< Autonegotiated (see link_autoneg). */
 
 /**
  * A structure used to configure the ring threshold registers of an RX/TX
@@ -1694,7 +1694,7 @@ struct rte_eth_dev_data {
 	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough */
 	int numa_node;  /**< NUMA node connection */
 	const char *drv_name;   /**< Driver name */
-};
+} __rte_cache_aligned;
 
 /** Device supports hotplug detach */
 #define RTE_ETH_DEV_DETACHABLE  0x0001
@@ -1965,7 +1965,7 @@ int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id,
 *   the DMA memory allocated for the transmit descriptors of the ring.
 * @param tx_conf
 *   The pointer to the configuration data to be used for the transmit queue.
-*   NULL value is allowed, in which case default RX configuration
+*   NULL value is allowed, in which case default TX configuration
 *   will be used.
 *   The *tx_conf* structure contains the following data:
 *   - The *tx_thresh* structure with the values of the Prefetch, Host, and
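For context: the corrected @param text matches the actual behaviour of rte_eth_tx_queue_setup(): passing a NULL tx_conf selects the driver's default TX configuration. A minimal call sketch against the 16.11 prototype (port, queue and descriptor values are illustrative):

    #include <rte_ethdev.h>

    static int setup_default_txq(uint8_t port)
    {
        /* 512 descriptors on socket 0, driver-default TX config. */
        return rte_eth_tx_queue_setup(port, 0, 512, 0, NULL);
    }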
{"tx_good_packets", offsetof(struct rte_eth_stats, opackets)}, {"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)}, {"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)}, + {"rx_missed_errors", offsetof(struct rte_eth_stats, imissed)}, {"rx_errors", offsetof(struct rte_eth_stats, ierrors)}, {"tx_errors", offsetof(struct rte_eth_stats, oerrors)}, {"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats, diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 11ec1fa8..dc6d0cc9 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -262,17 +262,17 @@ __extension__ struct rte_eth_link { uint32_t link_speed; /**< ETH_SPEED_NUM_ */ uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ - uint16_t link_autoneg : 1; /**< ETH_LINK_SPEED_[AUTONEG/FIXED] */ + uint16_t link_autoneg : 1; /**< ETH_LINK_[AUTONEG/FIXED] */ uint16_t link_status : 1; /**< ETH_LINK_[DOWN/UP] */ } __attribute__((aligned(8))); /**< aligned for atomic64 read/write */ /* Utility constants */ -#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection. */ -#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection. */ -#define ETH_LINK_DOWN 0 /**< Link is down. */ -#define ETH_LINK_UP 1 /**< Link is up. */ -#define ETH_LINK_FIXED 0 /**< No autonegotiation. */ -#define ETH_LINK_AUTONEG 1 /**< Autonegotiated. */ +#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection (see link_duplex). */ +#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection (see link_duplex). */ +#define ETH_LINK_DOWN 0 /**< Link is down (see link_status). */ +#define ETH_LINK_UP 1 /**< Link is up (see link_status). */ +#define ETH_LINK_FIXED 0 /**< No autonegotiation (see link_autoneg). */ +#define ETH_LINK_AUTONEG 1 /**< Autonegotiated (see link_autoneg). */ /** * A structure used to configure the ring threshold registers of an RX/TX @@ -1694,7 +1694,7 @@ struct rte_eth_dev_data { enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ int numa_node; /**< NUMA node connection */ const char *drv_name; /**< Driver name */ -}; +} __rte_cache_aligned; /** Device supports hotplug detach */ #define RTE_ETH_DEV_DETACHABLE 0x0001 @@ -1965,7 +1965,7 @@ int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, * the DMA memory allocated for the transmit descriptors of the ring. * @param tx_conf * The pointer to the configuration data to be used for the transmit queue. - * NULL value is allowed, in which case default RX configuration + * NULL value is allowed, in which case default TX configuration * will be used. 
diff --git a/lib/librte_pdump/rte_pdump.c b/lib/librte_pdump/rte_pdump.c
index e64bf598..1a5147aa 100644
--- a/lib/librte_pdump/rte_pdump.c
+++ b/lib/librte_pdump/rte_pdump.c
@@ -582,7 +582,7 @@ rte_pdump_init(const char *path)
 	if (ret != 0) {
 		RTE_LOG(ERR, PDUMP,
 			"Failed to create the pdump thread:%s, %s:%d\n",
-			strerror(errno), __func__, __LINE__);
+			strerror(ret), __func__, __LINE__);
 		return -1;
 	}
 	/* Set thread_name for aid in debugging. */
@@ -605,7 +605,7 @@ rte_pdump_uninit(void)
 	if (ret != 0) {
 		RTE_LOG(ERR, PDUMP,
 			"Failed to cancel the pdump thread:%s, %s:%d\n",
-			strerror(errno), __func__, __LINE__);
+			strerror(ret), __func__, __LINE__);
 		return -1;
 	}
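For context: both strerror() fixes above rest on the same fact: pthread_create() and pthread_cancel() return the error number directly and do not set errno, so strerror(errno) printed an unrelated message. The same reasoning drives the ret != 0 checks in librte_vhost/socket.c below, since pthread error codes are positive and a < 0 test never fires. A standalone illustration (not part of the patch):

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    static void *worker(void *arg) { (void)arg; return NULL; }

    int main(void)
    {
        pthread_t tid;
        int ret = pthread_create(&tid, NULL, worker, NULL);

        if (ret != 0) { /* ret holds the error code; errno is untouched */
            fprintf(stderr, "pthread_create: %s\n", strerror(ret));
            return 1;
        }
        pthread_join(tid, NULL);
        return 0;
    }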
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 84e05951..a95fbfbb 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -438,7 +438,7 @@ vhost_user_reconnect_init(void)
 
 	ret = pthread_create(&reconn_tid, NULL,
 			     vhost_user_client_reconnect, NULL);
-	if (ret < 0)
+	if (ret != 0)
 		RTE_LOG(ERR, VHOST_CONFIG, "failed to create reconnect thread");
 
 	return ret;
@@ -525,7 +525,7 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
 	if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
 		vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
 		if (vsocket->reconnect && reconn_tid == 0) {
-			if (vhost_user_reconnect_init() < 0) {
+			if (vhost_user_reconnect_init() != 0) {
 				free(vsocket->path);
 				free(vsocket);
 				goto out;
diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
index 3c3f6a42..36fdfb55 100644
--- a/lib/librte_vhost/vhost.c
+++ b/lib/librte_vhost/vhost.c
@@ -202,6 +202,8 @@ alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx)
 	dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ;
 
 	init_vring_queue_pair(dev, qp_idx);
+	rte_spinlock_init(&dev->virtqueue[virt_rx_q_idx]->access_lock);
+	rte_spinlock_init(&dev->virtqueue[virt_tx_q_idx]->access_lock);
 
 	dev->virt_qp_nb += 1;
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index d97df1d8..9f60ff81 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -91,6 +91,8 @@ struct vhost_virtqueue {
 	/* Backend value to determine if device should started/stopped */
 	int			backend;
+	rte_spinlock_t		access_lock;
+
 	/* Used to notify the guest (trigger interrupt) */
 	int			callfd;
 	/* Currently unused as polling mode is enabled */
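For context: the new access_lock serializes control-path reconfiguration (vhost-user messages) against the packet path, per virtqueue. In the hunks that follow, the enqueue paths take the lock unconditionally, while the dequeue path only trylocks so a polling core skips one burst instead of blocking; condensed (fragment, assuming rte_spinlock.h):

    if (rte_spinlock_trylock(&vq->access_lock) == 0)
        return 0; /* control path owns the queue; retry on next poll */
    /* ... safe to touch the ring here ... */
    rte_spinlock_unlock(&vq->access_lock);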
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index d25e1c02..80348dbf 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -39,6 +39,7 @@
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <stdbool.h>
 #include <assert.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
 #include <numaif.h>
@@ -488,6 +489,30 @@ dump_guest_pages(struct virtio_net *dev)
 #define dump_guest_pages(dev)
 
 #endif
 
+static bool
+vhost_memory_changed(struct VhostUserMemory *new,
+		     struct virtio_memory *old)
+{
+	uint32_t i;
+
+	if (new->nregions != old->nregions)
+		return true;
+
+	for (i = 0; i < new->nregions; ++i) {
+		VhostUserMemoryRegion *new_r = &new->regions[i];
+		struct virtio_memory_region *old_r = &old->regions[i];
+
+		if (new_r->guest_phys_addr != old_r->guest_phys_addr)
+			return true;
+		if (new_r->memory_size != old_r->size)
+			return true;
+		if (new_r->userspace_addr != old_r->guest_user_addr)
+			return true;
+	}
+
+	return false;
+}
+
 static int
 vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 {
@@ -500,6 +525,16 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 	uint32_t i;
 	int fd;
 
+	if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) {
+		RTE_LOG(INFO, VHOST_CONFIG,
+			"(%d) memory regions not changed\n", dev->vid);
+
+		for (i = 0; i < memory.nregions; i++)
+			close(pmsg->fds[i]);
+
+		return 0;
+	}
+
 	/* Remove from the data plane. */
 	if (dev->flags & VIRTIO_DEV_RUNNING) {
 		dev->flags &= ~VIRTIO_DEV_RUNNING;
@@ -917,12 +952,47 @@ send_vhost_message(int sockfd, struct VhostUserMsg *msg)
 	return ret;
 }
 
+static void
+vhost_user_lock_all_queue_pairs(struct virtio_net *dev)
+{
+	unsigned int i = 0;
+	unsigned int vq_num = 0;
+
+	while (vq_num < dev->virt_qp_nb * 2) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (vq) {
+			rte_spinlock_lock(&vq->access_lock);
+			vq_num++;
+		}
+		i++;
+	}
+}
+
+static void
+vhost_user_unlock_all_queue_pairs(struct virtio_net *dev)
+{
+	unsigned int i = 0;
+	unsigned int vq_num = 0;
+
+	while (vq_num < dev->virt_qp_nb * 2) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (vq) {
+			rte_spinlock_unlock(&vq->access_lock);
+			vq_num++;
+		}
+		i++;
+	}
+}
+
 int
 vhost_user_msg_handler(int vid, int fd)
 {
 	struct virtio_net *dev;
 	struct VhostUserMsg msg;
 	int ret;
+	int unlock_required = 0;
 
 	dev = get_device(vid);
 	if (dev == NULL)
@@ -945,6 +1015,37 @@ vhost_user_msg_handler(int vid, int fd)
 	RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n",
 		vhost_message_str[msg.request]);
+
+	/*
+	 * Note: we don't lock all queues on VHOST_USER_GET_VRING_BASE
+	 * and VHOST_USER_RESET_OWNER, since it is sent when virtio stops
+	 * and device is destroyed. destroy_device waits for queues to be
+	 * inactive, so it is safe. Otherwise taking the access_lock
+	 * would cause a dead lock.
+	 */
+	switch (msg.request) {
+	case VHOST_USER_SET_FEATURES:
+	case VHOST_USER_SET_PROTOCOL_FEATURES:
+	case VHOST_USER_SET_OWNER:
+	case VHOST_USER_SET_MEM_TABLE:
+	case VHOST_USER_SET_LOG_BASE:
+	case VHOST_USER_SET_LOG_FD:
+	case VHOST_USER_SET_VRING_NUM:
+	case VHOST_USER_SET_VRING_ADDR:
+	case VHOST_USER_SET_VRING_BASE:
+	case VHOST_USER_SET_VRING_KICK:
+	case VHOST_USER_SET_VRING_CALL:
+	case VHOST_USER_SET_VRING_ERR:
+	case VHOST_USER_SET_VRING_ENABLE:
+	case VHOST_USER_SEND_RARP:
+		vhost_user_lock_all_queue_pairs(dev);
+		unlock_required = 1;
+		break;
+	default:
+		break;
+
+	}
+
 	switch (msg.request) {
 	case VHOST_USER_GET_FEATURES:
 		msg.payload.u64 = vhost_user_get_features();
@@ -1034,5 +1135,8 @@ vhost_user_msg_handler(int vid, int fd)
 	}
 
+	if (unlock_required)
+		vhost_user_unlock_all_queue_pairs(dev);
+
 	return 0;
 }
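For context: vhost_memory_changed() above makes SET_MEM_TABLE idempotent: if the guest resends an identical layout, the existing mappings are kept and only the message's fds are closed. A standalone model of the comparison (struct region and its field names are illustrative, not the vhost structures):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct region { uint64_t gpa, size, uva; };

    /* Two tables are "unchanged" only if every triple matches. */
    static bool changed(const struct region *a, const struct region *b,
                uint32_t na, uint32_t nb)
    {
        uint32_t i;

        if (na != nb)
            return true;
        for (i = 0; i < na; i++)
            if (a[i].gpa != b[i].gpa || a[i].size != b[i].size ||
                a[i].uva != b[i].uva)
                return true;
        return false;
    }

    int main(void)
    {
        struct region r1[] = { { 0x0, 0x40000000, 0x7f0000000000 } };
        struct region r2[] = { { 0x0, 0x40000000, 0x7f0000000000 } };

        printf("changed: %d\n", changed(r1, r2, 1, 1)); /* 0: skip remap */
        return 0;
    }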
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index 0027f393..0024f729 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -44,6 +44,7 @@
 #include <rte_udp.h>
 #include <rte_sctp.h>
 #include <rte_arp.h>
+#include <rte_spinlock.h>
 
 #include "vhost.h"
 
@@ -313,8 +314,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
+
+	rte_spinlock_lock(&vq->access_lock);
+
 	if (unlikely(vq->enabled == 0))
-		return 0;
+		goto out_access_unlock;
 
 	avail_idx = *((volatile uint16_t *)&vq->avail->idx);
 	start_idx = vq->last_used_idx;
@@ -322,7 +326,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	count = RTE_MIN(count, free_entries);
 	count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST);
 	if (count == 0)
-		return 0;
+		goto out_access_unlock;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n",
 		dev->vid, start_idx, start_idx + count);
@@ -388,6 +392,10 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 	if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)
 			&& (vq->callfd >= 0))
 		eventfd_write(vq->callfd, (eventfd_t)1);
+
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	return count;
 }
 
@@ -582,12 +590,15 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
+
+	rte_spinlock_lock(&vq->access_lock);
+
 	if (unlikely(vq->enabled == 0))
-		return 0;
+		goto out_access_unlock;
 
 	count = RTE_MIN((uint32_t)MAX_PKT_BURST, count);
 	if (count == 0)
-		return 0;
+		goto out_access_unlock;
 
 	rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
 
@@ -631,6 +642,9 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id,
 			eventfd_write(vq->callfd, (eventfd_t)1);
 	}
 
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	return pkt_idx;
 }
 
@@ -875,7 +889,8 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs,
 				desc->addr + desc_offset, cpy_len)))) {
 			cur->data_len = cpy_len;
 			cur->data_off = 0;
-			cur->buf_addr = (void *)(uintptr_t)desc_addr;
+			cur->buf_addr = (void *)(uintptr_t)(desc_addr +
+				desc_offset);
 			cur->buf_physaddr = hpa;
 
 			/*
@@ -1027,6 +1042,22 @@ mbuf_is_consumed(struct rte_mbuf *m)
 	return true;
 }
 
+static inline void __attribute__((always_inline))
+restore_mbuf(struct rte_mbuf *m)
+{
+	uint32_t mbuf_size, priv_size;
+
+	while (m) {
+		priv_size = rte_pktmbuf_priv_size(m->pool);
+		mbuf_size = sizeof(struct rte_mbuf) + priv_size;
+		/* start of buffer is after mbuf structure and priv data */
+
+		m->buf_addr = (char *)m + mbuf_size;
+		m->buf_physaddr = rte_mempool_virt2phy(NULL, m) + mbuf_size;
+		m = m->next;
+	}
+}
+
 uint16_t
 rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
@@ -1051,9 +1082,13 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	}
 
 	vq = dev->virtqueue[queue_id];
-	if (unlikely(vq->enabled == 0))
+
+	if (unlikely(rte_spinlock_trylock(&vq->access_lock) == 0))
 		return 0;
 
+	if (unlikely(vq->enabled == 0))
+		goto out_access_unlock;
+
 	if (unlikely(dev->dequeue_zero_copy)) {
 		struct zcopy_mbuf *zmbuf, *next;
 		int nr_updated = 0;
@@ -1069,6 +1104,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 			nr_updated += 1;
 
 			TAILQ_REMOVE(&vq->zmbuf_list, zmbuf, next);
+			restore_mbuf(zmbuf->mbuf);
 			rte_pktmbuf_free(zmbuf->mbuf);
 			put_zmbuf(zmbuf);
 			vq->nr_zmbuf -= 1;
@@ -1102,7 +1138,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			return 0;
+			goto out_access_unlock;
 		}
 
 		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
@@ -1116,7 +1152,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 	free_entries = *((volatile uint16_t *)&vq->avail->idx) -
 			vq->last_avail_idx;
 	if (free_entries == 0)
-		goto out;
+		goto out_access_unlock;
 
 	LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
 
@@ -1209,7 +1245,9 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		update_used_idx(dev, vq, i);
 	}
 
-out:
+out_access_unlock:
+	rte_spinlock_unlock(&vq->access_lock);
+
 	if (unlikely(rarp_mbuf != NULL)) {
 		/*
 		 * Inject it to the head of "pkts" array, so that switch's mac