Diffstat (limited to 'lib')
-rw-r--r--  lib/librte_bitratestats/rte_bitrate.c                  |   6
-rw-r--r--  lib/librte_eal/common/eal_common_memory.c              |  43
-rw-r--r--  lib/librte_eal/common/include/rte_bitmap.h             |   8
-rw-r--r--  lib/librte_eal/common/include/rte_memory.h             |   3
-rw-r--r--  lib/librte_eal/common/include/rte_version.h            |   2
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_interrupts.c           |   2
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_memory.c               |  57
-rw-r--r--  lib/librte_eal/linuxapp/eal/eal_thread.c               |   4
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c  |   7
-rw-r--r--  lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h      |   5
-rw-r--r--  lib/librte_eal/rte_eal_version.map                     |   1
-rw-r--r--  lib/librte_ether/rte_ethdev.h                          |   5
-rw-r--r--  lib/librte_eventdev/rte_event_eth_rx_adapter.c         |  38
-rw-r--r--  lib/librte_eventdev/rte_event_ring.c                   |  15
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash.c                      |  21
-rw-r--r--  lib/librte_hash/rte_cuckoo_hash_x86.h                  |   3
-rw-r--r--  lib/librte_hash/rte_hash.h                             |  20
-rw-r--r--  lib/librte_kni/rte_kni.c                               |   3
-rw-r--r--  lib/librte_metrics/rte_metrics.c                       |  15
-rw-r--r--  lib/librte_ring/rte_ring.h                             |   2
-rw-r--r--  lib/librte_security/rte_security.c                     |   3
-rw-r--r--  lib/librte_vhost/iotlb.c                               |  10
-rw-r--r--  lib/librte_vhost/iotlb.h                               |   2
-rw-r--r--  lib/librte_vhost/vhost.h                               | 120
-rw-r--r--  lib/librte_vhost/vhost_user.c                          |  29
-rw-r--r--  lib/librte_vhost/virtio_net.c                          |  31
26 files changed, 376 insertions(+), 79 deletions(-)
diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c
index f373697a..d39edbf6 100644
--- a/lib/librte_bitratestats/rte_bitrate.c
+++ b/lib/librte_bitratestats/rte_bitrate.c
@@ -76,6 +76,9 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data)
};
int return_value;
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
return_value = rte_metrics_reg_names(&names[0], ARRAY_SIZE(names));
if (return_value >= 0)
bitrate_data->id_stats_set = return_value;
@@ -94,6 +97,9 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
const int64_t alpha_percent = 20;
uint64_t values[6];
+ if (bitrate_data == NULL)
+ return -EINVAL;
+
ret_code = rte_eth_stats_get(port_id, &eth_stats);
if (ret_code != 0)
return ret_code;
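
For reference, a minimal usage sketch of the bitrate API covered by these NULL checks; the initialization order (metrics before bitrate registration) follows the library's requirements, while the error handling and names are illustrative, not part of the patch:

    rte_metrics_init(rte_socket_id());   /* metrics must be initialized first */
    struct rte_stats_bitrates *bd = rte_stats_bitrate_create();
    if (bd == NULL || rte_stats_bitrate_reg(bd) < 0)
        rte_exit(EXIT_FAILURE, "bitrate stats setup failed\n");
    rte_stats_bitrate_calc(bd, port_id); /* e.g. from a periodic alarm */
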
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index fc6c44da..a0922f18 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -109,6 +109,49 @@ rte_dump_physmem_layout(FILE *f)
}
}
+/* 63 bits is good enough for a sanity check */
+#define MAX_DMA_MASK_BITS 63
+
+/* check memseg iovas are within the required range based on dma mask */
+int
+rte_eal_check_dma_mask(uint8_t maskbits)
+{
+
+ const struct rte_mem_config *mcfg;
+ uint64_t mask;
+ int i;
+
+ /* sanity check */
+ if (maskbits > MAX_DMA_MASK_BITS) {
+ RTE_LOG(INFO, EAL, "wrong dma mask size %u (Max: %u)\n",
+ maskbits, MAX_DMA_MASK_BITS);
+ return -1;
+ }
+
+ /* create dma mask */
+ mask = ~((1ULL << maskbits) - 1);
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (mcfg->memseg[i].addr == NULL)
+ break;
+
+ if (mcfg->memseg[i].iova & mask) {
+ RTE_LOG(INFO, EAL,
+ "memseg[%d] iova %"PRIx64" out of range:\n",
+ i, mcfg->memseg[i].iova);
+
+ RTE_LOG(INFO, EAL, "\tusing dma mask %"PRIx64"\n",
+ mask);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
/* return the number of memory channels */
unsigned rte_memory_get_nchannel(void)
{
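
A hedged sketch of how a driver with known addressing limits might use the new helper at probe time; the 40-bit mask, log message, and probe context are illustrative assumptions, not taken from this patch:

    /* hypothetical probe path for a device limited to 40-bit IOVAs */
    if (rte_eal_iova_mode() == RTE_IOVA_VA &&
        rte_eal_check_dma_mask(40) != 0) {
        RTE_LOG(ERR, PMD, "device cannot address all hugepage memory\n");
        return -ENODEV;
    }
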
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index b9067e67..13bfd9cb 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -227,12 +227,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) {
/**
* Bitmap initialization
*
- * @param mem_size
- * Minimum expected size of bitmap.
+ * @param n_bits
+ * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
* @param mem
* Base address of array1 and array2.
- * @param n_bits
- * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem_size
+ * Minimum expected size of bitmap.
* @return
* Handle to bitmap instance.
*/
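
The corrected order matches the actual prototype, rte_bitmap_init(n_bits, mem, mem_size). A minimal allocation sketch, assuming an rte_malloc-backed buffer (the zone name is illustrative):

    uint32_t n_bits = 512;       /* must be a non-zero multiple of 512 */
    uint32_t mem_size = rte_bitmap_get_memory_footprint(n_bits);
    uint8_t *mem = rte_zmalloc("bmp_buf", mem_size, RTE_CACHE_LINE_SIZE);
    struct rte_bitmap *bmp = rte_bitmap_init(n_bits, mem, mem_size);
    if (bmp == NULL)
        rte_free(mem);
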
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 80a8fc02..b2a01689 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -209,6 +209,9 @@ unsigned rte_memory_get_nchannel(void);
*/
unsigned rte_memory_get_nrank(void);
+/* check memsegs iovas are within a range based on dma mask */
+int rte_eal_check_dma_mask(uint8_t maskbits);
+
/**
* Drivers based on uio will not load unless physical
* addresses are obtainable. It is only possible to get
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 99ae35e5..4d0a9f7c 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -66,7 +66,7 @@ extern "C" {
/**
* Patch level number i.e. the z in yy.mm.z
*/
-#define RTE_VER_MINOR 3
+#define RTE_VER_MINOR 4
/**
* Extra string to be appended to version number
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 1c20693d..e1179b85 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -449,7 +449,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
TAILQ_FOREACH(src, &intr_sources, next) {
if (src->intr_handle.fd == intr_handle->fd) {
/* we had no interrupts for this */
- if TAILQ_EMPTY(&src->callbacks)
+ if (TAILQ_EMPTY(&src->callbacks))
wake_thread = 1;
TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 17c20d4b..bac969a1 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -88,6 +88,23 @@
static uint64_t baseaddr_offset;
+#ifdef RTE_ARCH_64
+/*
+ * The Linux kernel uses a very high address as the starting address for
+ * serving mmap calls. If addressing limitations exist and IOVA mode is VA,
+ * this starting address is likely too high for those devices. However, it
+ * is possible to use a lower address in the process virtual address space,
+ * as with 64 bits there is plenty of available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting address
+ * at 4GB leaves 508GB or 1020GB for mapping the available hugepages.
+ * This is likely enough for most systems, although a device with addressing
+ * limitations should call rte_eal_check_dma_mask to ensure all memory is
+ * within the supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
@@ -95,7 +112,7 @@ static bool phys_addrs_available = true;
static void
test_phys_addrs_available(void)
{
- uint64_t tmp;
+ uint64_t tmp = 0;
phys_addr_t physaddr;
if (!rte_eal_has_hugepages()) {
@@ -250,6 +267,23 @@ aslr_enabled(void)
}
}
+static void *
+get_addr_hint(void)
+{
+ if (internal_config.base_virtaddr != 0) {
+ return (void *) (uintptr_t)
+ (internal_config.base_virtaddr +
+ baseaddr_offset);
+ } else {
+#ifdef RTE_ARCH_64
+ return (void *) (uintptr_t) (baseaddr +
+ baseaddr_offset);
+#else
+ return NULL;
+#endif
+ }
+}
+
/*
* Try to mmap *size bytes in /dev/zero. If it is successful, return the
* pointer to the mmap'd area and keep *size unmodified. Else, retry
@@ -260,16 +294,10 @@ aslr_enabled(void)
static void *
get_virtual_area(size_t *size, size_t hugepage_sz)
{
- void *addr;
+ void *addr, *addr_hint;
int fd;
long aligned_addr;
- if (internal_config.base_virtaddr != 0) {
- addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
- baseaddr_offset);
- }
- else addr = NULL;
-
RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
fd = open("/dev/zero", O_RDONLY);
@@ -278,7 +306,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
return NULL;
}
do {
- addr = mmap(addr,
+ addr_hint = get_addr_hint();
+
+ addr = mmap(addr_hint,
(*size) + hugepage_sz, PROT_READ,
#ifdef RTE_ARCH_PPC_64
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
@@ -286,8 +316,15 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
MAP_PRIVATE,
#endif
fd, 0);
- if (addr == MAP_FAILED)
+ if (addr == MAP_FAILED) {
+ /* map failed. Let's try with less memory */
*size -= hugepage_sz;
+ } else if (addr_hint && addr != addr_hint) {
+ /* hint was not used. Try with another offset */
+ munmap(addr, (*size) + hugepage_sz);
+ addr = MAP_FAILED;
+ baseaddr_offset += 0x100000000;
+ }
} while (addr == MAP_FAILED && *size > 0);
if (addr == MAP_FAILED) {
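
The hint-and-retry strategy above can be shown standalone; a sketch under the patch's stated assumptions (4GB starting address, 4GB offset step), with the size and search bound chosen for illustration only:

    size_t sz = 2UL * 1024 * 1024;       /* e.g. one 2MB hugepage */
    uint64_t off = 0;
    void *va;
    do {
        void *hint = (void *)(uintptr_t)(0x100000000ULL + off);
        va = mmap(hint, sz, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (va != MAP_FAILED && va != hint) {
            munmap(va, sz);              /* kernel ignored the hint */
            va = MAP_FAILED;
            off += 0x100000000ULL;       /* retry at the next 4GB step */
        }
    } while (va == MAP_FAILED && off < (1ULL << 40));
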
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index e9a579e4..c3947d73 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -205,7 +205,7 @@ int rte_sys_gettid(void)
int rte_thread_setname(pthread_t id, const char *name)
{
- int ret = -1;
+ int ret = ENOSYS;
#if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2, 12)
ret = pthread_setname_np(id, name);
@@ -213,5 +213,5 @@ int rte_thread_setname(pthread_t id, const char *name)
#endif
RTE_SET_USED(id);
RTE_SET_USED(name);
- return ret;
+ return -ret;
}
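
With this change the function returns 0 on success or a negative errno value (-ENOSYS when glibc lacks pthread_setname_np); a caller-side sketch:

    int ret = rte_thread_setname(pthread_self(), "lcore-worker");
    if (ret != 0)
        RTE_LOG(DEBUG, EAL, "cannot set thread name: %s\n", strerror(-ret));
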
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
index 95e262b7..aed14bcc 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
@@ -826,9 +826,10 @@ static void igb_get_drvinfo(struct net_device *netdev,
strncpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver) - 1);
strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1);
- strncpy(drvinfo->fw_version, adapter->fw_version,
- sizeof(drvinfo->fw_version) - 1);
- strncpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info) -1);
+ strlcpy(drvinfo->fw_version, adapter->fw_version,
+ sizeof(drvinfo->fw_version));
+ strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
+ sizeof(drvinfo->bus_info));
drvinfo->n_stats = IGB_STATS_LEN;
drvinfo->testinfo_len = IGB_TEST_LEN;
drvinfo->regdump_len = igb_get_regs_len(netdev);
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index 6691edf1..6b738911 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -3944,6 +3944,11 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
#endif
#endif
+#if (defined(RHEL_RELEASE_CODE) && \
+ (RHEL_RELEASE_VERSION(7, 5) <= RHEL_RELEASE_CODE))
+#define ndo_change_mtu ndo_change_mtu_rh74
+#endif
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
#define HAVE_PCI_ENABLE_MSIX
#endif
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index f4f46c1b..aa6cf871 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -184,6 +184,7 @@ DPDK_17.11 {
rte_eal_create_uio_dev;
rte_bus_get_iommu_class;
+ rte_eal_check_dma_mask;
rte_eal_has_pci;
rte_eal_iova_mode;
rte_eal_mbuf_default_mempool_ops;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index eba11ca5..47e37a6f 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1877,7 +1877,6 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
* to that slot for the driver to use.
*
* @param name Unique identifier name for each Ethernet device
- * @param type Device type of this Ethernet device
* @return
* - Slot in the rte_dev_devices array for a new device;
*/
@@ -2543,7 +2542,7 @@ void rte_eth_xstats_reset(uint16_t port_id);
* @param stat_idx
* The per-queue packet statistics functionality number that the transmit
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
@@ -2563,7 +2562,7 @@ int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id,
* @param stat_idx
* The per-queue packet statistics functionality number that the receive
* queue is to be assigned.
- * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+ * The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
* @return
* Zero if successful. Non-zero otherwise.
*/
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
index 90106e6c..d5c3fd56 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -87,6 +87,8 @@ struct rte_event_eth_rx_adapter {
int socket_id;
/* Per adapter EAL service */
uint32_t service_id;
+ /* Adapter started flag */
+ uint8_t rxa_started;
} __rte_cache_aligned;
/* Per eth device */
@@ -220,6 +222,8 @@ eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
nb_rx_queues = dev_info->dev->data->nb_rx_queues;
if (dev_info->rx_queue == NULL)
continue;
+ if (dev_info->internal_event_port)
+ continue;
for (q = 0; q < nb_rx_queues; q++) {
struct eth_rx_queue_info *queue_info =
&dev_info->rx_queue[q];
@@ -414,14 +418,14 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
static inline void
fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
- uint8_t dev_id,
+ uint16_t eth_dev_id,
uint16_t rx_queue_id,
struct rte_mbuf **mbufs,
uint16_t num)
{
uint32_t i;
struct eth_device_info *eth_device_info =
- &rx_adapter->eth_devices[dev_id];
+ &rx_adapter->eth_devices[eth_dev_id];
struct eth_rx_queue_info *eth_rx_queue_info =
&eth_device_info->rx_queue[rx_queue_id];
@@ -476,7 +480,7 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
* the hypervisor's switching layer where adjustments can be made to deal with
* it.
*/
-static inline uint32_t
+static inline void
eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
{
uint32_t num_queue;
@@ -503,8 +507,10 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
*/
if (buf->count >= BATCH_SIZE)
flush_event_buffer(rx_adapter);
- if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count))
- break;
+ if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) {
+ rx_adapter->wrr_pos = wrr_pos;
+ return;
+ }
stats->rx_poll_count++;
n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE);
@@ -519,7 +525,7 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
if (nb_rx > max_nb_rx) {
rx_adapter->wrr_pos =
(wrr_pos + 1) % rx_adapter->wrr_len;
- return nb_rx;
+ break;
}
}
@@ -527,20 +533,22 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
wrr_pos = 0;
}
- return nb_rx;
+ if (buf->count >= BATCH_SIZE)
+ flush_event_buffer(rx_adapter);
}
static int
event_eth_rx_adapter_service_func(void *args)
{
struct rte_event_eth_rx_adapter *rx_adapter = args;
- struct rte_eth_event_enqueue_buffer *buf;
- buf = &rx_adapter->event_enqueue_buffer;
if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
return 0;
- if (eth_rx_poll(rx_adapter) == 0 && buf->count)
- flush_event_buffer(rx_adapter);
+ if (!rx_adapter->rxa_started) {
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ return 0;
+ }
+ eth_rx_poll(rx_adapter);
rte_spinlock_unlock(&rx_adapter->rx_lock);
return 0;
}
@@ -829,8 +837,12 @@ rx_adapter_ctrl(uint8_t id, int start)
&rte_eth_devices[i]);
}
- if (use_service)
+ if (use_service) {
+ rte_spinlock_lock(&rx_adapter->rx_lock);
+ rx_adapter->rxa_started = start;
rte_service_runstate_set(rx_adapter->service_id, start);
+ rte_spinlock_unlock(&rx_adapter->rx_lock);
+ }
return 0;
}
@@ -1032,6 +1044,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
&rte_eth_devices[eth_dev_id],
rx_queue_id, queue_conf);
if (ret == 0) {
+ dev_info->internal_event_port = 1;
update_queue_info(rx_adapter,
&rx_adapter->eth_devices[eth_dev_id],
rx_queue_id,
@@ -1039,6 +1052,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
}
} else {
rte_spinlock_lock(&rx_adapter->rx_lock);
+ dev_info->internal_event_port = 0;
ret = init_service(rx_adapter, id);
if (ret == 0)
ret = add_rx_queue(rx_adapter, eth_dev_id, rx_queue_id,
diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c
index b14c2127..c0603cb9 100644
--- a/lib/librte_eventdev/rte_event_ring.c
+++ b/lib/librte_eventdev/rte_event_ring.c
@@ -110,11 +110,16 @@ rte_event_ring_create(const char *name, unsigned int count, int socket_id,
mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
if (mz != NULL) {
r = mz->addr;
- /*
- * no need to check return value here, we already checked the
- * arguments above
- */
- rte_event_ring_init(r, name, requested_count, flags);
+ /* Check return value in case rte_ring_init() fails on size */
+ int err = rte_event_ring_init(r, name, requested_count, flags);
+ if (err) {
+ RTE_LOG(ERR, RING, "Ring init failed\n");
+ if (rte_memzone_free(mz) != 0)
+ RTE_LOG(ERR, RING, "Cannot free memzone\n");
+ rte_free(te);
+ rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+ return NULL;
+ }
te->data = (void *) r;
r->r.memzone = mz;
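
A creation sketch showing the failure path this patch now propagates to the caller; the ring name, size, and flags are illustrative:

    struct rte_event_ring *er = rte_event_ring_create("rx_evring", 4096,
            rte_socket_id(), RING_F_SP_ENQ | RING_F_SC_DEQ);
    if (er == NULL)                      /* includes init failures above */
        rte_exit(EXIT_FAILURE, "cannot create event ring: %s\n",
                 rte_strerror(rte_errno));
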
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index cbf78fab..9be514dc 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -154,13 +154,13 @@ rte_hash_create(const struct rte_hash_parameters *params)
* except for the first cache
*/
num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
- LCORE_CACHE_SIZE + 1;
+ (LCORE_CACHE_SIZE - 1) + 1;
else
num_key_slots = params->entries + 1;
snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
/* Create ring (Dummy slot index is not enqueued) */
- r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1),
+ r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
params->socket_id, 0);
if (r == NULL) {
RTE_LOG(ERR, HASH, "memory allocation failed\n");
@@ -302,14 +302,17 @@ rte_hash_create(const struct rte_hash_parameters *params)
h->add_key = ADD_KEY_MULTIWRITER;
h->multiwriter_lock = rte_malloc(NULL,
sizeof(rte_spinlock_t),
- LCORE_CACHE_SIZE);
+ RTE_CACHE_LINE_SIZE);
+ if (h->multiwriter_lock == NULL)
+ goto err_unlock;
+
rte_spinlock_init(h->multiwriter_lock);
}
} else
h->add_key = ADD_KEY_SINGLEWRITER;
/* Populate free slots ring. Entry zero is reserved for key misses. */
- for (i = 1; i < params->entries + 1; i++)
+ for (i = 1; i < num_key_slots; i++)
rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
te->data = (void *) h;
@@ -391,7 +394,7 @@ void
rte_hash_reset(struct rte_hash *h)
{
void *ptr;
- unsigned i;
+ uint32_t tot_ring_cnt, i;
if (h == NULL)
return;
@@ -404,7 +407,13 @@ rte_hash_reset(struct rte_hash *h)
rte_pause();
/* Repopulate the free slots ring. Entry zero is reserved for key misses */
- for (i = 1; i < h->entries + 1; i++)
+ if (h->hw_trans_mem_support)
+ tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+ (LCORE_CACHE_SIZE - 1);
+ else
+ tot_ring_cnt = h->entries;
+
+ for (i = 1; i < tot_ring_cnt + 1; i++)
rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
if (h->hw_trans_mem_support) {
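
To see why the ring and rte_hash_reset() must now account for the per-lcore caches, a worked example assuming the common build defaults LCORE_CACHE_SIZE = 64 and RTE_MAX_LCORE = 128 (both are configurable):

    /* entries = 1024, multi-writer with TM support:
     * num_key_slots = 1024 + (128 - 1) * (64 - 1) + 1 = 9026
     * ring size     = rte_align32pow2(9026)          = 16384
     * tot_ring_cnt  = 1024 + (128 - 1) * (64 - 1)    = 9025
     * so reset re-enqueues slot indexes 1..9025, not just 1..1024.
     */
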
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
index 0c94244a..a2f1663e 100644
--- a/lib/librte_hash/rte_cuckoo_hash_x86.h
+++ b/lib/librte_hash/rte_cuckoo_hash_x86.h
@@ -95,6 +95,9 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
while (try < RTE_HASH_TSX_MAX_RETRY) {
status = rte_xbegin();
if (likely(status == RTE_XBEGIN_STARTED)) {
+ /* In case empty slot was gone before entering TSX */
+ if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT)
+ rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
while (likely(curr_node->prev != NULL)) {
prev_node = curr_node->prev;
prev_bkt = prev_node->bkt;
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 622c1800..5539e338 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -283,8 +283,8 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
* Output containing a pointer to the key
* @return
* - 0 if retrieved successfully
- * - EINVAL if the parameters are invalid.
- * - ENOENT if no valid key is found in the given position.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if no valid key is found in the given position.
*/
int
rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
@@ -301,9 +301,11 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
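
Callers written against the old "0 if successful" wording should treat any non-negative return as the key's slot index; a hedged sketch (the user_data array is illustrative):

    void *data;
    int pos = rte_hash_lookup_data(h, &key, &data);
    if (pos < 0) {
        /* -EINVAL (bad parameters) or -ENOENT (key absent) */
    } else {
        /* pos equals the value returned when the key was added */
        user_data[pos] = data;
    }
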
@@ -322,9 +324,11 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
* @param data
* Output with pointer to data returned from the hash table.
* @return
- * 0 if successful lookup
- * - EINVAL if the parameters are invalid.
- * - ENOENT if the key is not found.
+ * - A positive value that can be used by the caller as an offset into an
+ * array of user data. This value is unique for this key, and is the same
+ * value that was returned when the key was added.
+ * - -EINVAL if the parameters are invalid.
+ * - -ENOENT if the key is not found.
*/
int
rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 8eca8c03..4b45d1a0 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -681,6 +681,9 @@ rte_kni_get(const char *name)
struct rte_kni_memzone_slot *it;
struct rte_kni *kni;
+ if (name == NULL || name[0] == '\0')
+ return NULL;
+
/* Note: could be improved perf-wise if necessary */
for (i = 0; i < kni_memzone_pool.max_ifaces; i++) {
it = &kni_memzone_pool.slots[i];
diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index d9404001..dc634467 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -124,6 +124,9 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
/* Some sanity checks */
if (cnt_names < 1 || names == NULL)
return -EINVAL;
+ for (idx_name = 0; idx_name < cnt_names; idx_name++)
+ if (names[idx_name] == NULL)
+ return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
if (memzone == NULL)
@@ -190,6 +193,11 @@ rte_metrics_update_values(int port_id,
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
+
+ if (key >= stats->cnt_stats) {
+ rte_spinlock_unlock(&stats->lock);
+ return -EINVAL;
+ }
idx_metric = key;
cnt_setsize = 1;
while (idx_metric < stats->cnt_stats) {
@@ -231,9 +239,8 @@ rte_metrics_get_names(struct rte_metric_name *names,
int return_value;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
@@ -269,9 +276,9 @@ rte_metrics_get_values(int port_id,
return -EINVAL;
memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
- /* If not allocated, fail silently */
if (memzone == NULL)
- return 0;
+ return -EIO;
+
stats = memzone->addr;
rte_spinlock_lock(&stats->lock);
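
With the silent-failure path removed, callers can now distinguish "library not initialized" from "no metrics registered"; a sketch of the retry pattern this enables:

    int n = rte_metrics_get_names(NULL, 0);  /* query metric count */
    if (n == -EIO) {
        /* metrics memzone not allocated yet: initialize and retry */
        rte_metrics_init(rte_socket_id());
        n = rte_metrics_get_names(NULL, 0);
    }
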
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 7069d52e..fc433b05 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -582,7 +582,7 @@ __rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
uint32_t cons_head, cons_next;
uint32_t entries;
- n = __rte_ring_move_cons_head(r, is_sc, n, behavior,
+ n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
&cons_head, &cons_next, &entries);
if (n == 0)
goto end;
diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c
index 685729f4..503f5b53 100644
--- a/lib/librte_security/rte_security.c
+++ b/lib/librte_security/rte_security.c
@@ -84,7 +84,6 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
struct rte_security_session *sess)
{
int ret;
- struct rte_mempool *mp = rte_mempool_from_obj(sess);
RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP);
@@ -93,7 +92,7 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
ret = instance->ops->session_destroy(instance->device, sess);
if (!ret)
- rte_mempool_put(mp, (void *)sess);
+ rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess);
return ret;
}
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
index c11ebcaa..c6354fef 100644
--- a/lib/librte_vhost/iotlb.c
+++ b/lib/librte_vhost/iotlb.c
@@ -303,6 +303,13 @@ out:
return vva;
}
+void
+vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
+{
+ vhost_user_iotlb_cache_remove_all(vq);
+ vhost_user_iotlb_pending_remove_all(vq);
+}
+
int
vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
{
@@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
* The cache has already been initialized,
* just drop all cached and pending entries.
*/
- vhost_user_iotlb_cache_remove_all(vq);
- vhost_user_iotlb_pending_remove_all(vq);
+ vhost_user_iotlb_flush_all(vq);
}
#ifdef RTE_LIBRTE_VHOST_NUMA
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
index e7083e37..60b9e4c5 100644
--- a/lib/librte_vhost/iotlb.h
+++ b/lib/librte_vhost/iotlb.h
@@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova,
uint8_t perm);
void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
uint64_t size, uint8_t perm);
-
+void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
#endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 16d6b891..11ce2711 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -59,6 +59,8 @@
#define BUF_VECTOR_MAX 256
+#define VHOST_LOG_CACHE_NR 32
+
/**
* Structure contains buffer address, length and descriptor index
* from vring to do scatter RX.
@@ -92,6 +94,14 @@ struct batch_copy_elem {
uint64_t log_addr;
};
+/*
+ * Structure that contains the info for batched dirty logging.
+ */
+struct log_cache_entry {
+ uint32_t offset;
+ unsigned long val;
+};
+
/**
* Structure contains variables relevant to RX/TX virtqueues.
*/
@@ -133,6 +143,9 @@ struct vhost_virtqueue {
struct batch_copy_elem *batch_copy_elems;
uint16_t batch_copy_nb_elems;
+ struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
+ uint16_t log_cache_nb_elem;
+
rte_rwlock_t iotlb_lock;
rte_rwlock_t iotlb_pending_lock;
struct rte_mempool *iotlb_pool;
@@ -266,7 +279,15 @@ struct virtio_net {
static __rte_always_inline void
vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
{
- __sync_fetch_and_or_8(addr, (1U << nr));
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+ /*
+ * __sync_ built-ins are deprecated, but __atomic_ ones
+ * are sub-optimized in older GCC versions.
+ */
+ __sync_fetch_and_or_1(addr, (1U << nr));
+#else
+ __atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
+#endif
}
static __rte_always_inline void
@@ -298,6 +319,103 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
}
static __rte_always_inline void
+vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+ unsigned long *log_base;
+ int i;
+
+ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+ !dev->log_base))
+ return;
+
+ log_base = (unsigned long *)(uintptr_t)dev->log_base;
+
+ /*
+ * It is expected a write memory barrier has been issued
+ * before this function is called.
+ */
+
+ for (i = 0; i < vq->log_cache_nb_elem; i++) {
+ struct log_cache_entry *elem = vq->log_cache + i;
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+ /*
+ * '__sync' builtins are deprecated, but '__atomic' ones
+ * are sub-optimized in older GCC versions.
+ */
+ __sync_fetch_and_or(log_base + elem->offset, elem->val);
+#else
+ __atomic_fetch_or(log_base + elem->offset, elem->val,
+ __ATOMIC_RELAXED);
+#endif
+ }
+
+ rte_smp_wmb();
+
+ vq->log_cache_nb_elem = 0;
+}
+
+static __rte_always_inline void
+vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t page)
+{
+ uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
+ uint32_t offset = page / (sizeof(unsigned long) << 3);
+ int i;
+
+ for (i = 0; i < vq->log_cache_nb_elem; i++) {
+ struct log_cache_entry *elem = vq->log_cache + i;
+
+ if (elem->offset == offset) {
+ elem->val |= (1UL << bit_nr);
+ return;
+ }
+ }
+
+ if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
+ /*
+ * No more room for a new log cache entry,
+ * so write the dirty log map directly.
+ */
+ rte_smp_wmb();
+ vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+ return;
+ }
+
+ vq->log_cache[i].offset = offset;
+ vq->log_cache[i].val = (1UL << bit_nr);
+ vq->log_cache_nb_elem++;
+}
+
+static __rte_always_inline void
+vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t addr, uint64_t len)
+{
+ uint64_t page;
+
+ if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+ !dev->log_base || !len))
+ return;
+
+ if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+ return;
+
+ page = addr / VHOST_LOG_PAGE;
+ while (page * VHOST_LOG_PAGE < addr + len) {
+ vhost_log_cache_page(dev, vq, page);
+ page += 1;
+ }
+}
+
+static __rte_always_inline void
+vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+ uint64_t offset, uint64_t len)
+{
+ vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len);
+}
+
+static __rte_always_inline void
vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
uint64_t offset, uint64_t len)
{
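
The cache indexes the dirty log by machine word; for reference, with VHOST_LOG_PAGE = 4096 and a 64-bit unsigned long the address-to-entry mapping works out as:

    /* addr = 0x12345678:
     * page   = 0x12345678 / 4096 = 0x12345  (dirty page number)
     * offset = 0x12345 / 64      = 0x48d    (word index into log_base)
     * bit_nr = 0x12345 % 64      = 5        (bit within that word)
     * cached entry: { .offset = 0x48d, .val = 1UL << 5 }
     */
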
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index c0efb310..0eb5e0d6 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -623,8 +623,9 @@ vhost_memory_changed(struct VhostUserMemory *new,
}
static int
-vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
{
+ struct virtio_net *dev = *pdev;
struct VhostUserMemory memory = pmsg->payload.memory;
struct rte_vhost_mem_region *reg;
void *mmap_addr;
@@ -650,6 +651,11 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
dev->mem = NULL;
}
+ /* Flush IOTLB cache as previous HVAs are now invalid */
+ if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+ for (i = 0; i < dev->nr_vring; i++)
+ vhost_user_iotlb_flush_all(dev->virtqueue[i]);
+
dev->nr_guest_pages = 0;
if (!dev->guest_pages) {
dev->max_guest_pages = 8;
@@ -743,6 +749,25 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
mmap_offset);
}
+ for (i = 0; i < dev->nr_vring; i++) {
+ struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+ if (vq->desc || vq->avail || vq->used) {
+ /*
+ * If the memory table got updated, the ring addresses
+ * need to be translated again as virtual addresses have
+ * changed.
+ */
+ vring_invalidate(dev, vq);
+
+ dev = translate_ring_addresses(dev, i);
+ if (!dev)
+ return -1;
+
+ *pdev = dev;
+ }
+ }
+
dump_guest_pages(dev);
return 0;
@@ -1404,7 +1429,7 @@ vhost_user_msg_handler(int vid, int fd)
break;
case VHOST_USER_SET_MEM_TABLE:
- ret = vhost_user_set_mem_table(dev, &msg);
+ ret = vhost_user_set_mem_table(&dev, &msg);
break;
case VHOST_USER_SET_LOG_BASE:
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ecfabca3..bde360f3 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -107,7 +107,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
rte_memcpy(&vq->used->ring[to],
&vq->shadow_used_ring[from],
size * sizeof(struct vring_used_elem));
- vhost_log_used_vring(dev, vq,
+ vhost_log_cache_used_vring(dev, vq,
offsetof(struct vring_used, ring[to]),
size * sizeof(struct vring_used_elem));
}
@@ -135,6 +135,8 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
rte_smp_wmb();
+ vhost_log_cache_sync(dev, vq);
+
*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
sizeof(vq->used->idx));
@@ -159,7 +161,7 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
for (i = 0; i < count; i++) {
rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
- vhost_log_write(dev, elem[i].log_addr, elem[i].len);
+ vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
}
}
@@ -275,7 +277,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
virtio_enqueue_offload(m,
(struct virtio_net_hdr *)(uintptr_t)desc_addr);
PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
- vhost_log_write(dev, desc_gaddr, dev->vhost_hlen);
+ vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);
} else {
struct virtio_net_hdr vnet_hdr;
uint64_t remain = dev->vhost_hlen;
@@ -298,7 +300,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
(void *)(uintptr_t)src, len);
PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
- vhost_log_write(dev, guest_addr, len);
+ vhost_log_cache_write(dev, vq, guest_addr, len);
remain -= len;
guest_addr += len;
dst += len;
@@ -379,7 +381,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
desc_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
+ vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+ cpy_len);
PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
cpy_len, 0);
} else {
@@ -468,7 +471,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
vq->used->ring[used_idx].id = desc_indexes[i];
vq->used->ring[used_idx].len = pkts[i]->pkt_len +
dev->vhost_hlen;
- vhost_log_used_vring(dev, vq,
+ vhost_log_cache_used_vring(dev, vq,
offsetof(struct vring_used, ring[used_idx]),
sizeof(vq->used->ring[used_idx]));
}
@@ -528,6 +531,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
rte_smp_wmb();
+ vhost_log_cache_sync(dev, vq);
+
*(volatile uint16_t *)&vq->used->idx += count;
vq->last_used_idx += count;
vhost_log_used_vring(dev, vq,
@@ -797,7 +802,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
PRINT_PACKET(dev, (uintptr_t)dst,
(uint32_t)len, 0);
- vhost_log_write(dev, guest_addr, len);
+ vhost_log_cache_write(dev, vq,
+ guest_addr, len);
remain -= len;
guest_addr += len;
@@ -806,7 +812,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
} else {
PRINT_PACKET(dev, (uintptr_t)hdr_addr,
dev->vhost_hlen, 0);
- vhost_log_write(dev, hdr_phys_addr,
+ vhost_log_cache_write(dev, vq, hdr_phys_addr,
dev->vhost_hlen);
}
@@ -820,7 +826,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
desc_offset)),
rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
cpy_len);
- vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
+ vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+ cpy_len);
PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
cpy_len, 0);
} else {
@@ -1384,7 +1391,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
{
vq->used->ring[used_idx].id = desc_idx;
vq->used->ring[used_idx].len = 0;
- vhost_log_used_vring(dev, vq,
+ vhost_log_cache_used_vring(dev, vq,
offsetof(struct vring_used, ring[used_idx]),
sizeof(vq->used->ring[used_idx]));
}
@@ -1399,6 +1406,8 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
rte_smp_wmb();
rte_smp_rmb();
+ vhost_log_cache_sync(dev, vq);
+
vq->used->idx += count;
vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
sizeof(vq->used->idx));
@@ -1557,7 +1566,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
if (rarp_mbuf == NULL) {
RTE_LOG(ERR, VHOST_DATA,
"Failed to allocate memory for mbuf.\n");
- return 0;
+ goto out;
}
if (make_rarp_packet(rarp_mbuf, &dev->mac)) {