Diffstat (limited to 'lib')
26 files changed, 376 insertions, 79 deletions
diff --git a/lib/librte_bitratestats/rte_bitrate.c b/lib/librte_bitratestats/rte_bitrate.c
index f373697a..d39edbf6 100644
--- a/lib/librte_bitratestats/rte_bitrate.c
+++ b/lib/librte_bitratestats/rte_bitrate.c
@@ -76,6 +76,9 @@ rte_stats_bitrate_reg(struct rte_stats_bitrates *bitrate_data)
 	};
 	int return_value;
 
+	if (bitrate_data == NULL)
+		return -EINVAL;
+
 	return_value = rte_metrics_reg_names(&names[0], ARRAY_SIZE(names));
 	if (return_value >= 0)
 		bitrate_data->id_stats_set = return_value;
@@ -94,6 +97,9 @@ rte_stats_bitrate_calc(struct rte_stats_bitrates *bitrate_data,
 	const int64_t alpha_percent = 20;
 	uint64_t values[6];
 
+	if (bitrate_data == NULL)
+		return -EINVAL;
+
 	ret_code = rte_eth_stats_get(port_id, &eth_stats);
 	if (ret_code != 0)
 		return ret_code;
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index fc6c44da..a0922f18 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -109,6 +109,49 @@ rte_dump_physmem_layout(FILE *f)
 	}
 }
 
+/* 63 bits is good enough for a sanity check */
+#define MAX_DMA_MASK_BITS 63
+
+/* check memseg iovas are within the required range based on dma mask */
+int
+rte_eal_check_dma_mask(uint8_t maskbits)
+{
+	const struct rte_mem_config *mcfg;
+	uint64_t mask;
+	int i;
+
+	/* sanity check */
+	if (maskbits > MAX_DMA_MASK_BITS) {
+		RTE_LOG(INFO, EAL, "wrong dma mask size %u (Max: %u)\n",
+			maskbits, MAX_DMA_MASK_BITS);
+		return -1;
+	}
+
+	/* create dma mask */
+	mask = ~((1ULL << maskbits) - 1);
+
+	/* get pointer to global configuration */
+	mcfg = rte_eal_get_configuration()->mem_config;
+
+	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+		if (mcfg->memseg[i].addr == NULL)
+			break;
+
+		if (mcfg->memseg[i].iova & mask) {
+			RTE_LOG(INFO, EAL,
+				"memseg[%d] iova %"PRIx64" out of range:\n",
+				i, mcfg->memseg[i].iova);
+
+			RTE_LOG(INFO, EAL, "\tusing dma mask %"PRIx64"\n",
+				mask);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
 /* return the number of memory channels */
 unsigned rte_memory_get_nchannel(void)
 {
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index b9067e67..13bfd9cb 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -227,12 +227,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) {
 /**
  * Bitmap initialization
  *
- * @param mem_size
- *   Minimum expected size of bitmap.
+ * @param n_bits
+ *   Number of pre-allocated bits in array2.
  * @param mem
  *   Base address of array1 and array2.
- * @param n_bits
- *   Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem_size
+ *   Minimum expected size of bitmap.
  * @return
  *   Handle to bitmap instance.
  */
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 80a8fc02..b2a01689 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -209,6 +209,9 @@ unsigned rte_memory_get_nchannel(void);
  */
 unsigned rte_memory_get_nrank(void);
 
+/* check memsegs iovas are within a range based on dma mask */
+int rte_eal_check_dma_mask(uint8_t maskbits);
+
 /**
  * Drivers based on uio will not load unless physical
  * addresses are obtainable. It is only possible to get
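The new rte_eal_check_dma_mask() gives drivers a way to verify that all hugepage memory sits within a device's addressable range. A minimal sketch of how a probe path might use it; the function and macro names prefixed "hypo" and the 40-bit limit are assumptions for illustration, not part of the patch:

#include <rte_log.h>
#include <rte_memory.h>

/* Hypothetical probe path for a device limited to 40-bit IOVAs. */
#define HYPO_DMA_MASK_BITS 40

static int
hypo_pmd_check_dma(void)
{
	/*
	 * rte_eal_check_dma_mask() returns 0 when every memseg IOVA fits
	 * within the mask, and -1 otherwise (or if maskbits > 63).
	 */
	if (rte_eal_check_dma_mask(HYPO_DMA_MASK_BITS) < 0) {
		RTE_LOG(ERR, PMD, "hugepage memory above 40-bit DMA range\n");
		return -1;
	}
	return 0;
}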
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 99ae35e5..4d0a9f7c 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -66,7 +66,7 @@ extern "C" {
 /**
  * Patch level number i.e. the z in yy.mm.z
  */
-#define RTE_VER_MINOR 3
+#define RTE_VER_MINOR 4
 
 /**
  * Extra string to be appended to version number
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 1c20693d..e1179b85 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -449,7 +449,7 @@ rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
 	TAILQ_FOREACH(src, &intr_sources, next) {
 		if (src->intr_handle.fd == intr_handle->fd) {
 			/* we had no interrupts for this */
-			if TAILQ_EMPTY(&src->callbacks)
+			if (TAILQ_EMPTY(&src->callbacks))
 				wake_thread = 1;
 
 			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 17c20d4b..bac969a1 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -88,6 +88,23 @@
 
 static uint64_t baseaddr_offset;
 
+#ifdef RTE_ARCH_64
+/*
+ * Linux kernel uses a really high address as starting address for serving
+ * mmaps calls. If there exists addressing limitations and IOVA mode is VA,
+ * this starting address is likely too high for those devices. However, it
+ * is possible to use a lower address in the process virtual address space
+ * as with 64 bits there is a lot of available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting address
+ * at 4GB implies there are 508GB or 1020GB for mapping the available
+ * hugepages. This is likely enough for most systems, although a device with
+ * addressing limitations should call rte_eal_check_dma_mask for ensuring all
+ * memory is within supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
 static bool phys_addrs_available = true;
 
 #define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
@@ -95,7 +112,7 @@ static bool phys_addrs_available = true;
 static void
 test_phys_addrs_available(void)
 {
-	uint64_t tmp;
+	uint64_t tmp = 0;
 	phys_addr_t physaddr;
 
 	if (!rte_eal_has_hugepages()) {
@@ -250,6 +267,23 @@ aslr_enabled(void)
 	}
 }
 
+static void *
+get_addr_hint(void)
+{
+	if (internal_config.base_virtaddr != 0) {
+		return (void *) (uintptr_t)
+			(internal_config.base_virtaddr +
+			 baseaddr_offset);
+	} else {
+#ifdef RTE_ARCH_64
+		return (void *) (uintptr_t) (baseaddr +
+				baseaddr_offset);
+#else
+		return NULL;
+#endif
+	}
+}
+
 /*
  * Try to mmap *size bytes in /dev/zero. If it is successful, return the
  * pointer to the mmap'd area and keep *size unmodified. Else, retry
@@ -260,16 +294,10 @@ aslr_enabled(void)
 static void *
 get_virtual_area(size_t *size, size_t hugepage_sz)
 {
-	void *addr;
+	void *addr, *addr_hint;
 	int fd;
 	long aligned_addr;
 
-	if (internal_config.base_virtaddr != 0) {
-		addr = (void*) (uintptr_t) (internal_config.base_virtaddr +
-				baseaddr_offset);
-	}
-	else addr = NULL;
-
 	RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
 
 	fd = open("/dev/zero", O_RDONLY);
@@ -278,7 +306,9 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
 		return NULL;
 	}
 	do {
-		addr = mmap(addr,
+		addr_hint = get_addr_hint();
+
+		addr = mmap(addr_hint,
 			(*size) + hugepage_sz, PROT_READ,
 #ifdef RTE_ARCH_PPC_64
 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
@@ -286,8 +316,15 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
 			MAP_PRIVATE,
 #endif
 			fd, 0);
-		if (addr == MAP_FAILED)
+		if (addr == MAP_FAILED) {
+			/* map failed. Let's try with less memory */
 			*size -= hugepage_sz;
+		} else if (addr_hint && addr != addr_hint) {
+			/* hint was not used. Try with another offset */
+			munmap(addr, (*size) + hugepage_sz);
+			addr = MAP_FAILED;
+			baseaddr_offset += 0x100000000;
+		}
 	} while (addr == MAP_FAILED && *size > 0);
 
 	if (addr == MAP_FAILED) {
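The get_virtual_area() change boils down to a hint-and-retry reservation loop: ask the kernel for a low address, and if the hint is ignored, unmap and slide the hint upward. A stand-alone sketch of the same pattern, with the 4GB base and step mirroring the patch; the bounded retry count and function name are assumptions added here:

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#define BASEADDR  0x100000000ULL  /* 4GB starting hint, as in the patch */
#define HINT_STEP 0x100000000ULL  /* slide the hint up 4GB on collision */

static void *
reserve_low_va(size_t size)
{
	uint64_t offset = 0;
	int try;

	for (try = 0; try < 64; try++) {
		void *hint = (void *)(uintptr_t)(BASEADDR + offset);
		void *addr = mmap(hint, size, PROT_READ,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (addr == MAP_FAILED)
			return NULL;
		if (addr == hint)
			return addr;	/* kernel honoured the hint */

		/* Hint ignored: release the mapping, retry at next offset. */
		munmap(addr, size);
		offset += HINT_STEP;
	}
	return NULL;
}

int
main(void)
{
	void *va = reserve_low_va(1 << 21);

	printf("reserved at %p\n", va);
	return va == NULL;
}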
diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c
index e9a579e4..c3947d73 100644
--- a/lib/librte_eal/linuxapp/eal/eal_thread.c
+++ b/lib/librte_eal/linuxapp/eal/eal_thread.c
@@ -205,7 +205,7 @@ int rte_sys_gettid(void)
 
 int rte_thread_setname(pthread_t id, const char *name)
 {
-	int ret = -1;
+	int ret = ENOSYS;
 #if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
 #if __GLIBC_PREREQ(2, 12)
 	ret = pthread_setname_np(id, name);
@@ -213,5 +213,5 @@ int rte_thread_setname(pthread_t id, const char *name)
 #endif
 	RTE_SET_USED(id);
 	RTE_SET_USED(name);
-	return ret;
+	return -ret;
 }
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
index 95e262b7..aed14bcc 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
@@ -826,9 +826,10 @@ static void igb_get_drvinfo(struct net_device *netdev,
 
 	strncpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver) - 1);
 	strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1);
-	strncpy(drvinfo->fw_version, adapter->fw_version,
-		sizeof(drvinfo->fw_version) - 1);
-	strncpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info) -1);
+	strlcpy(drvinfo->fw_version, adapter->fw_version,
+		sizeof(drvinfo->fw_version));
+	strlcpy(drvinfo->bus_info, pci_name(adapter->pdev),
+		sizeof(drvinfo->bus_info));
 	drvinfo->n_stats = IGB_STATS_LEN;
 	drvinfo->testinfo_len = IGB_TEST_LEN;
 	drvinfo->regdump_len = igb_get_regs_len(netdev);
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index 6691edf1..6b738911 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -3944,6 +3944,11 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
 #endif
 #endif
 
+#if (defined(RHEL_RELEASE_CODE) && \
+	(RHEL_RELEASE_VERSION(7, 5) <= RHEL_RELEASE_CODE))
+#define ndo_change_mtu ndo_change_mtu_rh74
+#endif
+
 #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
 #define HAVE_PCI_ENABLE_MSIX
 #endif
diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map
index f4f46c1b..aa6cf871 100644
--- a/lib/librte_eal/rte_eal_version.map
+++ b/lib/librte_eal/rte_eal_version.map
@@ -184,6 +184,7 @@ DPDK_17.11 {
 	rte_eal_create_uio_dev;
 	rte_bus_get_iommu_class;
+	rte_eal_check_dma_mask;
 	rte_eal_has_pci;
 	rte_eal_iova_mode;
 	rte_eal_mbuf_default_mempool_ops;
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index eba11ca5..47e37a6f 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -1877,7 +1877,6 @@ struct rte_eth_dev *rte_eth_dev_allocated(const char *name);
 * to that slot for the driver to use.
 *
 * @param name	Unique identifier name for each Ethernet device
- * @param type	Device type of this Ethernet device
 * @return
 *   - Slot in the rte_dev_devices array for a new device;
 */
@@ -2543,7 +2542,7 @@ void rte_eth_xstats_reset(uint16_t port_id);
 * @param stat_idx
 *   The per-queue packet statistics functionality number that the transmit
 *   queue is to be assigned.
-*   The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+*   The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
 * @return
 *   Zero if successful. Non-zero otherwise.
 */
@@ -2563,7 +2562,7 @@ int rte_eth_dev_set_tx_queue_stats_mapping(uint16_t port_id,
 * @param stat_idx
 *   The per-queue packet statistics functionality number that the receive
 *   queue is to be assigned.
-*   The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1].
+*   The value must be in the range [0, RTE_ETHDEV_QUEUE_STAT_CNTRS - 1].
 * @return
 *   Zero if successful. Non-zero otherwise.
 */
diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
index 90106e6c..d5c3fd56 100644
--- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c
+++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c
@@ -87,6 +87,8 @@ struct rte_event_eth_rx_adapter {
 	int socket_id;
 	/* Per adapter EAL service */
 	uint32_t service_id;
+	/* Adapter started flag */
+	uint8_t rxa_started;
 } __rte_cache_aligned;
 
 /* Per eth device */
@@ -220,6 +222,8 @@ eth_poll_wrr_calc(struct rte_event_eth_rx_adapter *rx_adapter)
 		nb_rx_queues = dev_info->dev->data->nb_rx_queues;
 		if (dev_info->rx_queue == NULL)
 			continue;
+		if (dev_info->internal_event_port)
+			continue;
 		for (q = 0; q < nb_rx_queues; q++) {
 			struct eth_rx_queue_info *queue_info =
 				&dev_info->rx_queue[q];
@@ -414,14 +418,14 @@ flush_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter)
 
 static inline void
 fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
-	uint8_t dev_id,
+	uint16_t eth_dev_id,
 	uint16_t rx_queue_id,
 	struct rte_mbuf **mbufs,
 	uint16_t num)
 {
 	uint32_t i;
 	struct eth_device_info *eth_device_info =
-		&rx_adapter->eth_devices[dev_id];
+		&rx_adapter->eth_devices[eth_dev_id];
 	struct eth_rx_queue_info *eth_rx_queue_info =
 		&eth_device_info->rx_queue[rx_queue_id];
 
@@ -476,7 +480,7 @@ fill_event_buffer(struct rte_event_eth_rx_adapter *rx_adapter,
 * the hypervisor's switching layer where adjustments can be made to deal with
 * it.
 */
-static inline uint32_t
+static inline void
 eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
 {
 	uint32_t num_queue;
@@ -503,8 +507,10 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
 		 */
 		if (buf->count >= BATCH_SIZE)
 			flush_event_buffer(rx_adapter);
-		if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count))
-			break;
+		if (BATCH_SIZE > (ETH_EVENT_BUFFER_SIZE - buf->count)) {
+			rx_adapter->wrr_pos = wrr_pos;
+			return;
+		}
 
 		stats->rx_poll_count++;
 		n = rte_eth_rx_burst(d, qid, mbufs, BATCH_SIZE);
@@ -519,7 +525,7 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
 			if (nb_rx > max_nb_rx) {
 				rx_adapter->wrr_pos =
 				    (wrr_pos + 1) % rx_adapter->wrr_len;
-				return nb_rx;
+				break;
 			}
 		}
 
@@ -527,20 +533,22 @@ eth_rx_poll(struct rte_event_eth_rx_adapter *rx_adapter)
 			wrr_pos = 0;
 	}
 
-	return nb_rx;
+	if (buf->count >= BATCH_SIZE)
+		flush_event_buffer(rx_adapter);
 }
 
 static int
 event_eth_rx_adapter_service_func(void *args)
 {
 	struct rte_event_eth_rx_adapter *rx_adapter = args;
-	struct rte_eth_event_enqueue_buffer *buf;
 
-	buf = &rx_adapter->event_enqueue_buffer;
 	if (rte_spinlock_trylock(&rx_adapter->rx_lock) == 0)
 		return 0;
-	if (eth_rx_poll(rx_adapter) == 0 && buf->count)
-		flush_event_buffer(rx_adapter);
+	if (!rx_adapter->rxa_started) {
+		rte_spinlock_unlock(&rx_adapter->rx_lock);
+		return 0;
+	}
+	eth_rx_poll(rx_adapter);
 	rte_spinlock_unlock(&rx_adapter->rx_lock);
 	return 0;
 }
@@ -829,8 +837,12 @@ rx_adapter_ctrl(uint8_t id, int start)
 						&rte_eth_devices[i]);
 	}
 
-	if (use_service)
+	if (use_service) {
+		rte_spinlock_lock(&rx_adapter->rx_lock);
+		rx_adapter->rxa_started = start;
 		rte_service_runstate_set(rx_adapter->service_id, start);
+		rte_spinlock_unlock(&rx_adapter->rx_lock);
+	}
 
 	return 0;
 }
@@ -1032,6 +1044,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
 					&rte_eth_devices[eth_dev_id],
 					rx_queue_id, queue_conf);
 		if (ret == 0) {
+			dev_info->internal_event_port = 1;
 			update_queue_info(rx_adapter,
 					&rx_adapter->eth_devices[eth_dev_id],
 					rx_queue_id,
@@ -1039,6 +1052,7 @@ rte_event_eth_rx_adapter_queue_add(uint8_t id,
 		}
 	} else {
 		rte_spinlock_lock(&rx_adapter->rx_lock);
+		dev_info->internal_event_port = 0;
 		ret = init_service(rx_adapter, id);
 		if (ret == 0)
 			ret = add_rx_queue(rx_adapter, eth_dev_id,
					rx_queue_id,
diff --git a/lib/librte_eventdev/rte_event_ring.c b/lib/librte_eventdev/rte_event_ring.c
index b14c2127..c0603cb9 100644
--- a/lib/librte_eventdev/rte_event_ring.c
+++ b/lib/librte_eventdev/rte_event_ring.c
@@ -110,11 +110,16 @@ rte_event_ring_create(const char *name, unsigned int count, int socket_id,
 	mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags);
 	if (mz != NULL) {
 		r = mz->addr;
-		/*
-		 * no need to check return value here, we already checked the
-		 * arguments above
-		 */
-		rte_event_ring_init(r, name, requested_count, flags);
+		/* Check return value in case rte_ring_init() fails on size */
+		int err = rte_event_ring_init(r, name, requested_count, flags);
+		if (err) {
+			RTE_LOG(ERR, RING, "Ring init failed\n");
+			if (rte_memzone_free(mz) != 0)
+				RTE_LOG(ERR, RING, "Cannot free memzone\n");
+			rte_free(te);
+			rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+			return NULL;
+		}
 
 		te->data = (void *) r;
 		r->r.memzone = mz;
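The service-function change pairs the existing trylock with the new rxa_started flag so a stopped adapter does no polling, and the lock is released before every return. A condensed sketch of that pattern; the "hypo" struct and function names stand in for the adapter internals and are assumptions:

#include <stdint.h>
#include <rte_spinlock.h>

/* Hypothetical adapter state mirroring the fields used in the patch. */
struct hypo_adapter {
	rte_spinlock_t lock;	/* rx_lock */
	uint8_t started;	/* rxa_started */
};

static int32_t
hypo_service_func(void *args)
{
	struct hypo_adapter *a = args;

	if (rte_spinlock_trylock(&a->lock) == 0)
		return 0;	/* another lcore is polling */
	if (!a->started) {
		/* Unlock before returning so the lock is never leaked. */
		rte_spinlock_unlock(&a->lock);
		return 0;
	}
	/* ... poll Rx queues and enqueue events here ... */
	rte_spinlock_unlock(&a->lock);
	return 0;
}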
diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index cbf78fab..9be514dc 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -154,13 +154,13 @@ rte_hash_create(const struct rte_hash_parameters *params)
 		 * except for the first cache
 		 */
 		num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
-					LCORE_CACHE_SIZE + 1;
+					(LCORE_CACHE_SIZE - 1) + 1;
 	else
 		num_key_slots = params->entries + 1;
 
 	snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
 	/* Create ring (Dummy slot index is not enqueued) */
-	r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots - 1),
+	r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
 			params->socket_id, 0);
 	if (r == NULL) {
 		RTE_LOG(ERR, HASH, "memory allocation failed\n");
@@ -302,14 +302,17 @@ rte_hash_create(const struct rte_hash_parameters *params)
 			h->add_key = ADD_KEY_MULTIWRITER;
 			h->multiwriter_lock = rte_malloc(NULL,
 							sizeof(rte_spinlock_t),
-							LCORE_CACHE_SIZE);
+							RTE_CACHE_LINE_SIZE);
+			if (h->multiwriter_lock == NULL)
+				goto err_unlock;
+
 			rte_spinlock_init(h->multiwriter_lock);
 		}
 	} else
 		h->add_key = ADD_KEY_SINGLEWRITER;
 
 	/* Populate free slots ring. Entry zero is reserved for key misses. */
-	for (i = 1; i < params->entries + 1; i++)
+	for (i = 1; i < num_key_slots; i++)
 		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
 
 	te->data = (void *) h;
@@ -391,7 +394,7 @@ void
 rte_hash_reset(struct rte_hash *h)
 {
 	void *ptr;
-	unsigned i;
+	uint32_t tot_ring_cnt, i;
 
 	if (h == NULL)
 		return;
@@ -404,7 +407,13 @@ rte_hash_reset(struct rte_hash *h)
 		rte_pause();
 
 	/* Repopulate the free slots ring. Entry zero is reserved for key misses */
-	for (i = 1; i < h->entries + 1; i++)
+	if (h->hw_trans_mem_support)
+		tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
+					(LCORE_CACHE_SIZE - 1);
+	else
+		tot_ring_cnt = h->entries;
+
+	for (i = 1; i < tot_ring_cnt + 1; i++)
 		rte_ring_sp_enqueue(h->free_slots,
 				(void *)((uintptr_t) i));
 
 	if (h->hw_trans_mem_support) {
diff --git a/lib/librte_hash/rte_cuckoo_hash_x86.h b/lib/librte_hash/rte_cuckoo_hash_x86.h
index 0c94244a..a2f1663e 100644
--- a/lib/librte_hash/rte_cuckoo_hash_x86.h
+++ b/lib/librte_hash/rte_cuckoo_hash_x86.h
@@ -95,6 +95,9 @@ rte_hash_cuckoo_move_insert_mw_tm(const struct rte_hash *h,
 	while (try < RTE_HASH_TSX_MAX_RETRY) {
 		status = rte_xbegin();
 		if (likely(status == RTE_XBEGIN_STARTED)) {
+			/* In case empty slot was gone before entering TSX */
+			if (curr_bkt->key_idx[curr_slot] != EMPTY_SLOT)
+				rte_xabort(RTE_XABORT_CUCKOO_PATH_INVALIDED);
 			while (likely(curr_node->prev != NULL)) {
 				prev_node = curr_node->prev;
 				prev_bkt = prev_node->bkt;
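The key-slot fix sizes the free-slot ring for what the per-lcore caches can actually strand: each cache holds at most LCORE_CACHE_SIZE - 1 entries, not LCORE_CACHE_SIZE. A small worked example of the corrected arithmetic; the two constants are illustrative stand-ins, not necessarily the build defaults:

#include <stdint.h>
#include <stdio.h>

/* Illustrative values; the real ones come from the DPDK build config. */
#define RTE_MAX_LCORE		128
#define LCORE_CACHE_SIZE	64

int
main(void)
{
	uint32_t entries = 1024;
	/*
	 * With hardware transactional memory support, each lcore cache can
	 * strand at most LCORE_CACHE_SIZE - 1 slots, so the free-slot ring
	 * must cover them too, plus the reserved dummy slot zero.
	 */
	uint32_t num_key_slots = entries +
		(RTE_MAX_LCORE - 1) * (LCORE_CACHE_SIZE - 1) + 1;

	printf("free-slot ring sized for %u slots\n", num_key_slots);
	return 0;
}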
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index 622c1800..5539e338 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -283,8 +283,8 @@ rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
 *   Output containing a pointer to the key
 * @return
 *   - 0 if retrieved successfully
-*   - EINVAL if the parameters are invalid.
-*   - ENOENT if no valid key is found in the given position.
+*   - -EINVAL if the parameters are invalid.
+*   - -ENOENT if no valid key is found in the given position.
 */
int
rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
@@ -301,9 +301,11 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
 * @param data
 *   Output with pointer to data returned from the hash table.
 * @return
-*   0 if successful lookup
-*   - EINVAL if the parameters are invalid.
-*   - ENOENT if the key is not found.
+*   - A positive value that can be used by the caller as an offset into an
+*     array of user data. This value is unique for this key, and is the same
+*     value that was returned when the key was added.
+*   - -EINVAL if the parameters are invalid.
+*   - -ENOENT if the key is not found.
 */
int
rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
@@ -322,9 +324,11 @@ rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data);
 * @param data
 *   Output with pointer to data returned from the hash table.
 * @return
-*   0 if successful lookup
-*   - EINVAL if the parameters are invalid.
-*   - ENOENT if the key is not found.
+*   - A positive value that can be used by the caller as an offset into an
+*     array of user data. This value is unique for this key, and is the same
+*     value that was returned when the key was added.
+*   - -EINVAL if the parameters are invalid.
+*   - -ENOENT if the key is not found.
 */
int
rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key,
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 8eca8c03..4b45d1a0 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -681,6 +681,9 @@ rte_kni_get(const char *name)
 	struct rte_kni_memzone_slot *it;
 	struct rte_kni *kni;
 
+	if (name == NULL || name[0] == '\0')
+		return NULL;
+
 	/* Note: could be improved perf-wise if necessary */
 	for (i = 0; i < kni_memzone_pool.max_ifaces; i++) {
 		it = &kni_memzone_pool.slots[i];
diff --git a/lib/librte_metrics/rte_metrics.c b/lib/librte_metrics/rte_metrics.c
index d9404001..dc634467 100644
--- a/lib/librte_metrics/rte_metrics.c
+++ b/lib/librte_metrics/rte_metrics.c
@@ -124,6 +124,9 @@ rte_metrics_reg_names(const char * const *names, uint16_t cnt_names)
 	/* Some sanity checks */
 	if (cnt_names < 1 || names == NULL)
 		return -EINVAL;
+	for (idx_name = 0; idx_name < cnt_names; idx_name++)
+		if (names[idx_name] == NULL)
+			return -EINVAL;
 
 	memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
 	if (memzone == NULL)
@@ -190,6 +193,11 @@ rte_metrics_update_values(int port_id,
 	stats = memzone->addr;
 
 	rte_spinlock_lock(&stats->lock);
+
+	if (key >= stats->cnt_stats) {
+		rte_spinlock_unlock(&stats->lock);
+		return -EINVAL;
+	}
 	idx_metric = key;
 	cnt_setsize = 1;
 	while (idx_metric < stats->cnt_stats) {
@@ -231,9 +239,8 @@ rte_metrics_get_names(struct rte_metric_name *names,
 	int return_value;
 
 	memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
-	/* If not allocated, fail silently */
 	if (memzone == NULL)
-		return 0;
+		return -EIO;
 
 	stats = memzone->addr;
 	rte_spinlock_lock(&stats->lock);
@@ -269,9 +276,9 @@ rte_metrics_get_values(int port_id,
 		return -EINVAL;
 
 	memzone = rte_memzone_lookup(RTE_METRICS_MEMZONE_NAME);
-	/* If not allocated, fail silently */
 	if (memzone == NULL)
-		return 0;
+		return -EIO;
+
 	stats = memzone->addr;
 
 	rte_spinlock_lock(&stats->lock);
diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h
index 7069d52e..fc433b05 100644
--- a/lib/librte_ring/rte_ring.h
+++ b/lib/librte_ring/rte_ring.h
@@ -582,7 +582,7 @@ __rte_ring_do_dequeue(struct rte_ring *r, void **obj_table,
 	uint32_t cons_head, cons_next;
 	uint32_t entries;
 
-	n = __rte_ring_move_cons_head(r, is_sc, n, behavior,
+	n = __rte_ring_move_cons_head(r, (int)is_sc, n, behavior,
 			&cons_head, &cons_next, &entries);
 	if (n == 0)
 		goto end;
diff --git a/lib/librte_security/rte_security.c b/lib/librte_security/rte_security.c
index 685729f4..503f5b53 100644
--- a/lib/librte_security/rte_security.c
+++ b/lib/librte_security/rte_security.c
@@ -84,7 +84,6 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
 			     struct rte_security_session *sess)
 {
 	int ret;
-	struct rte_mempool *mp = rte_mempool_from_obj(sess);
 
 	RTE_FUNC_PTR_OR_ERR_RET(*instance->ops->session_destroy, -ENOTSUP);
 
@@ -93,7 +92,7 @@ rte_security_session_destroy(struct rte_security_ctx *instance,
 	ret = instance->ops->session_destroy(instance->device, sess);
 	if (!ret)
-		rte_mempool_put(mp, (void *)sess);
+		rte_mempool_put(rte_mempool_from_obj(sess), (void *)sess);
 
 	return ret;
 }
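The corrected rte_hash_lookup_data() documentation matters to callers: on success the return value is not 0 but the key's position, reusable as an index into caller-owned data. A sketch of that usage; the flow_tbl array and its sizing are assumptions for illustration:

#include <stdint.h>
#include <rte_hash.h>

/* Hypothetical per-flow statistics, sized to the hash capacity. */
struct flow_stats {
	uint64_t pkts;
};
static struct flow_stats flow_tbl[1024];

static void
count_packet(const struct rte_hash *h, const void *key)
{
	void *data;
	int pos = rte_hash_lookup_data(h, key, &data);

	if (pos < 0)
		return;	/* -EINVAL or -ENOENT */

	/* Same position rte_hash_add_key() returned when the key was added. */
	flow_tbl[pos].pkts++;
}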
diff --git a/lib/librte_vhost/iotlb.c b/lib/librte_vhost/iotlb.c
index c11ebcaa..c6354fef 100644
--- a/lib/librte_vhost/iotlb.c
+++ b/lib/librte_vhost/iotlb.c
@@ -303,6 +303,13 @@ out:
 	return vva;
 }
 
+void
+vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq)
+{
+	vhost_user_iotlb_cache_remove_all(vq);
+	vhost_user_iotlb_pending_remove_all(vq);
+}
+
 int
 vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
 {
@@ -315,8 +322,7 @@ vhost_user_iotlb_init(struct virtio_net *dev, int vq_index)
 		 * The cache has already been initialized,
 		 * just drop all cached and pending entries.
 		 */
-		vhost_user_iotlb_cache_remove_all(vq);
-		vhost_user_iotlb_pending_remove_all(vq);
+		vhost_user_iotlb_flush_all(vq);
 	}
 
 #ifdef RTE_LIBRTE_VHOST_NUMA
diff --git a/lib/librte_vhost/iotlb.h b/lib/librte_vhost/iotlb.h
index e7083e37..60b9e4c5 100644
--- a/lib/librte_vhost/iotlb.h
+++ b/lib/librte_vhost/iotlb.h
@@ -73,7 +73,7 @@ void vhost_user_iotlb_pending_insert(struct vhost_virtqueue *vq, uint64_t iova,
 					uint8_t perm);
 void vhost_user_iotlb_pending_remove(struct vhost_virtqueue *vq, uint64_t iova,
 					uint64_t size, uint8_t perm);
-
+void vhost_user_iotlb_flush_all(struct vhost_virtqueue *vq);
 int vhost_user_iotlb_init(struct virtio_net *dev, int vq_index);
 
 #endif /* _VHOST_IOTLB_H_ */
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 16d6b891..11ce2711 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -59,6 +59,8 @@
 
 #define BUF_VECTOR_MAX 256
 
+#define VHOST_LOG_CACHE_NR 32
+
 /**
  * Structure contains buffer address, length and descriptor index
  * from vring to do scatter RX.
@@ -92,6 +94,14 @@ struct batch_copy_elem {
 	uint64_t log_addr;
 };
 
+/*
+ * Structure that contains the info for batched dirty logging.
+ */
+struct log_cache_entry {
+	uint32_t offset;
+	unsigned long val;
+};
+
 /**
  * Structure contains variables relevant to RX/TX virtqueues.
 */
@@ -133,6 +143,9 @@ struct vhost_virtqueue {
 	struct batch_copy_elem	*batch_copy_elems;
 	uint16_t		batch_copy_nb_elems;
 
+	struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR];
+	uint16_t log_cache_nb_elem;
+
 	rte_rwlock_t	iotlb_lock;
 	rte_rwlock_t	iotlb_pending_lock;
 	struct rte_mempool *iotlb_pool;
@@ -266,7 +279,15 @@ struct virtio_net {
 static __rte_always_inline void
 vhost_set_bit(unsigned int nr, volatile uint8_t *addr)
 {
-	__sync_fetch_and_or_8(addr, (1U << nr));
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+	/*
+	 * __sync_ built-ins are deprecated, but __atomic_ ones
+	 * are sub-optimized in older GCC versions.
+	 */
+	__sync_fetch_and_or_1(addr, (1U << nr));
+#else
+	__atomic_fetch_or(addr, (1U << nr), __ATOMIC_RELAXED);
+#endif
 }
 
 static __rte_always_inline void
@@ -298,6 +319,103 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len)
 }
 
 static __rte_always_inline void
+vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq)
+{
+	unsigned long *log_base;
+	int i;
+
+	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		   !dev->log_base))
+		return;
+
+	log_base = (unsigned long *)(uintptr_t)dev->log_base;
+
+	/*
+	 * It is expected a write memory barrier has been issued
+	 * before this function is called.
+	 */
+
+	for (i = 0; i < vq->log_cache_nb_elem; i++) {
+		struct log_cache_entry *elem = vq->log_cache + i;
+
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70100)
+		/*
+		 * '__sync' builtins are deprecated, but '__atomic' ones
+		 * are sub-optimized in older GCC versions.
+		 */
+		__sync_fetch_and_or(log_base + elem->offset, elem->val);
+#else
+		__atomic_fetch_or(log_base + elem->offset, elem->val,
+				__ATOMIC_RELAXED);
+#endif
+	}
+
+	rte_smp_wmb();
+
+	vq->log_cache_nb_elem = 0;
+}
+
+static __rte_always_inline void
+vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t page)
+{
+	uint32_t bit_nr = page % (sizeof(unsigned long) << 3);
+	uint32_t offset = page / (sizeof(unsigned long) << 3);
+	int i;
+
+	for (i = 0; i < vq->log_cache_nb_elem; i++) {
+		struct log_cache_entry *elem = vq->log_cache + i;
+
+		if (elem->offset == offset) {
+			elem->val |= (1UL << bit_nr);
+			return;
+		}
+	}
+
+	if (unlikely(i >= VHOST_LOG_CACHE_NR)) {
+		/*
+		 * No more room for a new log cache entry,
+		 * so write the dirty log map directly.
+		 */
+		rte_smp_wmb();
+		vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page);
+
+		return;
+	}
+
+	vq->log_cache[i].offset = offset;
+	vq->log_cache[i].val = (1UL << bit_nr);
+	vq->log_cache_nb_elem++;
+}
+
+static __rte_always_inline void
+vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t addr, uint64_t len)
+{
+	uint64_t page;
+
+	if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) ||
+		   !dev->log_base || !len))
+		return;
+
+	if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8)))
+		return;
+
+	page = addr / VHOST_LOG_PAGE;
+	while (page * VHOST_LOG_PAGE < addr + len) {
+		vhost_log_cache_page(dev, vq, page);
+		page += 1;
+	}
+}
+
+static __rte_always_inline void
+vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
+			uint64_t offset, uint64_t len)
+{
+	vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len);
+}
+
+static __rte_always_inline void
 vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 			uint64_t offset, uint64_t len)
 {
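The new dirty-page log cache keys each entry by the word offset into the shared log bitmap: one bit per VHOST_LOG_PAGE page, with bits grouped into unsigned longs. A worked example of that page-to-word/bit mapping; the write address and length are chosen arbitrarily:

#include <inttypes.h>
#include <stdio.h>

#define VHOST_LOG_PAGE 4096	/* dirty-log granularity */

int
main(void)
{
	/* Arbitrary guest write: 100 bytes starting at address 0x12345. */
	uint64_t addr = 0x12345, len = 100;
	uint64_t page;

	for (page = addr / VHOST_LOG_PAGE;
	     page * VHOST_LOG_PAGE < addr + len; page++) {
		/* One bit per page, bits grouped into unsigned longs. */
		uint32_t offset = page / (sizeof(unsigned long) * 8);
		uint32_t bit_nr = page % (sizeof(unsigned long) * 8);

		printf("page %" PRIu64 " -> word %u, bit %u\n",
		       page, offset, bit_nr);
	}
	return 0;
}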
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index c0efb310..0eb5e0d6 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -623,8 +623,9 @@ vhost_memory_changed(struct VhostUserMemory *new,
 }
 
 static int
-vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
 {
+	struct virtio_net *dev = *pdev;
 	struct VhostUserMemory memory = pmsg->payload.memory;
 	struct rte_vhost_mem_region *reg;
 	void *mmap_addr;
@@ -650,6 +651,11 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 		dev->mem = NULL;
 	}
 
+	/* Flush IOTLB cache as previous HVAs are now invalid */
+	if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
+		for (i = 0; i < dev->nr_vring; i++)
+			vhost_user_iotlb_flush_all(dev->virtqueue[i]);
+
 	dev->nr_guest_pages = 0;
 	if (!dev->guest_pages) {
 		dev->max_guest_pages = 8;
@@ -743,6 +749,25 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg)
 			mmap_offset);
 	}
 
+	for (i = 0; i < dev->nr_vring; i++) {
+		struct vhost_virtqueue *vq = dev->virtqueue[i];
+
+		if (vq->desc || vq->avail || vq->used) {
+			/*
+			 * If the memory table got updated, the ring addresses
+			 * need to be translated again as virtual addresses have
+			 * changed.
+			 */
+			vring_invalidate(dev, vq);
+
+			dev = translate_ring_addresses(dev, i);
+			if (!dev)
+				return -1;
+
+			*pdev = dev;
+		}
+	}
+
 	dump_guest_pages(dev);
 
 	return 0;
@@ -1404,7 +1429,7 @@ vhost_user_msg_handler(int vid, int fd)
 		break;
 
 	case VHOST_USER_SET_MEM_TABLE:
-		ret = vhost_user_set_mem_table(dev, &msg);
+		ret = vhost_user_set_mem_table(&dev, &msg);
 		break;
 
 	case VHOST_USER_SET_LOG_BASE:
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c
index ecfabca3..bde360f3 100644
--- a/lib/librte_vhost/virtio_net.c
+++ b/lib/librte_vhost/virtio_net.c
@@ -107,7 +107,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	rte_memcpy(&vq->used->ring[to],
 			&vq->shadow_used_ring[from],
 			size * sizeof(struct vring_used_elem));
-	vhost_log_used_vring(dev, vq,
+	vhost_log_cache_used_vring(dev, vq,
 			offsetof(struct vring_used, ring[to]),
 			size * sizeof(struct vring_used_elem));
 }
@@ -135,6 +135,8 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq)
 
 	rte_smp_wmb();
 
+	vhost_log_cache_sync(dev, vq);
+
 	*(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx;
 	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 		sizeof(vq->used->idx));
@@ -159,7 +161,7 @@ do_data_copy_enqueue(struct virtio_net *dev, struct vhost_virtqueue *vq)
 
 	for (i = 0; i < count; i++) {
 		rte_memcpy(elem[i].dst, elem[i].src, elem[i].len);
-		vhost_log_write(dev, elem[i].log_addr, elem[i].len);
+		vhost_log_cache_write(dev, vq, elem[i].log_addr, elem[i].len);
 		PRINT_PACKET(dev, (uintptr_t)elem[i].dst, elem[i].len, 0);
 	}
 }
@@ -275,7 +277,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		virtio_enqueue_offload(m,
 				(struct virtio_net_hdr *)(uintptr_t)desc_addr);
 		PRINT_PACKET(dev, (uintptr_t)desc_addr, dev->vhost_hlen, 0);
-		vhost_log_write(dev, desc_gaddr, dev->vhost_hlen);
+		vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen);
 	} else {
 		struct virtio_net_hdr vnet_hdr;
 		uint64_t remain = dev->vhost_hlen;
@@ -298,7 +300,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 					(void *)(uintptr_t)src, len);
 
 			PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0);
-			vhost_log_write(dev, guest_addr, len);
+			vhost_log_cache_write(dev, vq, guest_addr, len);
 			remain -= len;
 			guest_addr += len;
 			dst += len;
@@ -379,7 +381,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 							desc_offset)),
 				rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
 				cpy_len);
-			vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
+			vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+					cpy_len);
 			PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
 				     cpy_len, 0);
 		} else {
@@ -468,7 +471,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 		vq->used->ring[used_idx].id = desc_indexes[i];
 		vq->used->ring[used_idx].len = pkts[i]->pkt_len +
 					       dev->vhost_hlen;
-		vhost_log_used_vring(dev, vq,
+		vhost_log_cache_used_vring(dev, vq,
 			offsetof(struct vring_used, ring[used_idx]),
 			sizeof(vq->used->ring[used_idx]));
 	}
@@ -528,6 +531,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
 
 	rte_smp_wmb();
 
+	vhost_log_cache_sync(dev, vq);
+
 	*(volatile uint16_t *)&vq->used->idx += count;
 	vq->last_used_idx += count;
 	vhost_log_used_vring(dev, vq,
@@ -797,7 +802,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 				PRINT_PACKET(dev, (uintptr_t)dst,
 						(uint32_t)len, 0);
-				vhost_log_write(dev, guest_addr, len);
+				vhost_log_cache_write(dev, vq,
+						guest_addr, len);
 
 				remain -= len;
 				guest_addr += len;
@@ -806,7 +812,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 		} else {
 			PRINT_PACKET(dev, (uintptr_t)hdr_addr,
 					dev->vhost_hlen, 0);
-			vhost_log_write(dev, hdr_phys_addr,
+			vhost_log_cache_write(dev, vq, hdr_phys_addr,
 					dev->vhost_hlen);
 		}
 
@@ -820,7 +826,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq,
 						desc_offset)),
 			rte_pktmbuf_mtod_offset(m, void *, mbuf_offset),
 			cpy_len);
-		vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len);
+		vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset,
+				cpy_len);
 		PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset),
 			     cpy_len, 0);
 	} else {
@@ -1384,7 +1391,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq,
 {
 	vq->used->ring[used_idx].id  = desc_idx;
 	vq->used->ring[used_idx].len = 0;
-	vhost_log_used_vring(dev, vq,
+	vhost_log_cache_used_vring(dev, vq,
 			offsetof(struct vring_used, ring[used_idx]),
 			sizeof(vq->used->ring[used_idx]));
 }
@@ -1399,6 +1406,8 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	rte_smp_wmb();
 	rte_smp_rmb();
 
+	vhost_log_cache_sync(dev, vq);
+
 	vq->used->idx += count;
 	vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx),
 			sizeof(vq->used->idx));
@@ -1557,7 +1566,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id,
 		if (rarp_mbuf == NULL) {
 			RTE_LOG(ERR, VHOST_DATA,
 				"Failed to allocate memory for mbuf.\n");
-			return 0;
+			goto out;
 		}
 
 		if (make_rarp_packet(rarp_mbuf, &dev->mac)) {
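Taken together, the virtio_net.c changes keep one ordering invariant: dirty bits are staged per-virtqueue while packets are copied, then flushed after the write barrier and before the used index is published to the guest. A stand-alone model of that flow; the struct and function names here are hypothetical, not the vhost internals:

#include <stdatomic.h>
#include <stdint.h>

/* Stand-alone model of one virtqueue's staged dirty log. */
struct cache_entry {
	uint32_t offset;	/* word index into the shared bitmap */
	unsigned long val;	/* bits staged for that word */
};

struct vq_model {
	struct cache_entry cache[32];	/* log_cache */
	int cache_cnt;			/* log_cache_nb_elem */
	_Atomic unsigned long *log;	/* shared dirty bitmap */
	volatile uint16_t used_idx;	/* index the guest polls */
};

static void
publish_used(struct vq_model *vq, uint16_t count)
{
	int i;

	atomic_thread_fence(memory_order_release);	/* rte_smp_wmb() */

	/* vhost_log_cache_sync(): one atomic OR per touched word */
	for (i = 0; i < vq->cache_cnt; i++)
		atomic_fetch_or(&vq->log[vq->cache[i].offset],
				vq->cache[i].val);
	vq->cache_cnt = 0;

	vq->used_idx += count;	/* only now visible to the guest */
}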