Diffstat (limited to 'lib')
31 files changed, 311 insertions(+), 107 deletions(-)
diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c index bed66be0..35a0140b 100644 --- a/lib/librte_acl/acl_gen.c +++ b/lib/librte_acl/acl_gen.c @@ -163,7 +163,7 @@ acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one) for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) { if (bits->bits[n / (sizeof(bits_t) * 8)] & - (1 << (n % (sizeof(bits_t) * 8)))) { + (1U << (n % (sizeof(bits_t) * 8)))) { if (zero_one == 1 && last_bit != 1) ranges++; last_bit = 1; diff --git a/lib/librte_compressdev/rte_comp.c b/lib/librte_compressdev/rte_comp.c index c663be59..4634c127 100644 --- a/lib/librte_compressdev/rte_comp.c +++ b/lib/librte_compressdev/rte_comp.c @@ -174,7 +174,7 @@ rte_comp_op_alloc(struct rte_mempool *mempool) int retval; retval = rte_comp_op_raw_bulk_alloc(mempool, &op, 1); - if (unlikely(retval < 0)) + if (unlikely(retval != 1)) return NULL; rte_comp_op_reset(op); @@ -186,12 +186,12 @@ int __rte_experimental rte_comp_op_bulk_alloc(struct rte_mempool *mempool, struct rte_comp_op **ops, uint16_t nb_ops) { - int ret; + int retval; uint16_t i; - ret = rte_comp_op_raw_bulk_alloc(mempool, ops, nb_ops); - if (unlikely(ret < nb_ops)) - return ret; + retval = rte_comp_op_raw_bulk_alloc(mempool, ops, nb_ops); + if (unlikely(retval != nb_ops)) + return 0; for (i = 0; i < nb_ops; i++) rte_comp_op_reset(ops[i]); diff --git a/lib/librte_compressdev/rte_compressdev.h b/lib/librte_compressdev/rte_compressdev.h index 5b4fca4d..7b68170a 100644 --- a/lib/librte_compressdev/rte_compressdev.h +++ b/lib/librte_compressdev/rte_compressdev.h @@ -408,6 +408,13 @@ rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, * @note All compression operations are Out-of-place (OOP) operations, * as the size of the output data is different to the size of the input data. * + * @note The rte_comp_op contains both input and output parameters and is the + * vehicle for the application to pass data into and out of the PMD. While an + * op is inflight, i.e. once it has been enqueued, the private_xform or stream + * attached to it and any mbufs or memory referenced by it should not be altered + * or freed by the application. The PMD may use or change some of this data at + * any time until it has been returned in a dequeue operation. + * * @note The flush flag only applies to operations which return SUCCESS. * In OUT_OF_SPACE cases whether STATEFUL or STATELESS, data in dest buffer * is as if flush flag was FLUSH_NONE. 
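A minimal caller sketch for the reworked rte_comp_op_bulk_alloc() semantics above (not part of the patch, and the API is still experimental in this release): with this change the function either fills all nb_ops entries or allocates nothing and returns 0, so a caller never has to free a partially allocated batch. Pool creation and op setup are assumed to happen elsewhere, and burst is assumed non-zero.

#include <errno.h>
#include <rte_comp.h>
#include <rte_mempool.h>

static int
alloc_comp_burst(struct rte_mempool *op_pool, struct rte_comp_op **ops,
		uint16_t burst)
{
	/* With this patch the allocation is all-or-nothing: 0 means the
	 * pool could not supply every op, and nothing was taken from it.
	 */
	if (rte_comp_op_bulk_alloc(op_pool, ops, burst) == 0)
		return -ENOMEM;	/* nothing allocated, nothing to roll back */

	/* Per the note above: while an op is enqueued, its mbufs, stream
	 * or private_xform must not be modified or freed by the app.
	 */
	return 0;
}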
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index 62e9ed47..5759ec2d 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -186,7 +186,7 @@ err_devarg: return ret; } -int __rte_experimental +int rte_dev_probe(const char *devargs) { struct eal_dev_mp_req req; @@ -322,7 +322,7 @@ local_dev_remove(struct rte_device *dev) return 0; } -int __rte_experimental +int rte_dev_remove(struct rte_device *dev) { struct eal_dev_mp_req req; diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c index 56b492f5..c63a943b 100644 --- a/lib/librte_eal/common/eal_common_errno.c +++ b/lib/librte_eal/common/eal_common_errno.c @@ -2,6 +2,9 @@ * Copyright(c) 2010-2014 Intel Corporation */ +/* Use XSI-compliant portable version of strerror_r() */ +#undef _GNU_SOURCE + #include <stdint.h> #include <stdio.h> #include <string.h> diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 12dcedf5..87fd9921 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -49,7 +49,7 @@ static uint64_t system_page_sz; * Current known limitations are 39 or 40 bits. Setting the starting address * at 4GB implies there are 508GB or 1020GB for mapping the available * hugepages. This is likely enough for most systems, although a device with - * addressing limitations should call rte_eal_check_dma_mask for ensuring all + * addressing limitations should call rte_mem_check_dma_mask for ensuring all * memory is within supported range. */ static uint64_t baseaddr = 0x100000000; @@ -446,11 +446,12 @@ check_iova(const struct rte_memseg_list *msl __rte_unused, #endif /* check memseg iovas are within the required range based on dma mask */ -int __rte_experimental -rte_eal_check_dma_mask(uint8_t maskbits) +static int __rte_experimental +check_dma_mask(uint8_t maskbits, bool thread_unsafe) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; uint64_t mask; + int ret; /* sanity check */ if (maskbits > MAX_DMA_MASK_BITS) { @@ -462,7 +463,12 @@ rte_eal_check_dma_mask(uint8_t maskbits) /* create dma mask */ mask = ~((1ULL << maskbits) - 1); - if (rte_memseg_walk(check_iova, &mask)) + if (thread_unsafe) + ret = rte_memseg_walk_thread_unsafe(check_iova, &mask); + else + ret = rte_memseg_walk(check_iova, &mask); + + if (ret) /* * Dma mask precludes hugepage usage. * This device can not be used and we do not need to keep @@ -480,6 +486,34 @@ rte_eal_check_dma_mask(uint8_t maskbits) return 0; } +int __rte_experimental +rte_mem_check_dma_mask(uint8_t maskbits) +{ + return check_dma_mask(maskbits, false); +} + +int __rte_experimental +rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits) +{ + return check_dma_mask(maskbits, true); +} + +/* + * Set dma mask to use when memory initialization is done. + * + * This function should ONLY be used by code executed before the memory + * initialization. PMDs should use rte_mem_check_dma_mask if addressing + * limitations by the device. + */ +void __rte_experimental +rte_mem_set_dma_mask(uint8_t maskbits) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? 
maskbits : + RTE_MIN(mcfg->dma_maskbits, maskbits); +} + /* return the number of memory channels */ unsigned rte_memory_get_nchannel(void) { diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index b82f3ddd..e31eca5c 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -222,7 +222,7 @@ eal_plugin_add(const char *path) return -1; } memset(solib, 0, sizeof(*solib)); - strncpy(solib->name, path, PATH_MAX-1); + strlcpy(solib->name, path, PATH_MAX-1); solib->name[PATH_MAX-1] = 0; TAILQ_INSERT_TAIL(&solib_list, solib, next); diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c index 84f59d95..7c9fcc46 100644 --- a/lib/librte_eal/common/hotplug_mp.c +++ b/lib/librte_eal/common/hotplug_mp.c @@ -243,7 +243,7 @@ static void __handle_primary_request(void *param) da = calloc(1, sizeof(*da)); if (da == NULL) { ret = -ENOMEM; - goto quit; + break; } ret = rte_devargs_parse(da, req->devargs); @@ -266,6 +266,8 @@ static void __handle_primary_request(void *param) ret = local_dev_remove(dev); quit: + free(da->args); + free(da); break; default: ret = -EINVAL; @@ -355,6 +357,7 @@ int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req) resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param; req->result = resp->result; + free(mp_reply.msgs); return ret; } @@ -379,6 +382,7 @@ int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req) if (mp_reply.nb_sent != mp_reply.nb_received) { RTE_LOG(ERR, EAL, "not all secondary reply\n"); + free(mp_reply.msgs); return -1; } @@ -397,6 +401,7 @@ int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req) } } + free(mp_reply.msgs); return 0; } diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index cd6c187c..a9724dc9 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -196,9 +196,6 @@ int rte_eal_hotplug_add(const char *busname, const char *devname, const char *drvargs); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Add matching devices. * * In multi-process, it will request other processes to add the same device. @@ -209,7 +206,7 @@ int rte_eal_hotplug_add(const char *busname, const char *devname, * @return * 0 on success, negative on error. */ -int __rte_experimental rte_dev_probe(const char *devargs); +int rte_dev_probe(const char *devargs); /** * Hotplug remove a given device from a specific bus. @@ -227,9 +224,6 @@ int __rte_experimental rte_dev_probe(const char *devargs); int rte_eal_hotplug_remove(const char *busname, const char *devname); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Remove one device. * * In multi-process, it will request other processes to remove the same device. @@ -240,7 +234,7 @@ int rte_eal_hotplug_remove(const char *busname, const char *devname); * @return * 0 on success, negative on error. */ -int __rte_experimental rte_dev_remove(struct rte_device *dev); +int rte_dev_remove(struct rte_device *dev); /** * Device comparison function. 
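Since rte_dev_probe() and rte_dev_remove() are promoted to the stable API above, a hotplug caller no longer needs the experimental-API flag. A rough sketch follows; the devargs string is a made-up example, and looking up the struct rte_device pointer needed for removal is bus-specific and not shown.

#include <rte_dev.h>
#include <rte_log.h>

static int
hotplug_attach(const char *devargs)
{
	/* In multi-process deployments this also asks the other
	 * processes to probe the same device (see hotplug_mp.c above).
	 */
	int ret = rte_dev_probe(devargs);

	if (ret < 0)
		RTE_LOG(ERR, USER1, "cannot attach device %s\n", devargs);
	return ret;
}

/* e.g. hotplug_attach("0000:03:00.0"); detach later with rte_dev_remove()
 * on the corresponding struct rte_device *.
 */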
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index ce937058..d970825d 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -463,8 +463,45 @@ unsigned rte_memory_get_nchannel(void); */ unsigned rte_memory_get_nrank(void); -/* check memsegs iovas are within a range based on dma mask */ -int __rte_experimental rte_eal_check_dma_mask(uint8_t maskbits); +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Check if all currently allocated memory segments are compliant with + * supplied DMA address width. + * + * @param maskbits + * Address width to check against. + */ +int __rte_experimental rte_mem_check_dma_mask(uint8_t maskbits); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Check if all currently allocated memory segments are compliant with + * supplied DMA address width. This function will use + * rte_memseg_walk_thread_unsafe instead of rte_memseg_walk implying + * memory_hotplug_lock will not be acquired avoiding deadlock during + * memory initialization. + * + * This function is just for EAL core memory internal use. Drivers should + * use the previous rte_mem_check_dma_mask. + * + * @param maskbits + * Address width to check against. + */ +int __rte_experimental rte_mem_check_dma_mask_thread_unsafe(uint8_t maskbits); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Set dma mask to use once memory initialization is done. Previous functions + * rte_mem_check_dma_mask and rte_mem_check_dma_mask_thread_unsafe can not be + * used safely until memory has been initialized. + */ +void __rte_experimental rte_mem_set_dma_mask(uint8_t maskbits); /** * Drivers based on uio will not load unless physical diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 412ed2db..80c516d3 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -49,7 +49,7 @@ extern "C" { * 0-15 = release candidates * 16 = release */ -#define RTE_VER_RELEASE 1 +#define RTE_VER_RELEASE 2 /** * Macro to compute a version number usable for comparisons diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index 1973b6e6..c6a6d4f6 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -294,7 +294,6 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, size_t alloc_sz; int allocd_pages; void *ret, *map_addr; - uint64_t mask; alloc_sz = (size_t)pg_sz * n_segs; @@ -322,14 +321,44 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size, goto fail; } - if (mcfg->dma_maskbits) { - mask = ~((1ULL << mcfg->dma_maskbits) - 1); - if (rte_eal_check_dma_mask(mask)) { + /* + * Once we have all the memseg lists configured, if there is a dma mask + * set, check iova addresses are not out of range. Otherwise the device + * setting the dma mask could have problems with the mapped memory. + * + * There are two situations when this can happen: + * 1) memory initialization + * 2) dynamic memory allocation + * + * For 1), an error when checking dma mask implies app can not be + * executed. For 2) implies the new memory can not be added. 
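A driver-side sketch of the renamed DMA mask check (names below are illustrative, not from the patch): a PMD whose device can only address, say, 39 bits verifies already-allocated memory with rte_mem_check_dma_mask(), while code running before memory initialization (a bus, for instance) would instead record the constraint with rte_mem_set_dma_mask().

#include <rte_memory.h>
#include <rte_log.h>

#define EXAMPLE_DMA_MASK_BITS 39	/* hypothetical device limit */

static int
example_check_addressing(void)
{
	/* Non-zero return means at least one memseg IOVA is outside
	 * the supplied address width.
	 */
	if (rte_mem_check_dma_mask(EXAMPLE_DMA_MASK_BITS) != 0) {
		RTE_LOG(ERR, USER1,
			"allocated memory exceeds the %d-bit DMA range\n",
			EXAMPLE_DMA_MASK_BITS);
		return -1;
	}
	return 0;
}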
+ */ + if (mcfg->dma_maskbits && + rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { + /* + * Currently this can only happen if IOMMU is enabled + * and the address width supported by the IOMMU hw is + * not enough for using the memory mapped IOVAs. + * + * If IOVA is VA, advice to try with '--iova-mode pa' + * which could solve some situations when IOVA VA is not + * really needed. + */ + RTE_LOG(ERR, EAL, + "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask\n", + __func__); + + /* + * If IOVA is VA and it is possible to run with IOVA PA, + * because user is root, give and advice for solving the + * problem. + */ + if ((rte_eal_iova_mode() == RTE_IOVA_VA) && + rte_eal_using_phys_addrs()) RTE_LOG(ERR, EAL, - "%s(): couldn't allocate memory due to DMA mask\n", + "%s(): Please try initializing EAL with --iova-mode=pa parameter\n", __func__); - goto fail; - } + goto fail; } /* add newly minted memsegs to malloc heap */ diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 9e61dc41..0da5ad5e 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -349,8 +349,7 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == RTE_HEAP_NAME_MAX_LEN) { rte_errno = EINVAL; - ret = -1; - goto unlock; + return -1; } rte_rwlock_write_lock(&mcfg->memory_hotplug_lock); diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index 8767c722..0f3695c4 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -795,6 +795,9 @@ rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, return; } + if (f == NULL) + return; + fprintf(f, " %s: stats %d\tcalls %"PRIu64"\tcycles %" PRIu64"\tavg: %"PRIu64"\n", s->spec.name, service_stats_enabled(s), s->calls, diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c index 39252a88..cbac451e 100644 --- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -700,7 +700,7 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) bool call = false; int n, bytes_read; struct rte_intr_source *src; - struct rte_intr_callback *cb; + struct rte_intr_callback *cb, *next; union rte_intr_read_buffer buf; struct rte_intr_callback active_cb; @@ -780,6 +780,23 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds) "descriptor %d: %s\n", events[n].data.fd, strerror(errno)); + /* + * The device is unplugged or buggy, remove + * it as an interrupt source and return to + * force the wait list to be rebuilt. 
+ */ + rte_spinlock_lock(&intr_lock); + TAILQ_REMOVE(&intr_sources, src, next); + rte_spinlock_unlock(&intr_lock); + + for (cb = TAILQ_FIRST(&src->callbacks); cb; + cb = next) { + next = TAILQ_NEXT(cb, next); + TAILQ_REMOVE(&src->callbacks, cb, next); + free(cb); + } + free(src); + return -1; } else if (bytes_read == 0) RTE_LOG(ERR, EAL, "Read nothing from file " "descriptor %d\n", events[n].data.fd); diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index fce86fda..c1b5e079 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -1393,6 +1393,18 @@ eal_legacy_hugepage_init(void) addr = RTE_PTR_ADD(addr, (size_t)page_sz); } + if (mcfg->dma_maskbits && + rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { + RTE_LOG(ERR, EAL, + "%s(): couldnt allocate memory due to IOVA exceeding limits of current DMA mask.\n", + __func__); + if (rte_eal_iova_mode() == RTE_IOVA_VA && + rte_eal_using_phys_addrs()) + RTE_LOG(ERR, EAL, + "%s(): Please try initializing EAL with --iova-mode=pa parameter.\n", + __func__); + goto fail; + } return 0; } @@ -1628,6 +1640,14 @@ eal_legacy_hugepage_init(void) rte_fbarray_destroy(&msl->memseg_arr); } + if (mcfg->dma_maskbits && + rte_mem_check_dma_mask_thread_unsafe(mcfg->dma_maskbits)) { + RTE_LOG(ERR, EAL, + "%s(): couldn't allocate memory due to IOVA exceeding limits of current DMA mask.\n", + __func__); + goto fail; + } + return 0; fail: diff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map index 04f62424..3fe78260 100644 --- a/lib/librte_eal/rte_eal_version.map +++ b/lib/librte_eal/rte_eal_version.map @@ -259,6 +259,8 @@ DPDK_18.08 { DPDK_18.11 { global: + rte_dev_probe; + rte_dev_remove; rte_eal_get_runtime_dir; rte_eal_hotplug_add; rte_eal_hotplug_remove; @@ -285,8 +287,6 @@ EXPERIMENTAL { rte_dev_is_probed; rte_dev_iterator_init; rte_dev_iterator_next; - rte_dev_probe; - rte_dev_remove; rte_devargs_add; rte_devargs_dump; rte_devargs_insert; @@ -295,7 +295,6 @@ EXPERIMENTAL { rte_devargs_parsef; rte_devargs_remove; rte_devargs_type_count; - rte_eal_check_dma_mask; rte_eal_cleanup; rte_fbarray_attach; rte_fbarray_destroy; @@ -331,9 +330,12 @@ EXPERIMENTAL { rte_malloc_heap_socket_is_external; rte_mem_alloc_validator_register; rte_mem_alloc_validator_unregister; + rte_mem_check_dma_mask; + rte_mem_check_dma_mask_thread_unsafe; rte_mem_event_callback_register; rte_mem_event_callback_unregister; rte_mem_iova2virt; + rte_mem_set_dma_mask; rte_mem_virt2memseg; rte_mem_virt2memseg_list; rte_memseg_contig_walk; diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 9d348138..8eaa5fcc 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -187,7 +187,7 @@ enum { STAT_QMAP_RX }; -int __rte_experimental +int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str) { int ret; @@ -288,7 +288,7 @@ error: return ret; } -uint16_t __rte_experimental +uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter) { if (iter->cls == NULL) /* invalid ethdev iterator */ @@ -317,7 +317,7 @@ rte_eth_iterator_next(struct rte_dev_iterator *iter) return RTE_MAX_ETHPORTS; } -void __rte_experimental +void rte_eth_iterator_cleanup(struct rte_dev_iterator *iter) { if (iter->bus_str == NULL) @@ -3647,11 +3647,10 @@ rte_eth_dev_destroy(struct rte_eth_dev *ethdev, return -ENODEV; RTE_FUNC_PTR_OR_ERR_RET(*ethdev_uninit, -EINVAL); - if (ethdev_uninit) { - ret = ethdev_uninit(ethdev); - if 
(ret) - return ret; - } + + ret = ethdev_uninit(ethdev); + if (ret) + return ret; return rte_eth_dev_release_port(ethdev); } diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h index 769a6943..8a92d91e 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h @@ -167,9 +167,6 @@ extern int rte_eth_dev_logtype; struct rte_mbuf; /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * * Initializes a device iterator. * * This iterator allows accessing a list of devices matching some devargs. @@ -185,13 +182,9 @@ struct rte_mbuf; * @return * 0 on successful initialization, negative otherwise. */ -__rte_experimental int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * * Iterates on devices with devargs filter. * The ownership is not checked. * @@ -205,13 +198,9 @@ int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs); * @return * A port id if found, RTE_MAX_ETHPORTS otherwise. */ -__rte_experimental uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice. - * * Free some allocated fields of the iterator. * * This function is automatically called by rte_eth_iterator_next() @@ -223,7 +212,6 @@ uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter); * Device iterator handle initialized by rte_eth_iterator_init(). * The fields bus_str and cls_str are freed if needed. */ -__rte_experimental void rte_eth_iterator_cleanup(struct rte_dev_iterator *iter); /** diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map index 3560c288..92ac3de2 100644 --- a/lib/librte_ethdev/rte_ethdev_version.map +++ b/lib/librte_ethdev/rte_ethdev_version.map @@ -223,6 +223,9 @@ DPDK_18.11 { rte_eth_dev_rx_offload_name; rte_eth_dev_tx_offload_name; + rte_eth_iterator_cleanup; + rte_eth_iterator_init; + rte_eth_iterator_next; } DPDK_18.08; @@ -242,9 +245,6 @@ EXPERIMENTAL { rte_eth_dev_owner_set; rte_eth_dev_owner_unset; rte_eth_dev_rx_intr_ctl_q_get_fd; - rte_eth_iterator_cleanup; - rte_eth_iterator_init; - rte_eth_iterator_next; rte_eth_switch_domain_alloc; rte_eth_switch_domain_free; rte_flow_conv; diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h index 0f62e2e1..a17a7407 100644 --- a/lib/librte_ip_frag/ip_frag_common.h +++ b/lib/librte_ip_frag/ip_frag_common.h @@ -58,20 +58,14 @@ struct rte_mbuf *ipv6_frag_reassemble(struct ip_frag_pkt *fp); static inline int ip_frag_key_is_empty(const struct ip_frag_key * key) { - uint32_t i; - for (i = 0; i < RTE_MIN(key->key_len, RTE_DIM(key->src_dst)); i++) - if (key->src_dst[i] != 0) - return 0; - return 1; + return (key->key_len == 0); } -/* empty the key */ +/* invalidate the key */ static inline void ip_frag_key_invalidate(struct ip_frag_key * key) { - uint32_t i; - for (i = 0; i < key->key_len; i++) - key->src_dst[i] = 0; + key->key_len = 0; } /* compare two keys */ @@ -80,7 +74,7 @@ ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2) { uint32_t i; uint64_t val; - val = k1->id ^ k2->id; + val = k1->id_key_len ^ k2->id_key_len; for (i = 0; i < k1->key_len; i++) val |= k1->src_dst[i] ^ k2->src_dst[i]; return val; diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index 7f425f61..a4ccaf9d 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ 
b/lib/librte_ip_frag/rte_ip_frag.h @@ -44,9 +44,17 @@ struct ip_frag { /** @internal <src addr, dst_addr, id> to uniquely identify fragmented datagram. */ struct ip_frag_key { - uint64_t src_dst[4]; /**< src address, first 8 bytes used for IPv4 */ - uint32_t id; /**< dst address */ - uint32_t key_len; /**< src/dst key length */ + uint64_t src_dst[4]; + /**< src and dst address, only first 8 bytes used for IPv4 */ + RTE_STD_C11 + union { + uint64_t id_key_len; /**< combined for easy fetch */ + __extension__ + struct { + uint32_t id; /**< packet id */ + uint32_t key_len; /**< src/dst key length */ + }; + }; }; /** diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c index 4956b99e..1029b7ab 100644 --- a/lib/librte_ip_frag/rte_ipv4_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -36,8 +36,11 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) /* previous fragment found. */ if(fp->frags[i].ofs + fp->frags[i].len == ofs) { + RTE_ASSERT(curr_idx != i); + /* adjust start of the last fragment data. */ - rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_adj(m, + (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[i].mb, m); /* this mbuf should not be accessed directly */ @@ -96,14 +99,14 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) */ struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, - struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, - struct ipv4_hdr *ip_hdr) + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv4_hdr *ip_hdr) { struct ip_frag_pkt *fp; struct ip_frag_key key; const unaligned_uint64_t *psd; - uint16_t ip_len; uint16_t flag_offset, ip_ofs, ip_flag; + int32_t ip_len; flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset); ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); @@ -116,12 +119,11 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, key.key_len = IPV4_KEYLEN; ip_ofs *= IPV4_HDR_OFFSET_UNITS; - ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) - - mb->l3_len); + ip_len = rte_be_to_cpu_16(ip_hdr->total_length) - mb->l3_len; IP_FRAG_LOG(DEBUG, "%s:%d:\n" "mbuf: %p, tms: %" PRIu64 - ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n" + ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %d, flags: %#x\n" "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " "max_entries: %u, use_entries: %u\n\n", __func__, __LINE__, @@ -129,6 +131,12 @@ rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, tbl->use_entries); + /* check that fragment length is greater then zero. */ + if (ip_len <= 0) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + /* try to find/add entry into the fragment's table. */ if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) { IP_FRAG_MBUF2DR(dr, mb); diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c index db249fe6..855e3f74 100644 --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -59,8 +59,11 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) /* previous fragment found. */ if (fp->frags[i].ofs + fp->frags[i].len == ofs) { + RTE_ASSERT(curr_idx != i); + /* adjust start of the last fragment data. 
*/ - rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_adj(m, + (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[i].mb, m); /* this mbuf should not be accessed directly */ @@ -135,12 +138,13 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) #define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3) struct rte_mbuf * rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, - struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, - struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr) + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr) { struct ip_frag_pkt *fp; struct ip_frag_key key; - uint16_t ip_len, ip_ofs; + uint16_t ip_ofs; + int32_t ip_len; rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16); rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16); @@ -151,15 +155,17 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, ip_ofs = FRAG_OFFSET(frag_hdr->frag_data) * 8; /* - * as per RFC2460, payload length contains all extension headers as well. - * since we don't support anything but frag headers, this is what we remove - * from the payload len. + * as per RFC2460, payload length contains all extension headers + * as well. + * since we don't support anything but frag headers, + * this is what we remove from the payload len. */ ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr); IP_FRAG_LOG(DEBUG, "%s:%d:\n" "mbuf: %p, tms: %" PRIu64 - ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, ofs: %u, len: %u, flags: %#x\n" + ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "ofs: %u, len: %d, flags: %#x\n" "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " "max_entries: %u, use_entries: %u\n\n", __func__, __LINE__, @@ -168,6 +174,12 @@ rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, tbl->use_entries); + /* check that fragment length is greater then zero. */ + if (ip_len <= 0) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + /* try to find/add entry into the fragment's table. 
*/ fp = ip_frag_find(tbl, dr, &key, tms); if (fp == NULL) { diff --git a/lib/librte_net/rte_gre.h b/lib/librte_net/rte_gre.h index 69499bb8..05aa9d14 100644 --- a/lib/librte_net/rte_gre.h +++ b/lib/librte_net/rte_gre.h @@ -15,6 +15,7 @@ extern "C" { /** * GRE Header */ +__extension__ struct gre_hdr { #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN uint16_t res2:4; /**< Reserved */ diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 73ac3a95..dca0830e 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -69,8 +69,8 @@ reflect_32bits(uint32_t val) uint32_t i, res = 0; for (i = 0; i < 32; i++) - if ((val & (1 << i)) != 0) - res |= (uint32_t)(1 << (31 - i)); + if ((val & (1U << i)) != 0) + res |= (uint32_t)(1U << (31 - i)); return res; } diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h index 94df3c4a..7bc74a4c 100644 --- a/lib/librte_ring/rte_ring_c11_mem.h +++ b/lib/librte_ring/rte_ring_c11_mem.h @@ -57,23 +57,27 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, uint32_t *free_entries) { const uint32_t capacity = r->capacity; + uint32_t cons_tail; unsigned int max = n; int success; + *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_ACQUIRE); do { /* Reset n to the initial burst count */ n = max; - *old_head = __atomic_load_n(&r->prod.head, + /* load-acquire synchronize with store-release of ht->tail + * in update_tail. + */ + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); - /* - * The subtraction is done between two unsigned 32bits value + /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * *old_head > cons_tail). So 'free_entries' is always between 0 * and capacity (which is < size). */ - *free_entries = (capacity + r->cons.tail - *old_head); + *free_entries = (capacity + cons_tail - *old_head); /* check that we have enough room in ring */ if (unlikely(n > *free_entries)) @@ -87,6 +91,7 @@ __rte_ring_move_prod_head(struct rte_ring *r, unsigned int is_sp, if (is_sp) r->prod.head = *new_head, success = 1; else + /* on failure, *old_head is updated */ success = __atomic_compare_exchange_n(&r->prod.head, old_head, *new_head, 0, __ATOMIC_ACQUIRE, @@ -125,13 +130,19 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, uint32_t *entries) { unsigned int max = n; + uint32_t prod_tail; int success; /* move cons.head atomically */ + *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_ACQUIRE); do { /* Restore n as it may change every loop */ n = max; - *old_head = __atomic_load_n(&r->cons.head, + + /* this load-acquire synchronize with store-release of ht->tail + * in update_tail. + */ + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value @@ -139,7 +150,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, * cons_head > prod_tail). So 'entries' is always between 0 * and size(ring)-1. 
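A standalone single-producer/single-consumer sketch (toy code, not DPDK's ring) of the acquire/release pairing the rte_ring_c11_mem.h change relies on: the consumer's load-acquire of the producer tail synchronizes with the producer's store-release in update_tail(), so entries written before the tail update are visible once the new tail value is observed.

#include <stdint.h>

struct toy_ring {
	uint32_t tail;		/* producer publishes entries up to here */
	uint32_t slots[1024];	/* size must stay a power of two */
};

static void
toy_produce(struct toy_ring *r, uint32_t idx, uint32_t value)
{
	r->slots[idx & 1023] = value;	/* write the entry first */
	/* store-release: the entry write cannot be reordered after this */
	__atomic_store_n(&r->tail, idx + 1, __ATOMIC_RELEASE);
}

static int
toy_consume(struct toy_ring *r, uint32_t head, uint32_t *value)
{
	/* load-acquire pairs with the store-release in toy_produce() */
	uint32_t tail = __atomic_load_n(&r->tail, __ATOMIC_ACQUIRE);

	if (head == tail)
		return 0;	/* ring empty */
	*value = r->slots[head & 1023];
	return 1;
}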
*/ - *entries = (r->prod.tail - *old_head); + *entries = (prod_tail - *old_head); /* Set the actual entries for dequeue */ if (n > *entries) @@ -152,6 +163,7 @@ __rte_ring_move_cons_head(struct rte_ring *r, int is_sc, if (is_sc) r->cons.head = *new_head, success = 1; else + /* on failure, *old_head will be updated */ success = __atomic_compare_exchange_n(&r->cons.head, old_head, *new_head, 0, __ATOMIC_ACQUIRE, diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c index c2c5dff1..e7d849ee 100644 --- a/lib/librte_vhost/vdpa.c +++ b/lib/librte_vhost/vdpa.c @@ -63,6 +63,9 @@ rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr, break; } + if (i == MAX_VHOST_DEVICE) + return -1; + sprintf(device_name, "vdpa-dev-%d", i); dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device), RTE_CACHE_LINE_SIZE); diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index b4abad30..760f4219 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -275,7 +275,8 @@ struct vring_packed_desc_event { (1ULL << VIRTIO_RING_F_EVENT_IDX) | \ (1ULL << VIRTIO_NET_F_MTU) | \ (1ULL << VIRTIO_F_IN_ORDER) | \ - (1ULL << VIRTIO_F_IOMMU_PLATFORM)) + (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ + (1ULL << VIRTIO_F_RING_PACKED)) struct guest_page { diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c index 9811a232..5472bead 100644 --- a/lib/librte_vhost/vhost_crypto.c +++ b/lib/librte_vhost/vhost_crypto.c @@ -238,7 +238,7 @@ transform_cipher_param(struct rte_crypto_sym_xform *xform, return ret; xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - xform->cipher.algo = (uint32_t)ret; + xform->cipher.algo = (enum rte_crypto_cipher_algorithm)ret; xform->cipher.key.length = param->cipher_key_len; if (xform->cipher.key.length > 0) xform->cipher.key.data = param->cipher_key_buf; @@ -288,7 +288,7 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, if (unlikely(ret < 0)) return ret; xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - xform_cipher->cipher.algo = (uint32_t)ret; + xform_cipher->cipher.algo = (enum rte_crypto_cipher_algorithm)ret; xform_cipher->cipher.key.length = param->cipher_key_len; xform_cipher->cipher.key.data = param->cipher_key_buf; ret = get_iv_len(xform_cipher->cipher.algo); @@ -302,7 +302,7 @@ transform_chain_param(struct rte_crypto_sym_xform *xforms, ret = auth_algo_transform(param->hash_algo); if (unlikely(ret < 0)) return ret; - xform_auth->auth.algo = (uint32_t)ret; + xform_auth->auth.algo = (enum rte_crypto_auth_algorithm)ret; xform_auth->auth.digest_length = param->digest_len; xform_auth->auth.key.length = param->auth_key_len; xform_auth->auth.key.data = param->auth_key_buf; diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 508228a3..cc154f31 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -696,10 +696,27 @@ vhost_user_set_vring_base(struct virtio_net **pdev, int main_fd __rte_unused) { struct virtio_net *dev = *pdev; - dev->virtqueue[msg->payload.state.index]->last_used_idx = - msg->payload.state.num; - dev->virtqueue[msg->payload.state.index]->last_avail_idx = - msg->payload.state.num; + struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + uint64_t val = msg->payload.state.num; + + if (vq_is_packed(dev)) { + /* + * Bit[0:14]: avail index + * Bit[15]: avail wrap counter + */ + vq->last_avail_idx = val & 0x7fff; + vq->avail_wrap_counter = !!(val & (0x1 << 15)); + /* + * Set used index to same value as available one, as + * their values should be the same since 
ring processing + * was stopped at get time. + */ + vq->last_used_idx = vq->last_avail_idx; + vq->used_wrap_counter = vq->avail_wrap_counter; + } else { + vq->last_used_idx = msg->payload.state.num; + vq->last_avail_idx = msg->payload.state.num; + } return VH_RESULT_OK; } @@ -1208,6 +1225,7 @@ vhost_user_get_vring_base(struct virtio_net **pdev, { struct virtio_net *dev = *pdev; struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index]; + uint64_t val; /* We have to stop the queue (virtio) if it is running. */ vhost_destroy_device_notify(dev); @@ -1215,8 +1233,18 @@ vhost_user_get_vring_base(struct virtio_net **pdev, dev->flags &= ~VIRTIO_DEV_READY; dev->flags &= ~VIRTIO_DEV_VDPA_CONFIGURED; - /* Here we are safe to get the last avail index */ - msg->payload.state.num = vq->last_avail_idx; + /* Here we are safe to get the indexes */ + if (vq_is_packed(dev)) { + /* + * Bit[0:14]: avail index + * Bit[15]: avail wrap counter + */ + val = vq->last_avail_idx & 0x7fff; + val |= vq->avail_wrap_counter << 15; + msg->payload.state.num = val; + } else { + msg->payload.state.num = vq->last_avail_idx; + } RTE_LOG(INFO, VHOST_CONFIG, "vring base idx:%d file:%d\n", msg->payload.state.index, |
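The packed-ring handling above folds the available index and its wrap counter into the single num field exchanged in VHOST_USER_GET/SET_VRING_BASE. A small illustration of that encoding follows; the helper names are invented for the example.

#include <stdint.h>
#include <stdbool.h>

/* Bit[0:14]: last avail index, Bit[15]: avail wrap counter */
static inline uint16_t
vring_base_encode(uint16_t last_avail_idx, bool wrap_counter)
{
	return (last_avail_idx & 0x7fff) | ((uint16_t)wrap_counter << 15);
}

static inline void
vring_base_decode(uint16_t val, uint16_t *last_avail_idx, bool *wrap_counter)
{
	*last_avail_idx = val & 0x7fff;
	*wrap_counter = !!(val & (1u << 15));
}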