Diffstat (limited to 'lib')
52 files changed, 803 insertions, 283 deletions
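The first hunk below fixes the compressdev documentation to reference rte_pktmbuf_pkt_len() instead of rte_pktmbuf_data_len(): for a chained mbuf, only pkt_len covers all segments, while data_len describes a single segment and is capped at 64k-1. A minimal sketch of the distinction, using only the public rte_mbuf accessors (the helper name is illustrative):

    #include <rte_mbuf.h>

    /* Illustrative helper: total input length of a possibly-chained mbuf.
     * rte_pktmbuf_pkt_len() sums data over the whole segment chain;
     * rte_pktmbuf_data_len() reports only the first segment, whose
     * uint16_t data_len field is limited to 64k-1. */
    static inline uint32_t
    comp_op_input_len(const struct rte_mbuf *m_src)
    {
        return rte_pktmbuf_pkt_len(m_src);
    }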
diff --git a/lib/librte_compressdev/rte_comp.h b/lib/librte_compressdev/rte_comp.h index 395ce29f..ea306d5f 100644 --- a/lib/librte_compressdev/rte_comp.h +++ b/lib/librte_compressdev/rte_comp.h @@ -310,7 +310,7 @@ struct rte_comp_op { struct rte_mbuf *m_src; /**< source mbuf * The total size of the input buffer(s) can be retrieved using - * rte_pktmbuf_data_len(m_src). The max data size which can fit in a + * rte_pktmbuf_pkt_len(m_src). The max data size which can fit in a * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1. * If the input data is bigger than this it can be passed to the PMD in * a chain of mbufs if the PMD's capabilities indicate it supports this. @@ -318,7 +318,7 @@ struct rte_comp_op { struct rte_mbuf *m_dst; /**< destination mbuf * The total size of the output buffer(s) can be retrieved using - * rte_pktmbuf_data_len(m_dst). The max data size which can fit in a + * rte_pktmbuf_pkt_len(m_dst). The max data size which can fit in a * single mbuf is limited by the uint16_t rte_mbuf.data_len to 64k-1. * If the output data is expected to be bigger than this a chain of * mbufs can be passed to the PMD if the PMD's capabilities indicate diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c index b8152a75..f01495e3 100644 --- a/lib/librte_eal/bsdapp/eal/eal.c +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -115,7 +115,7 @@ eal_create_runtime_dir(void) /* create prefix-specific subdirectory under DPDK runtime dir */ ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s", - tmp, internal_config.hugefile_prefix); + tmp, eal_get_hugefile_prefix()); if (ret < 0 || ret == sizeof(runtime_dir)) { RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); return -1; @@ -141,6 +141,16 @@ eal_create_runtime_dir(void) return 0; } +int +eal_clean_runtime_dir(void) +{ + /* FreeBSD doesn't need this implemented for now, because, unlike Linux, + * FreeBSD doesn't create per-process files, so no need to clean up. + */ + return 0; +} + + const char * rte_eal_get_runtime_dir(void) { @@ -447,9 +457,21 @@ eal_parse_args(int argc, char **argv) switch (opt) { case OPT_MBUF_POOL_OPS_NAME_NUM: - internal_config.user_mbuf_pool_ops_name = - strdup(optarg); + { + char *ops_name = strdup(optarg); + if (ops_name == NULL) + RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n"); + else { + /* free old ops name */ + if (internal_config.user_mbuf_pool_ops_name != + NULL) + free(internal_config.user_mbuf_pool_ops_name); + + internal_config.user_mbuf_pool_ops_name = + ops_name; + } break; + } case 'h': eal_usage(prgname); exit(EXIT_SUCCESS); @@ -807,6 +829,18 @@ rte_eal_init(int argc, char **argv) return -1; } + /* + * Clean up unused files in runtime directory. We do this at the end of + * init and not at the beginning because we want to clean stuff up + * whether we are primary or secondary process, but we cannot remove + * primary process' files because secondary should be able to run even + * if primary process is dead. 
+ */ + if (eal_clean_runtime_dir() < 0) { + rte_eal_init_alert("Cannot clear runtime directory\n"); + return -1; + } + rte_eal_mcfg_complete(); /* Call each registered callback, if enabled */ @@ -819,6 +853,8 @@ int __rte_experimental rte_eal_cleanup(void) { rte_service_finalize(); + rte_mp_channel_cleanup(); + eal_cleanup_config(&internal_config); return 0; } diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index d47ea493..999ba24b 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -704,6 +704,12 @@ rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms) return -1; } + /* segment fd API is not supported for external segments */ + if (msl->external) { + rte_errno = ENOTSUP; + return -1; + } + ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx); if (ret < 0) { rte_errno = -ret; @@ -754,6 +760,12 @@ rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms, return -1; } + /* segment fd API is not supported for external segments */ + if (msl->external) { + rte_errno = ENOTSUP; + return -1; + } + ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset); if (ret < 0) { rte_errno = -ret; diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index b7081afb..664df5b9 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -365,6 +365,7 @@ int rte_eal_memzone_init(void) { struct rte_mem_config *mcfg; + int ret = 0; /* get pointer to global configuration */ mcfg = rte_eal_get_configuration()->mem_config; @@ -375,17 +376,16 @@ rte_eal_memzone_init(void) rte_fbarray_init(&mcfg->memzones, "memzone", RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) { RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n"); - return -1; + ret = -1; } else if (rte_eal_process_type() == RTE_PROC_SECONDARY && rte_fbarray_attach(&mcfg->memzones)) { RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n"); - rte_rwlock_write_unlock(&mcfg->mlock); - return -1; + ret = -1; } rte_rwlock_write_unlock(&mcfg->mlock); - return 0; + return ret; } /* Walk all reserved memory zones */ diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index e31eca5c..f6dfbc73 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -168,6 +168,14 @@ eal_option_device_parse(void) return ret; } +const char * +eal_get_hugefile_prefix(void) +{ + if (internal_config.hugefile_prefix != NULL) + return internal_config.hugefile_prefix; + return HUGEFILE_PREFIX_DEFAULT; +} + void eal_reset_internal_config(struct internal_config *internal_cfg) { @@ -176,7 +184,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg) internal_cfg->memory = 0; internal_cfg->force_nrank = 0; internal_cfg->force_nchannel = 0; - internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT; + internal_cfg->hugefile_prefix = NULL; internal_cfg->hugepage_dir = NULL; internal_cfg->force_sockets = 0; /* zero out the NUMA config */ @@ -591,7 +599,9 @@ eal_parse_corelist(const char *corelist) if (*corelist == '\0') return -1; errno = 0; - idx = strtoul(corelist, &end, 10); + idx = strtol(corelist, &end, 10); + if (idx < 0 || idx >= (int)cfg->lcore_count) + return -1; if (errno || end == NULL) return -1; while (isblank(*end)) @@ -1102,6 +1112,7 @@ eal_parse_common_option(int opt, const char *optarg, { static int b_used; static int w_used; + struct rte_config *cfg = 
rte_eal_get_configuration(); switch (opt) { /* blacklist */ @@ -1144,7 +1155,9 @@ eal_parse_common_option(int opt, const char *optarg, /* corelist */ case 'l': if (eal_parse_corelist(optarg) < 0) { - RTE_LOG(ERR, EAL, "invalid core list\n"); + RTE_LOG(ERR, EAL, + "invalid core list, please check core numbers are in [0, %u] range\n", + cfg->lcore_count-1); return -1; } @@ -1347,6 +1360,19 @@ eal_auto_detect_cores(struct rte_config *cfg) } int +eal_cleanup_config(struct internal_config *internal_cfg) +{ + if (internal_cfg->hugefile_prefix != NULL) + free(internal_cfg->hugefile_prefix); + if (internal_cfg->hugepage_dir != NULL) + free(internal_cfg->hugepage_dir); + if (internal_cfg->user_mbuf_pool_ops_name != NULL) + free(internal_cfg->user_mbuf_pool_ops_name); + + return 0; +} + +int eal_adjust_config(struct internal_config *internal_cfg) { int i; @@ -1361,6 +1387,8 @@ eal_adjust_config(struct internal_config *internal_cfg) /* default master lcore is the first one */ if (!master_lcore_parsed) { cfg->master_lcore = rte_get_next_lcore(-1, 0, 0); + if (cfg->master_lcore >= RTE_MAX_LCORE) + return -1; lcore_config[cfg->master_lcore].core_role = ROLE_RTE; } @@ -1386,7 +1414,22 @@ eal_check_common_options(struct internal_config *internal_cfg) RTE_LOG(ERR, EAL, "Invalid process type specified\n"); return -1; } - if (index(internal_cfg->hugefile_prefix, '%') != NULL) { + if (internal_cfg->hugefile_prefix != NULL && + strlen(internal_cfg->hugefile_prefix) < 1) { + RTE_LOG(ERR, EAL, "Invalid length of --" OPT_FILE_PREFIX " option\n"); + return -1; + } + if (internal_cfg->hugepage_dir != NULL && + strlen(internal_cfg->hugepage_dir) < 1) { + RTE_LOG(ERR, EAL, "Invalid length of --" OPT_HUGE_DIR" option\n"); + return -1; + } + if (internal_cfg->user_mbuf_pool_ops_name != NULL && + strlen(internal_cfg->user_mbuf_pool_ops_name) < 1) { + RTE_LOG(ERR, EAL, "Invalid length of --" OPT_MBUF_POOL_OPS_NAME" option\n"); + return -1; + } + if (index(eal_get_hugefile_prefix(), '%') != NULL) { RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" " "option\n"); return -1; diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index 1c3f09aa..b46d644b 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -37,6 +37,7 @@ static int mp_fd = -1; static char mp_filter[PATH_MAX]; /* Filter for secondary process sockets */ static char mp_dir_path[PATH_MAX]; /* The directory path for all mp sockets */ static pthread_mutex_t mp_mutex_action = PTHREAD_MUTEX_INITIALIZER; +static char peer_name[PATH_MAX]; struct action_entry { TAILQ_ENTRY(action_entry) next; @@ -511,9 +512,9 @@ async_reply_handle(void *arg) static int open_socket_fd(void) { - char peer_name[PATH_MAX] = {0}; struct sockaddr_un un; + peer_name[0] = '\0'; if (rte_eal_process_type() == RTE_PROC_SECONDARY) snprintf(peer_name, sizeof(peer_name), "%d_%"PRIx64, getpid(), rte_rdtsc()); @@ -542,27 +543,17 @@ open_socket_fd(void) return mp_fd; } -static int -unlink_sockets(const char *filter) +static void +close_socket_fd(void) { - int dir_fd; - DIR *mp_dir; - struct dirent *ent; - - mp_dir = opendir(mp_dir_path); - if (!mp_dir) { - RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path); - return -1; - } - dir_fd = dirfd(mp_dir); + char path[PATH_MAX]; - while ((ent = readdir(mp_dir))) { - if (fnmatch(filter, ent->d_name, 0) == 0) - unlinkat(dir_fd, ent->d_name, 0); - } + if (mp_fd < 0) + return; - closedir(mp_dir); - return 0; + close(mp_fd); + 
create_socket_path(peer_name, path, sizeof(path)); + unlink(path); } int @@ -603,13 +594,6 @@ rte_mp_channel_init(void) return -1; } - if (rte_eal_process_type() == RTE_PROC_PRIMARY && - unlink_sockets(mp_filter)) { - RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n"); - close(dir_fd); - return -1; - } - if (open_socket_fd() < 0) { close(dir_fd); return -1; @@ -632,6 +616,12 @@ rte_mp_channel_init(void) return 0; } +void +rte_mp_channel_cleanup(void) +{ + close_socket_fd(); +} + /** * Return -1, as fail to send message and it's caused by the local side. * Return 0, as fail to send message and it's caused by the remote side. diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index 6e0331fd..89a3adde 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -25,6 +25,13 @@ int eal_create_runtime_dir(void); +int +eal_clean_runtime_dir(void); + +/** Function to return hugefile prefix that's currently set up */ +const char * +eal_get_hugefile_prefix(void); + #define RUNTIME_CONFIG_FNAME "config" static inline const char * eal_runtime_config_path(void) @@ -86,7 +93,7 @@ static inline const char * eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id) { snprintf(buffer, buflen, HUGEFILE_FMT, hugedir, - internal_config.hugefile_prefix, f_id); + eal_get_hugefile_prefix(), f_id); buffer[buflen - 1] = '\0'; return buffer; } diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index 737f17e3..783ce7de 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -64,9 +64,9 @@ struct internal_config { volatile int syslog_facility; /**< facility passed to openlog() */ /** default interrupt mode for VFIO */ volatile enum rte_intr_mode vfio_intr_mode; - const char *hugefile_prefix; /**< the base filename of hugetlbfs files */ - const char *hugepage_dir; /**< specific hugetlbfs directory to use */ - const char *user_mbuf_pool_ops_name; + char *hugefile_prefix; /**< the base filename of hugetlbfs files */ + char *hugepage_dir; /**< specific hugetlbfs directory to use */ + char *user_mbuf_pool_ops_name; /**< user defined mbuf pool ops name */ unsigned num_hugepage_sizes; /**< how many sizes on this system */ struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index 5271f944..327c95e9 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -75,6 +75,7 @@ int eal_parse_common_option(int opt, const char *argv, struct internal_config *conf); int eal_option_device_parse(void); int eal_adjust_config(struct internal_config *internal_cfg); +int eal_cleanup_config(struct internal_config *internal_cfg); int eal_check_common_options(struct internal_config *internal_cfg); void eal_common_usage(void); enum rte_proc_type_t eal_proc_type_detect(void); diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 442c6dc4..4f483833 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -255,10 +255,14 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str); * 0 on success; * (<0) on failure. */ - int rte_mp_channel_init(void); /** + * Primary/secondary communication cleanup. + */ +void rte_mp_channel_cleanup(void); + +/** * @internal * Parse a device string and store its information in an * rte_devargs structure. 
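The eal_common_proc.c changes above drop the unlink-everything-at-init behaviour of the primary process; instead, each process unlinks its own mp socket when the channel is torn down, and rte_eal_cleanup() now performs that teardown. A minimal application-level sketch of the resulting lifecycle (public EAL API only; error handling trimmed):

    #include <rte_eal.h>

    int
    main(int argc, char **argv)
    {
        /* rte_eal_init() opens this process's mp socket and, at the
         * end of init, removes stale (unlocked) files left in the
         * runtime directory by dead processes. */
        if (rte_eal_init(argc, argv) < 0)
            return -1;

        /* ... application work ... */

        /* rte_eal_cleanup() now also closes and unlinks this process's
         * mp socket and frees the strdup'd option strings. */
        rte_eal_cleanup();
        return 0;
    }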
diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c index 070e2e0c..9d610a8a 100644 --- a/lib/librte_eal/common/hotplug_mp.c +++ b/lib/librte_eal/common/hotplug_mp.c @@ -208,6 +208,8 @@ handle_secondary_request(const struct rte_mp_msg *msg, const void *peer) ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle); if (ret != 0) { RTE_LOG(ERR, EAL, "failed to add mp task\n"); + free(bundle->peer); + free(bundle); return send_response_to_secondary(req, ret, peer); } return 0; @@ -332,6 +334,8 @@ handle_primary_request(const struct rte_mp_msg *msg, const void *peer) */ ret = rte_eal_alarm_set(1, __handle_primary_request, bundle); if (ret != 0) { + free(bundle->peer); + free(bundle); resp->result = ret; ret = rte_mp_reply(&mp_resp, peer); if (ret != 0) { diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h index b99ba468..4afd1acc 100644 --- a/lib/librte_eal/common/include/generic/rte_atomic.h +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -212,7 +212,7 @@ rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val); static inline uint16_t rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val) { -#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG) +#if defined(__clang__) return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); #else return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST); @@ -495,7 +495,7 @@ rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val); static inline uint32_t rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val) { -#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG) +#if defined(__clang__) return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); #else return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST); @@ -777,7 +777,7 @@ rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val); static inline uint64_t rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val) { -#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG) +#if defined(__clang__) return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST); #else return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST); diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h index 7249e6aa..54a12467 100644 --- a/lib/librte_eal/common/include/rte_malloc.h +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -251,6 +251,9 @@ rte_malloc_validate(const void *ptr, size_t *size); /** * Get heap statistics for the specified heap. * + * @note This function is not thread-safe with respect to + * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions. + * * @param socket * An unsigned integer specifying the socket to get heap statistics for * @param socket_stats @@ -282,9 +285,9 @@ rte_malloc_get_socket_stats(int socket, * @param heap_name * Name of the heap to add memory chunk to * @param va_addr - * Start of virtual area to add to the heap + * Start of virtual area to add to the heap. Must be aligned by ``page_sz``. * @param len - * Length of virtual area to add to the heap + * Length of virtual area to add to the heap. Must be aligned by ``page_sz``. * @param iova_addrs * Array of page IOVA addresses corresponding to each page in this memory * area. Can be NULL, in which case page IOVA addresses will be set to @@ -461,6 +464,9 @@ rte_malloc_heap_socket_is_external(int socket_id); * Dump for the specified type to a file. If the type argument is * NULL, all memory types will be dumped. 
* + * @note This function is not thread-safe with respect to + * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions. + * * @param f * A pointer to a file for output * @param type @@ -473,6 +479,9 @@ rte_malloc_dump_stats(FILE *f, const char *type); /** * Dump contents of all malloc heaps to a file. * + * @note This function is not thread-safe with respect to + * ``rte_malloc_heap_create()``/``rte_malloc_heap_destroy()`` functions. + * * @param f * A pointer to a file for output */ diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index f01c227f..b4c6dd3c 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -37,7 +37,7 @@ extern "C" { /** * Patch level number i.e. the z in yy.mm.z */ -#define RTE_VER_MINOR 0 +#define RTE_VER_MINOR 1 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index 9d3dcb6a..052aeeb7 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -38,6 +38,10 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align) /* segment must start after header and with specified alignment */ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align); + /* return if aligned address is already out of malloc element */ + if (contig_seg_start > data_end) + return 0; + /* if we're in IOVA as VA mode, or if we're in legacy mode with * hugepages, all elements are IOVA-contiguous. however, we can only * make these assumptions about internal memory - externally allocated diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c index 5f2d4e0b..f3a13353 100644 --- a/lib/librte_eal/common/malloc_mp.c +++ b/lib/librte_eal/common/malloc_mp.c @@ -209,6 +209,8 @@ handle_alloc_request(const struct malloc_mp_req *m, map_addr = ms[0]->addr; + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz); + /* we have succeeded in allocating memory, but we still need to sync * with other processes. however, since DPDK IPC is single-threaded, we * send an asynchronous request and exit this callback. 
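The malloc_mp.c hunks here and below make allocations and frees performed over the multiprocess channel emit the same memory events as local ones, so registered callbacks fire in all processes. A minimal subscriber sketch (the callback and registration name are illustrative; rte_mem_event_callback_register() is the public, experimental API from rte_memory.h):

    #include <rte_common.h>
    #include <rte_log.h>
    #include <rte_memory.h>

    /* Illustrative callback: runs on RTE_MEM_EVENT_ALLOC and
     * RTE_MEM_EVENT_FREE for hotplugged memory. */
    static void
    mem_event_cb(enum rte_mem_event event, const void *addr, size_t len,
            void *arg __rte_unused)
    {
        RTE_LOG(DEBUG, USER1, "mem event %s: addr %p, len %zu\n",
            event == RTE_MEM_EVENT_ALLOC ? "alloc" : "free", addr, len);
    }

    /* During setup ("my-app" is an arbitrary registration name):
     * rte_mem_event_callback_register("my-app", mem_event_cb, NULL);
     */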
@@ -258,6 +260,9 @@ handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused) if (m->t == REQ_TYPE_ALLOC) { ret = handle_alloc_request(m, entry); } else if (m->t == REQ_TYPE_FREE) { + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + m->free_req.addr, m->free_req.len); + ret = malloc_heap_free_pages(m->free_req.addr, m->free_req.len); } else { @@ -436,6 +441,9 @@ handle_sync_response(const struct rte_mp_msg *request, memset(&rb_msg, 0, sizeof(rb_msg)); /* we've failed to sync, so do a rollback */ + eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, + state->map_addr, state->map_len); + rollback_expand_heap(state->ms, state->ms_len, state->elem, state->map_addr, state->map_len); diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c index 0da5ad5e..47c2bec7 100644 --- a/lib/librte_eal/common/rte_malloc.c +++ b/lib/librte_eal/common/rte_malloc.c @@ -156,20 +156,14 @@ rte_malloc_get_socket_stats(int socket, struct rte_malloc_socket_stats *socket_stats) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int heap_idx, ret = -1; - - rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + int heap_idx; heap_idx = malloc_socket_to_heap_id(socket); if (heap_idx < 0) - goto unlock; + return -1; - ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], + return malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx], socket_stats); -unlock: - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); - - return ret; } /* @@ -181,14 +175,10 @@ rte_malloc_dump_heaps(FILE *f) struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; unsigned int idx; - rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); - for (idx = 0; idx < RTE_MAX_HEAPS; idx++) { fprintf(f, "Heap id: %u\n", idx); malloc_heap_dump(&mcfg->malloc_heaps[idx], f); } - - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); } int @@ -262,8 +252,6 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) unsigned int heap_id; struct rte_malloc_socket_stats sock_stats; - rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); - /* Iterate through all initialised heaps */ for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) { struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id]; @@ -280,7 +268,6 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count); fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count); } - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); return; } @@ -345,6 +332,9 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, if (heap_name == NULL || va_addr == NULL || page_sz == 0 || !rte_is_power_of_2(page_sz) || + RTE_ALIGN(len, page_sz) != len || + !rte_is_aligned(va_addr, page_sz) || + ((len / page_sz) != n_pages && iova_addrs != NULL) || strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 || strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == RTE_HEAP_NAME_MAX_LEN) { @@ -367,11 +357,6 @@ rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len, goto unlock; } n = len / page_sz; - if (n != n_pages && iova_addrs != NULL) { - rte_errno = EINVAL; - ret = -1; - goto unlock; - } rte_spinlock_lock(&heap->lock); ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n, @@ -517,13 +502,8 @@ sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach) if (wa.result < 0) { rte_errno = -wa.result; ret = -1; - } else { - /* notify all subscribers that a new memory area was added */ - if (attach) - 
eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, - va_addr, len); + } else ret = 0; - } unlock: rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); return ret; diff --git a/lib/librte_eal/common/rte_option.c b/lib/librte_eal/common/rte_option.c index 02d59a86..198de6d2 100644 --- a/lib/librte_eal/common/rte_option.c +++ b/lib/librte_eal/common/rte_option.c @@ -35,10 +35,11 @@ void __rte_experimental rte_option_register(struct rte_option *opt) { TAILQ_FOREACH(option, &rte_option_list, next) { - if (strcmp(opt->opt_str, option->opt_str) == 0) - RTE_LOG(INFO, EAL, "Option %s has already been registered.", + if (strcmp(opt->opt_str, option->opt_str) == 0) { + RTE_LOG(ERR, EAL, "Option %s has already been registered.\n", opt->opt_str); return; + } } TAILQ_INSERT_HEAD(&rte_option_list, opt, next); diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c index 361744d4..30138b63 100644 --- a/lib/librte_eal/linuxapp/eal/eal.c +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -13,7 +13,9 @@ #include <syslog.h> #include <getopt.h> #include <sys/file.h> +#include <dirent.h> #include <fcntl.h> +#include <fnmatch.h> #include <stddef.h> #include <errno.h> #include <limits.h> @@ -123,7 +125,7 @@ eal_create_runtime_dir(void) /* create prefix-specific subdirectory under DPDK runtime dir */ ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s", - tmp, internal_config.hugefile_prefix); + tmp, eal_get_hugefile_prefix()); if (ret < 0 || ret == sizeof(runtime_dir)) { RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n"); return -1; @@ -149,6 +151,91 @@ eal_create_runtime_dir(void) return 0; } +int +eal_clean_runtime_dir(void) +{ + DIR *dir; + struct dirent *dirent; + int dir_fd, fd, lck_result; + static const char * const filters[] = { + "fbarray_*", + "mp_socket_*" + }; + + /* open directory */ + dir = opendir(runtime_dir); + if (!dir) { + RTE_LOG(ERR, EAL, "Unable to open runtime directory %s\n", + runtime_dir); + goto error; + } + dir_fd = dirfd(dir); + + /* lock the directory before doing anything, to avoid races */ + if (flock(dir_fd, LOCK_EX) < 0) { + RTE_LOG(ERR, EAL, "Unable to lock runtime directory %s\n", + runtime_dir); + goto error; + } + + dirent = readdir(dir); + if (!dirent) { + RTE_LOG(ERR, EAL, "Unable to read runtime directory %s\n", + runtime_dir); + goto error; + } + + while (dirent != NULL) { + unsigned int f_idx; + bool skip = true; + + /* skip files that don't match the patterns */ + for (f_idx = 0; f_idx < RTE_DIM(filters); f_idx++) { + const char *filter = filters[f_idx]; + + if (fnmatch(filter, dirent->d_name, 0) == 0) { + skip = false; + break; + } + } + if (skip) { + dirent = readdir(dir); + continue; + } + + /* try and lock the file */ + fd = openat(dir_fd, dirent->d_name, O_RDONLY); + + /* skip to next file */ + if (fd == -1) { + dirent = readdir(dir); + continue; + } + + /* non-blocking lock */ + lck_result = flock(fd, LOCK_EX | LOCK_NB); + + /* if lock succeeds, remove the file */ + if (lck_result != -1) + unlinkat(dir_fd, dirent->d_name, 0); + close(fd); + dirent = readdir(dir); + } + + /* closedir closes dir_fd and drops the lock */ + closedir(dir); + return 0; + +error: + if (dir) + closedir(dir); + + RTE_LOG(ERR, EAL, "Error while clearing runtime dir: %s\n", + strerror(errno)); + + return -1; +} + const char * rte_eal_get_runtime_dir(void) { @@ -494,10 +581,6 @@ eal_parse_socket_arg(char *strval, volatile uint64_t *socket_arg) socket_arg[i] = val; } - /* check if we have a positive amount of total memory */ - if (total_mem == 0) - 
return -1; - return 0; } @@ -639,13 +722,31 @@ eal_parse_args(int argc, char **argv) exit(EXIT_SUCCESS); case OPT_HUGE_DIR_NUM: - internal_config.hugepage_dir = strdup(optarg); + { + char *hdir = strdup(optarg); + if (hdir == NULL) + RTE_LOG(ERR, EAL, "Could not store hugepage directory\n"); + else { + /* free old hugepage dir */ + if (internal_config.hugepage_dir != NULL) + free(internal_config.hugepage_dir); + internal_config.hugepage_dir = hdir; + } break; - + } case OPT_FILE_PREFIX_NUM: - internal_config.hugefile_prefix = strdup(optarg); + { + char *prefix = strdup(optarg); + if (prefix == NULL) + RTE_LOG(ERR, EAL, "Could not store file prefix\n"); + else { + /* free old prefix */ + if (internal_config.hugefile_prefix != NULL) + free(internal_config.hugefile_prefix); + internal_config.hugefile_prefix = prefix; + } break; - + } case OPT_SOCKET_MEM_NUM: if (eal_parse_socket_arg(optarg, internal_config.socket_mem) < 0) { @@ -695,10 +796,21 @@ eal_parse_args(int argc, char **argv) break; case OPT_MBUF_POOL_OPS_NAME_NUM: - internal_config.user_mbuf_pool_ops_name = - strdup(optarg); + { + char *ops_name = strdup(optarg); + if (ops_name == NULL) + RTE_LOG(ERR, EAL, "Could not store mbuf pool ops name\n"); + else { + /* free old ops name */ + if (internal_config.user_mbuf_pool_ops_name != + NULL) + free(internal_config.user_mbuf_pool_ops_name); + + internal_config.user_mbuf_pool_ops_name = + ops_name; + } break; - + } default: if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { RTE_LOG(ERR, EAL, "Option %c is not supported " @@ -1096,6 +1208,18 @@ rte_eal_init(int argc, char **argv) return -1; } + /* + * Clean up unused files in runtime directory. We do this at the end of + * init and not at the beginning because we want to clean stuff up + * whether we are primary or secondary process, but we cannot remove + * primary process' files because secondary should be able to run even + * if primary process is dead. + */ + if (eal_clean_runtime_dir() < 0) { + rte_eal_init_alert("Cannot clear runtime directory\n"); + return -1; + } + rte_eal_mcfg_complete(); /* Call each registered callback, if enabled */ @@ -1130,6 +1254,8 @@ rte_eal_cleanup(void) if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_memseg_walk(mark_freeable, NULL); rte_service_finalize(); + rte_mp_channel_cleanup(); + eal_cleanup_config(&internal_config); return 0; } diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c index 78493956..f63d9ca6 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c +++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c @@ -23,6 +23,10 @@ #include <sys/time.h> #include <signal.h> #include <setjmp.h> +#ifdef F_ADD_SEALS /* if file sealing is supported, so is memfd */ +#include <linux/memfd.h> +#define MEMFD_SUPPORTED +#endif #ifdef RTE_EAL_NUMA_AWARE_HUGEPAGES #include <numa.h> #include <numaif.h> @@ -53,8 +57,8 @@ const int anonymous_hugepages_supported = #endif /* - * we don't actually care if memfd itself is supported - we only need to check - * if memfd supports hugetlbfs, as that already implies memfd support. + * we've already checked memfd support at compile-time, but we also need to + * check if we can create hugepage files with memfd. 
* * also, this is not a constant, because while we may be *compiled* with memfd * hugetlbfs support, we might not be *running* on a system that supports memfd @@ -63,10 +67,11 @@ const int anonymous_hugepages_supported = */ static int memfd_create_supported = #ifdef MFD_HUGETLB -#define MEMFD_SUPPORTED 1; +#define RTE_MFD_HUGETLB MFD_HUGETLB #else 0; +#define RTE_MFD_HUGETLB 4U #endif /* @@ -171,7 +176,7 @@ prepare_numa(int *oldpolicy, struct bitmask *oldmask, int socket_id) RTE_LOG(ERR, EAL, "Failed to get current mempolicy: %s. " "Assuming MPOL_DEFAULT.\n", strerror(errno)); - oldpolicy = MPOL_DEFAULT; + *oldpolicy = MPOL_DEFAULT; } RTE_LOG(DEBUG, EAL, "Setting policy MPOL_PREFERRED for socket %d\n", @@ -338,12 +343,12 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused, int fd; char segname[250]; /* as per manpage, limit is 249 bytes plus null */ + int flags = RTE_MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); + if (internal_config.single_file_segments) { fd = fd_list[list_idx].memseg_list_fd; if (fd < 0) { - int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); - snprintf(segname, sizeof(segname), "seg_%i", list_idx); fd = memfd_create(segname, flags); if (fd < 0) { @@ -357,8 +362,6 @@ get_seg_memfd(struct hugepage_info *hi __rte_unused, fd = fd_list[list_idx].fds[seg_idx]; if (fd < 0) { - int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz); - snprintf(segname, sizeof(segname), "seg_%i-%i", list_idx, seg_idx); fd = memfd_create(segname, flags); @@ -633,13 +636,13 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id, int mmap_flags; if (internal_config.in_memory && !memfd_create_supported) { - int pagesz_flag, flags; + const int in_memory_flags = MAP_HUGETLB | MAP_FIXED | + MAP_PRIVATE | MAP_ANONYMOUS; + int pagesz_flag; pagesz_flag = pagesz_flags(alloc_sz); - flags = pagesz_flag | MAP_HUGETLB | MAP_FIXED | - MAP_PRIVATE | MAP_ANONYMOUS; fd = -1; - mmap_flags = flags; + mmap_flags = in_memory_flags | pagesz_flag; /* single-file segments codepath will never be active * here because in-memory mode is incompatible with the @@ -1542,6 +1545,17 @@ int eal_memalloc_get_seg_fd(int list_idx, int seg_idx) { int fd; + + if (internal_config.in_memory || internal_config.no_hugetlbfs) { +#ifndef MEMFD_SUPPORTED + /* in in-memory or no-huge mode, we rely on memfd support */ + return -ENOTSUP; +#endif + /* memfd supported, but hugetlbfs memfd may not be */ + if (!internal_config.no_hugetlbfs && !memfd_create_supported) + return -ENOTSUP; + } + if (internal_config.single_file_segments) { fd = fd_list[list_idx].memseg_list_fd; } else if (fd_list[list_idx].len == 0) { @@ -1565,7 +1579,7 @@ test_memfd_create(void) int pagesz_flag = pagesz_flags(pagesz); int flags; - flags = pagesz_flag | MFD_HUGETLB; + flags = pagesz_flag | RTE_MFD_HUGETLB; int fd = memfd_create("test", flags); if (fd < 0) { /* we failed - let memalloc know this isn't working */ @@ -1589,6 +1603,16 @@ eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + if (internal_config.in_memory || internal_config.no_hugetlbfs) { +#ifndef MEMFD_SUPPORTED + /* in in-memory or no-huge mode, we rely on memfd support */ + return -ENOTSUP; +#endif + /* memfd supported, but hugetlbfs memfd may not be */ + if (!internal_config.no_hugetlbfs && !memfd_create_supported) + return -ENOTSUP; + } + /* fd_list not initialized? 
*/ if (fd_list[list_idx].len == 0) return -ENODEV; diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 32feb415..e05da74c 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -434,7 +434,7 @@ find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) } snprintf(hugedir_str, sizeof(hugedir_str), - "%s/%s", hpi->hugedir, internal_config.hugefile_prefix); + "%s/%s", hpi->hugedir, eal_get_hugefile_prefix()); /* parse numa map */ while (fgets(buf, sizeof(buf), f) != NULL) { diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index 0516b159..c821e838 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -549,6 +549,65 @@ next: } } +static int +vfio_sync_default_container(void) +{ + struct rte_mp_msg mp_req, *mp_rep; + struct rte_mp_reply mp_reply; + struct timespec ts = {.tv_sec = 5, .tv_nsec = 0}; + struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param; + int iommu_type_id; + unsigned int i; + + /* cannot be called from primary */ + if (rte_eal_process_type() != RTE_PROC_SECONDARY) + return -1; + + /* default container fd should have been opened in rte_vfio_enable() */ + if (!default_vfio_cfg->vfio_enabled || + default_vfio_cfg->vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, "VFIO support is not initialized\n"); + return -1; + } + + /* find default container's IOMMU type */ + p->req = SOCKET_REQ_IOMMU_TYPE; + strcpy(mp_req.name, EAL_VFIO_MP); + mp_req.len_param = sizeof(*p); + mp_req.num_fds = 0; + + iommu_type_id = -1; + if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 && + mp_reply.nb_received == 1) { + mp_rep = &mp_reply.msgs[0]; + p = (struct vfio_mp_param *)mp_rep->param; + if (p->result == SOCKET_OK) + iommu_type_id = p->iommu_type_id; + free(mp_reply.msgs); + } + if (iommu_type_id < 0) { + RTE_LOG(ERR, EAL, "Could not get IOMMU type for default container\n"); + return -1; + } + + /* we now have an fd for default container, as well as its IOMMU type. + * now, set up default VFIO container config to match. + */ + for (i = 0; i < RTE_DIM(iommu_types); i++) { + const struct vfio_iommu_type *t = &iommu_types[i]; + if (t->type_id != iommu_type_id) + continue; + + /* we found our IOMMU type */ + default_vfio_cfg->vfio_iommu_type = t; + + return 0; + } + RTE_LOG(ERR, EAL, "Could not find IOMMU type id (%i)\n", + iommu_type_id); + return -1; +} + int rte_vfio_clear_group(int vfio_group_fd) { @@ -745,6 +804,26 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr, else RTE_LOG(DEBUG, EAL, "Installed memory event callback for VFIO\n"); } + } else if (rte_eal_process_type() != RTE_PROC_PRIMARY && + vfio_cfg == default_vfio_cfg && + vfio_cfg->vfio_iommu_type == NULL) { + /* if we're not a primary process, we do not set up the VFIO + * container because it's already been set up by the primary + * process. instead, we simply ask the primary about VFIO type + * we are using, and set the VFIO config up appropriately. 
+ */ + ret = vfio_sync_default_container(); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Could not sync default VFIO container\n"); + close(vfio_group_fd); + rte_vfio_clear_group(vfio_group_fd); + return -1; + } + /* we have successfully initialized VFIO, notify user */ + const struct vfio_iommu_type *t = + default_vfio_cfg->vfio_iommu_type; + RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n", + t->type_id, t->name); } /* get a file descriptor for the device */ @@ -857,7 +936,8 @@ rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, /* if there are no active device groups, unregister the callback to * avoid spurious attempts to map/unmap memory from VFIO. */ - if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0) + if (vfio_cfg == default_vfio_cfg && vfio_cfg->vfio_active_groups == 0 && + rte_eal_process_type() != RTE_PROC_SECONDARY) rte_mem_event_callback_unregister(VFIO_MEM_EVENT_CLB_NAME, NULL); @@ -977,6 +1057,15 @@ vfio_get_default_container_fd(void) return -1; } +int +vfio_get_iommu_type(void) +{ + if (default_vfio_cfg->vfio_iommu_type == NULL) + return -1; + + return default_vfio_cfg->vfio_iommu_type->type_id; +} + const struct vfio_iommu_type * vfio_set_iommu_type(int vfio_container_fd) { diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h index 63ae115c..cb2d35fb 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.h +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -5,6 +5,8 @@ #ifndef EAL_VFIO_H_ #define EAL_VFIO_H_ +#include <rte_common.h> + /* * determine if VFIO is present on the system */ @@ -122,6 +124,9 @@ int vfio_get_default_container_fd(void); const struct vfio_iommu_type * vfio_set_iommu_type(int vfio_container_fd); +int +vfio_get_iommu_type(void); + /* check if we have any supported extensions */ int vfio_has_supported_extensions(int vfio_container_fd); @@ -133,6 +138,7 @@ int vfio_mp_sync_setup(void); #define SOCKET_REQ_CONTAINER 0x100 #define SOCKET_REQ_GROUP 0x200 #define SOCKET_REQ_DEFAULT_CONTAINER 0x400 +#define SOCKET_REQ_IOMMU_TYPE 0x800 #define SOCKET_OK 0x0 #define SOCKET_NO_FD 0x1 #define SOCKET_ERR 0xFF @@ -140,7 +146,11 @@ int vfio_mp_sync_setup(void); struct vfio_mp_param { int req; int result; - int group_num; + RTE_STD_C11 + union { + int group_num; + int iommu_type_id; + }; }; #endif /* VFIO_PRESENT */ diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c index a1e8c834..2a47f29d 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c @@ -77,6 +77,22 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer) reply.fds[0] = fd; } break; + case SOCKET_REQ_IOMMU_TYPE: + { + int iommu_type_id; + + r->req = SOCKET_REQ_IOMMU_TYPE; + + iommu_type_id = vfio_get_iommu_type(); + + if (iommu_type_id < 0) + r->result = SOCKET_ERR; + else { + r->iommu_type_id = iommu_type_id; + r->result = SOCKET_OK; + } + break; + } default: RTE_LOG(ERR, EAL, "vfio received invalid message!\n"); return -1; diff --git a/lib/librte_efd/rte_efd.c b/lib/librte_efd/rte_efd.c index e6e5cfda..1a97ece0 100644 --- a/lib/librte_efd/rte_efd.c +++ b/lib/librte_efd/rte_efd.c @@ -740,6 +740,8 @@ void rte_efd_free(struct rte_efd_table *table) { uint8_t socket_id; + struct rte_efd_list *efd_list; + struct rte_tailq_entry *te, *temp; if (table == NULL) return; @@ -747,6 +749,18 @@ rte_efd_free(struct rte_efd_table *table) for (socket_id = 0; socket_id < RTE_MAX_NUMA_NODES; socket_id++) 
rte_free(table->chunks[socket_id]); + efd_list = RTE_TAILQ_CAST(rte_efd_tailq.head, rte_efd_list); + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH_SAFE(te, efd_list, next, temp) { + if (te->data == (void *) table) { + TAILQ_REMOVE(efd_list, te, next); + rte_free(te); + break; + } + } + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); rte_ring_free(table->free_slots); rte_free(table->offline_chunks); rte_free(table->keys); diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c index 5f858174..9d5107dc 100644 --- a/lib/librte_ethdev/rte_ethdev.c +++ b/lib/librte_ethdev/rte_ethdev.c @@ -1594,7 +1594,7 @@ rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id, nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) { RTE_ETHDEV_LOG(ERR, - "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n", + "Invalid value for nb_rx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n", nb_rx_desc, dev_info.rx_desc_lim.nb_max, dev_info.rx_desc_lim.nb_min, dev_info.rx_desc_lim.nb_align); @@ -1698,7 +1698,7 @@ rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id, nb_tx_desc < dev_info.tx_desc_lim.nb_min || nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) { RTE_ETHDEV_LOG(ERR, - "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, = %hu, and a product of %hu\n", + "Invalid value for nb_tx_desc(=%hu), should be: <= %hu, >= %hu, and a product of %hu\n", nb_tx_desc, dev_info.tx_desc_lim.nb_max, dev_info.tx_desc_lim.nb_min, dev_info.tx_desc_lim.nb_align); diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h index 1960f3a2..a3c864a1 100644 --- a/lib/librte_ethdev/rte_ethdev.h +++ b/lib/librte_ethdev/rte_ethdev.h @@ -4159,9 +4159,6 @@ rte_eth_tx_burst(uint16_t port_id, uint16_t queue_id, } /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Process a burst of output packets on a transmit queue of an Ethernet device. 
* * The rte_eth_tx_prepare() function is invoked to prepare output packets to be @@ -4225,7 +4222,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id, #ifdef RTE_LIBRTE_ETHDEV_DEBUG if (!rte_eth_dev_is_valid_port(port_id)) { RTE_ETHDEV_LOG(ERR, "Invalid TX port_id=%u\n", port_id); - rte_errno = -EINVAL; + rte_errno = EINVAL; return 0; } #endif @@ -4235,7 +4232,7 @@ rte_eth_tx_prepare(uint16_t port_id, uint16_t queue_id, #ifdef RTE_LIBRTE_ETHDEV_DEBUG if (queue_id >= dev->data->nb_tx_queues) { RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id); - rte_errno = -EINVAL; + rte_errno = EINVAL; return 0; } #endif diff --git a/lib/librte_eventdev/rte_event_eth_rx_adapter.c b/lib/librte_eventdev/rte_event_eth_rx_adapter.c index 8831bc35..8d178be1 100644 --- a/lib/librte_eventdev/rte_event_eth_rx_adapter.c +++ b/lib/librte_eventdev/rte_event_eth_rx_adapter.c @@ -912,7 +912,7 @@ rxa_intr_ring_enqueue(struct rte_event_eth_rx_adapter *rx_adapter, */ if (err) RTE_EDEV_LOG_ERR("Failed to enqueue interrupt" - " to ring: %s", strerror(err)); + " to ring: %s", strerror(-err)); else rte_eth_dev_rx_intr_disable(port_id, queue); } diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.c b/lib/librte_eventdev/rte_event_eth_tx_adapter.c index ccf8a755..67216a30 100644 --- a/lib/librte_eventdev/rte_event_eth_tx_adapter.c +++ b/lib/librte_eventdev/rte_event_eth_tx_adapter.c @@ -59,6 +59,20 @@ do {\ return -EINVAL; \ } while (0) +#define TXA_CHECK_TXQ(dev, queue) \ +do {\ + if ((dev)->data->nb_tx_queues == 0) { \ + RTE_EDEV_LOG_ERR("No tx queues configured"); \ + return -EINVAL; \ + } \ + if ((queue) != -1 && \ + (uint16_t)(queue) >= (dev)->data->nb_tx_queues) { \ + RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16, \ + (uint16_t)(queue)); \ + return -EINVAL; \ + } \ +} while (0) + /* Tx retry callback structure */ struct txa_retry { /* Ethernet port id */ @@ -795,20 +809,35 @@ txa_service_queue_del(uint8_t id, struct rte_eth_dev_tx_buffer *tb; uint16_t port_id; + txa = txa_service_id_to_data(id); + port_id = dev->data->port_id; + if (tx_queue_id == -1) { - uint16_t i; - int ret = -1; + uint16_t i, q, nb_queues; + int ret = 0; - for (i = 0; i < dev->data->nb_tx_queues; i++) { - ret = txa_service_queue_del(id, dev, i); - if (ret != 0) - break; + nb_queues = txa->nb_queues; + if (nb_queues == 0) + return 0; + + i = 0; + q = 0; + tqi = txa->txa_ethdev[port_id].queues; + + while (i < nb_queues) { + + if (tqi[q].added) { + ret = txa_service_queue_del(id, dev, q); + if (ret != 0) + break; + } + i++; + q++; } return ret; } txa = txa_service_id_to_data(id); - port_id = dev->data->port_id; tqi = txa_service_queue(txa, port_id, tx_queue_id); if (tqi == NULL || !tqi->added) @@ -999,11 +1028,7 @@ rte_event_eth_tx_adapter_queue_add(uint8_t id, TXA_CHECK_OR_ERR_RET(id); eth_dev = &rte_eth_devices[eth_dev_id]; - if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) { - RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16, - (uint16_t)queue); - return -EINVAL; - } + TXA_CHECK_TXQ(eth_dev, queue); caps = 0; if (txa_dev_caps_get(id)) @@ -1034,11 +1059,6 @@ rte_event_eth_tx_adapter_queue_del(uint8_t id, TXA_CHECK_OR_ERR_RET(id); eth_dev = &rte_eth_devices[eth_dev_id]; - if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) { - RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16, - (uint16_t)queue); - return -EINVAL; - } caps = 0; diff --git a/lib/librte_eventdev/rte_event_timer_adapter.h b/lib/librte_eventdev/rte_event_timer_adapter.h index d4ea6f17..db98dec4 100644 --- 
a/lib/librte_eventdev/rte_event_timer_adapter.h +++ b/lib/librte_eventdev/rte_event_timer_adapter.h @@ -461,61 +461,8 @@ rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter, * - 0: Successfully reset; * - <0: Failure; error code returned. */ -int __rte_experimental rte_event_timer_adapter_stats_reset( - struct rte_event_timer_adapter *adapter); - -/** - * Retrieve the service ID of the event timer adapter. If the adapter doesn't - * use an rte_service function, this function returns -ESRCH. - * - * @param adapter - * A pointer to an event timer adapter. - * - * @param [out] service_id - * A pointer to a uint32_t, to be filled in with the service id. - * - * @return - * - 0: Success - * - <0: Error code on failure, if the event dev doesn't use a rte_service - * function, this function returns -ESRCH. - */ -int -rte_event_timer_adapter_service_id_get(struct rte_event_timer_adapter *adapter, - uint32_t *service_id); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * - * Retrieve statistics for an event timer adapter instance. - * - * @param adapter - * A pointer to an event timer adapter structure. - * @param[out] stats - * A pointer to a structure to fill with statistics. - * - * @return - * - 0: Successfully retrieved. - * - <0: Failure; error code returned. - */ -int rte_event_timer_adapter_stats_get(struct rte_event_timer_adapter *adapter, - struct rte_event_timer_adapter_stats *stats); - -/** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * - * Reset statistics for an event timer adapter instance. - * - * @param adapter - * A pointer to an event timer adapter structure. - * - * @return - * - 0: Successfully reset; - * - <0: Failure; error code returned. - */ -int rte_event_timer_adapter_stats_reset( - struct rte_event_timer_adapter *adapter); +int __rte_experimental +rte_event_timer_adapter_stats_reset(struct rte_event_timer_adapter *adapter); /** * @warning diff --git a/lib/librte_eventdev/rte_eventdev.h b/lib/librte_eventdev/rte_eventdev.h index d7eb69d1..ef10a855 100644 --- a/lib/librte_eventdev/rte_eventdev.h +++ b/lib/librte_eventdev/rte_eventdev.h @@ -1893,7 +1893,7 @@ rte_event_dev_xstats_names_get(uint8_t dev_id, * @param ids * The id numbers of the stats to get. The ids can be got from the stat * position in the stat list from rte_event_dev_get_xstats_names(), or - * by using rte_eventdev_get_xstats_by_name() + * by using rte_event_dev_xstats_by_name_get(). * @param[out] values * The values for each stats request by ID. * @param n @@ -1921,7 +1921,7 @@ rte_event_dev_xstats_get(uint8_t dev_id, * The stat name to retrieve * @param[out] id * If non-NULL, the numerical id of the stat will be returned, so that further - * requests for the stat can be got using rte_eventdev_xstats_get, which will + * requests for the stat can be got using rte_event_dev_xstats_get, which will * be faster as it doesn't need to scan a list of names for the stat. * If the stat cannot be found, the id returned will be (unsigned)-1. 
* @return diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c index 2c0f35c6..7d128a43 100644 --- a/lib/librte_gro/gro_tcp4.c +++ b/lib/librte_gro/gro_tcp4.c @@ -198,7 +198,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, struct ipv4_hdr *ipv4_hdr; struct tcp_hdr *tcp_hdr; uint32_t sent_seq; - uint16_t tcp_dl, ip_id, hdr_len, frag_off; + int32_t tcp_dl; + uint16_t ip_id, hdr_len, frag_off; uint8_t is_atomic; struct tcp4_flow_key key; @@ -207,6 +208,13 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, int cmp; uint8_t find; + /* + * Don't process the packet whose TCP header length is greater + * than 60 bytes or less than 20 bytes. + */ + if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len))) + return -1; + eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h index 6bb30cdb..d9792488 100644 --- a/lib/librte_gro/gro_tcp4.h +++ b/lib/librte_gro/gro_tcp4.h @@ -17,6 +17,11 @@ */ #define MAX_IPV4_PKT_LENGTH UINT16_MAX +/* The maximum TCP header length */ +#define MAX_TCP_HLEN 60 +#define INVALID_TCP_HDRLEN(len) \ + (((len) < sizeof(struct tcp_hdr)) || ((len) > MAX_TCP_HLEN)) + /* Header fields representing a TCP/IPv4 flow */ struct tcp4_flow_key { struct ether_addr eth_saddr; diff --git a/lib/librte_gro/gro_vxlan_tcp4.c b/lib/librte_gro/gro_vxlan_tcp4.c index ca86f010..acb9bc91 100644 --- a/lib/librte_gro/gro_vxlan_tcp4.c +++ b/lib/librte_gro/gro_vxlan_tcp4.c @@ -295,7 +295,8 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt, struct udp_hdr *udp_hdr; struct vxlan_hdr *vxlan_hdr; uint32_t sent_seq; - uint16_t tcp_dl, frag_off, outer_ip_id, ip_id; + int32_t tcp_dl; + uint16_t frag_off, outer_ip_id, ip_id; uint8_t outer_is_atomic, is_atomic; struct vxlan_tcp4_flow_key key; @@ -305,6 +306,13 @@ gro_vxlan_tcp4_reassemble(struct rte_mbuf *pkt, uint16_t hdr_len; uint8_t find; + /* + * Don't process the packet whose TCP header length is greater + * than 60 bytes or less than 20 bytes. 
+ */ + if (unlikely(INVALID_TCP_HDRLEN(pkt->l4_len))) + return -1; + outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); outer_ipv4_hdr = (struct ipv4_hdr *)((char *)outer_eth_hdr + pkt->outer_l2_len); diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h index 6cd764ff..b6ff1b88 100644 --- a/lib/librte_gso/gso_common.h +++ b/lib/librte_gso/gso_common.h @@ -22,12 +22,12 @@ (PKT_TX_TCP_SEG | PKT_TX_IPV4)) #define IS_IPV4_VXLAN_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \ - PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_VXLAN)) == \ + PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \ (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ PKT_TX_TUNNEL_VXLAN)) #define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \ - PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_GRE)) == \ + PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \ (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \ PKT_TX_TUNNEL_GRE)) diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index c55a4f26..c01489ba 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1347,6 +1347,9 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) n_slots = rte_ring_mp_enqueue_burst(h->free_slots, cached_free_slots->objs, LCORE_CACHE_SIZE, NULL); + ERR_IF_TRUE((n_slots == 0), + "%s: could not enqueue free slots in global ring\n", + __func__); cached_free_slots->len -= n_slots; } /* Put index of new free slot in cache. */ @@ -1552,6 +1555,7 @@ rte_hash_free_key_with_position(const struct rte_hash *h, n_slots = rte_ring_mp_enqueue_burst(h->free_slots, cached_free_slots->objs, LCORE_CACHE_SIZE, NULL); + RETURN_IF_TRUE((n_slots == 0), -EFAULT); cached_free_slots->len -= n_slots; } /* Put index of new free slot in cache. */ @@ -2022,11 +2026,11 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, uint64_t *hit_mask, void *data[]) { if (h->readwrite_concur_lf_support) - return __rte_hash_lookup_bulk_lf(h, keys, num_keys, - positions, hit_mask, data); + __rte_hash_lookup_bulk_lf(h, keys, num_keys, positions, + hit_mask, data); else - return __rte_hash_lookup_bulk_l(h, keys, num_keys, - positions, hit_mask, data); + __rte_hash_lookup_bulk_l(h, keys, num_keys, positions, + hit_mask, data); } int diff --git a/lib/librte_hash/rte_cuckoo_hash.h b/lib/librte_hash/rte_cuckoo_hash.h index 5dfbbc48..eacdaa8d 100644 --- a/lib/librte_hash/rte_cuckoo_hash.h +++ b/lib/librte_hash/rte_cuckoo_hash.h @@ -29,6 +29,17 @@ #define RETURN_IF_TRUE(cond, retval) #endif +#if defined(RTE_LIBRTE_HASH_DEBUG) +#define ERR_IF_TRUE(cond, fmt, args...) do { \ + if (cond) { \ + RTE_LOG(ERR, HASH, fmt, ##args); \ + return; \ + } \ +} while (0) +#else +#define ERR_IF_TRUE(cond, fmt, args...) 
+#endif + #include <rte_hash_crc.h> #include <rte_jhash.h> diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h index a4ccaf9d..04fd9df5 100644 --- a/lib/librte_ip_frag/rte_ip_frag.h +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -115,6 +115,7 @@ struct rte_ip_frag_tbl { #define RTE_IPV6_EHDR_MF_MASK 1 #define RTE_IPV6_EHDR_FO_SHIFT 3 #define RTE_IPV6_EHDR_FO_MASK (~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1)) +#define RTE_IPV6_EHDR_FO_ALIGN (1 << RTE_IPV6_EHDR_FO_SHIFT) #define RTE_IPV6_FRAG_USED_MASK \ (RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK) diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c index 62a7e4e8..b9437eb1 100644 --- a/lib/librte_ip_frag/rte_ipv6_fragmentation.c +++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c @@ -77,11 +77,14 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, uint32_t out_pkt_pos, in_seg_data_pos; uint32_t more_in_segs; uint16_t fragment_offset, frag_size; + uint64_t frag_bytes_remaining; - frag_size = (uint16_t)(mtu_size - sizeof(struct ipv6_hdr)); - - /* Fragment size should be a multiple of 8. */ - RTE_ASSERT((frag_size & ~RTE_IPV6_EHDR_FO_MASK) == 0); + /* + * Ensure the IP payload length of all fragments (except the + * the last fragment) are a multiple of 8 bytes per RFC2460. + */ + frag_size = RTE_ALIGN_FLOOR(mtu_size - sizeof(struct ipv6_hdr), + RTE_IPV6_EHDR_FO_ALIGN); /* Check that pkts_out is big enough to hold all fragments */ if (unlikely (frag_size * nb_pkts_out < @@ -111,6 +114,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, /* Reserve space for the IP header that will be built later */ out_pkt->data_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment); out_pkt->pkt_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment); + frag_bytes_remaining = frag_size; out_seg_prev = out_pkt; more_out_segs = 1; @@ -130,7 +134,7 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, /* Prepare indirect buffer */ rte_pktmbuf_attach(out_seg, in_seg); - len = mtu_size - out_pkt->pkt_len; + len = frag_bytes_remaining; if (len > (in_seg->data_len - in_seg_data_pos)) { len = in_seg->data_len - in_seg_data_pos; } @@ -140,11 +144,11 @@ rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, out_pkt->pkt_len); out_pkt->nb_segs += 1; in_seg_data_pos += len; + frag_bytes_remaining -= len; /* Current output packet (i.e. fragment) done ? */ - if (unlikely(out_pkt->pkt_len >= mtu_size)) { + if (unlikely(frag_bytes_remaining == 0)) more_out_segs = 0; - } /* Current input segment done ? */ if (unlikely(in_seg_data_pos == in_seg->data_len)) { diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h index f2a8904a..f9b90909 100644 --- a/lib/librte_net/rte_ip.h +++ b/lib/librte_net/rte_ip.h @@ -310,16 +310,20 @@ rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags) * @param l4_hdr * The pointer to the beginning of the L4 header. * @return - * The complemented checksum to set in the IP packet. 
+ * The complemented checksum to set in the IP packet + * or 0 on error */ static inline uint16_t rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) { uint32_t cksum; - uint32_t l4_len; + uint32_t l3_len, l4_len; + + l3_len = rte_be_to_cpu_16(ipv4_hdr->total_length); + if (l3_len < sizeof(struct ipv4_hdr)) + return 0; - l4_len = (uint32_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) - - sizeof(struct ipv4_hdr)); + l4_len = l3_len - sizeof(struct ipv4_hdr); cksum = rte_raw_cksum(l4_hdr, l4_len); cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 587d5e60..89c3d1e7 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -667,6 +667,7 @@ rte_sched_port_config(struct rte_sched_port_params *params) params->red_params[i][j].min_th, params->red_params[i][j].max_th, params->red_params[i][j].maxp_inv) != 0) { + rte_free(port); return NULL; } } @@ -726,6 +727,7 @@ rte_sched_port_config(struct rte_sched_port_params *params) bmp_mem_size); if (port->bmp == NULL) { RTE_LOG(ERR, SCHED, "Bitmap init error\n"); + rte_free(port); return NULL; } diff --git a/lib/librte_telemetry/rte_telemetry.c b/lib/librte_telemetry/rte_telemetry.c index 016431f1..7fb247ea 100644 --- a/lib/librte_telemetry/rte_telemetry.c +++ b/lib/librte_telemetry/rte_telemetry.c @@ -558,7 +558,7 @@ rte_telemetry_send_ports_stats_values(uint32_t *metric_ids, int num_metric_ids, } ret = rte_telemetry_update_metrics_ethdev(telemetry, - port_ids[i], telemetry->reg_index); + port_ids[i], telemetry->reg_index[i]); if (ret < 0) { TELEMETRY_LOG_ERR("Failed to update ethdev metrics"); return -1; @@ -658,23 +658,45 @@ free_xstats: static int32_t rte_telemetry_initial_accept(struct telemetry_impl *telemetry) { + struct driver_index { + const void *dev_ops; + int reg_index; + } drv_idx[RTE_MAX_ETHPORTS]; + int nb_drv_idx = 0; uint16_t pid; int ret; int selftest = 0; RTE_ETH_FOREACH_DEV(pid) { - telemetry->reg_index = rte_telemetry_reg_ethdev_to_metrics(pid); - break; - } + int i; + /* Different device types have different numbers of stats, so + * first check if the stats for this type of device have + * already been registered + */ + for (i = 0; i < nb_drv_idx; i++) { + if (rte_eth_devices[pid].dev_ops == drv_idx[i].dev_ops) { + telemetry->reg_index[pid] = drv_idx[i].reg_index; + break; + } + } + if (i < nb_drv_idx) + continue; /* we found a match, go to next port */ - if (telemetry->reg_index < 0) { - TELEMETRY_LOG_ERR("Failed to register ethdev metrics"); - return -1; + /* No match, register a new set of xstats for this port */ + ret = rte_telemetry_reg_ethdev_to_metrics(pid); + if (ret < 0) { + TELEMETRY_LOG_ERR("Failed to register ethdev metrics"); + return -1; + } + telemetry->reg_index[pid] = ret; + drv_idx[nb_drv_idx].dev_ops = rte_eth_devices[pid].dev_ops; + drv_idx[nb_drv_idx].reg_index = ret; + nb_drv_idx++; } telemetry->metrics_register_done = 1; if (selftest) { - ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index, + ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index[0], telemetry->server_fd); if (ret < 0) return -1; @@ -1299,7 +1321,7 @@ rte_telemetry_socket_messaging_testing(int index, int socket) } telemetry->server_fd = socket; - telemetry->reg_index = index; + telemetry->reg_index[0] = index; TELEMETRY_LOG_INFO("Beginning Telemetry socket message Selftest"); rte_telemetry_socket_test_setup(telemetry, &send_fd, &recv_fd); TELEMETRY_LOG_INFO("Register valid client test"); diff --git 
diff --git a/lib/librte_telemetry/rte_telemetry_internal.h b/lib/librte_telemetry/rte_telemetry_internal.h index de7afda3..c298c391 100644 --- a/lib/librte_telemetry/rte_telemetry_internal.h +++ b/lib/librte_telemetry/rte_telemetry_internal.h @@ -36,7 +36,7 @@ typedef struct telemetry_impl { pthread_t thread_id; int thread_status; uint32_t socket_id; - int reg_index; + int reg_index[RTE_MAX_ETHPORTS]; int metrics_register_done; TAILQ_HEAD(, telemetry_client) client_list_head; struct telemetry_client *request_client;
diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c index 590488c7..30c7b0ab 100644 --- a/lib/librte_timer/rte_timer.c +++ b/lib/librte_timer/rte_timer.c @@ -241,24 +241,17 @@ timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore, } } -/* - * add in list, lock if needed +/* call with lock held as necessary + * add in list * timer must be in config state * timer must not be in a list */ static void -timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked) +timer_add(struct rte_timer *tim, unsigned int tim_lcore) { - unsigned lcore_id = rte_lcore_id(); unsigned lvl; struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1]; - /* if timer needs to be scheduled on another core, we need to - * lock the list; if it is on local core, we need to lock if - * we are not called from rte_timer_manage() */ - if (tim_lcore != lcore_id || !local_is_locked) - rte_spinlock_lock(&priv_timer[tim_lcore].list_lock); - /* find where exactly this element goes in the list of elements * for each depth. */ timer_get_prev_entries(tim->expire, tim_lcore, prev); @@ -282,9 +275,6 @@ timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked) * NOTE: this is not atomic on 32-bit*/ priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\ pending_head.sl_next[0]->expire; - - if (tim_lcore != lcore_id || !local_is_locked) - rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock); } /* @@ -379,8 +369,15 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire, tim->f = fct; tim->arg = arg; + /* if timer needs to be scheduled on another core, we need to + * lock the destination list; if it is on local core, we need to lock if + * we are not called from rte_timer_manage() + */ + if (tim_lcore != lcore_id || !local_is_locked) + rte_spinlock_lock(&priv_timer[tim_lcore].list_lock); + __TIMER_STAT_ADD(pending, 1); - timer_add(tim, tim_lcore, local_is_locked); + timer_add(tim, tim_lcore); /* update state: as we are in CONFIG state, only us can modify * the state so we don't need to use cmpset() here */ @@ -389,6 +386,9 @@ __rte_timer_reset(struct rte_timer *tim, uint64_t expire, status.owner = (int16_t)tim_lcore; tim->status.u32 = status.u32; + if (tim_lcore != lcore_id || !local_is_locked) + rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock); + return 0; }
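The rte_timer change above hoists the locking out of timer_add() into __rte_timer_reset(), so the pending-list insert and the status-word update are covered by a single critical section. A hedged sketch of the resulting pattern, with a pthread spinlock standing in for rte_spinlock_t and a counter standing in for the skiplist:

#include <pthread.h>
#include <stdio.h>

struct timer_list {
    pthread_spinlock_t lock;    /* stands in for rte_spinlock_t */
    int pending;                /* stands in for the skiplist */
};

/* caller must hold l->lock */
static void timer_add_locked(struct timer_list *l)
{
    l->pending++;
}

static void timer_reset(struct timer_list *dst, int need_lock)
{
    if (need_lock)
        pthread_spin_lock(&dst->lock);
    timer_add_locked(dst);
    /* the status update happens under the same lock, so another core
     * cannot observe a pending timer that is not yet in the list */
    if (need_lock)
        pthread_spin_unlock(&dst->lock);
}

int main(void)
{
    struct timer_list tl = { .pending = 0 };
    pthread_spin_init(&tl.lock, PTHREAD_PROCESS_PRIVATE);
    timer_reset(&tl, 1);
    printf("pending timers: %d\n", tl.pending);
    return 0;
}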
diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 38347ab1..55d4856f 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -129,7 +129,9 @@ fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) pthread_mutex_lock(&pfdset->fd_mutex); i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; if (i == -1) { + pthread_mutex_lock(&pfdset->fd_pooling_mutex); fdset_shrink_nolock(pfdset); + pthread_mutex_unlock(&pfdset->fd_pooling_mutex); i = pfdset->num < MAX_FDS ? pfdset->num++ : -1; if (i == -1) { pthread_mutex_unlock(&pfdset->fd_mutex); @@ -246,7 +248,9 @@ fdset_event_dispatch(void *arg) numfds = pfdset->num; pthread_mutex_unlock(&pfdset->fd_mutex); + pthread_mutex_lock(&pfdset->fd_pooling_mutex); val = poll(pfdset->rwfds, numfds, 1000 /* millisecs */); + pthread_mutex_unlock(&pfdset->fd_pooling_mutex); if (val < 0) continue;
diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index 3331bcd9..3ab5cfdd 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -24,6 +24,7 @@ struct fdset { struct pollfd rwfds[MAX_FDS]; struct fdentry fd[MAX_FDS]; pthread_mutex_t fd_mutex; + pthread_mutex_t fd_pooling_mutex; int num; /* current fd number of this fdset */ union pipefds {
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 01b60ff9..9883b049 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -90,6 +90,7 @@ static struct vhost_user vhost_user = { .fdset = { .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, .fd_mutex = PTHREAD_MUTEX_INITIALIZER, + .fd_pooling_mutex = PTHREAD_MUTEX_INITIALIZER, .num = 0 }, .vsocket_cnt = 0, @@ -960,13 +961,13 @@ rte_vhost_driver_unregister(const char *path) int count; struct vhost_user_connection *conn, *next; +again: pthread_mutex_lock(&vhost_user.mutex); for (i = 0; i < vhost_user.vsocket_cnt; i++) { struct vhost_user_socket *vsocket = vhost_user.vsockets[i]; if (!strcmp(vsocket->path, path)) { -again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; @@ -982,6 +983,7 @@ again: conn->connfd) == -1) { pthread_mutex_unlock( &vsocket->conn_mutex); + pthread_mutex_unlock(&vhost_user.mutex); goto again; }
diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index 5218f1b1..552b9298 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -393,8 +393,10 @@ vq_is_packed(struct virtio_net *dev) static inline bool desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter) { - return wrap_counter == !!(desc->flags & VRING_DESC_F_AVAIL) && - wrap_counter != !!(desc->flags & VRING_DESC_F_USED); + uint16_t flags = *((volatile uint16_t *) &desc->flags); + + return wrap_counter == !!(flags & VRING_DESC_F_AVAIL) && + wrap_counter != !!(flags & VRING_DESC_F_USED); } #define VHOST_LOG_PAGE 4096
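desc_is_avail() above now reads the guest-shared flags field exactly once, through a volatile lvalue, so both wrap-counter tests operate on the same snapshot even while the guest is flipping the flags concurrently. A self-contained version of the same check (the struct layout and flag bit positions follow the virtio packed-ring spec; everything here is local to the example):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VRING_DESC_F_AVAIL (1u << 7)
#define VRING_DESC_F_USED  (1u << 15)

struct vring_packed_desc {
    uint64_t addr;
    uint32_t len;
    uint16_t id;
    uint16_t flags;
};

/* Read flags once through a volatile lvalue so the compiler cannot
 * reload the field between the two bit tests and mix two values. */
static bool desc_is_avail(struct vring_packed_desc *desc, bool wrap_counter)
{
    uint16_t flags = *(volatile uint16_t *)&desc->flags;

    return wrap_counter == !!(flags & VRING_DESC_F_AVAIL) &&
           wrap_counter != !!(flags & VRING_DESC_F_USED);
}

int main(void)
{
    struct vring_packed_desc d = { .flags = VRING_DESC_F_AVAIL };
    printf("available: %d\n", desc_is_avail(&d, true));
    return 0;
}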
diff --git a/lib/librte_vhost/vhost_crypto.c b/lib/librte_vhost/vhost_crypto.c index dd01afc0..0694c0a7 100644 --- a/lib/librte_vhost/vhost_crypto.c +++ b/lib/librte_vhost/vhost_crypto.c @@ -466,12 +466,17 @@ vhost_crypto_msg_post_handler(int vid, void *msg) } static __rte_always_inline struct vring_desc * -find_write_desc(struct vring_desc *head, struct vring_desc *desc) +find_write_desc(struct vring_desc *head, struct vring_desc *desc, + uint32_t *nb_descs, uint32_t vq_size) { if (desc->flags & VRING_DESC_F_WRITE) return desc; while (desc->flags & VRING_DESC_F_NEXT) { + if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) + return NULL; + (*nb_descs)--; + desc = &head[desc->next]; if (desc->flags & VRING_DESC_F_WRITE) return desc; @@ -481,13 +486,18 @@ find_write_desc(struct vring_desc *head, struct vring_desc *desc) } static struct virtio_crypto_inhdr * -reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc) +reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc, + uint32_t *nb_descs, uint32_t vq_size) { uint64_t dlen; struct virtio_crypto_inhdr *inhdr; - while (desc->flags & VRING_DESC_F_NEXT) + while (desc->flags & VRING_DESC_F_NEXT) { + if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) + return NULL; + (*nb_descs)--; desc = &vc_req->head[desc->next]; + } dlen = desc->len; inhdr = IOVA_TO_VVA(struct virtio_crypto_inhdr *, vc_req, desc->addr, @@ -500,15 +510,16 @@ reach_inhdr(struct vhost_crypto_data_req *vc_req, struct vring_desc *desc) static __rte_always_inline int move_desc(struct vring_desc *head, struct vring_desc **cur_desc, - uint32_t size) + uint32_t size, uint32_t *nb_descs, uint32_t vq_size) { struct vring_desc *desc = *cur_desc; - int left = size; - - rte_prefetch0(&head[desc->next]); - left -= desc->len; + int left = size - desc->len; while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + (*nb_descs)--; + if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) + return -1; + desc = &head[desc->next]; rte_prefetch0(&head[desc->next]); left -= desc->len; @@ -517,7 +528,14 @@ move_desc(struct vring_desc *head, struct vring_desc **cur_desc, if (unlikely(left > 0)) return -1; - *cur_desc = &head[desc->next]; + if (unlikely(*nb_descs == 0)) + *cur_desc = NULL; + else { + if (unlikely(desc->next >= vq_size)) + return -1; + *cur_desc = &head[desc->next]; + } + return 0; } @@ -539,7 +557,8 @@ get_data_ptr(struct vhost_crypto_data_req *vc_req, struct vring_desc *cur_desc, static int copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, - struct vring_desc **cur_desc, uint32_t size) + struct vring_desc **cur_desc, uint32_t size, + uint32_t *nb_descs, uint32_t vq_size) { struct vring_desc *desc = *cur_desc; uint64_t remain, addr, dlen, len; @@ -548,7 +567,6 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, uint8_t *src; int left = size; - rte_prefetch0(&vc_req->head[desc->next]); to_copy = RTE_MIN(desc->len, (uint32_t)left); dlen = to_copy; src = IOVA_TO_VVA(uint8_t *, vc_req, desc->addr, &dlen, @@ -582,6 +600,12 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, left -= to_copy; while ((desc->flags & VRING_DESC_F_NEXT) && left > 0) { + if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) { + VC_LOG_ERR("Invalid descriptors"); + return -1; + } + (*nb_descs)--; + desc = &vc_req->head[desc->next]; rte_prefetch0(&vc_req->head[desc->next]); to_copy = RTE_MIN(desc->len, (uint32_t)left); @@ -624,7 +648,13 @@ copy_data(void *dst_data, struct vhost_crypto_data_req *vc_req, return -1; } - *cur_desc = &vc_req->head[desc->next]; + if (unlikely(*nb_descs == 0)) + *cur_desc = NULL; + else { + if (unlikely(desc->next >= vq_size)) + return -1; + *cur_desc = &vc_req->head[desc->next]; + } return 0; } @@ -635,7 +665,6 @@ write_back_data(struct vhost_crypto_data_req *vc_req) struct vhost_crypto_writeback_data *wb_data = vc_req->wb, *wb_last; while (wb_data) { - rte_prefetch0(wb_data->next); rte_memcpy(wb_data->dst, wb_data->src, wb_data->len); wb_last = wb_data; wb_data = wb_data->next;
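The nb_descs/vq_size checks added throughout vhost_crypto.c all enforce one rule: a guest-controlled next index can point anywhere, including back into the chain, so every hop must be bounds-checked and paid for from a budget of vq_size hops. A standalone sketch of that traversal rule (all types and names are local to the example):

#include <stdint.h>
#include <stdio.h>

#define VRING_DESC_F_NEXT 1

struct vring_desc {
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
};

/* Walk a guest-controlled descriptor chain, spending one credit per hop.
 * Credits start at the queue size, so a looping chain terminates instead
 * of spinning forever. */
static struct vring_desc *
chain_next(struct vring_desc *table, struct vring_desc *desc,
           uint32_t *credits, uint32_t vq_size)
{
    if (!(desc->flags & VRING_DESC_F_NEXT))
        return NULL;                    /* end of chain */
    if (*credits == 0 || desc->next >= vq_size)
        return NULL;                    /* loop or out-of-range index */
    (*credits)--;
    return &table[desc->next];
}

int main(void)
{
    /* two descriptors that point at each other: a malicious loop */
    struct vring_desc vq[2] = {
        { .flags = VRING_DESC_F_NEXT, .next = 1 },
        { .flags = VRING_DESC_F_NEXT, .next = 0 },
    };
    uint32_t credits = 2;
    struct vring_desc *d = &vq[0];
    int hops = 0;

    while ((d = chain_next(vq, d, &credits, 2)) != NULL)
        hops++;
    printf("stopped after %d hops\n", hops);    /* 2, not forever */
    return 0;
}

The same guard is applied to the write-back path below:

@@ -684,7 +713,8 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req, struct vhost_crypto_writeback_data **end_wb_data, uint8_t *src, uint32_t offset, - uint64_t write_back_len) + uint64_t write_back_len, + uint32_t *nb_descs, uint32_t vq_size) { struct vhost_crypto_writeback_data *wb_data, *head; struct vring_desc *desc = *cur_desc; @@ -731,6 +761,12 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req, offset -= desc->len; while (write_back_len) { + if (unlikely(*nb_descs == 0 || desc->next >= vq_size)) { + VC_LOG_ERR("Invalid descriptors"); + goto error_exit; + } + (*nb_descs)--; + desc = &vc_req->head[desc->next]; if (unlikely(!(desc->flags & VRING_DESC_F_WRITE))) {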
VC_LOG_ERR("incorrect descriptor"); @@ -770,7 +806,13 @@ prepare_write_back_data(struct vhost_crypto_data_req *vc_req, wb_data->next = NULL; } - *cur_desc = &vc_req->head[desc->next]; + if (unlikely(*nb_descs == 0)) + *cur_desc = NULL; + else { + if (unlikely(desc->next >= vq_size)) + goto error_exit; + *cur_desc = &vc_req->head[desc->next]; + } *end_wb_data = wb_data; @@ -787,7 +829,8 @@ static uint8_t prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req, struct virtio_crypto_cipher_data_req *cipher, - struct vring_desc *cur_desc) + struct vring_desc *cur_desc, + uint32_t *nb_descs, uint32_t vq_size) { struct vring_desc *desc = cur_desc; struct vhost_crypto_writeback_data *ewb = NULL; @@ -797,8 +840,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, /* prepare */ /* iv */ - if (unlikely(copy_data(iv_data, vc_req, &desc, - cipher->para.iv_len) < 0)) { + if (unlikely(copy_data(iv_data, vc_req, &desc, cipher->para.iv_len, + nb_descs, vq_size) < 0)) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } @@ -818,7 +861,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, } if (unlikely(move_desc(vc_req->head, &desc, - cipher->para.src_data_len) < 0)) { + cipher->para.src_data_len, nb_descs, + vq_size) < 0)) { VC_LOG_ERR("Incorrect descriptor"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; @@ -835,8 +879,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, goto error_exit; } if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), - vc_req, &desc, cipher->para.src_data_len) - < 0)) { + vc_req, &desc, cipher->para.src_data_len, + nb_descs, vq_size) < 0)) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } @@ -847,7 +891,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, } /* dst */ - desc = find_write_desc(vc_req->head, desc); + desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size); if (unlikely(!desc)) { VC_LOG_ERR("Cannot find write location"); ret = VIRTIO_CRYPTO_BADMSG; @@ -866,7 +910,8 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, } if (unlikely(move_desc(vc_req->head, &desc, - cipher->para.dst_data_len) < 0)) { + cipher->para.dst_data_len, + nb_descs, vq_size) < 0)) { VC_LOG_ERR("Incorrect descriptor"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; @@ -877,7 +922,7 @@ prepare_sym_cipher_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: vc_req->wb = prepare_write_back_data(vc_req, &desc, &ewb, rte_pktmbuf_mtod(m_src, uint8_t *), 0, - cipher->para.dst_data_len); + cipher->para.dst_data_len, nb_descs, vq_size); if (unlikely(vc_req->wb == NULL)) { ret = VIRTIO_CRYPTO_ERR; goto error_exit; @@ -919,7 +964,8 @@ static uint8_t prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, struct vhost_crypto_data_req *vc_req, struct virtio_crypto_alg_chain_data_req *chain, - struct vring_desc *cur_desc) + struct vring_desc *cur_desc, + uint32_t *nb_descs, uint32_t vq_size) { struct vring_desc *desc = cur_desc, *digest_desc; struct vhost_crypto_writeback_data *ewb = NULL, *ewb2 = NULL; @@ -932,7 +978,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, /* prepare */ /* iv */ if (unlikely(copy_data(iv_data, vc_req, &desc, - chain->para.iv_len) < 0)) { + chain->para.iv_len, nb_descs, vq_size) < 0)) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } @@ -953,7 +999,8 @@ prepare_sym_chain_op(struct 
vhost_crypto *vcrypto, struct rte_crypto_op *op, } if (unlikely(move_desc(vc_req->head, &desc, - chain->para.src_data_len) < 0)) { + chain->para.src_data_len, + nb_descs, vq_size) < 0)) { VC_LOG_ERR("Incorrect descriptor"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; @@ -969,7 +1016,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, goto error_exit; } if (unlikely(copy_data(rte_pktmbuf_mtod(m_src, uint8_t *), - vc_req, &desc, chain->para.src_data_len)) < 0) { + vc_req, &desc, chain->para.src_data_len, + nb_descs, vq_size)) < 0) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } @@ -981,7 +1029,7 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, } /* dst */ - desc = find_write_desc(vc_req->head, desc); + desc = find_write_desc(vc_req->head, desc, nb_descs, vq_size); if (unlikely(!desc)) { VC_LOG_ERR("Cannot find write location"); ret = VIRTIO_CRYPTO_BADMSG; @@ -1000,7 +1048,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, } if (unlikely(move_desc(vc_req->head, &desc, - chain->para.dst_data_len) < 0)) { + chain->para.dst_data_len, + nb_descs, vq_size) < 0)) { VC_LOG_ERR("Incorrect descriptor"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; @@ -1017,7 +1066,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, } if (unlikely(move_desc(vc_req->head, &desc, - chain->para.hash_result_len) < 0)) { + chain->para.hash_result_len, + nb_descs, vq_size) < 0)) { VC_LOG_ERR("Incorrect descriptor"); ret = VIRTIO_CRYPTO_ERR; goto error_exit; @@ -1029,7 +1079,8 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, rte_pktmbuf_mtod(m_src, uint8_t *), chain->para.cipher_start_src_offset, chain->para.dst_data_len - - chain->para.cipher_start_src_offset); + chain->para.cipher_start_src_offset, + nb_descs, vq_size); if (unlikely(vc_req->wb == NULL)) { ret = VIRTIO_CRYPTO_ERR; goto error_exit; @@ -1042,14 +1093,16 @@ prepare_sym_chain_op(struct vhost_crypto *vcrypto, struct rte_crypto_op *op, /** create a wb_data for digest */ ewb->next = prepare_write_back_data(vc_req, &desc, &ewb2, - digest_addr, 0, chain->para.hash_result_len); + digest_addr, 0, chain->para.hash_result_len, + nb_descs, vq_size); if (unlikely(ewb->next == NULL)) { ret = VIRTIO_CRYPTO_ERR; goto error_exit; } if (unlikely(copy_data(digest_addr, vc_req, &digest_desc, - chain->para.hash_result_len)) < 0) { + chain->para.hash_result_len, + nb_descs, vq_size)) < 0) { ret = VIRTIO_CRYPTO_BADMSG; goto error_exit; } @@ -1108,6 +1161,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, struct vring_desc *desc = NULL; uint64_t session_id; uint64_t dlen; + uint32_t nb_descs = vq->size; int err = 0; vc_req->desc_idx = desc_idx; @@ -1116,6 +1170,10 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, if (likely(head->flags & VRING_DESC_F_INDIRECT)) { dlen = head->len; + nb_descs = dlen / sizeof(struct vring_desc); + /* drop invalid descriptors */ + if (unlikely(nb_descs > vq->size)) + return -1; desc = IOVA_TO_VVA(struct vring_desc *, vc_req, head->addr, &dlen, VHOST_ACCESS_RO); if (unlikely(!desc || dlen != head->len)) @@ -1138,8 +1196,8 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, goto error_exit; case RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE: req = &tmp_req; - if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req)) - < 0)) { + if (unlikely(copy_data(req, vc_req, &desc, sizeof(*req), + &nb_descs, vq->size) < 0)) { err = VIRTIO_CRYPTO_BADMSG; VC_LOG_ERR("Invalid descriptor"); goto 
error_exit; @@ -1152,7 +1210,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, } } else { if (unlikely(move_desc(vc_req->head, &desc, - sizeof(*req)) < 0)) { + sizeof(*req), &nb_descs, vq->size) < 0)) { VC_LOG_ERR("Incorrect descriptor"); goto error_exit; } @@ -1193,11 +1251,13 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, break; case VIRTIO_CRYPTO_SYM_OP_CIPHER: err = prepare_sym_cipher_op(vcrypto, op, vc_req, - &req->u.sym_req.u.cipher, desc); + &req->u.sym_req.u.cipher, desc, + &nb_descs, vq->size); break; case VIRTIO_CRYPTO_SYM_OP_ALGORITHM_CHAINING: err = prepare_sym_chain_op(vcrypto, op, vc_req, - &req->u.sym_req.u.chain, desc); + &req->u.sym_req.u.chain, desc, + &nb_descs, vq->size); break; } if (unlikely(err != 0)) { @@ -1215,7 +1275,7 @@ vhost_crypto_process_one_req(struct vhost_crypto *vcrypto, error_exit: - inhdr = reach_inhdr(vc_req, desc); + inhdr = reach_inhdr(vc_req, desc, &nb_descs, vq->size); if (likely(inhdr != NULL)) inhdr->status = (uint8_t)err;
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 3ea64eba..19e04c95 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -489,6 +489,9 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) struct rte_vhost_mem_region *r; uint32_t i; + if (unlikely(!dev || !dev->mem)) + goto out_error; + /* Find the region where the address lives. */ for (i = 0; i < dev->mem->nregions; i++) { r = &dev->mem->regions[i]; @@ -503,6 +506,7 @@ qva_to_vva(struct virtio_net *dev, uint64_t qva, uint64_t *len) r->host_user_addr; } } +out_error: *len = 0; return 0; @@ -537,7 +541,7 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) { struct vhost_virtqueue *vq = dev->virtqueue[vq_index]; struct vhost_vring_addr *addr = &vq->ring_addrs; - uint64_t len; + uint64_t len, expected_len; if (vq_is_packed(dev)) { len = sizeof(struct vring_packed_desc) * vq->size; @@ -603,11 +607,12 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) addr = &vq->ring_addrs; len = sizeof(struct vring_avail) + sizeof(uint16_t) * vq->size; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) + len += sizeof(uint16_t); + expected_len = len; vq->avail = (struct vring_avail *)(uintptr_t)ring_addr_to_vva(dev, vq, addr->avail_user_addr, &len); - if (vq->avail == 0 || - len != sizeof(struct vring_avail) + - sizeof(uint16_t) * vq->size) { + if (vq->avail == 0 || len != expected_len) { RTE_LOG(DEBUG, VHOST_CONFIG, "(%d) failed to map avail ring.\n", dev->vid); @@ -616,10 +621,12 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index) len = sizeof(struct vring_used) + sizeof(struct vring_used_elem) * vq->size; + if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) + len += sizeof(uint16_t); + expected_len = len; vq->used = (struct vring_used *)(uintptr_t)ring_addr_to_vva(dev, vq, addr->used_user_addr, &len); - if (vq->used == 0 || len != sizeof(struct vring_used) + - sizeof(struct vring_used_elem) * vq->size) { + if (vq->used == 0 || len != expected_len) { RTE_LOG(DEBUG, VHOST_CONFIG, "(%d) failed to map used ring.\n", dev->vid);
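The expected_len computation above accounts for VIRTIO_RING_F_EVENT_IDX: when that feature is negotiated, the avail and used rings each gain one trailing uint16_t (used_event/avail_event), so comparing against the bare struct size would wrongly reject a correctly sized mapping. A sketch of the avail-ring arithmetic (avail_ring_len is a local helper, not a DPDK function; bit 29 is the EVENT_IDX feature bit):

#include <stdint.h>
#include <stdio.h>

#define VIRTIO_RING_F_EVENT_IDX 29

/* Expected byte size of the split-ring avail struct: flags + idx +
 * ring[vq_size], plus the trailing used_event u16 when EVENT_IDX is on. */
static uint32_t avail_ring_len(uint16_t vq_size, uint64_t features)
{
    uint32_t len = 2 * sizeof(uint16_t) + vq_size * sizeof(uint16_t);

    if (features & (1ULL << VIRTIO_RING_F_EVENT_IDX))
        len += sizeof(uint16_t);
    return len;
}

int main(void)
{
    printf("no event idx:   %u\n", avail_ring_len(256, 0));    /* 516 */
    printf("with event idx: %u\n",
           avail_ring_len(256, 1ULL << VIRTIO_RING_F_EVENT_IDX)); /* 518 */
    return 0;
}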
@@ -726,13 +733,16 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, uint64_t host_phys_addr, uint64_t size) { struct guest_page *page, *last_page; + struct guest_page *old_pages; if (dev->nr_guest_pages == dev->max_guest_pages) { dev->max_guest_pages *= 2; + old_pages = dev->guest_pages; dev->guest_pages = realloc(dev->guest_pages, dev->max_guest_pages * sizeof(*page)); if (!dev->guest_pages) { RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n"); + free(old_pages); return -1; } }
diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 5e1a1a72..15d682c3 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -335,13 +335,22 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, uint16_t vec_id = *vec_idx; uint32_t len = 0; uint64_t dlen; + uint32_t nr_descs = vq->size; + uint32_t cnt = 0; struct vring_desc *descs = vq->desc; struct vring_desc *idesc = NULL; + if (unlikely(idx >= vq->size)) + return -1; + *desc_chain_head = idx; if (vq->desc[idx].flags & VRING_DESC_F_INDIRECT) { dlen = vq->desc[idx].len; + nr_descs = dlen / sizeof(struct vring_desc); + if (unlikely(nr_descs > vq->size)) + return -1; + descs = (struct vring_desc *)(uintptr_t) vhost_iova_to_vva(dev, vq, vq->desc[idx].addr, &dlen, @@ -366,7 +375,7 @@ fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq, } while (1) { - if (unlikely(idx >= vq->size)) { + if (unlikely(idx >= nr_descs || cnt++ >= nr_descs)) { free_ind_table(idesc); return -1; } @@ -520,6 +529,12 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, if (unlikely(!desc_is_avail(&descs[avail_idx], wrap_counter))) return -1; + /* + * The ordering between desc flags and desc + * content reads needs to be enforced. + */ + rte_smp_rmb(); + *desc_count = 0; *len = 0; @@ -527,6 +542,9 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq, if (unlikely(vec_id >= BUF_VECTOR_MAX)) return -1; + if (unlikely(*desc_count >= vq->size)) + return -1; + *desc_count += 1; *buf_id = descs[avail_idx].id; @@ -791,6 +809,12 @@ virtio_dev_rx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]); avail_head = *((volatile uint16_t *)&vq->avail->idx); + /* + * The ordering between avail index and + * desc reads needs to be enforced. + */ + rte_smp_rmb(); + for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { uint32_t pkt_len = pkts[pkt_idx]->pkt_len + dev->vhost_hlen; uint16_t nr_vec = 0; @@ -1373,6 +1397,12 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq, if (free_entries == 0) return 0; + /* + * The ordering between avail index and + * desc reads needs to be enforced. + */ + rte_smp_rmb(); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); count = RTE_MIN(count, MAX_PKT_BURST);
diff --git a/lib/meson.build b/lib/meson.build index bb7f443f..df4226c5 100644 --- a/lib/meson.build +++ b/lib/meson.build @@ -79,7 +79,7 @@ foreach l:libraries foreach d:deps if not is_variable('shared_rte_' + d) error('Missing dependency ' + d + - ' for library ' + lib_name) + ' for library ' + libname) endif shared_deps += [get_variable('shared_rte_' + d)] static_deps += [get_variable('static_rte_' + d)]
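A closing note on the rte_smp_rmb() calls added in virtio_net.c above: each pairs a read of a guest-published index (or descriptor flags) with later reads of descriptor contents; without the fence, the CPU or compiler may hoist the content reads and observe stale descriptors. A hedged sketch of the same ordering expressed with C11 atomics, where an acquire fence plays the role of rte_smp_rmb() (all names local to the example):

#include <stdatomic.h>
#include <stdint.h>

struct ring {
    _Atomic uint16_t avail_idx;    /* index published by the producer */
    uint64_t desc[256];            /* descriptor contents */
};

static int consume(struct ring *r, uint16_t last, uint64_t *out)
{
    uint16_t avail = atomic_load_explicit(&r->avail_idx,
                                          memory_order_relaxed);
    if (avail == last)
        return 0;    /* nothing new */

    /* Fence: the descriptor read below must not be satisfied before
     * the index read above observed the published value. */
    atomic_thread_fence(memory_order_acquire);

    *out = r->desc[last & 255];
    return 1;
}

int main(void)
{
    static struct ring r;
    uint64_t v = 0;

    r.desc[0] = 42;                          /* producer: payload first */
    atomic_store_explicit(&r.avail_idx, 1,
                          memory_order_release);    /* then publish index */
    return (consume(&r, 0, &v) == 1 && v == 42) ? 0 : 1;
}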