From 43192222b329b3c984687235b0081c7fbfe484ba Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 24 Jul 2018 16:52:29 +0100 Subject: New upstream version 16.11.7 Change-Id: I1dbe85956ca329c829b0066d16b3f902c237fbd3 Signed-off-by: Luca Boccassi --- lib/librte_eal/common/eal_common_memzone.c | 3 +- lib/librte_eal/common/eal_filesystem.h | 2 - .../common/include/arch/ppc_64/rte_atomic.h | 2 +- .../common/include/arch/x86/rte_spinlock.h | 4 +- lib/librte_eal/common/include/generic/rte_rwlock.h | 4 +- lib/librte_eal/common/include/rte_common.h | 4 +- lib/librte_eal/common/include/rte_dev.h | 15 +- lib/librte_eal/common/include/rte_lcore.h | 2 +- lib/librte_eal/common/include/rte_random.h | 6 +- lib/librte_eal/common/include/rte_string_fns.h | 31 +++++ lib/librte_eal/common/include/rte_version.h | 2 +- lib/librte_eal/linuxapp/eal/eal_pci.c | 7 +- lib/librte_eal/linuxapp/kni/compat.h | 5 + lib/librte_ether/rte_ethdev.c | 15 +- lib/librte_ether/rte_ethdev.h | 19 +-- lib/librte_hash/rte_hash.h | 2 +- lib/librte_hash/rte_hash_crc.h | 11 +- lib/librte_ip_frag/ip_frag_internal.c | 8 +- lib/librte_ip_frag/rte_ipv4_reassembly.c | 2 + lib/librte_ip_frag/rte_ipv6_reassembly.c | 2 + lib/librte_kvargs/rte_kvargs.c | 4 +- lib/librte_mbuf/rte_mbuf.h | 46 +++--- lib/librte_mempool/rte_mempool.c | 15 +- lib/librte_net/rte_ether.h | 5 +- lib/librte_net/rte_ip.h | 14 +- lib/librte_vhost/fd_man.c | 34 ++++- lib/librte_vhost/fd_man.h | 1 + lib/librte_vhost/socket.c | 19 ++- lib/librte_vhost/vhost.h | 20 ++- lib/librte_vhost/vhost_user.c | 41 ++++-- lib/librte_vhost/virtio_net.c | 154 +++++++++++++++++---- 31 files changed, 371 insertions(+), 128 deletions(-) (limited to 'lib') diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index b58d85b7..c5138690 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -253,7 +253,8 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, snprintf(mz->name, sizeof(mz->name), "%s", name); mz->phys_addr = rte_malloc_virt2phy(mz_addr); mz->addr = mz_addr; - mz->len = (requested_len == 0 ? elem->size : requested_len); + mz->len = (requested_len == 0 ? + (elem->size - MALLOC_ELEM_OVERHEAD) : requested_len); mz->hugepage_sz = elem->ms->hugepage_sz; mz->socket_id = elem->ms->socket_id; mz->flags = 0; diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index 8acbd996..8f4f0200 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -86,8 +86,6 @@ eal_hugepage_info_path(void) /** String format for hugepage map files. */ #define HUGEFILE_FMT "%s/%smap_%d" -#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d" - static inline const char * eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id) { diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index 37f5eff2..790e6fd7 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -55,7 +55,7 @@ extern "C" { * Guarantees that the LOAD and STORE operations generated before the * barrier occur before the LOAD and STORE operations generated after. */ -#define rte_mb() {asm volatile("sync" : : : "memory"); } +#define rte_mb() asm volatile("sync" : : : "memory") /** * Write memory barrier. diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h index 8e630c21..577236a3 100644 --- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h +++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -104,10 +104,12 @@ static inline int rte_tm_supported(void) static inline int rte_try_tm(volatile int *lock) { + int retries; + if (!rte_rtm_supported) return 0; - int retries = RTE_RTM_MAX_RETRIES; + retries = RTE_RTM_MAX_RETRIES; while (likely(retries--)) { diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h index 7a0fdc55..e57ce210 100644 --- a/lib/librte_eal/common/include/generic/rte_rwlock.h +++ b/lib/librte_eal/common/include/generic/rte_rwlock.h @@ -99,7 +99,7 @@ rte_rwlock_read_lock(rte_rwlock_t *rwl) continue; } success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, - x, x + 1); + (uint32_t)x, (uint32_t)(x + 1)); } } @@ -135,7 +135,7 @@ rte_rwlock_write_lock(rte_rwlock_t *rwl) continue; } success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, - 0, -1); + 0, (uint32_t)-1); } } diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index db5ac91c..f5e2f886 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -105,7 +105,7 @@ typedef uint16_t unaligned_uint16_t; /*********** Macros for pointer arithmetic ********/ /** - * add a byte-value offset from a pointer + * add a byte-value offset to a pointer */ #define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x))) @@ -323,7 +323,7 @@ rte_pause(void) {} static inline uint32_t rte_bsf32(uint32_t v) { - return __builtin_ctz(v); + return (uint32_t)__builtin_ctz(v); } #ifndef offsetof diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index dcf33d0e..2ad6da86 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -60,15 +60,18 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) va_start(ap, fmt); - char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; + { + char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; - va_end(ap); + va_end(ap); - va_start(ap, fmt); - vsnprintf(buffer, sizeof(buffer), fmt, ap); - va_end(ap); + va_start(ap, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, ap); + va_end(ap); - rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", + func_name, buffer); + } } /* diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h index fe7b5865..da48a94a 100644 --- a/lib/librte_eal/common/include/rte_lcore.h +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -134,7 +134,7 @@ rte_lcore_index(int lcore_id) if (lcore_id >= RTE_MAX_LCORE) return -1; if (lcore_id < 0) - lcore_id = rte_lcore_id(); + lcore_id = (int)rte_lcore_id(); return lcore_config[lcore_id].core_index; } diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h index 24ae8363..9f0e5e16 100644 --- a/lib/librte_eal/common/include/rte_random.h +++ b/lib/librte_eal/common/include/rte_random.h @@ -60,7 +60,7 @@ extern "C" { static inline void rte_srand(uint64_t seedval) { - srand48((long unsigned int)seedval); + srand48((long)seedval); } /** @@ -77,9 +77,9 @@ static inline uint64_t rte_rand(void) { uint64_t val; - val = lrand48(); + val = (uint64_t)lrand48(); val <<= 32; - val += lrand48(); + val += (uint64_t)lrand48(); return val; } diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h index cfca2f8d..7c0ab15a 100644 --- a/lib/librte_eal/common/include/rte_string_fns.h +++ b/lib/librte_eal/common/include/rte_string_fns.h @@ -44,6 +44,8 @@ extern "C" { #endif +#include + /** * Takes string "string" parameter and splits it at character "delim" * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like @@ -74,6 +76,35 @@ int rte_strsplit(char *string, int stringlen, char **tokens, int maxtokens, char delim); +/** + * @internal + * DPDK-specific version of strlcpy for systems without + * libc or libbsd copies of the function + */ +static inline size_t +rte_strlcpy(char *dst, const char *src, size_t size) +{ + return snprintf(dst, size, "%s", src); +} + +/* pull in a strlcpy function */ +#ifdef RTE_EXEC_ENV_BSDAPP +#include +#ifndef __BSD_VISIBLE /* non-standard functions are hidden */ +#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size) +#endif + + +#else /* non-BSD platforms */ +#ifdef RTE_USE_LIBBSD +#include + +#else /* no BSD header files, create own */ +#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size) + +#endif /* RTE_USE_LIBBSD */ +#endif /* BSDAPP */ + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 17a00538..ea8ba3be 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -66,7 +66,7 @@ extern "C" { /** * Patch level number i.e. the z in yy.mm.z */ -#define RTE_VER_MINOR 6 +#define RTE_VER_MINOR 7 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c index b0d0c3c6..02ec2415 100644 --- a/lib/librte_eal/linuxapp/eal/eal_pci.c +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -94,7 +94,8 @@ error: } static int -pci_get_kernel_driver_by_path(const char *filename, char *dri_name) +pci_get_kernel_driver_by_path(const char *filename, char *dri_name, + size_t len) { int count; char path[PATH_MAX]; @@ -115,7 +116,7 @@ pci_get_kernel_driver_by_path(const char *filename, char *dri_name) name = strrchr(path, '/'); if (name) { - strncpy(dri_name, name + 1, strlen(name + 1) + 1); + strlcpy(dri_name, name + 1, len); return 0; } @@ -369,7 +370,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, /* parse driver */ snprintf(filename, sizeof(filename), "%s/driver", dirname); - ret = pci_get_kernel_driver_by_path(filename, driver); + ret = pci_get_kernel_driver_by_path(filename, driver, sizeof(driver)); if (ret < 0) { RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); free(dev); diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 3f8c0bc8..6a6968d9 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -101,6 +101,11 @@ #undef NET_NAME_UNKNOWN #endif +#if (defined(RHEL_RELEASE_CODE) && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))) +#define ndo_change_mtu ndo_change_mtu_rh74 +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) #define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER #endif diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 4679dc69..a698fc00 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -438,8 +438,7 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) for (i = 0; i < RTE_MAX_ETHPORTS; i++) { - if (!strncmp(name, - rte_eth_dev_data[i].name, strlen(name))) { + if (!strcmp(name, rte_eth_dev_data[i].name)) { *port_id = i; @@ -628,6 +627,12 @@ rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be started before start any queue\n", port_id); + return -EINVAL; + } + if (rx_queue_id >= dev->data->nb_rx_queues) { RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); return -EINVAL; @@ -680,6 +685,12 @@ rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be started before start any queue\n", port_id); + return -EINVAL; + } + if (tx_queue_id >= dev->data->nb_tx_queues) { RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); return -EINVAL; diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index dc6d0cc9..da33b9d5 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -2646,6 +2646,7 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) { struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + uint16_t nb_rx; #ifdef RTE_LIBRTE_ETHDEV_DEBUG RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); @@ -2656,13 +2657,14 @@ rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, return 0; } #endif - int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], + nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], rx_pkts, nb_pkts); #ifdef RTE_ETHDEV_RXTX_CALLBACKS - struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id]; + if (unlikely(dev->post_rx_burst_cbs[queue_id] != NULL)) { + struct rte_eth_rxtx_callback *cb = + dev->post_rx_burst_cbs[queue_id]; - if (unlikely(cb != NULL)) { do { nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx, nb_pkts, cb->param); @@ -2692,7 +2694,7 @@ rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id) struct rte_eth_dev *dev = &rte_eth_devices[port_id]; RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP); - return (*dev->dev_ops->rx_queue_count)(dev, queue_id); + return (int)(*dev->dev_ops->rx_queue_count)(dev, queue_id); } /** @@ -2886,8 +2888,9 @@ rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id, /* All packets sent, or to be dealt with by callback below */ if (unlikely(sent != to_send)) - buffer->error_callback(&buffer->pkts[sent], to_send - sent, - buffer->error_userdata); + buffer->error_callback(&buffer->pkts[sent], + (uint16_t)(to_send - sent), + buffer->error_userdata); return sent; } @@ -4345,9 +4348,9 @@ rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id); * Get the device name from port id * * @param port_id -* pointer to port identifier of the device +* Port identifier of the device. * @param name -* pci address or name of the device +* Buffer of size RTE_ETH_NAME_MAX_LEN to store the name. * @return * - (0) if successful. * - (-EINVAL) on failure. diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h index 622c1800..f29de81a 100644 --- a/lib/librte_hash/rte_hash.h +++ b/lib/librte_hash/rte_hash.h @@ -357,7 +357,7 @@ rte_hash_lookup(const struct rte_hash *h, const void *key); * @param key * Key to find. * @param sig - * Hash value to remove from the hash table. + * Precomputed hash value for 'key'. * @return * - -EINVAL if the parameters are invalid. * - -ENOENT if the key is not found. diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index 63e74aa4..2aec6c5f 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -366,14 +366,13 @@ crc32c_1word(uint32_t data, uint32_t init_val) static inline uint32_t crc32c_2words(uint64_t data, uint32_t init_val) { + uint32_t crc, term1, term2; union { uint64_t u64; uint32_t u32[2]; } d; d.u64 = data; - uint32_t crc, term1, term2; - crc = init_val; crc ^= d.u32[0]; @@ -427,9 +426,9 @@ crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val) } d; d.u64 = data; - init_val = crc32c_sse42_u32(d.u32[0], init_val); - init_val = crc32c_sse42_u32(d.u32[1], init_val); - return init_val; + init_val = crc32c_sse42_u32(d.u32[0], (uint32_t)init_val); + init_val = crc32c_sse42_u32(d.u32[1], (uint32_t)init_val); + return (uint32_t)init_val; } #endif @@ -441,7 +440,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) "crc32q %[data], %[init_val];" : [init_val] "+r" (init_val) : [data] "rm" (data)); - return init_val; + return (uint32_t)init_val; } #endif diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c index b679ff43..2cee6f8a 100644 --- a/lib/librte_ip_frag/ip_frag_internal.c +++ b/lib/librte_ip_frag/ip_frag_internal.c @@ -183,7 +183,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr, fp->frags[IP_LAST_FRAG_IDX].len); else IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " "total_size: %u, frag_size: %u, last_idx: %u\n" "first fragment: ofs: %u, len: %u\n" "last fragment: ofs: %u, len: %u\n\n", @@ -241,7 +241,7 @@ ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr, fp->frags[IP_LAST_FRAG_IDX].len); else IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" - "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " "total_size: %u, frag_size: %u, last_idx: %u\n" "first fragment: ofs: %u, len: %u\n" "last fragment: ofs: %u, len: %u\n\n", @@ -362,7 +362,7 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl, if (p1->key.key_len == IPV4_KEYLEN) IP_FRAG_LOG(DEBUG, "%s:%d:\n" "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv6_frag_pkt line0: %p, index: %u from %u\n" + "ipv4_frag_pkt line0: %p, index: %u from %u\n" "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", __func__, __LINE__, tbl, tbl->max_entries, tbl->use_entries, @@ -388,7 +388,7 @@ ip_frag_lookup(struct rte_ip_frag_tbl *tbl, if (p2->key.key_len == IPV4_KEYLEN) IP_FRAG_LOG(DEBUG, "%s:%d:\n" "tbl: %p, max_entries: %u, use_entries: %u\n" - "ipv6_frag_pkt line1: %p, index: %u from %u\n" + "ipv4_frag_pkt line1: %p, index: %u from %u\n" "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", __func__, __LINE__, tbl, tbl->max_entries, tbl->use_entries, diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c index e084ca59..847ea0d6 100644 --- a/lib/librte_ip_frag/rte_ipv4_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -88,7 +88,9 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) /* chain with the first fragment. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c index 21a5ef5d..d9b5d690 100644 --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -111,7 +111,9 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) /* chain with the first fragment. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c index 8d56abd4..5504c6df 100644 --- a/lib/librte_kvargs/rte_kvargs.c +++ b/lib/librte_kvargs/rte_kvargs.c @@ -41,7 +41,7 @@ /* * Receive a string with a list of arguments following the pattern - * key=value;key=value;... and insert them into the list. + * key=value,key=value,... and insert them into the list. * strtok() is used so the params string will be copied to be modified. */ static int @@ -182,7 +182,7 @@ rte_kvargs_free(struct rte_kvargs *kvlist) } /* - * Parse the arguments "key=value;key=value;..." string and return + * Parse the arguments "key=value,key=value,..." string and return * an allocated structure that contains a key/value list. Also * check if only valid keys were used. */ diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index bc015d03..5b754645 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -204,12 +204,8 @@ extern "C" { * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies * PKT_TX_TCP_CKSUM) * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 - * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum - * to 0 in the packet + * - if it's IPv4, set the PKT_TX_IP_CKSUM flag * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz - * - calculate the pseudo header checksum without taking ip_len in account, - * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and - * rte_ipv6_phdr_cksum() that can be used as helpers. */ #define PKT_TX_TCP_SEG (1ULL << 50) @@ -222,9 +218,6 @@ extern "C" { * - fill l2_len and l3_len in mbuf * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 - * - calculate the pseudo header checksum and set it in the L4 header (only - * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum(). - * For SCTP, set the crc field to 0. */ #define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */ #define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */ @@ -236,7 +229,6 @@ extern "C" { * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should * also be set by the application, although a PMD will only check * PKT_TX_IP_CKSUM. - * - set the IP checksum field in the packet to 0 * - fill the mbuf offload information: l2_len, l3_len */ #define PKT_TX_IP_CKSUM (1ULL << 54) @@ -261,10 +253,8 @@ extern "C" { /** * Offload the IP checksum of an external header in the hardware. The - * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh - * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the - * packet must be set to 0. - * - set the outer IP checksum field in the packet to 0 + * flag PKT_TX_OUTER_IPV4 should also be set by the application, although + * a PMD will only check PKT_TX_OUTER_IP_CKSUM. * - fill the mbuf offload information: outer_l2_len, outer_l3_len */ #define PKT_TX_OUTER_IP_CKSUM (1ULL << 58) @@ -655,7 +645,7 @@ rte_mbuf_refcnt_read(const struct rte_mbuf *m) static inline void rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) { - rte_atomic16_set(&m->refcnt_atomic, new_value); + rte_atomic16_set(&m->refcnt_atomic, (int16_t)new_value); } /** @@ -678,8 +668,9 @@ rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) * reference counter can occur. */ if (likely(rte_mbuf_refcnt_read(m) == 1)) { - rte_mbuf_refcnt_set(m, 1 + value); - return 1 + value; + ++value; + rte_mbuf_refcnt_set(m, (uint16_t)value); + return (uint16_t)value; } return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value)); @@ -998,7 +989,8 @@ rte_pktmbuf_priv_size(struct rte_mempool *mp) */ static inline void rte_pktmbuf_reset_headroom(struct rte_mbuf *m) { - m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + m->data_off = (uint16_t)RTE_MIN((uint16_t)RTE_PKTMBUF_HEADROOM, + (uint16_t)m->buf_len); } /** @@ -1176,10 +1168,11 @@ static inline void rte_pktmbuf_detach(struct rte_mbuf *m) { struct rte_mbuf *md = rte_mbuf_from_indirect(m); struct rte_mempool *mp = m->pool; - uint32_t mbuf_size, buf_len, priv_size; + uint32_t mbuf_size, buf_len; + uint16_t priv_size; priv_size = rte_pktmbuf_priv_size(mp); - mbuf_size = sizeof(struct rte_mbuf) + priv_size; + mbuf_size = (uint32_t)(sizeof(struct rte_mbuf) + priv_size); buf_len = rte_pktmbuf_data_room_size(mp); m->priv_size = priv_size; @@ -1463,7 +1456,10 @@ static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m, if (unlikely(len > rte_pktmbuf_headroom(m))) return NULL; - m->data_off -= len; + /* NB: elaborating the subtraction like this instead of using + * -= allows us to ensure the result type is uint16_t + * avoiding compiler warnings on gcc 8.1 at least */ + m->data_off = (uint16_t)(m->data_off - len); m->data_len = (uint16_t)(m->data_len + len); m->pkt_len = (m->pkt_len + len); @@ -1523,8 +1519,11 @@ static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len) if (unlikely(len > m->data_len)) return NULL; + /* NB: elaborating the addition like this instead of using + * += allows us to ensure the result type is uint16_t + * avoiding compiler warnings on gcc 8.1 at least */ m->data_len = (uint16_t)(m->data_len - len); - m->data_off += len; + m->data_off = (uint16_t)(m->data_off + len); m->pkt_len = (m->pkt_len - len); return (char *)m->buf_addr + m->data_off; } @@ -1636,7 +1635,10 @@ static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail cur_tail = rte_pktmbuf_lastseg(head); cur_tail->next = tail; - /* accumulate number of segments and total length. */ + /* accumulate number of segments and total length. + * NB: elaborating the addition like this instead of using + * -= allows us to ensure the result type is uint16_t + * avoiding compiler warnings on gcc 8.1 at least */ head->nb_segs = (uint8_t)(head->nb_segs + tail->nb_segs); head->pkt_len += tail->pkt_len; diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index aa513b97..3cb9e42c 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -396,12 +396,18 @@ rte_mempool_populate_phys(struct rte_mempool *mp, char *vaddr, } /* not enough room to store one object */ - if (i == 0) - return -EINVAL; + if (i == 0) { + ret = -EINVAL; + goto fail; + } STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next); mp->nb_mem_chunks++; return i; + +fail: + rte_free(memhdr); + return ret; } /* Add objects in the pool, using a table of physical pages. Return the @@ -456,9 +462,6 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, size_t off, phys_len; int ret, cnt = 0; - /* mempool must not be populated */ - if (mp->nb_mem_chunks != 0) - return -EEXIST; /* address and len must be page-aligned */ if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr) return -EINVAL; @@ -622,7 +625,7 @@ rte_mempool_populate_anon(struct rte_mempool *mp) char *addr; /* mempool is already populated, error */ - if (!STAILQ_EMPTY(&mp->mem_list)) { + if ((!STAILQ_EMPTY(&mp->mem_list)) || mp->nb_mem_chunks != 0) { rte_errno = EINVAL; return 0; } diff --git a/lib/librte_net/rte_ether.h b/lib/librte_net/rte_ether.h index 5edf66c3..796fd56d 100644 --- a/lib/librte_net/rte_ether.h +++ b/lib/librte_net/rte_ether.h @@ -239,7 +239,7 @@ static inline void eth_random_addr(uint8_t *addr) uint8_t *p = (uint8_t *)&rand; rte_memcpy(addr, p, ETHER_ADDR_LEN); - addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ + addr[0] &= (uint8_t)~ETHER_GROUP_ADDR; /* clear multicast bit */ addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ } @@ -352,11 +352,12 @@ static inline int rte_vlan_strip(struct rte_mbuf *m) { struct ether_hdr *eh = rte_pktmbuf_mtod(m, struct ether_hdr *); + struct vlan_hdr *vh; if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) return -1; - struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); + vh = (struct vlan_hdr *)(eh + 1); m->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h index 4491b86e..19d6c95c 100644 --- a/lib/librte_net/rte_ip.h +++ b/lib/librte_net/rte_ip.h @@ -284,7 +284,7 @@ rte_raw_cksum_mbuf(const struct rte_mbuf *m, uint32_t off, uint32_t len, for (;;) { tmp = __rte_raw_cksum(buf, seglen, 0); if (done & 1) - tmp = rte_bswap16(tmp); + tmp = rte_bswap16((uint16_t)tmp); sum += tmp; done += seglen; if (done == len) @@ -315,7 +315,7 @@ rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) { uint16_t cksum; cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr)); - return (cksum == 0xffff) ? cksum : ~cksum; + return (cksum == 0xffff) ? cksum : (uint16_t)~cksum; } /** @@ -380,8 +380,8 @@ rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) uint32_t cksum; uint32_t l4_len; - l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - - sizeof(struct ipv4_hdr); + l4_len = (uint32_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) - + sizeof(struct ipv4_hdr)); cksum = rte_raw_cksum(l4_hdr, l4_len); cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); @@ -391,7 +391,7 @@ rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) if (cksum == 0) cksum = 0xffff; - return cksum; + return (uint16_t)cksum; } /** @@ -431,7 +431,7 @@ rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags) uint32_t proto; /* L4 protocol - top 3 bytes must be zero */ } psd_hdr; - psd_hdr.proto = (ipv6_hdr->proto << 24); + psd_hdr.proto = (uint32_t)(ipv6_hdr->proto << 24); if (ol_flags & PKT_TX_TCP_SEG) { psd_hdr.len = 0; } else { @@ -474,7 +474,7 @@ rte_ipv6_udptcp_cksum(const struct ipv6_hdr *ipv6_hdr, const void *l4_hdr) if (cksum == 0) cksum = 0xffff; - return cksum; + return (uint16_t)cksum; } #ifdef __cplusplus diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 8a075da2..b76b8bce 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -192,6 +192,38 @@ fdset_del(struct fdset *pfdset, int fd) return dat; } +/** + * Unregister the fd from the fdset. + * + * If parameters are invalid, return directly -2. + * And check whether fd is busy, if yes, return -1. + * Otherwise, try to delete the fd from fdset and + * return true. + */ +int +fdset_try_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -2; + + pthread_mutex_lock(&pfdset->fd_mutex); + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -1; + } + + if (i != -1) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + } + + pthread_mutex_unlock(&pfdset->fd_mutex); + return 0; +} /** * This functions runs in infinite blocking loop until there is no fd in @@ -275,7 +307,7 @@ fdset_event_dispatch(struct fdset *pfdset) * because the fd is closed in the cb, * the old fd val could be reused by when creates new * listen fd in another thread, we couldn't call - * fd_set_del. + * fdset_del. */ if (remove1 || remove2) { pfdentry->fd = -1; diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index d319cac6..20781c02 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -63,6 +63,7 @@ int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat); void *fdset_del(struct fdset *pfdset, int fd); +int fdset_try_del(struct fdset *pfdset, int fd); void fdset_event_dispatch(struct fdset *pfdset); diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index a95fbfbb..805b2e5b 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -166,6 +166,11 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) msgh.msg_control = control; msgh.msg_controllen = sizeof(control); cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); + errno = EINVAL; + return -1; + } cmsg->cmsg_len = CMSG_LEN(fdsize); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; @@ -598,13 +603,25 @@ rte_vhost_driver_unregister(const char *path) vhost_user_remove_reconnect(vsocket); } +again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; conn = next) { next = TAILQ_NEXT(conn, next); - fdset_del(&vhost_user.fdset, conn->connfd); + /* + * If r/wcb is executing, release the + * conn_mutex lock, and try again since + * the r/wcb may use the conn_mutex lock. + */ + if (fdset_try_del(&vhost_user.fdset, + conn->connfd) == -1) { + pthread_mutex_unlock( + &vsocket->conn_mutex); + goto again; + } + RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", conn->connfd, path); diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h index c49db0c0..abc5a908 100644 --- a/lib/librte_vhost/vhost.h +++ b/lib/librte_vhost/vhost.h @@ -52,6 +52,8 @@ #define BUF_VECTOR_MAX 256 +#define VHOST_LOG_CACHE_NR 32 + /** * Structure contains buffer address, length and descriptor index * from vring to do scatter RX. @@ -75,6 +77,14 @@ struct zcopy_mbuf { }; TAILQ_HEAD(zcopy_mbuf_list, zcopy_mbuf); +/* + * Structure that contains the info for batched dirty logging. + */ +struct log_cache_entry { + uint32_t offset; + unsigned long val; +}; + /** * Structure contains variables relevant to RX/TX virtqueues. */ @@ -110,6 +120,9 @@ struct vhost_virtqueue { struct vring_used_elem *shadow_used_ring; uint16_t shadow_used_idx; + + struct log_cache_entry log_cache[VHOST_LOG_CACHE_NR]; + uint16_t log_cache_nb_elem; } __rte_cache_aligned; /* Old kernels have no such macros defined */ @@ -195,7 +208,8 @@ struct virtio_memory { #ifdef RTE_LIBRTE_VHOST_DEBUG #define VHOST_MAX_PRINT_BUFF 6072 #define LOG_LEVEL RTE_LOG_DEBUG -#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) \ + RTE_LOG(DEBUG, log_type, fmt, ##args) #define PRINT_PACKET(device, addr, size, header) do { \ char *pkt_addr = (char *)(addr); \ unsigned int index; \ @@ -211,11 +225,11 @@ struct virtio_memory { } \ snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ \ - LOG_DEBUG(VHOST_DATA, "%s", packet); \ + VHOST_LOG_DEBUG(VHOST_DATA, "%s", packet); \ } while (0) #else #define LOG_LEVEL RTE_LOG_INFO -#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) +#define VHOST_LOG_DEBUG(log_type, fmt, args...) do {} while (0) #define PRINT_PACKET(device, addr, size, header) do {} while (0) #endif diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 550a1329..04c92ceb 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -169,7 +169,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features) } else { dev->vhost_hlen = sizeof(struct virtio_net_hdr); } - LOG_DEBUG(VHOST_CONFIG, + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mergeable RX buffers %s, virtio 1 %s\n", dev->vid, (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", @@ -396,13 +396,13 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, vq->log_guest_addr = addr->log_guest_addr; - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address desc: %p\n", dev->vid, vq->desc); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address avail: %p\n", dev->vid, vq->avail); - LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) mapped address used: %p\n", dev->vid, vq->used); - LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", + VHOST_LOG_DEBUG(VHOST_CONFIG, "(%d) log_guest_addr: %" PRIx64 "\n", dev->vid, vq->log_guest_addr); return 0; @@ -421,7 +421,7 @@ vhost_user_set_vring_base(struct virtio_net *dev, return 0; } -static void +static int add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, uint64_t host_phys_addr, uint64_t size) { @@ -431,6 +431,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, dev->max_guest_pages *= 2; dev->guest_pages = realloc(dev->guest_pages, dev->max_guest_pages * sizeof(*page)); + if (!dev->guest_pages) { + RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n"); + return -1; + } } if (dev->nr_guest_pages > 0) { @@ -439,7 +443,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, if (host_phys_addr == last_page->host_phys_addr + last_page->size) { last_page->size += size; - return; + return 0; } } @@ -447,9 +451,11 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, page->guest_phys_addr = guest_phys_addr; page->host_phys_addr = host_phys_addr; page->size = size; + + return 0; } -static void +static int add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg, uint64_t page_size) { @@ -463,7 +469,9 @@ add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg, size = page_size - (guest_phys_addr & (page_size - 1)); size = RTE_MIN(size, reg_size); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0) + return -1; + host_user_addr += size; guest_phys_addr += size; reg_size -= size; @@ -472,12 +480,16 @@ add_guest_pages(struct virtio_net *dev, struct virtio_memory_region *reg, size = RTE_MIN(reg_size, page_size); host_phys_addr = rte_mem_virt2phy((void *)(uintptr_t) host_user_addr); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, + size) < 0) + return -1; host_user_addr += size; guest_phys_addr += size; reg_size -= size; } + + return 0; } #ifdef RTE_LIBRTE_VHOST_DEBUG @@ -624,7 +636,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset; if (dev->dequeue_zero_copy) - add_guest_pages(dev, reg, alignment); + if (add_guest_pages(dev, reg, alignment) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "adding guest pages to region %u failed.\n", + i); + goto err_mmap; + } RTE_LOG(INFO, VHOST_CONFIG, "guest memory region %u, size: 0x%" PRIx64 "\n" @@ -869,7 +886,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) * mmap from 0 to workaround a hugepage mmap bug: mmap will * fail when offset is not page size aligned. */ - addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index 745cc53f..2932bade 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -58,7 +58,7 @@ static inline void __attribute__((always_inline)) vhost_set_bit(unsigned int nr, volatile uint8_t *addr) { - __sync_fetch_and_or_8(addr, (1U << nr)); + __sync_fetch_and_or_1(addr, (1U << nr)); } static inline void __attribute__((always_inline)) @@ -89,6 +89,93 @@ vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) } } +static inline void __attribute__((always_inline)) +vhost_log_cache_sync(struct virtio_net *dev, struct vhost_virtqueue *vq) +{ + unsigned long *log_base; + int i; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base)) + return; + + log_base = (unsigned long *)(uintptr_t)dev->log_base; + + /* + * It is expected a write memory barrier has been issued + * before this function is called. + */ + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + + __sync_fetch_and_or(log_base + elem->offset, elem->val); + } + + rte_smp_wmb(); + + vq->log_cache_nb_elem = 0; +} + +static inline void __attribute__((always_inline)) +vhost_log_cache_page(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t page) +{ + uint32_t bit_nr = page % (sizeof(unsigned long) << 3); + uint32_t offset = page / (sizeof(unsigned long) << 3); + int i; + + for (i = 0; i < vq->log_cache_nb_elem; i++) { + struct log_cache_entry *elem = vq->log_cache + i; + + if (elem->offset == offset) { + elem->val |= (1UL << bit_nr); + return; + } + } + + if (unlikely(i >= VHOST_LOG_CACHE_NR)) { + /* + * No more room for a new log cache entry, + * so write the dirty log map directly. + */ + rte_smp_wmb(); + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + + return; + } + + vq->log_cache[i].offset = offset; + vq->log_cache[i].val = (1UL << bit_nr); +} + +static inline void __attribute__((always_inline)) +vhost_log_cache_write(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_cache_page(dev, vq, page); + page += 1; + } +} + +static inline void __attribute__((always_inline)) +vhost_log_cache_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_cache_write(dev, vq, vq->log_guest_addr + offset, len); +} + static inline void __attribute__((always_inline)) vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, uint64_t offset, uint64_t len) @@ -147,7 +234,7 @@ do_flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_memcpy(&vq->used->ring[to], &vq->shadow_used_ring[from], size * sizeof(struct vring_used_elem)); - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[to]), size * sizeof(struct vring_used_elem)); } @@ -175,6 +262,8 @@ flush_shadow_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq) rte_smp_wmb(); + vhost_log_cache_sync(dev, vq); + *(volatile uint16_t *)&vq->used->idx += vq->shadow_used_idx; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); @@ -249,8 +338,9 @@ copy_virtio_net_hdr(struct virtio_net *dev, uint64_t desc_addr, } static inline int __attribute__((always_inline)) -copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, - struct rte_mbuf *m, uint16_t desc_idx, uint32_t size) +copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct vring_desc *descs, struct rte_mbuf *m, + uint16_t desc_idx, uint32_t size) { uint32_t desc_avail, desc_offset; uint32_t mbuf_avail, mbuf_offset; @@ -298,14 +388,14 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len); - PRINT_PACKET(dev, (uintptr_t)dst, len, 0); + PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0); remain -= len; guest_addr += len; dst += len; } } - vhost_log_write(dev, desc_gaddr, dev->vhost_hlen); + vhost_log_cache_write(dev, vq, desc_gaddr, dev->vhost_hlen); desc_avail = desc->len - dev->vhost_hlen; if (unlikely(desc_chunck_len < dev->vhost_hlen)) { @@ -368,7 +458,8 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vring_desc *descs, rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); + vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset, + cpy_len); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), cpy_len, 0); @@ -401,7 +492,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, uint32_t i, sz; uint64_t dlen; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); @@ -423,7 +514,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, if (count == 0) goto out_access_unlock; - LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) start_idx %d | end_idx %d\n", dev->vid, start_idx, start_idx + count); /* Retrieve all of the desc indexes first to avoid caching issues. */ @@ -434,7 +525,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, vq->used->ring[used_idx].id = desc_indexes[i]; vq->used->ring[used_idx].len = pkts[i]->pkt_len + dev->vhost_hlen; - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[used_idx]), sizeof(vq->used->ring[used_idx])); } @@ -474,11 +565,11 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, sz = vq->size; } - err = copy_mbuf_to_desc(dev, descs, pkts[i], desc_idx, sz); + err = copy_mbuf_to_desc(dev, vq, descs, pkts[i], desc_idx, sz); if (unlikely(err)) { used_idx = (start_idx + i) & (vq->size - 1); vq->used->ring[used_idx].len = dev->vhost_hlen; - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[used_idx]), sizeof(vq->used->ring[used_idx])); } @@ -492,6 +583,8 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, rte_smp_wmb(); + vhost_log_cache_sync(dev, vq); + *(volatile uint16_t *)&vq->used->idx += count; vq->last_used_idx += count; vhost_log_used_vring(dev, vq, @@ -623,8 +716,9 @@ reserve_avail_buf_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, } static inline int __attribute__((always_inline)) -copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, - struct buf_vector *buf_vec, uint16_t num_buffers) +copy_mbuf_to_desc_mergeable(struct virtio_net *dev, + struct vhost_virtqueue *vq, struct rte_mbuf *m, + struct buf_vector *buf_vec, uint16_t num_buffers) { struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -657,7 +751,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, rte_prefetch0((void *)(uintptr_t)hdr_addr); virtio_hdr.num_buffers = num_buffers; - LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) RX: num merge buffers %d\n", dev->vid, num_buffers); desc_avail = buf_vec[vec_idx].buf_len - dev->vhost_hlen; @@ -736,14 +830,15 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, len); PRINT_PACKET(dev, (uintptr_t)dst, - len, 0); + (uint32_t)len, 0); remain -= len; guest_addr += len; dst += len; } } - vhost_log_write(dev, hdr_phys_addr, dev->vhost_hlen); + vhost_log_cache_write(dev, vq, hdr_phys_addr, + dev->vhost_hlen); PRINT_PACKET(dev, (uintptr_t)hdr_addr, dev->vhost_hlen, 0); @@ -754,7 +849,8 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct rte_mbuf *m, rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), cpy_len); - vhost_log_write(dev, desc_gaddr + desc_offset, cpy_len); + vhost_log_cache_write(dev, vq, desc_gaddr + desc_offset, + cpy_len); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), cpy_len, 0); @@ -778,7 +874,7 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, struct buf_vector buf_vec[BUF_VECTOR_MAX]; uint16_t avail_head; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { RTE_LOG(ERR, VHOST_DATA, "(%d) %s: invalid virtqueue idx %d.\n", dev->vid, __func__, queue_id); @@ -806,18 +902,18 @@ virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, if (unlikely(reserve_avail_buf_mergeable(dev, vq, pkt_len, buf_vec, &num_buffers, avail_head) < 0)) { - LOG_DEBUG(VHOST_DATA, + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) failed to get enough desc from vring\n", dev->vid); vq->shadow_used_idx -= num_buffers; break; } - LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) current index %d | end index %d\n", dev->vid, vq->last_avail_idx, vq->last_avail_idx + num_buffers); - if (copy_mbuf_to_desc_mergeable(dev, pkts[pkt_idx], + if (copy_mbuf_to_desc_mergeable(dev, vq, pkts[pkt_idx], buf_vec, num_buffers) < 0) { vq->shadow_used_idx -= num_buffers; break; @@ -1116,7 +1212,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); mbuf_offset = 0; mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; @@ -1181,7 +1277,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, desc_avail = desc->len; PRINT_PACKET(dev, (uintptr_t)desc_addr, - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); } else if (unlikely(desc_chunck_len == 0)) { desc_chunck_len = desc_avail; desc_gaddr += desc_offset; @@ -1194,7 +1290,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vring_desc *descs, desc_offset = 0; PRINT_PACKET(dev, (uintptr_t)desc_addr, - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); } /* @@ -1237,7 +1333,7 @@ update_used_ring(struct virtio_net *dev, struct vhost_virtqueue *vq, { vq->used->ring[used_idx].id = desc_idx; vq->used->ring[used_idx].len = 0; - vhost_log_used_vring(dev, vq, + vhost_log_cache_used_vring(dev, vq, offsetof(struct vring_used, ring[used_idx]), sizeof(vq->used->ring[used_idx])); } @@ -1252,6 +1348,8 @@ update_used_idx(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_smp_wmb(); rte_smp_rmb(); + vhost_log_cache_sync(dev, vq); + vq->used->idx += count; vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), sizeof(vq->used->idx)); @@ -1417,7 +1515,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, if (free_entries == 0) goto out_access_unlock; - LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__); /* Prefetch available and used ring */ avail_idx = vq->last_avail_idx & (vq->size - 1); @@ -1427,7 +1525,7 @@ rte_vhost_dequeue_burst(int vid, uint16_t queue_id, count = RTE_MIN(count, MAX_PKT_BURST); count = RTE_MIN(count, free_entries); - LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", + VHOST_LOG_DEBUG(VHOST_DATA, "(%d) about to dequeue %u buffers\n", dev->vid, count); /* Retrieve all of the head indexes first to avoid caching issues. */ -- cgit 1.2.3-korg