diff options
author | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2018-06-18 13:36:41 +0200 |
---|---|---|
committer | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2018-06-19 08:22:47 +0200 |
commit | e4df4d55df003957fc5afd7440e3d3192d7ce218 (patch) | |
tree | 4d87da85a727b31f1a76de49f6c4d31cedda71b7 /lib | |
parent | d038355bf358f713efbb182f174e2a8a09042e2b (diff) |
New upstream version 17.11.3
Change-Id: I2b901aaf362a2b94195665cc890d824b2c3a620e
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Diffstat (limited to 'lib')
22 files changed, 197 insertions, 72 deletions
diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c index 9fe0d9da..b8e14b86 100644 --- a/lib/librte_cryptodev/rte_cryptodev.c +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -290,19 +290,40 @@ rte_cryptodev_sym_capability_get(uint8_t dev_id, } -#define param_range_check(x, y) \ - (((x < y.min) || (x > y.max)) || \ - (y.increment != 0 && (x % y.increment) != 0)) +static int +param_range_check(uint16_t size, const struct rte_crypto_param_range *range) +{ + unsigned int next_size; + + /* Check lower/upper bounds */ + if (size < range->min) + return -1; + + if (size > range->max) + return -1; + + /* If range is actually only one value, size is correct */ + if (range->increment == 0) + return 0; + + /* Check if value is one of the supported sizes */ + for (next_size = range->min; next_size <= range->max; + next_size += range->increment) + if (size == next_size) + return 0; + + return -1; +} int rte_cryptodev_sym_capability_check_cipher( const struct rte_cryptodev_symmetric_capability *capability, uint16_t key_size, uint16_t iv_size) { - if (param_range_check(key_size, capability->cipher.key_size)) + if (param_range_check(key_size, &capability->cipher.key_size) != 0) return -1; - if (param_range_check(iv_size, capability->cipher.iv_size)) + if (param_range_check(iv_size, &capability->cipher.iv_size) != 0) return -1; return 0; @@ -313,13 +334,13 @@ rte_cryptodev_sym_capability_check_auth( const struct rte_cryptodev_symmetric_capability *capability, uint16_t key_size, uint16_t digest_size, uint16_t iv_size) { - if (param_range_check(key_size, capability->auth.key_size)) + if (param_range_check(key_size, &capability->auth.key_size) != 0) return -1; - if (param_range_check(digest_size, capability->auth.digest_size)) + if (param_range_check(digest_size, &capability->auth.digest_size) != 0) return -1; - if (param_range_check(iv_size, capability->auth.iv_size)) + if (param_range_check(iv_size, &capability->auth.iv_size) != 0) return -1; return 0; @@ -331,16 +352,16 @@ rte_cryptodev_sym_capability_check_aead( uint16_t key_size, uint16_t digest_size, uint16_t aad_size, uint16_t iv_size) { - if (param_range_check(key_size, capability->aead.key_size)) + if (param_range_check(key_size, &capability->aead.key_size) != 0) return -1; - if (param_range_check(digest_size, capability->aead.digest_size)) + if (param_range_check(digest_size, &capability->aead.digest_size) != 0) return -1; - if (param_range_check(aad_size, capability->aead.aad_size)) + if (param_range_check(aad_size, &capability->aead.aad_size) != 0) return -1; - if (param_range_check(iv_size, capability->aead.iv_size)) + if (param_range_check(iv_size, &capability->aead.iv_size) != 0) return -1; return 0; diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h index 6993dd29..3a80311b 100644 --- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -55,7 +55,7 @@ extern "C" { * Guarantees that the LOAD and STORE operations generated before the * barrier occur before the LOAD and STORE operations generated after. */ -#define rte_mb() {asm volatile("sync" : : : "memory"); } +#define rte_mb() asm volatile("sync" : : : "memory") /** * Write memory barrier. diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 4616a080..99ae35e5 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -66,7 +66,7 @@ extern "C" { /** * Patch level number i.e. the z in yy.mm.z */ -#define RTE_VER_MINOR 2 +#define RTE_VER_MINOR 3 /** * Extra string to be appended to version number diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c index 16a181c3..17c20d4b 100644 --- a/lib/librte_eal/linuxapp/eal/eal_memory.c +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -491,6 +491,9 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi, hugepg_tbl[i].orig_va = virtaddr; } else { + /* rewrite physical addresses in IOVA as VA mode */ + if (rte_eal_iova_mode() == RTE_IOVA_VA) + hugepg_tbl[i].physaddr = (uintptr_t)virtaddr; hugepg_tbl[i].final_va = virtaddr; } @@ -1109,7 +1112,8 @@ rte_eal_hugepage_init(void) continue; } - if (phys_addrs_available) { + if (phys_addrs_available && + rte_eal_iova_mode() != RTE_IOVA_VA) { /* find physical addresses for each hugepage */ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) { RTE_LOG(DEBUG, EAL, "Failed to find phys addr " diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c index fb1a6226..a75ef5a1 100644 --- a/lib/librte_eal/linuxapp/eal/eal_vfio.c +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c @@ -710,10 +710,7 @@ vfio_type1_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - if (rte_eal_iova_mode() == RTE_IOVA_VA) - dma_map.iova = dma_map.vaddr; - else - dma_map.iova = ms[i].iova; + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); @@ -813,10 +810,7 @@ vfio_spapr_dma_map(int vfio_container_fd) dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); dma_map.vaddr = ms[i].addr_64; dma_map.size = ms[i].len; - if (rte_eal_iova_mode() == RTE_IOVA_VA) - dma_map.iova = dma_map.vaddr; - else - dma_map.iova = ms[i].iova; + dma_map.iova = ms[i].iova; dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h index 3f8c0bc8..6a6968d9 100644 --- a/lib/librte_eal/linuxapp/kni/compat.h +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -101,6 +101,11 @@ #undef NET_NAME_UNKNOWN #endif +#if (defined(RHEL_RELEASE_CODE) && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 5))) +#define ndo_change_mtu ndo_change_mtu_rh74 +#endif + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0) #define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER #endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h index 443a3f28..6691edf1 100644 --- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -3915,7 +3915,8 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) #define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS #endif /* >= 4.1.0 */ -#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ) +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,4) )) /* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */ #define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL #endif /* >= 4.2.0 */ @@ -3933,9 +3934,15 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) #endif #if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \ - (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0))) + (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0)) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4))) #define HAVE_VF_VLAN_PROTO -#endif /* >= 4.9.0, >= SLES12SP3 */ +#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7, 4)) +/* In RHEL/Centos 7.4, the "new" version of ndo_set_vf_vlan + * is in the struct net_device_ops_extended */ +#define ndo_set_vf_vlan extended.ndo_set_vf_vlan +#endif +#endif #if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0) #define HAVE_PCI_ENABLE_MSIX diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c index 4d23bc1c..eea11d06 100644 --- a/lib/librte_ether/rte_ethdev.c +++ b/lib/librte_ether/rte_ethdev.c @@ -533,6 +533,12 @@ rte_eth_dev_rx_queue_stop(uint16_t port_id, uint16_t rx_queue_id) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be started before start any queue\n", port_id); + return -EINVAL; + } + if (rx_queue_id >= dev->data->nb_rx_queues) { RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); return -EINVAL; @@ -585,6 +591,12 @@ rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id) RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); dev = &rte_eth_devices[port_id]; + if (!dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be started before start any queue\n", port_id); + return -EINVAL; + } + if (tx_queue_id >= dev->data->nb_tx_queues) { RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); return -EINVAL; diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h index 2cc2eedf..eba11ca5 100644 --- a/lib/librte_ether/rte_ethdev.h +++ b/lib/librte_ether/rte_ethdev.h @@ -3125,6 +3125,9 @@ static inline int rte_eth_tx_descriptor_status(uint16_t port_id, * invoke this function concurrently on the same tx queue without SW lock. * @see rte_eth_dev_info_get, struct rte_eth_txconf::txq_flags * + * @see rte_eth_tx_prepare to perform some prior checks or adjustments + * for offloads. + * * @param port_id * The port identifier of the Ethernet device. * @param queue_id diff --git a/lib/librte_ether/rte_ethdev_pci.h b/lib/librte_ether/rte_ethdev_pci.h index ad64a169..f69316d5 100644 --- a/lib/librte_ether/rte_ethdev_pci.h +++ b/lib/librte_ether/rte_ethdev_pci.h @@ -123,9 +123,6 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) static inline void rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev) { - /* free ether device */ - rte_eth_dev_release_port(eth_dev); - if (rte_eal_process_type() == RTE_PROC_PRIMARY) rte_free(eth_dev->data->dev_private); @@ -139,6 +136,9 @@ rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev) eth_dev->device = NULL; eth_dev->intr_handle = NULL; + + /* free ether device */ + rte_eth_dev_release_port(eth_dev); } typedef int (*eth_dev_pci_callback_t)(struct rte_eth_dev *eth_dev); diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 55fd7bdc..cbf78fab 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -573,7 +573,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * subtracting the first dummy index */ - return prim_bkt->key_idx[i] - 1; + ret = prim_bkt->key_idx[i] - 1; + goto failure; } } } @@ -593,7 +594,8 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, * Return index where key is stored, * subtracting the first dummy index */ - return sec_bkt->key_idx[i] - 1; + ret = sec_bkt->key_idx[i] - 1; + goto failure; } } } diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c index 040bd70a..3b84dcad 100644 --- a/lib/librte_ip_frag/rte_ipv4_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -88,7 +88,9 @@ ipv4_frag_reassemble(struct ip_frag_pkt *fp) /* chain with the first fragment. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c index dde58cb7..26b9a883 100644 --- a/lib/librte_ip_frag/rte_ipv6_reassembly.c +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -111,7 +111,9 @@ ipv6_frag_reassemble(struct ip_frag_pkt *fp) /* chain with the first fragment. */ rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + fp->frags[curr_idx].mb = NULL; m = fp->frags[IP_FIRST_FRAG_IDX].mb; + fp->frags[IP_FIRST_FRAG_IDX].mb = NULL; /* update mbuf fields for reassembled packet. */ m->ol_flags |= PKT_TX_IP_CKSUM; diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index 16a6048c..807a8d41 100644 --- a/lib/librte_mbuf/rte_mbuf.h +++ b/lib/librte_mbuf/rte_mbuf.h @@ -222,14 +222,17 @@ extern "C" { /** * Bits 45:48 used for the tunnel type. - * When doing Tx offload like TSO or checksum, the HW needs to configure the - * tunnel type into the HW descriptors. + * The tunnel type must be specified for TSO or checksum on the inner part + * of tunnel packets. + * These flags can be used with PKT_TX_TCP_SEG for TSO, or PKT_TX_xxx_CKSUM. + * The mbuf fields for inner and outer header lengths are required: + * outer_l2_len, outer_l3_len, l2_len, l3_len, l4_len and tso_segsz for TSO. */ #define PKT_TX_TUNNEL_VXLAN (0x1ULL << 45) #define PKT_TX_TUNNEL_GRE (0x2ULL << 45) #define PKT_TX_TUNNEL_IPIP (0x3ULL << 45) #define PKT_TX_TUNNEL_GENEVE (0x4ULL << 45) -/**< TX packet with MPLS-in-UDP RFC 7510 header. */ +/** TX packet with MPLS-in-UDP RFC 7510 header. */ #define PKT_TX_TUNNEL_MPLSINUDP (0x5ULL << 45) /* add new TX TUNNEL type here */ #define PKT_TX_TUNNEL_MASK (0xFULL << 45) @@ -245,12 +248,8 @@ extern "C" { * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies * PKT_TX_TCP_CKSUM) * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 - * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum - * to 0 in the packet + * - if it's IPv4, set the PKT_TX_IP_CKSUM flag * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz - * - calculate the pseudo header checksum without taking ip_len in account, - * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and - * rte_ipv6_phdr_cksum() that can be used as helpers. */ #define PKT_TX_TCP_SEG (1ULL << 50) @@ -263,9 +262,6 @@ extern "C" { * - fill l2_len and l3_len in mbuf * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 - * - calculate the pseudo header checksum and set it in the L4 header (only - * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum(). - * For SCTP, set the crc field to 0. */ #define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */ #define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */ @@ -277,7 +273,6 @@ extern "C" { * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should * also be set by the application, although a PMD will only check * PKT_TX_IP_CKSUM. - * - set the IP checksum field in the packet to 0 * - fill the mbuf offload information: l2_len, l3_len */ #define PKT_TX_IP_CKSUM (1ULL << 54) @@ -302,10 +297,8 @@ extern "C" { /** * Offload the IP checksum of an external header in the hardware. The - * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh - * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the - * packet must be set to 0. - * - set the outer IP checksum field in the packet to 0 + * flag PKT_TX_OUTER_IPV4 should also be set by the application, although + * a PMD will only check PKT_TX_OUTER_IP_CKSUM. * - fill the mbuf offload information: outer_l2_len, outer_l3_len */ #define PKT_TX_OUTER_IP_CKSUM (1ULL << 58) diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c index 5bd74eac..ad174582 100644 --- a/lib/librte_mempool/rte_mempool.c +++ b/lib/librte_mempool/rte_mempool.c @@ -437,12 +437,18 @@ rte_mempool_populate_iova(struct rte_mempool *mp, char *vaddr, } /* not enough room to store one object */ - if (i == 0) - return -EINVAL; + if (i == 0) { + ret = -EINVAL; + goto fail; + } STAILQ_INSERT_TAIL(&mp->mem_list, memhdr, next); mp->nb_mem_chunks++; return i; + +fail: + rte_free(memhdr); + return ret; } int @@ -514,9 +520,6 @@ rte_mempool_populate_virt(struct rte_mempool *mp, char *addr, size_t off, phys_len; int ret, cnt = 0; - /* mempool must not be populated */ - if (mp->nb_mem_chunks != 0) - return -EEXIST; /* address and len must be page-aligned */ if (RTE_PTR_ALIGN_CEIL(addr, pg_sz) != addr) return -EINVAL; @@ -685,7 +688,7 @@ rte_mempool_populate_anon(struct rte_mempool *mp) char *addr; /* mempool is already populated, error */ - if (!STAILQ_EMPTY(&mp->mem_list)) { + if ((!STAILQ_EMPTY(&mp->mem_list)) || mp->nb_mem_chunks != 0) { rte_errno = EINVAL; return 0; } diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h index 73ec398f..23468cb9 100644 --- a/lib/librte_net/rte_ip.h +++ b/lib/librte_net/rte_ip.h @@ -406,6 +406,12 @@ struct ipv6_hdr { uint8_t dst_addr[16]; /**< IP address of destination host(s). */ } __attribute__((__packed__)); +/* IPv6 vtc_flow: IPv / TC / flow_label */ +#define IPV6_HDR_FL_SHIFT 0 +#define IPV6_HDR_TC_SHIFT 20 +#define IPV6_HDR_FL_MASK ((1u << IPV6_HDR_TC_SHIFT) - 1) +#define IPV6_HDR_TC_MASK (0xf << IPV6_HDR_TC_SHIFT) + /** * Process the pseudo-header checksum of an IPv6 header. * diff --git a/lib/librte_pci/rte_pci_version.map b/lib/librte_pci/rte_pci_version.map index 15d93d95..c0280277 100644 --- a/lib/librte_pci/rte_pci_version.map +++ b/lib/librte_pci/rte_pci_version.map @@ -3,12 +3,11 @@ DPDK_17.11 { eal_parse_pci_BDF; eal_parse_pci_DomBDF; - rte_pci_addr_cmp; - rte_pci_addr_parse; - rte_pci_device_name; pci_map_resource; pci_unmap_resource; rte_eal_compare_pci_addr; + rte_pci_addr_cmp; + rte_pci_addr_parse; rte_pci_device_name; local: *; diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c index 4c6fed41..173ec75c 100644 --- a/lib/librte_vhost/fd_man.c +++ b/lib/librte_vhost/fd_man.c @@ -200,6 +200,38 @@ fdset_del(struct fdset *pfdset, int fd) return dat; } +/** + * Unregister the fd from the fdset. + * + * If parameters are invalid, return directly -2. + * And check whether fd is busy, if yes, return -1. + * Otherwise, try to delete the fd from fdset and + * return true. + */ +int +fdset_try_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -2; + + pthread_mutex_lock(&pfdset->fd_mutex); + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -1; + } + + if (i != -1) { + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->fd[i].dat = NULL; + } + + pthread_mutex_unlock(&pfdset->fd_mutex); + return 0; +} /** * This functions runs in infinite blocking loop until there is no fd in diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h index 90d34db1..c0088388 100644 --- a/lib/librte_vhost/fd_man.h +++ b/lib/librte_vhost/fd_man.h @@ -63,6 +63,7 @@ int fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat); void *fdset_del(struct fdset *pfdset, int fd); +int fdset_try_del(struct fdset *pfdset, int fd); void *fdset_event_dispatch(void *arg); diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c index 811e6bf1..f816601b 100644 --- a/lib/librte_vhost/socket.c +++ b/lib/librte_vhost/socket.c @@ -181,6 +181,11 @@ send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) msgh.msg_control = control; msgh.msg_controllen = sizeof(control); cmsg = CMSG_FIRSTHDR(&msgh); + if (cmsg == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, "cmsg == NULL\n"); + errno = EINVAL; + return -1; + } cmsg->cmsg_len = CMSG_LEN(fdsize); cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_RIGHTS; @@ -756,13 +761,25 @@ rte_vhost_driver_unregister(const char *path) vhost_user_remove_reconnect(vsocket); } +again: pthread_mutex_lock(&vsocket->conn_mutex); for (conn = TAILQ_FIRST(&vsocket->conn_list); conn != NULL; conn = next) { next = TAILQ_NEXT(conn, next); - fdset_del(&vhost_user.fdset, conn->connfd); + /* + * If r/wcb is executing, release the + * conn_mutex lock, and try again since + * the r/wcb may use the conn_mutex lock. + */ + if (fdset_try_del(&vhost_user.fdset, + conn->connfd) == -1) { + pthread_mutex_unlock( + &vsocket->conn_mutex); + goto again; + } + RTE_LOG(INFO, VHOST_CONFIG, "free connfd = %d for device '%s'\n", conn->connfd, path); diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c index 50e654db..c0efb310 100644 --- a/lib/librte_vhost/vhost_user.c +++ b/lib/librte_vhost/vhost_user.c @@ -476,7 +476,7 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg) if (vq->enabled && (dev->features & (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) { - dev = translate_ring_addresses(dev, msg->payload.state.index); + dev = translate_ring_addresses(dev, msg->payload.addr.index); if (!dev) return -1; @@ -501,7 +501,7 @@ vhost_user_set_vring_base(struct virtio_net *dev, return 0; } -static void +static int add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, uint64_t host_phys_addr, uint64_t size) { @@ -511,6 +511,10 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, dev->max_guest_pages *= 2; dev->guest_pages = realloc(dev->guest_pages, dev->max_guest_pages * sizeof(*page)); + if (!dev->guest_pages) { + RTE_LOG(ERR, VHOST_CONFIG, "cannot realloc guest_pages\n"); + return -1; + } } if (dev->nr_guest_pages > 0) { @@ -519,7 +523,7 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, if (host_phys_addr == last_page->host_phys_addr + last_page->size) { last_page->size += size; - return; + return 0; } } @@ -527,9 +531,11 @@ add_one_guest_page(struct virtio_net *dev, uint64_t guest_phys_addr, page->guest_phys_addr = guest_phys_addr; page->host_phys_addr = host_phys_addr; page->size = size; + + return 0; } -static void +static int add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, uint64_t page_size) { @@ -543,7 +549,9 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = page_size - (guest_phys_addr & (page_size - 1)); size = RTE_MIN(size, reg_size); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size) < 0) + return -1; + host_user_addr += size; guest_phys_addr += size; reg_size -= size; @@ -552,12 +560,16 @@ add_guest_pages(struct virtio_net *dev, struct rte_vhost_mem_region *reg, size = RTE_MIN(reg_size, page_size); host_phys_addr = rte_mem_virt2iova((void *)(uintptr_t) host_user_addr); - add_one_guest_page(dev, guest_phys_addr, host_phys_addr, size); + if (add_one_guest_page(dev, guest_phys_addr, host_phys_addr, + size) < 0) + return -1; host_user_addr += size; guest_phys_addr += size; reg_size -= size; } + + return 0; } #ifdef RTE_LIBRTE_VHOST_DEBUG @@ -705,7 +717,12 @@ vhost_user_set_mem_table(struct virtio_net *dev, struct VhostUserMsg *pmsg) mmap_offset; if (dev->dequeue_zero_copy) - add_guest_pages(dev, reg, alignment); + if (add_guest_pages(dev, reg, alignment) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "adding guest pages to region %u failed.\n", + i); + goto err_mmap; + } RTE_LOG(INFO, VHOST_CONFIG, "guest memory region %u, size: 0x%" PRIx64 "\n" @@ -857,8 +874,8 @@ vhost_user_get_vring_base(struct virtio_net *dev, dev->flags &= ~VIRTIO_DEV_READY; - /* Here we are safe to get the last used index */ - msg->payload.state.num = vq->last_used_idx; + /* Here we are safe to get the last avail index */ + msg->payload.state.num = vq->last_avail_idx; RTE_LOG(INFO, VHOST_CONFIG, "vring base idx:%d file:%d\n", msg->payload.state.index, @@ -873,6 +890,11 @@ vhost_user_get_vring_base(struct virtio_net *dev, vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + if (dev->dequeue_zero_copy) free_zmbufs(vq); rte_free(vq->shadow_used_ring); @@ -967,7 +989,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg) * mmap from 0 to workaround a hugepage mmap bug: mmap will * fail when offset is not page size aligned. */ - addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (addr == MAP_FAILED) { RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); diff --git a/lib/librte_vhost/virtio_net.c b/lib/librte_vhost/virtio_net.c index a013c07b..ecfabca3 100644 --- a/lib/librte_vhost/virtio_net.c +++ b/lib/librte_vhost/virtio_net.c @@ -297,7 +297,7 @@ copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_memcpy((void *)(uintptr_t)dst, (void *)(uintptr_t)src, len); - PRINT_PACKET(dev, (uintptr_t)dst, len, 0); + PRINT_PACKET(dev, (uintptr_t)dst, (uint32_t)len, 0); vhost_log_write(dev, guest_addr, len); remain -= len; guest_addr += len; @@ -796,7 +796,7 @@ copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, len); PRINT_PACKET(dev, (uintptr_t)dst, - len, 0); + (uint32_t)len, 0); vhost_log_write(dev, guest_addr, len); remain -= len; @@ -1234,7 +1234,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, rte_prefetch0((void *)(uintptr_t)(desc_addr + desc_offset)); PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); mbuf_offset = 0; mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; @@ -1322,7 +1322,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, desc_avail = desc->len; PRINT_PACKET(dev, (uintptr_t)desc_addr, - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); } else if (unlikely(desc_chunck_len == 0)) { desc_chunck_len = desc_avail; desc_gaddr += desc_offset; @@ -1337,7 +1337,7 @@ copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, desc_offset = 0; PRINT_PACKET(dev, (uintptr_t)desc_addr, - desc_chunck_len, 0); + (uint32_t)desc_chunck_len, 0); } /* |