Diffstat (limited to 'drivers/net/mlx5')
-rw-r--r--  drivers/net/mlx5/mlx5.c               |  39
-rw-r--r--  drivers/net/mlx5/mlx5.h               |  20
-rw-r--r--  drivers/net/mlx5/mlx5_defs.h          |  15
-rw-r--r--  drivers/net/mlx5/mlx5_ethdev.c        |   5
-rw-r--r--  drivers/net/mlx5/mlx5_mr.c            | 583
-rw-r--r--  drivers/net/mlx5/mlx5_rxq.c           |  38
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.c          |   3
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.h          | 188
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec.c      |   2
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec.h      |   6
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_neon.h |   2
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_sse.h  |   2
-rw-r--r--  drivers/net/mlx5/mlx5_stats.c         |   3
-rw-r--r--  drivers/net/mlx5/mlx5_trigger.c       |  14
-rw-r--r--  drivers/net/mlx5/mlx5_txq.c           |  23
15 files changed, 525 insertions(+), 418 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 36f3a056..e117ec84 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -78,6 +78,12 @@
  */
 #define MLX5_TXQS_MIN_INLINE "txqs_min_inline"
 
+/*
+ * Device parameter to configure the number of TX queues threshold for
+ * enabling vectorized Tx.
+ */
+#define MLX5_TXQS_MAX_VEC "txqs_max_vec"
+
 /* Device parameter to enable multi-packet send WQEs. */
 #define MLX5_TXQ_MPW_EN "txq_mpw_en"
 
@@ -112,6 +118,7 @@ struct mlx5_args {
     int cqe_comp;
     int txq_inline;
     int txqs_inline;
+    int txqs_vec;
     int mps;
     int mpw_hdr_dseg;
     int inline_max_packet_sz;
@@ -236,6 +243,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
         priv->txqs_n = 0;
         priv->txqs = NULL;
     }
+    mlx5_mr_deregister_memseg(dev);
     if (priv->pd != NULL) {
         assert(priv->ctx != NULL);
         claim_zero(ibv_dealloc_pd(priv->pd));
@@ -276,10 +284,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
     if (ret)
         DRV_LOG(WARNING, "port %u some flows still remain",
             dev->data->port_id);
-    ret = mlx5_mr_verify(dev);
-    if (ret)
-        DRV_LOG(WARNING, "port %u some memory region still remain",
-            dev->data->port_id);
     memset(priv, 0, sizeof(*priv));
 }
 
@@ -442,6 +446,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
         args->txq_inline = tmp;
     } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
         args->txqs_inline = tmp;
+    } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
+        args->txqs_vec = tmp;
     } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
         args->mps = !!tmp;
     } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
@@ -480,6 +486,7 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs)
         MLX5_RXQ_CQE_COMP_EN,
         MLX5_TXQ_INLINE,
         MLX5_TXQS_MIN_INLINE,
+        MLX5_TXQS_MAX_VEC,
         MLX5_TXQ_MPW_EN,
         MLX5_TXQ_MPW_HDR_DSEG_EN,
         MLX5_TXQ_MAX_INLINE_LEN,
@@ -640,8 +647,17 @@ mlx5_args_assign(struct priv *priv, struct mlx5_args *args)
         priv->txq_inline = args->txq_inline;
     if (args->txqs_inline != MLX5_ARG_UNSET)
         priv->txqs_inline = args->txqs_inline;
-    if (args->mps != MLX5_ARG_UNSET)
+    if (args->txqs_vec != MLX5_ARG_UNSET)
+        priv->txqs_vec = args->txqs_vec;
+    if (args->mps != MLX5_ARG_UNSET) {
         priv->mps = args->mps ? priv->mps : 0;
+    } else if (priv->mps == MLX5_MPW) {
+        /*
+         * MPW is disabled by default, while the Enhanced MPW is
+         * enabled by default.
+         */
+        priv->mps = MLX5_MPW_DISABLED;
+    }
     if (args->mpw_hdr_dseg != MLX5_ARG_UNSET)
         priv->mpw_hdr_dseg = args->mpw_hdr_dseg;
     if (args->inline_max_packet_sz != MLX5_ARG_UNSET)
@@ -680,6 +696,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
     unsigned int mps;
     unsigned int cqe_comp;
     unsigned int tunnel_en = 0;
+    unsigned int txqs_vec = MLX5_VPMD_MAX_TXQS;
     int idx;
     int i;
     struct mlx5dv_context attrs_out;
@@ -726,8 +743,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
             continue;
         switch (pci_dev->id.device_id) {
         case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
-            tunnel_en = 1;
-            break;
         case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
         case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
         case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
@@ -735,6 +750,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
         case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
             tunnel_en = 1;
             break;
+        case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
+            txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD;
+            tunnel_en = 1;
+            break;
         default:
             break;
         }
@@ -805,6 +824,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
             .cqe_comp = MLX5_ARG_UNSET,
             .txq_inline = MLX5_ARG_UNSET,
             .txqs_inline = MLX5_ARG_UNSET,
+            .txqs_vec = MLX5_ARG_UNSET,
             .mps = MLX5_ARG_UNSET,
             .mpw_hdr_dseg = MLX5_ARG_UNSET,
             .inline_max_packet_sz = MLX5_ARG_UNSET,
@@ -908,6 +928,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
         /* Enable vector by default if supported. */
         priv->tx_vec_en = 1;
         priv->rx_vec_en = 1;
+        priv->txqs_vec = txqs_vec;
         err = mlx5_args(&args, pci_dev->device.devargs);
         if (err) {
             DRV_LOG(ERR, "failed to process device arguments: %s",
@@ -1154,6 +1175,10 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
                    PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
     },
     {
+        RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                   PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
+    },
+    {
         .vendor_id = 0
     }
 };
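The new parameter rides the same devargs mechanism as the existing knobs, so
the threshold can be overridden per device at EAL level. A hypothetical
invocation (PCI address and value are placeholders):

    testpmd -w 0000:03:00.0,txqs_max_vec=16 -- --txq=16 --rxq=16

Without the override, the BlueField default of MLX5_VPMD_MAX_TXQS_BLUEFIELD
or the generic MLX5_VPMD_MAX_TXQS applies.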
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 5e6027b8..08b667f9 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -77,6 +77,7 @@ enum {
     PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018,
     PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
     PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
+    PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2,
 };
 
 struct mlx5_xstats_ctrl {
@@ -138,6 +139,7 @@ struct priv {
     unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
     unsigned int txq_inline; /* Maximum packet size for inlining. */
     unsigned int txqs_inline; /* Queue number threshold for inlining. */
+    unsigned int txqs_vec; /* Queue number threshold for vectorized Tx. */
     unsigned int inline_max_packet_sz; /* Max packet size for inlining. */
     /* RX/TX queues. */
     unsigned int rxqs_n; /* RX queues array size. */
@@ -152,7 +154,9 @@ struct priv {
     struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
     struct mlx5_flows flows; /* RTE Flow rules. */
     struct mlx5_flows ctrl_flows; /* Control flow rules. */
-    LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+    struct mlx5_mr (*mr)[]; /* Static MR table. */
+    struct mlx5_mr_cache (*mr_cache)[]; /* Global MR cache table. */
+    unsigned int mr_n; /* Size of static MR table. */
     LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
     LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
     LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
@@ -301,16 +305,14 @@ void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev);
 
 /* mlx5_socket.c */
 
-int mlx5_socket_init(struct rte_eth_dev *priv);
-void mlx5_socket_uninit(struct rte_eth_dev *priv);
-void mlx5_socket_handle(struct rte_eth_dev *priv);
-int mlx5_socket_connect(struct rte_eth_dev *priv);
+int mlx5_socket_init(struct rte_eth_dev *dev);
+void mlx5_socket_uninit(struct rte_eth_dev *dev);
+void mlx5_socket_handle(struct rte_eth_dev *dev);
+int mlx5_socket_connect(struct rte_eth_dev *dev);
 
 /* mlx5_mr.c */
 
-struct mlx5_mr *mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp);
-struct mlx5_mr *mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp);
-int mlx5_mr_release(struct mlx5_mr *mr);
-int mlx5_mr_verify(struct rte_eth_dev *dev);
+int mlx5_mr_register_memseg(struct rte_eth_dev *dev);
+void mlx5_mr_deregister_memseg(struct rte_eth_dev *dev);
 
 #endif /* RTE_PMD_MLX5_H_ */
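priv->mr and priv->mr_cache are declared as pointers to arrays of unspecified
bound, which is why later code indexes them as (*priv->mr)[i]. A minimal
standalone sketch of the idiom (names hypothetical, not driver code):

    #include <stdio.h>
    #include <stdlib.h>

    struct item { int v; };

    int main(void)
    {
        struct item (*tbl)[]; /* pointer to an array of unknown size */
        unsigned int i, n = 4;

        tbl = calloc(n, sizeof(struct item));
        if (tbl == NULL)
            return 1;
        for (i = 0; i < n; ++i)
            (*tbl)[i].v = (int)i; /* dereference the array, then index */
        printf("last = %d\n", (*tbl)[n - 1].v);
        free(tbl);
        return 0;
    }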
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 9c64bb33..1de3bdc4 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -88,8 +88,13 @@
 /* Maximum Packet headers size (L2+L3+L4) for TSO. */
 #define MLX5_MAX_TSO_HEADER 128
 
-/* Default minimum number of Tx queues for vectorized Tx. */
-#define MLX5_VPMD_MIN_TXQS 4
+/* Default maximum number of Tx queues for vectorized Tx. */
+#if defined(RTE_ARCH_ARM64)
+#define MLX5_VPMD_MAX_TXQS 8
+#else
+#define MLX5_VPMD_MAX_TXQS 4
+#endif
+#define MLX5_VPMD_MAX_TXQS_BLUEFIELD 16
 
 /* Threshold of buffer replenishment for vectorized Rx. */
 #define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \
@@ -124,6 +129,12 @@
  */
 #define MLX5_UAR_OFFSET (1ULL << 32)
 
+/* Size of per-queue MR cache table. */
+#define MLX5_MR_CACHE_N 8
+
+/* First entry must be NULL for comparison. */
+#define MLX5_MR_LOOKUP_TABLE_PAD 1
+
 /* Definition of static_assert found in /usr/include/assert.h */
 #ifndef HAVE_STATIC_ASSERT
 #define static_assert _Static_assert
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index e441483a..198c30b3 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -408,6 +408,11 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
     ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
     if (ret)
         return ret;
+    if (mlx5_mr_register_memseg(dev)) {
+        DRV_LOG(ERR, "%p: MR registration failed", (void *)dev);
+        rte_errno = ENOMEM;
+        return -rte_errno;
+    }
     /* When the number of RX queues is not a power of two, the remaining
      * table entries are padded with reused WQs and hashes are not spread
      * uniformly. */
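MLX5_MR_LOOKUP_TABLE_PAD reserves index 0 of every lookup table for a sentinel
entry, so the binary search can assume lkp_tbl[0].start compares below any
real address. A hypothetical table for two registered memsegs (addresses and
LKEYs invented) would look like:

    struct mlx5_mr_cache tbl[] = {
        { .start = 0x0,        .end = 0x0,        .lkey = UINT32_MAX }, /* pad */
        { .start = 0x10000000, .end = 0x20000000, .lkey = 0x1100 },
        { .start = 0x40000000, .end = 0x80000000, .lkey = 0x2200 },
    };
    /* MR_TABLE_SZ(2) == 3 slots allocated; MR_N(3) == 2 real entries. */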
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index a50c5208..c3410a62 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -47,355 +47,398 @@
 #include "mlx5.h"
 #include "mlx5_rxtx.h"
 
-struct mlx5_check_mempool_data {
-    int ret;
-    char *start;
-    char *end;
+struct mr_update_mempool_data {
+    struct rte_eth_dev *dev;
+    struct mlx5_mr_cache *lkp_tbl;
+    uint16_t tbl_sz;
 };
 
-/* Called by mlx5_check_mempool() when iterating the memory chunks. */
-static void
-mlx5_check_mempool_cb(struct rte_mempool *mp __rte_unused,
-              void *opaque, struct rte_mempool_memhdr *memhdr,
-              unsigned int mem_idx __rte_unused)
+/**
+ * Look up LKEY from the given lookup table by binary search, store the last
+ * index and return the searched LKEY.
+ *
+ * @param lkp_tbl
+ *   Pointer to lookup table.
+ * @param n
+ *   Size of lookup table.
+ * @param[out] idx
+ *   Pointer to index. Even on search failure, returns the index where the
+ *   search stopped so that it can be used when inserting a new entry.
+ * @param addr
+ *   Search key.
+ *
+ * @return
+ *   Searched LKEY on success, UINT32_MAX on no match.
+ */
+static uint32_t
+mlx5_mr_lookup(struct mlx5_mr_cache *lkp_tbl, uint16_t n, uint16_t *idx,
+           uintptr_t addr)
 {
-    struct mlx5_check_mempool_data *data = opaque;
+    uint16_t base = 0;
 
-    /* It already failed, skip the next chunks. */
-    if (data->ret != 0)
-        return;
-    /* It is the first chunk. */
-    if (data->start == NULL && data->end == NULL) {
-        data->start = memhdr->addr;
-        data->end = data->start + memhdr->len;
-        return;
-    }
-    if (data->end == memhdr->addr) {
-        data->end += memhdr->len;
-        return;
-    }
-    if (data->start == (char *)memhdr->addr + memhdr->len) {
-        data->start -= memhdr->len;
-        return;
-    }
-    /* Error, mempool is not virtually contiguous. */
-    data->ret = -1;
+    /* First entry must be NULL for comparison. */
+    assert(n == 0 || (lkp_tbl[0].start == 0 &&
+              lkp_tbl[0].lkey == UINT32_MAX));
+    /* Binary search. */
+    do {
+        register uint16_t delta = n >> 1;
+
+        if (addr < lkp_tbl[base + delta].start) {
+            n = delta;
+        } else {
+            base += delta;
+            n -= delta;
+        }
+    } while (n > 1);
+    assert(addr >= lkp_tbl[base].start);
+    *idx = base;
+    if (addr < lkp_tbl[base].end)
+        return lkp_tbl[base].lkey;
+    /* Not found. */
+    return UINT32_MAX;
 }
 
 /**
- * Check if a mempool can be used: it must be virtually contiguous.
+ * Insert an entry to LKEY lookup table.
  *
- * @param[in] mp
- *   Pointer to memory pool.
- * @param[out] start
- *   Pointer to the start address of the mempool virtual memory area
- * @param[out] end
- *   Pointer to the end address of the mempool virtual memory area
+ * @param lkp_tbl
+ *   Pointer to lookup table. The size of array must be enough to add one more
+ *   entry.
+ * @param n
+ *   Size of lookup table.
+ * @param entry
+ *   Pointer to new entry to insert.
  *
  * @return
- *   0 on success (mempool is virtually contiguous), -1 on error.
+ *   Size of returning lookup table.
  */
 static int
-mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
-           uintptr_t *end)
+mlx5_mr_insert(struct mlx5_mr_cache *lkp_tbl, uint16_t n,
+           struct mlx5_mr_cache *entry)
 {
-    struct mlx5_check_mempool_data data;
+    uint16_t idx = 0;
+    size_t shift;
 
-    memset(&data, 0, sizeof(data));
-    rte_mempool_mem_iter(mp, mlx5_check_mempool_cb, &data);
-    *start = (uintptr_t)data.start;
-    *end = (uintptr_t)data.end;
-    return data.ret;
+    /* Check if the entry exists. */
+    if (mlx5_mr_lookup(lkp_tbl, n, &idx, entry->start) != UINT32_MAX)
+        return n;
+    /* Insert entry. */
+    ++idx;
+    shift = (n - idx) * sizeof(struct mlx5_mr_cache);
+    if (shift)
+        memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
+    lkp_tbl[idx] = *entry;
+    DRV_LOG(DEBUG, "%p: inserted lkp_tbl[%u], start = 0x%lx, end = 0x%lx",
+        (void *)lkp_tbl, idx, lkp_tbl[idx].start, lkp_tbl[idx].end);
+    return n + 1;
 }
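The bisection above narrows [base, base + n) until one candidate remains, then
checks the end bound; an address falling in a hole between MRs lands on its
floor entry and fails that check. A self-contained restatement with an
invented table (compiles standalone):

    #include <stdint.h>
    #include <stdio.h>

    struct entry { uintptr_t start, end; uint32_t lkey; };

    /* Same loop shape as mlx5_mr_lookup(); the table must be sorted by
     * start address and padded with the {0, 0, UINT32_MAX} sentinel. */
    static uint32_t
    lookup(const struct entry *tbl, uint16_t n, uintptr_t addr)
    {
        uint16_t base = 0;

        do {
            uint16_t delta = n >> 1;

            if (addr < tbl[base + delta].start) {
                n = delta;
            } else {
                base += delta;
                n -= delta;
            }
        } while (n > 1);
        return (addr < tbl[base].end) ? tbl[base].lkey : UINT32_MAX;
    }

    int main(void)
    {
        const struct entry tbl[] = {
            { 0x0,    0x0,    UINT32_MAX }, /* sentinel */
            { 0x1000, 0x2000, 0xaa },
            { 0x3000, 0x5000, 0xbb },
        };

        printf("0x%x\n", lookup(tbl, 3, 0x1800)); /* 0xaa */
        printf("0x%x\n", lookup(tbl, 3, 0x2800)); /* UINT32_MAX: hole */
        return 0;
    }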
 /**
- * Register a Memory Region (MR) <-> Memory Pool (MP) association in
- * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
+ * Incrementally update LKEY lookup table for a specific address from
+ * registered Memory Regions.
  *
- * @param txq
- *   Pointer to TX queue structure.
- * @param[in] mp
- *   Memory Pool for which a Memory Region lkey must be returned.
- * @param idx
- *   Index of the next available entry.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param lkp_tbl
+ *   Pointer to lookup table to fill. The size of array must be at least
+ *   (priv->mr_n + 1).
+ * @param n
+ *   Size of lookup table.
+ * @param addr
+ *   Search key.
  *
  * @return
- *   mr on success, NULL on failure and rte_errno is set.
+ *   Size of returning lookup table.
  */
-struct mlx5_mr *
-mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
-           unsigned int idx)
+static int
+mlx5_mr_update_addr(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
+            uint16_t n, uintptr_t addr)
 {
-    struct mlx5_txq_ctrl *txq_ctrl =
-        container_of(txq, struct mlx5_txq_ctrl, txq);
-    struct rte_eth_dev *dev;
-    struct mlx5_mr *mr;
+    struct priv *priv = dev->data->dev_private;
+    uint16_t idx;
+    uint32_t ret __rte_unused;
 
-    rte_spinlock_lock(&txq_ctrl->priv->mr_lock);
-    /* Add a new entry, register MR first. */
-    DRV_LOG(DEBUG, "port %u discovered new memory pool \"%s\" (%p)",
-        PORT_ID(txq_ctrl->priv), mp->name, (void *)mp);
-    dev = ETH_DEV(txq_ctrl->priv);
-    mr = mlx5_mr_get(dev, mp);
-    if (mr == NULL) {
-        if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
-            DRV_LOG(DEBUG,
-                "port %u using unregistered mempool 0x%p(%s)"
-                " in secondary process, please create mempool"
-                " before rte_eth_dev_start()",
-                PORT_ID(txq_ctrl->priv), (void *)mp, mp->name);
-            rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
-            rte_errno = ENOTSUP;
-            return NULL;
-        }
-        mr = mlx5_mr_new(dev, mp);
-    }
-    if (unlikely(mr == NULL)) {
-        DRV_LOG(DEBUG,
-            "port %u unable to configure memory region,"
-            " ibv_reg_mr() failed.",
-            PORT_ID(txq_ctrl->priv));
-        rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
-        return NULL;
+    if (n == 0) {
+        /* First entry must be NULL for comparison. */
+        lkp_tbl[n++] = (struct mlx5_mr_cache) {
+            .lkey = UINT32_MAX,
+        };
     }
-    if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
-        /* Table is full, remove oldest entry. */
-        DRV_LOG(DEBUG,
-            "port %u memory region <-> memory pool table full, "
-            " dropping oldest entry",
-            PORT_ID(txq_ctrl->priv));
-        --idx;
-        mlx5_mr_release(txq->mp2mr[0]);
-        memmove(&txq->mp2mr[0], &txq->mp2mr[1],
-            (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
-    }
-    /* Store the new entry. */
-    txq_ctrl->txq.mp2mr[idx] = mr;
-    DRV_LOG(DEBUG,
-        "port %u new memory region lkey for MP \"%s\" (%p): 0x%08"
-        PRIu32,
-        PORT_ID(txq_ctrl->priv), mp->name, (void *)mp,
-        txq_ctrl->txq.mp2mr[idx]->lkey);
-    rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
-    return mr;
+    ret = mlx5_mr_lookup(*priv->mr_cache, MR_TABLE_SZ(priv->mr_n),
+                 &idx, addr);
+    /* Lookup must succeed, the global cache is all-inclusive. */
+    assert(ret != UINT32_MAX);
+    DRV_LOG(DEBUG, "port %u adding LKEY (0x%x) for addr 0x%lx",
+        dev->data->port_id, (*priv->mr_cache)[idx].lkey, addr);
+    return mlx5_mr_insert(lkp_tbl, n, &(*priv->mr_cache)[idx]);
 }
-struct mlx5_mp2mr_mbuf_check_data {
-    int ret;
-};
-
 /**
- * Callback function for rte_mempool_obj_iter() to check whether a given
- * mempool object looks like a mbuf.
+ * Bottom-half of LKEY search on datapath. Search first in cache_bh[] and, on
+ * a miss, search the global MR cache table and add the new entry to the
+ * per-queue local caches.
  *
- * @param[in] mp
- *   The mempool pointer
- * @param[in] arg
- *   Context data (struct txq_mp2mr_mbuf_check_data). Contains the
- *   return value.
- * @param[in] obj
- *   Object address.
- * @param index
- *   Object index, unused.
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param mr_ctrl
+ *   Pointer to per-queue MR control structure.
+ * @param addr
+ *   Search key.
+ *
+ * @return
+ *   LKEY on success.
  */
-static void
-txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
-             uint32_t index __rte_unused)
+static inline uint32_t
+mlx5_mr_mb2mr_bh(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
+         uintptr_t addr)
 {
-    struct mlx5_mp2mr_mbuf_check_data *data = arg;
-    struct rte_mbuf *buf = obj;
+    uint32_t lkey;
+    uint16_t bh_idx = 0;
+    struct mlx5_mr_cache *mr_cache = &mr_ctrl->cache[mr_ctrl->head];
 
-    /*
-     * Check whether mbuf structure fits element size and whether mempool
-     * pointer is valid.
-     */
-    if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
-        data->ret = -1;
+    /* Binary-search MR translation table. */
+    lkey = mlx5_mr_lookup(*mr_ctrl->cache_bh, mr_ctrl->bh_n, &bh_idx, addr);
+    if (likely(lkey != UINT32_MAX)) {
+        /* Update cache. */
+        *mr_cache = (*mr_ctrl->cache_bh)[bh_idx];
+        mr_ctrl->mru = mr_ctrl->head;
+        /* Point to the next victim, the oldest. */
+        mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
+        return lkey;
+    }
+    /* Missed in the per-queue lookup table. Search in the global cache. */
+    mr_ctrl->bh_n = mlx5_mr_update_addr(dev, *mr_ctrl->cache_bh,
+                        mr_ctrl->bh_n, addr);
+    /* Search again with updated entries. */
+    lkey = mlx5_mr_lookup(*mr_ctrl->cache_bh, mr_ctrl->bh_n, &bh_idx, addr);
+    /* Must always succeed. */
+    assert(lkey != UINT32_MAX);
+    /* Update cache. */
+    *mr_cache = (*mr_ctrl->cache_bh)[bh_idx];
+    mr_ctrl->mru = mr_ctrl->head;
+    /* Point to the next victim, the oldest. */
+    mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
+    return lkey;
 }
 
 /**
- * Iterator function for rte_mempool_walk() to register existing mempools and
- * fill the MP to MR cache of a TX queue.
+ * Bottom-half of mlx5_rx_mb2mr() if search on mr_cache[] fails.
  *
- * @param[in] mp
- *   Memory Pool to register.
- * @param *arg
- *   Pointer to TX queue structure.
+ * @param rxq
+ *   Pointer to Rx queue structure.
+ * @param addr
+ *   Search key.
+ *
+ * @return
+ *   LKEY on success.
  */
-void
-mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
+uint32_t
+mlx5_rx_mb2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr)
 {
-    struct priv *priv = (struct priv *)arg;
-    struct mlx5_mp2mr_mbuf_check_data data = {
-        .ret = 0,
-    };
-    struct mlx5_mr *mr;
+    struct mlx5_rxq_ctrl *rxq_ctrl =
+        container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 
-    /* Register mempool only if the first element looks like a mbuf. */
-    if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
-        data.ret == -1)
-        return;
-    mr = mlx5_mr_get(ETH_DEV(priv), mp);
-    if (mr) {
-        mlx5_mr_release(mr);
-        return;
-    }
-    mr = mlx5_mr_new(ETH_DEV(priv), mp);
-    if (!mr)
-        DRV_LOG(ERR, "port %u cannot create memory region: %s",
-            PORT_ID(priv), strerror(rte_errno));
+    DRV_LOG(DEBUG,
+        "port %u not found in rxq->mr_cache[], last-hit=%u, head=%u",
+        PORT_ID(rxq_ctrl->priv), rxq->mr_ctrl.mru, rxq->mr_ctrl.head);
+    return mlx5_mr_mb2mr_bh(ETH_DEV(rxq_ctrl->priv), &rxq->mr_ctrl, addr);
 }
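The three-line cache-update sequence duplicated in both branches is the whole
replacement policy: the refilled slot becomes the most recently used and head
always names the oldest slot, the next eviction victim. Condensed into a
hypothetical helper (not part of the patch):

    static void
    mr_cache_refill(struct mlx5_mr_ctrl *mr_ctrl,
            const struct mlx5_mr_cache *entry)
    {
        mr_ctrl->cache[mr_ctrl->head] = *entry; /* overwrite oldest slot */
        mr_ctrl->mru = mr_ctrl->head;           /* linear search tries it first */
        mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N; /* next victim */
    }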
 /**
- * Register a new memory region from the mempool and store it in the memory
- * region list.
+ * Bottom-half of mlx5_tx_mb2mr() if search on mr_cache[] fails.
  *
- * @param dev
- *   Pointer to Ethernet device.
- * @param mp
- *   Pointer to the memory pool to register.
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param addr
+ *   Search key.
  *
  * @return
- *   The memory region on success, NULL on failure and rte_errno is set.
+ *   LKEY on success.
  */
-struct mlx5_mr *
-mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp)
+uint32_t
+mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr)
 {
-    struct priv *priv = dev->data->dev_private;
-    const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-    uintptr_t start;
-    uintptr_t end;
-    unsigned int i;
-    struct mlx5_mr *mr;
-
-    mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
-    if (!mr) {
-        DRV_LOG(DEBUG,
-            "port %u unable to configure memory region,"
-            " ibv_reg_mr() failed.",
-            dev->data->port_id);
-        rte_errno = ENOMEM;
-        return NULL;
-    }
-    if (mlx5_check_mempool(mp, &start, &end) != 0) {
-        DRV_LOG(ERR, "port %u mempool %p: not virtually contiguous",
-            dev->data->port_id, (void *)mp);
-        rte_errno = ENOMEM;
-        return NULL;
-    }
-    DRV_LOG(DEBUG, "port %u mempool %p area start=%p end=%p size=%zu",
-        dev->data->port_id, (void *)mp, (void *)start, (void *)end,
-        (size_t)(end - start));
-    /* Save original addresses for exact MR lookup. */
-    mr->start = start;
-    mr->end = end;
-    /* Round start and end to page boundary if found in memory segments. */
-    for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
-        uintptr_t addr = (uintptr_t)ms[i].addr;
-        size_t len = ms[i].len;
-        unsigned int align = ms[i].hugepage_sz;
+    struct mlx5_txq_ctrl *txq_ctrl =
+        container_of(txq, struct mlx5_txq_ctrl, txq);
 
-        if ((start > addr) && (start < addr + len))
-            start = RTE_ALIGN_FLOOR(start, align);
-        if ((end > addr) && (end < addr + len))
-            end = RTE_ALIGN_CEIL(end, align);
-    }
     DRV_LOG(DEBUG,
-        "port %u mempool %p using start=%p end=%p size=%zu for memory"
-        " region",
-        dev->data->port_id, (void *)mp, (void *)start, (void *)end,
-        (size_t)(end - start));
-    mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
-                IBV_ACCESS_LOCAL_WRITE);
-    if (!mr->mr) {
-        rte_errno = ENOMEM;
-        return NULL;
-    }
-    mr->mp = mp;
-    mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
-    rte_atomic32_inc(&mr->refcnt);
-    DRV_LOG(DEBUG, "port %u new memory Region %p refcnt: %d",
-        dev->data->port_id, (void *)mr, rte_atomic32_read(&mr->refcnt));
-    LIST_INSERT_HEAD(&priv->mr, mr, next);
-    return mr;
+        "port %u not found in txq->mr_cache[], last-hit=%u, head=%u",
+        PORT_ID(txq_ctrl->priv), txq->mr_ctrl.mru, txq->mr_ctrl.head);
+    return mlx5_mr_mb2mr_bh(ETH_DEV(txq_ctrl->priv), &txq->mr_ctrl, addr);
+}
+
+/* Called by mr_update_mempool() when iterating the memory chunks. */
+static void
+mr_update_mempool_cb(struct rte_mempool *mp __rte_unused,
+             void *opaque, struct rte_mempool_memhdr *memhdr,
+             unsigned int mem_idx __rte_unused)
+{
+    struct mr_update_mempool_data *data = opaque;
+
+    DRV_LOG(DEBUG, "port %u adding chunk[%u] of %s",
+        data->dev->data->port_id, mem_idx, mp->name);
+    data->tbl_sz =
+        mlx5_mr_update_addr(data->dev, data->lkp_tbl, data->tbl_sz,
+                    (uintptr_t)memhdr->addr);
 }
 
 /**
- * Search the memory region object in the memory region list.
+ * Incrementally update LKEY lookup table for a specific Memory Pool from
+ * registered Memory Regions.
  *
  * @param dev
  *   Pointer to Ethernet device.
- * @param mp
- *   Pointer to the memory pool to register.
+ * @param[out] lkp_tbl
+ *   Pointer to lookup table to fill. The size of array must be at least
+ *   (priv->static_mr_n + 1).
+ * @param n
+ *   Size of lookup table.
+ * @param[in] mp
+ *   Pointer to Memory Pool.
  *
  * @return
- *   The memory region on success.
+ *   Size of returning lookup table.
  */
-struct mlx5_mr *
-mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp)
+int
+mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
+          uint16_t n, struct rte_mempool *mp)
 {
-    struct priv *priv = dev->data->dev_private;
-    struct mlx5_mr *mr;
+    struct mr_update_mempool_data data = {
+        .dev = dev,
+        .lkp_tbl = lkp_tbl,
+        .tbl_sz = n
+    };
 
-    assert(mp);
-    if (LIST_EMPTY(&priv->mr))
-        return NULL;
-    LIST_FOREACH(mr, &priv->mr, next) {
-        if (mr->mp == mp) {
-            rte_atomic32_inc(&mr->refcnt);
-            DRV_LOG(DEBUG, "port %u memory region %p refcnt: %d",
-                dev->data->port_id, (void *)mr,
-                rte_atomic32_read(&mr->refcnt));
-            return mr;
-        }
-    }
-    return NULL;
+    rte_mempool_mem_iter(mp, mr_update_mempool_cb, &data);
+    return data.tbl_sz;
+}
+
+/* Called by qsort() to compare MR entries. */
+static int
+mr_comp_addr(const void *m1, const void *m2)
+{
+    const struct mlx5_mr *mi1 = m1;
+    const struct mlx5_mr *mi2 = m2;
+
+    if (mi1->memseg->addr < mi2->memseg->addr)
+        return -1;
+    else if (mi1->memseg->addr > mi2->memseg->addr)
+        return 1;
+    else
+        return 0;
 }
 /**
- * Release the memory region object.
+ * Register entire physical memory to Verbs.
  *
- * @param mr
- *   Pointer to memory region to release.
+ * @param dev
+ *   Pointer to Ethernet device.
  *
  * @return
- *   1 while a reference on it exists, 0 when freed.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_mr_release(struct mlx5_mr *mr)
+mlx5_mr_register_memseg(struct rte_eth_dev *dev)
 {
-    assert(mr);
-    DRV_LOG(DEBUG, "memory region %p refcnt: %d", (void *)mr,
-        rte_atomic32_read(&mr->refcnt));
-    if (rte_atomic32_dec_and_test(&mr->refcnt)) {
-        claim_zero(ibv_dereg_mr(mr->mr));
-        LIST_REMOVE(mr, next);
-        rte_free(mr);
+    struct priv *priv = dev->data->dev_private;
+    const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+    struct mlx5_mr *mr;
+    struct mlx5_mr_cache *mr_cache;
+    unsigned int i;
+
+    if (priv->mr_n != 0)
         return 0;
+    /* Count the existing memsegs in the system. */
+    for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i)
+        ++priv->mr_n;
+    priv->mr = rte_calloc(__func__, priv->mr_n, sizeof(*mr), 0);
+    if (priv->mr == NULL) {
+        DRV_LOG(ERR,
+            "port %u cannot allocate memory for array of static MR",
+            dev->data->port_id);
+        rte_errno = ENOMEM;
+        return -rte_errno;
+    }
+    priv->mr_cache = rte_calloc(__func__, MR_TABLE_SZ(priv->mr_n),
+                    sizeof(*mr_cache), 0);
+    if (priv->mr_cache == NULL) {
+        DRV_LOG(ERR,
+            "port %u cannot allocate memory for array of MR cache",
+            dev->data->port_id);
+        rte_free(priv->mr);
+        rte_errno = ENOMEM;
+        return -rte_errno;
     }
-    return 1;
+    for (i = 0; i < priv->mr_n; ++i) {
+        mr = &(*priv->mr)[i];
+        mr->memseg = &ms[i];
+        mr->ibv_mr = ibv_reg_mr(priv->pd,
+                    mr->memseg->addr, mr->memseg->len,
+                    IBV_ACCESS_LOCAL_WRITE);
+        if (mr->ibv_mr == NULL) {
+            rte_dump_physmem_layout(stderr);
+            DRV_LOG(ERR, "port %u cannot register memseg[%u]",
+                dev->data->port_id, i);
+            goto error;
+        }
+    }
+    /* Sort by virtual address. */
+    qsort(*priv->mr, priv->mr_n, sizeof(struct mlx5_mr), mr_comp_addr);
+    /* First entry must be NULL for comparison. */
+    (*priv->mr_cache)[0] = (struct mlx5_mr_cache) {
+        .lkey = UINT32_MAX,
+    };
+    /* Compile global all-inclusive MR cache table. */
+    for (i = 0; i < priv->mr_n; ++i) {
+        mr = &(*priv->mr)[i];
+        mr_cache = &(*priv->mr_cache)[i + 1];
+        /* Paranoid, mr[] must be sorted. */
+        assert(i == 0 || mr->memseg->addr > (mr - 1)->memseg->addr);
+        *mr_cache = (struct mlx5_mr_cache) {
+            .start = (uintptr_t)mr->memseg->addr,
+            .end = (uintptr_t)mr->memseg->addr + mr->memseg->len,
+            .lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey)
+        };
+    }
+    return 0;
+error:
+    for (i = 0; i < priv->mr_n; ++i) {
+        mr = &(*priv->mr)[i];
+        if (mr->ibv_mr != NULL)
+            ibv_dereg_mr(mr->ibv_mr);
+    }
+    rte_free(priv->mr);
+    rte_free(priv->mr_cache);
+    rte_errno = ENOMEM;
+    return -rte_errno;
 }
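Registration is now a one-shot control-path operation: mlx5_dev_configure()
calls mlx5_mr_register_memseg() before any queue is set up, and the early
`priv->mr_n != 0` return makes repeated configuration harmless. Nothing new is
required from the application; the usual setup sequence triggers it (error
handling abbreviated, queue counts arbitrary):

    uint16_t port_id = 0;
    struct rte_eth_conf conf;

    memset(&conf, 0, sizeof(conf));
    /* For mlx5 ports, mlx5_mr_register_memseg() runs inside this call. */
    if (rte_eth_dev_configure(port_id, 4, 4, &conf) < 0)
        rte_exit(EXIT_FAILURE, "cannot configure port %u\n", port_id);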
 /**
- * Verify the flow list is empty.
+ * Deregister all Memory Regions.
  *
  * @param dev
  *   Pointer to Ethernet device.
- *
- * @return
- *   The number of object not released.
  */
-int
-mlx5_mr_verify(struct rte_eth_dev *dev)
+void
+mlx5_mr_deregister_memseg(struct rte_eth_dev *dev)
 {
     struct priv *priv = dev->data->dev_private;
-    int ret = 0;
-    struct mlx5_mr *mr;
+    unsigned int i;
+
+    if (priv->mr_n == 0)
+        return;
+    for (i = 0; i < priv->mr_n; ++i) {
+        struct mlx5_mr *mr;
 
-    LIST_FOREACH(mr, &priv->mr, next) {
-        DRV_LOG(DEBUG, "port %u memory region %p still referenced",
-            dev->data->port_id, (void *)mr);
-        ++ret;
+        mr = &(*priv->mr)[i];
+        /* Physical memory can't be changed dynamically. */
+        assert(mr->memseg != NULL);
+        assert(mr->ibv_mr != NULL);
+        ibv_dereg_mr(mr->ibv_mr);
     }
-    return ret;
+    rte_free(priv->mr);
+    rte_free(priv->mr_cache);
+    priv->mr = NULL;
+    priv->mr_cache = NULL;
+    priv->mr_n = 0;
 }
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcc5a87b..7161825a 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -595,16 +595,6 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
         goto error;
     }
     tmpl->rxq_ctrl = rxq_ctrl;
-    /* Use the entire RX mempool as the memory region. */
-    tmpl->mr = mlx5_mr_get(dev, rxq_data->mp);
-    if (!tmpl->mr) {
-        tmpl->mr = mlx5_mr_new(dev, rxq_data->mp);
-        if (!tmpl->mr) {
-            DRV_LOG(ERR, "port %u: memory region creation failure",
-                dev->data->port_id);
-            goto error;
-        }
-    }
     if (rxq_ctrl->irq) {
         tmpl->channel = ibv_create_comp_channel(priv->ctx);
         if (!tmpl->channel) {
@@ -737,14 +727,14 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
     for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
         struct rte_mbuf *buf = (*rxq_data->elts)[i];
         volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+        uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
 
         /* scat->addr must be able to store a pointer. */
         assert(sizeof(scat->addr) >= sizeof(uintptr_t));
         *scat = (struct mlx5_wqe_data_seg){
-            .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
-                                  uintptr_t)),
+            .addr = rte_cpu_to_be_64(addr),
             .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
-            .lkey = tmpl->mr->lkey,
+            .lkey = mlx5_rx_mb2mr(rxq_data, buf)
         };
     }
     rxq_data->rq_db = rwq.dbrec;
@@ -780,8 +770,6 @@ error:
         claim_zero(ibv_destroy_cq(tmpl->cq));
     if (tmpl->channel)
         claim_zero(ibv_destroy_comp_channel(tmpl->channel));
-    if (tmpl->mr)
-        mlx5_mr_release(tmpl->mr);
     priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
     rte_errno = ret; /* Restore rte_errno. */
     return NULL;
@@ -811,7 +799,6 @@ mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
         return NULL;
     rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
     if (rxq_ctrl->ibv) {
-        mlx5_mr_get(dev, rxq_data->mp);
         rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
         DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
             dev->data->port_id, rxq_ctrl->idx,
@@ -832,15 +819,9 @@ mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
 int
 mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv)
 {
-    int ret;
-
     assert(rxq_ibv);
     assert(rxq_ibv->wq);
     assert(rxq_ibv->cq);
-    assert(rxq_ibv->mr);
-    ret = mlx5_mr_release(rxq_ibv->mr);
-    if (!ret)
-        rxq_ibv->mr = NULL;
     DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
         PORT_ID(rxq_ibv->rxq_ctrl->priv), rxq_ibv->rxq_ctrl->idx,
         rte_atomic32_read(&rxq_ibv->refcnt));
@@ -918,10 +899,12 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
     const uint16_t desc_n =
         desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
     unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+    const unsigned int mr_n = MR_TABLE_SZ(priv->mr_n);
 
     tmpl = rte_calloc_socket("RXQ", 1,
                  sizeof(*tmpl) +
-                 desc_n * sizeof(struct rte_mbuf *),
+                 desc_n * sizeof(struct rte_mbuf *) +
+                 mr_n * sizeof(struct mlx5_mr_cache),
                  0, socket);
     if (!tmpl) {
         rte_errno = ENOMEM;
@@ -1019,8 +1002,17 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
     tmpl->rxq.mp = mp;
     tmpl->rxq.stats.idx = idx;
     tmpl->rxq.elts_n = log2above(desc);
+    tmpl->rxq.rq_repl_thresh =
+        MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
     tmpl->rxq.elts =
         (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+    tmpl->rxq.mr_ctrl.cache_bh =
+        (struct mlx5_mr_cache (*)[mr_n])&(*tmpl->rxq.elts)[desc_n];
+    tmpl->rxq.mr_ctrl.bh_n =
+        mlx5_mr_update_mp(dev, *tmpl->rxq.mr_ctrl.cache_bh,
+                  tmpl->rxq.mr_ctrl.bh_n, mp);
+    DRV_LOG(DEBUG, "Rx MR lookup table: %u entries built",
+        MR_N(tmpl->rxq.mr_ctrl.bh_n));
     tmpl->idx = idx;
     rte_atomic32_inc(&tmpl->refcnt);
     DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", dev->data->port_id,
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1bbce3b7..d95c4bff 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1920,6 +1920,9 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
          * changes.
          */
         wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
+        /* If there's only one MR, no need to replace LKEY in WQEs. */
+        if (unlikely(!IS_SINGLE_MR(rxq->mr_ctrl.bh_n)))
+            wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
         if (len > DATA_LEN(seg)) {
             len -= DATA_LEN(seg);
             ++NB_SEGS(pkt);
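IS_SINGLE_MR() builds on MR_TABLE_SZ()/MR_N() from the mlx5_rxtx.h hunk below;
since bh_n counts the sentinel, one registered memseg yields bh_n == 2 and the
LKEY rewrite above is skipped entirely. A worked expansion (assuming
MLX5_MR_LOOKUP_TABLE_PAD == 1, as in mlx5_defs.h):

    static_assert(MR_N(2) == 1 && IS_SINGLE_MR(2),
              "one memseg: skip lkey rewrite");
    static_assert(MR_N(3) == 2 && !IS_SINGLE_MR(3),
              "two memsegs: rewrite lkey");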
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index dac3b39f..7e811c10 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -82,17 +82,37 @@ struct mlx5_txq_stats {
 
 struct priv;
 
-/* Memory region queue object. */
+/* Memory Region object. */
 struct mlx5_mr {
-    LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
-    rte_atomic32_t refcnt; /*<< Reference counter. */
-    uint32_t lkey; /*<< rte_cpu_to_be_32(mr->lkey) */
-    uintptr_t start; /* Start address of MR */
-    uintptr_t end; /* End address of MR */
-    struct ibv_mr *mr; /*<< Memory Region. */
-    struct rte_mempool *mp; /*<< Memory Pool. */
+    const struct rte_memseg *memseg;
+    struct ibv_mr *ibv_mr; /* Verbs Memory Region. */
 };
 
+/* Cache entry for Memory Region. */
+struct mlx5_mr_cache {
+    uintptr_t start; /* Start address of MR. */
+    uintptr_t end; /* End address of MR. */
+    uint32_t lkey; /* rte_cpu_to_be_32(ibv_mr->lkey). */
+} __rte_packed;
+
+/* Per-queue MR control descriptor. */
+struct mlx5_mr_ctrl {
+    uint16_t bh_n; /* Size of MR cache table for bottom-half. */
+    uint16_t mru; /* Index of last hit entry. */
+    uint16_t head; /* Index of the oldest entry. */
+    struct mlx5_mr_cache cache[MLX5_MR_CACHE_N]; /* MR cache. */
+    struct mlx5_mr_cache (*cache_bh)[]; /* MR cache for bottom-half. */
+} __rte_packed;
+
+/* MR table size including padding at index 0. */
+#define MR_TABLE_SZ(n) ((n) + MLX5_MR_LOOKUP_TABLE_PAD)
+
+/* Actual table size excluding padding at index 0. */
+#define MR_N(n) ((n) - MLX5_MR_LOOKUP_TABLE_PAD)
+
+/* Whether there's only one entry in MR lookup table. */
+#define IS_SINGLE_MR(n) (MR_N(n) <= 1)
+
 /* Compressed CQE context. */
 struct rxq_zip {
     uint16_t ai; /* Array index. */
@@ -118,9 +138,11 @@ struct mlx5_rxq_data {
     volatile uint32_t *rq_db;
     volatile uint32_t *cq_db;
     uint16_t port_id;
-    uint16_t rq_ci;
-    uint16_t rq_pi;
-    uint16_t cq_ci;
+    uint32_t rq_ci;
+    uint32_t rq_pi;
+    uint32_t cq_ci;
+    uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */
+    struct mlx5_mr_ctrl mr_ctrl;
     volatile struct mlx5_wqe_data_seg(*wqes)[];
     volatile struct mlx5_cqe(*cqes)[];
     struct rxq_zip zip; /* Compressed context. */
@@ -142,7 +164,6 @@ struct mlx5_rxq_ibv {
     struct ibv_cq *cq; /* Completion Queue. */
     struct ibv_wq *wq; /* Work Queue. */
    struct ibv_comp_channel *channel;
-    struct mlx5_mr *mr; /* Memory Region (for mp). */
 };
 
 /* RX queue control descriptor. */
@@ -200,15 +221,14 @@ struct mlx5_txq_data {
     uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
     uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
     uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
-    uint16_t mr_cache_idx; /* Index of last hit entry. */
     uint32_t qp_num_8s; /* QP number shifted by 8. */
     uint32_t flags; /* Flags for Tx Queue. */
+    struct mlx5_mr_ctrl mr_ctrl;
     volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
     volatile void *wqes; /* Work queue (use volatile to write into). */
     volatile uint32_t *qp_db; /* Work queue doorbell. */
     volatile uint32_t *cq_db; /* Completion queue doorbell. */
     volatile void *bf_reg; /* Blueflame register remapped. */
-    struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
     struct rte_mbuf *(*elts)[]; /* TX elements. */
     struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
@@ -337,9 +357,10 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 
 /* mlx5_mr.c */
 
-void mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg);
-struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq,
-                   struct rte_mempool *mp, unsigned int idx);
+int mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
+              uint16_t n, struct rte_mempool *mp);
+uint32_t mlx5_rx_mb2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
+uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
 
 #ifndef NDEBUG
 /**
@@ -527,77 +548,102 @@ mlx5_tx_complete(struct mlx5_txq_data *txq)
 }
 
 /**
- * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
- * the cloned mbuf is allocated is returned instead.
+ * Look up LKEY from given lookup table by linear search. First try the
+ * last-hit entry. If that misses, the entire array is searched. If found,
+ * update the last-hit index and return LKEY.
  *
- * @param buf
- *   Pointer to mbuf.
+ * @param lkp_tbl
+ *   Pointer to lookup table.
+ * @param[in,out] cached_idx
+ *   Pointer to last-hit index.
+ * @param n
+ *   Size of lookup table.
+ * @param addr
+ *   Search key.
  *
  * @return
- *   Memory pool where data is located for given mbuf.
+ *   Searched LKEY on success, UINT32_MAX on no match.
  */
-static struct rte_mempool *
-mlx5_tx_mb2mp(struct rte_mbuf *buf)
+static __rte_always_inline uint32_t
+mlx5_mr_lookup_cache(struct mlx5_mr_cache *lkp_tbl, uint16_t *cached_idx,
+             uint16_t n, uintptr_t addr)
 {
-    if (unlikely(RTE_MBUF_INDIRECT(buf)))
-        return rte_mbuf_from_indirect(buf)->pool;
-    return buf->pool;
+    uint16_t idx;
+
+    if (likely(addr >= lkp_tbl[*cached_idx].start &&
+           addr < lkp_tbl[*cached_idx].end))
+        return lkp_tbl[*cached_idx].lkey;
+    for (idx = 0; idx < n && lkp_tbl[idx].start != 0; ++idx) {
+        if (addr >= lkp_tbl[idx].start &&
+            addr < lkp_tbl[idx].end) {
+            /* Found. */
+            *cached_idx = idx;
+            return lkp_tbl[idx].lkey;
+        }
+    }
+    return UINT32_MAX;
 }
 /**
- * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
- * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
- * remove an entry first.
+ * Query LKEY from address for Rx.
+ *
+ * @param rxq
+ *   Pointer to Rx queue structure.
+ * @param addr
+ *   Address to search.
+ *
+ * @return
+ *   LKEY on success.
+ */
+static __rte_always_inline uint32_t
+mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
+{
+    uint32_t lkey;
+
+    /* Linear search on MR cache array. */
+    lkey = mlx5_mr_lookup_cache(rxq->mr_ctrl.cache,
+                    &rxq->mr_ctrl.mru,
+                    MLX5_MR_CACHE_N, addr);
+    if (likely(lkey != UINT32_MAX))
+        return lkey;
+    DEBUG("Not found in rxq->mr_cache[], last-hit = %u, head = %u",
+          rxq->mr_ctrl.mru, rxq->mr_ctrl.head);
+    /* Take the slower bottom-half (binary search) on miss. */
+    return mlx5_rx_mb2mr_bh(rxq, addr);
+}
+
+#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
+
+/**
+ * Query LKEY from address for Tx.
  *
  * @param txq
- *   Pointer to TX queue structure.
- * @param[in] mp
- *   Memory Pool for which a Memory Region lkey must be returned.
+ *   Pointer to Tx queue structure.
+ * @param addr
+ *   Address to search.
  *
  * @return
- *   mr->lkey on success, (uint32_t)-1 on failure.
+ *   LKEY on success.
  */
 static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
+mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr)
 {
-    uint16_t i = txq->mr_cache_idx;
-    uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
-    struct mlx5_mr *mr;
-
-    assert(i < RTE_DIM(txq->mp2mr));
-    if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end > addr))
-        return txq->mp2mr[i]->lkey;
-    for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-        if (unlikely(txq->mp2mr[i] == NULL ||
-            txq->mp2mr[i]->mr == NULL)) {
-            /* Unknown MP, add a new MR for it. */
-            break;
-        }
-        if (txq->mp2mr[i]->start <= addr &&
-            txq->mp2mr[i]->end > addr) {
-            assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
-            txq->mr_cache_idx = i;
-            return txq->mp2mr[i]->lkey;
-        }
-    }
-    mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
-    /*
-     * Request the reference to use in this queue, the original one is
-     * kept by the control plane.
-     */
-    if (mr) {
-        rte_atomic32_inc(&mr->refcnt);
-        txq->mr_cache_idx = i >= RTE_DIM(txq->mp2mr) ? i - 1 : i;
-        return mr->lkey;
-    } else {
-        struct rte_mempool *mp = mlx5_tx_mb2mp(mb);
-
-        DRV_LOG(WARNING, "failed to register mempool 0x%p(%s)",
-            (void *)mp, mp->name);
-    }
-    return (uint32_t)-1;
+    uint32_t lkey;
+
+    /* Linear search on MR cache array. */
+    lkey = mlx5_mr_lookup_cache(txq->mr_ctrl.cache,
+                    &txq->mr_ctrl.mru,
+                    MLX5_MR_CACHE_N, addr);
+    if (likely(lkey != UINT32_MAX))
+        return lkey;
+    DEBUG("Not found in txq->mr_cache[], last-hit = %u, head = %u",
+          txq->mr_ctrl.mru, txq->mr_ctrl.head);
+    /* Take the slower bottom-half (binary search) on miss. */
+    return mlx5_tx_mb2mr_bh(txq, addr);
 }
 
+#define mlx5_tx_mb2mr(txq, mb) mlx5_tx_addr2mr(txq, (uintptr_t)((mb)->buf_addr))
+
 /**
  * Ring TX queue doorbell and flush the update if requested.
  *
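Datapath callers keep the old mlx5_tx_mb2mr() spelling. A hypothetical helper
showing the intended use when filling a data segment (not in the patch; the
LKEY needs no byte swap here because it was converted with rte_cpu_to_be_32()
once at registration):

    static inline void
    mlx5_tx_fill_dseg(struct mlx5_txq_data *txq,
              volatile struct mlx5_wqe_data_seg *dseg,
              struct rte_mbuf *buf)
    {
        dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
        dseg->byte_count = rte_cpu_to_be_32(DATA_LEN(buf));
        dseg->lkey = mlx5_tx_mb2mr(txq, buf); /* already big-endian */
    }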
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 982b8f1f..12465b43 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -316,7 +316,7 @@ mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
     struct priv *priv = dev->data->dev_private;
 
     if (!priv->tx_vec_en ||
-        priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
+        priv->txqs_n > (unsigned int)priv->txqs_vec ||
         priv->mps != MLX5_MPW_ENHANCED ||
         priv->tso)
         return -ENOTSUP;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
index d504e2ae..750559b8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -115,9 +115,13 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
         rxq->stats.rx_nombuf += n;
         return;
     }
-    for (i = 0; i < n; ++i)
+    for (i = 0; i < n; ++i) {
         wq[i].addr = rte_cpu_to_be_64((uintptr_t)elts[i]->buf_addr +
                           RTE_PKTMBUF_HEADROOM);
+        /* If there's only one MR, no need to replace LKEY in WQEs. */
+        if (unlikely(!IS_SINGLE_MR(rxq->mr_ctrl.bh_n)))
+            wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+    }
     rxq->rq_ci += n;
     /* Prevent overflowing into consumed mbufs. */
     elts_idx = rxq->rq_ci & q_mask;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index e748615e..ae37c2bd 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -756,7 +756,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
      * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
      */
     repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
-    if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n))
+    if (repl_n >= rxq->rq_repl_thresh)
         mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
     /* See if there're unreturned mbufs from compressed CQE. */
     rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 7e8c9b88..866a5e9b 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -737,7 +737,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
      * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
      */
     repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
-    if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n))
+    if (repl_n >= rxq->rq_repl_thresh)
         mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
     /* See if there're unreturned mbufs from compressed CQE. */
     rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 345ed707..e880d24c 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -356,10 +356,11 @@ int
 mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
     struct priv *priv = dev->data->dev_private;
-    struct rte_eth_stats tmp = {0};
+    struct rte_eth_stats tmp;
     unsigned int i;
     unsigned int idx;
 
+    memset(&tmp, 0, sizeof(tmp));
     /* Add software counters. */
     for (i = 0; (i != priv->rxqs_n); ++i) {
         struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 9a1d6f95..e6a29cb7 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -74,17 +74,10 @@ mlx5_txq_start(struct rte_eth_dev *dev)
     int ret;
 
     for (i = 0; i != priv->txqs_n; ++i) {
-        unsigned int idx = 0;
-        struct mlx5_mr *mr;
         struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
 
         if (!txq_ctrl)
             continue;
-        LIST_FOREACH(mr, &priv->mr, next) {
-            mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mr->mp, idx++);
-            if (idx == MLX5_PMD_TX_MP_CACHE)
-                break;
-        }
         txq_alloc_elts(txq_ctrl);
         txq_ctrl->ibv = mlx5_txq_ibv_new(dev, i);
         if (!txq_ctrl->ibv) {
@@ -177,7 +170,6 @@ int
 mlx5_dev_start(struct rte_eth_dev *dev)
 {
     struct priv *priv = dev->data->dev_private;
-    struct mlx5_mr *mr = NULL;
     int ret;
 
     DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
@@ -187,7 +179,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
             dev->data->port_id, strerror(rte_errno));
         goto error;
     }
-    rte_mempool_walk(mlx5_mp2mr_iter, priv);
     ret = mlx5_txq_start(dev);
     if (ret) {
         DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
@@ -229,8 +220,6 @@ error:
     ret = rte_errno; /* Save rte_errno before cleanup. */
     /* Rollback. */
     dev->data->dev_started = 0;
-    for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
-        mlx5_mr_release(mr);
     mlx5_flow_stop(dev, &priv->flows);
     mlx5_traffic_disable(dev);
     mlx5_txq_stop(dev);
@@ -252,7 +241,6 @@ void
 mlx5_dev_stop(struct rte_eth_dev *dev)
 {
     struct priv *priv = dev->data->dev_private;
-    struct mlx5_mr *mr;
 
     dev->data->dev_started = 0;
     /* Prevent crashes when queues are still in use. */
@@ -267,8 +255,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
     mlx5_dev_interrupt_handler_uninstall(dev);
     mlx5_txq_stop(dev);
     mlx5_rxq_stop(dev);
-    for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
-        mlx5_mr_release(mr);
     mlx5_flow_delete_drop_queue(dev);
 }
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 760ac92d..2ead2177 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -339,7 +339,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
         return NULL;
     }
     memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
-    /* MRs will be registered in mp2mr[] later. */
     attr.cq = (struct ibv_cq_init_attr_ex){
         .comp_mask = 0,
     };
@@ -622,10 +621,12 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
         ((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
          RTE_CACHE_LINE_SIZE);
     struct mlx5_txq_ctrl *tmpl;
+    const unsigned int mr_n = MR_TABLE_SZ(priv->mr_n);
 
     tmpl = rte_calloc_socket("TXQ", 1,
                  sizeof(*tmpl) +
-                 desc * sizeof(struct rte_mbuf *),
+                 desc * sizeof(struct rte_mbuf *) +
+                 mr_n * sizeof(struct mlx5_mr_cache),
                  0, socket);
     if (!tmpl) {
         rte_errno = ENOMEM;
@@ -639,7 +640,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
     tmpl->idx = idx;
     if (priv->mps == MLX5_MPW_ENHANCED)
         tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
-    /* MRs will be registered in mp2mr[] later. */
     DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d",
         dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
     DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d",
@@ -700,6 +700,9 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
         tmpl->txq.tunnel_en = 1;
     tmpl->txq.elts =
         (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
+    tmpl->txq.mr_ctrl.cache_bh =
+        (struct mlx5_mr_cache (*)[mr_n])
+        &(*tmpl->txq.elts)[1 << tmpl->txq.elts_n];
     tmpl->txq.stats.idx = idx;
     rte_atomic32_inc(&tmpl->refcnt);
     DRV_LOG(DEBUG, "port %u Tx queue %u: refcnt %d", dev->data->port_id,
@@ -728,15 +731,8 @@ mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
     if ((*priv->txqs)[idx]) {
         ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
                     txq);
-        unsigned int i;
 
         mlx5_txq_ibv_get(dev, idx);
-        for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
-            if (ctrl->txq.mp2mr[i])
-                claim_nonzero
-                    (mlx5_mr_get(dev,
-                             ctrl->txq.mp2mr[i]->mp));
-        }
         rte_atomic32_inc(&ctrl->refcnt);
         DRV_LOG(DEBUG, "port %u Tx queue %u refcnt %d",
             dev->data->port_id,
@@ -760,7 +756,6 @@ int
 mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
     struct priv *priv = dev->data->dev_private;
-    unsigned int i;
     struct mlx5_txq_ctrl *txq;
     size_t page_size = sysconf(_SC_PAGESIZE);
 
@@ -771,12 +766,6 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
         txq->idx, rte_atomic32_read(&txq->refcnt));
     if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
         txq->ibv = NULL;
-    for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
-        if (txq->txq.mp2mr[i]) {
-            mlx5_mr_release(txq->txq.mp2mr[i]);
-            txq->txq.mp2mr[i] = NULL;
-        }
-    }
     if (priv->uar_base)
         munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
                page_size), page_size);