Diffstat (limited to 'drivers/net/mlx5')
-rw-r--r--  drivers/net/mlx5/mlx5.c                |  39
-rw-r--r--  drivers/net/mlx5/mlx5.h                |  20
-rw-r--r--  drivers/net/mlx5/mlx5_defs.h           |  15
-rw-r--r--  drivers/net/mlx5/mlx5_ethdev.c         |   5
-rw-r--r--  drivers/net/mlx5/mlx5_mr.c             | 583
-rw-r--r--  drivers/net/mlx5/mlx5_rxq.c            |  38
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.c           |   3
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.h           | 188
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec.c       |   2
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec.h       |   6
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_neon.h  |   2
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_sse.h   |   2
-rw-r--r--  drivers/net/mlx5/mlx5_stats.c          |   3
-rw-r--r--  drivers/net/mlx5/mlx5_trigger.c        |  14
-rw-r--r--  drivers/net/mlx5/mlx5_txq.c            |  23
15 files changed, 525 insertions(+), 418 deletions(-)
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 36f3a056..e117ec84 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -78,6 +78,12 @@
*/
#define MLX5_TXQS_MIN_INLINE "txqs_min_inline"
+/*
+ * Device parameter to configure the number of TX queues threshold for
+ * enabling vectorized Tx.
+ */
+#define MLX5_TXQS_MAX_VEC "txqs_max_vec"
+
/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"
@@ -112,6 +118,7 @@ struct mlx5_args {
int cqe_comp;
int txq_inline;
int txqs_inline;
+ int txqs_vec;
int mps;
int mpw_hdr_dseg;
int inline_max_packet_sz;
@@ -236,6 +243,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
priv->txqs_n = 0;
priv->txqs = NULL;
}
+ mlx5_mr_deregister_memseg(dev);
if (priv->pd != NULL) {
assert(priv->ctx != NULL);
claim_zero(ibv_dealloc_pd(priv->pd));
@@ -276,10 +284,6 @@ mlx5_dev_close(struct rte_eth_dev *dev)
if (ret)
DRV_LOG(WARNING, "port %u some flows still remain",
dev->data->port_id);
- ret = mlx5_mr_verify(dev);
- if (ret)
- DRV_LOG(WARNING, "port %u some memory region still remain",
- dev->data->port_id);
memset(priv, 0, sizeof(*priv));
}
@@ -442,6 +446,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
args->txq_inline = tmp;
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
args->txqs_inline = tmp;
+ } else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
+ args->txqs_vec = tmp;
} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
args->mps = !!tmp;
} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
@@ -480,6 +486,7 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs)
MLX5_RXQ_CQE_COMP_EN,
MLX5_TXQ_INLINE,
MLX5_TXQS_MIN_INLINE,
+ MLX5_TXQS_MAX_VEC,
MLX5_TXQ_MPW_EN,
MLX5_TXQ_MPW_HDR_DSEG_EN,
MLX5_TXQ_MAX_INLINE_LEN,
@@ -640,8 +647,17 @@ mlx5_args_assign(struct priv *priv, struct mlx5_args *args)
priv->txq_inline = args->txq_inline;
if (args->txqs_inline != MLX5_ARG_UNSET)
priv->txqs_inline = args->txqs_inline;
- if (args->mps != MLX5_ARG_UNSET)
+ if (args->txqs_vec != MLX5_ARG_UNSET)
+ priv->txqs_vec = args->txqs_vec;
+ if (args->mps != MLX5_ARG_UNSET) {
priv->mps = args->mps ? priv->mps : 0;
+ } else if (priv->mps == MLX5_MPW) {
+ /*
+ * MPW is disabled by default, while the Enhanced MPW is enabled
+ * by default.
+ */
+ priv->mps = MLX5_MPW_DISABLED;
+ }
if (args->mpw_hdr_dseg != MLX5_ARG_UNSET)
priv->mpw_hdr_dseg = args->mpw_hdr_dseg;
if (args->inline_max_packet_sz != MLX5_ARG_UNSET)
@@ -680,6 +696,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
unsigned int mps;
unsigned int cqe_comp;
unsigned int tunnel_en = 0;
+ unsigned int txqs_vec = MLX5_VPMD_MAX_TXQS;
int idx;
int i;
struct mlx5dv_context attrs_out;
@@ -726,8 +743,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
continue;
switch (pci_dev->id.device_id) {
case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
- tunnel_en = 1;
- break;
case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
@@ -735,6 +750,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
tunnel_en = 1;
break;
+ case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
+ txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD;
+ tunnel_en = 1;
+ break;
default:
break;
}
@@ -805,6 +824,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
.cqe_comp = MLX5_ARG_UNSET,
.txq_inline = MLX5_ARG_UNSET,
.txqs_inline = MLX5_ARG_UNSET,
+ .txqs_vec = MLX5_ARG_UNSET,
.mps = MLX5_ARG_UNSET,
.mpw_hdr_dseg = MLX5_ARG_UNSET,
.inline_max_packet_sz = MLX5_ARG_UNSET,
@@ -908,6 +928,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
/* Enable vector by default if supported. */
priv->tx_vec_en = 1;
priv->rx_vec_en = 1;
+ priv->txqs_vec = txqs_vec;
err = mlx5_args(&args, pci_dev->device.devargs);
if (err) {
DRV_LOG(ERR, "failed to process device arguments: %s",
@@ -1154,6 +1175,10 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)
},
{
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+ PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
+ },
+ {
.vendor_id = 0
}
};
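
The new "txqs_max_vec" devarg caps how many Tx queues may be configured while
the PMD still selects vectorized Tx; above the threshold, scalar Tx is used.
A minimal sketch of overriding the per-arch default from an application,
assuming a hypothetical PCI address passed via the EAL whitelist syntax:

#include <rte_eal.h>

int
main(void)
{
	/* Hypothetical device; raise the vectorized-Tx queue threshold to 8
	 * through a per-device key=value devarg. */
	char *argv[] = {
		"app", "-w", "0000:03:00.0,txqs_max_vec=8",
	};

	if (rte_eal_init(3, argv) < 0)
		return -1;
	return 0;
}
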
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 5e6027b8..08b667f9 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -77,6 +77,7 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018,
PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
+ PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2,
};
struct mlx5_xstats_ctrl {
@@ -138,6 +139,7 @@ struct priv {
unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
unsigned int txq_inline; /* Maximum packet size for inlining. */
unsigned int txqs_inline; /* Queue number threshold for inlining. */
+ unsigned int txqs_vec; /* Queue number threshold for vectorized Tx. */
unsigned int inline_max_packet_sz; /* Max packet size for inlining. */
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
@@ -152,7 +154,9 @@ struct priv {
struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
struct mlx5_flows flows; /* RTE Flow rules. */
struct mlx5_flows ctrl_flows; /* Control flow rules. */
- LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+ struct mlx5_mr (*mr)[]; /* Static MR table. */
+ struct mlx5_mr_cache (*mr_cache)[]; /* Global MR cache table. */
+ unsigned int mr_n; /* Size of static MR table. */
LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
@@ -301,16 +305,14 @@ void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev);
/* mlx5_socket.c */
-int mlx5_socket_init(struct rte_eth_dev *priv);
-void mlx5_socket_uninit(struct rte_eth_dev *priv);
-void mlx5_socket_handle(struct rte_eth_dev *priv);
-int mlx5_socket_connect(struct rte_eth_dev *priv);
+int mlx5_socket_init(struct rte_eth_dev *dev);
+void mlx5_socket_uninit(struct rte_eth_dev *dev);
+void mlx5_socket_handle(struct rte_eth_dev *dev);
+int mlx5_socket_connect(struct rte_eth_dev *dev);
/* mlx5_mr.c */
-struct mlx5_mr *mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp);
-struct mlx5_mr *mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp);
-int mlx5_mr_release(struct mlx5_mr *mr);
-int mlx5_mr_verify(struct rte_eth_dev *dev);
+int mlx5_mr_register_memseg(struct rte_eth_dev *dev);
+void mlx5_mr_deregister_memseg(struct rte_eth_dev *dev);
#endif /* RTE_PMD_MLX5_H_ */
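
The replacement fields use the pointer-to-unsized-array idiom, e.g.
"struct mlx5_mr (*mr)[]", so a single heap allocation can be indexed as
(*priv->mr)[i] while the element count lives separately in priv->mr_n. A
self-contained sketch of the idiom, with a made-up element type:

#include <stdio.h>
#include <stdlib.h>

struct entry { int v; };

int
main(void)
{
	unsigned int n = 4;
	unsigned int i;
	/* One allocation, addressed through a pointer to an unsized array. */
	struct entry (*tbl)[] = calloc(n, sizeof(struct entry));

	if (tbl == NULL)
		return -1;
	for (i = 0; i < n; ++i)
		(*tbl)[i].v = (int)i;
	printf("%d\n", (*tbl)[n - 1].v);
	free(tbl);
	return 0;
}
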
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index 9c64bb33..1de3bdc4 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -88,8 +88,13 @@
/* Maximum Packet headers size (L2+L3+L4) for TSO. */
#define MLX5_MAX_TSO_HEADER 128
-/* Default minimum number of Tx queues for vectorized Tx. */
-#define MLX5_VPMD_MIN_TXQS 4
+/* Default maximum number of Tx queues for vectorized Tx. */
+#if defined(RTE_ARCH_ARM64)
+#define MLX5_VPMD_MAX_TXQS 8
+#else
+#define MLX5_VPMD_MAX_TXQS 4
+#endif
+#define MLX5_VPMD_MAX_TXQS_BLUEFIELD 16
/* Threshold of buffer replenishment for vectorized Rx. */
#define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \
@@ -124,6 +129,12 @@
*/
#define MLX5_UAR_OFFSET (1ULL << 32)
+/* Size of per-queue MR cache table. */
+#define MLX5_MR_CACHE_N 8
+
+/* First entry must be NULL for comparison. */
+#define MLX5_MR_LOOKUP_TABLE_PAD 1
+
/* Definition of static_assert found in /usr/include/assert.h */
#ifndef HAVE_STATIC_ASSERT
#define static_assert _Static_assert
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index e441483a..198c30b3 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -408,6 +408,11 @@ mlx5_dev_configure(struct rte_eth_dev *dev)
ret = mlx5_rss_reta_index_resize(dev, reta_idx_n);
if (ret)
return ret;
+ if (mlx5_mr_register_memseg(dev)) {
+ DRV_LOG(ERR, "%p: MR registration failed", (void *)dev);
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
/* When the number of RX queues is not a power of two, the remaining
* table entries are padded with reused WQs and hashes are not spread
* uniformly. */
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index a50c5208..c3410a62 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -47,355 +47,398 @@
#include "mlx5.h"
#include "mlx5_rxtx.h"
-struct mlx5_check_mempool_data {
- int ret;
- char *start;
- char *end;
+struct mr_update_mempool_data {
+ struct rte_eth_dev *dev;
+ struct mlx5_mr_cache *lkp_tbl;
+ uint16_t tbl_sz;
};
-/* Called by mlx5_check_mempool() when iterating the memory chunks. */
-static void
-mlx5_check_mempool_cb(struct rte_mempool *mp __rte_unused,
- void *opaque, struct rte_mempool_memhdr *memhdr,
- unsigned int mem_idx __rte_unused)
+/**
+ * Look up the LKEY in the given lookup table by binary search, store the last
+ * index and return the LKEY found.
+ *
+ * @param lkp_tbl
+ * Pointer to lookup table.
+ * @param n
+ * Size of lookup table.
+ * @param[out] idx
+ * Pointer to index. Even on search failure, the index where the search
+ * stopped is returned so that it can be used when inserting a new entry.
+ * @param addr
+ * Search key.
+ *
+ * @return
+ * The LKEY found on success, UINT32_MAX on no match.
+ */
+static uint32_t
+mlx5_mr_lookup(struct mlx5_mr_cache *lkp_tbl, uint16_t n, uint16_t *idx,
+ uintptr_t addr)
{
- struct mlx5_check_mempool_data *data = opaque;
+ uint16_t base = 0;
- /* It already failed, skip the next chunks. */
- if (data->ret != 0)
- return;
- /* It is the first chunk. */
- if (data->start == NULL && data->end == NULL) {
- data->start = memhdr->addr;
- data->end = data->start + memhdr->len;
- return;
- }
- if (data->end == memhdr->addr) {
- data->end += memhdr->len;
- return;
- }
- if (data->start == (char *)memhdr->addr + memhdr->len) {
- data->start -= memhdr->len;
- return;
- }
- /* Error, mempool is not virtually contiguous. */
- data->ret = -1;
+ /* First entry must be NULL for comparison. */
+ assert(n == 0 || (lkp_tbl[0].start == 0 &&
+ lkp_tbl[0].lkey == UINT32_MAX));
+ /* Binary search. */
+ do {
+ register uint16_t delta = n >> 1;
+
+ if (addr < lkp_tbl[base + delta].start) {
+ n = delta;
+ } else {
+ base += delta;
+ n -= delta;
+ }
+ } while (n > 1);
+ assert(addr >= lkp_tbl[base].start);
+ *idx = base;
+ if (addr < lkp_tbl[base].end)
+ return lkp_tbl[base].lkey;
+ /* Not found. */
+ return UINT32_MAX;
}
/**
- * Check if a mempool can be used: it must be virtually contiguous.
+ * Insert an entry to LKEY lookup table.
*
- * @param[in] mp
- * Pointer to memory pool.
- * @param[out] start
- * Pointer to the start address of the mempool virtual memory area
- * @param[out] end
- * Pointer to the end address of the mempool virtual memory area
+ * @param lkp_tbl
+ * Pointer to lookup table. The array must be large enough to hold one more
+ * entry.
+ * @param n
+ * Size of lookup table.
+ * @param entry
+ * Pointer to new entry to insert.
*
* @return
- * 0 on success (mempool is virtually contiguous), -1 on error.
+ * Size of the resulting lookup table.
*/
static int
-mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
- uintptr_t *end)
+mlx5_mr_insert(struct mlx5_mr_cache *lkp_tbl, uint16_t n,
+ struct mlx5_mr_cache *entry)
{
- struct mlx5_check_mempool_data data;
+ uint16_t idx = 0;
+ size_t shift;
- memset(&data, 0, sizeof(data));
- rte_mempool_mem_iter(mp, mlx5_check_mempool_cb, &data);
- *start = (uintptr_t)data.start;
- *end = (uintptr_t)data.end;
- return data.ret;
+ /* Check if the entry exists. */
+ if (mlx5_mr_lookup(lkp_tbl, n, &idx, entry->start) != UINT32_MAX)
+ return n;
+ /* Insert entry. */
+ ++idx;
+ shift = (n - idx) * sizeof(struct mlx5_mr_cache);
+ if (shift)
+ memmove(&lkp_tbl[idx + 1], &lkp_tbl[idx], shift);
+ lkp_tbl[idx] = *entry;
+ DRV_LOG(DEBUG, "%p: inserted lkp_tbl[%u], start = 0x%lx, end = 0x%lx",
+ (void *)lkp_tbl, idx, lkp_tbl[idx].start, lkp_tbl[idx].end);
+ return n + 1;
}
/**
- * Register a Memory Region (MR) <-> Memory Pool (MP) association in
- * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
+ * Incrementally update LKEY lookup table for a specific address from registered
+ * Memory Regions.
*
- * @param txq
- * Pointer to TX queue structure.
- * @param[in] mp
- * Memory Pool for which a Memory Region lkey must be returned.
- * @param idx
- * Index of the next available entry.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param lkp_tbl
+ * Pointer to lookup table to fill. The array must hold at least
+ * (priv->mr_n + 1) entries.
+ * @param n
+ * Size of lookup table.
+ * @param addr
+ * Search key.
*
* @return
- * mr on success, NULL on failure and rte_errno is set.
+ * Size of the resulting lookup table.
*/
-struct mlx5_mr *
-mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
- unsigned int idx)
+static int
+mlx5_mr_update_addr(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
+ uint16_t n, uintptr_t addr)
{
- struct mlx5_txq_ctrl *txq_ctrl =
- container_of(txq, struct mlx5_txq_ctrl, txq);
- struct rte_eth_dev *dev;
- struct mlx5_mr *mr;
+ struct priv *priv = dev->data->dev_private;
+ uint16_t idx;
+ uint32_t ret __rte_unused;
- rte_spinlock_lock(&txq_ctrl->priv->mr_lock);
- /* Add a new entry, register MR first. */
- DRV_LOG(DEBUG, "port %u discovered new memory pool \"%s\" (%p)",
- PORT_ID(txq_ctrl->priv), mp->name, (void *)mp);
- dev = ETH_DEV(txq_ctrl->priv);
- mr = mlx5_mr_get(dev, mp);
- if (mr == NULL) {
- if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
- DRV_LOG(DEBUG,
- "port %u using unregistered mempool 0x%p(%s)"
- " in secondary process, please create mempool"
- " before rte_eth_dev_start()",
- PORT_ID(txq_ctrl->priv), (void *)mp, mp->name);
- rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
- rte_errno = ENOTSUP;
- return NULL;
- }
- mr = mlx5_mr_new(dev, mp);
- }
- if (unlikely(mr == NULL)) {
- DRV_LOG(DEBUG,
- "port %u unable to configure memory region,"
- " ibv_reg_mr() failed.",
- PORT_ID(txq_ctrl->priv));
- rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
- return NULL;
+ if (n == 0) {
+ /* First entry must be NULL for comparison. */
+ lkp_tbl[n++] = (struct mlx5_mr_cache) {
+ .lkey = UINT32_MAX,
+ };
}
- if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
- /* Table is full, remove oldest entry. */
- DRV_LOG(DEBUG,
- "port %u memory region <-> memory pool table full, "
- " dropping oldest entry",
- PORT_ID(txq_ctrl->priv));
- --idx;
- mlx5_mr_release(txq->mp2mr[0]);
- memmove(&txq->mp2mr[0], &txq->mp2mr[1],
- (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
- }
- /* Store the new entry. */
- txq_ctrl->txq.mp2mr[idx] = mr;
- DRV_LOG(DEBUG,
- "port %u new memory region lkey for MP \"%s\" (%p): 0x%08"
- PRIu32,
- PORT_ID(txq_ctrl->priv), mp->name, (void *)mp,
- txq_ctrl->txq.mp2mr[idx]->lkey);
- rte_spinlock_unlock(&txq_ctrl->priv->mr_lock);
- return mr;
+ ret = mlx5_mr_lookup(*priv->mr_cache, MR_TABLE_SZ(priv->mr_n),
+ &idx, addr);
+ /* Lookup must succeed, the global cache is all-inclusive. */
+ assert(ret != UINT32_MAX);
+ DRV_LOG(DEBUG, "port %u adding LKEY (0x%x) for addr 0x%lx",
+ dev->data->port_id, (*priv->mr_cache)[idx].lkey, addr);
+ return mlx5_mr_insert(lkp_tbl, n, &(*priv->mr_cache)[idx]);
}
-struct mlx5_mp2mr_mbuf_check_data {
- int ret;
-};
-
/**
- * Callback function for rte_mempool_obj_iter() to check whether a given
- * mempool object looks like a mbuf.
+ * Bottom-half of LKEY search on the datapath. First search in cache_bh[]; on
+ * a miss, search the global MR cache table and copy the found entry to the
+ * per-queue local caches.
*
- * @param[in] mp
- * The mempool pointer
- * @param[in] arg
- * Context data (struct txq_mp2mr_mbuf_check_data). Contains the
- * return value.
- * @param[in] obj
- * Object address.
- * @param index
- * Object index, unused.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param mr_ctrl
+ * Pointer to per-queue MR control structure.
+ * @param addr
+ * Search key.
+ *
+ * @return
+ * LKEY on success.
*/
-static void
-txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
- uint32_t index __rte_unused)
+static inline uint32_t
+mlx5_mr_mb2mr_bh(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
+ uintptr_t addr)
{
- struct mlx5_mp2mr_mbuf_check_data *data = arg;
- struct rte_mbuf *buf = obj;
+ uint32_t lkey;
+ uint16_t bh_idx = 0;
+ struct mlx5_mr_cache *mr_cache = &mr_ctrl->cache[mr_ctrl->head];
- /*
- * Check whether mbuf structure fits element size and whether mempool
- * pointer is valid.
- */
- if (sizeof(*buf) > mp->elt_size || buf->pool != mp)
- data->ret = -1;
+ /* Binary-search MR translation table. */
+ lkey = mlx5_mr_lookup(*mr_ctrl->cache_bh, mr_ctrl->bh_n, &bh_idx, addr);
+ if (likely(lkey != UINT32_MAX)) {
+ /* Update cache. */
+ *mr_cache = (*mr_ctrl->cache_bh)[bh_idx];
+ mr_ctrl->mru = mr_ctrl->head;
+ /* Point to the next victim, the oldest. */
+ mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
+ return lkey;
+ }
+ /* Missed in the per-queue lookup table. Search in the global cache. */
+ mr_ctrl->bh_n = mlx5_mr_update_addr(dev, *mr_ctrl->cache_bh,
+ mr_ctrl->bh_n, addr);
+ /* Search again with updated entries. */
+ lkey = mlx5_mr_lookup(*mr_ctrl->cache_bh, mr_ctrl->bh_n, &bh_idx, addr);
+ /* Must always succeed. */
+ assert(lkey != UINT32_MAX);
+ /* Update cache. */
+ *mr_cache = (*mr_ctrl->cache_bh)[bh_idx];
+ mr_ctrl->mru = mr_ctrl->head;
+ /* Point to the next victim, the oldest. */
+ mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
+ return lkey;
}
/**
- * Iterator function for rte_mempool_walk() to register existing mempools and
- * fill the MP to MR cache of a TX queue.
+ * Bottom-half of mlx5_rx_mb2mr() if the search on cache_bh[] fails.
*
- * @param[in] mp
- * Memory Pool to register.
- * @param *arg
- * Pointer to TX queue structure.
+ * @param rxq
+ * Pointer to Rx queue structure.
+ * @param addr
+ * Search key.
+ *
+ * @return
+ * LKEY on success.
*/
-void
-mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
+uint32_t
+mlx5_rx_mb2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr)
{
- struct priv *priv = (struct priv *)arg;
- struct mlx5_mp2mr_mbuf_check_data data = {
- .ret = 0,
- };
- struct mlx5_mr *mr;
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
- /* Register mempool only if the first element looks like a mbuf. */
- if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
- data.ret == -1)
- return;
- mr = mlx5_mr_get(ETH_DEV(priv), mp);
- if (mr) {
- mlx5_mr_release(mr);
- return;
- }
- mr = mlx5_mr_new(ETH_DEV(priv), mp);
- if (!mr)
- DRV_LOG(ERR, "port %u cannot create memory region: %s",
- PORT_ID(priv), strerror(rte_errno));
+ DRV_LOG(DEBUG,
+ "port %u not found in rxq->mr_cache[], last-hit=%u, head=%u",
+ PORT_ID(rxq_ctrl->priv), rxq->mr_ctrl.mru, rxq->mr_ctrl.head);
+ return mlx5_mr_mb2mr_bh(ETH_DEV(rxq_ctrl->priv), &rxq->mr_ctrl, addr);
}
/**
- * Register a new memory region from the mempool and store it in the memory
- * region list.
+ * Bottom-half of mlx5_tx_mb2mr() if the search on cache_bh[] fails.
*
- * @param dev
- * Pointer to Ethernet device.
- * @param mp
- * Pointer to the memory pool to register.
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param addr
+ * Search key.
*
* @return
- * The memory region on success, NULL on failure and rte_errno is set.
+ * LKEY on success.
*/
-struct mlx5_mr *
-mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp)
+uint32_t
+mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr)
{
- struct priv *priv = dev->data->dev_private;
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- uintptr_t start;
- uintptr_t end;
- unsigned int i;
- struct mlx5_mr *mr;
-
- mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
- if (!mr) {
- DRV_LOG(DEBUG,
- "port %u unable to configure memory region,"
- " ibv_reg_mr() failed.",
- dev->data->port_id);
- rte_errno = ENOMEM;
- return NULL;
- }
- if (mlx5_check_mempool(mp, &start, &end) != 0) {
- DRV_LOG(ERR, "port %u mempool %p: not virtually contiguous",
- dev->data->port_id, (void *)mp);
- rte_errno = ENOMEM;
- return NULL;
- }
- DRV_LOG(DEBUG, "port %u mempool %p area start=%p end=%p size=%zu",
- dev->data->port_id, (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- /* Save original addresses for exact MR lookup. */
- mr->start = start;
- mr->end = end;
- /* Round start and end to page boundary if found in memory segments. */
- for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
- uintptr_t addr = (uintptr_t)ms[i].addr;
- size_t len = ms[i].len;
- unsigned int align = ms[i].hugepage_sz;
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
- if ((start > addr) && (start < addr + len))
- start = RTE_ALIGN_FLOOR(start, align);
- if ((end > addr) && (end < addr + len))
- end = RTE_ALIGN_CEIL(end, align);
- }
DRV_LOG(DEBUG,
- "port %u mempool %p using start=%p end=%p size=%zu for memory"
- " region",
- dev->data->port_id, (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
- IBV_ACCESS_LOCAL_WRITE);
- if (!mr->mr) {
- rte_errno = ENOMEM;
- return NULL;
- }
- mr->mp = mp;
- mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
- rte_atomic32_inc(&mr->refcnt);
- DRV_LOG(DEBUG, "port %u new memory Region %p refcnt: %d",
- dev->data->port_id, (void *)mr, rte_atomic32_read(&mr->refcnt));
- LIST_INSERT_HEAD(&priv->mr, mr, next);
- return mr;
+ "port %u not found in txq->mr_cache[], last-hit=%u, head=%u",
+ PORT_ID(txq_ctrl->priv), txq->mr_ctrl.mru, txq->mr_ctrl.head);
+ return mlx5_mr_mb2mr_bh(ETH_DEV(txq_ctrl->priv), &txq->mr_ctrl, addr);
+}
+
+/* Called by mr_update_mempool() when iterating the memory chunks. */
+static void
+mr_update_mempool_cb(struct rte_mempool *mp __rte_unused,
+ void *opaque, struct rte_mempool_memhdr *memhdr,
+ unsigned int mem_idx __rte_unused)
+{
+ struct mr_update_mempool_data *data = opaque;
+
+ DRV_LOG(DEBUG, "port %u adding chunk[%u] of %s",
+ data->dev->data->port_id, mem_idx, mp->name);
+ data->tbl_sz =
+ mlx5_mr_update_addr(data->dev, data->lkp_tbl, data->tbl_sz,
+ (uintptr_t)memhdr->addr);
}
/**
- * Search the memory region object in the memory region list.
+ * Incrementally update LKEY lookup table for a specific Memory Pool from
+ * registered Memory Regions.
*
* @param dev
* Pointer to Ethernet device.
- * @param mp
- * Pointer to the memory pool to register.
+ * @param[out] lkp_tbl
+ * Pointer to lookup table to fill. The array must hold at least
+ * (priv->mr_n + 1) entries.
+ * @param n
+ * Size of lookup table.
+ * @param[in] mp
+ * Pointer to Memory Pool.
*
* @return
- * The memory region on success.
+ * Size of the resulting lookup table.
*/
-struct mlx5_mr *
-mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp)
+int
+mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
+ uint16_t n, struct rte_mempool *mp)
{
- struct priv *priv = dev->data->dev_private;
- struct mlx5_mr *mr;
+ struct mr_update_mempool_data data = {
+ .dev = dev,
+ .lkp_tbl = lkp_tbl,
+ .tbl_sz = n
+ };
- assert(mp);
- if (LIST_EMPTY(&priv->mr))
- return NULL;
- LIST_FOREACH(mr, &priv->mr, next) {
- if (mr->mp == mp) {
- rte_atomic32_inc(&mr->refcnt);
- DRV_LOG(DEBUG, "port %u memory region %p refcnt: %d",
- dev->data->port_id, (void *)mr,
- rte_atomic32_read(&mr->refcnt));
- return mr;
- }
- }
- return NULL;
+ rte_mempool_mem_iter(mp, mr_update_mempool_cb, &data);
+ return data.tbl_sz;
+}
+
+/* Called by qsort() to compare MR entries. */
+static int
+mr_comp_addr(const void *m1, const void *m2)
+{
+ const struct mlx5_mr *mi1 = m1;
+ const struct mlx5_mr *mi2 = m2;
+
+ if (mi1->memseg->addr < mi2->memseg->addr)
+ return -1;
+ else if (mi1->memseg->addr > mi2->memseg->addr)
+ return 1;
+ else
+ return 0;
}
/**
- * Release the memory region object.
+ * Register the entire physical memory with Verbs.
*
- * @param mr
- * Pointer to memory region to release.
+ * @param dev
+ * Pointer to Ethernet device.
*
* @return
- * 1 while a reference on it exists, 0 when freed.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx5_mr_release(struct mlx5_mr *mr)
+mlx5_mr_register_memseg(struct rte_eth_dev *dev)
{
- assert(mr);
- DRV_LOG(DEBUG, "memory region %p refcnt: %d", (void *)mr,
- rte_atomic32_read(&mr->refcnt));
- if (rte_atomic32_dec_and_test(&mr->refcnt)) {
- claim_zero(ibv_dereg_mr(mr->mr));
- LIST_REMOVE(mr, next);
- rte_free(mr);
+ struct priv *priv = dev->data->dev_private;
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ struct mlx5_mr *mr;
+ struct mlx5_mr_cache *mr_cache;
+ unsigned int i;
+
+ if (priv->mr_n != 0)
return 0;
+ /* Count the existing memsegs in the system. */
+ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i)
+ ++priv->mr_n;
+ priv->mr = rte_calloc(__func__, priv->mr_n, sizeof(*mr), 0);
+ if (priv->mr == NULL) {
+ DRV_LOG(ERR,
+ "port %u cannot allocate memory for array of static MR",
+ dev->data->port_id);
+ rte_errno = ENOMEM;
+ return -rte_errno;
+ }
+ priv->mr_cache = rte_calloc(__func__, MR_TABLE_SZ(priv->mr_n),
+ sizeof(*mr_cache), 0);
+ if (priv->mr_cache == NULL) {
+ DRV_LOG(ERR,
+ "port %u cannot allocate memory for array of MR cache",
+ dev->data->port_id);
+ rte_free(priv->mr);
+ rte_errno = ENOMEM;
+ return -rte_errno;
}
- return 1;
+ for (i = 0; i < priv->mr_n; ++i) {
+ mr = &(*priv->mr)[i];
+ mr->memseg = &ms[i];
+ mr->ibv_mr = ibv_reg_mr(priv->pd,
+ mr->memseg->addr, mr->memseg->len,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (mr->ibv_mr == NULL) {
+ rte_dump_physmem_layout(stderr);
+ DRV_LOG(ERR, "port %u cannot register memseg[%u]",
+ dev->data->port_id, i);
+ goto error;
+ }
+ }
+ /* Sort by virtual address. */
+ qsort(*priv->mr, priv->mr_n, sizeof(struct mlx5_mr), mr_comp_addr);
+ /* First entry must be NULL for comparison. */
+ (*priv->mr_cache)[0] = (struct mlx5_mr_cache) {
+ .lkey = UINT32_MAX,
+ };
+ /* Compile global all-inclusive MR cache table. */
+ for (i = 0; i < priv->mr_n; ++i) {
+ mr = &(*priv->mr)[i];
+ mr_cache = &(*priv->mr_cache)[i + 1];
+ /* Paranoid, mr[] must be sorted. */
+ assert(i == 0 || mr->memseg->addr > (mr - 1)->memseg->addr);
+ *mr_cache = (struct mlx5_mr_cache) {
+ .start = (uintptr_t)mr->memseg->addr,
+ .end = (uintptr_t)mr->memseg->addr + mr->memseg->len,
+ .lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey)
+ };
+ }
+ return 0;
+error:
+ for (i = 0; i < priv->mr_n; ++i) {
+ mr = &(*priv->mr)[i];
+ if (mr->ibv_mr != NULL)
+ ibv_dereg_mr(mr->ibv_mr);
+ }
+ rte_free(priv->mr);
+ rte_free(priv->mr_cache);
+ rte_errno = ENOMEM;
+ return -rte_errno;
}
/**
- * Verify the flow list is empty
+ * Deregister all Memory Regions.
*
* @param dev
* Pointer to Ethernet device.
- *
- * @return
- * The number of object not released.
*/
-int
-mlx5_mr_verify(struct rte_eth_dev *dev)
+void
+mlx5_mr_deregister_memseg(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
- int ret = 0;
- struct mlx5_mr *mr;
+ unsigned int i;
+
+ if (priv->mr_n == 0)
+ return;
+ for (i = 0; i < priv->mr_n; ++i) {
+ struct mlx5_mr *mr;
- LIST_FOREACH(mr, &priv->mr, next) {
- DRV_LOG(DEBUG, "port %u memory region %p still referenced",
- dev->data->port_id, (void *)mr);
- ++ret;
+ mr = &(*priv->mr)[i];
+ /* Physical memory can't be changed dynamically. */
+ assert(mr->memseg != NULL);
+ assert(mr->ibv_mr != NULL);
+ ibv_dereg_mr(mr->ibv_mr);
}
- return ret;
+ rte_free(priv->mr);
+ rte_free(priv->mr_cache);
+ priv->mr = NULL;
+ priv->mr_cache = NULL;
+ priv->mr_n = 0;
}
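
mlx5_mr_lookup() depends on the sentinel at index 0 (start == 0, lkey ==
UINT32_MAX): it guarantees addr >= lkp_tbl[base].start when the loop exits, so
only the end bound needs checking, and a miss still yields the slot to insert
after. A standalone sketch of the same search-and-insert scheme over a sorted
table, using simplified stand-in types rather than the driver's structures:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct cache { uintptr_t start, end; uint32_t lkey; };

/* Binary search; returns the LKEY or UINT32_MAX, stores the stop index. */
static uint32_t
lookup(struct cache *tbl, uint16_t n, uint16_t *idx, uintptr_t addr)
{
	uint16_t base = 0;

	do {
		uint16_t delta = n >> 1;

		if (addr < tbl[base + delta].start)
			n = delta;
		else {
			base += delta;
			n -= delta;
		}
	} while (n > 1);
	*idx = base;
	/* The sentinel guarantees addr >= tbl[base].start here. */
	return addr < tbl[base].end ? tbl[base].lkey : UINT32_MAX;
}

/* Insert keeping the table sorted; returns the new size. */
static uint16_t
insert(struct cache *tbl, uint16_t n, const struct cache *e)
{
	uint16_t idx = 0;

	if (lookup(tbl, n, &idx, e->start) != UINT32_MAX)
		return n; /* Already present. */
	++idx;
	memmove(&tbl[idx + 1], &tbl[idx], (n - idx) * sizeof(*tbl));
	tbl[idx] = *e;
	return n + 1;
}

int
main(void)
{
	/* Index 0 is the sentinel entry. */
	struct cache tbl[8] = { { 0, 0, UINT32_MAX } };
	uint16_t n = 1, idx;
	struct cache a = { 0x1000, 0x2000, 0x11 };
	struct cache b = { 0x3000, 0x4000, 0x22 };

	n = insert(tbl, n, &b);
	n = insert(tbl, n, &a);
	assert(lookup(tbl, n, &idx, 0x1800) == 0x11);
	assert(lookup(tbl, n, &idx, 0x2800) == UINT32_MAX);
	printf("entries: %u\n", n);
	return 0;
}
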
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index dcc5a87b..7161825a 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -595,16 +595,6 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
goto error;
}
tmpl->rxq_ctrl = rxq_ctrl;
- /* Use the entire RX mempool as the memory region. */
- tmpl->mr = mlx5_mr_get(dev, rxq_data->mp);
- if (!tmpl->mr) {
- tmpl->mr = mlx5_mr_new(dev, rxq_data->mp);
- if (!tmpl->mr) {
- DRV_LOG(ERR, "port %u: memeroy region creation failure",
- dev->data->port_id);
- goto error;
- }
- }
if (rxq_ctrl->irq) {
tmpl->channel = ibv_create_comp_channel(priv->ctx);
if (!tmpl->channel) {
@@ -737,14 +727,14 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
struct rte_mbuf *buf = (*rxq_data->elts)[i];
volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+ uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
/* scat->addr must be able to store a pointer. */
assert(sizeof(scat->addr) >= sizeof(uintptr_t));
*scat = (struct mlx5_wqe_data_seg){
- .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
- uintptr_t)),
+ .addr = rte_cpu_to_be_64(addr),
.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
- .lkey = tmpl->mr->lkey,
+ .lkey = mlx5_rx_mb2mr(rxq_data, buf)
};
}
rxq_data->rq_db = rwq.dbrec;
@@ -780,8 +770,6 @@ error:
claim_zero(ibv_destroy_cq(tmpl->cq));
if (tmpl->channel)
claim_zero(ibv_destroy_comp_channel(tmpl->channel));
- if (tmpl->mr)
- mlx5_mr_release(tmpl->mr);
priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE;
rte_errno = ret; /* Restore rte_errno. */
return NULL;
@@ -811,7 +799,6 @@ mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
return NULL;
rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
if (rxq_ctrl->ibv) {
- mlx5_mr_get(dev, rxq_data->mp);
rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
dev->data->port_id, rxq_ctrl->idx,
@@ -832,15 +819,9 @@ mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx)
int
mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv)
{
- int ret;
-
assert(rxq_ibv);
assert(rxq_ibv->wq);
assert(rxq_ibv->cq);
- assert(rxq_ibv->mr);
- ret = mlx5_mr_release(rxq_ibv->mr);
- if (!ret)
- rxq_ibv->mr = NULL;
DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d",
PORT_ID(rxq_ibv->rxq_ctrl->priv),
rxq_ibv->rxq_ctrl->idx, rte_atomic32_read(&rxq_ibv->refcnt));
@@ -918,10 +899,12 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
const uint16_t desc_n =
desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+ const unsigned int mr_n = MR_TABLE_SZ(priv->mr_n);
tmpl = rte_calloc_socket("RXQ", 1,
sizeof(*tmpl) +
- desc_n * sizeof(struct rte_mbuf *),
+ desc_n * sizeof(struct rte_mbuf *) +
+ mr_n * sizeof(struct mlx5_mr_cache),
0, socket);
if (!tmpl) {
rte_errno = ENOMEM;
@@ -1019,8 +1002,17 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->rxq.mp = mp;
tmpl->rxq.stats.idx = idx;
tmpl->rxq.elts_n = log2above(desc);
+ tmpl->rxq.rq_repl_thresh =
+ MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
tmpl->rxq.elts =
(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+ tmpl->rxq.mr_ctrl.cache_bh =
+ (struct mlx5_mr_cache (*)[mr_n])&(*tmpl->rxq.elts)[desc_n];
+ tmpl->rxq.mr_ctrl.bh_n =
+ mlx5_mr_update_mp(dev, *tmpl->rxq.mr_ctrl.cache_bh,
+ tmpl->rxq.mr_ctrl.bh_n, mp);
+ DRV_LOG(DEBUG, "Rx MR lookup table: %u entries built",
+ MR_N(tmpl->rxq.mr_ctrl.bh_n));
tmpl->idx = idx;
rte_atomic32_inc(&tmpl->refcnt);
DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", dev->data->port_id,
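
Both queue constructors now make a single allocation sized for the control
structure, the elts[] ring and the bottom-half MR cache table back to back,
then carve typed pointers out of the tail (see tmpl->rxq.mr_ctrl.cache_bh
above). A minimal sketch of that layout trick, with simplified stand-in types:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct mr_cache { uintptr_t start, end; uint32_t lkey; };

struct rxq {
	unsigned int desc_n;
	struct mr_cache (*cache_bh)[]; /* Carved from the tail. */
	void *(*elts)[];               /* Also carved from the tail. */
};

int
main(void)
{
	unsigned int desc_n = 256, mr_n = 9;
	struct rxq *q = calloc(1, sizeof(*q) +
			       desc_n * sizeof(void *) +
			       mr_n * sizeof(struct mr_cache));

	if (q == NULL)
		return -1;
	q->desc_n = desc_n;
	/* elts[] starts right after the control structure... */
	q->elts = (void *(*)[])(q + 1);
	/* ...and the MR cache table right after the last element. */
	q->cache_bh = (struct mr_cache (*)[])&(*q->elts)[desc_n];
	printf("elts=%p cache_bh=%p\n",
	       (void *)&(*q->elts)[0], (void *)&(*q->cache_bh)[0]);
	free(q);
	return 0;
}
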
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 1bbce3b7..d95c4bff 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1920,6 +1920,9 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* changes.
*/
wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
+ /* If there's only one MR, no need to replace LKEY in WQEs. */
+ if (unlikely(!IS_SINGLE_MR(rxq->mr_ctrl.bh_n)))
+ wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
if (len > DATA_LEN(seg)) {
len -= DATA_LEN(seg);
++NB_SEGS(pkt);
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index dac3b39f..7e811c10 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -82,17 +82,37 @@ struct mlx5_txq_stats {
struct priv;
-/* Memory region queue object. */
+/* Memory Region object. */
struct mlx5_mr {
- LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
- rte_atomic32_t refcnt; /*<< Reference counter. */
- uint32_t lkey; /*<< rte_cpu_to_be_32(mr->lkey) */
- uintptr_t start; /* Start address of MR */
- uintptr_t end; /* End address of MR */
- struct ibv_mr *mr; /*<< Memory Region. */
- struct rte_mempool *mp; /*<< Memory Pool. */
+ const struct rte_memseg *memseg;
+ struct ibv_mr *ibv_mr; /* Verbs Memory Region. */
};
+/* Cache entry for Memory Region. */
+struct mlx5_mr_cache {
+ uintptr_t start; /* Start address of MR. */
+ uintptr_t end; /* End address of MR. */
+ uint32_t lkey; /* rte_cpu_to_be_32(ibv_mr->lkey). */
+} __rte_packed;
+
+/* Per-queue MR control descriptor. */
+struct mlx5_mr_ctrl {
+ uint16_t bh_n; /* Size of MR cache table for bottom-half. */
+ uint16_t mru; /* Index of last hit entry. */
+ uint16_t head; /* Index of the oldest entry. */
+ struct mlx5_mr_cache cache[MLX5_MR_CACHE_N]; /* MR cache. */
+ struct mlx5_mr_cache (*cache_bh)[]; /* MR cache for bottom-half. */
+} __rte_packed;
+
+/* MR table size including padding at index 0. */
+#define MR_TABLE_SZ(n) ((n) + MLX5_MR_LOOKUP_TABLE_PAD)
+
+/* Actual table size excluding padding at index 0. */
+#define MR_N(n) ((n) - MLX5_MR_LOOKUP_TABLE_PAD)
+
+/* Whether there's only one entry in MR lookup table. */
+#define IS_SINGLE_MR(n) (MR_N(n) <= 1)
+
/* Compressed CQE context. */
struct rxq_zip {
uint16_t ai; /* Array index. */
@@ -118,9 +138,11 @@ struct mlx5_rxq_data {
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t port_id;
- uint16_t rq_ci;
- uint16_t rq_pi;
- uint16_t cq_ci;
+ uint32_t rq_ci;
+ uint32_t rq_pi;
+ uint32_t cq_ci;
+ uint16_t rq_repl_thresh; /* Threshold for buffer replenishment. */
+ struct mlx5_mr_ctrl mr_ctrl;
volatile struct mlx5_wqe_data_seg(*wqes)[];
volatile struct mlx5_cqe(*cqes)[];
struct rxq_zip zip; /* Compressed context. */
@@ -142,7 +164,6 @@ struct mlx5_rxq_ibv {
struct ibv_cq *cq; /* Completion Queue. */
struct ibv_wq *wq; /* Work Queue. */
struct ibv_comp_channel *channel;
- struct mlx5_mr *mr; /* Memory Region (for mp). */
};
/* RX queue control descriptor. */
@@ -200,15 +221,14 @@ struct mlx5_txq_data {
uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
- uint16_t mr_cache_idx; /* Index of last hit entry. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
uint32_t flags; /* Flags for Tx Queue. */
+ struct mlx5_mr_ctrl mr_ctrl;
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
volatile void *wqes; /* Work queue (use volatile to write into). */
volatile uint32_t *qp_db; /* Work queue doorbell. */
volatile uint32_t *cq_db; /* Completion queue doorbell. */
volatile void *bf_reg; /* Blueflame register remapped. */
- struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
} __rte_cache_aligned;
@@ -337,9 +357,10 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
/* mlx5_mr.c */
-void mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg);
-struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq,
- struct rte_mempool *mp, unsigned int idx);
+int mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_cache *lkp_tbl,
+ uint16_t n, struct rte_mempool *mp);
+uint32_t mlx5_rx_mb2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
+uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
#ifndef NDEBUG
/**
@@ -527,77 +548,102 @@ mlx5_tx_complete(struct mlx5_txq_data *txq)
}
/**
- * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
- * the cloned mbuf is allocated is returned instead.
+ * Look up the LKEY in the given lookup table by linear search. The last-hit
+ * entry is checked first; on a miss, the entire array is searched. If found,
+ * the last-hit index is updated and the LKEY returned.
*
- * @param buf
- * Pointer to mbuf.
+ * @param lkp_tbl
+ * Pointer to lookup table.
+ * @param[in,out] cached_idx
+ * Pointer to last-hit index.
+ * @param n
+ * Size of lookup table.
+ * @param addr
+ * Search key.
*
* @return
- * Memory pool where data is located for given mbuf.
+ * The LKEY found on success, UINT32_MAX on no match.
*/
-static struct rte_mempool *
-mlx5_tx_mb2mp(struct rte_mbuf *buf)
+static __rte_always_inline uint32_t
+mlx5_mr_lookup_cache(struct mlx5_mr_cache *lkp_tbl, uint16_t *cached_idx,
+ uint16_t n, uintptr_t addr)
{
- if (unlikely(RTE_MBUF_INDIRECT(buf)))
- return rte_mbuf_from_indirect(buf)->pool;
- return buf->pool;
+ uint16_t idx;
+
+ if (likely(addr >= lkp_tbl[*cached_idx].start &&
+ addr < lkp_tbl[*cached_idx].end))
+ return lkp_tbl[*cached_idx].lkey;
+ for (idx = 0; idx < n && lkp_tbl[idx].start != 0; ++idx) {
+ if (addr >= lkp_tbl[idx].start &&
+ addr < lkp_tbl[idx].end) {
+ /* Found. */
+ *cached_idx = idx;
+ return lkp_tbl[idx].lkey;
+ }
+ }
+ return UINT32_MAX;
}
/**
- * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
- * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
- * remove an entry first.
+ * Query LKEY from address for Rx.
+ *
+ * @param rxq
+ * Pointer to Rx queue structure.
+ * @param addr
+ * Address to search.
+ *
+ * @return
+ * LKEY on success.
+ */
+static __rte_always_inline uint32_t
+mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
+{
+ uint32_t lkey;
+
+ /* Linear search on MR cache array. */
+ lkey = mlx5_mr_lookup_cache(rxq->mr_ctrl.cache,
+ &rxq->mr_ctrl.mru,
+ MLX5_MR_CACHE_N, addr);
+ if (likely(lkey != UINT32_MAX))
+ return lkey;
+ DEBUG("Not found in rxq->mr_cache[], last-hit = %u, head = %u",
+ rxq->mr_ctrl.mru, rxq->mr_ctrl.head);
+ /* Take slower bottom-half (binary search) on miss. */
+ return mlx5_rx_mb2mr_bh(rxq, addr);
+}
+
+#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
+
+/**
+ * Query LKEY from address for Tx.
*
* @param txq
- * Pointer to TX queue structure.
- * @param[in] mp
- * Memory Pool for which a Memory Region lkey must be returned.
+ * Pointer to Tx queue structure.
+ * @param addr
+ * Address to search.
*
* @return
- * mr->lkey on success, (uint32_t)-1 on failure.
+ * LKEY on success.
*/
static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
+mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr)
{
- uint16_t i = txq->mr_cache_idx;
- uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
- struct mlx5_mr *mr;
-
- assert(i < RTE_DIM(txq->mp2mr));
- if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end > addr))
- return txq->mp2mr[i]->lkey;
- for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i] == NULL ||
- txq->mp2mr[i]->mr == NULL)) {
- /* Unknown MP, add a new MR for it. */
- break;
- }
- if (txq->mp2mr[i]->start <= addr &&
- txq->mp2mr[i]->end > addr) {
- assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
- txq->mr_cache_idx = i;
- return txq->mp2mr[i]->lkey;
- }
- }
- mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
- /*
- * Request the reference to use in this queue, the original one is
- * kept by the control plane.
- */
- if (mr) {
- rte_atomic32_inc(&mr->refcnt);
- txq->mr_cache_idx = i >= RTE_DIM(txq->mp2mr) ? i - 1 : i;
- return mr->lkey;
- } else {
- struct rte_mempool *mp = mlx5_tx_mb2mp(mb);
-
- DRV_LOG(WARNING, "failed to register mempool 0x%p(%s)",
- (void *)mp, mp->name);
- }
- return (uint32_t)-1;
+ uint32_t lkey;
+
+ /* Linear search on MR cache array. */
+ lkey = mlx5_mr_lookup_cache(txq->mr_ctrl.cache,
+ &txq->mr_ctrl.mru,
+ MLX5_MR_CACHE_N, addr);
+ if (likely(lkey != UINT32_MAX))
+ return lkey;
+ DEBUG("Not found in txq->mr_cache[], last-hit = %u, head = %u",
+ txq->mr_ctrl.mru, txq->mr_ctrl.head);
+ /* Take slower bottom-half (binary search) on miss. */
+ return mlx5_tx_mb2mr_bh(txq, addr);
}
+#define mlx5_tx_mb2mr(txq, mb) mlx5_tx_addr2mr(txq, (uintptr_t)((mb)->buf_addr))
+
/**
* Ring TX queue doorbell and flush the update if requested.
*
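
The datapath lookup is two-level: mlx5_rx_addr2mr()/mlx5_tx_addr2mr() first
scan the small per-queue cache[] linearly with a most-recently-used hint
(mru), and only a miss takes the binary-search bottom-half, which then
overwrites the oldest top-level entry (head). A standalone sketch of that
replacement policy, assuming simplified types and a stub bottom-half:

#include <stdint.h>
#include <stdio.h>

#define CACHE_N 8

struct mr_cache { uintptr_t start, end; uint32_t lkey; };

struct mr_ctrl {
	uint16_t mru;  /* Index of the last hit. */
	uint16_t head; /* Next victim, the oldest entry. */
	struct mr_cache cache[CACHE_N];
};

/* Stub slow path; the real one binary-searches the global MR table. */
static struct mr_cache
slow_lookup(uintptr_t addr)
{
	uintptr_t page = addr & ~(uintptr_t)0xfff;

	return (struct mr_cache){ page, page + 0x1000, 0x1234 };
}

static uint32_t
addr2lkey(struct mr_ctrl *c, uintptr_t addr)
{
	uint16_t i;

	/* Check the MRU entry first, then scan the whole array. */
	if (addr >= c->cache[c->mru].start && addr < c->cache[c->mru].end)
		return c->cache[c->mru].lkey;
	for (i = 0; i < CACHE_N; ++i) {
		if (addr >= c->cache[i].start && addr < c->cache[i].end) {
			c->mru = i;
			return c->cache[i].lkey;
		}
	}
	/* Miss: fill the oldest slot and advance the FIFO victim index. */
	c->cache[c->head] = slow_lookup(addr);
	c->mru = c->head;
	c->head = (c->head + 1) % CACHE_N;
	return c->cache[c->mru].lkey;
}

int
main(void)
{
	struct mr_ctrl c = { 0, 0, { { 0, 0, 0 } } };

	printf("lkey=0x%x\n", addr2lkey(&c, (uintptr_t)0x10001234));
	printf("lkey=0x%x\n", addr2lkey(&c, (uintptr_t)0x10001678));
	return 0;
}
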
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 982b8f1f..12465b43 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -316,7 +316,7 @@ mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
struct priv *priv = dev->data->dev_private;
if (!priv->tx_vec_en ||
- priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
+ priv->txqs_n > (unsigned int)priv->txqs_vec ||
priv->mps != MLX5_MPW_ENHANCED ||
priv->tso)
return -ENOTSUP;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
index d504e2ae..750559b8 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -115,9 +115,13 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
rxq->stats.rx_nombuf += n;
return;
}
- for (i = 0; i < n; ++i)
+ for (i = 0; i < n; ++i) {
wq[i].addr = rte_cpu_to_be_64((uintptr_t)elts[i]->buf_addr +
RTE_PKTMBUF_HEADROOM);
+ /* If there's only one MR, no need to replace LKEY in WQEs. */
+ if (unlikely(!IS_SINGLE_MR(rxq->mr_ctrl.bh_n)))
+ wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
+ }
rxq->rq_ci += n;
/* Prevent overflowing into consumed mbufs. */
elts_idx = rxq->rq_ci & q_mask;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index e748615e..ae37c2bd 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -756,7 +756,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
* N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
*/
repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n))
+ if (repl_n >= rxq->rq_repl_thresh)
mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 7e8c9b88..866a5e9b 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -737,7 +737,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n,
* N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
*/
repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
- if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n))
+ if (repl_n >= rxq->rq_repl_thresh)
mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 345ed707..e880d24c 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -356,10 +356,11 @@ int
mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct priv *priv = dev->data->dev_private;
- struct rte_eth_stats tmp = {0};
+ struct rte_eth_stats tmp;
unsigned int i;
unsigned int idx;
+ memset(&tmp, 0, sizeof(tmp));
/* Add software counters. */
for (i = 0; (i != priv->rxqs_n); ++i) {
struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 9a1d6f95..e6a29cb7 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -74,17 +74,10 @@ mlx5_txq_start(struct rte_eth_dev *dev)
int ret;
for (i = 0; i != priv->txqs_n; ++i) {
- unsigned int idx = 0;
- struct mlx5_mr *mr;
struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i);
if (!txq_ctrl)
continue;
- LIST_FOREACH(mr, &priv->mr, next) {
- mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mr->mp, idx++);
- if (idx == MLX5_PMD_TX_MP_CACHE)
- break;
- }
txq_alloc_elts(txq_ctrl);
txq_ctrl->ibv = mlx5_txq_ibv_new(dev, i);
if (!txq_ctrl->ibv) {
@@ -177,7 +170,6 @@ int
mlx5_dev_start(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
- struct mlx5_mr *mr = NULL;
int ret;
DRV_LOG(DEBUG, "port %u starting device", dev->data->port_id);
@@ -187,7 +179,6 @@ mlx5_dev_start(struct rte_eth_dev *dev)
dev->data->port_id, strerror(rte_errno));
goto error;
}
- rte_mempool_walk(mlx5_mp2mr_iter, priv);
ret = mlx5_txq_start(dev);
if (ret) {
DRV_LOG(ERR, "port %u Tx queue allocation failed: %s",
@@ -229,8 +220,6 @@ error:
ret = rte_errno; /* Save rte_errno before cleanup. */
/* Rollback. */
dev->data->dev_started = 0;
- for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
- mlx5_mr_release(mr);
mlx5_flow_stop(dev, &priv->flows);
mlx5_traffic_disable(dev);
mlx5_txq_stop(dev);
@@ -252,7 +241,6 @@ void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
- struct mlx5_mr *mr;
dev->data->dev_started = 0;
/* Prevent crashes when queues are still in use. */
@@ -267,8 +255,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev)
mlx5_dev_interrupt_handler_uninstall(dev);
mlx5_txq_stop(dev);
mlx5_rxq_stop(dev);
- for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
- mlx5_mr_release(mr);
mlx5_flow_delete_drop_queue(dev);
}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 760ac92d..2ead2177 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -339,7 +339,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
return NULL;
}
memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
- /* MRs will be registered in mp2mr[] later. */
attr.cq = (struct ibv_cq_init_attr_ex){
.comp_mask = 0,
};
@@ -622,10 +621,12 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
struct mlx5_txq_ctrl *tmpl;
+ const unsigned int mr_n = MR_TABLE_SZ(priv->mr_n);
tmpl = rte_calloc_socket("TXQ", 1,
sizeof(*tmpl) +
- desc * sizeof(struct rte_mbuf *),
+ desc * sizeof(struct rte_mbuf *) +
+ mr_n * sizeof(struct mlx5_mr_cache),
0, socket);
if (!tmpl) {
rte_errno = ENOMEM;
@@ -639,7 +640,6 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->idx = idx;
if (priv->mps == MLX5_MPW_ENHANCED)
tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
- /* MRs will be registered in mp2mr[] later. */
DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d",
dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr);
DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d",
@@ -700,6 +700,9 @@ mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->txq.tunnel_en = 1;
tmpl->txq.elts =
(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
+ tmpl->txq.mr_ctrl.cache_bh =
+ (struct mlx5_mr_cache (*)[mr_n])
+ &(*tmpl->txq.elts)[1 << tmpl->txq.elts_n];
tmpl->txq.stats.idx = idx;
rte_atomic32_inc(&tmpl->refcnt);
DRV_LOG(DEBUG, "port %u Tx queue %u: refcnt %d", dev->data->port_id,
@@ -728,15 +731,8 @@ mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
if ((*priv->txqs)[idx]) {
ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
txq);
- unsigned int i;
mlx5_txq_ibv_get(dev, idx);
- for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
- if (ctrl->txq.mp2mr[i])
- claim_nonzero
- (mlx5_mr_get(dev,
- ctrl->txq.mp2mr[i]->mp));
- }
rte_atomic32_inc(&ctrl->refcnt);
DRV_LOG(DEBUG, "port %u Tx queue %u refcnt %d",
dev->data->port_id,
@@ -760,7 +756,6 @@ int
mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
{
struct priv *priv = dev->data->dev_private;
- unsigned int i;
struct mlx5_txq_ctrl *txq;
size_t page_size = sysconf(_SC_PAGESIZE);
@@ -771,12 +766,6 @@ mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
txq->idx, rte_atomic32_read(&txq->refcnt));
if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
txq->ibv = NULL;
- for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
- if (txq->txq.mp2mr[i]) {
- mlx5_mr_release(txq->txq.mp2mr[i]);
- txq->txq.mp2mr[i] = NULL;
- }
- }
if (priv->uar_base)
munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
page_size), page_size);