aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/mlx4/mlx4_mr.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/mlx4/mlx4_mr.c')
-rw-r--r--drivers/net/mlx4/mlx4_mr.c293
1 files changed, 293 insertions, 0 deletions
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
new file mode 100644
index 00000000..2a3e2695
--- /dev/null
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -0,0 +1,293 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @file
+ * Memory management functions for mlx4 driver.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs headers do not support -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_spinlock.h>
+
+#include "mlx4_rxtx.h"
+#include "mlx4_utils.h"
+
+struct mlx4_check_mempool_data {
+ int ret;
+ char *start;
+ char *end;
+};
+
+/**
+ * Called by mlx4_check_mempool() when iterating the memory chunks.
+ *
+ * @param[in] mp
+ * Pointer to memory pool (unused).
+ * @param[in, out] data
+ * Pointer to shared buffer with mlx4_check_mempool().
+ * @param[in] memhdr
+ * Pointer to mempool chunk header.
+ * @param mem_idx
+ * Mempool element index (unused).
+ */
+static void
+mlx4_check_mempool_cb(struct rte_mempool *mp, void *opaque,
+ struct rte_mempool_memhdr *memhdr,
+ unsigned int mem_idx)
+{
+ struct mlx4_check_mempool_data *data = opaque;
+
+ (void)mp;
+ (void)mem_idx;
+ /* It already failed, skip the next chunks. */
+ if (data->ret != 0)
+ return;
+ /* It is the first chunk. */
+ if (data->start == NULL && data->end == NULL) {
+ data->start = memhdr->addr;
+ data->end = data->start + memhdr->len;
+ return;
+ }
+ if (data->end == memhdr->addr) {
+ data->end += memhdr->len;
+ return;
+ }
+ if (data->start == (char *)memhdr->addr + memhdr->len) {
+ data->start -= memhdr->len;
+ return;
+ }
+ /* Error, mempool is not virtually contiguous. */
+ data->ret = -1;
+}
+
+/**
+ * Check if a mempool can be used: it must be virtually contiguous.
+ *
+ * @param[in] mp
+ * Pointer to memory pool.
+ * @param[out] start
+ * Pointer to the start address of the mempool virtual memory area.
+ * @param[out] end
+ * Pointer to the end address of the mempool virtual memory area.
+ *
+ * @return
+ * 0 on success (mempool is virtually contiguous), -1 on error.
+ */
+static int
+mlx4_check_mempool(struct rte_mempool *mp, uintptr_t *start, uintptr_t *end)
+{
+ struct mlx4_check_mempool_data data;
+
+ memset(&data, 0, sizeof(data));
+ rte_mempool_mem_iter(mp, mlx4_check_mempool_cb, &data);
+ *start = (uintptr_t)data.start;
+ *end = (uintptr_t)data.end;
+ return data.ret;
+}
+
+/**
+ * Obtain a memory region from a memory pool.
+ *
+ * If a matching memory region already exists, it is returned with its
+ * reference count incremented, otherwise a new one is registered.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param mp
+ * Pointer to memory pool.
+ *
+ * @return
+ * Memory region pointer, NULL in case of error and rte_errno is set.
+ */
+struct mlx4_mr *
+mlx4_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ uintptr_t start;
+ uintptr_t end;
+ unsigned int i;
+ struct mlx4_mr *mr;
+
+ if (mlx4_check_mempool(mp, &start, &end) != 0) {
+ rte_errno = EINVAL;
+ ERROR("mempool %p: not virtually contiguous",
+ (void *)mp);
+ return NULL;
+ }
+ DEBUG("mempool %p area start=%p end=%p size=%zu",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ /* Round start and end to page boundary if found in memory segments. */
+ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+ uintptr_t addr = (uintptr_t)ms[i].addr;
+ size_t len = ms[i].len;
+ unsigned int align = ms[i].hugepage_sz;
+
+ if ((start > addr) && (start < addr + len))
+ start = RTE_ALIGN_FLOOR(start, align);
+ if ((end > addr) && (end < addr + len))
+ end = RTE_ALIGN_CEIL(end, align);
+ }
+ DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ rte_spinlock_lock(&priv->mr_lock);
+ LIST_FOREACH(mr, &priv->mr, next)
+ if (mp == mr->mp && start >= mr->start && end <= mr->end)
+ break;
+ if (mr) {
+ ++mr->refcnt;
+ goto release;
+ }
+ mr = rte_malloc(__func__, sizeof(*mr), 0);
+ if (!mr) {
+ rte_errno = ENOMEM;
+ goto release;
+ }
+ *mr = (struct mlx4_mr){
+ .start = start,
+ .end = end,
+ .refcnt = 1,
+ .priv = priv,
+ .mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+ IBV_ACCESS_LOCAL_WRITE),
+ .mp = mp,
+ };
+ if (mr->mr) {
+ mr->lkey = mr->mr->lkey;
+ LIST_INSERT_HEAD(&priv->mr, mr, next);
+ } else {
+ rte_free(mr);
+ mr = NULL;
+ rte_errno = errno ? errno : EINVAL;
+ }
+release:
+ rte_spinlock_unlock(&priv->mr_lock);
+ return mr;
+}
+
+/**
+ * Release a memory region.
+ *
+ * This function decrements its reference count and destroys it after
+ * reaching 0.
+ *
+ * Note to avoid race conditions given this function may be used from the
+ * data plane, it's extremely important that each user holds its own
+ * reference.
+ *
+ * @param mr
+ * Memory region to release.
+ */
+void
+mlx4_mr_put(struct mlx4_mr *mr)
+{
+ struct priv *priv = mr->priv;
+
+ rte_spinlock_lock(&priv->mr_lock);
+ assert(mr->refcnt);
+ if (--mr->refcnt)
+ goto release;
+ LIST_REMOVE(mr, next);
+ claim_zero(ibv_dereg_mr(mr->mr));
+ rte_free(mr);
+release:
+ rte_spinlock_unlock(&priv->mr_lock);
+}
+
+/**
+ * Add memory region (MR) <-> memory pool (MP) association to txq->mp2mr[].
+ * If mp2mr[] is full, remove an entry first.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param[in] mp
+ * Memory pool for which a memory region lkey must be added.
+ * @param[in] i
+ * Index in memory pool (MP) where to add memory region (MR).
+ *
+ * @return
+ * Added mr->lkey on success, (uint32_t)-1 on failure.
+ */
+uint32_t
+mlx4_txq_add_mr(struct txq *txq, struct rte_mempool *mp, uint32_t i)
+{
+ struct mlx4_mr *mr;
+
+ /* Add a new entry, register MR first. */
+ DEBUG("%p: discovered new memory pool \"%s\" (%p)",
+ (void *)txq, mp->name, (void *)mp);
+ mr = mlx4_mr_get(txq->priv, mp);
+ if (unlikely(mr == NULL)) {
+ DEBUG("%p: unable to configure MR, mlx4_mr_get() failed",
+ (void *)txq);
+ return (uint32_t)-1;
+ }
+ if (unlikely(i == RTE_DIM(txq->mp2mr))) {
+ /* Table is full, remove oldest entry. */
+ DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
+ (void *)txq);
+ --i;
+ mlx4_mr_put(txq->mp2mr[0].mr);
+ memmove(&txq->mp2mr[0], &txq->mp2mr[1],
+ (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
+ }
+ /* Store the new entry. */
+ txq->mp2mr[i].mp = mp;
+ txq->mp2mr[i].mr = mr;
+ txq->mp2mr[i].lkey = mr->lkey;
+ DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
+ (void *)txq, mp->name, (void *)mp, txq->mp2mr[i].lkey);
+ return txq->mp2mr[i].lkey;
+}