summaryrefslogtreecommitdiffstats
path: root/src/plugins/rdma/device.c
diff options
context:
space:
mode:
authorBenoît Ganne <bganne@cisco.com>2019-12-16 10:42:25 +0100
committerDamjan Marion <dmarion@me.com>2020-03-16 19:09:39 +0000
commitdc812d9a71f2f5105e4aaba50fd98ea3b0b50a9b (patch)
tree0a6db468c41b164e1a71bbebc613e6b469c617c3 /src/plugins/rdma/device.c
parentdd648aac0615c416507de9097b6f50db16ad319c (diff)
rdma: introduce direct verb for Cx4/5 tx
Direct Verb allows for direct access to NIC HW rx/tx rings. This patch introduces TX direct verb support for Mellanox ConnectX-4/5 adapters. 'dv' mode must be explicitly selected at interface creation to benefit from this. Type: feature Change-Id: If830ba9f33db73299acdbddc68b5c09eaf6add98 Signed-off-by: Benoît Ganne <bganne@cisco.com>
Diffstat (limited to 'src/plugins/rdma/device.c')
-rw-r--r--src/plugins/rdma/device.c85
1 files changed, 66 insertions, 19 deletions
diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c
index f33d55c85a0..eb13f855b1a 100644
--- a/src/plugins/rdma/device.c
+++ b/src/plugins/rdma/device.c
@@ -535,7 +535,8 @@ rdma_txq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
vec_validate_aligned (rd->txqs, qid, CLIB_CACHE_LINE_BYTES);
txq = vec_elt_at_index (rd->txqs, qid);
- txq->size = n_desc;
+ ASSERT (is_pow2 (n_desc));
+ txq->bufs_log2sz = min_log2 (n_desc);
vec_validate_aligned (txq->bufs, n_desc - 1, CLIB_CACHE_LINE_BYTES);
if ((txq->cq = ibv_create_cq (rd->ctx, n_desc, NULL, NULL, 0)) == 0)
@@ -569,6 +570,57 @@ rdma_txq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
qpa.qp_state = IBV_QPS_RTS;
if (ibv_modify_qp (txq->qp, &qpa, qp_flags) != 0)
return clib_error_return_unix (0, "Modify QP (send) Failed");
+
+ txq->ibv_cq = txq->cq;
+ txq->ibv_qp = txq->qp;
+
+ if (rd->flags & RDMA_DEVICE_F_MLX5DV)
+ {
+ rdma_mlx5_wqe_t *tmpl = (void *) txq->dv_wqe_tmpl;
+ struct mlx5dv_cq dv_cq;
+ struct mlx5dv_qp dv_qp;
+ struct mlx5dv_obj obj = { };
+
+ obj.cq.in = txq->cq;
+ obj.cq.out = &dv_cq;
+ obj.qp.in = txq->qp;
+ obj.qp.out = &dv_qp;
+
+ if (mlx5dv_init_obj (&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP))
+ return clib_error_return_unix (0, "DV init obj failed");
+
+ if (RDMA_TXQ_BUF_SZ (txq) > dv_qp.sq.wqe_cnt
+ || !is_pow2 (dv_qp.sq.wqe_cnt)
+ || sizeof (rdma_mlx5_wqe_t) != dv_qp.sq.stride
+ || (uword) dv_qp.sq.buf % sizeof (rdma_mlx5_wqe_t))
+ return clib_error_return (0, "Unsupported DV SQ parameters");
+
+ if (RDMA_TXQ_BUF_SZ (txq) > dv_cq.cqe_cnt
+ || !is_pow2 (dv_cq.cqe_cnt)
+ || sizeof (struct mlx5_cqe64) != dv_cq.cqe_size
+ || (uword) dv_cq.buf % sizeof (struct mlx5_cqe64))
+ return clib_error_return (0, "Unsupported DV CQ parameters");
+
+ /* get SQ and doorbell addresses */
+ txq->dv_sq_wqes = dv_qp.sq.buf;
+ txq->dv_sq_dbrec = dv_qp.dbrec;
+ txq->dv_sq_db = dv_qp.bf.reg;
+ txq->dv_sq_log2sz = min_log2 (dv_qp.sq.wqe_cnt);
+
+ /* get CQ and doorbell addresses */
+ txq->dv_cq_cqes = dv_cq.buf;
+ txq->dv_cq_dbrec = dv_cq.dbrec;
+ txq->dv_cq_log2sz = min_log2 (dv_cq.cqe_cnt);
+
+ /* init tx desc template */
+ STATIC_ASSERT_SIZEOF (txq->dv_wqe_tmpl, sizeof (*tmpl));
+ mlx5dv_set_ctrl_seg (&tmpl->ctrl, 0, MLX5_OPCODE_SEND, 0,
+ txq->qp->qp_num, 0, RDMA_MLX5_WQE_DS, 0,
+ RDMA_TXQ_DV_INVALID_ID);
+ /* FIXME: mlx5dv_set_eth_seg(&tmpl->eseg, MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM, 0, 0, 0); */
+ mlx5dv_set_data_seg (&tmpl->dseg, 0, rd->lkey, 0);
+ }
+
return 0;
}
@@ -587,6 +639,13 @@ rdma_dev_init (vlib_main_t * vm, rdma_device_t * rd, u32 rxq_size,
if ((rd->pd = ibv_alloc_pd (rd->ctx)) == 0)
return clib_error_return_unix (0, "PD Alloc Failed");
+ if ((rd->mr = ibv_reg_mr (rd->pd, (void *) bm->buffer_mem_start,
+ bm->buffer_mem_size,
+ IBV_ACCESS_LOCAL_WRITE)) == 0)
+ return clib_error_return_unix (0, "Register MR Failed");
+
+ rd->lkey = rd->mr->lkey; /* avoid indirection in datapath */
+
ethernet_mac_address_generate (rd->hwaddr.bytes);
if ((rd->mr = ibv_reg_mr (rd->pd, (void *) bm->buffer_mem_start,
@@ -657,28 +716,16 @@ rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args)
}
if (args->rxq_size < VLIB_FRAME_SIZE || args->txq_size < VLIB_FRAME_SIZE ||
+ args->rxq_size > 65535 || args->txq_size > 65535 ||
!is_pow2 (args->rxq_size) || !is_pow2 (args->txq_size))
{
args->rv = VNET_API_ERROR_INVALID_VALUE;
- args->error =
- clib_error_return (0, "queue size must be a power of two >= %i",
- VLIB_FRAME_SIZE);
+ args->error = clib_error_return (0, "queue size must be a power of two "
+ "between %i and 65535",
+ VLIB_FRAME_SIZE);
goto err0;
}
- switch (args->mode)
- {
- case RDMA_MODE_AUTO:
- break;
- case RDMA_MODE_IBV:
- break;
- case RDMA_MODE_DV:
- args->rv = VNET_API_ERROR_INVALID_VALUE;
- args->error = clib_error_return (0, "unsupported mode");
- goto err0;
- break;
- }
-
dev_list = ibv_get_device_list (&n_devs);
if (n_devs == 0)
{
@@ -762,8 +809,8 @@ rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args)
}
}
- if ((args->error =
- rdma_dev_init (vm, rd, args->rxq_size, args->txq_size, args->rxq_num)))
+ if ((args->error = rdma_dev_init (vm, rd, args->rxq_size, args->txq_size,
+ args->rxq_num)))
goto err2;
if ((args->error = rdma_register_interface (vnm, rd)))