aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/rdma/device.c
diff options
context:
space:
mode:
authorMohammed Hawari <mohammed@hawari.fr>2020-10-21 14:48:38 +0200
committerDamjan Marion <dmarion@me.com>2020-10-24 09:25:19 +0000
commit4df9f737a24be94c2988f18337a4ad845b1b0186 (patch)
tree292a032012d443205db2988667946bb77aeb3008 /src/plugins/rdma/device.c
parent91603958d1d4fc3114739f9b264808940942e5c8 (diff)
rdma: implement striding rq for multiseg rx
This change leverages the striding RQ feature of ConnectX-5 adapters to support chained buffers on the RX path. In Striding RQ mode, WQE are SG lists of data segments, each mapped to a vlib_buffer. When a packet is received, it can consume one or multiple data segments belonging to the WQE, without wasting the whole WQE. Change-Id: I74eba5b2c2c66538e75e046335058ba011cb27fd Type: improvement Signed-off-by: Mohammed Hawari <mohammed@hawari.fr>
Diffstat (limited to 'src/plugins/rdma/device.c')
-rw-r--r--src/plugins/rdma/device.c88
1 files changed, 82 insertions, 6 deletions
diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c
index c2eb0006217..9b6fda982ca 100644
--- a/src/plugins/rdma/device.c
+++ b/src/plugins/rdma/device.c
@@ -426,10 +426,14 @@ rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
struct ibv_cq_init_attr_ex cqa = { };
struct ibv_wq_attr wqa;
struct ibv_cq_ex *cqex;
+ struct mlx5dv_wq_init_attr dv_wqia = { };
vec_validate_aligned (rd->rxqs, qid, CLIB_CACHE_LINE_BYTES);
rxq = vec_elt_at_index (rd->rxqs, qid);
rxq->size = n_desc;
+ rxq->log_wqe_sz = 0;
+ rxq->log_stride_per_wqe = 0;
+ rxq->buf_sz = vlib_buffer_get_default_data_size (vm);
vec_validate_aligned (rxq->bufs, n_desc - 1, CLIB_CACHE_LINE_BYTES);
cqa.cqe = n_desc;
@@ -456,7 +460,54 @@ rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
wqia.max_sge = 1;
wqia.pd = rd->pd;
wqia.cq = rxq->cq;
- if ((rxq->wq = ibv_create_wq (rd->ctx, &wqia)) == 0)
+ if (rd->flags & RDMA_DEVICE_F_MLX5DV)
+ {
+ if (rd->flags & RDMA_DEVICE_F_STRIDING_RQ)
+ {
+ /* In STRIDING_RQ mode, map a descriptor to a stride, not a full WQE buffer */
+ uword data_seg_log2_sz =
+ min_log2 (vlib_buffer_get_default_data_size (vm));
+
+ /* The trick is also to map a descriptor to a data segment in the WQE SG list
+ The number of strides per WQE and the size of a WQE (in 16-bytes words) both
+ must be powers of two.
+ Moreover, in striding RQ mode, WQEs must include the SRQ header, which occupies
+ one 16-bytes word. That is why WQEs have 2*RDMA_RXQ_MAX_CHAIN_SZ 16-bytes words:
+ - One for the SRQ Header
+ - RDMA_RXQ_MAX_CHAIN_SZ for the different data segments (each mapped to
+ a stride, and a vlib_buffer)
+ - RDMA_RXQ_MAX_CHAIN_SZ-1 null data segments
+ */
+
+ wqia.max_sge = RDMA_RXQ_MAX_CHAIN_SZ;
+ dv_wqia.comp_mask = MLX5DV_WQ_INIT_ATTR_MASK_STRIDING_RQ;
+ dv_wqia.striding_rq_attrs.two_byte_shift_en = 0;
+ dv_wqia.striding_rq_attrs.single_wqe_log_num_of_strides =
+ RDMA_RXQ_MAX_CHAIN_LOG_SZ;
+ dv_wqia.striding_rq_attrs.single_stride_log_num_of_bytes =
+ data_seg_log2_sz;
+ wqia.max_wr >>= RDMA_RXQ_MAX_CHAIN_LOG_SZ;
+ rxq->log_wqe_sz = RDMA_RXQ_MAX_CHAIN_LOG_SZ + 1;
+ rxq->log_stride_per_wqe = RDMA_RXQ_MAX_CHAIN_LOG_SZ;
+ rxq->buf_sz = 1 << data_seg_log2_sz;
+ }
+ else
+ {
+ /* For now, in non STRIDING_RQ mode, SG operations/chained buffers
+ are not supported */
+ wqia.max_sge = 1;
+ }
+
+ if ((rxq->wq = mlx5dv_create_wq (rd->ctx, &wqia, &dv_wqia)))
+ {
+ rxq->wq->events_completed = 0;
+ pthread_mutex_init (&rxq->wq->mutex, NULL);
+ pthread_cond_init (&rxq->wq->cond, NULL);
+ }
+ else
+ return clib_error_return_unix (0, "Create WQ Failed");
+ }
+ else if ((rxq->wq = ibv_create_wq (rd->ctx, &wqia)) == 0)
return clib_error_return_unix (0, "Create WQ Failed");
memset (&wqa, 0, sizeof (wqa));
@@ -471,6 +522,7 @@ rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
struct mlx5dv_cq dv_cq;
struct mlx5dv_rwq dv_rwq;
u64 qw0;
+ u64 qw0_nullseg;
obj.cq.in = rxq->cq;
obj.cq.out = &dv_cq;
@@ -488,16 +540,26 @@ rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
rxq->cq_db = (volatile u32 *) dv_cq.dbrec;
rxq->cqn = dv_cq.cqn;
- rxq->wqes = (mlx5dv_rwq_t *) dv_rwq.buf;
+ rxq->wqes = (mlx5dv_wqe_ds_t *) dv_rwq.buf;
rxq->wq_db = (volatile u32 *) dv_rwq.dbrec;
rxq->wq_stride = dv_rwq.stride;
rxq->wqe_cnt = dv_rwq.wqe_cnt;
- qw0 = clib_host_to_net_u32 (vlib_buffer_get_default_data_size (vm));
+ qw0 = clib_host_to_net_u32 (rxq->buf_sz);
+ qw0_nullseg = 0;
qw0 |= (u64) clib_host_to_net_u32 (rd->lkey) << 32;
-
- for (int i = 0; i < rxq->size; i++)
- rxq->wqes[i].dsz_and_lkey = qw0;
+ qw0_nullseg |= (u64) clib_host_to_net_u32 (rd->lkey) << 32;
+
+/* Prefill the different 16 bytes words of the WQ. If not in striding RQ mode,
+ init with qw0 only with segments of rxq->buf_sz. Otherwise, for each WQE, the
+ RDMA_RXQ_MAX_CHAIN_SZ + 1 first 16-bytes words are initialised with qw0, the rest
+ are null segments */
+ for (int i = 0; i < rxq->wqe_cnt << rxq->log_wqe_sz; i++)
+ if (!(rd->flags & RDMA_DEVICE_F_STRIDING_RQ)
+ || (i == 0) || !(((i - 1) >> rxq->log_stride_per_wqe) & 0x1))
+ rxq->wqes[i].dsz_and_lkey = qw0;
+ else
+ rxq->wqes[i].dsz_and_lkey = qw0_nullseg;
for (int i = 0; i < (1 << rxq->log2_cq_size); i++)
rxq->cqes[i].opcode_cqefmt_se_owner = 0xff;
@@ -824,11 +886,25 @@ rdma_create_if (vlib_main_t * vm, rdma_create_if_args_t * args)
if (args->mode != RDMA_MODE_IBV)
{
struct mlx5dv_context mlx5dv_attrs = { };
+ mlx5dv_attrs.comp_mask |= MLX5DV_CONTEXT_MASK_STRIDING_RQ;
if (mlx5dv_query_device (rd->ctx, &mlx5dv_attrs) == 0)
{
+ uword data_seg_log2_sz =
+ min_log2 (vlib_buffer_get_default_data_size (vm));
+
if ((mlx5dv_attrs.flags & MLX5DV_CONTEXT_FLAGS_CQE_V1))
rd->flags |= RDMA_DEVICE_F_MLX5DV;
+
+ if (data_seg_log2_sz <=
+ mlx5dv_attrs.striding_rq_caps.max_single_stride_log_num_of_bytes
+ && data_seg_log2_sz >=
+ mlx5dv_attrs.striding_rq_caps.min_single_stride_log_num_of_bytes
+ && RDMA_RXQ_MAX_CHAIN_LOG_SZ >=
+ mlx5dv_attrs.striding_rq_caps.min_single_wqe_log_num_of_strides
+ && RDMA_RXQ_MAX_CHAIN_LOG_SZ <=
+ mlx5dv_attrs.striding_rq_caps.max_single_wqe_log_num_of_strides)
+ rd->flags |= RDMA_DEVICE_F_STRIDING_RQ;
}
else
{