summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/plugins/rdma/output.c44
1 files changed, 20 insertions, 24 deletions
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c
index afc48451137..3cc3ab74437 100644
--- a/src/plugins/rdma/output.c
+++ b/src/plugins/rdma/output.c
@@ -143,23 +143,22 @@ rdma_mlx5_wqe_init (rdma_mlx5_wqe_t * wqe, const void *tmpl,
* plenty of descriptors available
*/
static_always_inline u32
-rdma_device_output_tx_mlx5_chained (vlib_main_t * vm,
- const vlib_node_runtime_t * node,
- const rdma_device_t * rd,
- rdma_txq_t * txq, u32 n_left_from, u32 n,
- u32 * bi, vlib_buffer_t ** b,
- rdma_mlx5_wqe_t * wqe, u16 tail)
+rdma_device_output_tx_mlx5_chained (vlib_main_t *vm,
+ const vlib_node_runtime_t *node,
+ const rdma_device_t *rd, rdma_txq_t *txq,
+ const u32 n_left_from, const u32 *bi,
+ vlib_buffer_t **b, u16 tail)
{
- rdma_mlx5_wqe_t *last = wqe;
u32 wqe_n = RDMA_TXQ_AVAIL_SZ (txq, txq->head, tail);
u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
u32 mask = pow2_mask (txq->bufs_log2sz);
u32 dseg_mask = RDMA_TXQ_DV_DSEG_SZ (txq) - 1;
const u32 lkey = clib_host_to_net_u32 (rd->lkey);
+ const u32 done = RDMA_TXQ_USED_SZ (txq->tail, tail);
+ u32 n = n_left_from - done;
+ rdma_mlx5_wqe_t *last = txq->dv_sq_wqes + (tail & sq_mask);
- vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
- RDMA_TXQ_BUF_SZ (txq), n_left_from - n);
- bi += n_left_from - n;
+ bi += done;
while (n >= 1 && wqe_n >= 1)
{
@@ -273,11 +272,10 @@ rdma_device_output_tx_mlx5_chained (vlib_main_t * vm,
}
static_always_inline u32
-rdma_device_output_tx_mlx5 (vlib_main_t * vm,
- const vlib_node_runtime_t * node,
- const rdma_device_t * rd, rdma_txq_t * txq,
- const u32 n_left_from, u32 * bi,
- vlib_buffer_t ** b)
+rdma_device_output_tx_mlx5 (vlib_main_t *vm, const vlib_node_runtime_t *node,
+ const rdma_device_t *rd, rdma_txq_t *txq,
+ const u32 n_left_from, const u32 *bi,
+ vlib_buffer_t **b)
{
u32 sq_mask = pow2_mask (txq->dv_sq_log2sz);
@@ -300,8 +298,7 @@ wrap_around:
u32 flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
if (PREDICT_FALSE (flags & VLIB_BUFFER_NEXT_PRESENT))
return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq,
- n_left_from, n, bi, b, wqe,
- tail);
+ n_left_from, bi, b, tail);
vlib_prefetch_buffer_header (b[4], LOAD);
rdma_mlx5_wqe_init (wqe + 0, txq->dv_wqe_tmpl, b[0], tail + 0);
@@ -325,8 +322,7 @@ wrap_around:
{
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT))
return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq,
- n_left_from, n, bi, b, wqe,
- tail);
+ n_left_from, bi, b, tail);
rdma_mlx5_wqe_init (wqe, txq->dv_wqe_tmpl, b[0], tail);
@@ -344,8 +340,6 @@ wrap_around:
}
rdma_device_output_tx_mlx5_doorbell (txq, &wqe[-1], tail, sq_mask);
- vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
- RDMA_TXQ_BUF_SZ (txq), n_left_from);
txq->tail = tail;
return n_left_from;
}
@@ -393,7 +387,6 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
const rdma_device_t * rd, rdma_txq_t * txq,
u32 n_left_from, u32 * bi, vlib_buffer_t ** b)
{
- const u32 mask = pow2_mask (txq->bufs_log2sz);
struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr;
struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
u32 n = n_left_from;
@@ -479,8 +472,6 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
n_left_from - (w - wr));
n_left_from = w - wr;
}
- vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
- RDMA_TXQ_BUF_SZ (txq), n_left_from);
txq->tail += n_left_from;
return n_left_from;
}
@@ -505,6 +496,7 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
u32 n_left_from, u32 * bi, int is_mlx5dv)
{
vlib_buffer_t *b[VLIB_FRAME_SIZE];
+ const u32 mask = pow2_mask (txq->bufs_log2sz);
/* do not enqueue more packet than ring space */
n_left_from = clib_min (n_left_from, RDMA_TXQ_AVAIL_SZ (txq, txq->head,
@@ -513,6 +505,10 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
if (PREDICT_FALSE (n_left_from == 0))
return 0;
+ /* speculatively copy buffer indices */
+ vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask,
+ RDMA_TXQ_BUF_SZ (txq), n_left_from);
+
vlib_get_buffers (vm, bi, b, n_left_from);
n_left_from = is_mlx5dv ?