diff options
Diffstat (limited to 'src/plugins/rdma/output.c')
-rw-r--r-- | src/plugins/rdma/output.c | 44 |
1 files changed, 20 insertions, 24 deletions
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c index afc48451137..3cc3ab74437 100644 --- a/src/plugins/rdma/output.c +++ b/src/plugins/rdma/output.c @@ -143,23 +143,22 @@ rdma_mlx5_wqe_init (rdma_mlx5_wqe_t * wqe, const void *tmpl, * plenty of descriptors available */ static_always_inline u32 -rdma_device_output_tx_mlx5_chained (vlib_main_t * vm, - const vlib_node_runtime_t * node, - const rdma_device_t * rd, - rdma_txq_t * txq, u32 n_left_from, u32 n, - u32 * bi, vlib_buffer_t ** b, - rdma_mlx5_wqe_t * wqe, u16 tail) +rdma_device_output_tx_mlx5_chained (vlib_main_t *vm, + const vlib_node_runtime_t *node, + const rdma_device_t *rd, rdma_txq_t *txq, + const u32 n_left_from, const u32 *bi, + vlib_buffer_t **b, u16 tail) { - rdma_mlx5_wqe_t *last = wqe; u32 wqe_n = RDMA_TXQ_AVAIL_SZ (txq, txq->head, tail); u32 sq_mask = pow2_mask (txq->dv_sq_log2sz); u32 mask = pow2_mask (txq->bufs_log2sz); u32 dseg_mask = RDMA_TXQ_DV_DSEG_SZ (txq) - 1; const u32 lkey = clib_host_to_net_u32 (rd->lkey); + const u32 done = RDMA_TXQ_USED_SZ (txq->tail, tail); + u32 n = n_left_from - done; + rdma_mlx5_wqe_t *last = txq->dv_sq_wqes + (tail & sq_mask); - vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask, - RDMA_TXQ_BUF_SZ (txq), n_left_from - n); - bi += n_left_from - n; + bi += done; while (n >= 1 && wqe_n >= 1) { @@ -273,11 +272,10 @@ rdma_device_output_tx_mlx5_chained (vlib_main_t * vm, } static_always_inline u32 -rdma_device_output_tx_mlx5 (vlib_main_t * vm, - const vlib_node_runtime_t * node, - const rdma_device_t * rd, rdma_txq_t * txq, - const u32 n_left_from, u32 * bi, - vlib_buffer_t ** b) +rdma_device_output_tx_mlx5 (vlib_main_t *vm, const vlib_node_runtime_t *node, + const rdma_device_t *rd, rdma_txq_t *txq, + const u32 n_left_from, const u32 *bi, + vlib_buffer_t **b) { u32 sq_mask = pow2_mask (txq->dv_sq_log2sz); @@ -300,8 +298,7 @@ wrap_around: u32 flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags; if (PREDICT_FALSE (flags & VLIB_BUFFER_NEXT_PRESENT)) return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq, - n_left_from, n, bi, b, wqe, - tail); + n_left_from, bi, b, tail); vlib_prefetch_buffer_header (b[4], LOAD); rdma_mlx5_wqe_init (wqe + 0, txq->dv_wqe_tmpl, b[0], tail + 0); @@ -325,8 +322,7 @@ wrap_around: { if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)) return rdma_device_output_tx_mlx5_chained (vm, node, rd, txq, - n_left_from, n, bi, b, wqe, - tail); + n_left_from, bi, b, tail); rdma_mlx5_wqe_init (wqe, txq->dv_wqe_tmpl, b[0], tail); @@ -344,8 +340,6 @@ wrap_around: } rdma_device_output_tx_mlx5_doorbell (txq, &wqe[-1], tail, sq_mask); - vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask, - RDMA_TXQ_BUF_SZ (txq), n_left_from); txq->tail = tail; return n_left_from; } @@ -393,7 +387,6 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm, const rdma_device_t * rd, rdma_txq_t * txq, u32 n_left_from, u32 * bi, vlib_buffer_t ** b) { - const u32 mask = pow2_mask (txq->bufs_log2sz); struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr; struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge; u32 n = n_left_from; @@ -479,8 +472,6 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm, n_left_from - (w - wr)); n_left_from = w - wr; } - vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask, - RDMA_TXQ_BUF_SZ (txq), n_left_from); txq->tail += n_left_from; return n_left_from; } @@ -505,6 +496,7 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node, u32 n_left_from, u32 * bi, int is_mlx5dv) { vlib_buffer_t *b[VLIB_FRAME_SIZE]; + const u32 mask = pow2_mask (txq->bufs_log2sz); /* do not enqueue more packet than ring space */ n_left_from = clib_min (n_left_from, RDMA_TXQ_AVAIL_SZ (txq, txq->head, @@ -513,6 +505,10 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node, if (PREDICT_FALSE (n_left_from == 0)) return 0; + /* speculatively copy buffer indices */ + vlib_buffer_copy_indices_to_ring (txq->bufs, bi, txq->tail & mask, + RDMA_TXQ_BUF_SZ (txq), n_left_from); + vlib_get_buffers (vm, bi, b, n_left_from); n_left_from = is_mlx5dv ? |