author    | Benoît Ganne <bganne@cisco.com> | 2019-08-21 15:11:43 +0200
committer | Damjan Marion <dmarion@me.com>  | 2019-08-26 14:10:36 +0000
commit    | e7e8bf37f100b20acb99957572f1796f648c2853 (patch)
tree      | 393bd5d4e67a7ee12d53fe53d6e32c9be064a73d /src/plugins/rdma/output.c
parent    | 92f190a802b6999ce68696032e556aa75171e1cc (diff)
rdma: use rings for buffers management
Refactor the rdma driver for improved performance and to prepare for raw
datapath access.
Type: refactor
Change-Id: Iae31872055a6947708ea9f430bd1dc083ea63b5a
Signed-off-by: Benoît Ganne <bganne@cisco.com>
Diffstat (limited to 'src/plugins/rdma/output.c')
-rw-r--r-- | src/plugins/rdma/output.c | 135 |
1 file changed, 82 insertions(+), 53 deletions(-)
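The change replaces per-packet completion tracking with a single ring per TX queue: `txq->bufs` is a power-of-two array of buffer indices, and `txq->head`/`txq->tail` are free-running u32 counters that are only masked with `size - 1` when an array slot is actually touched. Free space is `size - (tail - head)`, which stays correct across u32 wrap-around. Below is a minimal, self-contained sketch of that arithmetic for illustration only; the `ring_t`, `ring_free_slots` and `ring_enqueue` names are invented here and are not part of VPP or of this patch.

```c
/* Illustrative head/tail ring, mirroring the arithmetic used by the txq in
 * this change.  All names here are hypothetical, not VPP APIs. */
#include <stdint.h>
#include <string.h>

typedef struct
{
  uint32_t *bufs;		/* buffer indices, array of 'size' entries */
  uint32_t size;		/* must be a power of two                  */
  uint32_t head;		/* free pointer: only ever incremented     */
  uint32_t tail;		/* enqueue pointer: only ever incremented  */
} ring_t;

/* space left in the ring; (tail - head) is modular, so this stays correct
 * even after the u32 counters wrap */
static inline uint32_t
ring_free_slots (const ring_t * r)
{
  return r->size - (r->tail - r->head);
}

/* enqueue up to n buffer indices without wrapping inside the copy, the same
 * "avoid wrap-around logic in core loop" trick as in the patch */
static inline uint32_t
ring_enqueue (ring_t * r, const uint32_t * bi, uint32_t n)
{
  uint32_t slot = r->tail & (r->size - 1);
  uint32_t space = ring_free_slots (r);

  if (n > space)
    n = space;			/* do not overrun the consumer           */
  if (n > r->size - slot)
    n = r->size - slot;		/* stop at the end of the array so the
				   copy never has to wrap                */
  memcpy (r->bufs + slot, bi, n * sizeof (bi[0]));
  r->tail += n;
  return n;			/* caller retries with the remainder      */
}
```

Clamping to `size - slot` is what lets the driver's unrolled loop run without any per-element wrap check; the caller simply calls again with the leftover packets, which is why the TX node below retries up to five times.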
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c
index ddda81a4b19..0c6848e09cd 100644
--- a/src/plugins/rdma/output.c
+++ b/src/plugins/rdma/output.c
@@ -28,46 +28,45 @@ static_always_inline void
 rdma_device_output_free (vlib_main_t * vm, rdma_txq_t * txq)
 {
   struct ibv_wc wc[VLIB_FRAME_SIZE];
-  u32 to_free[VLIB_FRAME_SIZE];
-  int n_free;
-  int i;
+  u32 tail, slot;
+  int n;
 
-  n_free = ibv_poll_cq (txq->cq, VLIB_FRAME_SIZE, wc);
-  if (n_free <= 0)
+  n = ibv_poll_cq (txq->cq, VLIB_FRAME_SIZE, wc);
+  if (n <= 0)
     return;
 
-  for (i = 0; i < n_free; i++)
-    to_free[i] = wc[i].wr_id;
-
-  vlib_buffer_free (vm, to_free, n_free);
+  tail = wc[n - 1].wr_id;
+  slot = txq->head & (txq->size - 1);
+  vlib_buffer_free_from_ring (vm, txq->bufs, slot, txq->size,
+			      tail - txq->head);
+  txq->head = tail;
 }
 
-VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
-					     vlib_node_runtime_t * node,
-					     vlib_frame_t * frame)
+static_always_inline u32
+rmda_device_output_tx (vlib_main_t * vm, const rdma_device_t * rd,
+		       rdma_txq_t * txq, u32 n_left_from, u32 * bi)
 {
-  rdma_main_t *rm = &rdma_main;
-  vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
-  rdma_device_t *rd = pool_elt_at_index (rm->devices, ord->dev_instance);
-  u32 thread_index = vm->thread_index;
-  rdma_txq_t *txq =
-    vec_elt_at_index (rd->txqs, thread_index % vec_len (rd->txqs));
-  u32 *from, *f, n_left_from;
-  u32 n_tx_packets, n_tx_failed;
   vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
   struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr;
   struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
-  int i, ret;
+  u32 n, slot = txq->tail & (txq->size - 1);
+  u32 *tx = &txq->bufs[slot];
 
-  f = from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  vlib_get_buffers (vm, from, bufs, n_left_from);
+  /* do not enqueue more packet than ring space */
+  n_left_from = clib_min (n_left_from, txq->size - (txq->tail - txq->head));
+  /* avoid wrap-around logic in core loop */
+  n = n_left_from = clib_min (n_left_from, txq->size - slot);
+  /* if ring is full, do nothing */
+  if (PREDICT_FALSE (0 == n_left_from))
+    return 0;
+
+  vlib_get_buffers (vm, bi, bufs, n_left_from);
   memset (w, 0, n_left_from * sizeof (w[0]));
 
-  while (n_left_from >= 4)
+  while (n >= 4)
     {
-      if (PREDICT_TRUE (n_left_from >= 8))
+      if (PREDICT_TRUE (n >= 8))
 	{
 	  vlib_prefetch_buffer_header (b[4 + 0], LOAD);
 	  vlib_prefetch_buffer_header (b[4 + 1], LOAD);
@@ -82,96 +81,126 @@ VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
 	  CLIB_PREFETCH (&w[4 + 3], CLIB_CACHE_LINE_BYTES, STORE);
 	}
 
+      vlib_buffer_copy_indices (tx, bi, 4);
+
       s[0].addr = vlib_buffer_get_current_va (b[0]);
       s[0].length = b[0]->current_length;
-      s[0].lkey = rd->mr->lkey;
+      s[0].lkey = rd->lkey;
 
       s[1].addr = vlib_buffer_get_current_va (b[1]);
       s[1].length = b[1]->current_length;
-      s[1].lkey = rd->mr->lkey;
+      s[1].lkey = rd->lkey;
 
       s[2].addr = vlib_buffer_get_current_va (b[2]);
       s[2].length = b[2]->current_length;
-      s[2].lkey = rd->mr->lkey;
+      s[2].lkey = rd->lkey;
 
       s[3].addr = vlib_buffer_get_current_va (b[3]);
       s[3].length = b[3]->current_length;
-      s[3].lkey = rd->mr->lkey;
+      s[3].lkey = rd->lkey;
 
-      w[0].wr_id = f[0];
       w[0].next = &w[0] + 1;
       w[0].sg_list = &s[0];
       w[0].num_sge = 1;
       w[0].opcode = IBV_WR_SEND;
 
-      w[1].wr_id = f[1];
       w[1].next = &w[1] + 1;
       w[1].sg_list = &s[1];
       w[1].num_sge = 1;
       w[1].opcode = IBV_WR_SEND;
 
-      w[2].wr_id = f[2];
       w[2].next = &w[2] + 1;
       w[2].sg_list = &s[2];
       w[2].num_sge = 1;
       w[2].opcode = IBV_WR_SEND;
 
-      w[3].wr_id = f[3];
       w[3].next = &w[3] + 1;
       w[3].sg_list = &s[3];
       w[3].num_sge = 1;
       w[3].opcode = IBV_WR_SEND;
 
       s += 4;
-      f += 4;
       w += 4;
       b += 4;
-      n_left_from -= 4;
+      bi += 4;
+      tx += 4;
+      n -= 4;
     }
 
-  while (n_left_from >= 1)
+  while (n >= 1)
    {
+      vlib_buffer_copy_indices (tx, bi, 1);
+
      s[0].addr = vlib_buffer_get_current_va (b[0]);
      s[0].length = b[0]->current_length;
-      s[0].lkey = rd->mr->lkey;
+      s[0].lkey = rd->lkey;
 
-      w[0].wr_id = f[0];
      w[0].next = &w[0] + 1;
      w[0].sg_list = &s[0];
      w[0].num_sge = 1;
      w[0].opcode = IBV_WR_SEND;
 
      s += 1;
-      f += 1;
      w += 1;
      b += 1;
-      n_left_from -= 1;
+      bi += 1;
+      tx += 1;
+      n -= 1;
    }
 
-  w[-1].next = 0;		/* fix next pointer in WR linked-list last item */
+  w[-1].wr_id = txq->tail + n_left_from;	/* register item to free */
+  w[-1].next = 0;		/* fix next pointer in WR linked-list */
+  w[-1].send_flags = IBV_SEND_SIGNALED;	/* generate a CQE so we can free buffers */
 
   w = wr;
+  if (PREDICT_FALSE (0 != ibv_post_send (txq->qp, w, &w)))
+    n_left_from = w - wr;
+
+  txq->tail += n_left_from;
+  return n_left_from;
+}
+
+VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
+					     vlib_node_runtime_t * node,
+					     vlib_frame_t * frame)
+{
+  rdma_main_t *rm = &rdma_main;
+  vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
+  rdma_device_t *rd = pool_elt_at_index (rm->devices, ord->dev_instance);
+  u32 thread_index = vm->thread_index;
+  rdma_txq_t *txq =
+    vec_elt_at_index (rd->txqs, thread_index % vec_len (rd->txqs));
+  u32 *from;
+  u32 n_left_from;
+  int i;
+
+  ASSERT (txq->size >= VLIB_FRAME_SIZE && is_pow2 (txq->size));
+  ASSERT (txq->tail - txq->head <= txq->size);
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+
   clib_spinlock_lock_if_init (&txq->lock);
-  for (i = 0; i < 5; i++)
+
+  for (i = 0; i < 5 && n_left_from >= 0; i++)
    {
+      u32 n_enq;
      rdma_device_output_free (vm, txq);
-      ret = ibv_post_send (txq->qp, w, &w);
-      if (0 == ret)
-	break;
+      n_enq = rmda_device_output_tx (vm, rd, txq, n_left_from, from);
+      n_left_from -= n_enq;
+      from += n_enq;
    }
-  clib_spinlock_unlock_if_init (&txq->lock);
 
-  n_tx_packets = 0 == ret ? frame->n_vectors : w - wr;
-  n_tx_failed = frame->n_vectors - n_tx_packets;
+  clib_spinlock_unlock_if_init (&txq->lock);
 
-  if (PREDICT_FALSE (n_tx_failed))
+  if (PREDICT_FALSE (n_left_from))
    {
-      vlib_buffer_free (vm, &from[n_tx_packets], n_tx_failed);
+      vlib_buffer_free (vm, from, n_left_from);
      vlib_error_count (vm, node->node_index,
-			RDMA_TX_ERROR_NO_FREE_SLOTS, n_tx_failed);
+			RDMA_TX_ERROR_NO_FREE_SLOTS, n_left_from);
    }
 
-  return n_tx_packets;
+  return frame->n_vectors - n_left_from;
 }
 
 /*
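Two details of the new TX path are easy to miss in the diff. Only the last work request of a batch is marked `IBV_SEND_SIGNALED`, and its `wr_id` stores the ring tail reached after that batch, so a single completion entry is enough for `rdma_device_output_free()` to release everything from `head` up to that value with `vlib_buffer_free_from_ring()`. And when `ibv_post_send()` fails, it points its third argument at the first rejected work request, so `w - wr` is the number of requests actually posted and the tail only advances by that much. The fragment below is a hedged sketch of the completion-side bookkeeping under those assumptions, reusing the illustrative `ring_t` from the earlier sketch; `free_range()` is a stand-in for `vlib_buffer_free_from_ring()`, not a real VPP or libibverbs API.

```c
/* Sketch of the completion-driven free path: the single signaled work
 * request of a batch carries wr_id = tail-after-batch, so one CQE releases
 * the whole range [head, wr_id).  ring_t is the illustrative type from the
 * previous sketch; free_range() is an assumed helper. */
static inline void
ring_on_cqe (ring_t * r, uint64_t signaled_wr_id,
	     void (*free_range) (uint32_t * bufs, uint32_t first_slot,
				 uint32_t ring_size, uint32_t n))
{
  uint32_t done = (uint32_t) signaled_wr_id;	/* tail recorded at post time */
  uint32_t slot = r->head & (r->size - 1);

  /* free every buffer posted since the last signaled completion,
   * then advance the free pointer to match */
  free_range (r->bufs, slot, r->size, done - r->head);
  r->head = done;
}
```

Because `head` and `tail` are free-running counters, the same modular subtraction works whether or not the range wraps the end of the array; the ring-aware free helper handles the split copy internally, which is what keeps the per-packet cost on the completion path to a single array write.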