diff options
author | Benoît Ganne <bganne@cisco.com> | 2019-04-01 16:05:22 +0200 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2019-04-03 14:41:12 +0000 |
commit | 211ef2eb24752faeb8a8cb1e3e727e008acf921e (patch) | |
tree | fd868c4c9c02475e67001e463227bc614186f713 | |
parent | b294f1099e5f0d703f1c87767653a2896d28ea36 (diff) |
rdma: tx: fix stats and add batching
Tx stats are no longer counted twice.
Submit tx packets as a single batch per vector instead of per-packet.
Change-Id: I26820b21f23842b3a67ace0b939095f3550d3856
Signed-off-by: Benoît Ganne <bganne@cisco.com>
-rw-r--r-- | src/plugins/rdma/output.c | 149 |
1 file changed, 88 insertions, 61 deletions
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c index 410784308f3..41b9af1fbe5 100644 --- a/src/plugins/rdma/output.c +++ b/src/plugins/rdma/output.c @@ -24,39 +24,6 @@ #include <rdma/rdma.h> -static_always_inline u16 -rdma_device_output_tx (vlib_main_t * vm, rdma_device_t * rd, rdma_txq_t * txq, - u32 * buffers, u16 n_left, u32 * n_tx_packets, - u32 * n_tx_bytes) -{ - struct ibv_sge sg_entry; - struct ibv_send_wr wr, *bad_wr; - u16 i; - - for (i = 0; i < n_left; i++) - { - vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]); - sg_entry.addr = vlib_buffer_get_current_va (b); - sg_entry.length = b->current_length; - sg_entry.lkey = rd->mr->lkey; - - memset (&wr, 0, sizeof (wr)); - wr.num_sge = 1; - wr.sg_list = &sg_entry; - wr.opcode = IBV_WR_SEND; - wr.send_flags = IBV_SEND_SIGNALED; - wr.wr_id = buffers[i]; - - if (ibv_post_send (txq->qp, &wr, &bad_wr) != 0) - break; - - *n_tx_bytes += b->current_length; - } - - *n_tx_packets += i; - return i; -} - static_always_inline void rdma_device_output_free (vlib_main_t * vm, rdma_txq_t * txq) { @@ -79,49 +46,109 @@ VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - vnet_main_t *vnm = vnet_get_main (); rdma_main_t *rm = &rdma_main; vnet_interface_output_runtime_t *ord = (void *) node->runtime_data; rdma_device_t *rd = pool_elt_at_index (rm->devices, ord->dev_instance); u32 thread_index = vm->thread_index; - u8 qid = thread_index; - rdma_txq_t *txq = vec_elt_at_index (rd->txqs, qid % vec_len (rd->txqs)); - u32 *buffers = vlib_frame_vector_args (frame); - u16 n_left; - u16 n_retry = 5; - u32 n_tx_packets = 0, n_tx_bytes = 0; + rdma_txq_t *txq = + vec_elt_at_index (rd->txqs, thread_index % vec_len (rd->txqs)); + u32 *from, *f, n_left_from; + u32 n_tx_packets, n_tx_failed; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr; + struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge; + int i; - 
clib_spinlock_lock_if_init (&txq->lock); + f = from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + vlib_get_buffers (vm, from, bufs, n_left_from); - n_left = frame->n_vectors; + memset (w, 0, n_left_from * sizeof (w[0])); - while (n_left) + while (n_left_from >= 2) { - u16 n; - rdma_device_output_free (vm, txq); - n = - rdma_device_output_tx (vm, rd, txq, buffers, n_left, &n_tx_packets, - &n_tx_bytes); - n_left -= n; - buffers += n; - - if (n_left && n_retry--) + if (PREDICT_TRUE (n_left_from >= 4)) { - vlib_buffer_free (vm, buffers, n_left); - vlib_error_count (vm, node->node_index, - RDMA_TX_ERROR_NO_FREE_SLOTS, n_left); - break; + vlib_prefetch_buffer_header (b[2 + 0], LOAD); + vlib_prefetch_buffer_header (b[2 + 1], LOAD); + CLIB_PREFETCH (&s[2 + 0], sizeof (s[0]), STORE); + CLIB_PREFETCH (&s[2 + 1], sizeof (s[0]), STORE); + CLIB_PREFETCH (&w[2 + 0], sizeof (w[0]), STORE); + CLIB_PREFETCH (&w[2 + 1], sizeof (w[0]), STORE); } + + s[0].addr = vlib_buffer_get_current_va (b[0]); + s[0].length = b[0]->current_length; + s[0].lkey = rd->mr->lkey; + + s[1].addr = vlib_buffer_get_current_va (b[1]); + s[1].length = b[1]->current_length; + s[1].lkey = rd->mr->lkey; + + w[0].wr_id = f[0]; + w[0].next = &w[1 + 0]; + w[0].sg_list = &s[0]; + w[0].num_sge = 1; + w[0].opcode = IBV_WR_SEND; + w[0].send_flags = IBV_SEND_SIGNALED; + + w[1].wr_id = f[1]; + w[1].next = &w[1 + 1]; + w[1].sg_list = &s[1]; + w[1].num_sge = 1; + w[1].opcode = IBV_WR_SEND; + w[1].send_flags = IBV_SEND_SIGNALED; + + s += 2; + f += 2; + w += 2; + b += 2; + n_left_from -= 2; + } + + while (n_left_from >= 1) + { + s[0].addr = vlib_buffer_get_current_va (b[0]); + s[0].length = b[0]->current_length; + s[0].lkey = rd->mr->lkey; + + w[0].wr_id = f[0]; + w[0].next = &w[1 + 0]; + w[0].sg_list = &s[0]; + w[0].num_sge = 1; + w[0].opcode = IBV_WR_SEND; + w[0].send_flags = IBV_SEND_SIGNALED; + + s += 1; + f += 1; + w += 1; + b += 1; + n_left_from -= 1; } + w[-1].next = 0; /* fix next pointer in 
WR linked-list last item */ + + w = wr; + clib_spinlock_lock_if_init (&txq->lock); + for (i = 0; i < 5; i++) + { + rdma_device_output_free (vm, txq); + if (0 == ibv_post_send (txq->qp, w, &w)) + break; + } clib_spinlock_unlock_if_init (&txq->lock); - vlib_increment_combined_counter - (vnm->interface_main.combined_sw_if_counters + - VNET_INTERFACE_COUNTER_TX, thread_index, - rd->hw_if_index, n_tx_packets, n_tx_bytes); + n_tx_packets = w == wr ? frame->n_vectors : w - wr; + n_tx_failed = frame->n_vectors - n_tx_packets; + + if (PREDICT_FALSE (n_tx_failed)) + { + vlib_buffer_free (vm, &from[n_tx_packets], n_tx_failed); + vlib_error_count (vm, node->node_index, + RDMA_TX_ERROR_NO_FREE_SLOTS, n_tx_failed); + } - return frame->n_vectors - n_left; + return n_tx_packets; } /* |