author     Benoît Ganne <bganne@cisco.com>    2019-04-01 16:05:22 +0200
committer  Damjan Marion <dmarion@me.com>     2019-04-03 14:41:12 +0000
commit     211ef2eb24752faeb8a8cb1e3e727e008acf921e (patch)
tree       fd868c4c9c02475e67001e463227bc614186f713
parent     b294f1099e5f0d703f1c87767653a2896d28ea36 (diff)
rdma: tx: fix stats and add batching
Tx stats are no longer counted twice. Submit tx packets as a single
batch per vector instead of per-packet.

Change-Id: I26820b21f23842b3a67ace0b939095f3550d3856
Signed-off-by: Benoît Ganne <bganne@cisco.com>
-rw-r--r--  src/plugins/rdma/output.c | 149
1 file changed, 88 insertions(+), 61 deletions(-)
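The gist of the change before the diff itself: the old code posted one work
request per packet, with a separate ibv_post_send() call each time; the new
code builds the whole frame's work requests up front, chains them through
their next pointers, and hands the list to the NIC in a single verbs call.
Below is a minimal standalone sketch of that pattern; post_tx_batch() and
its parameters are hypothetical stand-ins for the patch's txq->qp,
rd->mr->lkey and the frame's buffers, not VPP APIs.

#include <string.h>
#include <stdint.h>
#include <infiniband/verbs.h>

#define BATCH_MAX 256		/* hypothetical cap, one frame's worth */

static int
post_tx_batch (struct ibv_qp *qp, uint32_t lkey,
	       uintptr_t *pkt_addr, uint32_t *pkt_len, int n_pkts)
{
  struct ibv_sge sge[BATCH_MAX];
  struct ibv_send_wr wr[BATCH_MAX], *bad_wr;
  int i;

  if (n_pkts < 1 || n_pkts > BATCH_MAX)	/* sketch assumes 1..BATCH_MAX */
    return -1;

  memset (wr, 0, n_pkts * sizeof (wr[0]));
  for (i = 0; i < n_pkts; i++)
    {
      sge[i].addr = pkt_addr[i];	/* packet data in registered memory */
      sge[i].length = pkt_len[i];
      sge[i].lkey = lkey;

      wr[i].sg_list = &sge[i];
      wr[i].num_sge = 1;
      wr[i].opcode = IBV_WR_SEND;
      wr[i].send_flags = IBV_SEND_SIGNALED;
      wr[i].next = &wr[i + 1];		/* chain to the next WR... */
    }
  wr[n_pkts - 1].next = NULL;		/* ...and terminate the list */

  /* one driver call for the whole batch instead of one per packet */
  return ibv_post_send (qp, wr, &bad_wr);
}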
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c
index 410784308f3..41b9af1fbe5 100644
--- a/src/plugins/rdma/output.c
+++ b/src/plugins/rdma/output.c
@@ -24,39 +24,6 @@
#include <rdma/rdma.h>
-static_always_inline u16
-rdma_device_output_tx (vlib_main_t * vm, rdma_device_t * rd, rdma_txq_t * txq,
- u32 * buffers, u16 n_left, u32 * n_tx_packets,
- u32 * n_tx_bytes)
-{
- struct ibv_sge sg_entry;
- struct ibv_send_wr wr, *bad_wr;
- u16 i;
-
- for (i = 0; i < n_left; i++)
- {
- vlib_buffer_t *b = vlib_get_buffer (vm, buffers[i]);
- sg_entry.addr = vlib_buffer_get_current_va (b);
- sg_entry.length = b->current_length;
- sg_entry.lkey = rd->mr->lkey;
-
- memset (&wr, 0, sizeof (wr));
- wr.num_sge = 1;
- wr.sg_list = &sg_entry;
- wr.opcode = IBV_WR_SEND;
- wr.send_flags = IBV_SEND_SIGNALED;
- wr.wr_id = buffers[i];
-
- if (ibv_post_send (txq->qp, &wr, &bad_wr) != 0)
- break;
-
- *n_tx_bytes += b->current_length;
- }
-
- *n_tx_packets += i;
- return i;
-}
-
static_always_inline void
rdma_device_output_free (vlib_main_t * vm, rdma_txq_t * txq)
{
@@ -79,49 +46,109 @@ VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- vnet_main_t *vnm = vnet_get_main ();
rdma_main_t *rm = &rdma_main;
vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
rdma_device_t *rd = pool_elt_at_index (rm->devices, ord->dev_instance);
u32 thread_index = vm->thread_index;
- u8 qid = thread_index;
- rdma_txq_t *txq = vec_elt_at_index (rd->txqs, qid % vec_len (rd->txqs));
- u32 *buffers = vlib_frame_vector_args (frame);
- u16 n_left;
- u16 n_retry = 5;
- u32 n_tx_packets = 0, n_tx_bytes = 0;
+ rdma_txq_t *txq =
+ vec_elt_at_index (rd->txqs, thread_index % vec_len (rd->txqs));
+ u32 *from, *f, n_left_from;
+ u32 n_tx_packets, n_tx_failed;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ struct ibv_send_wr wr[VLIB_FRAME_SIZE], *w = wr;
+ struct ibv_sge sge[VLIB_FRAME_SIZE], *s = sge;
+ int i;
- clib_spinlock_lock_if_init (&txq->lock);
+ f = from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
- n_left = frame->n_vectors;
+ memset (w, 0, n_left_from * sizeof (w[0]));
- while (n_left)
+ while (n_left_from >= 2)
{
- u16 n;
- rdma_device_output_free (vm, txq);
- n =
- rdma_device_output_tx (vm, rd, txq, buffers, n_left, &n_tx_packets,
- &n_tx_bytes);
- n_left -= n;
- buffers += n;
-
- if (n_left && n_retry--)
+ if (PREDICT_TRUE (n_left_from >= 4))
{
- vlib_buffer_free (vm, buffers, n_left);
- vlib_error_count (vm, node->node_index,
- RDMA_TX_ERROR_NO_FREE_SLOTS, n_left);
- break;
+ vlib_prefetch_buffer_header (b[2 + 0], LOAD);
+ vlib_prefetch_buffer_header (b[2 + 1], LOAD);
+ CLIB_PREFETCH (&s[2 + 0], sizeof (s[0]), STORE);
+ CLIB_PREFETCH (&s[2 + 1], sizeof (s[0]), STORE);
+ CLIB_PREFETCH (&w[2 + 0], sizeof (w[0]), STORE);
+ CLIB_PREFETCH (&w[2 + 1], sizeof (w[0]), STORE);
}
+
+ s[0].addr = vlib_buffer_get_current_va (b[0]);
+ s[0].length = b[0]->current_length;
+ s[0].lkey = rd->mr->lkey;
+
+ s[1].addr = vlib_buffer_get_current_va (b[1]);
+ s[1].length = b[1]->current_length;
+ s[1].lkey = rd->mr->lkey;
+
+ w[0].wr_id = f[0];
+ w[0].next = &w[1 + 0];
+ w[0].sg_list = &s[0];
+ w[0].num_sge = 1;
+ w[0].opcode = IBV_WR_SEND;
+ w[0].send_flags = IBV_SEND_SIGNALED;
+
+ w[1].wr_id = f[1];
+ w[1].next = &w[1 + 1];
+ w[1].sg_list = &s[1];
+ w[1].num_sge = 1;
+ w[1].opcode = IBV_WR_SEND;
+ w[1].send_flags = IBV_SEND_SIGNALED;
+
+ s += 2;
+ f += 2;
+ w += 2;
+ b += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from >= 1)
+ {
+ s[0].addr = vlib_buffer_get_current_va (b[0]);
+ s[0].length = b[0]->current_length;
+ s[0].lkey = rd->mr->lkey;
+
+ w[0].wr_id = f[0];
+ w[0].next = &w[1 + 0];
+ w[0].sg_list = &s[0];
+ w[0].num_sge = 1;
+ w[0].opcode = IBV_WR_SEND;
+ w[0].send_flags = IBV_SEND_SIGNALED;
+
+ s += 1;
+ f += 1;
+ w += 1;
+ b += 1;
+ n_left_from -= 1;
}
+ w[-1].next = 0; /* fix next pointer in WR linked-list last item */
+
+ w = wr;
+ clib_spinlock_lock_if_init (&txq->lock);
+ for (i = 0; i < 5; i++)
+ {
+ rdma_device_output_free (vm, txq);
+ if (0 == ibv_post_send (txq->qp, w, &w))
+ break;
+ }
clib_spinlock_unlock_if_init (&txq->lock);
- vlib_increment_combined_counter
- (vnm->interface_main.combined_sw_if_counters +
- VNET_INTERFACE_COUNTER_TX, thread_index,
- rd->hw_if_index, n_tx_packets, n_tx_bytes);
+ n_tx_packets = w == wr ? frame->n_vectors : w - wr;
+ n_tx_failed = frame->n_vectors - n_tx_packets;
+
+ if (PREDICT_FALSE (n_tx_failed))
+ {
+ vlib_buffer_free (vm, &from[n_tx_packets], n_tx_failed);
+ vlib_error_count (vm, node->node_index,
+ RDMA_TX_ERROR_NO_FREE_SLOTS, n_tx_failed);
+ }
- return frame->n_vectors - n_left;
+ return n_tx_packets;
}
/*
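A note on the retry loop in the new tx function: ibv_post_send() reports the
first work request it could not post through its last argument, so passing &w
makes each retry resume exactly at the first rejected WR once
rdma_device_output_free() has reclaimed completed slots, and the final
pointer difference w - wr recovers how many packets the NIC accepted. A
condensed, hypothetical sketch of that scheme follows; post_batch_with_retry()
and reclaim are illustrative names, not part of the patch, and it assumes
what the patch relies on: the WRs live in a contiguous array chained through
.next, and a successful call leaves the bad_wr argument untouched.

#include <stdint.h>
#include <infiniband/verbs.h>

static uint32_t
post_batch_with_retry (struct ibv_qp *qp, struct ibv_send_wr *wr_list,
		       uint32_t n_wr, void (*reclaim) (void))
{
  struct ibv_send_wr *w = wr_list;
  int tries;

  for (tries = 0; tries < 5; tries++)
    {
      reclaim ();			/* make room in the tx ring */
      if (ibv_post_send (qp, w, &w) == 0)
	break;				/* all WRs from w onwards accepted */
      /* w now points at the first rejected WR: retry from there */
    }

  /* an untouched w means the very first call took everything; otherwise
     the array offset of w is the number of packets actually posted */
  return w == wr_list ? n_wr : (uint32_t) (w - wr_list);
}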