aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Damjan Marion <damarion@cisco.com> 2020-03-02 19:02:15 +0100
committer: Damjan Marion <damarion@cisco.com> 2020-03-02 19:27:35 +0100
commit: b9250a9eee1dc259f11bdb9bb8c881da56b52895 (patch)
tree: a49ddbaa3d1b2d752fd26fdc5a6c8eb5e9704ab0
parent: 599d088804085c88beda0026fa0984f8177d31c0 (diff)
rdma: improve rx loop
Type: improvement Change-Id: If81847bc0c92d167ce03e1e94a2f8e18f8154af2 Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- src/plugins/rdma/input.c 54
-rw-r--r-- src/vlib/buffer_funcs.h 18
2 files changed, 34 insertions, 38 deletions
diff --git a/src/plugins/rdma/input.c b/src/plugins/rdma/input.c
index 7665c99d26d..f4ef6505a0f 100644
--- a/src/plugins/rdma/input.c
+++ b/src/plugins/rdma/input.c
@@ -194,12 +194,12 @@ rdma_device_input_ethernet (vlib_main_t * vm, vlib_node_runtime_t * node,
}
static_always_inline u32
-rdma_device_input_bufs (vlib_main_t * vm, const rdma_device_t * rd,
- u32 * next, u32 * bi, struct ibv_wc * wc,
- u32 n_left_from, vlib_buffer_t * bt)
+rdma_device_input_bufs (vlib_main_t * vm, const rdma_device_t * rd, u32 * bi,
+ struct ibv_wc * wc, u32 n_left_from,
+ vlib_buffer_t * bt)
{
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u32 n_rx_bytes[4] = { 0 };
+ u32 n_rx_bytes = 0;
vlib_get_buffers (vm, bi, bufs, n_left_from);
ASSERT (bt->buffer_pool_index == bufs[0]->buffer_pool_index);
@@ -218,25 +218,16 @@ rdma_device_input_bufs (vlib_main_t * vm, const rdma_device_t * rd,
vlib_prefetch_buffer_header (b[4 + 3], STORE);
}
- vlib_buffer_copy_indices (next, bi, 4);
-
vlib_buffer_copy_template (b[0], bt);
vlib_buffer_copy_template (b[1], bt);
vlib_buffer_copy_template (b[2], bt);
vlib_buffer_copy_template (b[3], bt);
- b[0]->current_length = wc[0].byte_len;
- b[1]->current_length = wc[1].byte_len;
- b[2]->current_length = wc[2].byte_len;
- b[3]->current_length = wc[3].byte_len;
-
- n_rx_bytes[0] += wc[0].byte_len;
- n_rx_bytes[1] += wc[1].byte_len;
- n_rx_bytes[2] += wc[2].byte_len;
- n_rx_bytes[3] += wc[3].byte_len;
+ n_rx_bytes += b[0]->current_length = wc[0].byte_len;
+ n_rx_bytes += b[1]->current_length = wc[1].byte_len;
+ n_rx_bytes += b[2]->current_length = wc[2].byte_len;
+ n_rx_bytes += b[3]->current_length = wc[3].byte_len;
- next += 4;
- bi += 4;
b += 4;
wc += 4;
n_left_from -= 4;
@@ -244,19 +235,15 @@ rdma_device_input_bufs (vlib_main_t * vm, const rdma_device_t * rd,
while (n_left_from >= 1)
{
- vlib_buffer_copy_indices (next, bi, 1);
vlib_buffer_copy_template (b[0], bt);
- b[0]->current_length = wc[0].byte_len;
- n_rx_bytes[0] += wc[0].byte_len;
+ n_rx_bytes += b[0]->current_length = wc[0].byte_len;
- next += 1;
- bi += 1;
b += 1;
wc += 1;
n_left_from -= 1;
}
- return n_rx_bytes[0] + n_rx_bytes[1] + n_rx_bytes[2] + n_rx_bytes[3];
+ return n_rx_bytes;
}
static_always_inline uword
@@ -272,7 +259,7 @@ rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t bt;
u32 next_index, *to_next, n_left_to_next;
u32 n_rx_packets, n_rx_bytes;
- u32 slot, n_tail;
+ u32 mask = rxq->size - 1;
ASSERT (rxq->size >= VLIB_FRAME_SIZE && is_pow2 (rxq->size));
ASSERT (rxq->tail - rxq->head <= rxq->size);
@@ -296,20 +283,11 @@ rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
ASSERT (n_rx_packets <= n_left_to_next);
- /*
- * avoid wrap-around logic in core loop
- * we requested VLIB_FRAME_SIZE packets and rxq->size >= VLIB_FRAME_SIZE
- * => we can process all packets in 2 iterations max
- */
- slot = rxq->head & (rxq->size - 1);
- n_tail = clib_min (n_rx_packets, rxq->size - slot);
- n_rx_bytes =
- rdma_device_input_bufs (vm, rd, &to_next[0], &rxq->bufs[slot], wc, n_tail,
- &bt);
- if (n_tail < n_rx_packets)
- n_rx_bytes +=
- rdma_device_input_bufs (vm, rd, &to_next[n_tail], &rxq->bufs[0],
- &wc[n_tail], n_rx_packets - n_tail, &bt);
+ vlib_buffer_copy_indices_from_ring (to_next, rxq->bufs, rxq->head & mask,
+ rxq->size, n_rx_packets);
+ n_rx_bytes = rdma_device_input_bufs (vm, rd, to_next, wc, n_rx_packets,
+ &bt);
+
rdma_device_input_ethernet (vm, node, rd, next_index);
vlib_put_next_frame (vm, node, next_index, n_left_to_next - n_rx_packets);
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 58abdb18841..07f270c8c23 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -140,6 +140,24 @@ vlib_buffer_copy_indices (u32 * dst, u32 * src, u32 n_indices)
}
}
+always_inline void	/* Copy n_buffers buffer indices out of a circular ring into the linear array dst. */
+vlib_buffer_copy_indices_from_ring (u32 * dst, u32 * ring, u32 start,
+ u32 ring_size, u32 n_buffers)	/* start: ring slot of the first index to copy; NOTE(review): start < ring_size is assumed, not checked here */
+{
+ ASSERT (n_buffers <= ring_size);	/* the copy may wrap at most once */
+
+ if (PREDICT_TRUE (start + n_buffers <= ring_size))	/* requested span ends at or before the end of the ring */
+ {
+ vlib_buffer_copy_indices (dst, ring + start, n_buffers);	/* no wrap-around: a single contiguous copy */
+ }
+ else
+ {
+ u32 n = ring_size - start;	/* number of slots from start up to the end of the ring */
+ vlib_buffer_copy_indices (dst, ring + start, n);	/* first the slots from start to the end of the ring */
+ vlib_buffer_copy_indices (dst + n, ring, n_buffers - n);	/* then the remaining indices from slot 0 */
+ }
+}
+
STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64);
static_always_inline void
vlib_buffer_copy_template (vlib_buffer_t * b, vlib_buffer_t * bt)