summaryrefslogtreecommitdiffstats
path: root/src/svm/svm_fifo.h
diff options
context:
space:
mode:
authorSirshak Das <sirshak.das@arm.com>2019-02-05 01:33:33 -0600
committerFlorin Coras <florin.coras@gmail.com>2019-04-16 19:33:21 +0000
commit28aa539f7da7b172d0f35ea9a63f3986939477f7 (patch)
treebe856eb44878b604b2fc93beffb7268db77b457b /src/svm/svm_fifo.h
parent39d04099467414175803273433c95a96c0276252 (diff)
svm_fifo rework to avoid contention on cursize
Problems Addressed: - Contention of cursize by producer and consumer. - Reduce the no of modulo operations. Changes: - Synchronization between producer and consumer changed from cursize to head and tail indexes Implications: reduces the usable size of fifo by 1. - Using weaker memory ordering C++11 atomics to access head and tail based on producer and consumer role. - Head and tail indexes are unsigned 32 bit integers. Additions and subtraction on them are implicit 32 bit Modulo operation. - Adding weaker memory ordering variants of max_enq, max_deq, is_empty and is_full Using them appropriately in all places. Perfomance improvement (iperf3 via Hoststack): iperf3 Server: Marvell ThunderX2(AArch64) - iperf3 Client: Skylake(x86) ~6%(256 rxd/txd) - ~11%(2048 rxd/txd) Change-Id: I1d484e000e437430fdd5a819657d1c6b62443018 Signed-off-by: Sirshak Das <sirshak.das@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Diffstat (limited to 'src/svm/svm_fifo.h')
-rw-r--r--src/svm/svm_fifo.h195
1 files changed, 158 insertions, 37 deletions
diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h
index 4a119341d8b..94a05938d26 100644
--- a/src/svm/svm_fifo.h
+++ b/src/svm/svm_fifo.h
@@ -1,5 +1,9 @@
/*
* Copyright (c) 2016-2019 Cisco and/or its affiliates.
+ * Copyright (c) 2019 Arm Limited
+ * Copyright (c) 2010-2017 Intel Corporation and/or its affiliates.
+ * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org
+ * Inspired from DPDK rte_ring.h (SPSC only) (derived from freebsd bufring.h).
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -59,8 +63,8 @@ typedef struct
typedef struct _svm_fifo
{
CLIB_CACHE_LINE_ALIGN_MARK (shared_first);
- volatile u32 cursize; /**< current fifo size */
- u32 nitems;
+ u32 size; /**< size of the fifo(must be power of 2) */
+ u32 nitems; /**< usable size(size-1) */
CLIB_CACHE_LINE_ALIGN_MARK (shared_second);
volatile u32 has_event; /**< non-zero if deq event exists */
@@ -125,28 +129,141 @@ typedef struct svm_fifo_segment_
u8 *svm_fifo_dump_trace (u8 * s, svm_fifo_t * f);
u8 *svm_fifo_replay (u8 * s, svm_fifo_t * f, u8 no_read, u8 verbose);
+/* internal function */
+static inline void
+f_load_head_tail_cons (svm_fifo_t * f, u32 * head, u32 * tail)
+{
+ /* load-relaxed: consumer owned index */
+ *head = f->head;
+ /* load-acq: consumer foreign index (paired with store-rel in producer) */
+ *tail = clib_atomic_load_acq_n (&f->tail);
+}
+
+/* internal function */
+static inline void
+f_load_head_tail_prod (svm_fifo_t * f, u32 * head, u32 * tail)
+{
+ /* load relaxed: producer owned index */
+ *tail = f->tail;
+ /* load-acq: producer foreign index (paired with store-rel in consumer) */
+ *head = clib_atomic_load_acq_n (&f->head);
+}
+
+/* producer consumer role independent */
+/* internal function */
+static inline void
+f_load_head_tail_all_acq (svm_fifo_t * f, u32 * head, u32 * tail)
+{
+ /* load-acq : consumer foreign index (paired with store-rel) */
+ *tail = clib_atomic_load_acq_n (&f->tail);
+ /* load-acq : producer foriegn index (paired with store-rel) */
+ *head = clib_atomic_load_acq_n (&f->head);
+}
+
+/* internal function */
+static inline u32
+f_free_count (svm_fifo_t * f, u32 head, u32 tail)
+{
+ return (f->nitems + head - tail);
+}
+
+/* internal function */
+static inline u32
+f_cursize (svm_fifo_t * f, u32 head, u32 tail)
+{
+ return (f->nitems - f_free_count (f, head, tail));
+}
+
+/* used by consumer */
+static inline u32
+svm_fifo_max_dequeue_cons (svm_fifo_t * f)
+{
+ u32 tail, head;
+ f_load_head_tail_cons (f, &head, &tail);
+ return f_cursize (f, head, tail);
+}
+
+/* used by producer*/
+static inline u32
+svm_fifo_max_dequeue_prod (svm_fifo_t * f)
+{
+ u32 tail, head;
+ f_load_head_tail_prod (f, &head, &tail);
+ return f_cursize (f, head, tail);
+}
+
+/* use producer or consumer specific functions for perfomance.
+ * svm_fifo_max_dequeue_cons (svm_fifo_t *f)
+ * svm_fifo_max_dequeue_prod (svm_fifo_t *f)
+ */
static inline u32
svm_fifo_max_dequeue (svm_fifo_t * f)
{
- return clib_atomic_load_acq_n (&f->cursize);
+ u32 tail, head;
+ f_load_head_tail_all_acq (f, &head, &tail);
+ return f_cursize (f, head, tail);
}
+/* used by producer */
+static inline int
+svm_fifo_is_full_prod (svm_fifo_t * f)
+{
+ return (svm_fifo_max_dequeue_prod (f) == f->nitems);
+}
+
+/* use producer or consumer specific functions for perfomance.
+ * svm_fifo_is_full_prod (svm_fifo_t * f)
+ * add cons version if needed
+ */
static inline int
svm_fifo_is_full (svm_fifo_t * f)
{
- return (clib_atomic_load_acq_n (&f->cursize) == f->nitems);
+ return (svm_fifo_max_dequeue (f) == f->nitems);
+}
+
+/* used by consumer */
+static inline int
+svm_fifo_is_empty_cons (svm_fifo_t * f)
+{
+ return (svm_fifo_max_dequeue_cons (f) == 0);
}
+/* used by producer */
+static inline int
+svm_fifo_is_empty_prod (svm_fifo_t * f)
+{
+ return (svm_fifo_max_dequeue_prod (f) == 0);
+}
+
+/* use producer or consumer specific functions for perfomance.
+ * svm_fifo_is_empty_cons (svm_fifo_t * f)
+ * svm_fifo_is_empty_prod (svm_fifo_t * f)
+ */
static inline int
svm_fifo_is_empty (svm_fifo_t * f)
{
- return (clib_atomic_load_acq_n (&f->cursize) == 0);
+ return (svm_fifo_max_dequeue (f) == 0);
}
+/* used by producer*/
+static inline u32
+svm_fifo_max_enqueue_prod (svm_fifo_t * f)
+{
+ u32 head, tail;
+ f_load_head_tail_prod (f, &head, &tail);
+ return f_free_count (f, head, tail);
+}
+
+/* use producer or consumer specfic functions for perfomance.
+ * svm_fifo_max_enqueue_prod (svm_fifo_t *f)
+ * add consumer specific version if needed.
+ */
static inline u32
svm_fifo_max_enqueue (svm_fifo_t * f)
{
- return f->nitems - svm_fifo_max_dequeue (f);
+ u32 head, tail;
+ f_load_head_tail_all_acq (f, &head, &tail);
+ return f_free_count (f, head, tail);
}
static inline int
@@ -164,7 +281,7 @@ svm_fifo_has_ooo_data (svm_fifo_t * f)
/**
* Sets fifo event flag.
*
- * Also acts as a release barrier.
+ * Also acts as a release ordering.
*
* @return 1 if flag was not set.
*/
@@ -202,6 +319,7 @@ void svm_fifo_dequeue_drop_all (svm_fifo_t * f);
int svm_fifo_segments (svm_fifo_t * f, svm_fifo_segment_t * fs);
void svm_fifo_segments_free (svm_fifo_t * f, svm_fifo_segment_t * fs);
void svm_fifo_init_pointers (svm_fifo_t * f, u32 pointer);
+void svm_fifo_clone (svm_fifo_t * df, svm_fifo_t * sf);
void svm_fifo_overwrite_head (svm_fifo_t * f, u8 * data, u32 len);
void svm_fifo_add_subscriber (svm_fifo_t * f, u8 subscriber);
void svm_fifo_del_subscriber (svm_fifo_t * f, u8 subscriber);
@@ -213,7 +331,12 @@ format_function_t format_svm_fifo;
always_inline u32
svm_fifo_max_read_chunk (svm_fifo_t * f)
{
- return ((f->tail > f->head) ? (f->tail - f->head) : (f->nitems - f->head));
+ u32 head, tail;
+ u32 head_idx, tail_idx;
+ f_load_head_tail_cons (f, &head, &tail);
+ head_idx = head % f->size;
+ tail_idx = tail % f->size;
+ return tail_idx > head_idx ? (tail_idx - head_idx) : (f->size - head_idx);
}
/**
@@ -222,7 +345,12 @@ svm_fifo_max_read_chunk (svm_fifo_t * f)
always_inline u32
svm_fifo_max_write_chunk (svm_fifo_t * f)
{
- return ((f->tail >= f->head) ? (f->nitems - f->tail) : (f->head - f->tail));
+ u32 head, tail;
+ u32 head_idx, tail_idx;
+ f_load_head_tail_prod (f, &head, &tail);
+ head_idx = head % f->size;
+ tail_idx = tail % f->size;
+ return tail_idx >= head_idx ? (f->size - tail_idx) : (head_idx - tail_idx);
}
/**
@@ -233,27 +361,26 @@ svm_fifo_max_write_chunk (svm_fifo_t * f)
always_inline void
svm_fifo_enqueue_nocopy (svm_fifo_t * f, u32 bytes)
{
- ASSERT (bytes <= svm_fifo_max_enqueue (f));
- f->tail = (f->tail + bytes) % f->nitems;
- clib_atomic_fetch_add_rel (&f->cursize, bytes);
+ ASSERT (bytes <= svm_fifo_max_enqueue_prod (f));
+ /* load-relaxed: producer owned index */
+ u32 tail = f->tail;
+ tail += bytes;
+ /* store-rel: producer owned index (paired with load-acq in consumer) */
+ clib_atomic_store_rel_n (&f->tail, tail);
}
always_inline u8 *
svm_fifo_head (svm_fifo_t * f)
{
- return (f->data + f->head);
+ /* load-relaxed: consumer owned index */
+ return (f->data + (f->head % f->size));
}
always_inline u8 *
svm_fifo_tail (svm_fifo_t * f)
{
- return (f->data + f->tail);
-}
-
-always_inline u32
-svm_fifo_nitems (svm_fifo_t * f)
-{
- return f->nitems;
+ /* load-relaxed: producer owned index */
+ return (f->data + (f->tail % f->size));
}
static inline void
@@ -293,8 +420,8 @@ svm_fifo_needs_tx_ntf (svm_fifo_t * f, u32 n_last_deq)
return 1;
else if (want_ntf & SVM_FIFO_WANT_TX_NOTIF_IF_FULL)
{
- u32 max_deq = svm_fifo_max_dequeue (f);
- u32 nitems = svm_fifo_nitems (f);
+ u32 max_deq = svm_fifo_max_dequeue_cons (f);
+ u32 nitems = f->nitems;
if (!f->has_tx_ntf && max_deq < nitems
&& max_deq + n_last_deq >= nitems)
return 1;
@@ -328,31 +455,25 @@ svm_fifo_newest_ooo_segment_reset (svm_fifo_t * f)
}
always_inline u32
-ooo_segment_distance_from_tail (svm_fifo_t * f, u32 pos)
+ooo_segment_distance_from_tail (svm_fifo_t * f, u32 pos, u32 tail)
{
- /* Ambiguous. Assumption is that ooo segments don't touch tail */
- if (PREDICT_FALSE (pos == f->tail && f->tail == f->head))
- return f->nitems;
-
- return (((f->nitems + pos) - f->tail) % f->nitems);
+ return ((pos - tail) % f->size);
}
always_inline u32
-ooo_segment_distance_to_tail (svm_fifo_t * f, u32 pos)
+ooo_segment_distance_to_tail (svm_fifo_t * f, u32 pos, u32 tail)
{
- return (((f->nitems + f->tail) - pos) % f->nitems);
+ return ((tail - pos) % f->size);
}
always_inline u32
-ooo_segment_offset (svm_fifo_t * f, ooo_segment_t * s)
+ooo_segment_offset_prod (svm_fifo_t * f, ooo_segment_t * s)
{
- return ooo_segment_distance_from_tail (f, s->start);
-}
+ u32 tail;
+ /* load-relaxed: producer owned index */
+ tail = f->tail;
-always_inline u32
-ooo_segment_end_offset (svm_fifo_t * f, ooo_segment_t * s)
-{
- return ooo_segment_distance_from_tail (f, s->start) + s->length;
+ return ooo_segment_distance_from_tail (f, s->start, tail);
}
always_inline u32