aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Damjan Marion <damarion@cisco.com> 2024-03-29 21:33:03 +0100
committer Florin Coras <florin.coras@gmail.com> 2024-04-05 19:33:32 +0000
commit df47a0812ab9365b2de10a7aabcf4b29a255c088 (patch)
tree 176b5b7103e8716442043f7d8026ae1bf8681407
parent d1364402b6964cc5b2e83490bfe516e0d34becd8 (diff)
octeon: fix memory ordering issue in tx batch free
Type: fix
Fixes: 01fe7ab
Change-Id: I4425e809f0977521ddecf91b58b26fe4519dd6e0
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- src/plugins/dev_octeon/octeon.h 14
-rw-r--r-- src/plugins/dev_octeon/port.c 7
-rw-r--r-- src/plugins/dev_octeon/queue.c 14
-rw-r--r-- src/plugins/dev_octeon/tx_node.c 26
4 files changed, 42 insertions, 19 deletions
diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h
index fd8a92c7b3d..72d2d56a437 100644
--- a/src/plugins/dev_octeon/octeon.h
+++ b/src/plugins/dev_octeon/octeon.h
@@ -15,6 +15,8 @@
#include <base/roc_api.h>
#include <dev_octeon/hw_defs.h>
+#define OCT_BATCH_ALLOC_IOVA0_MASK 0xFFFFFFFFFFFFFF80
+
typedef enum
{
OCT_DEVICE_TYPE_UNKNOWN = 0,
@@ -72,13 +74,15 @@ typedef struct
typedef struct
{
CLIB_ALIGN_MARK (cl, 128);
- union
- {
- struct npa_batch_alloc_status_s status;
- u64 iova[16];
- };
+ u64 iova[16];
} oct_npa_batch_alloc_cl128_t;
+typedef union
+{
+ struct npa_batch_alloc_status_s status;
+ u64 as_u64;
+} oct_npa_batch_alloc_status_t;
+
STATIC_ASSERT_SIZEOF (oct_npa_batch_alloc_cl128_t, 128);
typedef struct
diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c
index 00ad8b9c477..a82e48004b5 100644
--- a/src/plugins/dev_octeon/port.c
+++ b/src/plugins/dev_octeon/port.c
@@ -284,8 +284,11 @@ oct_txq_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
for (n = ctq->ba_num_cl, cl = ctq->ba_buffer + ctq->ba_first_cl; n;
cl++, n--)
{
- if (cl->status.ccode != 0)
- for (u32 i = 0; i < cl->status.count; i++)
+ oct_npa_batch_alloc_status_t st;
+
+ st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_ACQUIRE);
+ if (st.status.ccode != ALLOC_CCODE_INVAL)
+ for (u32 i = 0; i < st.status.count; i++)
{
vlib_buffer_t *b = (vlib_buffer_t *) (cl->iova[i] + off);
vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b));
diff --git a/src/plugins/dev_octeon/queue.c b/src/plugins/dev_octeon/queue.c
index 9378fc3b7c7..d6ae794fb8d 100644
--- a/src/plugins/dev_octeon/queue.c
+++ b/src/plugins/dev_octeon/queue.c
@@ -57,12 +57,20 @@ oct_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
vnet_dev_port_t *port = txq->port;
vnet_dev_t *dev = port->dev;
+ u32 sz = sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS;
+ vnet_dev_rv_t rv;
log_debug (dev, "tx_queue_alloc: queue %u alocated", txq->queue_id);
- return vnet_dev_dma_mem_alloc (
- vm, dev, sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS, 128,
- (void **) &ctq->ba_buffer);
+ rv = vnet_dev_dma_mem_alloc (vm, dev, sz, 128, (void **) &ctq->ba_buffer);
+
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ clib_memset_u64 (ctq->ba_buffer, OCT_BATCH_ALLOC_IOVA0_MASK,
+ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
+
+ return rv;
}
void
diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c
index 28e8f25adb1..0dbf8759d35 100644
--- a/src/plugins/dev_octeon/tx_node.c
+++ b/src/plugins/dev_octeon/tx_node.c
@@ -46,9 +46,12 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
for (cl = ctq->ba_buffer + ctq->ba_first_cl; num_cl > 0; num_cl--, cl++)
{
- u8 count;
- if (cl->status.ccode == ALLOC_CCODE_INVAL)
+ oct_npa_batch_alloc_status_t st;
+
+ if ((st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_RELAXED)) ==
+ OCT_BATCH_ALLOC_IOVA0_MASK + ALLOC_CCODE_INVAL)
{
+ cl_not_ready:
ctx->batch_alloc_not_ready++;
n_freed = bi - (u32 *) ctq->ba_buffer;
if (n_freed > 0)
@@ -63,11 +66,15 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
return 0;
}
- count = cl->status.count;
+ if (st.status.count > 8 &&
+ __atomic_load_n (cl->iova + 8, __ATOMIC_RELAXED) ==
+ OCT_BATCH_ALLOC_IOVA0_MASK)
+ goto cl_not_ready;
+
#if (CLIB_DEBUG > 0)
- cl->status.count = cl->status.ccode = 0;
+ cl->iova[0] &= OCT_BATCH_ALLOC_IOVA0_MASK;
#endif
- if (PREDICT_TRUE (count == 16))
+ if (PREDICT_TRUE (st.status.count == 16))
{
/* optimize for likely case where cacheline is full */
vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, 16,
@@ -76,9 +83,9 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
}
else
{
- vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, count,
- off);
- bi += count;
+ vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi,
+ st.status.count, off);
+ bi += st.status.count;
}
}
@@ -89,7 +96,8 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
/* clear status bits in each cacheline */
n = cl - ctq->ba_buffer;
for (u32 i = 0; i < n; i++)
- ctq->ba_buffer[i].iova[0] = 0;
+ ctq->ba_buffer[i].iova[0] = ctq->ba_buffer[i].iova[8] =
+ OCT_BATCH_ALLOC_IOVA0_MASK;
ctq->ba_num_cl = ctq->ba_first_cl = 0;
}