From df47a0812ab9365b2de10a7aabcf4b29a255c088 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 29 Mar 2024 21:33:03 +0100 Subject: octeon: fix memory ordering issue in tx batch free Type: fix Fixes: 01fe7ab Change-Id: I4425e809f0977521ddecf91b58b26fe4519dd6e0 Signed-off-by: Damjan Marion --- src/plugins/dev_octeon/octeon.h | 14 +++++++++----- src/plugins/dev_octeon/port.c | 7 +++++-- src/plugins/dev_octeon/queue.c | 14 +++++++++++--- src/plugins/dev_octeon/tx_node.c | 26 +++++++++++++++++--------- 4 files changed, 42 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h index fd8a92c7b3d..72d2d56a437 100644 --- a/src/plugins/dev_octeon/octeon.h +++ b/src/plugins/dev_octeon/octeon.h @@ -15,6 +15,8 @@ #include #include +#define OCT_BATCH_ALLOC_IOVA0_MASK 0xFFFFFFFFFFFFFF80 + typedef enum { OCT_DEVICE_TYPE_UNKNOWN = 0, @@ -72,13 +74,15 @@ typedef struct typedef struct { CLIB_ALIGN_MARK (cl, 128); - union - { - struct npa_batch_alloc_status_s status; - u64 iova[16]; - }; + u64 iova[16]; } oct_npa_batch_alloc_cl128_t; +typedef union +{ + struct npa_batch_alloc_status_s status; + u64 as_u64; +} oct_npa_batch_alloc_status_t; + STATIC_ASSERT_SIZEOF (oct_npa_batch_alloc_cl128_t, 128); typedef struct diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c index 00ad8b9c477..a82e48004b5 100644 --- a/src/plugins/dev_octeon/port.c +++ b/src/plugins/dev_octeon/port.c @@ -284,8 +284,11 @@ oct_txq_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) for (n = ctq->ba_num_cl, cl = ctq->ba_buffer + ctq->ba_first_cl; n; cl++, n--) { - if (cl->status.ccode != 0) - for (u32 i = 0; i < cl->status.count; i++) + oct_npa_batch_alloc_status_t st; + + st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_ACQUIRE); + if (st.status.ccode != ALLOC_CCODE_INVAL) + for (u32 i = 0; i < st.status.count; i++) { vlib_buffer_t *b = (vlib_buffer_t *) (cl->iova[i] + off); vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b)); diff --git a/src/plugins/dev_octeon/queue.c b/src/plugins/dev_octeon/queue.c index 9378fc3b7c7..d6ae794fb8d 100644 --- a/src/plugins/dev_octeon/queue.c +++ b/src/plugins/dev_octeon/queue.c @@ -57,12 +57,20 @@ oct_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq) oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq); vnet_dev_port_t *port = txq->port; vnet_dev_t *dev = port->dev; + u32 sz = sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS; + vnet_dev_rv_t rv; log_debug (dev, "tx_queue_alloc: queue %u alocated", txq->queue_id); - return vnet_dev_dma_mem_alloc ( - vm, dev, sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS, 128, - (void **) &ctq->ba_buffer); + rv = vnet_dev_dma_mem_alloc (vm, dev, sz, 128, (void **) &ctq->ba_buffer); + + if (rv != VNET_DEV_OK) + return rv; + + clib_memset_u64 (ctq->ba_buffer, OCT_BATCH_ALLOC_IOVA0_MASK, + ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS); + + return rv; } void diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c index 28e8f25adb1..0dbf8759d35 100644 --- a/src/plugins/dev_octeon/tx_node.c +++ b/src/plugins/dev_octeon/tx_node.c @@ -46,9 +46,12 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) for (cl = ctq->ba_buffer + ctq->ba_first_cl; num_cl > 0; num_cl--, cl++) { - u8 count; - if (cl->status.ccode == ALLOC_CCODE_INVAL) + oct_npa_batch_alloc_status_t st; + + if ((st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_RELAXED)) == + OCT_BATCH_ALLOC_IOVA0_MASK + ALLOC_CCODE_INVAL) { + cl_not_ready: ctx->batch_alloc_not_ready++; n_freed = bi - (u32 *) ctq->ba_buffer; if (n_freed > 0) @@ -63,11 +66,15 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) return 0; } - count = cl->status.count; + if (st.status.count > 8 && + __atomic_load_n (cl->iova + 8, __ATOMIC_RELAXED) == + OCT_BATCH_ALLOC_IOVA0_MASK) + goto cl_not_ready; + #if (CLIB_DEBUG > 0) - cl->status.count = cl->status.ccode = 0; + cl->iova[0] &= OCT_BATCH_ALLOC_IOVA0_MASK; #endif - if (PREDICT_TRUE (count == 16)) + if (PREDICT_TRUE (st.status.count == 16)) { /* optimize for likely case where cacheline is full */ vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, 16, @@ -76,9 +83,9 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) } else { - vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, count, - off); - bi += count; + vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, + st.status.count, off); + bi += st.status.count; } } @@ -89,7 +96,8 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq) /* clear status bits in each cacheline */ n = cl - ctq->ba_buffer; for (u32 i = 0; i < n; i++) - ctq->ba_buffer[i].iova[0] = 0; + ctq->ba_buffer[i].iova[0] = ctq->ba_buffer[i].iova[8] = + OCT_BATCH_ALLOC_IOVA0_MASK; ctq->ba_num_cl = ctq->ba_first_cl = 0; } -- cgit 1.2.3-korg