From a93c85a5793852b6edda20bc1100fa9fabd0eb29 Mon Sep 17 00:00:00 2001
From: Monendra Singh Kushwaha
Date: Wed, 28 Feb 2024 13:09:06 +0530
Subject: octeon: add support for Marvell Octeon9 SoC

Type: feature

Signed-off-by: Monendra Singh Kushwaha
Change-Id: I5db58b8ec41b45596bc03b4a336a184c17871294
---
 build/external/packages/octeon-roc.mk |  4 +--
 src/cmake/platform/octeon9.cmake      |  4 +++
 src/plugins/dev_octeon/CMakeLists.txt |  6 +++-
 src/plugins/dev_octeon/roc_helper.c   |  8 +++++
 src/plugins/dev_octeon/rx_node.c      | 44 +++++++++++++++++++++++
 src/plugins/dev_octeon/tx_node.c      | 66 +++++++++++++++++++++++++++++++++-
 6 files changed, 128 insertions(+), 4 deletions(-)
 create mode 100644 src/cmake/platform/octeon9.cmake

diff --git a/build/external/packages/octeon-roc.mk b/build/external/packages/octeon-roc.mk
index 62f5d823bdf..f96cd4c6e3d 100644
--- a/build/external/packages/octeon-roc.mk
+++ b/build/external/packages/octeon-roc.mk
@@ -2,9 +2,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # https://spdx.org/licenses/Apache-2.0.html
 
-octeon-roc_version := 0.3
+octeon-roc_version := 0.4
 octeon-roc_tarball := octeon-roc-v$(octeon-roc_version).tar.gz
-octeon-roc_tarball_md5sum := e4a16beb76a6c63af1600dd4d1d752b8
+octeon-roc_tarball_md5sum := f3772f6e05d71cf2709c00ff1a1929ca
 octeon-roc_tarball_strip_dirs := 1
 octeon-roc_url := https://github.com/MarvellEmbeddedProcessors/marvell-vpp/archive/refs/tags/$(octeon-roc_tarball)
 
diff --git a/src/cmake/platform/octeon9.cmake b/src/cmake/platform/octeon9.cmake
new file mode 100644
index 00000000000..46ca7dfa64a
--- /dev/null
+++ b/src/cmake/platform/octeon9.cmake
@@ -0,0 +1,4 @@
+
+set(VPP_PLATFORM_CACHE_LINE_SIZE 128)
+set(VPP_PLATFORM_MARCH_FLAGS -march=armv8.2-a+crc+crypto)
+set(VPP_PLATFORM_BUFFER_ALIGN 128)
diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt
index e8abf1a3389..0f6b32bbecd 100644
--- a/src/plugins/dev_octeon/CMakeLists.txt
+++ b/src/plugins/dev_octeon/CMakeLists.txt
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Copyright(c) 2022 Cisco Systems, Inc.
 
-if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10")
+if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10" AND NOT VPP_PLATFORM_NAME STREQUAL "octeon9")
   return()
 endif()
 
@@ -21,6 +21,10 @@ endif()
 
 include_directories (${OCTEON_ROC_DIR}/)
 
+if (VPP_PLATFORM_NAME STREQUAL "octeon9")
+  add_compile_definitions(PLATFORM_OCTEON9)
+endif()
+
 add_vpp_plugin(dev_octeon
   SOURCES
   init.c
diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c
index f10c2cb578b..16e0a871a9d 100644
--- a/src/plugins/dev_octeon/roc_helper.c
+++ b/src/plugins/dev_octeon/roc_helper.c
@@ -49,6 +49,13 @@ oct_plt_get_thread_index (void)
   return __os_thread_index;
 }
 
+/* Platform callback: returns the compile-time CLIB_CACHE_LINE_BYTES. */
+static u64
+oct_plt_get_cache_line_size (void)
+{
+  return CLIB_CACHE_LINE_BYTES;
+}
+
 static void
 oct_drv_physmem_free (vlib_main_t *vm, void *mem)
 {
@@ -178,4 +185,5 @@ oct_plt_init_param_t oct_plt_init_param = {
   .oct_plt_spinlock_unlock = oct_plt_spinlock_unlock,
   .oct_plt_spinlock_trylock = oct_plt_spinlock_trylock,
   .oct_plt_get_thread_index = oct_plt_get_thread_index,
+  .oct_plt_get_cache_line_size = oct_plt_get_cache_line_size,
 };
diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c
index 997f1356199..1f8d5d93fa3 100644
--- a/src/plugins/dev_octeon/rx_node.c
+++ b/src/plugins/dev_octeon/rx_node.c
@@ -165,6 +165,49 @@ oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx,
   return n;
 }
 
+#ifdef PLATFORM_OCTEON9
+/*
+ * Octeon9 RX refill: allocate n_refill buffers and hand each one to the
+ * queue's aura by writing it to the aura's NPA_LF_AURA_OP_FREE0 address.
+ * Requests for fewer than 256 buffers are skipped. Returns the number of
+ * buffers handed over, or 0 if nothing was refilled.
+ *
+ * NOTE(review): buffers come from vlib_buffer_alloc () but a short
+ * allocation is returned to pool 'bpi' - confirm both use the same pool.
+ */
+static_always_inline u32
+oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill)
+{
+  u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+  oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+  u64 aura = roc_npa_aura_handle_to_aura (crq->aura_handle);
+  const uint64_t addr =
+    roc_npa_aura_handle_to_base (crq->aura_handle) + NPA_LF_AURA_OP_FREE0;
+  u32 n_alloc;
+
+  /* Bail out before sizing the arrays: a zero-length VLA is undefined. */
+  if (n_refill < 256)
+    return 0;
+
+  u32 buffer_indices[n_refill];
+  vlib_buffer_t *buffers[n_refill];
+
+  n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_refill);
+  if (PREDICT_FALSE (n_alloc < n_refill))
+    {
+      /* All or nothing: return a partial allocation to the pool. */
+      vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi);
+      return 0;
+    }
+
+  vlib_get_buffers (vm, buffer_indices, buffers, n_alloc);
+
+  for (u32 i = 0; i < n_alloc; i++)
+    roc_store_pair ((u64) buffers[i], aura, addr);
+
+  return n_alloc;
+}
+#else
 static_always_inline void
 oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr,
 		      oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi,
@@ -260,6 +303,7 @@ oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill)
 
   return n_enq;
 }
+#endif
 
 static_always_inline void
 oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c
index a2e4b07de8a..0907493814d 100644
--- a/src/plugins/dev_octeon/tx_node.c
+++ b/src/plugins/dev_octeon/tx_node.c
@@ -32,6 +32,54 @@ typedef struct
   lmt_line_t *lmt_lines;
 } oct_tx_ctx_t;
 
+#ifdef PLATFORM_OCTEON9
+/*
+ * Octeon9: once at least 32 buffers are available in the TX queue's aura,
+ * bulk-allocate them from the aura and release them with
+ * vlib_buffer_free_no_next (). Returns the number of buffers freed
+ * (0 when below the threshold).
+ */
+static_always_inline u32
+oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
+{
+  oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+  u16 off = ctq->hdr_off;
+  u64 ah = ctq->aura_handle;
+  u32 n_freed = 0, n;
+
+  if ((n = roc_npa_aura_op_available (ah)) >= 32)
+    {
+      /* NOTE(review): n is not capped; confirm the arrays fit the stack. */
+      u64 buffers[n];
+      u32 bi[n];
+
+      /* Only the count the bulk alloc returned is converted and freed. */
+      n_freed = roc_npa_aura_op_bulk_alloc (ah, buffers, n, 0, 1);
+      vlib_get_buffer_indices_with_offset (vm, (void **) buffers, bi, n_freed,
+					   off);
+      vlib_buffer_free_no_next (vm, bi, n_freed);
+    }
+
+  return n_freed;
+}
+
+/*
+ * Move 'dwords' words of 'desc' to 'lmt_addr' with roc_lmt_mov_seg () and
+ * submit with roc_lmt_submit_ldeor (io_addr); both steps are repeated
+ * while the submit returns 0.
+ */
+static_always_inline void
+oct_lmt_copy (void *lmt_addr, u64 io_addr, void *desc, u64 dwords)
+{
+  u64 lmt_status;
+
+  do
+    {
+      roc_lmt_mov_seg (lmt_addr, desc, dwords);
+      lmt_status = roc_lmt_submit_ldeor (io_addr);
+    }
+  while (lmt_status == 0);
+}
+#else
 static_always_inline u32
 oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
 {
@@ -133,6 +181,7 @@ oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
 
   return n_freed;
 }
+#endif
 
 static_always_inline u8
 oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
@@ -158,6 +207,11 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
       return 0;
     }
 
+#ifdef PLATFORM_OCTEON9
+  /* Override line for Octeon9 */
+  line = ctx->lmt_lines;
+#endif
+
   if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT)
     {
       u8 n_tail_segs = 0;
@@ -238,8 +292,12 @@ oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
       t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
     }
 
+#ifdef PLATFORM_OCTEON9
+  oct_lmt_copy (line, ctx->lmt_ioaddr, &d, n_dwords);
+#else
   for (u32 i = 0; i < n_dwords; i++)
     line->dwords[i] = d.as_u128[i];
+#endif
 
   *dpl = n_dwords;
   *n = *n + 1;
@@ -252,7 +310,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
 	      vlib_buffer_t **b, u32 n_pkts, int trace)
 {
   u8 dwords_per_line[16], *dpl = dwords_per_line;
-  u64 lmt_arg, ioaddr, n_lines;
+  u64 __attribute__ ((unused)) lmt_arg, ioaddr, n_lines;
   u32 n_left, or_flags_16 = 0, n = 0;
   const u32 not_simple_flags =
     VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD;
@@ -331,6 +389,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
   if (PREDICT_FALSE (!n_lines))
     return n_pkts;
 
+#ifndef PLATFORM_OCTEON9
   if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT))
     {
       dpl = dwords_per_line;
@@ -359,6 +418,7 @@ oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
     }
 
   roc_lmt_submit_steorl (lmt_arg, ioaddr);
+#endif
 
   return n_pkts;
 }
@@ -375,7 +435,11 @@ VNET_DEV_NODE_FN (oct_tx_node)
   u32 *from = vlib_frame_vector_args (frame);
   u32 n, n_enq, n_left, n_pkts = frame->n_vectors;
   vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers;
+#ifdef PLATFORM_OCTEON9
+  u64 lmt_id = 0;
+#else
   u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2;
+#endif
 
   oct_tx_ctx_t ctx = {
     .node = node,
-- 
cgit 1.2.3-korg