summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/cmake/cpu.cmake64
-rw-r--r--src/plugins/dpdk/device/init.c2
-rw-r--r--src/vppinfra/cache.h60
3 files changed, 56 insertions, 70 deletions
diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake
index f4a57a34281..dbe0f17b94f 100644
--- a/src/cmake/cpu.cmake
+++ b/src/cmake/cpu.cmake
@@ -11,49 +11,31 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+macro(set_log2_cacheline_size var n)
+ if(${n} EQUAL 128)
+ set(${var} 7)
+ elseif(${n} EQUAL 64)
+ set(${var} 6)
+ else()
+ message(FATAL_ERROR "Cacheline size ${n} not supported")
+ endif()
+endmacro()
+
##############################################################################
-# Cache line size detection
+# Cache line size
##############################################################################
-if(CMAKE_CROSSCOMPILING)
- message(STATUS "Cross-compiling - cache line size detection disabled")
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
-elseif(DEFINED VPP_LOG2_CACHE_LINE_SIZE)
+if(DEFINED VPP_CACHE_LINE_SIZE)
# Cache line size assigned via cmake args
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
- file(READ "/proc/cpuinfo" cpuinfo)
- string(REPLACE "\n" ";" cpuinfo ${cpuinfo})
- foreach(l ${cpuinfo})
- string(REPLACE ":" ";" l ${l})
- list(GET l 0 name)
- list(GET l 1 value)
- string(STRIP ${name} name)
- string(STRIP ${value} value)
- if(${name} STREQUAL "CPU implementer")
- set(CPU_IMPLEMENTER ${value})
- endif()
- if(${name} STREQUAL "CPU part")
- set(CPU_PART ${value})
- endif()
- endforeach()
- # Implementer 0x43 - Cavium
- # Part 0x0af - ThunderX2 is 64B, rest all are 128B
- if (${CPU_IMPLEMENTER} STREQUAL "0x43")
- if (${CPU_PART} STREQUAL "0x0af")
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
- else()
- set(VPP_LOG2_CACHE_LINE_SIZE 7)
- endif()
- else()
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
- endif()
- math(EXPR VPP_CACHE_LINE_SIZE "1 << ${VPP_LOG2_CACHE_LINE_SIZE}")
- message(STATUS "ARM AArch64 CPU implementer ${CPU_IMPLEMENTER} part ${CPU_PART} cacheline size ${VPP_CACHE_LINE_SIZE}")
+ set(VPP_CACHE_LINE_SIZE 128)
else()
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
+ set(VPP_CACHE_LINE_SIZE 64)
endif()
-set(VPP_LOG2_CACHE_LINE_SIZE ${VPP_LOG2_CACHE_LINE_SIZE}
- CACHE STRING "Target CPU cache line size (power of 2)")
+set(VPP_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE}
+ CACHE STRING "Target CPU cache line size")
+
+set_log2_cacheline_size(VPP_LOG2_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE})
##############################################################################
# Gnu Assembler AVX-512 bug detection
@@ -77,7 +59,7 @@ endif()
macro(add_vpp_march_variant v)
cmake_parse_arguments(ARG
"OFF"
- "N_PREFETCHES"
+ "N_PREFETCHES;CACHE_PREFETCH_BYTES"
"FLAGS"
${ARGN}
)
@@ -98,6 +80,10 @@ macro(add_vpp_march_variant v)
if(ARG_N_PREFETCHES)
string(APPEND fs " -DCLIB_N_PREFETCHES=${ARG_N_PREFETCHES}")
endif()
+ if(ARG_CACHE_PREFETCH_BYTES)
+ set_log2_cacheline_size(log2 ${ARG_CACHE_PREFETCH_BYTES})
+ string(APPEND fs " -DCLIB_LOG2_CACHE_PREFETCH_BYTES=${log2}")
+ endif()
if(flags_ok)
string(TOUPPER ${v} uv)
if(ARG_OFF)
@@ -143,6 +129,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
add_vpp_march_variant(qdf24xx
FLAGS -march=armv8-a+crc+crypto -mtune=qdf24xx
N_PREFETCHES 8
+ CACHE_PREFETCH_BYTES 64
OFF
)
@@ -154,16 +141,19 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
add_vpp_march_variant(thunderx2t99
FLAGS -march=armv8.1-a+crc+crypto -mtune=thunderx2t99
N_PREFETCHES 8
+ CACHE_PREFETCH_BYTES 64
)
add_vpp_march_variant(cortexa72
FLAGS -march=armv8-a+crc+crypto -mtune=cortex-a72
N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
)
add_vpp_march_variant(neoversen1
FLAGS -march=armv8.2-a+crc+crypto -mtune=neoverse-n1
N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
)
endif()
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index aebbb64dd76..f923da6c09e 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -1921,8 +1921,6 @@ dpdk_init (vlib_main_t * vm)
"Data in cache line 0 is bigger than cache line size");
STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
"Cache line marker must be 1st element in frame_queue_trace_t");
- STATIC_ASSERT (RTE_CACHE_LINE_SIZE == 1 << CLIB_LOG2_CACHE_LINE_BYTES,
- "DPDK RTE CACHE LINE SIZE does not match with 1<<CLIB_LOG2_CACHE_LINE_BYTES");
dpdk_cli_reference ();
diff --git a/src/vppinfra/cache.h b/src/vppinfra/cache.h
index 04f91e00061..4229a068486 100644
--- a/src/vppinfra/cache.h
+++ b/src/vppinfra/cache.h
@@ -40,66 +40,64 @@
#include <vppinfra/error_bootstrap.h>
-/*
- * Allow CFLAGS to override the configured / deduced cache line size
- */
-#ifndef CLIB_LOG2_CACHE_LINE_BYTES
-
/* Default cache line size of 64 bytes. */
#ifndef CLIB_LOG2_CACHE_LINE_BYTES
#define CLIB_LOG2_CACHE_LINE_BYTES 6
#endif
-#endif /* CLIB_LOG2_CACHE_LINE_BYTES defined */
-
-#if (CLIB_LOG2_CACHE_LINE_BYTES >= 9)
-#error Cache line size 512 bytes or greater
+/* Log2 of the number of bytes fetched by a single prefetch instruction */
+#ifndef CLIB_LOG2_CACHE_PREFETCH_BYTES
+#define CLIB_LOG2_CACHE_PREFETCH_BYTES CLIB_LOG2_CACHE_LINE_BYTES
#endif
-#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
-#define CLIB_CACHE_LINE_ALIGN_MARK(mark) u8 mark[0] __attribute__((aligned(CLIB_CACHE_LINE_BYTES)))
-#define CLIB_CACHE_LINE_ROUND(x) ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
-
/* Default cache line fill buffers. */
#ifndef CLIB_N_PREFETCHES
#define CLIB_N_PREFETCHES 16
#endif
+#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
+#define CLIB_CACHE_PREFETCH_BYTES (1 << CLIB_LOG2_CACHE_PREFETCH_BYTES)
+#define CLIB_CACHE_LINE_ALIGN_MARK(mark) \
+ u8 mark[0] __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)))
+#define CLIB_CACHE_LINE_ROUND(x) \
+ ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
+
/* Read/write arguments to __builtin_prefetch. */
#define CLIB_PREFETCH_READ 0
#define CLIB_PREFETCH_LOAD 0 /* alias for read */
#define CLIB_PREFETCH_WRITE 1
#define CLIB_PREFETCH_STORE 1 /* alias for write */
-#define _CLIB_PREFETCH(n,size,type) \
- if ((size) > (n)*CLIB_CACHE_LINE_BYTES) \
- __builtin_prefetch (_addr + (n)*CLIB_CACHE_LINE_BYTES, \
- CLIB_PREFETCH_##type, \
- /* locality */ 3);
-
-#define CLIB_PREFETCH(addr,size,type) \
-do { \
- void * _addr = (addr); \
- \
- ASSERT ((size) <= 4*CLIB_CACHE_LINE_BYTES); \
- _CLIB_PREFETCH (0, size, type); \
- _CLIB_PREFETCH (1, size, type); \
- _CLIB_PREFETCH (2, size, type); \
- _CLIB_PREFETCH (3, size, type); \
-} while (0)
+#define _CLIB_PREFETCH(n, size, type) \
+ if ((size) > (n) *CLIB_CACHE_PREFETCH_BYTES) \
+ __builtin_prefetch (_addr + (n) *CLIB_CACHE_PREFETCH_BYTES, \
+ CLIB_PREFETCH_##type, /* locality */ 3);
+
+#define CLIB_PREFETCH(addr, size, type) \
+ do \
+ { \
+ void *_addr = (addr); \
+ \
+ ASSERT ((size) <= 4 * CLIB_CACHE_PREFETCH_BYTES); \
+ _CLIB_PREFETCH (0, size, type); \
+ _CLIB_PREFETCH (1, size, type); \
+ _CLIB_PREFETCH (2, size, type); \
+ _CLIB_PREFETCH (3, size, type); \
+ } \
+ while (0)
#undef _
static_always_inline void
clib_prefetch_load (void *p)
{
- CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ __builtin_prefetch (p, /* rw */ 0, /* locality */ 3);
}
static_always_inline void
clib_prefetch_store (void *p)
{
- CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, STORE);
+ __builtin_prefetch (p, /* rw */ 1, /* locality */ 3);
}
#endif /* included_clib_cache_h */