diff options
-rw-r--r-- | src/cmake/cpu.cmake | 7 | ||||
-rw-r--r-- | src/vppinfra/cpu.c | 74 | ||||
-rw-r--r-- | src/vppinfra/cpu.h | 144 |
3 files changed, 131 insertions, 94 deletions
diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake index 69f83c98b8d..f683b62b8f4 100644 --- a/src/cmake/cpu.cmake +++ b/src/cmake/cpu.cmake @@ -194,6 +194,13 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)") N_PREFETCHES 6 CACHE_PREFETCH_BYTES 64 ) + + add_vpp_march_variant(neoversen2 + FLAGS -march=armv8.6-a+crc+crypto+sve2 -mtune=neoverse-n2 + N_PREFETCHES 8 + CACHE_PREFETCH_BYTES 64 + OFF + ) endif() macro(vpp_library_set_multiarch_sources lib) diff --git a/src/vppinfra/cpu.c b/src/vppinfra/cpu.c index b66dd4968ad..79e7dc0955e 100644 --- a/src/vppinfra/cpu.c +++ b/src/vppinfra/cpu.c @@ -71,21 +71,22 @@ _ (0x06, 0x17, "Penryn", "Yorkfield,Wolfdale,Penryn,Harpertown") /* _(implementor-id, part-id, vendor-name, cpu-name, show CPU pass as string) */ -#define foreach_aarch64_cpu_uarch \ - _(0x41, 0xd03, "ARM", "Cortex-A53", 0) \ - _(0x41, 0xd07, "ARM", "Cortex-A57", 0) \ - _(0x41, 0xd08, "ARM", "Cortex-A72", 0) \ - _(0x41, 0xd09, "ARM", "Cortex-A73", 0) \ - _(0x41, 0xd0a, "ARM", "Cortex-A75", 0) \ - _(0x41, 0xd0b, "ARM", "Cortex-A76", 0) \ - _(0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \ - _(0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \ - _(0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \ - _(0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \ - _(0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0) \ - _(0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1) \ - _(0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1) \ - _(0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1) +#define foreach_aarch64_cpu_uarch \ + _ (0x41, 0xd03, "ARM", "Cortex-A53", 0) \ + _ (0x41, 0xd07, "ARM", "Cortex-A57", 0) \ + _ (0x41, 0xd08, "ARM", "Cortex-A72", 0) \ + _ (0x41, 0xd09, "ARM", "Cortex-A73", 0) \ + _ (0x41, 0xd0a, "ARM", "Cortex-A75", 0) \ + _ (0x41, 0xd0b, "ARM", "Cortex-A76", 0) \ + _ (0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \ + _ (0x41, 0xd49, "ARM", "Neoverse-N2", 0) \ + _ (0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \ + _ (0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \ + _ (0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \ + _ (0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0) \ + _ (0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1) \ + _ (0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1) \ + _ (0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1) __clib_export u8 * format_cpu_uarch (u8 * s, va_list * args) @@ -276,10 +277,39 @@ format_march_variant (u8 *s, va_list *args) return format (s, "%s", variants[t]); } -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +#ifdef __aarch64__ + +__clib_export const clib_cpu_info_t * +clib_get_cpu_info () +{ + static int first_run = 1; + static clib_cpu_info_t info = {}; + if (first_run) + { + FILE *fp = fopen ("/proc/cpuinfo", "r"); + char buf[128]; + + if (!fp) + return 0; + + while (!feof (fp)) + { + if (!fgets (buf, sizeof (buf), fp)) + break; + buf[127] = '\0'; + if (strstr (buf, "CPU part")) + info.aarch64.part_num = + strtol (memchr (buf, ':', 128) + 2, NULL, 0); + + if (strstr (buf, "CPU implementer")) + info.aarch64.implementer = + strtol (memchr (buf, ':', 128) + 2, NULL, 0); + } + fclose (fp); + + first_run = 0; + } + return &info; +} + +#endif diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h index 017ecb1ca0e..7a1b75fcf7d 100644 --- a/src/vppinfra/cpu.h +++ b/src/vppinfra/cpu.h @@ -36,7 +36,8 @@ _ (thunderx2t99, "Marvell ThunderX2 T99") \ _ (qdf24xx, "Qualcomm CentriqTM 2400") \ _ (cortexa72, "ARM Cortex-A72") \ - _ (neoversen1, "ARM Neoverse N1") + _ (neoversen1, "ARM Neoverse N1") \ + _ (neoversen2, "ARM Neoverse N2") #else #define foreach_march_variant #endif @@ -337,116 +338,115 @@ clib_get_pmu_counter_count (u8 *fixed, u8 *general) #endif } -static inline u32 -clib_cpu_implementer () +typedef struct { - char buf[128]; - static u32 implementer = -1; - - if (-1 != implementer) - return implementer; - - FILE *fp = fopen ("/proc/cpuinfo", "r"); - if (!fp) - return implementer; - - while (!feof (fp)) - { - if (!fgets (buf, sizeof (buf), fp)) - break; - buf[127] = '\0'; - if (strstr (buf, "CPU implementer")) - implementer = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0); - if (-1 != implementer) - break; - } - fclose (fp); - - return implementer; -} - -static inline u32 -clib_cpu_part () -{ - char buf[128]; - static u32 part = -1; - - if (-1 != part) - return part; - - FILE *fp = fopen ("/proc/cpuinfo", "r"); - if (!fp) - return part; - - while (!feof (fp)) - { - if (!fgets (buf, sizeof (buf), fp)) - break; - buf[127] = '\0'; - if (strstr (buf, "CPU part")) - part = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0); - if (-1 != part) - break; - } - fclose (fp); - - return part; -} - + struct + { + u8 implementer; + u16 part_num; + } aarch64; +} clib_cpu_info_t; + +const clib_cpu_info_t *clib_get_cpu_info (); + +/* ARM */ +#define AARCH64_CPU_IMPLEMENTER_ARM 0x41 +#define AARCH64_CPU_PART_CORTEXA72 0xd08 +#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c +#define AARCH64_CPU_PART_NEOVERSEN2 0xd49 + +/*cavium */ #define AARCH64_CPU_IMPLEMENTER_CAVIUM 0x43 #define AARCH64_CPU_PART_THUNDERX2 0x0af #define AARCH64_CPU_PART_OCTEONTX2T96 0x0b2 #define AARCH64_CPU_PART_OCTEONTX2T98 0x0b1 -#define AARCH64_CPU_IMPLEMENTER_QDF24XX 0x51 + +/* Qualcomm */ +#define AARCH64_CPU_IMPLEMENTER_QUALCOMM 0x51 #define AARCH64_CPU_PART_QDF24XX 0xc00 -#define AARCH64_CPU_IMPLEMENTER_CORTEXA72 0x41 -#define AARCH64_CPU_PART_CORTEXA72 0xd08 -#define AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 0x41 -#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c static inline int clib_cpu_march_priority_octeontx2 () { - if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) && - ((AARCH64_CPU_PART_OCTEONTX2T96 == clib_cpu_part ()) - || AARCH64_CPU_PART_OCTEONTX2T98 == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T96 || + info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T98) return 20; + return -1; } static inline int clib_cpu_march_priority_thunderx2t99 () { - if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) && - (AARCH64_CPU_PART_THUNDERX2 == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_THUNDERX2) return 20; + return -1; } static inline int clib_cpu_march_priority_qdf24xx () { - if ((AARCH64_CPU_IMPLEMENTER_QDF24XX == clib_cpu_implementer ()) && - (AARCH64_CPU_PART_QDF24XX == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_QUALCOMM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_QDF24XX) return 20; + return -1; } static inline int clib_cpu_march_priority_cortexa72 () { - if ((AARCH64_CPU_IMPLEMENTER_CORTEXA72 == clib_cpu_implementer ()) && - (AARCH64_CPU_PART_CORTEXA72 == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_CORTEXA72) return 10; + return -1; } static inline int clib_cpu_march_priority_neoversen1 () { - if ((AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 == clib_cpu_implementer ()) && - (AARCH64_CPU_PART_NEOVERSEN1 == clib_cpu_part ())) + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN1) return 10; + + return -1; +} + +static inline int +clib_cpu_march_priority_neoversen2 () +{ + const clib_cpu_info_t *info = clib_get_cpu_info (); + + if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM) + return -1; + + if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN2) + return 10; + return -1; } |