3 files changed, 131 insertions, 94 deletions
diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake
index 69f83c98b8d..f683b62b8f4 100644
--- a/src/cmake/cpu.cmake
+++ b/src/cmake/cpu.cmake
@@ -194,6 +194,13 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
     N_PREFETCHES 6
     CACHE_PREFETCH_BYTES 64
   )
+
+  add_vpp_march_variant(neoversen2
+    FLAGS -march=armv8.6-a+crc+crypto+sve2 -mtune=neoverse-n2
+    N_PREFETCHES 8
+    CACHE_PREFETCH_BYTES 64
+    OFF
+  )
 endif()
 
 macro(vpp_library_set_multiarch_sources lib)
diff --git a/src/vppinfra/cpu.c b/src/vppinfra/cpu.c
index b66dd4968ad..79e7dc0955e 100644
--- a/src/vppinfra/cpu.c
+++ b/src/vppinfra/cpu.c
@@ -71,21 +71,22 @@
   _ (0x06, 0x17, "Penryn", "Yorkfield,Wolfdale,Penryn,Harpertown")
 
 /* _(implementor-id, part-id, vendor-name, cpu-name, show CPU pass as string) */
-#define foreach_aarch64_cpu_uarch \
- _(0x41, 0xd03, "ARM", "Cortex-A53", 0) \
- _(0x41, 0xd07, "ARM", "Cortex-A57", 0) \
- _(0x41, 0xd08, "ARM", "Cortex-A72", 0) \
- _(0x41, 0xd09, "ARM", "Cortex-A73", 0) \
- _(0x41, 0xd0a, "ARM", "Cortex-A75", 0) \
- _(0x41, 0xd0b, "ARM", "Cortex-A76", 0) \
- _(0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \
- _(0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \
- _(0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \
- _(0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \
- _(0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0) \
- _(0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1) \
- _(0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1) \
- _(0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1)
+#define foreach_aarch64_cpu_uarch                                             \
+  _ (0x41, 0xd03, "ARM", "Cortex-A53", 0)                                     \
+  _ (0x41, 0xd07, "ARM", "Cortex-A57", 0)                                     \
+  _ (0x41, 0xd08, "ARM", "Cortex-A72", 0)                                     \
+  _ (0x41, 0xd09, "ARM", "Cortex-A73", 0)                                     \
+  _ (0x41, 0xd0a, "ARM", "Cortex-A75", 0)                                     \
+  _ (0x41, 0xd0b, "ARM", "Cortex-A76", 0)                                     \
+  _ (0x41, 0xd0c, "ARM", "Neoverse-N1", 0)                                    \
+  _ (0x41, 0xd49, "ARM", "Neoverse-N2", 0)                                    \
+  _ (0x41, 0xd4a, "ARM", "Neoverse-E1", 0)                                    \
+  _ (0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0)                            \
+  _ (0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0)                           \
+  _ (0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0)                           \
+  _ (0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1)                           \
+  _ (0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1)                          \
+  _ (0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1)
 
 __clib_export u8 *
 format_cpu_uarch (u8 * s, va_list * args)
@@ -276,10 +277,39 @@ format_march_variant (u8 *s, va_list *args)
   return format (s, "%s", variants[t]);
 }
 
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+#ifdef __aarch64__
+
+__clib_export const clib_cpu_info_t *
+clib_get_cpu_info ()
+{
+  /* Parsed once from /proc/cpuinfo and cached in a static.  Returns 0 if
+   * the file cannot be opened, in which case the next call tries again. */
+  static int first_run = 1;
+  static clib_cpu_info_t info = {};
+  if (first_run)
+    {
+      FILE *fp = fopen ("/proc/cpuinfo", "r");
+      char buf[128], *val;
+
+      if (!fp)
+	return 0;
+
+      while (fgets (buf, sizeof (buf), fp))
+	{
+	  /* The value follows ':'.  Skip lines without one rather than
+	   * handing strtol a bogus pointer; strtol skips the blank itself. */
+	  val = strchr (buf, ':');
+	  if (!val)
+	    continue;
+	  if (strstr (buf, "CPU part"))
+	    info.aarch64.part_num = strtol (val + 1, NULL, 0);
+	  if (strstr (buf, "CPU implementer"))
+	    info.aarch64.implementer = strtol (val + 1, NULL, 0);
+	}
+      fclose (fp);
+      first_run = 0;
+    }
+  return &info;
+}
+
+#endif
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index 017ecb1ca0e..7a1b75fcf7d 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -36,7 +36,8 @@
   _ (thunderx2t99, "Marvell ThunderX2 T99")                                   \
   _ (qdf24xx, "Qualcomm CentriqTM 2400")                                      \
   _ (cortexa72, "ARM Cortex-A72")                                             \
-  _ (neoversen1, "ARM Neoverse N1")
+  _ (neoversen1, "ARM Neoverse N1")                                           \
+  _ (neoversen2, "ARM Neoverse N2")
 #else
 #define foreach_march_variant
 #endif
@@ -337,116 +338,115 @@ clib_get_pmu_counter_count (u8 *fixed, u8 *general)
 #endif
 }
 
-static inline u32
-clib_cpu_implementer ()
+typedef struct
 {
-  char buf[128];
-  static u32 implementer = -1;
-
-  if (-1 != implementer)
-    return implementer;
-
-  FILE *fp = fopen ("/proc/cpuinfo", "r");
-  if (!fp)
-    return implementer;
-
-  while (!feof (fp))
-    {
-      if (!fgets (buf, sizeof (buf), fp))
-	break;
-      buf[127] = '\0';
-      if (strstr (buf, "CPU implementer"))
-	implementer = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
-      if (-1 != implementer)
-	break;
-    }
-  fclose (fp);
-
-  return implementer;
-}
-
-static inline u32
-clib_cpu_part ()
-{
-  char buf[128];
-  static u32 part = -1;
-
-  if (-1 != part)
-    return part;
-
-  FILE *fp = fopen ("/proc/cpuinfo", "r");
-  if (!fp)
-    return part;
-
-  while (!feof (fp))
-    {
-      if (!fgets (buf, sizeof (buf), fp))
-	break;
-      buf[127] = '\0';
-      if (strstr (buf, "CPU part"))
-	part = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
-      if (-1 != part)
-	break;
-    }
-  fclose (fp);
-
-  return part;
-}
-
+  struct
+  {
+    u8 implementer;
+    u16 part_num;
+  } aarch64;
+} clib_cpu_info_t;
+
+const clib_cpu_info_t *clib_get_cpu_info ();
+
+/* ARM */
+#define AARCH64_CPU_IMPLEMENTER_ARM 0x41
+#define AARCH64_CPU_PART_CORTEXA72  0xd08
+#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c
+#define AARCH64_CPU_PART_NEOVERSEN2 0xd49
+
+/* Cavium */
 #define AARCH64_CPU_IMPLEMENTER_CAVIUM      0x43
 #define AARCH64_CPU_PART_THUNDERX2          0x0af
 #define AARCH64_CPU_PART_OCTEONTX2T96       0x0b2
 #define AARCH64_CPU_PART_OCTEONTX2T98       0x0b1
-#define AARCH64_CPU_IMPLEMENTER_QDF24XX     0x51
+
+/* Qualcomm */
+#define AARCH64_CPU_IMPLEMENTER_QUALCOMM    0x51
 #define AARCH64_CPU_PART_QDF24XX            0xc00
-#define AARCH64_CPU_IMPLEMENTER_CORTEXA72   0x41
-#define AARCH64_CPU_PART_CORTEXA72          0xd08
-#define AARCH64_CPU_IMPLEMENTER_NEOVERSEN1  0x41
-#define AARCH64_CPU_PART_NEOVERSEN1         0xd0c
 
 static inline int
 clib_cpu_march_priority_octeontx2 ()
 {
-  if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
-      ((AARCH64_CPU_PART_OCTEONTX2T96 == clib_cpu_part ())
-       || AARCH64_CPU_PART_OCTEONTX2T98 == clib_cpu_part ()))
+  /* Priority 20 when clib_get_cpu_info () reports a Cavium implementer with
+   * an OCTEON TX2 T96 or T98 part number; -1 otherwise, or if no CPU info
+   * is available. */
+  const clib_cpu_info_t *cpu = clib_get_cpu_info ();
+  if (cpu && cpu->aarch64.implementer == AARCH64_CPU_IMPLEMENTER_CAVIUM &&
+      (cpu->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T96 ||
+       cpu->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T98))
     return 20;
+
   return -1;
 }
 
 static inline int
 clib_cpu_march_priority_thunderx2t99 ()
 {
-  if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
-      (AARCH64_CPU_PART_THUNDERX2 == clib_cpu_part ()))
+  /* Priority 20 when clib_get_cpu_info () reports a Cavium implementer with
+   * the ThunderX2 part number; -1 otherwise, or if no CPU info is available. */
+  const clib_cpu_info_t *cpu = clib_get_cpu_info ();
+
+  if (cpu && cpu->aarch64.implementer == AARCH64_CPU_IMPLEMENTER_CAVIUM &&
+      cpu->aarch64.part_num == AARCH64_CPU_PART_THUNDERX2)
     return 20;
+
   return -1;
 }
 
 static inline int
 clib_cpu_march_priority_qdf24xx ()
 {
-  if ((AARCH64_CPU_IMPLEMENTER_QDF24XX == clib_cpu_implementer ()) &&
-      (AARCH64_CPU_PART_QDF24XX == clib_cpu_part ()))
+  /* Priority 20 when clib_get_cpu_info () reports a Qualcomm implementer
+   * with the QDF24XX part number; -1 otherwise, or if no CPU info exists. */
+  const clib_cpu_info_t *cpu = clib_get_cpu_info ();
+
+  if (cpu && cpu->aarch64.implementer == AARCH64_CPU_IMPLEMENTER_QUALCOMM &&
+      cpu->aarch64.part_num == AARCH64_CPU_PART_QDF24XX)
     return 20;
+
   return -1;
 }
 
 static inline int
 clib_cpu_march_priority_cortexa72 ()
 {
-  if ((AARCH64_CPU_IMPLEMENTER_CORTEXA72 == clib_cpu_implementer ()) &&
-      (AARCH64_CPU_PART_CORTEXA72 == clib_cpu_part ()))
+  /* Priority 10 when clib_get_cpu_info () reports an ARM implementer with
+   * the Cortex-A72 part number; -1 otherwise, or if no CPU info exists. */
+  const clib_cpu_info_t *cpu = clib_get_cpu_info ();
+
+  if (cpu && cpu->aarch64.implementer == AARCH64_CPU_IMPLEMENTER_ARM &&
+      cpu->aarch64.part_num == AARCH64_CPU_PART_CORTEXA72)
     return 10;
+
   return -1;
 }
 
 static inline int
 clib_cpu_march_priority_neoversen1 ()
 {
-  if ((AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 == clib_cpu_implementer ()) &&
-      (AARCH64_CPU_PART_NEOVERSEN1 == clib_cpu_part ()))
+  /* Priority 10 when clib_get_cpu_info () reports an ARM implementer with
+   * the Neoverse N1 part number; -1 otherwise, or if no CPU info exists. */
+  const clib_cpu_info_t *cpu = clib_get_cpu_info ();
+
+  if (cpu && cpu->aarch64.implementer == AARCH64_CPU_IMPLEMENTER_ARM &&
+      cpu->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN1)
     return 10;
+
+  return -1;
+}
+
+static inline int
+clib_cpu_march_priority_neoversen2 ()
+{
+  /* Priority 10 when clib_get_cpu_info () reports an ARM implementer with
+   * the Neoverse N2 part number; -1 otherwise, or if no CPU info exists. */
+  const clib_cpu_info_t *cpu = clib_get_cpu_info ();
+
+  if (cpu && cpu->aarch64.implementer == AARCH64_CPU_IMPLEMENTER_ARM &&
+      cpu->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN2)
+    return 10;
+
   return -1;
 }