summary | refs | log | tree | commit | diff | stats
path: root/src/vppinfra/vector/test
diff options
context:
space:
mode:
author Damjan Marion <damarion@cisco.com> 2022-04-06 21:16:37 +0200
committer Florin Coras <florin.coras@gmail.com> 2022-04-08 15:53:10 +0000
commit d5045e68a782d484e3f0e54edb4a88dc3dfac291 (patch)
tree f0cf5627b672050e4463d1dee4ea952a066c12d6 /src/vppinfra/vector/test
parent dfc43164078b481e39dc0a87e8e358cc6a56d14e (diff)
vppinfra: introduce clib_perfmon
Type: improvement
Change-Id: I85a90774eb313020435c9bc2297c1bdf23d52efc
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra/vector/test')
-rw-r--r-- src/vppinfra/vector/test/ip_csum.c 26
-rw-r--r-- src/vppinfra/vector/test/sha2.c 7
-rw-r--r-- src/vppinfra/vector/test/test.c 163
-rw-r--r-- src/vppinfra/vector/test/test.h 30
-rw-r--r-- src/vppinfra/vector/test/toeplitz.c 54
5 files changed, 64 insertions, 216 deletions
diff --git a/src/vppinfra/vector/test/ip_csum.c b/src/vppinfra/vector/test/ip_csum.c
index cb33c036120..17a606523f2 100644
--- a/src/vppinfra/vector/test/ip_csum.c
+++ b/src/vppinfra/vector/test/ip_csum.c
@@ -115,48 +115,48 @@ done:
}
void __test_perf_fn
-perftest_ip4_hdr (int fd, test_perf_t *tp)
+perftest_ip4_hdr (test_perf_t *tp)
{
u32 n = tp->n_ops;
u8 *data = test_mem_alloc_and_splat (20, n, (void *) &test1);
u16 *res = test_mem_alloc (n * sizeof (u16));
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
for (int i = 0; i < n; i++)
res[i] = clib_ip_csum (data + i * 20, 20);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
test_mem_free (data);
test_mem_free (res);
}
void __test_perf_fn
-perftest_tcp_payload (int fd, test_perf_t *tp)
+perftest_tcp_payload (test_perf_t *tp)
{
u32 n = tp->n_ops;
volatile uword *lenp = &tp->arg0;
u8 *data = test_mem_alloc_and_splat (20, n, (void *) &test1);
u16 *res = test_mem_alloc (n * sizeof (u16));
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
for (int i = 0; i < n; i++)
res[i] = clib_ip_csum (data + i * lenp[0], lenp[0]);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
test_mem_free (data);
test_mem_free (res);
}
void __test_perf_fn
-perftest_byte (int fd, test_perf_t *tp)
+perftest_byte (test_perf_t *tp)
{
volatile uword *np = &tp->n_ops;
u8 *data = test_mem_alloc_and_fill_inc_u8 (*np, 0, 0);
u16 *res = test_mem_alloc (sizeof (u16));
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
res[0] = clib_ip_csum (data, np[0]);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
test_mem_free (data);
test_mem_free (res);
@@ -166,16 +166,14 @@ REGISTER_TEST (clib_ip_csum) = {
.name = "clib_ip_csum",
.fn = test_clib_ip_csum,
.perf_tests = PERF_TESTS (
- { .name = "ip4_hdr",
- .op_name = "IP4Hdr",
+ { .name = "fixed size (per IPv4 Header)",
.n_ops = 1024,
.fn = perftest_ip4_hdr },
- { .name = "tcp_paylaad",
- .op_name = "1460Byte",
+ { .name = "fixed size (per 1460 byte block)",
.n_ops = 16,
.arg0 = 1460,
.fn = perftest_tcp_payload },
- { .name = "byte", .op_name = "Byte", .n_ops = 16384, .fn = perftest_byte }
+ { .name = "variable size (per byte)", .n_ops = 16384, .fn = perftest_byte }
),
};
diff --git a/src/vppinfra/vector/test/sha2.c b/src/vppinfra/vector/test/sha2.c
index 58fb2e74401..81365792063 100644
--- a/src/vppinfra/vector/test/sha2.c
+++ b/src/vppinfra/vector/test/sha2.c
@@ -293,7 +293,7 @@ check_digest (clib_error_t *err, int tc, u8 *calculated, const u8 *expected,
return err; \
} \
\
- void __test_perf_fn perftest_sha##bits##_byte (int fd, test_perf_t *tp) \
+ void __test_perf_fn perftest_sha##bits##_byte (test_perf_t *tp) \
{ \
volatile uword *np = &tp->n_ops; \
volatile uword *kl = &tp->arg0; \
@@ -302,9 +302,9 @@ check_digest (clib_error_t *err, int tc, u8 *calculated, const u8 *expected,
u8 *data = test_mem_alloc_and_fill_inc_u8 (*np, 0, 0); \
u8 *digest = test_mem_alloc (64); \
\
- test_perf_event_enable (fd); \
+ test_perf_event_enable (tp); \
clib_hmac_sha##bits (key, *kl, data, *np, digest); \
- test_perf_event_disable (fd); \
+ test_perf_event_disable (tp); \
\
test_mem_free (key); \
test_mem_free (data); \
@@ -314,7 +314,6 @@ check_digest (clib_error_t *err, int tc, u8 *calculated, const u8 *expected,
.name = "clib_hmac_sha" #bits, \
.fn = test_clib_hmac_sha##bits, \
.perf_tests = PERF_TESTS ({ .name = "byte", \
- .op_name = "Byte", \
.n_ops = 16384, \
.arg0 = 20, \
.fn = perftest_sha##bits##_byte }) \
diff --git a/src/vppinfra/vector/test/test.c b/src/vppinfra/vector/test/test.c
index 51b6bbf4bb2..dc5651c47cc 100644
--- a/src/vppinfra/vector/test/test.c
+++ b/src/vppinfra/vector/test/test.c
@@ -53,61 +53,7 @@ test_funct (test_main_t *tm)
return 0;
}
-#define TEST_PERF_MAX_EVENTS 7
-typedef struct
-{
- char *name;
- char *desc;
- u64 config[TEST_PERF_MAX_EVENTS];
- u32 type;
- u8 n_events;
- format_function_t *format_fn;
-} test_perf_event_bundle_t;
-
-static u8 *
-format_test_perf_bundle_default (u8 *s, va_list *args)
-{
- test_main_t *tm = &test_main;
- test_perf_event_bundle_t __clib_unused *b =
- va_arg (*args, test_perf_event_bundle_t *);
- test_perf_t *tp = va_arg (*args, test_perf_t *);
- u64 *data = va_arg (*args, u64 *);
-
- if (tm->ref_clock > 0)
- {
- if (data)
- s = format (s, "%8.1f", tm->ref_clock * data[0] / data[1] / 1e9);
- else
- s = format (s, "%8s", "Freq");
- }
-
- if (data)
- s = format (s, "%5.2f", (f64) data[2] / data[0]);
- else
- s = format (s, "%5s", "IPC");
-
- if (data)
- s = format (s, "%8.2f", (f64) data[0] / tp->n_ops);
- else
- s = format (s, "%8s", "Clks/Op");
-
- if (data)
- s = format (s, "%8.2f", (f64) data[2] / tp->n_ops);
- else
- s = format (s, "%8s", "Inst/Op");
-
- if (data)
- s = format (s, "%9.2f", (f64) data[3] / tp->n_ops);
- else
- s = format (s, "%9s", "Brnch/Op");
-
- if (data)
- s = format (s, "%10.2f", (f64) data[4] / tp->n_ops);
- else
- s = format (s, "%10s", "BrMiss/Op");
- return s;
-}
-
+#if 0
static u8 *
format_test_perf_bundle_core_power (u8 *s, va_list *args)
{
@@ -134,19 +80,6 @@ format_test_perf_bundle_core_power (u8 *s, va_list *args)
return s;
}
-test_perf_event_bundle_t perf_bundles[] = {
- {
- .name = "default",
- .desc = "IPC, Clocks/Operatiom, Instr/Operation, Branch Total & Miss",
- .type = PERF_TYPE_HARDWARE,
- .config[0] = PERF_COUNT_HW_CPU_CYCLES,
- .config[1] = PERF_COUNT_HW_REF_CPU_CYCLES,
- .config[2] = PERF_COUNT_HW_INSTRUCTIONS,
- .config[3] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
- .config[4] = PERF_COUNT_HW_BRANCH_MISSES,
- .n_events = 5,
- .format_fn = format_test_perf_bundle_default,
- }
#ifdef __x86_64__
#define PERF_INTEL_CODE(event, umask) ((event) | (umask) << 8)
,
@@ -165,69 +98,21 @@ test_perf_event_bundle_t perf_bundles[] = {
}
#endif
};
+#endif
#ifdef __linux__
clib_error_t *
test_perf (test_main_t *tm)
{
clib_error_t *err = 0;
- test_perf_event_bundle_t *b = 0;
- int group_fd = -1, fds[TEST_PERF_MAX_EVENTS];
- u64 count[TEST_PERF_MAX_EVENTS + 3] = {};
- struct perf_event_attr pe = {
- .size = sizeof (struct perf_event_attr),
- .disabled = 1,
- .exclude_kernel = 1,
- .exclude_hv = 1,
- .pinned = 1,
- .exclusive = 1,
- .read_format = (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
- PERF_FORMAT_TOTAL_TIME_RUNNING),
- };
-
- for (int i = 0; i < TEST_PERF_MAX_EVENTS; i++)
- fds[i] = -1;
-
- tm->ref_clock = os_cpu_clock_frequency ();
-
- if (tm->bundle)
- {
- for (int i = 0; i < ARRAY_LEN (perf_bundles); i++)
- if (strncmp ((char *) tm->bundle, perf_bundles[i].name,
- vec_len (tm->bundle)) == 0)
- {
- b = perf_bundles + i;
- break;
- }
- if (b == 0)
- return clib_error_return (0, "Unknown bundle '%s'", tm->bundle);
- }
- else
- b = perf_bundles;
+ clib_perfmon_ctx_t _ctx, *ctx = &_ctx;
- for (int i = 0; i < b->n_events; i++)
- {
- pe.config = b->config[i];
- pe.type = b->type;
- int fd = syscall (__NR_perf_event_open, &pe, /* pid */ 0, /* cpu */ -1,
- /* group_fd */ group_fd, /* flags */ 0);
- if (fd < 0)
- {
- err = clib_error_return_unix (0, "perf_event_open");
- goto done;
- }
+ if ((err = clib_perfmon_init_by_bundle_name (
+ ctx, "%s", tm->bundle ? (char *) tm->bundle : "default")))
+ return err;
- if (group_fd == -1)
- {
- group_fd = fd;
- pe.pinned = 0;
- pe.exclusive = 0;
- }
- fds[i] = fd;
- }
fformat (stdout, "Warming up...\n");
- for (u64 i = 0; i < (u64) tm->ref_clock; i++)
- asm inline("" : : "r"(i * i) : "memory");
+ clib_perfmon_warmup (ctx);
for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
{
@@ -246,31 +131,16 @@ test_perf (test_main_t *tm)
test_perf_t *pt = r->perf_tests;
if (tm->filter && strstr (r->name, (char *) tm->filter) == 0)
goto next;
- fformat (stdout, "%-22s%-12s%U\n", r->name, "OpType",
- b->format_fn, b, pt, 0UL);
+
+ clib_perfmon_capture_group (ctx, "%s", r->name);
do
{
- u32 read_size = (b->n_events + 3) * sizeof (u64);
for (int i = 0; i < tm->repeat; i++)
{
- test_perf_event_reset (group_fd);
- pt->fn (group_fd, pt);
- if ((read (group_fd, &count, read_size) != read_size))
- {
- err = clib_error_return_unix (0, "read");
- goto done;
- }
- if (count[1] != count[2])
- clib_warning (
- "perf counters were not running all the time."
-#ifdef __x86_64__
- "\nConsider turning NMI watchdog off ('sysctl -w "
- "kernel.nmi_watchdog=0')."
-#endif
- );
- fformat (stdout, " %-20s%-12s%U\n", pt->name,
- pt->op_name ? pt->op_name : "", b->format_fn, b,
- pt, count + 3);
+ pt->fd = ctx->group_fd;
+ clib_perfmon_reset (ctx);
+ pt->fn (pt);
+ clib_perfmon_capture (ctx, pt->n_ops, "%0s", pt->name);
}
}
while ((++pt)->fn);
@@ -278,12 +148,11 @@ test_perf (test_main_t *tm)
next:
r = r->next;
}
+ fformat (stdout, "%U\n", format_perfmon_bundle, ctx);
+ clib_perfmon_clear (ctx);
}
-done:
- for (int i = 0; i < TEST_PERF_MAX_EVENTS; i++)
- if (fds[i] != -1)
- close (fds[i]);
+ clib_perfmon_free (ctx);
return err;
}
#endif
diff --git a/src/vppinfra/vector/test/test.h b/src/vppinfra/vector/test/test.h
index 4511bf31fa9..7d54d80c6ec 100644
--- a/src/vppinfra/vector/test/test.h
+++ b/src/vppinfra/vector/test/test.h
@@ -6,6 +6,7 @@
#define included_test_test_h
#include <vppinfra/cpu.h>
+#include <vppinfra/perfmon/perfmon.h>
#ifdef __linux__
#include <sys/ioctl.h>
#include <linux/perf_event.h>
@@ -14,10 +15,11 @@
typedef clib_error_t *(test_fn_t) (clib_error_t *);
struct test_perf_;
-typedef void (test_perf_fn_t) (int fd, struct test_perf_ *tp);
+typedef void (test_perf_fn_t) (struct test_perf_ *tp);
typedef struct test_perf_
{
+ int fd;
u64 n_ops;
union
{
@@ -34,7 +36,6 @@ typedef struct test_perf_
u64 arg2;
void *ptr2;
};
- char *op_name;
char *name;
test_perf_fn_t *fn;
} test_perf_t;
@@ -83,32 +84,19 @@ extern test_main_t test_main;
}
static_always_inline void
-test_perf_event_ioctl (int fd, u32 req)
+test_perf_event_reset (test_perf_t *t)
{
-#ifdef __x86_64__
- asm inline("syscall"
- :
- : "D"(fd), "S"(req), "a"(__NR_ioctl), "d"(PERF_IOC_FLAG_GROUP)
- : "rcx", "r11" /* registers modified by kernel */);
-#else
- ioctl (fd, req, PERF_IOC_FLAG_GROUP);
-#endif
-}
-
-static_always_inline void
-test_perf_event_reset (int fd)
-{
- test_perf_event_ioctl (fd, PERF_EVENT_IOC_RESET);
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_RESET);
}
static_always_inline void
-test_perf_event_enable (int fd)
+test_perf_event_enable (test_perf_t *t)
{
- test_perf_event_ioctl (fd, PERF_EVENT_IOC_ENABLE);
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_ENABLE);
}
static_always_inline void
-test_perf_event_disable (int fd)
+test_perf_event_disable (test_perf_t *t)
{
- test_perf_event_ioctl (fd, PERF_EVENT_IOC_DISABLE);
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_DISABLE);
}
void *test_mem_alloc (uword size);
diff --git a/src/vppinfra/vector/test/toeplitz.c b/src/vppinfra/vector/test/toeplitz.c
index d425a443eec..fbe4275f9fa 100644
--- a/src/vppinfra/vector/test/toeplitz.c
+++ b/src/vppinfra/vector/test/toeplitz.c
@@ -259,17 +259,17 @@ done:
}
void __test_perf_fn
-perftest_fixed_12byte (int fd, test_perf_t *tp)
+perftest_fixed_12byte (test_perf_t *tp)
{
u32 n = tp->n_ops;
u8 *data = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[0].key);
u8 *res = test_mem_alloc (4 * n);
clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
for (int i = 0; i < n; i++)
((u32 *) res)[i] = clib_toeplitz_hash (k, data + i * 12, 12);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
clib_toeplitz_hash_key_free (k);
test_mem_free (data);
@@ -277,17 +277,17 @@ perftest_fixed_12byte (int fd, test_perf_t *tp)
}
void __test_perf_fn
-perftest_fixed_36byte (int fd, test_perf_t *tp)
+perftest_fixed_36byte (test_perf_t *tp)
{
u32 n = tp->n_ops;
u8 *data = test_mem_alloc_and_splat (36, n, (void *) &ip6_tests[0].key);
u8 *res = test_mem_alloc (4 * n);
clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
for (int i = 0; i < n; i++)
((u32 *) res)[i] = clib_toeplitz_hash (k, data + i * 36, 36);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
clib_toeplitz_hash_key_free (k);
test_mem_free (data);
@@ -295,7 +295,7 @@ perftest_fixed_36byte (int fd, test_perf_t *tp)
}
void __test_perf_fn
-perftest_variable_size (int fd, test_perf_t *tp)
+perftest_variable_size (test_perf_t *tp)
{
u32 key_len, n_keys, n = tp->n_ops;
u8 *key, *data = test_mem_alloc (n);
@@ -309,9 +309,9 @@ perftest_variable_size (int fd, test_perf_t *tp)
clib_toeplitz_hash_key_free (k);
k = clib_toeplitz_hash_key_init (key, key_len * n_keys);
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
res[0] = clib_toeplitz_hash (k, data, n);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
clib_toeplitz_hash_key_free (k);
test_mem_free (data);
@@ -322,16 +322,13 @@ perftest_variable_size (int fd, test_perf_t *tp)
REGISTER_TEST (clib_toeplitz_hash) = {
.name = "clib_toeplitz_hash",
.fn = test_clib_toeplitz_hash,
- .perf_tests = PERF_TESTS ({ .name = "fixed_12",
- .op_name = "12B Tuple",
+ .perf_tests = PERF_TESTS ({ .name = "fixed (per 12 byte tuple)",
.n_ops = 1024,
.fn = perftest_fixed_12byte },
- { .name = "fixed_36",
- .op_name = "36B Tuple",
+ { .name = "fixed (per 36 byte tuple)",
.n_ops = 1024,
.fn = perftest_fixed_36byte },
- { .name = "variable_size",
- .op_name = "Byte",
+ { .name = "variable size (per byte)",
.n_ops = 16384,
.fn = perftest_variable_size }),
};
@@ -442,7 +439,7 @@ done:
}
void __test_perf_fn
-perftest_fixed_12byte_x4 (int fd, test_perf_t *tp)
+perftest_fixed_12byte_x4 (test_perf_t *tp)
{
u32 n = tp->n_ops / 4;
u8 *d0 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[0].key);
@@ -455,11 +452,11 @@ perftest_fixed_12byte_x4 (int fd, test_perf_t *tp)
u32 *h3 = test_mem_alloc (4 * n);
clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
for (int i = 0; i < n; i++)
clib_toeplitz_hash_x4 (k, d0 + i * 12, d1 + i * 12, d2 + i * 12,
d3 + i * 12, h0 + i, h1 + i, h2 + i, h3 + i, 12);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
clib_toeplitz_hash_key_free (k);
test_mem_free (d0);
@@ -473,7 +470,7 @@ perftest_fixed_12byte_x4 (int fd, test_perf_t *tp)
}
void __test_perf_fn
-perftest_fixed_36byte_x4 (int fd, test_perf_t *tp)
+perftest_fixed_36byte_x4 (test_perf_t *tp)
{
u32 n = tp->n_ops / 4;
u8 *d0 = test_mem_alloc_and_splat (36, n, (void *) &ip4_tests[0].key);
@@ -486,11 +483,11 @@ perftest_fixed_36byte_x4 (int fd, test_perf_t *tp)
u32 *h3 = test_mem_alloc (4 * n);
clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
for (int i = 0; i < n; i++)
clib_toeplitz_hash_x4 (k, d0 + i * 36, d1 + i * 36, d2 + i * 36,
d3 + i * 36, h0 + i, h1 + i, h2 + i, h3 + i, 36);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
clib_toeplitz_hash_key_free (k);
test_mem_free (d0);
@@ -504,7 +501,7 @@ perftest_fixed_36byte_x4 (int fd, test_perf_t *tp)
}
void __test_perf_fn
-perftest_variable_size_x4 (int fd, test_perf_t *tp)
+perftest_variable_size_x4 (test_perf_t *tp)
{
u32 key_len, n_keys, n = tp->n_ops / 4;
u8 *key;
@@ -525,9 +522,9 @@ perftest_variable_size_x4 (int fd, test_perf_t *tp)
clib_toeplitz_hash_key_free (k);
k = clib_toeplitz_hash_key_init (key, key_len * n_keys);
- test_perf_event_enable (fd);
+ test_perf_event_enable (tp);
clib_toeplitz_hash_x4 (k, d0, d1, d2, d3, h0, h1, h2, h3, n);
- test_perf_event_disable (fd);
+ test_perf_event_disable (tp);
clib_toeplitz_hash_key_free (k);
test_mem_free (key);
@@ -544,16 +541,13 @@ perftest_variable_size_x4 (int fd, test_perf_t *tp)
REGISTER_TEST (clib_toeplitz_hash_x4) = {
.name = "clib_toeplitz_hash_x4",
.fn = test_clib_toeplitz_hash_x4,
- .perf_tests = PERF_TESTS ({ .name = "fixed_12",
- .op_name = "12B Tuple",
+ .perf_tests = PERF_TESTS ({ .name = "fixed (per 12 byte tuple)",
.n_ops = 1024,
.fn = perftest_fixed_12byte_x4 },
- { .name = "fixed_36",
- .op_name = "36B Tuple",
+ { .name = "fixed (per 36 byte tuple)",
.n_ops = 1024,
.fn = perftest_fixed_36byte_x4 },
- { .name = "variable_size",
- .op_name = "Byte",
+ { .name = "variable size (per byte)",
.n_ops = 16384,
.fn = perftest_variable_size_x4 }),
};