11 files changed, 999 insertions, 41 deletions
diff --git a/resources/libraries/python/TelemetryUtil.py b/resources/libraries/python/TelemetryUtil.py
index 2d4bb096c6..f8c7d8c9b5 100644
--- a/resources/libraries/python/TelemetryUtil.py
+++ b/resources/libraries/python/TelemetryUtil.py
@@ -14,8 +14,10 @@
 """Telemetry utility."""
 
 from robot.api import logger
+from time import sleep
 
 from resources.libraries.python.Constants import Constants
+from resources.libraries.python.VppCounters import VppCounters
 from resources.libraries.python.OptionString import OptionString
 from resources.libraries.python.ssh import exec_cmd, exec_cmd_no_error
 from resources.libraries.python.topology import NodeType
@@ -119,6 +121,10 @@ class TelemetryUtil:
             f"{stdout}"
         )
 
+        VppCounters.vpp_clear_runtime(node)
+        sleep(1)
+        VppCounters.vpp_show_runtime(node)
+
     @staticmethod
     def run_telemetry_on_all_duts(nodes, profile):
         """Get telemetry stat read on all DUTs.
diff --git a/resources/libraries/robot/performance/performance_actions.robot b/resources/libraries/robot/performance/performance_actions.robot
index 3235dfa868..40f0bc9999 100644
--- a/resources/libraries/robot/performance/performance_actions.robot
+++ b/resources/libraries/robot/performance/performance_actions.robot
@@ -97,6 +97,78 @@
 | | ... | ${nodes} | profile=vppctl_runtime.yaml
 | | Stop traffic on tg
 
+| Additional Statistics Action For bpf-runtime
+| | [Documentation]
+| | ... | Additional Statistics Action for linux bundle counters with
+| | ... | running traffic.
+| |
+| | ... | See documentation of the called keyword for required test variables.
+| |
+| | ${ppta} = | Get Packets Per Transaction Aggregated
+| | ${ramp_up_duration} = | Get Ramp Up Duration
+| | ${ramp_up_rate} = | Get Ramp Up Rate
+| | ${runtime_duration} = | Get Runtime Duration
+| | ${runtime_rate} = | Get Runtime Rate
+| | ${traffic_directions} = | Get Traffic Directions
+| | ${transaction_duration} = | Get Transaction Duration
+| | ${transaction_scale} = | Get Transaction Scale
+| | ${transaction_type} = | Get Transaction Type
+| | ${use_latency} = | Get Use Latency
+| | Send traffic on tg
+| | ... | duration=${-1}
+| | ... | rate=${runtime_rate}
+| | ... | frame_size=${frame_size}
+| | ... | traffic_profile=${traffic_profile}
+| | ... | async_call=${True}
+| | ... | ppta=${ppta}
+| | ... | use_latency=${use_latency}
+| | ... | traffic_directions=${traffic_directions}
+| | ... | transaction_duration=${transaction_duration}
+| | ... | transaction_scale=${transaction_scale}
+| | ... | transaction_type=${transaction_type}
+| | ... | duration_limit=${0.0}
+| | ... | ramp_up_duration=${ramp_up_duration}
+| | ... | ramp_up_rate=${ramp_up_rate}
+| | Run Telemetry On All DUTs
+| | ... | ${nodes} | profile=bpf_runtime.yaml
+| | Stop traffic on tg
+
+| Additional Statistics Action For perf-stat-runtime
+| | [Documentation]
+| | ... | Additional Statistics Action for linux bundle counters with
+| | ... | running traffic.
+| |
+| | ... | See documentation of the called keyword for required test variables.
+| |
+| | ${ppta} = | Get Packets Per Transaction Aggregated
+| | ${ramp_up_duration} = | Get Ramp Up Duration
+| | ${ramp_up_rate} = | Get Ramp Up Rate
+| | ${runtime_duration} = | Get Runtime Duration
+| | ${runtime_rate} = | Get Runtime Rate
+| | ${traffic_directions} = | Get Traffic Directions
+| | ${transaction_duration} = | Get Transaction Duration
+| | ${transaction_scale} = | Get Transaction Scale
+| | ${transaction_type} = | Get Transaction Type
+| | ${use_latency} = | Get Use Latency
+| | Send traffic on tg
+| | ... | duration=${-1}
+| | ... | rate=${runtime_rate}
+| | ... | frame_size=${frame_size}
+| | ... | traffic_profile=${traffic_profile}
+| | ... | async_call=${True}
+| | ... | ppta=${ppta}
+| | ... | use_latency=${use_latency}
+| | ... | traffic_directions=${traffic_directions}
+| | ... | transaction_duration=${transaction_duration}
+| | ... | transaction_scale=${transaction_scale}
+| | ... | transaction_type=${transaction_type}
+| | ... | duration_limit=${0.0}
+| | ... | ramp_up_duration=${ramp_up_duration}
+| | ... | ramp_up_rate=${ramp_up_rate}
+| | Run Telemetry On All DUTs
+| | ... | ${nodes} | profile=perf_stat_runtime.yaml
+| | Stop traffic on tg
+
 | Additional Statistics Action For vpp-runtime-iperf3
 | | [Documentation]
 | | ... | Additional Statistics Action for clear and show runtime counters with
diff --git a/resources/templates/telemetry/bpf_runtime.yaml b/resources/templates/telemetry/bpf_runtime.yaml
index bb9d1c70ae..e2e1fd52f1 100644
--- a/resources/templates/telemetry/bpf_runtime.yaml
+++ b/resources/templates/telemetry/bpf_runtime.yaml
@@ -35,6 +35,7 @@ logging:
     handlers: [console_stdout, console_stderr]
 scheduler:
   duration: 1
+  sample_period: 100
 programs:
   - name: bundle_bpf
     metrics:
@@ -46,6 +47,41 @@ programs:
             - name
             - cpu
             - pid
+    events:
+      - type: 0x4 # RAW
+        name: 0x3C # INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P
+        target: on_cpu_cycle
+        table: cpu_cycle
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(cpu_cycle, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_cpu_cycle(struct bpf_perf_event_data *ctx) {
+          struct key_t key = {};
+          get_key(&key);
+
+          cpu_cycle.increment(key, ctx->sample_period);
+          return 0;
+      }
+  - name: bundle_bpf
+    metrics:
+      counter:
         - name: cpu_instruction
           documentation: Instructions retired by CPUs
           namespace: bpf
@@ -53,15 +89,85 @@ programs:
             - name
             - cpu
             - pid
-        - name: llc_reference
-          documentation: Last level cache operations by type
+    events:
+      - type: 0x4 # RAW
+        name: 0xC0 # INTEL_CORE_E_INST_RETIRED_ANY_P
+        target: on_cpu_instruction
+        table: cpu_instruction
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(cpu_instruction, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_cpu_instruction(struct bpf_perf_event_data *ctx) {
+          struct key_t key = {};
+          get_key(&key);
+
+          cpu_instruction.increment(key, ctx->sample_period);
+          return 0;
+      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: cache_references
+          documentation: Cache references
           namespace: bpf
           labelnames:
             - name
             - cpu
             - pid
-        - name: llc_miss
-          documentation: Last level cache operations by type
+    events:
+      - type: 0x0 # HARDWARE
+        name: 0x2 # PERF_COUNT_HW_CACHE_REFERENCES
+        target: on_cache_reference
+        table: cache_references
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(cache_references, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_cache_reference(struct bpf_perf_event_data *ctx) {
+          struct key_t key = {};
+          get_key(&key);
+
+          cache_references.increment(key, ctx->sample_period);
+          return 0;
+      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: cache_miss
+          documentation: Cache misses
           namespace: bpf
           labelnames:
             - name
@@ -69,21 +175,9 @@ programs:
             - pid
     events:
       - type: 0x0 # HARDWARE
-        name: 0x0 # PERF_COUNT_HW_CPU_CYCLES
-        target: on_cpu_cycle
-        table: cpu_cycle
-      - type: 0x0 # HARDWARE
-        name: 0x1 # PERF_COUNT_HW_INSTRUCTIONS
-        target: on_cpu_instruction
-        table: cpu_instruction
-      - type: 0x0 # HARDWARE
-        name: 0x2 # PERF_COUNT_HW_CACHE_REFERENCES
-        target: on_cache_reference
-        table: llc_reference
-      - type: 0x0 # HARDWARE
         name: 0x3 # PERF_COUNT_HW_CACHE_MISSES
         target: on_cache_miss
-        table: llc_miss
+        table: cache_miss
     code: |
       #include <linux/ptrace.h>
       #include <uapi/linux/bpf_perf_event.h>
@@ -96,10 +190,7 @@ programs:
           char name[TASK_COMM_LEN];
       };
 
-      BPF_HASH(llc_miss, struct key_t);
-      BPF_HASH(llc_reference, struct key_t);
-      BPF_HASH(cpu_instruction, struct key_t);
-      BPF_HASH(cpu_cycle, struct key_t);
+      BPF_HASH(cache_miss, struct key_t);
 
       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
           key->cpu = bpf_get_smp_processor_id();
@@ -107,31 +198,555 @@ programs:
           bpf_get_current_comm(&(key->name), sizeof(key->name));
       }
 
-      int on_cpu_cycle(struct bpf_perf_event_data *ctx) {
+      int on_cache_miss(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          cpu_cycle.increment(key, ctx->sample_period);
+          cache_miss.increment(key, ctx->sample_period);
           return 0;
       }
-      int on_cpu_instruction(struct bpf_perf_event_data *ctx) {
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: branch_instruction
+#          documentation: Instructions retired by branch
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x4 # PERF_COUNT_HW_BRANCH_INSTRUCTION
+#        target: on_branch_instruction
+#        table: branch_instruction
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(branch_instruction, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_branch_instruction(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          branch_instruction.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: branch_misses (not supported by CPU)
+#          documentation: Last level miss operations by type
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x5 # PERF_COUNT_HW_BRANCH_MISSES
+#        target: on_branch_misses
+#        table: branch_misses
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(branch_misses, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_branch_misses(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          branch_misses.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: bus_cycles
+#          documentation: Count of bus cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x6 # PERF_COUNT_HW_BUS_CYCLES
+#        target: on_bus_cycles
+#        table: bus_cycles
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(bus_cycles, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#      int on_bus_cycles(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          bus_cycles.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: stalled_cycles_frontend (not supported by CPU)
+#          documentation: Frontend stalled cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x7 # PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+#        target: on_stalled_cycles_frontend
+#        table: stalled_cycles_frontend
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(stalled_cycles_frontend, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_stalled_cycles_frontend(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          stalled_cycles_frontend.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: stalled_cycles_backend
+#          documentation: Backend stalled cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x8 # PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+#        target: on_stalled_cycles_backend
+#        table: stalled_cycles_backend
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(stalled_cycles_backend, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_stalled_cycles_backend(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          stalled_cycles_backend.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: referenced_cpu_cycles
+#          documentation: Referenced CPU cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x9 # PERF_COUNT_HW_REF_CPU_CYCLES
+#        target: on_referenced_cpu_cycles
+#        table: referenced_cpu_cycles
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(referenced_cpu_cycles, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_referenced_cpu_cycles(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          referenced_cpu_cycles.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_cpu_clock
+#          documentation: SW CPU clock
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x0 # PERF_COUNT_SW_CPU_CLOCK
+#        target: on_sw_cpu_clock
+#        table: sw_cpu_clock
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_cpu_clock, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_cpu_clock(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_cpu_clock.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_task_clock
+#          documentation: SW task clock
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x1 # PERF_COUNT_SW_TASK_CLOCK
+#        target: on_sw_task_clock
+#        table: sw_task_clock
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_task_clock, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_task_clock(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_task_clock.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_page_faults
+#          documentation: SW page faults
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x2 # PERF_COUNT_SW_PAGE_FAULTS
+#        target: on_sw_page_faults
+#        table: sw_page_faults
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_page_faults, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_page_faults(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_page_faults.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: sw_context_switches
+          documentation: SW context switches
+          namespace: bpf
+          labelnames:
+            - name
+            - cpu
+            - pid
+    events:
+      - type: 0x1 # SOFTWARE
+        name: 0x3 # PERF_COUNT_SW_CONTEXT_SWITCHES
+        target: on_sw_context_switches
+        table: sw_context_switches
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(sw_context_switches, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_sw_context_switches(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          cpu_instruction.increment(key, ctx->sample_period);
+          sw_context_switches.increment(key, ctx->sample_period);
           return 0;
       }
-      int on_cache_reference(struct bpf_perf_event_data *ctx) {
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_cpu_migrations
+#          documentation: SW cpu migrations
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x4 # PERF_COUNT_SW_CPU_MIGRATIONS
+#        target: on_sw_cpu_migrations
+#        table: sw_cpu_migrations
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_cpu_migrations, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_cpu_migrations(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_cpu_migrations.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: sw_page_faults_min
+          documentation: SW page faults minor
+          namespace: bpf
+          labelnames:
+            - name
+            - cpu
+            - pid
+    events:
+      - type: 0x1 # SOFTWARE
+        name: 0x5 # PERF_COUNT_SW_PAGE_FAULTS_MIN
+        target: on_sw_page_faults_min
+        table: sw_page_faults_min
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(sw_page_faults_min, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_sw_page_faults_min(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          llc_reference.increment(key, ctx->sample_period);
+          sw_page_faults_min.increment(key, ctx->sample_period);
           return 0;
       }
-      int on_cache_miss(struct bpf_perf_event_data *ctx) {
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: sw_page_faults_maj
+          documentation: SW page faults major
+          namespace: bpf
+          labelnames:
+            - name
+            - cpu
+            - pid
+    events:
+      - type: 0x1 # SOFTWARE
+        name: 0x6 # PERF_COUNT_SW_PAGE_FAULTS_MAJ
+        target: on_sw_page_faults_maj
+        table: sw_page_faults_maj
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(sw_page_faults_maj, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_sw_page_faults_maj(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          llc_miss.increment(key, ctx->sample_period);
+          sw_page_faults_maj.increment(key, ctx->sample_period);
           return 0;
       }
diff --git a/resources/templates/telemetry/perf_stat_runtime.yaml b/resources/templates/telemetry/perf_stat_runtime.yaml
new file mode 100644
index 0000000000..54b77a9bcc
--- /dev/null
+++ b/resources/templates/telemetry/perf_stat_runtime.yaml
@@ -0,0 +1,142 @@
+---
+logging:
+  version: 1
+  formatters:
+    console_stdout:
+      format: '%(asctime)s - %(name)s - %(message)s'
+    console_stderr:
+      format: '%(message)s'
+    prom:
+      format: '%(message)s'
+  handlers:
+    console_stdout:
+      class: logging.StreamHandler
+      level: INFO
+      formatter: console_stdout
+      stream: ext://sys.stdout
+    console_stderr:
+      class: logging.StreamHandler
+      level: ERROR
+      formatter: console_stderr
+      stream: ext://sys.stderr
+    prom:
+      class: logging.handlers.RotatingFileHandler
+      level: INFO
+      formatter: prom
+      filename: /tmp/metric.prom
+      mode: w
+  loggers:
+    prom:
+      handlers: [prom]
+      level: INFO
+      propagate: False
+  root:
+    level: INFO
+    handlers: [console_stdout, console_stderr]
+scheduler:
+  duration: 1
+programs:
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: cpu-cycles
+          documentation: Cycles processed by CPUs
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: cpu-cycles # 0x3C umask: 0x00
+        EventCode: 0x3C
+        UMask: 0x00
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: instructions
+          documentation: Instructions retired by CPUs
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: instructions # 0xC0 umask: 0x00
+        EventCode: 0xC0
+        UMask: 0x00
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: MEM_LOAD_UOPS_RETIRED.L1_HIT
+          documentation: L1 Hit
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: MEM_LOAD_UOPS_RETIRED.L1_HIT # 0xD1 umask: 0x01
+        EventCode: 0xD1
+        UMask: 0x01
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: MEM_LOAD_UOPS_RETIRED.L2_HIT
+          documentation: L2 Hit
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: MEM_LOAD_UOPS_RETIRED.L2_HIT # 0xd1 umask: 0x02
+        EventCode: 0xD1
+        UMask: 0x02
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: MEM_LOAD_UOPS_RETIRED.L3_HIT
+          documentation: L3 Hit
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: MEM_LOAD_UOPS_RETIRED.L3_HIT # 0xd1 umask: 0x04
+        EventCode: 0xD1
+        UMask: 0x04
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: MEM_LOAD_UOPS_RETIRED.L1_MISS
+          documentation: L1 Miss
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: MEM_LOAD_UOPS_RETIRED.L1_MISS # 0xd1 umask: 0x08
+        EventCode: 0xD1
+        UMask: 0x08
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: MEM_LOAD_UOPS_RETIRED.L2_MISS
+          documentation: L2 Miss
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: MEM_LOAD_UOPS_RETIRED.L2_MISS # 0xd1 umask: 0x10
+        EventCode: 0xD1
+        UMask: 0x10
+  - name: bundle_perf_stat
+    metrics:
+      gauge:
+        - name: MEM_LOAD_UOPS_RETIRED.L3_MISS
+          documentation: L3 Miss
+          labelnames:
+            - name
+            - thread
+            - pid
+    events:
+      - name: MEM_LOAD_UOPS_RETIRED.L3_MISS # 0xd1 umask: 0x020
+        EventCode: 0xD1
+        UMask: 0x20
diff --git a/resources/tools/telemetry/__main__.py b/resources/tools/telemetry/__main__.py
index 2ab87b661a..7a612b8eea 100755
--- a/resources/tools/telemetry/__main__.py
+++ b/resources/tools/telemetry/__main__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -19,6 +19,7 @@ from argparse import ArgumentParser, RawDescriptionHelpFormatter
 
 from .executor import Executor
 
+
 def main():
     """
     Main entry function when called from cli
@@ -45,5 +46,6 @@ def main():
     else:
         Executor(args.config).execute(args.hook)
 
+
 if __name__ == u"__main__":
     main()
diff --git a/resources/tools/telemetry/bundle_bpf.py b/resources/tools/telemetry/bundle_bpf.py
index c376da9d63..58cfd5d0b6 100644
--- a/resources/tools/telemetry/bundle_bpf.py
+++ b/resources/tools/telemetry/bundle_bpf.py
@@ -52,12 +52,15 @@ class BundleBpf:
 
         self.obj = BPF(text=self.code)
 
-    def attach(self, duration):
+
+    def attach(self, sample_period):
         """
         Attach events to BPF.
 
-        :param duration: Trial duration.
-        :type duration: int
+        :param sample_period:  A "sampling" event is one that generates
+        an overflow notification every N events, where N is given by
+        sample_period.
+        :type sample_period: int
         """
         try:
             for event in self.events:
@@ -65,15 +68,16 @@ class BundleBpf:
                     ev_type=event[u"type"],
                     ev_config=event[u"name"],
                     fn_name=event[u"target"],
-                    sample_period=duration
+                    sample_period=sample_period
                 )
         except AttributeError:
-            getLogger("console_stderr").error(u"Could not attach BPF events!")
+            getLogger("console_stderr").error(f"Could not attach BPF event: "
+                                              f"{event[u'name']}")
             sys.exit(Constants.err_linux_attach)
 
     def detach(self):
         """
-        Dettach events from BPF.
+        Detach events from BPF.
         """
         try:
             for event in self.events:
@@ -98,6 +102,9 @@ class BundleBpf:
 
         for _, metric_list in self.metrics.items():
             for metric in metric_list:
+                if table_name != metric[u"name"]:
+                    table_name = metric[u"name"]
+                    text += f"{table_name}\n"
                 for (key, val) in self.obj.get_table(metric[u"name"]).items():
                     item = dict()
                     labels = dict()
diff --git a/resources/tools/telemetry/bundle_perf_stat.py b/resources/tools/telemetry/bundle_perf_stat.py
new file mode 100644
index 0000000000..038e86e7a0
--- /dev/null
+++ b/resources/tools/telemetry/bundle_perf_stat.py
@@ -0,0 +1,109 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Perf Stat performance bundle."""
+
+from logging import getLogger
+import sys
+import subprocess
+
+from .constants import Constants
+
+
+class BundlePerfStat:
+    """
+    Creates a Perf stat object. This is the main object for defining a Perf Stat
+    program and interacting with its output.
+
+    Syntax: perf stat [-e <EVENT> | --event=EVENT] [-a] — <command> [<options>]
+    """
+    def __init__(self, program, serializer, hook):
+        """Initialize Bundle Perf Stat event class.
+
+        :param program: events
+        :param serializer: Metric serializer.
+        :param hook: Process ID.
+        :type program: dict
+        :type serializer: Serializer
+        :type hook: int
+        """
+        self.metrics = program[u"metrics"]
+        self.events = program[u"events"]
+        self.api_replies_list = list()
+        self.serializer = serializer
+        self.hook = hook
+
+    def attach(self, duration=1):
+        """
+               Performs perf stat.
+
+               :param duration: Time how long perf stat is collecting data (in
+               seconds). Default value is 1 second.
+               :type duration: int
+               EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
+               """
+        try:
+            self.serializer.create(metrics=self.metrics)
+            for event in self.events:
+                text = subprocess.getoutput(
+                    f"""sudo perf stat -x\; -e\
+                    '{{cpu/event={hex(event[u"EventCode"])},\
+                    umask={hex(event[u"UMask"])}/u}}'\
+                    -a --per-thread\
+                    sleep {duration}"""
+                )
+
+                if text == u"":
+                    getLogger("console_stdout").info(event[u"name"])
+                    continue
+                if u";" not in text:
+                    getLogger("console_stdout").info(
+                        f"Could not get counters for event \"{event[u'name']}\""
+                        f". Is it supported by CPU?"
+                    )
+                    continue
+
+                for line in text.splitlines():
+                    item = dict()
+                    labels = dict()
+                    item[u"name"] = event[u"name"]
+                    item[u"value"] = line.split(";")[1]
+                    labels["thread"] = u"-".join(
+                        line.split(";")[0].split("-")[0:-1]
+                    )
+                    labels["pid"] = line.split(";")[0].split("-")[-1]
+                    labels["name"] = item[u"name"]
+                    item[u"labels"] = labels
+
+                    getLogger("console_stdout").info(item)
+                    self.api_replies_list.append(item)
+
+        except AttributeError:
+            getLogger("console_stderr").error(f"Could not successfully run "
+                                              f"perf stat command.")
+            sys.exit(Constants.err_linux_perf_stat)
+
+    def detach(self):
+        pass
+
+    def fetch_data(self):
+       pass
+
+    def process_data(self):
+        """
+        Post process API replies.
+        """
+        for item in self.api_replies_list:
+            self.serializer.serialize(
+                metric=item[u"name"], labels=item[u"labels"], item=item
+            )
diff --git a/resources/tools/telemetry/constants.py b/resources/tools/telemetry/constants.py
index 9961a07b8b..5363ddeaa4 100644
--- a/resources/tools/telemetry/constants.py
+++ b/resources/tools/telemetry/constants.py
@@ -17,6 +17,7 @@
 does not need to be hard coded here, but can be read from environment variables.
 """
 
+
 class Constants:
     """Constants used in telemetry.
     1-10: Telemetry errors
@@ -46,3 +47,7 @@ class Constants:
 
     # Could not detach BPF events
     err_linux_detach = 52
+
+    # Could not successfuly run perf stat command
+    err_linux_perf_stat = 53
+
diff --git a/resources/tools/telemetry/metrics.py b/resources/tools/telemetry/metrics.py
index 7a22acfd1b..ba6bae5e70 100644
--- a/resources/tools/telemetry/metrics.py
+++ b/resources/tools/telemetry/metrics.py
@@ -104,7 +104,7 @@ class Metric:
             u"Sample", [u"name", u"labels", u"value", u"timestamp"]
         )
 
-        if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(name):
+        if not re.compile(r"^[a-zA-Z_:\-.][a-zA-Z0-9_:\-.]*$").match(name):
             raise ValueError(f"Invalid metric name: {name}!")
         if typ not in self.metric_types:
             raise ValueError(f"Invalid metric type: {typ}!")
@@ -214,7 +214,7 @@ class MetricBase:
         full_name += f"{subsystem}_" if subsystem else u""
         full_name += name
 
-        if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(full_name):
+        if not re.compile(r"^[a-zA-Z_:\-.][a-zA-Z0-9_:\-.]*$").match(full_name):
             raise ValueError(
                 f"Invalid metric name: {full_name}!"
             )
diff --git a/resources/tools/telemetry/serializer.py b/resources/tools/telemetry/serializer.py
index 3da857c0ab..e28454fc8b 100644
--- a/resources/tools/telemetry/serializer.py
+++ b/resources/tools/telemetry/serializer.py
@@ -19,7 +19,7 @@ from logging import getLogger
 
 class Serializer:
     """
-    Executor class reponsible for executing configuration.
+    Executor class responsible for executing configuration.
     """
     def __init__(self):
         """
diff --git a/tests/vpp/perf/__init__.robot b/tests/vpp/perf/__init__.robot
index 37875163a4..d13ada581f 100644
--- a/tests/vpp/perf/__init__.robot
+++ b/tests/vpp/perf/__init__.robot
@@ -1,4 +1,4 @@
-# Copyright (c) 2021 Cisco and/or its affiliates.
+# Copyright (c) 2022 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -50,7 +50,7 @@
 | | ... | - packages_dir - Path to directory where VPP packages are stored.
 | |
 | | ${stat_runtime}= | Create List
-| | ... | vpp-runtime
+| | ... | vpp-runtime | perf-stat-runtime
 | | ${stat_pre_trial}= | Create List
 | | ... | vpp-clear-stats | vpp-enable-packettrace
 | | ${stat_post_trial}= | Create List