1 files changed, 644 insertions, 29 deletions
diff --git a/resources/templates/telemetry/bpf_runtime.yaml b/resources/templates/telemetry/bpf_runtime.yaml
index bb9d1c70ae..e2e1fd52f1 100644
--- a/resources/templates/telemetry/bpf_runtime.yaml
+++ b/resources/templates/telemetry/bpf_runtime.yaml
@@ -35,6 +35,7 @@ logging:
     handlers: [console_stdout, console_stderr]
 scheduler:
   duration: 1
+  sample_period: 100
 programs:
   - name: bundle_bpf
     metrics:
@@ -46,6 +47,41 @@ programs:
             - name
             - cpu
             - pid
+    events:
+      - type: 0x4 # RAW
+        name: 0x3C # INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P
+        target: on_cpu_cycle
+        table: cpu_cycle
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(cpu_cycle, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_cpu_cycle(struct bpf_perf_event_data *ctx) {
+          struct key_t key = {};
+          get_key(&key);
+
+          cpu_cycle.increment(key, ctx->sample_period);
+          return 0;
+      }
+  - name: bundle_bpf
+    metrics:
+      counter:
         - name: cpu_instruction
           documentation: Instructions retired by CPUs
           namespace: bpf
@@ -53,15 +89,85 @@ programs:
             - name
             - cpu
             - pid
-        - name: llc_reference
-          documentation: Last level cache operations by type
+    events:
+      - type: 0x4 # RAW
+        name: 0xC0 # INTEL_CORE_E_INST_RETIRED_ANY_P
+        target: on_cpu_instruction
+        table: cpu_instruction
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(cpu_instruction, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_cpu_instruction(struct bpf_perf_event_data *ctx) {
+          struct key_t key = {};
+          get_key(&key);
+
+          cpu_instruction.increment(key, ctx->sample_period);
+          return 0;
+      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: cache_references
+          documentation: Cache references
           namespace: bpf
           labelnames:
             - name
             - cpu
             - pid
-        - name: llc_miss
-          documentation: Last level cache operations by type
+    events:
+      - type: 0x0 # HARDWARE
+        name: 0x2 # PERF_COUNT_HW_CACHE_REFERENCES
+        target: on_cache_reference
+        table: cache_references
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(cache_references, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_cache_reference(struct bpf_perf_event_data *ctx) {
+          struct key_t key = {};
+          get_key(&key);
+
+          cache_references.increment(key, ctx->sample_period);
+          return 0;
+      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: cache_miss
+          documentation: Cache misses
           namespace: bpf
           labelnames:
             - name
@@ -69,21 +175,9 @@ programs:
             - pid
     events:
       - type: 0x0 # HARDWARE
-        name: 0x0 # PERF_COUNT_HW_CPU_CYCLES
-        target: on_cpu_cycle
-        table: cpu_cycle
-      - type: 0x0 # HARDWARE
-        name: 0x1 # PERF_COUNT_HW_INSTRUCTIONS
-        target: on_cpu_instruction
-        table: cpu_instruction
-      - type: 0x0 # HARDWARE
-        name: 0x2 # PERF_COUNT_HW_CACHE_REFERENCES
-        target: on_cache_reference
-        table: llc_reference
-      - type: 0x0 # HARDWARE
         name: 0x3 # PERF_COUNT_HW_CACHE_MISSES
         target: on_cache_miss
-        table: llc_miss
+        table: cache_miss
     code: |
       #include <linux/ptrace.h>
       #include <uapi/linux/bpf_perf_event.h>
@@ -96,10 +190,7 @@ programs:
           char name[TASK_COMM_LEN];
       };
 
-      BPF_HASH(llc_miss, struct key_t);
-      BPF_HASH(llc_reference, struct key_t);
-      BPF_HASH(cpu_instruction, struct key_t);
-      BPF_HASH(cpu_cycle, struct key_t);
+      BPF_HASH(cache_miss, struct key_t);
 
       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
           key->cpu = bpf_get_smp_processor_id();
@@ -107,31 +198,555 @@ programs:
           bpf_get_current_comm(&(key->name), sizeof(key->name));
       }
 
-      int on_cpu_cycle(struct bpf_perf_event_data *ctx) {
+      int on_cache_miss(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          cpu_cycle.increment(key, ctx->sample_period);
+          cache_miss.increment(key, ctx->sample_period);
           return 0;
       }
-      int on_cpu_instruction(struct bpf_perf_event_data *ctx) {
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: branch_instruction
+#          documentation: Instructions retired by branch
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x4 # PERF_COUNT_HW_BRANCH_INSTRUCTION
+#        target: on_branch_instruction
+#        table: branch_instruction
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(branch_instruction, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_branch_instruction(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          branch_instruction.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: branch_misses (not supported by CPU)
+#          documentation: Last level miss operations by type
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x5 # PERF_COUNT_HW_BRANCH_MISSES
+#        target: on_branch_misses
+#        table: branch_misses
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(branch_misses, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_branch_misses(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          branch_misses.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: bus_cycles
+#          documentation: Count of bus cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x6 # PERF_COUNT_HW_BUS_CYCLES
+#        target: on_bus_cycles
+#        table: bus_cycles
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(bus_cycles, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#      int on_bus_cycles(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          bus_cycles.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: stalled_cycles_frontend (not supported by CPU)
+#          documentation: Frontend stalled cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x7 # PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+#        target: on_stalled_cycles_frontend
+#        table: stalled_cycles_frontend
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(stalled_cycles_frontend, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_stalled_cycles_frontend(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          stalled_cycles_frontend.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: stalled_cycles_backend
+#          documentation: Backend stalled cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x8 # PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+#        target: on_stalled_cycles_backend
+#        table: stalled_cycles_backend
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(stalled_cycles_backend, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_stalled_cycles_backend(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          stalled_cycles_backend.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: referenced_cpu_cycles
+#          documentation: Referenced CPU cycles
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x0 # HARDWARE
+#        name: 0x9 # PERF_COUNT_HW_REF_CPU_CYCLES
+#        target: on_referenced_cpu_cycles
+#        table: referenced_cpu_cycles
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(referenced_cpu_cycles, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_referenced_cpu_cycles(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          referenced_cpu_cycles.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_cpu_clock
+#          documentation: SW CPU clock
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x0 # PERF_COUNT_SW_CPU_CLOCK
+#        target: on_sw_cpu_clock
+#        table: sw_cpu_clock
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_cpu_clock, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_cpu_clock(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_cpu_clock.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_task_clock
+#          documentation: SW task clock
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x1 # PERF_COUNT_SW_TASK_CLOCK
+#        target: on_sw_task_clock
+#        table: sw_task_clock
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_task_clock, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_task_clock(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_task_clock.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_page_faults
+#          documentation: SW page faults
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x2 # PERF_COUNT_SW_PAGE_FAULTS
+#        target: on_sw_page_faults
+#        table: sw_page_faults
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_page_faults, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_page_faults(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_page_faults.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: sw_context_switches
+          documentation: SW context switches
+          namespace: bpf
+          labelnames:
+            - name
+            - cpu
+            - pid
+    events:
+      - type: 0x1 # SOFTWARE
+        name: 0x3 # PERF_COUNT_SW_CONTEXT_SWITCHES
+        target: on_sw_context_switches
+        table: sw_context_switches
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(sw_context_switches, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_sw_context_switches(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          cpu_instruction.increment(key, ctx->sample_period);
+          sw_context_switches.increment(key, ctx->sample_period);
           return 0;
       }
-      int on_cache_reference(struct bpf_perf_event_data *ctx) {
+#  - name: bundle_bpf
+#    metrics:
+#      counter:
+#        - name: sw_cpu_migrations
+#          documentation: SW cpu migrations
+#          namespace: bpf
+#          labelnames:
+#            - name
+#            - cpu
+#            - pid
+#    events:
+#      - type: 0x1 # SOFTWARE
+#        name: 0x4 # PERF_COUNT_SW_CPU_MIGRATIONS
+#        target: on_sw_cpu_migrations
+#        table: sw_cpu_migrations
+#    code: |
+#      #include <linux/ptrace.h>
+#      #include <uapi/linux/bpf_perf_event.h>
+#
+#      const int max_cpus = 256;
+#
+#      struct key_t {
+#          int cpu;
+#          int pid;
+#          char name[TASK_COMM_LEN];
+#      };
+#
+#      BPF_HASH(sw_cpu_migrations, struct key_t);
+#
+#      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+#          key->cpu = bpf_get_smp_processor_id();
+#          key->pid = bpf_get_current_pid_tgid();
+#          bpf_get_current_comm(&(key->name), sizeof(key->name));
+#      }
+#
+#      int on_sw_cpu_migrations(struct bpf_perf_event_data *ctx) {
+#          struct key_t key = {};
+#          get_key(&key);
+#
+#          sw_cpu_migrations.increment(key, ctx->sample_period);
+#          return 0;
+#      }
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: sw_page_faults_min
+          documentation: SW page faults minor
+          namespace: bpf
+          labelnames:
+            - name
+            - cpu
+            - pid
+    events:
+      - type: 0x1 # SOFTWARE
+        name: 0x5 # PERF_COUNT_SW_PAGE_FAULTS_MIN
+        target: on_sw_page_faults_min
+        table: sw_page_faults_min
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(sw_page_faults_min, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_sw_page_faults_min(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          llc_reference.increment(key, ctx->sample_period);
+          sw_page_faults_min.increment(key, ctx->sample_period);
           return 0;
       }
-      int on_cache_miss(struct bpf_perf_event_data *ctx) {
+  - name: bundle_bpf
+    metrics:
+      counter:
+        - name: sw_page_faults_maj
+          documentation: SW page faults major
+          namespace: bpf
+          labelnames:
+            - name
+            - cpu
+            - pid
+    events:
+      - type: 0x1 # SOFTWARE
+        name: 0x6 # PERF_COUNT_SW_PAGE_FAULTS_MAJ
+        target: on_sw_page_faults_maj
+        table: sw_page_faults_maj
+    code: |
+      #include <linux/ptrace.h>
+      #include <uapi/linux/bpf_perf_event.h>
+
+      const int max_cpus = 256;
+
+      struct key_t {
+          int cpu;
+          int pid;
+          char name[TASK_COMM_LEN];
+      };
+
+      BPF_HASH(sw_page_faults_maj, struct key_t);
+
+      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
+          key->cpu = bpf_get_smp_processor_id();
+          key->pid = bpf_get_current_pid_tgid();
+          bpf_get_current_comm(&(key->name), sizeof(key->name));
+      }
+
+      int on_sw_page_faults_maj(struct bpf_perf_event_data *ctx) {
           struct key_t key = {};
           get_key(&key);
 
-          llc_miss.increment(key, ctx->sample_period);
+          sw_page_faults_maj.increment(key, ctx->sample_period);
           return 0;
       }