# telemetry: error message handling
# [csit.git] / resources / templates / telemetry / bpf_runtime.yaml
---
# Logging setup, laid out in the schema of Python's
# logging.config.dictConfig (schema version 1).
logging:
  version: 1
  formatters:
    # Timestamp and logger name in front of every message.
    console_stdout:
      format: '%(asctime)s - %(name)s - %(message)s'
    # Bare message text only.
    console_stderr:
      format: '%(message)s'
    # Bare message text only; used for the metrics file below.
    prom:
      format: '%(message)s'
  handlers:
    # INFO and above go to stdout.
    console_stdout:
      class: logging.StreamHandler
      formatter: console_stdout
      level: INFO
      stream: ext://sys.stdout
    # ERROR and above go to stderr. Both console handlers are attached to
    # the root logger, so such records are written to stdout as well.
    console_stderr:
      class: logging.StreamHandler
      formatter: console_stderr
      level: ERROR
      stream: ext://sys.stderr
    # File sink for the `prom` logger, opened in mode `w`. No size limit or
    # backup count is configured here.
    prom:
      class: logging.handlers.RotatingFileHandler
      formatter: prom
      level: INFO
      filename: /tmp/metric.prom
      mode: w
  loggers:
    # Dedicated logger for the metrics file; propagation is switched off so
    # its records do not reach the root console handlers.
    prom:
      level: INFO
      handlers:
        - prom
      propagate: false
  root:
    level: INFO
    handlers:
      - console_stdout
      - console_stderr
# Scheduler settings.
# NOTE(review): the unit of `duration` is not visible in this file
# (presumably seconds) - confirm against the consumer.
scheduler:
  duration: 1
# BPF programs. Each entry bundles metric definitions, the perf events to
# attach to, and the BPF C source that fills the tables.
programs:
  - name: bundle_bpf
    metrics:
      # One counter per table declared in `code` below. The label set of
      # every counter matches the fields of `struct key_t`.
      counter:
        - name: cpu_cycle
          documentation: Cycles processed by CPUs
          namespace: bpf
          labelnames: [name, cpu, pid]
        - name: cpu_instruction
          documentation: Instructions retired by CPUs
          namespace: bpf
          labelnames: [name, cpu, pid]
        - name: llc_reference
          documentation: Last level cache operations by type
          namespace: bpf
          labelnames: [name, cpu, pid]
        - name: llc_miss
          documentation: Last level cache operations by type
          namespace: bpf
          labelnames: [name, cpu, pid]
    # Perf events. `target` names a handler function and `table` names a
    # BPF_HASH table, both defined in `code` below.
    events:
      - type: 0x0  # PERF_TYPE_HARDWARE
        name: 0x0  # PERF_COUNT_HW_CPU_CYCLES
        target: on_cpu_cycle
        table: cpu_cycle
      - type: 0x0  # PERF_TYPE_HARDWARE
        name: 0x1  # PERF_COUNT_HW_INSTRUCTIONS
        target: on_cpu_instruction
        table: cpu_instruction
      - type: 0x0  # PERF_TYPE_HARDWARE
        name: 0x2  # PERF_COUNT_HW_CACHE_REFERENCES
        target: on_cache_reference
        table: llc_reference
      - type: 0x0  # PERF_TYPE_HARDWARE
        name: 0x3  # PERF_COUNT_HW_CACHE_MISSES
        target: on_cache_miss
        table: llc_miss
    code: |
      #include <linux/ptrace.h>
      #include <uapi/linux/bpf_perf_event.h>

      /* Table key: one slot per (cpu, pid, task name) combination. */
      struct key_t {
          int cpu;
          int pid;
          char name[TASK_COMM_LEN];
      };

      /* One hash table per perf event; names match `table:` in `events`. */
      BPF_HASH(cpu_cycle, struct key_t);
      BPF_HASH(cpu_instruction, struct key_t);
      BPF_HASH(llc_reference, struct key_t);
      BPF_HASH(llc_miss, struct key_t);

      /* Fill *key with the current CPU id, task id and task command name. */
      static inline __attribute__((always_inline))
      void get_key(struct key_t *key) {
          key->cpu = bpf_get_smp_processor_id();
          /*
           * The helper returns a 64-bit value; only the low 32 bits fit
           * into `pid`, so the narrowing is spelled out explicitly.
           */
          key->pid = (int)bpf_get_current_pid_tgid();
          bpf_get_current_comm(&key->name, sizeof(key->name));
      }

      /*
       * Perf event handlers. Each one builds the key for the current task
       * and adds the event's sample period to that key's slot in its table.
       */
      int on_cpu_cycle(struct bpf_perf_event_data *ctx) {
          struct key_t key = {};

          get_key(&key);
          cpu_cycle.increment(key, ctx->sample_period);
          return 0;
      }

      int on_cpu_instruction(struct bpf_perf_event_data *ctx) {
          struct key_t key = {};

          get_key(&key);
          cpu_instruction.increment(key, ctx->sample_period);
          return 0;
      }

      int on_cache_reference(struct bpf_perf_event_data *ctx) {
          struct key_t key = {};

          get_key(&key);
          llc_reference.increment(key, ctx->sample_period);
          return 0;
      }

      int on_cache_miss(struct bpf_perf_event_data *ctx) {
          struct key_t key = {};

          get_key(&key);
          llc_miss.increment(key, ctx->sample_period);
          return 0;
      }