feat(jobspec): Unify soak jobspecs
[csit.git] / resources / templates / telemetry / bpf_runtime.yaml
1 ---
2 logging:  # logging setup; layout resembles Python's logging.config dictConfig schema — presumably loaded that way, confirm
3   version: 1
4   formatters:
5     console_stdout:  # timestamp, logger name, then the message
6       format: '%(asctime)s - %(name)s - %(message)s'
7     console_stderr:  # bare message only
8       format: '%(message)s'
9     prom:  # bare message only
10       format: '%(message)s'
11   handlers:
12     console_stdout:  # INFO and above, written to standard output
13       class: logging.StreamHandler
14       level: INFO
15       formatter: console_stdout
16       stream: ext://sys.stdout
17     console_stderr:  # ERROR and above, written to standard error
18       class: logging.StreamHandler
19       level: ERROR
20       formatter: console_stderr
21       stream: ext://sys.stderr
22     prom:  # file sink for the "prom" logger; no maxBytes/backupCount is configured here
23       class: logging.handlers.RotatingFileHandler
24       level: INFO
25       formatter: prom
26       filename: /tmp/metric.prom
27       mode: w  # write mode rather than the handler's default append mode
28   loggers:
29     prom:  # dedicated logger whose only handler is the file sink above
30       handlers: [prom]
31       level: INFO
32       propagate: False  # records are not forwarded to the root handlers
33   root:
34     level: INFO
35     handlers: [console_stdout, console_stderr]  # ERROR and above reach both handlers; console_stdout has no upper bound
36 scheduler:  # NOTE(review): units of the values below are not visible in this file — confirm against the consumer
37   duration: 1
38   sample_period: 100  # presumably the perf sampling period for the events below (handlers add ctx->sample_period per sample) — verify
39 programs:
40   - name: bundle_bpf  # program entry: accumulates CPU cycle samples keyed by (cpu, pid, task name)
41     metrics:
42       counter:
43         - name: cpu_cycle  # same identifier as the events table and the BPF_HASH map below
44           documentation: Cycles processed by CPUs
45           namespace: bpf
46           labelnames:  # same fields as struct key_t in the code (declared there as cpu, pid, name)
47             - name
48             - cpu
49             - pid
50     events:
51       - type: 0x4 # RAW (PERF_TYPE_RAW in linux/perf_event.h)
52         name: 0x3C # INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P
53         target: on_cpu_cycle  # handler function defined in the code below
54         table: cpu_cycle  # map declared with BPF_HASH in the code below
55     code: |  # NOTE(review): max_cpus is not referenced anywhere in this code; key->pid keeps only the low 32 bits of bpf_get_current_pid_tgid() (the kernel pid/thread id per bpf-helpers(7), not the tgid) — confirm intended
56       #include <linux/ptrace.h>
57       #include <uapi/linux/bpf_perf_event.h>
58
59       const int max_cpus = 256;
60
61       struct key_t {
62           int cpu;
63           int pid;
64           char name[TASK_COMM_LEN];
65       };
66
67       BPF_HASH(cpu_cycle, struct key_t);
68
69       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
70           key->cpu = bpf_get_smp_processor_id();
71           key->pid = bpf_get_current_pid_tgid();
72           bpf_get_current_comm(&(key->name), sizeof(key->name));
73       }
74
75       int on_cpu_cycle(struct bpf_perf_event_data *ctx) {
76           struct key_t key = {};
77           get_key(&key);
78
79           cpu_cycle.increment(key, ctx->sample_period);
80           return 0;
81       }
82   - name: bundle_bpf  # program entry: accumulates retired-instruction samples keyed by (cpu, pid, task name)
83     metrics:
84       counter:
85         - name: cpu_instruction  # same identifier as the events table and the BPF_HASH map below
86           documentation: Instructions retired by CPUs
87           namespace: bpf
88           labelnames:  # same fields as struct key_t in the code (declared there as cpu, pid, name)
89             - name
90             - cpu
91             - pid
92     events:
93       - type: 0x4 # RAW (PERF_TYPE_RAW in linux/perf_event.h)
94         name: 0xC0 # INTEL_CORE_E_INST_RETIRED_ANY_P
95         target: on_cpu_instruction  # handler function defined in the code below
96         table: cpu_instruction  # map declared with BPF_HASH in the code below
97     code: |  # NOTE(review): max_cpus is not referenced anywhere in this code; key->pid keeps only the low 32 bits of bpf_get_current_pid_tgid() (the kernel pid/thread id per bpf-helpers(7), not the tgid) — confirm intended
98       #include <linux/ptrace.h>
99       #include <uapi/linux/bpf_perf_event.h>
100
101       const int max_cpus = 256;
102
103       struct key_t {
104           int cpu;
105           int pid;
106           char name[TASK_COMM_LEN];
107       };
108
109       BPF_HASH(cpu_instruction, struct key_t);
110
111       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
112           key->cpu = bpf_get_smp_processor_id();
113           key->pid = bpf_get_current_pid_tgid();
114           bpf_get_current_comm(&(key->name), sizeof(key->name));
115       }
116
117       int on_cpu_instruction(struct bpf_perf_event_data *ctx) {
118           struct key_t key = {};
119           get_key(&key);
120
121           cpu_instruction.increment(key, ctx->sample_period);
122           return 0;
123       }
124   - name: bundle_bpf  # program entry: accumulates cache-reference samples keyed by (cpu, pid, task name)
125     metrics:
126       counter:
127         - name: cache_references  # same identifier as the events table and the BPF_HASH map below
128           documentation: Cache references
129           namespace: bpf
130           labelnames:  # same fields as struct key_t in the code (declared there as cpu, pid, name)
131             - name
132             - cpu
133             - pid
134     events:
135       - type: 0x0 # HARDWARE (PERF_TYPE_HARDWARE in linux/perf_event.h)
136         name: 0x2 # PERF_COUNT_HW_CACHE_REFERENCES
137         target: on_cache_reference  # handler function defined in the code below (singular, unlike the table name)
138         table: cache_references  # map declared with BPF_HASH in the code below
139     code: |  # NOTE(review): max_cpus is not referenced anywhere in this code; key->pid keeps only the low 32 bits of bpf_get_current_pid_tgid() (the kernel pid/thread id per bpf-helpers(7), not the tgid) — confirm intended
140       #include <linux/ptrace.h>
141       #include <uapi/linux/bpf_perf_event.h>
142
143       const int max_cpus = 256;
144
145       struct key_t {
146           int cpu;
147           int pid;
148           char name[TASK_COMM_LEN];
149       };
150
151       BPF_HASH(cache_references, struct key_t);
152
153       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
154           key->cpu = bpf_get_smp_processor_id();
155           key->pid = bpf_get_current_pid_tgid();
156           bpf_get_current_comm(&(key->name), sizeof(key->name));
157       }
158
159       int on_cache_reference(struct bpf_perf_event_data *ctx) {
160           struct key_t key = {};
161           get_key(&key);
162
163           cache_references.increment(key, ctx->sample_period);
164           return 0;
165       }
166   - name: bundle_bpf  # program entry: accumulates cache-miss samples keyed by (cpu, pid, task name)
167     metrics:
168       counter:
169         - name: cache_miss  # same identifier as the events table and the BPF_HASH map below
170           documentation: Cache misses
171           namespace: bpf
172           labelnames:  # same fields as struct key_t in the code (declared there as cpu, pid, name)
173             - name
174             - cpu
175             - pid
176     events:
177       - type: 0x0 # HARDWARE (PERF_TYPE_HARDWARE in linux/perf_event.h)
178         name: 0x3 # PERF_COUNT_HW_CACHE_MISSES
179         target: on_cache_miss  # handler function defined in the code below
180         table: cache_miss  # map declared with BPF_HASH in the code below
181     code: |  # NOTE(review): max_cpus is not referenced anywhere in this code; key->pid keeps only the low 32 bits of bpf_get_current_pid_tgid() (the kernel pid/thread id per bpf-helpers(7), not the tgid) — confirm intended
182       #include <linux/ptrace.h>
183       #include <uapi/linux/bpf_perf_event.h>
184
185       const int max_cpus = 256;
186
187       struct key_t {
188           int cpu;
189           int pid;
190           char name[TASK_COMM_LEN];
191       };
192
193       BPF_HASH(cache_miss, struct key_t);
194
195       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
196           key->cpu = bpf_get_smp_processor_id();
197           key->pid = bpf_get_current_pid_tgid();
198           bpf_get_current_comm(&(key->name), sizeof(key->name));
199       }
200
201       int on_cache_miss(struct bpf_perf_event_data *ctx) {
202           struct key_t key = {};
203           get_key(&key);
204
205           cache_miss.increment(key, ctx->sample_period);
206           return 0;
207       }
208 #  - name: bundle_bpf
209 #    metrics:
210 #      counter:
211 #        - name: branch_instruction
212 #          documentation: Instructions retired by branch
213 #          namespace: bpf
214 #          labelnames:
215 #            - name
216 #            - cpu
217 #            - pid
218 #    events:
219 #      - type: 0x0 # HARDWARE
220 #        name: 0x4 # PERF_COUNT_HW_BRANCH_INSTRUCTIONS
221 #        target: on_branch_instruction
222 #        table: branch_instruction
223 #    code: |
224 #      #include <linux/ptrace.h>
225 #      #include <uapi/linux/bpf_perf_event.h>
226 #
227 #      const int max_cpus = 256;
228 #
229 #      struct key_t {
230 #          int cpu;
231 #          int pid;
232 #          char name[TASK_COMM_LEN];
233 #      };
234 #
235 #      BPF_HASH(branch_instruction, struct key_t);
236 #
237 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
238 #          key->cpu = bpf_get_smp_processor_id();
239 #          key->pid = bpf_get_current_pid_tgid();
240 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
241 #      }
242 #
243 #      int on_branch_instruction(struct bpf_perf_event_data *ctx) {
244 #          struct key_t key = {};
245 #          get_key(&key);
246 #
247 #          branch_instruction.increment(key, ctx->sample_period);
248 #          return 0;
249 #      }
250 #  - name: bundle_bpf
251 #    metrics:
252 #      counter:
253 #        - name: branch_misses  # (not supported by CPU)
254 #          documentation: Branch misses
255 #          namespace: bpf
256 #          labelnames:
257 #            - name
258 #            - cpu
259 #            - pid
260 #    events:
261 #      - type: 0x0 # HARDWARE
262 #        name: 0x5 # PERF_COUNT_HW_BRANCH_MISSES
263 #        target: on_branch_misses
264 #        table: branch_misses
265 #    code: |
266 #      #include <linux/ptrace.h>
267 #      #include <uapi/linux/bpf_perf_event.h>
268 #
269 #      const int max_cpus = 256;
270 #
271 #      struct key_t {
272 #          int cpu;
273 #          int pid;
274 #          char name[TASK_COMM_LEN];
275 #      };
276 #
277 #      BPF_HASH(branch_misses, struct key_t);
278 #
279 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
280 #          key->cpu = bpf_get_smp_processor_id();
281 #          key->pid = bpf_get_current_pid_tgid();
282 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
283 #      }
284 #
285 #      int on_branch_misses(struct bpf_perf_event_data *ctx) {
286 #          struct key_t key = {};
287 #          get_key(&key);
288 #
289 #          branch_misses.increment(key, ctx->sample_period);
290 #          return 0;
291 #      }
292 #  - name: bundle_bpf
293 #    metrics:
294 #      counter:
295 #        - name: bus_cycles
296 #          documentation: Count of bus cycles
297 #          namespace: bpf
298 #          labelnames:
299 #            - name
300 #            - cpu
301 #            - pid
302 #    events:
303 #      - type: 0x0 # HARDWARE
304 #        name: 0x6 # PERF_COUNT_HW_BUS_CYCLES
305 #        target: on_bus_cycles
306 #        table: bus_cycles
307 #    code: |
308 #      #include <linux/ptrace.h>
309 #      #include <uapi/linux/bpf_perf_event.h>
310 #
311 #      const int max_cpus = 256;
312 #
313 #      struct key_t {
314 #          int cpu;
315 #          int pid;
316 #          char name[TASK_COMM_LEN];
317 #      };
318 #
319 #      BPF_HASH(bus_cycles, struct key_t);
320 #
321 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
322 #          key->cpu = bpf_get_smp_processor_id();
323 #          key->pid = bpf_get_current_pid_tgid();
324 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
325 #      }
326 #      int on_bus_cycles(struct bpf_perf_event_data *ctx) {
327 #          struct key_t key = {};
328 #          get_key(&key);
329 #
330 #          bus_cycles.increment(key, ctx->sample_period);
331 #          return 0;
332 #      }
333 #  - name: bundle_bpf
334 #    metrics:
335 #      counter:
336 #        - name: stalled_cycles_frontend  # (not supported by CPU)
337 #          documentation: Frontend stalled cycles
338 #          namespace: bpf
339 #          labelnames:
340 #            - name
341 #            - cpu
342 #            - pid
343 #    events:
344 #      - type: 0x0 # HARDWARE
345 #        name: 0x7 # PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
346 #        target: on_stalled_cycles_frontend
347 #        table: stalled_cycles_frontend
348 #    code: |
349 #      #include <linux/ptrace.h>
350 #      #include <uapi/linux/bpf_perf_event.h>
351 #
352 #      const int max_cpus = 256;
353 #
354 #      struct key_t {
355 #          int cpu;
356 #          int pid;
357 #          char name[TASK_COMM_LEN];
358 #      };
359 #
360 #      BPF_HASH(stalled_cycles_frontend, struct key_t);
361 #
362 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
363 #          key->cpu = bpf_get_smp_processor_id();
364 #          key->pid = bpf_get_current_pid_tgid();
365 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
366 #      }
367 #
368 #      int on_stalled_cycles_frontend(struct bpf_perf_event_data *ctx) {
369 #          struct key_t key = {};
370 #          get_key(&key);
371 #
372 #          stalled_cycles_frontend.increment(key, ctx->sample_period);
373 #          return 0;
374 #      }
375 #  - name: bundle_bpf
376 #    metrics:
377 #      counter:
378 #        - name: stalled_cycles_backend
379 #          documentation: Backend stalled cycles
380 #          namespace: bpf
381 #          labelnames:
382 #            - name
383 #            - cpu
384 #            - pid
385 #    events:
386 #      - type: 0x0 # HARDWARE
387 #        name: 0x8 # PERF_COUNT_HW_STALLED_CYCLES_BACKEND
388 #        target: on_stalled_cycles_backend
389 #        table: stalled_cycles_backend
390 #    code: |
391 #      #include <linux/ptrace.h>
392 #      #include <uapi/linux/bpf_perf_event.h>
393 #
394 #      const int max_cpus = 256;
395 #
396 #      struct key_t {
397 #          int cpu;
398 #          int pid;
399 #          char name[TASK_COMM_LEN];
400 #      };
401 #
402 #      BPF_HASH(stalled_cycles_backend, struct key_t);
403 #
404 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
405 #          key->cpu = bpf_get_smp_processor_id();
406 #          key->pid = bpf_get_current_pid_tgid();
407 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
408 #      }
409 #
410 #      int on_stalled_cycles_backend(struct bpf_perf_event_data *ctx) {
411 #          struct key_t key = {};
412 #          get_key(&key);
413 #
414 #          stalled_cycles_backend.increment(key, ctx->sample_period);
415 #          return 0;
416 #      }
417 #  - name: bundle_bpf
418 #    metrics:
419 #      counter:
420 #        - name: referenced_cpu_cycles
421 #          documentation: Referenced CPU cycles
422 #          namespace: bpf
423 #          labelnames:
424 #            - name
425 #            - cpu
426 #            - pid
427 #    events:
428 #      - type: 0x0 # HARDWARE
429 #        name: 0x9 # PERF_COUNT_HW_REF_CPU_CYCLES
430 #        target: on_referenced_cpu_cycles
431 #        table: referenced_cpu_cycles
432 #    code: |
433 #      #include <linux/ptrace.h>
434 #      #include <uapi/linux/bpf_perf_event.h>
435 #
436 #      const int max_cpus = 256;
437 #
438 #      struct key_t {
439 #          int cpu;
440 #          int pid;
441 #          char name[TASK_COMM_LEN];
442 #      };
443 #
444 #      BPF_HASH(referenced_cpu_cycles, struct key_t);
445 #
446 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
447 #          key->cpu = bpf_get_smp_processor_id();
448 #          key->pid = bpf_get_current_pid_tgid();
449 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
450 #      }
451 #
452 #      int on_referenced_cpu_cycles(struct bpf_perf_event_data *ctx) {
453 #          struct key_t key = {};
454 #          get_key(&key);
455 #
456 #          referenced_cpu_cycles.increment(key, ctx->sample_period);
457 #          return 0;
458 #      }
459 #  - name: bundle_bpf
460 #    metrics:
461 #      counter:
462 #        - name: sw_cpu_clock
463 #          documentation: SW CPU clock
464 #          namespace: bpf
465 #          labelnames:
466 #            - name
467 #            - cpu
468 #            - pid
469 #    events:
470 #      - type: 0x1 # SOFTWARE
471 #        name: 0x0 # PERF_COUNT_SW_CPU_CLOCK
472 #        target: on_sw_cpu_clock
473 #        table: sw_cpu_clock
474 #    code: |
475 #      #include <linux/ptrace.h>
476 #      #include <uapi/linux/bpf_perf_event.h>
477 #
478 #      const int max_cpus = 256;
479 #
480 #      struct key_t {
481 #          int cpu;
482 #          int pid;
483 #          char name[TASK_COMM_LEN];
484 #      };
485 #
486 #      BPF_HASH(sw_cpu_clock, struct key_t);
487 #
488 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
489 #          key->cpu = bpf_get_smp_processor_id();
490 #          key->pid = bpf_get_current_pid_tgid();
491 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
492 #      }
493 #
494 #      int on_sw_cpu_clock(struct bpf_perf_event_data *ctx) {
495 #          struct key_t key = {};
496 #          get_key(&key);
497 #
498 #          sw_cpu_clock.increment(key, ctx->sample_period);
499 #          return 0;
500 #      }
501 #  - name: bundle_bpf
502 #    metrics:
503 #      counter:
504 #        - name: sw_task_clock
505 #          documentation: SW task clock
506 #          namespace: bpf
507 #          labelnames:
508 #            - name
509 #            - cpu
510 #            - pid
511 #    events:
512 #      - type: 0x1 # SOFTWARE
513 #        name: 0x1 # PERF_COUNT_SW_TASK_CLOCK
514 #        target: on_sw_task_clock
515 #        table: sw_task_clock
516 #    code: |
517 #      #include <linux/ptrace.h>
518 #      #include <uapi/linux/bpf_perf_event.h>
519 #
520 #      const int max_cpus = 256;
521 #
522 #      struct key_t {
523 #          int cpu;
524 #          int pid;
525 #          char name[TASK_COMM_LEN];
526 #      };
527 #
528 #      BPF_HASH(sw_task_clock, struct key_t);
529 #
530 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
531 #          key->cpu = bpf_get_smp_processor_id();
532 #          key->pid = bpf_get_current_pid_tgid();
533 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
534 #      }
535 #
536 #      int on_sw_task_clock(struct bpf_perf_event_data *ctx) {
537 #          struct key_t key = {};
538 #          get_key(&key);
539 #
540 #          sw_task_clock.increment(key, ctx->sample_period);
541 #          return 0;
542 #      }
543 #  - name: bundle_bpf
544 #    metrics:
545 #      counter:
546 #        - name: sw_page_faults
547 #          documentation: SW page faults
548 #          namespace: bpf
549 #          labelnames:
550 #            - name
551 #            - cpu
552 #            - pid
553 #    events:
554 #      - type: 0x1 # SOFTWARE
555 #        name: 0x2 # PERF_COUNT_SW_PAGE_FAULTS
556 #        target: on_sw_page_faults
557 #        table: sw_page_faults
558 #    code: |
559 #      #include <linux/ptrace.h>
560 #      #include <uapi/linux/bpf_perf_event.h>
561 #
562 #      const int max_cpus = 256;
563 #
564 #      struct key_t {
565 #          int cpu;
566 #          int pid;
567 #          char name[TASK_COMM_LEN];
568 #      };
569 #
570 #      BPF_HASH(sw_page_faults, struct key_t);
571 #
572 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
573 #          key->cpu = bpf_get_smp_processor_id();
574 #          key->pid = bpf_get_current_pid_tgid();
575 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
576 #      }
577 #
578 #      int on_sw_page_faults(struct bpf_perf_event_data *ctx) {
579 #          struct key_t key = {};
580 #          get_key(&key);
581 #
582 #          sw_page_faults.increment(key, ctx->sample_period);
583 #          return 0;
584 #      }
585   - name: bundle_bpf  # program entry: accumulates context-switch samples keyed by (cpu, pid, task name)
586     metrics:
587       counter:
588         - name: sw_context_switches  # same identifier as the events table and the BPF_HASH map below
589           documentation: SW context switches
590           namespace: bpf
591           labelnames:  # same fields as struct key_t in the code (declared there as cpu, pid, name)
592             - name
593             - cpu
594             - pid
595     events:
596       - type: 0x1 # SOFTWARE (PERF_TYPE_SOFTWARE in linux/perf_event.h)
597         name: 0x3 # PERF_COUNT_SW_CONTEXT_SWITCHES
598         target: on_sw_context_switches  # handler function defined in the code below
599         table: sw_context_switches  # map declared with BPF_HASH in the code below
600     code: |  # NOTE(review): max_cpus is not referenced anywhere in this code; key->pid keeps only the low 32 bits of bpf_get_current_pid_tgid() (the kernel pid/thread id per bpf-helpers(7), not the tgid) — confirm intended
601       #include <linux/ptrace.h>
602       #include <uapi/linux/bpf_perf_event.h>
603
604       const int max_cpus = 256;
605
606       struct key_t {
607           int cpu;
608           int pid;
609           char name[TASK_COMM_LEN];
610       };
611
612       BPF_HASH(sw_context_switches, struct key_t);
613
614       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
615           key->cpu = bpf_get_smp_processor_id();
616           key->pid = bpf_get_current_pid_tgid();
617           bpf_get_current_comm(&(key->name), sizeof(key->name));
618       }
619
620       int on_sw_context_switches(struct bpf_perf_event_data *ctx) {
621           struct key_t key = {};
622           get_key(&key);
623
624           sw_context_switches.increment(key, ctx->sample_period);
625           return 0;
626       }
627 #  - name: bundle_bpf
628 #    metrics:
629 #      counter:
630 #        - name: sw_cpu_migrations
631 #          documentation: SW cpu migrations
632 #          namespace: bpf
633 #          labelnames:
634 #            - name
635 #            - cpu
636 #            - pid
637 #    events:
638 #      - type: 0x1 # SOFTWARE
639 #        name: 0x4 # PERF_COUNT_SW_CPU_MIGRATIONS
640 #        target: on_sw_cpu_migrations
641 #        table: sw_cpu_migrations
642 #    code: |
643 #      #include <linux/ptrace.h>
644 #      #include <uapi/linux/bpf_perf_event.h>
645 #
646 #      const int max_cpus = 256;
647 #
648 #      struct key_t {
649 #          int cpu;
650 #          int pid;
651 #          char name[TASK_COMM_LEN];
652 #      };
653 #
654 #      BPF_HASH(sw_cpu_migrations, struct key_t);
655 #
656 #      static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
657 #          key->cpu = bpf_get_smp_processor_id();
658 #          key->pid = bpf_get_current_pid_tgid();
659 #          bpf_get_current_comm(&(key->name), sizeof(key->name));
660 #      }
661 #
662 #      int on_sw_cpu_migrations(struct bpf_perf_event_data *ctx) {
663 #          struct key_t key = {};
664 #          get_key(&key);
665 #
666 #          sw_cpu_migrations.increment(key, ctx->sample_period);
667 #          return 0;
668 #      }
669   - name: bundle_bpf  # program entry: accumulates minor page-fault samples keyed by (cpu, pid, task name)
670     metrics:
671       counter:
672         - name: sw_page_faults_min  # same identifier as the events table and the BPF_HASH map below
673           documentation: SW page faults minor
674           namespace: bpf
675           labelnames:  # same fields as struct key_t in the code (declared there as cpu, pid, name)
676             - name
677             - cpu
678             - pid
679     events:
680       - type: 0x1 # SOFTWARE (PERF_TYPE_SOFTWARE in linux/perf_event.h)
681         name: 0x5 # PERF_COUNT_SW_PAGE_FAULTS_MIN
682         target: on_sw_page_faults_min  # handler function defined in the code below
683         table: sw_page_faults_min  # map declared with BPF_HASH in the code below
684     code: |  # NOTE(review): max_cpus is not referenced anywhere in this code; key->pid keeps only the low 32 bits of bpf_get_current_pid_tgid() (the kernel pid/thread id per bpf-helpers(7), not the tgid) — confirm intended
685       #include <linux/ptrace.h>
686       #include <uapi/linux/bpf_perf_event.h>
687
688       const int max_cpus = 256;
689
690       struct key_t {
691           int cpu;
692           int pid;
693           char name[TASK_COMM_LEN];
694       };
695
696       BPF_HASH(sw_page_faults_min, struct key_t);
697
698       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
699           key->cpu = bpf_get_smp_processor_id();
700           key->pid = bpf_get_current_pid_tgid();
701           bpf_get_current_comm(&(key->name), sizeof(key->name));
702       }
703
704       int on_sw_page_faults_min(struct bpf_perf_event_data *ctx) {
705           struct key_t key = {};
706           get_key(&key);
707
708           sw_page_faults_min.increment(key, ctx->sample_period);
709           return 0;
710       }
711   - name: bundle_bpf  # program entry: accumulates major page-fault samples keyed by (cpu, pid, task name)
712     metrics:
713       counter:
714         - name: sw_page_faults_maj  # same identifier as the events table and the BPF_HASH map below
715           documentation: SW page faults major
716           namespace: bpf
717           labelnames:  # same fields as struct key_t in the code (declared there as cpu, pid, name)
718             - name
719             - cpu
720             - pid
721     events:
722       - type: 0x1 # SOFTWARE (PERF_TYPE_SOFTWARE in linux/perf_event.h)
723         name: 0x6 # PERF_COUNT_SW_PAGE_FAULTS_MAJ
724         target: on_sw_page_faults_maj  # handler function defined in the code below
725         table: sw_page_faults_maj  # map declared with BPF_HASH in the code below
726     code: |  # NOTE(review): max_cpus is not referenced anywhere in this code; key->pid keeps only the low 32 bits of bpf_get_current_pid_tgid() (the kernel pid/thread id per bpf-helpers(7), not the tgid) — confirm intended
727       #include <linux/ptrace.h>
728       #include <uapi/linux/bpf_perf_event.h>
729
730       const int max_cpus = 256;
731
732       struct key_t {
733           int cpu;
734           int pid;
735           char name[TASK_COMM_LEN];
736       };
737
738       BPF_HASH(sw_page_faults_maj, struct key_t);
739
740       static inline __attribute__((always_inline)) void get_key(struct key_t* key) {
741           key->cpu = bpf_get_smp_processor_id();
742           key->pid = bpf_get_current_pid_tgid();
743           bpf_get_current_comm(&(key->name), sizeof(key->name));
744       }
745
746       int on_sw_page_faults_maj(struct bpf_perf_event_data *ctx) {
747           struct key_t key = {};
748           get_key(&key);
749
750           sw_page_faults_maj.increment(key, ctx->sample_period);
751           return 0;
752       }