fix(telemetry): fix perf stat issue on Arm 87/42087/9
author Jay Wang <[email protected]>
Fri, 3 Jan 2025 15:41:22 +0000 (15:41 +0000)
committer Peter Mikus <[email protected]>
Tue, 21 Jan 2025 08:51:00 +0000 (08:51 +0000)
VPP CSIT utilises the perf stat command to monitor the perf events defined
in the perf_stat_runtime.yaml file. However, the raw event specification
format (cpu/event=0xXXX,umask=0xXXX/u) is specific to x86
platforms (Intel/AMD). On ARM platforms, we stick to the symbolic names
of the perf events instead.

Signed-off-by: Jay Wang <[email protected]>
Change-Id: I967bc4bddf5fc9ffbe61c276efdd95d69f7c4412

resources/libraries/robot/performance/performance_actions.robot
resources/templates/telemetry/perf_stat_runtime.yaml
resources/templates/telemetry/perf_stat_runtime_arm.yaml [new file with mode: 0644]
resources/tools/telemetry/bundle_perf_stat.py

index df520e0..de7c632 100644 (file)
 | | ${transaction_scale} = | Get Transaction Scale
 | | ${transaction_type} = | Get Transaction Type
 | | ${use_latency} = | Get Use Latency
+| | ${node_arch} = | Get Node Arch | ${nodes[u'DUT1']}
+| | ${profile} = | Set Variable If | "${node_arch}" == "aarch64"
+| | ... | perf_stat_runtime_arm.yaml | perf_stat_runtime.yaml
 | | Send traffic on tg
 | | ... | duration=${-1}
 | | ... | rate=${runtime_rate}
 | | ... | ramp_up_duration=${ramp_up_duration}
 | | ... | ramp_up_rate=${ramp_up_rate}
 | | Run Telemetry On All DUTs
-| | ... | ${nodes} | profile=perf_stat_runtime.yaml
+| | ... | ${nodes} | profile=${profile}
 | | ... | rate=${telemetry_rate} | export=${False}
 | | Stop traffic on tg
 
index ae0f804..1e07c2d 100644 (file)
@@ -37,6 +37,7 @@ scheduler:
   duration: 1
 programs:
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: cpu_cycles
@@ -49,6 +50,7 @@ programs:
       - eventcode: 0x3C # cpu-cycles
         umask: 0x00
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: instructions
@@ -61,6 +63,7 @@ programs:
       - eventcode: 0xC0 # instructions
         umask: 0x00
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: mem_load_uops_retired_l1_hit
@@ -73,6 +76,7 @@ programs:
       - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L1_HIT
         umask: 0x01
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: mem_load_uops_retired_l2_hit
@@ -85,6 +89,7 @@ programs:
       - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L2_HIT
         umask: 0x02
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: mem_load_uops_retired_l3_hit
@@ -97,6 +102,7 @@ programs:
       - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L3_HIT
         umask: 0x04
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: mem_load_uops_retired_l1_miss
@@ -109,6 +115,7 @@ programs:
       - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L1_MISS
         umask: 0x08
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: mem_load_uops_retired_l2_miss
@@ -121,6 +128,7 @@ programs:
       - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L2_MISS
         umask: 0x10
   - name: bundle_perf_stat
+    architecture: x86_64
     metrics:
       counter:
         - name: mem_load_uops_retired_l3_miss
diff --git a/resources/templates/telemetry/perf_stat_runtime_arm.yaml b/resources/templates/telemetry/perf_stat_runtime_arm.yaml
new file mode 100644 (file)
index 0000000..ae1e4aa
--- /dev/null
@@ -0,0 +1,104 @@
+---
+logging:
+  version: 1
+  formatters:
+    console_stdout:
+      format: '%(asctime)s - %(name)s - %(message)s'
+    console_stderr:
+      format: '%(message)s'
+    prom:
+      format: '%(message)s'
+  handlers:
+    console_stdout:
+      class: logging.StreamHandler
+      level: INFO
+      formatter: console_stdout
+      stream: ext://sys.stdout
+    console_stderr:
+      class: logging.StreamHandler
+      level: ERROR
+      formatter: console_stderr
+      stream: ext://sys.stderr
+    prom:
+      class: logging.handlers.RotatingFileHandler
+      level: INFO
+      formatter: prom
+      filename: /tmp/metric.prom
+      mode: w
+  loggers:
+    prom:
+      handlers: [prom]
+      level: INFO
+      propagate: False
+  root:
+    level: INFO
+    handlers: [console_stdout, console_stderr]
+scheduler:
+  duration: 1
+programs:
+  - name: bundle_perf_stat
+    architecture: aarch64
+    metrics:
+      counter:
+        - name: cpu_cycles
+          documentation: Cycles processed by CPUs
+          namespace: perf_stat
+          labelnames:
+            - thread
+            - pid
+    events: cpu-cycles
+  - name: bundle_perf_stat
+    architecture: aarch64
+    metrics:
+      counter:
+        - name: instructions
+          documentation: Instructions retired by CPUs
+          namespace: perf_stat
+          labelnames:
+            - thread
+            - pid
+    events: instructions
+  - name: bundle_perf_stat
+    architecture: aarch64
+    metrics:
+      counter:
+        - name: l1d_cache
+          documentation: Level 1 data cache access
+          namespace: perf_stat
+          labelnames:
+            - thread
+            - pid
+    events: l1d_cache
+  - name: bundle_perf_stat
+    architecture: aarch64
+    metrics:
+      counter:
+        - name: l1d_cache_refill
+          documentation: Level 1 data cache refill
+          namespace: perf_stat
+          labelnames:
+            - thread
+            - pid
+    events: l1d_cache_refill
+  - name: bundle_perf_stat
+    architecture: aarch64
+    metrics:
+      counter:
+        - name: l2d_cache
+          documentation: Level 2 data cache access
+          namespace: perf_stat
+          labelnames:
+            - thread
+            - pid
+    events: l2d_cache
+  - name: bundle_perf_stat
+    architecture: aarch64
+    metrics:
+      counter:
+        - name: l2d_cache_refill
+          documentation: Level 2 data cache refill
+          namespace: perf_stat
+          labelnames:
+            - thread
+            - pid
+    events: l2d_cache_refill
index 471dd07..7f64eda 100644 (file)
@@ -1,4 +1,4 @@
-# Copyright (c) 2022 Cisco and/or its affiliates.
+# Copyright (c) 2025 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -39,6 +39,7 @@ class BundlePerfStat:
         """
         self.metrics = program[u"metrics"]
         self.events = program[u"events"]
+        self.architecture = program[u"architecture"]
         self.api_replies_list = list()
         self.serializer = serializer
         self.hook = hook
@@ -53,21 +54,35 @@ class BundlePerfStat:
                """
         try:
             self.serializer.create(metrics=self.metrics)
-            event = self.events[0]
-            text = subprocess.getoutput(
-                f"""sudo perf stat -x\; -e\
-                '{{cpu/event={hex(event[u"eventcode"])},\
-                umask={hex(event[u"umask"])}/u}}'\
-                -a --per-thread\
-                sleep {duration}"""
-            )
+            # The following PMU event format is specific to x86_64 systems.
+            if self.architecture == "x86_64":
+                event = self.events[0]
+                text = subprocess.getoutput(
+                    f"""sudo perf stat -x';' -e\
+                    '{{cpu/event={hex(event[u"eventcode"])},\
+                    umask={hex(event[u"umask"])}/u}}'\
+                    -a --per-thread\
+                    sleep {duration}"""
+                )
+            # We select the symbolic event name instead on AArch64.
+            else:
+                event = self.events
+                text = subprocess.getoutput(
+                    f"""sudo perf stat -x';' -e\
+                    {event}\
+                    -a --per-thread\
+                    sleep {duration}"""
+                )
         except subprocess.CalledProcessError:
             getLogger("console_stderr").error(f"Could not successfully run "
                                               f"perf stat command.")
             sys.exit(Constants.err_linux_perf_stat)
 
         if text == u"":
-            getLogger("console_stdout").info(event[u"eventcode"])
+            if self.architecture == "x86_64":
+                getLogger("console_stdout").info(event[u"eventcode"])
+            else:
+                getLogger("console_stdout").info(event)
         else:
             for line in text.splitlines():
                 if line.count(u";") < 6: