telemetry: small fixes 37/37337/8
author Viliam Luc <vluc@cisco.com>
Mon, 3 Oct 2022 12:45:48 +0000 (14:45 +0200)
committer Peter Mikus <peter.mikus@protonmail.ch>
Mon, 7 Nov 2022 11:05:36 +0000 (11:05 +0000)
- removed remnant debugging code
+ fix compatibility with Metric naming convention
+ fix failures when perf-stat command returns wrong data
+ better handling when perf-stat command fails

Signed-off-by: Viliam Luc <vluc@cisco.com>
Change-Id: I23a064ed09c8cf0394abb5306fb04f4e33c20aa4

resources/libraries/python/TelemetryUtil.py
resources/templates/telemetry/perf_stat_runtime.yaml
resources/tools/telemetry/bundle_perf_stat.py
resources/tools/telemetry/metrics.py

index f8c7d8c..c978441 100644 (file)
@@ -121,10 +121,6 @@ class TelemetryUtil:
             f"{stdout}"
         )
 
-        VppCounters.vpp_clear_runtime(node)
-        sleep(1)
-        VppCounters.vpp_show_runtime(node)
-
     @staticmethod
     def run_telemetry_on_all_duts(nodes, profile):
         """Get telemetry stat read on all DUTs.
index 54b77a9..ae0f804 100644 (file)
@@ -38,105 +38,97 @@ scheduler:
 programs:
   - name: bundle_perf_stat
     metrics:
-      gauge:
-        - name: cpu-cycles
+      counter:
+        - name: cpu_cycles
           documentation: Cycles processed by CPUs
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: cpu-cycles # 0x3C umask: 0x00
-        EventCode: 0x3C
-        UMask: 0x00
+      - eventcode: 0x3C # cpu-cycles
+        umask: 0x00
   - name: bundle_perf_stat
     metrics:
-      gauge:
+      counter:
         - name: instructions
           documentation: Instructions retired by CPUs
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: instructions # 0xC0 umask: 0x00
-        EventCode: 0xC0
-        UMask: 0x00
+      - eventcode: 0xC0 # instructions
+        umask: 0x00
   - name: bundle_perf_stat
     metrics:
-      gauge:
-        - name: MEM_LOAD_UOPS_RETIRED.L1_HIT
+      counter:
+        - name: mem_load_uops_retired_l1_hit
           documentation: L1 Hit
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: MEM_LOAD_UOPS_RETIRED.L1_HIT # 0xD1 umask: 0x01
-        EventCode: 0xD1
-        UMask: 0x01
+      - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L1_HIT
+        umask: 0x01
   - name: bundle_perf_stat
     metrics:
-      gauge:
-        - name: MEM_LOAD_UOPS_RETIRED.L2_HIT
+      counter:
+        - name: mem_load_uops_retired_l2_hit
           documentation: L2 Hit
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: MEM_LOAD_UOPS_RETIRED.L2_HIT # 0xd1 umask: 0x02
-        EventCode: 0xD1
-        UMask: 0x02
+      - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L2_HIT
+        umask: 0x02
   - name: bundle_perf_stat
     metrics:
-      gauge:
-        - name: MEM_LOAD_UOPS_RETIRED.L3_HIT
+      counter:
+        - name: mem_load_uops_retired_l3_hit
           documentation: L3 Hit
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: MEM_LOAD_UOPS_RETIRED.L3_HIT # 0xd1 umask: 0x04
-        EventCode: 0xD1
-        UMask: 0x04
+      - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L3_HIT
+        umask: 0x04
   - name: bundle_perf_stat
     metrics:
-      gauge:
-        - name: MEM_LOAD_UOPS_RETIRED.L1_MISS
+      counter:
+        - name: mem_load_uops_retired_l1_miss
           documentation: L1 Miss
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: MEM_LOAD_UOPS_RETIRED.L1_MISS # 0xd1 umask: 0x08
-        EventCode: 0xD1
-        UMask: 0x08
+      - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L1_MISS
+        umask: 0x08
   - name: bundle_perf_stat
     metrics:
-      gauge:
-        - name: MEM_LOAD_UOPS_RETIRED.L2_MISS
+      counter:
+        - name: mem_load_uops_retired_l2_miss
           documentation: L2 Miss
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: MEM_LOAD_UOPS_RETIRED.L2_MISS # 0xd1 umask: 0x10
-        EventCode: 0xD1
-        UMask: 0x10
+      - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L2_MISS
+        umask: 0x10
   - name: bundle_perf_stat
     metrics:
-      gauge:
-        - name: MEM_LOAD_UOPS_RETIRED.L3_MISS
+      counter:
+        - name: mem_load_uops_retired_l3_miss
           documentation: L3 Miss
+          namespace: perf_stat
           labelnames:
-            - name
             - thread
             - pid
     events:
-      - name: MEM_LOAD_UOPS_RETIRED.L3_MISS # 0xd1 umask: 0x020
-        EventCode: 0xD1
-        UMask: 0x20
+      - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L3_MISS
+        umask: 0x20
index 038e86e..f76149f 100644 (file)
@@ -50,49 +50,46 @@ class BundlePerfStat:
                :param duration: Time how long perf stat is collecting data (in
                seconds). Default value is 1 second.
                :type duration: int
-               EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
                """
         try:
             self.serializer.create(metrics=self.metrics)
-            for event in self.events:
-                text = subprocess.getoutput(
-                    f"""sudo perf stat -x\; -e\
-                    '{{cpu/event={hex(event[u"EventCode"])},\
-                    umask={hex(event[u"UMask"])}/u}}'\
-                    -a --per-thread\
-                    sleep {duration}"""
-                )
-
-                if text == u"":
-                    getLogger("console_stdout").info(event[u"name"])
-                    continue
-                if u";" not in text:
-                    getLogger("console_stdout").info(
-                        f"Could not get counters for event \"{event[u'name']}\""
-                        f". Is it supported by CPU?"
-                    )
-                    continue
-
-                for line in text.splitlines():
-                    item = dict()
-                    labels = dict()
-                    item[u"name"] = event[u"name"]
-                    item[u"value"] = line.split(";")[1]
-                    labels["thread"] = u"-".join(
-                        line.split(";")[0].split("-")[0:-1]
-                    )
-                    labels["pid"] = line.split(";")[0].split("-")[-1]
-                    labels["name"] = item[u"name"]
-                    item[u"labels"] = labels
-
-                    getLogger("console_stdout").info(item)
-                    self.api_replies_list.append(item)
-
-        except AttributeError:
+            event = self.events[0]
+            text = subprocess.getoutput(
+                f"""sudo perf stat -x\; -e\
+                '{{cpu/event={hex(event[u"eventcode"])},\
+                umask={hex(event[u"umask"])}/u}}'\
+                -a --per-thread\
+                sleep {duration}"""
+            )
+        except subprocess.CalledProcessError:
             getLogger("console_stderr").error(f"Could not successfully run "
                                               f"perf stat command.")
             sys.exit(Constants.err_linux_perf_stat)
 
+        if text == u"":
+            getLogger("console_stdout").info(event[u"eventcode"])
+        elif text.count(u";") < 6:
+            getLogger("console_stdout").info(
+                f"Could not get counters for event "\
+                f"{event[u'eventcode']}. "\
+                f"Is it supported by CPU?"
+            )
+        else:
+            for line in text.splitlines():
+                item = dict()
+                labels = dict()
+                item[u"name"] = self.metrics['counter'][0]['name']
+                item[u"value"] = line.split(";")[1]
+                labels["thread"] = u"-".join(
+                    line.split(";")[0].split("-")[0:-1]
+                )
+                labels["pid"] = line.split(";")[0].split("-")[-1]
+                item[u"labels"] = labels
+
+                getLogger("console_stdout").info(item)
+                self.api_replies_list.append(item)
+
+
     def detach(self):
         pass
 
index ba6bae5..7a22acf 100644 (file)
@@ -104,7 +104,7 @@ class Metric:
             u"Sample", [u"name", u"labels", u"value", u"timestamp"]
         )
 
-        if not re.compile(r"^[a-zA-Z_:\-.][a-zA-Z0-9_:\-.]*$").match(name):
+        if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(name):
             raise ValueError(f"Invalid metric name: {name}!")
         if typ not in self.metric_types:
             raise ValueError(f"Invalid metric type: {typ}!")
@@ -214,7 +214,7 @@ class MetricBase:
         full_name += f"{subsystem}_" if subsystem else u""
         full_name += name
 
-        if not re.compile(r"^[a-zA-Z_:\-.][a-zA-Z0-9_:\-.]*$").match(full_name):
+        if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(full_name):
             raise ValueError(
                 f"Invalid metric name: {full_name}!"
             )