From 5cd951da85f1d57451c1043c38162dccbb67a3e9 Mon Sep 17 00:00:00 2001 From: Viliam Luc Date: Mon, 3 Oct 2022 14:45:48 +0200 Subject: [PATCH] telemetry: small fixes - remove remnant debugging code + fix compatibility with Metric naming convention + fix failures when perf-stat command returns wrong data + improve handling of perf-stat command failures Signed-off-by: Viliam Luc Change-Id: I23a064ed09c8cf0394abb5306fb04f4e33c20aa4 --- resources/libraries/python/TelemetryUtil.py | 4 - .../templates/telemetry/perf_stat_runtime.yaml | 86 ++++++++++------------ resources/tools/telemetry/bundle_perf_stat.py | 69 +++++++++-------- resources/tools/telemetry/metrics.py | 4 +- 4 files changed, 74 insertions(+), 89 deletions(-) diff --git a/resources/libraries/python/TelemetryUtil.py b/resources/libraries/python/TelemetryUtil.py index f8c7d8c9b5..c978441d72 100644 --- a/resources/libraries/python/TelemetryUtil.py +++ b/resources/libraries/python/TelemetryUtil.py @@ -121,10 +121,6 @@ class TelemetryUtil: f"{stdout}" ) - VppCounters.vpp_clear_runtime(node) - sleep(1) - VppCounters.vpp_show_runtime(node) - @staticmethod def run_telemetry_on_all_duts(nodes, profile): """Get telemetry stat read on all DUTs. 
diff --git a/resources/templates/telemetry/perf_stat_runtime.yaml b/resources/templates/telemetry/perf_stat_runtime.yaml index 54b77a9bcc..ae0f804945 100644 --- a/resources/templates/telemetry/perf_stat_runtime.yaml +++ b/resources/templates/telemetry/perf_stat_runtime.yaml @@ -38,105 +38,97 @@ scheduler: programs: - name: bundle_perf_stat metrics: - gauge: - - name: cpu-cycles + counter: + - name: cpu_cycles documentation: Cycles processed by CPUs + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: cpu-cycles # 0x3C umask: 0x00 - EventCode: 0x3C - UMask: 0x00 + - eventcode: 0x3C # cpu-cycles + umask: 0x00 - name: bundle_perf_stat metrics: - gauge: + counter: - name: instructions documentation: Instructions retired by CPUs + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: instructions # 0xC0 umask: 0x00 - EventCode: 0xC0 - UMask: 0x00 + - eventcode: 0xC0 # instructions + umask: 0x00 - name: bundle_perf_stat metrics: - gauge: - - name: MEM_LOAD_UOPS_RETIRED.L1_HIT + counter: + - name: mem_load_uops_retired_l1_hit documentation: L1 Hit + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: MEM_LOAD_UOPS_RETIRED.L1_HIT # 0xD1 umask: 0x01 - EventCode: 0xD1 - UMask: 0x01 + - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L1_HIT + umask: 0x01 - name: bundle_perf_stat metrics: - gauge: - - name: MEM_LOAD_UOPS_RETIRED.L2_HIT + counter: + - name: mem_load_uops_retired_l2_hit documentation: L2 Hit + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: MEM_LOAD_UOPS_RETIRED.L2_HIT # 0xd1 umask: 0x02 - EventCode: 0xD1 - UMask: 0x02 + - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L2_HIT + umask: 0x02 - name: bundle_perf_stat metrics: - gauge: - - name: MEM_LOAD_UOPS_RETIRED.L3_HIT + counter: + - name: mem_load_uops_retired_l3_hit documentation: L3 Hit + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: MEM_LOAD_UOPS_RETIRED.L3_HIT # 0xd1 umask: 0x04 - EventCode: 0xD1 - 
UMask: 0x04 + - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L3_HIT + umask: 0x04 - name: bundle_perf_stat metrics: - gauge: - - name: MEM_LOAD_UOPS_RETIRED.L1_MISS + counter: + - name: mem_load_uops_retired_l1_miss documentation: L1 Miss + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: MEM_LOAD_UOPS_RETIRED.L1_MISS # 0xd1 umask: 0x08 - EventCode: 0xD1 - UMask: 0x08 + - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L1_MISS + umask: 0x08 - name: bundle_perf_stat metrics: - gauge: - - name: MEM_LOAD_UOPS_RETIRED.L2_MISS + counter: + - name: mem_load_uops_retired_l2_miss documentation: L2 Miss + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: MEM_LOAD_UOPS_RETIRED.L2_MISS # 0xd1 umask: 0x10 - EventCode: 0xD1 - UMask: 0x10 + - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L2_MISS + umask: 0x10 - name: bundle_perf_stat metrics: - gauge: - - name: MEM_LOAD_UOPS_RETIRED.L3_MISS + counter: + - name: mem_load_uops_retired_l3_miss documentation: L3 Miss + namespace: perf_stat labelnames: - - name - thread - pid events: - - name: MEM_LOAD_UOPS_RETIRED.L3_MISS # 0xd1 umask: 0x020 - EventCode: 0xD1 - UMask: 0x20 + - eventcode: 0xD1 # MEM_LOAD_UOPS_RETIRED.L3_MISS + umask: 0x20 diff --git a/resources/tools/telemetry/bundle_perf_stat.py b/resources/tools/telemetry/bundle_perf_stat.py index 038e86e7a0..f76149f476 100644 --- a/resources/tools/telemetry/bundle_perf_stat.py +++ b/resources/tools/telemetry/bundle_perf_stat.py @@ -50,49 +50,46 @@ class BundlePerfStat: :param duration: Time how long perf stat is collecting data (in seconds). Default value is 1 second. 
:type duration: int - EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask """ try: self.serializer.create(metrics=self.metrics) - for event in self.events: - text = subprocess.getoutput( - f"""sudo perf stat -x\; -e\ - '{{cpu/event={hex(event[u"EventCode"])},\ - umask={hex(event[u"UMask"])}/u}}'\ - -a --per-thread\ - sleep {duration}""" - ) - - if text == u"": - getLogger("console_stdout").info(event[u"name"]) - continue - if u";" not in text: - getLogger("console_stdout").info( - f"Could not get counters for event \"{event[u'name']}\"" - f". Is it supported by CPU?" - ) - continue - - for line in text.splitlines(): - item = dict() - labels = dict() - item[u"name"] = event[u"name"] - item[u"value"] = line.split(";")[1] - labels["thread"] = u"-".join( - line.split(";")[0].split("-")[0:-1] - ) - labels["pid"] = line.split(";")[0].split("-")[-1] - labels["name"] = item[u"name"] - item[u"labels"] = labels - - getLogger("console_stdout").info(item) - self.api_replies_list.append(item) - - except AttributeError: + event = self.events[0] + text = subprocess.getoutput( + f"""sudo perf stat -x\; -e\ + '{{cpu/event={hex(event[u"eventcode"])},\ + umask={hex(event[u"umask"])}/u}}'\ + -a --per-thread\ + sleep {duration}""" + ) + except subprocess.CalledProcessError: getLogger("console_stderr").error(f"Could not successfully run " f"perf stat command.") sys.exit(Constants.err_linux_perf_stat) + if text == u"": + getLogger("console_stdout").info(event[u"eventcode"]) + elif text.count(u";") < 6: + getLogger("console_stdout").info( + f"Could not get counters for event "\ + f"{event[u'eventcode']}. "\ + f"Is it supported by CPU?" 
+ ) + else: + for line in text.splitlines(): + item = dict() + labels = dict() + item[u"name"] = self.metrics['counter'][0]['name'] + item[u"value"] = line.split(";")[1] + labels["thread"] = u"-".join( + line.split(";")[0].split("-")[0:-1] + ) + labels["pid"] = line.split(";")[0].split("-")[-1] + item[u"labels"] = labels + + getLogger("console_stdout").info(item) + self.api_replies_list.append(item) + + def detach(self): pass diff --git a/resources/tools/telemetry/metrics.py b/resources/tools/telemetry/metrics.py index ba6bae5e70..7a22acfd1b 100644 --- a/resources/tools/telemetry/metrics.py +++ b/resources/tools/telemetry/metrics.py @@ -104,7 +104,7 @@ class Metric: u"Sample", [u"name", u"labels", u"value", u"timestamp"] ) - if not re.compile(r"^[a-zA-Z_:\-.][a-zA-Z0-9_:\-.]*$").match(name): + if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(name): raise ValueError(f"Invalid metric name: {name}!") if typ not in self.metric_types: raise ValueError(f"Invalid metric type: {typ}!") @@ -214,7 +214,7 @@ class MetricBase: full_name += f"{subsystem}_" if subsystem else u"" full_name += name - if not re.compile(r"^[a-zA-Z_:\-.][a-zA-Z0-9_:\-.]*$").match(full_name): + if not re.compile(r"^[a-zA-Z_:][a-zA-Z0-9_:]*$").match(full_name): raise ValueError( f"Invalid metric name: {full_name}!" ) -- 2.16.6