feat(uti): Generate structure of tests from data for trending
[csit.git] / resources / tools / dash / app / pal / trending / graphs.py
1 # Copyright (c) 2022 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """
15 """
16
17 import plotly.graph_objects as go
18 import pandas as pd
19 import re
20
21 import hdrh.histogram
22 import hdrh.codec
23
24 from datetime import datetime
25 from numpy import isnan
26
27 from ..jumpavg import classify
28
29
# Palette of hex colours for the plotted traces. graph_trending() picks an
# entry by the index of the selected test (modulo the palette length);
# graph_hdrh_latency() uses one entry per pair of histograms.
_COLORS = (
    u"#1A1110", u"#DA2647", u"#214FC6", u"#01786F", u"#BD8260", u"#FFD12A",
    u"#A6E7FF", u"#738276", u"#C95A49", u"#FC5A8D", u"#CEC8EF", u"#391285",
    u"#6F2DA8", u"#FF878D", u"#45A27D", u"#FFD0B9", u"#FD5240", u"#DB91EF",
    u"#44D7A8", u"#4F86F7", u"#84DE02", u"#FFCFF1", u"#614051"
)
# Position on the colour scale (0.0 - 1.0) used as the marker colour of an
# anomaly circle for each classification.
_ANOMALY_COLOR = {
    u"regression": 0.0,
    u"normal": 0.5,
    u"progression": 1.0
}
# Three-band colour scale for throughput graphs: the lowest third is red, the
# middle third white and the top third green, so a regression (0.0) is drawn
# in red and a progression (1.0) in green.
_COLORSCALE_TPUT = [
    [0.00, u"red"],
    [0.33, u"red"],
    [0.33, u"white"],
    [0.66, u"white"],
    [0.66, u"green"],
    [1.00, u"green"]
]
# Colour bar tick labels for throughput graphs, in order of increasing
# colour-scale value.
_TICK_TEXT_TPUT = [u"Regression", u"Normal", u"Progression"]
# Three-band colour scale for latency graphs; the colours are inverted
# compared to the throughput scale (lowest third green, top third red).
# NOTE(review): presumably because a lower latency is the better result -
# confirm.
_COLORSCALE_LAT = [
    [0.00, u"green"],
    [0.33, u"green"],
    [0.33, u"white"],
    [0.66, u"white"],
    [0.66, u"red"],
    [1.00, u"red"]
]
# Colour bar tick labels for latency graphs, in order of increasing
# colour-scale value.
_TICK_TEXT_LAT = [u"Progression", u"Normal", u"Regression"]
# Name of the data frame column holding the plotted value, per graph type.
_VALUE = {
    "mrr": "result_receive_rate_rate_avg",
    "ndr": "result_ndr_lower_rate_value",
    "pdr": "result_pdr_lower_rate_value",
    "pdr-lat": "result_latency_forward_pdr_50_avg"
}
# Name of the data frame column holding the unit of the plotted value, per
# graph type.
_UNIT = {
    "mrr": "result_receive_rate_rate_unit",
    "ndr": "result_ndr_lower_rate_unit",
    "pdr": "result_pdr_lower_rate_unit",
    "pdr-lat": "result_latency_forward_pdr_50_unit"
}
# Data frame columns with the latency histograms copied by
# _get_hdrh_latencies(). Forward and reverse columns alternate;
# graph_hdrh_latency() derives the direction label and the colour of a trace
# from the position of its item.
_LAT_HDRH = (  # Do not change the order
    "result_latency_forward_pdr_0_hdrh",
    "result_latency_reverse_pdr_0_hdrh",
    "result_latency_forward_pdr_10_hdrh",
    "result_latency_reverse_pdr_10_hdrh",
    "result_latency_forward_pdr_50_hdrh",
    "result_latency_reverse_pdr_50_hdrh",
    "result_latency_forward_pdr_90_hdrh",
    "result_latency_reverse_pdr_90_hdrh",
)
# This value depends on latency stream rate (9001 pps) and duration (5s).
# Keep it slightly higher to ensure rounding errors do not remove the tick
# mark. It also caps the percentile in graph_hdrh_latency(), so that the
# x coordinate 100 / (100 - percentile) stays finite at 100%.
PERCENTILE_MAX = 99.999501

# Human readable description of the load level of each latency histogram;
# used as the legend name, the legend group and the hover title of its trace.
_GRAPH_LAT_HDRH_DESC = {
    u"result_latency_forward_pdr_0_hdrh": u"No-load.",
    u"result_latency_reverse_pdr_0_hdrh": u"No-load.",
    u"result_latency_forward_pdr_10_hdrh": u"Low-load, 10% PDR.",
    u"result_latency_reverse_pdr_10_hdrh": u"Low-load, 10% PDR.",
    u"result_latency_forward_pdr_50_hdrh": u"Mid-load, 50% PDR.",
    u"result_latency_reverse_pdr_50_hdrh": u"Mid-load, 50% PDR.",
    u"result_latency_forward_pdr_90_hdrh": u"High-load, 90% PDR.",
    u"result_latency_reverse_pdr_90_hdrh": u"High-load, 90% PDR."
}
95
96
def _get_hdrh_latencies(row: pd.Series, name: str) -> dict:
    """Collect the latency histograms of one test run.

    :param row: One row of the trending data frame.
    :param name: Name of the test the row belongs to; stored under the key
        "name" as the first item of the result.
    :type row: pandas.Series
    :type name: str
    :returns: The name followed by the value of every column listed in
        _LAT_HDRH (in that order), or None if any of the columns is missing
        in the row.
    :rtype: dict or None
    """

    try:
        histograms = {column: row[column] for column in _LAT_HDRH}
    except KeyError:
        # An incomplete set of histograms is of no use to the caller.
        return None

    return {"name": name, **histograms}
109
110
def _classify_anomalies(data):
    """Process the data and return anomalies and trending values.

    The samples are split by classify() into groups; the average and the
    standard deviation of a group are the trend values of its samples. The
    first sample taken from a group gets the comment of the group as its
    classification, the following ones are "normal". A nan sample is reported
    as an "outlier" with nan trend values.

    :param data: Full data set with unavailable samples replaced by nan.
        Only the values are used.
    :type data: OrderedDict
    :returns: Classification, trend averages and trend standard deviations,
        one item per sample.
    :rtype: 3-tuple, list of strings, list of floats and list of floats
    """
    samples = list(data.values())
    # NaN means something went wrong.
    # Use 0.0 to cause that being reported as a severe regression.
    cleaned = [0.0 if isnan(item) else item for item in samples]
    # TODO: Make BitCountingGroupList a subclass of list again?
    pending_groups = classify(cleaned).group_list
    pending_groups.reverse()  # Just to use .pop() for FIFO.

    classification = []
    avgs = []
    stdevs = []
    group = None
    remaining = 0
    trend_avg = 0.0
    trend_stdev = 0.0
    for item in samples:
        if isnan(item):
            # NOTE(review): the nan sample was passed to classify() as 0.0,
            # so it is counted in the run list of some group, but it does
            # not decrease the "remaining" counter here - confirm that the
            # group boundaries are still attributed to the right samples.
            classification.append(u"outlier")
            avgs.append(item)
            stdevs.append(item)
            continue
        if group is None or remaining < 1:
            # The current group is used up, switch to the next one.
            remaining = 0
            while remaining < 1:  # Ignore empty groups (should not happen).
                group = pending_groups.pop()
                remaining = len(group.run_list)
            trend_avg = group.stats.avg
            trend_stdev = group.stats.stdev
            label = group.comment
        else:
            label = u"normal"
        classification.append(label)
        avgs.append(trend_avg)
        stdevs.append(trend_stdev)
        remaining -= 1
    return classification, avgs, stdevs
159
160
def select_trending_data(data: pd.DataFrame, itm:dict) -> pd.DataFrame:
    """Select the passed runs of one test from the trending data.

    :param data: Data frame with the columns "dut_type", "test_type",
        "passed", "job", "test_id" and "start_time".
    :param itm: Selected test, a dictionary with the keys "phy" (in the form
        "<topology>-<architecture>-<nic>-<driver>"), "dut", "core",
        "framesize", "test" and "testtype".
    :type data: pandas.DataFrame
    :type itm: dict
    :returns: The matching rows sorted by "start_time" with a fresh index,
        or None if "phy" does not consist of exactly four dash separated
        parts.
    :rtype: pandas.DataFrame or None
    """

    phy_parts = itm["phy"].split("-")
    if len(phy_parts) != 4:
        return None
    topo, arch, nic, drv = phy_parts
    # The dpdk driver is not part of the test ID; any other driver is, with
    # dashes instead of underscores.
    drv = "" if drv == "dpdk" else f"{drv}-".replace("_", "-")

    is_trex = itm["dut"] == "trex"
    core = "" if is_trex else f"{itm['core']}"
    ttype = itm["testtype"]
    if ttype in ("ndr", "pdr"):
        # Both values come from the same "ndrpdr" test.
        ttype = "ndrpdr"
    dut = "none" if is_trex else itm["dut"].upper()

    passed_runs = data.loc[
        (data["dut_type"] == dut)
        & (data["test_type"] == ttype)
        & (data["passed"] == True)
    ]
    on_testbed = passed_runs[
        passed_runs.job.str.endswith(f"{topo}-{arch}")
    ]
    # The nic must be preceded by ".", "|" or "-"; the rest of the ID must
    # end with the frame size, cores, driver, test name and test type.
    pattern = (
        f"^.*[.|-]{nic}.*{itm['framesize']}-{core}-"
        f"{drv}{itm['test']}-{ttype}$"
    )
    id_matches = on_testbed.test_id.str.contains(pattern, regex=True)

    return on_testbed[id_matches].sort_values(
        by="start_time", ignore_index=True
    )
192
193
def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
    start: datetime, end: datetime, color: str) -> list:
    """Generate the traces of one test for a trending graph.

    :param ttype: Type of the graph, one of the keys of _VALUE ("mrr", "ndr",
        "pdr" or "pdr-lat").
    :param name: Name of the test; used as the name and the legend group of
        all generated traces.
    :param df: Runs of the test, as returned by select_trending_data().
    :param start: Only runs started at this time or later are plotted.
    :param end: Only runs started at this time or sooner are plotted.
    :param color: Colour of the samples and of the trend line.
    :type ttype: str
    :type name: str
    :type df: pandas.DataFrame
    :type start: datetime.datetime
    :type end: datetime.datetime
    :type color: str
    :returns: Traces with the samples, the trend line and the circles marking
        anomalies; an empty list if there is nothing to plot.
    :rtype: list
    """

    value_col = _VALUE[ttype]
    df = df.dropna(subset=[value_col])
    if df.empty:
        return list()
    df = df.loc[(df["start_time"] >= start) & (df["start_time"] <= end)]
    if df.empty:
        return list()

    is_latency = ttype == "pdr-lat"
    x_axis = df["start_time"].tolist()

    # _classify_anomalies() reads only the values of the mapping. Key the
    # samples by their position, not by their timestamp: two runs with the
    # same start time would otherwise collapse into one sample and the
    # returned lists would be shorter than (and misaligned with) x_axis.
    anomalies, trend_avg, trend_stdev = _classify_anomalies(
        dict(enumerate(df[value_col]))
    )

    unit_col = _UNIT[ttype]
    value_label = "latency" if is_latency else "average"
    hover = list()
    customdata = list()
    for _, row in df.iterrows():
        if ttype == "mrr":
            # Only MRR results provide the standard deviation of a sample.
            stdev_line = (
                f"stdev [{row['result_receive_rate_rate_unit']}]: "
                f"{row['result_receive_rate_rate_stdev']}<br>"
            )
        else:
            stdev_line = ""
        hover.append(
            f"date: {row['start_time'].strftime('%d-%m-%Y %H:%M:%S')}<br>"
            f"{value_label} [{row[unit_col]}]: {row[value_col]}<br>"
            f"{stdev_line}"
            f"{row['dut_type']}-ref: {row['dut_version']}<br>"
            f"csit-ref: {row['job']}/{row['build']}<br>"
            f"hosts: {', '.join(row['hosts'])}"
        )
        if is_latency:
            # The histograms are attached to the samples as custom data.
            customdata.append(_get_hdrh_latencies(row, name))

    trend_unit = "us" if is_latency else "pps"
    hover_trend = list()
    for avg, stdev, (_, row) in zip(trend_avg, trend_stdev, df.iterrows()):
        hover_trend.append(
            f"date: {row['start_time'].strftime('%d-%m-%Y %H:%M:%S')}<br>"
            f"trend [{trend_unit}]: {avg}<br>"
            f"stdev [{trend_unit}]: {stdev}<br>"
            f"{row['dut_type']}-ref: {row['dut_version']}<br>"
            f"csit-ref: {row['job']}/{row['build']}<br>"
            f"hosts: {', '.join(row['hosts'])}"
        )

    traces = [
        go.Scatter(  # Samples
            x=x_axis,
            y=df[value_col],
            name=name,
            mode="markers",
            marker={
                "size": 5,
                "color": color,
                "symbol": "circle",
            },
            text=hover,
            hoverinfo="text+name",
            showlegend=True,
            legendgroup=name,
            customdata=customdata
        ),
        go.Scatter(  # Trend line
            x=x_axis,
            y=trend_avg,
            name=name,
            mode="lines",
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            text=hover_trend,
            hoverinfo="text+name",
            showlegend=False,
            legendgroup=name,
        )
    ]

    if anomalies:
        anomaly_x = list()
        anomaly_y = list()
        anomaly_color = list()
        anomaly_hover = list()
        for x_val, avg, anomaly in zip(x_axis, trend_avg, anomalies):
            if anomaly not in ("regression", "progression"):
                continue
            anomaly_x.append(x_val)
            anomaly_y.append(avg)
            anomaly_color.append(_ANOMALY_COLOR[anomaly])
            anomaly_hover.append(
                f"date: {x_val.strftime('%d-%m-%Y %H:%M:%S')}<br>"
                f"trend [{trend_unit}]: {avg}<br>"
                f"classification: {anomaly}"
            )
        # Three extra colour values without a point to draw.
        # NOTE(review): presumably they make the colours always span the
        # whole colour scale, whatever anomalies are present - confirm.
        anomaly_color.extend([0.0, 0.5, 1.0])
        traces.append(
            go.Scatter(  # Circles marking the anomalies
                x=anomaly_x,
                y=anomaly_y,
                mode="markers",
                text=anomaly_hover,
                hoverinfo="text+name",
                showlegend=False,
                legendgroup=name,
                name=name,
                marker={
                    "size": 15,
                    "symbol": "circle-open",
                    "color": anomaly_color,
                    "colorscale": (
                        _COLORSCALE_LAT if is_latency else _COLORSCALE_TPUT
                    ),
                    "showscale": True,
                    "line": {
                        "width": 2
                    },
                    "colorbar": {
                        "y": 0.5,
                        "len": 0.8,
                        "title": "Circles Marking Data Classification",
                        "titleside": "right",
                        "tickmode": "array",
                        "tickvals": [0.167, 0.500, 0.833],
                        "ticktext": (
                            _TICK_TEXT_LAT if is_latency else _TICK_TEXT_TPUT
                        ),
                        "ticks": "",
                        "ticklen": 0,
                        "tickangle": -90,
                        "thickness": 10
                    }
                }
            )
        )

    return traces
343
344
def graph_trending(data: pd.DataFrame, sel:dict, layout: dict,
    start: datetime, end: datetime) -> tuple:
    """Generate the trending graphs of the selected tests.

    :param data: Trending data, see select_trending_data().
    :param sel: Selected tests. It is iterated, each item being a dictionary
        with the keys "dut", "phy", "framesize", "core", "test" and
        "testtype".
    :param layout: Layouts of the graphs; the items "plot-trending-tput" and
        "plot-trending-lat" are used if present.
    :param start: Only runs started at this time or later are plotted.
    :param end: Only runs started at this time or sooner are plotted.
    :type data: pandas.DataFrame
    :type sel: dict
    :type layout: dict
    :type start: datetime.datetime
    :type end: datetime.datetime
    :returns: Throughput graph and latency graph. A graph is None if there
        are no traces for it; latency traces are generated only for the
        tests of the "pdr" type.
    :rtype: tuple(plotly.graph_objects.Figure, plotly.graph_objects.Figure)
    """

    if not sel:
        return None, None

    name_keys = ("dut", "phy", "framesize", "core", "test", "testtype")
    fig_tput = None
    fig_lat = None
    for position, item in enumerate(sel):

        frame = select_trending_data(data, item)
        if frame is None or frame.empty:
            continue

        name = "-".join(item[key] for key in name_keys)
        test_type = item["testtype"]
        # Cycle through the palette if there are more tests than colours.
        color = _COLORS[position % len(_COLORS)]

        tput_traces = _generate_trending_traces(
            test_type, name, frame, start, end, color
        )
        if tput_traces:
            fig_tput = fig_tput or go.Figure()
            fig_tput.add_traces(tput_traces)

        if test_type != "pdr":
            continue

        lat_traces = _generate_trending_traces(
            "pdr-lat", name, frame, start, end, color
        )
        if lat_traces:
            fig_lat = fig_lat or go.Figure()
            fig_lat.add_traces(lat_traces)

    if fig_tput:
        fig_tput.update_layout(layout.get("plot-trending-tput", dict()))
    if fig_lat:
        fig_lat.update_layout(layout.get("plot-trending-lat", dict()))

    return fig_tput, fig_lat
386
387
def graph_hdrh_latency(data: dict, layout: dict) -> go.Figure:
    """Generate the graph with the latency histograms of one test run.

    :param data: Encoded HDR histograms keyed by the names listed in
        _GRAPH_LAT_HDRH_DESC. The items are expected in the order of
        _LAT_HDRH: the position of an item determines the direction shown in
        the hover text, the colour and the width of its trace.
    :param layout: Layouts of the graphs; the item "plot-hdrh-latency" is
        applied if present.
    :type data: dict
    :type layout: dict
    :returns: The graph, or None if no histogram could be decoded.
    :rtype: plotly.graph_objects.Figure
    :raises KeyError: If a decodable histogram is stored under a name which
        is not in _GRAPH_LAT_HDRH_DESC.
    """

    traces = list()
    for idx, (lat_name, lat_hdrh) in enumerate(data.items()):
        try:
            decoded = hdrh.histogram.HdrHistogram.decode(lat_hdrh)
        except (hdrh.codec.HdrLengthException, TypeError):
            # Missing or malformed histogram; idx still advances, so the
            # remaining histograms keep their direction and colour.
            continue

        description = _GRAPH_LAT_HDRH_DESC[lat_name]
        direction = ("W-E", "E-W")[idx % 2]
        previous_x = 0.0
        prev_perc = 0.0
        xaxis = list()
        yaxis = list()
        hovertext = list()
        for item in decoded.get_recorded_iterator():
            # The real value is "percentile".
            # For 100%, we cut that down to "x_perc" to avoid
            # infinity.
            percentile = item.percentile_level_iterated_to
            x_perc = min(percentile, PERCENTILE_MAX)
            next_x = 100.0 / (100.0 - x_perc)
            latency = item.value_iterated_to
            hover = (
                f"<b>{description}</b><br>"
                f"Direction: {direction}<br>"
                f"Percentile: {prev_perc:.5f}-{percentile:.5f}%<br>"
                f"Latency: {latency}uSec"
            )
            # Two points with the same latency draw a horizontal step
            # from the previous percentile to the current one.
            xaxis.extend((previous_x, next_x))
            yaxis.extend((latency, latency))
            hovertext.extend((hover, hover))
            previous_x = next_x
            prev_perc = percentile

        traces.append(
            go.Scatter(
                x=xaxis,
                y=yaxis,
                name=description,
                mode="lines",
                legendgroup=description,
                # Both directions share the legend group, show it once.
                showlegend=bool(idx % 2),
                line=dict(
                    color=_COLORS[(idx // 2) % len(_COLORS)],
                    dash="solid",
                    width=1 if idx % 2 else 2
                ),
                hovertext=hovertext,
                hoverinfo="text"
            )
        )

    if not traces:
        return None

    fig = go.Figure()
    fig.add_traces(traces)
    layout_hdrh = layout.get("plot-hdrh-latency", None)
    # Check the layout itself, not the last histogram of the loop above.
    if layout_hdrh:
        fig.update_layout(layout_hdrh)

    return fig
454
455     return fig