feat(uti): remove unnecessary data
[csit.git] / resources / tools / dash / app / pal / trending / graphs.py
1 # Copyright (c) 2022 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
5 #
6 #     http://www.apache.org/licenses/LICENSE-2.0
7 #
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
13
14 """
15 """
16
17 import plotly.graph_objects as go
18 import pandas as pd
19
20 import hdrh.histogram
21 import hdrh.codec
22
23 from datetime import datetime
24 from numpy import isnan
25
26 from ..jumpavg import classify
27
28
# Value on the 0..1 colour axis used for each anomaly class when the anomaly
# markers are drawn.
_ANOMALY_COLOR = dict(regression=0.0, normal=0.5, progression=1.0)

# Three-band discrete colour scale and matching colour-bar labels used for
# every test type except "pdr-lat".
_COLORSCALE_TPUT = [
    [0.00, "red"], [0.33, "red"],
    [0.33, "white"], [0.66, "white"],
    [0.66, "green"], [1.00, "green"],
]
_TICK_TEXT_TPUT = ["Regression", "Normal", "Progression"]

# Same bands for "pdr-lat", with the red and green ends (and therefore the
# labels) swapped.
_COLORSCALE_LAT = [
    [0.00, "green"], [0.33, "green"],
    [0.33, "white"], [0.66, "white"],
    [0.66, "red"], [1.00, "red"],
]
_TICK_TEXT_LAT = list(reversed(_TICK_TEXT_TPUT))

# Test type -> data frame column holding the plotted value.
_VALUE = {
    "mrr": "result_receive_rate_rate_avg",
    "ndr": "result_ndr_lower_rate_value",
    "pdr": "result_pdr_lower_rate_value",
    "pdr-lat": "result_latency_forward_pdr_50_avg",
}

# Test type -> data frame column holding the unit of the plotted value.
_UNIT = {
    "mrr": "result_receive_rate_rate_unit",
    "ndr": "result_ndr_lower_rate_unit",
    "pdr": "result_pdr_lower_rate_unit",
    "pdr-lat": "result_latency_forward_pdr_50_unit",
}

# HDRH latency columns: forward then reverse for each load level.
# Do not change the order.
_LAT_HDRH = tuple(
    f"result_latency_{direction}_pdr_{load}_hdrh"
    for load in (0, 10, 50, 90)
    for direction in ("forward", "reverse")
)

# This value depends on latency stream rate (9001 pps) and duration (5s).
# Keep it slightly higher to ensure rounding errors do not remove the tick
# mark.
PERCENTILE_MAX = 99.999501

# HDRH latency column -> human readable description of the load level.
_GRAPH_LAT_HDRH_DESC = {
    f"result_latency_{direction}_pdr_{load}_hdrh": description
    for load, description in (
        (0, "No-load."),
        (10, "Low-load, 10% PDR."),
        (50, "Mid-load, 50% PDR."),
        (90, "High-load, 90% PDR."),
    )
    for direction in ("forward", "reverse")
}
88
89
def _get_color(idx: int) -> str:
    """Return the palette colour for the given position.

    The palette is a fixed tuple of 23 hex colours; the index wraps around
    its length, so any integer is accepted.

    :param idx: Position of the item to be coloured.
    :type idx: int
    :returns: Colour as a "#RRGGBB" string.
    :rtype: str
    """
    palette = (
        "#1A1110", "#DA2647", "#214FC6", "#01786F",
        "#BD8260", "#FFD12A", "#A6E7FF", "#738276",
        "#C95A49", "#FC5A8D", "#CEC8EF", "#391285",
        "#6F2DA8", "#FF878D", "#45A27D", "#FFD0B9",
        "#FD5240", "#DB91EF", "#44D7A8", "#4F86F7",
        "#84DE02", "#FFCFF1", "#614051",
    )
    _, slot = divmod(idx, len(palette))
    return palette[slot]
100
101
def _get_hdrh_latencies(row: pd.Series, name: str) -> dict:
    """Collect the HDRH latency fields of one row.

    :param row: Row to read the latency columns from.
    :param name: Value stored under the "name" key of the result.
    :type row: pandas.Series
    :type name: str
    :returns: Dictionary holding "name" followed by every column listed in
        _LAT_HDRH (in that order), or None if any of those columns is
        missing from the row.
    :rtype: dict or None
    """
    try:
        hdrh_fields = {column: row[column] for column in _LAT_HDRH}
    except KeyError:
        # A single missing column makes the whole record unusable.
        return None

    return {"name": name, **hdrh_fields}
114
115
def _classify_anomalies(data):
    """Process the data and return anomalies and trending values.

    The samples are split into groups by classify(); the average and the
    standard deviation of a group are used as the trend values of all its
    samples. The first sample of each group is labelled with the group's
    comment, the remaining samples of the group are labelled "normal".
    NaN samples are labelled "outlier" and get NaN as their trend values.

    :param data: Full data set with unavailable samples replaced by nan.
        Only the values are used, in iteration order.
    :type data: OrderedDict
    :returns: Classification, trend averages and trend standard deviations,
        one item per sample.
    :rtype: 3-tuple, list of strings, list of floats and list of floats
    """
    samples = list(data.values())

    # NaN means something went wrong.
    # Use 0.0 to cause that being reported as a severe regression.
    bare_data = [0.0 if isnan(value) else value for value in samples]
    # TODO: Make BitCountingGroupList a subclass of list again?
    pending_groups = classify(bare_data).group_list
    pending_groups.reverse()  # Just to use .pop() for FIFO.

    labels = []
    trend_avgs = []
    trend_stdevs = []
    current_group = None
    remaining = 0
    group_avg = 0.0
    group_stdev = 0.0

    for value in samples:
        if isnan(value):
            # NOTE(review): NaN samples were passed to classify() as 0.0 but
            # do not consume a slot of the current group here - confirm this
            # is intended.
            label, trend, spread = "outlier", value, value
        else:
            if remaining < 1 or current_group is None:
                # Move on to the next group.
                remaining = 0
                while remaining < 1:  # Ignore empty groups (should not happen).
                    current_group = pending_groups.pop()
                    remaining = len(current_group.run_list)
                group_avg = current_group.stats.avg
                group_stdev = current_group.stats.stdev
                label = current_group.comment
            else:
                label = "normal"
            remaining -= 1
            trend, spread = group_avg, group_stdev

        labels.append(label)
        trend_avgs.append(trend)
        trend_stdevs.append(spread)

    return labels, trend_avgs, trend_stdevs
164
165
def select_trending_data(data: pd.DataFrame, itm: dict) -> pd.DataFrame:
    """Select the rows of the data frame which belong to one selected test.

    :param data: Data frame with the columns "version", "dut_type",
        "test_type", "passed", "job", "test_id" and "start_time".
    :param itm: Selected test. The keys "phy" (four dash separated parts:
        topology, architecture, NIC and driver), "dut", "framesize", "test"
        and "testtype" are read; "core" is read unless the DUT is "trex".
    :type data: pandas.DataFrame
    :type itm: dict
    :returns: Passed results of the selected test sorted by "start_time"
        with a fresh index, or None if "phy" does not have four parts.
    :rtype: pandas.DataFrame or None
    """

    phy_parts = itm["phy"].split("-")
    if len(phy_parts) != 4:
        return None
    topo, arch, nic, drv = phy_parts

    # The "dpdk" driver is not part of the test ID; any other driver is,
    # written with dashes and followed by a dash.
    drv = "" if drv == "dpdk" else f"{drv}-".replace("_", "-")

    is_trex = itm["dut"] == "trex"
    core = str() if is_trex else f"{itm['core']}"
    ttype = "ndrpdr" if itm["testtype"] in ("ndr", "pdr") else itm["testtype"]
    # Version 1.0.0 records use "none" as the DUT type of trex.
    dut_v100 = "none" if is_trex else itm["dut"]
    dut_v101 = itm["dut"]

    is_v100 = (
        (data["version"] == "1.0.0") &
        (data["dut_type"].str.lower() == dut_v100)
    )
    is_v101 = (
        (data["version"] == "1.0.1") &
        (data["dut_type"].str.lower() == dut_v101)
    )
    # "== True" is an element-wise comparison here, not a truthiness test.
    wanted = (
        (is_v100 | is_v101) &
        (data["test_type"] == ttype) &
        (data["passed"] == True)
    )

    selected = data.loc[wanted]
    selected = selected[selected.job.str.endswith(f"{topo}-{arch}")]

    test_id_pattern = (
        f"^.*[.|-]{nic}.*{itm['framesize']}-{core}-{drv}{itm['test']}-{ttype}$"
    )
    selected = selected[
        selected.test_id.str.contains(test_id_pattern, regex=True)
    ]

    return selected.sort_values(by="start_time", ignore_index=True)
207
208
def _generate_trending_traces(ttype: str, name: str, df: pd.DataFrame,
    start: datetime, end: datetime, color: str) -> list:
    """Generate the traces of one test for the trending graph.

    Rows without a value and rows outside of the <start, end> interval are
    dropped. The remaining samples are classified and three traces are
    generated: the samples (markers), the trend (line) and circles marking
    the regressions and progressions.

    :param ttype: Test type, a key of _VALUE and _UNIT.
    :param name: Name of the traces; also used as the legend group.
    :param df: Data of one test. Besides the value and unit columns the
        columns "start_time", "dut_type", "dut_version", "job", "build" and
        "hosts" are read; for "mrr" also "result_receive_rate_rate_unit" and
        "result_receive_rate_rate_stdev".
    :param start: Only samples with "start_time" >= start are used.
    :param end: Only samples with "start_time" <= end are used.
    :param color: Colour of the sample markers and of the trend line.
    :type ttype: str
    :type name: str
    :type df: pandas.DataFrame
    :type start: datetime.datetime
    :type end: datetime.datetime
    :type color: str
    :returns: List of traces, empty if there is no sample to display.
    :rtype: list
    """

    value_col = _VALUE[ttype]
    unit_col = _UNIT[ttype]
    is_latency = ttype == "pdr-lat"

    df = df.dropna(subset=[value_col, ])
    if df.empty:
        return list()
    df = df.loc[((df["start_time"] >= start) & (df["start_time"] <= end))]
    if df.empty:
        return list()

    x_axis = df["start_time"].tolist()
    samples = df[value_col]

    # Key the samples by their position, not by their timestamp: rows sharing
    # a timestamp would otherwise collapse into one dict item and the
    # classification would no longer line up with x_axis. Only the values are
    # consumed by the classification.
    anomalies, trend_avg, trend_stdev = _classify_anomalies(
        dict(enumerate(samples))
    )

    prop = "latency" if is_latency else "average"
    trend_unit = "us" if is_latency else "pps"

    hover = list()
    hover_trend = list()
    customdata = list()
    for avg, stdev, (_, row) in zip(trend_avg, trend_stdev, df.iterrows()):
        # Version 1.0.0 records use "none" as the DUT type of trex.
        d_type = "trex" if row["dut_type"] == "none" else row["dut_type"]
        date = row["start_time"].strftime("%Y-%m-%d %H:%M:%S")
        refs = (
            f"{d_type}-ref: {row['dut_version']}<br>"
            f"csit-ref: {row['job']}/{row['build']}<br>"
            f"hosts: {', '.join(row['hosts'])}"
        )
        if ttype == "mrr":
            # Only MRR samples carry their own standard deviation.
            sample_stdev = (
                f"stdev [{row['result_receive_rate_rate_unit']}]: "
                f"{row['result_receive_rate_rate_stdev']:,.0f}<br>"
            )
        else:
            sample_stdev = ""
        hover.append(
            f"date: {date}<br>"
            f"{prop} [{row[unit_col]}]: {row[value_col]:,.0f}<br>"
            f"{sample_stdev}"
            f"{refs}"
        )
        hover_trend.append(
            f"date: {date}<br>"
            f"trend [{trend_unit}]: {avg:,.0f}<br>"
            f"stdev [{trend_unit}]: {stdev:,.0f}<br>"
            f"{refs}"
        )
        if is_latency:
            customdata.append(_get_hdrh_latencies(row, name))

    traces = [
        go.Scatter(  # Samples
            x=x_axis,
            y=samples,
            name=name,
            mode="markers",
            marker={
                "size": 5,
                "color": color,
                "symbol": "circle",
            },
            text=hover,
            hoverinfo="text+name",
            showlegend=True,
            legendgroup=name,
            customdata=customdata
        ),
        go.Scatter(  # Trend line
            x=x_axis,
            y=trend_avg,
            name=name,
            mode="lines",
            line={
                "shape": "linear",
                "width": 1,
                "color": color,
            },
            text=hover_trend,
            hoverinfo="text+name",
            showlegend=False,
            legendgroup=name,
        )
    ]

    if anomalies:
        anomaly_x = list()
        anomaly_y = list()
        anomaly_color = list()
        anomaly_hover = list()
        for idx, anomaly in enumerate(anomalies):
            if anomaly in ("regression", "progression"):
                anomaly_x.append(x_axis[idx])
                anomaly_y.append(trend_avg[idx])
                anomaly_color.append(_ANOMALY_COLOR[anomaly])
                anomaly_hover.append(
                    f"date: {x_axis[idx].strftime('%Y-%m-%d %H:%M:%S')}<br>"
                    f"trend [{trend_unit}]: {trend_avg[idx]:,.0f}<br>"
                    f"classification: {anomaly}"
                )
        # Three extra colour values without matching points; presumably they
        # keep the colour range at 0..1 whatever anomalies were found
        # - TODO confirm.
        anomaly_color.extend([0.0, 0.5, 1.0])
        traces.append(
            go.Scatter(
                x=anomaly_x,
                y=anomaly_y,
                mode="markers",
                text=anomaly_hover,
                hoverinfo="text+name",
                showlegend=False,
                legendgroup=name,
                name=name,
                marker={
                    "size": 15,
                    "symbol": "circle-open",
                    "color": anomaly_color,
                    "colorscale": _COLORSCALE_LAT \
                        if is_latency else _COLORSCALE_TPUT,
                    "showscale": True,
                    "line": {
                        "width": 2
                    },
                    "colorbar": {
                        "y": 0.5,
                        "len": 0.8,
                        "title": "Circles Marking Data Classification",
                        "titleside": "right",
                        "tickmode": "array",
                        "tickvals": [0.167, 0.500, 0.833],
                        "ticktext": _TICK_TEXT_LAT \
                            if is_latency else _TICK_TEXT_TPUT,
                        "ticks": "",
                        "ticklen": 0,
                        "tickangle": -90,
                        "thickness": 10
                    }
                }
            )
        )

    return traces
360
361
def graph_trending(data: pd.DataFrame, sel:dict, layout: dict,
    start: datetime, end: datetime) -> tuple:
    """Generate the trending graphs for the selected tests.

    Each selected test contributes its traces to the throughput figure; a
    test of type "pdr" also contributes to the latency figure. A figure is
    only created once there is at least one trace to put into it.

    :param data: Data frame with the test results.
    :param sel: Selected tests; each item provides the keys "dut", "phy",
        "framesize", "core", "test" and "testtype".
    :param layout: Layouts; the items "plot-trending-tput" and
        "plot-trending-lat" are applied to the respective figures.
    :param start: Start of the displayed time interval.
    :param end: End of the displayed time interval.
    :type data: pandas.DataFrame
    :type sel: iterable of dict
    :type layout: dict
    :type start: datetime.datetime
    :type end: datetime.datetime
    :returns: Throughput figure and latency figure; either is None if there
        is nothing to display.
    :rtype: tuple
    """

    if not sel:
        return None, None

    name_keys = ("dut", "phy", "framesize", "core", "test", "testtype")
    fig_tput = None
    fig_lat = None

    for position, selection in enumerate(sel):
        selected = select_trending_data(data, selection)
        if selected is None or selected.empty:
            continue

        label = "-".join(selection[key] for key in name_keys)
        trace_color = _get_color(position)

        tput_traces = _generate_trending_traces(
            selection["testtype"], label, selected, start, end, trace_color
        )
        if tput_traces:
            fig_tput = fig_tput or go.Figure()
            fig_tput.add_traces(tput_traces)

        if selection["testtype"] != "pdr":
            continue

        # Latency is plotted only for PDR tests.
        lat_traces = _generate_trending_traces(
            "pdr-lat", label, selected, start, end, trace_color
        )
        if lat_traces:
            fig_lat = fig_lat or go.Figure()
            fig_lat.add_traces(lat_traces)

    if fig_tput:
        fig_tput.update_layout(layout.get("plot-trending-tput", dict()))
    if fig_lat:
        fig_lat.update_layout(layout.get("plot-trending-lat", dict()))

    return fig_tput, fig_lat
403
404
def graph_hdrh_latency(data: dict, layout: dict) -> go.Figure:
    """Generate the graph of latency percentiles from HDR histograms.

    Each decodable item of the data becomes one step line; its x values are
    100 / (100 - percentile) and its y values are the recorded latencies.
    Items which cannot be decoded are skipped.

    The position of an item matters: items at even positions are labelled
    "W-E", items at odd positions "E-W", and each consecutive pair shares one
    colour and one legend item.

    NOTE(review): every key must be a key of _GRAPH_LAT_HDRH_DESC, so the
    "name" item added by _get_hdrh_latencies() presumably has to be removed
    by the caller - confirm.

    :param data: Encoded HDR histograms keyed by the latency column name.
    :param layout: Layouts; the item "plot-hdrh-latency", if present and not
        empty, is applied to the figure.
    :type data: dict
    :type layout: dict
    :returns: The figure, or None if no histogram could be decoded.
    :rtype: plotly.graph_objects.Figure or None
    """

    traces = list()
    for idx, (lat_name, lat_hdrh) in enumerate(data.items()):
        try:
            decoded = hdrh.histogram.HdrHistogram.decode(lat_hdrh)
        except (hdrh.codec.HdrLengthException, TypeError):
            # Not a usable histogram; the remaining ones are still plotted.
            continue

        description = _GRAPH_LAT_HDRH_DESC[lat_name]
        direction = ("W-E", "E-W")[idx % 2]

        previous_x = 0.0
        prev_perc = 0.0
        xaxis = list()
        yaxis = list()
        hovertext = list()
        for item in decoded.get_recorded_iterator():
            # The real value is "percentile".
            # For 100%, we cut that down to "x_perc" to avoid
            # infinity.
            percentile = item.percentile_level_iterated_to
            x_perc = min(percentile, PERCENTILE_MAX)
            next_x = 100.0 / (100.0 - x_perc)
            latency = item.value_iterated_to
            label = (
                f"<b>{description}</b><br>"
                f"Direction: {direction}<br>"
                f"Percentile: {prev_perc:.5f}-{percentile:.5f}%<br>"
                f"Latency: {latency}uSec"
            )
            # Two points per record (start and end of the step), both
            # carrying the same latency and hover text.
            xaxis.extend((previous_x, next_x))
            yaxis.extend((latency, latency))
            hovertext.extend((label, label))
            previous_x = next_x
            prev_perc = percentile

        traces.append(
            go.Scatter(
                x=xaxis,
                y=yaxis,
                name=description,
                mode="lines",
                legendgroup=description,
                showlegend=bool(idx % 2),
                line=dict(
                    color=_get_color(idx // 2),
                    dash="solid",
                    width=1 if idx % 2 else 2
                ),
                hovertext=hovertext,
                hoverinfo="text"
            )
        )

    if not traces:
        return None

    fig = go.Figure()
    fig.add_traces(traces)
    layout_hdrh = layout.get("plot-hdrh-latency", None)
    # Test the layout itself, not the last histogram of the loop above.
    if layout_hdrh:
        fig.update_layout(layout_hdrh)

    return fig