1 # Copyright (c) 2024 Cisco and/or its affiliates.
2 # Licensed under the Apache License, Version 2.0 (the "License");
3 # you may not use this file except in compliance with the License.
4 # You may obtain a copy of the License at:
6 # http://www.apache.org/licenses/LICENSE-2.0
8 # Unless required by applicable law or agreed to in writing, software
9 # distributed under the License is distributed on an "AS IS" BASIS,
10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 # See the License for the specific language governing permissions and
12 # limitations under the License.
14 """The comparison tables.
19 from numpy import mean, std, percentile
20 from copy import deepcopy
22 from ..utils.constants import Constants as C
23 from ..utils.utils import relative_change_stdev
29 normalize: bool=False,
30 remove_outliers: bool=False,
33 """Select data for a comparison table.
35 :param data: Data to be filtered for the comparison table.
36 :param selected: A dictionary with parameters and their values selected by
38 :param normalize: If True, the data is normalized to CPU frequency
39 Constants.NORM_FREQUENCY.
40 :param remove_outliers: If True, the outliers are removed before
42 :param raw_data: If True, returns data as it is in parquets without any
43 processing. It is used for "download raw data" feature.
44 :type data: pandas.DataFrame
47 :type remove_outliers: bool
49 :returns: A data frame with selected data.
50 :rtype: pandas.DataFrame
53 def _calculate_statistics(
54 data_in: pd.DataFrame,
58 remove_outliers: bool=False
60 """Calculates mean value and standard deviation for provided data.
62 :param data_in: Input data for calculations.
63 :param ttype: The test type.
64 :param drv: The driver.
65 :param norm_factor: The data normalization factor.
66 :param remove_outliers: If True, the outliers are removed before
68 :type data_in: pandas.DataFrame
71 :type norm_factor: float
72 :type remove_outliers: bool
73 :returns: A pandas dataframe with: test name, mean value, standard
75 :rtype: pandas.DataFrame
83 for itm in data_in["test_id"].unique().tolist():
84 itm_lst = itm.split(".")
85 test = itm_lst[-1].rsplit("-", 1)[0]
86 if "hoststack" in itm:
87 test_type = f"hoststack-{ttype}"
90 df = data_in.loc[(data_in["test_id"] == itm)]
91 l_df = df[C.VALUE_ITER[test_type]].to_list()
92 if len(l_df) and isinstance(l_df[0], list):
99 q1 = percentile(l_df, 25, method=C.COMP_PERCENTILE_METHOD)
100 q3 = percentile(l_df, 75, method=C.COMP_PERCENTILE_METHOD)
102 lif = q1 - C.COMP_OUTLIER_TYPE * irq
103 uif = q3 + C.COMP_OUTLIER_TYPE * irq
104 l_df = [i for i in l_df if i >= lif and i <= uif]
107 mean_val = mean(l_df)
109 except (TypeError, ValueError):
111 d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
112 d_data["mean"].append(int(mean_val * norm_factor))
113 d_data["stdev"].append(int(std_val * norm_factor))
114 d_data["unit"].append(df[C.UNIT[test_type]].to_list()[0])
115 return pd.DataFrame(d_data)
119 if itm["ttype"] in ("NDR", "PDR", "Latency"):
121 elif itm["ttype"] in ("CPS", "RPS", "BPS"):
122 test_type = "hoststack"
124 test_type = itm["ttype"].lower()
126 dutver = itm["dutver"].split("-", 1) # 0 -> release, 1 -> dut version
127 tmp_df = pd.DataFrame(data.loc[(
128 (data["passed"] == True) &
129 (data["dut_type"] == itm["dut"]) &
130 (data["dut_version"] == dutver[1]) &
131 (data["test_type"] == test_type) &
132 (data["release"] == dutver[0])
135 drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
136 core = str() if itm["dut"] == "trex" else itm["core"].lower()
137 ttype = "ndrpdr" if itm["ttype"] in ("NDR", "PDR", "Latency") \
138 else itm["ttype"].lower()
140 (tmp_df.job.str.endswith(itm["tbed"])) &
141 (tmp_df.test_id.str.contains(
143 f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-"
144 f"{core}-{drv}.*-{ttype}$"
149 if itm["driver"] == "dpdk":
150 for drv in C.DRIVERS:
152 tmp_df[tmp_df.test_id.str.contains(f"-{drv}-")].index,
156 # Change the data type from ndrpdr to one of ("NDR", "PDR", "Latency")
157 if test_type == "ndrpdr":
158 tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())
162 if itm["ttype"] == "Latency":
163 norm_factor = C.FREQUENCY[itm["tbed"]] / C.NORM_FREQUENCY
165 norm_factor = C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]]
169 tmp_df = _calculate_statistics(
171 itm["ttype"].lower(),
174 remove_outliers=remove_outliers
177 lst_df.append(tmp_df)
181 elif len(lst_df) > 1:
193 def comparison_table(
198 remove_outliers: bool=False,
201 """Generate a comparison table.
203 :param data: Iterative data for the comparison table.
204 :param selected: A dictionary with parameters and their values selected by
206 :param normalize: If True, the data is normalized to CPU frequency
207 Constants.NORM_FREQUENCY.
208 :param format: The output format of the table:
209 - html: To be displayed on html page, the values are shown in millions
211 - csv: To be downloaded as a CSV file the values are stored in base
213 :param remove_outliers: If True, the outliers are removed before
214 generating the table.
215 :param raw_data: If True, returns data as it is in parquets without any
216 processing. It is used for "download raw data" feature.
217 :type data: pandas.DataFrame
219 :type normalize: bool
221 :type remove_outliers: bool
224 :returns: A tuple with the table title and the comparison table.
224 :rtype: tuple[str, pandas.DataFrame]
227 def _create_selection(sel: dict) -> list:
228 """Transform the complex dictionary with user selection to list
231 :param sel: A complex dictionary with user selection.
233 :returns: A list of simple items.
236 l_infra = sel["infra"].split("-")
238 for core in sel["core"]:
239 for fsize in sel["frmsize"]:
240 for ttype in sel["ttype"]:
243 "dutver": sel["dutver"],
244 "tbed": f"{l_infra[0]}-{l_infra[1]}",
246 "driver": l_infra[-1].replace("_", "-"),
253 # Select reference data
254 r_sel = deepcopy(selected["reference"]["selection"])
255 r_selection = _create_selection(r_sel)
256 r_data = select_comp_data(
257 data, r_selection, normalize, remove_outliers, raw_data
260 # Select compare data
261 c_sel = deepcopy(selected["reference"]["selection"])
262 c_params = selected["compare"]
263 if c_params["parameter"] in ("core", "frmsize", "ttype"):
264 c_sel[c_params["parameter"]] = [c_params["value"], ]
266 c_sel[c_params["parameter"]] = c_params["value"]
267 c_selection = _create_selection(c_sel)
268 c_data = select_comp_data(
269 data, c_selection, normalize, remove_outliers, raw_data
273 r_data["ref/cmp"] = "reference"
274 c_data["ref/cmp"] = "compare"
275 return str(), pd.concat([r_data, c_data], ignore_index=True, copy=False)
277 if r_data.empty or c_data.empty:
278 return str(), pd.DataFrame()
280 if format == "html" and "Latency" not in r_sel["ttype"]:
281 unit_factor, s_unit_factor = (1e6, "M")
283 unit_factor, s_unit_factor = (1, str())
285 # Create Table title and titles of columns with data
287 params.remove(c_params["parameter"])
291 if isinstance(value, list):
292 lst_title.append("|".join(value))
294 lst_title.append(value)
295 title = "Comparison for: " + "-".join(lst_title)
296 r_name = r_sel[c_params["parameter"]]
297 if isinstance(r_name, list):
298 r_name = "|".join(r_name)
299 c_name = c_params["value"]
301 l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
302 list(), list(), list(), list(), list(), list(), list(), set()
303 for _, row in r_data.iterrows():
304 if c_params["parameter"] in ("core", "frmsize", "ttype"):
305 l_cmp = row["name"].split("-")
306 if c_params["parameter"] == "core":
308 (c_data.name.str.contains(l_cmp[0])) &
309 (c_data.name.str.contains("-".join(l_cmp[2:])))
311 elif c_params["parameter"] == "frmsize":
312 c_row = c_data[c_data.name.str.contains("-".join(l_cmp[1:]))]
313 elif c_params["parameter"] == "ttype":
314 regex = r"^" + f"{'-'.join(l_cmp[:-1])}" + r"-.{3}$"
315 c_row = c_data[c_data.name.str.contains(regex, regex=True)]
317 c_row = c_data[c_data["name"] == row["name"]]
319 unit.add(f"{s_unit_factor}{row['unit']}")
322 c_mean = c_row["mean"].values[0]
323 c_std = c_row["stdev"].values[0]
324 l_name.append(row["name"])
325 l_r_mean.append(r_mean / unit_factor)
326 l_r_std.append(r_std / unit_factor)
327 l_c_mean.append(c_mean / unit_factor)
328 l_c_std.append(c_std / unit_factor)
329 delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
330 l_rc_mean.append(delta)
331 l_rc_std.append(d_stdev)
333 s_unit = "|".join(unit)
334 df_cmp = pd.DataFrame.from_dict({
336 f"{r_name} Mean [{s_unit}]": l_r_mean,
337 f"{r_name} Stdev [{s_unit}]": l_r_std,
338 f"{c_name} Mean [{s_unit}]": l_c_mean,
339 f"{c_name} Stdev [{s_unit}]": l_c_std,
340 "Relative Change Mean [%]": l_rc_mean,
341 "Relative Change Stdev [%]": l_rc_std
344 by="Relative Change Mean [%]",
349 return (title, df_cmp)