csit.infra.dash/app/cdash/comparisons/tables.py

   1 # Copyright (c) 2024 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """The comparison tables.
  15 """
  16
  17 import pandas as pd
  18
  19 from numpy import mean, std, percentile
  20 from copy import deepcopy
  21
  22 from ..utils.constants import Constants as C
  23 from ..utils.utils import relative_change_stdev
  24
  25
  26 def select_comp_data(
  27         data: pd.DataFrame,
  28         selected: dict,
  29         normalize: bool=False,
  30         remove_outliers: bool=False
  31     ) -> pd.DataFrame:
  32     """Select data for a comparison table.
  33
  34     :param data: Data to be filtered for the comparison table.
  35     :param selected: A dictionary with parameters and their values selected by
  36         the user.
  37     :param normalize: If True, the data is normalized to CPU frequency
  38         Constants.NORM_FREQUENCY.
  39     :param remove_outliers: If True the outliers are removed before
  40         generating the table.
  41     :type data: pandas.DataFrame
  42     :type selected: dict
  43     :type normalize: bool
  44     :type remove_outliers: bool
  45     :returns: A data frame with selected data.
  46     :rtype: pandas.DataFrame
  47     """
  48
  49     def _calculate_statistics(
  50             data_in: pd.DataFrame,
  51             ttype: str,
  52             drv: str,
  53             norm_factor: float,
  54             remove_outliers: bool=False
  55         ) -> pd.DataFrame:
  56         """Calculates mean value and standard deviation for provided data.
  57
  58         :param data_in: Input data for calculations.
  59         :param ttype: The test type.
  60         :param drv: The driver.
  61         :param norm_factor: The data normalization factor.
  62         :param remove_outliers: If True the outliers are removed before
  63             generating the table.
  64         :type data_in: pandas.DataFrame
  65         :type ttype: str
  66         :type drv: str
  67         :type norm_factor: float
  68         :type remove_outliers: bool
  69         :returns: A pandas dataframe with: test name, mean value, standard
  70             deviation and unit.
  71         :rtype: pandas.DataFrame
  72         """
  73         d_data = {
  74             "name": list(),
  75             "mean": list(),
  76             "stdev": list(),
  77             "unit": list()
  78         }
  79         for itm in data_in["test_id"].unique().tolist():
  80             itm_lst = itm.split(".")
  81             test = itm_lst[-1].rsplit("-", 1)[0]
  82             if "hoststack" in itm:
  83                 test_type = f"hoststack-{ttype}"
  84             else:
  85                 test_type = ttype
  86             df = data_in.loc[(data_in["test_id"] == itm)]
  87             l_df = df[C.VALUE_ITER[test_type]].to_list()
  88             if len(l_df) and isinstance(l_df[0], list):
  89                 tmp_df = list()
  90                 for l_itm in l_df:
  91                     tmp_df.extend(l_itm)
  92                 l_df = tmp_df
  93
  94             if remove_outliers:
  95                 q1 = percentile(l_df, 25, method=C.COMP_PERCENTILE_METHOD)
  96                 q3 = percentile(l_df, 75, method=C.COMP_PERCENTILE_METHOD)
  97                 irq = q3 - q1
  98                 lif = q1 - C.COMP_OUTLIER_TYPE * irq
  99                 uif = q3 + C.COMP_OUTLIER_TYPE * irq
 100                 l_df = [i for i in l_df if i >= lif and i <= uif]
 101
 102             try:
 103                 mean_val = mean(l_df)
 104                 std_val = std(l_df)
 105             except (TypeError, ValueError):
 106                 continue
 107             d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
 108             d_data["mean"].append(int(mean_val * norm_factor))
 109             d_data["stdev"].append(int(std_val * norm_factor))
 110             d_data["unit"].append(df[C.UNIT[test_type]].to_list()[0])
 111         return pd.DataFrame(d_data)
 112
 113     lst_df = list()
 114     for itm in selected:
 115         if itm["ttype"] in ("NDR", "PDR", "Latency"):
 116             test_type = "ndrpdr"
 117         elif itm["ttype"] in ("CPS", "RPS", "BPS"):
 118             test_type  = "hoststack"
 119         else:
 120             test_type = itm["ttype"].lower()
 121
 122         dutver = itm["dutver"].split("-", 1)  # 0 -> release, 1 -> dut version
 123         tmp_df = pd.DataFrame(data.loc[(
 124             (data["passed"] == True) &
 125             (data["dut_type"] == itm["dut"]) &
 126             (data["dut_version"] == dutver[1]) &
 127             (data["test_type"] == test_type) &
 128             (data["release"] == dutver[0])
 129         )])
 130
 131         drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
 132         core = str() if itm["dut"] == "trex" else itm["core"].lower()
 133         ttype = "ndrpdr" if itm["ttype"] in ("NDR", "PDR", "Latency") \
 134             else itm["ttype"].lower()
 135         tmp_df = tmp_df[
 136             (tmp_df.job.str.endswith(itm["tbed"])) &
 137             (tmp_df.test_id.str.contains(
 138                 (
 139                     f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-"
 140                     f"{core}-{drv}.*-{ttype}$"
 141                 ),
 142                 regex=True
 143             ))
 144         ]
 145         if itm["driver"] == "dpdk":
 146             for drv in C.DRIVERS:
 147                 tmp_df.drop(
 148                     tmp_df[tmp_df.test_id.str.contains(f"-{drv}-")].index,
 149                     inplace=True
 150                 )
 151
 152         # Change the data type from ndrpdr to one of ("NDR", "PDR", "Latency")
 153         if test_type == "ndrpdr":
 154             tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())
 155
 156         if not tmp_df.empty:
 157             if normalize:
 158                 if itm["ttype"] == "Latency":
 159                     norm_factor = C.FREQUENCY[itm["tbed"]] / C.NORM_FREQUENCY
 160                 else:
 161                     norm_factor = C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]]
 162             else:
 163                 norm_factor = 1.0
 164             tmp_df = _calculate_statistics(
 165                 tmp_df,
 166                 itm["ttype"].lower(),
 167                 itm["driver"],
 168                 norm_factor,
 169                 remove_outliers=remove_outliers
 170             )
 171
 172         lst_df.append(tmp_df)
 173
 174     if len(lst_df) == 1:
 175         df = lst_df[0]
 176     elif len(lst_df) > 1:
 177         df = pd.concat(
 178             lst_df,
 179             ignore_index=True,
 180             copy=False
 181         )
 182     else:
 183         df = pd.DataFrame()
 184
 185     return df
 186
 187
 188 def comparison_table(
 189         data: pd.DataFrame,
 190         selected: dict,
 191         normalize: bool,
 192         format: str="html",
 193         remove_outliers: bool=False
 194     ) -> tuple:
 195     """Generate a comparison table.
 196
 197     :param data: Iterative data for the comparison table.
 198     :param selected: A dictionary with parameters and their values selected by
 199         the user.
 200     :param normalize: If True, the data is normalized to CPU frequency
 201         Constants.NORM_FREQUENCY.
 202     :param format: The output format of the table:
 203         - html: To be displayed on html page, the values are shown in millions
 204           of the unit.
 205         - csv: To be downloaded as a CSV file the values are stored in base
 206           units.
 207     :param remove_outliers: If True the outliers are removed before
 208         generating the table.
 209     :type data: pandas.DataFrame
 210     :type selected: dict
 211     :type normalize: bool
 212     :type format: str
 213     :type remove_outliers: bool
 214     :returns: A tuple with the tabe title and the comparison table.
 215     :rtype: tuple[str, pandas.DataFrame]
 216     """
 217
 218     def _create_selection(sel: dict) -> list:
 219         """Transform the complex dictionary with user selection to list
 220             of simple items.
 221
 222         :param sel: A complex dictionary with user selection.
 223         :type sel: dict
 224         :returns: A list of simple items.
 225         :rtype: list
 226         """
 227         l_infra = sel["infra"].split("-")
 228         selection = list()
 229         for core in sel["core"]:
 230             for fsize in sel["frmsize"]:
 231                 for ttype in sel["ttype"]:
 232                     selection.append({
 233                         "dut": sel["dut"],
 234                         "dutver": sel["dutver"],
 235                         "tbed": f"{l_infra[0]}-{l_infra[1]}",
 236                         "nic": l_infra[2],
 237                         "driver": l_infra[-1].replace("_", "-"),
 238                         "core": core,
 239                         "frmsize": fsize,
 240                         "ttype": ttype
 241                     })
 242         return selection
 243
 244     r_sel = deepcopy(selected["reference"]["selection"])
 245     c_params = selected["compare"]
 246     r_selection = _create_selection(r_sel)
 247
 248     if format == "html" and "Latency" not in r_sel["ttype"]:
 249         unit_factor, s_unit_factor = (1e6, "M")
 250     else:
 251         unit_factor, s_unit_factor = (1, str())
 252
 253     # Create Table title and titles of columns with data
 254     params = list(r_sel)
 255     params.remove(c_params["parameter"])
 256     lst_title = list()
 257     for param in params:
 258         value = r_sel[param]
 259         if isinstance(value, list):
 260             lst_title.append("|".join(value))
 261         else:
 262             lst_title.append(value)
 263     title = "Comparison for: " + "-".join(lst_title)
 264     r_name = r_sel[c_params["parameter"]]
 265     if isinstance(r_name, list):
 266         r_name = "|".join(r_name)
 267     c_name = c_params["value"]
 268
 269     # Select reference data
 270     r_data = select_comp_data(data, r_selection, normalize, remove_outliers)
 271
 272     # Select compare data
 273     c_sel = deepcopy(selected["reference"]["selection"])
 274     if c_params["parameter"] in ("core", "frmsize", "ttype"):
 275         c_sel[c_params["parameter"]] = [c_params["value"], ]
 276     else:
 277         c_sel[c_params["parameter"]] = c_params["value"]
 278
 279     c_selection = _create_selection(c_sel)
 280     c_data = select_comp_data(data, c_selection, normalize, remove_outliers)
 281
 282     if r_data.empty or c_data.empty:
 283         return str(), pd.DataFrame()
 284
 285     l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
 286         list(), list(), list(), list(), list(), list(), list(), set()
 287     for _, row in r_data.iterrows():
 288         if c_params["parameter"] in ("core", "frmsize", "ttype"):
 289             l_cmp = row["name"].split("-")
 290             if c_params["parameter"] == "core":
 291                 c_row = c_data[
 292                     (c_data.name.str.contains(l_cmp[0])) &
 293                     (c_data.name.str.contains("-".join(l_cmp[2:])))
 294                 ]
 295             elif c_params["parameter"] == "frmsize":
 296                 c_row = c_data[c_data.name.str.contains("-".join(l_cmp[1:]))]
 297             elif c_params["parameter"] == "ttype":
 298                 regex = r"^" + f"{'-'.join(l_cmp[:-1])}" + r"-.{3}$"
 299                 c_row = c_data[c_data.name.str.contains(regex, regex=True)]
 300         else:
 301             c_row = c_data[c_data["name"] == row["name"]]
 302         if not c_row.empty:
 303             unit.add(f"{s_unit_factor}{row['unit']}")
 304             r_mean = row["mean"]
 305             r_std = row["stdev"]
 306             c_mean = c_row["mean"].values[0]
 307             c_std = c_row["stdev"].values[0]
 308             l_name.append(row["name"])
 309             l_r_mean.append(r_mean / unit_factor)
 310             l_r_std.append(r_std / unit_factor)
 311             l_c_mean.append(c_mean / unit_factor)
 312             l_c_std.append(c_std / unit_factor)
 313             delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
 314             l_rc_mean.append(delta)
 315             l_rc_std.append(d_stdev)
 316
 317     s_unit = "|".join(unit)
 318     df_cmp = pd.DataFrame.from_dict({
 319         "Test Name": l_name,
 320         f"{r_name} Mean [{s_unit}]": l_r_mean,
 321         f"{r_name} Stdev [{s_unit}]": l_r_std,
 322         f"{c_name} Mean [{s_unit}]": l_c_mean,
 323         f"{c_name} Stdev [{s_unit}]": l_c_std,
 324         "Relative Change Mean [%]": l_rc_mean,
 325         "Relative Change Stdev [%]": l_rc_std
 326     })
 327     df_cmp.sort_values(
 328         by="Relative Change Mean [%]",
 329         ascending=False,
 330         inplace=True
 331     )
 332
 333     return (title, df_cmp)