csit.infra.dash/app/cdash/comparisons/tables.py

   1 # Copyright (c) 2023 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """The comparison tables.
  15 """
  16
  17 import pandas as pd
  18
  19 from numpy import mean, std
  20 from copy import deepcopy
  21 from ..utils.constants import Constants as C
  22 from ..utils.utils import relative_change_stdev
  23
  24
  25 def select_comparison_data(
  26         data: pd.DataFrame,
  27         selected: dict,
  28         normalize: bool=False
  29     ) -> pd.DataFrame:
  30     """Select data for a comparison table.
  31
  32     :param data: Data to be filtered for the comparison table.
  33     :param selected: A dictionary with parameters and their values selected by
  34         the user.
  35     :param normalize: If True, the data is normalized to CPU frequency
  36         Constants.NORM_FREQUENCY.
  37     :type data: pandas.DataFrame
  38     :type selected: dict
  39     :type normalize: bool
  40     :returns: A data frame with selected data.
  41     :rtype: pandas.DataFrame
  42     """
  43
  44     def _calculate_statistics(
  45             data_in: pd.DataFrame,
  46             ttype: str,
  47             drv: str,
  48             norm_factor: float
  49         ) -> pd.DataFrame:
  50         """Calculates mean value and standard deviation for provided data.
  51
  52         :param data_in: Input data for calculations.
  53         :param ttype: The test type.
  54         :param drv: The driver.
  55         :param norm_factor: The data normalization factor.
  56         :type data_in: pandas.DataFrame
  57         :type ttype: str
  58         :type drv: str
  59         :type norm_factor: float
  60         :returns: A pandas dataframe with: test name, mean value, standard
  61             deviation and unit.
  62         :rtype: pandas.DataFrame
  63         """
  64         d_data = {
  65             "name": list(),
  66             "mean": list(),
  67             "stdev": list(),
  68             "unit": list()
  69         }
  70         for itm in data_in["test_id"].unique().tolist():
  71             itm_lst = itm.split(".")
  72             test = itm_lst[-1].rsplit("-", 1)[0]
  73             df = data_in.loc[(data_in["test_id"] == itm)]
  74             l_df = df[C.VALUE_ITER[ttype]].to_list()
  75             if len(l_df) and isinstance(l_df[0], list):
  76                 tmp_df = list()
  77                 for l_itm in l_df:
  78                     tmp_df.extend(l_itm)
  79                 l_df = tmp_df
  80             try:
  81                 mean_val = mean(l_df)
  82                 std_val = std(l_df)
  83             except (TypeError, ValueError):
  84                 continue
  85             d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
  86             d_data["mean"].append(int(mean_val * norm_factor))
  87             d_data["stdev"].append(int(std_val * norm_factor))
  88             d_data["unit"].append(df[C.UNIT[ttype]].to_list()[0])
  89         return pd.DataFrame(d_data)
  90
  91     lst_df = list()
  92     for itm in selected:
  93         if itm["ttype"] in ("NDR", "PDR", "Latency"):
  94             test_type = "ndrpdr"
  95         elif itm["ttype"] in ("CPS", "RPS", "BPS"):
  96             test_type  = "hoststack"
  97         else:
  98             test_type = itm["ttype"].lower()
  99
 100         dutver = itm["dutver"].split("-", 1)  # 0 -> release, 1 -> dut version
 101         tmp_df = pd.DataFrame(data.loc[(
 102             (data["passed"] == True) &
 103             (data["dut_type"] == itm["dut"]) &
 104             (data["dut_version"] == dutver[1]) &
 105             (data["test_type"] == test_type) &
 106             (data["release"] == dutver[0])
 107         )])
 108
 109         drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
 110         core = str() if itm["dut"] == "trex" else itm["core"].lower()
 111         reg_id = \
 112             f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-{core}-{drv}.*$"
 113         tmp_df = tmp_df[
 114             (tmp_df.job.str.endswith(itm["tbed"])) &
 115             (tmp_df.test_id.str.contains(reg_id, regex=True))
 116         ]
 117         if itm["driver"] == "dpdk":
 118             for drv in C.DRIVERS:
 119                 tmp_df.drop(
 120                     tmp_df[tmp_df.test_id.str.contains(f"-{drv}-")].index,
 121                     inplace=True
 122                 )
 123
 124         # Change the data type from ndrpdr to one of ("NDR", "PDR", "Latency")
 125         if test_type == "ndrpdr":
 126             tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())
 127
 128         if not tmp_df.empty:
 129             if normalize:
 130                 if itm["ttype"] == "Latency":
 131                     norm_factor = C.FREQUENCY[itm["tbed"]] / C.NORM_FREQUENCY
 132                 else:
 133                     norm_factor = C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]]
 134             else:
 135                 norm_factor = 1.0
 136             tmp_df = _calculate_statistics(
 137                 tmp_df,
 138                 itm["ttype"].lower(),
 139                 itm["driver"],
 140                 norm_factor
 141             )
 142
 143         lst_df.append(tmp_df)
 144
 145     if len(lst_df) == 1:
 146         df = lst_df[0]
 147     elif len(lst_df) > 1:
 148         df = pd.concat(
 149             lst_df,
 150             ignore_index=True,
 151             copy=False
 152         )
 153     else:
 154         df = pd.DataFrame()
 155
 156     return df
 157
 158
 159 def comparison_table(
 160         data: pd.DataFrame,
 161         selected: dict,
 162         normalize: bool,
 163         format: str="html"
 164     ) -> tuple:
 165     """Generate a comparison table.
 166
 167     :param data: Iterative data for the comparison table.
 168     :param selected: A dictionary with parameters and their values selected by
 169         the user.
 170     :param normalize: If True, the data is normalized to CPU frequency
 171         Constants.NORM_FREQUENCY.
 172     :param format: The output format of the table:
 173         - html: To be displayed on html page, the values are shown in millions
 174           of the unit.
 175         - csv: To be downloaded as a CSV file the values are stored in base
 176           units.
 177     :type data: pandas.DataFrame
 178     :type selected: dict
 179     :type normalize: bool
 180     :type format: str
 181     :returns: A tuple with the tabe title and the comparison table.
 182     :rtype: tuple[str, pandas.DataFrame]
 183     """
 184
 185     def _create_selection(sel: dict) -> list:
 186         """Transform the complex dictionary with user selection to list
 187             of simple items.
 188
 189         :param sel: A complex dictionary with user selection.
 190         :type sel: dict
 191         :returns: A list of simple items.
 192         :rtype: list
 193         """
 194         l_infra = sel["infra"].split("-")
 195         selection = list()
 196         for core in sel["core"]:
 197             for fsize in sel["frmsize"]:
 198                 for ttype in sel["ttype"]:
 199                     selection.append({
 200                         "dut": sel["dut"],
 201                         "dutver": sel["dutver"],
 202                         "tbed": f"{l_infra[0]}-{l_infra[1]}",
 203                         "nic": l_infra[2],
 204                         "driver": l_infra[-1].replace("_", "-"),
 205                         "core": core,
 206                         "frmsize": fsize,
 207                         "ttype": ttype
 208                     })
 209         return selection
 210
 211     r_sel = deepcopy(selected["reference"]["selection"])
 212     c_params = selected["compare"]
 213     r_selection = _create_selection(r_sel)
 214
 215     if format == "html" and "Latency" not in r_sel["ttype"]:
 216         unit_factor, s_unit_factor = (1e6, "M")
 217     else:
 218         unit_factor, s_unit_factor = (1, str())
 219
 220     # Create Table title and titles of columns with data
 221     params = list(r_sel)
 222     params.remove(c_params["parameter"])
 223     lst_title = list()
 224     for param in params:
 225         value = r_sel[param]
 226         if isinstance(value, list):
 227             lst_title.append("|".join(value))
 228         else:
 229             lst_title.append(value)
 230     title = "Comparison for: " + "-".join(lst_title)
 231     r_name = r_sel[c_params["parameter"]]
 232     if isinstance(r_name, list):
 233         r_name = "|".join(r_name)
 234     c_name = c_params["value"]
 235
 236     # Select reference data
 237     r_data = select_comparison_data(data, r_selection, normalize)
 238
 239     # Select compare data
 240     c_sel = deepcopy(selected["reference"]["selection"])
 241     if c_params["parameter"] in ("core", "frmsize", "ttype"):
 242         c_sel[c_params["parameter"]] = [c_params["value"], ]
 243     else:
 244         c_sel[c_params["parameter"]] = c_params["value"]
 245
 246     c_selection = _create_selection(c_sel)
 247     c_data = select_comparison_data(data, c_selection, normalize)
 248
 249     if r_data.empty or c_data.empty:
 250         return str(), pd.DataFrame()
 251
 252     l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
 253         list(), list(), list(), list(), list(), list(), list(), set()
 254     for _, row in r_data.iterrows():
 255         if c_params["parameter"] in ("core", "frmsize", "ttype"):
 256             l_cmp = row["name"].split("-")
 257             if c_params["parameter"] == "core":
 258                 c_row = c_data[
 259                     (c_data.name.str.contains(l_cmp[0])) &
 260                     (c_data.name.str.contains("-".join(l_cmp[2:])))
 261                 ]
 262             elif c_params["parameter"] == "frmsize":
 263                 c_row = c_data[c_data.name.str.contains("-".join(l_cmp[1:]))]
 264             elif c_params["parameter"] == "ttype":
 265                 regex = r"^" + f"{'-'.join(l_cmp[:-1])}" + r"-.{3}$"
 266                 c_row = c_data[c_data.name.str.contains(regex, regex=True)]
 267         else:
 268             c_row = c_data[c_data["name"] == row["name"]]
 269         if not c_row.empty:
 270             unit.add(f"{s_unit_factor}{row['unit']}")
 271             r_mean = row["mean"]
 272             r_std = row["stdev"]
 273             c_mean = c_row["mean"].values[0]
 274             c_std = c_row["stdev"].values[0]
 275             l_name.append(row["name"])
 276             l_r_mean.append(r_mean / unit_factor)
 277             l_r_std.append(r_std / unit_factor)
 278             l_c_mean.append(c_mean / unit_factor)
 279             l_c_std.append(c_std / unit_factor)
 280             delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
 281             l_rc_mean.append(delta)
 282             l_rc_std.append(d_stdev)
 283
 284     s_unit = "|".join(unit)
 285     df_cmp = pd.DataFrame.from_dict({
 286         "Test Name": l_name,
 287         f"{r_name} Mean [{s_unit}]": l_r_mean,
 288         f"{r_name} Stdev [{s_unit}]": l_r_std,
 289         f"{c_name} Mean [{s_unit}]": l_c_mean,
 290         f"{c_name} Stdev [{s_unit}]": l_c_std,
 291         "Relative Change Mean [%]": l_rc_mean,
 292         "Relative Change Stdev [%]": l_rc_std
 293     })
 294     df_cmp.sort_values(
 295         by="Relative Change Mean [%]",
 296         ascending=False,
 297         inplace=True
 298     )
 299
 300     return (title, df_cmp)