csit.infra.dash/app/cdash/comparisons/tables.py

   1 # Copyright (c) 2024 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """The comparison tables.
  15 """
  16
  17 import pandas as pd
  18
  19 from numpy import mean, std
  20 from copy import deepcopy
  21
  22 from ..utils.constants import Constants as C
  23 from ..utils.utils import relative_change_stdev
  24
  25
  26 def select_comparison_data(
  27         data: pd.DataFrame,
  28         selected: dict,
  29         normalize: bool=False
  30     ) -> pd.DataFrame:
  31     """Select data for a comparison table.
  32
  33     :param data: Data to be filtered for the comparison table.
  34     :param selected: A dictionary with parameters and their values selected by
  35         the user.
  36     :param normalize: If True, the data is normalized to CPU frequency
  37         Constants.NORM_FREQUENCY.
  38     :type data: pandas.DataFrame
  39     :type selected: dict
  40     :type normalize: bool
  41     :returns: A data frame with selected data.
  42     :rtype: pandas.DataFrame
  43     """
  44
  45     def _calculate_statistics(
  46             data_in: pd.DataFrame,
  47             ttype: str,
  48             drv: str,
  49             norm_factor: float
  50         ) -> pd.DataFrame:
  51         """Calculates mean value and standard deviation for provided data.
  52
  53         :param data_in: Input data for calculations.
  54         :param ttype: The test type.
  55         :param drv: The driver.
  56         :param norm_factor: The data normalization factor.
  57         :type data_in: pandas.DataFrame
  58         :type ttype: str
  59         :type drv: str
  60         :type norm_factor: float
  61         :returns: A pandas dataframe with: test name, mean value, standard
  62             deviation and unit.
  63         :rtype: pandas.DataFrame
  64         """
  65         d_data = {
  66             "name": list(),
  67             "mean": list(),
  68             "stdev": list(),
  69             "unit": list()
  70         }
  71         for itm in data_in["test_id"].unique().tolist():
  72             itm_lst = itm.split(".")
  73             test = itm_lst[-1].rsplit("-", 1)[0]
  74             if "hoststack" in itm:
  75                 test_type = f"hoststack-{ttype}"
  76             else:
  77                 test_type = ttype
  78             df = data_in.loc[(data_in["test_id"] == itm)]
  79             l_df = df[C.VALUE_ITER[test_type]].to_list()
  80             if len(l_df) and isinstance(l_df[0], list):
  81                 tmp_df = list()
  82                 for l_itm in l_df:
  83                     tmp_df.extend(l_itm)
  84                 l_df = tmp_df
  85             try:
  86                 mean_val = mean(l_df)
  87                 std_val = std(l_df)
  88             except (TypeError, ValueError):
  89                 continue
  90             d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
  91             d_data["mean"].append(int(mean_val * norm_factor))
  92             d_data["stdev"].append(int(std_val * norm_factor))
  93             d_data["unit"].append(df[C.UNIT[test_type]].to_list()[0])
  94         return pd.DataFrame(d_data)
  95
  96     lst_df = list()
  97     for itm in selected:
  98         if itm["ttype"] in ("NDR", "PDR", "Latency"):
  99             test_type = "ndrpdr"
 100         elif itm["ttype"] in ("CPS", "RPS", "BPS"):
 101             test_type  = "hoststack"
 102         else:
 103             test_type = itm["ttype"].lower()
 104
 105         dutver = itm["dutver"].split("-", 1)  # 0 -> release, 1 -> dut version
 106         tmp_df = pd.DataFrame(data.loc[(
 107             (data["passed"] == True) &
 108             (data["dut_type"] == itm["dut"]) &
 109             (data["dut_version"] == dutver[1]) &
 110             (data["test_type"] == test_type) &
 111             (data["release"] == dutver[0])
 112         )])
 113
 114         drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
 115         core = str() if itm["dut"] == "trex" else itm["core"].lower()
 116         ttype = "ndrpdr" if itm["ttype"] in ("NDR", "PDR", "Latency") \
 117             else itm["ttype"].lower()
 118         tmp_df = tmp_df[
 119             (tmp_df.job.str.endswith(itm["tbed"])) &
 120             (tmp_df.test_id.str.contains(
 121                 (
 122                     f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-"
 123                     f"{core}-{drv}.*-{ttype}$"
 124                 ),
 125                 regex=True
 126             ))
 127         ]
 128         if itm["driver"] == "dpdk":
 129             for drv in C.DRIVERS:
 130                 tmp_df.drop(
 131                     tmp_df[tmp_df.test_id.str.contains(f"-{drv}-")].index,
 132                     inplace=True
 133                 )
 134
 135         # Change the data type from ndrpdr to one of ("NDR", "PDR", "Latency")
 136         if test_type == "ndrpdr":
 137             tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())
 138
 139         if not tmp_df.empty:
 140             if normalize:
 141                 if itm["ttype"] == "Latency":
 142                     norm_factor = C.FREQUENCY[itm["tbed"]] / C.NORM_FREQUENCY
 143                 else:
 144                     norm_factor = C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]]
 145             else:
 146                 norm_factor = 1.0
 147             tmp_df = _calculate_statistics(
 148                 tmp_df,
 149                 itm["ttype"].lower(),
 150                 itm["driver"],
 151                 norm_factor
 152             )
 153
 154         lst_df.append(tmp_df)
 155
 156     if len(lst_df) == 1:
 157         df = lst_df[0]
 158     elif len(lst_df) > 1:
 159         df = pd.concat(
 160             lst_df,
 161             ignore_index=True,
 162             copy=False
 163         )
 164     else:
 165         df = pd.DataFrame()
 166
 167     return df
 168
 169
 170 def comparison_table(
 171         data: pd.DataFrame,
 172         selected: dict,
 173         normalize: bool,
 174         format: str="html"
 175     ) -> tuple:
 176     """Generate a comparison table.
 177
 178     :param data: Iterative data for the comparison table.
 179     :param selected: A dictionary with parameters and their values selected by
 180         the user.
 181     :param normalize: If True, the data is normalized to CPU frequency
 182         Constants.NORM_FREQUENCY.
 183     :param format: The output format of the table:
 184         - html: To be displayed on html page, the values are shown in millions
 185           of the unit.
 186         - csv: To be downloaded as a CSV file the values are stored in base
 187           units.
 188     :type data: pandas.DataFrame
 189     :type selected: dict
 190     :type normalize: bool
 191     :type format: str
 192     :returns: A tuple with the tabe title and the comparison table.
 193     :rtype: tuple[str, pandas.DataFrame]
 194     """
 195
 196     def _create_selection(sel: dict) -> list:
 197         """Transform the complex dictionary with user selection to list
 198             of simple items.
 199
 200         :param sel: A complex dictionary with user selection.
 201         :type sel: dict
 202         :returns: A list of simple items.
 203         :rtype: list
 204         """
 205         l_infra = sel["infra"].split("-")
 206         selection = list()
 207         for core in sel["core"]:
 208             for fsize in sel["frmsize"]:
 209                 for ttype in sel["ttype"]:
 210                     selection.append({
 211                         "dut": sel["dut"],
 212                         "dutver": sel["dutver"],
 213                         "tbed": f"{l_infra[0]}-{l_infra[1]}",
 214                         "nic": l_infra[2],
 215                         "driver": l_infra[-1].replace("_", "-"),
 216                         "core": core,
 217                         "frmsize": fsize,
 218                         "ttype": ttype
 219                     })
 220         return selection
 221
 222     r_sel = deepcopy(selected["reference"]["selection"])
 223     c_params = selected["compare"]
 224     r_selection = _create_selection(r_sel)
 225
 226     if format == "html" and "Latency" not in r_sel["ttype"]:
 227         unit_factor, s_unit_factor = (1e6, "M")
 228     else:
 229         unit_factor, s_unit_factor = (1, str())
 230
 231     # Create Table title and titles of columns with data
 232     params = list(r_sel)
 233     params.remove(c_params["parameter"])
 234     lst_title = list()
 235     for param in params:
 236         value = r_sel[param]
 237         if isinstance(value, list):
 238             lst_title.append("|".join(value))
 239         else:
 240             lst_title.append(value)
 241     title = "Comparison for: " + "-".join(lst_title)
 242     r_name = r_sel[c_params["parameter"]]
 243     if isinstance(r_name, list):
 244         r_name = "|".join(r_name)
 245     c_name = c_params["value"]
 246
 247     # Select reference data
 248     r_data = select_comparison_data(data, r_selection, normalize)
 249
 250     # Select compare data
 251     c_sel = deepcopy(selected["reference"]["selection"])
 252     if c_params["parameter"] in ("core", "frmsize", "ttype"):
 253         c_sel[c_params["parameter"]] = [c_params["value"], ]
 254     else:
 255         c_sel[c_params["parameter"]] = c_params["value"]
 256
 257     c_selection = _create_selection(c_sel)
 258     c_data = select_comparison_data(data, c_selection, normalize)
 259
 260     if r_data.empty or c_data.empty:
 261         return str(), pd.DataFrame()
 262
 263     l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
 264         list(), list(), list(), list(), list(), list(), list(), set()
 265     for _, row in r_data.iterrows():
 266         if c_params["parameter"] in ("core", "frmsize", "ttype"):
 267             l_cmp = row["name"].split("-")
 268             if c_params["parameter"] == "core":
 269                 c_row = c_data[
 270                     (c_data.name.str.contains(l_cmp[0])) &
 271                     (c_data.name.str.contains("-".join(l_cmp[2:])))
 272                 ]
 273             elif c_params["parameter"] == "frmsize":
 274                 c_row = c_data[c_data.name.str.contains("-".join(l_cmp[1:]))]
 275             elif c_params["parameter"] == "ttype":
 276                 regex = r"^" + f"{'-'.join(l_cmp[:-1])}" + r"-.{3}$"
 277                 c_row = c_data[c_data.name.str.contains(regex, regex=True)]
 278         else:
 279             c_row = c_data[c_data["name"] == row["name"]]
 280         if not c_row.empty:
 281             unit.add(f"{s_unit_factor}{row['unit']}")
 282             r_mean = row["mean"]
 283             r_std = row["stdev"]
 284             c_mean = c_row["mean"].values[0]
 285             c_std = c_row["stdev"].values[0]
 286             l_name.append(row["name"])
 287             l_r_mean.append(r_mean / unit_factor)
 288             l_r_std.append(r_std / unit_factor)
 289             l_c_mean.append(c_mean / unit_factor)
 290             l_c_std.append(c_std / unit_factor)
 291             delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
 292             l_rc_mean.append(delta)
 293             l_rc_std.append(d_stdev)
 294
 295     s_unit = "|".join(unit)
 296     df_cmp = pd.DataFrame.from_dict({
 297         "Test Name": l_name,
 298         f"{r_name} Mean [{s_unit}]": l_r_mean,
 299         f"{r_name} Stdev [{s_unit}]": l_r_std,
 300         f"{c_name} Mean [{s_unit}]": l_c_mean,
 301         f"{c_name} Stdev [{s_unit}]": l_c_std,
 302         "Relative Change Mean [%]": l_rc_mean,
 303         "Relative Change Stdev [%]": l_rc_std
 304     })
 305     df_cmp.sort_values(
 306         by="Relative Change Mean [%]",
 307         ascending=False,
 308         inplace=True
 309     )
 310
 311     return (title, df_cmp)