csit.infra.dash/app/cdash/comparisons/tables.py

   1 # Copyright (c) 2023 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """The comparison tables.
  15 """
  16
  17 import pandas as pd
  18
  19 from numpy import mean, std
  20 from copy import deepcopy
  21 from ..utils.constants import Constants as C
  22 from ..utils.utils import relative_change_stdev
  23
  24
  25 def select_comparison_data(
  26         data: pd.DataFrame,
  27         selected: dict,
  28         normalize: bool=False
  29     ) -> pd.DataFrame:
  30     """Select data for a comparison table.
  31
  32     :param data: Data to be filtered for the comparison table.
  33     :param selected: A dictionary with parameters and their values selected by
  34         the user.
  35     :param normalize: If True, the data is normalized to CPU frequency
  36         Constants.NORM_FREQUENCY.
  37     :type data: pandas.DataFrame
  38     :type selected: dict
  39     :type normalize: bool
  40     :returns: A data frame with selected data.
  41     :rtype: pandas.DataFrame
  42     """
  43
  44     def _calculate_statistics(
  45             data_in: pd.DataFrame,
  46             ttype: str,
  47             drv: str,
  48             norm_factor: float
  49         ) -> pd.DataFrame:
  50         """Calculates mean value and standard deviation for provided data.
  51
  52         :param data_in: Input data for calculations.
  53         :param ttype: The test type.
  54         :param drv: The driver.
  55         :param norm_factor: The data normalization factor.
  56         :type data_in: pandas.DataFrame
  57         :type ttype: str
  58         :type drv: str
  59         :type norm_factor: float
  60         :returns: A pandas dataframe with: test name, mean value, standard
  61             deviation and unit.
  62         :rtype: pandas.DataFrame
  63         """
  64         d_data = {
  65             "name": list(),
  66             "mean": list(),
  67             "stdev": list(),
  68             "unit": list()
  69         }
  70         for itm in data_in["test_id"].unique().tolist():
  71             itm_lst = itm.split(".")
  72             test = itm_lst[-1].rsplit("-", 1)[0]
  73             if "hoststack" in itm:
  74                 test_type = f"hoststack-{ttype}"
  75             else:
  76                 test_type = ttype
  77             df = data_in.loc[(data_in["test_id"] == itm)]
  78             l_df = df[C.VALUE_ITER[test_type]].to_list()
  79             if len(l_df) and isinstance(l_df[0], list):
  80                 tmp_df = list()
  81                 for l_itm in l_df:
  82                     tmp_df.extend(l_itm)
  83                 l_df = tmp_df
  84             try:
  85                 mean_val = mean(l_df)
  86                 std_val = std(l_df)
  87             except (TypeError, ValueError):
  88                 continue
  89             d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
  90             d_data["mean"].append(int(mean_val * norm_factor))
  91             d_data["stdev"].append(int(std_val * norm_factor))
  92             d_data["unit"].append(df[C.UNIT[test_type]].to_list()[0])
  93         return pd.DataFrame(d_data)
  94
  95     lst_df = list()
  96     for itm in selected:
  97         if itm["ttype"] in ("NDR", "PDR", "Latency"):
  98             test_type = "ndrpdr"
  99         elif itm["ttype"] in ("CPS", "RPS", "BPS"):
 100             test_type  = "hoststack"
 101         else:
 102             test_type = itm["ttype"].lower()
 103
 104         dutver = itm["dutver"].split("-", 1)  # 0 -> release, 1 -> dut version
 105         tmp_df = pd.DataFrame(data.loc[(
 106             (data["passed"] == True) &
 107             (data["dut_type"] == itm["dut"]) &
 108             (data["dut_version"] == dutver[1]) &
 109             (data["test_type"] == test_type) &
 110             (data["release"] == dutver[0])
 111         )])
 112
 113         drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
 114         core = str() if itm["dut"] == "trex" else itm["core"].lower()
 115         ttype = "ndrpdr" if itm["ttype"] in ("NDR", "PDR", "Latency") \
 116             else itm["ttype"].lower()
 117         tmp_df = tmp_df[
 118             (tmp_df.job.str.endswith(itm["tbed"])) &
 119             (tmp_df.test_id.str.contains(
 120                 (
 121                     f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-"
 122                     f"{core}-{drv}.*-{ttype}$"
 123                 ),
 124                 regex=True
 125             ))
 126         ]
 127         if itm["driver"] == "dpdk":
 128             for drv in C.DRIVERS:
 129                 tmp_df.drop(
 130                     tmp_df[tmp_df.test_id.str.contains(f"-{drv}-")].index,
 131                     inplace=True
 132                 )
 133
 134         # Change the data type from ndrpdr to one of ("NDR", "PDR", "Latency")
 135         if test_type == "ndrpdr":
 136             tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())
 137
 138         if not tmp_df.empty:
 139             if normalize:
 140                 if itm["ttype"] == "Latency":
 141                     norm_factor = C.FREQUENCY[itm["tbed"]] / C.NORM_FREQUENCY
 142                 else:
 143                     norm_factor = C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]]
 144             else:
 145                 norm_factor = 1.0
 146             tmp_df = _calculate_statistics(
 147                 tmp_df,
 148                 itm["ttype"].lower(),
 149                 itm["driver"],
 150                 norm_factor
 151             )
 152
 153         lst_df.append(tmp_df)
 154
 155     if len(lst_df) == 1:
 156         df = lst_df[0]
 157     elif len(lst_df) > 1:
 158         df = pd.concat(
 159             lst_df,
 160             ignore_index=True,
 161             copy=False
 162         )
 163     else:
 164         df = pd.DataFrame()
 165
 166     return df
 167
 168
 169 def comparison_table(
 170         data: pd.DataFrame,
 171         selected: dict,
 172         normalize: bool,
 173         format: str="html"
 174     ) -> tuple:
 175     """Generate a comparison table.
 176
 177     :param data: Iterative data for the comparison table.
 178     :param selected: A dictionary with parameters and their values selected by
 179         the user.
 180     :param normalize: If True, the data is normalized to CPU frequency
 181         Constants.NORM_FREQUENCY.
 182     :param format: The output format of the table:
 183         - html: To be displayed on html page, the values are shown in millions
 184           of the unit.
 185         - csv: To be downloaded as a CSV file the values are stored in base
 186           units.
 187     :type data: pandas.DataFrame
 188     :type selected: dict
 189     :type normalize: bool
 190     :type format: str
 191     :returns: A tuple with the tabe title and the comparison table.
 192     :rtype: tuple[str, pandas.DataFrame]
 193     """
 194
 195     def _create_selection(sel: dict) -> list:
 196         """Transform the complex dictionary with user selection to list
 197             of simple items.
 198
 199         :param sel: A complex dictionary with user selection.
 200         :type sel: dict
 201         :returns: A list of simple items.
 202         :rtype: list
 203         """
 204         l_infra = sel["infra"].split("-")
 205         selection = list()
 206         for core in sel["core"]:
 207             for fsize in sel["frmsize"]:
 208                 for ttype in sel["ttype"]:
 209                     selection.append({
 210                         "dut": sel["dut"],
 211                         "dutver": sel["dutver"],
 212                         "tbed": f"{l_infra[0]}-{l_infra[1]}",
 213                         "nic": l_infra[2],
 214                         "driver": l_infra[-1].replace("_", "-"),
 215                         "core": core,
 216                         "frmsize": fsize,
 217                         "ttype": ttype
 218                     })
 219         return selection
 220
 221     r_sel = deepcopy(selected["reference"]["selection"])
 222     c_params = selected["compare"]
 223     r_selection = _create_selection(r_sel)
 224
 225     if format == "html" and "Latency" not in r_sel["ttype"]:
 226         unit_factor, s_unit_factor = (1e6, "M")
 227     else:
 228         unit_factor, s_unit_factor = (1, str())
 229
 230     # Create Table title and titles of columns with data
 231     params = list(r_sel)
 232     params.remove(c_params["parameter"])
 233     lst_title = list()
 234     for param in params:
 235         value = r_sel[param]
 236         if isinstance(value, list):
 237             lst_title.append("|".join(value))
 238         else:
 239             lst_title.append(value)
 240     title = "Comparison for: " + "-".join(lst_title)
 241     r_name = r_sel[c_params["parameter"]]
 242     if isinstance(r_name, list):
 243         r_name = "|".join(r_name)
 244     c_name = c_params["value"]
 245
 246     # Select reference data
 247     r_data = select_comparison_data(data, r_selection, normalize)
 248
 249     # Select compare data
 250     c_sel = deepcopy(selected["reference"]["selection"])
 251     if c_params["parameter"] in ("core", "frmsize", "ttype"):
 252         c_sel[c_params["parameter"]] = [c_params["value"], ]
 253     else:
 254         c_sel[c_params["parameter"]] = c_params["value"]
 255
 256     c_selection = _create_selection(c_sel)
 257     c_data = select_comparison_data(data, c_selection, normalize)
 258
 259     if r_data.empty or c_data.empty:
 260         return str(), pd.DataFrame()
 261
 262     l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
 263         list(), list(), list(), list(), list(), list(), list(), set()
 264     for _, row in r_data.iterrows():
 265         if c_params["parameter"] in ("core", "frmsize", "ttype"):
 266             l_cmp = row["name"].split("-")
 267             if c_params["parameter"] == "core":
 268                 c_row = c_data[
 269                     (c_data.name.str.contains(l_cmp[0])) &
 270                     (c_data.name.str.contains("-".join(l_cmp[2:])))
 271                 ]
 272             elif c_params["parameter"] == "frmsize":
 273                 c_row = c_data[c_data.name.str.contains("-".join(l_cmp[1:]))]
 274             elif c_params["parameter"] == "ttype":
 275                 regex = r"^" + f"{'-'.join(l_cmp[:-1])}" + r"-.{3}$"
 276                 c_row = c_data[c_data.name.str.contains(regex, regex=True)]
 277         else:
 278             c_row = c_data[c_data["name"] == row["name"]]
 279         if not c_row.empty:
 280             unit.add(f"{s_unit_factor}{row['unit']}")
 281             r_mean = row["mean"]
 282             r_std = row["stdev"]
 283             c_mean = c_row["mean"].values[0]
 284             c_std = c_row["stdev"].values[0]
 285             l_name.append(row["name"])
 286             l_r_mean.append(r_mean / unit_factor)
 287             l_r_std.append(r_std / unit_factor)
 288             l_c_mean.append(c_mean / unit_factor)
 289             l_c_std.append(c_std / unit_factor)
 290             delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
 291             l_rc_mean.append(delta)
 292             l_rc_std.append(d_stdev)
 293
 294     s_unit = "|".join(unit)
 295     df_cmp = pd.DataFrame.from_dict({
 296         "Test Name": l_name,
 297         f"{r_name} Mean [{s_unit}]": l_r_mean,
 298         f"{r_name} Stdev [{s_unit}]": l_r_std,
 299         f"{c_name} Mean [{s_unit}]": l_c_mean,
 300         f"{c_name} Stdev [{s_unit}]": l_c_std,
 301         "Relative Change Mean [%]": l_rc_mean,
 302         "Relative Change Stdev [%]": l_rc_std
 303     })
 304     df_cmp.sort_values(
 305         by="Relative Change Mean [%]",
 306         ascending=False,
 307         inplace=True
 308     )
 309
 310     return (title, df_cmp)