csit.infra.dash/app/cdash/comparisons/tables.py

   1 # Copyright (c) 2023 Cisco and/or its affiliates.
   2 # Licensed under the Apache License, Version 2.0 (the "License");
   3 # you may not use this file except in compliance with the License.
   4 # You may obtain a copy of the License at:
   5 #
   6 #     http://www.apache.org/licenses/LICENSE-2.0
   7 #
   8 # Unless required by applicable law or agreed to in writing, software
   9 # distributed under the License is distributed on an "AS IS" BASIS,
  10 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  11 # See the License for the specific language governing permissions and
  12 # limitations under the License.
  13
  14 """The comparison tables.
  15 """
  16
  17 import pandas as pd
  18
  19 from numpy import mean, std
  20 from copy import deepcopy
  21 from ..utils.constants import Constants as C
  22 from ..utils.utils import relative_change_stdev
  23
  24
  25 def select_comparison_data(
  26         data: pd.DataFrame,
  27         selected: dict,
  28         normalize: bool=False
  29     ) -> pd.DataFrame:
  30     """Select data for a comparison table.
  31
  32     :param data: Data to be filtered for the comparison table.
  33     :param selected: A dictionary with parameters and their values selected by
  34         the user.
  35     :param normalize: If True, the data is normalized to CPU frequency
  36         Constants.NORM_FREQUENCY.
  37     :type data: pandas.DataFrame
  38     :type selected: dict
  39     :type normalize: bool
  40     :returns: A data frame with selected data.
  41     :rtype: pandas.DataFrame
  42     """
  43
  44     def _calculate_statistics(
  45             data_in: pd.DataFrame,
  46             ttype: str,
  47             drv: str,
  48             norm_factor: float
  49         ) -> pd.DataFrame:
  50         """Calculates mean value and standard deviation for provided data.
  51
  52         :param data_in: Input data for calculations.
  53         :param ttype: The test type.
  54         :param drv: The driver.
  55         :param norm_factor: The data normalization factor.
  56         :type data_in: pandas.DataFrame
  57         :type ttype: str
  58         :type drv: str
  59         :type norm_factor: float
  60         :returns: A pandas dataframe with: test name, mean value, standard
  61             deviation and unit.
  62         :rtype: pandas.DataFrame
  63         """
  64         d_data = {
  65             "name": list(),
  66             "mean": list(),
  67             "stdev": list(),
  68             "unit": list()
  69         }
  70         for itm in data_in["test_id"].unique().tolist():
  71             itm_lst = itm.split(".")
  72             test = itm_lst[-1].rsplit("-", 1)[0]
  73             df = data_in.loc[(data_in["test_id"] == itm)]
  74             l_df = df[C.VALUE_ITER[ttype]].to_list()
  75             if len(l_df) and isinstance(l_df[0], list):
  76                 tmp_df = list()
  77                 for l_itm in l_df:
  78                     tmp_df.extend(l_itm)
  79                 l_df = tmp_df
  80             d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
  81             d_data["mean"].append(int(mean(l_df) * norm_factor))
  82             d_data["stdev"].append(int(std(l_df) * norm_factor))
  83             d_data["unit"].append(df[C.UNIT[ttype]].to_list()[0])
  84         return pd.DataFrame(d_data)
  85
  86     lst_df = list()
  87     for itm in selected:
  88         if itm["ttype"] in ("NDR", "PDR"):
  89             test_type = "ndrpdr"
  90         else:
  91             test_type = itm["ttype"].lower()
  92
  93         dutver = itm["dutver"].split("-", 1)  # 0 -> release, 1 -> dut version
  94         tmp_df = pd.DataFrame(data.loc[(
  95             (data["passed"] == True) &
  96             (data["dut_type"] == itm["dut"]) &
  97             (data["dut_version"] == dutver[1]) &
  98             (data["test_type"] == test_type) &
  99             (data["release"] == dutver[0])
 100         )])
 101
 102         drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
 103         core = str() if itm["dut"] == "trex" else itm["core"].lower()
 104         reg_id = \
 105             f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-{core}-{drv}.*$"
 106         tmp_df = tmp_df[
 107             (tmp_df.job.str.endswith(itm["tbed"])) &
 108             (tmp_df.test_id.str.contains(reg_id, regex=True))
 109         ]
 110         if itm["driver"] == "dpdk":
 111             for drv in C.DRIVERS:
 112                 tmp_df.drop(
 113                     tmp_df[tmp_df.test_id.str.contains(f"-{drv}-")].index,
 114                     inplace=True
 115                 )
 116
 117         # Change the data type from ndrpdr to one of ("NDR", "PDR")
 118         if test_type == "ndrpdr":
 119             tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())
 120
 121         if not tmp_df.empty:
 122             tmp_df = _calculate_statistics(
 123                 tmp_df,
 124                 itm["ttype"].lower(),
 125                 itm["driver"],
 126                 C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]] if normalize else 1
 127             )
 128
 129         lst_df.append(tmp_df)
 130
 131     if len(lst_df) == 1:
 132         df = lst_df[0]
 133     elif len(lst_df) > 1:
 134         df = pd.concat(
 135             lst_df,
 136             ignore_index=True,
 137             copy=False
 138         )
 139     else:
 140         df = pd.DataFrame()
 141
 142     return df
 143
 144
 145 def comparison_table(
 146         data: pd.DataFrame,
 147         selected: dict,
 148         normalize: bool,
 149         format: str="html"
 150     ) -> tuple:
 151     """Generate a comparison table.
 152
 153     :param data: Iterative data for the comparison table.
 154     :param selected: A dictionary with parameters and their values selected by
 155         the user.
 156     :param normalize: If True, the data is normalized to CPU frequency
 157         Constants.NORM_FREQUENCY.
 158     :param format: The output format of the table:
 159         - html: To be displayed on html page, the values are shown in millions
 160           of the unit.
 161         - csv: To be downloaded as a CSV file the values are stored in base
 162           units.
 163     :type data: pandas.DataFrame
 164     :type selected: dict
 165     :type normalize: bool
 166     :type format: str
 167     :returns: A tuple with the tabe title and the comparison table.
 168     :rtype: tuple[str, pandas.DataFrame]
 169     """
 170
 171     def _create_selection(sel: dict) -> list:
 172         """Transform the complex dictionary with user selection to list
 173             of simple items.
 174
 175         :param sel: A complex dictionary with user selection.
 176         :type sel: dict
 177         :returns: A list of simple items.
 178         :rtype: list
 179         """
 180         l_infra = sel["infra"].split("-")
 181         selection = list()
 182         for core in sel["core"]:
 183             for fsize in sel["frmsize"]:
 184                 for ttype in sel["ttype"]:
 185                     selection.append({
 186                         "dut": sel["dut"],
 187                         "dutver": sel["dutver"],
 188                         "tbed": f"{l_infra[0]}-{l_infra[1]}",
 189                         "nic": l_infra[2],
 190                         "driver": l_infra[-1].replace("_", "-"),
 191                         "core": core,
 192                         "frmsize": fsize,
 193                         "ttype": ttype
 194                     })
 195         return selection
 196
 197     unit_factor, s_unit_factor = (1e6, "M") if format == "html" else (1, str())
 198
 199     r_sel = deepcopy(selected["reference"]["selection"])
 200     c_params = selected["compare"]
 201     r_selection = _create_selection(r_sel)
 202
 203     # Create Table title and titles of columns with data
 204     params = list(r_sel)
 205     params.remove(c_params["parameter"])
 206     lst_title = list()
 207     for param in params:
 208         value = r_sel[param]
 209         if isinstance(value, list):
 210             lst_title.append("|".join(value))
 211         else:
 212             lst_title.append(value)
 213     title = "Comparison for: " + "-".join(lst_title)
 214     r_name = r_sel[c_params["parameter"]]
 215     if isinstance(r_name, list):
 216         r_name = "|".join(r_name)
 217     c_name = c_params["value"]
 218
 219     # Select reference data
 220     r_data = select_comparison_data(data, r_selection, normalize)
 221
 222     # Select compare data
 223     c_sel = deepcopy(selected["reference"]["selection"])
 224     if c_params["parameter"] in ("core", "frmsize", "ttype"):
 225         c_sel[c_params["parameter"]] = [c_params["value"], ]
 226     else:
 227         c_sel[c_params["parameter"]] = c_params["value"]
 228
 229     c_selection = _create_selection(c_sel)
 230     c_data = select_comparison_data(data, c_selection, normalize)
 231
 232     if r_data.empty or c_data.empty:
 233         return str(), pd.DataFrame()
 234
 235     l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
 236         list(), list(), list(), list(), list(), list(), list(), set()
 237     for _, row in r_data.iterrows():
 238         if c_params["parameter"] in ("core", "frmsize", "ttype"):
 239             l_cmp = row["name"].split("-")
 240             if c_params["parameter"] == "core":
 241                 c_row = c_data[
 242                     (c_data.name.str.contains(l_cmp[0])) &
 243                     (c_data.name.str.contains("-".join(l_cmp[2:])))
 244                 ]
 245             elif c_params["parameter"] == "frmsize":
 246                 c_row = c_data[c_data.name.str.contains("-".join(l_cmp[1:]))]
 247             elif c_params["parameter"] == "ttype":
 248                 regex = r"^" + f"{'-'.join(l_cmp[:-1])}" + r"-.{3}$"
 249                 c_row = c_data[c_data.name.str.contains(regex, regex=True)]
 250         else:
 251             c_row = c_data[c_data["name"] == row["name"]]
 252         if not c_row.empty:
 253             unit.add(f"{s_unit_factor}{row['unit']}")
 254             r_mean = row["mean"]
 255             r_std = row["stdev"]
 256             c_mean = c_row["mean"].values[0]
 257             c_std = c_row["stdev"].values[0]
 258             l_name.append(row["name"])
 259             l_r_mean.append(r_mean / unit_factor)
 260             l_r_std.append(r_std / unit_factor)
 261             l_c_mean.append(c_mean / unit_factor)
 262             l_c_std.append(c_std / unit_factor)
 263             delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
 264             l_rc_mean.append(delta)
 265             l_rc_std.append(d_stdev)
 266
 267     s_unit = "|".join(unit)
 268     df_cmp = pd.DataFrame.from_dict({
 269         "Test Name": l_name,
 270         f"{r_name} Mean [{s_unit}]": l_r_mean,
 271         f"{r_name} Stdev [{s_unit}]": l_r_std,
 272         f"{c_name} Mean [{s_unit}]": l_c_mean,
 273         f"{c_name} Stdev [{s_unit}]": l_c_std,
 274         "Relative Change Mean [%]": l_rc_mean,
 275         "Relative Change Stdev [%]": l_rc_std
 276     })
 277     df_cmp.sort_values(
 278         by="Relative Change Mean [%]",
 279         ascending=False,
 280         inplace=True
 281     )
 282
 283     return (title, df_cmp)