diff options
Diffstat (limited to 'csit.infra.dash/app/cdash/comparisons/tables.py')
-rw-r--r-- | csit.infra.dash/app/cdash/comparisons/tables.py | 348 |
1 files changed, 348 insertions, 0 deletions
# Source patch: diff --git a/csit.infra.dash/app/cdash/comparisons/tables.py
#     b/csit.infra.dash/app/cdash/comparisons/tables.py
# (new file mode 100644, index 0000000000..0e32f38b6c, @@ -0,0 +1,348 @@)

# Copyright (c) 2024 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The comparison tables.
"""

import re

import pandas as pd

from numpy import mean, std, percentile
from copy import deepcopy

from ..utils.constants import Constants as C
from ..utils.utils import relative_change_stdev


def select_comp_data(
        data: pd.DataFrame,
        selected: list,
        normalize: bool=False,
        remove_outliers: bool=False,
        raw_data: bool=False
    ) -> pd.DataFrame:
    """Select data for a comparison table.

    :param data: Data to be filtered for the comparison table.
    :param selected: A list of dictionaries, each describing one combination
        of parameters selected by the user. The keys read here are "dut",
        "dutver", "tbed", "nic", "driver", "core", "frmsize" and "ttype".
    :param normalize: If True, the data is normalized to CPU frequency
        Constants.NORM_FREQUENCY.
    :param remove_outliers: If True the outliers are removed before
        generating the table.
    :param raw_data: If True, returns data as it is in parquets without any
        processing. It is used for "download raw data" feature.
    :type data: pandas.DataFrame
    :type selected: list
    :type normalize: bool
    :type remove_outliers: bool
    :type raw_data: bool
    :returns: A data frame with selected data. It is an empty data frame if
        ``selected`` is empty.
    :rtype: pandas.DataFrame
    """

    def _calculate_statistics(
            data_in: pd.DataFrame,
            ttype: str,
            drv: str,
            norm_factor: float,
            remove_outliers: bool=False
        ) -> pd.DataFrame:
        """Calculates mean value and standard deviation for provided data.

        Tests whose samples are empty, non-numeric or not finite are skipped.

        :param data_in: Input data for calculations.
        :param ttype: The test type.
        :param drv: The driver.
        :param norm_factor: The data normalization factor.
        :param remove_outliers: If True the outliers are removed before
            generating the table.
        :type data_in: pandas.DataFrame
        :type ttype: str
        :type drv: str
        :type norm_factor: float
        :type remove_outliers: bool
        :returns: A pandas dataframe with: test name, mean value, standard
            deviation and unit.
        :rtype: pandas.DataFrame
        """
        d_data = {
            "name": list(),
            "mean": list(),
            "stdev": list(),
            "unit": list()
        }
        for itm in data_in["test_id"].unique().tolist():
            # The last dot-separated part of the test ID without its last
            # dash-separated token.
            test = itm.split(".")[-1].rsplit("-", 1)[0]
            if "hoststack" in itm:
                test_type = f"hoststack-{ttype}"
            else:
                test_type = ttype
            df = data_in.loc[(data_in["test_id"] == itm)]
            l_df = df[C.VALUE_ITER[test_type]].to_list()
            if len(l_df) and isinstance(l_df[0], list):
                # Each row holds a list of samples, flatten them.
                l_df = [sample for samples in l_df for sample in samples]

            if not l_df:
                # Nothing to aggregate; mean() of an empty sequence is NaN.
                continue

            try:
                if remove_outliers:
                    q1 = percentile(l_df, 25, method=C.COMP_PERCENTILE_METHOD)
                    q3 = percentile(l_df, 75, method=C.COMP_PERCENTILE_METHOD)
                    iqr = q3 - q1
                    lif = q1 - C.COMP_OUTLIER_TYPE * iqr
                    uif = q3 + C.COMP_OUTLIER_TYPE * iqr
                    l_df = [i for i in l_df if lif <= i <= uif]
                # The int() conversions are guarded too: int(nan) raises
                # ValueError and int(inf) raises OverflowError.
                mean_val = int(mean(l_df) * norm_factor)
                std_val = int(std(l_df) * norm_factor)
            except (TypeError, ValueError, OverflowError):
                continue
            d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
            d_data["mean"].append(mean_val)
            d_data["stdev"].append(std_val)
            d_data["unit"].append(df[C.UNIT[test_type]].to_list()[0])
        return pd.DataFrame(d_data)

    lst_df = list()
    for itm in selected:
        if itm["ttype"] in ("NDR", "PDR", "Latency"):
            test_type = "ndrpdr"
        elif itm["ttype"] in ("CPS", "RPS", "BPS"):
            test_type = "hoststack"
        else:
            test_type = itm["ttype"].lower()

        dutver = itm["dutver"].split("-", 1)  # 0 -> release, 1 -> dut version
        tmp_df = pd.DataFrame(data.loc[(
            (data["passed"] == True) &
            (data["dut_type"] == itm["dut"]) &
            (data["dut_version"] == dutver[1]) &
            (data["test_type"] == test_type) &
            (data["release"] == dutver[0])
        )])

        drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
        core = str() if itm["dut"] == "trex" else itm["core"].lower()
        ttype = "ndrpdr" if itm["ttype"] in ("NDR", "PDR", "Latency") \
            else itm["ttype"].lower()
        tmp_df = tmp_df[
            (tmp_df.job.str.endswith(itm["tbed"])) &
            (tmp_df.test_id.str.contains(
                (
                    f"^.*[.|-]{itm['nic']}.*{itm['frmsize'].lower()}-"
                    f"{core}-{drv}.*-{ttype}$"
                ),
                regex=True
            ))
        ]
        if itm["driver"] == "dpdk":
            # The dpdk selection uses an empty driver token in the pattern
            # above, so rows naming any driver from C.DRIVERS are removed.
            # A boolean mask is used instead of dropping by index label, so
            # that only the matching rows are removed even if the index is
            # not unique.
            for other_drv in C.DRIVERS:
                tmp_df = tmp_df[
                    ~tmp_df.test_id.str.contains(f"-{other_drv}-")
                ]

        # Change the data type from ndrpdr to one of ("NDR", "PDR", "Latency")
        if test_type == "ndrpdr":
            tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())

        if not tmp_df.empty:
            if normalize:
                # Latency is scaled by the inverse of the factor used for
                # the other test types.
                if itm["ttype"] == "Latency":
                    norm_factor = C.FREQUENCY[itm["tbed"]] / C.NORM_FREQUENCY
                else:
                    norm_factor = C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]]
            else:
                norm_factor = 1.0
            if not raw_data:
                tmp_df = _calculate_statistics(
                    tmp_df,
                    itm["ttype"].lower(),
                    itm["driver"],
                    norm_factor,
                    remove_outliers=remove_outliers
                )

        # NOTE(review): the indentation of this statement was lost in the
        # source; it is placed at loop level (empty frames are appended too)
        # - confirm against the repository.
        lst_df.append(tmp_df)

    if len(lst_df) == 1:
        df = lst_df[0]
    elif len(lst_df) > 1:
        df = pd.concat(
            lst_df,
            ignore_index=True,
            copy=False
        )
    else:
        df = pd.DataFrame()

    return df


def comparison_table(
        data: pd.DataFrame,
        selected: dict,
        normalize: bool,
        format: str="html",
        remove_outliers: bool=False,
        raw_data: bool=False
    ) -> tuple:
    """Generate a comparison table.

    :param data: Iterative data for the comparison table.
    :param selected: A dictionary with parameters and their values selected by
        the user. It is read as selected["reference"]["selection"] (the
        reference selection) and selected["compare"] (with the keys
        "parameter" and "value").
    :param normalize: If True, the data is normalized to CPU frequency
        Constants.NORM_FREQUENCY.
    :param format: The output format of the table:
        - html: To be displayed on html page, the values are shown in millions
          of the unit (unless "Latency" is among the reference test types).
        - csv: To be downloaded as a CSV file the values are stored in base
          units.
    :param remove_outliers: If True the outliers are removed before
        generating the table.
    :param raw_data: If True, returns data as it is in parquets without any
        processing. It is used for "download raw data" feature.
    :type data: pandas.DataFrame
    :type selected: dict
    :type normalize: bool
    :type format: str
    :type remove_outliers: bool
    :type raw_data: bool
    :returns: A tuple with the table title and the comparison table. The
        title is an empty string for raw data and when there is nothing to
        compare.
    :rtype: tuple[str, pandas.DataFrame]
    """

    def _create_selection(sel: dict) -> list:
        """Transform the complex dictionary with user selection to list
        of simple items.

        One item is created for each combination of core, frame size and
        test type.

        :param sel: A complex dictionary with user selection.
        :type sel: dict
        :returns: A list of simple items.
        :rtype: list
        """
        l_infra = sel["infra"].split("-")
        selection = list()
        for core in sel["core"]:
            for fsize in sel["frmsize"]:
                for ttype in sel["ttype"]:
                    selection.append({
                        "dut": sel["dut"],
                        "dutver": sel["dutver"],
                        "tbed": f"{l_infra[0]}-{l_infra[1]}",
                        "nic": l_infra[2],
                        "driver": l_infra[-1].replace("_", "-"),
                        "core": core,
                        "frmsize": fsize,
                        "ttype": ttype
                    })
        return selection

    # Select reference data
    r_sel = deepcopy(selected["reference"]["selection"])
    r_selection = _create_selection(r_sel)
    r_data = select_comp_data(
        data, r_selection, normalize, remove_outliers, raw_data
    )

    # Select compare data: the reference selection with one parameter changed.
    c_sel = deepcopy(selected["reference"]["selection"])
    c_params = selected["compare"]
    if c_params["parameter"] in ("core", "frmsize", "ttype"):
        # These parameters are iterated as lists by _create_selection().
        c_sel[c_params["parameter"]] = [c_params["value"], ]
    else:
        c_sel[c_params["parameter"]] = c_params["value"]
    c_selection = _create_selection(c_sel)
    c_data = select_comp_data(
        data, c_selection, normalize, remove_outliers, raw_data
    )

    if raw_data:
        # assign() returns new frames, so the selected data (possibly slices
        # of the input) is not modified in place.
        r_data = r_data.assign(**{"ref/cmp": "reference"})
        c_data = c_data.assign(**{"ref/cmp": "compare"})
        return str(), pd.concat([r_data, c_data], ignore_index=True, copy=False)

    if r_data.empty or c_data.empty:
        return str(), pd.DataFrame()

    if format == "html" and "Latency" not in r_sel["ttype"]:
        unit_factor, s_unit_factor = (1e6, "M")
    else:
        unit_factor, s_unit_factor = (1, str())

    # Create Table title and titles of columns with data
    params = list(r_sel)
    params.remove(c_params["parameter"])
    lst_title = list()
    for param in params:
        value = r_sel[param]
        if isinstance(value, list):
            lst_title.append("|".join(value))
        else:
            lst_title.append(value)
    title = "Comparison for: " + "-".join(lst_title)
    r_name = r_sel[c_params["parameter"]]
    if isinstance(r_name, list):
        r_name = "|".join(r_name)
    c_name = c_params["value"]

    l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
        list(), list(), list(), list(), list(), list(), list(), set()
    for _, row in r_data.iterrows():
        if c_params["parameter"] in ("core", "frmsize", "ttype"):
            # The compared parameter is a part of the test name, so the
            # names are matched without that part. The parts of the name
            # are matched literally, not as regular expressions.
            l_cmp = row["name"].split("-")
            if c_params["parameter"] == "core":
                c_row = c_data[
                    (c_data.name.str.contains(l_cmp[0], regex=False)) &
                    (c_data.name.str.contains(
                        "-".join(l_cmp[2:]), regex=False
                    ))
                ]
            elif c_params["parameter"] == "frmsize":
                c_row = c_data[
                    c_data.name.str.contains("-".join(l_cmp[1:]), regex=False)
                ]
            elif c_params["parameter"] == "ttype":
                # The same name with any three-character last token.
                regex = r"^" + re.escape("-".join(l_cmp[:-1])) + r"-.{3}$"
                c_row = c_data[c_data.name.str.contains(regex, regex=True)]
        else:
            c_row = c_data[c_data["name"] == row["name"]]
        if not c_row.empty:
            unit.add(f"{s_unit_factor}{row['unit']}")
            r_mean = row["mean"]
            r_std = row["stdev"]
            # If more rows match, the first one is used.
            c_mean = c_row["mean"].values[0]
            c_std = c_row["stdev"].values[0]
            l_name.append(row["name"])
            l_r_mean.append(r_mean / unit_factor)
            l_r_std.append(r_std / unit_factor)
            l_c_mean.append(c_mean / unit_factor)
            l_c_std.append(c_std / unit_factor)
            delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
            l_rc_mean.append(delta)
            l_rc_std.append(d_stdev)

    # Sorted, so the column titles do not depend on the set iteration order.
    s_unit = "|".join(sorted(unit))
    df_cmp = pd.DataFrame.from_dict({
        "Test Name": l_name,
        f"{r_name} Mean [{s_unit}]": l_r_mean,
        f"{r_name} Stdev [{s_unit}]": l_r_std,
        f"{c_name} Mean [{s_unit}]": l_c_mean,
        f"{c_name} Stdev [{s_unit}]": l_c_std,
        "Relative Change Mean [%]": l_rc_mean,
        "Relative Change Stdev [%]": l_rc_std
    })
    df_cmp.sort_values(
        by="Relative Change Mean [%]",
        ascending=False,
        inplace=True
    )

    return (title, df_cmp)