CDash: Add comparison tables

Signed-off-by: Tibor Frank <tifrank@cisco.com> Change-Id: I8ce9e670721e1fdb1f297b3bfb8f0d8ffb916713
author: Tibor Frank <tifrank@cisco.com> 2023-03-13 10:13:57 +0100
committer: Tibor Frank <tifrank@cisco.com> 2023-04-04 08:22:38 +0000
commit: 0fc5aff9887fa7a3125c71d0662475a3f9a763ba (patch)
tree: 4f2810d91b2b88244f8ccf9ed0e7c8cbecede0bc /csit.infra.dash/app/cdash/comparisons/tables.py
parent: 3a8e7ca967bdb63f2497ff5654fbf94de4c465a2 (diff)
1 files changed, 283 insertions, 0 deletions
diff --git a/csit.infra.dash/app/cdash/comparisons/tables.py b/csit.infra.dash/app/cdash/comparisons/tables.py
new file mode 100644
index 0000000000..14d5d552af
--- /dev/null
+++ b/csit.infra.dash/app/cdash/comparisons/tables.py
@@ -0,0 +1,283 @@
+# Copyright (c) 2023 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""The comparison tables.
+"""
+
+import pandas as pd
+
+from numpy import mean, std
+from copy import deepcopy
+from ..utils.constants import Constants as C
+from ..utils.utils import relative_change_stdev
+
+
+def select_comparison_data(
+        data: pd.DataFrame,
+        selected: list,
+        normalize: bool=False
+    ) -> pd.DataFrame:
+    """Select data for a comparison table.
+
+    :param data: Data to be filtered for the comparison table.
+    :param selected: A list of dictionaries, each of them describing one
+        combination of parameters selected by the user.
+    :param normalize: If True, the data is normalized to CPU frequency
+        Constants.NORM_FREQUENCY.
+    :type data: pandas.DataFrame
+    :type selected: list
+    :type normalize: bool
+    :returns: A data frame with statistics of the selected tests.
+    :rtype: pandas.DataFrame
+    """
+
+    def _calculate_statistics(
+            data_in: pd.DataFrame,
+            ttype: str,
+            drv: str,
+            norm_factor: float
+        ) -> pd.DataFrame:
+        """Calculates mean value and standard deviation for provided data.
+
+        :param data_in: Input data for calculations.
+        :param ttype: The test type.
+        :param drv: The driver.
+        :param norm_factor: The data normalization factor.
+        :type data_in: pandas.DataFrame
+        :type ttype: str
+        :type drv: str
+        :type norm_factor: float
+        :returns: A pandas dataframe with: test name, mean value, standard
+            deviation and unit.
+        :rtype: pandas.DataFrame
+        """
+        d_data = {
+            "name": list(),
+            "mean": list(),
+            "stdev": list(),
+            "unit": list()
+        }
+        for itm in data_in["test_id"].unique().tolist():
+            itm_lst = itm.split(".")
+            test = itm_lst[-1].rsplit("-", 1)[0]
+            df = data_in.loc[(data_in["test_id"] == itm)]
+            l_df = df[C.VALUE_ITER[ttype]].to_list()
+            if len(l_df) and isinstance(l_df[0], list):
+                tmp_df = list()
+                for l_itm in l_df:
+                    tmp_df.extend(l_itm)
+                l_df = tmp_df
+            d_data["name"].append(f"{test.replace(f'{drv}-', '')}-{ttype}")
+            d_data["mean"].append(int(mean(l_df) * norm_factor))
+            d_data["stdev"].append(int(std(l_df) * norm_factor))
+            d_data["unit"].append(df[C.UNIT[ttype]].to_list()[0])
+        return pd.DataFrame(d_data)
+
+    lst_df = list()
+    for itm in selected:
+        if itm["ttype"] in ("NDR", "PDR"):
+            test_type = "ndrpdr"
+        else:
+            test_type = itm["ttype"].lower()
+
+        release, _, dut_version = itm["dutver"].partition("-")
+        tmp_df = pd.DataFrame(data.loc[(
+            (data["passed"] == True) &
+            (data["dut_type"] == itm["dut"]) &
+            (data["dut_version"] == dut_version) &
+            (data["test_type"] == test_type) &
+            (data["release"] == release)
+        )])
+
+        drv = "" if itm["driver"] == "dpdk" else itm["driver"].replace("_", "-")
+        core = str() if itm["dut"] == "trex" else itm["core"].lower()
+        reg_id = \
+            f"^.*[.-]{itm['nic']}.*{itm['frmsize'].lower()}-{core}-{drv}.*$"
+        tmp_df = tmp_df[
+            (tmp_df.job.str.endswith(itm["tbed"])) &
+            (tmp_df.test_id.str.contains(reg_id, regex=True))
+        ]
+        if itm["driver"] == "dpdk":
+            # No driver tag is part of the dpdk regex, so the tests tagged
+            # with any driver from C.DRIVERS are filtered out here.
+            for driver in C.DRIVERS:
+                tag = f"-{driver}-"
+                tmp_df = tmp_df[~tmp_df.test_id.str.contains(tag)]
+
+        # Change the data type from ndrpdr to one of ("NDR", "PDR")
+        if test_type == "ndrpdr":
+            tmp_df = tmp_df.assign(test_type=itm["ttype"].lower())
+
+        # Skip selections without data so that no raw, unprocessed rows or
+        # columns get into the result.
+        if tmp_df.empty:
+            continue
+
+        lst_df.append(_calculate_statistics(
+            tmp_df,
+            itm["ttype"].lower(),
+            itm["driver"],
+            C.NORM_FREQUENCY / C.FREQUENCY[itm["tbed"]] if normalize else 1
+        ))
+
+    if not lst_df:
+        return pd.DataFrame()
+    if len(lst_df) == 1:
+        return lst_df[0]
+
+    return pd.concat(
+        lst_df,
+        ignore_index=True,
+        copy=False
+    )
+
+
+def comparison_table(
+        data: pd.DataFrame,
+        selected: dict,
+        normalize: bool,
+        format: str="html"
+    ) -> tuple:
+    """Generate a comparison table.
+
+    :param data: Iterative data for the comparison table.
+    :param selected: A dictionary with parameters and their values selected by
+        the user.
+    :param normalize: If True, the data is normalized to CPU frequency
+        Constants.NORM_FREQUENCY.
+    :param format: The output format of the table:
+        - html: To be displayed on html page, the values are shown in millions
+          of the unit.
+        - csv: To be downloaded as a CSV file the values are stored in base
+          units.
+    :type data: pandas.DataFrame
+    :type selected: dict
+    :type normalize: bool
+    :type format: str
+    :returns: A tuple with the table title and the comparison table.
+    :rtype: tuple[str, pandas.DataFrame]
+    """
+
+    def _create_selection(sel: dict) -> list:
+        """Transform the complex dictionary with user selection to list
+            of simple items.
+
+        :param sel: A complex dictionary with user selection.
+        :type sel: dict
+        :returns: A list of simple items.
+        :rtype: list
+        """
+        l_infra = sel["infra"].split("-")
+        selection = list()
+        for core in sel["core"]:
+            for fsize in sel["frmsize"]:
+                for ttype in sel["ttype"]:
+                    selection.append({
+                        "dut": sel["dut"],
+                        "dutver": sel["dutver"],
+                        "tbed": f"{l_infra[0]}-{l_infra[1]}",
+                        "nic": l_infra[2],
+                        "driver": l_infra[-1].replace("_", "-"),
+                        "core": core,
+                        "frmsize": fsize,
+                        "ttype": ttype
+                    })
+        return selection
+
+    unit_factor, s_unit_factor = (1e6, "M") if format == "html" else (1, str())
+
+    r_sel = deepcopy(selected["reference"]["selection"])
+    c_params = selected["compare"]
+    r_selection = _create_selection(r_sel)
+
+    # Create Table title and titles of columns with data
+    params = list(r_sel)
+    params.remove(c_params["parameter"])
+    lst_title = list()
+    for param in params:
+        value = r_sel[param]
+        if isinstance(value, list):
+            lst_title.append("|".join(value))
+        else:
+            lst_title.append(value)
+    title = "Comparison for: " + "-".join(lst_title)
+    r_name = r_sel[c_params["parameter"]]
+    if isinstance(r_name, list):
+        r_name = "|".join(r_name)
+    c_name = c_params["value"]
+
+    # Select reference data
+    r_data = select_comparison_data(data, r_selection, normalize)
+
+    # Select compare data
+    c_sel = deepcopy(selected["reference"]["selection"])
+    if c_params["parameter"] in ("core", "frmsize", "ttype"):
+        c_sel[c_params["parameter"]] = [c_params["value"], ]
+    else:
+        c_sel[c_params["parameter"]] = c_params["value"]
+
+    c_selection = _create_selection(c_sel)
+    c_data = select_comparison_data(data, c_selection, normalize)
+
+    if r_data.empty or c_data.empty:
+        return str(), pd.DataFrame()
+
+    l_name, l_r_mean, l_r_std, l_c_mean, l_c_std, l_rc_mean, l_rc_std, unit = \
+        list(), list(), list(), list(), list(), list(), list(), set()
+    names = c_data["name"].str
+    for _, row in r_data.iterrows():
+        if c_params["parameter"] in ("core", "frmsize", "ttype"):
+            # The name parts are matched as literal text, not as regexes.
+            l_cmp = row["name"].split("-")
+            if c_params["parameter"] == "core":
+                mask = names.contains(l_cmp[0], regex=False) & \
+                    names.contains("-".join(l_cmp[2:]), regex=False)
+            elif c_params["parameter"] == "frmsize":
+                mask = names.contains("-".join(l_cmp[1:]), regex=False)
+            else:
+                # Same name with any three-character test type suffix.
+                prefix = "-".join(l_cmp[:-1]) + "-"
+                mask = names.startswith(prefix) & \
+                    (names.len() == len(prefix) + 3)
+            c_row = c_data[mask]
+        else:
+            c_row = c_data[c_data["name"] == row["name"]]
+        if not c_row.empty:
+            unit.add(f"{s_unit_factor}{row['unit']}")
+            r_mean, r_std = row["mean"], row["stdev"]
+            c_mean = c_row["mean"].values[0]
+            c_std = c_row["stdev"].values[0]
+            l_name.append(row["name"])
+            l_r_mean.append(r_mean / unit_factor)
+            l_r_std.append(r_std / unit_factor)
+            l_c_mean.append(c_mean / unit_factor)
+            l_c_std.append(c_std / unit_factor)
+            delta, d_stdev = relative_change_stdev(r_mean, c_mean, r_std, c_std)
+            l_rc_mean.append(delta)
+            l_rc_std.append(d_stdev)
+
+    s_unit = "|".join(sorted(unit))
+    df_cmp = pd.DataFrame.from_dict({
+        "Test Name": l_name,
+        f"{r_name} Mean [{s_unit}]": l_r_mean,
+        f"{r_name} Stdev [{s_unit}]": l_r_std,
+        f"{c_name} Mean [{s_unit}]": l_c_mean,
+        f"{c_name} Stdev [{s_unit}]": l_c_std,
+        "Relative Change Mean [%]": l_rc_mean,
+        "Relative Change Stdev [%]": l_rc_std
+    })
+    df_cmp.sort_values(
+        by="Relative Change Mean [%]", ascending=False, inplace=True
+    )
+
+    return (title, df_cmp)
author	Tibor Frank <tifrank@cisco.com>	2023-03-13 10:13:57 +0100
committer	Tibor Frank <tifrank@cisco.com>	2023-04-04 08:22:38 +0000
commit	0fc5aff9887fa7a3125c71d0662475a3f9a763ba (patch)
tree	4f2810d91b2b88244f8ccf9ed0e7c8cbecede0bc /csit.infra.dash/app/cdash/comparisons/tables.py
parent	3a8e7ca967bdb63f2497ff5654fbf94de4c465a2 (diff)