aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tibor Frank <tifrank@cisco.com> 2024-03-07 12:00:04 +0000
committer Tibor Frank <tifrank@cisco.com> 2024-03-07 12:00:04 +0000
commit 157aa10abdfb9c53f8e687da97ac72e9a724b457 (patch)
tree 0ed9e0ddc3d795bf868b775b6c5a56c31401d489
parent 5a8e8f743bdc3a66f26c651e9f7081244ab0da75 (diff)
C-Dash: Add possibility to remove outliers from comparison data
- extreme outliers only for now

Change-Id: I2be7c291dd207d5a557947415a960fa6d0067c64
Signed-off-by: Tibor Frank <tifrank@cisco.com>
-rw-r--r-- csit.infra.dash/app/cdash/comparisons/layout.py | 70
-rw-r--r-- csit.infra.dash/app/cdash/comparisons/tables.py | 38
-rw-r--r-- csit.infra.dash/app/cdash/utils/constants.py | 18
3 files changed, 100 insertions, 26 deletions
diff --git a/csit.infra.dash/app/cdash/comparisons/layout.py b/csit.infra.dash/app/cdash/comparisons/layout.py
index 45bc75aca1..82b5b2d544 100644
--- a/csit.infra.dash/app/cdash/comparisons/layout.py
+++ b/csit.infra.dash/app/cdash/comparisons/layout.py
@@ -54,7 +54,8 @@ CP_PARAMS = {
"cmp-val-opt": list(),
"cmp-val-dis": True,
"cmp-val-val": str(),
- "normalize-val": list()
+ "normalize-val": list(),
+ "outliers-val": list()
}
# List of comparable parameters.
@@ -420,21 +421,33 @@ class Layout:
)
]
- normalize = [
+ processing = [
dbc.Row(
class_name="g-0 p-1",
children=[
dbc.InputGroup(
- dbc.Checklist(
- id="normalize",
- options=[{
- "value": "normalize",
- "label": "Normalize to 2GHz CPU frequency"
- }],
- value=[],
- inline=True,
- class_name="ms-2"
- ),
+ children = [
+ dbc.Checklist(
+ id="normalize",
+ options=[{
+ "value": "normalize",
+ "label": "Normalize to 2GHz CPU frequency"
+ }],
+ value=[],
+ inline=True,
+ class_name="ms-2"
+ ),
+ dbc.Checklist(
+ id="outliers",
+ options=[{
+ "value": "outliers",
+ "label": "Remove Extreme Outliers"
+ }],
+ value=[],
+ inline=True,
+ class_name="ms-2"
+ )
+ ],
style={"align-items": "center"},
size="sm"
)
@@ -479,10 +492,10 @@ class Layout:
dbc.Card(
[
dbc.CardHeader(
- html.H5("Normalization")
+ html.H5("Data Manipulations")
),
dbc.CardBody(
- children=normalize,
+ children=processing,
class_name="g-0 p-0"
)
],
@@ -659,7 +672,8 @@ class Layout:
Output({"type": "ctrl-dd", "index": "cmpval"}, "options"),
Output({"type": "ctrl-dd", "index": "cmpval"}, "disabled"),
Output({"type": "ctrl-dd", "index": "cmpval"}, "value"),
- Output("normalize", "value")
+ Output("normalize", "value"),
+ Output("outliers", "value")
],
[
State("store-control-panel", "data"),
@@ -671,6 +685,7 @@ class Layout:
[
Input("url", "href"),
Input("normalize", "value"),
+ Input("outliers", "value"),
Input({"type": "table", "index": ALL}, "filter_query"),
Input({"type": "ctrl-dd", "index": ALL}, "value"),
Input({"type": "ctrl-cl", "index": ALL}, "value"),
@@ -685,6 +700,7 @@ class Layout:
table_data: list,
href: str,
normalize: list,
+ outliers: bool,
table_filter: str,
*_
) -> tuple:
@@ -721,6 +737,10 @@ class Layout:
r_sel = selected["reference"]["selection"]
c_sel = selected["compare"]
normalize = literal_eval(url_params["norm"][0])
+ try: # Necessary for backward compatibility
+ outliers = literal_eval(url_params["outliers"][0])
+ except (KeyError, IndexError, AttributeError):
+ outliers = list()
process_url = bool(
(selected["reference"]["set"] == True) and
(c_sel["set"] == True)
@@ -755,7 +775,8 @@ class Layout:
[r_sel["infra"]]["ttype"]
),
"ttype-val": r_sel["ttype"],
- "normalize-val": normalize
+ "normalize-val": normalize,
+ "outliers-val": outliers
})
opts = list()
for itm, label in CMP_PARAMS.items():
@@ -784,6 +805,9 @@ class Layout:
elif trigger.type == "normalize":
ctrl_panel.set({"normalize-val": normalize})
on_draw = True
+ elif trigger.type == "outliers":
+ ctrl_panel.set({"outliers-val": outliers})
+ on_draw = True
elif trigger.type == "ctrl-dd":
if trigger.idx == "dut":
try:
@@ -942,13 +966,23 @@ class Layout:
if all((on_draw, selected["reference"]["set"],
selected["compare"]["set"], )):
- title, table = comparison_table(self._data, selected, normalize)
+ title, table = comparison_table(
+ data=self._data,
+ selected=selected,
+ normalize=normalize,
+ format="html",
+ remove_outliers=outliers
+ )
plotting_area = self._get_plotting_area(
title=title,
table=table,
url=gen_new_url(
parsed_url,
- params={"selected": selected, "norm": normalize}
+ params={
+ "selected": selected,
+ "norm": normalize,
+ "outliers": outliers
+ }
)
)
store_table_data = table.to_dict("records")
diff --git a/csit.infra.dash/app/cdash/comparisons/tables.py b/csit.infra.dash/app/cdash/comparisons/tables.py
index ab99f1839d..0c247e87c2 100644
--- a/csit.infra.dash/app/cdash/comparisons/tables.py
+++ b/csit.infra.dash/app/cdash/comparisons/tables.py
@@ -16,17 +16,18 @@
import pandas as pd
-from numpy import mean, std
+from numpy import mean, std, percentile
from copy import deepcopy
from ..utils.constants import Constants as C
from ..utils.utils import relative_change_stdev
-def select_comparison_data(
+def select_comp_data(
data: pd.DataFrame,
selected: dict,
- normalize: bool=False
+ normalize: bool=False,
+ remove_outliers: bool=False
) -> pd.DataFrame:
"""Select data for a comparison table.
@@ -35,9 +36,12 @@ def select_comparison_data(
the user.
:param normalize: If True, the data is normalized to CPU frequency
Constants.NORM_FREQUENCY.
+ :param remove_outliers: If True the outliers are removed before
+ generating the table.
:type data: pandas.DataFrame
:type selected: dict
:type normalize: bool
+ :type remove_outliers: bool
:returns: A data frame with selected data.
:rtype: pandas.DataFrame
"""
@@ -46,7 +50,8 @@ def select_comparison_data(
data_in: pd.DataFrame,
ttype: str,
drv: str,
- norm_factor: float
+ norm_factor: float,
+ remove_outliers: bool=False
) -> pd.DataFrame:
"""Calculates mean value and standard deviation for provided data.
@@ -54,10 +59,13 @@ def select_comparison_data(
:param ttype: The test type.
:param drv: The driver.
:param norm_factor: The data normalization factor.
+ :param remove_outliers: If True the outliers are removed before
+ generating the table.
:type data_in: pandas.DataFrame
:type ttype: str
:type drv: str
:type norm_factor: float
+ :type remove_outliers: bool
:returns: A pandas dataframe with: test name, mean value, standard
deviation and unit.
:rtype: pandas.DataFrame
@@ -82,6 +90,15 @@ def select_comparison_data(
for l_itm in l_df:
tmp_df.extend(l_itm)
l_df = tmp_df
+
+ if remove_outliers:
+ q1 = percentile(l_df, 25, method=C.COMP_PERCENTILE_METHOD)
+ q3 = percentile(l_df, 75, method=C.COMP_PERCENTILE_METHOD)
+ irq = q3 - q1
+ lif = q1 - C.COMP_OUTLIER_TYPE * irq
+ uif = q3 + C.COMP_OUTLIER_TYPE * irq
+ l_df = [i for i in l_df if i >= lif and i <= uif]
+
try:
mean_val = mean(l_df)
std_val = std(l_df)
@@ -148,7 +165,8 @@ def select_comparison_data(
tmp_df,
itm["ttype"].lower(),
itm["driver"],
- norm_factor
+ norm_factor,
+ remove_outliers=remove_outliers
)
lst_df.append(tmp_df)
@@ -171,7 +189,8 @@ def comparison_table(
data: pd.DataFrame,
selected: dict,
normalize: bool,
- format: str="html"
+ format: str="html",
+ remove_outliers: bool=False
) -> tuple:
"""Generate a comparison table.
@@ -185,10 +204,13 @@ def comparison_table(
of the unit.
- csv: To be downloaded as a CSV file the values are stored in base
units.
+ :param remove_outliers: If True the outliers are removed before
+ generating the table.
:type data: pandas.DataFrame
:type selected: dict
:type normalize: bool
:type format: str
+ :type remove_outliers: bool
:returns: A tuple with the tabe title and the comparison table.
:rtype: tuple[str, pandas.DataFrame]
"""
@@ -245,7 +267,7 @@ def comparison_table(
c_name = c_params["value"]
# Select reference data
- r_data = select_comparison_data(data, r_selection, normalize)
+ r_data = select_comp_data(data, r_selection, normalize, remove_outliers)
# Select compare data
c_sel = deepcopy(selected["reference"]["selection"])
@@ -255,7 +277,7 @@ def comparison_table(
c_sel[c_params["parameter"]] = c_params["value"]
c_selection = _create_selection(c_sel)
- c_data = select_comparison_data(data, c_selection, normalize)
+ c_data = select_comp_data(data, c_selection, normalize, remove_outliers)
if r_data.empty or c_data.empty:
return str(), pd.DataFrame()
diff --git a/csit.infra.dash/app/cdash/utils/constants.py b/csit.infra.dash/app/cdash/utils/constants.py
index c86f4d5136..5ed5a8cbd7 100644
--- a/csit.infra.dash/app/cdash/utils/constants.py
+++ b/csit.infra.dash/app/cdash/utils/constants.py
@@ -358,6 +358,24 @@ class Constants:
# Default name of downloaded file with selected data.
COMP_DOWNLOAD_FILE_NAME = "comparison_data.csv"
+ # This parameter specifies the method to use for estimating the percentile.
+ # Possible values:
+ # - inverted_cdf
+ # - averaged_inverted_cdf
+ # - closest_observation
+ # - interpolated_inverted_cdf
+ # - hazen
+ # - weibull
+ # - linear (default)
+ # - median_unbiased
+ # - normal_unbiased
+ COMP_PERCENTILE_METHOD = "linear"
+
+ # Extreme or mild outlier?
+ OUTLIER_EXTREME = 3
+ OUTLIER_MILD = 1.5
+ COMP_OUTLIER_TYPE = OUTLIER_EXTREME
+
############################################################################
# Statistics.