From 4f5872c1bb23873b3a93cb471aae8700d5ca029d Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Fri, 20 Apr 2018 14:23:11 +0200 Subject: FIX: Use rolling window for outlier detection + Rename find_outliers to split_outliers. + Make remove_outliers call split_outliers internally. + Add "window" argument to both functions. + Add TODOs to call sites not setting window size explicitly. + Improve docstrings. Change-Id: I24961e2859ddbfa62b543031284517c7389a2abb Signed-off-by: Vratko Polak --- resources/tools/presentation/generator_tables.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'resources/tools/presentation/generator_tables.py') diff --git a/resources/tools/presentation/generator_tables.py b/resources/tools/presentation/generator_tables.py index 74579b0a9d..9b9f09f4be 100644 --- a/resources/tools/presentation/generator_tables.py +++ b/resources/tools/presentation/generator_tables.py @@ -25,7 +25,7 @@ from math import isnan from xml.etree import ElementTree as ET from errors import PresentationError -from utils import mean, stdev, relative_change, remove_outliers, find_outliers +from utils import mean, stdev, relative_change, remove_outliers, split_outliers def generate_tables(spec, data): @@ -405,14 +405,16 @@ def table_performance_comparison(table, input_data): item = [tbl_dict[tst_name]["name"], ] if tbl_dict[tst_name]["ref-data"]: data_t = remove_outliers(tbl_dict[tst_name]["ref-data"], - table["outlier-const"]) + outlier_constant=table["outlier-const"]) + # TODO: Specify window size. item.append(round(mean(data_t) / 1000000, 2)) item.append(round(stdev(data_t) / 1000000, 2)) else: item.extend([None, None]) if tbl_dict[tst_name]["cmp-data"]: data_t = remove_outliers(tbl_dict[tst_name]["cmp-data"], - table["outlier-const"]) + outlier_constant=table["outlier-const"]) + # TODO: Specify window size. 
item.append(round(mean(data_t) / 1000000, 2)) item.append(round(stdev(data_t) / 1000000, 2)) else: @@ -594,14 +596,16 @@ def table_performance_comparison_mrr(table, input_data): item = [tbl_dict[tst_name]["name"], ] if tbl_dict[tst_name]["ref-data"]: data_t = remove_outliers(tbl_dict[tst_name]["ref-data"], - table["outlier-const"]) + outlier_const=table["outlier-const"]) + # TODO: Specify window size. item.append(round(mean(data_t) / 1000000, 2)) item.append(round(stdev(data_t) / 1000000, 2)) else: item.extend([None, None]) if tbl_dict[tst_name]["cmp-data"]: data_t = remove_outliers(tbl_dict[tst_name]["cmp-data"], - table["outlier-const"]) + outlier_const=table["outlier-const"]) + # TODO: Specify window size. item.append(round(mean(data_t) / 1000000, 2)) item.append(round(stdev(data_t) / 1000000, 2)) else: @@ -708,7 +712,8 @@ def table_performance_trending_dashboard(table, input_data): name = tbl_dict[tst_name]["name"] median = pd_data.rolling(window=win_size, min_periods=2).median() - trimmed_data, _ = find_outliers(pd_data, outlier_const=1.5) + trimmed_data, _ = split_outliers(pd_data, outlier_const=1.5, + window=win_size) stdev_t = pd_data.rolling(window=win_size, min_periods=2).std() rel_change_lst = [None, ] -- cgit 1.2.3-korg