From fde192bc8a6ba1a4e2b4cf5b3297fb076efd58fc Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Wed, 25 Apr 2018 17:35:52 +0200 Subject: CSIT-1041: Use TMM for trending line + Make also pro/re-gression detection use TMM. + Update graphs. - Do not update dashboard tables yet. Change-Id: Iae526c846b329ad99549be61481532e197704fb0 Signed-off-by: Vratko Polak --- resources/tools/presentation/generator_CPTA.py | 76 +++++++++++++------------- resources/tools/presentation/utils.py | 2 +- 2 files changed, 38 insertions(+), 40 deletions(-) diff --git a/resources/tools/presentation/generator_CPTA.py b/resources/tools/presentation/generator_CPTA.py index 51787e43c5..e3cc55f8cf 100644 --- a/resources/tools/presentation/generator_CPTA.py +++ b/resources/tools/presentation/generator_CPTA.py @@ -144,55 +144,53 @@ def _select_data(in_data, period, fill_missing=False, use_first=False): return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0])) -def _evaluate_results(in_data, trimmed_data, window=10): +def _evaluate_results(trimmed_data, window=10): """Evaluates if the sample value is regress, normal or progress compared to previous data within the window. We use the intervals defined as: - - regress: less than median - 3 * stdev - - normal: between median - 3 * stdev and median + 3 * stdev - - progress: more than median + 3 * stdev + - regress: less than trimmed moving median - 3 * stdev + - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev + - progress: more than trimmed moving median + 3 * stdev + where stdev is trimmed moving standard deviation. - :param in_data: Full data set. - :param trimmed_data: Full data set without the outliers. - :param window: Window size used to calculate moving median and moving stdev. - :type in_data: pandas.Series + :param trimmed_data: Full data set with the outliers replaced by nan. + :param window: Window size used to calculate moving average and moving stdev. :type trimmed_data: pandas.Series :type window: int :returns: Evaluated results. :rtype: list """ - if len(in_data) > 2: - win_size = in_data.size if in_data.size < window else window + if len(trimmed_data) > 2: + win_size = trimmed_data.size if trimmed_data.size < window else window results = [0.66, ] - median = trimmed_data.rolling(window=win_size, min_periods=2).median() - stdev_t = trimmed_data.rolling(window=win_size, min_periods=2).std() + tmm = trimmed_data.rolling(window=win_size, min_periods=2).median() + tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std() first = True - for build_nr, value in in_data.iteritems(): + for build_nr, value in trimmed_data.iteritems(): if first: first = False continue - if np.isnan(trimmed_data[build_nr]) \ - or np.isnan(median[build_nr]) \ - or np.isnan(stdev_t[build_nr]) \ - or np.isnan(value): + if (np.isnan(value) \ + or np.isnan(tmm[build_nr]) \ + or np.isnan(tmstd[build_nr])): results.append(0.0) - elif value < (median[build_nr] - 3 * stdev_t[build_nr]): + elif value < (tmm[build_nr] - 3 * tmstd[build_nr]): results.append(0.33) - elif value > (median[build_nr] + 3 * stdev_t[build_nr]): + elif value > (tmm[build_nr] + 3 * tmstd[build_nr]): results.append(1.0) else: results.append(0.66) else: results = [0.0, ] try: - median = np.median(in_data) - stdev = np.std(in_data) - if in_data.values[-1] < (median - 3 * stdev): + tmm = np.median(trimmed_data) + tmstd = np.std(trimmed_data) + if trimmed_data.values[-1] < (tmm - 3 * tmstd): results.append(0.33) - elif (median - 3 * stdev) <= in_data.values[-1] <= ( - median + 3 * stdev): + elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= ( + tmm + 3 * tmstd): results.append(0.66) else: results.append(1.0) @@ -203,10 +201,10 @@ def _evaluate_results(in_data, trimmed_data, window=10): def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, fill_missing=True, use_first=False, - show_moving_median=True, name="", color=""): + show_trend_line=True, name="", color=""): """Generate the trending traces: - samples, - - moving median (trending plot) + - trimmed moving median (trending line) - outliers, regress, progress :param in_data: Full data set. @@ -214,9 +212,9 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, :param period: Sampling period. :param moving_win_size: Window size. :param fill_missing: If the chosen sample is missing in the full set, its - nearest neighbour is used. + nearest neighbour is used. :param use_first: Use the first sample even though it is not chosen. - :param show_moving_median: Show moving median (trending plot). + :param show_trend_line: Show moving median (trending plot). :param name: Name of the plot :param color: Name of the color for the plot. :type in_data: OrderedDict @@ -225,7 +223,7 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, :type moving_win_size: int :type fill_missing: bool :type use_first: bool - :type show_moving_median: bool + :type show_trend_line: bool :type name: str :type color: str :returns: Generated traces (list) and the evaluated result (float). @@ -237,8 +235,8 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, fill_missing=fill_missing, use_first=use_first) - data_x = [key for key in in_data.keys()] - data_y = [val for val in in_data.values()] + data_x = list(in_data.keys()) + data_y = list(in_data.values()) hover_text = list() for idx in data_x: @@ -249,7 +247,7 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, t_data, outliers = split_outliers(data_pd, outlier_const=1.5, window=moving_win_size) - results = _evaluate_results(data_pd, t_data, window=moving_win_size) + results = _evaluate_results(t_data, window=moving_win_size) anomalies = pd.Series() anomalies_res = list() @@ -328,12 +326,12 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, ) traces.append(trace_anomalies) - if show_moving_median: - data_mean_y = pd.Series(data_y).rolling( - window=moving_win_size, min_periods=2).median() - trace_median = plgo.Scatter( - x=data_x, - y=data_mean_y, + if show_trend_line: + data_trend = t_data.rolling(window=moving_win_size, + min_periods=2).median() + trace_trend = plgo.Scatter( + x=data_trend.keys(), + y=data_trend.tolist(), mode='lines', line={ "shape": "spline", @@ -342,7 +340,7 @@ def _generate_trending_traces(in_data, build_info, period, moving_win_size=10, }, name='{name}-trend'.format(name=name) ) - traces.append(trace_median) + traces.append(trace_trend) return traces, results[-1] diff --git a/resources/tools/presentation/utils.py b/resources/tools/presentation/utils.py index f118afca1d..88baf95928 100644 --- a/resources/tools/presentation/utils.py +++ b/resources/tools/presentation/utils.py @@ -140,7 +140,7 @@ def get_files(path, extension=None, full_path=True): :param path: Path to files. :param extension: Extension of files to process. If it is the empty string, - all files will be processed. + all files will be processed. :param full_path: If True, the files with full path are generated. :type path: str :type extension: str -- cgit 1.2.3-korg