about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Tibor Frank <tifrank@cisco.com> 2018-05-28 09:02:35 +0200
committer Tibor Frank <tifrank@cisco.com> 2018-05-29 08:48:46 +0200
commit f31dbcd6553ca6e7436736a5bc3aeec8fe18cad1 (patch)
tree 93ab6520d8aa05595dda06f4bf885a21cc2d426e
parent 6f5de201aadfbb31419c05dfae6495107a745899 (diff)
CSIT-1106: Unify the anomaly detection (plots, dashboard)
Change-Id: I27aaa5482224d1ff518aceb879cd889f2fc8d0f5
Signed-off-by: Tibor Frank <tifrank@cisco.com>
-rw-r--r-- resources/tools/presentation/generator_CPTA.py 135
-rw-r--r-- resources/tools/presentation/generator_tables.py 97
-rw-r--r-- resources/tools/presentation/specification_CPTA.yaml 6
-rw-r--r-- resources/tools/presentation/utils.py 42
4 files changed, 130 insertions, 150 deletions
diff --git a/resources/tools/presentation/generator_CPTA.py b/resources/tools/presentation/generator_CPTA.py
index 73d55affa2..2c62e11a97 100644
--- a/resources/tools/presentation/generator_CPTA.py
+++ b/resources/tools/presentation/generator_CPTA.py
@@ -22,13 +22,13 @@ import prettytable
import plotly.offline as ploff
import plotly.graph_objs as plgo
import plotly.exceptions as plerr
-import numpy as np
import pandas as pd
from collections import OrderedDict
from datetime import datetime
-from utils import split_outliers, archive_input_data, execute_command, Worker
+from utils import split_outliers, archive_input_data, execute_command,\
+ classify_anomalies, Worker
# Command to build the html format of the report
@@ -87,61 +87,6 @@ def generate_cpta(spec, data):
return ret_code
-def _evaluate_results(trimmed_data, window=10):
- """Evaluates if the sample value is regress, normal or progress compared to
- previous data within the window.
- We use the intervals defined as:
- - regress: less than trimmed moving median - 3 * stdev
- - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev
- - progress: more than trimmed moving median + 3 * stdev
- where stdev is trimmed moving standard deviation.
-
- :param trimmed_data: Full data set with the outliers replaced by nan.
- :param window: Window size used to calculate moving average and moving stdev.
- :type trimmed_data: pandas.Series
- :type window: int
- :returns: Evaluated results.
- :rtype: list
- """
-
- if len(trimmed_data) > 2:
- win_size = trimmed_data.size if trimmed_data.size < window else window
- results = [0.66, ]
- tmm = trimmed_data.rolling(window=win_size, min_periods=2).median()
- tmstd = trimmed_data.rolling(window=win_size, min_periods=2).std()
-
- first = True
- for build_nr, value in trimmed_data.iteritems():
- if first:
- first = False
- continue
- if (np.isnan(value)
- or np.isnan(tmm[build_nr])
- or np.isnan(tmstd[build_nr])):
- results.append(0.0)
- elif value < (tmm[build_nr] - 3 * tmstd[build_nr]):
- results.append(0.33)
- elif value > (tmm[build_nr] + 3 * tmstd[build_nr]):
- results.append(1.0)
- else:
- results.append(0.66)
- else:
- results = [0.0, ]
- try:
- tmm = np.median(trimmed_data)
- tmstd = np.std(trimmed_data)
- if trimmed_data.values[-1] < (tmm - 3 * tmstd):
- results.append(0.33)
- elif (tmm - 3 * tmstd) <= trimmed_data.values[-1] <= (
- tmm + 3 * tmstd):
- results.append(0.66)
- else:
- results.append(1.0)
- except TypeError:
- results.append(None)
- return results
-
-
def _generate_trending_traces(in_data, build_info, moving_win_size=10,
show_trend_line=True, name="", color=""):
"""Generate the trending traces:
@@ -182,29 +127,27 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
t_data, outliers = split_outliers(data_pd, outlier_const=1.5,
window=moving_win_size)
- results = _evaluate_results(t_data, window=moving_win_size)
+ anomaly_classification = classify_anomalies(t_data, window=moving_win_size)
anomalies = pd.Series()
- anomalies_res = list()
- for idx, item in enumerate(data_pd.items()):
- item_pd = pd.Series([item[1], ], index=[item[0], ])
- if item[0] in outliers.keys():
- anomalies = anomalies.append(item_pd)
- anomalies_res.append(0.0)
- elif results[idx] in (0.33, 1.0):
- anomalies = anomalies.append(item_pd)
- anomalies_res.append(results[idx])
- anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
+ anomalies_colors = list()
+ anomaly_color = {
+ "outlier": 0.0,
+ "regression": 0.33,
+ "normal": 0.66,
+ "progression": 1.0
+ }
+ if anomaly_classification:
+ for idx, item in enumerate(data_pd.items()):
+ if anomaly_classification[idx] in \
+ ("outlier", "regression", "progression"):
+ anomalies = anomalies.append(pd.Series([item[1], ],
+ index=[item[0], ]))
+ anomalies_colors.append(
+ anomaly_color[anomaly_classification[idx]])
+ anomalies_colors.extend([0.0, 0.33, 0.66, 1.0])
# Create traces
- color_scale = [[0.00, "grey"],
- [0.25, "grey"],
- [0.25, "red"],
- [0.50, "red"],
- [0.50, "white"],
- [0.75, "white"],
- [0.75, "green"],
- [1.00, "green"]]
trace_samples = plgo.Scatter(
x=xaxis,
@@ -236,8 +179,15 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
marker={
"size": 15,
"symbol": "circle-open",
- "color": anomalies_res,
- "colorscale": color_scale,
+ "color": anomalies_colors,
+ "colorscale": [[0.00, "grey"],
+ [0.25, "grey"],
+ [0.25, "red"],
+ [0.50, "red"],
+ [0.50, "white"],
+ [0.75, "white"],
+ [0.75, "green"],
+ [1.00, "green"]],
"showscale": True,
"line": {
"width": 2
@@ -279,7 +229,7 @@ def _generate_trending_traces(in_data, build_info, moving_win_size=10,
)
traces.append(trace_trend)
- return traces, results[-1]
+ return traces, anomaly_classification[-1]
def _generate_all_charts(spec, input_data):
@@ -371,8 +321,6 @@ def _generate_all_charts(spec, input_data):
except plerr.PlotlyEmptyDataError:
logs.append(("WARNING", "No data for the plot. Skipped."))
- logging.info(" Done.")
-
data_out = {
"csv_table": csv_tbl,
"results": res,
@@ -419,7 +367,7 @@ def _generate_all_charts(spec, input_data):
work_queue.put((chart, ))
work_queue.join()
- results = list()
+ anomaly_classifications = list()
# Create the header:
csv_table = list()
@@ -435,7 +383,7 @@ def _generate_all_charts(spec, input_data):
while not data_queue.empty():
result = data_queue.get()
- results.extend(result["results"])
+ anomaly_classifications.extend(result["results"])
csv_table.extend(result["csv_table"])
for item in result["logs"]:
@@ -487,17 +435,16 @@ def _generate_all_charts(spec, input_data):
txt_file.write(str(txt_table))
# Evaluate result:
- result = "PASS"
- for item in results:
- if item is None:
- result = "FAIL"
- break
- if item == 0.66 and result == "PASS":
- result = "PASS"
- elif item == 0.33 or item == 0.0:
- result = "FAIL"
-
- logging.info("Partial results: {0}".format(results))
+ if anomaly_classifications:
+ result = "PASS"
+ for classification in anomaly_classifications:
+ if classification == "regression" or classification == "outlier":
+ result = "FAIL"
+ break
+ else:
+ result = "FAIL"
+
+ logging.info("Partial results: {0}".format(anomaly_classifications))
logging.info("Result: {0}".format(result))
return result
diff --git a/resources/tools/presentation/generator_tables.py b/resources/tools/presentation/generator_tables.py
index 5246952e20..84a6a411dc 100644
--- a/resources/tools/presentation/generator_tables.py
+++ b/resources/tools/presentation/generator_tables.py
@@ -26,7 +26,8 @@ from numpy import nan, isnan
from xml.etree import ElementTree as ET
from errors import PresentationError
-from utils import mean, stdev, relative_change, remove_outliers, split_outliers
+from utils import mean, stdev, relative_change, remove_outliers,\
+ split_outliers, classify_anomalies
def generate_tables(spec, data):
@@ -774,60 +775,50 @@ def table_performance_trending_dashboard(table, input_data):
tbl_lst = list()
for tst_name in tbl_dict.keys():
- if len(tbl_dict[tst_name]["data"]) > 2:
-
- pd_data = pd.Series(tbl_dict[tst_name]["data"])
- data_t, _ = split_outliers(pd_data, outlier_const=1.5,
- window=table["window"])
- last_key = data_t.keys()[-1]
- win_size = min(data_t.size, table["window"])
- win_first_idx = data_t.size - win_size
- key_14 = data_t.keys()[win_first_idx]
- long_win_size = min(data_t.size, table["long-trend-window"])
- median_t = data_t.rolling(window=win_size, min_periods=2).median()
- stdev_t = data_t.rolling(window=win_size, min_periods=2).std()
- median_first_idx = median_t.size - long_win_size
- try:
- max_median = max(
- [x for x in median_t.values[median_first_idx:-win_size]
- if not isnan(x)])
- except ValueError:
- max_median = nan
- try:
- last_median_t = median_t[last_key]
- except KeyError:
- last_median_t = nan
- try:
- median_t_14 = median_t[key_14]
- except KeyError:
- median_t_14 = nan
-
- # Classification list:
- classification_lst = list()
- for build_nr, value in data_t.iteritems():
- if isnan(median_t[build_nr]) \
- or isnan(stdev_t[build_nr]) \
- or isnan(value):
- classification_lst.append("outlier")
- elif value < (median_t[build_nr] - 3 * stdev_t[build_nr]):
- classification_lst.append("regression")
- elif value > (median_t[build_nr] + 3 * stdev_t[build_nr]):
- classification_lst.append("progression")
- else:
- classification_lst.append("normal")
+ if len(tbl_dict[tst_name]["data"]) < 3:
+ continue
+
+ pd_data = pd.Series(tbl_dict[tst_name]["data"])
+ data_t, _ = split_outliers(pd_data, outlier_const=1.5,
+ window=table["window"])
+ last_key = data_t.keys()[-1]
+ win_size = min(data_t.size, table["window"])
+ win_first_idx = data_t.size - win_size
+ key_14 = data_t.keys()[win_first_idx]
+ long_win_size = min(data_t.size, table["long-trend-window"])
+ median_t = data_t.rolling(window=win_size, min_periods=2).median()
+ median_first_idx = median_t.size - long_win_size
+ try:
+ max_median = max(
+ [x for x in median_t.values[median_first_idx:-win_size]
+ if not isnan(x)])
+ except ValueError:
+ max_median = nan
+ try:
+ last_median_t = median_t[last_key]
+ except KeyError:
+ last_median_t = nan
+ try:
+ median_t_14 = median_t[key_14]
+ except KeyError:
+ median_t_14 = nan
- if isnan(last_median_t) or isnan(median_t_14) or median_t_14 == 0.0:
- rel_change_last = nan
- else:
- rel_change_last = round(
- ((last_median_t - median_t_14) / median_t_14) * 100, 2)
+ if isnan(last_median_t) or isnan(median_t_14) or median_t_14 == 0.0:
+ rel_change_last = nan
+ else:
+ rel_change_last = round(
+ ((last_median_t - median_t_14) / median_t_14) * 100, 2)
- if isnan(max_median) or isnan(last_median_t) or max_median == 0.0:
- rel_change_long = nan
- else:
- rel_change_long = round(
- ((last_median_t - max_median) / max_median) * 100, 2)
+ if isnan(max_median) or isnan(last_median_t) or max_median == 0.0:
+ rel_change_long = nan
+ else:
+ rel_change_long = round(
+ ((last_median_t - max_median) / max_median) * 100, 2)
+
+ # Classification list:
+ classification_lst = classify_anomalies(data_t, window=14)
+ if classification_lst:
tbl_lst.append(
[tbl_dict[tst_name]["name"],
'-' if isnan(last_median_t) else
@@ -976,7 +967,7 @@ def table_performance_trending_dashboard_html(table, input_data):
if "64b" in item:
anchor += "64b-"
elif "78b" in item:
- anchor += "78b"
+ anchor += "78b-"
elif "imix" in item:
anchor += "imix-"
elif "9000b" in item:
diff --git a/resources/tools/presentation/specification_CPTA.yaml b/resources/tools/presentation/specification_CPTA.yaml
index 5200a446a6..1937a53735 100644
--- a/resources/tools/presentation/specification_CPTA.yaml
+++ b/resources/tools/presentation/specification_CPTA.yaml
@@ -615,7 +615,7 @@
- title: "VPP 1T1C IPv6 78B Packet Throughput - Trending"
output-file-name: "ip6-1t1c-x520"
data: "plot-performance-trending"
- filter: "'NIC_Intel-X520-DA2' and 'MRR' and '78B' and ('BASE' or 'SCALE' or 'FEATURE') and '1T1C' and 'IP6FWD' and not 'IPSEC' and not 'VHOST'"
+ filter: "'NIC_Intel-X520-DA2' and 'MRR' and '78B' and ('BASE' or 'SCALE' or 'FEATURE') and '1T1C' and 'IP6FWD' and not 'IPSEC' and not 'VHOST' and not 'SRv6'"
parameters:
- "result"
layout: "plot-cpta"
@@ -623,7 +623,7 @@
- title: "VPP 2T2C IPv6 78B Packet Throughput - Trending"
output-file-name: "ip6-2t2c-x520"
data: "plot-performance-trending"
- filter: "'NIC_Intel-X520-DA2' and 'MRR' and '78B' and ('BASE' or 'SCALE' or 'FEATURE') and '2T2C' and 'IP6FWD' and not 'IPSEC' and not 'VHOST'"
+ filter: "'NIC_Intel-X520-DA2' and 'MRR' and '78B' and ('BASE' or 'SCALE' or 'FEATURE') and '2T2C' and 'IP6FWD' and not 'IPSEC' and not 'VHOST' and not 'SRv6'"
parameters:
- "result"
layout: "plot-cpta"
@@ -631,7 +631,7 @@
- title: "VPP 4T4C IPv6 78B Packet Throughput - Trending"
output-file-name: "ip6-4t4c-x520"
data: "plot-performance-trending"
- filter: "'NIC_Intel-X520-DA2' and 'MRR' and '78B' and ('BASE' or 'SCALE' or 'FEATURE') and '4T4C' and 'IP6FWD' and not 'IPSEC' and not 'VHOST'"
+ filter: "'NIC_Intel-X520-DA2' and 'MRR' and '78B' and ('BASE' or 'SCALE' or 'FEATURE') and '4T4C' and 'IP6FWD' and not 'IPSEC' and not 'VHOST' and not 'SRv6'"
parameters:
- "result"
layout: "plot-cpta"
diff --git a/resources/tools/presentation/utils.py b/resources/tools/presentation/utils.py
index f32019dc2e..0a9d985a88 100644
--- a/resources/tools/presentation/utils.py
+++ b/resources/tools/presentation/utils.py
@@ -274,6 +274,48 @@ def archive_input_data(spec):
logging.info(" Done.")
+def classify_anomalies(data, window):
+ """Evaluates if the sample value is an outlier, regression, normal or
+ progression compared to the previous data within the window.
+ We use the intervals defined as:
+ - regress: less than trimmed moving median - 3 * stdev
+ - normal: between trimmed moving median - 3 * stdev and median + 3 * stdev
+ - progress: more than trimmed moving median + 3 * stdev
+ where stdev is trimmed moving standard deviation.
+
+ :param data: Full data set with the outliers replaced by nan.
+ :param window: Window size used to calculate moving average and moving
+ stdev.
+ :type data: pandas.Series
+ :type window: int
+ :returns: Evaluated results.
+ :rtype: list
+ """
+
+ if data.size < 3:
+ return None
+
+ win_size = data.size if data.size < window else window
+ tmm = data.rolling(window=win_size, min_periods=2).median()
+ tmstd = data.rolling(window=win_size, min_periods=2).std()
+
+ classification = ["normal", ]
+ first = True
+ for build, value in data.iteritems():
+ if first:
+ first = False
+ continue
+ if np.isnan(value) or np.isnan(tmm[build]) or np.isnan(tmstd[build]):
+ classification.append("outlier")
+ elif value < (tmm[build] - 3 * tmstd[build]):
+ classification.append("regression")
+ elif value > (tmm[build] + 3 * tmstd[build]):
+ classification.append("progression")
+ else:
+ classification.append("normal")
+ return classification
+
+
class Worker(multiprocessing.Process):
"""Worker class used to process tasks in separate parallel processes.
"""