CSIT-913: Continuous Trending, Analysis and Change Detection

- CSIT-915: LLD
- CSIT-917: Functions to evaluate the results according to the PASS / FAIL criteria
- CSIT-918: Sphinx configuration
- CSIT-948: Statistical functions
- CSIT-949: Data models for trending plots
- CSIT-950: Code trending plots
- CSIT-951: Static content
- CSIT-984: PAL Specification file
- CSIT-996: Download data from nexus

Change-Id: Icb9305945bb0f142135bb177cb8781ba0096280e
Signed-off-by: Tibor Frank <tifrank@cisco.com>
author: Tibor Frank <tifrank@cisco.com> 2018-03-01 14:52:47 +0100
committer: Tibor Frank <tifrank@cisco.com> 2018-03-21 15:43:10 +0000
commit: efdcf6470f6e15dcc918c70e5a61d10e10653f1e (patch)
tree: b2d5a5a2163b56d12f300c06119e925377674187 /resources/tools/presentation/generator_CPTA.py
parent: 70068307d35abcd40abbcd9275bcb836d2cdbae6 (diff)
1 files changed, 429 insertions, 0 deletions
diff --git a/resources/tools/presentation/generator_CPTA.py b/resources/tools/presentation/generator_CPTA.py
new file mode 100644
index 0000000000..c1b14f1f55
--- /dev/null
+++ b/resources/tools/presentation/generator_CPTA.py
@@ -0,0 +1,429 @@
+# Copyright (c) 2018 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Generation of Continuous Performance Trending and Analysis.
+"""
+
+import datetime
+import logging
+import plotly.offline as ploff
+import plotly.graph_objs as plgo
+import numpy as np
+import pandas as pd
+
+from collections import OrderedDict
+from utils import find_outliers, archive_input_data, execute_command
+
+
+# Template of the shell command which builds the html format of the report.
+# The placeholders {working_dir} and {build_dir} are filled in before the
+# command is executed.
+HTML_BUILDER = (
+    'sphinx-build -v -c conf_cpta -a '
+    '-b html -E '
+    '-t html '
+    '{working_dir} '
+    '{build_dir}/'
+)
+
+# Content of the .css patch file(s) written for the html format of the report
+THEME_OVERRIDES = (
+    "/* override table width restrictions */\n"
+    ".wy-nav-content {\n"
+    "    max-width: 1200px !important;\n"
+    "}\n"
+)
+
+# Colors of the traces; the n-th test in a chart uses the n-th color.
+COLORS = [
+    "SkyBlue", "Olive", "Purple", "Coral", "Indigo",
+    "Pink", "Chocolate", "Brown", "Magenta", "Cyan",
+    "Orange", "Black", "Violet", "Blue", "Yellow",
+]
+
+
+def generate_cpta(spec, data):
+    """Build the complete Continuous Performance Trending and Analysis output.
+
+    The steps are: generate all charts, run the html builder command, write
+    the two css patch files and archive the input data.
+
+    :param spec: Specification read from the specification file.
+    :param data: Full data set.
+    :type spec: Specification
+    :type data: InputData
+    :returns: The value returned by _generate_all_charts().
+    """
+
+    logging.info("Generating the Continuous Performance Trending and Analysis "
+                 "...")
+
+    # The charts must exist before the html is built.
+    ret_code = _generate_all_charts(spec, data)
+
+    paths = spec.environment["paths"]
+
+    # HTML_BUILDER contains no {date} placeholder, so the "date" argument is
+    # silently ignored by format().
+    build_cmd = HTML_BUILDER.format(
+        date=datetime.date.today().strftime('%d-%b-%Y'),
+        working_dir=paths["DIR[WORKING,SRC]"],
+        build_dir=paths["DIR[BUILD,HTML]"])
+    execute_command(build_cmd)
+
+    # Both css patch files get the very same content.
+    for css_key in ("DIR[CSS_PATCH_FILE]", "DIR[CSS_PATCH_FILE2]"):
+        with open(paths[css_key], "w") as css_file:
+            css_file.write(THEME_OVERRIDES)
+
+    archive_input_data(spec)
+
+    logging.info("Done.")
+
+    return ret_code
+
+
+def _select_data(in_data, period, fill_missing=False, use_first=False):
+    """Select the data from the full data set. The selection is done by picking
+    the samples depending on the period: period = 1: All, period = 2: every
+    second sample, period = 3: every third sample ... The samples are picked
+    backwards, starting with the highest index in the full set.
+
+    :param in_data: Full set of data, keyed by the integer index of the sample.
+    :param period: Sampling period, must be at least 1.
+    :param fill_missing: If the chosen sample is missing in the full set (or
+    its value is None), its nearest neighbour is used. The neighbour is
+    searched only within the "period" wide window around the chosen index,
+    i.e. from idx - (period - 1) // 2 to idx + period // 2, limited by the
+    first and the last index of the full set. If two neighbours are equally
+    distant, the one with the higher index is used.
+    :param use_first: Use the first sample even though it is not chosen.
+    :type in_data: OrderedDict
+    :type period: int
+    :type fill_missing: bool
+    :type use_first: bool
+    :returns: Reduced data sorted by the index. Empty if in_data is empty.
+    :rtype: OrderedDict
+    :raises ValueError: If the period is less than 1.
+    """
+
+    # A period below 1 would never move the index below first_idx.
+    if period < 1:
+        raise ValueError("The sampling period must be at least 1, got {0}.".
+                         format(period))
+    if not in_data:
+        return OrderedDict()
+
+    first_idx = min(in_data.keys())
+    last_idx = max(in_data.keys())
+
+    # Integer arithmetic only: round(idx - period / 2) gives different window
+    # bounds for odd and even idx under Python 3 (true division combined with
+    # round-half-to-even), so the window could reach into the next period.
+    reach_low = (period - 1) // 2
+    reach_high = period // 2
+
+    data_dict = dict()
+    if use_first:
+        data_dict[first_idx] = in_data[first_idx]
+
+    idx = last_idx
+    while idx >= first_idx:
+        data = in_data.get(idx, None)
+        if data is not None:
+            data_dict[idx] = data
+        elif fill_missing:
+            idx_low = max(first_idx, idx - reach_low)
+            idx_high = min(last_idx, idx + reach_high)
+
+            # Candidates ordered by growing distance from idx; for the same
+            # distance the higher index goes first.
+            candidates = list()
+            for inc in range(1, reach_high + 1):
+                if idx + inc <= idx_high:
+                    candidates.append(idx + inc)
+                if idx - inc >= idx_low:
+                    candidates.append(idx - inc)
+
+            nearest = next((i for i in candidates if i in in_data), None)
+            if nearest is not None:
+                data_dict[nearest] = in_data[nearest]
+        idx -= period
+
+    return OrderedDict(sorted(data_dict.items(), key=lambda t: t[0]))
+
+
+def _classify_sample(value, median, stdev):
+    """Classify one sample against the interval median +- 3 * stdev.
+
+    :param value: Evaluated sample.
+    :param median: Median the sample is compared to.
+    :param stdev: Standard deviation defining the width of the interval.
+    :type value: float
+    :type median: float
+    :type stdev: float
+    :returns: 0.0 if any of the arguments is NaN (the sample cannot be
+    classified), 0.33 for regress, 0.66 for normal, 1.0 for progress.
+    :rtype: float
+    """
+
+    # Every comparison with NaN is False, so without this check a NaN sample
+    # would fall through to "progress".
+    if np.isnan(value) or np.isnan(median) or np.isnan(stdev):
+        return 0.0
+    if value < median - 3 * stdev:
+        return 0.33
+    if value > median + 3 * stdev:
+        return 1.0
+    return 0.66
+
+
+def _evaluate_results(in_data, trimmed_data, window=10):
+    """Evaluates if the sample value is regress, normal or progress compared to
+    previous data within the window.
+    We use the intervals defined as:
+    - regress: less than median - 3 * stdev
+    - normal: between median - 3 * stdev and median + 3 * stdev
+    - progress: more than median + 3 * stdev
+
+    The results are coded as 0.0 (not classified), 0.33 (regress), 0.66
+    (normal) and 1.0 (progress).
+
+    If there are more than two samples, the result has one item per sample:
+    the first "window" samples (or all of them if there are fewer) are not
+    classified, each following sample is compared to the moving median of
+    in_data and the moving stdev of trimmed_data, both taken at the previous
+    sample.
+    If there are at most two samples, the result is [0.0, <class of the last
+    sample>]; the last sample is compared to the median and stdev of the whole
+    in_data. If this cannot be computed (no samples, or numpy raises
+    TypeError), the class is None.
+
+    :param in_data: Full data set.
+    :param trimmed_data: Full data set without the outliers. It is indexed by
+    position together with in_data, so it is assumed to have the same length
+    as in_data.
+    :param window: Window size used to calculate moving median and moving stdev.
+    :type in_data: pandas.Series
+    :type trimmed_data: pandas.Series
+    :type window: int
+    :returns: Evaluated results.
+    :rtype: list
+    """
+
+    # Nothing to evaluate; report it the same way as not computable data.
+    if in_data.size == 0:
+        return [0.0, None]
+
+    if len(in_data) > 2:
+        win_size = min(in_data.size, window)
+        results = [0.0, ] * win_size
+        m_vals = in_data.rolling(window=win_size).median().values
+        s_vals = trimmed_data.rolling(window=win_size,
+                                      min_periods=2).std().values
+        d_vals = in_data.values
+        for day in range(win_size, in_data.size):
+            results.append(_classify_sample(d_vals[day], m_vals[day - 1],
+                                            s_vals[day - 1]))
+        return results
+
+    results = [0.0, ]
+    try:
+        results.append(_classify_sample(in_data.values[-1],
+                                        np.median(in_data),
+                                        np.std(in_data)))
+    except TypeError:
+        results.append(None)
+    return results
+
+
+def _generate_trending_traces(in_data, period, moving_win_size=10,
+                              fill_missing=True, use_first=False,
+                              show_moving_median=True, name="", color=""):
+    """Generate the trending traces:
+     - samples,
+     - moving median (trending plot)
+     - outliers, regress, progress
+
+    :param in_data: Full data set.
+    :param period: Sampling period. If it is greater than 1, the data is
+    reduced by _select_data() first.
+    :param moving_win_size: Window size.
+    :param fill_missing: If the chosen sample is missing in the full set, its
+    nearest neighbour is used.
+    :param use_first: Use the first sample even though it is not chosen.
+    :param show_moving_median: Show moving median (trending plot).
+    :param name: Name of the plot
+    :param color: Name of the color for the plot.
+    :type in_data: OrderedDict
+    :type period: int
+    :type moving_win_size: int
+    :type fill_missing: bool
+    :type use_first: bool
+    :type show_moving_median: bool
+    :type name: str
+    :type color: str
+    :returns: Generated traces (list) and the evaluated result of the last
+    sample (float, or None if it could not be evaluated).
+    :rtype: tuple(traces, result)
+    """
+
+    if period > 1:
+        in_data = _select_data(in_data, period,
+                               fill_missing=fill_missing,
+                               use_first=use_first)
+
+    data_x = list(in_data.keys())
+    data_y = list(in_data.values())
+    data_pd = pd.Series(data_y, index=data_x)
+
+    t_data, outliers = find_outliers(data_pd)
+
+    results = _evaluate_results(data_pd, t_data, window=moving_win_size)
+
+    # Collect the samples to be highlighted: outliers (coded 0.0), regressions
+    # (0.33) and progressions (1.0). Plain lists are used as they are
+    # appended to in the loop.
+    outlier_keys = outliers.keys()
+    anomalies_x = list()
+    anomalies_y = list()
+    anomalies_res = list()
+    for idx, (key, value) in enumerate(in_data.items()):
+        if key in outlier_keys:
+            classification = 0.0
+        elif results[idx] in (0.33, 1.0):
+            classification = results[idx]
+        else:
+            continue
+        anomalies_x.append(key)
+        anomalies_y.append(value)
+        anomalies_res.append(classification)
+    # Extra color values without matching points; presumably they keep the
+    # color scale spanning the whole 0.0 - 1.0 range whatever classes are
+    # present in the data.
+    anomalies_res.extend([0.0, 0.33, 0.66, 1.0])
+
+    # Create traces
+    # Discrete color scale, one quarter of the range per class.
+    color_scale = [[0.00, "grey"],
+                   [0.25, "grey"],
+                   [0.25, "red"],
+                   [0.50, "red"],
+                   [0.50, "white"],
+                   [0.75, "white"],
+                   [0.75, "green"],
+                   [1.00, "green"]]
+
+    trace_samples = plgo.Scatter(
+        x=data_x,
+        y=data_y,
+        mode='markers',
+        line={
+            "width": 1
+        },
+        name="{name}-thput".format(name=name),
+        marker={
+            "size": 5,
+            "color": color,
+            "symbol": "circle",
+        },
+    )
+    traces = [trace_samples, ]
+
+    trace_anomalies = plgo.Scatter(
+        x=anomalies_x,
+        y=anomalies_y,
+        mode='markers',
+        hoverinfo="none",
+        showlegend=False,
+        legendgroup=name,
+        name="{name}: outliers".format(name=name),
+        marker={
+            "size": 15,
+            "symbol": "circle-open",
+            "color": anomalies_res,
+            "colorscale": color_scale,
+            "showscale": True,
+
+            "colorbar": {
+                "y": 0.5,
+                "len": 0.8,
+                "title": "Results Classification",
+                "titleside": 'right',
+                "titlefont": {
+                    "size": 14
+                },
+                "tickmode": 'array',
+                # The ticks are in the middle of the color scale quarters.
+                "tickvals": [0.125, 0.375, 0.625, 0.875],
+                "ticktext": ["Outlier", "Regress", "Normal", "Progress"],
+                "ticks": 'outside',
+                "ticklen": 0,
+                "tickangle": -90,
+                "thickness": 10
+            }
+        }
+    )
+    traces.append(trace_anomalies)
+
+    if show_moving_median:
+        data_mean_y = pd.Series(data_y).rolling(
+            window=moving_win_size).median()
+        trace_median = plgo.Scatter(
+            x=data_x,
+            y=data_mean_y,
+            mode='lines',
+            line={
+                "shape": "spline",
+                "width": 1,
+                "color": color,
+            },
+            name='{name}-trend'.format(name=name)
+        )
+        traces.append(trace_median)
+
+    return traces, results[-1]
+
+
+def _generate_chart(traces, layout, file_name):
+    """Put the pre-generated traces into a figure and write it to a file.
+
+    :param traces: Traces for the chart.
+    :param layout: Layout of the chart.
+    :param file_name: File name for the generated chart.
+    :type traces: list
+    :type layout: dict
+    :type file_name: str
+    """
+
+    logging.info("    Writing the file '{0}' ...".format(file_name))
+
+    # Build the figure and write it without opening it in a browser.
+    figure = plgo.Figure(data=traces, layout=layout)
+    ploff.plot(figure, filename=file_name, auto_open=False, show_link=False)
+
+
+def _generate_all_charts(spec, input_data):
+    """Generate all charts specified in the specification file.
+
+    For each chart and each of its periods one file is written. The result
+    evaluated for the last sample of every plotted test is collected and used
+    to compute the return code.
+
+    :param spec: Specification.
+    :param input_data: Full data set.
+    :type spec: Specification
+    :type input_data: InputData
+    :returns: 0 if no collected result is a regression (0.33), not classified
+    (0.0) or not evaluated (None); 1 otherwise, and also if there is no data
+    for a chart (the remaining charts are not generated in that case).
+    :rtype: int
+    """
+
+    results = list()
+    for chart in spec.cpta["plots"]:
+        logging.info("  Generating the chart '{0}' ...".
+                     format(chart.get("title", "")))
+
+        # Transform the data
+        data = input_data.filter_data(chart, continue_on_error=True)
+        if data is None:
+            logging.error("No data.")
+            # Report the failure by the return code, the same way as a
+            # failed evaluation.
+            return 1
+
+        # chart_data: test name -> {build index -> throughput or None}
+        chart_data = dict()
+        for job in data:
+            for idx, build in job.items():
+                for test in build:
+                    samples = chart_data.setdefault(test["name"],
+                                                    OrderedDict())
+                    try:
+                        throughput = test["result"]["throughput"]
+                    except (KeyError, TypeError):
+                        throughput = None
+                    samples[int(idx)] = throughput
+
+        for period in chart["periods"]:
+            # Generate traces:
+            traces = list()
+            win_size = 10 if period == 1 else 5 if period < 20 else 3
+            idx = 0
+            for test_name, test_data in chart_data.items():
+                if not test_data:
+                    logging.warning("No data for the test '{0}'".
+                                    format(test_name))
+                    continue
+                trace, result = _generate_trending_traces(
+                    test_data,
+                    period=period,
+                    moving_win_size=win_size,
+                    fill_missing=True,
+                    use_first=False,
+                    name='-'.join(test_name.split('-')[3:-1]),
+                    # Start again with the first color if there are more
+                    # tests than colors.
+                    color=COLORS[idx % len(COLORS)])
+                traces.extend(trace)
+                results.append(result)
+                idx += 1
+
+            # Generate the chart:
+            period_name = "Daily" if period == 1 else \
+                "Weekly" if period < 20 else "Monthly"
+            chart["layout"]["title"] = chart["title"].format(period=period_name)
+            _generate_chart(traces,
+                            chart["layout"],
+                            file_name="{0}-{1}-{2}{3}".format(
+                                spec.cpta["output-file"],
+                                chart["output-file-name"],
+                                period,
+                                spec.cpta["output-file-type"]))
+
+        logging.info("  Done.")
+
+    # FAIL if any result was not evaluated (None), not classified (0.0) or is
+    # a regression (0.33).
+    failed = any(item is None or item in (0.33, 0.0) for item in results)
+    logging.info("Evaluated results: {0}".format(results))
+    logging.info("Overall result: {0}".format("FAIL" if failed else "PASS"))
+
+    return 1 if failed else 0
author	Tibor Frank <tifrank@cisco.com>	2018-03-01 14:52:47 +0100
committer	Tibor Frank <tifrank@cisco.com>	2018-03-21 15:43:10 +0000
commit	efdcf6470f6e15dcc918c70e5a61d10e10653f1e (patch)
tree	b2d5a5a2163b56d12f300c06119e925377674187 /resources/tools/presentation/generator_CPTA.py
parent	70068307d35abcd40abbcd9275bcb836d2cdbae6 (diff)