From c31372861134f29ae6eec8d98874e030e57ab5f1 Mon Sep 17 00:00:00 2001 From: Tibor Frank Date: Fri, 27 Jan 2023 08:26:25 +0100 Subject: C-Dash: Pre-load the data from parquets Signed-off-by: Tibor Frank Change-Id: I20792792469c10d1db2e891b76879ec8ced1b7d3 --- csit.infra.dash/app/cdash/__init__.py | 29 ++- csit.infra.dash/app/cdash/data/data.py | 252 ++++++++----------------- csit.infra.dash/app/cdash/data/data.yaml | 172 +++++++++++++---- csit.infra.dash/app/cdash/debug.py | 48 ----- csit.infra.dash/app/cdash/news/layout.py | 57 ++---- csit.infra.dash/app/cdash/news/news.py | 17 +- csit.infra.dash/app/cdash/report/layout.py | 34 +--- csit.infra.dash/app/cdash/report/report.py | 11 +- csit.infra.dash/app/cdash/stats/layout.py | 45 ++--- csit.infra.dash/app/cdash/stats/stats.py | 17 +- csit.infra.dash/app/cdash/trending/layout.py | 39 +--- csit.infra.dash/app/cdash/trending/trending.py | 11 +- csit.infra.dash/app/cdash/utils/constants.py | 7 +- 13 files changed, 332 insertions(+), 407 deletions(-) delete mode 100644 csit.infra.dash/app/cdash/debug.py diff --git a/csit.infra.dash/app/cdash/__init__.py b/csit.infra.dash/app/cdash/__init__.py index c21e2c28d5..77722c78bd 100644 --- a/csit.infra.dash/app/cdash/__init__.py +++ b/csit.infra.dash/app/cdash/__init__.py @@ -15,11 +15,13 @@ """ import logging +import pandas as pd from flask import Flask from flask_assets import Environment, Bundle from .utils.constants import Constants as C +from .data.data import Data def init_app(): @@ -55,24 +57,41 @@ def init_app(): assets.register("sass_all", sass_bundle) sass_bundle.build() - # Set the time period for Trending if C.TIME_PERIOD is None or C.TIME_PERIOD > C.MAX_TIME_PERIOD: time_period = C.MAX_TIME_PERIOD else: time_period = C.TIME_PERIOD + data = Data( + data_spec_file=C.DATA_SPEC_FILE, + ).read_all_data(days=time_period) + # Import Dash applications. 
from .news.news import init_news - app = init_news(app) + app = init_news( + app, + data_stats=data["statistics"], + data_trending=data["trending"] + ) from .stats.stats import init_stats - app = init_stats(app, time_period=time_period) + app = init_stats( + app, + data_stats=data["statistics"], + data_trending=data["trending"] + ) from .trending.trending import init_trending - app = init_trending(app, time_period=time_period) + app = init_trending( + app, + data_trending=data["trending"] + ) from .report.report import init_report - app = init_report(app, releases=C.RELEASES) + app = init_report( + app, + data_iterative=data["iterative"] + ) return app diff --git a/csit.infra.dash/app/cdash/data/data.py b/csit.infra.dash/app/cdash/data/data.py index 7ddb44311a..8537cd8db1 100644 --- a/csit.infra.dash/app/cdash/data/data.py +++ b/csit.infra.dash/app/cdash/data/data.py @@ -15,13 +15,14 @@ """ import logging +import resource import awswrangler as wr +import pandas as pd from yaml import load, FullLoader, YAMLError from datetime import datetime, timedelta from time import time from pytz import UTC -from pandas import DataFrame from awswrangler.exceptions import EmptyDataFrame, NoFilesFound @@ -30,27 +31,24 @@ class Data: applications. """ - def __init__(self, data_spec_file: str, debug: bool=False) -> None: + def __init__(self, data_spec_file: str) -> None: """Initialize the Data object. :param data_spec_file: Path to file specifying the data to be read from parquets. - :param debug: If True, the debuf information is printed to stdout. :type data_spec_file: str - :type debug: bool :raises RuntimeError: if it is not possible to open data_spec_file or it is not a valid yaml file. 
""" # Inputs: self._data_spec_file = data_spec_file - self._debug = debug # Specification of data to be read from parquets: - self._data_spec = None + self._data_spec = list() # Data frame to keep the data: - self._data = None + self._data = pd.DataFrame() # Read from files: try: @@ -71,48 +69,6 @@ class Data: def data(self): return self._data - def _get_columns(self, parquet: str) -> list: - """Get the list of columns from the data specification file to be read - from parquets. - - :param parquet: The parquet's name. - :type parquet: str - :raises RuntimeError: if the parquet is not defined in the data - specification file or it does not have any columns specified. - :returns: List of columns. - :rtype: list - """ - - try: - return self._data_spec[parquet]["columns"] - except KeyError as err: - raise RuntimeError( - f"The parquet {parquet} is not defined in the specification " - f"file {self._data_spec_file} or it does not have any columns " - f"specified.\n{err}" - ) - - def _get_path(self, parquet: str) -> str: - """Get the path from the data specification file to be read from - parquets. - - :param parquet: The parquet's name. - :type parquet: str - :raises RuntimeError: if the parquet is not defined in the data - specification file or it does not have the path specified. - :returns: Path. 
- :rtype: str - """ - - try: - return self._data_spec[parquet]["path"] - except KeyError as err: - raise RuntimeError( - f"The parquet {parquet} is not defined in the specification " - f"file {self._data_spec_file} or it does not have the path " - f"specified.\n{err}" - ) - def _get_list_of_files(self, path, last_modified_begin=None, @@ -147,8 +103,7 @@ class Data: last_modified_begin=last_modified_begin, last_modified_end=last_modified_end ) - if self._debug: - logging.info("\n".join(file_list)) + logging.debug("\n".join(file_list)) except NoFilesFound as err: logging.error(f"No parquets found.\n{err}") except EmptyDataFrame as err: @@ -156,13 +111,16 @@ class Data: return file_list - def _create_dataframe_from_parquet(self, - path, partition_filter=None, - columns=None, - validate_schema=False, - last_modified_begin=None, - last_modified_end=None, - days=None) -> DataFrame: + def _create_dataframe_from_parquet( + self, + path, partition_filter=None, + columns=None, + categories=list(), + validate_schema=False, + last_modified_begin=None, + last_modified_end=None, + days=None + ) -> pd.DataFrame: """Read parquet stored in S3 compatible storage and returns Pandas Dataframe. @@ -176,6 +134,8 @@ class Data: extracted from S3. This function MUST return a bool, True to read the partition or False to ignore it. Ignored if dataset=False. :param columns: Names of columns to read from the file(s). + :param categories: List of columns names that should be returned as + pandas.Categorical. :param validate_schema: Check that individual file schemas are all the same / compatible. Schemas within a folder prefix should all be the same. 
Disable if you have schemas that are different and want to @@ -189,6 +149,7 @@ class Data: :type path: Union[str, List[str]] :type partition_filter: Callable[[Dict[str, str]], bool], optional :type columns: List[str], optional + :type categories: List[str], optional :type validate_schema: bool, optional :type last_modified_begin: datetime, optional :type last_modified_end: datetime, optional @@ -209,142 +170,89 @@ class Data: use_threads=True, dataset=True, columns=columns, + # categories=categories, partition_filter=partition_filter, last_modified_begin=last_modified_begin, last_modified_end=last_modified_end ) - if self._debug: - df.info(verbose=True, memory_usage='deep') - logging.info( - f"\nCreation of dataframe {path} took: {time() - start}\n" - ) + df.info(verbose=True, memory_usage="deep") + logging.debug( + f"\nCreation of dataframe {path} took: {time() - start}\n" + ) except NoFilesFound as err: logging.error(f"No parquets found.\n{err}") except EmptyDataFrame as err: logging.error(f"No data.\n{err}") - self._data = df return df - def check_datasets(self, days: int=None): - """Read structure from parquet. + def read_all_data(self, days: int=None) -> dict: + """Read all data necessary for all applications. - :param days: Number of days back to the past for which the data will be - read. + :param days: Number of days to filter. If None, all data will be + downloaded. :type days: int + :returns: A dictionary where keys are names of parquets and values are + the pandas dataframes with fetched data. + :rtype: dict(str: pandas.DataFrame) """ - self._get_list_of_files(path=self._get_path("trending"), days=days) - self._get_list_of_files(path=self._get_path("statistics"), days=days) - - def read_stats(self, days: int=None) -> tuple: - """Read statistics from parquet. - It reads from: - - Suite Result Analysis (SRA) partition, - - NDRPDR trending partition, - - MRR trending partition. 
+ self._data = dict() + self._data["trending"] = pd.DataFrame() + self._data["iterative"] = pd.DataFrame() + lst_trending = list() + lst_iterative = list() - :param days: Number of days back to the past for which the data will be - read. - :type days: int - :returns: tuple of pandas DataFrame-s with data read from specified - parquets. - :rtype: tuple of pandas DataFrame-s - """ - - l_stats = lambda part: True if part["stats_type"] == "sra" else False - l_mrr = lambda part: True if part["test_type"] == "mrr" else False - l_ndrpdr = lambda part: True if part["test_type"] == "ndrpdr" else False - - return ( - self._create_dataframe_from_parquet( - path=self._get_path("statistics"), - partition_filter=l_stats, - columns=self._get_columns("statistics"), - days=days - ), - self._create_dataframe_from_parquet( - path=self._get_path("statistics-trending-mrr"), - partition_filter=l_mrr, - columns=self._get_columns("statistics-trending-mrr"), - days=days - ), - self._create_dataframe_from_parquet( - path=self._get_path("statistics-trending-ndrpdr"), - partition_filter=l_ndrpdr, - columns=self._get_columns("statistics-trending-ndrpdr"), - days=days + for data_set in self._data_spec: + logging.info( + f"Reading data for {data_set['data_type']} " + f"{data_set['partition_name']} {data_set.get('release', '')}" ) - ) - - def read_trending_mrr(self, days: int=None) -> DataFrame: - """Read MRR data partition from parquet. + partition_filter = lambda part: True \ + if part[data_set["partition"]] == data_set["partition_name"] \ + else False - :param days: Number of days back to the past for which the data will be - read. - :type days: int - :returns: Pandas DataFrame with read data. 
- :rtype: DataFrame - """ - - lambda_f = lambda part: True if part["test_type"] == "mrr" else False - - return self._create_dataframe_from_parquet( - path=self._get_path("trending-mrr"), - partition_filter=lambda_f, - columns=self._get_columns("trending-mrr"), - days=days - ) - - def read_trending_ndrpdr(self, days: int=None) -> DataFrame: - """Read NDRPDR data partition from iterative parquet. - - :param days: Number of days back to the past for which the data will be - read. - :type days: int - :returns: Pandas DataFrame with read data. - :rtype: DataFrame - """ + data = self._create_dataframe_from_parquet( + path=data_set["path"], + partition_filter=partition_filter, + columns=data_set.get("columns", list()), + categories=data_set.get("categories", list()), + days=None if data_set["data_type"] == "iterative" else days + ) - lambda_f = lambda part: True if part["test_type"] == "ndrpdr" else False + if data_set["data_type"] == "statistics": + self._data["statistics"] = data + elif data_set["data_type"] == "trending": + lst_trending.append(data) + elif data_set["data_type"] == "iterative": + data["release"] = data_set["release"] + data["release"] = data["release"].astype("category") + lst_iterative.append(data) + else: + raise NotImplementedError( + f"The data type {data_set['data_type']} is not implemented." + ) - return self._create_dataframe_from_parquet( - path=self._get_path("trending-ndrpdr"), - partition_filter=lambda_f, - columns=self._get_columns("trending-ndrpdr"), - days=days + self._data["iterative"] = pd.concat( + lst_iterative, + ignore_index=True, + copy=False ) - - def read_iterative_mrr(self, release: str) -> DataFrame: - """Read MRR data partition from iterative parquet. - - :param release: The CSIT release from which the data will be read. - :type release: str - :returns: Pandas DataFrame with read data. 
- :rtype: DataFrame - """ - - lambda_f = lambda part: True if part["test_type"] == "mrr" else False - - return self._create_dataframe_from_parquet( - path=self._get_path("iterative-mrr").format(release=release), - partition_filter=lambda_f, - columns=self._get_columns("iterative-mrr") + self._data["trending"] = pd.concat( + lst_trending, + ignore_index=True, + copy=False ) - def read_iterative_ndrpdr(self, release: str) -> DataFrame: - """Read NDRPDR data partition from parquet. - - :param release: The CSIT release from which the data will be read. - :type release: str - :returns: Pandas DataFrame with read data. - :rtype: DataFrame - """ + for key in self._data.keys(): + logging.info( + f"\nData frame {key}:" + f"\n{self._data[key].memory_usage(deep=True)}\n" + ) + self._data[key].info(verbose=True, memory_usage="deep") - lambda_f = lambda part: True if part["test_type"] == "ndrpdr" else False + mem_alloc = \ + resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000 + logging.info(f"Memory allocation: {mem_alloc:.0f}MB") - return self._create_dataframe_from_parquet( - path=self._get_path("iterative-ndrpdr").format(release=release), - partition_filter=lambda_f, - columns=self._get_columns("iterative-ndrpdr") - ) + return self._data diff --git a/csit.infra.dash/app/cdash/data/data.yaml b/csit.infra.dash/app/cdash/data/data.yaml index ec7f7ef1dd..846be6b628 100644 --- a/csit.infra.dash/app/cdash/data/data.yaml +++ b/csit.infra.dash/app/cdash/data/data.yaml @@ -1,11 +1,42 @@ -statistics: +- data_type: statistics + partition: stats_type + partition_name: sra path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/stats columns: - job - build - start_time - duration -statistics-trending-ndrpdr: + categories: + - job + - build +- data_type: trending + partition: test_type + partition_name: mrr + path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending + columns: + - job + - build + - dut_type + - dut_version + - hosts + - start_time + - passed + - test_id + - 
version + - result_receive_rate_rate_avg + - result_receive_rate_rate_stdev + - result_receive_rate_rate_unit + - telemetry + categories: + - job + - build + - dut_type + - dut_version + - version +- data_type: trending + partition: test_type + partition_name: ndrpdr path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending columns: - job @@ -16,10 +47,33 @@ statistics-trending-ndrpdr: - start_time - passed - test_id + - version + - result_pdr_lower_rate_unit - result_pdr_lower_rate_value + - result_ndr_lower_rate_unit - result_ndr_lower_rate_value -statistics-trending-mrr: - path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending + - result_latency_reverse_pdr_90_hdrh + - result_latency_reverse_pdr_50_hdrh + - result_latency_reverse_pdr_10_hdrh + - result_latency_reverse_pdr_0_hdrh + - result_latency_forward_pdr_90_hdrh + - result_latency_forward_pdr_50_avg + - result_latency_forward_pdr_50_hdrh + - result_latency_forward_pdr_50_unit + - result_latency_forward_pdr_10_hdrh + - result_latency_forward_pdr_0_hdrh + - telemetry + categories: + - job + - build + - dut_type + - dut_version + - version +- data_type: iterative + partition: test_type + partition_name: mrr + release: rls2206 + path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2206 columns: - job - build @@ -29,15 +83,47 @@ statistics-trending-mrr: - start_time - passed - test_id + - version - result_receive_rate_rate_avg -trending: - path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending + - result_receive_rate_rate_stdev + - result_receive_rate_rate_unit + - result_receive_rate_rate_values + categories: + - job + - build + - dut_type + - dut_version + - version +- data_type: iterative + partition: test_type + partition_name: mrr + release: rls2210 + path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2210 columns: - job - build + - dut_type + - dut_version + - hosts - start_time -trending-mrr: - path: 
s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending + - passed + - test_id + - version + - result_receive_rate_rate_avg + - result_receive_rate_rate_stdev + - result_receive_rate_rate_unit + - result_receive_rate_rate_values + categories: + - job + - build + - dut_type + - dut_version + - version +- data_type: iterative + partition: test_type + partition_name: mrr + release: rls2302 + path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2302 columns: - job - build @@ -51,9 +137,18 @@ trending-mrr: - result_receive_rate_rate_avg - result_receive_rate_rate_stdev - result_receive_rate_rate_unit - - telemetry -trending-ndrpdr: - path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending + - result_receive_rate_rate_values + categories: + - job + - build + - dut_type + - dut_version + - version +- data_type: iterative + partition: test_type + partition_name: ndrpdr + release: rls2206 + path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2206 columns: - job - build @@ -68,19 +163,19 @@ trending-ndrpdr: - result_pdr_lower_rate_value - result_ndr_lower_rate_unit - result_ndr_lower_rate_value - - result_latency_reverse_pdr_90_hdrh - - result_latency_reverse_pdr_50_hdrh - - result_latency_reverse_pdr_10_hdrh - - result_latency_reverse_pdr_0_hdrh - - result_latency_forward_pdr_90_hdrh - result_latency_forward_pdr_50_avg - - result_latency_forward_pdr_50_hdrh - result_latency_forward_pdr_50_unit - - result_latency_forward_pdr_10_hdrh - - result_latency_forward_pdr_0_hdrh - - telemetry -iterative-mrr: - path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_{release} + categories: + - job + - build + - dut_type + - dut_version + - version +- data_type: iterative + partition: test_type + partition_name: ndrpdr + release: rls2210 + path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2210 columns: - job - build @@ -91,12 +186,23 @@ iterative-mrr: - passed - test_id - version - - result_receive_rate_rate_avg - - 
result_receive_rate_rate_stdev - - result_receive_rate_rate_unit - - result_receive_rate_rate_values -iterative-ndrpdr: - path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_{release} + - result_pdr_lower_rate_unit + - result_pdr_lower_rate_value + - result_ndr_lower_rate_unit + - result_ndr_lower_rate_value + - result_latency_forward_pdr_50_avg + - result_latency_forward_pdr_50_unit + categories: + - job + - build + - dut_type + - dut_version + - version +- data_type: iterative + partition: test_type + partition_name: ndrpdr + release: rls2302 + path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2302 columns: - job - build @@ -113,7 +219,9 @@ iterative-ndrpdr: - result_ndr_lower_rate_value - result_latency_forward_pdr_50_avg - result_latency_forward_pdr_50_unit -# coverage-ndrpdr: -# path: str -# columns: -# - list + categories: + - job + - build + - dut_type + - dut_version + - version diff --git a/csit.infra.dash/app/cdash/debug.py b/csit.infra.dash/app/cdash/debug.py deleted file mode 100644 index 4a354470b2..0000000000 --- a/csit.infra.dash/app/cdash/debug.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Debug class. Only for internal debugging puproses. 
-""" - -import logging - -from data.data import Data -from utils.constants import Constants as C - - -logging.basicConfig( - format=u"%(asctime)s: %(levelname)s: %(message)s", - datefmt=u"%Y/%m/%d %H:%M:%S", - level=logging.INFO -) - -# Set the time period for data fetch -if C.TIME_PERIOD is None or C.TIME_PERIOD > C.MAX_TIME_PERIOD: - time_period = C.MAX_TIME_PERIOD -else: - time_period = C.TIME_PERIOD - -#data_mrr = Data( -# data_spec_file=C.DATA_SPEC_FILE, -# debug=True -#).read_trending_mrr(days=time_period) -# -#data_ndrpdr = Data( -# data_spec_file=C.DATA_SPEC_FILE, -# debug=True -#).read_trending_ndrpdr(days=time_period) - -data_list = Data( - data_spec_file=C.DATA_SPEC_FILE, - debug=True -).check_datasets(days=time_period) \ No newline at end of file diff --git a/csit.infra.dash/app/cdash/news/layout.py b/csit.infra.dash/app/cdash/news/layout.py index 11151d727a..da36b1430c 100644 --- a/csit.infra.dash/app/cdash/news/layout.py +++ b/csit.infra.dash/app/cdash/news/layout.py @@ -14,7 +14,6 @@ """Plotly Dash HTML layout override. """ -import logging import pandas as pd import dash_bootstrap_components as dbc @@ -23,13 +22,10 @@ from dash import dcc from dash import html from dash import callback_context from dash import Input, Output, State -from yaml import load, FullLoader, YAMLError -from ..data.data import Data from ..utils.constants import Constants as C -from ..utils.utils import classify_anomalies, show_tooltip, gen_new_url +from ..utils.utils import classify_anomalies, gen_new_url from ..utils.url_processing import url_decode -from ..data.data import Data from .tables import table_summary @@ -37,8 +33,13 @@ class Layout: """The layout of the dash app and the callbacks. 
""" - def __init__(self, app: Flask, html_layout_file: str, data_spec_file: str, - tooltip_file: str) -> None: + def __init__( + self, + app: Flask, + data_stats: pd.DataFrame, + data_trending: pd.DataFrame, + html_layout_file: str + ) -> None: """Initialization: - save the input parameters, - read and pre-process the data, @@ -47,38 +48,22 @@ class Layout: - read tooltips from the tooltip file. :param app: Flask application running the dash application. + :param data_stats: Pandas dataframe with staistical data. + :param data_trending: Pandas dataframe with trending data. :param html_layout_file: Path and name of the file specifying the HTML layout of the dash application. - :param data_spec_file: Path and name of the file specifying the data to - be read from parquets for this application. - :param tooltip_file: Path and name of the yaml file specifying the - tooltips. :type app: Flask + :type data_stats: pandas.DataFrame + :type data_trending: pandas.DataFrame :type html_layout_file: str - :type data_spec_file: str - :type tooltip_file: str """ # Inputs self._app = app self._html_layout_file = html_layout_file - self._data_spec_file = data_spec_file - self._tooltip_file = tooltip_file - - # Read the data: - data_stats, data_mrr, data_ndrpdr = Data( - data_spec_file=self._data_spec_file, - debug=True - ).read_stats(days=C.NEWS_TIME_PERIOD) - - df_tst_info = pd.concat( - [data_mrr, data_ndrpdr], - ignore_index=True, - copy=False - ) # Prepare information for the control panel: - self._jobs = sorted(list(df_tst_info["job"].unique())) + self._jobs = sorted(list(data_trending["job"].unique())) d_job_info = { "job": list(), "dut": list(), @@ -119,7 +104,7 @@ class Layout: } for job in self._jobs: # Create lists of failed tests: - df_job = df_tst_info.loc[(df_tst_info["job"] == job)] + df_job = data_trending.loc[(data_trending["job"] == job)] last_build = str(max(pd.to_numeric(df_job["build"].unique()))) df_build = df_job.loc[(df_job["build"] == last_build)] 
tst_info["job"].append(job) @@ -230,7 +215,6 @@ class Layout: # Read from files: self._html_layout = str() - self._tooltips = dict() try: with open(self._html_layout_file, "r") as file_read: @@ -240,19 +224,6 @@ class Layout: f"Not possible to open the file {self._html_layout_file}\n{err}" ) - try: - with open(self._tooltip_file, "r") as file_read: - self._tooltips = load(file_read, Loader=FullLoader) - except IOError as err: - logging.warning( - f"Not possible to open the file {self._tooltip_file}\n{err}" - ) - except YAMLError as err: - logging.warning( - f"An error occurred while parsing the specification file " - f"{self._tooltip_file}\n{err}" - ) - self._default_period = C.NEWS_SHORT self._default_active = (False, True, False) diff --git a/csit.infra.dash/app/cdash/news/news.py b/csit.infra.dash/app/cdash/news/news.py index eda70d385f..b5cc5483a8 100644 --- a/csit.infra.dash/app/cdash/news/news.py +++ b/csit.infra.dash/app/cdash/news/news.py @@ -14,16 +14,25 @@ """Instantiate the News Dash application. """ import dash +import pandas as pd from ..utils.constants import Constants as C from .layout import Layout -def init_news(server): +def init_news( + server, + data_stats: pd.DataFrame, + data_trending: pd.DataFrame + ) -> dash.Dash: """Create a Plotly Dash dashboard. :param server: Flask server. :type server: Flask + :param data_stats: Pandas dataframe with staistical data. + :param data_trending: Pandas dataframe with trending data. + :type data_stats: pandas.DataFrame + :type data_trending: pandas.DataFrame :returns: Dash app server. 
:rtype: Dash """ @@ -37,9 +46,9 @@ def init_news(server): layout = Layout( app=dash_app, - html_layout_file=C.HTML_LAYOUT_FILE, - data_spec_file=C.DATA_SPEC_FILE, - tooltip_file=C.TOOLTIP_FILE, + data_stats=data_stats, + data_trending=data_trending, + html_layout_file=C.HTML_LAYOUT_FILE ) dash_app.index_string = layout.html_layout dash_app.layout = layout.add_content() diff --git a/csit.infra.dash/app/cdash/report/layout.py b/csit.infra.dash/app/cdash/report/layout.py index 50cf092ae1..495de36dec 100644 --- a/csit.infra.dash/app/cdash/report/layout.py +++ b/csit.infra.dash/app/cdash/report/layout.py @@ -33,7 +33,6 @@ from ..utils.trigger import Trigger from ..utils.utils import show_tooltip, label, sync_checklists, gen_new_url, \ generate_options, get_list_group_items from ..utils.url_processing import url_decode -from ..data.data import Data from .graphs import graph_iterative, select_iterative_data @@ -76,8 +75,14 @@ class Layout: """The layout of the dash app and the callbacks. """ - def __init__(self, app: Flask, releases: list, html_layout_file: str, - graph_layout_file: str, data_spec_file: str, tooltip_file: str) -> None: + def __init__( + self, + app: Flask, + data_iterative: pd.DataFrame, + html_layout_file: str, + graph_layout_file: str, + tooltip_file: str + ) -> None: """Initialization: - save the input parameters, - read and pre-process the data, @@ -86,45 +91,24 @@ class Layout: - read tooltips from the tooltip file. :param app: Flask application running the dash application. - :param releases: Lis of releases to be displayed. :param html_layout_file: Path and name of the file specifying the HTML layout of the dash application. :param graph_layout_file: Path and name of the file with layout of plot.ly graphs. - :param data_spec_file: Path and name of the file specifying the data to - be read from parquets for this application. :param tooltip_file: Path and name of the yaml file specifying the tooltips. 
:type app: Flask - :type releases: list :type html_layout_file: str :type graph_layout_file: str - :type data_spec_file: str :type tooltip_file: str """ # Inputs self._app = app - self.releases = releases self._html_layout_file = html_layout_file self._graph_layout_file = graph_layout_file - self._data_spec_file = data_spec_file self._tooltip_file = tooltip_file - - # Read the data: - self._data = pd.DataFrame() - for rls in releases: - data_mrr = Data(self._data_spec_file, True).\ - read_iterative_mrr(release=rls) - data_mrr["release"] = rls - data_ndrpdr = Data(self._data_spec_file, True).\ - read_iterative_ndrpdr(release=rls) - data_ndrpdr["release"] = rls - self._data = pd.concat( - [self._data, data_mrr, data_ndrpdr], - ignore_index=True, - copy=False - ) + self._data = data_iterative # Get structure of tests: tbs = dict() diff --git a/csit.infra.dash/app/cdash/report/report.py b/csit.infra.dash/app/cdash/report/report.py index 9e45483086..661bb2ce7f 100644 --- a/csit.infra.dash/app/cdash/report/report.py +++ b/csit.infra.dash/app/cdash/report/report.py @@ -14,12 +14,16 @@ """Instantiate the Report Dash application. """ import dash +import pandas as pd from ..utils.constants import Constants as C from .layout import Layout -def init_report(server, releases): +def init_report( + server, + data_iterative: pd.DataFrame + ) -> dash.Dash: """Create a Plotly Dash dashboard. :param server: Flask server. 
@@ -37,11 +41,10 @@ def init_report(server, releases): layout = Layout( app=dash_app, - releases=releases, + data_iterative=data_iterative, html_layout_file=C.HTML_LAYOUT_FILE, graph_layout_file=C.REPORT_GRAPH_LAYOUT_FILE, - data_spec_file=C.DATA_SPEC_FILE, - tooltip_file=C.TOOLTIP_FILE, + tooltip_file=C.TOOLTIP_FILE ) dash_app.index_string = layout.html_layout dash_app.layout = layout.add_content() diff --git a/csit.infra.dash/app/cdash/stats/layout.py b/csit.infra.dash/app/cdash/stats/layout.py index 116185d62c..ecd81bacbe 100644 --- a/csit.infra.dash/app/cdash/stats/layout.py +++ b/csit.infra.dash/app/cdash/stats/layout.py @@ -25,14 +25,12 @@ from dash import callback_context, no_update from dash import Input, Output, State from dash.exceptions import PreventUpdate from yaml import load, FullLoader, YAMLError -from datetime import datetime from ..utils.constants import Constants as C from ..utils.control_panel import ControlPanel from ..utils.utils import show_tooltip, gen_new_url, get_ttypes, get_cadences, \ get_test_beds, get_job, generate_options, set_job_params from ..utils.url_processing import url_decode -from ..data.data import Data from .graphs import graph_statistics, select_data @@ -40,9 +38,15 @@ class Layout: """The layout of the dash app and the callbacks. """ - def __init__(self, app: Flask, html_layout_file: str, - graph_layout_file: str, data_spec_file: str, tooltip_file: str, - time_period: int=None) -> None: + def __init__( + self, + app: Flask, + data_stats: pd.DataFrame, + data_trending: pd.DataFrame, + html_layout_file: str, + graph_layout_file: str, + tooltip_file: str + ) -> None: """Initialization: - save the input parameters, - read and pre-process the data, @@ -51,43 +55,27 @@ class Layout: - read tooltips from the tooltip file. :param app: Flask application running the dash application. + :param data_stats: Pandas dataframe with statistical data. + :param data_trending: Pandas dataframe with trending data. 
:param html_layout_file: Path and name of the file specifying the HTML layout of the dash application. :param graph_layout_file: Path and name of the file with layout of plot.ly graphs. - :param data_spec_file: Path and name of the file specifying the data to - be read from parquets for this application. :param tooltip_file: Path and name of the yaml file specifying the tooltips. - :param time_period: It defines the time period for data read from the - parquets in days from now back to the past. :type app: Flask + :type data_stats: pandas.DataFrame + :type data_trending: pandas.DataFrame :type html_layout_file: str :type graph_layout_file: str - :type data_spec_file: str :type tooltip_file: str - :type time_period: int """ # Inputs self._app = app self._html_layout_file = html_layout_file self._graph_layout_file = graph_layout_file - self._data_spec_file = data_spec_file self._tooltip_file = tooltip_file - self._time_period = time_period - - # Read the data: - data_stats, data_mrr, data_ndrpdr = Data( - data_spec_file=self._data_spec_file, - debug=True - ).read_stats(days=self._time_period) - - df_tst_info = pd.concat( - [data_mrr, data_ndrpdr], - ignore_index=True, - copy=False - ) # Pre-process the data: data_stats = data_stats[~data_stats.job.str.contains("-verify-")] @@ -95,11 +83,6 @@ class Layout: data_stats = data_stats[~data_stats.job.str.contains("-iterative-")] data_stats = data_stats[["job", "build", "start_time", "duration"]] - data_time_period = \ - (datetime.utcnow() - data_stats["start_time"].min()).days - if self._time_period > data_time_period: - self._time_period = data_time_period - jobs = sorted(list(data_stats["job"].unique())) d_job_info = { "job": list(), @@ -130,7 +113,7 @@ class Layout: "lst_failed": list() } for job in jobs: - df_job = df_tst_info.loc[(df_tst_info["job"] == job)] + df_job = data_trending.loc[(data_trending["job"] == job)] builds = df_job["build"].unique() for build in builds: df_build = df_job.loc[(df_job["build"] == 
build)] diff --git a/csit.infra.dash/app/cdash/stats/stats.py b/csit.infra.dash/app/cdash/stats/stats.py index 062e6b0bba..fdeef8b2f7 100644 --- a/csit.infra.dash/app/cdash/stats/stats.py +++ b/csit.infra.dash/app/cdash/stats/stats.py @@ -14,16 +14,25 @@ """Instantiate the Statistics Dash application. """ import dash +import pandas as pd from ..utils.constants import Constants as C from .layout import Layout -def init_stats(server, time_period=None): +def init_stats( + server, + data_stats: pd.DataFrame, + data_trending: pd.DataFrame + ) -> dash.Dash: """Create a Plotly Dash dashboard. :param server: Flask server. + :param data_stats: Pandas dataframe with statistical data. + :param data_trending: Pandas dataframe with trending data. :type server: Flask + :type data_stats: pandas.DataFrame + :type data_trending: pandas.DataFrame :returns: Dash app server. :rtype: Dash """ @@ -37,11 +46,11 @@ def init_stats(server, time_period=None): layout = Layout( app=dash_app, + data_stats=data_stats, + data_trending=data_trending, html_layout_file=C.HTML_LAYOUT_FILE, graph_layout_file=C.STATS_GRAPH_LAYOUT_FILE, - data_spec_file=C.DATA_SPEC_FILE, - tooltip_file=C.TOOLTIP_FILE, - time_period=time_period + tooltip_file=C.TOOLTIP_FILE ) dash_app.index_string = layout.html_layout dash_app.layout = layout.add_content() diff --git a/csit.infra.dash/app/cdash/trending/layout.py b/csit.infra.dash/app/cdash/trending/layout.py index 1866183da0..2d65567489 100644 --- a/csit.infra.dash/app/cdash/trending/layout.py +++ b/csit.infra.dash/app/cdash/trending/layout.py @@ -36,7 +36,6 @@ from ..utils.telemetry_data import TelemetryData from ..utils.utils import show_tooltip, label, sync_checklists, gen_new_url, \ generate_options, get_list_group_items from ..utils.url_processing import url_decode -from ..data.data import Data from .graphs import graph_trending, graph_hdrh_latency, select_trending_data, \ graph_tm_trending @@ -74,9 +73,13 @@ class Layout: """The layout of the dash app and the 
callbacks. """ - def __init__(self, app: Flask, html_layout_file: str, - graph_layout_file: str, data_spec_file: str, tooltip_file: str, - time_period: str=None) -> None: + def __init__(self, + app: Flask, + data_trending: pd.DataFrame, + html_layout_file: str, + graph_layout_file: str, + tooltip_file: str + ) -> None: """Initialization: - save the input parameters, - read and pre-process the data, @@ -85,48 +88,26 @@ class Layout: - read tooltips from the tooltip file. :param app: Flask application running the dash application. + :param data_trending: Pandas dataframe with trending data. :param html_layout_file: Path and name of the file specifying the HTML layout of the dash application. :param graph_layout_file: Path and name of the file with layout of plot.ly graphs. - :param data_spec_file: Path and name of the file specifying the data to - be read from parquets for this application. :param tooltip_file: Path and name of the yaml file specifying the tooltips. - :param time_period: It defines the time period for data read from the - parquets in days from now back to the past. 
:type app: Flask + :type data_trending: pandas.DataFrame :type html_layout_file: str :type graph_layout_file: str - :type data_spec_file: str :type tooltip_file: str - :type time_period: int """ # Inputs self._app = app + self._data = data_trending self._html_layout_file = html_layout_file self._graph_layout_file = graph_layout_file - self._data_spec_file = data_spec_file self._tooltip_file = tooltip_file - self._time_period = time_period - - # Read the data: - data_mrr = Data( - data_spec_file=self._data_spec_file, - debug=True - ).read_trending_mrr(days=self._time_period) - - data_ndrpdr = Data( - data_spec_file=self._data_spec_file, - debug=True - ).read_trending_ndrpdr(days=self._time_period) - - self._data = pd.concat( - [data_mrr, data_ndrpdr], - ignore_index=True, - copy=False - ) # Get structure of tests: tbs = dict() diff --git a/csit.infra.dash/app/cdash/trending/trending.py b/csit.infra.dash/app/cdash/trending/trending.py index a098a8d74f..a9dfbc1987 100644 --- a/csit.infra.dash/app/cdash/trending/trending.py +++ b/csit.infra.dash/app/cdash/trending/trending.py @@ -14,12 +14,16 @@ """Instantiate the Trending Dash application. """ import dash +import pandas as pd from ..utils.constants import Constants as C from .layout import Layout -def init_trending(server, time_period=None): +def init_trending( + server, + data_trending: pd.DataFrame + ) -> dash.Dash: """Create a Plotly Dash dashboard. :param server: Flask server. 
@@ -37,11 +41,10 @@ def init_trending(server, time_period=None): layout = Layout( app=dash_app, + data_trending=data_trending, html_layout_file=C.HTML_LAYOUT_FILE, graph_layout_file=C.TREND_GRAPH_LAYOUT_FILE, - data_spec_file=C.DATA_SPEC_FILE, - tooltip_file=C.TOOLTIP_FILE, - time_period=time_period + tooltip_file=C.TOOLTIP_FILE ) dash_app.index_string = layout.html_layout dash_app.layout = layout.add_content() diff --git a/csit.infra.dash/app/cdash/utils/constants.py b/csit.infra.dash/app/cdash/utils/constants.py index 95acc07c47..cca68a56b8 100644 --- a/csit.infra.dash/app/cdash/utils/constants.py +++ b/csit.infra.dash/app/cdash/utils/constants.py @@ -63,7 +63,7 @@ class Constants: # Maximal value of TIME_PERIOD for data read from the parquets in days. # Do not change without a good reason. - MAX_TIME_PERIOD = 150 # 180 + MAX_TIME_PERIOD = 130 # It defines the time period for data read from the parquets in days from # now back to the past. @@ -71,11 +71,6 @@ class Constants: # TIME_PERIOD = MAX_TIME_PERIOD - is the default value TIME_PERIOD = MAX_TIME_PERIOD # [days] - # List of releases used for iterative data processing. - # The releases MUST be in the order from the current (newest) to the last - # (oldest). - RELEASES = ["rls2302", "rls2210", "rls2206", ] - ############################################################################ # General, application wide, layout affecting constants. -- cgit 1.2.3-korg