author     Tibor Frank <tifrank@cisco.com>    2023-08-02 07:49:42 +0000
committer  Tibor Frank <tifrank@cisco.com>    2023-08-09 14:00:47 +0000
commit     8b54db58fca5841433e84fd222cbb2b4f5323a30 (patch)
tree       3edae56dfbc8b48c44229b78b8060b4ff83c1036
parent     ffca8b8655c772fc6273702cae2151e7ac7a846d (diff)
C-Dash: Update requirements file
+ add pyarrow exception processing
+ set 'dtype_backend="pyarrow"' for "wr.s3.read_parquet" method
+ add pyarrow schemas
+ improve console output

Change-Id: Iae7f1dda3de4804af7089b80b9001f05d3364b39
Signed-off-by: Tibor Frank <tifrank@cisco.com>
-rw-r--r-- csit.infra.dash/app/cdash/__init__.py | 96
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_device | bin 0 -> 5373 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_ndrpdr | bin 0 -> 11868 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_device | bin 0 -> 5373 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_ndrpdr | bin 0 -> 11868 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_mrr | bin 0 -> 7919 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_ndrpdr | bin 0 -> 13081 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_hoststack | bin 0 -> 7882 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_mrr | bin 0 -> 7919 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_ndrpdr | bin 0 -> 13081 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_hoststack | bin 0 -> 7882 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_mrr | bin 0 -> 7919 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_ndrpdr | bin 0 -> 13081 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/statistics | bin 0 -> 4398 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/trending_hoststack | bin 0 -> 9628 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/trending_mrr | bin 0 -> 8545 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/_metadata/trending_ndrpdr | bin 0 -> 13999 bytes
-rw-r--r-- csit.infra.dash/app/cdash/data/data.py | 214
-rw-r--r-- csit.infra.dash/app/cdash/data/data.yaml | 102
-rw-r--r-- csit.infra.dash/app/cdash/utils/constants.py | 3
-rw-r--r-- csit.infra.dash/app/requirements.txt | 88
21 files changed, 311 insertions, 192 deletions
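
In short, the commit switches data loading to pyarrow-backed dtypes and wraps the reads in pyarrow-specific exception handling. A minimal sketch of that pattern, not the committed code itself; the S3 path below is a placeholder, not one of the repository's real buckets:

import awswrangler as wr
import pandas as pd
from awswrangler.exceptions import EmptyDataFrame, NoFilesFound
from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError

df = pd.DataFrame()
try:
    # dtype_backend="pyarrow" makes the resulting pandas columns
    # pyarrow-backed, which is what this commit enables in data.py.
    df = wr.s3.read_parquet(
        path="s3://example-bucket/csit/parquet/trending",  # placeholder
        dataset=True,
        dtype_backend="pyarrow"
    )
except (ArrowInvalid, ArrowNotImplementedError) as err:
    # The pyarrow exception processing added by this commit.
    print(f"Reading of data from parquets FAILED.\n{repr(err)}")
except (NoFilesFound, EmptyDataFrame) as err:
    print(repr(err))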
diff --git a/csit.infra.dash/app/cdash/__init__.py b/csit.infra.dash/app/cdash/__init__.py
index dd1dc20169..796dcef01f 100644
--- a/csit.infra.dash/app/cdash/__init__.py
+++ b/csit.infra.dash/app/cdash/__init__.py
@@ -67,43 +67,65 @@ def init_app():
).read_all_data(days=time_period)
# Import Dash applications.
- from .news.news import init_news
- app = init_news(
- app,
- data_stats=data["statistics"],
- data_trending=data["trending"]
- )
-
- from .stats.stats import init_stats
- app = init_stats(
- app,
- data_stats=data["statistics"],
- data_trending=data["trending"]
- )
-
- from .trending.trending import init_trending
- app = init_trending(
- app,
- data_trending=data["trending"]
- )
-
- from .report.report import init_report
- app = init_report(
- app,
- data_iterative=data["iterative"]
- )
-
- from .comparisons.comparisons import init_comparisons
- app = init_comparisons(
- app,
- data_iterative=data["iterative"]
- )
-
- from .coverage.coverage import init_coverage
- app = init_coverage(
- app,
- data_coverage=data["coverage"]
- )
+ logging.info("\n\nStarting the applications:\n" + "-" * 26 + "\n")
+ if data["statistics"].empty or data["trending"].empty:
+ logging.error(
+ f'"{C.NEWS_TITLE}" application not loaded, no data available.'
+ )
+ logging.error(
+ f'"{C.STATS_TITLE}" application not loaded, no data available.'
+ )
+ else:
+ logging.info(C.NEWS_TITLE)
+ from .news.news import init_news
+ app = init_news(
+ app,
+ data_stats=data["statistics"],
+ data_trending=data["trending"]
+ )
+
+ logging.info(C.STATS_TITLE)
+ from .stats.stats import init_stats
+ app = init_stats(
+ app,
+ data_stats=data["statistics"],
+ data_trending=data["trending"]
+ )
+
+ if data["trending"].empty:
+ logging.error(
+ f'"{C.TREND_TITLE}" application not loaded, no data available.'
+ )
+ else:
+ logging.info(C.TREND_TITLE)
+ from .trending.trending import init_trending
+ app = init_trending(app, data_trending=data["trending"])
+
+ if data["iterative"].empty:
+ logging.error(
+ f'"{C.REPORT_TITLE}" application not loaded, no data available.'
+ )
+ logging.error(
+ f'"{C.COMP_TITLE}" application not loaded, no data available.'
+ )
+ else:
+ logging.info(C.REPORT_TITLE)
+ from .report.report import init_report
+ app = init_report(app, data_iterative=data["iterative"])
+
+ logging.info(C.COMP_TITLE)
+ from .comparisons.comparisons import init_comparisons
+ app = init_comparisons(app, data_iterative=data["iterative"])
+
+ if data["coverage"].empty:
+ logging.error((
+ f'"{C.COVERAGE_TITLE}" application not loaded, '
+ 'no data available.'
+ ))
+ else:
+ logging.info(C.COVERAGE_TITLE)
+ from .coverage.coverage import init_coverage
+ app = init_coverage(app, data_coverage=data["coverage"])
return app
diff --git a/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_device b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_device
new file mode 100644
index 0000000000..f619ce8a8e
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_device
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_ndrpdr b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_ndrpdr
new file mode 100644
index 0000000000..06bc618bea
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2302_ndrpdr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_device b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_device
new file mode 100644
index 0000000000..f619ce8a8e
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_device
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_ndrpdr b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_ndrpdr
new file mode 100644
index 0000000000..06bc618bea
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/coverage_rls2306_ndrpdr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_mrr b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_mrr
new file mode 100644
index 0000000000..96832850b1
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_mrr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_ndrpdr b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_ndrpdr
new file mode 100644
index 0000000000..2291bb8349
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2210_ndrpdr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_hoststack b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_hoststack
new file mode 100644
index 0000000000..993d16c18c
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_hoststack
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_mrr b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_mrr
new file mode 100644
index 0000000000..96832850b1
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_mrr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_ndrpdr b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_ndrpdr
new file mode 100644
index 0000000000..2291bb8349
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2302_ndrpdr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_hoststack b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_hoststack
new file mode 100644
index 0000000000..993d16c18c
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_hoststack
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_mrr b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_mrr
new file mode 100644
index 0000000000..96832850b1
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_mrr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_ndrpdr b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_ndrpdr
new file mode 100644
index 0000000000..2291bb8349
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/iterative_rls2306_ndrpdr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/statistics b/csit.infra.dash/app/cdash/data/_metadata/statistics
new file mode 100644
index 0000000000..208e119735
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/statistics
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/trending_hoststack b/csit.infra.dash/app/cdash/data/_metadata/trending_hoststack
new file mode 100644
index 0000000000..f6ab72be9a
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/trending_hoststack
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/trending_mrr b/csit.infra.dash/app/cdash/data/_metadata/trending_mrr
new file mode 100644
index 0000000000..64b0db0d0c
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/trending_mrr
Binary files differ
diff --git a/csit.infra.dash/app/cdash/data/_metadata/trending_ndrpdr b/csit.infra.dash/app/cdash/data/_metadata/trending_ndrpdr
new file mode 100644
index 0000000000..17dc30c3be
--- /dev/null
+++ b/csit.infra.dash/app/cdash/data/_metadata/trending_ndrpdr
Binary files differ
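
The binary files above are parquet schema snapshots; data.py below reads them with pa.parquet.read_schema() and writes them via write_metadata(). A minimal illustration of that round trip under stated assumptions: the dataframe content is made up, only the file location mirrors PATH_TO_SCHEMAS:

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Illustrative dataframe; the real schemas are derived from CSIT test data.
df = pd.DataFrame({"job": ["csit-vpp"], "build": ["42"]})
schema = pa.Schema.from_pandas(df)

# Persist the schema (no row data) as a parquet metadata file ...
pq.write_metadata(schema, "cdash/data/_metadata/trending_mrr")

# ... and read it back later, e.g. to pass as wr.s3.read_parquet(schema=...).
schema = pq.read_schema("cdash/data/_metadata/trending_mrr")
print(schema)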
diff --git a/csit.infra.dash/app/cdash/data/data.py b/csit.infra.dash/app/cdash/data/data.py
index a0d698e2b0..2bf3649778 100644
--- a/csit.infra.dash/app/cdash/data/data.py
+++ b/csit.infra.dash/app/cdash/data/data.py
@@ -18,12 +18,16 @@ import logging
import resource
import awswrangler as wr
import pandas as pd
+import pyarrow as pa
from yaml import load, FullLoader, YAMLError
from datetime import datetime, timedelta
from time import time
from pytz import UTC
from awswrangler.exceptions import EmptyDataFrame, NoFilesFound
+from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError
+
+from ..utils.constants import Constants as C
class Data:
@@ -118,14 +122,117 @@ class Data:
return file_list
+ def _validate_columns(self, data_type: str) -> str:
+ """Check if all columns are present in the dataframe.
+
+ :param data_type: The data type defined in data.yaml
+ :type data_type: str
+ :returns: Error message if validation fails, otherwise empty string.
+ :rtype: str
+ """
+ defined_columns = set()
+ for data_set in self._data_spec:
+ if data_set.get("data_type", str()) == data_type:
+ defined_columns.update(data_set.get("columns", set()))
+
+ if not defined_columns:
+ return "No columns defined in the data set(s)."
+
+ if self.data[data_type].empty:
+ return "No data."
+
+ ret_msg = str()
+ for col in defined_columns:
+ if col not in self.data[data_type].columns:
+ if not ret_msg:
+ ret_msg = "Missing columns: "
+ else:
+ ret_msg += ", "
+ ret_msg += f"{col}"
+ return ret_msg
+
@staticmethod
- def _create_dataframe_from_parquet(
- path, partition_filter=None,
+ def _write_parquet_schema(
+ path,
+ partition_filter=None,
columns=None,
validate_schema=False,
last_modified_begin=None,
last_modified_end=None,
days=None
+ ) -> None:
+ """Auxiliary function to write parquet schemas. Use it instead of
+ "_create_dataframe_from_parquet" in "read_all_data".
+
+ :param path: S3 prefix (accepts Unix shell-style wildcards)
+ (e.g. s3://bucket/prefix) or list of S3 object paths
+ (e.g. [s3://bucket/key0, s3://bucket/key1]).
+ :param partition_filter: Callback function filter to apply on PARTITION
+ columns (PUSH-DOWN filter). This function MUST receive a single
+ argument (Dict[str, str]) where keys are partition names and values
+ are partition values. Partition values will always be strings
+ extracted from S3. This function MUST return a bool, True to read
+ the partition or False to ignore it. Ignored if dataset=False.
+ :param columns: Names of columns to read from the file(s).
+ :param validate_schema: Check that individual file schemas are all the
+ same / compatible. Schemas within a folder prefix should all be the
+ same. Disable if you have schemas that are different and want to
+ disable this check.
+ :param last_modified_begin: Filter the s3 files by the Last modified
+ date of the object. The filter is applied only after listing all s3
+ files.
+ :param last_modified_end: Filter the s3 files by the Last modified date
+ of the object. The filter is applied only after listing all s3 files.
+ :param days: Number of days to filter.
+ :type path: Union[str, List[str]]
+ :type partition_filter: Callable[[Dict[str, str]], bool], optional
+ :type columns: List[str], optional
+ :type validate_schema: bool, optional
+ :type last_modified_begin: datetime, optional
+ :type last_modified_end: datetime, optional
+ :type days: integer, optional
+ """
+ if days:
+ last_modified_begin = datetime.now(tz=UTC) - timedelta(days=days)
+
+ df = wr.s3.read_parquet(
+ path=path,
+ path_suffix="parquet",
+ ignore_empty=True,
+ validate_schema=validate_schema,
+ use_threads=True,
+ dataset=True,
+ columns=columns,
+ partition_filter=partition_filter,
+ last_modified_begin=last_modified_begin,
+ last_modified_end=last_modified_end,
+ chunked=1
+ )
+
+ for itm in df:
+ try:
+ # Specify the condition or remove it:
+ if pd.api.types.is_string_dtype(itm["result_rate_unit"]):
+ print(pa.Schema.from_pandas(itm))
+ pa.parquet.write_metadata(
+ pa.Schema.from_pandas(itm),
+ f"{C.PATH_TO_SCHEMAS}_tmp_schema"
+ )
+ print(itm)
+ break
+ except KeyError:
+ pass
+
+ @staticmethod
+ def _create_dataframe_from_parquet(
+ path,
+ partition_filter=None,
+ columns=None,
+ validate_schema=False,
+ last_modified_begin=None,
+ last_modified_end=None,
+ days=None,
+ schema=None
) -> pd.DataFrame:
"""Read parquet stored in S3 compatible storage and returns Pandas
Dataframe.
@@ -150,6 +257,7 @@ class Data:
:param last_modified_end: Filter the s3 files by the Last modified date
of the object. The filter is applied only after listing all s3 files.
:param days: Number of days to filter.
+ :param schema: Pyarrow schema to use when reading data from the parquet.
:type path: Union[str, List[str]]
:type partition_filter: Callable[[Dict[str, str]], bool], optional
:type columns: List[str], optional
@@ -157,6 +265,7 @@ class Data:
:type last_modified_begin: datetime, optional
:type last_modified_end: datetime, optional
:type days: integer, optional
+ :type schema: pa.Schema, optional
:returns: Pandas DataFrame or None if DataFrame cannot be fetched.
:rtype: DataFrame
"""
@@ -169,31 +278,38 @@ class Data:
path=path,
path_suffix="parquet",
ignore_empty=True,
+ schema=schema,
validate_schema=validate_schema,
use_threads=True,
dataset=True,
columns=columns,
partition_filter=partition_filter,
last_modified_begin=last_modified_begin,
- last_modified_end=last_modified_end
+ last_modified_end=last_modified_end,
+ dtype_backend="pyarrow"
)
+
df.info(verbose=True, memory_usage="deep")
logging.debug(
f"\nCreation of dataframe {path} took: {time() - start}\n"
)
+ except (ArrowInvalid, ArrowNotImplementedError) as err:
+ logging.error(f"Reading of data from parquets FAILED.\n{repr(err)}")
except NoFilesFound as err:
logging.error(
+ f"Reading of data from parquets FAILED.\n"
f"No parquets found in specified time period.\n"
f"Nr of days: {days}\n"
f"last_modified_begin: {last_modified_begin}\n"
- f"{err}"
+ f"{repr(err)}"
)
except EmptyDataFrame as err:
logging.error(
+ f"Reading of data from parquets FAILED.\n"
f"No data in parquets in specified time period.\n"
f"Nr of days: {days}\n"
f"last_modified_begin: {last_modified_begin}\n"
- f"{err}"
+ f"{repr(err)}"
)
return df
@@ -209,15 +325,31 @@ class Data:
:rtype: dict(str: pandas.DataFrame)
"""
- lst_trending = list()
- lst_iterative = list()
- lst_coverage = list()
+ data_lists = {
+ "statistics": list(),
+ "trending": list(),
+ "iterative": list(),
+ "coverage": list()
+ }
+ logging.info("\n\nReading data:\n" + "-" * 13 + "\n")
for data_set in self._data_spec:
logging.info(
- f"Reading data for {data_set['data_type']} "
- f"{data_set['partition_name']} {data_set.get('release', '')}"
+ f"\n\nReading data for {data_set['data_type']} "
+ f"{data_set['partition_name']} {data_set.get('release', '')}\n"
)
+ schema_file = data_set.get("schema", None)
+ if schema_file:
+ try:
+ schema = pa.parquet.read_schema(
+ f"{C.PATH_TO_SCHEMAS}{schema_file}"
+ )
+ except FileNotFoundError as err:
+ logging.error(repr(err))
+ logging.error("Proceeding without schema.")
+ schema = None
+ else:
+ schema = None
partition_filter = lambda part: True \
if part[data_set["partition"]] == data_set["partition_name"] \
else False
@@ -229,51 +361,37 @@ class Data:
path=data_set["path"],
partition_filter=partition_filter,
columns=data_set.get("columns", None),
- days=time_period
+ days=time_period,
+ schema=schema
)
-
- if data_set["data_type"] == "statistics":
- self._data["statistics"] = data
- elif data_set["data_type"] == "trending":
- lst_trending.append(data)
- elif data_set["data_type"] == "iterative":
- data["release"] = data_set["release"]
- data["release"] = data["release"].astype("category")
- lst_iterative.append(data)
- elif data_set["data_type"] == "coverage":
+ if data_set["data_type"] in ("iterative", "coverage"):
data["release"] = data_set["release"]
data["release"] = data["release"].astype("category")
- lst_coverage.append(data)
- else:
- raise NotImplementedError(
- f"The data type {data_set['data_type']} is not implemented."
- )
- self._data["iterative"] = pd.concat(
- lst_iterative,
- ignore_index=True,
- copy=False
- )
- self._data["trending"] = pd.concat(
- lst_trending,
- ignore_index=True,
- copy=False
- )
- self._data["coverage"] = pd.concat(
- lst_coverage,
- ignore_index=True,
- copy=False
- )
+ data_lists[data_set["data_type"]].append(data)
+ logging.info(
+ "\n\nData post-processing, validation and summary:\n" +
+ "-" * 45 + "\n"
+ )
for key in self._data.keys():
- logging.info(
- f"\nData frame {key}:"
- f"\n{self._data[key].memory_usage(deep=True)}\n"
- )
+ logging.info(f"\n\nDataframe {key}:\n")
+ self._data[key] = pd.concat(
+ data_lists[key],
+ ignore_index=True,
+ copy=False
+ )
self._data[key].info(verbose=True, memory_usage="deep")
+ err_msg = self._validate_columns(key)
+ if err_msg:
+ self._data[key] = pd.DataFrame()
+ logging.error(
+ f"Data validation FAILED.\n"
+ f"{err_msg}\n"
+ "Generated dataframe replaced by an empty dataframe."
+ )
- mem_alloc = \
- resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
- logging.info(f"Memory allocation: {mem_alloc:.0f}MB")
+ mem_alloc = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1000
+ logging.info(f"\n\nMemory allocation: {mem_alloc:.0f}MB\n")
return self._data
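
For reference, the partition_filter callback documented in the hunks above is just a predicate over partition name/value strings. A sketch matching one data.yaml entry (partition: test_type, partition_name: mrr); the function name and typing are illustrative only:

from typing import Dict

def partition_filter(part: Dict[str, str]) -> bool:
    # awswrangler calls this for every partition; values are always
    # strings extracted from S3. Return True to read the partition.
    return part["test_type"] == "mrr"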
diff --git a/csit.infra.dash/app/cdash/data/data.yaml b/csit.infra.dash/app/cdash/data/data.yaml
index 3100f1813d..03b61ac6b2 100644
--- a/csit.infra.dash/app/cdash/data/data.yaml
+++ b/csit.infra.dash/app/cdash/data/data.yaml
@@ -2,6 +2,7 @@
partition: stats_type
partition_name: sra
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/stats
+ schema: statistics
columns:
- job
- build
@@ -11,6 +12,7 @@
partition: test_type
partition_name: mrr
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending
+ schema: trending_mrr
columns:
- job
- build
@@ -29,6 +31,7 @@
partition: test_type
partition_name: ndrpdr
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending
+ schema: trending_ndrpdr
columns:
- job
- build
@@ -58,6 +61,7 @@
partition: test_type
partition_name: hoststack
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/trending
+ schema: trending_hoststack
columns:
- job
- build
@@ -81,50 +85,12 @@
partition_name: mrr
release: rls2210
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2210
+ schema: iterative_rls2210_mrr
columns:
- job
- build
- dut_type
- dut_version
- - hosts
- - start_time
- - passed
- - test_id
- - version
- - result_receive_rate_rate_avg
- - result_receive_rate_rate_stdev
- - result_receive_rate_rate_unit
- - result_receive_rate_rate_values
-- data_type: iterative
- partition: test_type
- partition_name: mrr
- release: rls2302
- path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2302
- columns:
- - job
- - build
- - dut_type
- - dut_version
- - hosts
- - start_time
- - passed
- - test_id
- - version
- - result_receive_rate_rate_avg
- - result_receive_rate_rate_stdev
- - result_receive_rate_rate_unit
- - result_receive_rate_rate_values
-- data_type: iterative
- partition: test_type
- partition_name: mrr
- release: rls2306
- path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2306
- columns:
- - job
- - build
- - dut_type
- - dut_version
- - hosts
- start_time
- passed
- test_id
@@ -138,12 +104,12 @@
partition_name: ndrpdr
release: rls2210
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2210
+ schema: iterative_rls2210_ndrpdr
columns:
- job
- build
- dut_type
- dut_version
- - hosts
- start_time
- passed
- test_id
@@ -164,15 +130,34 @@
- result_latency_forward_pdr_0_hdrh
- data_type: iterative
partition: test_type
+ partition_name: mrr
+ release: rls2302
+ path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2302
+ schema: iterative_rls2302_mrr
+ columns:
+ - job
+ - build
+ - dut_type
+ - dut_version
+ - start_time
+ - passed
+ - test_id
+ - version
+ - result_receive_rate_rate_avg
+ - result_receive_rate_rate_stdev
+ - result_receive_rate_rate_unit
+ - result_receive_rate_rate_values
+- data_type: iterative
+ partition: test_type
partition_name: ndrpdr
release: rls2302
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2302
+ schema: iterative_rls2302_ndrpdr
columns:
- job
- build
- dut_type
- dut_version
- - hosts
- start_time
- passed
- test_id
@@ -196,19 +181,17 @@
partition_name: hoststack
release: rls2302
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2302
+ schema: iterative_rls2302_hoststack
columns:
- job
- build
- dut_type
- dut_version
- - hosts
- tg_type
- result_bandwidth_unit
- result_bandwidth_value
- result_rate_unit
- result_rate_value
- # - result_latency_unit
- # - result_latency_value
- start_time
- passed
- test_id
@@ -218,13 +201,13 @@
partition_name: ndrpdr
release: rls2302
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/coverage_rls2302
+ schema: coverage_rls2302_ndrpdr
columns:
- job
- build
- dut_type
- dut_version
- tg_type
- - hosts
- start_time
- passed
- test_id
@@ -245,6 +228,7 @@
partition_name: device
release: rls2302
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/coverage_rls2302
+ schema: coverage_rls2302_device
columns:
- job
- build
@@ -255,15 +239,34 @@
- version
- data_type: iterative
partition: test_type
+ partition_name: mrr
+ release: rls2306
+ path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2306
+ schema: iterative_rls2306_mrr
+ columns:
+ - job
+ - build
+ - dut_type
+ - dut_version
+ - start_time
+ - passed
+ - test_id
+ - version
+ - result_receive_rate_rate_avg
+ - result_receive_rate_rate_stdev
+ - result_receive_rate_rate_unit
+ - result_receive_rate_rate_values
+- data_type: iterative
+ partition: test_type
partition_name: ndrpdr
release: rls2306
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2306
+ schema: iterative_rls2306_ndrpdr
columns:
- job
- build
- dut_type
- dut_version
- - hosts
- start_time
- passed
- test_id
@@ -287,19 +290,17 @@
partition_name: hoststack
release: rls2306
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/iterative_rls2306
+ schema: iterative_rls2306_hoststack
columns:
- job
- build
- dut_type
- dut_version
- - hosts
- tg_type
- result_bandwidth_unit
- result_bandwidth_value
- result_rate_unit
- result_rate_value
- # - result_latency_unit
- # - result_latency_value
- start_time
- passed
- test_id
@@ -309,13 +310,13 @@
partition_name: ndrpdr
release: rls2306
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/coverage_rls2306
+ schema: coverage_rls2306_ndrpdr
columns:
- job
- build
- dut_type
- dut_version
- tg_type
- - hosts
- start_time
- passed
- test_id
@@ -336,6 +337,7 @@
partition_name: device
release: rls2306
path: s3://fdio-docs-s3-cloudfront-index/csit/parquet/coverage_rls2306
+ schema: coverage_rls2306_device
columns:
- job
- build
diff --git a/csit.infra.dash/app/cdash/utils/constants.py b/csit.infra.dash/app/cdash/utils/constants.py
index 926e490f6a..376fefaf3c 100644
--- a/csit.infra.dash/app/cdash/utils/constants.py
+++ b/csit.infra.dash/app/cdash/utils/constants.py
@@ -65,6 +65,9 @@ class Constants:
# Data to be downloaded from the parquets specification file.
DATA_SPEC_FILE = "cdash/data/data.yaml"
+ # Path to schemas to use when reading data from the parquet.
+ PATH_TO_SCHEMAS = "cdash/data/_metadata/"
+
# The file with tooltips.
TOOLTIP_FILE = "cdash/utils/tooltips.yaml"
diff --git a/csit.infra.dash/app/requirements.txt b/csit.infra.dash/app/requirements.txt
index 74f7c5a7e2..96665c82ad 100644
--- a/csit.infra.dash/app/requirements.txt
+++ b/csit.infra.dash/app/requirements.txt
@@ -1,83 +1,57 @@
# AWS integration
-awswrangler==2.19.0
-pip==23.0
+awswrangler==3.2.1
+pip==23.2.1
# Dash integration
-dash==2.8.1
-dash-bootstrap-components==1.3.1
+dash==2.11.1
+dash-bootstrap-components==1.4.2
dash-core-components==2.0.0
dash-html-components==2.0.0
dash-renderer==1.9.1
dash-table==5.0.0
-numpy==1.23.4
-pandas==1.5.1
-plotly==5.13.0
+numpy==1.25.2
+pandas==2.0.3
+plotly==5.15.0
numexpr==2.8.4
-Bottleneck==1.3.6
+Bottleneck==1.3.7
# Web Application Integration
-Flask==2.2.3
+Flask==2.2.5
Flask-Assets==2.0
Flask-Compress==1.13
Jinja2==3.1.2
libsass==0.22.0
-uWSGI==2.0.21
+uWSGI==2.0.22
webassets==2.0
# Web Application Dependencies
-hdrhistogram==0.10.1
+hdrhistogram==0.10.2
python-dateutil==2.8.2
-PyYAML==6.0
+PyYAML==6.0.1
# PIP freeze
-aenum==3.1.11
-aiohttp==3.8.1
-aiosignal==1.3.1
-asn1crypto==1.5.1
-async-timeout==4.0.2
-attrs==22.2.0
-backoff==2.2.1
-beautifulsoup4==4.11.2
-boto3==1.26.73
-botocore==1.29.73
+ansi2html==1.8.0
+boto3==1.28.17
+botocore==1.31.17
Brotli==1.0.9
-certifi==2022.12.7
-charset-normalizer==2.1.1
-click==8.1.3
-decorator==5.1.1
-et-xmlfile==1.1.0
-frozenlist==1.3.3
-gremlinpython==3.6.2
+certifi==2023.7.22
+charset-normalizer==3.2.0
+click==8.1.6
idna==3.4
-importlib-metadata==6.0.0
-isodate==0.6.1
itsdangerous==2.1.2
jmespath==1.0.1
-jsonpath-ng==1.5.3
-lxml==4.9.2
-MarkupSafe==2.1.2
-multidict==6.0.4
-nest-asyncio==1.5.6
-openpyxl==3.0.10
-opensearch-py==2.1.1
-packaging==23.0
+MarkupSafe==2.1.3
+nest-asyncio==1.5.7
+packaging==23.1
pbr==5.11.1
-pg8000==1.29.4
-ply==3.11
-progressbar2==4.2.0
-pyarrow==10.0.1
-PyMySQL==1.0.2
-python-utils==3.5.2
-pytz==2022.7.1
-redshift-connector==2.0.910
-requests==2.28.2
-requests-aws4auth==1.2.2
-s3transfer==0.6.0
-scramp==1.4.4
+pyarrow==12.0.1
+pytz==2023.3
+requests==2.31.0
+retrying==1.3.4
+s3transfer==0.6.1
six==1.16.0
-soupsieve==2.4
-tenacity==8.2.1
-urllib3==1.26.14
-Werkzeug==2.2.3
-yarl==1.8.2
-zipp==3.13.0
\ No newline at end of file
+tenacity==8.2.2
+typing_extensions==4.7.1
+tzdata==2023.3
+urllib3==1.26.16
+Werkzeug==2.2.3
\ No newline at end of file