diff options
Diffstat (limited to 'csit.infra.dash/app/cdash/data/data.py')
-rw-r--r-- | csit.infra.dash/app/cdash/data/data.py | 23 |
1 file changed, 22 insertions, 1 deletion
diff --git a/csit.infra.dash/app/cdash/data/data.py b/csit.infra.dash/app/cdash/data/data.py index 2bf3649778..783ebe25ff 100644 --- a/csit.infra.dash/app/cdash/data/data.py +++ b/csit.infra.dash/app/cdash/data/data.py @@ -30,6 +30,12 @@ from pyarrow.lib import ArrowInvalid, ArrowNotImplementedError from ..utils.constants import Constants as C +# If True, pyarrow.Schema is generated. See also condition in the method +# _write_parquet_schema. +# To generate schema, select only one data set in data.yaml file. +GENERATE_SCHEMA = False + + class Data: """Gets the data from parquets and stores it for further use by dash applications. @@ -212,7 +218,10 @@ class Data: for itm in df: try: # Specify the condition or remove it: - if pd.api.types.is_string_dtype(itm["result_rate_unit"]): + if all(( + pd.api.types.is_string_dtype(itm["<column_name>"]), + pd.api.types.is_string_dtype(itm["telemetry"][0]) + )): print(pa.Schema.from_pandas(itm)) pa.parquet.write_metadata( pa.Schema.from_pandas(itm), @@ -357,6 +366,18 @@ class Data: time_period = days else: time_period = None + + if GENERATE_SCHEMA: + # Generate schema: + Data._write_parquet_schema( + path=data_set["path"], + partition_filter=partition_filter, + columns=data_set.get("columns", None), + days=time_period + ) + return + + # Read data: data = Data._create_dataframe_from_parquet( path=data_set["path"], partition_filter=partition_filter, |