aboutsummaryrefslogtreecommitdiffstats
path: root/csit.infra.etl
diff options
context:
space:
mode:
authorPeter Mikus <pmikus@cisco.com>2022-02-22 11:00:47 +0100
committerPeter Mikus <pmikus@cisco.com>2022-02-24 08:01:53 +0000
commit54fd337e30acc97434b33a6d0d3c19e4aa3051ab (patch)
tree5d131a1d7c0846a209e85380b66dfc7770cbe32e /csit.infra.etl
parentb2cb835b34c7404b2aaee3ec30700c67537da66d (diff)
feat(uti): etl
Signed-off-by: Peter Mikus <pmikus@cisco.com> Change-Id: I7cdcdcbf1e4986664d5d48357688185319f67b0c
Diffstat (limited to 'csit.infra.etl')
-rw-r--r--csit.infra.etl/.gitignore1
-rw-r--r--csit.infra.etl/coverage_device.json77
-rw-r--r--csit.infra.etl/coverage_mrr.json151
-rw-r--r--csit.infra.etl/coverage_ndrpdr.json679
-rw-r--r--csit.infra.etl/coverage_rls2202.py172
-rw-r--r--csit.infra.etl/coverage_soak.json221
-rw-r--r--csit.infra.etl/iterative_mrr.json151
-rw-r--r--csit.infra.etl/iterative_ndrpdr.json679
-rw-r--r--csit.infra.etl/iterative_rls2202.py172
-rw-r--r--csit.infra.etl/iterative_soak.json221
-rw-r--r--csit.infra.etl/local.py176
-rw-r--r--csit.infra.etl/stats.py133
-rw-r--r--csit.infra.etl/stats_sra.json41
-rw-r--r--csit.infra.etl/trending.py172
-rw-r--r--csit.infra.etl/trending_mrr.json169
-rw-r--r--csit.infra.etl/trending_ndrpdr.json697
-rw-r--r--csit.infra.etl/trending_soak.json239
17 files changed, 4151 insertions, 0 deletions
diff --git a/csit.infra.etl/.gitignore b/csit.infra.etl/.gitignore
new file mode 100644
index 0000000000..bccc1450f2
--- /dev/null
+++ b/csit.infra.etl/.gitignore
@@ -0,0 +1 @@
+*.parquet \ No newline at end of file
diff --git a/csit.infra.etl/coverage_device.json b/csit.infra.etl/coverage_device.json
new file mode 100644
index 0000000000..12989b30fa
--- /dev/null
+++ b/csit.infra.etl/coverage_device.json
@@ -0,0 +1,77 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/coverage_mrr.json b/csit.infra.etl/coverage_mrr.json
new file mode 100644
index 0000000000..13b80ec82e
--- /dev/null
+++ b/csit.infra.etl/coverage_mrr.json
@@ -0,0 +1,151 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "receive_rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "stdev",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "values",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "double",
+ "type": "array"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tags",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/coverage_ndrpdr.json b/csit.infra.etl/coverage_ndrpdr.json
new file mode 100644
index 0000000000..f188321bfb
--- /dev/null
+++ b/csit.infra.etl/coverage_ndrpdr.json
@@ -0,0 +1,679 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "latency_forward",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "pdr_0",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_10",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_50",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_90",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "latency_reverse",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "pdr_0",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_10",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_50",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_90",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "ndr",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tags",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/coverage_rls2202.py b/csit.infra.etl/coverage_rls2202.py
new file mode 100644
index 0000000000..97b0a12b88
--- /dev/null
+++ b/csit.infra.etl/coverage_rls2202.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ETL script running on top of the s3://"""
+
+from datetime import datetime, timedelta
+from json import load
+from os import environ
+from pytz import utc
+
+import awswrangler as wr
+from awswrangler.exceptions import EmptyDataFrame
+from awsglue.context import GlueContext
+from boto3 import session
+from pyspark.context import SparkContext
+from pyspark.sql.functions import col, lit, regexp_replace
+from pyspark.sql.types import StructType
+
+
+S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
+S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
+SUFFIX="info.json.gz"
+IGNORE_SUFFIX=[
+ "suite.info.json.gz",
+ "setup.info.json.gz",
+ "teardown.info.json.gz",
+ "suite.output.info.json.gz",
+ "setup.output.info.json.gz",
+ "teardown.output.info.json.gz"
+]
+LAST_MODIFIED_END=utc.localize(
+ datetime.strptime(
+ f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}",
+ "%Y-%m-%d"
+ )
+)
+LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1)
+
+
+def flatten_frame(nested_sdf):
+ """Unnest Spark DataFrame in case there nested structered columns.
+
+ :param nested_sdf: Spark DataFrame.
+ :type nested_sdf: DataFrame
+ :returns: Unnest DataFrame.
+ :rtype: DataFrame
+ """
+ stack = [((), nested_sdf)]
+ columns = []
+ while len(stack) > 0:
+ parents, sdf = stack.pop()
+ for column_name, column_type in sdf.dtypes:
+ if column_type[:6] == "struct":
+ projected_sdf = sdf.select(column_name + ".*")
+ stack.append((parents + (column_name,), projected_sdf))
+ else:
+ columns.append(
+ col(".".join(parents + (column_name,))) \
+ .alias("_".join(parents + (column_name,)))
+ )
+ return nested_sdf.select(columns)
+
+
+def process_json_to_dataframe(schema_name, paths):
+ """Processes JSON to Spark DataFrame.
+
+ :param schema_name: Schema name.
+ :type schema_name: string
+ :param paths: S3 paths to process.
+ :type paths: list
+ :returns: Spark DataFrame.
+ :rtype: DataFrame
+ """
+ drop_subset = [
+ "dut_type", "dut_version",
+ "passed",
+ "test_name_long", "test_name_short",
+ "test_type",
+ "version"
+ ]
+
+ # load schemas
+ with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema:
+ schema = StructType.fromJson(load(f_schema))
+
+ # create empty DF out of schemas
+ sdf = spark.createDataFrame([], schema)
+
+ # filter list
+ filtered = [path for path in paths if schema_name in path]
+
+ # select
+ for path in filtered:
+ print(path)
+
+ sdf_loaded = spark \
+ .read \
+ .option("multiline", "true") \
+ .schema(schema) \
+ .json(path) \
+ .withColumn("job", lit(path.split("/")[4])) \
+ .withColumn("build", lit(path.split("/")[5]))
+ sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)
+
+ # drop rows with all nulls and drop rows with null in critical frames
+ sdf = sdf.na.drop(how="all")
+ sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)
+
+ # flatten frame
+ sdf = flatten_frame(sdf)
+
+ return sdf
+
+
+# create SparkContext and GlueContext
+spark_context = SparkContext.getOrCreate()
+spark_context.setLogLevel("WARN")
+glue_context = GlueContext(spark_context)
+spark = glue_context.spark_session
+
+# files of interest
+paths = wr.s3.list_objects(
+ path=PATH,
+ suffix=SUFFIX,
+ last_modified_begin=LAST_MODIFIED_BEGIN,
+ last_modified_end=LAST_MODIFIED_END,
+ ignore_suffix=IGNORE_SUFFIX,
+ ignore_empty=True
+)
+
+filtered_paths = [path for path in paths if "report-coverage-2202" in path]
+
+for schema_name in ["mrr", "ndrpdr", "soak", "device"]:
+ out_sdf = process_json_to_dataframe(schema_name, filtered_paths)
+ out_sdf.show(truncate=False)
+ out_sdf.printSchema()
+ out_sdf = out_sdf \
+ .withColumn("year", lit(datetime.now().year)) \
+ .withColumn("month", lit(datetime.now().month)) \
+ .withColumn("day", lit(datetime.now().day)) \
+ .repartition(1)
+
+ try:
+ wr.s3.to_parquet(
+ df=out_sdf.toPandas(),
+ path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2202",
+ dataset=True,
+ partition_cols=["test_type", "year", "month", "day"],
+ compression="snappy",
+ use_threads=True,
+ mode="overwrite_partitions",
+ boto3_session=session.Session(
+ aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+ aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+ region_name=environ["OUT_AWS_DEFAULT_REGION"]
+ )
+ )
+ except EmptyDataFrame:
+ pass
diff --git a/csit.infra.etl/coverage_soak.json b/csit.infra.etl/coverage_soak.json
new file mode 100644
index 0000000000..59eaec2e9d
--- /dev/null
+++ b/csit.infra.etl/coverage_soak.json
@@ -0,0 +1,221 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "critical_rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tags",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/iterative_mrr.json b/csit.infra.etl/iterative_mrr.json
new file mode 100644
index 0000000000..13b80ec82e
--- /dev/null
+++ b/csit.infra.etl/iterative_mrr.json
@@ -0,0 +1,151 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "receive_rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "stdev",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "values",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "double",
+ "type": "array"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tags",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/iterative_ndrpdr.json b/csit.infra.etl/iterative_ndrpdr.json
new file mode 100644
index 0000000000..f188321bfb
--- /dev/null
+++ b/csit.infra.etl/iterative_ndrpdr.json
@@ -0,0 +1,679 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "latency_forward",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "pdr_0",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_10",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_50",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_90",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "latency_reverse",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "pdr_0",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_10",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_50",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_90",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "ndr",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tags",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/iterative_rls2202.py b/csit.infra.etl/iterative_rls2202.py
new file mode 100644
index 0000000000..13b6f4272c
--- /dev/null
+++ b/csit.infra.etl/iterative_rls2202.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ETL script running on top of the s3://"""
+
+from datetime import datetime, timedelta
+from json import load
+from os import environ
+from pytz import utc
+
+import awswrangler as wr
+from awswrangler.exceptions import EmptyDataFrame
+from awsglue.context import GlueContext
+from boto3 import session
+from pyspark.context import SparkContext
+from pyspark.sql.functions import col, lit, regexp_replace
+from pyspark.sql.types import StructType
+
+
+S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
+S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
+SUFFIX="info.json.gz"
+IGNORE_SUFFIX=[
+ "suite.info.json.gz",
+ "setup.info.json.gz",
+ "teardown.info.json.gz",
+ "suite.output.info.json.gz",
+ "setup.output.info.json.gz",
+ "teardown.output.info.json.gz"
+]
+LAST_MODIFIED_END=utc.localize(
+ datetime.strptime(
+ f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}",
+ "%Y-%m-%d"
+ )
+)
+LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1)
+
+
+def flatten_frame(nested_sdf):
+ """Unnest Spark DataFrame in case there nested structered columns.
+
+ :param nested_sdf: Spark DataFrame.
+ :type nested_sdf: DataFrame
+ :returns: Unnest DataFrame.
+ :rtype: DataFrame
+ """
+ stack = [((), nested_sdf)]
+ columns = []
+ while len(stack) > 0:
+ parents, sdf = stack.pop()
+ for column_name, column_type in sdf.dtypes:
+ if column_type[:6] == "struct":
+ projected_sdf = sdf.select(column_name + ".*")
+ stack.append((parents + (column_name,), projected_sdf))
+ else:
+ columns.append(
+ col(".".join(parents + (column_name,))) \
+ .alias("_".join(parents + (column_name,)))
+ )
+ return nested_sdf.select(columns)
+
+
+def process_json_to_dataframe(schema_name, paths):
+ """Processes JSON to Spark DataFrame.
+
+ :param schema_name: Schema name.
+ :type schema_name: string
+ :param paths: S3 paths to process.
+ :type paths: list
+ :returns: Spark DataFrame.
+ :rtype: DataFrame
+ """
+ drop_subset = [
+ "dut_type", "dut_version",
+ "passed",
+ "test_name_long", "test_name_short",
+ "test_type",
+ "version"
+ ]
+
+ # load schemas
+ with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema:
+ schema = StructType.fromJson(load(f_schema))
+
+ # create empty DF out of schemas
+ sdf = spark.createDataFrame([], schema)
+
+ # filter list
+ filtered = [path for path in paths if schema_name in path]
+
+ # select
+ for path in filtered:
+ print(path)
+
+ sdf_loaded = spark \
+ .read \
+ .option("multiline", "true") \
+ .schema(schema) \
+ .json(path) \
+ .withColumn("job", lit(path.split("/")[4])) \
+ .withColumn("build", lit(path.split("/")[5]))
+ sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)
+
+ # drop rows with all nulls and drop rows with null in critical frames
+ sdf = sdf.na.drop(how="all")
+ sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)
+
+ # flatten frame
+ sdf = flatten_frame(sdf)
+
+ return sdf
+
+
+# create SparkContext and GlueContext
+spark_context = SparkContext.getOrCreate()
+spark_context.setLogLevel("WARN")
+glue_context = GlueContext(spark_context)
+spark = glue_context.spark_session
+
+# files of interest
+paths = wr.s3.list_objects(
+ path=PATH,
+ suffix=SUFFIX,
+ last_modified_begin=LAST_MODIFIED_BEGIN,
+ last_modified_end=LAST_MODIFIED_END,
+ ignore_suffix=IGNORE_SUFFIX,
+ ignore_empty=True
+)
+
+filtered_paths = [path for path in paths if "report-iterative-2202" in path]
+
+for schema_name in ["mrr", "ndrpdr", "soak"]:
+ out_sdf = process_json_to_dataframe(schema_name, filtered_paths)
+ out_sdf.show(truncate=False)
+ out_sdf.printSchema()
+ out_sdf = out_sdf \
+ .withColumn("year", lit(datetime.now().year)) \
+ .withColumn("month", lit(datetime.now().month)) \
+ .withColumn("day", lit(datetime.now().day)) \
+ .repartition(1)
+
+ try:
+ wr.s3.to_parquet(
+ df=out_sdf.toPandas(),
+ path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2202",
+ dataset=True,
+ partition_cols=["test_type", "year", "month", "day"],
+ compression="snappy",
+ use_threads=True,
+ mode="overwrite_partitions",
+ boto3_session=session.Session(
+ aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+ aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+ region_name=environ["OUT_AWS_DEFAULT_REGION"]
+ )
+ )
+ except EmptyDataFrame:
+ pass
diff --git a/csit.infra.etl/iterative_soak.json b/csit.infra.etl/iterative_soak.json
new file mode 100644
index 0000000000..59eaec2e9d
--- /dev/null
+++ b/csit.infra.etl/iterative_soak.json
@@ -0,0 +1,221 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "critical_rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tags",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/local.py b/csit.infra.etl/local.py
new file mode 100644
index 0000000000..79e18d1c64
--- /dev/null
+++ b/csit.infra.etl/local.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ETL script running on top of the localhost"""
+
+from datetime import datetime
+from json import dump, load
+from pathlib import Path
+
+from awsglue.context import GlueContext
+from pyspark.context import SparkContext
+from pyspark.sql.functions import col, lit, regexp_replace
+from pyspark.sql.types import StructType
+
+
+PATH="/app/tests"
+SUFFIX="info.json"
+IGNORE_SUFFIX=[
+ "suite.info.json",
+ "setup.info.json",
+ "teardown.info.json",
+ "suite.output.info.json",
+ "setup.output.info.json",
+ "teardown.output.info.json"
+]
+
+
+def schema_dump(schema, option):
+ """Dumps Spark DataFrame schema into JSON file.
+
+ :param schema: DataFrame schema.
+ :type schema: StructType
+ :param option: File name suffix for the DataFrame schema.
+ :type option: string
+ """
+ with open(f"trending_{option}.json", "w", encoding="UTF-8") as f_schema:
+ dump(schema.jsonValue(), f_schema, indent=4, sort_keys=True)
+
+
+def schema_load(option):
+ """Loads Spark DataFrame schema from JSON file.
+
+ :param option: File name suffix for the DataFrame schema.
+ :type option: string
+ :returns: DataFrame schema.
+ :rtype: StructType
+ """
+ with open(f"trending_{option}.json", "r", encoding="UTF-8") as f_schema:
+ return StructType.fromJson(load(f_schema))
+
+
+def schema_dump_from_json(option):
+ """Loads JSON with data and dumps Spark DataFrame schema into JSON file.
+
+ :param option: File name suffix for the JSON data.
+ :type option: string
+ """
+ schema_dump(spark \
+ .read \
+ .option("multiline", "true") \
+ .json(f"data_{option}.json") \
+ .schema, option
+ )
+
+
+def flatten_frame(nested_sdf):
+ """Unnest Spark DataFrame in case there nested structered columns.
+
+ :param nested_sdf: Spark DataFrame.
+ :type nested_sdf: DataFrame
+ :returns: Unnest DataFrame.
+ :rtype: DataFrame
+ """
+ stack = [((), nested_sdf)]
+ columns = []
+ while len(stack) > 0:
+ parents, sdf = stack.pop()
+ for column_name, column_type in sdf.dtypes:
+ if column_type[:6] == "struct":
+ projected_sdf = sdf.select(column_name + ".*")
+ stack.append((parents + (column_name,), projected_sdf))
+ else:
+ columns.append(
+ col(".".join(parents + (column_name,))) \
+ .alias("_".join(parents + (column_name,)))
+ )
+ return nested_sdf.select(columns)
+
+
+def process_json_to_dataframe(schema_name, paths):
+ """Processes JSON to Spark DataFrame.
+
+ :param schema_name: Schema name.
+ :type schema_name: string
+ :param paths: S3 paths to process.
+ :type paths: list
+ :returns: Spark DataFrame.
+ :rtype: DataFrame
+ """
+ drop_subset = [
+ "dut_type", "dut_version",
+ "passed",
+ "test_name_long", "test_name_short",
+ "test_type",
+ "version"
+ ]
+
+ # load schemas
+ schema = schema_load(schema_name)
+
+ # create empty DF out of schemas
+ sdf = spark.createDataFrame([], schema)
+
+ # filter list
+ filtered = [path for path in paths if schema_name in path]
+
+ # select
+ for path in filtered:
+ print(path)
+
+ sdf_loaded = spark \
+ .read \
+ .option("multiline", "true") \
+ .schema(schema) \
+ .json(path) \
+ .withColumn("job", lit("local")) \
+ .withColumn("build", lit("unknown"))
+ sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)
+
+ # drop rows with all nulls and drop rows with null in critical frames
+ sdf = sdf.na.drop(how="all")
+ sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)
+
+ # flatten frame
+ sdf = flatten_frame(sdf)
+
+ return sdf
+
+
+# create SparkContext and GlueContext
+spark_context = SparkContext.getOrCreate()
+spark_context.setLogLevel("WARN")
+glue_context = GlueContext(spark_context)
+spark = glue_context.spark_session
+
+# files of interest
+paths = []
+for file in Path(PATH).glob(f"**/*{SUFFIX}"):
+ if file.name not in IGNORE_SUFFIX:
+ paths.append(str(file))
+
+for schema_name in ["mrr", "ndrpdr", "soak"]:
+ out_sdf = process_json_to_dataframe(schema_name, paths)
+ out_sdf.show()
+ out_sdf.printSchema()
+ out_sdf \
+ .withColumn("year", lit(datetime.now().year)) \
+ .withColumn("month", lit(datetime.now().month)) \
+ .withColumn("day", lit(datetime.now().day)) \
+ .repartition(1) \
+ .write \
+ .partitionBy("test_type", "year", "month", "day") \
+ .mode("append") \
+ .parquet("local.parquet")
diff --git a/csit.infra.etl/stats.py b/csit.infra.etl/stats.py
new file mode 100644
index 0000000000..ab8bcafdeb
--- /dev/null
+++ b/csit.infra.etl/stats.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ETL script running on top of the s3://"""
+
+from datetime import datetime, timedelta
+from json import load
+from os import environ
+from pytz import utc
+
+import awswrangler as wr
+from awswrangler.exceptions import EmptyDataFrame
+from awsglue.context import GlueContext
+from boto3 import session
+from pyspark.context import SparkContext
+from pyspark.sql.functions import lit
+from pyspark.sql.types import StructType
+
+S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
+S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
+SUFFIX="suite.info.json.gz"
+IGNORE_SUFFIX=[]
+LAST_MODIFIED_END=utc.localize(
+ datetime.strptime(
+ f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}",
+ "%Y-%m-%d"
+ )
+)
+LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1)
+
+
+def process_json_to_dataframe(schema_name, paths):
+ """Processes JSON to Spark DataFrame.
+
+ :param schema_name: Schema name.
+ :type schema_name: string
+ :param paths: S3 paths to process.
+ :type paths: list
+ :returns: Spark DataFrame.
+ :rtype: DataFrame
+ """
+ drop_subset = [
+ "duration",
+ "version"
+ ]
+
+ # load schemas
+ with open(f"stats_{schema_name}.json", "r", encoding="UTF-8") as f_schema:
+ schema = StructType.fromJson(load(f_schema))
+
+ # create empty DF out of schemas
+ sdf = spark.createDataFrame([], schema)
+
+ # filter list
+ filtered = [path for path in paths if "tests/suite.info.json.gz" in path]
+
+ # select
+ for path in filtered:
+ print(path)
+
+ sdf_loaded = spark \
+ .read \
+ .option("multiline", "true") \
+ .schema(schema) \
+ .json(path) \
+ .withColumn("job", lit(path.split("/")[4])) \
+ .withColumn("build", lit(path.split("/")[5])) \
+ .withColumn("stats_type", lit(schema_name))
+ sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)
+
+ # drop rows with all nulls and drop rows with null in critical frames
+ sdf = sdf.na.drop(how="all")
+ sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)
+
+ return sdf
+
+
+# create SparkContext and GlueContext
+spark_context = SparkContext.getOrCreate()
+spark_context.setLogLevel("WARN")
+glue_context = GlueContext(spark_context)
+spark = glue_context.spark_session
+
+# files of interest
+paths = wr.s3.list_objects(
+ path=PATH,
+ suffix=SUFFIX,
+ last_modified_begin=LAST_MODIFIED_BEGIN,
+ last_modified_end=LAST_MODIFIED_END,
+ ignore_suffix=IGNORE_SUFFIX,
+ ignore_empty=True
+)
+
+for schema_name in ["sra"]:
+ out_sdf = process_json_to_dataframe(schema_name, paths)
+ out_sdf.show(truncate=False)
+ out_sdf.printSchema()
+ out_sdf = out_sdf \
+ .withColumn("year", lit(datetime.now().year)) \
+ .withColumn("month", lit(datetime.now().month)) \
+ .withColumn("day", lit(datetime.now().day)) \
+ .repartition(1)
+
+ try:
+ wr.s3.to_parquet(
+ df=out_sdf.toPandas(),
+ path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/stats",
+ dataset=True,
+ partition_cols=["stats_type", "year", "month", "day"],
+ compression="snappy",
+ use_threads=True,
+ mode="overwrite_partitions",
+ boto3_session=session.Session(
+ aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+ aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+ region_name=environ["OUT_AWS_DEFAULT_REGION"]
+ )
+ )
+ except EmptyDataFrame:
+ pass
diff --git a/csit.infra.etl/stats_sra.json b/csit.infra.etl/stats_sra.json
new file mode 100644
index 0000000000..5f792e9bfe
--- /dev/null
+++ b/csit.infra.etl/stats_sra.json
@@ -0,0 +1,41 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "stats_type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/trending.py b/csit.infra.etl/trending.py
new file mode 100644
index 0000000000..bc27aaa063
--- /dev/null
+++ b/csit.infra.etl/trending.py
@@ -0,0 +1,172 @@
+#!/usr/bin/env python3
+
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""ETL script running on top of the s3://"""
+
+from datetime import datetime, timedelta
+from json import load
+from os import environ
+from pytz import utc
+
+import awswrangler as wr
+from awswrangler.exceptions import EmptyDataFrame
+from awsglue.context import GlueContext
+from boto3 import session
+from pyspark.context import SparkContext
+from pyspark.sql.functions import col, lit, regexp_replace
+from pyspark.sql.types import StructType
+
+
+S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
+S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
+SUFFIX="info.json.gz"
+IGNORE_SUFFIX=[
+ "suite.info.json.gz",
+ "setup.info.json.gz",
+ "teardown.info.json.gz",
+ "suite.output.info.json.gz",
+ "setup.output.info.json.gz",
+ "teardown.output.info.json.gz"
+]
+LAST_MODIFIED_END=utc.localize(
+ datetime.strptime(
+ f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}",
+ "%Y-%m-%d"
+ )
+)
+LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1)
+
+
+def flatten_frame(nested_sdf):
+ """Unnest Spark DataFrame in case there nested structered columns.
+
+ :param nested_sdf: Spark DataFrame.
+ :type nested_sdf: DataFrame
+ :returns: Unnest DataFrame.
+ :rtype: DataFrame
+ """
+ stack = [((), nested_sdf)]
+ columns = []
+ while len(stack) > 0:
+ parents, sdf = stack.pop()
+ for column_name, column_type in sdf.dtypes:
+ if column_type[:6] == "struct":
+ projected_sdf = sdf.select(column_name + ".*")
+ stack.append((parents + (column_name,), projected_sdf))
+ else:
+ columns.append(
+ col(".".join(parents + (column_name,))) \
+ .alias("_".join(parents + (column_name,)))
+ )
+ return nested_sdf.select(columns)
+
+
+def process_json_to_dataframe(schema_name, paths):
+ """Processes JSON to Spark DataFrame.
+
+ :param schema_name: Schema name.
+ :type schema_name: string
+ :param paths: S3 paths to process.
+ :type paths: list
+ :returns: Spark DataFrame.
+ :rtype: DataFrame
+ """
+ drop_subset = [
+ "dut_type", "dut_version",
+ "passed",
+ "test_name_long", "test_name_short",
+ "test_type",
+ "version"
+ ]
+
+ # load schemas
+ with open(f"trending_{schema_name}.json", "r", encoding="UTF-8") as f_schema:
+ schema = StructType.fromJson(load(f_schema))
+
+ # create empty DF out of schemas
+ sdf = spark.createDataFrame([], schema)
+
+ # filter list
+ filtered = [path for path in paths if schema_name in path]
+
+ # select
+ for path in filtered:
+ print(path)
+
+ sdf_loaded = spark \
+ .read \
+ .option("multiline", "true") \
+ .schema(schema) \
+ .json(path) \
+ .withColumn("job", lit(path.split("/")[4])) \
+ .withColumn("build", lit(path.split("/")[5]))
+ sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)
+
+ # drop rows with all nulls and drop rows with null in critical frames
+ sdf = sdf.na.drop(how="all")
+ sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)
+
+ # flatten frame
+ sdf = flatten_frame(sdf)
+
+ return sdf
+
+
+# create SparkContext and GlueContext
+spark_context = SparkContext.getOrCreate()
+spark_context.setLogLevel("WARN")
+glue_context = GlueContext(spark_context)
+spark = glue_context.spark_session
+
+# files of interest
+paths = wr.s3.list_objects(
+ path=PATH,
+ suffix=SUFFIX,
+ last_modified_begin=LAST_MODIFIED_BEGIN,
+ last_modified_end=LAST_MODIFIED_END,
+ ignore_suffix=IGNORE_SUFFIX,
+ ignore_empty=True
+)
+
+filtered_paths = [path for path in paths if "daily" in path or "weekly" in path]
+
+for schema_name in ["mrr", "ndrpdr", "soak"]:
+ out_sdf = process_json_to_dataframe(schema_name, filtered_paths)
+ out_sdf.show(truncate=False)
+ out_sdf.printSchema()
+ out_sdf = out_sdf \
+ .withColumn("year", lit(datetime.now().year)) \
+ .withColumn("month", lit(datetime.now().month)) \
+ .withColumn("day", lit(datetime.now().day)) \
+ .repartition(1)
+
+ try:
+ wr.s3.to_parquet(
+ df=out_sdf.toPandas(),
+ path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/trending",
+ dataset=True,
+ partition_cols=["test_type", "year", "month", "day"],
+ compression="snappy",
+ use_threads=True,
+ mode="overwrite_partitions",
+ boto3_session=session.Session(
+ aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+ aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+ region_name=environ["OUT_AWS_DEFAULT_REGION"]
+ )
+ )
+ except EmptyDataFrame:
+ pass
diff --git a/csit.infra.etl/trending_mrr.json b/csit.infra.etl/trending_mrr.json
new file mode 100644
index 0000000000..4e222d33d5
--- /dev/null
+++ b/csit.infra.etl/trending_mrr.json
@@ -0,0 +1,169 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "hosts",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "tg_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tg_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "receive_rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "stdev",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "values",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "double",
+ "type": "array"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "message",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/trending_ndrpdr.json b/csit.infra.etl/trending_ndrpdr.json
new file mode 100644
index 0000000000..fd833aa84c
--- /dev/null
+++ b/csit.infra.etl/trending_ndrpdr.json
@@ -0,0 +1,697 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "hosts",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "tg_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tg_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "latency_forward",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "pdr_0",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_10",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_50",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_90",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "latency_reverse",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "pdr_0",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_10",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_50",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr_90",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "avg",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "hdrh",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "max",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "min",
+ "nullable": true,
+ "type": "long"
+ },
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "ndr",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "pdr",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "message",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file
diff --git a/csit.infra.etl/trending_soak.json b/csit.infra.etl/trending_soak.json
new file mode 100644
index 0000000000..819d3142d3
--- /dev/null
+++ b/csit.infra.etl/trending_soak.json
@@ -0,0 +1,239 @@
+{
+ "fields": [
+ {
+ "metadata": {},
+ "name": "job",
+ "nullable": false,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "build",
+ "nullable": false,
+ "type": "integer"
+ },
+ {
+ "metadata": {},
+ "name": "duration",
+ "nullable": true,
+ "type": "double"
+ },
+ {
+ "metadata": {},
+ "name": "dut_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "dut_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "hosts",
+ "nullable": true,
+ "type": {
+ "containsNull": true,
+ "elementType": "string",
+ "type": "array"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "tg_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "tg_version",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "result",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "critical_rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "lower",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "upper",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "bandwidth",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "rate",
+ "nullable": true,
+ "type": {
+ "fields": [
+ {
+ "metadata": {},
+ "name": "unit",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "value",
+ "nullable": true,
+ "type": "double"
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "type",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+ }
+ },
+ {
+ "metadata": {},
+ "name": "start_time",
+ "nullable": true,
+ "type": "timestamp"
+ },
+ {
+ "metadata": {},
+ "name": "passed",
+ "nullable": true,
+ "type": "boolean"
+ },
+ {
+ "metadata": {},
+ "name": "test_id",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_long",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_name_short",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "test_type",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "message",
+ "nullable": true,
+ "type": "string"
+ },
+ {
+ "metadata": {},
+ "name": "version",
+ "nullable": true,
+ "type": "string"
+ }
+ ],
+ "type": "struct"
+} \ No newline at end of file