From 2ebf9e56d0dd200fa09979505a2da070b39da63f Mon Sep 17 00:00:00 2001 From: pmikus Date: Wed, 22 May 2024 14:30:49 +0200 Subject: feat(etl): Release pipelines Signed-off-by: pmikus Change-Id: I4ce20267b4747bf1901b6175e0ec5936b583a510 --- csit.infra.etl/coverage_device_rls2402.py | 170 ------------------------- csit.infra.etl/coverage_device_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/coverage_hoststack_rls2402.py | 171 ------------------------- csit.infra.etl/coverage_hoststack_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/coverage_mrr_rls2402.py | 170 ------------------------- csit.infra.etl/coverage_mrr_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/coverage_ndrpdr_rls2402.py | 170 ------------------------- csit.infra.etl/coverage_ndrpdr_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/coverage_reconf_rls2402.py | 171 ------------------------- csit.infra.etl/coverage_reconf_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/coverage_soak_rls2402.py | 170 ------------------------- csit.infra.etl/coverage_soak_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/iterative_hoststack_rls2402.py | 171 ------------------------- csit.infra.etl/iterative_hoststack_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/iterative_mrr_rls2402.py | 170 ------------------------- csit.infra.etl/iterative_mrr_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/iterative_ndrpdr_rls2402.py | 170 ------------------------- csit.infra.etl/iterative_ndrpdr_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/iterative_reconf_rls2402.py | 171 ------------------------- csit.infra.etl/iterative_reconf_rls2406.py | 177 ++++++++++++++++++++++++++ csit.infra.etl/iterative_soak_rls2402.py | 170 ------------------------- csit.infra.etl/iterative_soak_rls2406.py | 176 +++++++++++++++++++++++++ csit.infra.etl/stats.py | 22 ++-- csit.infra.etl/trending_hoststack.py | 23 ++-- csit.infra.etl/trending_mrr.py | 22 ++-- csit.infra.etl/trending_ndrpdr.py | 22 ++-- csit.infra.etl/trending_reconf.py | 22 ++-- csit.infra.etl/trending_soak.py | 23 ++-- 28 files changed, 2021 insertions(+), 1924 deletions(-) delete mode 100644 csit.infra.etl/coverage_device_rls2402.py create mode 100644 csit.infra.etl/coverage_device_rls2406.py delete mode 100644 csit.infra.etl/coverage_hoststack_rls2402.py create mode 100644 csit.infra.etl/coverage_hoststack_rls2406.py delete mode 100644 csit.infra.etl/coverage_mrr_rls2402.py create mode 100644 csit.infra.etl/coverage_mrr_rls2406.py delete mode 100644 csit.infra.etl/coverage_ndrpdr_rls2402.py create mode 100644 csit.infra.etl/coverage_ndrpdr_rls2406.py delete mode 100644 csit.infra.etl/coverage_reconf_rls2402.py create mode 100644 csit.infra.etl/coverage_reconf_rls2406.py delete mode 100644 csit.infra.etl/coverage_soak_rls2402.py create mode 100644 csit.infra.etl/coverage_soak_rls2406.py delete mode 100644 csit.infra.etl/iterative_hoststack_rls2402.py create mode 100644 csit.infra.etl/iterative_hoststack_rls2406.py delete mode 100644 csit.infra.etl/iterative_mrr_rls2402.py create mode 100644 csit.infra.etl/iterative_mrr_rls2406.py delete mode 100644 csit.infra.etl/iterative_ndrpdr_rls2402.py create mode 100644 csit.infra.etl/iterative_ndrpdr_rls2406.py delete mode 100644 csit.infra.etl/iterative_reconf_rls2402.py create mode 100644 csit.infra.etl/iterative_reconf_rls2406.py delete mode 100644 csit.infra.etl/iterative_soak_rls2402.py create mode 100644 csit.infra.etl/iterative_soak_rls2406.py (limited to 'csit.infra.etl') 
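Note on the new rls2406 pipelines: the bucket names are now taken from the environment with the previous hard-coded values as defaults, and the output boto3 session falls back to the default credential chain when the OUT_AWS_* variables are not set. A minimal sketch of that pattern, assembled from the added files for reference (the resolve_session helper name is illustrative only and does not appear in the scripts):

    from os import environ
    from boto3 import session

    # Bucket names overridable via environment, defaulting to the fd.io buckets.
    S3_LOGS_BUCKET = environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
    S3_DOCS_BUCKET = environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")

    def resolve_session():
        """Build a boto3 session from OUT_AWS_* variables, falling back to the
        default credential chain (instance profile, shared config) when absent."""
        try:
            return session.Session(
                aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
                aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
                region_name=environ["OUT_AWS_DEFAULT_REGION"]
            )
        except KeyError:
            return session.Session()

The resolved session is then passed as boto3_session to wr.s3.to_parquet() when writing the coverage_rls2406 parquet datasets, as in the per-suite scripts below.
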
diff --git a/csit.infra.etl/coverage_device_rls2402.py b/csit.infra.etl/coverage_device_rls2402.py deleted file mode 100644 index 2db808164f..0000000000 --- a/csit.infra.etl/coverage_device_rls2402.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-vpp-device-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-coverage-2402" in path] - -out_sdf = process_json_to_dataframe("device", filtered_paths) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/coverage_device_rls2406.py b/csit.infra.etl/coverage_device_rls2406.py new file mode 100644 index 0000000000..04f4135851 --- /dev/null +++ b/csit.infra.etl/coverage_device_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-vpp-device-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-coverage-2406" in path] + +out_sdf = process_json_to_dataframe("device", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/coverage_hoststack_rls2402.py b/csit.infra.etl/coverage_hoststack_rls2402.py deleted file mode 100644 index 27eb9e8cc6..0000000000 --- a/csit.infra.etl/coverage_hoststack_rls2402.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-coverage-2402" in path] - -out_sdf = process_json_to_dataframe("hoststack", filtered_paths) -out_sdf.show(truncate=False) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/coverage_hoststack_rls2406.py b/csit.infra.etl/coverage_hoststack_rls2406.py new file mode 100644 index 0000000000..75edd983b8 --- /dev/null +++ b/csit.infra.etl/coverage_hoststack_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-coverage-2406" in path] + +out_sdf = process_json_to_dataframe("hoststack", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/coverage_mrr_rls2402.py b/csit.infra.etl/coverage_mrr_rls2402.py deleted file mode 100644 index e68e4f0366..0000000000 --- a/csit.infra.etl/coverage_mrr_rls2402.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-coverage-2402" in path] - -out_sdf = process_json_to_dataframe("mrr", filtered_paths) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/coverage_mrr_rls2406.py b/csit.infra.etl/coverage_mrr_rls2406.py new file mode 100644 index 0000000000..b84c077308 --- /dev/null +++ b/csit.infra.etl/coverage_mrr_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-coverage-2406" in path] + +out_sdf = process_json_to_dataframe("mrr", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/coverage_ndrpdr_rls2402.py b/csit.infra.etl/coverage_ndrpdr_rls2402.py deleted file mode 100644 index 730e3ea748..0000000000 --- a/csit.infra.etl/coverage_ndrpdr_rls2402.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-coverage-2402" in path] - -out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/coverage_ndrpdr_rls2406.py b/csit.infra.etl/coverage_ndrpdr_rls2406.py new file mode 100644 index 0000000000..ee0f878833 --- /dev/null +++ b/csit.infra.etl/coverage_ndrpdr_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-coverage-2406" in path] + +out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/coverage_reconf_rls2402.py b/csit.infra.etl/coverage_reconf_rls2402.py deleted file mode 100644 index dc1f647ff1..0000000000 --- a/csit.infra.etl/coverage_reconf_rls2402.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-coverage-2402" in path] - -out_sdf = process_json_to_dataframe("reconf", filtered_paths) -out_sdf.show(truncate=False) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/coverage_reconf_rls2406.py b/csit.infra.etl/coverage_reconf_rls2406.py new file mode 100644 index 0000000000..33dbac72d2 --- /dev/null +++ b/csit.infra.etl/coverage_reconf_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-coverage-2406" in path] + +out_sdf = process_json_to_dataframe("reconf", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/coverage_soak_rls2402.py b/csit.infra.etl/coverage_soak_rls2402.py deleted file mode 100644 index 7d87afd952..0000000000 --- a/csit.infra.etl/coverage_soak_rls2402.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-coverage-2402" in path] - -out_sdf = process_json_to_dataframe("soak", filtered_paths) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/coverage_soak_rls2406.py b/csit.infra.etl/coverage_soak_rls2406.py new file mode 100644 index 0000000000..3b13c16229 --- /dev/null +++ b/csit.infra.etl/coverage_soak_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
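For context, the write side of every release script is awswrangler's dataset mode: the Spark frame is converted to pandas and written as Snappy-compressed Parquet, hive-partitioned by test_type plus the ingestion date, with overwrite_partitions so a rerun on the same day replaces only the partitions it touches. A small sketch under assumed names (the bucket and the tiny frame below are placeholders, not production data):

from datetime import datetime

import awswrangler as wr
import pandas as pd
from awswrangler.exceptions import EmptyDataFrame

# Placeholder frame standing in for out_sdf.toPandas().
df = pd.DataFrame({
    "test_type": ["soak"],
    "result_rate_value": [1.5e6],
    "year": [datetime.now().year],
    "month": [datetime.now().month],
    "day": [datetime.now().day],
})

try:
    wr.s3.to_parquet(
        df=df,
        path="s3://example-docs-bucket/csit/parquet/coverage_rls2406",  # placeholder bucket
        dataset=True,                                    # partitioned dataset mode
        partition_cols=["test_type", "year", "month", "day"],
        compression="snappy",
        use_threads=True,
        mode="overwrite_partitions",                     # replace only partitions present in df
    )
except EmptyDataFrame:
    # Nothing matched the filters; skip the write instead of failing the job.
    pass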
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"coverage_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-coverage-2406" in path] + +out_sdf = process_json_to_dataframe("soak", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/coverage_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/iterative_hoststack_rls2402.py b/csit.infra.etl/iterative_hoststack_rls2402.py deleted file mode 100644 index 1c74126c47..0000000000 --- a/csit.infra.etl/iterative_hoststack_rls2402.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
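One behavioural difference between the deleted rls2402 scripts and the new rls2406 ones is bucket selection: the bucket names are no longer hard-coded but read from the environment with the production values as defaults, so a staging run can redirect input and output without editing the script. The pattern, with descriptive comments added:

from os import environ

# Production FD.io buckets are the defaults; either can be overridden
# through the job environment for a staging or test run.
S3_LOGS_BUCKET = environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
S3_DOCS_BUCKET = environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")

# All performance job results live under the Jenkins prefix in the logs bucket.
PATH = f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"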
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-iterative-2402" in path] - -out_sdf = process_json_to_dataframe("hoststack", filtered_paths) -out_sdf.show(truncate=False) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/iterative_hoststack_rls2406.py b/csit.infra.etl/iterative_hoststack_rls2406.py new file mode 100644 index 0000000000..ebeade5571 --- /dev/null +++ b/csit.infra.etl/iterative_hoststack_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
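flatten_frame() is the only non-trivial transformation in these scripts: it walks the schema with an explicit stack, re-projects every struct column with parent.*, aliases each leaf as parent_child, and performs a single final select, so arbitrarily nested result JSON ends up as a flat table. A self-contained sketch of the same routine on a toy frame (requires a local Spark session; the column names are invented for the example):

from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.master("local[1]").appName("flatten-demo").getOrCreate()

# Toy nested frame: one struct column plus one plain column.
nested = spark.createDataFrame(
    [((1.5, "mpps"), "passed")],
    "result struct<rate:double, unit:string>, status string"
)

def flatten_frame(nested_sdf):
    """Return a frame with struct fields expanded into parent_child columns."""
    stack = [((), nested_sdf)]
    columns = []
    while stack:
        parents, sdf = stack.pop()
        for column_name, column_type in sdf.dtypes:
            if column_type.startswith("struct"):
                # Defer struct columns: re-project them and process their fields later.
                stack.append((parents + (column_name,), sdf.select(column_name + ".*")))
            else:
                # Leaf column: select via dotted path, alias with underscores.
                columns.append(
                    col(".".join(parents + (column_name,)))
                    .alias("_".join(parents + (column_name,)))
                )
    return nested_sdf.select(columns)

flatten_frame(nested).show()
# Expected columns: status, result_rate, result_unit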
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-iterative-2406" in path] + +out_sdf = process_json_to_dataframe("hoststack", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/iterative_mrr_rls2402.py b/csit.infra.etl/iterative_mrr_rls2402.py deleted file mode 100644 index e779dbdc36..0000000000 --- a/csit.infra.etl/iterative_mrr_rls2402.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
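The listing window is deliberately narrow: LAST_MODIFIED_END is today's date at midnight (labelled UTC) and LAST_MODIFIED_BEGIN is exactly one day earlier, so a daily-scheduled job ingests yesterday's uploads and nothing else. An equivalent formulation of the same window, assuming the behaviour of the strptime-based version above is what is wanted:

from datetime import datetime, time, timedelta

from pytz import utc

# Midnight at the start of the current day, marked as UTC ...
last_modified_end = utc.localize(datetime.combine(datetime.now().date(), time.min))
# ... and the 24-hour window that ends there.
last_modified_begin = last_modified_end - timedelta(days=1)

print(last_modified_begin, "->", last_modified_end)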
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-iterative-2402" in path] - -out_sdf = process_json_to_dataframe("mrr", filtered_paths) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/iterative_mrr_rls2406.py b/csit.infra.etl/iterative_mrr_rls2406.py new file mode 100644 index 0000000000..9abb3434dc --- /dev/null +++ b/csit.infra.etl/iterative_mrr_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
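The SUFFIX / IGNORE_SUFFIX pair keeps only per-test result files: every candidate ends in info.json.gz, but suite-, setup- and teardown-level files are excluded before any JSON is read. A local sketch of the same filtering rule applied to hypothetical key names (the object keys below are made up for illustration, not real objects):

SUFFIX = "info.json.gz"
IGNORE_SUFFIX = [
    "suite.info.json.gz",
    "setup.info.json.gz",
    "teardown.info.json.gz",
    "suite.output.info.json.gz",
    "setup.output.info.json.gz",
    "teardown.output.info.json.gz",
]

# Hypothetical object keys, for illustration only.
keys = [
    "csit-vpp-perf-report-iterative-2406-example/1/tests/ip4/test.info.json.gz",
    "csit-vpp-perf-report-iterative-2406-example/1/tests/ip4/setup.info.json.gz",
    "csit-vpp-perf-report-iterative-2406-example/1/tests/ip4/suite.info.json.gz",
]

kept = [
    key for key in keys
    if key.endswith(SUFFIX) and not any(key.endswith(s) for s in IGNORE_SUFFIX)
]
print(kept)  # only the test.info.json.gz key survives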
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-iterative-2406" in path] + +out_sdf = process_json_to_dataframe("mrr", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/iterative_ndrpdr_rls2402.py b/csit.infra.etl/iterative_ndrpdr_rls2402.py deleted file mode 100644 index 9231176e10..0000000000 --- a/csit.infra.etl/iterative_ndrpdr_rls2402.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
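Job and build identifiers are not part of the JSON payload; they are recovered from the object URL, where s3://<bucket>/<jenkins-instance>/<job>/<build>/... puts the job name at split index 4 and the build number at index 5. A tiny sketch with a hypothetical path (the key below is illustrative, not a real object):

# Hypothetical object path, shaped like the ones returned by wr.s3.list_objects().
path = (
    "s3://fdio-logs-s3-cloudfront-index/vex-yul-rot-jenkins-1/"
    "csit-vpp-perf-report-iterative-2406-example/42/tests/ip4/test.info.json.gz"
)

parts = path.split("/")
# parts[0] = "s3:", parts[1] = "", parts[2] = bucket, parts[3] = Jenkins instance,
# parts[4] = job name, parts[5] = build number.
job, build = parts[4], parts[5]
print(job, build)  # csit-vpp-perf-report-iterative-2406-example 42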
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-iterative-2402" in path] - -out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/iterative_ndrpdr_rls2406.py b/csit.infra.etl/iterative_ndrpdr_rls2406.py new file mode 100644 index 0000000000..21a6c46cd1 --- /dev/null +++ b/csit.infra.etl/iterative_ndrpdr_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
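Each script pins the expected layout by loading a JSON-serialised Spark schema (iterative_<schema_name>.json next to the script) and starting from an empty frame with that schema, so the later unions cannot silently change column types. A minimal sketch with an inline schema dict standing in for the real schema file (the field names here are illustrative):

from pyspark.sql import SparkSession
from pyspark.sql.types import StructType

spark = SparkSession.builder.master("local[1]").appName("schema-demo").getOrCreate()

# Stand-in for json.load() of the schema file; fields are illustrative only.
schema_json = {
    "type": "struct",
    "fields": [
        {"name": "test_id", "type": "string", "nullable": True, "metadata": {}},
        {"name": "passed", "type": "boolean", "nullable": True, "metadata": {}},
    ],
}

schema = StructType.fromJson(schema_json)
sdf = spark.createDataFrame([], schema)   # empty frame with a fixed layout
sdf.printSchema()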
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-iterative-2406" in path] + +out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/iterative_reconf_rls2402.py b/csit.infra.etl/iterative_reconf_rls2402.py deleted file mode 100644 index 1beeb16d2c..0000000000 --- a/csit.infra.etl/iterative_reconf_rls2402.py +++ /dev/null @@ -1,171 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
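Result files from different jobs do not all carry the same columns, so each loaded frame is merged with unionByName(..., allowMissingColumns=True) and the combined frame is then pruned twice: rows that are entirely null are dropped, and rows missing any of the critical identification columns are dropped. A small sketch of both steps (column names trimmed down for the example):

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("union-demo").getOrCreate()

# Two frames with overlapping but not identical columns, as different jobs produce.
a = spark.createDataFrame(
    [("vpp", "test-a", True)], "dut_type string, test_name_short string, passed boolean"
)
b = spark.createDataFrame(
    [("vpp", None, "extra")], "dut_type string, passed boolean, note string"
)

# Missing columns are filled with nulls instead of raising an AnalysisException.
merged = a.unionByName(b, allowMissingColumns=True)

# Drop rows carrying no data at all, then rows missing critical identifiers.
merged = merged.na.drop(how="all")
merged = merged.na.drop(how="any", subset=["dut_type", "passed"])
merged.show()  # only frame a's row survives: b's row has a null "passed"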
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-iterative-2402" in path] - -out_sdf = process_json_to_dataframe("reconf", filtered_paths) -out_sdf.show(truncate=False) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/iterative_reconf_rls2406.py b/csit.infra.etl/iterative_reconf_rls2406.py new file mode 100644 index 0000000000..e9b06812e3 --- /dev/null +++ b/csit.infra.etl/iterative_reconf_rls2406.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+ :rtype: DataFrame + """ + drop_subset = [ + "dut_type", "dut_version", + "passed", + "test_name_long", "test_name_short", + "test_type", + "version" + ] + + # load schemas + with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: + schema = StructType.fromJson(load(f_schema)) + + # create empty DF out of schemas + sdf = spark.createDataFrame([], schema) + + # filter list + filtered = [path for path in paths if schema_name in path] + + # select + for path in filtered: + print(path) + + sdf_loaded = spark \ + .read \ + .option("multiline", "true") \ + .schema(schema) \ + .json(path) \ + .withColumn("job", lit(path.split("/")[4])) \ + .withColumn("build", lit(path.split("/")[5])) + sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) + + # drop rows with all nulls and drop rows with null in critical frames + sdf = sdf.na.drop(how="all") + sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) + + # flatten frame + sdf = flatten_frame(sdf) + + return sdf + + +# create SparkContext and GlueContext +spark_context = SparkContext.getOrCreate() +spark_context.setLogLevel("WARN") +glue_context = GlueContext(spark_context) +spark = glue_context.spark_session + +# files of interest +paths = wr.s3.list_objects( + path=PATH, + suffix=SUFFIX, + last_modified_begin=LAST_MODIFIED_BEGIN, + last_modified_end=LAST_MODIFIED_END, + ignore_suffix=IGNORE_SUFFIX, + ignore_empty=True +) + +filtered_paths = [path for path in paths if "report-iterative-2406" in path] + +out_sdf = process_json_to_dataframe("reconf", filtered_paths) +out_sdf.show(truncate=False) +out_sdf.printSchema() +out_sdf = out_sdf \ + .withColumn("year", lit(datetime.now().year)) \ + .withColumn("month", lit(datetime.now().month)) \ + .withColumn("day", lit(datetime.now().day)) \ + .repartition(1) + +try: + boto3_session = session.Session( + aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], + aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], + region_name=environ["OUT_AWS_DEFAULT_REGION"] + ) +except KeyError: + boto3_session = session.Session() +) + +try: + wr.s3.to_parquet( + df=out_sdf.toPandas(), + path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406", + dataset=True, + partition_cols=["test_type", "year", "month", "day"], + compression="snappy", + use_threads=True, + mode="overwrite_partitions", + boto3_session=boto3_session + ) +except EmptyDataFrame: + pass diff --git a/csit.infra.etl/iterative_soak_rls2402.py b/csit.infra.etl/iterative_soak_rls2402.py deleted file mode 100644 index 55c6eb494d..0000000000 --- a/csit.infra.etl/iterative_soak_rls2402.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) 2023 Cisco and/or its affiliates. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
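Before the write, every script stamps the frame with the ingestion date as literal year/month/day columns (these become partition directories in the Parquet dataset) and repartitions to a single Spark partition so toPandas() hands one contiguous frame to awswrangler. A short sketch of that step, with out_sdf standing in for the processed frame:

from datetime import datetime

from pyspark.sql import SparkSession
from pyspark.sql.functions import lit

spark = SparkSession.builder.master("local[1]").appName("stamp-demo").getOrCreate()

# Stand-in for the processed frame.
out_sdf = spark.createDataFrame([("reconf", 1.5)], "test_type string, rate double")

out_sdf = out_sdf \
    .withColumn("year", lit(datetime.now().year)) \
    .withColumn("month", lit(datetime.now().month)) \
    .withColumn("day", lit(datetime.now().day)) \
    .repartition(1)

out_sdf.show()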
- -"""ETL script running on top of the s3://""" - -from datetime import datetime, timedelta -from json import load -from os import environ -from pytz import utc - -import awswrangler as wr -from awswrangler.exceptions import EmptyDataFrame -from awsglue.context import GlueContext -from boto3 import session -from pyspark.context import SparkContext -from pyspark.sql.functions import col, lit, regexp_replace -from pyspark.sql.types import StructType - - -S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index" -S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index" -PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" -SUFFIX="info.json.gz" -IGNORE_SUFFIX=[ - "suite.info.json.gz", - "setup.info.json.gz", - "teardown.info.json.gz", - "suite.output.info.json.gz", - "setup.output.info.json.gz", - "teardown.output.info.json.gz" -] -LAST_MODIFIED_END=utc.localize( - datetime.strptime( - f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", - "%Y-%m-%d" - ) -) -LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) - - -def flatten_frame(nested_sdf): - """Unnest Spark DataFrame in case there nested structered columns. - - :param nested_sdf: Spark DataFrame. - :type nested_sdf: DataFrame - :returns: Unnest DataFrame. - :rtype: DataFrame - """ - stack = [((), nested_sdf)] - columns = [] - while len(stack) > 0: - parents, sdf = stack.pop() - for column_name, column_type in sdf.dtypes: - if column_type[:6] == "struct": - projected_sdf = sdf.select(column_name + ".*") - stack.append((parents + (column_name,), projected_sdf)) - else: - columns.append( - col(".".join(parents + (column_name,))) \ - .alias("_".join(parents + (column_name,))) - ) - return nested_sdf.select(columns) - - -def process_json_to_dataframe(schema_name, paths): - """Processes JSON to Spark DataFrame. - - :param schema_name: Schema name. - :type schema_name: string - :param paths: S3 paths to process. - :type paths: list - :returns: Spark DataFrame. 
- :rtype: DataFrame - """ - drop_subset = [ - "dut_type", "dut_version", - "passed", - "test_name_long", "test_name_short", - "test_type", - "version" - ] - - # load schemas - with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema: - schema = StructType.fromJson(load(f_schema)) - - # create empty DF out of schemas - sdf = spark.createDataFrame([], schema) - - # filter list - filtered = [path for path in paths if schema_name in path] - - # select - for path in filtered: - print(path) - - sdf_loaded = spark \ - .read \ - .option("multiline", "true") \ - .schema(schema) \ - .json(path) \ - .withColumn("job", lit(path.split("/")[4])) \ - .withColumn("build", lit(path.split("/")[5])) - sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True) - - # drop rows with all nulls and drop rows with null in critical frames - sdf = sdf.na.drop(how="all") - sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset) - - # flatten frame - sdf = flatten_frame(sdf) - - return sdf - - -# create SparkContext and GlueContext -spark_context = SparkContext.getOrCreate() -spark_context.setLogLevel("WARN") -glue_context = GlueContext(spark_context) -spark = glue_context.spark_session - -# files of interest -paths = wr.s3.list_objects( - path=PATH, - suffix=SUFFIX, - last_modified_begin=LAST_MODIFIED_BEGIN, - last_modified_end=LAST_MODIFIED_END, - ignore_suffix=IGNORE_SUFFIX, - ignore_empty=True -) - -filtered_paths = [path for path in paths if "report-iterative-2402" in path] - -out_sdf = process_json_to_dataframe("soak", filtered_paths) -out_sdf.printSchema() -out_sdf = out_sdf \ - .withColumn("year", lit(datetime.now().year)) \ - .withColumn("month", lit(datetime.now().month)) \ - .withColumn("day", lit(datetime.now().day)) \ - .repartition(1) - -try: - wr.s3.to_parquet( - df=out_sdf.toPandas(), - path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2402", - dataset=True, - partition_cols=["test_type", "year", "month", "day"], - compression="snappy", - use_threads=True, - mode="overwrite_partitions", - boto3_session=session.Session( - aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"], - aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"], - region_name=environ["OUT_AWS_DEFAULT_REGION"] - ) - ) -except EmptyDataFrame: - pass diff --git a/csit.infra.etl/iterative_soak_rls2406.py b/csit.infra.etl/iterative_soak_rls2406.py new file mode 100644 index 0000000000..6b05e30308 --- /dev/null +++ b/csit.infra.etl/iterative_soak_rls2406.py @@ -0,0 +1,176 @@ +#!/usr/bin/env python3 + +# Copyright (c) 2024 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""ETL script running on top of the s3://""" + +from datetime import datetime, timedelta +from json import load +from os import environ +from pytz import utc + +import awswrangler as wr +from awswrangler.exceptions import EmptyDataFrame +from awsglue.context import GlueContext +from boto3 import session +from pyspark.context import SparkContext +from pyspark.sql.functions import col, lit, regexp_replace +from pyspark.sql.types import StructType + + +S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index") +S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index") +PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*" +SUFFIX="info.json.gz" +IGNORE_SUFFIX=[ + "suite.info.json.gz", + "setup.info.json.gz", + "teardown.info.json.gz", + "suite.output.info.json.gz", + "setup.output.info.json.gz", + "teardown.output.info.json.gz" +] +LAST_MODIFIED_END=utc.localize( + datetime.strptime( + f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}", + "%Y-%m-%d" + ) +) +LAST_MODIFIED_BEGIN=LAST_MODIFIED_END - timedelta(1) + + +def flatten_frame(nested_sdf): + """Unnest Spark DataFrame in case there nested structered columns. + + :param nested_sdf: Spark DataFrame. + :type nested_sdf: DataFrame + :returns: Unnest DataFrame. + :rtype: DataFrame + """ + stack = [((), nested_sdf)] + columns = [] + while len(stack) > 0: + parents, sdf = stack.pop() + for column_name, column_type in sdf.dtypes: + if column_type[:6] == "struct": + projected_sdf = sdf.select(column_name + ".*") + stack.append((parents + (column_name,), projected_sdf)) + else: + columns.append( + col(".".join(parents + (column_name,))) \ + .alias("_".join(parents + (column_name,))) + ) + return nested_sdf.select(columns) + + +def process_json_to_dataframe(schema_name, paths): + """Processes JSON to Spark DataFrame. + + :param schema_name: Schema name. + :type schema_name: string + :param paths: S3 paths to process. + :type paths: list + :returns: Spark DataFrame. 
+    :rtype: DataFrame
+    """
+    drop_subset = [
+        "dut_type", "dut_version",
+        "passed",
+        "test_name_long", "test_name_short",
+        "test_type",
+        "version"
+    ]
+
+    # load schemas
+    with open(f"iterative_{schema_name}.json", "r", encoding="UTF-8") as f_schema:
+        schema = StructType.fromJson(load(f_schema))
+
+    # create empty DF out of schemas
+    sdf = spark.createDataFrame([], schema)
+
+    # filter list
+    filtered = [path for path in paths if schema_name in path]
+
+    # select
+    for path in filtered:
+        print(path)
+
+        sdf_loaded = spark \
+            .read \
+            .option("multiline", "true") \
+            .schema(schema) \
+            .json(path) \
+            .withColumn("job", lit(path.split("/")[4])) \
+            .withColumn("build", lit(path.split("/")[5]))
+        sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)
+
+    # drop rows with all nulls and drop rows with null in critical frames
+    sdf = sdf.na.drop(how="all")
+    sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)
+
+    # flatten frame
+    sdf = flatten_frame(sdf)
+
+    return sdf
+
+
+# create SparkContext and GlueContext
+spark_context = SparkContext.getOrCreate()
+spark_context.setLogLevel("WARN")
+glue_context = GlueContext(spark_context)
+spark = glue_context.spark_session
+
+# files of interest
+paths = wr.s3.list_objects(
+    path=PATH,
+    suffix=SUFFIX,
+    last_modified_begin=LAST_MODIFIED_BEGIN,
+    last_modified_end=LAST_MODIFIED_END,
+    ignore_suffix=IGNORE_SUFFIX,
+    ignore_empty=True
+)
+
+filtered_paths = [path for path in paths if "report-iterative-2406" in path]
+
+out_sdf = process_json_to_dataframe("soak", filtered_paths)
+out_sdf.printSchema()
+out_sdf = out_sdf \
+    .withColumn("year", lit(datetime.now().year)) \
+    .withColumn("month", lit(datetime.now().month)) \
+    .withColumn("day", lit(datetime.now().day)) \
+    .repartition(1)
+
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
+try:
+    wr.s3.to_parquet(
+        df=out_sdf.toPandas(),
+        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/iterative_rls2406",
+        dataset=True,
+        partition_cols=["test_type", "year", "month", "day"],
+        compression="snappy",
+        use_threads=True,
+        mode="overwrite_partitions",
+        boto3_session=boto3_session
+    )
+except EmptyDataFrame:
+    pass
diff --git a/csit.infra.etl/stats.py b/csit.infra.etl/stats.py
index 5d44caa25d..08ce4a9d0d 100644
--- a/csit.infra.etl/stats.py
+++ b/csit.infra.etl/stats.py
@@ -28,8 +28,9 @@ from pyspark.context import SparkContext
 from pyspark.sql.functions import lit
 from pyspark.sql.types import StructType
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="suite.info.json.gz"
 IGNORE_SUFFIX=[]
@@ -106,7 +107,6 @@ paths = wr.s3.list_objects(
 
 for schema_name in ["sra"]:
     out_sdf = process_json_to_dataframe(schema_name, paths)
-    out_sdf.show(truncate=False)
     out_sdf.printSchema()
     out_sdf = out_sdf \
         .withColumn("year", lit(datetime.now().year)) \
@@ -114,6 +114,16 @@ for schema_name in ["sra"]:
         .withColumn("day", lit(datetime.now().day)) \
         .repartition(1)
 
+    try:
+        boto3_session = session.Session(
+            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+            region_name=environ["OUT_AWS_DEFAULT_REGION"]
+        )
+    except KeyError:
+        boto3_session = session.Session()
+
     try:
         wr.s3.to_parquet(
             df=out_sdf.toPandas(),
@@ -123,11 +133,7 @@ for schema_name in ["sra"]:
             compression="snappy",
             use_threads=True,
             mode="overwrite_partitions",
-            boto3_session=session.Session(
-                aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-                aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-                region_name=environ["OUT_AWS_DEFAULT_REGION"]
-            )
+            boto3_session=boto3_session
         )
     except EmptyDataFrame:
         pass
diff --git a/csit.infra.etl/trending_hoststack.py b/csit.infra.etl/trending_hoststack.py
index 85cab5a179..45cb5c9bf5 100644
--- a/csit.infra.etl/trending_hoststack.py
+++ b/csit.infra.etl/trending_hoststack.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -144,7 +144,6 @@ paths = wr.s3.list_objects(
 filtered_paths = [path for path in paths if "daily" in path or "weekly" in path]
 
 out_sdf = process_json_to_dataframe("hoststack", filtered_paths)
-out_sdf.show(truncate=False)
 out_sdf.printSchema()
 out_sdf = out_sdf \
     .withColumn("year", lit(datetime.now().year)) \
@@ -152,6 +151,16 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +170,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
diff --git a/csit.infra.etl/trending_mrr.py b/csit.infra.etl/trending_mrr.py
index a00c5fb4e1..b42aacaf36 100644
--- a/csit.infra.etl/trending_mrr.py
+++ b/csit.infra.etl/trending_mrr.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -152,6 +152,16 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +171,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
diff --git a/csit.infra.etl/trending_ndrpdr.py b/csit.infra.etl/trending_ndrpdr.py
index e35d27b0bf..96582f5928 100644
--- a/csit.infra.etl/trending_ndrpdr.py
+++ b/csit.infra.etl/trending_ndrpdr.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -152,6 +152,16 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +171,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
diff --git a/csit.infra.etl/trending_reconf.py b/csit.infra.etl/trending_reconf.py
index 94e6199e89..08287a74cc 100644
--- a/csit.infra.etl/trending_reconf.py
+++ b/csit.infra.etl/trending_reconf.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -152,6 +152,16 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +171,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
     )
 except EmptyDataFrame:
     pass
diff --git a/csit.infra.etl/trending_soak.py b/csit.infra.etl/trending_soak.py
index 40da521884..e6faf5be34 100644
--- a/csit.infra.etl/trending_soak.py
+++ b/csit.infra.etl/trending_soak.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright (c) 2023 Cisco and/or its affiliates.
+# Copyright (c) 2024 Cisco and/or its affiliates.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at:
@@ -29,8 +29,8 @@ from pyspark.sql.functions import col, lit, regexp_replace
 from pyspark.sql.types import StructType
 
 
-S3_LOGS_BUCKET="fdio-logs-s3-cloudfront-index"
-S3_DOCS_BUCKET="fdio-docs-s3-cloudfront-index"
+S3_LOGS_BUCKET=environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
+S3_DOCS_BUCKET=environ.get("S3_DOCS_BUCKET", "fdio-docs-s3-cloudfront-index")
 PATH=f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
 SUFFIX="info.json.gz"
 IGNORE_SUFFIX=[
@@ -144,7 +144,6 @@ paths = wr.s3.list_objects(
 filtered_paths = [path for path in paths if "daily" in path or "weekly" in path]
 
 out_sdf = process_json_to_dataframe("soak", filtered_paths)
-out_sdf.show(truncate=False)
 out_sdf.printSchema()
 out_sdf = out_sdf \
     .withColumn("year", lit(datetime.now().year)) \
@@ -152,6 +151,16 @@ out_sdf = out_sdf \
     .withColumn("day", lit(datetime.now().day)) \
     .repartition(1)
 
+try:
+    boto3_session = session.Session(
+        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
+        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
+        region_name=environ["OUT_AWS_DEFAULT_REGION"]
+    )
+except KeyError:
+    boto3_session = session.Session()
+
 try:
     wr.s3.to_parquet(
         df=out_sdf.toPandas(),
@@ -161,11 +170,7 @@ try:
         compression="snappy",
         use_threads=True,
         mode="overwrite_partitions",
-        boto3_session=session.Session(
-            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
-            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
-            region_name=environ["OUT_AWS_DEFAULT_REGION"]
-        )
+        boto3_session=boto3_session
    )
 except EmptyDataFrame:
     pass
-- 
cgit 1.2.3-korg
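
The credential handling added across the scripts above follows a single pattern: prefer the OUT_AWS_* environment variables when all three are set, otherwise fall back to boto3's default credential chain. Below is a minimal standalone sketch of that pattern; it assumes only the environment variable names used in this patch, and the demo_session() helper is illustrative rather than part of the ETL code.

#!/usr/bin/env python3

"""Sketch of the credential fallback pattern used by the ETL scripts."""

from os import environ

from boto3 import session


def demo_session():
    """Build a boto3 session from the OUT_AWS_* variables when present,
    otherwise fall back to the default credential chain (config files,
    instance profile, standard AWS_* variables).

    :returns: Configured boto3 session.
    :rtype: session.Session
    """
    try:
        return session.Session(
            aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
            aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
            region_name=environ["OUT_AWS_DEFAULT_REGION"]
        )
    except KeyError:
        # Any missing OUT_AWS_* variable raises KeyError; boto3 then
        # resolves credentials through its normal provider chain.
        return session.Session()


if __name__ == "__main__":
    print(demo_session().region_name)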