Diffstat (limited to 'test')
-rw-r--r--  test/Makefile     |  2
-rw-r--r--  test/lisp.py      |  9
-rw-r--r--  test/test_lisp.py | 24
3 files changed, 24 insertions(+), 11 deletions(-)
diff --git a/test/Makefile b/test/Makefile
index 5b4a965482c..9f13e90d701 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -73,7 +73,7 @@ PYTHON_EXTRA_DEPENDS=
endif
PYTHON_VENV_PATH=$(VPP_PYTHON_PREFIX)/virtualenv
-PYTHON_DEPENDS=$(PYTHON_EXTRA_DEPENDS) psutil faulthandler six scapy==2.4.0 pexpect cryptography subprocess32 cffi syslog-rfc5424-parser git+https://github.com/vpp-dev/py-lispnetworking
+PYTHON_DEPENDS=$(PYTHON_EXTRA_DEPENDS) psutil faulthandler six scapy==2.4.0 pexpect cryptography subprocess32 cffi syslog-rfc5424-parser
SCAPY_SOURCE=$(shell find $(PYTHON_VENV_PATH) -name site-packages)
BUILD_COV_DIR=$(BR)/test-cov
diff --git a/test/lisp.py b/test/lisp.py
index 865070dff9d..a1f0c1675a0 100644
--- a/test/lisp.py
+++ b/test/lisp.py
@@ -1,10 +1,4 @@
-from random import randint
-from socket import AF_INET, AF_INET6
-from scapy.all import *
-from scapy.packet import *
from scapy.fields import *
-from lisp import *
-from framework import *
from vpp_object import *
@@ -292,7 +286,8 @@ class VppLispAdjacency(VppObject):
            reid_len=self._reid.prefix_length, vni=self._vni)
        self._test.registry.register(self, self.test.logger)
-    def eid_equal(self, eid, eid_type, eid_data, prefix_len):
+    @staticmethod
+    def eid_equal(eid, eid_type, eid_data, prefix_len):
        if eid.eid_type != eid_type:
            return False
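Not part of the patch, just for illustration: with the @staticmethod change, eid_equal() can also be called without an instance, while existing instance-bound call sites keep working, e.g. (hypothetical argument values):

    VppLispAdjacency.eid_equal(eid, eid_type, eid_data, prefix_len)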
diff --git a/test/test_lisp.py b/test/test_lisp.py
index cfe8e0af65d..b6272dcb1bd 100644
--- a/test/test_lisp.py
+++ b/test/test_lisp.py
@@ -1,13 +1,31 @@
#!/usr/bin/env python
import unittest
-from scapy.packet import Raw
+from scapy.fields import BitField, ByteField, FlagsField, IntField
+from scapy.packet import bind_layers, Packet, Raw
from scapy.layers.inet import IP, UDP, Ether
-from py_lispnetworking.lisp import LISP_GPE_Header
+from scapy.layers.inet6 import IPv6
-from util import ppp, ForeignAddressFactory
from framework import VppTestCase, VppTestRunner
from lisp import *
+from util import ppp, ForeignAddressFactory
+
+# From py_lispnetworking.lisp.py: # GNU General Public License v2.0
+
+
+class LISP_GPE_Header(Packet):
+    name = "LISP GPE Header"
+    fields_desc = [
+        FlagsField("gpe_flags", None, 6, ["N", "L", "E", "V", "I", "P"]),
+        BitField("reserved", 0, 18),
+        ByteField("next_proto", 0),
+        IntField("iid", 0),
+    ]
+bind_layers(UDP, LISP_GPE_Header, dport=4341)
+bind_layers(UDP, LISP_GPE_Header, sport=4341)
+bind_layers(LISP_GPE_Header, IP, next_proto=1)
+bind_layers(LISP_GPE_Header, IPv6, next_proto=2)
+bind_layers(LISP_GPE_Header, Ether, next_proto=3)
class Driver(object):
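Not part of the patch, just for illustration: a minimal sketch of how the inlined LISP_GPE_Header and the bind_layers() registrations above are exercised, relying on the imports already present in test_lisp.py. Addresses, ports and field values are made up for the example.

    # Build a LISP-GPE encapsulated IPv4 packet (next_proto=1 maps to IP).
    pkt = (Ether() /
           IP(src="6.0.0.2", dst="6.0.0.1") /
           UDP(sport=4341, dport=4341) /
           LISP_GPE_Header(next_proto=1, iid=9) /
           IP(src="192.168.1.1", dst="192.168.1.2") /
           UDP(sport=1234, dport=1234) /
           Raw(b"data"))
    # On receive, UDP port 4341 payloads are dissected as LISP_GPE_Header
    # automatically because of the bind_layers(UDP, ...) calls above.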
#!/usr/bin/env python3

# Copyright (c) 2025 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""ETL script running on top of the s3://"""

from datetime import datetime, timedelta
from json import load
from os import environ
from pytz import utc

import awswrangler as wr
from awswrangler.exceptions import EmptyDataFrame
from awsglue.context import GlueContext
from boto3 import session
from pyspark.context import SparkContext
from pyspark.sql.functions import col, lit, regexp_replace
from pyspark.sql.types import StructType


S3_LOGS_BUCKET = environ.get("S3_LOGS_BUCKET", "fdio-logs-s3-cloudfront-index")
S3_DOCS_BUCKET = environ.get("S3_DOCS_BUCKET", "csit-docs-s3-cloudfront-index")
PATH = f"s3://{S3_LOGS_BUCKET}/vex-yul-rot-jenkins-1/csit-*-perf-*"
SUFFIX = "info.json.gz"
IGNORE_SUFFIX = [
    "suite.info.json.gz",
    "setup.info.json.gz",
    "teardown.info.json.gz",
    "suite.output.info.json.gz",
    "setup.output.info.json.gz",
    "teardown.output.info.json.gz"
]
LAST_MODIFIED_END = utc.localize(
    datetime.strptime(
        f"{datetime.now().year}-{datetime.now().month}-{datetime.now().day}",
        "%Y-%m-%d"
    )
)
LAST_MODIFIED_BEGIN = LAST_MODIFIED_END - timedelta(1)
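# NOTE: with the window above, a run on e.g. 2024-06-02 selects objects whose
# LastModified timestamp falls within the previous full UTC day, i.e. from
# 2024-06-01 00:00 to 2024-06-02 00:00 (dates are illustrative).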


def flatten_frame(nested_sdf):
    """Unnest Spark DataFrame in case there nested structered columns.

    :param nested_sdf: Spark DataFrame.
    :type nested_sdf: DataFrame
    :returns: Unnested DataFrame.
    :rtype: DataFrame
    """
    stack = [((), nested_sdf)]
    columns = []
    while len(stack) > 0:
        parents, sdf = stack.pop()
        for column_name, column_type in sdf.dtypes:
            if column_type[:6] == "struct":
                projected_sdf = sdf.select(column_name + ".*")
                stack.append((parents + (column_name,), projected_sdf))
            else:
                columns.append(
                    col(".".join(parents + (column_name,))) \
                        .alias("_".join(parents + (column_name,)))
                )
    return nested_sdf.select(columns)
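# Example (illustrative, hypothetical column names): a struct column "result"
# with fields "type" and "rate" is flattened into scalar columns "result_type"
# and "result_rate"; non-struct columns keep their original names.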


def process_json_to_dataframe(schema_name, paths):
    """Processes JSON to Spark DataFrame.

    :param schema_name: Schema name.
    :type schema_name: string
    :param paths: S3 paths to process.
    :type paths: list
    :returns: Spark DataFrame.
    :rtype: DataFrame
    """
    drop_subset = [
        "dut_type", "dut_version",
        "passed",
        "test_name_long", "test_name_short",
        "test_type",
        "version"
    ]

    # load schemas
    with open(f"trending_{schema_name}.json", "r", encoding="UTF-8") as f_schema:
        schema = StructType.fromJson(load(f_schema))

    # create empty DF out of schemas
    sdf = spark.createDataFrame([], schema)

    # filter list
    filtered = [path for path in paths if schema_name in path]

    # select
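    # Assumed path layout: s3://<bucket>/vex-yul-rot-jenkins-1/<job>/<build>/...,
    # so components [4] and [5] of path.split("/") are the Jenkins job name
    # and the build number added as columns below.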
    for path in filtered:
        print(path)

        sdf_loaded = spark \
            .read \
            .option("multiline", "true") \
            .schema(schema) \
            .json(path) \
            .withColumn("job", lit(path.split("/")[4])) \
            .withColumn("build", lit(path.split("/")[5]))
        sdf = sdf.unionByName(sdf_loaded, allowMissingColumns=True)

    # drop rows with all nulls and drop rows with nulls in critical columns
    sdf = sdf.na.drop(how="all")
    sdf = sdf.na.drop(how="any", thresh=None, subset=drop_subset)

    # flatten frame
    sdf = flatten_frame(sdf)

    return sdf


# create SparkContext and GlueContext
spark_context = SparkContext.getOrCreate()
spark_context.setLogLevel("WARN")
glue_context = GlueContext(spark_context)
spark = glue_context.spark_session

# files of interest
paths = wr.s3.list_objects(
    path=PATH,
    suffix=SUFFIX,
    last_modified_begin=LAST_MODIFIED_BEGIN,
    last_modified_end=LAST_MODIFIED_END,
    ignore_suffix=IGNORE_SUFFIX,
    ignore_empty=True
)

filtered_paths = [path for path in paths if "daily" in path or "weekly" in path]

out_sdf = process_json_to_dataframe("ndrpdr", filtered_paths)
out_sdf.show(truncate=False)
out_sdf.printSchema()
out_sdf = out_sdf \
    .withColumn("year", lit(datetime.now().year)) \
    .withColumn("month", lit(datetime.now().month)) \
    .withColumn("day", lit(datetime.now().day)) \
    .repartition(1)
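# The year/month/day columns added above become, together with test_type,
# the Hive-style partition columns passed to wr.s3.to_parquet() below.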

try:
    boto3_session = session.Session(
        aws_access_key_id=environ["OUT_AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=environ["OUT_AWS_SECRET_ACCESS_KEY"],
        region_name=environ["OUT_AWS_DEFAULT_REGION"]
    )
except KeyError:
    boto3_session = session.Session()
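# Without the OUT_* variables, boto3 falls back to its default credential
# chain (environment variables, shared config files or an instance role).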

try:
    wr.s3.to_parquet(
        df=out_sdf.toPandas(),
        path=f"s3://{S3_DOCS_BUCKET}/csit/parquet/trending",
        dataset=True,
        partition_cols=["test_type", "year", "month", "day"],
        compression="snappy",
        use_threads=True,
        mode="overwrite_partitions",
        boto3_session=boto3_session
    )
except EmptyDataFrame:
    pass