From 1daa6fdc0bae284dee1b61f34534e59b60b7526a Mon Sep 17 00:00:00 2001 From: Vratko Polak Date: Mon, 25 Apr 2022 10:22:05 +0200 Subject: feat(astf): Support framesizes for ASTF - No support for IMIX. + Fix a bad bug in padding (most ASTF profiles had wrong frame sizes). + Fix a big typo in TCP PPS profiles (s->c was not data, just RST). + Control transaction size via ASTF_N_DATA_FRAMES env variable. - Default value 5 leads to transactions smaller than before. + It ensures transaction is one burst (per direction) even for jumbo. + Edit autogen to set supported frame sizes based on suite id. + Both TCP and UDP use the same values: + 64B for CPS (exact for UDP, nominal for TCP). + 100B, 1518B and 9000B for TPUT and PPS. - TCP TPUT achievable minimum is 70B. + Used 100B to leave room for possible IPv6 ASTF tests. + Separate function for code reused by vpp and trex tests. - I do not really like the new "copy and edit" approach added here. + But it is a quick edit, better autogen refactor is low priority. + Consider both established and transitory sessions as valid. - Mostly for compatibility with 2202 behavior and to avoid ramp-ups. - Assuming both session states have similar enough VPP CPU overhead. + Added a TODO to investigate and maybe reconsider later. + Update the state timeout value to 240s. + That is the default for TCP (for transitory state). - UDP could keep using 300s. + But I prefer UDP and TCP to behave as similarly as possible. + Use TRex tunables to get the exact frame size (for data packets). - It is not clear why the recipe for MSS has to be this complicated. + Move code away from profile init, as frame size is not known there. + Change internal profile API, so values related to MSS are passed. + Lower ramp-up rate for TCP TPUT tests. + Because without lower rate, jumbo fails on packet loss in ramp-up. + UDP TPUT ramp-up rate also lowered (just to keep suites more similar). + Distinguish one-direction and aggregated average frame size. + Update keyword documentation where the distiction matters. + One-direction is needed for turning bandwidth limit to TPS limit. + Aggregated is needed for correct NDRPDR bandwidth result value. - TCP TPUT will always be few percent below bidirectional maximum. + That is unavoidable, as one direction sends more control packets. + Add runtime consistency checks so future refactors are safer. + Fail if padding requested would be negative. + Fail if suite claims unexpected values for packets per transaction. + Edit the 4 types of ASTF profiles to keep them similar to each other. + Move UDP TPUT limit value from a field back to direct argument. + Stop pretending first UDP packet is not data. + Apply small improvements where convenient. + Replace "aggregate" with "aggregated" where possible. + To lower probability of any future typos in variable names. + Avoid calling Set Numeric Frame Sizes twice. + Code formatting, keyword documentation, code comments, ... + Add TODOs for less important code quality improvements. - Postpone updating of methodology pages to a subsequent change. Change-Id: I4b381e5210e69669f972326202fdcc5a2c9c923b Signed-off-by: Vratko Polak --- resources/libraries/python/Constants.py | 8 +++ resources/libraries/python/NATUtil.py | 28 +++++----- resources/libraries/python/TrafficGenerator.py | 19 ++++--- resources/libraries/python/autogen/Regenerator.py | 64 ++++++++++++++++------- 4 files changed, 78 insertions(+), 41 deletions(-) (limited to 'resources/libraries/python') diff --git a/resources/libraries/python/Constants.py b/resources/libraries/python/Constants.py index 6bcf5413d9..bfbbfd7471 100644 --- a/resources/libraries/python/Constants.py +++ b/resources/libraries/python/Constants.py @@ -269,6 +269,14 @@ class Constants: u"PERF_TRIAL_ASTF_DELAY", 0.112 ) + # Number of data frames in TPUT transaction, used both by TCP and UDP. + # The value should be 33 to keep historic continuity for UDP TPUT tests, + # but we are limited by TRex window of 48 KiB, so for 9000B tests + # it means we can send only 5 full data frames in a burst. + # https://github.com/cisco-system-traffic-generator/ + # trex-core/blob/v2.88/src/44bsd/tcp_var.h#L896-L903 + ASTF_N_DATA_FRAMES = get_int_from_env(u"ASTF_N_DATA_FRAMES", 5) + # Extended debug (incl. vpp packet trace, linux perf stat, ...). # Full list is available as suite variable (__init__.robot) or is # override by test. diff --git a/resources/libraries/python/NATUtil.py b/resources/libraries/python/NATUtil.py index ceed560a04..841bd2e683 100644 --- a/resources/libraries/python/NATUtil.py +++ b/resources/libraries/python/NATUtil.py @@ -278,7 +278,7 @@ class NATUtil: @staticmethod def get_nat44_sessions_number(node, proto): - """Get number of established NAT44 sessions from NAT44 mapping data. + """Get number of expected NAT44 sessions from NAT44 mapping data. This keyword uses output from a CLI command, so it can start failing when VPP changes the output format. @@ -287,17 +287,21 @@ class NATUtil: The current implementation supports both 2202 and post-2202 format. (The Gerrit number changing the output format is 34877.) - For TCP proto, the post-2202 format includes "timed out" - established sessions into its count of total sessions. + For TCP proto, the expected state after rampup is + some number of sessions in transitory state (VPP has seen the FINs), + and some number of sessions in established state (meaning + some FINs were lost in the last trial). + While the two states may need slightly different number of cycles + to process next packet, the current implementation considers + both of them the "fast path", so they are both counted as expected. + As the tests should fail if a session is timed-out, - the logic substracts timed out sessions for the resturned value. + the logic substracts timed out sessions for the returned value + (only available for post-2202 format). - The 2202 output reports most of TCP sessions as in "transitory" state, - as opposed to "established", but the previous CSIT logic tolerated that. - Ideally, whis keyword would add establised and transitory sessions - (but without CLOSED and WAIT_CLOSED sessions) and return that. - The current implementation simply returns "total tcp sessions" value, - to preserve the previous CSIT behavior for 2202 output. + TODO: Investigate if it is worth to insert additional rampup trials + in TPUT tests to ensure all sessions are transitory before next + measurement. :param node: DUT node. :param proto: Required protocol - TCP/UDP/ICMP. @@ -328,9 +332,7 @@ class NATUtil: found = True continue # Proto is found, find the line we are interested in. - if proto_l == u"tcp" and u"established" not in line: - continue - if u"total" not in line and u"established" not in line: + if u"total" not in line: raise RuntimeError(f"show nat summary: no {proto} total.") # We have the line with relevant numbers. total_part, timed_out_part = line.split(u"(", 1) diff --git a/resources/libraries/python/TrafficGenerator.py b/resources/libraries/python/TrafficGenerator.py index b2748f74ba..03e3890959 100644 --- a/resources/libraries/python/TrafficGenerator.py +++ b/resources/libraries/python/TrafficGenerator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021 Cisco and/or its affiliates. +# Copyright (c) 2022 Cisco and/or its affiliates. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at: @@ -643,6 +643,9 @@ class TrafficGenerator(AbstractMeasurer): ) command_line.add_with_value(u"duration", f"{computed_duration!r}") command_line.add_with_value(u"frame_size", self.frame_size) + command_line.add_with_value( + u"n_data_frames", Constants.ASTF_N_DATA_FRAMES + ) command_line.add_with_value(u"multiplier", multiplier) command_line.add_with_value(u"port_0", p_0) command_line.add_with_value(u"port_1", p_1) @@ -811,7 +814,7 @@ class TrafficGenerator(AbstractMeasurer): use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, ramp_up_only=False, ): """Send traffic from all configured interfaces on TG. @@ -1215,7 +1218,7 @@ class TrafficGenerator(AbstractMeasurer): The target_tr field of ReceiveRateMeasurement is in transactions per second. Transmit count and loss count units depend on the transaction type. Usually they are in transactions - per second, or aggregate packets per second. + per second, or aggregated packets per second. TODO: Fail on running or already reported measurement. @@ -1382,7 +1385,7 @@ class TrafficGenerator(AbstractMeasurer): use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, ): """Store values accessed by measure(). @@ -1451,7 +1454,7 @@ class OptimizedSearch: """Class to be imported as Robot Library, containing search keywords. Aside of setting up measurer and forwarding arguments, - the main business is to translate min/max rate from unidir to aggregate. + the main business is to translate min/max rate from unidir to aggregated. """ @staticmethod @@ -1475,7 +1478,7 @@ class OptimizedSearch: use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, expansion_coefficient=4.0, ): """Setup initialized TG, perform optimized search, return intervals. @@ -1616,7 +1619,7 @@ class OptimizedSearch: use_latency=False, ramp_up_rate=None, ramp_up_duration=None, - state_timeout=300.0, + state_timeout=240.0, ): """Setup initialized TG, perform soak search, return avg and stdev. @@ -1674,7 +1677,7 @@ class OptimizedSearch: :type ramp_up_rate: float :type ramp_up_duration: float :type state_timeout: float - :returns: Average and stdev of estimated aggregate rate giving PLR. + :returns: Average and stdev of estimated aggregated rate giving PLR. :rtype: 2-tuple of float """ tg_instance = BuiltIn().get_library_instance( diff --git a/resources/libraries/python/autogen/Regenerator.py b/resources/libraries/python/autogen/Regenerator.py index 3011b06897..4474996ef1 100644 --- a/resources/libraries/python/autogen/Regenerator.py +++ b/resources/libraries/python/autogen/Regenerator.py @@ -17,6 +17,7 @@ TODO: How can we check each suite id is unique, when currently the suite generation is run on each directory separately? """ +import copy import sys from glob import glob @@ -116,6 +117,39 @@ def check_suite_tag(suite_tag, prolog): raise ValueError(f"Suite tag found {found} times for {suite_tag}") +def filter_and_edit_kwargs_for_astf(suite_id, kwargs): + """Return possibly edited kwargs, or None if to be skipped. + + This is a code block used in few places. + Kwargs is (a copy of) one item from tc_kwargs_list. + Currently, the editable field is frame_size, + to be increased to for tests with data (not just CPS). + + :param suite_id: Suite ID. + :param kwargs: Key-value pairs used to construct one testcase. + :type suite_id: str + :type tc_kwargs_list: dict + :returns: Edited kwargs. + :rtype Optional[dict] + """ + if u"-cps-" in suite_id: + # Contrary to UDP, there is no place to affect frame size + # in TCP CPS tests. Actual frames are close to min size. + # UDP uses the min value too, for fairer comparison to TCP. + if kwargs[u"frame_size"] not in MIN_FRAME_SIZE_VALUES: + return None + elif (u"-pps-" in suite_id or u"-tput-" in suite_id): + if u"imix" in str(kwargs[u"frame_size"]).lower(): + # ASTF does not support IMIX (yet). + return None + if kwargs[u"frame_size"] in MIN_FRAME_SIZE_VALUES: + # Minimal (TRex) TCP data frame is 80B for IPv4. + # In future, we may want to have also IPv6 TCP. + # UDP uses the same value, for fairer comparison to TCP. + kwargs[u"frame_size"] = 100 + return kwargs + + def add_default_testcases(testcase, iface, suite_id, file_out, tc_kwargs_list): """Add default testcases to file. @@ -130,7 +164,9 @@ def add_default_testcases(testcase, iface, suite_id, file_out, tc_kwargs_list): :type file_out: file :type tc_kwargs_list: dict """ - for kwargs in tc_kwargs_list: + for kwas in tc_kwargs_list: + # We may edit framesize for ASTF, the copy should be local. + kwargs = copy.deepcopy(kwas) # TODO: Is there a better way to disable some combinations? emit = True if kwargs[u"frame_size"] == 9000: @@ -156,14 +192,8 @@ def add_default_testcases(testcase, iface, suite_id, file_out, tc_kwargs_list): emit = False if kwargs[u"frame_size"] not in MIN_FRAME_SIZE_VALUES: emit = False - if ( - u"-cps-" in suite_id - or u"-pps-" in suite_id - or u"-tput-" in suite_id - ): - if kwargs[u"frame_size"] not in MIN_FRAME_SIZE_VALUES: - emit = False - if emit: + kwargs = filter_and_edit_kwargs_for_astf(suite_id, kwargs) + if emit and kwargs is not None: file_out.write(testcase.generate(**kwargs)) @@ -207,17 +237,11 @@ def add_trex_testcases(testcase, suite_id, file_out, tc_kwargs_list): :type file_out: file :type tc_kwargs_list: dict """ - for kwargs in tc_kwargs_list: - # TODO: Is there a better way to disable some combinations? - emit = True - if ( - u"-cps-" in suite_id - or u"-pps-" in suite_id - or u"-tput-" in suite_id - ): - if kwargs[u"frame_size"] not in MIN_FRAME_SIZE_VALUES: - emit = False - if emit: + for kwas in tc_kwargs_list: + # We may edit framesize for ASTF, the copy should be local. + kwargs = copy.deepcopy(kwas) + kwargs = filter_and_edit_kwargs_for_astf(suite_id, kwargs) + if kwargs is not None: file_out.write(testcase.generate(**kwargs)) -- cgit 1.2.3-korg