From 9f0ea4e6be697aa8ca08a6da596d7ca5892d2183 Mon Sep 17 00:00:00 2001
From: Vratko Polak <vrpolak@cisco.com>
Date: Thu, 25 Mar 2021 19:26:35 +0100
Subject: STL traffic: Use the sleep+stop logic

+ PPS limit for AWS set to 1.2 Mpps.
+ The logic is very similar to that one in ASTF driver.
 + This helps for testbeds with high duration stretching (e.g. AWS).
 + Difference: No transaction scale, and we deal with floats.
+ Update loss counting to count unsent packets as lost.
 + Also count "unsent" transactions for other transaction types.
 + If nonzero, log the number of unsent packets/transactions.
+ Make STL and ASTF time overhead constant (called delay) configurable.
+ Subtract delay from approximated_duration, also for ASTF.

Change-Id: I6ee6aa6fba4f110ba1636e1b0ff76cac64383e33
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
---
 resources/libraries/python/Constants.py        | 14 ++++++++++--
 resources/libraries/python/TrafficGenerator.py | 30 +++++++++++++++++++++-----
 2 files changed, 37 insertions(+), 7 deletions(-)

(limited to 'resources/libraries/python')

diff --git a/resources/libraries/python/Constants.py b/resources/libraries/python/Constants.py
index 8937e6cfc0..14fb7958b8 100644
--- a/resources/libraries/python/Constants.py
+++ b/resources/libraries/python/Constants.py
@@ -234,7 +234,7 @@ class Constants:
     # Number of trials to execute in MRR test.
     PERF_TRIAL_MULTIPLICITY = get_int_from_env(u"PERF_TRIAL_MULTIPLICITY", 10)
 
-    # Duration of one trial in MRR test.
+    # Duration [s] of one trial in MRR test.
     PERF_TRIAL_DURATION = get_float_from_env(u"PERF_TRIAL_DURATION", 1.0)
 
     # Whether to use latency streams in main search trials.
@@ -244,6 +244,16 @@ class Constants:
     PERF_TRIAL_LATENCY_DURATION = get_float_from_env(
         u"PERF_TRIAL_LATENCY_DURATION", 5.0)
 
+    # For some testbeds TG takes longer than usual to start sending traffic.
+    # This constant [s] allows longer wait, without affecting
+    # the approximate duration. For example, use 0.098 for AWS.
+    PERF_TRIAL_STL_DELAY = get_float_from_env(u"PERF_TRIAL_STL_DELAY", 0.0)
+
+    # ASTF usually needs a different value for the delay.
+    PERF_TRIAL_ASTF_DELAY = get_float_from_env(
+        u"PERF_TRIAL_ASTF_DELAY", 0.112
+    )
+
     # Extended debug (incl. vpp packet trace, linux perf stat, ...).
     # Full list is available as suite variable (__init__.robot) or is
     # override by test.
@@ -300,7 +310,7 @@ class Constants:
         # Vxlan traffic will still show stretching at 36 Mpps (>12%),
         # but we do not care about those tests that much.
         u"Mellanox-CX556A": 36000000, # 148809523,
-        u"Amazon-Nitro-50G": 1500000,
+        u"Amazon-Nitro-50G": 1200000,
         u"virtual": 14880952,
     }
 
diff --git a/resources/libraries/python/TrafficGenerator.py b/resources/libraries/python/TrafficGenerator.py
index c0d1b4ec63..e42ad48930 100644
--- a/resources/libraries/python/TrafficGenerator.py
+++ b/resources/libraries/python/TrafficGenerator.py
@@ -13,6 +13,7 @@
 
 """Performance testing traffic generator library."""
 
+import math
 import time
 
 from robot.api import logger
@@ -577,7 +578,6 @@ class TrafficGenerator(AbstractMeasurer):
             # so we can compare with what telemetry suggests
             # the real duration was.
             logger.debug(f"Expected duration {computed_duration}")
-            computed_duration += 0.1115
         if not self.duration_limit:
             return computed_duration, True
         limited_duration = min(computed_duration, self.duration_limit)
@@ -652,6 +652,9 @@ class TrafficGenerator(AbstractMeasurer):
         command_line.add_if(u"async_start", async_call)
         command_line.add_if(u"latency", self.use_latency)
         command_line.add_if(u"force", Constants.TREX_SEND_FORCE)
+        command_line.add_with_value(
+            u"delay", Constants.PERF_TRIAL_ASTF_DELAY
+        )
 
         self._start_time = time.monotonic()
         self._rate = multiplier
@@ -759,6 +762,7 @@ class TrafficGenerator(AbstractMeasurer):
         command_line.add_if(u"async_start", async_call)
         command_line.add_if(u"latency", self.use_latency)
         command_line.add_if(u"force", Constants.TREX_SEND_FORCE)
+        command_line.add_with_value(u"delay", Constants.PERF_TRIAL_STL_DELAY)
 
         # TODO: This is ugly. Handle parsing better.
         self._start_time = time.monotonic()
@@ -1244,16 +1248,27 @@ class TrafficGenerator(AbstractMeasurer):
         if not target_duration:
             target_duration = approximated_duration
         transmit_rate = self._rate
+        unsent = 0
         if self.transaction_type == u"packet":
             partial_attempt_count = self._sent
-            expected_attempt_count = self._sent
-            fail_count = self._loss
+            packet_rate = transmit_rate * self.ppta
+            # We have a float. TRex way of rounding it is not obvious.
+            # The biggest source of mismatch is Inter Stream Gap.
+            # So the code tolerates 10 usec of missing packets.
+            expected_attempt_count = (target_duration - 1e-5) * packet_rate
+            expected_attempt_count = math.ceil(expected_attempt_count)
+            # TRex can send more.
+            expected_attempt_count = max(expected_attempt_count, self._sent)
+            unsent = expected_attempt_count - self._sent
+            pass_count = self._received
+            fail_count = expected_attempt_count - pass_count
         elif self.transaction_type == u"udp_cps":
             if not self.transaction_scale:
                 raise RuntimeError(u"Add support for no-limit udp_cps.")
             partial_attempt_count = self._l7_data[u"client"][u"sent"]
             # We do not care whether TG is slow, it should have attempted all.
             expected_attempt_count = self.transaction_scale
+            unsent = expected_attempt_count - partial_attempt_count
             pass_count = self._l7_data[u"client"][u"received"]
             fail_count = expected_attempt_count - pass_count
         elif self.transaction_type == u"tcp_cps":
@@ -1263,6 +1278,7 @@ class TrafficGenerator(AbstractMeasurer):
             partial_attempt_count = ctca
             # We do not care whether TG is slow, it should have attempted all.
             expected_attempt_count = self.transaction_scale
+            unsent = expected_attempt_count - partial_attempt_count
             # From TCP point of view, server/connects counts full connections,
             # but we are testing NAT session so client/connects counts that
             # (half connections from TCP point of view).
@@ -1273,7 +1289,8 @@ class TrafficGenerator(AbstractMeasurer):
                 raise RuntimeError(u"Add support for no-limit udp_pps.")
             partial_attempt_count = self._sent
             expected_attempt_count = self.transaction_scale * self.ppta
-            fail_count = self._loss + (expected_attempt_count - self._sent)
+            unsent = expected_attempt_count - self._sent
+            fail_count = self._loss + unsent
         elif self.transaction_type == u"tcp_pps":
             if not self.transaction_scale:
                 raise RuntimeError(u"Add support for no-limit tcp_pps.")
@@ -1286,9 +1303,12 @@ class TrafficGenerator(AbstractMeasurer):
             # A simple workaround is to add absolute difference.
             # Probability of retransmissions exactly cancelling
             # packets unsent due to duration stretching is quite low.
-            fail_count = self._loss + abs(expected_attempt_count - self._sent)
+            unsent = abs(expected_attempt_count - self._sent)
+            fail_count = self._loss + unsent
         else:
             raise RuntimeError(f"Unknown parsing {self.transaction_type!r}")
+        if unsent:
+            logger.debug(f"Unsent packets/transactions: {unsent}")
         if fail_count < 0 and not self.negative_loss:
             fail_count = 0
         measurement = ReceiveRateMeasurement(
-- 
cgit