From 374954b9d648f503f6783325a1266457953a998d Mon Sep 17 00:00:00 2001 From: Tibor Frank Date: Wed, 3 May 2023 13:53:27 +0000 Subject: C-Docs: New structure Change-Id: I73d107f94b28b138f3350a9e1eedb0555583a9ca Signed-off-by: Tibor Frank --- docs/content/methodology/_index.md | 4 +- docs/content/methodology/access_control_lists.md | 70 ---- .../methodology/data_plane_throughput/_index.md | 6 - .../data_plane_throughput/data_plane_throughput.md | 129 ------ .../methodology/data_plane_throughput/mlrsearch.md | 88 ---- .../data_plane_throughput/mrr_throughput.md | 56 --- .../methodology/data_plane_throughput/plrsearch.md | 383 ------------------ .../methodology/dut_state_considerations.md | 148 ------- .../methodology/generic_segmentation_offload.md | 116 ------ docs/content/methodology/geneve.md | 66 --- .../methodology/hoststack_testing/_index.md | 6 - .../hoststack_testing/quicudpip_with_vppecho.md | 48 --- .../hoststack_testing/tcpip_with_iperf3.md | 52 --- .../hoststack_testing/udpip_with_iperf3.md | 44 -- .../hoststack_testing/vsap_ab_with_nginx.md | 39 -- .../internet_protocol_security_ipsec.md | 74 ---- docs/content/methodology/measurements/_index.md | 6 + .../measurements/data_plane_throughput/_index.md | 6 + .../data_plane_throughput/data_plane_throughput.md | 129 ++++++ .../data_plane_throughput/mlr_search.md | 88 ++++ .../measurements/data_plane_throughput/mrr.md | 56 +++ .../data_plane_throughput/plr_search.md | 383 ++++++++++++++++++ .../methodology/measurements/packet_latency.md | 52 +++ docs/content/methodology/measurements/telemetry.md | 158 ++++++++ docs/content/methodology/multi_core_speedup.md | 51 --- .../methodology/network_address_translation.md | 445 --------------------- docs/content/methodology/overview/_index.md | 6 + .../overview/dut_state_considerations.md | 148 +++++++ .../methodology/overview/multi_core_speedup.md | 51 +++ .../methodology/overview/per_thread_resources.md | 101 +++++ docs/content/methodology/overview/terminology.md | 97 +++++ .../methodology/overview/vpp_forwarding_modes.md | 104 +++++ docs/content/methodology/packet_flow_ordering.md | 42 -- docs/content/methodology/packet_latency.md | 45 --- docs/content/methodology/per_patch_testing.md | 230 +++++++++++ docs/content/methodology/per_thread_resources.md | 102 ----- docs/content/methodology/reconfiguration_tests.md | 68 ---- .../methodology/root_cause_analysis/_index.md | 6 - .../perpatch_performance_tests.md | 228 ----------- docs/content/methodology/suite_generation.md | 124 ------ docs/content/methodology/telemetry.md | 167 -------- docs/content/methodology/terminology.md | 82 ---- docs/content/methodology/test/_index.md | 6 + .../methodology/test/access_control_lists.md | 66 +++ .../test/generic_segmentation_offload.md | 117 ++++++ docs/content/methodology/test/hoststack/_index.md | 6 + .../test/hoststack/quicudpip_with_vppecho.md | 48 +++ .../test/hoststack/tcpip_with_iperf3.md | 52 +++ .../test/hoststack/udpip_with_iperf3.md | 44 ++ .../test/hoststack/vsap_ab_with_nginx.md | 39 ++ .../methodology/test/internet_protocol_security.md | 73 ++++ .../test/network_address_translation.md | 445 +++++++++++++++++++++ .../methodology/test/packet_flow_ordering.md | 42 ++ docs/content/methodology/test/reconfiguration.md | 68 ++++ .../methodology/test/tunnel_encapsulations.md | 87 ++++ docs/content/methodology/test/vpp_device.md | 15 + docs/content/methodology/trending/_index.md | 12 + docs/content/methodology/trending/analysis.md | 224 +++++++++++ docs/content/methodology/trending/presentation.md | 34 ++ 
.../methodology/trending_methodology/_index.md | 6 - .../methodology/trending_methodology/overview.md | 10 - .../trending_methodology/trend_analysis.md | 224 ----------- .../trending_methodology/trend_presentation.md | 36 -- docs/content/methodology/trex_traffic_generator.md | 195 --------- docs/content/methodology/tunnel_encapsulations.md | 41 -- docs/content/methodology/vpp_device_functional.md | 15 - docs/content/methodology/vpp_forwarding_modes.md | 104 ----- docs/content/methodology/vpp_startup_settings.md | 44 -- 68 files changed, 2995 insertions(+), 3362 deletions(-) delete mode 100644 docs/content/methodology/access_control_lists.md delete mode 100644 docs/content/methodology/data_plane_throughput/_index.md delete mode 100644 docs/content/methodology/data_plane_throughput/data_plane_throughput.md delete mode 100644 docs/content/methodology/data_plane_throughput/mlrsearch.md delete mode 100644 docs/content/methodology/data_plane_throughput/mrr_throughput.md delete mode 100644 docs/content/methodology/data_plane_throughput/plrsearch.md delete mode 100644 docs/content/methodology/dut_state_considerations.md delete mode 100644 docs/content/methodology/generic_segmentation_offload.md delete mode 100644 docs/content/methodology/geneve.md delete mode 100644 docs/content/methodology/hoststack_testing/_index.md delete mode 100644 docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md delete mode 100644 docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md delete mode 100644 docs/content/methodology/hoststack_testing/udpip_with_iperf3.md delete mode 100644 docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md delete mode 100644 docs/content/methodology/internet_protocol_security_ipsec.md create mode 100644 docs/content/methodology/measurements/_index.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/_index.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/mlr_search.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/mrr.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/plr_search.md create mode 100644 docs/content/methodology/measurements/packet_latency.md create mode 100644 docs/content/methodology/measurements/telemetry.md delete mode 100644 docs/content/methodology/multi_core_speedup.md delete mode 100644 docs/content/methodology/network_address_translation.md create mode 100644 docs/content/methodology/overview/_index.md create mode 100644 docs/content/methodology/overview/dut_state_considerations.md create mode 100644 docs/content/methodology/overview/multi_core_speedup.md create mode 100644 docs/content/methodology/overview/per_thread_resources.md create mode 100644 docs/content/methodology/overview/terminology.md create mode 100644 docs/content/methodology/overview/vpp_forwarding_modes.md delete mode 100644 docs/content/methodology/packet_flow_ordering.md delete mode 100644 docs/content/methodology/packet_latency.md create mode 100644 docs/content/methodology/per_patch_testing.md delete mode 100644 docs/content/methodology/per_thread_resources.md delete mode 100644 docs/content/methodology/reconfiguration_tests.md delete mode 100644 docs/content/methodology/root_cause_analysis/_index.md delete mode 100644 docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md delete mode 100644 
docs/content/methodology/suite_generation.md delete mode 100644 docs/content/methodology/telemetry.md delete mode 100644 docs/content/methodology/terminology.md create mode 100644 docs/content/methodology/test/_index.md create mode 100644 docs/content/methodology/test/access_control_lists.md create mode 100644 docs/content/methodology/test/generic_segmentation_offload.md create mode 100644 docs/content/methodology/test/hoststack/_index.md create mode 100644 docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md create mode 100644 docs/content/methodology/test/hoststack/tcpip_with_iperf3.md create mode 100644 docs/content/methodology/test/hoststack/udpip_with_iperf3.md create mode 100644 docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md create mode 100644 docs/content/methodology/test/internet_protocol_security.md create mode 100644 docs/content/methodology/test/network_address_translation.md create mode 100644 docs/content/methodology/test/packet_flow_ordering.md create mode 100644 docs/content/methodology/test/reconfiguration.md create mode 100644 docs/content/methodology/test/tunnel_encapsulations.md create mode 100644 docs/content/methodology/test/vpp_device.md create mode 100644 docs/content/methodology/trending/_index.md create mode 100644 docs/content/methodology/trending/analysis.md create mode 100644 docs/content/methodology/trending/presentation.md delete mode 100644 docs/content/methodology/trending_methodology/_index.md delete mode 100644 docs/content/methodology/trending_methodology/overview.md delete mode 100644 docs/content/methodology/trending_methodology/trend_analysis.md delete mode 100644 docs/content/methodology/trending_methodology/trend_presentation.md delete mode 100644 docs/content/methodology/trex_traffic_generator.md delete mode 100644 docs/content/methodology/tunnel_encapsulations.md delete mode 100644 docs/content/methodology/vpp_device_functional.md delete mode 100644 docs/content/methodology/vpp_forwarding_modes.md delete mode 100644 docs/content/methodology/vpp_startup_settings.md (limited to 'docs/content/methodology') diff --git a/docs/content/methodology/_index.md b/docs/content/methodology/_index.md index 6f0dcae783..dbef64db94 100644 --- a/docs/content/methodology/_index.md +++ b/docs/content/methodology/_index.md @@ -1,6 +1,6 @@ --- -bookCollapseSection: true +bookCollapseSection: false bookFlatSection: true title: "Methodology" weight: 2 ---- \ No newline at end of file +--- diff --git a/docs/content/methodology/access_control_lists.md b/docs/content/methodology/access_control_lists.md deleted file mode 100644 index 9767d3f86a..0000000000 --- a/docs/content/methodology/access_control_lists.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: "Access Control Lists" -weight: 12 ---- - -# Access Control Lists - -VPP is tested in a number of data plane feature configurations across -different forwarding modes. Following sections list features tested. - -## ACL Security-Groups - -Both stateless and stateful access control lists (ACL), also known as -security-groups, are supported by VPP. - -Following ACL configurations are tested for MAC switching with L2 -bridge-domains: - -- *l2bdbasemaclrn-iacl{E}sl-{F}flows*: Input stateless ACL, with {E} - entries and {F} flows. -- *l2bdbasemaclrn-oacl{E}sl-{F}flows*: Output stateless ACL, with {E} - entries and {F} flows. -- *l2bdbasemaclrn-iacl{E}sf-{F}flows*: Input stateful ACL, with {E} - entries and {F} flows. 
-- *l2bdbasemaclrn-oacl{E}sf-{F}flows*: Output stateful ACL, with {E} - entries and {F} flows. - -Following ACL configurations are tested with IPv4 routing: - -- *ip4base-iacl{E}sl-{F}flows*: Input stateless ACL, with {E} entries - and {F} flows. -- *ip4base-oacl{E}sl-{F}flows*: Output stateless ACL, with {E} entries - and {F} flows. -- *ip4base-iacl{E}sf-{F}flows*: Input stateful ACL, with {E} entries and - {F} flows. -- *ip4base-oacl{E}sf-{F}flows*: Output stateful ACL, with {E} entries - and {F} flows. - -ACL tests are executed with the following combinations of ACL entries -and number of flows: - -- ACL entry definitions - - - flow non-matching deny entry: (src-ip4, dst-ip4, src-port, dst-port). - - flow matching permit ACL entry: (src-ip4, dst-ip4). - -- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50]. -- {F} - number of UDP flows with different tuple (src-ip4, dst-ip4, - src-port, dst-port), {F} = [100, 10k, 100k]. -- All {E}x{F} combinations are tested per ACL type, total of 9. - -## ACL MAC-IP - -MAC-IP binding ACLs are tested for MAC switching with L2 bridge-domains: - -- *l2bdbasemaclrn-macip-iacl{E}sl-{F}flows*: Input stateless ACL, with - {E} entries and {F} flows. - -MAC-IP ACL tests are executed with the following combinations of ACL -entries and number of flows: - -- ACL entry definitions - - - flow non-matching deny entry: (dst-ip4, dst-mac, bit-mask) - - flow matching permit ACL entry: (dst-ip4, dst-mac, bit-mask) - -- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50] -- {F} - number of UDP flows with different tuple (dst-ip4, dst-mac), - {F} = [100, 10k, 100k] -- All {E}x{F} combinations are tested per ACL type, total of 9. diff --git a/docs/content/methodology/data_plane_throughput/_index.md b/docs/content/methodology/data_plane_throughput/_index.md deleted file mode 100644 index 5791438b3b..0000000000 --- a/docs/content/methodology/data_plane_throughput/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -bookCollapseSection: true -bookFlatSection: false -title: "Data Plane Throughput" -weight: 4 ---- \ No newline at end of file diff --git a/docs/content/methodology/data_plane_throughput/data_plane_throughput.md b/docs/content/methodology/data_plane_throughput/data_plane_throughput.md deleted file mode 100644 index 7ff1d38d17..0000000000 --- a/docs/content/methodology/data_plane_throughput/data_plane_throughput.md +++ /dev/null @@ -1,129 +0,0 @@ ---- -title: "Data Plane Throughput" -weight: 1 ---- - -# Data Plane Throughput - -Network data plane throughput is measured using multiple test methods in -order to obtain representative and repeatable results across the large -set of performance test cases implemented and executed within CSIT. - -Following throughput test methods are used: - -- MLRsearch - Multiple Loss Ratio search -- MRR - Maximum Receive Rate -- PLRsearch - Probabilistic Loss Ratio search - -Description of each test method is followed by generic test properties -shared by all methods. - -## MLRsearch Tests - -### Description - -Multiple Loss Ratio search (MLRsearch) tests discover multiple packet -throughput rates in a single search, reducing the overall test execution -time compared to a binary search. Each rate is associated with a -distinct Packet Loss Ratio (PLR) criteria. In FD.io CSIT two throughput -rates are discovered: Non-Drop Rate (NDR, with zero packet loss, PLR=0) -and Partial Drop Rate (PDR, with PLR<0.5%). MLRsearch is compliant with -RFC2544. 
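To make the single-pass idea concrete, here is a simplified Python sketch
(an illustration only, not the CSIT implementation): NDR and PDR bounds are
narrowed from a shared cache of trial results, and `measure()`, `mlr_search()`
and the parameter values are hypothetical stand-ins.

    # Simplified single-pass NDR/PDR discovery (illustrative, not CSIT code).
    def measure(load_pps, duration_s):
        """Hypothetical stand-in: run one trial, return observed loss ratio."""
        raise NotImplementedError  # provided by the test framework

    def mlr_search(min_rate, max_rate, goals=(0.0, 0.005),
                   rel_width=0.005, duration=30.0):
        """Return {loss_ratio_goal: (lower_bound, upper_bound)}."""
        cache = {}  # load -> loss ratio; trial results are shared across goals

        def trial(load):
            if load not in cache:
                cache[load] = measure(load, duration)
            return cache[load]

        results = {}
        for goal in sorted(goals):  # narrow NDR (0.0) before PDR (0.005)
            lower, upper = min_rate, max_rate
            while (upper - lower) / upper > rel_width:
                load = (lower + upper) / 2.0  # even bisection for simplicity
                if trial(load) <= goal:
                    lower = load  # loss conforms to the goal: new lower bound
                else:
                    upper = load  # too much loss: new upper bound
            results[goal] = (lower, upper)
        return results

Because early midpoints coincide, the PDR pass reuses NDR trial results from
the cache; the production algorithm adds shorter intermediate trial durations
and uneven bisection on top of this basic scheme.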
-
-### Usage
-
-MLRsearch tests are run to discover NDR and PDR rates for each VPP and
-DPDK release covered by the CSIT report. Results for small frame sizes
-(64B/78B, IMIX) are presented in packet throughput graphs
-(Box-and-Whisker Plots) with NDR and PDR rates plotted against the test
-cases covering popular VPP packet paths.
-
-Each test is executed at least 10 times to verify measurement
-repeatability, and results are compared between releases and test
-environments. NDR and PDR packet and bandwidth throughput results for
-all frame sizes and for all tests are presented in detailed results
-tables.
-
-### Details
-
-See the [MLRSearch]({{< ref "mlrsearch/#MLRsearch" >}}) section for more
-detail. MLRsearch is being standardized in IETF in
-[draft-ietf-bmwg-mlrsearch](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-01).
-
-## MRR Tests
-
-### Description
-
-Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests,
-as they provide a maximum “raw” throughput benchmark for the development
-and testing community.
-
-MRR tests measure the packet forwarding rate under the maximum load
-offered by the traffic generator (dependent on link type and NIC model)
-over a set trial duration, regardless of packet loss. Maximum load for
-a specified Ethernet frame size is set to the bi-directional link rate.
-
-### Usage
-
-MRR tests are much faster than MLRsearch as they rely on a single trial
-or a small set of trials with very short duration. It is this property
-that makes them suitable for continuous execution in daily performance
-trending jobs, enabling detection of performance anomalies (regressions,
-progressions) resulting from data plane code changes.
-
-MRR tests are also used for VPP per-patch performance jobs verifying
-patch performance vs parent. CSIT reports include MRR throughput
-comparisons between releases and test environments. Small frame sizes
-only (64B/78B, IMIX).
-
-### Details
-
-See the [MRR Throughput]({{< ref "mrr_throughput/#MRR Throughput" >}})
-section for more detail about MRR test configuration.
-
-The FD.io CSIT performance dashboard includes a complete description of
-[daily performance trending tests](https://s3-docs.fd.io/csit/master/trending/methodology/performance_tests.html)
-and [VPP per patch tests](https://s3-docs.fd.io/csit/master/trending/methodology/perpatch_performance_tests.html).
-
-## PLRsearch Tests
-
-### Description
-
-Probabilistic Loss Ratio search (PLRsearch) tests discover a packet
-throughput rate associated with a configured Packet Loss Ratio (PLR)
-criterion for tests run over an extended period of time, a.k.a. soak
-testing. PLRsearch assumes that the system under test is probabilistic
-in nature, and not deterministic.
-
-### Usage
-
-PLRsearch tests are run to discover a sustained throughput for PLR=10^-7
-(close to NDR) for each VPP release covered by the CSIT report. Results
-for small frame sizes (64B/78B) are presented in packet throughput
-graphs (Box Plots) for a small subset of baseline tests.
-
-Each soak test lasts 30 minutes and is executed at least twice. Results
-are compared against NDR and PDR rates discovered with MLRsearch.
-
-### Details
-
-See the [PLRSearch]({{< ref "plrsearch/#PLRsearch" >}}) methodology
-section for more detail. PLRsearch is being standardized in IETF in
-[draft-vpolak-bmwg-plrsearch](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch).
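As a sense check of the scale this loss-ratio goal implies: at a sustained
load of 10 Mpps, the PLR=10^-7 criterion corresponds to an average of just
one lost packet per second (10^7 pps × 10^-7 = 1 pps), which is why soak
tests need extended durations to accumulate statistically meaningful loss
counts.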
-
-## Generic Test Properties
-
-All data plane throughput test methodologies share the following
-generic properties:
-
-- Tested L2 frame sizes (untagged Ethernet):
-
-  - IPv4 payload: 64B, IMIX (28x64B, 16x570B, 4x1518B), 1518B, 9000B.
-  - IPv6 payload: 78B, IMIX (28x78B, 16x570B, 4x1518B), 1518B, 9000B.
-  - All quoted sizes include frame CRC, but exclude per frame
-    transmission overhead of 20B (preamble, inter frame gap).
-
-- Offered packet load is always bi-directional and symmetric.
-- All measured and reported packet and bandwidth rates are aggregate
-  bi-directional rates reported from external Traffic Generator
-  perspective.
\ No newline at end of file
diff --git a/docs/content/methodology/data_plane_throughput/mlrsearch.md b/docs/content/methodology/data_plane_throughput/mlrsearch.md
deleted file mode 100644
index 73039c9b02..0000000000
--- a/docs/content/methodology/data_plane_throughput/mlrsearch.md
+++ /dev/null
@@ -1,88 +0,0 @@
----
-title: "MLRsearch"
-weight: 2
----
-
-# MLRsearch
-
-## Overview
-
-Multiple Loss Ratio search (MLRsearch) tests use an optimized search
-algorithm implemented in the FD.io CSIT project. MLRsearch discovers
-any number of loss ratio loads in a single search.
-
-Two loss ratio goals are of interest in FD.io CSIT, leading to Non-Drop
-Rate (NDR, loss ratio goal is exactly zero) and Partial Drop Rate
-(PDR, non-zero loss ratio goal, currently 0.5%).
-
-MLRsearch discovers all the loads in a single pass, reducing the
-required time duration compared to separate binary searches[^1] for
-each rate. Overall search time is reduced even further by relying on
-shorter trial durations of intermediate steps, with only the final
-measurements conducted at the specified final trial duration. This
-results in a shorter overall execution time when compared to standard
-NDR/PDR binary search, while guaranteeing similar results.
-
-Note: All throughput rates are *always* bi-directional
-aggregates of two equal (symmetric) uni-directional packet rates
-received and reported by an external traffic generator,
-unless the test specifically requires unidirectional traffic.
-
-## Search Implementation
-
-Detailed description of the MLRsearch algorithm is included in the IETF
-draft
-[draft-ietf-bmwg-mlrsearch-02](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-02)
-that is in the process of being standardized in the IETF Benchmarking
-Methodology Working Group (BMWG).
-(A newer version is published in IETF, describing improvements not yet
-used in CSIT production.)
-
-MLRsearch is also available as a
-[PyPI (Python Package Index) library](https://pypi.org/project/MLRsearch/).
-
-## Algorithm highlights
-
-MRR and the receive rate at MRR load are used as initial guesses for
-the search.
-
-All previously measured trials (except the very first one, which can
-act as a warm-up) are taken into consideration, unless superseded
-by a trial at the same load but higher duration.
-
-For every loss ratio goal, the tightest upper and lower bounds
-(from results of large enough trial duration) form an interval.
-The exit condition is given by that interval reaching a low enough
-relative width. A small enough width is achieved by bisecting the
-current interval. The bisection can be uneven, to save measurements
-based on information theory.
-
-Switching to a higher trial duration generally requires a re-measure
-at a load from the previous trial duration.
-When the re-measurement does not confirm the previous bound
-classification (e.g. the tightest lower bound at the shorter trial
-duration becomes the new tightest upper bound upon re-measurement),
-external search is used to find a close enough bound of the lost type.
-External search is a generalization of the first stage of
-`exponential search`[^2].
-
-Shorter trial durations use a doubled width goal,
-because one bisection is always safe before risking external search.
-
-Within an iteration for a specific trial duration, smaller loss ratios
-(NDR) are narrowed down first before the search continues with higher
-loss ratios (PDR).
-
-Other heuristics are in place, aimed at preventing unnecessarily narrow
-intervals, and at handling corner cases around min and max load.
-
-## Deviations from RFC 2544
-
-CSIT does not have any explicit wait times before and after trial
-traffic.
-
-Small differences between intended and offered load are tolerated,
-mainly due to various time overheads preventing precise measurement
-of the traffic duration (and TRex can sometimes suffer from duration
-stretching).
-
-The final trial duration is only 30s (10s for reconf tests).
-
-[^1]: [binary search](https://en.wikipedia.org/wiki/Binary_search)
-[^2]: [exponential search](https://en.wikipedia.org/wiki/Exponential_search)
diff --git a/docs/content/methodology/data_plane_throughput/mrr_throughput.md b/docs/content/methodology/data_plane_throughput/mrr_throughput.md
deleted file mode 100644
index 076946fb66..0000000000
--- a/docs/content/methodology/data_plane_throughput/mrr_throughput.md
+++ /dev/null
@@ -1,56 +0,0 @@
----
-title: "MRR Throughput"
-weight: 4
----
-
-# MRR Throughput
-
-Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests,
-as they provide a maximum "raw" throughput benchmark for the development
-and testing community. MRR tests measure the packet forwarding rate
-under the maximum load offered by the traffic generator over a set
-trial duration, regardless of packet loss.
-
-MRR tests are currently used for the following test jobs:
-
-- Report performance comparison: 64B, IMIX for vhost, memif.
-- Daily performance trending: 64B, IMIX for vhost, memif.
-- Per-patch performance verification: 64B.
-- Initial iterations of MLRsearch and PLRsearch: 64B.
-
-Maximum offered load for a specific L2 Ethernet frame size is set to
-either the maximum bi-directional link rate or the tested NIC model
-capacity, as follows:
-
-- For 10GE NICs the maximum packet rate load is 2x14.88 Mpps for 64B, a
-  10GE bi-directional link rate.
-- For 25GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a
-  25GE bi-directional link sub-rate limited by the 25GE NIC used on the
-  TRex TG, XXV710.
-- For 40GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a
-  40GE bi-directional link sub-rate limited by the 40GE NIC used on the
-  TRex TG, XL710. Packet rate for other tested frame sizes is limited
-  by the PCIe Gen3 x8 bandwidth limitation of ~50Gbps.
-
-MRR test code implements multiple bursts of offered packet load and has
-two configurable burst parameters: individual trial duration and number
-of trials in a single burst. This enables more precise performance
-trending by providing more results data for analysis.
-
-Burst parameter settings vary between different tests using MRR:
-
-- MRR individual trial duration:
-
-  - Report performance comparison: 1 sec.
-  - Daily performance trending: 1 sec.
-  - Per-patch performance verification: 10 sec.
-  - Initial iteration for MLRsearch: 1 sec.
-  - Initial iteration for PLRsearch: 5.2 sec.
-
-- Number of MRR trials per burst:
-
-  - Report performance comparison: 10.
-  - Daily performance trending: 10.
-  - Per-patch performance verification: 5.
-  - Initial iteration for MLRsearch: 1.
-  - Initial iteration for PLRsearch: 1.
\ No newline at end of file
diff --git a/docs/content/methodology/data_plane_throughput/plrsearch.md b/docs/content/methodology/data_plane_throughput/plrsearch.md
deleted file mode 100644
index 1facccc63b..0000000000
--- a/docs/content/methodology/data_plane_throughput/plrsearch.md
+++ /dev/null
@@ -1,383 +0,0 @@
----
-title: "PLRsearch"
-weight: 3
----
-
-# PLRsearch
-
-## Motivation for PLRsearch
-
-Network providers are interested in the throughput a system can
-sustain.
-
-`RFC 2544`[^3] assumes the loss ratio is given by a deterministic
-function of offered load. But NFV software systems are not
-deterministic enough. This causes deterministic algorithms (such as
-`binary search`[^9] per RFC 2544 and MLRsearch with a single trial) to
-return results which, when repeated, show relatively high standard
-deviation, thus making it harder to tell what "the throughput"
-actually is.
-
-We need another algorithm, which takes this indeterminism into account.
-
-## Generic Algorithm
-
-A detailed description of the PLRsearch algorithm is included in the
-IETF draft `draft-vpolak-bmwg-plrsearch-02`[^1] that is in the process
-of being standardized in the IETF Benchmarking Methodology Working
-Group (BMWG).
-
-### Terms
-
-The rest of this page assumes the reader is familiar with the following
-terms defined in the IETF draft:
-
-+ Trial Order Independent System
-+ Duration Independent System
-+ Target Loss Ratio
-+ Critical Load
-+ Offered Load regions
-
-  + Zero Loss Region
-  + Non-Deterministic Region
-  + Guaranteed Loss Region
-
-+ Fitting Function
-
-  + Stretch Function
-  + Erf Function
-
-+ Bayesian Inference
-
-  + Prior distribution
-  + Posterior Distribution
-
-+ Numeric Integration
-
-  + Monte Carlo
-  + Importance Sampling
-
-## FD.io CSIT Implementation Specifics
-
-The search receives min_rate and max_rate values, to avoid measurements
-at offered loads not supported by the traffic generator.
-
-The implemented test cases use bidirectional traffic.
-The algorithm stores each rate as a bidirectional rate (internally,
-the algorithm is agnostic to flows and directions,
-it only cares about aggregate counts of packets sent and packets lost),
-but debug output from the traffic generator lists unidirectional
-values.
-
-### Measurement Delay
-
-In a sample implementation in the FD.io CSIT project, there is roughly
-a 0.5 second delay between trials due to restrictions imposed by the
-packet traffic generator in use (T-Rex).
-
-As measurement results come in, posterior distribution computation
-takes more time (per sample), although there is a considerable constant
-part (mostly for inverting the fitting functions).
-
-Also, the integrator needs a fair amount of samples to reach the region
-the posterior distribution is concentrated at.
-
-And of course, the speed of the integrator depends on the computing
-power of the CPU the algorithm is able to use.
-
-All those timing related effects are addressed by arithmetically
-increasing trial durations with configurable coefficients
-(currently 5.1 seconds for the first trial,
-each subsequent trial being 0.1 second longer).
-
-### Rounding Errors and Underflows
-
-In order to avoid them, the current implementation tracks the natural
-logarithm (instead of the original quantity) for any quantity which is
-never negative. Logarithm of zero is minus infinity (not supported by
-Python), so the special value "None" is used instead.
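For instance, a log-space addition helper following this None-as-log-zero
convention could look like the sketch below (illustrative only, not the CSIT
source):

    import math

    def log_plus(log_a, log_b):
        """Return log(exp(log_a) + exp(log_b)); None stands for log(0)."""
        if log_a is None:
            return log_b
        if log_b is None:
            return log_a
        hi, lo = max(log_a, log_b), min(log_a, log_b)
        # Stable log-sum-exp: hi + log(1 + exp(lo - hi)) cannot overflow.
        return hi + math.log1p(math.exp(lo - hi))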
-Specific functions for frequent operations (such as "logarithm
-of sum of exponentials") are defined to handle None correctly.
-
-### Fitting Functions
-
-The current implementation uses two fitting functions, called "stretch"
-and "erf". In general, their estimates for the critical rate differ,
-which adds a simple source of systematic error,
-on top of the randomness error reported by the integrator.
-Otherwise the reported stdev of the critical rate estimate
-would be unrealistically low.
-
-Both functions are not only increasing, but also convex
-(meaning the rate of increase is also increasing).
-
-Both fitting functions have several mathematically equivalent formulas,
-and each can lead to an arithmetic overflow or underflow in different
-sub-terms. Overflows can be eliminated by using different exact
-formulas for different argument ranges. Underflows can be avoided by
-using approximate formulas in affected argument ranges, each such range
-having its own formula to compute. In the end, both fitting function
-implementations contain multiple "if" branches, so discontinuities are
-a possibility at range boundaries.
-
-### Prior Distributions
-
-The numeric integrator expects all the parameters to be distributed
-(independently and) uniformly on an interval (-1, 1).
-
-As both "mrr" and "spread" parameters are positive and not
-dimensionless, a transformation is needed. Dimensionality is inherited
-from the max_rate value.
-
-The "mrr" parameter follows a `Lomax distribution`[^4]
-with alpha equal to one, but shifted so that mrr is always greater
-than 1 packet per second.
-
-The "stretch" parameter is generated simply as the "mrr" value
-raised to a random power between zero and one;
-thus it follows a `reciprocal distribution`[^5].
-
-### Integrator
-
-After a few measurements, the posterior distribution of the fitting
-function arguments gets quite concentrated in a small area.
-The integrator uses `Monte Carlo`[^6] with `importance sampling`[^7],
-where the biased distribution is a `bivariate Gaussian`[^8]
-distribution with deliberately larger variance.
-If the generated sample falls outside the (-1, 1) interval,
-another sample is generated.
-
-The center and the covariance matrix for the biased distribution
-are based on the first and second moments of the samples seen so far
-(within the computation). The center is used directly, while the
-covariance matrix is scaled up by a heuristic constant (8.0 by
-default). The following additional features are applied,
-designed to avoid hyper-focused distributions.
-
-Each computation starts with the biased distribution inherited
-from the previous computation (a zero point and unit covariance matrix
-is used in the first computation), but the overall weight of the data
-is set to the weight of the first sample of the computation.
-Also, the center is set to the first sample point.
-When additional samples come, their weight (including the importance
-correction) is compared to the sum of the weights of data seen so far
-(within the iteration). If the new sample is more than one e-fold more
-impactful, both weight values (for data so far and for the new sample)
-are set to the (geometric) average of the two weights.
-
-This combination showed the best behavior, as the integrator usually
-follows two phases. The first phase (where the inherited biased
-distribution or a single big sample is dominating) is mainly important
-for locating the new area the posterior distribution is concentrated
-at. The second phase (dominated by the whole sample population)
-is actually relevant for the critical rate estimation.
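A heavily condensed sketch of the self-normalized importance-sampling core
(the real integrator adapts a full covariance matrix and tracks data weights
as described above; `log_posterior`, the dimensions, the sample count, and
the scale are illustrative placeholders):

    import math
    import random

    def posterior_mean(log_posterior, dim=2, n_samples=10000, scale=2.0):
        """Estimate the posterior mean of parameters on (-1, 1)^dim by
        importance sampling from a wider, zero-centered Gaussian."""
        total_weight = 0.0
        mean = [0.0] * dim
        for _ in range(n_samples):
            # Draw from the biased distribution; redraw if outside (-1, 1).
            while True:
                x = [random.gauss(0.0, scale) for _ in range(dim)]
                if all(-1.0 < xi < 1.0 for xi in x):
                    break
            # Importance weight = posterior density / biased density;
            # normalizing constants cancel under self-normalization.
            log_q = sum(-0.5 * (xi / scale) ** 2 for xi in x)
            weight = math.exp(log_posterior(x) - log_q)
            total_weight += weight
            mean = [m + weight * xi for m, xi in zip(mean, x)]
        return [m / total_weight for m in mean]

Self-normalization is what makes it possible to work directly with
unnormalized log-posterior values; the production code additionally keeps the
weight arithmetic in log space, for the underflow reasons discussed above.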
-
-### Offered Load Selection
-
-The first two measurements are hardcoded to happen at the middle of the
-rate interval and at max_rate. The next two measurements follow
-MRR-like logic: the offered load is decreased so that it would reach
-the target loss ratio if the offered load decrease led to an equal
-decrease of the loss rate.
-
-The rest of the measurements start directly at the average of the
-erf and stretch estimates.
-There is one workaround implemented, aimed at reducing the number of
-consecutive zero loss measurements (per fitting function). The
-workaround first stores every measurement result whose loss ratio was
-the target loss ratio or higher. A sorted list (called lossy loads) of
-such results is maintained.
-
-When a sequence of one or more zero loss measurement results is
-encountered, the smallest of the lossy loads is drained from the list.
-If the estimate average is smaller than the drained value,
-a weighted average of this estimate and the drained value is used
-as the next offered load. The weight of the estimate decreases
-exponentially with the length of consecutive zero loss results.
-
-This behavior helps the algorithm with convergence speed,
-as it does not need so many zero loss results to get near the critical
-region. Using the smallest (not yet drained) of the lossy loads makes
-sure the new offered load is unlikely to result in a big loss region.
-Draining even if the estimate is large enough helps to discard
-early measurements when loss happened at too low an offered load.
-The current implementation adds 4 copies of lossy loads and drains 3 of
-them, which leads to fairly stable behavior even for somewhat
-inconsistent SUTs.
-
-### Caveats
-
-As high loss count measurements add many bits of information,
-they need a large amount of small loss count measurements to balance
-them, making the algorithm converge quite slowly. Typically, this
-happens when a few initial measurements suggest a spread way bigger
-than later measurements do. The workaround in offered load selection
-helps, but more intelligent workarounds could get faster convergence
-still.
-
-Some systems evidently do not follow the assumption of repeated
-measurements having the same average loss rate (when the offered load
-is the same). The idea of estimating the trend is not implemented at
-all, as the observed trends have varied characteristics.
-
-Probably, using more realistic fitting functions
-will give better estimates than trend analysis.
-
-## Bottom Line
-
-The notion of Throughput is easy to grasp, but it is harder to measure
-with any accuracy for non-deterministic systems.
-
-Even though the notion of critical rate is harder to grasp than the
-notion of throughput, it is easier to measure using probabilistic
-methods.
-
-In testing, the difference between throughput measurements and critical
-rate measurements is usually small.
-
-In practice, rules of thumb such as "send at max 95% of purported
-throughput" are common. The correct benchmarking analysis should ask
-"Which notion is 95% of throughput an approximation to?" before
-attempting to answer "Is 95% of critical rate safe enough?".
-
-## Algorithmic Analysis
-
-### Motivation
-
-While the estimation computation is based on hard probability science,
-the offered load selection part of the PLRsearch logic is pure
-heuristics, motivated by what a human would do based on measurement and
-computation results.
-
-The quality of any heuristic is not affected by the soundness of its
-motivation, just by its ability to achieve the intended goals.
-In the case of offered load selection, the goal is to help the search
-converge to the long duration estimates sooner.
-
-But even those long duration estimates could still be of poor quality.
-Even though the estimate computation is Bayesian (so it is the best it
-could be within the applied assumptions), it can still be of poor
-quality when compared to what a human would estimate.
-
-One possible source of poor quality is the randomness inherently
-present in Monte Carlo numeric integration, but that can be suppressed
-by tweaking the time related input parameters.
-
-The most likely source of poor quality, then, is the assumptions.
-Most importantly, the number and the shape of the fitting functions;
-but also others, such as trial order independence and duration
-independence.
-
-The result can have poor quality in basically two ways.
-One way is related to location. Both upper and lower bounds
-can be overestimates or underestimates, meaning the entire estimated
-interval between the lower bound and the upper bound lies above or
-below (respectively) the human-estimated interval.
-The other way is related to the estimation interval width.
-The interval can be too wide or too narrow, compared to human
-estimation.
-
-An estimate from a particular fitting function can be classified
-as an overestimate (or underestimate) just by looking at its time
-evolution (without a human examining measurement results).
-Overestimates decrease over time, underestimates increase over time
-(assuming the system performance stays constant).
-
-Quality of the width of the estimation interval needs human evaluation,
-and is unrelated to both the rate of narrowing (both good and bad
-estimate intervals get narrower at approximately the same relative
-rate) and the relative width (which depends heavily on the system being
-tested).
-
-### Graphical Examples
-
-The following pictures show the upper (red) and lower (blue) bound,
-as well as the average of the Stretch (pink) and Erf (light green)
-estimates, and the offered load chosen (grey), as computed by
-PLRsearch, after each trial measurement within the 30 minute duration
-of a test run.
-
-Both graphs focus on later estimates. Estimates computed from
-a few initial measurements are wildly off the y-axis range shown.
-
-The following analysis will rely on the frequency of zero loss
-measurements and the magnitude of the loss ratio if nonzero.
-
-The offered load selection strategy used implies zero loss measurements
-can be gleaned from the graph by looking at the offered load points.
-When the points move up farther from the lower estimate, it means
-the previous measurement had zero loss. After non-zero loss,
-the offered load starts again right between (the previous values of)
-the estimate curves.
-
-The very big loss ratio results are visible as noticeable jumps
-of both estimates downwards. Medium and small loss ratios are much
-harder to distinguish just by looking at the estimate curves;
-the analysis is based on raw loss ratio measurement results.
-
-The following descriptions should explain why the graphs seem to signal
-a low quality estimate at first sight, but a more detailed look
-reveals the quality is good (considering the measurement results).
-
-#### L2 patch
-
-Both fitting functions give similar estimates, the graph shows
-the "stochasticity" of measurements (estimates increase and decrease
-within small time regions), and an overall trend of decreasing
-estimates.
-
-At first look, the final interval looks fairly narrow,
-especially compared to the region the estimates have travelled
-during the search.
-But a look at the frequency of zero loss results shows
-this is not a case of overestimation. Measurements at around the same
-offered load have a higher probability of zero loss earlier
-(when performed farther from the upper bound), but a smaller
-probability later (when performed closer to the upper bound). That
-means it is the performance of the system under test that decreases
-(slightly) over time.
-
-With that in mind, the apparent narrowness of the interval
-is not a sign of low quality, just a consequence of PLRsearch assuming
-the performance stays constant.
-
-{{< figure src="/cdocs/PLR_patch.svg" >}}
-
-#### Vhost
-
-This test case shows what looks like a quite broad estimation interval,
-compared to other test cases with similarly looking zero loss
-frequencies. Notable features are infrequent high-loss measurement
-results causing big drops of estimates, and a lack of long-term
-convergence.
-
-Any convergence in medium-sized intervals (during zero loss results)
-is reverted by the big loss results, as they happen quite far
-from the critical load estimates, and the two fitting functions
-extrapolate differently.
-
-In other words, a human only seeing estimates from one fitting function
-would expect a narrower end interval, but a human seeing the measured
-loss ratios agrees that the interval should be wider than that.
-
-{{< figure src="/cdocs/PLR_vhost.svg" >}}
-
-#### Summary
-
-The two graphs show the behavior of the PLRsearch algorithm applied to
-a soak test when some of the PLRsearch assumptions do not hold:
-
-+ L2 patch measurement results violate the assumption
-  of performance not changing over time.
-+ Vhost measurement results violate the assumption
-  of a Poisson distribution matching the loss counts.
-
-The reported upper and lower bounds can be farther apart or closer
-together than a first look by a human would suggest, but a closer look
-reveals the quality is good, considering the circumstances.
-
-The usefulness of the critical load estimate is questionable
-when the assumptions are violated.
-
-Some improvements can be made via more specific workarounds,
-for example the long-term limit of L2 patch performance could be
-estimated by some heuristic.
-
-Other improvements can be achieved only by asking users
-whether loss patterns matter. Is it better to have single digit losses
-distributed fairly evenly over time (as a Poisson distribution would
-suggest), or is it better to have short periods of medium losses
-mixed with long periods of zero losses (as happens in the Vhost test)
-with the same overall loss ratio?
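A small worked example shows why the Poisson assumption matters here: with an
average of, say, 2 lost packets per trial, a Poisson model gives a zero-loss
trial probability of e^-2, roughly 13.5%. Long runs of zero-loss trials
interrupted by occasional medium-loss bursts (the Vhost pattern) are
therefore far more consistent with a bursty loss process than with Poisson
losses at the same overall loss ratio.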
-
-[^1]: [draft-vpolak-bmwg-plrsearch-02](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch-02)
-[^2]: [plrsearch draft](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch-00)
-[^3]: [RFC 2544](https://tools.ietf.org/html/rfc2544)
-[^4]: [Lomax distribution](https://en.wikipedia.org/wiki/Lomax_distribution)
-[^5]: [reciprocal distribution](https://en.wikipedia.org/wiki/Reciprocal_distribution)
-[^6]: [Monte Carlo](https://en.wikipedia.org/wiki/Monte_Carlo_integration)
-[^7]: [importance sampling](https://en.wikipedia.org/wiki/Importance_sampling)
-[^8]: [bivariate Gaussian](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
-[^9]: [binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm)
\ No newline at end of file
diff --git a/docs/content/methodology/dut_state_considerations.md b/docs/content/methodology/dut_state_considerations.md
deleted file mode 100644
index 55e408f5f2..0000000000
--- a/docs/content/methodology/dut_state_considerations.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-title: "DUT state considerations"
-weight: 6
----
-
-# DUT state considerations
-
-This page discusses considerations for Device Under Test (DUT) state.
-DUTs such as VPP require configuration to be provided before the
-application starts (via config files) or just after it starts (via API
-or CLI access).
-
-During operation DUTs gather various telemetry data, depending on
-configuration. This internal state handling is part of normal
-operation, so any performance impact is included in the test results.
-Accessing telemetry data is an additional load on the DUT,
-so we are not doing that in the main trial measurements that affect
-results, but we include separate trials specifically for gathering
-runtime telemetry.
-
-But there is one kind of state that needs specific handling.
-This kind of DUT state is dynamically created based on incoming
-traffic, it affects how the DUT handles the traffic, and (unlike
-telemetry counters) it has an uneven impact on CPU load.
-A typical example is NAT, where detecting new sessions takes more CPU
-than forwarding packets on existing (open or recently closed) sessions.
-We call DUT configurations with this kind of state "stateful",
-and configurations without them "stateless".
-(Even though stateless configurations contain state described in the
-previous paragraphs, and some configuration items may have "stateful"
-in their name, such as stateful ACLs.)
-
-# Stateful DUT configurations
-
-Typically, the level of CPU impact of traffic depends on DUT state.
-The first packets causing DUT state to change have a higher impact,
-subsequent packets matching that state have a lower impact.
-
-From a performance point of view, this is similar to traffic phases
-for stateful protocols, see the
-[NGFW draft](https://tools.ietf.org/html/draft-ietf-bmwg-ngfw-performance-05#section-4.3.4).
-In CSIT we borrow the terminology (even if it does not fit perfectly,
-see the discussion below). Ramp-up traffic causes the state change,
-sustain traffic does not change the state.
-
-As the performance is different, each test has to choose which traffic
-it wants to test, and manipulate the DUT state to achieve the intended
-impact.
-
-## Ramp-up trial
-
-Tests aiming at sustain performance need to make sure the DUT state is
-created. We achieve this via a ramp-up trial, whose specific purpose
-is to create the state.
-
-Subsequent trials need no specific handling, as long as the state
-remains the same. But some state can time out, so additional ramp-up
-trials are inserted whenever the code detects the state can time out.
-Note that a trial with zero loss refreshes the state,
-so only the time since the last non-zero loss trial is tracked.
-
-For the state to be set completely, it is important that both DUT and
-TG do not lose any packets. We achieve this by setting the profile
-multiplier (TPS from now on) to a low enough value.
-
-It is also important that each state-affecting packet is sent.
-For size-limited traffic profiles it is guaranteed by the size limit.
-For continuous traffic, we set a long enough duration (based on TPS).
-
-At the end of the ramp-up trial, we check the DUT state to confirm
-it has been created as expected.
-The test fails if the state is not (completely) created.
-
-## State Reset
-
-Tests aiming at ramp-up performance do not use a ramp-up trial,
-and they need to reset the DUT state before each trial measurement.
-The way of resetting the state depends on the test;
-usually an API call is used to partially de-configure
-the part that holds the state, and then re-configure it back.
-
-In CSIT we control the DUT state behavior via a test variable
-"resetter". If it is not set, the DUT state is not reset.
-If it is set, each search algorithm (including MRR) will invoke it
-before all trial measurements (both main and telemetry ones).
-Any configuration keyword enabling a feature with DUT state
-will check whether a test variable for ramp-up rate is present.
-If it is present, the resetter is not set.
-If it is not present, the keyword sets the appropriate resetter value.
-This logic makes sure either ramp-up or state reset is used.
-
-Notes: If both ramp-up and state reset were used, the DUT behavior
-would be identical to just reset, while the test would take longer to
-execute. If neither were used, the DUT would show different performance
-in subsequent trials, violating the assumptions of search algorithms.
-
-## DUT versus protocol ramp-up
-
-There are at least three different causes for bandwidth possibly
-increasing within a single measurement trial.
-
-The first is the DUT switching from the state modification phase to the
-constant phase; it is the primary focus of this document.
-Using ramp-up traffic before main trials eliminates this cause
-for tests wishing to measure the performance of the next phase.
-Using size-limited profiles eliminates the next phase
-for tests wishing to measure the performance of this phase.
-
-The second is a protocol such as TCP ramping up its throughput to
-utilize the bandwidth available. This is the original meaning of
-"ramp up" in the NGFW draft (see above).
-In existing tests we are not using this meaning of TCP ramp-up.
-Instead we use only small transactions, and a large enough initial
-window so TCP acts as ramped-up already.
-
-The third is TCP increasing offered load due to retransmissions
-triggered by packet loss. In CSIT we again try to avoid this behavior
-by using small enough data to transfer, so overlap of multiple
-transactions (the primary cause of packet loss) is unlikely.
-But in MRR tests, packet loss and non-constant offered load are still
-expected.
-
-# Stateless DUT configurations
-
-These are simple configurations, which do not set any resetter value
-(even if ramp-up duration is not configured).
-The majority of existing tests are of this type, using continuous
-traffic profiles.
-
-In order to identify limits of TRex performance,
-we have added suites with a stateless DUT configuration (VPP ip4base)
-subjected to size-limited ASTF traffic.
-The discovered rates serve as a basis of comparison
-for evaluating the results for stateful DUT configurations (VPP
-NAT44ed) subjected to the same traffic profiles.
-
-# DUT versus TG state
-
-Traffic Generator profiles can be stateful (ASTF) or stateless (STL).
-DUT configuration can be stateful or stateless (with respect to packet
-traffic).
-
-In CSIT we currently use all four possible configurations:
-
-- Regular stateless VPP tests use stateless traffic profiles.
-
-- Stateless VPP configuration with a stateful profile is used as a base
-  for comparison.
-
-- Some stateful DUT configurations (NAT44DET, NAT44ED unidirectional)
-  are tested using stateless traffic profiles and continuous traffic.
-
-- The rest of the stateful DUT configurations (NAT44ED bidirectional)
-  are tested using stateful traffic profiles and size limited traffic.
diff --git a/docs/content/methodology/generic_segmentation_offload.md b/docs/content/methodology/generic_segmentation_offload.md
deleted file mode 100644
index ddb19ba826..0000000000
--- a/docs/content/methodology/generic_segmentation_offload.md
+++ /dev/null
@@ -1,116 +0,0 @@
----
-title: "Generic Segmentation Offload"
-weight: 15
----
-
-# Generic Segmentation Offload
-
-## Overview
-
-Generic Segmentation Offload (GSO) reduces per-packet processing
-overhead by enabling applications to pass a multi-packet buffer to the
-(v)NIC and process a smaller number of large packets (e.g. frame size
-of 64 KB), instead of processing higher numbers of small packets (e.g.
-frame size of 1500 B), thus reducing per-packet overhead.
-
-GSO is tested on VPP vhostuser and tapv2 interfaces. All test cases use
-iPerf3 client and server applications running TCP/IP as a traffic
-generator. For performance comparison the same tests are run without
-GSO enabled.
-
-## GSO Test Topologies
-
-Two VPP GSO test topologies are implemented:
-
-1. iPerfC_GSOvirtio_LinuxVM --- GSOvhost_VPP_GSOvhost --- iPerfS_GSOvirtio_LinuxVM
-
-   - Tests VPP GSO on vhostuser interfaces and interaction with Linux
-     virtio with GSO enabled.
-
-2. iPerfC_GSOtap_LinuxNspace --- GSOtapv2_VPP_GSOtapv2 --- iPerfS_GSOtap_LinuxNspace
-
-   - Tests VPP GSO on tapv2 interfaces and interaction with Linux tap
-     with GSO enabled.
-
-Common configuration:
-
-- iPerfC (client) and iPerfS (server) run in TCP/IP mode without an
-  upper bandwidth limit.
-- Trial duration is set to 30 sec.
-- iPerfC, iPerfS and VPP run on a single SUT node.
-
-
-## VPP GSOtap Topology
-
-### VPP Configuration
-
-VPP GSOtap tests are executed without using hyperthreading. The VPP
-worker runs on a single core. Multi-core tests are not executed. Each
-interface belongs to a separate namespace. The following core pinning
-scheme is used:
-
-- 1t1c (rxq=1, rx_qsz=4096, tx_qsz=4096)
-  - system isolated: 0,28,56,84
-  - vpp mt: 1
-  - vpp wt: 2
-  - vhost: 3-5
-  - iperf-s: 6
-  - iperf-c: 7
-
-### iPerf3 Server Configuration
-
-iPerf3 version used: 3.7
-
-    $ sudo -E -S ip netns exec tap1_namespace iperf3 \
-        --server --daemon --pidfile /tmp/iperf3_server.pid --logfile /tmp/iperf3.log --port 5201 --affinity
-
-For the full iPerf3 reference please see:
-[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
-
-
-### iPerf3 Client Configuration
-
-iPerf3 version used: 3.7
-
-    $ sudo -E -S ip netns exec tap1_namespace iperf3 \
-        --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel --time 30.0 --affinity --zerocopy
-
-For the full iPerf3 reference please see:
-[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
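To confirm that the Linux end of the tap actually negotiated GSO, the offload
flags can be inspected from inside the namespace. The command below is
illustrative (the interface name `tap1` is an assumption following the
topology above) and shows the typical `ethtool -k` output format:

    $ sudo ip netns exec tap1_namespace ethtool -k tap1 | grep segmentation-offload
    tcp-segmentation-offload: on
    generic-segmentation-offload: on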
-
-
-## VPP GSOvhost Topology
-
-### VPP Configuration
-
-VPP GSOvhost tests are executed without using hyperthreading. The VPP
-worker runs on a single core. Multi-core tests are not executed. The
-following core pinning scheme is used:
-
-- 1t1c (rxq=1, rx_qsz=1024, tx_qsz=1024)
-  - system isolated: 0,28,56,84
-  - vpp mt: 1
-  - vpp wt: 2
-  - vm-iperf-s: 3,4,5,6,7
-  - vm-iperf-c: 8,9,10,11,12
-  - iperf-s: 1
-  - iperf-c: 1
-
-### iPerf3 Server Configuration
-
-iPerf3 version used: 3.7
-
-    $ sudo iperf3 \
-        --server --daemon --pidfile /tmp/iperf3_server.pid --logfile /tmp/iperf3.log --port 5201 --affinity X
-
-For the full iPerf3 reference please see:
-[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
-
-
-### iPerf3 Client Configuration
-
-iPerf3 version used: 3.7
-
-    $ sudo iperf3 \
-        --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel --time 30.0 --affinity X --zerocopy
-
-For the full iPerf3 reference please see:
-[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
diff --git a/docs/content/methodology/geneve.md b/docs/content/methodology/geneve.md
deleted file mode 100644
index f4a0af92e7..0000000000
--- a/docs/content/methodology/geneve.md
+++ /dev/null
@@ -1,66 +0,0 @@
----
-title: "GENEVE"
-weight: 21
----
-
-# GENEVE
-
-## GENEVE Prefix Bindings
-
-GENEVE prefix bindings should be representative of target applications,
-where packet flows of a particular set of IPv4 addresses (L3 underlay
-network) are routed via a dedicated GENEVE interface by building an L2
-overlay.
-
-Private address ranges to be used in tests:
-
-- East hosts IP address range: 10.0.1.0 - 10.127.255.255 (10.0/9 prefix)
-
-  - A total of 2^23 - 256 (8 388 352) usable IPv4 addresses
-  - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay
-    networks)
-
-- West hosts IP address range: 10.128.1.0 - 10.255.255.255 (10.128/9 prefix)
-
-  - A total of 2^23 - 256 (8 388 352) usable IPv4 addresses
-  - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay
-    networks)
-
-## GENEVE Tunnel Scale
-
-If N is the number of GENEVE tunnels (and IPv4 underlay networks), then
-the TG sends 256 packet flows in each of N different sets:
-
-- i = 1,2,3, ... N - GENEVE tunnel index
-
-- East-West direction: GENEVE encapsulated packets
-
-  - Outer IP header:
-
-    - src ip: 1.1.1.1
-
-    - dst ip: 1.1.1.2
-
-  - GENEVE header:
-
-    - vni: i
-
-  - Inner IP header:
-
-    - src_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
-
-    - dst_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
-
-- West-East direction: non-encapsulated packets
-
-  - IP header:
-
-    - src_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
-
-    - dst_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
-
- **geneve-tunnels** | **total-flows**
--------------------:|----------------:
-                  1 | 256
-                  4 | 1 024
-                 16 | 4 096
-                 64 | 16 384
-                256 | 65 536
-              1 024 | 262 144
\ No newline at end of file
diff --git a/docs/content/methodology/hoststack_testing/_index.md b/docs/content/methodology/hoststack_testing/_index.md
deleted file mode 100644
index b658313040..0000000000
--- a/docs/content/methodology/hoststack_testing/_index.md
+++ /dev/null
@@ -1,6 +0,0 @@
----
-bookCollapseSection: true
-bookFlatSection: false
-title: "Hoststack Testing"
-weight: 14
----
\ No newline at end of file
diff --git a/docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md b/docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md
deleted file mode 100644
index c7d57a51b3..0000000000
--- a/docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md
+++ /dev/null
@@ -1,48 +0,0 @@
----
-title: "QUIC/UDP/IP with vpp_echo"
-weight: 1
----
-
-# QUIC/UDP/IP with vpp_echo
-
-[vpp_echo performance testing tool](https://wiki.fd.io/view/VPP/HostStack#External_Echo_Server.2FClient_.28vpp_echo.29)
-is a bespoke performance test application which utilizes the 'native
-HostStack APIs' to verify performance and correct handling of
-connection/stream events with uni-directional and bi-directional
-streams of data.
-
-Because iperf3 does not support the QUIC transport protocol, vpp_echo
-is used for measuring the maximum attainable goodput of the VPP Host
-Stack connection utilizing the QUIC transport protocol across two
-instances of VPP running on separate DUT nodes. The QUIC transport
-protocol supports multiple streams per connection and test cases
-utilize different combinations of QUIC connections and number of
-streams per connection.
-
-The test configuration is as follows:
-
-    DUT1               Network                DUT2
-    [ vpp_echo-client -> VPP1 ]=======[ VPP2 -> vpp_echo-server]
-              N-streams/connection
-
-where,
-
-1. vpp_echo server attaches to VPP2 and LISTENs on VPP2:TCP port 1234.
-2. vpp_echo client creates one or more connections to VPP1 and opens
-   one or more streams per connection to VPP2:TCP port 1234.
-3. vpp_echo client transmits a uni-directional stream as fast as the
-   VPP Host Stack allows to the vpp_echo server for the test duration.
-4. At the end of the test the vpp_echo client emits the goodput
-   measurements for all streams and the sum of all streams.
-
-Test cases include:
-
-1. 1 QUIC Connection with 1 Stream
-2. 1 QUIC connection with 10 Streams
-3. 10 QUIC connections with 1 Stream
-4. 10 QUIC connections with 10 Streams
-
-with stream sizes to provide reasonable test durations. The VPP Host
-Stack QUIC transport is configured to utilize the picotls encryption
-library. In the future, tests utilizing additional encryption
-algorithms will be added.
diff --git a/docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md b/docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md deleted file mode 100644 index 7baa88ab50..0000000000 --- a/docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: "TCP/IP with iperf3" -weight: 2 ---- - -# TCP/IP with iperf3 - -[iperf3 goodput measurement tool](https://github.com/esnet/iperf) -is used for measuring the maximum attainable goodput of the VPP Host -Stack connection across two instances of VPP running on separate DUT -nodes. iperf3 is a popular open source tool for active measurements -of the maximum achievable goodput on IP networks. - -Because iperf3 utilizes the POSIX socket interface APIs, the current -test configuration utilizes the LD_PRELOAD mechanism in the linux -kernel to connect iperf3 to the VPP Host Stack using the VPP -Communications Library (VCL) LD_PRELOAD library (libvcl_ldpreload.so). - -In the future, a forked version of iperf3 which has been modified to -directly use the VCL application APIs may be added to determine the -difference in performance of 'VCL Native' applications versus utilizing -LD_PRELOAD which inherently has more overhead and other limitations. - -The test configuration is as follows: - - DUT1 Network DUT2 - [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server] - -where, - -1. iperf3 server attaches to VPP2 and LISTENs on VPP2:TCP port 5201. -2. iperf3 client attaches to VPP1 and opens one or more stream - connections to VPP2:TCP port 5201. -3. iperf3 client transmits a uni-directional stream as fast as the - VPP Host Stack allows to the iperf3 server for the test duration. -4. At the end of the test the iperf3 client emits the goodput - measurements for all streams and the sum of all streams. - -Test cases include 1 and 10 Streams with a 20 second test duration -with the VPP Host Stack configured to utilize the Cubic TCP -congestion algorithm. - -Note: iperf3 is single threaded, so it is expected that the 10 stream -test shows little or no performance improvement due to -multi-thread/multi-core execution. - -There are also variations of these test cases which use the VPP Network -Simulator (NSIM) plugin to test the VPP Hoststack goodput with 1 percent -of the traffic being dropped at the output interface of VPP1 thereby -simulating a lossy network. The NSIM tests are experimental and the -test results are not currently representative of typical results in a -lossy network. diff --git a/docs/content/methodology/hoststack_testing/udpip_with_iperf3.md b/docs/content/methodology/hoststack_testing/udpip_with_iperf3.md deleted file mode 100644 index 01ddf61269..0000000000 --- a/docs/content/methodology/hoststack_testing/udpip_with_iperf3.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: "UDP/IP with iperf3" -weight: 3 ---- - -# UDP/IP with iperf3 - -[iperf3 goodput measurement tool](https://github.com/esnet/iperf) -is used for measuring the maximum attainable goodput of the VPP Host -Stack connection across two instances of VPP running on separate DUT -nodes. iperf3 is a popular open source tool for active measurements -of the maximum achievable goodput on IP networks. - -Because iperf3 utilizes the POSIX socket interface APIs, the current -test configuration utilizes the LD_PRELOAD mechanism in the linux -kernel to connect iperf3 to the VPP Host Stack using the VPP -Communications Library (VCL) LD_PRELOAD library (libvcl_ldpreload.so). 
-
-In the future, a forked version of iperf3 which has been modified to
-directly use the VCL application APIs may be added to determine the
-difference in performance of 'VCL Native' applications versus utilizing
-LD_PRELOAD, which inherently has more overhead and other limitations.
-
-The test configuration is as follows:
-
-          DUT1               Network               DUT2
-    [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server]
-
-where,
-
-1. iperf3 server attaches to VPP2 and LISTENs on VPP2:UDP port 5201.
-2. iperf3 client attaches to VPP1 and transmits one or more streams
-   of packets to VPP2:UDP port 5201.
-3. iperf3 client transmits a uni-directional stream as fast as the
-   VPP Host Stack allows to the iperf3 server for the test duration.
-4. At the end of the test the iperf3 client emits the goodput
-   measurements for all streams and the sum of all streams.
-
-Test cases include 1 and 10 Streams with a 20 second test duration
-with the VPP Host Stack using the UDP transport layer.
-
-Note: iperf3 is single threaded, so it is expected that the 10 stream
-test shows little or no performance improvement due to
-multi-thread/multi-core execution.
diff --git a/docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md b/docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md
deleted file mode 100644
index 2dc4d2b7f9..0000000000
--- a/docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md
+++ /dev/null
@@ -1,39 +0,0 @@
----
-title: "VSAP ab with nginx"
-weight: 4
----
-
-# VSAP ab with nginx
-
-[VSAP (VPP Stack Acceleration Project)](https://wiki.fd.io/view/VSAP)
-aims to establish an industry user space application ecosystem based on
-the VPP hoststack. As a pre-requisite to adapting open source applications
-using the VPP Communications Library to accelerate performance, the VSAP team
-has introduced baseline tests utilizing the LD_PRELOAD mechanism to capture
-baseline performance data.
-
-[AB (Apache HTTP server benchmarking tool)](https://httpd.apache.org/docs/2.4/programs/ab.html)
-is used for measuring the maximum connections-per-second and requests-per-second.
-
-[NGINX](https://www.nginx.com) is a popular open source HTTP server
-application. Because NGINX utilizes the POSIX socket interface APIs, the test
-configuration uses the LD_PRELOAD mechanism to connect NGINX to the VPP
-Hoststack using the VPP Communications Library (VCL) LD_PRELOAD library
-(libvcl_ldpreload.so).
-
-In the future, a version of NGINX which has been modified to
-directly use the VCL application APIs will be added to determine the
-difference in performance of 'VCL Native' applications versus utilizing
-LD_PRELOAD, which inherently has more overhead and other limitations.
-
-The test configuration is as follows:
-
-      TG               Network               DUT
-    [ AB ]=============[ VPP -> nginx ]
-
-where,
-
-1. nginx attaches to VPP and listens on TCP port 80.
-2. ab runs CPS and RPS tests with packets flowing from the Test Generator
-   node, across 100G NICs, through VPP hoststack to NGINX.
-3. At the end of the tests, the results are reported by AB.
diff --git a/docs/content/methodology/internet_protocol_security_ipsec.md b/docs/content/methodology/internet_protocol_security_ipsec.md
deleted file mode 100644
index 711004f2c0..0000000000
--- a/docs/content/methodology/internet_protocol_security_ipsec.md
+++ /dev/null
@@ -1,74 +0,0 @@
----
-title: "Internet Protocol Security (IPsec)"
-weight: 11
----
-
-# Internet Protocol Security (IPsec)
-
-VPP IPsec performance tests are executed for the following crypto
-plugins:
-
-- `crypto_native`, used for software based crypto leveraging CPU
-  platform optimizations, e.g. Intel's AES-NI instruction set.
-- `crypto_ipsecmb`, used for hardware based crypto with Intel QAT PCIe
-  cards.
-
-## IPsec with VPP Native SW Crypto
-
-CSIT implements the following IPsec test cases relying on VPP native crypto
-(`crypto_native` plugin):
-
- **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
-----------------------:|-------------------:|------------------:|-----------------:
- crypto_native | AES[128\|256]-GCM | GCM | 1 to 60k tunnels
- crypto_native | AES128-CBC | SHA[256\|512] | 1 to 60k tunnels
-
-VPP IPsec tests with SW crypto are executed in both tunnel and policy modes,
-with tests running on 3-node testbeds: 3n-icx, 3n-tsh.
-
-## IPsec with Intel QAT HW
-
-CSIT implements the following IPsec test cases relying on the ipsecmb library
-(`crypto_ipsecmb` plugin) and Intel QAT 8950 (50G HW crypto card):
-
- **VPP Crypto Engine** | **VPP Crypto Workers** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
-----------------------:|-----------------------:|-------------------:|------------------:|-----------------:
- crypto_ipsecmb | sync/all workers | AES[128\|256]-GCM | GCM | 1, 1k tunnels
- crypto_ipsecmb | sync/all workers | AES[128]-CBC | SHA[256\|512] | 1, 1k tunnels
- crypto_ipsecmb | async/crypto worker | AES[128\|256]-GCM | GCM | 1, 4, 1k tunnels
- crypto_ipsecmb | async/crypto worker | AES[128]-CBC | SHA[256\|512] | 1, 4, 1k tunnels
-
-## IPsec with Async Crypto Feature Workers
-
-*TODO Description to be added*
-
-## IPsec Uni-Directional Tests with VPP Native SW Crypto
-
-CSIT implements the following IPsec uni-directional test cases relying on VPP
-native crypto (`crypto_native` plugin) in tunnel mode:
-
- **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
-----------------------:|-------------------:|------------------:|-------------------:
- crypto_native | AES[128\|256]-GCM | GCM | 4, 1k, 10k tunnels
- crypto_native | AES128-CBC | SHA[512] | 4, 1k, 10k tunnels
-
-In policy mode:
-
- **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
-----------------------:|-------------------:|------------------:|------------------:
- crypto_native | AES[256]-GCM | GCM | 1, 40, 1k tunnels
-
-The tests are running on 2-node testbeds: 2n-tx2. The uni-directional tests
-partially address a weakness in 2-node testbed setups with T-Rex as
-the traffic generator. With just one DUT node, we can either encrypt or decrypt
-traffic in each direction.
-
-The testcases are only doing encryption - packets are encrypted on the DUT and
-then arrive at TG where no additional packet processing is needed (just
-counting packets).
-
-Decryption would require that the traffic generator generated encrypted packets
-which the DUT would then decrypt. However, T-Rex does not have the capability
-to encrypt packets.
diff --git a/docs/content/methodology/measurements/_index.md b/docs/content/methodology/measurements/_index.md
new file mode 100644
index 0000000000..9e9232969e
--- /dev/null
+++ b/docs/content/methodology/measurements/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Measurements"
+weight: 2
+---
diff --git a/docs/content/methodology/measurements/data_plane_throughput/_index.md b/docs/content/methodology/measurements/data_plane_throughput/_index.md
new file mode 100644
index 0000000000..8fc7f66f3e
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Data Plane Throughput"
+weight: 1
+---
\ No newline at end of file
diff --git a/docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md b/docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md
new file mode 100644
index 0000000000..865405ba2f
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md
@@ -0,0 +1,129 @@
+---
+title: "Overview"
+weight: 1
+---
+
+# Data Plane Throughput
+
+Network data plane throughput is measured using multiple test methods in
+order to obtain representative and repeatable results across the large
+set of performance test cases implemented and executed within CSIT.
+
+The following throughput test methods are used:
+
+- MLRsearch - Multiple Loss Ratio search
+- PLRsearch - Probabilistic Loss Ratio search
+- MRR - Maximum Receive Rate
+
+The description of each test method is followed by generic test properties
+shared by all methods.
+
+## MLRsearch Tests
+
+### Description
+
+Multiple Loss Ratio search (MLRsearch) tests discover multiple packet
+throughput rates in a single search, reducing the overall test execution
+time compared to a binary search. Each rate is associated with a
+distinct Packet Loss Ratio (PLR) criterion. In FD.io CSIT two throughput
+rates are discovered: Non-Drop Rate (NDR, with zero packet loss, PLR=0)
+and Partial Drop Rate (PDR, with PLR<0.5%). MLRsearch is compliant with
+RFC2544.
+
+### Usage
+
+MLRsearch tests are run to discover NDR and PDR rates for each VPP and
+DPDK release covered by the CSIT report. Results for small frame sizes
+(64B/78B, IMIX) are presented in packet throughput graphs
+(Box-and-Whisker Plots) with NDR and PDR rates plotted against the test
+cases covering popular VPP packet paths.
+
+Each test is executed at least 10 times to verify measurement
+repeatability, and results are compared between releases and test
+environments. NDR and PDR packet and bandwidth throughput results for
+all frame sizes and for all tests are presented in detailed results
+tables.
+
+### Details
+
+See the [MLRSearch]({{< ref "mlr_search/#MLRsearch" >}}) section for more
+detail. MLRsearch is being standardized in the IETF in
+[draft-ietf-bmwg-mlrsearch](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-01).
+
+## PLRsearch Tests
+
+### Description
+
+Probabilistic Loss Ratio search (PLRsearch) tests discover a packet
+throughput rate associated with a configured Packet Loss Ratio (PLR)
+criterion for tests run over an extended period of time, a.k.a. soak
+testing. PLRsearch assumes that the system under test is probabilistic in
+nature, and not deterministic.
+
+### Usage
+
+PLRsearch tests are run to discover a sustained throughput for PLR=10^-7^
+(close to NDR) for the VPP release covered by the CSIT report.
Results for small
+frame sizes (64B/78B) are presented in packet throughput graphs (Box
+Plots) for a small subset of baseline tests.
+
+Each soak test lasts 30 minutes and is executed at least twice. Results are
+compared against NDR and PDR rates discovered with MLRsearch.
+
+### Details
+
+See the [PLRSearch]({{< ref "plr_search/#PLRsearch" >}}) methodology section
+for more detail. PLRsearch is being standardized in the IETF in
+[draft-vpolak-bmwg-plrsearch](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch).
+
+## MRR Tests
+
+### Description
+
+Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests,
+as they provide a maximum “raw” throughput benchmark for the development and
+testing community.
+
+MRR tests measure the packet forwarding rate under the maximum load
+offered by the traffic generator (dependent on link type and NIC model) over
+a set trial duration, regardless of packet loss. The maximum load for a
+specified Ethernet frame size is set to the bi-directional link rate.
+
+### Usage
+
+MRR tests are much faster than MLRsearch as they rely on a single trial
+or a small set of trials with very short duration. It is this property
+that makes them suitable for continuous execution in daily performance
+trending jobs, enabling detection of performance anomalies (regressions,
+progressions) resulting from data plane code changes.
+
+MRR tests are also used for VPP per patch performance jobs verifying
+patch performance vs parent. CSIT reports include MRR throughput
+comparisons between releases and test environments. Only small frame sizes
+(64B/78B, IMIX) are used.
+
+### Details
+
+See the [MRR Throughput]({{< ref "mrr/#MRR" >}})
+section for more detail about MRR test configuration.
+
+The FD.io CSIT performance dashboard includes a complete description of
+[daily performance trending tests]({{< ref "../../trending/analysis" >}})
+and [VPP per patch tests]({{< ref "../../per_patch_testing.md" >}}).
+
+## Generic Test Properties
+
+All data plane throughput test methodologies share the following generic
+properties:
+
+- Tested L2 frame sizes (untagged Ethernet):
+
+  - IPv4 payload: 64B, IMIX (28x64B, 16x570B, 4x1518B), 1518B, 9000B.
+  - IPv6 payload: 78B, IMIX (28x78B, 16x570B, 4x1518B), 1518B, 9000B.
+  - All quoted sizes include frame CRC, but exclude per frame
+    transmission overhead of 20B (preamble, inter frame gap).
+
+- Offered packet load is always bi-directional and symmetric.
+- All measured and reported packet and bandwidth rates are aggregate
+  bi-directional rates reported from the external Traffic Generator
+  perspective.
diff --git a/docs/content/methodology/measurements/data_plane_throughput/mlr_search.md b/docs/content/methodology/measurements/data_plane_throughput/mlr_search.md
new file mode 100644
index 0000000000..93bdb51efe
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/mlr_search.md
@@ -0,0 +1,88 @@
+---
+title: "MLR Search"
+weight: 2
+---
+
+# MLR Search
+
+## Overview
+
+Multiple Loss Ratio search (MLRsearch) tests use an optimized search algorithm
+implemented in the FD.io CSIT project. MLRsearch discovers any number of
+loss ratio loads in a single search.
+
+Two loss ratio goals are of interest in FD.io CSIT, leading to Non-Drop Rate
+(NDR, loss ratio goal is an exact zero) and Partial Drop Rate
+(PDR, non-zero loss ratio goal, currently 0.5%).
+
+MLRsearch discovers all the loads in a single pass, reducing the required
+time duration compared to separate `binary search`es[^1] for each rate.
Overall
+search time is reduced even further by relying on shorter trial
+durations for intermediate steps, with only the final measurements
+conducted at the specified final trial duration. This results in a
+shorter overall execution time when compared to a standard NDR/PDR binary
+search, while guaranteeing similar results.
+
+  Note: All throughput rates are *always* bi-directional aggregates of two
+  equal (symmetric) uni-directional packet rates received and reported by an
+  external traffic generator, unless the test specifically requires
+  unidirectional traffic.
+
+## Search Implementation
+
+A detailed description of the MLRsearch algorithm is included in the IETF
+draft
+[draft-ietf-bmwg-mlrsearch-02](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-02)
+that is in the process of being standardized in the IETF Benchmarking
+Methodology Working Group (BMWG).
+(A newer version has been published in the IETF, describing improvements not
+yet used in CSIT production.)
+
+MLRsearch is also available as a
+[PyPI (Python Package Index) library](https://pypi.org/project/MLRsearch/).
+
+## Algorithm highlights
+
+MRR and the receive rate at the MRR load are used as initial guesses for the
+search.
+
+All previously measured trials (except the very first one, which can act
+as a warm-up) are taken into consideration, unless superseded
+by a trial at the same load but higher duration.
+
+For every loss ratio goal, the tightest upper and lower bounds
+(from results of large enough trial duration) form an interval.
+The exit condition is given by that interval reaching a low enough relative
+width. A small enough width is achieved by bisecting the current interval.
+The bisection can be uneven, to save measurements based on information theory.
+
+Switching to a higher trial duration generally requires a re-measurement
+at a load from the previous trial duration.
+When the re-measurement does not confirm the previous bound classification
+(e.g. the tightest lower bound at a shorter trial duration becomes
+the new tightest upper bound upon re-measurement),
+external search is used to find a close enough bound of the lost type.
+External search is a generalization of the first stage of
+`exponential search`[^2].
+
+Shorter trial durations use a double width goal,
+because one bisection is always safe before risking external search.
+
+Within an iteration for a specific trial duration, smaller loss ratios (NDR)
+are narrowed down first before the search continues with higher loss ratios
+(PDR).
+
+Other heuristics are in place, aimed at preventing unnecessarily narrow
+intervals, and at handling corner cases around min and max load.
+
+## Deviations from RFC 2544
+
+CSIT does not have any explicit wait times before and after trial traffic.
+
+Small differences between intended and offered load are tolerated,
+mainly due to various time overheads preventing precise measurement
+of the traffic duration (and TRex can sometimes suffer from duration
+stretching).
+
+The final trial duration is only 30s (10s for reconf tests).
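+
+To make the interval-narrowing idea concrete, the following toy sketch
+(illustrative only, not the production MLRsearch code; `measure` and the
+even bisection are simplifying assumptions) narrows the bounds for a single
+loss ratio goal on a deterministic SUT:
+
+    # Toy Python sketch: narrow [lower, upper] around the critical load
+    # for one loss ratio goal; measure(load) returns the trial loss ratio.
+    def toy_search(measure, min_load, max_load, goal, rel_width=0.005):
+        lower, upper = min_load, max_load
+        while (upper - lower) / upper > rel_width:
+            mid = (lower + upper) / 2.0
+            if measure(mid) > goal:
+                upper = mid  # trial failed the goal: new upper bound
+            else:
+                lower = mid  # trial met the goal: new lower bound
+        return lower, upper  # the conservative result is the lower bound
+
+    # Example: a SUT that starts losing packets above 7.3 Mpps.
+    sut = lambda load: max(0.0, (load - 7.3e6) / load)
+    print(toy_search(sut, 10e3, 14.88e6, goal=0.005))
+
+The real implementation additionally varies trial durations, splits
+intervals unevenly, and falls back to external search as described above.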
+
+[^1]: [binary search](https://en.wikipedia.org/wiki/Binary_search)
+[^2]: [exponential search](https://en.wikipedia.org/wiki/Exponential_search)
diff --git a/docs/content/methodology/measurements/data_plane_throughput/mrr.md b/docs/content/methodology/measurements/data_plane_throughput/mrr.md
new file mode 100644
index 0000000000..e8c3e62eb6
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/mrr.md
@@ -0,0 +1,56 @@
+---
+title: "MRR"
+weight: 4
+---
+
+# MRR
+
+Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests,
+as they provide a maximum "raw" throughput benchmark for the development and
+testing community. MRR tests measure the packet forwarding rate under
+the maximum load offered by the traffic generator over a set trial duration,
+regardless of packet loss.
+
+MRR tests are currently used for the following test jobs:
+
+- Report performance comparison: 64B, IMIX for vhost, memif.
+- Daily performance trending: 64B, IMIX for vhost, memif.
+- Per-patch performance verification: 64B.
+- Initial iterations of MLRsearch and PLRsearch: 64B.
+
+The maximum offered load for a specific L2 Ethernet frame size is set to
+either the maximum bi-directional link rate or the tested NIC model
+capacity, as follows:
+
+- For 10GE NICs the maximum packet rate load is 2x14.88 Mpps for 64B, a
+  10GE bi-directional link rate.
+- For 25GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a
+  25GE bi-directional link sub-rate limited by the 25GE NIC used on the TRex
+  TG, XXV710.
+- For 40GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a
+  40GE bi-directional link sub-rate limited by the 40GE NIC used on the TRex
+  TG, XL710. Packet rate for other tested frame sizes is limited by the
+  PCIeGen3 x8 bandwidth limitation of ~50Gbps.
+
+The MRR test code implements multiple bursts of offered packet load and has
+two configurable burst parameters: individual trial duration and the number
+of trials in a single burst. This enables more precise performance
+trending by providing more results data for analysis.
+
+Burst parameter settings vary between different tests using MRR:
+
+- MRR individual trial duration:
+
+  - Report performance comparison: 1 sec.
+  - Daily performance trending: 1 sec.
+  - Per-patch performance verification: 10 sec.
+  - Initial iteration for MLRsearch: 1 sec.
+  - Initial iteration for PLRsearch: 5.2 sec.
+
+- Number of MRR trials per burst:
+
+  - Report performance comparison: 10.
+  - Daily performance trending: 10.
+  - Per-patch performance verification: 5.
+  - Initial iteration for MLRsearch: 1.
+  - Initial iteration for PLRsearch: 1.
diff --git a/docs/content/methodology/measurements/data_plane_throughput/plr_search.md b/docs/content/methodology/measurements/data_plane_throughput/plr_search.md
new file mode 100644
index 0000000000..529bac1f7f
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/plr_search.md
@@ -0,0 +1,383 @@
+---
+title: "PLR Search"
+weight: 3
+---
+
+# PLR Search
+
+## Motivation for PLRsearch
+
+Network providers are interested in the throughput a system can sustain.
+
+`RFC 2544`[^1] assumes the loss ratio is given by a deterministic function of
+offered load. But NFV software systems are not deterministic enough.
+This causes deterministic algorithms (such as `binary search`[^2] per RFC 2544
+and MLRsearch with a single trial) to return results which, when repeated,
+show a relatively high standard deviation,
+thus making it harder to tell what "the throughput" actually is.
+
+We need another algorithm, which takes this indeterminism into account.
+
+## Generic Algorithm
+
+A detailed description of the PLRsearch algorithm is included in the IETF
+draft `Probabilistic Loss Ratio Search for Packet Throughput`[^3] that is in
+the process of being standardized in the IETF Benchmarking Methodology Working
+Group (BMWG).
+
+### Terms
+
+The rest of this page assumes the reader is familiar with the following terms
+defined in the IETF draft:
+
++ Trial Order Independent System
++ Duration Independent System
++ Target Loss Ratio
++ Critical Load
++ Offered Load regions
+
+  + Zero Loss Region
+  + Non-Deterministic Region
+  + Guaranteed Loss Region
+
++ Fitting Function
+
+  + Stretch Function
+  + Erf Function
+
++ Bayesian Inference
+
+  + Prior distribution
+  + Posterior Distribution
+
++ Numeric Integration
+
+  + Monte Carlo
+  + Importance Sampling
+
+## FD.io CSIT Implementation Specifics
+
+The search receives min_rate and max_rate values, to avoid measurements
+at offered loads not supported by the traffic generator.
+
+The implemented test cases use bidirectional traffic.
+The algorithm stores each rate as a bidirectional rate (internally,
+the algorithm is agnostic to flows and directions,
+it only cares about aggregate counts of packets sent and packets lost),
+but debug output from the traffic generator lists unidirectional values.
+
+### Measurement Delay
+
+In the sample implementation in the FD.io CSIT project, there is roughly a
+0.5 second delay between trials due to restrictions imposed by the packet
+traffic generator in use (T-Rex).
+
+As measurement results come in, the posterior distribution computation takes
+more time (per sample), although there is a considerable constant part
+(mostly for inverting the fitting functions).
+
+Also, the integrator needs a fair amount of samples to reach the region
+the posterior distribution is concentrated at.
+
+And of course, the speed of the integrator depends on the computing power
+of the CPU the algorithm is able to use.
+
+All those timing related effects are addressed by arithmetically increasing
+trial durations with configurable coefficients
+(currently 5.1 seconds for the first trial,
+each subsequent trial being 0.1 second longer).
+
+### Rounding Errors and Underflows
+
+To avoid them, the current implementation tracks the natural logarithm
+(instead of the original quantity) for any quantity which is never negative.
+The logarithm of zero is minus infinity (not supported by Python),
+so the special value "None" is used instead.
+Specific functions for frequent operations (such as "logarithm
+of sum of exponentials") are defined to handle None correctly.
+
+### Fitting Functions
+
+The current implementation uses two fitting functions, called "stretch" and
+"erf". In general, their estimates for the critical rate differ,
+which adds a simple source of systematic error,
+on top of the randomness error reported by the integrator.
+Otherwise the reported stdev of the critical rate estimate
+would be unrealistically low.
+
+Both functions are not only increasing, but also convex
+(meaning the rate of increase is also increasing).
+
+Both fitting functions have several mathematically equivalent formulas,
+each of which can lead to an arithmetic overflow or underflow in different
+sub-terms. Overflows can be eliminated by using different exact formulas
+for different argument ranges.
+Underflows can be avoided by using approximate formulas
+in affected argument ranges; such ranges have their own formulas to compute.
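+
+As a minimal illustration of this technique (not the actual fitting-function
+code; `log1p_exp` and `log_plus` are example helpers in the spirit of the
+"logarithm of sum of exponentials" functions mentioned above):
+
+    import math
+
+    def log1p_exp(x):
+        """Safe log(1 + exp(x)); the naive formula overflows for large x."""
+        if x > 33:
+            return x  # log(1 + e^x) ~= x, exact to double precision here
+        return math.log1p(math.exp(x))
+
+    def log_plus(first, second):
+        """log(exp(first) + exp(second)), with None encoding log(0)."""
+        if first is None:
+            return second
+        if second is None:
+            return first
+        if second > first:
+            first, second = second, first
+        # Factor out the larger exponent so exp() never overflows.
+        return first + log1p_exp(second - first)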
+At the end, both fitting function implementations
+contain multiple "if" branches, so discontinuities are a possibility
+at range boundaries.
+
+### Prior Distributions
+
+The numeric integrator expects all the parameters to be distributed
+(independently and) uniformly on an interval (-1, 1).
+
+As both the "mrr" and "spread" parameters are positive and not dimensionless,
+a transformation is needed. Dimensionality is inherited from the max_rate
+value.
+
+The "mrr" parameter follows a `Lomax distribution`[^4]
+with alpha equal to one, but shifted so that mrr is always greater than 1
+packet per second.
+
+The "stretch" parameter is generated simply as the "mrr" value
+raised to a random power between zero and one;
+thus it follows a `reciprocal distribution`[^5].
+
+### Integrator
+
+After a few measurements, the posterior distribution of the fitting function
+arguments gets quite concentrated in a small area.
+The integrator is using `Monte Carlo`[^6] with `importance sampling`[^7],
+where the biased distribution is a `bivariate Gaussian`[^8] distribution
+with deliberately larger variance.
+If the generated sample falls outside the (-1, 1) interval,
+another sample is generated.
+
+The center and the covariance matrix for the biased distribution
+are based on the first and second moments of the samples seen so far
+(within the computation). The center is used directly, while the
+covariance matrix is scaled up by a heuristic constant (8.0 by default).
+The following additional features, designed to avoid hyper-focused
+distributions, are applied.
+
+Each computation starts with the biased distribution inherited
+from the previous computation (a zero point and unit covariance matrix
+are used in the first computation), but the overall weight of the data
+is set to the weight of the first sample of the computation.
+Also, the center is set to the first sample point.
+When additional samples come, their weight (including the importance
+correction) is compared to the sum of the weights of the data seen so far
+(within the iteration).
+If the new sample is more than one e-fold more impactful, both weight values
+(for the data so far and for the new sample) are set to the (geometric)
+average of the two weights.
+
+This combination showed the best behavior, as the integrator usually follows
+two phases. The first phase (where the inherited biased distribution
+or a single big sample is dominating) is mainly important
+for locating the new area the posterior distribution is concentrated at.
+The second phase (dominated by the whole sample population)
+is actually relevant for the critical rate estimation.
+
+### Offered Load Selection
+
+The first two measurements are hardcoded to happen at the middle of the rate
+interval and at max_rate. The next two measurements follow MRR-like logic:
+the offered load is decreased so that it would reach the target loss ratio
+if the offered load decrease led to an equal decrease of the loss rate.
+
+The rest of the measurements start directly at the average of the
+erf and stretch estimates.
+There is one workaround implemented, aimed at reducing the number of
+consecutive zero loss measurements (per fitting function). The workaround
+first stores every measurement result whose loss ratio was the target loss
+ratio or higher. A sorted list (called lossy loads) of such results is
+maintained.
+
+When a sequence of one or more zero loss measurement results is encountered,
+the smallest of the lossy loads is drained from the list.
+If the estimate average is smaller than the drained value,
+a weighted average of this estimate and the drained value is used
+as the next offered load. The weight of the estimate decreases exponentially
+with the length of consecutive zero loss results.
+
+This behavior helps the algorithm with convergence speed,
+as it does not need so many zero loss results to get near the critical region.
+Using the smallest (not yet drained) of the lossy loads makes it likely
+that the new offered load will not end up in the big loss region.
+Draining even if the estimate is large enough helps to discard
+early measurements where loss happened at too low an offered load.
+The current implementation adds 4 copies of lossy loads and drains 3 of them,
+which leads to fairly stable behavior even for somewhat inconsistent SUTs.
+
+### Caveats
+
+As high loss count measurements add many bits of information,
+they need a large amount of small loss count measurements to balance them,
+making the algorithm converge quite slowly. Typically, this happens
+when a few initial measurements suggest a spread way bigger than later
+measurements do. The workaround in offered load selection helps,
+but more intelligent workarounds could achieve faster convergence still.
+
+Some systems evidently do not follow the assumption of repeated measurements
+having the same average loss rate (when the offered load is the same).
+The idea of estimating the trend is not implemented at all,
+as the observed trends have varied characteristics.
+
+Probably, using more realistic fitting functions
+will give better estimates than trend analysis.
+
+## Bottom Line
+
+The notion of Throughput is easy to grasp, but it is harder to measure
+with any accuracy for non-deterministic systems.
+
+Even though the notion of critical rate is harder to grasp than the notion
+of throughput, it is easier to measure using probabilistic methods.
+
+In testing, the difference between throughput measurements and critical
+rate measurements is usually small.
+
+In practice, rules of thumb such as "send at max 95% of purported throughput"
+are common. The correct benchmarking analysis should ask "Which notion is
+95% of throughput an approximation to?" before attempting to answer
+"Is 95% of critical rate safe enough?".
+
+## Algorithmic Analysis
+
+### Motivation
+
+While the estimation computation is based on hard probability science,
+the offered load selection part of the PLRsearch logic is pure heuristics,
+motivated by what a human would do based on measurement and computation
+results.
+
+The quality of any heuristic is not affected by the soundness of its
+motivation, just by its ability to achieve the intended goals.
+In the case of offered load selection, the goal is to help the search
+converge to the long duration estimates sooner.
+
+But even those long duration estimates could still be of poor quality.
+Even though the estimate computation is Bayesian (so it is the best it could
+be within the applied assumptions), it can still be of poor quality when
+compared to what a human would estimate.
+
+One possible source of poor quality is the randomness inherently present
+in Monte Carlo numeric integration, but that can be suppressed
+by tweaking the time related input parameters.
+
+The most likely sources of poor quality are then the assumptions.
+Most importantly, the number and the shape of the fitting functions;
+but also others, such as trial order independence and duration independence.
+
+The result can have poor quality in basically two ways.
+One way is related to location.
Both upper and lower bounds
+can be overestimates or underestimates, meaning the entire estimated interval
+between the lower bound and the upper bound lies above or below (respectively)
+the human-estimated interval.
+The other way is related to the estimation interval width.
+The interval can be too wide or too narrow, compared to the human estimation.
+
+An estimate from a particular fitting function can be classified
+as an overestimate (or underestimate) just by looking at its time evolution
+(without a human examining measurement results). Overestimates
+decrease over time, underestimates increase over time (assuming
+the system performance stays constant).
+
+The quality of the width of the estimation interval needs human evaluation,
+and is unrelated to both the rate of narrowing (both good and bad estimate
+intervals get narrower at approximately the same relative rate) and the
+relative width (which depends heavily on the system being tested).
+
+### Graphical Examples
+
+The following pictures show the upper (red) and lower (blue) bound,
+as well as the averages of the Stretch (pink) and Erf (light green) estimates,
+and the offered load chosen (grey), as computed by PLRsearch,
+after each trial measurement within the 30 minute duration of a test run.
+
+Both graphs focus on later estimates. Estimates computed from
+the few initial measurements are wildly off the y-axis range shown.
+
+The following analysis will rely on the frequency of zero loss measurements
+and the magnitude of the loss ratio if nonzero.
+
+The offered load selection strategy used implies that zero loss measurements
+can be gleaned from the graph by looking at the offered load points.
+When the points move up farther from the lower estimate, it means
+the previous measurement had zero loss. After a non-zero loss,
+the offered load starts again right between (the previous values of)
+the estimate curves.
+
+The very big loss ratio results are visible as noticeable jumps
+of both estimates downwards. Medium and small loss ratios are much harder
+to distinguish just by looking at the estimate curves;
+the analysis is based on raw loss ratio measurement results.
+
+The following descriptions should explain why the graphs seem to signal
+a low quality estimate at first sight, but a more detailed look
+reveals the quality is good (considering the measurement results).
+
+#### L2 patch
+
+Both fitting functions give similar estimates, the graph shows
+the "stochasticity" of measurements (estimates increase and decrease
+within small time regions), and an overall trend of decreasing estimates.
+
+At first look, the final interval looks fairly narrow,
+especially compared to the region the estimates have travelled
+during the search. But a look at the frequency of zero loss results shows
+this is not a case of overestimation. Measurements at around the same
+offered load have a higher probability of zero loss earlier
+(when performed farther from the upper bound), but a smaller probability
+later (when performed closer to the upper bound). That means it is the
+performance of the system under test that decreases (slightly) over time.
+
+With that in mind, the apparent narrowness of the interval
+is not a sign of low quality, just a consequence of PLRsearch assuming
+the performance stays constant.
+
+{{< figure src="/cdocs/PLR_patch.svg" >}}
+
+#### Vhost
+
+This test case shows what looks like a quite broad estimation interval,
+compared to other test cases with similarly looking zero loss frequencies.
+Notable features are the infrequent high-loss measurement results
+causing big drops of estimates, and the lack of long-term convergence.
+
+Any convergence in medium-sized intervals (during zero loss results)
+is reverted by the big loss results, as they happen quite far
+from the critical load estimates, and the two fitting functions
+extrapolate differently.
+
+In other words, a human seeing only the estimates from one fitting function
+would expect a narrower end interval, but a human seeing the measured loss
+ratios agrees that the interval should be wider than that.
+
+{{< figure src="/cdocs/PLR_vhost.svg" >}}
+
+#### Summary
+
+The two graphs show the behavior of the PLRsearch algorithm applied to
+soaking tests when some of the PLRsearch assumptions do not hold:
+
++ L2 patch measurement results violate the assumption
+  of performance not changing over time.
++ Vhost measurement results violate the assumption
+  of the Poisson distribution matching the loss counts.
+
+The reported upper and lower bounds can have a distance larger or smaller
+than a human's first look would expect, but a closer look reveals
+the quality is good, considering the circumstances.
+
+The usefulness of the critical load estimate is questionable
+when the assumptions are violated.
+
+Some improvements can be made via more specific workarounds,
+for example the long term limit of the L2 patch performance could be
+estimated by some heuristic.
+
+Other improvements can be achieved only by asking users
+whether loss patterns matter. Is it better to have single digit losses
+distributed fairly evenly over time (as the Poisson distribution would
+suggest), or is it better to have short periods of medium losses
+mixed with long periods of zero losses (as happens in the Vhost test)
+with the same overall loss ratio?
+
+[^1]: [RFC 2544: Benchmarking Methodology for Network Interconnect Devices](https://tools.ietf.org/html/rfc2544)
+[^2]: [Binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm)
+[^3]: [Probabilistic Loss Ratio Search for Packet Throughput](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch-02)
+[^4]: [Lomax distribution](https://en.wikipedia.org/wiki/Lomax_distribution)
+[^5]: [Reciprocal distribution](https://en.wikipedia.org/wiki/Reciprocal_distribution)
+[^6]: [Monte Carlo](https://en.wikipedia.org/wiki/Monte_Carlo_integration)
+[^7]: [Importance sampling](https://en.wikipedia.org/wiki/Importance_sampling)
+[^8]: [Bivariate Gaussian](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
diff --git a/docs/content/methodology/measurements/packet_latency.md b/docs/content/methodology/measurements/packet_latency.md
new file mode 100644
index 0000000000..f3606b5ffb
--- /dev/null
+++ b/docs/content/methodology/measurements/packet_latency.md
@@ -0,0 +1,52 @@
+---
+title: "Packet Latency"
+weight: 2
+---
+
+# Packet Latency
+
+TRex Traffic Generator (TG) is used for measuring one-way latency in
+2-Node and 3-Node physical testbed topologies. TRex integrates
+[High Dynamic Range Histogram (HDRH)](http://hdrhistogram.org/)
+functionality and reports per-packet latency distribution for latency
+streams sent in parallel to the main load packet streams.
+
+The following methodology is used:
+
+- Only the NDRPDR test type measures latency, and only after the NDR and
+  PDR values are determined. Other test types do not involve latency
+  streams.
+
+- Latency is measured at different background load packet rates:
+
+  - No-Load: latency streams only.
+  - Low-Load: at 10% PDR.
+  - Mid-Load: at 50% PDR.
+  - High-Load: at 90% PDR.
+
+- Latency is measured for all tested packet sizes except IMIX, due to
+  a TRex TG restriction.
+
+- TG sends dedicated latency streams, one per direction, each at the
+  rate of 9 kpps at the prescribed packet size; these are sent in
+  addition to the main load streams.
+
+- TG reports Min/Avg/Max and HDRH latency values distribution per stream
+  direction, hence two sets of latency values are reported per test case
+  (marked as E-W and W-E).
+
+- +/- 1 usec is the measurement accuracy of TRex TG, and the data in the
+  HDRH latency values distribution is rounded to microseconds.
+
+- TRex TG introduces a (background) always-on Tx + Rx latency bias of 4
+  usec on average per direction, resulting from TRex software writing and
+  reading packet timestamps on CPU cores. Quoted values are based on TG
+  back-to-back latency measurements.
+
+- Latency graphs are not smoothed, each latency value has its own
+  horizontal line across corresponding packet percentiles.
+
+- Percentiles are shown on the X-axis using a logarithmic scale, so the
+  maximal latency value (ending at the 100% percentile) would be at
+  infinity. The graphs are cut at 99.9999% (hover information still
+  lists 100%).
diff --git a/docs/content/methodology/measurements/telemetry.md b/docs/content/methodology/measurements/telemetry.md
new file mode 100644
index 0000000000..aed32d9e17
--- /dev/null
+++ b/docs/content/methodology/measurements/telemetry.md
@@ -0,0 +1,158 @@
+---
+title: "Telemetry"
+weight: 3
+---
+
+# Telemetry
+
+OpenMetrics specifies the de-facto standard for transmitting cloud-native
+metrics at scale, with support for both text representation and Protocol
+Buffers.
+
+## RFC
+
+- RFC2119
+- RFC5234
+- RFC8174
+- draft-richih-opsawg-openmetrics-00
+
+## Reference
+
+[OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md)
+
+## Metric Types
+
+- Gauge
+- Counter
+- StateSet
+- Info
+- Histogram
+- GaugeHistogram
+- Summary
+- Unknown
+
+The telemetry module in CSIT currently supports only Gauge, Counter and Info.
+
+## Anatomy of CSIT telemetry implementation
+
+The existing implementation consists of several measurement building blocks:
+the main measuring block running search algorithms (MLR, PLR, SOAK, MRR, ...),
+the latency measuring block, and several telemetry blocks with or without
+traffic running in the background.
+
+The main measuring block must not be interrupted by any read operation that
+can impact data plane traffic processing during the throughput search. Thus
+operational reads are done before (pre-stat) and after (post-stat) that block.
+
+Some operational reads must be done while traffic is running and usually
+consist of two reads (pre-run-stat, post-run-stat) with a defined delay
+between them.
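+
+For illustration, a minimal Python sketch of how a Gauge and a Counter could
+be rendered in the OpenMetrics text exposition format (the metric and label
+names below are made-up examples, not the exact metrics exported by CSIT):
+
+    def sample(name, labels, value):
+        """Render one OpenMetrics-style sample line."""
+        pairs = ",".join(f'{key}="{val}"' for key, val in labels.items())
+        return f"{name}{{{pairs}}} {value}"
+
+    print("# TYPE node_clocks gauge")
+    print(sample("node_clocks", {"node": "ip4-lookup", "thread": "1"}, 1.2e4))
+    print("# TYPE rx_packets_total counter")
+    print(sample("rx_packets_total", {"interface": "port0"}, 987654321))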
+ +## MRR measurement + + traffic_start(r=mrr) traffic_stop |< measure >| + | | | (r=mrr) | + | pre_run_stat post_run_stat | pre_stat | | post_stat + | | | | | | | | + o--------o---------------o-------o------o------+---------------+------o------> + t + Legend: + - pre_run_stat + - vpp-clear-runtime + - post_run_stat + - vpp-show-runtime + - bash-perf-stat // if extended_debug == True + - pre_stat + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - vpp-enable-elog + - post_stat + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True + - vpp-show-elog + + |< measure >| + | (r=mrr) | + | | + |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >| + | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) | + | | | | + o-----------------------o------------------------o------------------------o---> + t + + +## MLR measurement + + |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >| + | (r=mlr) | | | | | | .9/.5/.1/.0 | + | | | pre_run_stat post_run_stat | | pre_run_stat post_run_stat | | | + | | | | | | | | | | | | + +-------------+---o-------o---------------o--------o-------------o-------o---------------o--------o------------[-------------------]---> + t + Legend: + - pre_run_stat + - vpp-clear-runtime + - post_run_stat + - vpp-show-runtime + - bash-perf-stat // if extended_debug == True + - pre_stat + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - vpp-enable-elog + - post_stat + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True + - vpp-show-elog + +## MRR measurement + + traffic_start(r=mrr) traffic_stop |< measure >| + | | | (r=mrr) | + | |< stat_runtime >| | stat_pre_trial | | stat_post_trial + | | | | | | | | + o---+------------------+---o------o------------+-------------+----o------------> + t + Legend: + - stat_runtime + - vpp-runtime + - stat_pre_trial + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - stat_post_trial + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True + + |< measure >| + | (r=mrr) | + | | + |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >| + | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) | + | | | | + o------------------------o------------------------o------------------------o---> + t + + |< stat_runtime >| + | | + |< program0 >|< program1 >|< programN >| + | (@=params) | (@=params) | (@=params) | + | | | | + o------------------------o------------------------o------------------------o---> + t + +## MLR measurement + + |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >| + | (r=mlr) | | | | | | .9/.5/.1/.0 | + | | | |< stat_runtime >| | | |< stat_runtime >| | | | + | | | | | | | | | | | | + +-------------+---o---+------------------+---o--------------o---+------------------+---o-----------[-----------------]---> + t + Legend: + - stat_runtime + - vpp-runtime + - stat_pre_trial + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - stat_post_trial + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True diff --git a/docs/content/methodology/multi_core_speedup.md b/docs/content/methodology/multi_core_speedup.md deleted file mode 100644 index c0c9ae2570..0000000000 --- a/docs/content/methodology/multi_core_speedup.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: "Multi-Core Speedup" -weight: 13 ---- - -# Multi-Core Speedup - -All performance tests are executed with single 
physical core and with
-multiple core scenarios.
-
-## Intel Hyper-Threading (HT)
-
-Intel Xeon processors used in FD.io CSIT can operate either in HT
-Disabled mode (single logical core per each physical core) or in HT
-Enabled mode (two logical cores per each physical core). The HT setting is
-applied in BIOS and requires a server SUT reload for it to take effect,
-making it impractical for continuous changes of the HT mode of operation.
-
-Performance tests are executed with server SUTs' Intel Xeon processors
-configured with Intel Hyper-Threading Enabled for all Xeon
-Cascadelake and Xeon Icelake testbeds.
-
-## Multi-core Tests
-
-Multi-core tests are executed in the following VPP worker thread and physical
-core configurations:
-
-1. Intel Xeon Icelake and Cascadelake testbeds (2n-icx, 3n-icx, 2n-clx)
-   with Intel HT enabled (2 logical CPU cores per each physical core):
-
-   1. 2t1c - 2 VPP worker threads on 1 physical core.
-   2. 4t2c - 4 VPP worker threads on 2 physical cores.
-   3. 8t4c - 8 VPP worker threads on 4 physical cores.
-
-VPP worker threads are the data plane threads running on isolated
-logical cores. With Intel HT enabled, VPP workers are placed as sibling
-threads on each used physical core. VPP control threads (main, stats)
-are running on a separate non-isolated core together with other Linux
-processes.
-
-In all CSIT tests care is taken to ensure that each VPP worker handles
-the same amount of received packet load and does the same amount of
-packet processing work. This is achieved by evenly distributing per
-interface type (e.g. physical, virtual) receive queues over VPP workers
-using the default VPP round-robin mapping and by loading these queues with
-the same amount of packet flows.
-
-If the number of VPP workers is higher than the number of physical or virtual
-interfaces, multiple receive queues are configured on each interface.
-NIC Receive Side Scaling (RSS) for physical interfaces and multi-queue
-for virtual interfaces are used for this purpose.
\ No newline at end of file
diff --git a/docs/content/methodology/network_address_translation.md b/docs/content/methodology/network_address_translation.md
deleted file mode 100644
index ef341dc892..0000000000
--- a/docs/content/methodology/network_address_translation.md
+++ /dev/null
@@ -1,445 +0,0 @@
----
-title: "Network Address Translation"
-weight: 7
----
-
-# Network Address Translation
-
-## NAT44 Prefix Bindings
-
-NAT44 prefix bindings should be representative of target applications,
-where a number of private IPv4 addresses from the range defined by
-RFC1918 is mapped to a smaller set of public IPv4 addresses from the
-public range.
-
-The following quantities are used to describe inside to outside IP address
-and port binding scenarios:
-
-- Inside-addresses, number of inside source addresses
-  (representing inside hosts).
-- Ports-per-inside-address, number of TCP/UDP source
-  ports per inside source address.
-- Outside-addresses, number of outside (public) source addresses
-  allocated to NAT44.
-- Ports-per-outside-address, number of TCP/UDP source
-  ports per outside source address. The maximal number of
-  ports-per-outside-address usable for NAT is 64 512
-  (in the non-reserved port range 1024-65535, RFC4787).
-- Sharing-ratio, equal to inside-addresses divided by outside-addresses.
-
-CSIT NAT44 tests are designed to take into account the maximum number of
-ports (sessions) required per inside host (inside-address) and at the
-same time to maximize the use of the outside-address range by using all
-available outside ports.
With this in mind, the following scheme of
-NAT44 sharing ratios has been devised for use in CSIT:
-
- **ports-per-inside-address** | **sharing-ratio**
------------------------------:|------------------:
- 63 | 1024
- 126 | 512
- 252 | 256
- 504 | 128
-
-Initial CSIT NAT44 tests, including associated TG/TRex traffic profiles,
-are based on ports-per-inside-address set to 63 and the sharing ratio of
-1024. This approach is currently used for all NAT44 tests including
-NAT44det (NAT44 deterministic used for Carrier Grade NAT applications)
-and NAT44ed (Endpoint Dependent).
-
-Private address ranges to be used in tests:
-
-- 192.168.0.0 - 192.168.255.255 (192.168/16 prefix)
-
-  - Total of 2^16 (65 536) usable IPv4 addresses.
-  - Used in tests for up to 65 536 inside addresses (inside hosts).
-
-- 172.16.0.0 - 172.31.255.255 (172.16/12 prefix)
-
-  - Total of 2^20 (1 048 576) usable IPv4 addresses.
-  - Used in tests for up to 1 048 576 inside addresses (inside hosts).
-
-### NAT44 Session Scale
-
-The NAT44 session scale tested is governed by the following logic:
-
-- Number of inside-addresses (hosts) H[i] = (H[i-1] x 2^2) with H(0)=1 024,
-  i = 1,2,3, ...
-
-  - H[i] = 1 024, 4 096, 16 384, 65 536, 262 144, ...
-
-- Number of sessions S[i] = H[i] * ports-per-inside-address
-
-  - ports-per-inside-address = 63
-
- **i** | **hosts** | **sessions**
-------:|----------:|-------------:
- 0 | 1 024 | 64 512
- 1 | 4 096 | 258 048
- 2 | 16 384 | 1 032 192
- 3 | 65 536 | 4 128 768
- 4 | 262 144 | 16 515 072
-
-### NAT44 Deterministic
-
-NAT44det performance tests are using TRex STL (Stateless) API and traffic
-profiles, similar to all other stateless packet forwarding tests like
-ip4, ip6 and l2, sending UDP packets in both directions
-inside-to-outside and outside-to-inside.
-
-The inside-to-outside traffic uses a single destination address (20.0.0.0)
-and port (1024).
-The inside-to-outside traffic covers the whole inside address and port range,
-the outside-to-inside traffic covers the whole outside address and port
-range.
-
-NAT44det translation entries are created during the ramp-up phase,
-followed by verification that all entries are present,
-before proceeding to the main measurements of the test.
-This ensures session setup does not impact the forwarding performance test.
-
-Associated CSIT test cases use the following naming scheme to indicate
-the NAT44det scenario tested:
-
-- ethip4udp-nat44det-h{H}-p{P}-s{S}-[mrr|ndrpdr|soak]
-
-  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
-  - {P}, number of ports per inside host, P = 63.
-  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
-    16515072.
-  - [mrr|ndrpdr|soak], MRR, NDRPDR or SOAK test.
-
-### NAT44 Endpoint-Dependent
-
-In order to exercise the NAT44ed ability to translate based on both
-source and destination address and port, the inside-to-outside traffic
-varies also the destination address and port. The destination port is the
-same as the source port, the destination address has the same offset as the
-source address, but applied to a different subnet (starting with 20.0.0.0).
-
-As the mapping is not deterministic (for security reasons),
-we cannot easily use stateless bidirectional traffic profiles.
-The inside address and port range is fully covered,
-but we do not know which outside-to-inside source address and port to use
-to hit an open session.
-
-Therefore, NAT44ed is benchmarked using the following methodologies:
-
-- Unidirectional throughput using *stateless* traffic profile.
-- Connections-per-second (CPS) using *stateful* traffic profile.
-- Bidirectional throughput (TPUT, see below) using *stateful* traffic profile.
-
-Unidirectional NAT44ed throughput tests are using TRex STL (Stateless)
-APIs and traffic profiles, but with packets sent only in the
-inside-to-outside direction.
-Similarly to NAT44det, NAT44ed unidirectional throughput tests include
-a ramp-up phase to establish and verify the presence of the required NAT44ed
-binding entries. As the sessions have a finite duration, the test code
-keeps inserting ramp-up trials during the search, if it detects a risk
-of sessions timing out. Any zero loss trial visits all sessions,
-so it acts also as a ramp-up.
-
-Stateful NAT44ed tests are using TRex ASTF (Advanced Stateful) APIs and
-traffic profiles, with packets sent in both directions. Tests are run
-with both UDP and TCP sessions.
-As NAT44ed CPS (connections-per-second) stateful tests
-measure (also) session opening performance,
-they use state reset instead of a ramp-up trial.
-NAT44ed TPUT (bidirectional throughput) tests prepend ramp-up trials
-as in the unidirectional tests,
-so the test results describe performance without the translation entry
-creation overhead.
-
-Associated CSIT test cases use the following naming scheme to indicate
-the NAT44ed case tested:
-
-- Stateless: ethip4udp-nat44ed-h{H}-p{P}-s{S}-udir-[mrr|ndrpdr|soak]
-
-  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
-  - {P}, number of ports per inside host, P = 63.
-  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
-    16515072.
-  - udir-[mrr|ndrpdr|soak], unidirectional stateless tests MRR, NDRPDR
-    or SOAK.
-
-- Stateful: ethip4[udp|tcp]-nat44ed-h{H}-p{P}-s{S}-[cps|tput]-[mrr|ndrpdr|soak]
-
-  - [udp|tcp], UDP or TCP sessions
-  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
-  - {P}, number of ports per inside host, P = 63.
-  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
-    16515072.
-  - [cps|tput], connections-per-second session establishment rate or
-    packets-per-second average rate, or packets-per-second rate
-    without session establishment.
-  - [mrr|ndrpdr|soak], bidirectional stateful tests MRR, NDRPDR, or SOAK.
-
-## Stateful traffic profiles
-
-There are several important details which distinguish ASTF profiles
-from stateless profiles.
-
-### General considerations
-
-#### Protocols
-
-ASTF profiles are limited to either the UDP or the TCP protocol.
-
-#### Programs
-
-Each template in the profile defines two "programs", one for the client side
-and one for the server side.
-
-Each program specifies when that side has to wait until enough data is
-received (counted in packets for UDP and in bytes for TCP)
-and when to send additional data. Together, the two programs
-define a single transaction. Due to packet loss, a transaction may take
-longer, use more packets (retransmission) or never finish in its entirety.
-
-#### Instances
-
-A client instance is created according to the TPS parameter for the trial,
-and sends the first packet of the transaction (in some cases more packets).
-Each client instance uses a different source address (see sequencing below)
-and some source port. The destination address also comes from a range,
-but the destination port has to be constant for a given program.
-
-TRex uses an opaque way to choose source ports, but as session counting
-shows, the next client with the same source address uses a different source
-port.
-
-A server instance is created when the first packet arrives at the server
-side.
-Source address and port of the first packet are used as destination address -and port for the server responses. This is the ability we need -when outside surface is not predictable. - -When a program reaches its end, the instance is deleted. -This creates possible issues with server instances. If the server instance -does not read all the data client has sent, late data packets -can cause a second copy of server instance to be created, -which breaks assumptions on how many packet a transaction should have. - -The need for server instances to read all the data reduces the overall -bandwidth TRex is able to create in ASTF mode. - -Note that client instances are not created on packets, -so it is safe to end client program without reading all server data -(unless the definition of transaction success requires that). - -#### Sequencing - -ASTF profiles offer two modes for choosing source and destination IP addresses -for client programs: seqential and pseudorandom. -In current tests we are using sequential addressing only (if destination -address varies at all). - -For client destination UDP/TCP port, we use a single constant value. -(TRex can support multiple program pairs in the same traffic profile, -distinguished by the port number.) - -#### Transaction overlap - -If a transaction takes longer to finish, compared to period implied by TPS, -TRex will have multiple client or server instances active at a time. - -During calibration testing we have found this increases CPU utilization, -and for high TPS it can lead to TRex's Rx or Tx buffers becoming full. -This generally leads to duration stretching, and/or packet loss on TRex. - -Currently used transactions were chosen to be short, so risk of bad behavior -is decreased. But in MRR tests, where load is computed based on NIC ability, -not TRex ability, anomalous behavior is still possible -(e.g. MRR values being way lower than NDR). - -#### Delays - -TRex supports adding constant delays to ASTF programs. -This can be useful, for example if we want to separate connection establishment -from data transfer. - -But as TRex tracks delayed instances as active, this still results -in higher CPU utilization and reduced performance issues -(as other overlaping transactions). So the current tests do not use any delays. - -#### Keepalives - -Both UDP and TCP protocol implementations in TRex programs support keepalive -duration. That means there is a configurable period of keepalive time, -and TRex sends keepalive packets automatically (outside the program) -for the time the program is active (started, not ended yet) -but not sending any packets. - -For TCP this is generally not a big deal, as the other side usually -retransmits faster. But for UDP it means a packet loss may leave -the receiving program running. - -In order to avoid keepalive packets, keepalive value is set to a high number. -Here, "high number" means that even at maximum scale and minimum TPS, -there are still no keepalive packets sent within the corresponding -(computed) trial duration. This number is kept the same also for -smaller scale traffic profiles, to simplify maintenance. - -#### Transaction success - -The transaction is considered successful at Layer-7 (L7) level -when both program instances close. At this point, various L7 counters -(unofficial name) are updated on TRex. - -We found that proper close and L7 counter update can be CPU intensive, -whereas lower-level counters (ipackets, opackets) called L2 counters -can keep up with higher loads. 
- -For some tests, we do not need to confirm the whole transaction was successful. -CPS (connections per second) tests are a typical example. -We care only for NAT44ed creating a session (needs one packet -in inside-to-outside direction per session) and being able to use it -(needs one packet in outside-to-inside direction). - -Similarly in TPUT tests (packet throuput, counting both control -and data packets), we care about NAT44ed ability to forward packets, -we do not care whether aplications (TRex) can fully process them at that rate. - -Therefore each type of tests has its own formula (usually just one counter -already provided by TRex) to count "successful enough" transactions -and attempted transactions. Currently, all tests relying on L7 counters -use size-limited profiles, so they know what the count of attempted -transactions should be, but due to duration stretching -TRex might have been unable to send that many packets. -For search purposes, unattempted transactions are treated the same -as attempted but failed transactions. - -Sometimes even the number of transactions as tracked by search algorithm -does not match the transactions as defined by ASTF programs. -See TCP TPUT profile below. - -### UDP CPS - -This profile uses a minimalistic transaction to verify NAT44ed session has been -created and it allows outside-to-inside traffic. - -Client instance sends one packet and ends. -Server instance sends one packet upon creation and ends. - -In principle, packet size is configurable, -but currently used tests apply only one value (100 bytes frame). - -Transaction counts as attempted when opackets counter increases on client side. -Transaction counts as successful when ipackets counter increases on client side. - -### TCP CPS - -This profile uses a minimalistic transaction to verify NAT44ed session has been -created and it allows outside-to-inside traffic. - -Client initiates TCP connection. Client waits until connection is confirmed -(by reading zero data bytes). Client ends. -Server accepts the connection. Server waits for indirect confirmation -from client (by waiting for client to initiate close). Server ends. - -Without packet loss, the whole transaction takes 7 packets to finish -(4 and 3 per direction). -From NAT44ed point of view, only the first two are needed to verify -the session got created. - -Packet size is not configurable, but currently used tests report -frame size as 64 bytes. - -Transaction counts as attempted when tcps_connattempt counter increases -on client side. -Transaction counts as successful when tcps_connects counter increases -on client side. - -### UDP TPUT - -This profile uses a small transaction of "request-response" type, -with several packets simulating data payload. - -Client sends 5 packets and closes immediately. -Server reads all 5 packets (needed to avoid late packets creating new -server instances), then sends 5 packets and closes. -The value 5 was chosen to mirror what TCP TPUT (see below) choses. - -Packet size is configurable, currently we have tests for 100, -1518 and 9000 bytes frame (to match size of TCP TPUT data frames, see below). - -As this is a packet oriented test, we do not track the whole -10 packet transaction. Similarly to stateless tests, we treat each packet -as a "transaction" for search algorthm packet loss ratio purposes. -Therefore a "transaction" is attempted when opacket counter on client -or server side is increased. Transaction is successful if ipacket counter -on client or server side is increased. 
- -If one of 5 client packets is lost, server instance will get stuck -in the reading phase. This probably decreases TRex performance, -but it leads to more stable results then alternatives. - -### TCP TPUT - -This profile uses a small transaction of "request-response" type, -with some data amount to be transferred both ways. - -In CSIT release 22.06, TRex behavior changed, so we needed to edit -the traffic profile. Let us describe the pre-22.06 profile first. - -Client connects, sends 5 data packets worth of data, -receives 5 data packets worth of data and closes its side of the connection. -Server accepts connection, reads 5 data packets worth of data, -sends 5 data packets worth of data and closes its side of the connection. -As usual in TCP, sending side waits for ACK from the receiving side -before proceeding with next step of its program. - -Server read is needed to avoid premature close and second server instance. -Client read is not stricly needed, but ACKs allow TRex to close -the server instance quickly, thus saving CPU and improving performance. - -The number 5 of data packets was chosen so TRex is able to send them -in a single burst, even with 9000 byte frame size (TRex has a hard limit -on initial window size). -That leads to 16 packets (9 of them in c2s direction) to be exchanged -if no loss occurs. -The size of data packets is controlled by the traffic profile setting -the appropriate maximum segment size. Due to TRex restrictions, -the minimal size for IPv4 data frame achievable by this method is 70 bytes, -which is more than our usual minimum of 64 bytes. -For that reason, the data frame sizes available for testing are 100 bytes -(that allows room for eventually adding IPv6 ASTF tests), -1518 bytes and 9000 bytes. There is no control over control packet sizes. - -Exactly as in UDP TPUT, ipackets and opackets counters are used for counting -"transactions" (in fact packets). - -If packet loss occurs, there can be large transaction overlap, even if most -ASTF programs finish eventually. This can lead to big duration stretching -and somehow uneven rate of packets sent. This makes it hard to interpret -MRR results (frequently MRR is below NDR for this reason), -but NDR and PDR results tend to be stable enough. - -In 22.06, the "ACK from the receiving side" behavior changed, -the receiving side started sending ACK sometimes -also before receiving the full set of 5 data packets. -If the previous profile is understood as a "single challenge, single response" -where challenge (and also response) is sent as a burst of 5 data packets, -the new profile uses "bursts" of 1 packet instead, but issues -the challenge-response part 5 times sequentially -(waiting for receiving the response before sending next challenge). -This new profile happens to have the same overall packet count -(when no re-transmissions are needed). -Although it is possibly more taxing for TRex CPU, -the results are comparable to the old traffic profile. - -## Ip4base tests - -Contrary to stateless traffic profiles, we do not have a simple limit -that would guarantee TRex is able to send traffic at specified load. -For that reason, we have added tests where "nat44ed" is replaced by "ip4base". -Instead of NAT44ed processing, the tests set minimalistic IPv4 routes, -so that packets are forwarded in both inside-to-outside and outside-to-inside -directions. 
-
-The packets arrive to server end of TRex with different source address&port
-than in NAT44ed tests (no translation to outside values is done with ip4base),
-but those are not specified in the stateful traffic profiles.
-The server end (as always) uses the received address&port as destination
-for outside-to-inside traffic. Therefore the same stateful traffic profile
-works for both NAT44ed and ip4base test (of the same scale).
-
-The NAT44ed results are displayed together with corresponding ip4base results.
-If they are similar, TRex is probably the bottleneck.
-If NAT44ed result is visibly smaller, it describes the real VPP performance.
diff --git a/docs/content/methodology/overview/_index.md b/docs/content/methodology/overview/_index.md
new file mode 100644
index 0000000000..10f362013f
--- /dev/null
+++ b/docs/content/methodology/overview/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Overview"
+weight: 1
+---
diff --git a/docs/content/methodology/overview/dut_state_considerations.md b/docs/content/methodology/overview/dut_state_considerations.md
new file mode 100644
index 0000000000..eca10a22cd
--- /dev/null
+++ b/docs/content/methodology/overview/dut_state_considerations.md
@@ -0,0 +1,148 @@
+---
+title: "DUT State Considerations"
+weight: 5
+---
+
+# DUT State Considerations
+
+This page discusses considerations for Device Under Test (DUT) state.
+DUTs such as VPP require configuration to be provided before the application
+starts (via config files) or just after it starts (via API or CLI access).
+
+During operation DUTs gather various telemetry data, depending on configuration.
+This internal state handling is part of normal operation,
+so any performance impact is included in the test results.
+Accessing telemetry data is additional load on the DUT,
+so we are not doing that in main trial measurements that affect results,
+but we include separate trials specifically for gathering runtime telemetry.
+
+But there is one kind of state that needs specific handling.
+This kind of DUT state is dynamically created based on incoming traffic,
+it affects how the DUT handles the traffic, and (unlike telemetry counters)
+it has uneven impact on CPU load.
+A typical example is NAT, where detecting new sessions takes more CPU than
+forwarding packets on existing (open or recently closed) sessions.
+We call DUT configurations with this kind of state "stateful",
+and configurations without them "stateless".
+(Even though stateless configurations contain state described in previous
+paragraphs, and some configuration items may have "stateful" in their name,
+such as stateful ACLs.)
+
+# Stateful DUT configurations
+
+Typically, the level of CPU impact of traffic depends on DUT state.
+The first packets causing DUT state to change have higher impact,
+subsequent packets matching that state have lower impact.
+
+From a performance point of view, this is similar to traffic phases
+for stateful protocols, see the
+[NGFW draft](https://tools.ietf.org/html/draft-ietf-bmwg-ngfw-performance-05#section-4.3.4).
+In CSIT we borrow the terminology (even if it does not fit perfectly,
+see discussion below). Ramp-up traffic causes the state change,
+sustain traffic does not change the state.
+
+As the performance is different, each test has to choose which traffic
+it wants to test, and manipulate the DUT state to achieve the intended impact.
+
+## Ramp-up trial
+
+Tests aiming at sustain performance need to make sure DUT state is created.
+We achieve this via a ramp-up trial, whose specific purpose
+is to create the state.
+
+Subsequent trials need no specific handling, as long as the state
+remains the same. But some state can time-out, so additional ramp-up
+trials are inserted whenever the code detects the state can time-out.
+Note that a trial with zero loss refreshes the state,
+so only the time since the last non-zero loss trial is tracked.
+
+For the state to be set completely, it is important both DUT and TG
+do not lose any packets. We achieve this by setting the profile multiplier
+(TPS from now on) to a low enough value.
+
+It is also important each state-affecting packet is sent.
+For size-limited traffic profiles it is guaranteed by the size limit.
+For continuous traffic, we set a long enough duration (based on TPS).
+
+At the end of the ramp-up trial, we check DUT state to confirm
+it has been created as expected.
+The test fails if the state is not (completely) created.
+
+## State Reset
+
+Tests aiming at ramp-up performance do not use a ramp-up trial,
+and they need to reset the DUT state before each trial measurement.
+The way of resetting the state depends on the test,
+usually an API call is used to partially de-configure
+the part that holds the state, and then re-configure it back.
+
+In CSIT we control the DUT state behavior via a test variable "resetter".
+If it is not set, DUT state is not reset.
+If it is set, each search algorithm (including MRR) will invoke it
+before all trial measurements (both main and telemetry ones).
+Any configuration keyword enabling a feature with DUT state
+will check whether a test variable for ramp-up rate is present.
+If it is present, resetter is not set.
+If it is not present, the keyword sets the appropriate resetter value.
+This logic makes sure either ramp-up or state reset is used;
+see the sketch later on this page.
+
+Notes: If both ramp-up and state reset were used, the DUT behavior
+would be identical to just reset, while the test would take longer to execute.
+If neither were used, the DUT would show different performance in subsequent
+trials, violating assumptions of search algorithms.
+
+## DUT versus protocol ramp-up
+
+There are at least three different causes for bandwidth possibly increasing
+within a single measurement trial.
+
+The first is the DUT switching from the state modification phase to the
+constant phase; it is the primary focus of this document.
+Using ramp-up traffic before main trials eliminates this cause
+for tests wishing to measure the performance of the next phase.
+Using size-limited profiles eliminates the next phase
+for tests wishing to measure performance of this phase.
+
+The second is a protocol such as TCP ramping up its throughput to utilize
+the bandwidth available. This is the original meaning of "ramp up"
+in the NGFW draft (see above).
+In existing tests we are not using this meaning of TCP ramp-up.
+Instead we use only small transactions, and a large enough initial window
+so TCP acts as ramped-up already.
+
+The third is TCP increasing offered load due to retransmissions triggered by
+packet loss. In CSIT we again try to avoid this behavior
+by using small enough data to transfer, so overlap of multiple transactions
+(the primary cause of packet loss) is unlikely.
+But in MRR tests, packet loss and non-constant offered load are still expected.
+
+# Stateless DUT configurations
+
+These are simple configurations, which do not set any resetter value
+(even if ramp-up duration is not configured).
+The majority of existing tests are of this type, using continuous traffic profiles.
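+
+For stateful configurations, the ramp-up versus state-reset selection
+described in the State Reset section above can be summarized by a minimal
+sketch. This is an illustration only; the names (`select_state_handling`,
+`test_vars`) are hypothetical, not the actual CSIT keyword code:
+
+```python
+# Illustrative sketch of the ramp-up versus state-reset decision.
+# Exactly one of the two mechanisms ends up active per test.
+def select_state_handling(test_vars, reset_feature_state):
+    """Return trial-preparation settings for a stateful feature test."""
+    if test_vars.get("ramp_up_rate") is not None:
+        # Ramp-up trials create (and refresh) the DUT state,
+        # so no resetter is registered.
+        return {"resetter": None, "use_ramp_up": True}
+    # No ramp-up rate configured: search algorithms will invoke the
+    # resetter before every trial measurement (main and telemetry).
+    return {"resetter": reset_feature_state, "use_ramp_up": False}
+```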
+
+In order to identify limits of TRex performance,
+we have added suites with stateless DUT configuration (VPP ip4base)
+subjected to size-limited ASTF traffic.
+The discovered rates serve as a basis of comparison
+for evaluating the results for stateful DUT configurations (VPP NAT44ed)
+subjected to the same traffic profiles.
+
+# DUT versus TG state
+
+Traffic Generator profiles can be stateful (ASTF) or stateless (STL).
+DUT configuration can be stateful or stateless (with respect to packet traffic).
+
+In CSIT we currently use all four possible configurations:
+
+- Regular stateless VPP tests use stateless traffic profiles.
+
+- Stateless VPP configuration with stateful profile is used as a base for
+  comparison.
+
+- Some stateful DUT configurations (NAT44DET, NAT44ED unidirectional)
+  are tested using stateless traffic profiles and continuous traffic.
+
+- The rest of stateful DUT configurations (NAT44ED bidirectional)
+  are tested using stateful traffic profiles and size-limited traffic.
diff --git a/docs/content/methodology/overview/multi_core_speedup.md b/docs/content/methodology/overview/multi_core_speedup.md
new file mode 100644
index 0000000000..f438e8e996
--- /dev/null
+++ b/docs/content/methodology/overview/multi_core_speedup.md
@@ -0,0 +1,51 @@
+---
+title: "Multi-Core Speedup"
+weight: 3
+---
+
+# Multi-Core Speedup
+
+All performance tests are executed in single physical core and in
+multiple core scenarios.
+
+## Intel Hyper-Threading (HT)
+
+Intel Xeon processors used in FD.io CSIT can operate either in HT
+Disabled mode (single logical core per each physical core) or in HT
+Enabled mode (two logical cores per each physical core). The HT setting is
+applied in the BIOS and requires a server SUT reload for it to take effect,
+making it impractical for continuous changes of HT mode of operation.
+
+Performance tests are executed with server SUTs' Intel Xeon processors
+configured with Intel Hyper-Threading Enabled for all Xeon
+Cascadelake and Xeon Icelake testbeds.
+
+## Multi-core Tests
+
+Multi-core tests are executed in the following VPP worker thread and physical
+core configurations:
+
+1. Intel Xeon Icelake and Cascadelake testbeds (2n-icx, 3n-icx, 2n-clx)
+   with Intel HT enabled (2 logical CPU cores per each physical core):
+
+   1. 2t1c - 2 VPP worker threads on 1 physical core.
+   2. 4t2c - 4 VPP worker threads on 2 physical cores.
+   3. 8t4c - 8 VPP worker threads on 4 physical cores.
+
+VPP worker threads are the data plane threads running on isolated
+logical cores. With Intel HT enabled VPP workers are placed as sibling
+threads on each used physical core. VPP control threads (main, stats)
+are running on a separate non-isolated core together with other Linux
+processes.
+
+In all CSIT tests care is taken to ensure that each VPP worker handles
+the same amount of received packet load and does the same amount of
+packet processing work. This is achieved by evenly distributing per
+interface type (e.g. physical, virtual) receive queues over VPP workers
+using default VPP round-robin mapping and by loading these queues with
+the same amount of packet flows.
+
+If the number of VPP workers is higher than the number of physical or virtual
+interfaces, multiple receive queues are configured on each interface.
+NIC Receive Side Scaling (RSS) for physical interfaces and multi-queue
+for virtual interfaces are used for this purpose.
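+
+The round-robin queue placement described above can be illustrated with a
+small sketch. This is not CSIT code; the function name and the printed
+example are hypothetical:
+
+```python
+# Illustrative round-robin mapping of interface RxQs onto worker threads.
+def round_robin_rxq_mapping(num_workers, interfaces, rxqs_per_interface):
+    """Return {worker: [(interface, rxq_index), ...]} assignments."""
+    mapping = {w: [] for w in range(num_workers)}
+    worker = 0
+    for iface in interfaces:
+        for rxq in range(rxqs_per_interface):
+            mapping[worker].append((iface, rxq))
+            worker = (worker + 1) % num_workers
+    return mapping
+
+# 2 physical interfaces, 4 workers, 4 RxQs per interface:
+# each worker ends up polling exactly one RxQ of each interface.
+print(round_robin_rxq_mapping(4, ["port0", "port1"], 4))
+```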
diff --git a/docs/content/methodology/overview/per_thread_resources.md b/docs/content/methodology/overview/per_thread_resources.md
new file mode 100644
index 0000000000..c23efb50bd
--- /dev/null
+++ b/docs/content/methodology/overview/per_thread_resources.md
@@ -0,0 +1,101 @@
+---
+title: "Per Thread Resources"
+weight: 2
+---
+
+# Per Thread Resources
+
+The CSIT test framework manages mapping of the following resources per thread:
+
+1. Cores, physical cores (pcores) allocated as pairs of sibling logical cores
+   (lcores) if the server is in HyperThreading/SMT mode, or as single lcores
+   if the server is not in HyperThreading/SMT mode. Note that if the server's
+   processors are running in HyperThreading/SMT mode, sibling lcores are
+   always used.
+2. Receive Queues (RxQ), packet receive queues allocated on each
+   physical and logical interface tested.
+3. Transmit Queues (TxQ), packet transmit queues allocated on each
+   physical and logical interface tested.
+
+The approach to mapping per thread resources depends on the application/DUT
+tested (VPP or DPDK apps) and associated thread types, as follows:
+
+1. Data-plane workers, used for data-plane packet processing, when no
+   feature workers are present.
+
+   - Cores: data-plane workers are typically tested in 1, 2 and 4 pcore
+     configurations, running on single lcore per pcore or on sibling
+     lcores per pcore. The result is a set of {T}t{C}c thread-core
+     configurations, where {T} stands for a total number of threads
+     (lcores), and {C} for a total number of pcores. Tested
+     configurations are encoded in CSIT test case names,
+     e.g. "1c", "2c", "4c", and test tags "2T1C" (or "1T1C"), "4T2C"
+     (or "2T2C"), "8T4C" (or "4T4C").
+   - Interface Receive Queues (RxQ): as of the CSIT-2106 release, the number of
+     RxQs used on each physical or virtual interface is equal to the
+     number of data-plane workers. In other words each worker has a
+     dedicated RxQ on each interface tested. This ensures the packet
+     processing load is equal for each worker, subject to RSS flow
+     load balancing efficacy. Note: Before CSIT-2106 the total number of
+     RxQs across all interfaces of specific type was equal to the
+     number of data-plane workers.
+   - Interface Transmit Queues (TxQ): the number of TxQs used on each
+     physical or virtual interface is equal to the number of data-plane
+     workers. In other words each worker has a dedicated TxQ on each
+     interface tested.
+   - Applies to VPP and DPDK Testpmd and L3Fwd.
+
+2. Data-plane and feature workers (e.g. IPsec async crypto workers), the
+   latter dedicated to specific feature processing.
+
+   - Cores: data-plane and feature workers are tested in 2, 3 and 4
+     pcore configurations, running on single lcore per pcore or on
+     sibling lcores per pcore. This results in two sets of
+     thread-core combinations separated by "-", {T}t{C}c-{T}t{C}c, with
+     the leading set denoting total number of threads (lcores) and
+     pcores used for data-plane workers, and the trailing set denoting
+     total number of lcores and pcores used for feature workers.
+     Accordingly, tested configurations are encoded in CSIT test case
+     names, e.g. "1c-1c", "1c-2c", "1c-3c", and test tags "2T1C_2T1C"
+     (or "1T1C_1T1C"), "2T1C_4T2C" (or "1T1C_2T2C"), "2T1C_6T3C"
+     (or "1T1C_3T3C").
+   - RxQ and TxQ: no RxQs and no TxQs are used by feature workers.
+   - Applies to VPP only.
+
+3. Management/main worker, control plane and management.
+
+   - Cores: single lcore.
+   - RxQ: not used (VPP default behaviour).
+   - TxQ: single TxQ per interface, allocated but not used (VPP default
+     behaviour).
+   - Applies to VPP only.
+
+## VPP Thread Configuration
+
+Mapping of cores and RxQs to VPP data-plane worker threads is done in
+the VPP startup.conf during test suite setup:
+
+1. `corelist-workers <list-of-cores>`: List of logical cores to run VPP
+   data-plane workers and feature workers. The actual lcore
+   allocation depends on HyperThreading/SMT server configuration and
+   per test core configuration.
+
+   - For tests without feature workers, by default, all CPU cores
+     configured in startup.conf are used for data-plane workers.
+   - For tests with feature workers, CSIT code distributes lcores across
+     data-plane and feature workers.
+
+2. `num-rx-queues <value>`: Number of Rx queues used per interface.
+
+Mapping of TxQs to VPP data-plane worker threads uses the default VPP
+setting of one TxQ per interface per data-plane worker.
+
+## DPDK Thread Configuration
+
+Mapping of cores and RxQs to DPDK Testpmd/L3Fwd data-plane worker
+threads is done in the startup CLI:
+
+1. `-l <list-of-cores>` - List of logical cores to run the DPDK
+   application.
+2. `nb-cores=<n>` - Number of forwarding cores.
+3. `rxq=<n>` - Number of Rx queues used per interface.
diff --git a/docs/content/methodology/overview/terminology.md b/docs/content/methodology/overview/terminology.md
new file mode 100644
index 0000000000..c9115e9291
--- /dev/null
+++ b/docs/content/methodology/overview/terminology.md
@@ -0,0 +1,97 @@
+---
+title: "Terminology"
+weight: 1
+---
+
+# Terminology
+
+- **Frame size**: size of an Ethernet Layer-2 frame on the wire, including
+  any VLAN tags (dot1q, dot1ad) and Ethernet FCS, but excluding Ethernet
+  preamble and inter-frame gap. Measured in Bytes.
+
+- **Packet size**: same as frame size, both terms used interchangeably.
+
+- **Inner L2 size**: for tunneled L2 frames only, size of an encapsulated
+  Ethernet Layer-2 frame, preceded with tunnel header, and followed by
+  tunnel trailer. Measured in Bytes.
+
+- **Inner IP size**: for tunneled IP packets only, size of an encapsulated
+  IPv4 or IPv6 packet, preceded with tunnel header, and followed by
+  tunnel trailer. Measured in Bytes.
+
+- **Device Under Test (DUT)**: In software networking, "device" denotes a
+  specific piece of software tasked with packet processing. Such device
+  is surrounded with other software components (such as operating system
+  kernel). It is not possible to run devices without also running the
+  other components, and hardware resources are shared between both. For
+  purposes of testing, the whole set of hardware and software components
+  is called "System Under Test" (SUT). As the SUT is the part of the whole
+  test setup whose performance can be measured with RFC2544, this document
+  uses SUT instead of the RFC2544 DUT. Device under test
+  (DUT) can be re-introduced when analyzing test results using whitebox
+  techniques, but this document sticks to blackbox testing.
+
+- **System Under Test (SUT)**: System under test (SUT) is a part of the
+  whole test setup whose performance is to be benchmarked. The complete
+  methodology contains other parts, whose performance is either already
+  established, or not affecting the benchmarking result.
+
+- **Bi-directional throughput tests**: involve packets/frames flowing in
+  both east-west and west-east directions over every tested interface of
+  SUT/DUT. Packet flow metrics are measured per direction, and can be
+  reported as aggregate for both directions (i.e. throughput) and/or
+  separately for each measured direction (i.e. latency). In most cases
+  bi-directional tests use the same (symmetric) load in both directions.
+
+- **Uni-directional throughput tests**: involve packets/frames flowing in
+  only one direction, i.e. either east-west or west-east direction, over
+  every tested interface of SUT/DUT. Packet flow metrics are measured
+  and are reported for the measured direction.
+
+- **Packet Loss Ratio (PLR)**: ratio of packets received relative to packets
+  transmitted over the test trial duration, calculated using formula:
+  PLR = ( pkts_transmitted - pkts_received ) / pkts_transmitted.
+  For bi-directional throughput tests aggregate PLR is calculated based
+  on the aggregate number of packets transmitted and received.
+
+- **Packet Throughput Rate**: maximum packet offered load DUT/SUT forwards
+  within the specified Packet Loss Ratio (PLR). In many cases the rate
+  depends on the frame size processed by DUT/SUT. Hence packet
+  throughput rate MUST be quoted with specific frame size as received by
+  DUT/SUT during the measurement. For bi-directional tests, packet
+  throughput rate should be reported as aggregate for both directions.
+  Measured in packets-per-second (pps) or frames-per-second (fps),
+  equivalent metrics.
+
+- **Bandwidth Throughput Rate**: a secondary metric calculated from packet
+  throughput rate using formula: bw_rate = pkt_rate * (frame_size +
+  L1_overhead) * 8, where L1_overhead for Ethernet includes preamble (8
+  Bytes) and inter-frame gap (12 Bytes). For bi-directional tests,
+  bandwidth throughput rate should be reported as aggregate for both
+  directions. Expressed in bits-per-second (bps).
+
+- **Non Drop Rate (NDR)**: maximum packet/bandwidth throughput rate sustained
+  by DUT/SUT at PLR equal to zero (zero packet loss) specific to tested
+  frame size(s). MUST be quoted with specific packet size as received by
+  DUT/SUT during the measurement. Packet NDR measured in
+  packets-per-second (or fps), bandwidth NDR expressed in
+  bits-per-second (bps).
+
+- **Partial Drop Rate (PDR)**: maximum packet/bandwidth throughput rate
+  sustained by DUT/SUT at PLR greater than zero (non-zero packet loss)
+  specific to tested frame size(s). MUST be quoted with specific packet
+  size as received by DUT/SUT during the measurement. Packet PDR
+  measured in packets-per-second (or fps), bandwidth PDR expressed in
+  bits-per-second (bps).
+
+- **Maximum Receive Rate (MRR)**: packet/bandwidth rate regardless of PLR
+  sustained by DUT/SUT under specified Maximum Transmit Rate (MTR)
+  packet load offered by traffic generator. MUST be quoted with both
+  specific packet size and MTR as received by DUT/SUT during the
+  measurement. Packet MRR measured in packets-per-second (or fps),
+  bandwidth MRR expressed in bits-per-second (bps).
+
+- **Trial**: a single measurement step.
+
+- **Trial duration**: amount of time over which packets are transmitted and
+  received in a single measurement step.
diff --git a/docs/content/methodology/overview/vpp_forwarding_modes.md b/docs/content/methodology/overview/vpp_forwarding_modes.md
new file mode 100644
index 0000000000..b3c3bba984
--- /dev/null
+++ b/docs/content/methodology/overview/vpp_forwarding_modes.md
@@ -0,0 +1,104 @@
+---
+title: "VPP Forwarding Modes"
+weight: 4
+---
+
+# VPP Forwarding Modes
+
+VPP is tested in a number of L2, IPv4 and IPv6 packet lookup and forwarding
+modes. Within each mode baseline and scale tests are executed, the latter with
+a varying number of FIB entries.
+
+## L2 Ethernet Switching
+
+VPP is tested in three L2 forwarding modes:
+
+- *l2patch*: L2 patch, the fastest point-to-point L2 path that loops
+  packets between two interfaces without any Ethernet frame checks or
+  lookups.
+- *l2xc*: L2 cross-connect, point-to-point L2 path with all Ethernet
+  frame checks, but no MAC learning and no MAC lookup.
+- *l2bd*: L2 bridge-domain, multipoint-to-multipoint L2 path with all
+  Ethernet frame checks, with MAC learning (unless static MACs are used)
+  and MAC lookup.
+
+l2bd tests are executed in baseline and scale configurations:
+
+- *l2bdbase*: Two MAC FIB entries are learned by VPP to enable packet
+  switching between two interfaces in two directions. VPP L2 switching
+  is tested with 254 IPv4 unique flows per direction, varying IPv4
+  source address per flow in order to invoke RSS based packet
+  distribution across VPP workers. The same source and destination MAC
+  address is used for all flows per direction. IPv4 source address is
+  incremented for every packet.
+
+- *l2bdscale*: A high number of MAC FIB entries are learned by VPP to
+  enable packet switching between two interfaces in two directions.
+  Tested MAC FIB sizes include: i) 10k with 5k unique flows per
+  direction, ii) 100k with 2 x 50k flows and iii) 1M with 2 x 500k
+  flows. Unique flows are created by using distinct source and
+  destination MAC addresses that are changed for every packet using
+  incremental ordering, making VPP learn (or refresh) distinct src MAC
+  entries and look up distinct dst MAC entries for every packet. For
+  details, see
+  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
+
+Ethernet wire encapsulations tested include: untagged, dot1q, dot1ad.
+
+## IPv4 Routing
+
+IPv4 routing tests are executed in baseline and scale configurations:
+
+- *ip4base*: Two /32 IPv4 FIB entries are configured in VPP to enable
+  packet routing between two interfaces in two directions. VPP routing
+  is tested with 253 IPv4 unique flows per direction, varying IPv4
+  source address per flow in order to invoke RSS based packet
+  distribution across VPP workers. IPv4 source address is incremented
+  for every packet.
+
+- *ip4scale*: A high number of /32 IPv4 FIB entries are configured in
+  VPP. Tested IPv4 FIB sizes include: i) 20k with 10k unique flows per
+  direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M
+  flows. Unique flows are created by using distinct IPv4 destination
+  addresses that are changed for every packet, using incremental or
+  random ordering. For details, see
+  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
+
+## IPv6 Routing
+
+Similarly to IPv4, IPv6 routing tests are executed in baseline and scale
+configurations:
+
+- *ip6base*: Two /128 IPv6 FIB entries are configured in VPP to enable
+  packet routing between two interfaces in two directions. VPP routing
+  is tested with 253 IPv6 unique flows per direction, varying IPv6
+  source address per flow in order to invoke RSS based packet
+  distribution across VPP workers. IPv6 source address is incremented
+  for every packet.
+
+- *ip6scale*: A high number of /128 IPv6 FIB entries are configured in
+  VPP. Tested IPv6 FIB sizes include: i) 20k with 10k unique flows per
+  direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M
+  flows. Unique flows are created by using distinct IPv6 destination
+  addresses that are changed for every packet, using incremental or
+  random ordering.
For details, see
+  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
+
+## SRv6 Routing
+
+SRv6 routing tests are executed in a number of baseline configurations;
+in each case SR policy and steering policy are configured for one
+direction and one (or two) SR behaviours (functions) in the other
+direction:
+
+- *srv6enc1sid*: One SID (no SRH present), one SR function - End.
+- *srv6enc2sids*: Two SIDs (SRH present), two SR functions - End and
+  End.DX6.
+- *srv6enc2sids-nodecaps*: Two SIDs (SRH present) without decapsulation,
+  one SR function - End.
+- *srv6proxy-dyn*: Dynamic SRv6 proxy, one SR function - End.AD.
+- *srv6proxy-masq*: Masquerading SRv6 proxy, one SR function - End.AM.
+- *srv6proxy-stat*: Static SRv6 proxy, one SR function - End.AS.
+
+In all listed cases a low number of IPv6 flows (253 per direction) is
+routed by VPP.
diff --git a/docs/content/methodology/packet_flow_ordering.md b/docs/content/methodology/packet_flow_ordering.md
deleted file mode 100644
index d2b3bfb90c..0000000000
--- a/docs/content/methodology/packet_flow_ordering.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-title: "Packet Flow Ordering"
-weight: 9
----
-
-# Packet Flow Ordering
-
-TRex Traffic Generator (TG) supports two main ways how to cover
-address space (on allowed ranges) in scale tests.
-
-In most cases only one field value (e.g. IPv4 destination address) is
-altered, in some cases two fields (e.g. IPv4 destination address and UDP
-destination port) are altered.
-
-## Incremental Ordering
-
-This case is simpler to implement and offers greater control.
-
-When changing two fields, they can be incremented synchronously, or one
-after another. In the latter case we can specify which one is
-incremented each iteration and which is incremented by "carrying over"
-only when the other "wraps around". This way also visits all
-combinations once before the "carry" field also wraps around.
-
-It is possible to use increments other than 1.
-
-## Randomized Ordering
-
-This case chooses each field value at random (from the allowed range).
-In case of two fields, they are treated independently.
-TRex allows to set random seed to get deterministic numbers.
-We use a different seed for each field and traffic direction.
-The seed has to be a non-zero number, we use 1, 2, 3, and so on.
-
-The seeded random mode in TRex requires a "limit" value,
-which acts as a cycle length limit (after this many iterations,
-the seed resets to its initial value).
-We use the maximal allowed limit value (computed as 2^24 - 1).
-
-Randomized profiles do not avoid duplicated values,
-and do not guarantee each possible value is visited,
-so it is not very useful for stateful tests.
diff --git a/docs/content/methodology/packet_latency.md b/docs/content/methodology/packet_latency.md
deleted file mode 100644
index fd7c0e00e8..0000000000
--- a/docs/content/methodology/packet_latency.md
+++ /dev/null
@@ -1,45 +0,0 @@
----
-title: "Packet Latency"
-weight: 8
----
-
-# Packet Latency
-
-TRex Traffic Generator (TG) is used for measuring one-way latency in
-2-Node and 3-Node physical testbed topologies. TRex integrates
-[High Dynamic Range Histogram (HDRH)](http://hdrhistogram.org/)
-functionality and reports per packet latency distribution for latency
-streams sent in parallel to the main load packet streams.
-
-Following methodology is used:
-
-- Only NDRPDR test type measures latency and only after NDR and PDR
-  values are determined. Other test types do not involve latency
-  streams.
-- Latency is measured at different background load packet rates:
-
-  - No-Load: latency streams only.
-  - Low-Load: at 10% PDR.
-  - Mid-Load: at 50% PDR.
-  - High-Load: at 90% PDR.
-
-- Latency is measured for all tested packet sizes except IMIX due to
-  TRex TG restriction.
-- TG sends dedicated latency streams, one per direction, each at the
-  rate of 9 kpps at the prescribed packet size; these are sent in
-  addition to the main load streams.
-- TG reports Min/Avg/Max and HDRH latency values distribution per stream
-  direction, hence two sets of latency values are reported per test case
-  (marked as E-W and W-E).
-- +/- 1 usec is the measurement accuracy of TRex TG and the data in HDRH
-  latency values distribution is rounded to microseconds.
-- TRex TG introduces a (background) always-on Tx + Rx latency bias of 4
-  usec on average per direction resulting from TRex software writing and
-  reading packet timestamps on CPU cores. Quoted values are based on TG
-  back-to-back latency measurements.
-- Latency graphs are not smoothed, each latency value has its own
-  horizontal line across corresponding packet percentiles.
-- Percentiles are shown on X-axis using a logarithmic scale, so the
-  maximal latency value (ending at 100% percentile) would be in
-  infinity. The graphs are cut at 99.9999% (hover information still
-  lists 100%).
\ No newline at end of file
diff --git a/docs/content/methodology/per_patch_testing.md b/docs/content/methodology/per_patch_testing.md
new file mode 100644
index 0000000000..a64a52caf6
--- /dev/null
+++ b/docs/content/methodology/per_patch_testing.md
@@ -0,0 +1,230 @@
+---
+title: "Per-patch Testing"
+weight: 5
+---
+
+# Per-patch Testing
+
+Updated for CSIT git commit id: 72b45cfe662107c8e1bb549df71ba51352a898ee.
+
+A methodology similar to trending analysis is used for comparing performance
+before a DUT code change is merged. This can act as a verify job to disallow
+changes which would decrease performance without a good reason.
+
+## Existing jobs
+
+VPP is the only project currently using such jobs.
+They are not started automatically; they must be triggered on demand.
+They allow full tag expressions, but some tags are enforced (such as MRR).
+
+There are jobs available for multiple types of testbeds,
+based on various processors.
+Their Gerrit trigger words are of the form "perftest-{node_arch}"
+where the node_arch combinations currently supported are:
+2n-clx, 2n-tx2, 2n-zn2, 3n-tsh.
+
+## Test selection
+
+A Gerrit trigger line without any additional arguments selects
+a small set of test cases to run.
+If additional arguments are added to the Gerrit trigger, they are treated
+as Robot tag expressions to select tests to run.
+While very flexible, this method of test selection also allows the user
+to accidentally select too high a number of tests, blocking the testbed for days.
+
+What follows is a list of explanations and recommendations
+to help users select the minimal set of test cases.
+
+### Verify cycles
+
+When Gerrit schedules multiple jobs to run for the same patch set,
+it waits until all runs are complete.
+While it is waiting, it is possible to trigger more jobs
+(adding runs to the set Gerrit is waiting for), but it is not possible
+to trigger more runs for the same job, until Gerrit is done waiting.
+After Gerrit is done waiting, it becomes possible to trigger
+the same job again.
+
+Example. A user triggers one set of tests on 2n-icx and immediately
+also triggers another set of tests on 3n-icx.
Then the user notices
+the 2n-icx run ended early because of a typo in the tag expression.
+When the user tries to re-trigger 2n-icx (with a fixed tag expression),
+that comment gets ignored by Jenkins.
+Only when the 3n-icx job finishes can the user trigger 2n-icx.
+
+### One comment many jobs
+
+In the past, the CSIT code which parses for perftest trigger comments
+was buggy, which led to bad behavior (such as selecting all performance tests,
+because "perftest" is also a robot tag) when a user included multiple
+perftest trigger words in the same comment.
+
+The worst bugs were fixed since then, but it is still recommended
+to use just one trigger word per Gerrit comment, just to be safe.
+
+### Multiple test cases in run
+
+While Robot supports the OR operator, it does not support parentheses,
+so the OR operator is not very useful. It is recommended
+to use a space instead of the OR operator.
+
+Example template:
+perftest-2n-icx {tag_expression_1} {tag_expression_2}
+
+See below for more concrete examples.
+
+### Suite tags
+
+Traditionally, CSIT maintains broad Robot tags that can be used to select tests.
+
+But it is not recommended to use them for test selection,
+as it is not that easy to determine how many test cases are selected.
+
+The recommended way is to look into the CSIT repository first,
+locate a specific suite the user is interested in,
+and use its suite tag. For example, "ethip4-ip4base" is a suite tag
+selecting just one suite in the CSIT git repository,
+avoiding all scale, container, and other similar variants.
+
+Note that CSIT uses the "autogen" code generator,
+so the robot running in Jenkins has access to more suites
+than visible just by looking into the CSIT git repository,
+so a suite tag is not enough to select even the intended suite,
+and the user still probably wants to narrow down
+to a single test case within a suite.
+
+### Fully specified tag expressions
+
+Here is one template to select a single test case:
+{test_type}AND{nic_model}AND{nic_driver}AND{cores}AND{frame_size}AND{suite_tag}
+where the variables are all lower case (so the AND operator stands out).
+
+Currently only one test type is supported by the performance comparison jobs:
+"mrr".
+The nic_driver options depend on nic_model. For Intel cards "drv_avf"
+(AVF plugin) and "drv_vfio_pci" (DPDK plugin) are popular, for Mellanox
+"drv_rdma_core". Currently, the performance using "drv_af_xdp" is not reliable
+enough, so do not use it unless you are specifically testing for AF_XDP.
+
+The most popular nic_model is "nic_intel-xxv710", but that is not available
+on all testbed types.
+It is safe to use "1c" for cores (unless you suspect multi-core
+performance is affected differently) and "64b" for frame size ("78b" for ip6
+and more for dot1q and other encapsulated traffic;
+"1518b" is popular for ipsec and other payload-bound tests).
+
+As there are more test cases than CSIT can periodically test,
+it is possible to encounter an old test case that currently fails.
+To avoid that, you can look at "job spec" files we use for periodic testing,
+for example
+[this one](https://github.com/FDio/csit/blob/master/resources/job_specs/report_iterative/2n-icx/vpp-mrr-00.md).
+
+### Shortening triggers
+
+Advanced users may use the following tricks to avoid writing long trigger
+comments.
+
+Robot supports glob matching, which can be used to select multiple suite tags at
+once.
+
+Not specifying one of the 6 parts of the recommended expression pattern
+will select all available options.
For example not specifying nic_driver
+for nic_intel-xxv710 will select all 3 applicable drivers.
+You can use the NOT operator to reject some options (e.g. NOTdrv_af_xdp),
+but beware, with NOT the order matters:
+tag1ANDtag2NOTtag3 is not the same as tag1NOTtag3ANDtag2,
+the latter is evaluated as tag1AND(NOT(tag3ANDtag2)).
+
+Beware when not specifying nic_model. As a precaution,
+CSIT code will insert the default NIC model for the testbed used.
+Example: Specifying drv_rdma_core without specifying nic_model
+will fail, as the default nic_model is nic_intel-xxv710
+which does not support the RDMA core driver.
+
+### Complete example
+
+A user wants to test a VPP change which may affect load balancing with bonding.
+Searching tag documentation for "bonding" finds the LBOND tag and its variants.
+Searching the CSIT git repository (directory tests/) finds 8 suite files,
+all suited only for 3-node testbeds.
+All suites are using vhost, but differ by the forwarding app inside the VM
+(DPDK or VPP), by the forwarding mode of VPP acting as host level vswitch
+(MAC learning or cross connect), and by the number of DUT1-DUT2 links
+available (1 or 2).
+
+As not all NICs and testbeds offer enough ports for 2 parallel DUT-DUT links,
+the user looks at
+[testbed specifications](https://github.com/FDio/csit/tree/master/topologies/available)
+and finds that only the xxv710 NIC on the 3n-icx testbed matches the requirements.
+A quick look into the suites confirms the smallest frame size is 64 bytes
+(despite the DOT1Q robot tag, as the encapsulation does not happen on TG-DUT links).
+It is ok to use just 1 physical core, as 3n-icx has hyperthreading enabled,
+so the VPP vswitch will use 2 worker threads.
+
+The user decides the vswitch forwarding mode is not important
+(so chooses cross connect as that has less CPU overhead),
+but wants to test both NIC drivers (not AF_XDP), both apps in VM,
+and both 1 and 2 parallel links.
+
+After shortening, this is the trigger comment finally used:
+perftest-3n-icx mrrANDnic_intel-x710AND1cAND64bAND?lbvpplacp-dot1q-l2xcbase-eth-2vhostvr1024-1vm*NOTdrv_af_xdp
+
+## Basic operation
+
+The job builds VPP .deb packages for both the patch under test
+(called "current") and its parent patch (called "parent").
+
+For each test (from a set defined by tag expression),
+both builds are subjected to several trial measurements (BMRR).
+Measured samples are grouped into the "parent" sequence,
+followed by the "current" sequence. The same Minimal Description Length
+algorithm as in trending is used to decide whether it is one big group,
+or two smaller groups. If it is one group, a "normal" result
+is declared for the test. If it is two groups, and the current average
+is less than the parent average, the test is declared a regression.
+If it is two groups and the current average is larger or equal,
+the test is declared a progression.
+
+The whole job fails (giving -1) if some trial measurement failed,
+or if any test was declared a regression.
+
+## Temporary specifics
+
+The Minimal Description Length analysis is performed by
+CSIT code equivalent to the jumpavg-0.1.3 library available on PyPI.
+
+In hopes of strengthening the signal (code performance) compared to noise
+(all other factors influencing the measured values), several workarounds
+are applied.
+
+In contrast to trending, trial duration is set to 10 seconds,
+and only 5 samples are measured for each build.
+Both parameters are set in ci-management.
+
+This decreases sensitivity to regressions, but also decreases
+the probability of false positives.
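+
+The verdict logic from the Basic operation section can be sketched as
+follows. This is a simplified stand-in, not the real jumpavg code: the
+`two_groups` flag represents the Minimal Description Length grouping
+decision, and the function name is hypothetical:
+
+```python
+# Simplified stand-in for the per-patch classification logic.
+def classify(parent_samples, current_samples, two_groups):
+    """Classify a test given the grouping decision of the MDL analysis."""
+    if not two_groups:
+        # One big group: parent and current are indistinguishable.
+        return "normal"
+    parent_avg = sum(parent_samples) / len(parent_samples)
+    current_avg = sum(current_samples) / len(current_samples)
+    return "regression" if current_avg < parent_avg else "progression"
+
+# 5 parent and 5 current MRR samples (Mpps), already split into two groups:
+print(classify([10.1, 9.9, 10.0, 9.8, 10.2], [8.0, 8.1, 7.9, 8.2, 8.0], True))
+```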
+
+## Console output
+
+The following information is visible towards the end of the Jenkins console
+output, repeated for each analyzed test.
+
+The original 5 values are visible in the order they were measured.
+The 5 values after processing are also visible in the output,
+this time sorted by value (so people can see minimum and maximum).
+
+The next output is the difference of averages. It is the current average
+minus the parent average, expressed as a percentage of the parent average.
+
+The next three outputs contain the jumpavg representation
+of the two groups and a combined group.
+Here, "bits" is the description length; for the "current" sequence
+it includes the effect of the "parent" average value
+(jumpavg-0.1.3 penalizes sequences with too close averages).
+
+Next, a sentence describing which grouping description is shorter,
+and by how many bits.
+Finally, the test result classification is visible.
+
+The algorithm does not track test case names,
+so test cases are indexed (from 0).
diff --git a/docs/content/methodology/per_thread_resources.md b/docs/content/methodology/per_thread_resources.md
deleted file mode 100644
index cd862fa824..0000000000
--- a/docs/content/methodology/per_thread_resources.md
+++ /dev/null
@@ -1,102 +0,0 @@
----
-title: "Per Thread Resources"
-weight: 2
----
-
-# Per Thread Resources
-
-CSIT test framework is managing mapping of the following resources per
-thread:
-
-1. Cores, physical cores (pcores) allocated as pairs of sibling logical cores
-   (lcores) if server in HyperThreading/SMT mode, or as single lcores
-   if server not in HyperThreading/SMT mode. Note that if server's
-   processors are running in HyperThreading/SMT mode sibling lcores are
-   always used.
-2. Receive Queues (RxQ), packet receive queues allocated on each
-   physical and logical interface tested.
-3. Transmit Queues(TxQ), packet transmit queues allocated on each
-   physical and logical interface tested.
-
-Approach to mapping per thread resources depends on the application/DUT
-tested (VPP or DPDK apps) and associated thread types, as follows:
-
-1. Data-plane workers, used for data-plane packet processing, when no
-   feature workers present.
-
-   - Cores: data-plane workers are typically tested in 1, 2 and 4 pcore
-     configurations, running on single lcore per pcore or on sibling
-     lcores per pcore. Result is a set of {T}t{C}c thread-core
-     configurations, where{T} stands for a total number of threads
-     (lcores), and {C} for a total number of pcores. Tested
-     configurations are encoded in CSIT test case names,
-     e.g. "1c", "2c", "4c", and test tags "2T1C"(or "1T1C"), "4T2C"
-     (or "2T2C"), "8T4C" (or "4T4C").
-   - Interface Receive Queues (RxQ): as of CSIT-2106 release, number of
-     RxQs used on each physical or virtual interface is equal to the
-     number of data-plane workers. In other words each worker has a
-     dedicated RxQ on each interface tested. This ensures packet
-     processing load to be equal for each worker, subject to RSS flow
-     load balancing efficacy. Note: Before CSIT-2106 total number of
-     RxQs across all interfaces of specific type was equal to the
-     number of data-plane workers.
-   - Interface Transmit Queues (TxQ): number of TxQs used on each
-     physical or virtual interface is equal to the number of data-plane
-     workers. In other words each worker has a dedicated TxQ on each
-     interface tested.
-   - Applies to VPP and DPDK Testpmd and L3Fwd.
-
-2. Data-plane and feature workers (e.g. IPsec async crypto workers), the
-   latter dedicated to specific feature processing.
- - - Cores: data-plane and feature workers are tested in 2, 3 and 4 - pcore configurations, running on single lcore per pcore or on - sibling lcores per pcore. This results in a two sets of - thread-core combinations separated by "-", {T}t{C}c-{T}t{C}c, with - the leading set denoting total number of threads (lcores) and - pcores used for data-plane workers, and the trailing set denoting - total number of lcores and pcores used for feature workers. - Accordingly, tested configurations are encoded in CSIT test case - names, e.g. "1c-1c", "1c-2c", "1c-3c", and test tags "2T1C_2T1C" - (or "1T1C_1T1C"), "2T1C_4T2C"(or "1T1C_2T2C"), "2T1C_6T3C" - (or "1T1C_3T3C"). - - RxQ and TxQ: no RxQs and no TxQs are used by feature workers. - - Applies to VPP only. - -3. Management/main worker, control plane and management. - - - Cores: single lcore. - - RxQ: not used (VPP default behaviour). - - TxQ: single TxQ per interface, allocated but not used - (VPP default behaviour). - - Applies to VPP only. - -## VPP Thread Configuration - -Mapping of cores and RxQs to VPP data-plane worker threads is done in -the VPP startup.conf during test suite setup: - -1. `corelist-workers `: List of logical cores to run VPP - data-plane workers and feature workers. The actual lcores' - allocations depends on HyperThreading/SMT server configuration and - per test core configuration. - - - For tests without feature workers, by default, all CPU cores - configured in startup.conf are used for data-plane workers. - - For tests with feature workers, CSIT code distributes lcores across - data-plane and feature workers. - -2. `num-rx-queues `: Number of Rx queues used per interface. - -Mapping of TxQs to VPP data-plane worker threads uses the default VPP -setting of one TxQ per interface per data-plane worker. - -## DPDK Thread Configuration - -Mapping of cores and RxQs to DPDK Testpmd/L3Fwd data-plane worker -threads is done in the startup CLI: - -1. `-l ` - List of logical cores to run DPDK - application. -2. `nb-cores=` - Number of forwarding cores. -3. `rxq=` - Number of Rx queues used per interface. diff --git a/docs/content/methodology/reconfiguration_tests.md b/docs/content/methodology/reconfiguration_tests.md deleted file mode 100644 index 837535526d..0000000000 --- a/docs/content/methodology/reconfiguration_tests.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Reconfiguration Tests" -weight: 16 ---- - -# Reconfiguration Tests - -## Overview - -Reconf tests are designed to measure the impact of VPP re-configuration -on data plane traffic. -While VPP takes some measures against the traffic being -entirely stopped for a prolonged time, -the immediate forwarding rate varies during the re-configuration, -as some configurations steps need the active dataplane worker threads -to be stopped temporarily. - -As the usual methods of measuring throughput need multiple trial measurements -with somewhat long durations, and the re-configuration process can also be long, -finding an offered load which would result in zero loss -during the re-configuration process would be time-consuming. - -Instead, reconf tests first find a througput value (lower bound for NDR) -without re-configuration, and then maintain that ofered load -during re-configuration. The measured loss count is then assumed to be caused -by the re-configuration process. The result published by reconf tests -is the effective blocked time, that is -the loss count divided by the offered load. 
- -## Current Implementation - -Each reconf suite is based on a similar MLRsearch performance suite. - -MLRsearch parameters are changed to speed up the throughput discovery. -For example, PDR is not searched for, and the final trial duration is shorter. - -The MLRsearch suite has to contain a configuration parameter -that can be scaled up, e.g. number of tunnels or number of service chains. -Currently, only increasing the scale is supported -as the re-configuration operation. In future, scale decrease -or other operations can be implemented. - -The traffic profile is not changed, so the traffic present is processed -only by the smaller scale configuration. The added tunnels / chains -are not targetted by the traffic. - -For the re-configuration, the same Robot Framework and Python libraries -are used, as were used in the initial configuration, with the exception -of the final calls that do not interact with VPP (e.g. starting -virtual machines) being skipped to reduce the test overall duration. - -## Discussion - -Robot Framework introduces a certain overhead, which may affect timing -of individual VPP API calls, which in turn may affect -the number of packets lost. - -The exact calls executed may contain unnecessary info dumps, repeated commands, -or commands which change a value that do not need to be changed (e.g. MTU). -Thus, implementation details are affecting the results, even if their effect -on the corresponding MLRsearch suite is negligible. - -The lower bound for NDR is the only value safe to be used when zero packets lost -are expected without re-configuration. But different suites show different -"jitter" in that value. For some suites, the lower bound is not tight, -allowing full NIC buffers to drain quickly between worker pauses. -For other suites, lower bound for NDR still has quite a large probability -of non-zero packet loss even without re-configuration. diff --git a/docs/content/methodology/root_cause_analysis/_index.md b/docs/content/methodology/root_cause_analysis/_index.md deleted file mode 100644 index 79cfe73769..0000000000 --- a/docs/content/methodology/root_cause_analysis/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -bookCollapseSection: true -bookFlatSection: false -title: "Root Cause Analysis" -weight: 20 ---- \ No newline at end of file diff --git a/docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md b/docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md deleted file mode 100644 index 900ea0b874..0000000000 --- a/docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md +++ /dev/null @@ -1,228 +0,0 @@ ---- -title: "Per-patch performance tests" -weight: 1 ---- - -# Per-patch performance tests - -Updated for CSIT git commit id: 72b45cfe662107c8e1bb549df71ba51352a898ee. - -A methodology similar to trending analysis is used for comparing performance -before a DUT code change is merged. This can act as a verify job to disallow -changes which would decrease performance without a good reason. - -## Existing jobs - -VPP is the only project currently using such jobs. -They are not started automatically, must be triggered on demand. -They allow full tag expressions, but some tags are enforced (such as MRR). - -There are jobs available for multiple types of testbeds, -based on various processors. -Their Gerrit triggers words are of the form "perftest-{node_arch}" -where the node_arch combinations currently supported are: -2n-clx, 2n-tx2, 2n-zn2, 3n-tsh. 
-
-## Test selection
-
-A Gerrit trigger line without any additional arguments selects
-a small set of test cases to run.
-If additional arguments are added to the Gerrit trigger, they are treated
-as Robot tag expressions to select tests to run.
-While very flexible, this method of test selection also allows the user
-to accidentally select too many tests, blocking the testbed for days.
-
-What follows is a list of explanations and recommendations
-to help users select the minimal set of test cases.
-
-### Verify cycles
-
-When Gerrit schedules multiple jobs to run for the same patch set,
-it waits until all runs are complete.
-While it is waiting, it is possible to trigger more jobs
-(adding runs to the set Gerrit is waiting for), but it is not possible
-to trigger more runs for the same job until Gerrit is done waiting.
-After Gerrit is done waiting, it becomes possible to trigger
-the same job again.
-
-Example: A user triggers one set of tests on 2n-icx and immediately
-also triggers another set of tests on 3n-icx. Then the user notices
-the 2n-icx run ended early because of a typo in the tag expression.
-When the user tries to re-trigger 2n-icx (with the fixed tag expression),
-that comment gets ignored by Jenkins.
-Only when the 3n-icx job finishes can the user trigger 2n-icx.
-
-### One comment many jobs
-
-In the past, the CSIT code which parses for perftest trigger comments
-was buggy, which led to bad behavior (such as selecting all performance tests,
-because "perftest" is also a Robot tag) when a user included multiple
-perftest trigger words in the same comment.
-
-The worst bugs were fixed since then, but it is still recommended
-to use just one trigger word per Gerrit comment, just to be safe.
-
-### Multiple test cases in run
-
-While Robot supports the OR operator, it does not support parentheses,
-so the OR operator is not very useful. It is recommended
-to use a space instead of the OR operator.
-
-Example template:
-perftest-2n-icx {tag_expression_1} {tag_expression_2}
-
-See below for more concrete examples.
-
-### Suite tags
-
-Traditionally, CSIT maintains broad Robot tags that can be used to select tests.
-
-But it is not recommended to use them for test selection,
-as it is not that easy to determine how many test cases are selected.
-
-The recommended way is to look into the CSIT repository first,
-and locate a specific suite the user is interested in,
-and use its suite tag. For example, "ethip4-ip4base" is a suite tag
-selecting just one suite in the CSIT git repository,
-avoiding all scale, container, and other similar variants.
-
-Note that CSIT uses the "autogen" code generator,
-so the robot running in Jenkins has access to more suites
-than are visible just by looking into the CSIT git repository,
-so a suite tag is not enough to select even the intended suite,
-and the user still probably wants to narrow down
-to a single test case within a suite.
-
-### Fully specified tag expressions
-
-Here is one template to select a single test case:
-{test_type}AND{nic_model}AND{nic_driver}AND{cores}AND{frame_size}AND{suite_tag}
-where the variables are all lower case (so the AND operator stands out).
-
-Currently only one test type is supported by the performance comparison jobs:
-"mrr".
-The nic_driver options depend on nic_model. For Intel cards "drv_avf" (AVF plugin)
-and "drv_vfio_pci" (DPDK plugin) are popular, for Mellanox "drv_rdma_core".
-Currently, the performance using "drv_af_xdp" is not reliable enough, so do not use it
-unless you are specifically testing for AF_XDP.
-
-The most popular nic_model is "nic_intel-xxv710", but that is not available
-on all testbed types.
-It is safe to use "1c" for cores (unless you suspect multi-core performance
-is affected differently) and "64b" for frame size ("78b" for ip6
-and more for dot1q and other encapsulated traffic;
-"1518b" is popular for ipsec and other payload-bound tests).
-
-As there are more test cases than CSIT can periodically test,
-it is possible to encounter an old test case that currently fails.
-To avoid that, you can look at "job spec" files we use for periodic testing,
-for example
-[this one](https://github.com/FDio/csit/blob/master/resources/job_specs/report_iterative/2n-icx/vpp-mrr-00.md).
-
-### Shortening triggers
-
-Advanced users may use the following tricks to avoid writing long trigger comments.
-
-Robot supports glob matching, which can be used to select multiple suite tags at once.
-
-Not specifying one of 6 parts of the recommended expression pattern
-will select all available options. For example not specifying nic_driver
-for nic_intel-xxv710 will select all 3 applicable drivers.
-You can use the NOT operator to reject some options (e.g. NOTdrv_af_xdp),
-but beware, with NOT the order matters:
-tag1ANDtag2NOTtag3 is not the same as tag1NOTtag3ANDtag2,
-the latter is evaluated as tag1AND(NOT(tag3ANDtag2)).
-
-Beware when not specifying nic_model. As a precaution,
-CSIT code will insert the default NIC model for the testbed used.
-Example: Specifying drv_rdma_core without specifying nic_model
-will fail, as the default nic_model is nic_intel-xxv710
-which does not support the RDMA core driver.
-
-### Complete example
-
-A user wants to test a VPP change which may affect load balancing with bonding.
-Searching tag documentation for "bonding" finds the LBOND tag and its variants.
-Searching the CSIT git repository (directory tests/) finds 8 suite files,
-all suited only for 3-node testbeds.
-All suites are using vhost, but differ by the forwarding app inside the VM
-(DPDK or VPP), by the forwarding mode of VPP acting as a host-level vswitch
-(MAC learning or cross connect), and by the number of DUT1-DUT2 links
-available (1 or 2).
-
-As not all NICs and testbeds offer enough ports for 2 parallel DUT-DUT links,
-the user looks at
-[testbed specifications](https://github.com/FDio/csit/tree/master/topologies/available)
-and finds that only the xxv710 NIC on the 3n-icx testbed matches the requirements.
-A quick look into the suites confirms the smallest frame size is 64 bytes
-(despite the DOT1Q robot tag, as the encapsulation does not happen on TG-DUT links).
-It is ok to use just 1 physical core, as 3n-icx has hyperthreading enabled,
-so the VPP vswitch will use 2 worker threads.
-
-The user decides the vswitch forwarding mode is not important
-(so chooses cross connect as that has less CPU overhead),
-but wants to test both NIC drivers (not AF_XDP), both apps in the VM,
-and both 1 and 2 parallel links.
-
-After shortening, this is the trigger comment finally used:
-perftest-3n-icx mrrANDnic_intel-x710AND1cAND64bAND?lbvpplacp-dot1q-l2xcbase-eth-2vhostvr1024-1vm*NOTdrv_af_xdp
-
-## Basic operation
-
-The job builds VPP .deb packages for both the patch under test
-(called "current") and its parent patch (called "parent").
-
-For each test (from a set defined by the tag expression),
-both builds are subjected to several trial measurements (BMRR).
-Measured samples are grouped into a "parent" sequence,
-followed by a "current" sequence. The same Minimal Description Length
-algorithm as in trending is used to decide whether it is one big group,
-or two smaller groups. If it is one group, a "normal" result
-is declared for the test. If it is two groups, and the current average
-is less than the parent average, the test is declared a regression.
-If it is two groups and the current average is larger or equal,
-the test is declared a progression.
-
-The whole job fails (giving -1) if some trial measurement failed,
-or if any test was declared a regression.
-
-## Temporary specifics
-
-The Minimal Description Length analysis is performed by
-CSIT code equivalent to the jumpavg-0.1.3 library available on PyPI.
-
-In hopes of strengthening the signal (code performance) compared to noise
-(all other factors influencing the measured values), several workarounds
-are applied.
-
-In contrast to trending, the trial duration is set to 10 seconds,
-and only 5 samples are measured for each build.
-Both parameters are set in ci-management.
-
-This decreases sensitivity to regressions, but also decreases
-the probability of false positives.
-
-## Console output
-
-The following information is visible towards the end of the Jenkins console
-output, repeated for each analyzed test.
-
-The original 5 values are visible in the order they were measured.
-The 5 values after processing are also visible in the output,
-this time sorted by value (so people can see minimum and maximum).
-
-The next output is the difference of averages. It is the current average
-minus the parent average, expressed as a percentage of the parent average.
-
-The next three outputs contain the jumpavg representation
-of the two groups and a combined group.
-Here, "bits" is the description length; for the "current" sequence
-it includes the effect of the "parent" average value
-(jumpavg-0.1.3 penalizes sequences with too close averages).
-
-Next, a sentence describing which grouping description is shorter,
-and by how many bits.
-Finally, the test result classification is visible.
-
-The algorithm does not track test case names,
-so test cases are indexed (from 0).
diff --git a/docs/content/methodology/suite_generation.md b/docs/content/methodology/suite_generation.md
deleted file mode 100644
index 4fa9dee0ce..0000000000
--- a/docs/content/methodology/suite_generation.md
+++ /dev/null
@@ -1,124 +0,0 @@
----
-title: "Suite Generation"
-weight: 19
----
-
-# Suite Generation
-
-CSIT uses robot suite files to define tests.
-However, not all suite files available for Jenkins jobs
-(or manually started bootstrap scripts) are present in the CSIT git repository.
-They are generated only when needed.
-
-## Autogen Library
-
-There is a code generation layer implemented as a Python library called "autogen",
-called by various bash scripts.
-
-It generates the full extent of CSIT suites, using the ones in git as templates.
-
-## Sources
-
-The generated suites (and their contents) are affected by multiple information
-sources, listed below.
-
-### Git Suites
-
-The suites present in the git repository act as templates for generating suites.
-One of autogen's design principles is that any template suite should also act
-as a full suite (no placeholders).
-
-In practice, autogen always re-creates the template suite with exactly
-the same content; this is one of the checks that autogen works correctly.
-
-### Regenerate Script
-
-Not all suites present in the CSIT git repository act as templates for autogen.
-The distinction is on a per-directory level. Directories with
-a regenerate_testcases.py script usually consider all suites as templates
-(unless possibly not included by the glob pattern in the script).
-
-The script also specifies the minimal frame size, indirectly, by specifying
-the protocol (protocol "ip4" is the default, leading to a 64B frame size).
-
-### Constants
-
-Values in Constants.py are taken into consideration when generating suites.
-The values are mostly related to different NIC models and NIC drivers.
-
-### Python Code
-
-Python code in resources/libraries/python/autogen contains several other
-information sources.
-
-#### Testcase Templates
-
-The test case part of a template suite is ignored; test case lines
-are created according to the text templates in the Testcase.py file.
-
-#### Testcase Argument Lists
-
-Each testcase template has a different number of "arguments", e.g. values
-to put into various placeholders. Different test types need different
-lists of the argument values; the lists are in the regenerate_glob method
-in the Regenerator.py file.
-
-#### Iteration Over Values
-
-Python code detects the test type (usually by substrings of the suite file name),
-then iterates over different quantities based on the type.
-For example, only ndrpdr suite templates generate other types (mrr and soak).
-
-#### Hardcoded Exclusions
-
-Some combinations of values are known not to work, so they are excluded.
-Examples: Density tests for too many CPUs; IMIX for ASTF.
-
-## Non-Sources
-
-Some information sources are available in CSIT repository,
-but do not affect the suites generated by autogen.
-
-### Testbeds
-
-Overall, no information visible in topology yaml files is taken into account
-by autogen.
-
-#### Testbed Architecture
-
-Historically, suite files are agnostic to testbed architecture, e.g. ICX or ALT.
-
-#### Testbed Size
-
-Historically, 2-node and 3-node suites have different names, and while
-most of the code is common, the differences are not always simple enough.
-Autogen treats 2-node and 3-node suites as independent templates.
-
-TRex suites are intended for a 1-node circuit of otherwise 2-node or 3-node
-testbeds, so they support all 3 robot tags.
-They are also detected and treated differently by autogen,
-mainly because they need different testcase arguments (no CPU count).
-Autogen does nothing specifically related to the fact they should run
-only in testbeds/NICs with a TG-TG line available.
-
-#### Other Topology Info
-
-Some bonding tests need two (parallel) links between DUTs.
-Autogen does not care, as suites are agnostic.
-A Robot tag marks the difference, but the link presence is not explicitly checked.
-
-### Job specs
-
-Information in job spec files depends on generated suites (not the other way).
-Autogen should generate more suites, as the job spec is limited by a time budget.
-More suites should be available for manually triggered verify jobs,
-so autogen covers that.
-
-### Bootstrap Scripts
-
-Historically, bootstrap scripts perform some logic,
-perhaps adding exclusion options to the Robot invocation
-(e.g. skipping testbed+NIC combinations for tests that need parallel links).
-
-Once again, the logic here relies on what autogen generates;
-autogen does not look into bootstrap scripts.
diff --git a/docs/content/methodology/telemetry.md b/docs/content/methodology/telemetry.md
deleted file mode 100644
index e7a2571573..0000000000
--- a/docs/content/methodology/telemetry.md
+++ /dev/null
@@ -1,167 +0,0 @@
----
-title: "Telemetry"
-weight: 20
----
-
-# Telemetry
-
-OpenMetrics specifies the de-facto standard for transmitting cloud-native
-metrics at scale, with support for both text representation and Protocol
-Buffers.
-
-## RFC
-
-- RFC2119
-- RFC5234
-- RFC8174
-- draft-richih-opsawg-openmetrics-00
-
-## Reference
-
-[OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md)
-
-## Metric Types
-
-- Gauge
-- Counter
-- StateSet
-- Info
-- Histogram
-- GaugeHistogram
-- Summary
-- Unknown
-
-The telemetry module in CSIT currently supports only Gauge, Counter and Info.
-
-## Anatomy of CSIT telemetry implementation
-
-The existing implementation consists of several measurement building blocks:
-the main measuring block running search algorithms (MLR, PLR, SOAK, MRR, ...),
-the latency measuring block, and several telemetry blocks with or without
-traffic running in the background.
-
-The main measuring block must not be interrupted by any read operation that can
-impact data plane traffic processing during the throughput search algorithm. Thus
-operational reads are done before (pre-stat) and after (post-stat) that block.
-
-Some operational reads must be done while traffic is running and usually
-consist of two reads (pre-run-stat, post-run-stat) with a defined delay between
-them.
-
-## MRR measurement
-
- traffic_start(r=mrr) traffic_stop |< measure >|
- | | | (r=mrr) |
- | pre_run_stat post_run_stat | pre_stat | | post_stat
- | | | | | | | |
- --o--------o---------------o---------o-------o--------+-------------------+------o------------>
- t
-
- Legend:
- - pre_run_stat
- - vpp-clear-runtime
- - post_run_stat
- - vpp-show-runtime
- - bash-perf-stat // if extended_debug == True
- - pre_stat
- - vpp-clear-stats
- - vpp-enable-packettrace // if extended_debug == True
- - vpp-enable-elog
- - post_stat
- - vpp-show-stats
- - vpp-show-packettrace // if extended_debug == True
- - vpp-show-elog
-
-
- |< measure >|
- | (r=mrr) |
- | |
- |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >|
- | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) |
- | | | |
- --o------------------------o------------------------o------------------------o--->
- t
-
-
-## MLR measurement
-
- |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >|
- | (r=mlr) | | | | | | .9/.5/.1/.0 |
- | | | pre_run_stat post_run_stat | | pre_run_stat post_run_stat | | |
- | | | | | | | | | | | |
- --+-------------------+----o--------o---------------o---------o--------------o--------o---------------o---------o------------[---------------------]--->
- t
-
- Legend:
- - pre_run_stat
- - vpp-clear-runtime
- - post_run_stat
- - vpp-show-runtime
- - bash-perf-stat // if extended_debug == True
- - pre_stat
- - vpp-clear-stats
- - vpp-enable-packettrace // if extended_debug == True
- - vpp-enable-elog
- - post_stat
- - vpp-show-stats
- - vpp-show-packettrace // if extended_debug == True
- - vpp-show-elog
-
-
-## MRR measurement
-
- traffic_start(r=mrr) traffic_stop |< measure >|
- | | | (r=mrr) |
- | |< stat_runtime >| | stat_pre_trial | | stat_post_trial
- | | | | | | | |
- ----o---+--------------------------+---o-------------o------------+-------------------+-----o------------->
- t
-
- Legend:
- - stat_runtime
- - vpp-runtime
- - stat_pre_trial
- - vpp-clear-stats
- - vpp-enable-packettrace // if extended_debug == True
- - stat_post_trial
- - vpp-show-stats
- - vpp-show-packettrace // if extended_debug == True
-
-
- |< measure >|
- | (r=mrr) |
- | |
- |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >|
- | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) |
- | | | |
- --o------------------------o------------------------o------------------------o--->
- t
-
-
- |< stat_runtime >|
- | |
- |< program0 >|< program1 >|< programN >|
- | (@=params) | (@=params) | (@=params) |
- | | | |
- --o------------------------o------------------------o------------------------o--->
- t
-
-
-## MLR measurement
-
- |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >|
- | (r=mlr) | | | | | | .9/.5/.1/.0 |
- | | | |< stat_runtime >| | | |< stat_runtime >| | | |
- | | | | | | | | | | | |
- --+-------------------+-----o---+--------------------------+---o--------------o---+--------------------------+---o-----------[---------------------]--->
- t
-
- Legend:
- - stat_runtime
- - vpp-runtime
- - stat_pre_trial
- - vpp-clear-stats
- - vpp-enable-packettrace // if extended_debug == True
- - stat_post_trial
- - vpp-show-stats
- - vpp-show-packettrace // if extended_debug == True
diff --git a/docs/content/methodology/terminology.md b/docs/content/methodology/terminology.md
deleted file mode 100644
index 229db7d145..0000000000
--- a/docs/content/methodology/terminology.md
+++ /dev/null
@@ -1,82 +0,0 @@
----
-title: "Terminology"
-weight: 1
----
-
-# Terminology
-
-- **Frame size**: size of an Ethernet Layer-2 frame on the wire, including
- any VLAN tags (dot1q, dot1ad) and Ethernet FCS, but excluding Ethernet
- preamble and inter-frame gap. Measured in Bytes.
-- **Packet size**: same as frame size, both terms used interchangeably.
-- **Inner L2 size**: for tunneled L2 frames only, size of an encapsulated
- Ethernet Layer-2 frame, preceded with tunnel header, and followed by
- tunnel trailer. Measured in Bytes.
-- **Inner IP size**: for tunneled IP packets only, size of an encapsulated
- IPv4 or IPv6 packet, preceded with tunnel header, and followed by
- tunnel trailer. Measured in Bytes.
-- **Device Under Test (DUT)**: In software networking, "device" denotes a
- specific piece of software tasked with packet processing. Such a device
- is surrounded by other software components (such as the operating system
- kernel). It is not possible to run devices without also running the
- other components, and hardware resources are shared between both. For
- purposes of testing, the whole set of hardware and software components
- is called "System Under Test" (SUT). As the SUT is the part of the whole
- test setup whose performance can be measured with RFC2544, this document
- uses SUT instead of the RFC2544 DUT. The device under test
- (DUT) can be re-introduced when analyzing test results using whitebox
- techniques, but this document sticks to blackbox testing.
-- **System Under Test (SUT)**: System under test (SUT) is a part of the
- whole test setup whose performance is to be benchmarked. The complete
- methodology contains other parts, whose performance is either already
- established, or not affecting the benchmarking result.
-- **Bi-directional throughput tests**: involve packets/frames flowing in
- both east-west and west-east directions over every tested interface of
- SUT/DUT. Packet flow metrics are measured per direction, and can be
- reported as aggregate for both directions (i.e. throughput) and/or
- separately for each measured direction (i.e. latency). In most cases
- bi-directional tests use the same (symmetric) load in both directions.
-- **Uni-directional throughput tests**: involve packets/frames flowing in
- only one direction, i.e. either east-west or west-east direction, over
- every tested interface of SUT/DUT. Packet flow metrics are measured
- and are reported for measured direction.
-- **Packet Loss Ratio (PLR)**: ratio of packets lost relative to packets
- transmitted over the test trial duration, calculated using formula:
- PLR = ( pkts_transmitted - pkts_received ) / pkts_transmitted.
- For bi-directional throughput tests aggregate PLR is calculated based
- on the aggregate number of packets transmitted and received.
-- **Packet Throughput Rate**: maximum packet offered load DUT/SUT forwards
- within the specified Packet Loss Ratio (PLR). In many cases the rate
- depends on the frame size processed by DUT/SUT. Hence packet
- throughput rate MUST be quoted with specific frame size as received by
- DUT/SUT during the measurement. For bi-directional tests, packet
- throughput rate should be reported as aggregate for both directions.
- Measured in packets-per-second (pps) or frames-per-second (fps),
- equivalent metrics.
-- **Bandwidth Throughput Rate**: a secondary metric calculated from packet
- throughput rate using formula: bw_rate = pkt_rate * (frame_size +
- L1_overhead) * 8, where L1_overhead for Ethernet includes preamble (8
- Bytes) and inter-frame gap (12 Bytes). For bi-directional tests,
- bandwidth throughput rate should be reported as aggregate for both
- directions. Expressed in bits-per-second (bps).
-- **Non Drop Rate (NDR)**: maximum packet/bandwidth throughput rate sustained
- by DUT/SUT at a PLR equal to zero (zero packet loss) specific to tested
- frame size(s). MUST be quoted with specific packet size as received by
- DUT/SUT during the measurement. Packet NDR measured in
- packets-per-second (or fps), bandwidth NDR expressed in
- bits-per-second (bps).
-- **Partial Drop Rate (PDR)**: maximum packet/bandwidth throughput rate
- sustained by DUT/SUT at a PLR greater than zero (non-zero packet loss)
- specific to tested frame size(s). MUST be quoted with specific packet
- size as received by DUT/SUT during the measurement. Packet PDR
- measured in packets-per-second (or fps), bandwidth PDR expressed in
- bits-per-second (bps).
-- **Maximum Receive Rate (MRR)**: packet/bandwidth rate regardless of PLR
- sustained by DUT/SUT under specified Maximum Transmit Rate (MTR)
- packet load offered by traffic generator. MUST be quoted with both
- specific packet size and MTR as received by DUT/SUT during the
- measurement. Packet MRR measured in packets-per-second (or fps),
- bandwidth MRR expressed in bits-per-second (bps).
-- **Trial**: a single measurement step.
-- **Trial duration**: amount of time over which packets are transmitted and
- received in a single measurement step.
diff --git a/docs/content/methodology/test/_index.md b/docs/content/methodology/test/_index.md
new file mode 100644
index 0000000000..857cc7b168
--- /dev/null
+++ b/docs/content/methodology/test/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Test"
+weight: 3
+---
diff --git a/docs/content/methodology/test/access_control_lists.md b/docs/content/methodology/test/access_control_lists.md
new file mode 100644
index 0000000000..354e6b72bb
--- /dev/null
+++ b/docs/content/methodology/test/access_control_lists.md
@@ -0,0 +1,66 @@
+---
+title: "Access Control Lists"
+weight: 5
+---
+
+# Access Control Lists
+
+VPP is tested in a number of data plane feature configurations across
+different forwarding modes. The following sections list the tested features.
+
+## ACL Security-Groups
+
+Both stateless and stateful access control lists (ACL), also known as
+security-groups, are supported by VPP.
+
+The following ACL configurations are tested for MAC switching with L2
+bridge-domains:
+
+- *l2bdbasemaclrn-iacl{E}sl-{F}flows*: Input stateless ACL, with {E}
+  entries and {F} flows.
+- *l2bdbasemaclrn-oacl{E}sl-{F}flows*: Output stateless ACL, with {E}
+  entries and {F} flows.
+- *l2bdbasemaclrn-iacl{E}sf-{F}flows*: Input stateful ACL, with {E}
+  entries and {F} flows.
+- *l2bdbasemaclrn-oacl{E}sf-{F}flows*: Output stateful ACL, with {E}
+  entries and {F} flows.
+
+The following ACL configurations are tested with IPv4 routing:
+
+- *ip4base-iacl{E}sl-{F}flows*: Input stateless ACL, with {E} entries
+  and {F} flows.
+- *ip4base-oacl{E}sl-{F}flows*: Output stateless ACL, with {E} entries
+  and {F} flows.
+- *ip4base-iacl{E}sf-{F}flows*: Input stateful ACL, with {E} entries and
+  {F} flows.
+- *ip4base-oacl{E}sf-{F}flows*: Output stateful ACL, with {E} entries
+  and {F} flows.
+
+ACL tests are executed with the following combinations of ACL entries
+and number of flows:
+
+- ACL entry definitions
+  - flow non-matching deny entry: (src-ip4, dst-ip4, src-port, dst-port).
+  - flow matching permit ACL entry: (src-ip4, dst-ip4).
+- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50].
+- {F} - number of UDP flows with different tuple (src-ip4, dst-ip4,
+  src-port, dst-port), {F} = [100, 10k, 100k].
+- All {E}x{F} combinations are tested per ACL type, total of 9.
+
+## ACL MAC-IP
+
+MAC-IP binding ACLs are tested for MAC switching with L2 bridge-domains:
+
+- *l2bdbasemaclrn-macip-iacl{E}sl-{F}flows*: Input stateless ACL, with
+  {E} entries and {F} flows.
+
+MAC-IP ACL tests are executed with the following combinations of ACL
+entries and number of flows:
+
+- ACL entry definitions
+  - flow non-matching deny entry: (dst-ip4, dst-mac, bit-mask).
+  - flow matching permit ACL entry: (dst-ip4, dst-mac, bit-mask).
+- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50].
+- {F} - number of UDP flows with different tuple (dst-ip4, dst-mac),
+  {F} = [100, 10k, 100k].
+- All {E}x{F} combinations are tested per ACL type, a total of 9
+  (see the sketch below).
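+
+To make the {E}x{F} matrix concrete, the following Python sketch (a
+hypothetical helper, not part of CSIT code) expands the values above into
+CSIT-style test case name suffixes; the MAC-IP names follow the same pattern
+with a macip- infix:
+
+    # Hypothetical helper: enumerate the 9 {E}x{F} ACL combinations
+    # described above into CSIT-style test case name suffixes.
+    from itertools import product
+
+    ENTRIES = [1, 10, 50]           # {E}, non-matching deny ACL entries
+    FLOWS = [100, 10_000, 100_000]  # {F}, UDP flows
+
+    def acl_suffixes(base="l2bdbasemaclrn", direction="i", state="sl"):
+        """Yield suffixes like 'l2bdbasemaclrn-iacl50sl-100kflows'."""
+        for e, f in product(ENTRIES, FLOWS):
+            flows = f"{f // 1000}k" if f >= 1000 else str(f)
+            yield f"{base}-{direction}acl{e}{state}-{flows}flows"
+
+    print(list(acl_suffixes()))  # 9 combinations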
diff --git a/docs/content/methodology/test/generic_segmentation_offload.md b/docs/content/methodology/test/generic_segmentation_offload.md
new file mode 100644
index 0000000000..0032d203de
--- /dev/null
+++ b/docs/content/methodology/test/generic_segmentation_offload.md
@@ -0,0 +1,117 @@
+---
+title: "Generic Segmentation Offload"
+weight: 7
+---
+
+# Generic Segmentation Offload
+
+## Overview
+
+Generic Segmentation Offload (GSO) reduces per-packet processing
+overhead by enabling applications to pass a multi-packet buffer to
+the (v)NIC and process a smaller number of large packets (e.g. frame size of
+64 KB), instead of processing higher numbers of small packets (e.g.
+frame size of 1500 B).
+
+GSO is tested on VPP vhostuser and tapv2 interfaces. All test cases use the
+iPerf3 client and server applications running TCP/IP as a traffic generator.
+For performance comparison, the same tests are run without GSO enabled.
+
+## GSO Test Topologies
+
+Two VPP GSO test topologies are implemented:
+
+1. iPerfC_GSOvirtio_LinuxVM --- GSOvhost_VPP_GSOvhost --- iPerfS_GSOvirtio_LinuxVM
+   - Tests VPP GSO on vhostuser interfaces and interaction with Linux
+     virtio with GSO enabled.
+2. iPerfC_GSOtap_LinuxNspace --- GSOtapv2_VPP_GSOtapv2 --- iPerfS_GSOtap_LinuxNspace
+   - Tests VPP GSO on tapv2 interfaces and interaction with Linux tap
+     with GSO enabled.
+
+Common configuration:
+
+- iPerfC (client) and iPerfS (server) run in TCP/IP mode without an upper
+  bandwidth limit.
+- Trial duration is set to 30 sec.
+- iPerfC, iPerfS and VPP run in a single SUT node.
+
+
+## VPP GSOtap Topology
+
+### VPP Configuration
+
+VPP GSOtap tests are executed without using hyperthreading. The VPP worker
+runs on a single core. Multi-core tests are not executed. Each interface
+belongs to a separate namespace. The following core pinning scheme is used:
+
+- 1t1c (rxq=1, rx_qsz=4096, tx_qsz=4096)
+  - system isolated: 0,28,56,84
+  - vpp mt: 1
+  - vpp wt: 2
+  - vhost: 3-5
+  - iperf-s: 6
+  - iperf-c: 7
+
+### iPerf3 Server Configuration
+
+iPerf3 version used: 3.7
+
+    $ sudo -E -S ip netns exec tap1_namespace iperf3 \
+        --server --daemon --pidfile /tmp/iperf3_server.pid \
+        --logfile /tmp/iperf3.log --port 5201 --affinity X
+
+For the full iPerf3 reference please see
+[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
+
+
+### iPerf3 Client Configuration
+
+iPerf3 version used: 3.7
+
+    $ sudo -E -S ip netns exec tap1_namespace iperf3 \
+        --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel Y \
+        --time 30.0 --affinity X --zerocopy
+
+For the full iPerf3 reference please see
+[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
+
+
+## VPP GSOvhost Topology
+
+### VPP Configuration
+
+VPP GSOvhost tests are executed without using hyperthreading. The VPP worker
+runs on a single core. Multi-core tests are not executed. The following core
+pinning scheme is used:
+
+- 1t1c (rxq=1, rx_qsz=1024, tx_qsz=1024)
+  - system isolated: 0,28,56,84
+  - vpp mt: 1
+  - vpp wt: 2
+  - vm-iperf-s: 3,4,5,6,7
+  - vm-iperf-c: 8,9,10,11,12
+  - iperf-s: 1
+  - iperf-c: 1
+
+### iPerf3 Server Configuration
+
+iPerf3 version used: 3.7
+
+    $ sudo iperf3 \
+        --server --daemon --pidfile /tmp/iperf3_server.pid \
+        --logfile /tmp/iperf3.log --port 5201 --affinity X
+
+For the full iPerf3 reference please see
+[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
+
+
+### iPerf3 Client Configuration
+
+iPerf3 version used: 3.7
+
+    $ sudo iperf3 \
+        --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel Y \
+        --time 30.0 --affinity X --zerocopy
+
+For the full iPerf3 reference please see
+[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
diff --git a/docs/content/methodology/test/hoststack/_index.md b/docs/content/methodology/test/hoststack/_index.md
new file mode 100644
index 0000000000..2ae872c54e
--- /dev/null
+++ b/docs/content/methodology/test/hoststack/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Hoststack"
+weight: 6
+---
diff --git a/docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md b/docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md
new file mode 100644
index 0000000000..c7d57a51b3
--- /dev/null
+++ b/docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md
@@ -0,0 +1,48 @@
+---
+title: "QUIC/UDP/IP with vpp_echo"
+weight: 1
+---
+
+# QUIC/UDP/IP with vpp_echo
+
+[vpp_echo performance testing tool](https://wiki.fd.io/view/VPP/HostStack#External_Echo_Server.2FClient_.28vpp_echo.29)
+is a bespoke performance test application which utilizes the 'native
+HostStack APIs' to verify performance and correct handling of
+connection/stream events with uni-directional and bi-directional
+streams of data.
+
+Because iperf3 does not support the QUIC transport protocol, vpp_echo
+is used for measuring the maximum attainable goodput of the VPP Host
+Stack connection utilizing the QUIC transport protocol across two
+instances of VPP running on separate DUT nodes. The QUIC transport
+protocol supports multiple streams per connection and test cases
+utilize different combinations of QUIC connections and number of
+streams per connection.
+
+The test configuration is as follows:
+
+    DUT1               Network                DUT2
+    [ vpp_echo-client -> VPP1 ]=======[ VPP2 -> vpp_echo-server]
+        N-streams/connection
+
+where,
+
+1. vpp_echo server attaches to VPP2 and LISTENs on VPP2:TCP port 1234.
+2. vpp_echo client creates one or more connections to VPP1 and opens
+   one or more streams per connection to VPP2:TCP port 1234.
+3. vpp_echo client transmits a uni-directional stream as fast as the
+   VPP Host Stack allows to the vpp_echo server for the test duration.
+4. At the end of the test the vpp_echo client emits the goodput
+   measurements for all streams and the sum of all streams.
+
+Test cases include:
+
+1. 1 QUIC Connection with 1 Stream
+2. 1 QUIC connection with 10 Streams
+3. 10 QUIC connections with 1 Stream
+4. 10 QUIC connections with 10 Streams
+
+with stream sizes to provide reasonable test durations. The VPP Host
+Stack QUIC transport is configured to utilize the picotls encryption
+library. In the future, tests utilizing additional encryption
+algorithms will be added.
diff --git a/docs/content/methodology/test/hoststack/tcpip_with_iperf3.md b/docs/content/methodology/test/hoststack/tcpip_with_iperf3.md
new file mode 100644
index 0000000000..7baa88ab50
--- /dev/null
+++ b/docs/content/methodology/test/hoststack/tcpip_with_iperf3.md
@@ -0,0 +1,52 @@
+---
+title: "TCP/IP with iperf3"
+weight: 2
+---
+
+# TCP/IP with iperf3
+
+[iperf3 goodput measurement tool](https://github.com/esnet/iperf)
+is used for measuring the maximum attainable goodput of the VPP Host
+Stack connection across two instances of VPP running on separate DUT
+nodes. iperf3 is a popular open source tool for active measurements
+of the maximum achievable goodput on IP networks.
+
+Because iperf3 utilizes the POSIX socket interface APIs, the current
+test configuration utilizes the LD_PRELOAD mechanism of the Linux
+dynamic linker to connect iperf3 to the VPP Host Stack using the VPP
+Communications Library (VCL) LD_PRELOAD library (libvcl_ldpreload.so).
+
+In the future, a forked version of iperf3 which has been modified to
+directly use the VCL application APIs may be added to determine the
+difference in performance of 'VCL Native' applications versus utilizing
+LD_PRELOAD, which inherently has more overhead and other limitations.
+
+The test configuration is as follows:
+
+    DUT1               Network                DUT2
+    [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server]
+
+where,
+
+1. iperf3 server attaches to VPP2 and LISTENs on VPP2:TCP port 5201.
+2. iperf3 client attaches to VPP1 and opens one or more stream
+   connections to VPP2:TCP port 5201.
+3. iperf3 client transmits a uni-directional stream as fast as the
+   VPP Host Stack allows to the iperf3 server for the test duration.
+4. At the end of the test the iperf3 client emits the goodput
+   measurements for all streams and the sum of all streams.
+
+Test cases include 1 and 10 Streams with a 20 second test duration
+with the VPP Host Stack configured to utilize the Cubic TCP
+congestion control algorithm.
+
+Note: iperf3 is single-threaded, so it is expected that the 10 stream
+test shows little or no performance improvement due to
+multi-thread/multi-core execution.
+
+There are also variations of these test cases which use the VPP Network
+Simulator (NSIM) plugin to test the VPP Hoststack goodput with 1 percent
+of the traffic being dropped at the output interface of VPP1, thereby
+simulating a lossy network. The NSIM tests are experimental and the
+test results are not currently representative of typical results in a
+lossy network.
diff --git a/docs/content/methodology/test/hoststack/udpip_with_iperf3.md b/docs/content/methodology/test/hoststack/udpip_with_iperf3.md
new file mode 100644
index 0000000000..01ddf61269
--- /dev/null
+++ b/docs/content/methodology/test/hoststack/udpip_with_iperf3.md
@@ -0,0 +1,44 @@
+---
+title: "UDP/IP with iperf3"
+weight: 3
+---
+
+# UDP/IP with iperf3
+
+[iperf3 goodput measurement tool](https://github.com/esnet/iperf)
+is used for measuring the maximum attainable goodput of the VPP Host
+Stack connection across two instances of VPP running on separate DUT
+nodes. iperf3 is a popular open source tool for active measurements
+of the maximum achievable goodput on IP networks.
+
+Because iperf3 utilizes the POSIX socket interface APIs, the current
+test configuration utilizes the LD_PRELOAD mechanism of the Linux
+dynamic linker to connect iperf3 to the VPP Host Stack using the VPP
+Communications Library (VCL) LD_PRELOAD library (libvcl_ldpreload.so).
+
+In the future, a forked version of iperf3 which has been modified to
+directly use the VCL application APIs may be added to determine the
+difference in performance of 'VCL Native' applications versus utilizing
+LD_PRELOAD, which inherently has more overhead and other limitations.
+
+The test configuration is as follows:
+
+    DUT1               Network                DUT2
+    [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server]
+
+where,
+
+1. iperf3 server attaches to VPP2 and LISTENs on VPP2:UDP port 5201.
+2. iperf3 client attaches to VPP1 and transmits one or more streams
+   of packets to VPP2:UDP port 5201.
+3. iperf3 client transmits a uni-directional stream as fast as the
+   VPP Host Stack allows to the iperf3 server for the test duration.
+4. At the end of the test the iperf3 client emits the goodput
+   measurements for all streams and the sum of all streams.
+
+Test cases include 1 and 10 Streams with a 20 second test duration
+with the VPP Host Stack using the UDP transport layer.
+
+Note: iperf3 is single-threaded, so it is expected that the 10 stream
+test shows little or no performance improvement due to
+multi-thread/multi-core execution.
diff --git a/docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md b/docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md
new file mode 100644
index 0000000000..2dc4d2b7f9
--- /dev/null
+++ b/docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md
@@ -0,0 +1,39 @@
+---
+title: "VSAP ab with nginx"
+weight: 4
+---
+
+# VSAP ab with nginx
+
+[VSAP (VPP Stack Acceleration Project)](https://wiki.fd.io/view/VSAP)
+aims to establish an industry user space application ecosystem based on
+the VPP hoststack. As a pre-requisite to adapting open source applications
+using the VPP Communications Library to accelerate performance, the VSAP team
+has introduced baseline tests utilizing the LD_PRELOAD mechanism to capture
+baseline performance data.
+
+[AB (Apache HTTP server benchmarking tool)](https://httpd.apache.org/docs/2.4/programs/ab.html)
+is used for measuring the maximum connections-per-second and requests-per-second.
+
+[NGINX](https://www.nginx.com) is a popular open source HTTP server
+application. Because NGINX utilizes the POSIX socket interface APIs, the test
+configuration uses the LD_PRELOAD mechanism to connect NGINX to the VPP
+Hoststack using the VPP Communications Library (VCL) LD_PRELOAD library
+(libvcl_ldpreload.so).
+
+In the future, a version of NGINX which has been modified to
+directly use the VCL application APIs will be added to determine the
+difference in performance of 'VCL Native' applications versus utilizing
+LD_PRELOAD, which inherently has more overhead and other limitations.
+
+The test configuration is as follows:
+
+    TG     Network      DUT
+    [ AB ]=============[ VPP -> nginx ]
+
+where,
+
+1. nginx attaches to VPP and listens on TCP port 80.
+2. ab runs CPS and RPS tests with packets flowing from the Test Generator node,
+   across 100G NICs, through VPP hoststack to NGINX.
+3. At the end of the tests, the results are reported by AB.
diff --git a/docs/content/methodology/test/internet_protocol_security.md b/docs/content/methodology/test/internet_protocol_security.md
new file mode 100644
index 0000000000..1a02c43a0a
--- /dev/null
+++ b/docs/content/methodology/test/internet_protocol_security.md
@@ -0,0 +1,73 @@
+---
+title: "Internet Protocol Security"
+weight: 4
+---
+
+# Internet Protocol Security
+
+VPP Internet Protocol Security (IPsec) performance tests are executed for the
+following crypto plugins:
+
+- `crypto_native`, used for software-based crypto leveraging CPU
+  platform optimizations, e.g. Intel's AES-NI instruction set.
+- `crypto_ipsecmb`, used for hardware-based crypto with Intel QAT PCIe cards.
+
+## IPsec with VPP Native SW Crypto
+
+CSIT implements the following IPsec test cases relying on VPP native crypto
+(`crypto_native` plugin):
+
+ **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-------------------:|------------------:|-----------------:
+ crypto_native | AES[128\|256]-GCM | GCM | 1 to 60k tunnels
+ crypto_native | AES128-CBC | SHA[256\|512] | 1 to 60k tunnels
+
+VPP IPsec tests with SW crypto are executed in both tunnel and policy modes,
+with tests running on 3-node testbeds: 3n-icx, 3n-tsh.
+
+## IPsec with Intel QAT HW
+
+CSIT implements the following IPsec test cases relying on the ipsecmb library
+(`crypto_ipsecmb` plugin) and Intel QAT 8950 (50G HW crypto card):
+
+ **VPP Crypto Engine** | **VPP Crypto Workers** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-----------------------:|-------------------:|------------------:|-----------------:
+ crypto_ipsecmb | sync/all workers | AES[128\|256]-GCM | GCM | 1, 1k tunnels
+ crypto_ipsecmb | sync/all workers | AES[128]-CBC | SHA[256\|512] | 1, 1k tunnels
+ crypto_ipsecmb | async/crypto worker | AES[128\|256]-GCM | GCM | 1, 4, 1k tunnels
+ crypto_ipsecmb | async/crypto worker | AES[128]-CBC | SHA[256\|512] | 1, 4, 1k tunnels
+
+## IPsec with Async Crypto Feature Workers
+
+*TODO Description to be added*
+
+## IPsec Uni-Directional Tests with VPP Native SW Crypto
+
+CSIT implements the following IPsec uni-directional test cases relying on VPP
+native crypto (`crypto_native` plugin) in tunnel mode:
+
+ **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-------------------:|------------------:|-------------------:
+ crypto_native | AES[128\|256]-GCM | GCM | 4, 1k, 10k tunnels
+ crypto_native | AES128-CBC | SHA[512] | 4, 1k, 10k tunnels
+
+In policy mode:
+
+ **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-------------------:|------------------:|------------------:
+ crypto_native | AES[256]-GCM | GCM | 1, 40, 1k tunnels
+
+The tests run on 2-node testbeds: 2n-tx2. The uni-directional tests
+partially address a weakness in 2-node testbed setups with T-Rex as
+the traffic generator. With just one DUT node, we can either encrypt or decrypt
+traffic in each direction.
+
+The test cases only do encryption: packets are encrypted on the DUT and
+then arrive at the TG, where no additional packet processing is needed (just
+counting packets).
+
+Decryption would require that the traffic generator generated encrypted packets
+which the DUT would then decrypt. However, T-Rex does not have the capability
+to encrypt packets.
diff --git a/docs/content/methodology/test/network_address_translation.md b/docs/content/methodology/test/network_address_translation.md
new file mode 100644
index 0000000000..f443eabc5f
--- /dev/null
+++ b/docs/content/methodology/test/network_address_translation.md
@@ -0,0 +1,445 @@
+---
+title: "Network Address Translation"
+weight: 1
+---
+
+# Network Address Translation
+
+## NAT44 Prefix Bindings
+
+NAT44 prefix bindings should be representative of target applications,
+where a number of private IPv4 addresses from the range defined by
+RFC1918 are mapped to a smaller set of public IPv4 addresses from the
+public range.
+
+The following quantities are used to describe inside-to-outside IP address
+and port binding scenarios:
+
+- Inside-addresses, number of inside source addresses
+  (representing inside hosts).
+- Ports-per-inside-address, number of TCP/UDP source
+  ports per inside source address.
+- Outside-addresses, number of outside (public) source addresses
+  allocated to NAT44.
+- Ports-per-outside-address, number of TCP/UDP source
+  ports per outside source address. The maximal number of
+  ports-per-outside-address usable for NAT is 64 512
+  (in the non-reserved port range 1024-65535, RFC4787).
+- Sharing-ratio, equal to inside-addresses divided by outside-addresses.
+
+CSIT NAT44 tests are designed to take into account the maximum number of
+ports (sessions) required per inside host (inside-address) and at the
+same time to maximize the use of the outside-address range by using all
+available outside ports. With this in mind, the following scheme of
+NAT44 sharing ratios has been devised for use in CSIT:
+
+ **ports-per-inside-address** | **sharing-ratio**
+-----------------------------:|------------------:
+ 63 | 1024
+ 126 | 512
+ 252 | 256
+ 504 | 128
+
+Initial CSIT NAT44 tests, including associated TG/TRex traffic profiles,
+are based on ports-per-inside-address set to 63 and a sharing ratio of
+1024. This approach is currently used for all NAT44 tests, including
+NAT44det (NAT44 deterministic, used for Carrier Grade NAT applications)
+and NAT44ed (Endpoint Dependent).
+
+Private address ranges to be used in tests:
+
+- 192.168.0.0 - 192.168.255.255 (192.168/16 prefix)
+
+  - Total of 2^16 (65 536) usable IPv4 addresses.
+  - Used in tests for up to 65 536 inside addresses (inside hosts).
+
+- 172.16.0.0 - 172.31.255.255 (172.16/12 prefix)
+
+  - Total of 2^20 (1 048 576) usable IPv4 addresses.
+  - Used in tests for up to 1 048 576 inside addresses (inside hosts).
+
+### NAT44 Session Scale
+
+The NAT44 session scale tested is governed by the following logic:
+
+- Number of inside-addresses (hosts) H[i] = H[i-1] x 2^2, with H[0] = 1 024,
+  i = 1, 2, 3, ...
+
+  - H[i] = 1 024, 4 096, 16 384, 65 536, 262 144, ...
+
+- Number of sessions S[i] = H[i] * ports-per-inside-address
+
+  - ports-per-inside-address = 63
+
+ **i** | **hosts** | **sessions**
+------:|----------:|-------------:
+ 0 | 1 024 | 64 512
+ 1 | 4 096 | 258 048
+ 2 | 16 384 | 1 032 192
+ 3 | 65 536 | 4 128 768
+ 4 | 262 144 | 16 515 072
+
+### NAT44 Deterministic
+
+NAT44det performance tests use the TRex STL (Stateless) API and traffic
+profiles, similar to all other stateless packet forwarding tests like
+ip4, ip6 and l2, sending UDP packets in both directions,
+inside-to-outside and outside-to-inside.
+
+The inside-to-outside traffic uses a single destination address (20.0.0.0)
+and port (1024).
+The inside-to-outside traffic covers the whole inside address and port range;
+the outside-to-inside traffic covers the whole outside address and port range.
+
+NAT44det translation entries are created during the ramp-up phase,
+followed by verification that all entries are present,
+before proceeding to the main measurements of the test.
+This ensures session setup does not impact the forwarding performance test.
+
+Associated CSIT test cases use the following naming scheme to indicate
+the NAT44det scenario tested:
+
+- ethip4udp-nat44det-h{H}-p{P}-s{S}-[mrr|ndrpdr|soak]
+
+  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
+  - {P}, number of ports per inside host, P = 63.
+  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
+    16515072.
+  - [mrr|ndrpdr|soak], MRR, NDRPDR or SOAK test.
+
+### NAT44 Endpoint-Dependent
+
+In order to exercise NAT44ed's ability to translate based on both
+source and destination address and port, the inside-to-outside traffic
+also varies the destination address and port. The destination port is the same
+as the source port; the destination address has the same offset as the source
+address, but applied to a different subnet (starting with 20.0.0.0).
+
+As the mapping is not deterministic (for security reasons),
+we cannot easily use stateless bidirectional traffic profiles.
+The inside address and port range is fully covered,
+but we do not know which outside-to-inside source address and port to use
+to hit an open session.
+
+Therefore, NAT44ed is benchmarked using the following methodologies:
+
+- Unidirectional throughput using *stateless* traffic profile.
+- Connections-per-second (CPS) using *stateful* traffic profile.
+- Bidirectional throughput (TPUT, see below) using *stateful* traffic profile.
+
+Unidirectional NAT44ed throughput tests use TRex STL (Stateless)
+APIs and traffic profiles, but with packets sent only in the
+inside-to-outside direction.
+Similarly to NAT44det, NAT44ed unidirectional throughput tests include
+a ramp-up phase to establish and verify the presence of required NAT44ed
+binding entries. As the sessions have finite duration, the test code
+keeps inserting ramp-up trials during the search if it detects a risk
+of sessions timing out. Any zero loss trial visits all sessions,
+so it acts also as a ramp-up.
+
+Stateful NAT44ed tests use TRex ASTF (Advanced Stateful) APIs and
+traffic profiles, with packets sent in both directions. Tests are run
+with both UDP and TCP sessions.
+As NAT44ed CPS (connections-per-second) stateful tests
+also measure session opening performance,
+they use a state reset instead of a ramp-up trial.
+NAT44ed TPUT (bidirectional throughput) tests prepend ramp-up trials
+as in the unidirectional tests,
+so the test results describe performance without translation entry
+creation overhead.
+
+Associated CSIT test cases use the following naming scheme to indicate
+the NAT44ed cases tested:
+
+- Stateless: ethip4udp-nat44ed-h{H}-p{P}-s{S}-udir-[mrr|ndrpdr|soak]
+
+  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
+  - {P}, number of ports per inside host, P = 63.
+  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
+    16515072.
+  - udir-[mrr|ndrpdr|soak], unidirectional stateless tests MRR, NDRPDR
+    or SOAK.
+
+- Stateful: ethip4[udp|tcp]-nat44ed-h{H}-p{P}-s{S}-[cps|tput]-[mrr|ndrpdr|soak]
+
+  - [udp|tcp], UDP or TCP sessions.
+  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
+  - {P}, number of ports per inside host, P = 63.
+  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
+    16515072.
+  - [cps|tput], connections-per-second session establishment rate, or
+    packets-per-second average rate without session establishment
+    overhead.
+  - [mrr|ndrpdr|soak], bidirectional stateful tests MRR, NDRPDR, or SOAK.
+
+## Stateful traffic profiles
+
+There are several important details which distinguish ASTF profiles
+from stateless profiles.
+
+### General considerations
+
+#### Protocols
+
+ASTF profiles are limited to either the UDP or TCP protocol.
+
+#### Programs
+
+Each template in the profile defines two "programs", one for the client side
+and one for the server side.
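+
+For illustration, below is a minimal sketch of such a template pair in
+TRex's ASTF Python API, following the shape of TRex's published examples
+(class and method names are from the TRex API; the addresses, port and
+payload size are illustrative, not CSIT's actual profiles):
+
+    # Sketch: one UDP request-response transaction defined by a client
+    # program and a server program (values illustrative).
+    from trex.astf.api import (
+        ASTFProgram, ASTFIPGen, ASTFIPGenDist, ASTFIPGenGlobal,
+        ASTFTCPClientTemplate, ASTFTCPServerTemplate, ASTFAssociationRule,
+        ASTFTemplate, ASTFProfile)
+
+    prog_c = ASTFProgram(stream=False)  # stream=False selects UDP
+    prog_c.send_msg(100 * "x")          # client sends one 100 B message
+    prog_c.recv_msg(1)                  # waits for 1 packet (UDP counts packets)
+
+    prog_s = ASTFProgram(stream=False)  # server instance created on first packet
+    prog_s.recv_msg(1)
+    prog_s.send_msg(100 * "x")
+
+    ip_gen = ASTFIPGen(
+        glob=ASTFIPGenGlobal(ip_offset="1.0.0.0"),
+        dist_client=ASTFIPGenDist(
+            ip_range=["192.168.0.1", "192.168.3.255"], distribution="seq"),
+        dist_server=ASTFIPGenDist(
+            ip_range=["20.0.0.1", "20.0.3.255"], distribution="seq"))
+
+    template = ASTFTemplate(
+        client_template=ASTFTCPClientTemplate(
+            program=prog_c, ip_gen=ip_gen, port=8080),
+        server_template=ASTFTCPServerTemplate(
+            program=prog_s, assoc=ASTFAssociationRule(port=8080)))
+
+    profile = ASTFProfile(default_ip_gen=ip_gen, templates=template)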
+
+Each program specifies when that side has to wait until enough data is received
+(counted in packets for UDP and in bytes for TCP)
+and when to send additional data. Together, the two programs
+define a single transaction. Due to packet loss, a transaction may take longer,
+use more packets (retransmission) or never finish in its entirety.
+
+#### Instances
+
+A client instance is created according to the TPS parameter for the trial,
+and sends the first packet of the transaction (in some cases more packets).
+Each client instance uses a different source address (see sequencing below)
+and some source port. The destination address also comes from a range,
+but the destination port has to be constant for a given program.
+
+TRex uses an opaque way to choose source ports, but as session counting shows,
+the next client with the same source address uses a different source port.
+
+A server instance is created when the first packet arrives at the server side.
+The source address and port of the first packet are used as destination address
+and port for the server responses. This is the ability we need
+when the outside surface is not predictable.
+
+When a program reaches its end, the instance is deleted.
+This creates possible issues with server instances. If the server instance
+does not read all the data the client has sent, late data packets
+can cause a second copy of the server instance to be created,
+which breaks assumptions on how many packets a transaction should have.
+
+The need for server instances to read all the data reduces the overall
+bandwidth TRex is able to create in ASTF mode.
+
+Note that client instances are not created on packets,
+so it is safe to end the client program without reading all server data
+(unless the definition of transaction success requires that).
+
+#### Sequencing
+
+ASTF profiles offer two modes for choosing source and destination IP addresses
+for client programs: sequential and pseudorandom.
+In current tests we are using sequential addressing only (if destination
+address varies at all).
+
+For the client destination UDP/TCP port, we use a single constant value.
+(TRex can support multiple program pairs in the same traffic profile,
+distinguished by the port number.)
+
+#### Transaction overlap
+
+If a transaction takes longer to finish, compared to the period implied by TPS,
+TRex will have multiple client or server instances active at a time.
+
+During calibration testing we have found this increases CPU utilization,
+and for high TPS it can lead to TRex's Rx or Tx buffers becoming full.
+This generally leads to duration stretching, and/or packet loss on TRex.
+
+Currently used transactions were chosen to be short, so the risk of bad behavior
+is decreased. But in MRR tests, where load is computed based on NIC ability,
+not TRex's ability, anomalous behavior is still possible
+(e.g. MRR values being way lower than NDR).
+
+#### Delays
+
+TRex supports adding constant delays to ASTF programs.
+This can be useful, for example if we want to separate connection establishment
+from data transfer.
+
+But as TRex tracks delayed instances as active, this still results
+in higher CPU utilization and reduced performance
+(as with other overlapping transactions), so the current tests do not use
+any delays.
+
+#### Keepalives
+
+Both UDP and TCP protocol implementations in TRex programs support keepalive
+duration. That means there is a configurable period of keepalive time,
+and TRex sends keepalive packets automatically (outside the program)
+for the time the program is active (started, not ended yet)
+but not sending any packets.
+
+For TCP this is generally not a big deal, as the other side usually
+retransmits faster. But for UDP it means a packet loss may leave
+the receiving program running.
+
+In order to avoid keepalive packets, the keepalive value is set to a high
+number. Here, "high number" means that even at maximum scale and minimum TPS,
+there are still no keepalive packets sent within the corresponding
+(computed) trial duration. This number is kept the same also for
+smaller scale traffic profiles, to simplify maintenance.
+
+#### Transaction success
+
+The transaction is considered successful at the Layer-7 (L7) level
+when both program instances close. At this point, various L7 counters
+(unofficial name) are updated on TRex.
+
+We found that a proper close and L7 counter update can be CPU intensive,
+whereas lower-level counters (ipackets, opackets) called L2 counters
+can keep up with higher loads.
+
+For some tests, we do not need to confirm the whole transaction was successful.
+CPS (connections per second) tests are a typical example.
+We care only about NAT44ed creating a session (needs one packet
+in the inside-to-outside direction per session) and being able to use it
+(needs one packet in the outside-to-inside direction).
+
+Similarly in TPUT tests (packet throughput, counting both control
+and data packets), we care about NAT44ed's ability to forward packets;
+we do not care whether applications (TRex) can fully process them at that rate.
+
+Therefore each type of test has its own formula (usually just one counter
+already provided by TRex) to count "successful enough" transactions
+and attempted transactions. Currently, all tests relying on L7 counters
+use size-limited profiles, so they know what the count of attempted
+transactions should be, but due to duration stretching
+TRex might have been unable to send that many packets.
+For search purposes, unattempted transactions are treated the same
+as attempted but failed transactions.
+
+Sometimes even the number of transactions as tracked by the search algorithm
+does not match the transactions as defined by ASTF programs.
+See the TCP TPUT profile below.
+
+### UDP CPS
+
+This profile uses a minimalistic transaction to verify a NAT44ed session has
+been created and that it allows outside-to-inside traffic.
+
+The client instance sends one packet and ends.
+The server instance sends one packet upon creation and ends.
+
+In principle, packet size is configurable,
+but currently used tests apply only one value (100 byte frames).
+
+A transaction counts as attempted when the opackets counter increases on the
+client side.
+A transaction counts as successful when the ipackets counter increases on the
+client side.
+
+### TCP CPS
+
+This profile uses a minimalistic transaction to verify a NAT44ed session has
+been created and that it allows outside-to-inside traffic.
+
+The client initiates a TCP connection. The client waits until the connection
+is confirmed (by reading zero data bytes). The client ends.
+The server accepts the connection. The server waits for indirect confirmation
+from the client (by waiting for the client to initiate the close). The server
+ends.
+
+Without packet loss, the whole transaction takes 7 packets to finish
+(4 and 3 per direction).
+From the NAT44ed point of view, only the first two are needed to verify
+the session got created.
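+
+For this test type, a trial verdict could be derived from TRex ASTF
+client-side counters roughly as sketched below (the tcps_connattempt and
+tcps_connects counters are the same ones described in the next paragraphs;
+the exact nesting of the stats dictionary is an assumption):
+
+    # Sketch: attempted vs. successful CPS transactions from client counters.
+    def cps_transaction_counts(stats):
+        client = stats["traffic"]["client"]     # layout assumed
+        attempted = client["tcps_connattempt"]  # connection attempts (SYNs)
+        successful = client["tcps_connects"]    # completed handshakes
+        return attempted, successful, attempted - successful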
+
+### UDP CPS
+
+This profile uses a minimalistic transaction to verify that a NAT44ed session
+has been created and that it allows outside-to-inside traffic.
+
+The client instance sends one packet and ends.
+The server instance sends one packet upon creation and ends.
+
+In principle, packet size is configurable,
+but currently used tests apply only one value (100 bytes frame).
+
+A transaction counts as attempted when the opackets counter increases on the
+client side.
+A transaction counts as successful when the ipackets counter increases on the
+client side.
+
+### TCP CPS
+
+This profile uses a minimalistic transaction to verify that a NAT44ed session
+has been created and that it allows outside-to-inside traffic.
+
+The client initiates a TCP connection. The client waits until the connection
+is confirmed (by reading zero data bytes). The client ends.
+The server accepts the connection. The server waits for indirect confirmation
+from the client (by waiting for the client to initiate close). The server ends.
+
+Without packet loss, the whole transaction takes 7 packets to finish
+(4 and 3 per direction).
+From the NAT44ed point of view, only the first two are needed to verify
+the session got created.
+
+Packet size is not configurable, but currently used tests report
+frame size as 64 bytes.
+
+A transaction counts as attempted when the tcps_connattempt counter increases
+on the client side.
+A transaction counts as successful when the tcps_connects counter increases
+on the client side.
+
+### UDP TPUT
+
+This profile uses a small transaction of "request-response" type,
+with several packets simulating data payload.
+
+The client sends 5 packets and closes immediately.
+The server reads all 5 packets (needed to avoid late packets creating new
+server instances), then sends 5 packets and closes.
+The value 5 was chosen to mirror what TCP TPUT (see below) chooses.
+
+Packet size is configurable; currently we have tests for 100,
+1518 and 9000 bytes frames (to match the size of TCP TPUT data frames,
+see below).
+
+As this is a packet oriented test, we do not track the whole
+10 packet transaction. Similarly to stateless tests, we treat each packet
+as a "transaction" for search algorithm packet loss ratio purposes.
+Therefore a "transaction" is attempted when the opackets counter on the client
+or server side is increased. A transaction is successful if the ipackets
+counter on the client or server side is increased.
+
+If one of the 5 client packets is lost, the server instance will get stuck
+in the reading phase. This probably decreases TRex performance,
+but it leads to more stable results than the alternatives.
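+
+In ASTF program terms, the pair described above can be sketched as follows
+(import path and payload size are illustrative):
+
+```python
+# Sketch of the UDP TPUT program pair: client sends 5 packets and ends,
+# server reads all 5 before responding with 5 of its own.
+from trex.astf.api import ASTFProgram
+
+DATA = b"x" * 100  # one data packet worth of payload
+
+prog_c = ASTFProgram(stream=False)
+for _ in range(5):
+    prog_c.send_msg(DATA)
+# Client closes immediately: the program simply ends here.
+
+prog_s = ASTFProgram(stream=False)
+prog_s.recv_msg(5)  # read all 5 packets, so a late packet cannot
+                    # spawn a second server instance
+for _ in range(5):
+    prog_s.send_msg(DATA)
+```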
+
+### TCP TPUT
+
+This profile uses a small transaction of "request-response" type,
+with some amount of data to be transferred both ways.
+
+In CSIT release 22.06, TRex behavior changed, so we needed to edit
+the traffic profile. Let us describe the pre-22.06 profile first.
+
+The client connects, sends 5 data packets worth of data,
+receives 5 data packets worth of data and closes its side of the connection.
+The server accepts the connection, reads 5 data packets worth of data,
+sends 5 data packets worth of data and closes its side of the connection.
+As usual in TCP, the sending side waits for an ACK from the receiving side
+before proceeding with the next step of its program.
+
+The server read is needed to avoid premature close and a second server
+instance. The client read is not strictly needed, but the ACKs allow TRex
+to close the server instance quickly, thus saving CPU and improving
+performance.
+
+The number 5 of data packets was chosen so TRex is able to send them
+in a single burst, even with 9000 byte frame size (TRex has a hard limit
+on initial window size).
+That leads to 16 packets (9 of them in c2s direction) being exchanged
+if no loss occurs.
+The size of data packets is controlled by the traffic profile setting
+the appropriate maximum segment size. Due to TRex restrictions,
+the minimal IPv4 data frame size achievable by this method is 70 bytes,
+which is more than our usual minimum of 64 bytes.
+For that reason, the data frame sizes available for testing are 100 bytes
+(that allows room for eventually adding IPv6 ASTF tests),
+1518 bytes and 9000 bytes. There is no control over control packet sizes.
+
+Exactly as in UDP TPUT, the ipackets and opackets counters are used for
+counting "transactions" (in fact packets).
+
+If packet loss occurs, there can be large transaction overlap, even if most
+ASTF programs finish eventually. This can lead to big duration stretching
+and a somewhat uneven rate of packets sent. This makes it hard to interpret
+MRR results (frequently MRR is below NDR for this reason),
+but NDR and PDR results tend to be stable enough.
+
+In 22.06, the "ACK from the receiving side" behavior changed:
+the receiving side started sending an ACK sometimes
+also before receiving the full set of 5 data packets.
+If the previous profile is understood as a "single challenge, single response"
+exchange, where the challenge (and also the response) is sent as a burst of
+5 data packets, the new profile uses "bursts" of 1 packet instead, but issues
+the challenge-response part 5 times sequentially
+(waiting to receive the response before sending the next challenge).
+This new profile happens to have the same overall packet count
+(when no re-transmissions are needed).
+Although it is possibly more taxing for TRex CPU,
+the results are comparable to the old traffic profile.
+
+## Ip4base tests
+
+Contrary to stateless traffic profiles, we do not have a simple limit
+that would guarantee TRex is able to send traffic at the specified load.
+For that reason, we have added tests where "nat44ed" is replaced by "ip4base".
+Instead of NAT44ed processing, the tests set minimalistic IPv4 routes,
+so that packets are forwarded in both inside-to-outside and outside-to-inside
+directions.
+
+The packets arrive at the server end of TRex with a different source
+address&port than in NAT44ed tests (no translation to outside values is done
+with ip4base), but those are not specified in the stateful traffic profiles.
+The server end (as always) uses the received address&port as the destination
+for outside-to-inside traffic. Therefore the same stateful traffic profile
+works for both NAT44ed and ip4base tests (of the same scale).
+
+The NAT44ed results are displayed together with the corresponding ip4base
+results. If they are similar, TRex is probably the bottleneck.
+If the NAT44ed result is visibly smaller, it describes the real VPP
+performance.
diff --git a/docs/content/methodology/test/packet_flow_ordering.md b/docs/content/methodology/test/packet_flow_ordering.md
new file mode 100644
index 0000000000..c2c87038d4
--- /dev/null
+++ b/docs/content/methodology/test/packet_flow_ordering.md
@@ -0,0 +1,42 @@
+---
+title: "Packet Flow Ordering"
+weight: 2
+---
+
+# Packet Flow Ordering
+
+The TRex Traffic Generator (TG) supports two main ways to cover
+the address space (within the allowed ranges) in scale tests.
+
+In most cases only one field value (e.g. IPv4 destination address) is
+altered; in some cases two fields (e.g. IPv4 destination address and UDP
+destination port) are altered.
+
+## Incremental Ordering
+
+This case is simpler to implement and offers greater control.
+
+When changing two fields, they can be incremented synchronously, or one
+after another. In the latter case we can specify which one is
+incremented each iteration and which is incremented by "carrying over"
+only when the other "wraps around". This way, all combinations are
+visited once before the "carry" field itself wraps around.
+
+It is possible to use increments other than 1.
+
+## Randomized Ordering
+
+This case chooses each field value at random (from the allowed range).
+In case of two fields, they are treated independently.
+TRex allows setting a random seed to get deterministic numbers.
+We use a different seed for each field and traffic direction.
+The seed has to be a non-zero number; we use 1, 2, 3, and so on.
+
+The seeded random mode in TRex requires a "limit" value,
+which acts as a cycle length limit (after this many iterations,
+the seed resets to its initial value).
+We use the maximal allowed limit value (computed as 2^24 - 1).
+
+Randomized profiles do not avoid duplicated values,
+and do not guarantee each possible value is visited,
+so this ordering is not very useful for stateful tests.
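+
+A sketch of both orderings using the TRex STL field engine (the import path,
+ranges and seed value are illustrative):
+
+```python
+# Incremental vs. seeded-random coverage of an IPv4 destination range.
+from trex.stl.api import (
+    STLVmFlowVar, STLVmFlowVarRepeatableRandom, STLVmWrFlowVar,
+)
+
+# Incremental ordering: destination address incremented by 1 per packet.
+inc_var = STLVmFlowVar(
+    name="dst", min_value="10.0.0.1", max_value="10.0.0.254",
+    size=4, op="inc")
+
+# Randomized ordering: repeatable random with an explicit seed and the
+# cycle-length "limit" described above.
+rnd_var = STLVmFlowVarRepeatableRandom(
+    name="dst", min_value="10.0.0.1", max_value="10.0.0.254",
+    size=4, seed=1, limit=2**24 - 1)
+
+# Either variable is then written into the packet header field.
+write_dst = STLVmWrFlowVar(fv_name="dst", pkt_offset="IP.dst")
+```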
diff --git a/docs/content/methodology/test/reconfiguration.md b/docs/content/methodology/test/reconfiguration.md
new file mode 100644
index 0000000000..6dec4d918b
--- /dev/null
+++ b/docs/content/methodology/test/reconfiguration.md
@@ -0,0 +1,68 @@
+---
+title: "Reconfiguration"
+weight: 8
+---
+
+# Reconfiguration
+
+## Overview
+
+Reconf tests are designed to measure the impact of VPP re-configuration
+on data plane traffic.
+While VPP takes some measures against the traffic being
+entirely stopped for a prolonged time,
+the immediate forwarding rate varies during the re-configuration,
+as some configuration steps need the active data plane worker threads
+to be stopped temporarily.
+
+As the usual methods of measuring throughput need multiple trial measurements
+with somewhat long durations, and the re-configuration process can also be
+long, finding an offered load which would result in zero loss
+during the re-configuration process would be time-consuming.
+
+Instead, reconf tests first find a throughput value (the lower bound for NDR)
+without re-configuration, and then maintain that offered load
+during re-configuration. The measured loss count is then assumed to be caused
+by the re-configuration process. The result published by reconf tests
+is the effective blocked time, that is
+the loss count divided by the offered load.
+
+## Current Implementation
+
+Each reconf suite is based on a similar MLRsearch performance suite.
+
+MLRsearch parameters are changed to speed up the throughput discovery.
+For example, PDR is not searched for, and the final trial duration is shorter.
+
+The MLRsearch suite has to contain a configuration parameter
+that can be scaled up, e.g. the number of tunnels or number of service chains.
+Currently, only increasing the scale is supported
+as the re-configuration operation. In the future, scale decrease
+or other operations can be implemented.
+
+The traffic profile is not changed, so the traffic present is processed
+only by the smaller scale configuration. The added tunnels / chains
+are not targeted by the traffic.
+
+For the re-configuration, the same Robot Framework and Python libraries
+are used as were used in the initial configuration, with the exception
+of the final calls that do not interact with VPP (e.g. starting
+virtual machines) being skipped to reduce the overall test duration.
+
+## Discussion
+
+Robot Framework introduces a certain overhead, which may affect the timing
+of individual VPP API calls, which in turn may affect
+the number of packets lost.
+
+The exact calls executed may contain unnecessary info dumps, repeated
+commands, or commands which change a value that does not need to be changed
+(e.g. MTU). Thus, implementation details affect the results, even if their
+effect on the corresponding MLRsearch suite is negligible.
+
+The lower bound for NDR is the only value safe to use when zero packet loss
+is expected without re-configuration. But different suites show different
+"jitter" in that value. For some suites, the lower bound is not tight,
+allowing full NIC buffers to drain quickly between worker pauses.
+For other suites, the lower bound for NDR still has quite a large probability
+of non-zero packet loss even without re-configuration.
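+
+As a worked example of the published metric (the values are illustrative):
+
+```python
+def effective_blocked_time(loss_count, offered_load_pps):
+    """Loss count divided by offered load, in seconds."""
+    return loss_count / offered_load_pps
+
+# 2_000_000 packets lost at 4 Mpps offered load corresponds to
+# 0.5 s of effectively blocked forwarding.
+print(effective_blocked_time(2_000_000, 4_000_000.0))  # 0.5
+```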
diff --git a/docs/content/methodology/test/tunnel_encapsulations.md b/docs/content/methodology/test/tunnel_encapsulations.md
new file mode 100644
index 0000000000..c047c43dfa
--- /dev/null
+++ b/docs/content/methodology/test/tunnel_encapsulations.md
@@ -0,0 +1,87 @@
+---
+title: "Tunnel Encapsulations"
+weight: 3
+---
+
+# Tunnel Encapsulations
+
+Tunnel encapsulations testing is grouped based on the type of outer
+header: IPv4 or IPv6.
+
+## IPv4 Tunnels
+
+VPP is tested in the following IPv4 tunnel baseline configurations:
+
+- *ip4vxlan-l2bdbase*: VXLAN over IPv4 tunnels with L2 bridge-domain MAC
+  switching.
+- *ip4vxlan-l2xcbase*: VXLAN over IPv4 tunnels with L2 cross-connect.
+- *ip4lispip4-ip4base*: LISP over IPv4 tunnels with IPv4 routing.
+- *ip4lispip6-ip6base*: LISP over IPv4 tunnels with IPv6 routing.
+- *ip4gtpusw-ip4base*: GTPU over IPv4 tunnels with IPv4 routing.
+
+In all cases listed above, a low number of MAC, IPv4 or IPv6 flows (253 or 254
+per direction) is switched or routed by VPP.
+
+In addition, selected IPv4 tunnels are tested at scale:
+
+- *dot1q--ip4vxlanscale-l2bd*: VXLAN over IPv4 tunnels with L2 bridge-
+  domain MAC switching, with scaled up dot1q VLANs (10, 100, 1k),
+  mapped to scaled up L2 bridge-domains (10, 100, 1k), that are in turn
+  mapped to (10, 100, 1k) VXLAN tunnels. 64.5k flows are transmitted per
+  direction.
+
+## IPv6 Tunnels
+
+VPP is tested in the following IPv6 tunnel baseline configurations:
+
+- *ip6lispip4-ip4base*: LISP over IPv6 tunnels with IPv4 routing.
+- *ip6lispip6-ip6base*: LISP over IPv6 tunnels with IPv6 routing.
+
+In all cases listed above, a low number of IPv4 or IPv6 flows (253 or 254 per
+direction) is routed by VPP.
+
+## GENEVE
+
+### GENEVE Prefix Bindings
+
+GENEVE prefix bindings should be representative of target applications, where
+packet flows of a particular set of IPv4 addresses (L3 underlay network) are
+routed via a dedicated GENEVE interface by building an L2 overlay.
+
+Private address ranges to be used in tests:
+
+- East hosts IP address range: 10.0.1.0 - 10.127.255.255 (10.0/9 prefix)
+  - Total of 2^23 - 256 (8 388 352) usable IPv4 addresses
+  - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay networks)
+- West hosts IP address range: 10.128.1.0 - 10.255.255.255 (10.128/9 prefix)
+  - Total of 2^23 - 256 (8 388 352) usable IPv4 addresses
+  - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay networks)
+
+### GENEVE Tunnel Scale
+
+If N is the number of GENEVE tunnels (and IPv4 underlay networks), then TG
+sends 256 packet flows in each of N different sets (see the range computation
+sketch after the list):
+
+- i = 1,2,3, ... N - GENEVE tunnel index
+- East-West direction: GENEVE encapsulated packets
+  - Outer IP header:
+    - src ip: 1.1.1.1
+    - dst ip: 1.1.1.2
+  - GENEVE header:
+    - vni: i
+  - Inner IP header:
+    - src_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
+    - dst_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
+- West-East direction: non-encapsulated packets
+  - IP header:
+    - src_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
+    - dst_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i/255)).(0-to-255)
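+
+The inner address ranges above can be computed per tunnel index as sketched
+below (interpreting rounddown and modulo as integer division and remainder
+by 255; this interpretation is an assumption):
+
+```python
+def inner_ranges(i):
+    """Inner IPv4 ranges for GENEVE tunnel index i (1-based)."""
+    second = i // 255  # rounddown(i/255)
+    third = i % 255    # modulo(i/255)
+    east = "10.{}.{}.0 - 10.{}.{}.255".format(0 + second, third,
+                                              0 + second, third)
+    west = "10.{}.{}.0 - 10.{}.{}.255".format(128 + second, third,
+                                              128 + second, third)
+    return east, west
+
+# Tunnel i=1: East hosts 10.0.1.0 - 10.0.1.255, West hosts
+# 10.128.1.0 - 10.128.1.255, i.e. 256 flows per tunnel per direction.
+print(inner_ranges(1))
+```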
+
+The resulting tunnel counts and total flow counts are:
+
+ **geneve-tunnels** | **total-flows**
+-------------------:|----------------:
+                  1 |             256
+                  4 |           1 024
+                 16 |           4 096
+                 64 |          16 384
+                256 |          65 536
+              1 024 |         262 144
diff --git a/docs/content/methodology/test/vpp_device.md b/docs/content/methodology/test/vpp_device.md
new file mode 100644
index 0000000000..0a5ee90308
--- /dev/null
+++ b/docs/content/methodology/test/vpp_device.md
@@ -0,0 +1,15 @@
+---
+title: "VPP Device"
+weight: 9
+---
+
+# VPP Device
+
+This covers the VPP_Device test environment for functional VPP
+device tests integrated into the LFN CI/CD infrastructure. VPP_Device tests
+run on 1-Node testbeds (1n-skx, 1n-arm) and rely on Linux SRIOV Virtual
+Function (VF), dot1q VLAN tagging and external loopback cables to
+facilitate packet passing over external physical links. The initial focus is
+on a few baseline tests. New device tests can be added by small edits
+to an existing CSIT Performance (2-node) test. RF test definition code
+stays unchanged, with the exception of traffic generator related L2 KWs.
diff --git a/docs/content/methodology/trending/_index.md b/docs/content/methodology/trending/_index.md
new file mode 100644
index 0000000000..4289e7ff96
--- /dev/null
+++ b/docs/content/methodology/trending/_index.md
@@ -0,0 +1,12 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Trending"
+weight: 4
+---
+
+# Trending
+
+This document describes a high-level design of a system for continuous
+performance measuring, trending and change detection for the FD.io VPP SW
+data plane (and other performance tests run within the CSIT sub-project).
diff --git a/docs/content/methodology/trending/analysis.md b/docs/content/methodology/trending/analysis.md
new file mode 100644
index 0000000000..fe952259ab
--- /dev/null
+++ b/docs/content/methodology/trending/analysis.md
@@ -0,0 +1,224 @@
+---
+title: "Analysis"
+weight: 1
+---
+
+# Trend Analysis
+
+All measured performance trend data is treated as time-series data,
+modeled as a concatenation of groups; within each group the samples come
+(independently) from the same normal distribution (with some center and
+standard deviation).
+
+The center of the normal distribution for the group (equal to the population
+average) is called the trend for the group.
+All the analysis is based on finding the right partition into groups
+and comparing their trends.
+
+## Anomalies in graphs
+
+In graphs, the start of the following group is marked as a regression (red
+circle) or progression (green circle), if the new trend is lower (or higher,
+respectively) than the previous group's.
+
+## Implementation details
+
+### Partitioning into groups
+
+While sometimes the samples within a group are far from being distributed
+normally, currently we do not have a better tractable model.
+
+Here, "sample" should be the result of a single trial measurement, with group
+boundaries set only at test run granularity.
+But in order to avoid detecting
+causes unrelated to VPP performance, the current presentation takes the
+average of all trials within the run as the sample. Effectively, this acts
+as a single trial with aggregate duration.
+
+Performance graphs show the run average as a dot (not all individual trial
+results).
+
+The group boundaries are selected based on `Minimum Description Length`[^1].
+
+### Minimum Description Length
+
+`Minimum Description Length`[^1] (MDL) is a particular formalization
+of `Occam's razor`[^2] principle.
+
+The general formulation mandates evaluating a large set of models,
+but for anomaly detection purposes, it is useful to consider
+a smaller set of models, so that scoring and comparing them is easier.
+
+For each candidate model, the data should be compressed losslessly,
+which includes model definitions, encoded model parameters,
+and the raw data encoded based on probabilities computed by the model.
+The model resulting in the shortest compressed message is "the" correct model.
+
+For our model set (groups of normally distributed samples),
+we need to encode the group length (which penalizes too many groups),
+the group average (more on that later), the group stdev,
+and then all the samples.
+
+Luckily, the "all the samples" part turns out to be quite easy to compute.
+If sample values are considered as coordinates in (multi-dimensional)
+Euclidean space, fixing the stdev means the point with allowed coordinates
+lies on a sphere. Fixing the average intersects the sphere with
+a (hyper)-plane, and the Gaussian probability density on the resulting
+sphere is constant. So the only contribution is the "area" of the sphere,
+which depends only on the number of samples and the stdev.
+
+A somewhat ambiguous part is choosing which encoding
+is used for group size, average and stdev.
+Different encodings cause different biases toward large or small values.
+In our implementation we have chosen the probability density
+corresponding to a uniform distribution (from zero to the maximal sample
+value) for the stdev and the average of the first group,
+but for averages of subsequent groups we have chosen a distribution
+which discourages delimiting groups with averages close together.
+
+Our implementation assumes that the measurement precision is 1.0 pps.
+Thus it is slightly wrong for trial durations other than 1.0 seconds.
+Also, all the calculations assume 1.0 pps is totally negligible,
+compared to the stdev value.
+
+The group selection algorithm currently has no parameters;
+all the aforementioned encodings and the handling of precision are
+hard-coded. In principle, every group selection is examined, and the one
+encodable with the least amount of bits is selected, as sketched below.
+As the bit amount for a selection is just the sum of bits for every group,
+finding the best selection takes a number of comparisons that increases
+quadratically with the size of the data, the overall time complexity
+probably being cubic.
+
+The resulting group distribution looks good
+if samples are distributed normally enough within a group.
+But for obviously different distributions (for example
+a `bimodal distribution`[^3]) the groups tend to focus on less relevant
+factors (such as "outlier" density).
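+
+A conceptual sketch of the selection follows. This is not the CSIT
+implementation: the parameter-encoding terms below are simplified stand-ins
+for the encodings described above.
+
+```python
+import math
+
+def group_bits(samples):
+    """Approximate description length of one group, in bits."""
+    n = len(samples)
+    avg = sum(samples) / n
+    var = sum((x - avg) ** 2 for x in samples) / n
+    var = max(var, 1.0)  # 1.0 pps measurement precision as the floor
+    # n samples from N(avg, var), plus a simplified cost for encoding
+    # group length, average and stdev.
+    return (0.5 * n * math.log2(2 * math.pi * math.e * var)
+            + 3 * math.log2(n + 1))
+
+def best_partition(samples):
+    """Try every boundary placement, minimizing the total bit count."""
+    n = len(samples)
+    best = [0.0] + [math.inf] * n  # best[i]: bits for samples[:i]
+    cut = [0] * (n + 1)
+    for i in range(1, n + 1):
+        for j in range(i):
+            cost = best[j] + group_bits(samples[j:i])
+            if cost < best[i]:
+                best[i], cut[i] = cost, j
+    bounds, i = [], n
+    while i > 0:
+        bounds.append((cut[i], i))
+        i = cut[i]
+    return list(reversed(bounds))  # list of (start, end) group index pairs
+```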
+
+## Common Patterns
+
+When an anomaly is detected, it frequently falls into a few known patterns,
+each having its typical behavior over time.
+
+We are going to describe the behaviors,
+as they motivate our choice of trend compliance metrics.
+
+### Sample time and analysis time
+
+But first we need to distinguish two roles time plays in the analysis,
+so it is clearer which role we are referring to.
+
+Sample time is the more obvious one.
+It is the time the sample is generated.
+It is the start time or the end time of the Jenkins job run;
+it does not really matter which (parallel runs are disabled,
+and the length of the gap between samples does not affect the metrics).
+
+Analysis time is the time the current analysis is computed.
+Again, the exact time does not usually matter;
+what matters is how many later (and how many earlier) samples
+were considered in the computation.
+
+For some patterns, it is usual for a previously reported
+anomaly to "vanish", or a previously unseen anomaly to "appear late",
+as later samples change which partition into groups is more probable.
+
+The dashboard and graphs always show the latest analysis time;
+the compliance metrics use an earlier sample time
+with the same latest analysis time.
+
+Alerting e-mails use the latest analysis time at the time of sending,
+so the values reported there are likely to differ
+from the later analysis time results shown in the dashboard and graphs.
+
+### Ordinary regression
+
+The real performance changes from a previously stable value
+to a new stable value.
+
+For a medium to high magnitude of the change, one run
+is enough for the anomaly detection to mark this regression.
+
+Ordinary progressions are detected in the same way.
+
+### Small regression
+
+The real performance changes from a previously stable value
+to a new stable value, but the difference is small.
+
+For the anomaly detection algorithm, this change is harder to detect,
+depending on the standard deviation of the previous group.
+
+If the new performance value stays stable, the detection algorithm is
+eventually able to detect this anomaly,
+when there are enough samples around the new value.
+
+If the difference is too small, it may remain undetected
+(as a new performance change happens, or the full history of samples
+is still not enough for the detection).
+
+Small progressions have the same behavior.
+
+### Reverted regression
+
+This pattern can have two different causes.
+We would like to distinguish them, but that is usually
+not possible to do just by looking at the measured values (and not telemetry).
+
+In one cause, the real DUT performance has changed,
+but got restored immediately.
+In the other cause, no real performance change happened;
+just some temporary infrastructure issue
+has caused a wrong low value to be measured.
+
+For small measured changes, this pattern may remain undetected.
+For medium and big measured changes, this is detected when the regression
+happens on just the last sample.
+
+For big changes, the revert is also immediately detected
+as a subsequent progression. The trend is usually different
+from the previously stable trend (as the two population averages
+are not likely to be exactly equal), but the difference
+between the two trends is relatively small.
+
+For medium changes, the detection algorithm may need several new samples
+to detect a progression (as it dislikes single-sample groups),
+in the meantime reporting regressions (with the difference decreasing
+with analysis time), until it stabilizes the same way as for big changes
+(regression followed by progression, small difference
+between the old stable trend and the last trend).
+
+As it is very hard for faulty code or an infrastructure issue
+to increase performance, the opposite (a temporary progression)
+almost never happens.
+
+### Summary
+
+There is a trade-off between detecting small regressions
+and not reporting the same old regressions for a long time.
+
+For people reading e-mails, a sudden regression with a big number of samples
+in the last group means this regression was hard for the algorithm to detect.
+
+If there is a big regression with just one run in the last group,
+we are not sure whether it is real or just a temporary issue.
+It is useful to wait some time before starting an investigation.
+
+With a decreasing (absolute value of) difference, the number of expected runs
+increases. If there are not enough runs, we still cannot distinguish
+a real regression from a temporary regression just from the current metrics
+(although humans frequently can tell by looking at the graph).
+
+When there is a regression or progression with just a small difference,
+it is probably an artifact of a temporary regression.
+It is not worth examining, unless temporary regressions happen somewhat
+frequently.
+
+It is not easy for the metrics to locate the previous stable value,
+especially if multiple anomalies happened in the last few weeks.
+It is good to compare the last trend with the long term trend maximum,
+as it highlights the difference between "now" and "what could be".
+It is good to exclude the last week from the trend maximum,
+as including the last week would hide all real progressions.
+
+[^1]: [Minimum Description Length](https://en.wikipedia.org/wiki/Minimum_description_length)
+[^2]: [Occam's Razor](https://en.wikipedia.org/wiki/Occam%27s_razor)
+[^3]: [Bimodal Distribution](https://en.wikipedia.org/wiki/Bimodal_distribution)
diff --git a/docs/content/methodology/trending/presentation.md b/docs/content/methodology/trending/presentation.md
new file mode 100644
index 0000000000..84925b46c8
--- /dev/null
+++ b/docs/content/methodology/trending/presentation.md
@@ -0,0 +1,34 @@
+---
+title: "Presentation"
+weight: 2
+---
+
+# Trend Presentation
+
+## Failed tests
+
+The Failed tests tables list the tests which failed during the last test run.
+Separate tables are generated for each testbed.
+
+## Regressions and progressions
+
+These tables list tests which encountered a regression or progression during
+the specified time period, which is currently set to the last 21 days.
+
+## Trendline Graphs
+
+Trendline graphs show measured per-run averages of MRR values, NDR or PDR
+values, group average values, and detected anomalies.
+The graphs are constructed as follows:
+
+- X-axis represents the date in the format MMDD.
+- Y-axis represents the run-average MRR value, or NDR or PDR values, in Mpps.
+  For PDR tests, a graph with average latency at 50% PDR [us] is also
+  generated.
+- Markers indicate anomaly classification:
+  - Regression - red circle.
+  - Progression - green circle.
+- The line shows the average MRR value of each group.
+
+In addition, the graphs show dynamic labels while hovering over graph data
+points, presenting the CSIT build date, measured value, VPP reference, trend
+job build ID and the LF testbed ID.
diff --git a/docs/content/methodology/trending_methodology/_index.md b/docs/content/methodology/trending_methodology/_index.md deleted file mode 100644 index 551d950cc7..0000000000 --- a/docs/content/methodology/trending_methodology/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -bookCollapseSection: true -bookFlatSection: false -title: "Trending Methodology" -weight: 22 ---- \ No newline at end of file diff --git a/docs/content/methodology/trending_methodology/overview.md b/docs/content/methodology/trending_methodology/overview.md deleted file mode 100644 index 90d8a2507c..0000000000 --- a/docs/content/methodology/trending_methodology/overview.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: "Overview" -weight: 1 ---- - -# Overview - -This document describes a high-level design of a system for continuous -performance measuring, trending and change detection for FD.io VPP SW -data plane (and other performance tests run within CSIT sub-project). diff --git a/docs/content/methodology/trending_methodology/trend_analysis.md b/docs/content/methodology/trending_methodology/trend_analysis.md deleted file mode 100644 index 7f1870f577..0000000000 --- a/docs/content/methodology/trending_methodology/trend_analysis.md +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: "Trending Analysis" -weight: 2 ---- - -# Trend Analysis - -All measured performance trend data is treated as time-series data -that is modeled as a concatenation of groups, -within each group the samples come (independently) from -the same normal distribution (with some center and standard deviation). - -Center of the normal distribution for the group (equal to population average) -is called a trend for the group. -All the analysis is based on finding the right partition into groups -and comparing their trends. - -## Anomalies in graphs - -In graphs, the start of the following group is marked as a regression (red -circle) or progression (green circle), if the new trend is lower (or higher -respectively) then the previous group's. - -## Implementation details - -### Partitioning into groups - -While sometimes the samples within a group are far from being distributed -normally, currently we do not have a better tractable model. - -Here, "sample" should be the result of single trial measurement, with group -boundaries set only at test run granularity. But in order to avoid detecting -causes unrelated to VPP performance, the current presentation takes average of -all trials within the run as the sample. Effectively, this acts as a single -trial with aggregate duration. - -Performance graphs show the run average as a dot (not all individual trial -results). - -The group boundaries are selected based on `Minimum Description Length`[^1]. - -### Minimum Description Length - -`Minimum Description Length`[^1] (MDL) is a particular formalization -of `Occam's razor`[^2] principle. - -The general formulation mandates to evaluate a large set of models, -but for anomaly detection purposes, it is useful to consider -a smaller set of models, so that scoring and comparing them is easier. - -For each candidate model, the data should be compressed losslessly, -which includes model definitions, encoded model parameters, -and the raw data encoded based on probabilities computed by the model. -The model resulting in shortest compressed message is the "the" correct model. 
- -For our model set (groups of normally distributed samples), -we need to encode group length (which penalizes too many groups), -group average (more on that later), group stdev and then all the samples. - -Luckily, the "all the samples" part turns out to be quite easy to compute. -If sample values are considered as coordinates in (multi-dimensional) -Euclidean space, fixing stdev means the point with allowed coordinates -lays on a sphere. Fixing average intersects the sphere with a (hyper)-plane, -and Gaussian probability density on the resulting sphere is constant. -So the only contribution is the "area" of the sphere, which only depends -on the number of samples and stdev. - -A somehow ambiguous part is in choosing which encoding -is used for group size, average and stdev. -Different encodings cause different biases to large or small values. -In our implementation we have chosen probability density -corresponding to uniform distribution (from zero to maximal sample value) -for stdev and average of the first group, -but for averages of subsequent groups we have chosen a distribution -which discourages delimiting groups with averages close together. - -Our implementation assumes that measurement precision is 1.0 pps. -Thus it is slightly wrong for trial durations other than 1.0 seconds. -Also, all the calculations assume 1.0 pps is totally negligible, -compared to stdev value. - -The group selection algorithm currently has no parameters, -all the aforementioned encodings and handling of precision is hard-coded. -In principle, every group selection is examined, and the one encodable -with least amount of bits is selected. -As the bit amount for a selection is just sum of bits for every group, -finding the best selection takes number of comparisons -quadratically increasing with the size of data, -the overall time complexity being probably cubic. - -The resulting group distribution looks good -if samples are distributed normally enough within a group. -But for obviously different distributions (for example -`bimodal distribution`[^3]) the groups tend to focus on less relevant factors -(such as "outlier" density). - -## Common Patterns - -When an anomaly is detected, it frequently falls into few known patterns, -each having its typical behavior over time. - -We are going to describe the behaviors, -as they motivate our choice of trend compliance metrics. - -### Sample time and analysis time - -But first we need to distinguish two roles time plays in analysis, -so it is more clear which role we are referring to. - -Sample time is the more obvious one. -It is the time the sample is generated. -It is the start time or the end time of the Jenkins job run, -does not really matter which (parallel runs are disabled, -and length of gap between samples does not affect metrics). - -Analysis time is the time the current analysis is computed. -Again, the exact time does not usually matter, -what matters is how many later (and how fewer earlier) samples -were considered in the computation. - -For some patterns, it is usual for a previously reported -anomaly to "vanish", or previously unseen anomaly to "appear late", -as later samples change which partition into groups is more probable. - -Dashboard and graphs are always showing the latest analysis time, -the compliance metrics are using earlier sample time -with the same latest analysis time. 
- -Alerting e-mails use the latest analysis time at the time of sending, -so the values reported there are likely to be different -from the later analysis time results shown in dashboard and graphs. - -### Ordinary regression - -The real performance changes from previously stable value -into a new stable value. - -For medium to high magnitude of the change, one run -is enough for anomaly detection to mark this regression. - -Ordinary progressions are detected in the same way. - -### Small regression - -The real performance changes from previously stable value -into a new stable value, but the difference is small. - -For the anomaly detection algorithm, this change is harder to detect, -depending on the standard deviation of the previous group. - -If the new performance value stays stable, eventually -the detection algorithm is able to detect this anomaly -when there are enough samples around the new value. - -If the difference is too small, it may remain undetected -(as new performance change happens, or full history of samples -is still not enough for the detection). - -Small progressions have the same behavior. - -### Reverted regression - -This pattern can have two different causes. -We would like to distinguish them, but that is usually -not possible to do just by looking at the measured values (and not telemetry). - -In one cause, the real DUT performance has changed, -but got restored immediately. -In the other cause, no real performance change happened, -just some temporary infrastructure issue -has caused a wrong low value to be measured. - -For small measured changes, this pattern may remain undetected. -For medium and big measured changes, this is detected when the regression -happens on just the last sample. - -For big changes, the revert is also immediately detected -as a subsequent progression. The trend is usually different -from the previously stable trend (as the two population averages -are not likely to be exactly equal), but the difference -between the two trends is relatively small. - -For medium changes, the detection algorithm may need several new samples -to detect a progression (as it dislikes single sample groups), -in the meantime reporting regressions (difference decreasing -with analysis time), until it stabilizes the same way as for big changes -(regression followed by progression, small difference -between the old stable trend and last trend). - -As it is very hard for a fault code or an infrastructure issue -to increase performance, the opposite (temporary progression) -almost never happens. - -### Summary - -There is a trade-off between detecting small regressions -and not reporting the same old regressions for a long time. - -For people reading e-mails, a sudden regression with a big number of samples -in the last group means this regression was hard for the algorithm to detect. - -If there is a big regression with just one run in the last group, -we are not sure if it is real, or just a temporary issue. -It is useful to wait some time before starting an investigation. - -With decreasing (absolute value of) difference, the number of expected runs -increases. If there is not enough runs, we still cannot distinguish -real regression from temporary regression just from the current metrics -(although humans frequently can tell by looking at the graph). - -When there is a regression or progression with just a small difference, -it is probably an artifact of a temporary regression. -Not worth examining, unless temporary regressions happen somewhat frequently. 
- -It is not easy for the metrics to locate the previous stable value, -especially if multiple anomalies happened in the last few weeks. -It is good to compare last trend with long term trend maximum, -as it highlights the difference between "now" and "what could be". -It is good to exclude last week from the trend maximum, -as including the last week would hide all real progressions. - -[^1]: [Minimum Description Length](https://en.wikipedia.org/wiki/Minimum_description_length) -[^2]: [Occam's razor](https://en.wikipedia.org/wiki/Occam%27s_razor) -[^3]: [bimodal distribution](https://en.wikipedia.org/wiki/Bimodal_distribution) diff --git a/docs/content/methodology/trending_methodology/trend_presentation.md b/docs/content/methodology/trending_methodology/trend_presentation.md deleted file mode 100644 index 4c58589a0b..0000000000 --- a/docs/content/methodology/trending_methodology/trend_presentation.md +++ /dev/null @@ -1,36 +0,0 @@ ---- -title: "Trending Presentation" -weight: 3 ---- - -# Trend Presentation - -## Failed tests - -The Failed tests tables list the tests which failed during the last test run. -Separate tables are generated for each testbed. - -## Regressions and progressions - -These tables list tests which encountered a regression or progression during the -specified time period, which is currently set to the last 21 days. - -## Trendline Graphs - -Trendline graphs show measured per run averages of MRR values, NDR or PDR -values, group average values, and detected anomalies. -The graphs are constructed as follows: - -- X-axis represents the date in the format MMDD. -- Y-axis represents run-average MRR value, NDR or PDR values in Mpps. For PDR - tests also a graph with average latency at 50% PDR [us] is generated. -- Markers to indicate anomaly classification: - - - Regression - red circle. - - Progression - green circle. - -- The line shows average MRR value of each group. - -In addition the graphs show dynamic labels while hovering over graph data -points, presenting the CSIT build date, measured value, VPP reference, trend job -build ID and the LF testbed ID. diff --git a/docs/content/methodology/trex_traffic_generator.md b/docs/content/methodology/trex_traffic_generator.md deleted file mode 100644 index 4f62d91c47..0000000000 --- a/docs/content/methodology/trex_traffic_generator.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -title: "TRex Traffic Generator" -weight: 5 ---- - -# TRex Traffic Generator - -## Usage - -[TRex traffic generator](https://trex-tgn.cisco.com) is used for majority of -CSIT performance tests. TRex is used in multiple types of performance tests, -see [Data Plane Throughtput]({{< ref "data_plane_throughput/data_plane_throughput/#Data Plane Throughtput" >}}) -for more detail. - -## Traffic modes - -TRex is primarily used in two (mutually incompatible) modes. - -### Stateless mode - -Sometimes abbreviated as STL. -A mode with high performance, which is unable to react to incoming traffic. -We use this mode whenever it is possible. -Typical test where this mode is not applicable is NAT44ED, -as DUT does not assign deterministic outside address+port combinations, -so we are unable to create traffic that does not lose packets -in out2in direction. - -Measurement results are based on simple L2 counters -(opackets, ipackets) for each traffic direction. - -### Stateful mode - -A mode capable of reacting to incoming traffic. -Contrary to the stateless mode, only UDP and TCP is supported -(carried over IPv4 or IPv6 packets). 
-Performance is limited, as TRex needs to do more CPU processing. -TRex suports two subtypes of stateful traffic, -CSIT uses ASTF (Advanced STateFul mode). - -This mode is suitable for NAT44ED tests, as clients send packets from inside, -and servers react to it, so they see the outside address and port to respond to. -Also, they do not send traffic before NAT44ED has created the corresponding -translation entry. - -When possible, L2 counters (opackets, ipackets) are used. -Some tests need L7 counters, which track protocol state (e.g. TCP), -but those values are less than reliable on high loads. - -## Traffic Continuity - -Generated traffic is either continuous, or limited (by number of transactions). -Both modes support both continuities in principle. - -### Continuous traffic - -Traffic is started without any data size goal. -Traffic is ended based on time duration, as hinted by search algorithm. -This is useful when DUT behavior does not depend on the traffic duration. -The default for stateless mode. - -### Limited traffic - -Traffic has defined data size goal (given as number of transactions), -duration is computed based on this goal. -Traffic is ended when the size goal is reached, -or when the computed duration is reached. -This is useful when DUT behavior depends on traffic size, -e.g. target number of NAT translation entries, each to be hit exactly once -per direction. -This is used mainly for stateful mode. - -## Traffic synchronicity - -Traffic can be generated synchronously (test waits for duration) -or asynchronously (test operates during traffic and stops traffic explicitly). - -### Synchronous traffic - -Trial measurement is driven by given (or precomputed) duration, -no activity from test driver during the traffic. -Used for most trials. - -### Asynchronous traffic - -Traffic is started, but then the test driver is free to perform -other actions, before stopping the traffic explicitly. -This is used mainly by reconf tests, but also by some trials -used for runtime telemetry. - -## Trafic profiles - -TRex supports several ways to define the traffic. -CSIT uses small Python modules based on Scapy as definitions. -Details of traffic profiles depend on modes (STL or ASTF), -but some are common for both modes. - -Search algorithms are intentionally unaware of the traffic mode used, -so CSIT defines some terms to use instead of mode-specific TRex terms. - -### Transactions - -TRex traffic profile defines a small number of behaviors, -in CSIT called transaction templates. Traffic profiles also instruct -TRex how to create a large number of transactions based on the templates. - -Continuous traffic loops over the generated transactions. -Limited traffic usually executes each transaction once -(typically as constant number of loops over source addresses, -each loop with different source ports). - -Currently, ASTF profiles define one transaction template each. -Number of packets expected per one transaction varies based on profile details, -as does the criterion for when a transaction is considered successful. - -Stateless transactions are just one packet (sent from one TG port, -successful if received on the other TG port). -Thus unidirectional stateless profiles define one transaction template, -bidirectional stateless profiles define two transaction templates. - -### TPS multiplier - -TRex aims to open transaction specified by the profile at a steady rate. 
-While TRex allows the transaction template to define its intended "cps" value, -CSIT does not specify it, so the default value of 1 is applied, -meaning TRex will open one transaction per second (and transaction template) -by default. But CSIT invocation uses "multiplier" (mult) argument -when starting the traffic, that multiplies the cps value, -meaning it acts as TPS (transactions per second) input. - -With a slight abuse of nomenclature, bidirectional stateless tests -set "packets per transaction" value to 2, just to keep the TPS semantics -as a unidirectional input value. - -### Duration stretching - -TRex can be IO-bound, CPU-bound, or have any other reason -why it is not able to generate the traffic at the requested TPS. -Some conditions are detected, leading to TRex failure, -for example when the bandwidth does not fit into the line capacity. -But many reasons are not detected. - -Unfortunately, TRex frequently reacts by not honoring the duration -in synchronous mode, taking longer to send the traffic, -leading to lower then requested load offered to DUT. -This usualy breaks assumptions used in search algorithms, -so it has to be avoided. - -For stateless traffic, the behavior is quite deterministic, -so the workaround is to apply a fictional TPS limit (max_rate) -to search algorithms, usually depending only on the NIC used. - -For stateful traffic the behavior is not deterministic enough, -for example the limit for TCP traffic depends on DUT packet loss. -In CSIT we decided to use logic similar to asynchronous traffic. -The traffic driver sleeps for a time, then stops the traffic explicitly. -The library that parses counters into measurement results -than usually treats unsent packets/transactions as lost/failed. - -We have added a IP4base tests for every NAT44ED test, -so that users can compare results. -If the results are very similar, it is probable TRex was the bottleneck. - -### Startup delay - -By investigating TRex behavior, it was found that TRex does not start -the traffic in ASTF mode immediately. There is a delay of zero traffic, -after which the traffic rate ramps up to the defined TPS value. - -It is possible to poll for counters during the traffic -(fist nonzero means traffic has started), -but that was found to influence the NDR results. - -Thus "sleep and stop" stategy is used, which needs a correction -to the computed duration so traffic is stopped after the intended -duration of real traffic. Luckily, it turns out this correction -is not dependend on traffic profile nor CPU used by TRex, -so a fixed constant (0.112 seconds) works well. -Unfortunately, the constant may depend on TRex version, -or execution environment (e.g. TRex in AWS). - -The result computations need a precise enough duration of the real traffic, -luckily server side of TRex has precise enough counter for that. - -It is unknown whether stateless traffic profiles also exhibit a startup delay. -Unfortunately, stateless mode does not have similarly precise duration counter, -so some results (mostly MRR) are affected by less precise duration measurement -in Python part of CSIT code. - -## Measuring Latency - -If measurement of latency is requested, two more packet streams are -created (one for each direction) with TRex flow_stats parameter set to -STLFlowLatencyStats. In that case, returned statistics will also include -min/avg/max latency values and encoded HDRHistogram data. 
\ No newline at end of file diff --git a/docs/content/methodology/tunnel_encapsulations.md b/docs/content/methodology/tunnel_encapsulations.md deleted file mode 100644 index 52505b7efb..0000000000 --- a/docs/content/methodology/tunnel_encapsulations.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -title: "Tunnel Encapsulations" -weight: 10 ---- - -# Tunnel Encapsulations - -Tunnel encapsulations testing is grouped based on the type of outer -header: IPv4 or IPv6. - -## IPv4 Tunnels - -VPP is tested in the following IPv4 tunnel baseline configurations: - -- *ip4vxlan-l2bdbase*: VXLAN over IPv4 tunnels with L2 bridge-domain MAC - switching. -- *ip4vxlan-l2xcbase*: VXLAN over IPv4 tunnels with L2 cross-connect. -- *ip4lispip4-ip4base*: LISP over IPv4 tunnels with IPv4 routing. -- *ip4lispip6-ip6base*: LISP over IPv4 tunnels with IPv6 routing. -- *ip4gtpusw-ip4base*: GTPU over IPv4 tunnels with IPv4 routing. - -In all cases listed above low number of MAC, IPv4, IPv6 flows (253 or 254 per -direction) is switched or routed by VPP. - -In addition selected IPv4 tunnels are tested at scale: - -- *dot1q--ip4vxlanscale-l2bd*: VXLAN over IPv4 tunnels with L2 bridge- - domain MAC switching, with scaled up dot1q VLANs (10, 100, 1k), - mapped to scaled up L2 bridge-domains (10, 100, 1k), that are in turn - mapped to (10, 100, 1k) VXLAN tunnels. 64.5k flows are transmitted per - direction. - -## IPv6 Tunnels - -VPP is tested in the following IPv6 tunnel baseline configurations: - -- *ip6lispip4-ip4base*: LISP over IPv4 tunnels with IPv4 routing. -- *ip6lispip6-ip6base*: LISP over IPv4 tunnels with IPv6 routing. - -In all cases listed above low number of IPv4, IPv6 flows (253 or 254 per -direction) is routed by VPP. diff --git a/docs/content/methodology/vpp_device_functional.md b/docs/content/methodology/vpp_device_functional.md deleted file mode 100644 index 2bad5973b6..0000000000 --- a/docs/content/methodology/vpp_device_functional.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: "VPP_Device Functional" -weight: 18 ---- - -# VPP_Device Functional - -Includes VPP_Device test environment for functional VPP -device tests integrated into LFN CI/CD infrastructure. VPP_Device tests -run on 1-Node testbeds (1n-skx, 1n-arm) and rely on Linux SRIOV Virtual -Function (VF), dot1q VLAN tagging and external loopback cables to -facilitate packet passing over external physical links. Initial focus is -on few baseline tests. New device tests can be added by small edits -to existing CSIT Performance (2-node) test. RF test definition code -stays unchanged with the exception of traffic generator related L2 KWs. diff --git a/docs/content/methodology/vpp_forwarding_modes.md b/docs/content/methodology/vpp_forwarding_modes.md deleted file mode 100644 index 1cc199c607..0000000000 --- a/docs/content/methodology/vpp_forwarding_modes.md +++ /dev/null @@ -1,104 +0,0 @@ ---- -title: "VPP Forwarding Modes" -weight: 3 ---- - -# VPP Forwarding Modes - -VPP is tested in a number of L2, IPv4 and IPv6 packet lookup and -forwarding modes. Within each mode baseline and scale tests are -executed, the latter with varying number of FIB entries. - -## L2 Ethernet Switching - -VPP is tested in three L2 forwarding modes: - -- *l2patch*: L2 patch, the fastest point-to-point L2 path that loops - packets between two interfaces without any Ethernet frame checks or - lookups. -- *l2xc*: L2 cross-connect, point-to-point L2 path with all Ethernet - frame checks, but no MAC learning and no MAC lookup. 
-- *l2bd*: L2 bridge-domain, multipoint-to-multipoint L2 path with all - Ethernet frame checks, with MAC learning (unless static MACs are used) - and MAC lookup. - -l2bd tests are executed in baseline and scale configurations: - -- *l2bdbase*: Two MAC FIB entries are learned by VPP to enable packet - switching between two interfaces in two directions. VPP L2 switching - is tested with 254 IPv4 unique flows per direction, varying IPv4 - source address per flow in order to invoke RSS based packet - distribution across VPP workers. The same source and destination MAC - address is used for all flows per direction. IPv4 source address is - incremented for every packet. - -- *l2bdscale*: A high number of MAC FIB entries are learned by VPP to - enable packet switching between two interfaces in two directions. - Tested MAC FIB sizes include: i) 10k with 5k unique flows per - direction, ii) 100k with 2 x 50k flows and iii) 1M with 2 x 500k - flows. Unique flows are created by using distinct source and - destination MAC addresses that are changed for every packet using - incremental ordering, making VPP learn (or refresh) distinct src MAC - entries and look up distinct dst MAC entries for every packet. For - details, see - [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}). - -Ethernet wire encapsulations tested include: untagged, dot1q, dot1ad. - -## IPv4 Routing - -IPv4 routing tests are executed in baseline and scale configurations: - -- *ip4base*: Two /32 IPv4 FIB entries are configured in VPP to enable - packet routing between two interfaces in two directions. VPP routing - is tested with 253 IPv4 unique flows per direction, varying IPv4 - source address per flow in order to invoke RSS based packet - distribution across VPP workers. IPv4 source address is incremented - for every packet. - -- *ip4scale*: A high number of /32 IPv4 FIB entries are configured in - VPP. Tested IPv4 FIB sizes include: i) 20k with 10k unique flows per - direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M - flows. Unique flows are created by using distinct IPv4 destination - addresses that are changed for every packet, using incremental or - random ordering. For details, see - [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}). - -## IPv6 Routing - -Similarly to IPv4, IPv6 routing tests are executed in baseline and scale -configurations: - -- *ip6base*: Two /128 IPv4 FIB entries are configured in VPP to enable - packet routing between two interfaces in two directions. VPP routing - is tested with 253 IPv6 unique flows per direction, varying IPv6 - source address per flow in order to invoke RSS based packet - distribution across VPP workers. IPv6 source address is incremented - for every packet. - -- *ip4scale*: A high number of /128 IPv6 FIB entries are configured in - VPP. Tested IPv6 FIB sizes include: i) 20k with 10k unique flows per - direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M - flows. Unique flows are created by using distinct IPv6 destination - addresses that are changed for every packet, using incremental or - random ordering. For details, see - [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}). 
- -## SRv6 Routing - -SRv6 routing tests are executed in a number of baseline configurations, -in each case SR policy and steering policy are configured for one -direction and one (or two) SR behaviours (functions) in the other -directions: - -- *srv6enc1sid*: One SID (no SRH present), one SR function - End. -- *srv6enc2sids*: Two SIDs (SRH present), two SR functions - End and - End.DX6. -- *srv6enc2sids-nodecaps*: Two SIDs (SRH present) without decapsulation, - one SR function - End. -- *srv6proxy-dyn*: Dynamic SRv6 proxy, one SR function - End.AD. -- *srv6proxy-masq*: Masquerading SRv6 proxy, one SR function - End.AM. -- *srv6proxy-stat*: Static SRv6 proxy, one SR function - End.AS. - -In all listed cases low number of IPv6 flows (253 per direction) is -routed by VPP. diff --git a/docs/content/methodology/vpp_startup_settings.md b/docs/content/methodology/vpp_startup_settings.md deleted file mode 100644 index 6e40091a6c..0000000000 --- a/docs/content/methodology/vpp_startup_settings.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: "VPP Startup Settings" -weight: 17 ---- - -# VPP Startup Settings - -CSIT code manipulates a number of VPP settings in startup.conf for -optimized performance. List of common settings applied to all tests and -test dependent settings follows. - -## Common Settings - -List of VPP startup.conf settings applied to all tests: - -1. heap-size - set separately for ip4, ip6, stats, main - depending on scale tested. -2. no-tx-checksum-offload - disables UDP / TCP TX checksum offload in - DPDK. Typically needed for use faster vector PMDs (together with - no-multi-seg). -3. buffers-per-numa - sets a number of memory buffers allocated - to VPP per CPU socket. VPP default is 16384. Needs to be increased for - scenarios with large number of interfaces and worker threads. To - accommodate for scale tests, CSIT is setting it to the maximum possible - value corresponding to the limit of DPDK memory mappings (currently - 256). For Xeon Skylake platforms configured with 2MB hugepages and VPP - data-size and buffer-size defaults (2048B and 2496B respectively), this - results in value of 215040 (256 * 840 = 215040, 840 * 2496B buffers fit - in 2MB hugepage). - -## Per Test Settings - -List of vpp startup.conf settings applied dynamically per test: - -1. corelist-workers - list of logical cores to run VPP - worker data plane threads. Depends on HyperThreading and core per - test configuration. -2. num-rx-queues - depends on a number of VPP threads and NIC - interfaces. -3. no-multi-seg - disables multi-segment buffers in DPDK, improves - packet throughput, but disables Jumbo MTU support. Disabled for all - tests apart from the ones that require Jumbo 9000B frame support. -4. UIO driver - depends on topology file definition. -5. QAT VFs - depends on NRThreads, each thread = 1QAT VFs. -- cgit 1.2.3-korg