From 374954b9d648f503f6783325a1266457953a998d Mon Sep 17 00:00:00 2001 From: Tibor Frank Date: Wed, 3 May 2023 13:53:27 +0000 Subject: C-Docs: New structure Change-Id: I73d107f94b28b138f3350a9e1eedb0555583a9ca Signed-off-by: Tibor Frank --- docs/content/_index.md | 48 +- docs/content/infrastructure/_index.md | 3 +- .../infrastructure/fdio_csit_logical_topologies.md | 2 +- .../fdio_csit_testbed_specifications.md | 1861 -------------------- .../infrastructure/fdio_csit_testbed_versioning.md | 2 +- .../fdio_dc_testbed_specifications.md | 1861 ++++++++++++++++++++ .../infrastructure/fdio_dc_vexxhost_inventory.md | 6 +- .../infrastructure/testbed_configuration/_index.md | 2 +- .../infrastructure/trex_traffic_generator.md | 195 ++ .../content/infrastructure/vpp_startup_settings.md | 44 + docs/content/introduction/_index.md | 5 - .../introduction/automating_vpp_api_flag_day.md | 303 ---- docs/content/introduction/bash_code_style.md | 651 ------- docs/content/introduction/branches.md | 192 -- docs/content/introduction/dashboard_history.md | 17 - docs/content/introduction/design.md | 148 -- docs/content/introduction/model_schema.md | 60 - docs/content/introduction/perf_triggers_design.md | 44 - docs/content/introduction/test_code_guidelines.md | 294 ---- docs/content/introduction/test_naming.md | 107 -- .../introduction/test_scenarios_overview.md | 61 - docs/content/introduction/test_tag_description.md | 863 --------- docs/content/introduction/testing_in_vagrant.md | 85 - docs/content/methodology/_index.md | 4 +- docs/content/methodology/access_control_lists.md | 70 - .../methodology/data_plane_throughput/_index.md | 6 - .../data_plane_throughput/data_plane_throughput.md | 129 -- .../methodology/data_plane_throughput/mlrsearch.md | 88 - .../data_plane_throughput/mrr_throughput.md | 56 - .../methodology/data_plane_throughput/plrsearch.md | 383 ---- .../methodology/dut_state_considerations.md | 148 -- .../methodology/generic_segmentation_offload.md | 116 -- docs/content/methodology/geneve.md | 66 - .../methodology/hoststack_testing/_index.md | 6 - .../hoststack_testing/quicudpip_with_vppecho.md | 48 - .../hoststack_testing/tcpip_with_iperf3.md | 52 - .../hoststack_testing/udpip_with_iperf3.md | 44 - .../hoststack_testing/vsap_ab_with_nginx.md | 39 - .../internet_protocol_security_ipsec.md | 74 - docs/content/methodology/measurements/_index.md | 6 + .../measurements/data_plane_throughput/_index.md | 6 + .../data_plane_throughput/data_plane_throughput.md | 129 ++ .../data_plane_throughput/mlr_search.md | 88 + .../measurements/data_plane_throughput/mrr.md | 56 + .../data_plane_throughput/plr_search.md | 383 ++++ .../methodology/measurements/packet_latency.md | 52 + docs/content/methodology/measurements/telemetry.md | 158 ++ docs/content/methodology/multi_core_speedup.md | 51 - .../methodology/network_address_translation.md | 445 ----- docs/content/methodology/overview/_index.md | 6 + .../overview/dut_state_considerations.md | 148 ++ .../methodology/overview/multi_core_speedup.md | 51 + .../methodology/overview/per_thread_resources.md | 101 ++ docs/content/methodology/overview/terminology.md | 97 + .../methodology/overview/vpp_forwarding_modes.md | 104 ++ docs/content/methodology/packet_flow_ordering.md | 42 - docs/content/methodology/packet_latency.md | 45 - docs/content/methodology/per_patch_testing.md | 230 +++ docs/content/methodology/per_thread_resources.md | 102 -- docs/content/methodology/reconfiguration_tests.md | 68 - .../methodology/root_cause_analysis/_index.md | 6 - 
.../perpatch_performance_tests.md | 228 --- docs/content/methodology/suite_generation.md | 124 -- docs/content/methodology/telemetry.md | 167 -- docs/content/methodology/terminology.md | 82 - docs/content/methodology/test/_index.md | 6 + .../methodology/test/access_control_lists.md | 66 + .../test/generic_segmentation_offload.md | 117 ++ docs/content/methodology/test/hoststack/_index.md | 6 + .../test/hoststack/quicudpip_with_vppecho.md | 48 + .../test/hoststack/tcpip_with_iperf3.md | 52 + .../test/hoststack/udpip_with_iperf3.md | 44 + .../test/hoststack/vsap_ab_with_nginx.md | 39 + .../methodology/test/internet_protocol_security.md | 73 + .../test/network_address_translation.md | 445 +++++ .../methodology/test/packet_flow_ordering.md | 42 + docs/content/methodology/test/reconfiguration.md | 68 + .../methodology/test/tunnel_encapsulations.md | 87 + docs/content/methodology/test/vpp_device.md | 15 + docs/content/methodology/trending/_index.md | 12 + docs/content/methodology/trending/analysis.md | 224 +++ docs/content/methodology/trending/presentation.md | 34 + .../methodology/trending_methodology/_index.md | 6 - .../methodology/trending_methodology/overview.md | 10 - .../trending_methodology/trend_analysis.md | 224 --- .../trending_methodology/trend_presentation.md | 36 - docs/content/methodology/trex_traffic_generator.md | 195 -- docs/content/methodology/tunnel_encapsulations.md | 41 - docs/content/methodology/vpp_device_functional.md | 15 - docs/content/methodology/vpp_forwarding_modes.md | 104 -- docs/content/methodology/vpp_startup_settings.md | 44 - docs/content/overview/_index.md | 6 + docs/content/overview/c_dash/_index.md | 6 + docs/content/overview/c_dash/design.md | 6 + docs/content/overview/c_dash/releases.md | 8 + docs/content/overview/c_dash/structure.md | 20 + docs/content/overview/csit/_index.md | 6 + docs/content/overview/csit/design.md | 148 ++ docs/content/overview/csit/suite_generation.md | 123 ++ docs/content/overview/csit/test_naming.md | 112 ++ docs/content/overview/csit/test_scenarios.md | 66 + docs/content/overview/csit/test_tags.md | 863 +++++++++ docs/content/release_notes/_index.md | 7 +- docs/content/release_notes/csit_rls2306/_index.md | 6 + .../release_notes/csit_rls2306/dpdk_performance.md | 27 + .../release_notes/csit_rls2306/trex_performance.md | 24 + .../release_notes/csit_rls2306/vpp_device.md | 24 + .../release_notes/csit_rls2306/vpp_performance.md | 42 + docs/content/release_notes/dpdk.md | 31 - docs/content/release_notes/previous/_index.md | 6 + .../release_notes/previous/csit_rls2302/_index.md | 6 + .../previous/csit_rls2302/dpdk_performance.md | 31 + .../previous/csit_rls2302/trex_performance.md | 26 + .../previous/csit_rls2302/vpp_device.md | 26 + .../previous/csit_rls2302/vpp_performance.md | 93 + docs/content/release_notes/trex.md | 26 - docs/content/release_notes/vpp.md | 95 - docs/content/release_notes/vpp_device.md | 24 - 118 files changed, 6800 insertions(+), 8269 deletions(-) delete mode 100644 docs/content/infrastructure/fdio_csit_testbed_specifications.md create mode 100644 docs/content/infrastructure/fdio_dc_testbed_specifications.md create mode 100644 docs/content/infrastructure/trex_traffic_generator.md create mode 100644 docs/content/infrastructure/vpp_startup_settings.md delete mode 100644 docs/content/introduction/_index.md delete mode 100644 docs/content/introduction/automating_vpp_api_flag_day.md delete mode 100644 docs/content/introduction/bash_code_style.md delete mode 100644 docs/content/introduction/branches.md delete mode 100644 
docs/content/introduction/dashboard_history.md delete mode 100644 docs/content/introduction/design.md delete mode 100644 docs/content/introduction/model_schema.md delete mode 100644 docs/content/introduction/perf_triggers_design.md delete mode 100644 docs/content/introduction/test_code_guidelines.md delete mode 100644 docs/content/introduction/test_naming.md delete mode 100644 docs/content/introduction/test_scenarios_overview.md delete mode 100644 docs/content/introduction/test_tag_description.md delete mode 100644 docs/content/introduction/testing_in_vagrant.md delete mode 100644 docs/content/methodology/access_control_lists.md delete mode 100644 docs/content/methodology/data_plane_throughput/_index.md delete mode 100644 docs/content/methodology/data_plane_throughput/data_plane_throughput.md delete mode 100644 docs/content/methodology/data_plane_throughput/mlrsearch.md delete mode 100644 docs/content/methodology/data_plane_throughput/mrr_throughput.md delete mode 100644 docs/content/methodology/data_plane_throughput/plrsearch.md delete mode 100644 docs/content/methodology/dut_state_considerations.md delete mode 100644 docs/content/methodology/generic_segmentation_offload.md delete mode 100644 docs/content/methodology/geneve.md delete mode 100644 docs/content/methodology/hoststack_testing/_index.md delete mode 100644 docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md delete mode 100644 docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md delete mode 100644 docs/content/methodology/hoststack_testing/udpip_with_iperf3.md delete mode 100644 docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md delete mode 100644 docs/content/methodology/internet_protocol_security_ipsec.md create mode 100644 docs/content/methodology/measurements/_index.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/_index.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/mlr_search.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/mrr.md create mode 100644 docs/content/methodology/measurements/data_plane_throughput/plr_search.md create mode 100644 docs/content/methodology/measurements/packet_latency.md create mode 100644 docs/content/methodology/measurements/telemetry.md delete mode 100644 docs/content/methodology/multi_core_speedup.md delete mode 100644 docs/content/methodology/network_address_translation.md create mode 100644 docs/content/methodology/overview/_index.md create mode 100644 docs/content/methodology/overview/dut_state_considerations.md create mode 100644 docs/content/methodology/overview/multi_core_speedup.md create mode 100644 docs/content/methodology/overview/per_thread_resources.md create mode 100644 docs/content/methodology/overview/terminology.md create mode 100644 docs/content/methodology/overview/vpp_forwarding_modes.md delete mode 100644 docs/content/methodology/packet_flow_ordering.md delete mode 100644 docs/content/methodology/packet_latency.md create mode 100644 docs/content/methodology/per_patch_testing.md delete mode 100644 docs/content/methodology/per_thread_resources.md delete mode 100644 docs/content/methodology/reconfiguration_tests.md delete mode 100644 docs/content/methodology/root_cause_analysis/_index.md delete mode 100644 docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md delete mode 100644 
docs/content/methodology/suite_generation.md delete mode 100644 docs/content/methodology/telemetry.md delete mode 100644 docs/content/methodology/terminology.md create mode 100644 docs/content/methodology/test/_index.md create mode 100644 docs/content/methodology/test/access_control_lists.md create mode 100644 docs/content/methodology/test/generic_segmentation_offload.md create mode 100644 docs/content/methodology/test/hoststack/_index.md create mode 100644 docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md create mode 100644 docs/content/methodology/test/hoststack/tcpip_with_iperf3.md create mode 100644 docs/content/methodology/test/hoststack/udpip_with_iperf3.md create mode 100644 docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md create mode 100644 docs/content/methodology/test/internet_protocol_security.md create mode 100644 docs/content/methodology/test/network_address_translation.md create mode 100644 docs/content/methodology/test/packet_flow_ordering.md create mode 100644 docs/content/methodology/test/reconfiguration.md create mode 100644 docs/content/methodology/test/tunnel_encapsulations.md create mode 100644 docs/content/methodology/test/vpp_device.md create mode 100644 docs/content/methodology/trending/_index.md create mode 100644 docs/content/methodology/trending/analysis.md create mode 100644 docs/content/methodology/trending/presentation.md delete mode 100644 docs/content/methodology/trending_methodology/_index.md delete mode 100644 docs/content/methodology/trending_methodology/overview.md delete mode 100644 docs/content/methodology/trending_methodology/trend_analysis.md delete mode 100644 docs/content/methodology/trending_methodology/trend_presentation.md delete mode 100644 docs/content/methodology/trex_traffic_generator.md delete mode 100644 docs/content/methodology/tunnel_encapsulations.md delete mode 100644 docs/content/methodology/vpp_device_functional.md delete mode 100644 docs/content/methodology/vpp_forwarding_modes.md delete mode 100644 docs/content/methodology/vpp_startup_settings.md create mode 100644 docs/content/overview/_index.md create mode 100644 docs/content/overview/c_dash/_index.md create mode 100644 docs/content/overview/c_dash/design.md create mode 100644 docs/content/overview/c_dash/releases.md create mode 100644 docs/content/overview/c_dash/structure.md create mode 100644 docs/content/overview/csit/_index.md create mode 100644 docs/content/overview/csit/design.md create mode 100644 docs/content/overview/csit/suite_generation.md create mode 100644 docs/content/overview/csit/test_naming.md create mode 100644 docs/content/overview/csit/test_scenarios.md create mode 100644 docs/content/overview/csit/test_tags.md create mode 100644 docs/content/release_notes/csit_rls2306/_index.md create mode 100644 docs/content/release_notes/csit_rls2306/dpdk_performance.md create mode 100644 docs/content/release_notes/csit_rls2306/trex_performance.md create mode 100644 docs/content/release_notes/csit_rls2306/vpp_device.md create mode 100644 docs/content/release_notes/csit_rls2306/vpp_performance.md delete mode 100644 docs/content/release_notes/dpdk.md create mode 100644 docs/content/release_notes/previous/_index.md create mode 100644 docs/content/release_notes/previous/csit_rls2302/_index.md create mode 100644 docs/content/release_notes/previous/csit_rls2302/dpdk_performance.md create mode 100644 docs/content/release_notes/previous/csit_rls2302/trex_performance.md create mode 100644 docs/content/release_notes/previous/csit_rls2302/vpp_device.md 
create mode 100644 docs/content/release_notes/previous/csit_rls2302/vpp_performance.md delete mode 100644 docs/content/release_notes/trex.md delete mode 100644 docs/content/release_notes/vpp.md delete mode 100644 docs/content/release_notes/vpp_device.md (limited to 'docs/content') diff --git a/docs/content/_index.md b/docs/content/_index.md index 15cd1ec3f1..eda7ecf8f9 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -3,39 +3,27 @@ title: "FD.io CSIT" type: "docs" --- -# Report Structure +# Documentation Structure -FD.io CSIT Dashboard Documentation contains system performance and functional -testing data. - -Documentation is structured as follows: - -1. INTRODUCTION: General introduction to CSIT Performance Dashboard. - - **Dashboard History**: Version changes. - - **Test Scenarios Overview**: A brief overview of test scenarios - covered in this report. - - **Design**: Framework modular design hierarchy. - - **Test Naming**: Test naming convention. - - **Test Tags Descriptions**: Robot Framework Tags used for test suite and - test case grouping and selection. -2. METHODOLOGY: - - **Overview**: Tested logical topologies, test coverage and naming - specifics. -3. RELEASE NOTES: Performance tests executed in physical FD.io - testbeds. - - **VPP Performance**: Changes, added tests, environment or methodology - changes, known issues. - - **DPDK Performance**: Changes, added tests, environment or methodology - changes, known issues. - - **TRex Performance**: Changes, added tests, environment or methodology - changes, known issues. - - **VPP Device**: Changes, added tests, environment or methodology - changes, known issues. -4. INFRASTRUCTURE: +1. OVERVIEW: General introduction to CSIT Performance Dashboard and CSIT itself. + - **C-Dash** + - **CSIT** +2. METHODOLOGY + - **Overview** + - **Measurement** + - **Test** + - **Trending** + - **Per-patch Testing** +3. RELEASE NOTES: Performance tests executed in physical FD.io testbeds. + - **CSIT rls2306** + - **Previous** +4. INFRASTRUCTURE - **FD.io DC Vexxhost Inventory**: Physical testbeds location. - - **FD.io CSIT Testbed Specifications**: Specification of the physical + - **FD.io DC Testbed Specifications**: Specification of the physical testbed infrastructure. - - **FD.io CSIT Testbed Configuration**: Configuration of the physical + - **FD.io DC Testbed Configuration**: Configuration of the physical testbed infrastructure. - **FD.io CSIT Testbed Versioning**: CSIT testbed versioning. - **FD.io CSIT Logical Topologies**: CSIT Logical Topologies. 
+ - **VPP Startup Settings** + - **TRex Traffic Generator** diff --git a/docs/content/infrastructure/_index.md b/docs/content/infrastructure/_index.md index 3ccc042a8b..c5dbd21d87 100644 --- a/docs/content/infrastructure/_index.md +++ b/docs/content/infrastructure/_index.md @@ -1,5 +1,6 @@ --- +bookCollapseSection: false bookFlatSection: true title: "Infrastructure" weight: 4 ---- \ No newline at end of file +--- diff --git a/docs/content/infrastructure/fdio_csit_logical_topologies.md b/docs/content/infrastructure/fdio_csit_logical_topologies.md index 5dd323d30c..4e9c22b357 100644 --- a/docs/content/infrastructure/fdio_csit_logical_topologies.md +++ b/docs/content/infrastructure/fdio_csit_logical_topologies.md @@ -1,6 +1,6 @@ --- title: "FD.io CSIT Logical Topologies" -weight: 4 +weight: 5 --- # FD.io CSIT Logical Topologies diff --git a/docs/content/infrastructure/fdio_csit_testbed_specifications.md b/docs/content/infrastructure/fdio_csit_testbed_specifications.md deleted file mode 100644 index 24a30cf1fa..0000000000 --- a/docs/content/infrastructure/fdio_csit_testbed_specifications.md +++ /dev/null @@ -1,1861 +0,0 @@ ---- -bookToc: true -title: "FD.io CSIT Testbed Specifications" -weight: 2 ---- - -# FD.io CSIT Testbed Specifications - -## Purpose - -This note includes specification of the physical testbed infrastructure -hosted by LFN FD.io CSIT project. - -## Server Management - -### Addressing - -Each server has a LOM (Lights-Out-Management e.g. SM IPMI) and a -Management port, which are connected to two different VLANs. - -#### LOM (IPMI) VLAN - - - Subnet: 10.30.50.0/24 - - Gateway: 10.30.50.1 - - Broadcast: 10.30.50.255 - - DNS1: 199.204.44.24 - - DNS2: 199.204.47.54 - -#### Management VLAN - - Subnet: 10.30.51.0/24 - - Gateway: 10.30.51.1 - - Broadcast: 10.30.51.255 - - DNS1: 199.204.44.24 - - DNS2: 199.204.47.54 - -To access these hosts, VPN connection is required. - -## Testbeds Overview - -### Summary List - -``` - #. Type Purpose SUT TG #TB #SUT #TG #skx #ps1 #rng #tx2 #tsh #alt #clx #zn2 #icx #snr #spr - 1. 1-Node-Skylake nomad skx na 5 5 0 5 0 0 0 0 0 0 0 0 0 0 - 2. 1-Node-Cascadelake nomad clx na 1 1 0 0 0 0 0 0 0 1 0 0 0 0 - 3. 1-Node-AmpereAltra nomad alt na 2 2 0 0 0 0 0 0 2 0 0 0 0 0 - 4. 2-Node-IxiaPS1L47 tcp skx ps1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 - 5. 2-Node-Cascadelake perf clx clx 3 3 3 0 0 0 0 0 0 6 0 0 0 0 - 6. 2-Node-ThunderX2 perf tx2 skx 1 1 .5 .5 0 0 1 0 0 0 0 0 0 0 - 7. 2-Node-Icelake perf icx icx 4 4 4 0 0 0 0 0 0 0 0 8 0 0 - 8. 3-Node-Rangeley perf rng skx 1 3 1 0 0 2 0 0 0 0 0 0 0 0 - 9. 3-Node-Taishan perf tsh skx 1 2 .5 .5 0 0 0 2 0 0 0 0 0 0 -10. 3-Node-Altra perf alt icx 1 2 1 0 0 0 0 0 2 0 0 1 0 0 -11. 2-Node-Zen2 perf zn2 zn2 1 1 1 0 0 0 0 0 0 0 2 0 0 0 -12. 3-Node-Icelake perf icx icx 2 4 2 0 0 0 0 0 0 0 0 6 0 0 -13. 3-Node-SnowRidge perf snr icx 1 2 .5 0 0 0 0 0 0 0 0 .5 2 0 -14. 2-Node-SapphireRapids perf spr spr 4 4 4 0 0 0 0 0 0 0 0 0 0 8 - Totals: 28 35 18.5 7 1 2 1 2 4 7 2 15.5 2 8 -``` - -### 1-Node-Skylake Xeon Intel (1n-skx) - -Each 1-Node-Skylake testbed includes one SUT (Server-Type-B6) with NIC -ports connected back-to-back ([Server Types](#server-types)). -Used for FD.io VPP_Device functional driver tests. - -### 1-Node-ThunderX2 Arm Marvell (1n-tx2) - -Each 1-Node-ThunderX2 testbed includes one SUT (Server-Type-E11) with NIC -ports connected back-to-back ([Server Types](#server-types)). -Used for FD.io VPP_Device functional driver tests.
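The LOM and Management addressing above implies that every server's BMC is reachable on the 10.30.50.0/24 VLAN once the VPN is up. As a minimal sketch of what that access looks like in practice, assuming `ipmitool` is installed on the operator's machine and the placeholder BMC credentials below are replaced with real (site-secret) ones, the following polls chassis power state for the two 1n-skx SUTs whose LOM IPs appear later on this page:

```python
import subprocess

# LOM (IPMI) addresses live on the 10.30.50.0/24 VLAN described above;
# these two entries are the 1n-skx SUTs from the testbed configuration.
LOM_HOSTS = {
    "s1-t11-sut1": "10.30.50.47",
    "s2-t12-sut1": "10.30.50.48",
}

# Placeholder credentials -- the real BMC user/password are site secrets.
IPMI_USER = "ADMIN"
IPMI_PASS = "changeme"

def power_status(host_ip: str) -> str:
    """Return the chassis power state reported by the BMC at host_ip."""
    result = subprocess.run(
        ["ipmitool", "-I", "lanplus", "-H", host_ip,
         "-U", IPMI_USER, "-P", IPMI_PASS, "chassis", "power", "status"],
        capture_output=True, text=True, check=True,
    )
    return result.stdout.strip()  # e.g. "Chassis Power is on"

if __name__ == "__main__":
    for name, ip in LOM_HOSTS.items():
        print(f"{name}: {power_status(ip)}")
```

The same pattern extends to any host in the configuration listings below; only the LOM IP changes.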
- -### 1-Node-Cascadelake Xeon Intel (1n-clx) - -Each 1-Node-Cascadelake testbed includes one SUT (Server-Type-C1) with -NIC ports connected back-to-back ([Server Types](#server-types)). - -Used for FD.io VPP_Device functional driver tests. - -### 2-Node-IxiaPS1L47 Ixia PSOne L47 (2n-ps1) - -Each 2-Node-IxiaPS1L47 testbed includes one SUT (Server-Type-B8) and one -TG (Ixia PSOne appliance) with 10GE interfaces connected in a 2-node -circular topology ([Server Types](#server-types)). -Used for FD.io TCP/IP and HTTP performance tests. - -### 2-Node-Cascadelake Xeon Intel (2n-clx) - -Each 2-Node-Cascadelake testbed includes one SUT (Server-Type-C2) and -one TG (Server-Type-C3) connected in a 2-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 2-Node-Zen2 EPYC AMD (2n-zn2) - -Each 2-Node-Zen2 testbed includes one SUT (Server-Type-D1) and -one TG (Server-Type-D2) connected in a 2-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 2-Node-ThunderX2 Arm Marvell (2n-tx2) - -Each 2-Node-ThunderX2 testbed includes one SUT (Server-Type-E22) and -one TG (Server-Type-E31) connected in a 2-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 2-Node-Icelake Xeon Intel (2n-icx) - -Each 2-Node-Icelake testbed includes one SUT (Server-Type-F1) and -one TG (Server-Type-F2) connected in a 2-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 3-Node-Rangeley Atom Testbeds - -Each 3-Node-Rangeley testbed includes two SUTs (Server-Type-B5) and one -TG (Server-Type-2) connected in a 3-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 3-Node-TaiShan Arm Huawei (3n-tsh) - -Each 3-Node-TaiShan testbed includes two SUTs (Server-Type-E21) and one -TG (Server-Type-E31) connected in a 3-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 3-Node-Altra Arm Ampere (3n-alt) - -Each 3-Node-Altra testbed includes two SUTs (Server-Type-E23) and one -TG (Server-Type-F4) connected in a 3-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 3-Node-Icelake Xeon Intel (3n-icx) - -Each 3-Node-Icelake testbed includes two SUTs (Server-Type-F1) and one -TG (Server-Type-F3) connected in a 3-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 3-Node-SnowRidge Atom Intel (3n-snr) - -Each 3-Node-SnowRidge testbed includes two SUTs (Server-Type-G1) and one -TG (Server-Type-F4) connected in a 3-node circular topology -([Server Types](#server-types)). -Used for FD.io performance tests. - -### 2-Node-Full-SapphireRapids Xeon Intel (2nf-spr) - -One 2-Node-Full-SapphireRapids testbed includes one SUT (Server-Type-H1) and -one TG (Server-Type-H2) connected in a 2-node physical topology -with NUMA (socket) daisy chaining. For more detail see -[Server Types](#server-types) and [Testbed Topology-TODO](#TODO). -Used for FD.io performance tests in a full system SUT setup with all PCIe -Gen5 x16 lane slots populated with 2p200GbE NICs. - -### 2-Node-SapphireRapids Xeon Intel (2n-spr) - -Each 2-Node-SapphireRapids testbed includes one SUT (Server-Type-H5) and -one TG (Server-Type-H6) connected in a 2-node circular topology. For more -detail see [Server Types](#server-types) and [Testbed Topology-TODO](#TODO). -Used for FD.io performance tests.
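The Totals row of the Summary List above can be cross-checked mechanically; fractional TG counts (.5) denote traffic generators shared between two testbeds. A minimal consistency check, assuming the tuples below faithfully transcribe the #TB, #SUT and #TG columns of the table:

```python
from fractions import Fraction as F

# (testbed type, #TB, #SUT, #TG) transcribed from the Summary List above;
# shared TGs are counted fractionally (.5) exactly as in the table.
TESTBEDS = [
    ("1-Node-Skylake",        5, 5, F(0)),
    ("1-Node-Cascadelake",    1, 1, F(0)),
    ("1-Node-AmpereAltra",    2, 2, F(0)),
    ("2-Node-IxiaPS1L47",     1, 1, F(1)),
    ("2-Node-Cascadelake",    3, 3, F(3)),
    ("2-Node-ThunderX2",      1, 1, F(1, 2)),
    ("2-Node-Icelake",        4, 4, F(4)),
    ("3-Node-Rangeley",       1, 3, F(1)),
    ("3-Node-Taishan",        1, 2, F(1, 2)),
    ("3-Node-Altra",          1, 2, F(1)),
    ("2-Node-Zen2",           1, 1, F(1)),
    ("3-Node-Icelake",        2, 4, F(2)),
    ("3-Node-SnowRidge",      1, 2, F(1, 2)),
    ("2-Node-SapphireRapids", 4, 4, F(4)),
]

testbeds = sum(row[1] for row in TESTBEDS)
suts = sum(row[2] for row in TESTBEDS)
tgs = sum(row[3] for row in TESTBEDS)
# Matches the Totals row: 28 testbeds, 35 SUTs, 18.5 TG equivalents.
assert (testbeds, suts, tgs) == (28, 35, F(37, 2)), (testbeds, suts, tgs)
print(f"testbeds={testbeds} SUTs={suts} TGs={float(tgs)}")
```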
- - -## Testbed Naming Convention - -The following naming convention is used within this page to specify physical -connectivity and wiring across defined CSIT testbeds: - -- **testbedname**: testbedN. -- **hostname**: - - traffic-generator: tN-tgW. - - system-under-testX: tN-sutX. -- **portnames**: - - tN-tgW-cY/pZ. - - tN-sutX-cY/pZ. -- **where**: - - N - testbed number. - - tgW - server acts as traffic-generator with W index. - - sutX - server acts as system-under-test with X index. - - Y - PCIe slot number denoting a NIC card number within the host. - - Z - port number on the NIC card. - -## Server Types - -The FD.io CSIT lab contains the following server types: - -1. **Server-Type-B2**: Purpose - Skylake Xeon hosts for FD.io builds and data processing. - - Quantity: 2 - - Physical connectivity: - - IPMI and host management ports. - - Main HW configuration: - - Chassis: SuperMicro SYS-7049GP-TRT. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: 2* Intel Platinum 8180 2.5 GHz. - - RAM Memory: 16* 16GB DDR4-2666MHz. - - Disks: 2* 1.6TB 6G SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot2 18:00.xx: empty. - - PCIe Slot4 3b:00.xx: empty. - - PCIe Slot9 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot6 86:00.xx: empty. - - PCIe Slot8 af:00.xx: empty. - - PCIe Slot10 d8:00.xx: empty. - -2. **Server-Type-B6**: Purpose - Skylake Xeon SUT for FD.io VPP_Device functional tests. - - Quantity: 2. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 1-node topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-7049GP-TRT. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: 2* Intel Platinum 8180 2.5 GHz. - - RAM Memory: 16* 16GB DDR4-2666MHz. - - Disks: 2* 1.6TB 6G SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot2 18:00.xx: x710-4p10GE Intel. - - PCIe Slot4 3b:00.xx: x710-4p10GE Intel. - - PCIe Slot9 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot6 86:00.xx: empty. - - PCIe Slot8 af:00.xx: empty. - - PCIe Slot10 d8:00.xx: empty. - -3. **Server-Type-B7**: Purpose - Ixia PerfectStorm One Appliance TG for FD.io TCP/IP performance tests. - - Quantity: 1. - - Physical connectivity: - - Host management interface: 10/100/1000-BaseT. - - 8-port 10GE SFP+ integrated NIC. - - Main HW configuration: - - Chassis: PS10GE4NG. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: Quad-Core, Intel Processor. - - HW accelerators: FPGA offload. - - RAM Memory: 64GB. - - Disks: 1 * 1 TB, Enterprise Class, High MTBF. - - Physical Interfaces: 4 * 10GE SFP+. - - Operating System: Native IxOS. - - Interface configuration: - - Port-1: 10GE SFP+. - - Port-2: 10GE SFP+. - - Port-3: 10GE SFP+. - - Port-4: 10GE SFP+. - -4. **Server-Type-B8**: Purpose - Skylake Xeon SUT for TCP/IP host stack tests. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports. - - Main HW configuration: - - Chassis: SuperMicro SYS-7049GP-TRT. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: 2* Intel Platinum 8180 2.5 GHz. - - RAM Memory: 16* 16GB DDR4-2666MHz. - - Disks: 2* 1.6TB 6G SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot2 18:00.xx: x710-4p10GE Intel. - - PCIe Slot4 3b:00.xx: empty. - - PCIe Slot9 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot6 86:00.xx: empty. - - PCIe Slot8 af:00.xx: empty. - - PCIe Slot10 d8:00.xx: empty. - -5.
**Server-Type-C1**: Purpose - Cascadelake Xeon SUT for FD.io VPP_Device functional tests. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 1-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-7049GP-TRT. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: 2* Intel Platinum 8280 2.7 GHz. - - RAM Memory: 12* 16GB DDR4-2933. - - Disks: 2* 1.92TB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot2 18:00.xx: x710-4p10GE Intel. - - PCIe Slot4 3b:00.xx: x710-4p10GE Intel. - - PCIe Slot9 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot6 86:00.xx: empty. - - PCIe Slot8 af:00.xx: empty. - - PCIe Slot10 d8:00.xx: empty. - -6. **Server-Type-C2**: Purpose - Cascadelake Xeon SUT for FD.io performance testing. - - Quantity: 3 - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-7049GP-TRT. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: 2* Intel Gold 6252N 2.3 GHz. - - RAM Memory: 12* 16GB DDR4-2933. - - Disks: 2* 1.92TB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot2 18:00.xx: x710-4p10GE Intel. - - PCIe Slot4 3b:00.xx: xxv710-DA2-2p25GE Intel. - - PCIe Slot9 5e:00.xx: ConnectX5-2p100GE Mellanox. - - Numa1: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot6 86:00.xx: e810-2p100GE Intel. - - PCIe Slot8 af:00.xx: empty. - - PCIe Slot10 d8:00.xx: empty. - -7. **Server-Type-C3**: Purpose - Cascadelake Xeon TG for FD.io performance testing. - - Quantity: 3. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-7049GP-TRT. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: 2* Intel Platinum 8280 2.7 GHz. - - RAM Memory: 12* 16GB DDR4-2933. - - Disks: 2* 1.92TB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot2 18:00.xx: x710-4p10GE Intel. - - PCIe Slot4 3b:00.xx: xxv710-DA2 2p25GE Intel. - - PCIe Slot9 5e:00.xx: ConnectX5-2p100GE Mellanox. - - Numa1: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot6 86:00.xx: ConnectX5-2p100GE Mellanox. - - PCIe Slot8 af:00.xx: ConnectX5-2p100GE Mellanox. - - PCIe Slot10 d8:00.xx: empty. - -8. **Server-Type-C4**: Purpose - Cascadelake Xeon Backend hosts for FD.io builds and data processing. - - Quantity: 3. - - Physical connectivity: - - IPMI and host management ports. - - no NIC ports, standalone setup. - - Main HW configuration: - - Chassis: SuperMicro 1029P-WTRT. - - Motherboard: SuperMicro X11DDW-NT. - - Processors: 2* Intel Platinum 8280 2.7 GHz. - - RAM Memory: 12* 16GB DDR4-2933. - - Disks: 4* 1.92TB SATA SSD. - - NICs configuration: - - Numa0: - - no cards. - - Numa1: - - no cards. - -9. **Server-Type-D1**: Purpose - Zen2 EPYC SUT for FD.io performance testing. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro AS-1114S-WTRT - - Processors: 1* AMD EPYC 7532 2.4 GHz. - - RAM Memory: 8* 32GB DDR4-2933. - - Disks: 1* 1TB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot1 01:00.xx: x710-4p10GE Intel. - - PCIe Slot2 41:00.xx: xxv710-DA2-2p25GE Intel. - - PCIe Slot3 81:00.xx: mcx556a-edat ConnectX5-2p100GE Mellanox. - -10.
**Server-Type-D2**: Purpose - Zen2 EPYC TG for FD.io performance testing. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro AS-1114S-WTRT - - Processors: 1* AMD EPYC 7532 2.4 GHz. - - RAM Memory: 8* 32GB DDR4-2933. - - Disks: 1* 1TB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot1 01:00.xx: mcx556a-edat ConnectX5-2p100GE Mellanox. - - PCIe Slot2 41:00.xx: x710-4p10GE Intel. - - PCIe Slot3 81:00.xx: xxv710-DA2 2p25GE Intel. - -11. **Server-Type-E11**: Purpose - ThunderX2 Arm Marvell SUT for FD.io VPP_Device functional tests. - - Quantity: 2 - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 1-node topologies. - - Main HW configuration: - - Chassis: GIGABYTE Rack Mount - - Motherboard: MT91-FS4-00 - - Processors: 2 * ThunderX2 ARMv8 CN9980 2.20 GHz - - RAM Memory: 16 * 16GB DIMM - - Disks: 2 * 480GB 6G SATA SSD SAMSUNG MZ7LH480 - - NICs configuration: - - Numa0: - - PCIe Slot4 05:00.xx: XL710-QDA2-2p40GE Intel. - - PCIe Slot8 0b:00.xx: ConnectX5-2p10/25GE Mellanox. - - Numa1: - - PCIe Slot14 91:00.xx: XL710-QDA2-2p40GE Intel. - - PCIe Slot26 9a:00.xx: ConnectX5-2p10/25GE Mellanox. - -12. **Server-Type-E21**: Purpose - TaiShan Arm Huawei SUT for FD.io performance testing. - - Quantity: 2 - - Physical connectivity: - - IPMI(?) and host management ports. - - NIC ports connected into 3-node topology. - - Main HW configuration: - - Chassis: Huawei TaiShan 2280. - - Processors: 2* hip07-d05 ~ 32* Arm Cortex-A72 - - RAM Memory: 8* 16GB DDR4-2400MT/s - - Disks: 1* 4TB SATA HDD - - NICs configuration: - - PCIe Slot4 e9:00.xx: connectx4-2p25GE Mellanox. - - PCIe Slot6 11:00.xx: x520-2p10GE Intel. - -13. **Server-Type-E22**: Purpose - ThunderX2 Arm Marvell SUT for FD.io performance testing. - - Quantity: 1 - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node topologies. - - Main HW configuration: - - Chassis: Gigabyte R181-T90 1U - - Motherboard: MT91-FS1 - - Processors: 2* ThunderX2 ARMv8 CN9975 2.0 GHz - - RAM Memory: 4* 32GB RDIMM - - Disks: 1* 480GB SSD Micron, 1* 1000GB HDD Seagate_25 - - NICs configuration: - - Numa0: - - no cards - - Numa1: - - PCIe Slot18 91:00.xx: XL710-QDA2-2p40GE Intel. - -14. **Server-Type-E23**: Purpose - Altra Arm Ampere SUT for FD.io performance testing. - - Quantity: 2 - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 3-node topologies. - - Main HW configuration: - - Chassis: WIWYNN Mt.Jade Server System B81.030Z1.0007 2U - - Motherboard: Mt.Jade Motherboard - - Processors: 2* Ampere(R) Altra(R) Q80-30 Processor (Neoverse N1) - - Processor Signature: Implementor 0x41, Variant 0x3, Architecture 15, Part 0xd0c, Revision 1 - - RAM Memory: 16* 8GB DDR4-3200MT/s - - Disks: 2* 960GB SSD Samsung M.2 NVMe PM983 - - NICs configuration: - - Numa0: - - PCIe Slot1 0004:04:00.x: xl710-QDA2-2p40GE Intel. - - Numa1: - - no cards. -15. **Server-Type-E24**: Purpose - Altra Arm Ampere for FD.io build. - - Quantity: 2. - - Physical connectivity: - - IPMI and host management ports. 
- - Main HW configuration: - - Chassis: Gigabyte R152-P30-00 1U - - Motherboard: MP32-AR1-00 - - Processors: 1* Ampere(R) Altra(R) Q80-30 Processor (Neoverse N1) - - Processor Signature: Implementor 0x0a, Variant 0x1, Architecture 6, Part 0x000, Revision 1 - - RAM Memory: 12* 16GB DDR4-3200MT/s - - Disks: 1* 960GB SSD Samsung M.2 NVMe PM983 - -16. **Server-Type-E31**: Purpose - Skylake Xeon TG for FD.io performance testing. - - Quantity: 1 - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node and 3-node topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-7049GP-TRT. - - Motherboard: SuperMicro X11DPG-QT. - - Processors: 2* Intel Platinum 8180 2.5 GHz. - - RAM Memory: 16* 16GB DDR4-2666MHz. - - Disks: 2* 1.6TB 6G SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot2 18:00.xx: x710-4p10GE Intel. - - PCIe Slot4 3b:00.xx: xxv710-DA2 2p25GE Intel. - - PCIe Slot9 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe3.0 lanes) - - PCIe Slot6 86:00.xx: empty. - - PCIe Slot8 af:00.xx: XL710-QDA2-2p40GE Intel. - - PCIe Slot10 d8:00.xx: x710-4p10GE Intel. - -17. **Server-Type-F1**: Purpose - Icelake Xeon SUT for FD.io performance testing. - - Quantity: 8. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node or 3-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-740GP-TNRT. - - Motherboard: Super X12DPG-QT6. - - Processors: 2* Intel Platinum 8358 2.6 GHz. - - RAM Memory: 16* 16GB DDR4-3200. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. - - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. - - PCIe Slot9 5e:00.xx: e810-2CQDA2-2p100GE Intel. - - Numa1: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot6 86:00.xx: empty. - - PCIe Slot8 af:00.xx: empty. - - PCIe Slot10 d8:00.xx: empty. - -18. **Server-Type-F2**: Purpose - Icelake Xeon TG for FD.io performance testing. - - Quantity: 3. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-740GP-TNRT. - - Motherboard: Super X12DPG-QT6. - - Processors: 2* Intel Platinum 8358 2.6 GHz. - - RAM Memory: 16* 16GB DDR4-3200. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. - - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. - - PCIe Slot9 5e:00.xx: e810-2CQDA2-2p100GE Intel. - - Numa1: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot6 86:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot8 af:00.xx: empty. - - PCIe Slot10 d8:00.xx: empty. - -19. **Server-Type-F3**: Purpose - Icelake Xeon TG for FD.io performance testing. - - Quantity: 3. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 3-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-740GP-TNRT. - - Motherboard: Super X12DPG-QT6. - - Processors: 2* Intel Platinum 8358 2.6 GHz. - - RAM Memory: 16* 16GB DDR4-3200. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. - - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. - - PCIe Slot9 5e:00.xx: e810-2CQDA2-2p100GE Intel. - - Numa1: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot6 86:00.xx: empty. - - PCIe Slot8 af:00.xx: empty. 
- - PCIe Slot10 d8:00.xx: empty. -20. **Server-Type-F4**: Purpose - Icelake Xeon Shared TG for FD.io performance testing. - - Quantity: 3. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node and/or 3-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-740GP-TNRT. - - Motherboard: Super X12DPG-QT6. - - Processors: 2* Intel Platinum 8358 2.6 GHz. - - RAM Memory: 16* 16GB DDR4-3200. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. - - PCIe Slot4 3b:00.xx: empty. - - PCIe Slot9 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe4.0 lanes) - - PCIe Slot6 86:00.xx: e810-XXVDA4-4p25GE Intel. - - PCIe Slot8 af:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot10 d8:00.xx: empty. - -21. **Server-Type-G1**: Purpose - SnowRidge Atom SUT for FD.io performance testing. - - Quantity: 2 - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 3-node testbed topology. - - Main HW configuration: - - Chassis: Intel JACOBSVILLE SDP. - - Motherboard: Intel JACOBSVILLE E63448-400. - - Processors: 1* Intel Atom P5362B 2.2 GHz. - - RAM Memory: 2* 16GB DDR4-2933. - - Disks: ?* ? SATA SSD. - - NICs configuration: - - Numa0: (x16, PCIe3.0 lane) - - PCIe BuiltIn ec:00.xx: e810-XXVDA4-4p25GE Intel. - -22. **Server-Type-H1**: Purpose - SapphireRapids Xeon SUT for FD.io full system performance testing. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 3-numa-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-741GE-TNRT. - - Motherboard: Super X13DEG-QT-P. - - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. - - RAM Memory: 16* 32GB DDR5-4800. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot4 3b:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot10 5e:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - Numa1: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot7 86:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot9 af:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot11 d8:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - -23. **Server-Type-H2**: Purpose - SapphireRapids Xeon TG for FD.io full system performance testing. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 3-numa-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-741GE-TNRT. - - Motherboard: Super X13DEG-QT-P. - - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. - - RAM Memory: 16* 32GB DDR5-4800. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot4 3b:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot10 5e:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - Numa1: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot7 86:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot9 af:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot11 d8:00.xx: empty. - -24. **Server-Type-H3**: Purpose - SapphireRapids Xeon SUT for FD.io performance testing. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. 
- - NIC ports connected into 3-numa-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-741GE-TNRT. - - Motherboard: Super X13DEG-QT-P. - - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. - - RAM Memory: 16* 32GB DDR5-4800. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot2 18:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. - - PCIe Slot10 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot7 86:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot9 af:00.xx: e810-XXVDA4-4p25GE Intel. - - PCIe Slot11 d8:00.xx: empty. - -25. **Server-Type-H4**: Purpose - SapphireRapids Xeon TG for FD.io performance testing. - - Quantity: 1. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 3-numa-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-741GE-TNRT. - - Motherboard: Super X13DEG-QT-P. - - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. - - RAM Memory: 16* 32GB DDR5-4800. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot2 18:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. - - PCIe Slot10 5e:00.xx: empty. - - Numa1: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot7 86:00.xx: empty. - - PCIe Slot9 af:00.xx: empty. - - PCIe Slot11 d8:00.xx: empty. - -26. **Server-Type-H5**: Purpose - SapphireRapids Xeon SUT for FD.io performance testing. - - Quantity: 2. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node testbed topologies. - - Main HW configuration: - - Chassis: SuperMicro SYS-741GE-TNRT. - - Motherboard: Super X13DEG-QT-P. - - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. - - RAM Memory: 16* 32GB DDR5-4800. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot4 3b:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot10 5e:00.xx: e810-XXVDA4-4p25GE Intel. - - Numa1: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot7 86:00.xx: empty. - - PCIe Slot9 af:00.xx: empty. - - PCIe Slot11 d8:00.xx: empty. - -27. **Server-Type-H6**: Purpose - SapphireRapids Xeon TG for FD.io performance testing. - - Quantity: 2. - - Physical connectivity: - - IPMI and host management ports. - - NIC ports connected into 2-node testbed topologies plus loopbacks in Numa1 for TG self-test. - - Main HW configuration: - - Chassis: SuperMicro SYS-741GE-TNRT. - - Motherboard: Super X13DEG-QT-P. - - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. - - RAM Memory: 16* 32GB DDR5-4800. - - Disks: 2* 960GB SATA SSD. - - NICs configuration: - - Numa0: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot4 3b:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot10 5e:00.xx: e810-XXVDA4-4p25GE Intel. - - Numa1: (x16, x16, x16 PCIe5.0 lanes) - - PCIe Slot7 86:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. - - PCIe Slot9 af:00.xx: e810-2CQDA2-2p100GE Intel. - - PCIe Slot11 d8:00.xx: empty. - -## Testbeds Configuration - -### 1-Node-Skylake (1n-skx) - -``` -- SUT [Server-Type-B6]: - - testbedname: testbed11. - - hostname: s1-t11-sut1. - - IPMI IP: 10.30.50.47 - - Host IP: 10.30.51.50 - - portnames: - - s1-t11-sut1-c2/p1 - 10GE-port1 x710-4p10GE. 
- - s1-t11-sut1-c2/p2 - 10GE-port2 x710-4p10GE. - - s1-t11-sut1-c2/p3 - 10GE-port3 x710-4p10GE. - - s1-t11-sut1-c2/p4 - 10GE-port4 x710-4p10GE. - - s1-t11-sut1-c4/p1 - 10GE-port1 x710-4p10GE. - - s1-t11-sut1-c4/p2 - 10GE-port2 x710-4p10GE. - - s1-t11-sut1-c4/p3 - 10GE-port3 x710-4p10GE. - - s1-t11-sut1-c4/p4 - 10GE-port4 x710-4p10GE. -- SUT [Server-Type-B6]: - - testbedname: testbed12. - - hostname: s2-t12-sut1. - - IPMI IP: 10.30.50.48 - - Host IP: 10.30.51.51 - - portnames: - - s2-t12-sut1-c2/p1 - 10GE-port1 x710-4p10GE. - - s2-t12-sut1-c2/p2 - 10GE-port2 x710-4p10GE. - - s2-t12-sut1-c2/p3 - 10GE-port3 x710-4p10GE. - - s2-t12-sut1-c2/p4 - 10GE-port4 x710-4p10GE. - - s2-t12-sut1-c4/p1 - 10GE-port1 x710-4p10GE. - - s2-t12-sut1-c4/p2 - 10GE-port2 x710-4p10GE. - - s2-t12-sut1-c4/p3 - 10GE-port3 x710-4p10GE. - - s2-t12-sut1-c4/p4 - 10GE-port4 x710-4p10GE. -``` - -### 1-Node-ThunderX2 (1n-tx2) - -``` -- SUT [Server-Type-E11]: - - testbedname: testbed13 - - hostname: s55-t13-sut1 - - IPMI IP: 10.30.50.70 - - Host IP: 10.30.51.70 - - portnames: - - s55-t13-sut1-c4/p1 - 40GE-port1 XL710-QDA2-2p40GE. - - s55-t13-sut1-c4/p2 - 40GE-port2 XL710-QDA2-2p40GE. - - s55-t13-sut1-c8/p1 - 40GE-port1 ConnectX5-2p10/25GE Mellanox. - - s55-t13-sut1-c8/p2 - 40GE-port2 ConnectX5-2p10/25GE Mellanox. - - s55-t13-sut1-c14/p1 - 40GE-port1 XL710-QDA2-2p40GE. - - s55-t13-sut1-c14/p2 - 40GE-port2 XL710-QDA2-2p40GE. - - s55-t13-sut1-c26/p1 - 40GE-port1 ConnectX5-2p10/25GE Mellanox. - - s55-t13-sut1-c26/p2 - 40GE-port2 ConnectX5-2p10/25GE Mellanox. -- SUT [Server-Type-E11]: - - testbedname: testbed14 - - hostname: s56-t14-sut1 - - IPMI IP: 10.30.50.71 - - Host IP: 10.30.51.71 - - portnames: - - s56-t14-sut1-c4/p1 - 40GE-port1 XL710-QDA2-2p40GE. - - s56-t14-sut1-c4/p2 - 40GE-port2 XL710-QDA2-2p40GE. - - s56-t14-sut1-c8/p1 - 40GE-port1 ConnectX5-2p10/25GE Mellanox. - - s56-t14-sut1-c8/p2 - 40GE-port2 ConnectX5-2p10/25GE Mellanox. - - s56-t14-sut1-c14/p1 - 40GE-port1 XL710-QDA2-2p40GE. - - s56-t14-sut1-c14/p2 - 40GE-port2 XL710-QDA2-2p40GE. - - s56-t14-sut1-c26/p1 - 40GE-port1 ConnectX5-2p10/25GE Mellanox. - - s56-t14-sut1-c26/p2 - 40GE-port2 ConnectX5-2p10/25GE Mellanox. -``` - -### 1-Node-Cascadelake (1n-clx) - -``` -- SUT [Server-Type-C1]: - - testbedname: testbed11. - - hostname: s32-t14-sut1. - - IPMI IP: 10.30.55.17 - - Host IP: 10.32.8.17 - - portnames: - - s32-t14-sut1-c2/p1 - 10GE-port1 x710-4p10GE. - - s32-t14-sut1-c2/p2 - 10GE-port2 x710-4p10GE. - - s32-t14-sut1-c2/p3 - 10GE-port3 x710-4p10GE. - - s32-t14-sut1-c2/p4 - 10GE-port4 x710-4p10GE. - - s32-t14-sut1-c4/p1 - 10GE-port1 x710-4p10GE. - - s32-t14-sut1-c4/p2 - 10GE-port2 x710-4p10GE. - - s32-t14-sut1-c4/p3 - 10GE-port3 x710-4p10GE. - - s32-t14-sut1-c4/p4 - 10GE-port4 x710-4p10GE. -``` - -### 2-Node-IxiaPS1L47 (2n-ps1) - -``` -- SUT [Server-Type-B8]: - - testbedname: testbed25. - - hostname: s25-t25-sut1. - - IPMI IP: 10.30.50.58 - - Host IP: 10.30.51.61 - - portnames: - - s25-t25-sut1-c2/p1 - 10GE-port1 x710-4p10GE. - - s25-t25-sut1-c2/p2 - 10GE-port2 x710-4p10GE. - - s25-t25-sut1-c2/p3 - 10GE-port3 x710-4p10GE. - - s25-t25-sut1-c2/p4 - 10GE-port4 x710-4p10GE. -- TG [Server-Type-B7]: - - testbedname: testbed25. - - hostname: s26-t25-tg1. - - IPMI IP: 10.30.50.59 - - Host IP: 10.30.51.62 - - portnames: - - s26-t25-tg1-p1 - 10GE-port1. - - s26-t25-tg1-p2 - 10GE-port2. - - s26-t25-tg1-p3 - 10GE-port3. - - s26-t25-tg1-p4 - 10GE-port4. 
-``` - -### 2-Node-Cascadelake (2n-clx) - -{{< figure src="/cdocs/testbed-2n-clx.svg" >}} - -``` -- SUT [Server-Type-C2]: - - testbedname: testbed27. - - hostname: s33-t27-sut1. - - IPMI IP: 10.30.55.18 - - Host IP: 10.32.8.18 - - portnames: - - s33-t27-sut1-c2/p1 - 10GE-port1 x710-4p10GE. - - s33-t27-sut1-c2/p2 - 10GE-port2 x710-4p10GE. - - s33-t27-sut1-c2/p3 - 10GE-port3 x710-4p10GE. - - s33-t27-sut1-c2/p4 - 10GE-port4 x710-4p10GE. - - s33-t27-sut1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s33-t27-sut1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s33-t27-sut1-c6/p1 - 100GE-port1 e810-2p100GE. - - s33-t27-sut1-c6/p2 - 100GE-port2 e810-2p100GE. - - s33-t27-sut1-c9/p1 - 100GE-port1 ConnectX5-2p100GE. - - s33-t27-sut1-c9/p2 - 100GE-port2 ConnectX5-2p100GE. -- TG [Server-Type-C3]: - - testbedname: testbed27. - - hostname: s34-t27-tg1. - - IPMI IP: 10.30.55.19 - - Host IP: 10.32.8.19 - - portnames: - - s34-t27-tg1-c2/p1 - 10GE-port1 x710-4p10GE. - - s34-t27-tg1-c2/p2 - 10GE-port2 x710-4p10GE. - - s34-t27-tg1-c2/p3 - 10GE-port3 x710-4p10GE. - - s34-t27-tg1-c2/p4 - 10GE-port4 x710-4p10GE. - - s34-t27-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s34-t27-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s34-t27-tg1-c6/p1 - 100GE-port1 ConnectX5-2p100GE. - - s34-t27-tg1-c6/p2 - 100GE-port2 ConnectX5-2p100GE. - - s34-t27-tg1-c8/p1 - 100GE-port1 ConnectX5-2p100GE. - - s34-t27-tg1-c8/p2 - 100GE-port2 ConnectX5-2p100GE. - - s34-t27-tg1-c9/p1 - 100GE-port1 ConnectX5-2p100GE. - - s34-t27-tg1-c9/p2 - 100GE-port2 ConnectX5-2p100GE. -- SUT [Server-Type-C2]: - - testbedname: testbed28. - - hostname: s35-t28-sut1. - - IPMI IP: 10.30.55.20 - - Host IP: 10.32.8.20 - - portnames: - - s35-t28-sut1-c2/p1 - 10GE-port1 x710-4p10GE. - - s35-t28-sut1-c2/p2 - 10GE-port2 x710-4p10GE. - - s35-t28-sut1-c2/p3 - 10GE-port3 x710-4p10GE. - - s35-t28-sut1-c2/p4 - 10GE-port4 x710-4p10GE. - - s35-t28-sut1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s35-t28-sut1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s35-t28-sut1-c6/p1 - 100GE-port1 e810-2p100GE. - - s35-t28-sut1-c6/p2 - 100GE-port2 e810-2p100GE. - - s35-t28-sut1-c9/p1 - 100GE-port1 ConnectX5-2p100GE. - - s35-t28-sut1-c9/p2 - 100GE-port2 ConnectX5-2p100GE. -- TG [Server-Type-C3]: - - testbedname: testbed28. - - hostname: s36-t28-tg1. - - IPMI IP: 10.30.55.21 - - Host IP: 10.32.8.21 - - portnames: - - s36-t28-tg1-c2/p1 - 10GE-port1 x710-4p10GE. - - s36-t28-tg1-c2/p2 - 10GE-port2 x710-4p10GE. - - s36-t28-tg1-c2/p3 - 10GE-port3 x710-4p10GE. - - s36-t28-tg1-c2/p4 - 10GE-port4 x710-4p10GE. - - s36-t28-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s36-t28-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s36-t28-tg1-c6/p1 - 100GE-port1 ConnectX5-2p100GE. - - s36-t28-tg1-c6/p2 - 100GE-port2 ConnectX5-2p100GE. - - s36-t28-tg1-c8/p1 - 100GE-port1 ConnectX5-2p100GE. - - s36-t28-tg1-c8/p2 - 100GE-port2 ConnectX5-2p100GE. - - s36-t28-tg1-c9/p1 - 100GE-port1 ConnectX5-2p100GE. - - s36-t28-tg1-c9/p2 - 100GE-port2 ConnectX5-2p100GE. -- SUT [Server-Type-C2]: - - testbedname: testbed29. - - hostname: s37-t29-sut1. - - IPMI IP: 10.30.55.22 - - Host IP: 10.32.8.22 - - portnames: - - s37-t29-sut1-c2/p1 - 10GE-port1 x710-4p10GE. - - s37-t29-sut1-c2/p2 - 10GE-port2 x710-4p10GE. - - s37-t29-sut1-c2/p3 - 10GE-port3 x710-4p10GE. - - s37-t29-sut1-c2/p4 - 10GE-port4 x710-4p10GE. - - s37-t29-sut1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s37-t29-sut1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s37-t29-sut1-c6/p1 - 100GE-port1 e810-2p100GE. - - s37-t29-sut1-c6/p2 - 100GE-port2 e810-2p100GE.
- - s37-t29-sut1-c9/p1 - 100GE-port1 ConnectX5-2p100GE. - - s37-t29-sut1-c9/p2 - 100GE-port2 ConnectX5-2p100GE. -- TG [Server-Type-C3]: - - testbedname: testbed29. - - hostname: s38-t29-tg1. - - IPMI IP: 10.30.55.23 - - Host IP: 10.32.8.23 - - portnames: - - s38-t29-tg1-c2/p1 - 10GE-port1 x710-4p10GE. - - s38-t29-tg1-c2/p2 - 10GE-port2 x710-4p10GE. - - s38-t29-tg1-c2/p3 - 10GE-port3 x710-4p10GE. - - s38-t29-tg1-c2/p4 - 10GE-port4 x710-4p10GE. - - s38-t29-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s38-t29-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s38-t29-tg1-c6/p1 - 100GE-port1 ConnectX5-2p100GE. - - s38-t29-tg1-c6/p2 - 100GE-port2 ConnectX5-2p100GE. - - s38-t29-tg1-c9/p1 - 100GE-port1 ConnectX5-2p100GE. - - s38-t29-tg1-c9/p2 - 100GE-port2 ConnectX5-2p100GE. -``` - -### 2-Node-Zen2 (2n-zn2) - -{{< figure src="/cdocs/testbed-2n-zn2.svg" >}} - -``` -- SUT [Server-Type-D1]: - - testbedname: testbed210. - - hostname: s60-t210-sut1. - - IPMI IP: 10.30.55.24 - - Host IP: 10.32.8.24 - - portnames: - - s60-t210-sut1-c1/p1 - 10GE-port1 x710-4p10GE. - - s60-t210-sut1-c1/p2 - 10GE-port2 x710-4p10GE. - - s60-t210-sut1-c1/p3 - 10GE-port3 x710-4p10GE. - - s60-t210-sut1-c1/p4 - 10GE-port4 x710-4p10GE. - - s60-t210-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s60-t210-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s60-t210-sut1-c3/p1 - 100GE-port1 ConnectX5-2p100GE. - - s60-t210-sut1-c3/p2 - 100GE-port2 ConnectX5-2p100GE. -- TG [Server-Type-D2]: - - testbedname: testbed210. - - hostname: s61-t210-tg1. - - IPMI IP: 10.30.55.25 - - Host IP: 10.32.8.25 - - portnames: - - s61-t210-tg1-c1/p1 - 100GE-port1 ConnectX5-2p100GE. - - s61-t210-tg1-c1/p2 - 100GE-port2 ConnectX5-2p100GE. - - s61-t210-tg1-c2/p1 - 10GE-port1 x710-4p10GE. - - s61-t210-tg1-c2/p2 - 10GE-port2 x710-4p10GE. - - s61-t210-tg1-c2/p3 - 10GE-port3 x710-4p10GE. - - s61-t210-tg1-c2/p4 - 10GE-port4 x710-4p10GE. - - s61-t210-tg1-c3/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s61-t210-tg1-c3/p2 - 25GE-port2 xxv710-DA2-2p25GE. -``` - -### 2-Node-ThunderX2 (2n-tx2) - -{{< figure src="/cdocs/testbed-2n-tx2.svg" >}} - -``` -- SUT [Server-Type-E22]: - - testbedname: testbed211. - - hostname: s27-t211-sut1. - - IPMI IP: 10.30.50.69 - - Host IP: 10.30.51.69 - - portnames: - - s27-t211-sut1-c18/p1 - 40GE-port1 XL710-QDA2-2p40GE. - - s27-t211-sut1-c18/p2 - 40GE-port2 XL710-QDA2-2p40GE. -- TG [Server-Type-E31]: - - testbedname: testbed33 and testbed211. - - hostname: s19-t33t211-tg1. - - IPMI IP: 10.30.50.46 - - Host IP: 10.30.51.49 - - portnames: - - s19-t33t211-tg1-c2/p1 - 10GE-port1 x710-4p10GE. - - s19-t33t211-tg1-c2/p2 - 10GE-port2 x710-4p10GE. - - s19-t33t211-tg1-c2/p3 - 10GE-port3 x710-4p10GE. - - s19-t33t211-tg1-c2/p4 - 10GE-port4 x710-4p10GE. - - s19-t33t211-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s19-t33t211-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s19-t33t211-tg1-c8/p1 - 40GE-port1 xl710-QDA2-2p40GE. - - s19-t33t211-tg1-c8/p2 - 40GE-port2 xl710-QDA2-2p40GE. - - s19-t33t211-tg1-c10/p1 - 10GE-port1 x710-4p10GE. - - s19-t33t211-tg1-c10/p2 - 10GE-port2 x710-4p10GE. - - s19-t33t211-tg1-c10/p3 - 10GE-port3 x710-4p10GE. - - s19-t33t211-tg1-c10/p4 - 10GE-port4 x710-4p10GE. -``` - -### 2-Node-Icelake (2n-icx) - -{{< figure src="/cdocs/testbed-2n-icx.svg" >}} - -``` -- SUT [Server-Type-F1]: - - testbedname: testbed212. - - hostname: s71-t212-sut1. - - IPMI IP: 10.30.50.81 - - Host IP: 10.30.51.81 - - portnames: - - s71-t212-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s71-t212-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
- - s71-t212-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s71-t212-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s71-t212-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s71-t212-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s71-t212-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s71-t212-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- TG [Server-Type-F2]: - - testbedname: testbed212. - - hostname: s72-t212-tg1. - - IPMI IP: 10.30.50.82 - - Host IP: 10.30.51.82 - - portnames: - - s72-t212-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s72-t212-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s72-t212-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s72-t212-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s72-t212-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s72-t212-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s72-t212-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s72-t212-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s72-t212-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s72-t212-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- SUT [Server-Type-F1]: - - testbedname: testbed213. - - hostname: s83-t213-sut1. - - IPMI IP: 10.30.50.83 - - Host IP: 10.30.51.83 - - portnames: - - s83-t213-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s83-t213-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s83-t213-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s83-t213-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s83-t213-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s83-t213-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s83-t213-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s83-t213-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- TG [Server-Type-F2]: - - testbedname: testbed213. - - hostname: s84-t213-tg1. - - IPMI IP: 10.30.50.84 - - Host IP: 10.30.51.84 - - portnames: - - s84-t213-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s84-t213-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s84-t213-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s84-t213-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s84-t213-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s84-t213-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s84-t213-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s84-t213-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s84-t213-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s84-t213-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- SUT [Server-Type-F1]: - - testbedname: testbed214. - - hostname: s85-t214-sut1. - - IPMI IP: 10.30.50.85 - - Host IP: 10.30.51.85 - - portnames: - - s85-t214-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s85-t214-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s85-t214-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s85-t214-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s85-t214-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s85-t214-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s85-t214-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s85-t214-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- TG [Server-Type-F2]: - - testbedname: testbed214. - - hostname: s86-t214-tg1. - - IPMI IP: 10.30.50.86 - - Host IP: 10.30.51.86 - - portnames: - - s86-t214-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s86-t214-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s86-t214-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s86-t214-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s86-t214-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s86-t214-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s86-t214-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. 
- - s86-t214-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s86-t214-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s86-t214-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- SUT [Server-Type-F1]: - - testbedname: testbed215. - - hostname: s87-t215-sut1. - - IPMI IP: 10.30.50.87 - - Host IP: 10.30.51.87 - - portnames: - - s87-t215-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s87-t215-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s87-t215-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s87-t215-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s87-t215-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s87-t215-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s87-t215-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s87-t215-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- TG [Server-Type-F2]: - - testbedname: testbed215. - - hostname: s88-t215-tg1. - - IPMI IP: 10.30.50.88 - - Host IP: 10.30.51.88 - - portnames: - - s88-t215-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s88-t215-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s88-t215-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s88-t215-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s88-t215-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s88-t215-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s88-t215-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s88-t215-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s88-t215-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s88-t215-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -``` - -### 3-Node-Rangeley (3n-rng) - -Note: There is no IPMI. Serial console is accessible via VIRL2 and VIRL3 USB. - -``` -- ServerB22 [Server-Type-B5]: - - testbedname: testbed35. - - hostname: s22-t35-sut1 (vex-yul-rot-netgate-1). - - IPMI IP: 10.30.51.29 - screen -r /dev/ttyUSB0 - - Host IP: 10.30.51.9 - - portnames: - - s22-t35-sut1-p1 - 10GE-port1 ix0 82599. - - s22-t35-sut1-p2 - 10GE-port2 ix1 82599. - - 1GB ports (tbd) -- ServerB23 [Server-Type-B5]: - - testbedname: testbed35. - - hostname: s23-t35-sut2 (vex-yul-rot-netgate-2). - - IPMI IP: 10.30.51.30 - screen -r /dev/ttyUSB1 - - Host IP: 10.30.51.10 - - portnames: - - s23-t35-sut1-p1 - 10GE-port1 ix0 82599. - - s23-t35-sut1-p2 - 10GE-port2 ix1 82599. - - 1GB ports (tbd) -- ServerB24 [Server-Type-B5]: - - testbedname: testbed35. - - hostname: s24-t35-sut3 (vex-yul-rot-netgate-3). - - IPMI IP: 10.30.51.30 - screen -r /dev/ttyUSB2 - - Host IP: 10.30.51.11 - - portnames: - - s24-t35-sut1-p1 - 10GE-port1 ix0 82599. - - s24-t35-sut1-p2 - 10GE-port2 ix1 82599. - - 1GB ports (tbd) -``` - -### 3-Node-Taishan (3n-tsh) - -{{< figure src="/cdocs/testbed-3n-tsh.svg" >}} - -``` -- SUT [Server-Type-E21]: - - testbedname: testbed33. - - hostname: s17-t33-sut1. - - IPMI IP: 10.30.50.36 - - Host IP: 10.30.51.36 - - portnames: - - s17-t33-sut1-c6/p1 - 10GE-port1 x520-2p10GE. - - s17-t33-sut1-c6/p2 - 10GE-port2 x520-2p10GE. - - s17-t33-sut1-c4/p1 - 25GE-port1 cx4-2p25GE. - - s17-t33-sut1-c4/p2 - 25GE-port2 cx4-2p25GE. -- SUT [Server-Type-E21]: - - testbedname: testbed33. - - hostname: s18-t33-sut2. - - IPMI IP: 10.30.50.37 - - Host IP: 10.30.51.37 - - portnames: - - s18-t33-sut2-c6/p1 - 10GE-port1 x520-2p10GE. - - s18-t33-sut2-c6/p2 - 10GE-port2 x520-2p10GE. - - s18-t33-sut2-c4/p1 - 25GE-port1 cx4-2p25GE. - - s18-t33-sut2-c4/p2 - 25GE-port2 cx4-2p25GE. -- TG [Server-Type-E31]: - - testbedname: testbed33 and testbed211. - - hostname: s19-t33t211-tg1. - - IPMI IP: 10.30.50.46 - - Host IP: 10.30.51.49 - - portnames: - - s19-t33t211-tg1-c2/p1 - 10GE-port1 x710-4p10GE. 
- - s19-t33t211-tg1-c2/p2 - 10GE-port2 x710-4p10GE. - - s19-t33t211-tg1-c2/p3 - 10GE-port3 x710-4p10GE. - - s19-t33t211-tg1-c2/p4 - 10GE-port4 x710-4p10GE. - - s19-t33t211-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s19-t33t211-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s19-t33t211-tg1-c8/p1 - 40GE-port1 xl710-QDA2-2p40GE. - - s19-t33t211-tg1-c8/p2 - 40GE-port2 xl710-QDA2-2p40GE. - - s19-t33t211-tg1-c10/p1 - 10GE-port1 x710-4p10GE. - - s19-t33t211-tg1-c10/p2 - 10GE-port2 x710-4p10GE. - - s19-t33t211-tg1-c10/p3 - 10GE-port3 x710-4p10GE. - - s19-t33t211-tg1-c10/p4 - 10GE-port4 x710-4p10GE. -``` - -### 3-Node-Altra (3n-alt) - -{{< figure src="/cdocs/testbed-3n-alt.svg" >}} - -``` -- SUT [Server-Type-E23]: - - testbedname: testbed34. - - hostname: s62-t34-sut1. - - IPMI IP: 10.30.50.72 - - Host IP: 10.30.51.72 - - portnames: - - s62-t34-sut1-c1/p1 - 40GE-port1 xl710-QDA2-2p40GE. - - s62-t34-sut1-c1/p2 - 40GE-port2 xl710-QDA2-2p40GE. -- SUT [Server-Type-E23]: - - testbedname: testbed34. - - hostname: s63-t34-sut2. - - IPMI IP: 10.30.50.73 - - Host IP: 10.30.51.73 - - portnames: - - s63-t34-sut2-c1/p1 - 40GE-port1 xl710-QDA2-2p40GE. - - s63-t34-sut2-c1/p2 - 40GE-port2 xl710-QDA2-2p40GE. -- TG [Server-Type-F4]: - - testbedname: testbed34. - - hostname: s64-t34-tg1. - - IPMI IP: 10.30.50.74 - - Host IP: 10.30.51.74 - - portnames: - - s64-t34-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s64-t34-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s64-t34-tg1-c4/p1 - 40GE-port1 xl710-QDA2-2p40GE. - - s64-t34-tg1-c4/p2 - 40GE-port2 xl710-QDA2-2p40GE. - - s64-t34-tg1-c6/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s64-t34-tg1-c6/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s64-t34-tg1-c6/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s64-t34-tg1-c6/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s64-t34-tg1-c8/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s64-t34-tg1-c8/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -``` - -### 3-Node-Icelake (3n-icx) - -{{< figure src="/cdocs/testbed-3n-icx.svg" >}} - -``` -- ServerF1 [Server-Type-F1]: - - testbedname: testbed37. - - hostname: s65-t37-sut1. - - IPMI IP: 10.30.50.75 - - Host IP: 10.30.51.75 - - portnames: - - s65-t37-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s65-t37-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s65-t37-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s65-t37-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s65-t37-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s65-t37-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s65-t37-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s65-t37-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- ServerF1 [Server-Type-F1]: - - testbedname: testbed37. - - hostname: s66-t37-sut2. - - IPMI IP: 10.30.50.76 - - Host IP: 10.30.51.76 - - portnames: - - s66-t37-sut2-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s66-t37-sut2-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s66-t37-sut2-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s66-t37-sut2-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s66-t37-sut2-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s66-t37-sut2-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s66-t37-sut2-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s66-t37-sut2-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- ServerF3 [Server-Type-F3]: - - testbedname: testbed37. - - hostname: s67-t37-tg1. - - IPMI IP: 10.30.50.77 - - Host IP: 10.30.51.77 - - portnames: - - s67-t37-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s67-t37-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s67-t37-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. 
- - s67-t37-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s67-t37-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s67-t37-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s67-t37-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s67-t37-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- ServerF1 [Server-Type-F1]: - - testbedname: testbed38. - - hostname: s78-t38-sut1. - - IPMI IP: 10.30.50.78 - - Host IP: 10.30.51.78 - - portnames: - - s78-t38-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s78-t38-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s78-t38-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s78-t38-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s78-t38-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s78-t38-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s78-t38-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s78-t38-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- ServerF1 [Server-Type-F1]: - - testbedname: testbed38. - - hostname: s79-t38-sut2. - - IPMI IP: 10.30.50.79 - - Host IP: 10.30.51.79 - - portnames: - - s79-t38-sut2-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s79-t38-sut2-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s79-t38-sut2-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s79-t38-sut2-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s79-t38-sut2-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s79-t38-sut2-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s79-t38-sut2-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s79-t38-sut2-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- ServerF3 [Server-Type-F3]: - - testbedname: testbed38. - - hostname: s80-t38-tg1. - - IPMI IP: 10.30.50.80 - - Host IP: 10.30.51.80 - - portnames: - - s80-t38-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. - - s80-t38-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. - - s80-t38-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s80-t38-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s80-t38-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s80-t38-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s80-t38-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s80-t38-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -``` - -### 3-Node-SnowRidge (3n-snr) - -{{< figure src="/cdocs/testbed-3n-snr.svg" >}} - -``` -- ServerG1 [Server-Type-G1]: - - testbedname: testbed39. - - hostname: s93-t39-sut1. - - IPMI IP: 10.30.50.93 - - Host IP: 10.30.51.93 - - portnames: - - s93-t39-sut1-c1/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s93-t39-sut1-c1/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s93-t39-sut1-c1/p2 - 25GE-port3 e810-XXVDA4-4p25GE. - - s93-t39-sut1-c1/p2 - 25GE-port4 e810-XXVDA4-4p25GE. -- ServerG1 [Server-Type-G1]: - - testbedname: testbed39. - - hostname: s94-t39-sut2. - - IPMI IP: 10.30.50.94 - - Host IP: 10.30.51.94 - - portnames: - - s94-t39-sut2-c1/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s94-t39-sut2-c1/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s94-t39-sut2-c1/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s94-t39-sut2-c1/p4 - 25GE-port4 e810-XXVDA4-4p25GE. -- ServerF4 [Server-Type-F4]: - - testbedname: testbed39. - - hostname: s89-t39t310-tg1. - - IPMI IP: 10.30.50.89 - - Host IP: 10.30.51.89 - - portnames: - - s89-t39t310-tg1-c6/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s89-t39t310-tg1-c6/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s89-t39t310-tg1-c6/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s89-t39t310-tg1-c6/p4 - 25GE-port4 e810-XXVDA4-4p25GE. -``` - -### 2-Node-SapphireRapids (2n-spr) - -{{< figure src="/cdocs/testbed-2n-spr.svg" >}} - -``` -- SUT [Server-Type-H1]: - - testbedname: testbed21. - - hostname: s52-t21-sut1. 
- - IPMI IP: 10.30.50.52 - - Host IP: 10.30.51.52 - - portnames: - - s52-t21-sut1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. - - s52-t21-sut1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. - - s52-t21-sut1-c4/p1 - 200GE-port1 ConnectX7-2p200GE. - - s52-t21-sut1-c4/p2 - 200GE-port2 ConnectX7-2p200GE. - - s52-t21-sut1-c10/p1 - 200GE-port1 ConnectX7-2p200GE. - - s52-t21-sut1-c10/p2 - 200GE-port2 ConnectX7-2p200GE. - - s52-t21-sut1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. - - s52-t21-sut1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. - - s52-t21-sut1-c9/p1 - 200GE-port1 ConnectX7-2p200GE. - - s52-t21-sut1-c9/p2 - 200GE-port2 ConnectX7-2p200GE. - - s52-t21-sut1-c11/p1 - 200GE-port1 ConnectX7-2p200GE. - - s52-t21-sut1-c11/p2 - 200GE-port2 ConnectX7-2p200GE. -- TG [Server-Type-H2]: - - testbedname: testbed21. - - hostname: s53-t21-tg1. - - IPMI IP: 10.30.50.53 - - Host IP: 10.30.51.53 - - portnames: - - s53-t21-tg1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. - - s53-t21-tg1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. - - s53-t21-tg1-c4/p1 - 200GE-port1 ConnectX7-2p200GE. - - s53-t21-tg1-c4/p2 - 200GE-port2 ConnectX7-2p200GE. - - s53-t21-tg1-c10/p1 - 200GE-port1 ConnectX7-2p200GE. - - s53-t21-tg1-c10/p2 - 200GE-port2 ConnectX7-2p200GE. - - s53-t21-tg1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. - - s53-t21-tg1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. - - s53-t21-tg1-c9/p1 - 200GE-port1 ConnectX7-2p200GE. - - s53-t21-tg1-c9/p2 - 200GE-port2 ConnectX7-2p200GE. -- SUT [Server-Type-H3]: - - testbedname: testbed22. - - hostname: s54-t22-sut1. - - IPMI IP: 10.30.50.54 - - Host IP: 10.30.51.54 - - portnames: - - s54-t22-sut1-c2/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s54-t22-sut1-c2/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s54-t22-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s54-t22-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s54-t22-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s54-t22-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s54-t22-sut1-c7/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s54-t22-sut1-c7/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s54-t22-sut1-c9/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s54-t22-sut1-c9/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s54-t22-sut1-c9/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s54-t22-sut1-c9/p4 - 25GE-port4 e810-XXVDA4-4p25GE. -- TG [Server-Type-H4]: - - testbedname: testbed22. - - hostname: s55-t22-tg1. - - IPMI IP: 10.30.50.55 - - Host IP: 10.30.51.55 - - portnames: - - s55-t22-tg1-c2/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s55-t22-tg1-c2/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s55-t22-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s55-t22-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s55-t22-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s55-t22-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. -- SUT [Server-Type-H5]: - - testbedname: testbed23. - - hostname: s56-t23-sut1. - - IPMI IP: 10.30.50.56 - - Host IP: 10.30.51.56 - - portnames: - - s56-t23-sut1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. - - s56-t23-sut1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. - - s56-t23-sut1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s56-t23-sut1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s56-t23-sut1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s56-t23-sut1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s56-t23-sut1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s56-t23-sut1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. -- TG [Server-Type-H6]: - - testbedname: testbed23. - - hostname: s57-t23-tg1. - - IPMI IP: 10.30.50.57 - - Host IP: 10.30.51.57 - - portnames: - - s57-t23-tg1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. 
- - s57-t23-tg1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. - - s57-t23-tg1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s57-t23-tg1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s57-t23-tg1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s57-t23-tg1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s57-t23-tg1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s57-t23-tg1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s57-t23-tg1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. - - s57-t23-tg1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. - - s57-t23-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s57-t23-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -- SUT [Server-Type-H5]: - - testbedname: testbed24. - - hostname: s58-t24-sut1. - - IPMI IP: 10.30.50.58 - - Host IP: 10.30.51.58 - - portnames: - - s58-t24-sut1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. - - s58-t24-sut1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. - - s58-t24-sut1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s58-t24-sut1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s58-t24-sut1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s58-t24-sut1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s58-t24-sut1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s58-t24-sut1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. -- TG [Server-Type-H6]: - - testbedname: testbed24. - - hostname: s59-t24-tg1. - - IPMI IP: 10.30.50.59 - - Host IP: 10.30.51.59 - - portnames: - - s59-t24-tg1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. - - s59-t24-tg1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. - - s59-t24-tg1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s59-t24-tg1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. - - s59-t24-tg1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. - - s59-t24-tg1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. - - s59-t24-tg1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. - - s59-t24-tg1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. - - s59-t24-tg1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. - - s59-t24-tg1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. - - s59-t24-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. - - s59-t24-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. -``` - -## Testbed Wiring - -### 1-Node-Skylake (1n-skx) - -``` -- testbed11: - - ring1 10GE-ports x710-4p10GE: - - s1-t11-sut1-c2/p1 to s1-t11-sut1-c4/p1. - - ring2 10GE-ports x710-4p10GE: - - s1-t11-sut1-c2/p2 to s1-t11-sut1-c4/p2. - - ring3 10GE-ports x710-4p10GE: - - s1-t11-sut1-c2/p3 to s1-t11-sut1-c4/p3. - - ring4 10GE-ports x710-4p10GE: - - s1-t11-sut1-c2/p4 to s1-t11-sut1-c4/p4. - - ring5 100GE-ports e810-2p100GE: - - s1-t11-sut1-c5/p1 to s1-t11-sut1-c6/p1. - - ring6 100GE-ports e810-2p100GE: - - s1-t11-sut1-c5/p2 to s1-t11-sut1-c6/p2. -- testbed12: - - ring1 10GE-ports x710-4p10GE: - - s2-t12-sut1-c2/p1 to s2-t12-sut1-c4/p1. - - ring2 10GE-ports x710-4p10GE: - - s2-t12-sut1-c2/p2 to s2-t12-sut1-c4/p2. - - ring3 10GE-ports x710-4p10GE: - - s2-t12-sut1-c2/p3 to s2-t12-sut1-c4/p3. - - ring4 10GE-ports x710-4p10GE: - - s2-t12-sut1-c2/p4 to s2-t12-sut1-c4/p4. - - ring5 100GE-ports e810-2p100GE: - - s2-t12-sut1-c5/p1 to s2-t12-sut1-c6/p1. - - ring6 100GE-ports e810-2p100GE: - - s2-t12-sut1-c5/p2 to s2-t12-sut1-c6/p2. -``` - -### 1-Node-ThunderX2 (1n-tx2) - -``` -- testbed13: - - ring1 40GE-ports XL710-QDA2-2p40GE on SUTs: - - s55-t13-sut1-c4/p1 - s55-t13-sut1-c14/p1. - - ring2 40GE-ports XL710-QDA2-2p40GE on SUTs: - - s55-t13-sut1-c4/p2 - s55-t13-sut1-c14/p2. - - ring3 10/25GE-ports ConnectX5-2p10/25GE on SUTs: - - s55-t13-sut1-c8/p1 - s55-t13-sut1-c26/p1. - - ring4 10/25GE-ports ConnectX5-2p10/25GE on SUTs: - - s55-t13-sut1-c8/p2 - s55-t13-sut1-c26/p2. 
- -- testbed14: - - ring1 40GE-ports XL710-QDA2-2p40GE on SUTs: - - s56-t14-sut1-c4/p1 - s56-t14-sut1-c14/p1. - - ring2 40GE-ports XL710-QDA2-2p40GE on SUTs: - - s56-t14-sut1-c4/p2 - s56-t14-sut1-c14/p2. - - ring3 10/25GE-ports ConnectX5-2p10/25GE on SUTs: - - s56-t14-sut1-c8/p1 - s56-t14-sut1-c26/p1. - - ring4 10/25GE-ports ConnectX5-2p10/25GE on SUTs: - - s56-t14-sut1-c8/p2 - s56-t14-sut1-c26/p2. -``` - -### 2-Node-IxiaPS1L47 (2n-ps1) - -``` -- testbed25: - - link1 10GE-port x710-4p10GE on SUT: - - t25-tg1-p1 to t25-sut1-c2/p1. - - link2 10GE-port x710-4p10GE on SUT: - - t25-tg1-p2 to t25-sut1-c2/p2. - - link3 10GE-port x710-4p10GE on SUT: - - t25-tg1-p3 to t25-sut1-c2/p3. - - link4 10GE-port x710-4p10GE on SUT: - - t25-tg1-p4 to t25-sut1-c2/p4. -``` - -### 2-Node-Cascadelake (2n-clx) - -``` -- testbed27: - - ring1 10GE-ports x710-4p10GE on SUT: - - s34-t27-tg1-c2/p1 to s33-t27-sut1-c2/p1. - - s33-t27-sut1-c2/p2 to s34-t27-tg1-c2/p2. - - ring2 10GE-ports x710-4p10GE on SUT: - - s34-t27-tg1-c2/p3 to s33-t27-sut1-c2/p3. - - s33-t27-sut1-c2/p4 to s34-t27-tg1-c2/p4. - - ring3 25GE-ports xxv710-DA2-2p25GE on SUT - - s34-t27-tg1-c4/p1 to s33-t27-sut1-c4/p1. - - s33-t27-sut1-c4/p2 to s34-t27-tg1-c4/p2. - - ring4 100GE-ports ConnectX5-2p100GE on SUT: - - s34-t27-tg1-c9/p1 to s33-t27-sut1-c9/p1. - - s33-t27-sut1-c9/p2 to s34-t27-tg1-c9/p2. - - ring5 100GE-ports e810-2p100GE on SUT 100GE-ports ConnectX5-2p100GE on TG: - - s34-t27-tg1-c6/p1 to s33-t27-sut1-c6/p1. - - s33-t27-sut1-c6/p2 to s34-t27-tg1-c6/p2. - - ring6 100GE-ports e810-2p100GE on TG: - - s34-t27-tg1-c8/p1 to s34-t27-tg1-c8/p2. - - s34-t27-tg1-c8/p2 to s34-t27-tg1-c8/p1. -- testbed28: - - ring1 10GE-ports x710-4p10GE on SUT: - - s36-t28-tg1-c2/p1 to s35-t28-sut1-c2/p1. - - s35-t28-sut1-c2/p2 to s36-t28-tg1-c2/p2. - - ring2 10GE-ports x710-4p10GE on SUT: - - s36-t28-tg1-c2/p3 to s35-t28-sut1-c2/p3. - - s35-t28-sut1-c2/p4 to s36-t28-tg1-c2/p4. - - ring3 25GE-ports xxv710-DA2-2p25GE on SUT - - s36-t28-tg1-c4/p1 to s35-t28-sut1-c4/p1. - - s35-t28-sut1-c4/p2 to s36-t28-tg1-c4/p2. - - ring4 100GE-ports ConnectX5-2p100GE on SUT: - - s36-t28-tg1-c9/p1 to s35-t28-sut1-c9/p1. - - s35-t28-sut1-c9/p2 to s36-t28-tg1-c9/p2. - - ring5 100GE-ports e810-2p100GE on SUT 100GE-ports ConnectX5-2p100GE on TG: - - s36-t28-tg1-c6/p1 to s35-t28-sut1-c6/p1. - - s35-t28-sut1-c6/p2 to s36-t28-tg1-c6/p2. - - ring6 100GE-ports e810-2p100GE on TG: - - s36-t28-tg1-c8/p1 to s36-t28-tg1-c8/p2. - - s36-t28-tg1-c8/p2 to s36-t28-tg1-c8/p1. -- testbed29: - - ring1 10GE-ports x710-4p10GE on SUT: - - s38-t29-tg1-c2/p1 to s37-t29-sut1-c2/p1. - - s37-t29-sut1-c2/p2 to s38-t29-tg1-c2/p2. - - ring2 10GE-ports x710-4p10GE on SUT: - - s38-t29-tg1-c2/p3 to s37-t29-sut1-c2/p3. - - s37-t29-sut1-c2/p4 to s38-t29-tg1-c2/p4. - - ring3 25GE-ports xxv710-DA2-2p25GE on SUT - - s38-t29-tg1-c4/p1 to s37-t29-sut1-c4/p1. - - s37-t29-sut1-c4/p2 to s38-t29-tg1-c4/p2. - - ring4 100GE-ports ConnectX5-2p100GE on SUT: - - s38-t29-tg1-c9/p1 to s37-t29-sut1-c9/p1. - - s37-t29-sut1-c9/p2 to s38-t29-tg1-c9/p2. - - ring5 100GE-ports e810-2p100GE on SUT 100GE-ports ConnectX5-2p100GE on TG: - - s38-t29-tg1-c6/p1 to s37-t29-sut1-c6/p1. - - s37-t29-sut1-c6/p2 to s38-t29-tg1-c6/p2. -``` - -### 2-Node-Zen2 (2n-zn2) - -``` -- testbed210: - - ring1 10GE-ports x710-4p10GE on SUT: - - s61-t210-tg1-c2/p1 to s60-t210-sut1-c1/p1. - - s60-t210-sut1-c1/p2 to s61-t210-tg1-c2/p2. - - ring2 10GE-ports x710-4p10GE on SUT: - - s61-t210-tg1-c2/p3 to s60-t210-sut1-c1/p3. - - s60-t210-sut1-c1/p4 to s61-t210-tg1-c2/p4. 
- - ring3 25GE-ports xxv710-DA2-2p25GE on SUT - - s61-t210-tg1-c3/p1 to s60-t210-sut1-c2/p1. - - s60-t210-sut1-c2/p2 to s61-t210-tg1-c3/p2. - - ring4 100GE-ports ConnectX5-2p100GE on SUT: - - s61-t210-tg1-c1/p1 to s60-t210-sut1-c3/p1. - - s60-t210-sut1-c3/p2 to s61-t210-tg1-c1/p2. -``` - -### 2-Node-ThunderX2 (2n-tx2) - -``` -- testbed211: - - ring1 10GE-ports x520-2p10GE on SUTs: - - s27-t211-sut1-c18/p1 - s19-t33t211-tg1-c8/p1. - - s27-t211-sut1-c18/p2 - s19-t33t211-tg1-c8/p2. -``` - -### 2-Node-Icelake (2n-icx) - -``` -- testbed212: - - ring1 25GE-ports xxv710-DA2-2p25GE on SUT - - s72-t212-tg1-c2/p1 to s71-t212-sut1-c2/p1. - - s71-t212-sut1-c2/p2 to s72-t212-tg1-c2/p2. - - ring2 25GE-ports e810-XXVDA4-2p25GE on SUT: - - s72-t212-tg1-c4/p1 to s71-t212-sut1-c4/p1. - - s71-t212-sut1-c4/p2 to s72-t212-tg1-c4/p2. - - s72-t212-tg1-c4/p3 to s71-t212-sut1-c4/p3. - - s71-t212-sut1-c4/p4 to s72-t212-tg1-c4/p4. - - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s72-t212-tg1-c9/p1 to s71-t212-sut1-c9/p1. - - s71-t212-sut1-c9/p2 to s72-t212-tg1-c9/p2. - - ring4 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s72-t212-tg1-c6/p1 to s72-t212-tg1-c6/p2. - - s72-t212-tg1-c6/p2 to s72-t212-tg1-c6/p1. -- testbed213: - - ring1 25GE-ports xxv710-DA2-2p25GE on SUT - - s84-t213-tg1-c2/p1 to s83-t213-sut1-c2/p1. - - s83-t213-sut1-c2/p2 to s84-t213-tg1-c2/p2. - - ring2 25GE-ports e810-XXVDA4-2p25GE on SUT: - - s84-t213-tg1-c4/p1 to s83-t213-sut1-c4/p1. - - s83-t213-sut1-c4/p2 to s84-t213-tg1-c4/p2. - - s84-t213-tg1-c4/p3 to s83-t213-sut1-c4/p3. - - s83-t213-sut1-c4/p4 to s84-t213-tg1-c4/p4. - - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s84-t213-tg1-c9/p1 to s83-t213-sut1-c9/p1. - - s83-t213-sut1-c9/p2 to s84-t213-tg1-c9/p2. - - ring4 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s84-t213-tg1-c6/p1 to s84-t213-tg1-c6/p2. - - s84-t213-tg1-c6/p2 to s84-t213-tg1-c6/p1. -- testbed214: - - ring1 25GE-ports xxv710-DA2-2p25GE on SUT - - s86-t214-tg1-c2/p1 to s85-t214-sut1-c2/p1. - - s85-t214-sut1-c2/p2 to s86-t214-tg1-c2/p2. - - ring2 25GE-ports e810-XXVDA4-2p25GE on SUT: - - s86-t214-tg1-c4/p1 to s85-t214-sut1-c4/p1. - - s85-t214-sut1-c4/p2 to s86-t214-tg1-c4/p2. - - s86-t214-tg1-c4/p3 to s85-t214-sut1-c4/p3. - - s85-t214-sut1-c4/p4 to s86-t214-tg1-c4/p4. - - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s86-t214-tg1-c9/p1 to s85-t214-sut1-c9/p1. - - s85-t214-sut1-c9/p2 to s86-t214-tg1-c9/p2. - - ring4 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s86-t214-tg1-c6/p1 to s86-t214-tg1-c6/p2. - - s86-t214-tg1-c6/p2 to s86-t214-tg1-c6/p1. -- testbed215: - - ring1 25GE-ports xxv710-DA2-2p25GE on SUT - - s88-t215-tg1-c2/p1 to s87-t215-sut1-c2/p1. - - s87-t215-sut1-c2/p2 to s88-t215-tg1-c2/p2. - - ring2 25GE-ports e810-XXVDA4-2p25GE on SUT: - - s88-t215-tg1-c4/p1 to s87-t215-sut1-c4/p1. - - s87-t215-sut1-c4/p2 to s88-t215-tg1-c4/p2. - - s88-t215-tg1-c4/p3 to s87-t215-sut1-c4/p3. - - s87-t215-sut1-c4/p4 to s88-t215-tg1-c4/p4. - - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s88-t215-tg1-c9/p1 to s87-t215-sut1-c9/p1. - - s87-t215-sut1-c9/p2 to s88-t215-tg1-c9/p2. - - ring4 100GE-ports e810-2CQDA2-2p100GE on SUT: - - s88-t215-tg1-c6/p1 to s88-t215-tg1-c6/p2. - - s88-t215-tg1-c6/p2 to s88-t215-tg1-c6/p1. -``` - -### 3-Node-Rangeley (3n-rng) - -``` -To be completed. -``` - -### 3-Node-Taishan (3n-tsh) - -``` -- testbed33: - - ring1 10GE-ports x520-2p10GE on SUTs: - - s19-t33t211-tg1-c2/p2 - s17-t33-sut1-c6/p2. - - s17-t33-sut1-c6/p1 - s18-t33-sut2-c6/p2. - - s18-t33-sut2-c6/p1 - s19-t33t211-tg1-c2/p1. 
- - ring2 25GE-ports cx4-2p25GE on SUTs: - - s19-t33t211-tg1-c4/p2 - s17-t33-sut1-c4/p2. - - s17-t33-sut1-c4/p1 - s18-t33-sut2-c4/p2. - - s18-t33-sut2-c4/p1 - s19-t33t211-tg1-c4/p1. -``` - -### 3-Node-Altra (3n-alt) - -``` -- testbed34: - - ring1 40GE-ports xl710-QDA2-2p40GE on SUTs: - - s64-t34-tg1-c4/p1 - s62-t34-sut1-c1/p2. - - s62-t34-sut1-c1/p1 - s63-t34-sut2-c1/p2. - - s63-t34-sut2-c1/p1 - s64-t34-tg1-c4/p2. -``` - -### 3-Node-Icelake (3n-icx) - -``` -- testbed37: - - ring1 25GE-ports xxv710-DA2-2p25GE on SUTs: - - s67-t37-tg1-c2/p1 to s65-t37-sut1-c2/p1. - - s65-t37-sut1-c2/p2 to s66-t37-sut2-c2/p2. - - s66-t37-sut2-c2/p1 to s67-t37-tg1-c2/p2. - - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT: - - s67-t37-tg1-c4/p1 to s65-t37-sut1-c4/p1. - - s65-t37-sut1-c4/p2 to s66-t37-sut2-c4/p2. - - s66-t37-sut2-c4/p1 to s67-t37-tg1-c4/p2. - - s67-t37-tg1-c4/p3 to s65-t37-sut1-c4/p3. - - s65-t37-sut1-c4/p4 to s66-t37-sut2-c4/p4. - - s66-t37-sut2-c4/p3 to s67-t37-tg1-c4/p4. - - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT - - s67-t37-tg1-c9/p1 to s65-t37-sut1-c9/p1. - - s65-t37-sut1-c9/p2 to s66-t37-sut2-c9/p2. - - s66-t37-sut2-c9/p1 to s67-t37-tg1-c9/p2. -- testbed38: - - ring1 25GE-ports xxv710-DA2-2p25GE on SUTs: - - s80-t38-tg1-c2/p1 to s78-t38-sut1-c2/p1. - - s78-t38-sut1-c2/p2 to s79-t38-sut2-c2/p2. - - s79-t38-sut2-c2/p1 to s80-t38-tg1-c2/p2. - - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT: - - s80-t38-tg1-c4/p1 to s78-t38-sut1-c4/p1. - - s78-t38-sut1-c4/p2 to s79-t38-sut2-c4/p2. - - s79-t38-sut2-c4/p1 to s80-t38-tg1-c4/p2. - - s80-t38-tg1-c4/p3 to s78-t38-sut1-c4/p3. - - s78-t38-sut1-c4/p4 to s79-t38-sut2-c4/p4. - - s79-t38-sut2-c4/p3 to s80-t38-tg1-c4/p4. - - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT - - s80-t38-tg1-c9/p1 to s78-t38-sut1-c9/p1. - - s78-t38-sut1-c9/p2 to s79-t38-sut2-c9/p2. - - s79-t38-sut2-c9/p1 to s80-t38-tg1-c9/p2. -``` - -### 3-Node-SnowRidge (3n-snr) - -``` -- testbed39: - - ring1 25GE-ports e810-XXVDA4-4p25GE: - - s89-t39t310-tg1-c6/p1 to s93-t39-sut1-c1/p1. - - s93-t39-sut1-c1/p2 to s94-t39-sut2-c1/p2. - - s94-t39-sut2-c1/p1 to s89-t39t310-tg1-c6/p2. - - s89-t39t310-tg1-c6/p3 to s93-t39-sut1-c1/p3. - - s93-t39-sut1-c1/p4 to s94-t39-sut2-c1/p4. - - s94-t39-sut2-c1/p3 to s89-t39t310-tg1-c6/p4. -``` - -### 2-Node-SapphireRapids (2n-spr) - -``` -- testbed21: - - ring1 200GE-ports ConnectX7-2p200GE: - - s53-t21-tg1-c2/p1 to s52-t21-sut1-c2/p1 - - s53-t21-tg1-c7/p1 to s52-t21-sut1-c7/p1 - - s52-t21-sut1-c4/p2 to s52-t21-sut1-c9/p2 - - ring2 200GE-ports ConnectX7-2p200GE: - - s53-t21-tg1-c2/p2 to s52-t21-sut1-c2/p2 - - s53-t21-tg1-c7/p2 to s52-t21-sut1-c7/p2 - - s52-t21-sut1-c10/p1 to s52-t21-sut1-c11/p1 - - ring3 200GE-ports ConnectX7-2p200GE: - - s53-t21-tg1-c4/p1 to s52-t21-sut1-c4/p1 - - s53-t21-tg1-c9/p1 to s52-t21-sut1-c9/p1 - - s52-t21-sut1-c10/p2 to s52-t21-sut1-c11/p2 -- testbed22: - - ring1 100GE-ports e810-2CQDA2-2p100GE: - - s55-t22-tg1-c4/p1 to s54-t22-sut1-c9/p2 - - s55-t22-tg1-c4/p2 to s54-t22-sut1-c4/p2 - - s54-t22-sut1-c9/p1 to s54-t22-sut1-c4/p1 - - ring2 25GE-ports e810-XXVDA4-4p25GE: - - s55-t22-tg1-c2/p1 to s54-t22-sut1-c2/p1 - - s55-t22-tg1-c2/p2 to s54-t22-sut1-c7/p1 - - s54-t22-sut1-c2/p2 to s54-t22-sut1-c7/p2 -- testbed23: - - ring1 200GE-ports ConnectX7-2p200GE: - - s56-t23-sut1-c2/p1 to s57-t23-tg1-c2/p1. - - s57-t23-tg1-c2/p2 to s56-t23-sut1-c2/p2. - - ring2 100GE-ports e810-2CQDA2-2p100GE: - - s56-t23-sut1-c4/p1 to s57-t23-tg1-c4/p1. - - s57-t23-tg1-c4/p2 to s56-t23-sut1-c4/p2. 
-  - ring3 25GE-ports e810-XXVDA4-2p25GE:
-    - s56-t23-sut1-c10/p1 to s57-t23-tg1-c10/p1.
-    - s56-t23-sut1-c10/p2 to s57-t23-tg1-c10/p2.
-    - s56-t23-sut1-c10/p3 to s57-t23-tg1-c10/p3.
-    - s56-t23-sut1-c10/p4 to s57-t23-tg1-c10/p4.
-  - ring4 200GE-ports ConnectX7-2p200GE:
-    - s57-t23-tg1-c7/p1 to s57-t23-tg1-c7/p2.
-  - ring5 100GE-ports e810-2CQDA2-2p100GE:
-    - s57-t23-tg1-c9/p1 to s57-t23-tg1-c9/p2.
-- testbed24:
-  - ring1 200GE-ports ConnectX7-2p200GE:
-    - s58-t24-sut1-c2/p1 to s59-t24-tg1-c2/p1.
-    - s59-t24-tg1-c2/p2 to s58-t24-sut1-c2/p2.
-  - ring2 100GE-ports e810-2CQDA2-2p100GE:
-    - s58-t24-sut1-c4/p1 to s59-t24-tg1-c4/p1.
-    - s59-t24-tg1-c4/p2 to s58-t24-sut1-c4/p2.
-  - ring3 25GE-ports e810-XXVDA4-2p25GE:
-    - s58-t24-sut1-c10/p1 to s59-t24-tg1-c10/p1.
-    - s58-t24-sut1-c10/p2 to s59-t24-tg1-c10/p2.
-    - s58-t24-sut1-c10/p3 to s59-t24-tg1-c10/p3.
-    - s58-t24-sut1-c10/p4 to s59-t24-tg1-c10/p4.
-  - ring4 200GE-ports ConnectX7-2p200GE:
-    - s59-t24-tg1-c7/p1 to s59-t24-tg1-c7/p2.
-  - ring5 100GE-ports e810-2CQDA2-2p100GE:
-    - s59-t24-tg1-c9/p1 to s59-t24-tg1-c9/p2.
-```
diff --git a/docs/content/infrastructure/fdio_csit_testbed_versioning.md b/docs/content/infrastructure/fdio_csit_testbed_versioning.md
index 5185c787f7..4e8fb69659 100644
--- a/docs/content/infrastructure/fdio_csit_testbed_versioning.md
+++ b/docs/content/infrastructure/fdio_csit_testbed_versioning.md
@@ -1,7 +1,7 @@
 ---
 bookToc: true
 title: "FD.io CSIT Testbed Versioning"
-weight: 3
+weight: 4
 ---
 
 # FD.io CSIT Testbed Versioning
diff --git a/docs/content/infrastructure/fdio_dc_testbed_specifications.md b/docs/content/infrastructure/fdio_dc_testbed_specifications.md
new file mode 100644
index 0000000000..3daa3824e2
--- /dev/null
+++ b/docs/content/infrastructure/fdio_dc_testbed_specifications.md
@@ -0,0 +1,1861 @@
+---
+bookToc: true
+title: "FD.io DC Testbed Specifications"
+weight: 2
+---
+
+# FD.io DC Testbed Specifications
+
+## Purpose
+
+This note includes the specification of the physical testbed infrastructure
+hosted by the LFN FD.io CSIT project.
+
+## Server Management
+
+### Addressing
+
+Each server has a LOM (Lights-Out-Management, e.g. SM IPMI) port and a
+Management port, which are connected to two different VLANs.
+
+#### LOM (IPMI) VLAN
+
+ - Subnet: 10.30.50.0/24
+ - Gateway: 10.30.50.1
+ - Broadcast: 10.30.50.255
+ - DNS1: 199.204.44.24
+ - DNS2: 199.204.47.54
+
+#### Management VLAN
+
+ - Subnet: 10.30.51.0/24
+ - Gateway: 10.30.51.1
+ - Broadcast: 10.30.51.255
+ - DNS1: 199.204.44.24
+ - DNS2: 199.204.47.54
+
+To access these hosts, a VPN connection is required.
+
+## Testbeds Overview
+
+### Summary List
+
+```
+ #. Type                  Purpose SUT TG   #TB  #SUT #TG  #skx #ps1 #rng #tx2 #tsh #alt #clx #zn2 #icx #snr #spr
+ 1. 1-Node-Skylake        nomad   skx na   5    5    0    5    0    0    0    0    0    0    0    0    0    0
+ 2. 1-Node-Cascadelake    nomad   clx na   1    1    0    0    0    0    0    0    0    1    0    0    0    0
+ 3. 1-Node-AmpereAltra    nomad   alt na   2    2    0    0    0    0    0    0    2    0    0    0    0    0
+ 4. 2-Node-IxiaPS1L47     tcp     skx ps1  1    1    1    1    1    0    0    0    0    0    0    0    0    0
+ 5. 2-Node-Cascadelake    perf    clx clx  3    3    3    0    0    0    0    0    0    6    0    0    0    0
+ 6. 2-Node-ThunderX2      perf    tx2 skx  1    1    .5   .5   0    0    1    0    0    0    0    0    0    0
+ 7. 2-Node-Icelake        perf    icx icx  4    4    4    0    0    0    0    0    0    0    0    8    0    0
+ 8. 3-Node-Rangeley       perf    rng skx  1    3    1    0    0    2    0    0    0    0    0    0    0    0
+ 9. 3-Node-Taishan        perf    tsh skx  1    2    .5   .5   0    0    0    2    0    0    0    0    0    0
+10. 3-Node-Altra          perf    alt icx  1    2    1    0    0    0    0    0    2    0    0    1    0    0
+11. 2-Node-Zen2           perf    zn2 zn2  1    1    1    0    0    0    0    0    0    0    2    0    0    0
+12. 3-Node-Icelake        perf    icx icx  2    4    2    0    0    0    0    0    0    0    0    6    0    0
+13. 3-Node-SnowRidge      perf    snr icx  1    2    .5   0    0    0    0    0    0    0    0    .5   2    0
+14. 2-Node-SapphireRapids perf    spr spr  4    4    4    0    0    0    0    0    0    0    0    0    0    8
+    Totals:                                28   35   18.5 7    1    2    1    2    4    7    2    15.5 2    8
+```
+
+(Fractional TG counts denote traffic generators shared between two
+testbeds; see the cross-check sketch after the per-testbed overviews
+below.)
+
+### 1-Node-Skylake Xeon Intel (1n-skx)
+
+Each 1-Node-Skylake testbed includes one SUT (Server-Type-B6) with NIC
+ports connected back-to-back ([Server Types](#server-types)).
+Used for FD.io VPP_Device functional driver tests.
+
+### 1-Node-ThunderX2 Arm Marvell (1n-tx2)
+
+Each 1-Node-ThunderX2 testbed includes one SUT (Server-Type-E11) with NIC
+ports connected back-to-back ([Server Types](#server-types)).
+Used for FD.io VPP_Device functional driver tests.
+
+### 1-Node-Cascadelake Xeon Intel (1n-clx)
+
+Each 1-Node-Cascadelake testbed includes one SUT (Server-Type-C1) with
+NIC ports connected back-to-back ([Server Types](#server-types)).
+Used for FD.io VPP_Device functional driver tests.
+
+### 2-Node-IxiaPS1L47 Ixia PSOne L47 (2n-ps1)
+
+Each 2-Node-IxiaPS1L47 testbed includes one SUT (Server-Type-B8) and one
+TG (Ixia PSOne appliance) with 10GE interfaces connected in a 2-node
+circular topology ([Server Types](#server-types)).
+Used for FD.io TCP/IP and HTTP performance tests.
+
+### 2-Node-Cascadelake Xeon Intel (2n-clx)
+
+Each 2-Node-Cascadelake testbed includes one SUT (Server-Type-C2) and
+one TG (Server-Type-C3) connected in a 2-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 2-Node-Zen2 EPYC AMD (2n-zn2)
+
+Each 2-Node-Zen2 testbed includes one SUT (Server-Type-D1) and
+one TG (Server-Type-D2) connected in a 2-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 2-Node-ThunderX2 Arm Marvell (2n-tx2)
+
+Each 2-Node-ThunderX2 testbed includes one SUT (Server-Type-E22) and
+one TG (Server-Type-E31) connected in a 2-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 2-Node-Icelake Xeon Intel (2n-icx)
+
+Each 2-Node-Icelake testbed includes one SUT (Server-Type-F1) and
+one TG (Server-Type-F2) connected in a 2-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 3-Node-Rangeley Atom Intel (3n-rng)
+
+Each 3-Node-Rangeley testbed includes three SUTs (Server-Type-B5) and one
+TG (Server-Type-2) connected in a 3-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 3-Node-TaiShan Arm Huawei (3n-tsh)
+
+Each 3-Node-TaiShan testbed includes two SUTs (Server-Type-E21) and one
+TG (Server-Type-E31) connected in a 3-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 3-Node-Altra Arm Ampere (3n-alt)
+
+Each 3-Node-Altra testbed includes two SUTs (Server-Type-E23) and one
+TG (Server-Type-F4) connected in a 3-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 3-Node-Icelake Xeon Intel (3n-icx)
+
+Each 3-Node-Icelake testbed includes two SUTs (Server-Type-F1) and one
+TG (Server-Type-F3) connected in a 3-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
+
+### 3-Node-SnowRidge Atom Intel (3n-snr)
+
+Each 3-Node-SnowRidge testbed includes two SUTs (Server-Type-G1) and one
+TG (Server-Type-F4) connected in a 3-node circular topology
+([Server Types](#server-types)).
+Used for FD.io performance tests.
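+
+The fractional TG counts in the Summary List above denote traffic
+generators shared between two testbeds, counted as .5 for each (for
+example s19-t33t211-tg1 serves both testbed33 and testbed211). As a
+quick cross-check of the Totals row, a minimal Python sketch follows
+(illustrative only, not part of CSIT tooling; the row data is copied
+from the table above):
+
+```
+# Recompute the Summary List totals; a TG shared between two testbeds
+# contributes 0.5 to each row, hence the fractional values.
+rows = [
+    # (type, testbeds, suts, tgs)
+    ("1-Node-Skylake", 5, 5, 0.0),
+    ("1-Node-Cascadelake", 1, 1, 0.0),
+    ("1-Node-AmpereAltra", 2, 2, 0.0),
+    ("2-Node-IxiaPS1L47", 1, 1, 1.0),
+    ("2-Node-Cascadelake", 3, 3, 3.0),
+    ("2-Node-ThunderX2", 1, 1, 0.5),
+    ("2-Node-Icelake", 4, 4, 4.0),
+    ("3-Node-Rangeley", 1, 3, 1.0),
+    ("3-Node-Taishan", 1, 2, 0.5),
+    ("3-Node-Altra", 1, 2, 1.0),
+    ("2-Node-Zen2", 1, 1, 1.0),
+    ("3-Node-Icelake", 2, 4, 2.0),
+    ("3-Node-SnowRidge", 1, 2, 0.5),
+    ("2-Node-SapphireRapids", 4, 4, 4.0),
+]
+testbeds, suts, tgs = (sum(r[i] for r in rows) for i in (1, 2, 3))
+print(testbeds, suts, tgs)  # 28 35 18.5 - matches the Totals row
+```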
+
+### 2-Node-Full-SapphireRapids Xeon Intel (2nf-spr)
+
+One 2-Node-Full-SapphireRapids testbed includes one SUT (Server-Type-H1) and
+one TG (Server-Type-H2) connected in a 2-node physical topology
+with NUMA (socket) daisy chaining. For more detail see
+[Server Types](#server-types) and [Testbed Topology-TODO](#TODO).
+Used for FD.io performance tests in a full system SUT setup with all PCIe
+Gen5 x16 lane slots populated with 2p200GbE NICs.
+
+### 2-Node-SapphireRapids Xeon Intel (2n-spr)
+
+Each 2-Node-SapphireRapids testbed includes one SUT (Server-Type-H5) and
+one TG (Server-Type-H6) connected in a 2-node circular topology. For more
+detail see [Server Types](#server-types) and [Testbed Topology-TODO](#TODO).
+Used for FD.io performance tests.
+
+## Testbed Naming Convention
+
+The following naming convention is used within this page to specify physical
+connectivity and wiring across the defined CSIT testbeds:
+
+- **testbedname**: testbedN.
+- **hostname**:
+  - traffic-generator: tN-tgW.
+  - system-under-testX: tN-sutX.
+- **portnames**:
+  - tN-tgW-cY/pZ.
+  - tN-sutX-cY/pZ.
+- **where**:
+  - N - testbed number.
+  - tgW - server acts as traffic-generator with W index.
+  - sutX - server acts as system-under-test with X index.
+  - Y - PCIe slot number denoting a NIC card number within the host.
+  - Z - port number on the NIC card.
+
+## Server Types
+
+The FD.io CSIT lab contains the following server types:
+
+1. **Server-Type-B2**: Purpose - Skylake Xeon hosts for FD.io builds and data processing.
+  - Quantity: 2.
+  - Physical connectivity:
+    - IPMI and host management ports.
+  - Main HW configuration:
+    - Chassis: SuperMicro SYS-7049GP-TRT.
+    - Motherboard: SuperMicro X11DPG-QT.
+    - Processors: 2* Intel Platinum 8180 2.5 GHz.
+    - RAM Memory: 16* 16GB DDR4-2666MHz.
+    - Disks: 2* 1.6TB 6G SATA SSD.
+    - NICs configuration:
+      - Numa0: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot2 18:00.xx: empty.
+        - PCIe Slot4 3b:00.xx: empty.
+        - PCIe Slot9 5e:00.xx: empty.
+      - Numa1: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot6 86:00.xx: empty.
+        - PCIe Slot8 af:00.xx: empty.
+        - PCIe Slot10 d8:00.xx: empty.
+
+2. **Server-Type-B6**: Purpose - Skylake Xeon SUT for FD.io VPP_Device functional tests.
+  - Quantity: 2.
+  - Physical connectivity:
+    - IPMI and host management ports.
+    - NIC ports connected into 1-node topologies.
+  - Main HW configuration:
+    - Chassis: SuperMicro SYS-7049GP-TRT.
+    - Motherboard: SuperMicro X11DPG-QT.
+    - Processors: 2* Intel Platinum 8180 2.5 GHz.
+    - RAM Memory: 16* 16GB DDR4-2666MHz.
+    - Disks: 2* 1.6TB 6G SATA SSD.
+    - NICs configuration:
+      - Numa0: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot2 18:00.xx: x710-4p10GE Intel.
+        - PCIe Slot4 3b:00.xx: x710-4p10GE Intel.
+        - PCIe Slot9 5e:00.xx: empty.
+      - Numa1: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot6 86:00.xx: empty.
+        - PCIe Slot8 af:00.xx: empty.
+        - PCIe Slot10 d8:00.xx: empty.
+
+3. **Server-Type-B7**: Purpose - Ixia PerfectStorm One Appliance TG for FD.io TCP/IP performance tests.
+  - Quantity: 1.
+  - Physical connectivity:
+    - Host management interface: 10/100/1000-BaseT.
+    - 8-port 10GE SFP+ integrated NIC.
+  - Main HW configuration:
+    - Chassis: PS10GE4NG.
+    - Motherboard: SuperMicro X11DPG-QT.
+    - Processors: Quad-Core Intel Processor.
+    - HW accelerators: FPGA offload.
+    - RAM Memory: 64GB.
+    - Disks: 1 * 1 TB, Enterprise Class, High MTBF.
+    - Physical Interfaces: 4 * 10GE SFP+.
+    - Operating System: Native IxOS.
+  - Interface configuration:
+    - Port-1: 10GE SFP+.
+    - Port-2: 10GE SFP+.
+    - Port-3: 10GE SFP+.
+    - Port-4: 10GE SFP+.
+
+4. **Server-Type-B8**: Purpose - Skylake Xeon SUT for TCP/IP host stack tests.
+  - Quantity: 1.
+  - Physical connectivity:
+    - IPMI and host management ports.
+    - NIC ports.
+  - Main HW configuration:
+    - Chassis: SuperMicro SYS-7049GP-TRT.
+    - Motherboard: SuperMicro X11DPG-QT.
+    - Processors: 2* Intel Platinum 8180 2.5 GHz.
+    - RAM Memory: 16* 16GB DDR4-2666MHz.
+    - Disks: 2* 1.6TB 6G SATA SSD.
+    - NICs configuration:
+      - Numa0: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot2 18:00.xx: x710-4p10GE Intel.
+        - PCIe Slot4 3b:00.xx: empty.
+        - PCIe Slot9 5e:00.xx: empty.
+      - Numa1: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot6 86:00.xx: empty.
+        - PCIe Slot8 af:00.xx: empty.
+        - PCIe Slot10 d8:00.xx: empty.
+
+5. **Server-Type-C1**: Purpose - Cascadelake Xeon SUT for FD.io VPP_Device functional tests.
+  - Quantity: 1.
+  - Physical connectivity:
+    - IPMI and host management ports.
+    - NIC ports connected into 1-node testbed topologies.
+  - Main HW configuration:
+    - Chassis: SuperMicro SYS-7049GP-TRT.
+    - Motherboard: SuperMicro X11DPG-QT.
+    - Processors: 2* Intel Platinum 8280 2.7 GHz.
+    - RAM Memory: 12* 16GB DDR4-2933.
+    - Disks: 2* 1.92TB SATA SSD.
+    - NICs configuration:
+      - Numa0: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot2 18:00.xx: x710-4p10GE Intel.
+        - PCIe Slot4 3b:00.xx: x710-4p10GE Intel.
+        - PCIe Slot9 5e:00.xx: empty.
+      - Numa1: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot6 86:00.xx: empty.
+        - PCIe Slot8 af:00.xx: empty.
+        - PCIe Slot10 d8:00.xx: empty.
+
+6. **Server-Type-C2**: Purpose - Cascadelake Xeon SUT for FD.io performance testing.
+  - Quantity: 3.
+  - Physical connectivity:
+    - IPMI and host management ports.
+    - NIC ports connected into 2-node testbed topologies.
+  - Main HW configuration:
+    - Chassis: SuperMicro SYS-7049GP-TRT.
+    - Motherboard: SuperMicro X11DPG-QT.
+    - Processors: 2* Intel Gold 6252N 2.3 GHz.
+    - RAM Memory: 12* 16GB DDR4-2933.
+    - Disks: 2* 1.92TB SATA SSD.
+    - NICs configuration:
+      - Numa0: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot2 18:00.xx: x710-4p10GE Intel.
+        - PCIe Slot4 3b:00.xx: xxv710-DA2-2p25GE Intel.
+        - PCIe Slot9 5e:00.xx: ConnectX5-2p100GE Mellanox.
+      - Numa1: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot6 86:00.xx: e810-2p100GE Intel.
+        - PCIe Slot8 af:00.xx: empty.
+        - PCIe Slot10 d8:00.xx: empty.
+
+7. **Server-Type-C3**: Purpose - Cascadelake Xeon TG for FD.io performance testing.
+  - Quantity: 3.
+  - Physical connectivity:
+    - IPMI and host management ports.
+    - NIC ports connected into 2-node testbed topologies.
+  - Main HW configuration:
+    - Chassis: SuperMicro SYS-7049GP-TRT.
+    - Motherboard: SuperMicro X11DPG-QT.
+    - Processors: 2* Intel Platinum 8280 2.7 GHz.
+    - RAM Memory: 12* 16GB DDR4-2933.
+    - Disks: 2* 1.92TB SATA SSD.
+    - NICs configuration:
+      - Numa0: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot2 18:00.xx: x710-4p10GE Intel.
+        - PCIe Slot4 3b:00.xx: xxv710-DA2-2p25GE Intel.
+        - PCIe Slot9 5e:00.xx: ConnectX5-2p100GE Mellanox.
+      - Numa1: (x16, x16, x16 PCIe3.0 lanes)
+        - PCIe Slot6 86:00.xx: ConnectX5-2p100GE Mellanox.
+        - PCIe Slot8 af:00.xx: ConnectX5-2p100GE Mellanox.
+        - PCIe Slot10 d8:00.xx: empty.
+
+8. **Server-Type-C4**: Purpose - Cascadelake Xeon Backend hosts for FD.io builds and data processing.
+  - Quantity: 3.
+  - Physical connectivity:
+    - IPMI and host management ports.
+    - no NIC ports, standalone setup.
+  - Main HW configuration:
+    - Chassis: SuperMicro 1029P-WTRT.
+    - Motherboard: SuperMicro X11DDW-NT.
+    - Processors: 2* Intel Platinum 8280 2.7 GHz.
+    - RAM Memory: 12* 16GB DDR4-2933.
+ - Disks: 4* 1.92TB SATA SSD. + - NICs configuration: + - Numa0: + - no cards. + - Numa1: + - no cards. + +9. **Server-Type-D1**: Purpose - Zen2 EPYC SUT for FD.io performance testing. + - Quantity: 1. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro AS-1114S-WTRT + - Processors: 1* AMD EPYC 7532 2.4 GHz. + - RAM Memory: 8* 32GB DDR4-2933. + - Disks: 1* 1TB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe3.0 lanes) + - PCIe Slot1 01:00.xx: x710-4p10GE Intel. + - PCIe Slot2 41:00.xx: xxv710-DA2-2p25GE Intel. + - PCIe Slot3 81:00.xx: mcx556a-edat ConnectX5-2p100GE Mellanox. + +10. **Server-Type-D2**: Purpose - Zen2 EPYC TG for FD.io performance testing. + - Quantity: 1. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro AS-1114S-WTRT + - Processors: 1* AMD EPYC 7532 2.4 GHz. + - RAM Memory: 8* 32GB DDR4-2933. + - Disks: 1* 1TB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe3.0 lanes) + - PCIe Slot1 01:00.xx: mcx556a-edat ConnectX5-2p100GE Mellanox. + - PCIe Slot2 41:00.xx: x710-4p10GE Intel. + - PCIe Slot3 81:00.xx: xxv710-DA2 2p25GE Intel. + +11. **Server-Type-E11**: Purpose - ThunderX2 Arm Marvell SUT for FD.io VPP_Device functional tests. + - Quantity: 2 + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 1-node topologies. + - Main HW configuration: + - Chassis: GIGABYTE Rack Mount + - Motherboard: MT91-FS4-00 + - Processors: 2 * ThunderX2 ARMv8 CN9980 2.20 GHz + - RAM Memory: 16 * 16GB DIMM + - Disks: 2 * 480GB 6G SATA SSD SAMSUNG MZ7LH480 + - NICs configuration: + - Numa0: + - PCIe Slot4 05:00.xx: XL710-QDA2-2p40GE Intel. + - PCIe Slot8 0b:00.xx: ConnectX5-2p10/25GE Mellanox. + - Numa1: + - PCIe Slot14 91:00.xx: XL710-QDA2-2p40GE Intel. + - PCIe Slot26 9a:00.xx: ConnectX5-2p10/25GE Mellanox. + +12. **Server-Type-E21**: Purpose - TaiShan Arm Huawei SUT for FD.io performance testing. + - Quantity: 2 + - Physical connectivity: + - IPMI(?) and host management ports. + - NIC ports connected into 3-node topology. + - Main HW configuration: + - Chassis: Huawei TaiShan 2280. + - Processors: 2* hip07-d05 ~ 32* Arm Cortex-A72 + - RAM Memory: 8* 16GB DDR4-2400MT/s + - Disks: 1* 4TB SATA HDD + - NICs configuration: + - PCIe Slot4 e9:00.xx: connectx4-2p25GE Mellanox. + - PCIe Slot6 11:00.xx: x520-2p10GE Intel. + +13. **Server-Type-E22**: Purpose - ThunderX2 Arm Marvell SUT for FD.io performance testing. + - Quantity: 1 + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node topologies. + - Main HW configuration: + - Chassis: Gigabyte R181-T90 1U + - Motherboard: MT91-FS1 + - Processors: 2* ThunderX2 ARMv8 CN9975 2.0 GHz + - RAM Memory: 4* 32GB RDIMM + - Disks: 1* 480GB SSD Micron, 1* 1000GB HDD Seagate_25 + - NICs configuration: + - Numa0: + - no cards + - Numa1: + - PCIe Slot18 91:00.xx: XL710-QDA2-2p40GE Intel. + +14. **Server-Type-E23**: Purpose - Altra Arm Ampere SUT for FD.io performance testing. + - Quantity: 2 + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 3-node topologies. 
+ - Main HW configuration: + - Chassis: WIWYNN Mt.Jade Server System B81.030Z1.0007 2U + - Motherboard: Mt.Jade Motherboard + - Processors: 2* Ampere(R) Altra(R) Q80-30 Processor (Neoverse N1) + - Processor Signature: Implementor 0x41, Variant 0x3, Architecture 15, Part 0xd0c, Revision 1 + - RAM Memory: 16* 8GB DDR4-3200MT/s + - Disks: 2* 960GB SSD Samsung M.2 NVMe PM983 + - NICs configuration: + - Numa0: + - PCIe Slot1 0004:04:00.x: xl710-QDA2-2p40GE Intel. + - Numa1: + - no cards. +15. **Server-Type-E24**: Purpose - Altra Arm Ampere for FD.io build. + - Quantity: 2. + - Physical connectivity: + - IPMI and host management ports. + - Main HW configuration: + - Chassis: Gigabyte R152-P30-00 1U + - Motherboard: MP32-AR1-00 + - Processors: 1* Ampere(R) Altra(R) Q80-30 Processor (Neoverse N1) + - Processor Signature: Implementor 0x0a, Variant 0x1, Architecture 6, Part 0x000, Revision 1 + - RAM Memory: 12* 16GB DDR4-3200MT/s + - Disks: 1* 960GB SSD Samsung M.2 NVMe PM983 + +16. **Server-Type-E31**: Purpose - Skylake Xeon TG for FD.io performance testing. + - Quantity: 1 + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node and 3-node topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-7049GP-TRT. + - Motherboard: SuperMicro X11DPG-QT. + - Processors: 2* Intel Platinum 8180 2.5 GHz. + - RAM Memory: 16* 16GB DDR4-2666MHz. + - Disks: 2* 1.6TB 6G SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe3.0 lanes) + - PCIe Slot2 18:00.xx: x710-4p10GE Intel. + - PCIe Slot4 3b:00.xx: xxv710-DA2 2p25GE Intel. + - PCIe Slot9 5e:00.xx: empty. + - Numa1: (x16, x16, x16 PCIe3.0 lanes) + - PCIe Slot6 86:00.xx: empty. + - PCIe Slot8 af:00.xx: XL710-QDA2-2p40GE Intel. + - PCIe Slot10 d8:00.xx: x710-4p10GE Intel. + +17. **Server-Type-F1**: Purpose - Icelake Xeon SUT for FD.io performance testing. + - Quantity: 8. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node or 3-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-740GP-TNRT. + - Motherboard: Super X12DPG-QT6. + - Processors: 2* Intel Platinum 8358 2.6 GHz. + - RAM Memory: 16* 16GB DDR4-3200. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. + - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. + - PCIe Slot9 5e:00.xx: e810-2CQDA2-2p100GE Intel. + - Numa1: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot6 86:00.xx: empty. + - PCIe Slot8 af:00.xx: empty. + - PCIe Slot10 d8:00.xx: empty. + +18. **Server-Type-F2**: Purpose - Icelake Xeon TG for FD.io performance testing. + - Quantity: 3. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-740GP-TNRT. + - Motherboard: Super X12DPG-QT6. + - Processors: 2* Intel Platinum 8358 2.6 GHz. + - RAM Memory: 16* 16GB DDR4-3200. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. + - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. + - PCIe Slot9 5e:00.xx: e810-2CQDA2-2p100GE Intel. + - Numa1: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot6 86:00.xx: e810-2CQDA2-2p100GE Intel. + - PCIe Slot8 af:00.xx: empty. + - PCIe Slot10 d8:00.xx: empty. + +19. **Server-Type-F3**: Purpose - Icelake Xeon TG for FD.io performance testing. + - Quantity: 3. 
+ - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 3-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-740GP-TNRT. + - Motherboard: Super X12DPG-QT6. + - Processors: 2* Intel Platinum 8358 2.6 GHz. + - RAM Memory: 16* 16GB DDR4-3200. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. + - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. + - PCIe Slot9 5e:00.xx: e810-2CQDA2-2p100GE Intel. + - Numa1: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot6 86:00.xx: empty. + - PCIe Slot8 af:00.xx: empty. + - PCIe Slot10 d8:00.xx: empty. +20. **Server-Type-F4**: Purpose - Icelake Xeon Shared TG for FD.io performance testing. + - Quantity: 3. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node and/or 3-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-740GP-TNRT. + - Motherboard: Super X12DPG-QT6. + - Processors: 2* Intel Platinum 8358 2.6 GHz. + - RAM Memory: 16* 16GB DDR4-3200. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot2 18:00.xx: xxv710-DA2-2p25GE Intel. + - PCIe Slot4 3b:00.xx: empty. + - PCIe Slot9 5e:00.xx: empty. + - Numa1: (x16, x16, x16 PCIe4.0 lanes) + - PCIe Slot6 86:00.xx: e810-XXVDA4-4p25GE Intel. + - PCIe Slot8 af:00.xx: e810-2CQDA2-2p100GE Intel. + - PCIe Slot10 d8:00.xx: empty. + +21. **Server-Type-G1**: Purpose - SnowRidge Atom SUT for FD.io performance testing. + - Quantity: 2 + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 3-node testbed topology. + - Main HW configuration: + - Chassis: Intel JACOBSVILLE SDP. + - Motherboard: Intel JACOBSVILLE E63448-400. + - Processors: 1* Intel Atom P5362B 2.2 GHz. + - RAM Memory: 2* 16GB DDR4-2933. + - Disks: ?* ? SATA SSD. + - NICs configuration: + - Numa0: (x16, PCIe3.0 lane) + - PCIe BuiltIn ec:00.xx: e810-XXVDA4-4p25GE Intel. + +22. **Server-Type-H1**: Purpose - SapphireRapids Xeon SUT for FD.io full system performance testing. + - Quantity: 1. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 3-numa-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-741GE-TNRT. + - Motherboard: Super X13DEG-QT-P. + - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. + - RAM Memory: 16* 32GB DDR5-4800. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot4 3b:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot10 5e:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - Numa1: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot7 86:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot9 af:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot11 d8:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + +23. **Server-Type-H2**: Purpose - SapphireRapids Xeon TG for FD.io full system performance testing. + - Quantity: 1. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 3-numa-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-741GE-TNRT. + - Motherboard: Super X13DEG-QT-P. + - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. + - RAM Memory: 16* 32GB DDR5-4800. + - Disks: 2* 960GB SATA SSD. 
+ - NICs configuration: + - Numa0: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot4 3b:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot10 5e:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - Numa1: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot7 86:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot9 af:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot11 d8:00.xx: empty. + +24. **Server-Type-H3**: Purpose - SapphireRapids Xeon SUT for FD.io performance testing. + - Quantity: 1. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 3-numa-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-741GE-TNRT. + - Motherboard: Super X13DEG-QT-P. + - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. + - RAM Memory: 16* 32GB DDR5-4800. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot2 18:00.xx: e810-2CQDA2-2p100GE Intel. + - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. + - PCIe Slot10 5e:00.xx: empty. + - Numa1: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot7 86:00.xx: e810-2CQDA2-2p100GE Intel. + - PCIe Slot9 af:00.xx: e810-XXVDA4-4p25GE Intel. + - PCIe Slot11 d8:00.xx: empty. + +25. **Server-Type-H4**: Purpose - SapphireRapids Xeon TG for FD.io performance testing. + - Quantity: 1. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 3-numa-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-741GE-TNRT. + - Motherboard: Super X13DEG-QT-P. + - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. + - RAM Memory: 16* 32GB DDR5-4800. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot2 18:00.xx: e810-2CQDA2-2p100GE Intel. + - PCIe Slot4 3b:00.xx: e810-XXVDA4-4p25GE Intel. + - PCIe Slot10 5e:00.xx: empty. + - Numa1: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot7 86:00.xx: empty. + - PCIe Slot9 af:00.xx: empty. + - PCIe Slot11 d8:00.xx: empty. + +26. **Server-Type-H5**: Purpose - SapphireRapids Xeon SUT for FD.io performance testing. + - Quantity: 2. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node testbed topologies. + - Main HW configuration: + - Chassis: SuperMicro SYS-741GE-TNRT. + - Motherboard: Super X13DEG-QT-P. + - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. + - RAM Memory: 16* 32GB DDR5-4800. + - Disks: 2* 960GB SATA SSD. + - NICs configuration: + - Numa0: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia. + - PCIe Slot4 3b:00.xx: e810-2CQDA2-2p100GE Intel. + - PCIe Slot10 5e:00.xx: e810-XXVDA4-4p25GE Intel. + - Numa1: (x16, x16, x16 PCIe5.0 lanes) + - PCIe Slot7 86:00.xx: empty. + - PCIe Slot9 af:00.xx: empty. + - PCIe Slot11 d8:00.xx: empty. + +27. **Server-Type-H6**: Purpose - SapphireRapids Xeon TG for FD.io performance testing. + - Quantity: 2. + - Physical connectivity: + - IPMI and host management ports. + - NIC ports connected into 2-node testbed topologies plus loopbacks in Numa1 for TG self-test. + - Main HW configuration: + - Chassis: SuperMicro SYS-741GE-TNRT. + - Motherboard: Super X13DEG-QT-P. + - Processors: 2* Intel Platinum 8462Y+ 32 core 2.8 GHz 300W TDP. + - RAM Memory: 16* 32GB DDR5-4800. + - Disks: 2* 960GB SATA SSD. 
+  - NICs configuration:
+    - Numa0: (x16, x16, x16 PCIe5.0 lanes)
+      - PCIe Slot2 18:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia.
+      - PCIe Slot4 3b:00.xx: e810-2CQDA2-2p100GE Intel.
+      - PCIe Slot10 5e:00.xx: e810-XXVDA4-4p25GE Intel.
+    - Numa1: (x16, x16, x16 PCIe5.0 lanes)
+      - PCIe Slot7 86:00.xx: MCX713106AS-VEAT ConnectX7-2p200GE Nvidia.
+      - PCIe Slot9 af:00.xx: e810-2CQDA2-2p100GE Intel.
+      - PCIe Slot11 d8:00.xx: empty.
+
+## Testbeds Configuration
+
+### 1-Node-Skylake (1n-skx)
+
+```
+- SUT [Server-Type-B6]:
+  - testbedname: testbed11.
+  - hostname: s1-t11-sut1.
+  - IPMI IP: 10.30.50.47
+  - Host IP: 10.30.51.50
+  - portnames:
+    - s1-t11-sut1-c2/p1 - 10GE-port1 x710-4p10GE.
+    - s1-t11-sut1-c2/p2 - 10GE-port2 x710-4p10GE.
+    - s1-t11-sut1-c2/p3 - 10GE-port3 x710-4p10GE.
+    - s1-t11-sut1-c2/p4 - 10GE-port4 x710-4p10GE.
+    - s1-t11-sut1-c4/p1 - 10GE-port1 x710-4p10GE.
+    - s1-t11-sut1-c4/p2 - 10GE-port2 x710-4p10GE.
+    - s1-t11-sut1-c4/p3 - 10GE-port3 x710-4p10GE.
+    - s1-t11-sut1-c4/p4 - 10GE-port4 x710-4p10GE.
+- SUT [Server-Type-B6]:
+  - testbedname: testbed12.
+  - hostname: s2-t12-sut1.
+  - IPMI IP: 10.30.50.48
+  - Host IP: 10.30.51.51
+  - portnames:
+    - s2-t12-sut1-c2/p1 - 10GE-port1 x710-4p10GE.
+    - s2-t12-sut1-c2/p2 - 10GE-port2 x710-4p10GE.
+    - s2-t12-sut1-c2/p3 - 10GE-port3 x710-4p10GE.
+    - s2-t12-sut1-c2/p4 - 10GE-port4 x710-4p10GE.
+    - s2-t12-sut1-c4/p1 - 10GE-port1 x710-4p10GE.
+    - s2-t12-sut1-c4/p2 - 10GE-port2 x710-4p10GE.
+    - s2-t12-sut1-c4/p3 - 10GE-port3 x710-4p10GE.
+    - s2-t12-sut1-c4/p4 - 10GE-port4 x710-4p10GE.
+```
+
+### 1-Node-ThunderX2 (1n-tx2)
+
+```
+- SUT [Server-Type-E11]:
+  - testbedname: testbed13.
+  - hostname: s55-t13-sut1.
+  - IPMI IP: 10.30.50.70
+  - Host IP: 10.30.51.70
+  - portnames:
+    - s55-t13-sut1-c4/p1 - 40GE-port1 XL710-QDA2-2p40GE.
+    - s55-t13-sut1-c4/p2 - 40GE-port2 XL710-QDA2-2p40GE.
+    - s55-t13-sut1-c8/p1 - 10/25GE-port1 ConnectX5-2p10/25GE Mellanox.
+    - s55-t13-sut1-c8/p2 - 10/25GE-port2 ConnectX5-2p10/25GE Mellanox.
+    - s55-t13-sut1-c14/p1 - 40GE-port1 XL710-QDA2-2p40GE.
+    - s55-t13-sut1-c14/p2 - 40GE-port2 XL710-QDA2-2p40GE.
+    - s55-t13-sut1-c26/p1 - 10/25GE-port1 ConnectX5-2p10/25GE Mellanox.
+    - s55-t13-sut1-c26/p2 - 10/25GE-port2 ConnectX5-2p10/25GE Mellanox.
+- SUT [Server-Type-E11]:
+  - testbedname: testbed14.
+  - hostname: s56-t14-sut1.
+  - IPMI IP: 10.30.50.71
+  - Host IP: 10.30.51.71
+  - portnames:
+    - s56-t14-sut1-c4/p1 - 40GE-port1 XL710-QDA2-2p40GE.
+    - s56-t14-sut1-c4/p2 - 40GE-port2 XL710-QDA2-2p40GE.
+    - s56-t14-sut1-c8/p1 - 10/25GE-port1 ConnectX5-2p10/25GE Mellanox.
+    - s56-t14-sut1-c8/p2 - 10/25GE-port2 ConnectX5-2p10/25GE Mellanox.
+    - s56-t14-sut1-c14/p1 - 40GE-port1 XL710-QDA2-2p40GE.
+    - s56-t14-sut1-c14/p2 - 40GE-port2 XL710-QDA2-2p40GE.
+    - s56-t14-sut1-c26/p1 - 10/25GE-port1 ConnectX5-2p10/25GE Mellanox.
+    - s56-t14-sut1-c26/p2 - 10/25GE-port2 ConnectX5-2p10/25GE Mellanox.
+```
+
+### 1-Node-Cascadelake (1n-clx)
+
+```
+- SUT [Server-Type-C1]:
+  - testbedname: testbed11.
+  - hostname: s32-t14-sut1.
+  - IPMI IP: 10.30.55.17
+  - Host IP: 10.32.8.17
+  - portnames:
+    - s32-t14-sut1-c2/p1 - 10GE-port1 x710-4p10GE.
+    - s32-t14-sut1-c2/p2 - 10GE-port2 x710-4p10GE.
+    - s32-t14-sut1-c2/p3 - 10GE-port3 x710-4p10GE.
+    - s32-t14-sut1-c2/p4 - 10GE-port4 x710-4p10GE.
+    - s32-t14-sut1-c4/p1 - 10GE-port1 x710-4p10GE.
+    - s32-t14-sut1-c4/p2 - 10GE-port2 x710-4p10GE.
+    - s32-t14-sut1-c4/p3 - 10GE-port3 x710-4p10GE.
+    - s32-t14-sut1-c4/p4 - 10GE-port4 x710-4p10GE.
+```
+
+### 2-Node-IxiaPS1L47 (2n-ps1)
+
+```
+- SUT [Server-Type-B8]:
+  - testbedname: testbed25.
+    - hostname: s25-t25-sut1.
+    - IPMI IP: 10.30.50.58
+    - Host IP: 10.30.51.61
+    - portnames:
+        - s25-t25-sut1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s25-t25-sut1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s25-t25-sut1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s25-t25-sut1-c2/p4 - 10GE-port4 x710-4p10GE.
+- TG [Server-Type-B7]:
+    - testbedname: testbed25.
+    - hostname: s26-t25-tg1.
+    - IPMI IP: 10.30.50.59
+    - Host IP: 10.30.51.62
+    - portnames:
+        - s26-t25-tg1-p1 - 10GE-port1.
+        - s26-t25-tg1-p2 - 10GE-port2.
+        - s26-t25-tg1-p3 - 10GE-port3.
+        - s26-t25-tg1-p4 - 10GE-port4.
+```
+
+### 2-Node-Cascadelake (2n-clx)
+
+{{< figure src="/cdocs/testbed-2n-clx.svg" >}}
+
+```
+- SUT [Server-Type-C2]:
+    - testbedname: testbed27.
+    - hostname: s33-t27-sut1.
+    - IPMI IP: 10.30.55.18
+    - Host IP: 10.32.8.18
+    - portnames:
+        - s33-t27-sut1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s33-t27-sut1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s33-t27-sut1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s33-t27-sut1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s33-t27-sut1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s33-t27-sut1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s33-t27-sut1-c6/p1 - 100GE-port1 e810-2p100GE.
+        - s33-t27-sut1-c6/p2 - 100GE-port2 e810-2p100GE.
+        - s33-t27-sut1-c9/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s33-t27-sut1-c9/p2 - 100GE-port2 ConnectX5-2p100GE.
+- TG [Server-Type-C3]:
+    - testbedname: testbed27.
+    - hostname: s34-t27-tg1.
+    - IPMI IP: 10.30.55.19
+    - Host IP: 10.32.8.19
+    - portnames:
+        - s34-t27-tg1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s34-t27-tg1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s34-t27-tg1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s34-t27-tg1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s34-t27-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s34-t27-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s34-t27-tg1-c6/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s34-t27-tg1-c6/p2 - 100GE-port2 ConnectX5-2p100GE.
+        - s34-t27-tg1-c8/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s34-t27-tg1-c8/p2 - 100GE-port2 ConnectX5-2p100GE.
+        - s34-t27-tg1-c9/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s34-t27-tg1-c9/p2 - 100GE-port2 ConnectX5-2p100GE.
+- SUT [Server-Type-C2]:
+    - testbedname: testbed28.
+    - hostname: s35-t28-sut1.
+    - IPMI IP: 10.30.55.20
+    - Host IP: 10.32.8.20
+    - portnames:
+        - s35-t28-sut1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s35-t28-sut1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s35-t28-sut1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s35-t28-sut1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s35-t28-sut1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s35-t28-sut1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s35-t28-sut1-c6/p1 - 100GE-port1 e810-2p100GE.
+        - s35-t28-sut1-c6/p2 - 100GE-port2 e810-2p100GE.
+        - s35-t28-sut1-c9/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s35-t28-sut1-c9/p2 - 100GE-port2 ConnectX5-2p100GE.
+- TG [Server-Type-C3]:
+    - testbedname: testbed28.
+    - hostname: s36-t28-tg1.
+    - IPMI IP: 10.30.55.21
+    - Host IP: 10.32.8.21
+    - portnames:
+        - s36-t28-tg1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s36-t28-tg1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s36-t28-tg1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s36-t28-tg1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s36-t28-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s36-t28-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s36-t28-tg1-c6/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s36-t28-tg1-c6/p2 - 100GE-port2 ConnectX5-2p100GE.
+        - s36-t28-tg1-c8/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s36-t28-tg1-c8/p2 - 100GE-port2 ConnectX5-2p100GE.
+        - s36-t28-tg1-c9/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s36-t28-tg1-c9/p2 - 100GE-port2 ConnectX5-2p100GE.
+- SUT [Server-Type-C2]:
+    - testbedname: testbed29.
+    - hostname: s37-t29-sut1.
+    - IPMI IP: 10.30.55.22
+    - Host IP: 10.32.8.22
+    - portnames:
+        - s37-t29-sut1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s37-t29-sut1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s37-t29-sut1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s37-t29-sut1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s37-t29-sut1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s37-t29-sut1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s37-t29-sut1-c6/p1 - 100GE-port1 e810-2p100GE.
+        - s37-t29-sut1-c6/p2 - 100GE-port2 e810-2p100GE.
+        - s37-t29-sut1-c9/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s37-t29-sut1-c9/p2 - 100GE-port2 ConnectX5-2p100GE.
+- TG [Server-Type-C3]:
+    - testbedname: testbed29.
+    - hostname: s38-t29-tg1.
+    - IPMI IP: 10.30.55.23
+    - Host IP: 10.32.8.23
+    - portnames:
+        - s38-t29-tg1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s38-t29-tg1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s38-t29-tg1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s38-t29-tg1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s38-t29-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s38-t29-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s38-t29-tg1-c6/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s38-t29-tg1-c6/p2 - 100GE-port2 ConnectX5-2p100GE.
+        - s38-t29-tg1-c9/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s38-t29-tg1-c9/p2 - 100GE-port2 ConnectX5-2p100GE.
+```
+
+### 2-Node-Zen2 (2n-zn2)
+
+{{< figure src="/cdocs/testbed-2n-zn2.svg" >}}
+
+```
+- SUT [Server-Type-D1]:
+    - testbedname: testbed210.
+    - hostname: s60-t210-sut1.
+    - IPMI IP: 10.30.55.24
+    - Host IP: 10.32.8.24
+    - portnames:
+        - s60-t210-sut1-c1/p1 - 10GE-port1 x710-4p10GE.
+        - s60-t210-sut1-c1/p2 - 10GE-port2 x710-4p10GE.
+        - s60-t210-sut1-c1/p3 - 10GE-port3 x710-4p10GE.
+        - s60-t210-sut1-c1/p4 - 10GE-port4 x710-4p10GE.
+        - s60-t210-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s60-t210-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s60-t210-sut1-c3/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s60-t210-sut1-c3/p2 - 100GE-port2 ConnectX5-2p100GE.
+- TG [Server-Type-D2]:
+    - testbedname: testbed210.
+    - hostname: s61-t210-tg1.
+    - IPMI IP: 10.30.55.25
+    - Host IP: 10.32.8.25
+    - portnames:
+        - s61-t210-tg1-c1/p1 - 100GE-port1 ConnectX5-2p100GE.
+        - s61-t210-tg1-c1/p2 - 100GE-port2 ConnectX5-2p100GE.
+        - s61-t210-tg1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s61-t210-tg1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s61-t210-tg1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s61-t210-tg1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s61-t210-tg1-c3/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s61-t210-tg1-c3/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+```
+
+### 2-Node-ThunderX2 (2n-tx2)
+
+{{< figure src="/cdocs/testbed-2n-tx2.svg" >}}
+
+```
+- SUT [Server-Type-E22]:
+    - testbedname: testbed211.
+    - hostname: s27-t211-sut1.
+    - IPMI IP: 10.30.50.69
+    - Host IP: 10.30.51.69
+    - portnames:
+        - s27-t211-sut1-c18/p1 - 40GE-port1 XL710-QDA2-2p40GE.
+        - s27-t211-sut1-c18/p2 - 40GE-port2 XL710-QDA2-2p40GE.
+- TG [Server-Type-E31]:
+    - testbedname: testbed33 and testbed211.
+    - hostname: s19-t33t211-tg1.
+    - IPMI IP: 10.30.50.46
+    - Host IP: 10.30.51.49
+    - portnames:
+        - s19-t33t211-tg1-c2/p1 - 10GE-port1 x710-4p10GE.
+        - s19-t33t211-tg1-c2/p2 - 10GE-port2 x710-4p10GE.
+        - s19-t33t211-tg1-c2/p3 - 10GE-port3 x710-4p10GE.
+        - s19-t33t211-tg1-c2/p4 - 10GE-port4 x710-4p10GE.
+        - s19-t33t211-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s19-t33t211-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s19-t33t211-tg1-c8/p1 - 40GE-port1 xl710-QDA2-2p40GE.
+        - s19-t33t211-tg1-c8/p2 - 40GE-port2 xl710-QDA2-2p40GE.
+ - s19-t33t211-tg1-c10/p1 - 10GE-port1 x710-4p10GE. + - s19-t33t211-tg1-c10/p2 - 10GE-port2 x710-4p10GE. + - s19-t33t211-tg1-c10/p3 - 10GE-port3 x710-4p10GE. + - s19-t33t211-tg1-c10/p4 - 10GE-port4 x710-4p10GE. +``` + +### 2-Node-Icelake (2n-icx) + +{{< figure src="/cdocs/testbed-2n-icx.svg" >}} + +``` +- SUT [Server-Type-F1]: + - testbedname: testbed212. + - hostname: s71-t212-sut1. + - IPMI IP: 10.30.50.81 + - Host IP: 10.30.51.81 + - portnames: + - s71-t212-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s71-t212-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s71-t212-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s71-t212-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s71-t212-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s71-t212-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s71-t212-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s71-t212-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +- TG [Server-Type-F2]: + - testbedname: testbed212. + - hostname: s72-t212-tg1. + - IPMI IP: 10.30.50.82 + - Host IP: 10.30.51.82 + - portnames: + - s72-t212-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s72-t212-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s72-t212-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s72-t212-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s72-t212-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s72-t212-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s72-t212-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s72-t212-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s72-t212-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s72-t212-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +- SUT [Server-Type-F1]: + - testbedname: testbed213. + - hostname: s83-t213-sut1. + - IPMI IP: 10.30.50.83 + - Host IP: 10.30.51.83 + - portnames: + - s83-t213-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s83-t213-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s83-t213-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s83-t213-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s83-t213-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s83-t213-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s83-t213-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s83-t213-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +- TG [Server-Type-F2]: + - testbedname: testbed213. + - hostname: s84-t213-tg1. + - IPMI IP: 10.30.50.84 + - Host IP: 10.30.51.84 + - portnames: + - s84-t213-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s84-t213-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s84-t213-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s84-t213-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s84-t213-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s84-t213-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s84-t213-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s84-t213-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s84-t213-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s84-t213-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +- SUT [Server-Type-F1]: + - testbedname: testbed214. + - hostname: s85-t214-sut1. + - IPMI IP: 10.30.50.85 + - Host IP: 10.30.51.85 + - portnames: + - s85-t214-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s85-t214-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s85-t214-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s85-t214-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s85-t214-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s85-t214-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s85-t214-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s85-t214-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. 
+- TG [Server-Type-F2]:
+    - testbedname: testbed214.
+    - hostname: s86-t214-tg1.
+    - IPMI IP: 10.30.50.86
+    - Host IP: 10.30.51.86
+    - portnames:
+        - s86-t214-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s86-t214-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s86-t214-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s86-t214-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s86-t214-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s86-t214-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s86-t214-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s86-t214-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+        - s86-t214-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s86-t214-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+- SUT [Server-Type-F1]:
+    - testbedname: testbed215.
+    - hostname: s87-t215-sut1.
+    - IPMI IP: 10.30.50.87
+    - Host IP: 10.30.51.87
+    - portnames:
+        - s87-t215-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s87-t215-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s87-t215-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s87-t215-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s87-t215-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s87-t215-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s87-t215-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s87-t215-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+- TG [Server-Type-F2]:
+    - testbedname: testbed215.
+    - hostname: s88-t215-tg1.
+    - IPMI IP: 10.30.50.88
+    - Host IP: 10.30.51.88
+    - portnames:
+        - s88-t215-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s88-t215-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s88-t215-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s88-t215-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s88-t215-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s88-t215-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s88-t215-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s88-t215-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+        - s88-t215-tg1-c6/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s88-t215-tg1-c6/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+```
+
+### 3-Node-Rangeley (3n-rng)
+
+Note: There is no IPMI. Serial console is accessible via VIRL2 and VIRL3 USB.
+
+```
+- ServerB22 [Server-Type-B5]:
+    - testbedname: testbed35.
+    - hostname: s22-t35-sut1 (vex-yul-rot-netgate-1).
+    - IPMI IP: 10.30.51.29 - screen -r /dev/ttyUSB0
+    - Host IP: 10.30.51.9
+    - portnames:
+        - s22-t35-sut1-p1 - 10GE-port1 ix0 82599.
+        - s22-t35-sut1-p2 - 10GE-port2 ix1 82599.
+        - 1GB ports (tbd)
+- ServerB23 [Server-Type-B5]:
+    - testbedname: testbed35.
+    - hostname: s23-t35-sut2 (vex-yul-rot-netgate-2).
+    - IPMI IP: 10.30.51.30 - screen -r /dev/ttyUSB1
+    - Host IP: 10.30.51.10
+    - portnames:
+        - s23-t35-sut2-p1 - 10GE-port1 ix0 82599.
+        - s23-t35-sut2-p2 - 10GE-port2 ix1 82599.
+        - 1GB ports (tbd)
+- ServerB24 [Server-Type-B5]:
+    - testbedname: testbed35.
+    - hostname: s24-t35-sut3 (vex-yul-rot-netgate-3).
+    - IPMI IP: 10.30.51.30 - screen -r /dev/ttyUSB2
+    - Host IP: 10.30.51.11
+    - portnames:
+        - s24-t35-sut3-p1 - 10GE-port1 ix0 82599.
+        - s24-t35-sut3-p2 - 10GE-port2 ix1 82599.
+        - 1GB ports (tbd)
+```
+
+### 3-Node-Taishan (3n-tsh)
+
+{{< figure src="/cdocs/testbed-3n-tsh.svg" >}}
+
+```
+- SUT [Server-Type-E21]:
+    - testbedname: testbed33.
+    - hostname: s17-t33-sut1.
+    - IPMI IP: 10.30.50.36
+    - Host IP: 10.30.51.36
+    - portnames:
+        - s17-t33-sut1-c6/p1 - 10GE-port1 x520-2p10GE.
+        - s17-t33-sut1-c6/p2 - 10GE-port2 x520-2p10GE.
+        - s17-t33-sut1-c4/p1 - 25GE-port1 cx4-2p25GE.
+        - s17-t33-sut1-c4/p2 - 25GE-port2 cx4-2p25GE.
+- SUT [Server-Type-E21]: + - testbedname: testbed33. + - hostname: s18-t33-sut2. + - IPMI IP: 10.30.50.37 + - Host IP: 10.30.51.37 + - portnames: + - s18-t33-sut2-c6/p1 - 10GE-port1 x520-2p10GE. + - s18-t33-sut2-c6/p2 - 10GE-port2 x520-2p10GE. + - s18-t33-sut2-c4/p1 - 25GE-port1 cx4-2p25GE. + - s18-t33-sut2-c4/p2 - 25GE-port2 cx4-2p25GE. +- TG [Server-Type-E31]: + - testbedname: testbed33 and testbed211. + - hostname: s19-t33t211-tg1. + - IPMI IP: 10.30.50.46 + - Host IP: 10.30.51.49 + - portnames: + - s19-t33t211-tg1-c2/p1 - 10GE-port1 x710-4p10GE. + - s19-t33t211-tg1-c2/p2 - 10GE-port2 x710-4p10GE. + - s19-t33t211-tg1-c2/p3 - 10GE-port3 x710-4p10GE. + - s19-t33t211-tg1-c2/p4 - 10GE-port4 x710-4p10GE. + - s19-t33t211-tg1-c4/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s19-t33t211-tg1-c4/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s19-t33t211-tg1-c8/p1 - 40GE-port1 xl710-QDA2-2p40GE. + - s19-t33t211-tg1-c8/p2 - 40GE-port2 xl710-QDA2-2p40GE. + - s19-t33t211-tg1-c10/p1 - 10GE-port1 x710-4p10GE. + - s19-t33t211-tg1-c10/p2 - 10GE-port2 x710-4p10GE. + - s19-t33t211-tg1-c10/p3 - 10GE-port3 x710-4p10GE. + - s19-t33t211-tg1-c10/p4 - 10GE-port4 x710-4p10GE. +``` + +### 3-Node-Altra (3n-alt) + +{{< figure src="/cdocs/testbed-3n-alt.svg" >}} + +``` +- SUT [Server-Type-E23]: + - testbedname: testbed34. + - hostname: s62-t34-sut1. + - IPMI IP: 10.30.50.72 + - Host IP: 10.30.51.72 + - portnames: + - s62-t34-sut1-c1/p1 - 40GE-port1 xl710-QDA2-2p40GE. + - s62-t34-sut1-c1/p2 - 40GE-port2 xl710-QDA2-2p40GE. +- SUT [Server-Type-E23]: + - testbedname: testbed34. + - hostname: s63-t34-sut2. + - IPMI IP: 10.30.50.73 + - Host IP: 10.30.51.73 + - portnames: + - s63-t34-sut2-c1/p1 - 40GE-port1 xl710-QDA2-2p40GE. + - s63-t34-sut2-c1/p2 - 40GE-port2 xl710-QDA2-2p40GE. +- TG [Server-Type-F4]: + - testbedname: testbed34. + - hostname: s64-t34-tg1. + - IPMI IP: 10.30.50.74 + - Host IP: 10.30.51.74 + - portnames: + - s64-t34-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s64-t34-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s64-t34-tg1-c4/p1 - 40GE-port1 xl710-QDA2-2p40GE. + - s64-t34-tg1-c4/p2 - 40GE-port2 xl710-QDA2-2p40GE. + - s64-t34-tg1-c6/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s64-t34-tg1-c6/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s64-t34-tg1-c6/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s64-t34-tg1-c6/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s64-t34-tg1-c8/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s64-t34-tg1-c8/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +``` + +### 3-Node-Icelake (3n-icx) + +{{< figure src="/cdocs/testbed-3n-icx.svg" >}} + +``` +- ServerF1 [Server-Type-F1]: + - testbedname: testbed37. + - hostname: s65-t37-sut1. + - IPMI IP: 10.30.50.75 + - Host IP: 10.30.51.75 + - portnames: + - s65-t37-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s65-t37-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s65-t37-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s65-t37-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s65-t37-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s65-t37-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s65-t37-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s65-t37-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +- ServerF1 [Server-Type-F1]: + - testbedname: testbed37. + - hostname: s66-t37-sut2. + - IPMI IP: 10.30.50.76 + - Host IP: 10.30.51.76 + - portnames: + - s66-t37-sut2-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE. + - s66-t37-sut2-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE. + - s66-t37-sut2-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s66-t37-sut2-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. 
+        - s66-t37-sut2-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s66-t37-sut2-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s66-t37-sut2-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s66-t37-sut2-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+- ServerF3 [Server-Type-F3]:
+    - testbedname: testbed37.
+    - hostname: s67-t37-tg1.
+    - IPMI IP: 10.30.50.77
+    - Host IP: 10.30.51.77
+    - portnames:
+        - s67-t37-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s67-t37-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s67-t37-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s67-t37-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s67-t37-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s67-t37-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s67-t37-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s67-t37-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+- ServerF1 [Server-Type-F1]:
+    - testbedname: testbed38.
+    - hostname: s78-t38-sut1.
+    - IPMI IP: 10.30.50.78
+    - Host IP: 10.30.51.78
+    - portnames:
+        - s78-t38-sut1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s78-t38-sut1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s78-t38-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s78-t38-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s78-t38-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s78-t38-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s78-t38-sut1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s78-t38-sut1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+- ServerF1 [Server-Type-F1]:
+    - testbedname: testbed38.
+    - hostname: s79-t38-sut2.
+    - IPMI IP: 10.30.50.79
+    - Host IP: 10.30.51.79
+    - portnames:
+        - s79-t38-sut2-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s79-t38-sut2-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s79-t38-sut2-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s79-t38-sut2-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s79-t38-sut2-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s79-t38-sut2-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s79-t38-sut2-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s79-t38-sut2-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+- ServerF3 [Server-Type-F3]:
+    - testbedname: testbed38.
+    - hostname: s80-t38-tg1.
+    - IPMI IP: 10.30.50.80
+    - Host IP: 10.30.51.80
+    - portnames:
+        - s80-t38-tg1-c2/p1 - 25GE-port1 xxv710-DA2-2p25GE.
+        - s80-t38-tg1-c2/p2 - 25GE-port2 xxv710-DA2-2p25GE.
+        - s80-t38-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s80-t38-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s80-t38-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s80-t38-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+        - s80-t38-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE.
+        - s80-t38-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE.
+```
+
+### 3-Node-SnowRidge (3n-snr)
+
+{{< figure src="/cdocs/testbed-3n-snr.svg" >}}
+
+```
+- ServerG1 [Server-Type-G1]:
+    - testbedname: testbed39.
+    - hostname: s93-t39-sut1.
+    - IPMI IP: 10.30.50.93
+    - Host IP: 10.30.51.93
+    - portnames:
+        - s93-t39-sut1-c1/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s93-t39-sut1-c1/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s93-t39-sut1-c1/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s93-t39-sut1-c1/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+- ServerG1 [Server-Type-G1]:
+    - testbedname: testbed39.
+    - hostname: s94-t39-sut2.
+    - IPMI IP: 10.30.50.94
+    - Host IP: 10.30.51.94
+    - portnames:
+        - s94-t39-sut2-c1/p1 - 25GE-port1 e810-XXVDA4-4p25GE.
+        - s94-t39-sut2-c1/p2 - 25GE-port2 e810-XXVDA4-4p25GE.
+        - s94-t39-sut2-c1/p3 - 25GE-port3 e810-XXVDA4-4p25GE.
+        - s94-t39-sut2-c1/p4 - 25GE-port4 e810-XXVDA4-4p25GE.
+- ServerF4 [Server-Type-F4]:
+    - testbedname: testbed39.
+    - hostname: s89-t39t310-tg1.
+ - IPMI IP: 10.30.50.89 + - Host IP: 10.30.51.89 + - portnames: + - s89-t39t310-tg1-c6/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s89-t39t310-tg1-c6/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s89-t39t310-tg1-c6/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s89-t39t310-tg1-c6/p4 - 25GE-port4 e810-XXVDA4-4p25GE. +``` + +### 2-Node-SapphireRapids (2n-spr) + +{{< figure src="/cdocs/testbed-2n-spr.svg" >}} + +``` +- SUT [Server-Type-H1]: + - testbedname: testbed21. + - hostname: s52-t21-sut1. + - IPMI IP: 10.30.50.52 + - Host IP: 10.30.51.52 + - portnames: + - s52-t21-sut1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. + - s52-t21-sut1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. + - s52-t21-sut1-c4/p1 - 200GE-port1 ConnectX7-2p200GE. + - s52-t21-sut1-c4/p2 - 200GE-port2 ConnectX7-2p200GE. + - s52-t21-sut1-c10/p1 - 200GE-port1 ConnectX7-2p200GE. + - s52-t21-sut1-c10/p2 - 200GE-port2 ConnectX7-2p200GE. + - s52-t21-sut1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. + - s52-t21-sut1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. + - s52-t21-sut1-c9/p1 - 200GE-port1 ConnectX7-2p200GE. + - s52-t21-sut1-c9/p2 - 200GE-port2 ConnectX7-2p200GE. + - s52-t21-sut1-c11/p1 - 200GE-port1 ConnectX7-2p200GE. + - s52-t21-sut1-c11/p2 - 200GE-port2 ConnectX7-2p200GE. +- TG [Server-Type-H2]: + - testbedname: testbed21. + - hostname: s53-t21-tg1. + - IPMI IP: 10.30.50.53 + - Host IP: 10.30.51.53 + - portnames: + - s53-t21-tg1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. + - s53-t21-tg1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. + - s53-t21-tg1-c4/p1 - 200GE-port1 ConnectX7-2p200GE. + - s53-t21-tg1-c4/p2 - 200GE-port2 ConnectX7-2p200GE. + - s53-t21-tg1-c10/p1 - 200GE-port1 ConnectX7-2p200GE. + - s53-t21-tg1-c10/p2 - 200GE-port2 ConnectX7-2p200GE. + - s53-t21-tg1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. + - s53-t21-tg1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. + - s53-t21-tg1-c9/p1 - 200GE-port1 ConnectX7-2p200GE. + - s53-t21-tg1-c9/p2 - 200GE-port2 ConnectX7-2p200GE. +- SUT [Server-Type-H3]: + - testbedname: testbed22. + - hostname: s54-t22-sut1. + - IPMI IP: 10.30.50.54 + - Host IP: 10.30.51.54 + - portnames: + - s54-t22-sut1-c2/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s54-t22-sut1-c2/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s54-t22-sut1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s54-t22-sut1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s54-t22-sut1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s54-t22-sut1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s54-t22-sut1-c7/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s54-t22-sut1-c7/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s54-t22-sut1-c9/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s54-t22-sut1-c9/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s54-t22-sut1-c9/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s54-t22-sut1-c9/p4 - 25GE-port4 e810-XXVDA4-4p25GE. +- TG [Server-Type-H4]: + - testbedname: testbed22. + - hostname: s55-t22-tg1. + - IPMI IP: 10.30.50.55 + - Host IP: 10.30.51.55 + - portnames: + - s55-t22-tg1-c2/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s55-t22-tg1-c2/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s55-t22-tg1-c4/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s55-t22-tg1-c4/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s55-t22-tg1-c4/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s55-t22-tg1-c4/p4 - 25GE-port4 e810-XXVDA4-4p25GE. +- SUT [Server-Type-H5]: + - testbedname: testbed23. + - hostname: s56-t23-sut1. + - IPMI IP: 10.30.50.56 + - Host IP: 10.30.51.56 + - portnames: + - s56-t23-sut1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. + - s56-t23-sut1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. + - s56-t23-sut1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. 
+ - s56-t23-sut1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s56-t23-sut1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s56-t23-sut1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s56-t23-sut1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s56-t23-sut1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. +- TG [Server-Type-H6]: + - testbedname: testbed23. + - hostname: s57-t23-tg1. + - IPMI IP: 10.30.50.57 + - Host IP: 10.30.51.57 + - portnames: + - s57-t23-tg1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. + - s57-t23-tg1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. + - s57-t23-tg1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s57-t23-tg1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s57-t23-tg1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s57-t23-tg1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s57-t23-tg1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s57-t23-tg1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s57-t23-tg1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. + - s57-t23-tg1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. + - s57-t23-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s57-t23-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +- SUT [Server-Type-H5]: + - testbedname: testbed24. + - hostname: s58-t24-sut1. + - IPMI IP: 10.30.50.58 + - Host IP: 10.30.51.58 + - portnames: + - s58-t24-sut1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. + - s58-t24-sut1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. + - s58-t24-sut1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s58-t24-sut1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s58-t24-sut1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s58-t24-sut1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s58-t24-sut1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s58-t24-sut1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. +- TG [Server-Type-H6]: + - testbedname: testbed24. + - hostname: s59-t24-tg1. + - IPMI IP: 10.30.50.59 + - Host IP: 10.30.51.59 + - portnames: + - s59-t24-tg1-c2/p1 - 200GE-port1 ConnectX7-2p200GE. + - s59-t24-tg1-c2/p2 - 200GE-port2 ConnectX7-2p200GE. + - s59-t24-tg1-c4/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s59-t24-tg1-c4/p2 - 100GE-port2 e810-2CQDA2-2p100GE. + - s59-t24-tg1-c10/p1 - 25GE-port1 e810-XXVDA4-4p25GE. + - s59-t24-tg1-c10/p2 - 25GE-port2 e810-XXVDA4-4p25GE. + - s59-t24-tg1-c10/p3 - 25GE-port3 e810-XXVDA4-4p25GE. + - s59-t24-tg1-c10/p4 - 25GE-port4 e810-XXVDA4-4p25GE. + - s59-t24-tg1-c7/p1 - 200GE-port1 ConnectX7-2p200GE. + - s59-t24-tg1-c7/p2 - 200GE-port2 ConnectX7-2p200GE. + - s59-t24-tg1-c9/p1 - 100GE-port1 e810-2CQDA2-2p100GE. + - s59-t24-tg1-c9/p2 - 100GE-port2 e810-2CQDA2-2p100GE. +``` + +## Testbed Wiring + +### 1-Node-Skylake (1n-skx) + +``` +- testbed11: + - ring1 10GE-ports x710-4p10GE: + - s1-t11-sut1-c2/p1 to s1-t11-sut1-c4/p1. + - ring2 10GE-ports x710-4p10GE: + - s1-t11-sut1-c2/p2 to s1-t11-sut1-c4/p2. + - ring3 10GE-ports x710-4p10GE: + - s1-t11-sut1-c2/p3 to s1-t11-sut1-c4/p3. + - ring4 10GE-ports x710-4p10GE: + - s1-t11-sut1-c2/p4 to s1-t11-sut1-c4/p4. + - ring5 100GE-ports e810-2p100GE: + - s1-t11-sut1-c5/p1 to s1-t11-sut1-c6/p1. + - ring6 100GE-ports e810-2p100GE: + - s1-t11-sut1-c5/p2 to s1-t11-sut1-c6/p2. +- testbed12: + - ring1 10GE-ports x710-4p10GE: + - s2-t12-sut1-c2/p1 to s2-t12-sut1-c4/p1. + - ring2 10GE-ports x710-4p10GE: + - s2-t12-sut1-c2/p2 to s2-t12-sut1-c4/p2. + - ring3 10GE-ports x710-4p10GE: + - s2-t12-sut1-c2/p3 to s2-t12-sut1-c4/p3. + - ring4 10GE-ports x710-4p10GE: + - s2-t12-sut1-c2/p4 to s2-t12-sut1-c4/p4. + - ring5 100GE-ports e810-2p100GE: + - s2-t12-sut1-c5/p1 to s2-t12-sut1-c6/p1. 
+ - ring6 100GE-ports e810-2p100GE: + - s2-t12-sut1-c5/p2 to s2-t12-sut1-c6/p2. +``` + +### 1-Node-ThunderX2 (1n-tx2) + +``` +- testbed13: + - ring1 40GE-ports XL710-QDA2-2p40GE on SUTs: + - s55-t13-sut1-c4/p1 - s55-t13-sut1-c14/p1. + - ring2 40GE-ports XL710-QDA2-2p40GE on SUTs: + - s55-t13-sut1-c4/p2 - s55-t13-sut1-c14/p2. + - ring3 10/25GE-ports ConnectX5-2p10/25GE on SUTs: + - s55-t13-sut1-c8/p1 - s55-t13-sut1-c26/p1. + - ring4 10/25GE-ports ConnectX5-2p10/25GE on SUTs: + - s55-t13-sut1-c8/p2 - s55-t13-sut1-c26/p2. + +- testbed14: + - ring1 40GE-ports XL710-QDA2-2p40GE on SUTs: + - s56-t14-sut1-c4/p1 - s56-t14-sut1-c14/p1. + - ring2 40GE-ports XL710-QDA2-2p40GE on SUTs: + - s56-t14-sut1-c4/p2 - s56-t14-sut1-c14/p2. + - ring3 10/25GE-ports ConnectX5-2p10/25GE on SUTs: + - s56-t14-sut1-c8/p1 - s56-t14-sut1-c26/p1. + - ring4 10/25GE-ports ConnectX5-2p10/25GE on SUTs: + - s56-t14-sut1-c8/p2 - s56-t14-sut1-c26/p2. +``` + +### 2-Node-IxiaPS1L47 (2n-ps1) + +``` +- testbed25: + - link1 10GE-port x710-4p10GE on SUT: + - t25-tg1-p1 to t25-sut1-c2/p1. + - link2 10GE-port x710-4p10GE on SUT: + - t25-tg1-p2 to t25-sut1-c2/p2. + - link3 10GE-port x710-4p10GE on SUT: + - t25-tg1-p3 to t25-sut1-c2/p3. + - link4 10GE-port x710-4p10GE on SUT: + - t25-tg1-p4 to t25-sut1-c2/p4. +``` + +### 2-Node-Cascadelake (2n-clx) + +``` +- testbed27: + - ring1 10GE-ports x710-4p10GE on SUT: + - s34-t27-tg1-c2/p1 to s33-t27-sut1-c2/p1. + - s33-t27-sut1-c2/p2 to s34-t27-tg1-c2/p2. + - ring2 10GE-ports x710-4p10GE on SUT: + - s34-t27-tg1-c2/p3 to s33-t27-sut1-c2/p3. + - s33-t27-sut1-c2/p4 to s34-t27-tg1-c2/p4. + - ring3 25GE-ports xxv710-DA2-2p25GE on SUT + - s34-t27-tg1-c4/p1 to s33-t27-sut1-c4/p1. + - s33-t27-sut1-c4/p2 to s34-t27-tg1-c4/p2. + - ring4 100GE-ports ConnectX5-2p100GE on SUT: + - s34-t27-tg1-c9/p1 to s33-t27-sut1-c9/p1. + - s33-t27-sut1-c9/p2 to s34-t27-tg1-c9/p2. + - ring5 100GE-ports e810-2p100GE on SUT 100GE-ports ConnectX5-2p100GE on TG: + - s34-t27-tg1-c6/p1 to s33-t27-sut1-c6/p1. + - s33-t27-sut1-c6/p2 to s34-t27-tg1-c6/p2. + - ring6 100GE-ports e810-2p100GE on TG: + - s34-t27-tg1-c8/p1 to s34-t27-tg1-c8/p2. + - s34-t27-tg1-c8/p2 to s34-t27-tg1-c8/p1. +- testbed28: + - ring1 10GE-ports x710-4p10GE on SUT: + - s36-t28-tg1-c2/p1 to s35-t28-sut1-c2/p1. + - s35-t28-sut1-c2/p2 to s36-t28-tg1-c2/p2. + - ring2 10GE-ports x710-4p10GE on SUT: + - s36-t28-tg1-c2/p3 to s35-t28-sut1-c2/p3. + - s35-t28-sut1-c2/p4 to s36-t28-tg1-c2/p4. + - ring3 25GE-ports xxv710-DA2-2p25GE on SUT + - s36-t28-tg1-c4/p1 to s35-t28-sut1-c4/p1. + - s35-t28-sut1-c4/p2 to s36-t28-tg1-c4/p2. + - ring4 100GE-ports ConnectX5-2p100GE on SUT: + - s36-t28-tg1-c9/p1 to s35-t28-sut1-c9/p1. + - s35-t28-sut1-c9/p2 to s36-t28-tg1-c9/p2. + - ring5 100GE-ports e810-2p100GE on SUT 100GE-ports ConnectX5-2p100GE on TG: + - s36-t28-tg1-c6/p1 to s35-t28-sut1-c6/p1. + - s35-t28-sut1-c6/p2 to s36-t28-tg1-c6/p2. + - ring6 100GE-ports e810-2p100GE on TG: + - s36-t28-tg1-c8/p1 to s36-t28-tg1-c8/p2. + - s36-t28-tg1-c8/p2 to s36-t28-tg1-c8/p1. +- testbed29: + - ring1 10GE-ports x710-4p10GE on SUT: + - s38-t29-tg1-c2/p1 to s37-t29-sut1-c2/p1. + - s37-t29-sut1-c2/p2 to s38-t29-tg1-c2/p2. + - ring2 10GE-ports x710-4p10GE on SUT: + - s38-t29-tg1-c2/p3 to s37-t29-sut1-c2/p3. + - s37-t29-sut1-c2/p4 to s38-t29-tg1-c2/p4. + - ring3 25GE-ports xxv710-DA2-2p25GE on SUT + - s38-t29-tg1-c4/p1 to s37-t29-sut1-c4/p1. + - s37-t29-sut1-c4/p2 to s38-t29-tg1-c4/p2. + - ring4 100GE-ports ConnectX5-2p100GE on SUT: + - s38-t29-tg1-c9/p1 to s37-t29-sut1-c9/p1. 
+        - s37-t29-sut1-c9/p2 to s38-t29-tg1-c9/p2.
+    - ring5 100GE-ports e810-2p100GE on SUT 100GE-ports ConnectX5-2p100GE on TG:
+        - s38-t29-tg1-c6/p1 to s37-t29-sut1-c6/p1.
+        - s37-t29-sut1-c6/p2 to s38-t29-tg1-c6/p2.
+```
+
+### 2-Node-Zen2 (2n-zn2)
+
+```
+- testbed210:
+    - ring1 10GE-ports x710-4p10GE on SUT:
+        - s61-t210-tg1-c2/p1 to s60-t210-sut1-c1/p1.
+        - s60-t210-sut1-c1/p2 to s61-t210-tg1-c2/p2.
+    - ring2 10GE-ports x710-4p10GE on SUT:
+        - s61-t210-tg1-c2/p3 to s60-t210-sut1-c1/p3.
+        - s60-t210-sut1-c1/p4 to s61-t210-tg1-c2/p4.
+    - ring3 25GE-ports xxv710-DA2-2p25GE on SUT
+        - s61-t210-tg1-c3/p1 to s60-t210-sut1-c2/p1.
+        - s60-t210-sut1-c2/p2 to s61-t210-tg1-c3/p2.
+    - ring4 100GE-ports ConnectX5-2p100GE on SUT:
+        - s61-t210-tg1-c1/p1 to s60-t210-sut1-c3/p1.
+        - s60-t210-sut1-c3/p2 to s61-t210-tg1-c1/p2.
+```
+
+### 2-Node-ThunderX2 (2n-tx2)
+
+```
+- testbed211:
+    - ring1 40GE-ports XL710-QDA2-2p40GE on SUTs:
+        - s27-t211-sut1-c18/p1 - s19-t33t211-tg1-c8/p1.
+        - s27-t211-sut1-c18/p2 - s19-t33t211-tg1-c8/p2.
+```
+
+### 2-Node-Icelake (2n-icx)
+
+```
+- testbed212:
+    - ring1 25GE-ports xxv710-DA2-2p25GE on SUT
+        - s72-t212-tg1-c2/p1 to s71-t212-sut1-c2/p1.
+        - s71-t212-sut1-c2/p2 to s72-t212-tg1-c2/p2.
+    - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT:
+        - s72-t212-tg1-c4/p1 to s71-t212-sut1-c4/p1.
+        - s71-t212-sut1-c4/p2 to s72-t212-tg1-c4/p2.
+        - s72-t212-tg1-c4/p3 to s71-t212-sut1-c4/p3.
+        - s71-t212-sut1-c4/p4 to s72-t212-tg1-c4/p4.
+    - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT:
+        - s72-t212-tg1-c9/p1 to s71-t212-sut1-c9/p1.
+        - s71-t212-sut1-c9/p2 to s72-t212-tg1-c9/p2.
+    - ring4 100GE-ports e810-2CQDA2-2p100GE on TG:
+        - s72-t212-tg1-c6/p1 to s72-t212-tg1-c6/p2.
+        - s72-t212-tg1-c6/p2 to s72-t212-tg1-c6/p1.
+- testbed213:
+    - ring1 25GE-ports xxv710-DA2-2p25GE on SUT
+        - s84-t213-tg1-c2/p1 to s83-t213-sut1-c2/p1.
+        - s83-t213-sut1-c2/p2 to s84-t213-tg1-c2/p2.
+    - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT:
+        - s84-t213-tg1-c4/p1 to s83-t213-sut1-c4/p1.
+        - s83-t213-sut1-c4/p2 to s84-t213-tg1-c4/p2.
+        - s84-t213-tg1-c4/p3 to s83-t213-sut1-c4/p3.
+        - s83-t213-sut1-c4/p4 to s84-t213-tg1-c4/p4.
+    - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT:
+        - s84-t213-tg1-c9/p1 to s83-t213-sut1-c9/p1.
+        - s83-t213-sut1-c9/p2 to s84-t213-tg1-c9/p2.
+    - ring4 100GE-ports e810-2CQDA2-2p100GE on TG:
+        - s84-t213-tg1-c6/p1 to s84-t213-tg1-c6/p2.
+        - s84-t213-tg1-c6/p2 to s84-t213-tg1-c6/p1.
+- testbed214:
+    - ring1 25GE-ports xxv710-DA2-2p25GE on SUT
+        - s86-t214-tg1-c2/p1 to s85-t214-sut1-c2/p1.
+        - s85-t214-sut1-c2/p2 to s86-t214-tg1-c2/p2.
+    - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT:
+        - s86-t214-tg1-c4/p1 to s85-t214-sut1-c4/p1.
+        - s85-t214-sut1-c4/p2 to s86-t214-tg1-c4/p2.
+        - s86-t214-tg1-c4/p3 to s85-t214-sut1-c4/p3.
+        - s85-t214-sut1-c4/p4 to s86-t214-tg1-c4/p4.
+    - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT:
+        - s86-t214-tg1-c9/p1 to s85-t214-sut1-c9/p1.
+        - s85-t214-sut1-c9/p2 to s86-t214-tg1-c9/p2.
+    - ring4 100GE-ports e810-2CQDA2-2p100GE on TG:
+        - s86-t214-tg1-c6/p1 to s86-t214-tg1-c6/p2.
+        - s86-t214-tg1-c6/p2 to s86-t214-tg1-c6/p1.
+- testbed215:
+    - ring1 25GE-ports xxv710-DA2-2p25GE on SUT
+        - s88-t215-tg1-c2/p1 to s87-t215-sut1-c2/p1.
+        - s87-t215-sut1-c2/p2 to s88-t215-tg1-c2/p2.
+    - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT:
+        - s88-t215-tg1-c4/p1 to s87-t215-sut1-c4/p1.
+        - s87-t215-sut1-c4/p2 to s88-t215-tg1-c4/p2.
+        - s88-t215-tg1-c4/p3 to s87-t215-sut1-c4/p3.
+        - s87-t215-sut1-c4/p4 to s88-t215-tg1-c4/p4.
+    - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT:
+        - s88-t215-tg1-c9/p1 to s87-t215-sut1-c9/p1.
+        - s87-t215-sut1-c9/p2 to s88-t215-tg1-c9/p2.
+    - ring4 100GE-ports e810-2CQDA2-2p100GE on TG:
+        - s88-t215-tg1-c6/p1 to s88-t215-tg1-c6/p2.
+        - s88-t215-tg1-c6/p2 to s88-t215-tg1-c6/p1.
+```
+
+### 3-Node-Rangeley (3n-rng)
+
+```
+To be completed.
+```
+
+### 3-Node-Taishan (3n-tsh)
+
+```
+- testbed33:
+    - ring1 10GE-ports x520-2p10GE on SUTs:
+        - s19-t33t211-tg1-c2/p2 - s17-t33-sut1-c6/p2.
+        - s17-t33-sut1-c6/p1 - s18-t33-sut2-c6/p2.
+        - s18-t33-sut2-c6/p1 - s19-t33t211-tg1-c2/p1.
+    - ring2 25GE-ports cx4-2p25GE on SUTs:
+        - s19-t33t211-tg1-c4/p2 - s17-t33-sut1-c4/p2.
+        - s17-t33-sut1-c4/p1 - s18-t33-sut2-c4/p2.
+        - s18-t33-sut2-c4/p1 - s19-t33t211-tg1-c4/p1.
+```
+
+### 3-Node-Altra (3n-alt)
+
+```
+- testbed34:
+    - ring1 40GE-ports xl710-QDA2-2p40GE on SUTs:
+        - s64-t34-tg1-c4/p1 - s62-t34-sut1-c1/p2.
+        - s62-t34-sut1-c1/p1 - s63-t34-sut2-c1/p2.
+        - s63-t34-sut2-c1/p1 - s64-t34-tg1-c4/p2.
+```
+
+### 3-Node-Icelake (3n-icx)
+
+```
+- testbed37:
+    - ring1 25GE-ports xxv710-DA2-2p25GE on SUTs:
+        - s67-t37-tg1-c2/p1 to s65-t37-sut1-c2/p1.
+        - s65-t37-sut1-c2/p2 to s66-t37-sut2-c2/p2.
+        - s66-t37-sut2-c2/p1 to s67-t37-tg1-c2/p2.
+    - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT:
+        - s67-t37-tg1-c4/p1 to s65-t37-sut1-c4/p1.
+        - s65-t37-sut1-c4/p2 to s66-t37-sut2-c4/p2.
+        - s66-t37-sut2-c4/p1 to s67-t37-tg1-c4/p2.
+        - s67-t37-tg1-c4/p3 to s65-t37-sut1-c4/p3.
+        - s65-t37-sut1-c4/p4 to s66-t37-sut2-c4/p4.
+        - s66-t37-sut2-c4/p3 to s67-t37-tg1-c4/p4.
+    - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT
+        - s67-t37-tg1-c9/p1 to s65-t37-sut1-c9/p1.
+        - s65-t37-sut1-c9/p2 to s66-t37-sut2-c9/p2.
+        - s66-t37-sut2-c9/p1 to s67-t37-tg1-c9/p2.
+- testbed38:
+    - ring1 25GE-ports xxv710-DA2-2p25GE on SUTs:
+        - s80-t38-tg1-c2/p1 to s78-t38-sut1-c2/p1.
+        - s78-t38-sut1-c2/p2 to s79-t38-sut2-c2/p2.
+        - s79-t38-sut2-c2/p1 to s80-t38-tg1-c2/p2.
+    - ring2 25GE-ports e810-XXVDA4-4p25GE on SUT:
+        - s80-t38-tg1-c4/p1 to s78-t38-sut1-c4/p1.
+        - s78-t38-sut1-c4/p2 to s79-t38-sut2-c4/p2.
+        - s79-t38-sut2-c4/p1 to s80-t38-tg1-c4/p2.
+        - s80-t38-tg1-c4/p3 to s78-t38-sut1-c4/p3.
+        - s78-t38-sut1-c4/p4 to s79-t38-sut2-c4/p4.
+        - s79-t38-sut2-c4/p3 to s80-t38-tg1-c4/p4.
+    - ring3 100GE-ports e810-2CQDA2-2p100GE on SUT
+        - s80-t38-tg1-c9/p1 to s78-t38-sut1-c9/p1.
+        - s78-t38-sut1-c9/p2 to s79-t38-sut2-c9/p2.
+        - s79-t38-sut2-c9/p1 to s80-t38-tg1-c9/p2.
+```
+
+### 3-Node-SnowRidge (3n-snr)
+
+```
+- testbed39:
+    - ring1 25GE-ports e810-XXVDA4-4p25GE:
+        - s89-t39t310-tg1-c6/p1 to s93-t39-sut1-c1/p1.
+        - s93-t39-sut1-c1/p2 to s94-t39-sut2-c1/p2.
+        - s94-t39-sut2-c1/p1 to s89-t39t310-tg1-c6/p2.
+        - s89-t39t310-tg1-c6/p3 to s93-t39-sut1-c1/p3.
+        - s93-t39-sut1-c1/p4 to s94-t39-sut2-c1/p4.
+        - s94-t39-sut2-c1/p3 to s89-t39t310-tg1-c6/p4.
+```
+
+### 2-Node-SapphireRapids (2n-spr)
+
+```
+- testbed21:
+    - ring1 200GE-ports ConnectX7-2p200GE:
+        - s53-t21-tg1-c2/p1 to s52-t21-sut1-c2/p1
+        - s53-t21-tg1-c7/p1 to s52-t21-sut1-c7/p1
+        - s52-t21-sut1-c4/p2 to s52-t21-sut1-c9/p2
+    - ring2 200GE-ports ConnectX7-2p200GE:
+        - s53-t21-tg1-c2/p2 to s52-t21-sut1-c2/p2
+        - s53-t21-tg1-c7/p2 to s52-t21-sut1-c7/p2
+        - s52-t21-sut1-c10/p1 to s52-t21-sut1-c11/p1
+    - ring3 200GE-ports ConnectX7-2p200GE:
+        - s53-t21-tg1-c4/p1 to s52-t21-sut1-c4/p1
+        - s53-t21-tg1-c9/p1 to s52-t21-sut1-c9/p1
+        - s52-t21-sut1-c10/p2 to s52-t21-sut1-c11/p2
+- testbed22:
+    - ring1 25GE-ports e810-XXVDA4-4p25GE:
+        - s55-t22-tg1-c4/p1 to s54-t22-sut1-c9/p2
+        - s55-t22-tg1-c4/p2 to s54-t22-sut1-c4/p2
+        - s54-t22-sut1-c9/p1 to s54-t22-sut1-c4/p1
+    - ring2 100GE-ports e810-2CQDA2-2p100GE:
+        - s55-t22-tg1-c2/p1 to s54-t22-sut1-c2/p1
+        - s55-t22-tg1-c2/p2 to s54-t22-sut1-c7/p1
+        - s54-t22-sut1-c2/p2 to s54-t22-sut1-c7/p2
+- testbed23:
+    - ring1 200GE-ports ConnectX7-2p200GE:
+        - s56-t23-sut1-c2/p1 to s57-t23-tg1-c2/p1.
+        - s57-t23-tg1-c2/p2 to s56-t23-sut1-c2/p2.
+    - ring2 100GE-ports e810-2CQDA2-2p100GE:
+        - s56-t23-sut1-c4/p1 to s57-t23-tg1-c4/p1.
+        - s57-t23-tg1-c4/p2 to s56-t23-sut1-c4/p2.
+    - ring3 25GE-ports e810-XXVDA4-4p25GE:
+        - s56-t23-sut1-c10/p1 to s57-t23-tg1-c10/p1.
+        - s56-t23-sut1-c10/p2 to s57-t23-tg1-c10/p2.
+        - s56-t23-sut1-c10/p3 to s57-t23-tg1-c10/p3.
+        - s56-t23-sut1-c10/p4 to s57-t23-tg1-c10/p4.
+    - ring4 200GE-ports ConnectX7-2p200GE:
+        - s57-t23-tg1-c7/p1 to s57-t23-tg1-c7/p2.
+    - ring5 100GE-ports e810-2CQDA2-2p100GE:
+        - s57-t23-tg1-c9/p1 to s57-t23-tg1-c9/p2.
+- testbed24:
+    - ring1 200GE-ports ConnectX7-2p200GE:
+        - s58-t24-sut1-c2/p1 to s59-t24-tg1-c2/p1.
+        - s59-t24-tg1-c2/p2 to s58-t24-sut1-c2/p2.
+    - ring2 100GE-ports e810-2CQDA2-2p100GE:
+        - s58-t24-sut1-c4/p1 to s59-t24-tg1-c4/p1.
+        - s59-t24-tg1-c4/p2 to s58-t24-sut1-c4/p2.
+    - ring3 25GE-ports e810-XXVDA4-4p25GE:
+        - s58-t24-sut1-c10/p1 to s59-t24-tg1-c10/p1.
+        - s58-t24-sut1-c10/p2 to s59-t24-tg1-c10/p2.
+        - s58-t24-sut1-c10/p3 to s59-t24-tg1-c10/p3.
+        - s58-t24-sut1-c10/p4 to s59-t24-tg1-c10/p4.
+    - ring4 200GE-ports ConnectX7-2p200GE:
+        - s59-t24-tg1-c7/p1 to s59-t24-tg1-c7/p2.
+    - ring5 100GE-ports e810-2CQDA2-2p100GE:
+        - s59-t24-tg1-c9/p1 to s59-t24-tg1-c9/p2.
+```
diff --git a/docs/content/infrastructure/fdio_dc_vexxhost_inventory.md b/docs/content/infrastructure/fdio_dc_vexxhost_inventory.md
index 25934af770..140c74ffc4 100644
--- a/docs/content/infrastructure/fdio_dc_vexxhost_inventory.md
+++ b/docs/content/infrastructure/fdio_dc_vexxhost_inventory.md
@@ -7,7 +7,7 @@ weight: 1
 Captured inventory data:
 
 - **name**: CSIT functional server name as tracked in
-  [CSIT testbed specification]({{< ref "fdio_csit_testbed_specifications#FD.io CSIT Testbed Specifications" >}}),
+  [CSIT testbed specification]({{< ref "fdio_dc_testbed_specifications#FD.io CSIT Testbed Specifications" >}}),
   followed by "/" and the actual configured hostname,
   unless it is the same as CSIT name.
- **oper-status**: operational status (up|down).
@@ -24,8 +24,8 @@ Captured inventory data:
 
 ## Missing Equipment Inventory
 
 1. Ixia PerfectStorm One Appliance
-   - [**Specification**]({{< ref "fdio_csit_testbed_specifications#2-node-ixiaps1l47-ixia-psone-l47-2n-ps1" >}})
-   - [**Wiring**]({{< ref "fdio_csit_testbed_specifications#2-node-ixiaps1l47-2n-ps1" >}})
+   - [**Specification**]({{< ref "fdio_dc_testbed_specifications#2-node-ixiaps1l47-ixia-psone-l47-2n-ps1" >}})
+   - [**Wiring**]({{< ref "fdio_dc_testbed_specifications#2-node-ixiaps1l47-2n-ps1" >}})
    - **mgmt-ip4**: 10.30.51.62 s26-t25-tg1
    - **ipmi-ip4**: 10.30.50.59 s26-t25-tg1
diff --git a/docs/content/infrastructure/testbed_configuration/_index.md b/docs/content/infrastructure/testbed_configuration/_index.md
index d0716003c5..79d0250474 100644
--- a/docs/content/infrastructure/testbed_configuration/_index.md
+++ b/docs/content/infrastructure/testbed_configuration/_index.md
@@ -1,6 +1,6 @@
 ---
 bookCollapseSection: true
 bookFlatSection: false
-title: "FD.io CSIT Testbed Configuration"
+title: "FD.io DC Testbed Configuration"
 weight: 3
 ---
\ No newline at end of file
diff --git a/docs/content/infrastructure/trex_traffic_generator.md b/docs/content/infrastructure/trex_traffic_generator.md
new file mode 100644
index 0000000000..3497447cbf
--- /dev/null
+++ b/docs/content/infrastructure/trex_traffic_generator.md
@@ -0,0 +1,195 @@
+---
+title: "TRex Traffic Generator"
+weight: 7
+---
+
+# TRex Traffic Generator
+
+## Usage
+
+[TRex traffic generator](https://trex-tgn.cisco.com) is used for the majority
+of CSIT performance tests. TRex is used in multiple types of performance
+tests, see
+[Data Plane Throughput]({{< ref "../methodology/measurements/data_plane_throughput/data_plane_throughput/#Data Plane Throughput" >}})
+for more details.
+
+## Traffic modes
+
+TRex is primarily used in two (mutually incompatible) modes.
+
+### Stateless mode
+
+Sometimes abbreviated as STL.
+A mode with high performance, which is unable to react to incoming traffic.
+We use this mode whenever it is possible.
+A typical test where this mode is not applicable is NAT44ED,
+as the DUT does not assign deterministic outside address+port combinations,
+so we are unable to create traffic that does not lose packets
+in the out2in direction.
+
+Measurement results are based on simple L2 counters
+(opackets, ipackets) for each traffic direction.
+
+### Stateful mode
+
+A mode capable of reacting to incoming traffic.
+Contrary to the stateless mode, only UDP and TCP are supported
+(carried over IPv4 or IPv6 packets).
+Performance is limited, as TRex needs to do more CPU processing.
+TRex supports two subtypes of stateful traffic;
+CSIT uses ASTF (Advanced STateFul mode).
+
+This mode is suitable for NAT44ED tests, as clients send packets from inside,
+and servers react to them, so they see the outside address and port to
+respond to. Also, they do not send traffic before NAT44ED has created the
+corresponding translation entry.
+
+When possible, L2 counters (opackets, ipackets) are used.
+Some tests need L7 counters, which track protocol state (e.g. TCP),
+but those values are less than reliable under high loads.
+
+## Traffic Continuity
+
+Generated traffic is either continuous, or limited (by number of transactions).
+Both modes support both continuities in principle.
+
+### Continuous traffic
+
+Traffic is started without any data size goal.
+Traffic is ended based on time duration, as hinted by the search algorithm.
+This is useful when DUT behavior does not depend on the traffic duration.
+This is the default for stateless mode.
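+
+To make the terms above concrete, the minimal sketch below shows roughly how
+a continuous stateless trial maps onto the TRex STL Python API. It is an
+illustration only: the server address, packet contents and rates are
+hypothetical placeholders, and CSIT's real traffic profiles are defined in
+separate Scapy-based modules.
+
+```
+from trex_stl_lib.api import STLClient, STLPktBuilder, STLStream, STLTXCont
+from scapy.layers.inet import IP, UDP
+from scapy.layers.l2 import Ether
+
+# Connect to a running TRex server (address is a placeholder).
+client = STLClient(server="127.0.0.1")
+client.connect()
+client.reset(ports=[0, 1])
+
+# A stateless "transaction" is a single packet sent from one TG port.
+base_packet = Ether() / IP(src="10.0.0.1", dst="20.0.0.1") / UDP(dport=1024)
+stream = STLStream(
+    packet=STLPktBuilder(pkt=base_packet),
+    mode=STLTXCont(pps=1_000_000),  # continuous mode, rate from the search
+)
+client.add_streams(stream, ports=[0])
+
+# Duration is hinted by the search algorithm; traffic stops on its own.
+client.start(ports=[0], duration=10.0)
+client.wait_on_traffic(ports=[0])
+
+# Simple L2 counters give the measurement result.
+stats = client.get_stats()
+sent = stats[0]["opackets"]
+received = stats[1]["ipackets"]
+print(f"sent={sent}, received={received}, lost={sent - received}")
+client.disconnect()
+```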
+
+### Limited traffic
+
+Traffic has a defined data size goal (given as a number of transactions);
+the duration is computed based on this goal.
+Traffic is ended when the size goal is reached,
+or when the computed duration is reached.
+This is useful when DUT behavior depends on traffic size,
+e.g. a target number of NAT translation entries, each to be hit exactly once
+per direction.
+This is used mainly for stateful mode.
+
+## Traffic synchronicity
+
+Traffic can be generated synchronously (the test waits for the duration)
+or asynchronously (the test operates during traffic and stops it explicitly).
+
+### Synchronous traffic
+
+Trial measurement is driven by a given (or precomputed) duration,
+with no activity from the test driver during the traffic.
+Used for most trials.
+
+### Asynchronous traffic
+
+Traffic is started, but then the test driver is free to perform
+other actions, before stopping the traffic explicitly.
+This is used mainly by reconf tests, but also by some trials
+used for runtime telemetry.
+
+## Traffic profiles
+
+TRex supports several ways to define the traffic.
+CSIT uses small Python modules based on Scapy as definitions.
+Details of traffic profiles depend on the mode (STL or ASTF),
+but some are common to both modes.
+
+Search algorithms are intentionally unaware of the traffic mode used,
+so CSIT defines some terms to use instead of mode-specific TRex terms.
+
+### Transactions
+
+A TRex traffic profile defines a small number of behaviors,
+in CSIT called transaction templates. Traffic profiles also instruct
+TRex how to create a large number of transactions based on the templates.
+
+Continuous traffic loops over the generated transactions.
+Limited traffic usually executes each transaction once
+(typically as a constant number of loops over source addresses,
+each loop with different source ports).
+
+Currently, ASTF profiles define one transaction template each.
+The number of packets expected per transaction varies based on profile
+details, as does the criterion for when a transaction is considered
+successful.
+
+Stateless transactions are just one packet (sent from one TG port,
+successful if received on the other TG port).
+Thus unidirectional stateless profiles define one transaction template,
+and bidirectional stateless profiles define two transaction templates.
+
+### TPS multiplier
+
+TRex aims to open transactions specified by the profile at a steady rate.
+While TRex allows the transaction template to define its intended "cps"
+value, CSIT does not specify it, so the default value of 1 is applied,
+meaning TRex will open one transaction per second (per transaction template)
+by default. But the CSIT invocation uses the "multiplier" (mult) argument
+when starting the traffic, which multiplies the cps value,
+meaning it acts as a TPS (transactions per second) input.
+
+With a slight abuse of nomenclature, bidirectional stateless tests
+set the "packets per transaction" value to 2, just to keep the TPS semantics
+as a unidirectional input value.
+
+### Duration stretching
+
+TRex can be IO-bound, CPU-bound, or have any other reason
+why it is not able to generate the traffic at the requested TPS.
+Some conditions are detected, leading to TRex failure,
+for example when the bandwidth does not fit into the line capacity.
+But many reasons are not detected.
+
+Unfortunately, TRex frequently reacts by not honoring the duration
+in synchronous mode, taking longer to send the traffic,
+leading to a lower than requested load offered to the DUT.
+This usually breaks assumptions used in search algorithms,
+so it has to be avoided.
+
+For stateless traffic, the behavior is quite deterministic,
+so the workaround is to apply a fictional TPS limit (max_rate)
+to search algorithms, usually depending only on the NIC used.
+
+For stateful traffic the behavior is not deterministic enough;
+for example, the limit for TCP traffic depends on DUT packet loss.
+In CSIT we decided to use logic similar to asynchronous traffic.
+The traffic driver sleeps for a time, then stops the traffic explicitly.
+The library that parses counters into measurement results
+then usually treats unsent packets/transactions as lost/failed.
+
+We have added an IP4base test for every NAT44ED test,
+so that users can compare results.
+If the results are very similar, it is probable TRex was the bottleneck.
+
+### Startup delay
+
+By investigating TRex behavior, it was found that TRex does not start
+the traffic in ASTF mode immediately. There is a delay of zero traffic,
+after which the traffic rate ramps up to the defined TPS value.
+
+It is possible to poll for counters during the traffic
+(the first nonzero value means traffic has started),
+but that was found to influence the NDR results.
+
+Thus the "sleep and stop" strategy is used, which needs a correction
+to the computed duration so traffic is stopped after the intended
+duration of real traffic. Luckily, it turns out this correction
+depends neither on the traffic profile nor on the CPU used by TRex,
+so a fixed constant (0.112 seconds) works well.
+Unfortunately, the constant may depend on the TRex version,
+or on the execution environment (e.g. TRex in AWS).
+
+The result computations need a precise enough duration of the real traffic;
+luckily, the server side of TRex has a precise enough counter for that.
+
+It is unknown whether stateless traffic profiles also exhibit a startup
+delay. Unfortunately, stateless mode does not have a similarly precise
+duration counter, so some results (mostly MRR) are affected by less precise
+duration measurement in the Python part of CSIT code.
+
+## Measuring Latency
+
+If measurement of latency is requested, two more packet streams are
+created (one for each direction) with the TRex flow_stats parameter set to
+STLFlowLatencyStats. In that case, returned statistics will also include
+min/avg/max latency values and encoded HDRHistogram data.
diff --git a/docs/content/infrastructure/vpp_startup_settings.md b/docs/content/infrastructure/vpp_startup_settings.md
new file mode 100644
index 0000000000..7361d4b21f
--- /dev/null
+++ b/docs/content/infrastructure/vpp_startup_settings.md
@@ -0,0 +1,44 @@
+---
+title: "VPP Startup Settings"
+weight: 6
+---
+
+# VPP Startup Settings
+
+CSIT code manipulates a number of VPP settings in startup.conf for
+optimized performance. A list of common settings applied to all tests,
+followed by the test-dependent settings, is below.
+
+## Common Settings
+
+List of VPP startup.conf settings applied to all tests:
+
+1. heap-size - set separately for ip4, ip6, stats, main,
+   depending on the scale tested.
+2. no-tx-checksum-offload - disables UDP / TCP TX checksum offload in
+   DPDK. Typically needed to use faster vector PMDs (together with
+   no-multi-seg).
+3. buffers-per-numa - sets the number of memory buffers allocated
+   to VPP per CPU socket. VPP default is 16384. Needs to be increased for
+   scenarios with a large number of interfaces and worker threads. To
+   accommodate scale tests, CSIT is setting it to the maximum possible
+   value corresponding to the limit of DPDK memory mappings (currently
+   256). For Xeon Skylake platforms configured with 2MB hugepages and VPP
+   data-size and buffer-size defaults (2048B and 2496B respectively), this
+   results in a value of 215040 (256 * 840 = 215040, as 840 buffers of
+   2496B fit in a 2MB hugepage).
+
+## Per Test Settings
+
+List of VPP startup.conf settings applied dynamically per test:
+
+1. corelist-workers - list of logical cores to run VPP
+   worker data plane threads. Depends on HyperThreading and the cores per
+   test configuration.
+2. num-rx-queues - depends on the number of VPP threads and NIC
+   interfaces.
+3. no-multi-seg - disables multi-segment buffers in DPDK, improves
+   packet throughput, but disables Jumbo MTU support. Disabled for all
+   tests apart from the ones that require Jumbo 9000B frame support.
+4. UIO driver - depends on topology file definition.
+5. QAT VFs - depends on NRThreads, each thread = 1 QAT VF.
diff --git a/docs/content/introduction/_index.md b/docs/content/introduction/_index.md
deleted file mode 100644
index e028786bd1..0000000000
--- a/docs/content/introduction/_index.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-bookFlatSection: true
-title: "Introduction"
-weight: 1
----
\ No newline at end of file
diff --git a/docs/content/introduction/automating_vpp_api_flag_day.md b/docs/content/introduction/automating_vpp_api_flag_day.md
deleted file mode 100644
index 131adeab9d..0000000000
--- a/docs/content/introduction/automating_vpp_api_flag_day.md
+++ /dev/null
@@ -1,303 +0,0 @@
----
-bookHidden: true
-title: "VPP API Flag Day Algorithms"
----
-
-# VPP API Flag Day Algorithm
-
-## Abstract
-
-This document describes the current solution to the problem of
-automating the detection of VPP API changes which are not backwards
-compatible with existing CSIT tests, by defining the "Flag Day"
-process of deploying a new set of CSIT tests which are compatible
-with the new version of the VPP API without causing a halt to the
-normal VPP/CSIT operational CI process. This is initially
-limited to changes in \*.api files contained in the vpp repo.
-Eventually the detection algorithm could be extended to include
-other integration points such as "directory" structure of stats
-segment or PAPI python library dependencies.
-
-## Motivation
-
-Aside of per-release activities (release report), CSIT also provides testing
-that requires somewhat tight coupling to the latest (merged but not released)
-VPP code. Currently, HEAD of one project is run against somewhat older codebase
-of the other project. Definition of what is the older codebase to use
-is maintained by CSIT project. For older CSIT codebase, there are so-called
-"oper" branches. For older VPP codebase, CSIT master HEAD contains identifiers
-for "stable" VPP builds. Such older codebases are also used for verify jobs,
-where HEAD of the other project is replaced by the commit under review.
-
-One particular type of jobs useful for VPP development is trending jobs.
-They test latests VPP build with latest oper branch of CSIT,
-and analytics is applied to detect regressions in preformance.
-For this to work properly, VPP project needs a warning against breaking
-the assumptions the current oper branch makes about VPP behavior.
-In the past, the most frequent type of such breakage was API change.
-
-Earlier attempts to create a process to minimize breakage have focused
-on creating a new verify job for VPP (called api-crc job) that
-votes -1 on a change that affects CRC values for API messages CSIT uses.
-The list of messages and CRC values (multiple "collections" are allowed) -is maintained in CSIT repository (in oper branch). -The process was less explicit on how should CSIT project maintain such list. -As CSIT was not willing to support two incpompatible API messages -by the same codebase (commit), there were unavoidable windows -where either trenging jobs, or CSIT verify jobs were failing. - -Practice showed that human (or infra) errors can create two kinds of breakages. -Either the unavoidable short window gets long, affecting a trending job run -or two, or the api-crc job starts giving -1 to innocent changes -because oper branch went out of sync with VPP HEAD codebase. -This second type of failure prevents any merges to VPP for a long time -(12 hours is the typical time, give time zone differences). - -The current version of this document introduces two new requirements. -Firstly, the api-crc job should not give false -1, under any -(reasonable) circumstances. That means, if a VPP change -(nor any of its unmerged ancestor commits) does not affect any CRC values -for messages used by CSIT, -1 should only mean "rebase is needed", -and rebasing to HEAD should result in +1 from the api-crc job. -Secondly, no more than one VPP change is allowed to be processed -(at the same time). - -## Naming - -It is easier to define the process after chosing shorter names -for notions that need long definition. - -Note: Everytime a single job is mentioned, -in practice it can be a set of jobs covering parts of functionality. -A "run" of the set of jobs passes only if each job within the set -has been run (again) and passed. - -## Jobs - -+ A *vpp verify* job: Any job run automatically, and voting on open VPP changes. - Some verify jobs compile and package VPP for target operating system - and processor architecture, the packages are NOT archived (currently). - They should be cached somewhere in future to speed up in downstream jobs, - but currently each such downstream job can clone and build. - -+ The *api-crc* job: Quick verify job for VPP changes, that accesses - CSIT repository (checkout latest oper branch HEAD) to figure out - whether merging the change is safe from CSIT point of view. - Here, -1 means CSIT is not ready. +1 means CSIT looks to be ready - for the new CRC values, but there still may be failures on real tests. - -+ A *trending* job: Any job that is started by timer and performs testing. - It checkouts CSIT latest oper branch HEAD, downloads the most recent - completely uploaded VPP package, and unconditionally runs the tests. - CRC checks are optional, ideally only written to console log - without otherwise affecting the test cases. - -+ A *vpp-csit* job: A slower verify job for VPP changes, that accesses CSIT - repository and runs tests from the correct CSIT commit (chosen as in trending) - against the VPP (built from the VPP patch under review). - Vote -1 means there were test failures. +1 means no test failures, meaning - there either was no API change, or it was backward compatible. - -+ A *csit-vpp* job: Verify job for open CSIT changes. Downloads the - (completely uploaded) VPP package marked as "stable", and runs a selection - of tests (from the CSIT patch under review). - Vote +1 means all tests have passed, so it is safe to merge - the patch under review. - -+ A *patch-on-patch* job: Manually triggered non-voting job - for open CSIT changes. Compiles and packages from VPP source - (usually of an unmerged change). Then runs the same tests as csit-vpp job. 
-  This job is used to prove the CSIT patch under review supports
-  the specified VPP code.
-  In practice, this can be a vpp-csit job started with CSIT_REF set.
-
-+ A *manual verification* is done by a CSIT committer, locally executing steps
-  equivalent to the patch-on-patch job. This can save time and resources.
-
-## CRC Collections
-
-Any commit in/for the CSIT repository contains a file (supported_crcs.yaml),
-which contains either one or two collections. A collection is a mapping
-that maps an API message name to its CRC value.
-
-A collection name specifies which VPP build the collection is for.
-An API message name is present in a collection if and only if
-it is used by a test implementation (which can be in a different CSIT commit)
-targeted at the VPP build (pointed out by the collection name).
-
-+ The *stable collection*: Usually required, listed first, has comments and a
-  name pointing to the VPP build this CSIT commit marks as stable.
-  The stable collection is only missing in deactivating changes (see below)
-  when not mergeable yet.
-
-+ The *active collection*: Optional, listed second, has comments and a name
-  pointing to the VPP Gerrit change (including the patch set number)
-  the currently active API process is processing.
-  The patch set number part can be behind the actual Gerrit state.
-  This is safe, because the api-crc job on the active API change will fail
-  if the older patch set is no longer API-equivalent to the newer patch set.
-
-## Changes
-
-+ An *API change*: The name for any Gerrit Change for the VPP repository
-  that does not pass the api-crc job right away, and needs this whole process.
-  This usually means .api files are edited, but a patch that affects
-  the way CRC values are computed is also an API change.
-
-  The full name could be VPP API Change, but as no CSIT change is named
-  "API change" (and this document does not talk about other FD.io or external
-  projects), "API change" is shorter.
-
-+ A *blocked change*: The name for an open Gerrit Change for the VPP repository
-  that got -1 from some of the voting verify jobs.
-
-+ A *VPP-blocked change*: A blocked change which got -1 from some "pure VPP"
-  verify job, meaning no CSIT code has been involved in the vote.
-  Example: "make test" fails.
-
-  The VPP contributor is expected to fix the change, or VPP developers
-  are expected to find the cause in an earlier VPP change and fix it.
-  No interaction with CSIT developers is necessary.
-
-+ A *CSIT-blocked change*: A blocked change which is not VPP-blocked,
-  but does not pass some vpp-csit job.
-  To fix a CSIT-blocked change, an interaction with a CSIT committer
-  is usually necessary. Even if a VPP developer is experienced enough
-  to identify the cause of the failure, a merge to CSIT is usually needed
-  for a full fix.
-
-  This process does not specify what to do with CSIT-blocked changes
-  that are not also API changes.
-
-+ A *candidate API change*: An API change that meets all requirements
-  to become active (see below). Currently, the requirements are:
-
-  + No -1 nor -2 from any human reviewer.
-
-  + All verify jobs (except vpp-csit ones) pass.
-
-  + +1 from a VPP committer.
-
-  The reason is to avoid situations where an API change becomes active,
-  but the VPP committers are unwilling to merge it for some reason.
-
-+ The *active API change*: The candidate API change currently being processed
-  by the API Flag Day Algorithm.
-  While many API changes can be candidates at the same time,
-  only one is allowed to be active at a time.
-
-+ The *activating change*: The name for a Gerrit Change for the CSIT repository
-  that does not change the test code, but adds the active CRC collection.
-  Merge of the activating change (to the latest CSIT oper branch) defines
-  which API change has become active.
-
-+ The *deactivating change*: The name for a Gerrit Change for the CSIT
-  repository that only supports tests and CRC values for VPP with the active
-  API change. That implies the previously stable CRC collection is deleted,
-  and any edits to the test implementation are done here.
-
-+ The *mergeable deactivating change*: The deactivating change with additional
-  requirements. Details on the requirements are listed in the next section.
-  Merging this change finishes the process for the active API change.
-
-It is possible for a single CSIT change to act both as a mergeable
-deactivating change for one API change, and as an activating change
-for another API change. As English lacks a good adjective for such a thing,
-this document does not name this change.
-When this document says a change is activating or deactivating,
-it allows the possibility for the change to also fulfill other purposes
-(e.g. acting as a deactivating / activating change for another API change).
-
-## Algorithm Steps
-
-The following steps describe the application of the API "Flag Day" algorithm:
-
-#. A VPP patch for an API change is submitted to
-   gerrit for review.
-#. The api-crc job detects the API CRC values have changed
-   for some messages used by CSIT.
-#. The api-crc job runs in parallel with any other vpp-csit verify job,
-   so those other jobs can hint at the impact on CSIT.
-   Currently, any such vpp-csit job is non-voting,
-   as the current process does not guarantee such jobs pass
-   when the API change is merged.
-#. If the api-crc job fails, an email with the appropriate reason
-   is sent to the VPP patch submitter and vpp-api-dev@lists.fd.io
-   including the VPP patch information and the .api files that are edited.
-#. The VPP patch developer works with a VPP committer
-   to ensure the patch meets the requirements to become a candidate (see above).
-#. The VPP patch developer and the CSIT team create a CSIT JIRA ticket
-   to identify the work required to support the new VPP API version.
-#. A CSIT developer creates a patch of the deactivating change
-   (upload to Gerrit not required yet).
-#. The CSIT developer runs the patch-on-patch job (or manual verification).
-   Both developers iterate until the verification passes.
-   Note that in this phase the csit-vpp job is expected to vote -1,
-   as the deactivating change is not mergeable yet.
-#. The CSIT developer creates the activating change, uploads it to Gerrit,
-   and waits for votes (the usual review cycle applies).
-#. When the CSIT committer is satisfied, the activating change is merged
-   to the CSIT master branch and cherry-picked to the latest oper branch.
-   This enters a "critical section" of the process.
-   Merges of other activating changes are not allowed from now on.
-   The targeted API change becomes the active API change.
-   This does not break any jobs.
-#. The VPP developer (or a CSIT committer) issues a recheck on the VPP patch.
-#. On failure, VPP and CSIT committers analyze what went wrong.
-   Typically, the active CRC collection matches only an older patch set,
-   while a newer patch set needs different CRC values,
-   either due to improvements on the VPP change in question,
-   or due to a rebase over a previously merged (unrelated) API change.
-   VPP perhaps needs to rebase, and CSIT definitely needs
-   to merge edits to the active collection.
-   Then issue a recheck again,
-   and iterate until success.
-#. On success, the VPP committer merges the active API change patch.
-   (This is also a delayed verification of the current active CRC collection.)
-#. The VPP committer sends an e-mail to vpp-api-dev stating that the support
-   for the previous CRC values will soon be removed, implying other changes
-   (whether API or not) should be rebased soon.
-#. VPP merge jobs create and upload new VPP packages.
-   This breaks trending jobs, but both VPP and CSIT verify jobs still work.
-#. The CSIT developer makes the deactivating change mergeable:
-   The stable VPP build indicator is bumped to the build
-   that contains the active API change. The active CRC collection
-   (added by the activating change) is renamed to the new stable collection.
-   (The previous stable collection has already been deleted.)
-   At this time, the deactivating change should be uploaded to Gerrit and
-   CSIT verify jobs should be triggered.
-#. The CSIT committer reviews the code, perhaps triggering any additional jobs
-   needed to verify the tests using the edited APIs are still working.
-#. When satisfied, the CSIT committer merges the mergeable deactivating change
-   (to both master and oper).
-   The merge fixes trending jobs. VPP and CSIT verify jobs continue to work.
-   The merge also breaks some verify jobs for old changes in VPP,
-   as announced when the active API change was merged.
-   The merge is the point where the process leaves the "critical section",
-   thus allowing merges of activating changes for other API changes.
-#. The CSIT committer sends an e-mail to vpp-api-dev stating that the support
-   for the previous CRC values has been removed, and a rebase is needed
-   for all affected VPP changes.
-#. A recheck of existing VPP patches in gerrit may cause the "VPP
-   API Incompatible Change Test" to send an email to the patch
-   submitter to rebase the patch to pick up the compatible VPP API
-   version files.
-
-### Real life examples
-
-Simple API change: https://gerrit.fd.io/r/c/vpp/+/23829
-
-Activating change: https://gerrit.fd.io/r/c/csit/+/23956
-
-Mergeable deactivating change: https://gerrit.fd.io/r/c/csit/+/24280
-
-Less straightforward mergeable deactivating change:
-https://gerrit.fd.io/r/c/csit/+/22526
-It shows:
-
-+ CRC edits: supported_crcs.yaml
-+ Version bump: VPP_STABLE_VER_UBUNTU_BIONIC
-+ And even a way to work around failing tests:
-  eth2p-ethicmpv4-ip4base-eth-1tap-dev.robot
-
-Simple change that is both deactivating and activating:
-https://gerrit.fd.io/r/c/csit/+/23969
diff --git a/docs/content/introduction/bash_code_style.md b/docs/content/introduction/bash_code_style.md
deleted file mode 100644
index bbd0c37196..0000000000
--- a/docs/content/introduction/bash_code_style.md
+++ /dev/null
@@ -1,651 +0,0 @@
----
-bookHidden: true
-title: "Bash Code Style"
----
-
-The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
-"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
-"MAY", and "OPTIONAL" in this document are to be interpreted as
-described in [BCP 14](https://tools.ietf.org/html/bcp14),
-[RFC2119](https://tools.ietf.org/html/rfc2119),
-[RFC8174](https://tools.ietf.org/html/rfc8174)
-when, and only when, they appear in all capitals, as shown here.
-
-This document SHALL describe guidelines for writing reliable, maintainable,
-reusable and readable code for CSIT.
-
-# Proposed Style
-
-# File Types
-
-Bash files SHOULD NOT be monolithic. Generally, this document
-considers two types of bash files:
-
-+ Entry script: Assumed to be called by a user,
-  or a script "external" in some way.
-
-  + Sources bash libraries and calls functions defined there.
-
-+ Library file: To be sourced by entry scripts, possibly also by other
-  libraries.
-
-  + Sources other libraries for functions it needs.
-
-  + Or relies on a related file already having sourced that.
-
-  + Documentation SHALL imply which case it is.
-
-  + Defines multiple functions other scripts can call.
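To make the two file types concrete, here is a minimal sketch (not taken from
the CSIT repository; the condensed single-file layout and the function name
are illustrative assumptions) showing both roles at once:

```bash
#!/usr/bin/env bash

set -exuo pipefail

# Library part: in a real repository this would live in a separate file
# (e.g. a hypothetical "common.sh") and only be sourced here. It defines
# functions and executes nothing at source time.
function common_function () {
    set -exuo pipefail
    echo "common_function called with: ${*}"
}

# Entry script part: source libraries (elided in this sketch),
# then call the functions with arguments as needed.
common_function "example argument"
```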
-
-# Safety
-
-+ Variable expansions MUST be quoted, to prevent word splitting.
-
-  + This includes special "variables" such as "${1}".
-
-    + RECOMMENDED even if the value is safe, as in "$?" and "$#".
-
-  + It is RECOMMENDED to quote strings in general,
-    so text editors can syntax-highlight them.
-
-    + Even if the string is a numeric value.
-
-    + Commands and known options can get their own highlight, no need to quote.
-
-      + Example: You do not need to quote every word of
-        "pip install --upgrade virtualenv".
-
-  + Code SHALL NOT quote glob characters you need to expand (obviously).
-
-    + OPTIONALLY do not quote adjacent characters (such as dot or forward
-      slash), so that syntax highlighting makes them stand out compared to
-      surrounding ordinary strings.
-
-      + Example: cp "logs"/*."log" "."/
-
-  + Command substitutions on the right hand side of an assignment are safe
-    without quotes.
-
-    + Note that command substitution limits the scope for quotes,
-      so it is NOT REQUIRED to escape the quotes in deeper levels.
-
-    + Both backticks and "dollar round-bracket" provide command substitution.
-      The following rules are RECOMMENDED:
-
-      + For simple constructs, use "dollar round-bracket".
-
-      + If there are round brackets in the surrounding text, use backticks,
-        as some editor highlighting logic can get confused.
-
-      + Avoid nested command substitution.
-
-        + Put intermediate results into local variables,
-          use "|| die" on each step of command substitution.
-
-  + Code SHOULD NOT be structured in a way where
-    word splitting is intended.
-
-    + Example: Variable holding a string of multiple command line arguments.
-
-      + Solution: An array variable should be used in this case.
-
-        + Expansion MUST use quotes then: "${name[@]}".
-
-      + Word splitting MAY be used when creating arrays from command
-        substitution.
-
-+ Code MUST always check the exit code of commands.
-
-  + Traditionally, error code checking is done either by "set -e"
-    or by appending "|| die" after each command.
-    The first is unreliable, due to many rules affecting "set -e" behavior
-    (see ), but "|| die"
-    relies on humans identifying each command, which is also unreliable.
-    When was the last time you checked the error code of the "echo" command,
-    for example?
-
-    + Another example: "set -e" in your function has no effect
-      if any ancestor call is done with a logical or,
-      for example in the "func || code=$?" construct.
-
-  + As there is no reliable method of error detection, and there are two
-    largely independent unreliable methods, the best we can do
-    is to apply both. So, code SHOULD explicitly
-    check each command (with "|| die" and similar) AND have "set -e" applied.
-
-  + Code MUST explicitly check each command, unless the command is well known,
-    and considered safe (such as the aforementioned "echo").
-
-    + The well known commands MUST still be checked implicitly via "set -e".
-
-  + See below for specific "set -e" recommendations.
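A minimal sketch combining the rules above; the "die" below is a simplified
stand-in for the library function the text assumes, not the real CSIT
implementation:

```bash
#!/usr/bin/env bash

set -exuo pipefail

function die () {
    # Simplified stand-in for the library "die" function:
    # print message(s) to standard error and exit the whole script.
    set -x
    echo "${@}" >&2
    exit 1
}

# Quoted expansions and explicit "|| die" checks, on top of "set -e".
work_dir="$(readlink -e ".")" || die "Readlink failed."
file_count="$(ls "${work_dir}" | wc -l)" || die "Counting files failed."
echo "Files in ${work_dir}: ${file_count}"
```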
-
-+ Code SHOULD use "readlink -e" (or "-f" if the target does not exist yet)
-  to normalize any path value to an absolute path without symlinks.
-  It helps with debugging and identifies malformed paths.
-
-+ Code SHOULD use such normalized paths for sourcing.
-
-+ When exiting on a known error, code MUST print a longer, helpful message,
-  in order for the user to fix their situation if possible.
-
-+ When an error happens at an unexpected place, it is RECOMMENDED for the
-  message to be short and generic, instead of speculative.
-
-# Bash Options
-
-+ Code MUST apply "-x" to make debugging easier.
-
-  + Code MAY temporarily suppress such output in order to avoid spam
-    (e.g. in long busy loops), but it is still NOT RECOMMENDED to do so.
-
-+ Code MUST apply "-e" for early error detection.
-
-  + But code still SHOULD use "|| die" for most commands,
-    as "-e" has numerous rules and exceptions.
-
-  + Code MAY apply "+e" temporarily for commands whose (possibly nonzero)
-    exit code it is interested in.
-
-    + Code MUST store "$?" and call "set -e" immediately afterwards.
-
-    + Code MUST NOT use this approach when calling functions.
-
-      + That is because functions are instructed to apply "set -e" on their own
-        which (when triggered) will exit the whole entry script.
-
-        + Unless overridden by an ERR trap.
-          But code SHOULD NOT set any ERR trap.
-
-  + If code needs the exit code of a function, it is RECOMMENDED to use
-    the pattern 'code="0"; called_function || code="${?}"'.
-
-    + In this case, the contributor MUST make sure nothing in the
-      called_function sub-graph relies on "set -e" behavior,
-      because the call being part of an "or construct" disables it.
-
-  + Code MAY append "|| true" for benign commands,
-    when it is clear non-zero exit codes make no difference.
-
-    + Also in this case, the contributor MUST make sure nothing within
-      the called sub-graph depends on "set -e", as it is disabled.
-
-+ Code MUST apply "-u" as an unset variable is generally a typo, thus an error.
-
-  + Code MAY temporarily apply "+u" if a command needs that to pass.
-
-    + Virtualenv activation is the only known example so far.
-
-+ Code MUST apply "-o pipefail" to make sure "-e" picks up errors
-  inside piped constructs.
-
-  + Code MAY use "|| true" inside a pipe construct, in the (improbable) case
-    when a non-zero exit code still results in a meaningful pipe output.
-
-+ All together: "set -exuo pipefail".
-
-  + Code MUST put that line near the start of every file, so we are sure
-    the options are applied no matter what.
-
-    + "Near start" means "before any nontrivial code".
-
-    + Basically only the copyright is RECOMMENDED to appear before.
-
-  + Also code MUST put the line near the start of function bodies
-    and subshell invocations.
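The recommended exit code capture pattern, as a runnable sketch (the function
and the tested path are hypothetical):

```bash
#!/usr/bin/env bash

set -exuo pipefail

function maybe_fail () {
    # Returns nonzero if the (hypothetical) flag file is absent.
    # Note: "set -e" is suspended in the caller's "or construct" below,
    # exactly as the text above warns.
    set -exuo pipefail
    test -e "/tmp/hypothetical_flag_file"
}

code="0"
maybe_fail || code="${?}"
echo "maybe_fail returned: ${code}"
```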
-
-# Functions
-
-There are (at least) two possibilities for how code from an external file
-can be executed. Either the file contains a code block to execute
-on each "source" invocation, or the file just defines functions
-which have to be called separately.
-
-This document considers the "function way" to be better,
-here are some pros and cons:
-
-+ Cons:
-
-  + The function way takes more space. Files have more lines,
-    and the code in the function body is one indent deeper.
-
-  + It is not easy to create functions for low-level argument manipulation,
-    as the "shift" command in the function code does not affect the caller
-    context.
-
-  + Call sites frequently refer to code two times,
-    when sourcing the definition and when executing the function.
-
-  + It is not clear when a library can rely on a related file
-    to have performed the sourcing already.
-
-    + Ideally, each library should detect if it has been sourced already
-      and return early, which takes even more space.
-
-+ Pros:
-
-  + Some code blocks are more useful when used as a function,
-    to make the call site shorter.
-
-    + Examples: Trap functions, the "die" function.
-
-  + The "import" part and the "function" part usually have different side
-    effects, making the documentation more focused (even if longer overall).
-
-  + There is zero risk of an argument-less invocation picking arguments
-    from the parent context.
-
-    + This safety feature is the main reason for choosing the "function way".
-
-    + This allows code blocks to support optional arguments.
-
-+ Rules:
-
-  + Library files MUST be only "source"d. For example if "tox" calls a script,
-    it is an entry script.
-
-  + Library files (upon sourcing) MUST minimize side effects.
-
-    + The only permitted side effects MUST be directly related to:
-
-      + Defining functions (without executing them).
-
-      + Sourcing sub-library files.
-
-  + If a bash script indirectly calls another bash script,
-    it is not a "source" operation, variables are not shared,
-    so the called script MUST be considered an entry script,
-    even if it implements logic fitting into a single function.
-
-  + Entry scripts SHOULD avoid duplicating any logic.
-
-    + Clearly duplicated blocks MUST be moved into libraries as functions.
-
-    + Blocks with a low amount of duplication MAY remain in entry scripts.
-
-    + Usual motives for not creating functions are:
-
-      + The extracted function would have too much logic for processing
-        arguments (instead of hardcoding values as in the entry script).
-
-      + The arguments needed would be too verbose.
-
-        + And using "set +x" would take too much vertical space
-          (when compared to the entry script implementation).
-
-# Variables
-
-This document describes two kinds of variables, called "local" and "global".
-
-+ Local variables:
-
-  + The variable name MUST contain only lower case letters, digits and
-    underscores.
-
-  + Code MUST NOT export local variables.
-
-  + Code MUST NOT rely on local variables set in different contexts.
-
-  + Documentation is NOT REQUIRED.
-
-    + The variable name SHOULD be descriptive enough.
-
-  + A local variable MUST be initialized before first use.
-
-    + Code SHOULD have a comment if a reader might have missed
-      the initialization.
-
-  + Unset local variables when leaving the function.
-
-  + Explicitly typeset by the "local" builtin command.
-
-  + Require a strict naming convention, e.g. function_name__variable_name.
-
-+ Global variables:
-
-  + The variable name MUST contain only upper case letters, digits and
-    underscores.
-
-  + They SHOULD NOT be exported, unless external commands need them
-    (e.g. PYTHONPATH).
-
-  + Code MUST document if a function (or its inner call)
-    reads a global variable.
-
-  + Code MUST document if a function (or its inner call)
-    sets or rewrites a global variable.
-
-  + If a function "wants to return a value", it SHOULD be implemented
-    as the function setting (or rewriting) a global variable,
-    and the call sites reading that variable.
-
-  + If a function "wants to accept an argument", it IS RECOMMENDED
-    to be implemented as the call sites setting or rewriting global variables,
-    and the function reading those variables.
-    But see below for direct arguments.
-
-+ Code MUST use curly brackets when referencing variables,
-  e.g. "${my_variable}".
-
-  + It makes related constructs (such as ${name:-default}) less surprising.
-
-  + It looks more similar to Robot Framework variables (which is good).
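A sketch of a "setter" function returning a value through a global variable;
the checkout location and the file usage here are illustrative assumptions
only (the version file name does appear in CSIT, the rest is hypothetical):

```bash
#!/usr/bin/env bash

set -exuo pipefail

function gather_version () {
    # Variables read: CSIT_DIR - path to a CSIT checkout (set by caller).
    # Variables set: VPP_VERSION - content of the stable version file.
    set -exuo pipefail
    VPP_VERSION="$(<"${CSIT_DIR}/VPP_STABLE_VER_UBUNTU_BIONIC")"
}

CSIT_DIR="${HOME}/csit"    # assumed checkout location, illustrative only
gather_version
echo "Stable VPP version: ${VPP_VERSION}"
```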
-
-# Arguments
-
-Bash scripts and functions MAY accept arguments, named "${1}", "${2}" and so
-on. As a whole, they are available via "$@".
-You MAY use the "shift" command to consume an argument.
-
-## Contexts
-
-Functions never have access to parent arguments, but they can read and write
-variables set or read by parent contexts.
-
-### Arguments Or Variables
-
-+ Both arguments and global variables MAY act as an input.
-
-+ In general, if the caller is likely to supply the value already placed
-  in a global variable of known name, it is RECOMMENDED
-  to use that global variable.
-
-+ The construct "${NAME:-value}" can be used equally well for arguments,
-  so default values are possible for both input methods.
-
-+ Arguments are positional, so there are restrictions on which input
-  is optional.
-
-+ Functions SHOULD either look at arguments (possibly also
-  reading global variables to use as defaults), or look at variables only.
-
-+ Code MUST NOT rely on "${0}", it SHOULD use "${BASH_SOURCE[0]}" instead
-  (and apply "readlink -e") to get the current block location.
-
-+ For entry scripts, it is RECOMMENDED to use standard parsing capabilities.
-
-  + For most Linux distros, "getopt" is RECOMMENDED.
-
-# Working Directory Handling
-
-+ Functions SHOULD act correctly without either assuming
-  what the current working directory is, or changing it.
-
-  + That is why global variables and arguments SHOULD contain
-    (normalized) full paths.
-
-  + Motivation: Different call sites MAY rely on different working directories.
-
-+ A function MAY return (even with a nonzero exit code) with the working
-  directory changed.
-
-  + In this case the function documentation MUST clearly state where (and when)
-    the working directory is changed.
-
-    + Exception: Functions with an undocumented exit code.
-
-      + Those functions MUST return a nonzero code only on "set -e" or "die".
-
-      + Note that both "set -e" and "die" by default result in an exit of the
-        whole entry script, but the caller MAY have altered that behavior
-        (by registering an ERR trap, or redefining the die function).
-
-      + Any callers which use "set +e" or "|| true" MUST make sure
-        their (and their caller ancestors') assumptions on the working
-        directory are not affected.
-
-        + Such callers SHOULD do that by restoring the original working
-          directory either in their code,
-
-        + or contributors SHOULD do such restoration in the function code
-          (see below) if that is more convenient.
-
-  + Motivation: Callers MAY rely on this side effect to simplify their logic.
-
-+ A function MAY assume a particular directory is already set
-  as the working directory (to save space).
-
-  + In this case the function documentation MUST clearly state what the assumed
-    working directory is.
-
-  + Motivation: Callers MAY call several functions with a common
-    directory of interest.
-
-    + Example: Several download actions to execute in sequence,
-      implemented as functions assuming ${DOWNLOAD_DIR}
-      is the working directory.
-
-+ A function MAY change the working directory transiently,
-  before restoring it back before return.
-
-  + Such functions SHOULD use the "pushd" command to change the working
-    directory.
-
-  + Such functions SHOULD use "trap 'trap - RETURN; popd' RETURN"
-    immediately after the pushd.
-
-    + In that case, the "trap - RETURN" part MUST be included,
-      to restore any trap set by an ancestor.
-
-  + Functions MAY call "trap - RETURN; popd" explicitly.
-
-  + Such functions MUST NOT call another pushd (before an explicit popd),
-    as traps do not stack within a function.
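The transient directory change pattern above, as a runnable sketch (the
function name and the directory value are illustrative):

```bash
#!/usr/bin/env bash

set -exuo pipefail

function list_downloads () {
    # Variables read: DOWNLOAD_DIR - directory to operate in (caller sets it).
    # The RETURN trap restores both the ancestor trap and the directory.
    set -exuo pipefail
    pushd "${DOWNLOAD_DIR}"
    trap 'trap - RETURN; popd' RETURN
    ls -la
}

DOWNLOAD_DIR="/tmp"    # illustrative value
list_downloads
pwd    # back in the original directory thanks to the RETURN trap
```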
-
-+ If entry scripts also use traps to restore the working directory (or other
-  state), they SHOULD use EXIT traps instead.
-
-  + That is because the "exit" command, as well as the default behavior
-    of "die" or "set -e", causes a direct exit (without skipping function
-    returns).
-
-# Function Size
-
-+ In general, code SHOULD follow reasoning similar to how pylint
-  limits code complexity.
-
-+ It is RECOMMENDED to have functions somewhat simpler than Python functions,
-  as Bash is generally more verbose and less readable.
-
-+ If code contains comments in order to partition a block
-  into sub-blocks, the sub-blocks SHOULD be moved into separate functions.
-
-  + Unless the sub-blocks are essentially one-liners,
-    not readable just because external commands do not have
-    obvious enough parameters. Use common sense.
-
-# Documentation
-
-+ The library path and filename are visible from source sites. They SHOULD be
-  descriptive enough, so readers do not need to look inside to determine
-  how and why the sourced file is used.
-
-  + If code would use several functions with similar names,
-    it is RECOMMENDED to create a (well-named) sub-library for them.
-
-  + Code MAY create deep library trees if needed; it SHOULD store
-    common path prefixes into global variables to make sourcing easier.
-
-  + Contributors, look at other files in the subdirectory. You SHOULD
-    improve their filenames when adding or removing files.
-
-  + Library files SHOULD NOT have the executable flag set.
-
-  + Library files SHOULD have an extension .sh (or perhaps .bash).
-
-  + It is RECOMMENDED for entry scripts to also have the executable flag unset
-    and have the .sh extension.
-
-+ Each entry script MUST start with a shebang.
-
-  + "#!/usr/bin/env bash" is RECOMMENDED.
-
-  + Code SHOULD put an empty line after the shebang.
-
-  + Library files SHOULD NOT contain a shebang, as "source" is the primary
-    method to include them.
-
-+ Following that, there SHOULD be a block of comment lines with the copyright.
-
-  + It is a boilerplate, but human eyes are good at ignoring it.
-
-  + Overhead for git is also negligible.
-
-+ Following that, there MUST be "set -exuo pipefail".
-
-  + It acts as an anchor for humans to start paying attention.
-
-Then it depends on the script type.
-
-## Library Documentation
-
-+ Following "set -exuo pipefail" SHALL come the "import part" documentation.
-
-+ Then SHALL be the import code
-  ("source" commands and the bare minimum they need).
-
-+ Then SHALL be the function definitions, and inside:
-
-  + The body SHALL start with the function documentation explaining the API
-    contract. Similar to Robot [Documentation] or a Python function-level
-    docstring.
-
-    + See below.
-
-  + "set -exuo pipefail" SHALL be the first executable line
-    in the function body, except functions which legitimately need
-    different flags. Those SHALL also start with appropriate "set" command(s).
-
-  + Lines containing the code itself SHALL follow.
-
-    + "Code itself" SHALL include comment lines
-      explaining any non-obvious logic.
-
-  + There SHALL be two empty lines between function definitions.
-
-More details on function documentation:
-
-Generally, code SHOULD use comments to explain anything
-not obvious from the function name.
-
-+ Function documentation SHOULD start with a short description of the function
-  operation or motivation, but only if not obvious from the function name.
-
-+ Documentation SHOULD continue with listing any non-obvious side effects:
-
-  + Documentation MUST list all read global variables.
-
-    + Documentation SHOULD include descriptions of the semantics
-      of global variable values.
-      It is RECOMMENDED to mention which function is supposed to set them.
-
-    + The "include descriptions" part SHOULD apply to other items as well.
-
-  + Documentation MUST list all global variables set, unset, reset,
-    or otherwise updated.
-
-  + It is RECOMMENDED to list all hardcoded values used in code.
-
-    + Not critical, but can hint at future improvements.
-
-  + Documentation MUST list all files or directories read
-    (so the caller can make sure their content is ready).
-
-  + Documentation MUST list all files or directories updated
-    (created, deleted, emptied, otherwise edited).
-
-  + Documentation SHOULD list all functions called (so the reader can look
-    them up).
-
-    + Documentation SHOULD mention where the functions are defined,
-      if not in the current file.
-
-  + Documentation SHOULD list all external commands executed.
-
-    + Because their behavior can change "out of bounds", meaning
-      the contributor changing the implementation of the external command
-      can be unaware of this particular function being interested in its
-      side effects.
-
-  + Documentation SHOULD explain the exit code (coming from
-    the last executed command).
-
-    + Usually, most functions SHOULD be "pass or die",
-      but some callers MAY be interested in nonzero exit codes
-      without using global variables to store them.
-
-    + Remember, "exit 1" ends not only the function, but all scripts
-      in the source chain, so code MUST NOT use it for other purposes.
-
-      + Code SHOULD call the "die" function instead. This way the caller can
-        redefine that function, if there is a good reason for not exiting
-        on function failure.
-
-## Entry Script Documentation
-
-+ After "set -exuo pipefail", a high-level description SHALL come.
-
-  + Entry scripts are rarely reused, so detailed side effects
-    are OPTIONAL to document.
-
-  + But code SHOULD document the primary side effects.
-
-+ Then SHALL come a few commented lines to import the library with the "die"
-  function.
-
-+ Then SHALL come a block of "source" commands for sourcing the other needed
-  libraries.
-
-  + In alphabetical order; any "special" library SHOULD be
-    in the previous block (for "die").
-
-+ Then SHOULD come a block of commands processing arguments (if needed).
-
-+ Then SHALL come a block of function calls (with parameters as needed).
-
-# Other General Recommendations
-
-+ Code SHOULD NOT repeat itself, even in documentation:
-
-  + For hardcoded values, a general description SHOULD be written
-    (instead of copying the value), so when someone edits the value
-    in the code, the description still applies.
-
-  + If the affected directory name is taken from a global variable,
-    documentation MAY distribute the directory description
-    over the two items.
-
-  + If most of the side effects come from an inner call,
-    documentation MAY point the reader to the documentation
-    of the called function (instead of listing all the side effects).
-
-+ But documentation SHOULD repeat it if the information crosses functions.
-
-  + An item description MUST NOT be skipped just because the reader
-    should have read the parent/child documentation already.
-
-  + Frequently it is RECOMMENDED to copy&paste item descriptions
-    between functions.
-
-  + But sometimes it is RECOMMENDED to vary the descriptions. For example:
-
-    + A global variable setter MAY document how it figures out the value
-      (without caring about what it will be used for by other functions).
-
-    + A global variable reader MAY document how it uses the value
-      (without caring about how it has been figured out by the setter).
-
-+ When possible, Bash code SHOULD be made to look like Python
-  (or Robot Framework). Those are the three primary languages CSIT code relies
-  on, so it is nicer for the readers to see similar expressions when possible.
-  Examples:
-
-  + Code MUST use indentation, 1 level is 4 spaces.
-
-  + Code SHOULD use "if" instead of "&&" constructs.
-
-  + For comparisons, code SHOULD use operators such as "!=" (needs "[[").
-
-+ Code MUST NOT use more than 80 characters per line.
-
-  + If long external command invocations are needed,
-    code SHOULD use array variables to shorten them.
-
-  + If long strings (or arrays) are needed, code SHOULD use the "+=" operator
-    to grow the value over multiple lines.
-
-  + If "|| die" does not fit with the command, code SHOULD use curly braces:
-
-    + The current line has "|| {",
-
-    + the next line has the die commands (indented one level deeper),
-
-    + the final line closes with "}" at the original indent level.
diff --git a/docs/content/introduction/branches.md b/docs/content/introduction/branches.md
deleted file mode 100644
index 20759b9c78..0000000000
--- a/docs/content/introduction/branches.md
+++ /dev/null
@@ -1,192 +0,0 @@
----
-bookHidden: true
-title: "Git Branches in CSIT"
----
-
-# Git Branches in CSIT
-
-## Overview
-
-This document describes how to create and remove git branches in the CSIT
-project.
-
-To be able to perform everything described in this file, you must be **logged
-in as a committer**.
-
-## Operational Branches
-
-For more information about operational branches see
-[CSIT/Branching Strategy](https://wiki.fd.io/view/CSIT/Branching_Strategy) and
-[CSIT/Jobs](https://wiki.fd.io/view/CSIT/Jobs) on
-[fd.io](https://fd.io) [wiki](https://wiki.fd.io/view/CSIT) pages.
-
-> Note: The branch `rls2009_lts` is used here only as an example.
-
-### Pre-requisites
-
-1. The last builds of the weekly and semiweekly jobs must finish with status
-   *"Success"*.
-1. If any of the watched jobs failed, try to find the root cause, fix it and
-   run it again.
-
-The watched jobs are:
-
-- master:
-  - [csit-vpp-device-master-ubuntu1804-1n-skx-weekly](https://jenkins.fd.io/view/csit/job/csit-vpp-device-master-ubuntu1804-1n-skx-weekly)
-  - [csit-vpp-device-master-ubuntu1804-1n-skx-semiweekly](https://jenkins.fd.io/view/csit/job/csit-vpp-device-master-ubuntu1804-1n-skx-semiweekly)
-- 2009_lts:
-  - [csit-vpp-device-2009_lts-ubuntu1804-1n-skx-weekly](https://jenkins.fd.io/view/csit/job/csit-vpp-device-2009_lts-ubuntu1804-1n-skx-weekly)
-  - [csit-vpp-device-2009_lts-ubuntu1804-1n-skx-semiweekly](https://jenkins.fd.io/view/csit/job/csit-vpp-device-2009_lts-ubuntu1804-1n-skx-semiweekly)
-
-### Procedure
-
-**A. CSIT Operational Branch**
-1. Take the revision string from the last successful build of the **weekly**
-   job, e.g. **Revision**: 0f9b20775b4a656b67c7039e2dda4cf676af2b21.
-1. Open [Gerrit](https://gerrit.fd.io).
-1. Go to
-   [Browse --> Repositories --> csit --> Branches](https://gerrit.fd.io/r/admin/repos/csit,branches).
-1. Click `CREATE NEW`.
-1. Fill in the revision number and the name of the new operational branch. Its
-   format is: `oper-YYMMDD` for master and `oper-rls{RELEASE}-{YYMMDD}` or
-   `oper-rls{RELEASE}_lts-{YYMMDD}` for release branches.
-1. Click "CREATE".
-1. If needed, delete old operational branches by clicking "DELETE".
-
-**B. VPP Stable version**
-1. Open the console log of the last successful **semiweekly** build and search
-   for the VPP version (e.g. vpp_21 ...).
-1. You should find a string with this structure:
-   `vpp_21.01-rc0~469-g7acab3790~b368_amd64.deb`
-1. Modify the [VPP_STABLE_VER_UBUNTU_BIONIC](../../VPP_STABLE_VER_UBUNTU_BIONIC)
-   and [VPP_STABLE_VER_CENTOS](../../VPP_STABLE_VER_CENTOS) files.
-1. Use a string with the build number, e.g. `21.01-rc0~469_g7acab3790~b129`
-   for [VPP_STABLE_VER_CENTOS](../../VPP_STABLE_VER_CENTOS) and a string
-   without the build number, e.g. `21.01-rc0~469_g7acab3790` for
-   [VPP_STABLE_VER_UBUNTU_BIONIC](../../VPP_STABLE_VER_UBUNTU_BIONIC).
-1. Update the stable versions in master and in all LTS branches.
-
-## Release Branches
-
-> Note: VPP release 21.01 is used here only as an example.
-
-### Pre-requisites
-
-1. The VPP release manager sends the information email to announce that the RC1
-   milestone for VPP {release}, e.g. 21.01, is complete, and the artifacts are
-   available.
-1. The artifacts (*.deb and *.rpm) should be available at
-   `https://packagecloud.io/fdio/{release}`. For example, see the artifacts for
-   [VPP release 21.01](https://packagecloud.io/fdio/2101). The last available
-   build is to be used.
-1. All CSIT patches for the release are merged in the CSIT master branch.
-
-### Procedure
-
-**A. Release branch**
-
-1. Open [Gerrit](https://gerrit.fd.io).
-1. Go to
-   [Browse --> Repositories --> csit --> Branches](https://gerrit.fd.io/r/admin/repos/csit,branches).
-1. Save the revision string of master for further use.
-1. Click `CREATE NEW`.
-1. Fill in the revision number and the name of the new release branch. Its
-   format is: `rlsYYMM`, e.g. rls2101.
-1. Click "CREATE".
-
-**B. Jenkins jobs**
-
-See ["Add CSIT rls2101 branch"](https://gerrit.fd.io/r/c/ci-management/+/30439)
-and ["Add report jobs to csit rls2101 branch"](https://gerrit.fd.io/r/c/ci-management/+/30462)
-patches as an example.
-
-1. [csit.yaml](https://github.com/FDio/ci-management/blob/master/jjb/csit/csit.yaml):
-   Documentation of the source code and the Report
-   - Add release branch (rls2101) for `csit-docs-merge-{stream}` and
-     `csit-report-merge-{stream}` (project --> stream).
-1. [csit-perf.yaml](https://github.com/FDio/ci-management/blob/master/jjb/csit/csit-perf.yaml):
-   Verify jobs
-   - Add release branch (rls2101) to `project --> jobs -->
     csit-vpp-perf-verify-{stream}-{node-arch} --> stream`.
-   - Add release branch (rls2101) to `project --> project: 'csit' --> stream`.
-   - Add release branch (rls2101) to `project --> project: 'csit' --> stream_report`.
-1. [csit-tox.yaml](https://github.com/FDio/ci-management/blob/master/jjb/csit/csit-tox.yaml):
-   tox
-   - Add release branch (rls2101) to `project --> stream`.
-1. [csit-vpp-device.yaml](https://github.com/FDio/ci-management/blob/master/jjb/csit/csit-vpp-device.yaml):
-   csit-vpp-device
-   - Add release branch (rls2101) to `project --> jobs (weekly / semiweekly) --> stream`.
-   - Add release branch (rls2101) to `project --> project: 'csit' --> stream`.
-
-**C. VPP Stable version**
-
-See the patch
-[Update of VPP_REPO_URL and VPP_STABLE_VER files](https://gerrit.fd.io/r/c/csit/+/30461)
-and / or
-[rls2101: Update VPP_STABLE_VER files to release version](https://gerrit.fd.io/r/c/csit/+/30976)
-as an example.
-
-1. Find the last successful build on the
-   [Package Cloud](https://packagecloud.io) for the release, e.g.
-   [VPP release 21.01](https://packagecloud.io/fdio/2101).
-1. Clone the release branch to your PC:
-   `git clone --depth 1 ssh://@gerrit.fd.io:29418/csit --branch rls{RELEASE}`
-1. Modify the [VPP_STABLE_VER_UBUNTU_BIONIC](../../VPP_STABLE_VER_UBUNTU_BIONIC)
-   and [VPP_STABLE_VER_CENTOS](../../VPP_STABLE_VER_CENTOS) files with the last
-   successful build.
-1. Modify [VPP_REPO_URL](../../VPP_REPO_URL) to point to the new release, e.g.
-   `https://packagecloud.io/install/repositories/fdio/2101`.
-1. You can also modify the [.gitreview](../../.gitreview) file and set the new
-   default branch.
-1. Wait until the verify jobs
-   - [csit-vpp-device-2101-ubuntu1804-1n-skx](https://jenkins.fd.io/job/csit-vpp-device-2101-ubuntu1804-1n-skx)
-   - [csit-vpp-device-2101-ubuntu1804-1n-tx2](https://jenkins.fd.io/job/csit-vpp-device-2101-ubuntu1804-1n-tx2)
-
-   successfully finish and merge the patch.
-
-**D. CSIT Operational Branch**
-
-1. Manually start (Build with Parameters) the weekly job
-   [csit-vpp-device-2101-ubuntu1804-1n-skx-weekly](https://jenkins.fd.io/view/csit/job/csit-vpp-device-2101-ubuntu1804-1n-skx-weekly)
-1. When it successfully finishes, take the revision string, e.g. **Revision**:
-   876b6c1ae05bfb1ad54ff253ea021f3b46780fd4, to create a new operational branch
-   for the new release.
-1. Open [Gerrit](https://gerrit.fd.io).
-1. Go to
-   [Browse --> Repositories --> csit --> Branches](https://gerrit.fd.io/r/admin/repos/csit,branches).
-1. Click `CREATE NEW`.
-1. Fill in the revision number and the name of the new operational branch. Its
-   format is: `oper-rls{RELEASE}-YYMMDD`, e.g. `oper-rls2101-201217`.
-1. Click "CREATE".
-1. Manually start (Build with Parameters) the semiweekly job
-   [csit-vpp-device-2101-ubuntu1804-1n-skx-semiweekly](https://jenkins.fd.io/view/csit/job/csit-vpp-device-2101-ubuntu1804-1n-skx-semiweekly)
-1. When it successfully finishes, check in the console log whether it used the
-   right VPP version (search for `VPP_VERSION=`) from the right repository
-   (search for `REPO_URL=`).
-
-**E. Announcement**
-
-If everything is as it should be, send the announcement email to the
-`csit-dev@lists.fd.io` mailing list.
-
-*Example:*
-
-Subject:
-```text
-CSIT rls2101 branch pulled out
-```
-
-Body:
-```text
-CSIT rls2101 branch [0] is created and fully functional.
-
-Corresponding operational branch (oper-rls2101-201217) has been created too.
-
-We are starting dry runs for performance ndrpdr iterative tests to get initial
-ndrpdr values with available rc1 packages as well as to test all the infra
-before starting report data collection runs.
-
-Regards,
-
-
-[0] https://git.fd.io/csit/log/?h=rls2101
-```
diff --git a/docs/content/introduction/dashboard_history.md b/docs/content/introduction/dashboard_history.md
deleted file mode 100644
index f7f9db576a..0000000000
--- a/docs/content/introduction/dashboard_history.md
+++ /dev/null
@@ -1,17 +0,0 @@
----
-title: "Dashboard History"
-weight: 1
----
-
-# Dashboard History
-
-FD.io {{< release_csit >}} Dashboard History and per .[ww] revision changes are
-listed below.
-
- **.[ww] Revision** | **Changes**
---------------------|------------------
- .10                | Initial revision
-
-FD.io CSIT revisions follow the CSIT-[yy][mm].[ww] numbering format, with the
-version denoted by the concatenation of a two-digit year [yy] and a two-digit
-month [mm], and the maintenance revision identified by a two-digit calendar
-week number [ww].
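As a quick illustration of this numbering scheme, here is a hypothetical
sketch deriving such a revision string for the current date (CSIT does not
necessarily generate revisions this way; GNU date's ISO week %V is assumed to
match the intended calendar week):

```bash
#!/usr/bin/env bash

set -euo pipefail

yymm="$(date +%y%m)"    # two-digit year and month, e.g. 2101
ww="$(date +%V)"        # two-digit week number (ISO week assumed)
echo "CSIT-${yymm}.${ww}"
```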
diff --git a/docs/content/introduction/design.md b/docs/content/introduction/design.md
deleted file mode 100644
index ba31477c4d..0000000000
--- a/docs/content/introduction/design.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-title: "Design"
-weight: 3
----
-
-# Design
-
-FD.io CSIT system design needs to meet the continuously expanding requirements
-of FD.io projects including VPP, related sub-systems (e.g. plugin applications,
-DPDK drivers) and FD.io applications (e.g. DPDK applications), as well as a
-growing number of compute platforms running those applications. With the CSIT
-project scope and charter including both FD.io continuous testing AND
-performance trending/comparisons, those evolving requirements further amplify
-the need for CSIT framework modularity, flexibility and usability.
-
-## Design Hierarchy
-
-CSIT follows a hierarchical system design with SUTs and DUTs at the bottom
-level of the hierarchy, the presentation level at the top level and a number
-of functional layers in-between. The current CSIT system design including the
-CSIT framework is depicted in the figure below.
-
-{{< figure src="/cdocs/csit_design_picture.svg" title="CSIT Design" >}}
-
-A brief bottom-up description is provided here:
-
-1. SUTs, DUTs, TGs
-   - SUTs - Systems Under Test;
-   - DUTs - Devices Under Test;
-   - TGs - Traffic Generators;
-2. Level-1 libraries - Robot and Python
-   - Lowest level CSIT libraries abstracting underlying test environment, SUT,
-     DUT and TG specifics;
-   - Used commonly across multiple L2 KWs;
-   - Performance and functional tests:
-     - L1 KWs (KeyWords) are implemented as RF libraries and Python
-       libraries;
-   - Performance TG L1 KWs:
-     - All L1 KWs are implemented as Python libraries:
-       - Support for TRex only today;
-       - CSIT IXIA drivers in progress;
-   - Performance data plane traffic profiles:
-     - TG-specific stream profiles provide full control of:
-       - Packet definition - layers, MACs, IPs, ports, combinations thereof
-         e.g. IPs and UDP ports;
-       - Stream definitions - different streams can run together, delayed,
-         one after another;
-     - Stream profiles are independent of the CSIT framework and can be used
-       in any TRex setup, and can be sent anywhere to repeat tests with
-       exactly the same setup;
-     - Easily extensible - one can create a new stream profile that meets
-       test requirements;
-     - The same stream profile can be used for different tests with the same
-       traffic needs;
-   - Functional data plane traffic scripts:
-     - Scapy specific traffic scripts;
-3. Level-2 libraries - Robot resource files:
-   - Higher level CSIT libraries abstracting required functions for executing
-     tests;
-   - L2 KWs are classified into the following functional categories:
-     - Configuration, test, verification, state report;
-     - Suite setup, suite teardown;
-     - Test setup, test teardown;
-4. Tests - Robot:
-   - Test suites with test cases;
-   - Performance tests using physical testbed environment:
-     - VPP;
-     - DPDK-Testpmd;
-     - DPDK-L3Fwd;
-   - Tools:
-     - Documentation generator;
-     - Report generator;
-     - Testbed environment setup ansible playbooks;
-     - Operational debugging scripts;
-
-5. Test Lifecycle Abstraction
-
-A well coded test must follow a disciplined abstraction of the test
-lifecycles that includes setup, configuration, test and verification. In
-addition, to improve test execution efficiency, the common aspects of
-test setup and configuration shared across multiple test cases should be
-done only once.
-Translating these high-level guidelines into Robot Framework, one arrives at
-the definition of well coded RF tests for FD.io CSIT.
-Anatomy of Good Tests for CSIT:
-
-1. Suite Setup - Suite startup Configuration common to all Test Cases in suite:
-   uses Configuration KWs, Verification KWs, StateReport KWs;
-2. Test Setup - Test startup Configuration common to multiple Test Cases: uses
-   Configuration KWs, StateReport KWs;
-3. Test Case - uses L2 KWs with RF Gherkin style:
-   - prefixed with {Given} - Verification of Test setup, reading state: uses
-     Configuration KWs, Verification KWs, StateReport KWs;
-   - prefixed with {When} - Test execution: Configuration KWs, Test KWs;
-   - prefixed with {Then} - Verification of Test execution, reading state: uses
-     Verification KWs, StateReport KWs;
-4. Test Teardown - post Test teardown with Configuration cleanup and
-   Verification common to multiple Test Cases - uses: Configuration KWs,
-   Verification KWs, StateReport KWs;
-5. Suite Teardown - Suite post-test Configuration cleanup: uses Configuration
-   KWs, Verification KWs, StateReport KWs;
-
-## RF Keywords Functional Classification
-
-CSIT RF KWs are classified into the functional categories matching the test
-lifecycle events described earlier. All CSIT RF L2 and L1 KWs have been grouped
-into the following functional categories:
-
-1. Configuration;
-2. Test;
-3. Verification;
-4. StateReport;
-5. SuiteSetup;
-6. TestSetup;
-7. SuiteTeardown;
-8. TestTeardown;
-
-## RF Keywords Naming Guidelines
-
-Readability counts: "...code is read much more often than it is written."
-Hence following a good and consistent grammar practice is important when
-writing Robot Framework KeyWords and Tests. All CSIT test cases
-are coded using the Gherkin style and include only L2 KW references. L2 KWs
-are coded using a simple style and include L2 KW, L1 KW, and L1 Python
-references. To improve readability, the proposal is to use the same grammar
-for both Robot Framework KW styles, and to formalize the grammar of English
-sentences used for naming the Robot Framework KWs. Robot
-Framework KW names are short sentences expressing a functional description of
-the command. They must follow English sentence grammar in one of the following
-forms:
-
-1. **Imperative** - verb-object(s): *"Do something"*, verb in base form.
-2. **Declarative** - subject-verb-object(s): *"Subject does something"*, verb
-   in a third-person singular present tense form.
-3. **Affirmative** - modal_verb-verb-object(s): *"Subject should be something"*,
-   *"Object should exist"*, verb in base form.
-4. **Negative** - modal_verb-Not-verb-object(s): *"Subject should not be
-   something"*, *"Object should not exist"*, verb in base form.
-
-The passive form MUST NOT be used. However, usage of a past participle as an
-adjective is okay. See the usage examples provided in the Coding guidelines
-section below. The following sections list the applicability of the above
-grammar forms to different Robot Framework KW categories. Usage
-examples are provided, both good and bad.
-
-## Coding Guidelines
-
-Coding guidelines can be found on the
-[Design optimizations wiki page](https://wiki.fd.io/view/CSIT/Design_Optimizations).
\ No newline at end of file
diff --git a/docs/content/introduction/model_schema.md b/docs/content/introduction/model_schema.md
deleted file mode 100644
index ae3ba38fd7..0000000000
--- a/docs/content/introduction/model_schema.md
+++ /dev/null
@@ -1,60 +0,0 @@
----
-bookHidden: true
-title: "Model Schema"
----
-
-# Model Schema
-
-This document describes what is currently implemented in CSIT,
-especially the export side (UTI), not the import side (PAL).
-
-## Version
-
-This document is valid for CSIT model version 1.4.0.
-
-It is recommended to use semantic versioning: https://semver.org/
-That means, if the new model misses a field present in the old model,
-bump the major version. If the new model adds a field
-not present in the old model, bump the minor version.
-Any other edit in the implementation (or documentation) bumps the patch
-version. If you change a value type or formatting,
-consider whether the parser (PAL) understands the new value correctly.
-Renaming a field is the same as adding a new one and removing the old one.
-The parser (PAL) has to know the exact major version and a minimal minor
-version; barring bugs, it can ignore the patch version and bumped minor
-versions.
-
-## UTI
-
-UTI stands for Unified Test Interface.
-It mainly focuses on exporting information gathered during a test run
-into JSON output files.
-
-### Output Structure
-
-UTI outputs come in a filesystem tree structure (a single tree), where
-directories correspond to suite levels and files correspond to a suite setup,
-a suite teardown, or any test case at that level of the suite.
-The directory name comes from the SUITE_NAME Robot variable (the last part,
-as the previous parts are higher level suites), converted to lowercase.
-If the suite name contains spaces (Robot converts underscores to spaces),
-they are replaced with underscores.
-
-The filesystem tree is rooted under tests/ (as suites in git are there),
-and contains one file for each component (test case, suite setup,
-suite teardown).
-
-Although we expect only ASCII text in the exported files,
-we manipulate files using UTF-8 encoding,
-so if Robot Framework uses a non-ASCII character, it will be handled.
-
-### JSON schemas
-
-The CSIT model is formally defined as a collection of JSON schema documents,
-one for each output file type.
-
-The current version specifies only one output file type:
-Info output for a test case.
-
-The authoritative JSON schema documents are in JSON format.
-The git repository also contains a YAML formatted document and a conversion
-utility, which simplify maintenance of the JSON documents
-(no need to track brackets and commas), but these are not authoritative.
diff --git a/docs/content/introduction/perf_triggers_design.md b/docs/content/introduction/perf_triggers_design.md
deleted file mode 100644
index 445846f4d9..0000000000
--- a/docs/content/introduction/perf_triggers_design.md
+++ /dev/null
@@ -1,44 +0,0 @@
----
-bookHidden: true
-title: "Performance Triggers Design"
----
-
-# Performance Triggers Design
-
-*Syntax*
-  trigger_keyword [{tag1} {tag2}AND{tag3} !{tag4} !{tag5}]
-
-*Inputs*
-  - trigger_keyword for vpp-* jobs: 'perftest'
-  - trigger_keyword for csit-* jobs: 'csit-perftest'
-  - tags: existing CSIT tags, e.g. ip4base, ip6base, iacldst, memif
-
-Set of default tags appended to user input, under CSIT control
-  - always-on for vpp-csit*.job: 'mrr' 'nic_intel_x710-da2' '1t1c'
-  - if the input has no tags, the following set is applied:
-    - 'mrrANDnic_intel-x710AND1t1cAND64bANDip4base'
-    - 'mrrANDnic_intel-x710AND1t1cAND78bANDip6base'
-    - 'mrrANDnic_intel-x710AND1t1cAND64bANDl2bdbase'
-
-Examples
-  input: 'perftest'
-    expanded: 'mrrANDnic_intel_x710-da2AND1t1cAND64bANDl2bdbase mrrANDnic_intel_x710-da2AND1t1cAND64bANDip4base mrrANDnic_intel_x710-da2AND1t1cAND78bANDip6base'
-  input: 'perftest l2bdbase l2xcbase'
-    expanded: 'mrrANDnic_intel_x710-da2ANDl2bdbase mrrANDnic_intel_x710-da2ANDl2xcbase'
-  input: 'perftest ip4base !feature'
-    expanded: 'mrrANDnic_intel_x710-da2ANDip4base' not 'feature'
-  input: 'perftest ip4base !feature !lbond_dpdk'
-    expanded: 'mrrANDnic_intel_x710-da2ANDip4base' not 'feature' not 'lbond_dpdk'
-  input: 'perftestxyx ip4base !feature !lbond_dpdk'
-    invalid: detected as an error
-  input: 'perftestip4base !feature !lbond_dpdk'
-    invalid: detected as an error
-  input: 'perftest ip4base!feature!lbond_dpdk'
-    invalid expansion: 'mrrANDnic_intel_x710-da2ANDip4base!feature!lbond_dpdk'
-    execution of Robot Framework will fail
-
-Constraints
-  The trigger keyword must be different for every job, to avoid running
-  multiple jobs at once. The trigger keyword must not be a substring of the
-  job name or of any other message printed by JJB back to the Gerrit message,
-  as that can lead to recursive execution.
diff --git a/docs/content/introduction/test_code_guidelines.md b/docs/content/introduction/test_code_guidelines.md
deleted file mode 100644
index 9707d63ea6..0000000000
--- a/docs/content/introduction/test_code_guidelines.md
+++ /dev/null
@@ -1,294 +0,0 @@
----
-bookHidden: true
-title: "CSIT Test Code Guidelines"
----
-
-# CSIT Test Code Guidelines
-
-The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
-"SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED",
-"MAY", and "OPTIONAL" in this document are to be interpreted as
-described in [BCP 14](https://tools.ietf.org/html/bcp14),
-[RFC2119](https://tools.ietf.org/html/rfc2119),
-[RFC8174](https://tools.ietf.org/html/rfc8174)
-when, and only when, they appear in all capitals, as shown here.
-
-This document SHALL describe guidelines for writing reliable, maintainable,
-reusable and readable code for CSIT.
-
-# Robot Framework test case files and resource files
-
-+ General
-
-  + Contributors SHOULD look at requirements.txt in the root CSIT directory
-    for the currently used Robot Framework version.
-    Contributors SHOULD read the
-    [Robot Framework User Guide](http://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html)
-    for more details.
-
-  + Robot Framework test case files and resource files
-    SHALL use the special extension .robot.
-
-  + The pipe and space separated file format (without a trailing pipe
-    and without pipe aligning) SHALL be used.
-    Tabs are invisible characters, which are error prone.
-    Four-space separation is prone to an accidental double space
-    acting as a separator.
-
-  + Files SHALL be encoded in UTF-8 (the default Robot source file encoding).
-    Usage of non-ASCII characters SHOULD be avoided if possible.
-    It is RECOMMENDED to
-    [escape](http://robotframework.org/robotframework/latest/RobotFrameworkUserGuide.html#escaping)
-    non-ASCII characters.
-
-  + Line length SHALL be limited to 80 characters.
-
-  + There SHALL be licence text present at the beginning of each file.
-
-  + Copy-pasting of code is a NOT RECOMMENDED practice; any code that could be
-    re-used SHOULD be put into a library (Robot resource, Python library, ...).
-
-+ Test cases
-
-  + It is RECOMMENDED to use data-driven test case definitions
-    anytime a suite contains test cases similar in structure.
-    Typically, a suite SHOULD define a Template keyword, and test cases
-    SHOULD only specify tags and argument values
-
-      *** Settings ***
-      | Test Template | Local Template
-      ...
-
-      *** Test Cases ***
-      | tc01-64B-1c-eth-l2patch-mrr
-      | | [Tags] | 64B | 1C
-      | | framesize=${64} | phy_cores=${1}
-
-  + Test case templates (or test cases) SHALL be written in Behavior-driven
-    style, i.e. in readable English, so that even non-technical project
-    stakeholders can understand them
-
-      *** Keywords ***
-      | Local Template
-      | | [Documentation]
-      | | ... | [Cfg] DUT runs L2 patch config with ${phy_cores} phy core(s).
-      | | ... | [Ver] Measure NDR and PDR values using MLRsearch algorithm.\
-      | | ...
-      | | ... | *Arguments:*
-      | | ... | - frame_size - Framesize in Bytes in integer
-      | | ... | or string (IMIX_v4_1). Type: integer, string
-      | | ... | - phy_cores - Number of physical cores. Type: integer
-      | | ... | - rxq - Number of RX queues, default value: ${None}.
-      | | ... | Type: integer
-      | | ...
-      | | [Arguments] | ${frame_size} | ${phy_cores} | ${rxq}=${None}
-      | | ...
-      | | Set Test Variable | \${frame_size}
-      | | ...
-      | | Given Add worker threads and rxqueues to all DUTs
-      | | ... | ${phy_cores} | ${rxq}
-      | | And Add PCI devices to all DUTs
-      | | Set Max Rate And Jumbo And Handle Multi Seg
-      | | And Apply startup configuration on all VPP DUTs
-      | | When Initialize L2 patch
-      | | Then Find NDR and PDR intervals using optimized search
-
-  + Every suite and test case template (or test case)
-    SHALL contain short documentation.
-    The generated CSIT web pages display the documentation.
-
-  + You SHOULD NOT use hard-coded constants.
-    It is RECOMMENDED to use the variable table
-    (\*\*\*Variables\*\*\*) to define test case specific values.
-    You SHALL use the assignment sign = after the variable name
-    to make assigning variables slightly more explicit
-
-      *** Variables ***
-      | ${traffic_profile}= | trex-stl-2n-ethip4-ip4src254
-
-  + Common test case specific settings of the test environment SHALL be done
-    in the Test Setup keyword defined in the Settings table.
-
-    + The Run Keywords construction is RECOMMENDED if it is more readable
-      than a keyword.
-
-    + A separate keyword is RECOMMENDED if the construction is less readable.
-
-  + Post-test cleaning and processing actions SHALL be done in the Test
-    Teardown part of the Settings table (e.g. download statistics from VPP
-    nodes). This part is executed even if the test case has failed. On the
-    other hand, it is possible to disable the teardown from the command line,
-    thus leaving the system in a "broken" state for investigation.
-
-  + Every test case SHALL be correctly tagged. The list of defined tags is in
-    csit/docs/introduction/test_tag_documentation.rst
-
-    + Whenever possible, common tags SHALL be set using Force Tags
-      in the Settings table.
-
-  + User high-level keywords specific to the particular test suite
-    SHOULD be implemented in the Keywords table of a suitable Robot resource
-    file to enable readability and code-reuse.
-
-    + Such keywords MAY be implemented in the Keywords table of the suite
-      instead, if the contributor believes no other test will use such
-      keywords. But this is NOT RECOMMENDED in general, as keywords in
-      Resources are easier to maintain.
- - + All test case names (and suite names) SHALL conform - to the current naming convention. - https://wiki.fd.io/view/CSIT/csit-test-naming - - + Frequently, different suites use the same test case layout. - It is RECOMMENDED to use the available autogeneration scripts, - possibly extending them if their current functionality is not sufficient. - -+ Resource files - - + SHALL be used to implement higher-level keywords that are used in test cases - or other higher-level (or medium-level) keywords. - - + Every keyword SHALL contain Documentation where the purpose and arguments - of the keyword are described. Also document types, return values, - and any specific assumptions the particular keyword relies on. - - + A keyword usage example SHALL be part of the Documentation. - The example SHALL use pipe and space separated format - (with escaped pipes) and with a trailing pipe. - - + The reason is the possible usage of Robot's libdoc tool - to generate test and resource documentation. In that case the - example keyword usage would be rendered in a table. - - + Keyword name SHALL describe what the keyword does, - specifically and in a reasonable length (“short sentence”). - - + Keyword names SHALL be short enough for call sites - to fit within the line length limit. - - + If a keyword argument has a most commonly used value, it is RECOMMENDED - to set it as the default. This makes keyword code longer, - but suite code shorter, and readability (and maintainability) - of suites SHALL always be more important. - - + If there is intermediate data (created by one keyword, to be used - by another keyword) of singleton semantics (it is clear that the test case - can have at most one instance of such data, even if the instance - is complex, for example ${nodes}), it is RECOMMENDED to store it - in test variables. You SHALL document test variables read or written - by a keyword. This makes the test template code less verbose. - As soon as the data instance is not unique, you SHALL pass it around - via arguments and return values explicitly (this makes lower level keywords - more reusable and less bug prone). - - + It is RECOMMENDED to pass arguments explicitly via the [Arguments] line. - Setting test variables takes more space and is less explicit. - Using arguments embedded in the keyword name makes them less visible, - and it makes it harder for the line containing the resulting long name - to fit into the maximum character limit, so you SHOULD NOT use them. - -# Python library files - -+ General - - + SHALL be used to implement low-level keywords that are called from - resource files (of higher-level keywords) or from test cases. - - + Higher-level keywords MAY be implemented in a python library file too. - It is RECOMMENDED especially in the case that their implementation - in a resource file would be too difficult or impossible, - e.g. complex data structures or functional programming. - - + Every keyword, Python module, class, method, enum SHALL contain a - docstring with a short description and the used input parameters - and possible return value(s) or raised exceptions. - - + The docstrings SHOULD conform to - [PEP 257](https://www.python.org/dev/peps/pep-0257/) - and other quality standards. - - + CSIT contributions SHALL use a specific formatting for documenting - arguments, return values and similar. - - + Keyword usage examples MAY be grouped and used - in the class/module documentation string, to provide a better overview - of the usage and relationships between keywords.
- - + Keyword name SHALL describe what the keyword does, - specifically and in a reasonable length (“short sentence”). - See https://wiki.fd.io/view/CSIT/csit-test-naming - - + Python implementation of a keyword is a function, - so its name in the python library should be lowercase_with_underscores. - Robot call sites should use the name with the first letter capitalized, and with spaces. - -+ Coding - - + It is RECOMMENDED to use some standard development tool - (e.g. PyCharm Community Edition) and follow - [PEP-8](https://www.python.org/dev/peps/pep-0008/) recommendations. - - + All python code (not only Robot libraries) SHALL adhere to the PEP-8 standard. - This is reported by the CSIT Jenkins verify job. - - + Indentation: You SHALL NOT use tabs for indents! - Indent is defined as four spaces. - - + Line length: SHALL be limited to 80 characters. - - + CSIT Python code assumes PYTHONPATH is set - to the root of the cloned CSIT git repository, creating a tree of sub-packages. - You SHALL use that tree for importing, for example - - from resources.libraries.python.ssh import exec_cmd_no_error - - + Imports SHALL be grouped in the following order: - - 1. standard library imports, - 2. related third party imports, - 3. local application/library specific imports. - - You SHALL put a blank line between each group of imports. - - + You SHALL use two blank lines between top-level definitions, - one blank line between method definitions. - - + You SHALL NOT execute any active code on library import. - - + You SHALL NOT use global variables inside library files. - - + You MAY define constants inside library files. - - + It is NOT RECOMMENDED to use hard-coded constants (e.g. numbers, - paths without any description). It is RECOMMENDED to use - configuration file(s), like /csit/resources/libraries/python/Constants.py, - with appropriate comments. - - + The code SHALL log at the lowest possible level of implementation, - for debugging purposes. You SHALL use the same style for similar events. - You SHALL keep logging as verbose as necessary. - - + You SHALL use the most appropriate exception, not the general one (Exception), - if possible. You SHOULD create your own exception - if necessary and implement logging there, at debug level. - - + You MAY use RuntimeError for generally unexpected failures. - - + It is RECOMMENDED to use RuntimeError also for - infrastructure failures, e.g. losing SSH connection to SUT. - - + You MAY use EnvironmentError and its subclasses instead, - if the distinction is informative for callers. - - + It is RECOMMENDED to use AssertionError when SUT is at fault. - - + For each class (e.g. exception) it is RECOMMENDED to implement __repr__() - which SHALL return a string usable as a constructor call - (including repr()ed arguments). - When logging, you SHOULD log the repr form, unless the internal structure - of the object in question would likely result in too long output. - This is helpful for debugging. - - + For composing and formatting strings, you SHOULD use .format() - with named arguments. - Example: "repr() of name: {name!r}".format(name=name) diff --git a/docs/content/introduction/test_naming.md b/docs/content/introduction/test_naming.md deleted file mode 100644 index 22e2c0bf8a..0000000000 --- a/docs/content/introduction/test_naming.md +++ /dev/null @@ -1,107 +0,0 @@ ---- -title: "Test Naming" -weight: 4 ---- - -# Test Naming - -## Background - -{{< release_csit >}} follows a common structured naming convention for all -performance and system functional tests, introduced in CSIT 17.01.
- -The naming should be intuitive for the majority of tests. A complete -description of the CSIT test naming convention is provided on the -[CSIT test naming wiki page](https://wiki.fd.io/view/CSIT/csit-test-naming). -Below are a few illustrative examples of the naming usage for test suites across CSIT -performance, functional and Honeycomb management test areas. - -## Naming Convention - -The CSIT approach is to use a tree naming convention and to encode the following -testing information into test suite and test case names: - -1. packet network port configuration - * port type, physical or virtual; - * number of ports; - * NIC model, if applicable; - * port-NIC locality, if applicable; -2. packet encapsulations; -3. VPP packet processing - * packet forwarding mode; - * packet processing function(s); -4. packet forwarding path - * if present, network functions (processes, containers, VMs) and their - topology within the computer; -5. main measured variable, type of test. - -The proposed convention is to encode ports and NICs on the left (underlay), -followed by the outer-most frame header, then other stacked headers up to the -header processed by vSwitch-VPP, then the VPP forwarding function, then encap on -vhost interface, number of vhost interfaces, number of VMs. If chained VMs are -present, they get added on the right. Test topology is expected to be -symmetric, in other words packets enter and leave SUT through ports specified -on the left of the test name. Here are some examples to illustrate the convention, -followed by the complete legend, and tables mapping the new test filenames to -old ones. - -## Naming Examples - -CSIT test suite naming examples (filename.robot) for common tested VPP -topologies: - -1. **Physical port to physical port - a.k.a. NIC-to-NIC, Phy-to-Phy, P2P** - * *PortNICConfig-WireEncapsulation-PacketForwardingFunction- - PacketProcessingFunction1-...-PacketProcessingFunctionN-TestType* - * *10ge2p1x520-dot1q-l2bdbasemaclrn-ndrdisc.robot* => 2 ports of 10GE on Intel - x520 NIC, dot1q tagged Ethernet, L2 bridge-domain baseline switching with - MAC learning, NDR throughput discovery. - * *10ge2p1x520-ethip4vxlan-l2bdbasemaclrn-ndrchk.robot* => 2 ports of 10GE on - Intel x520 NIC, IPv4 VXLAN Ethernet, L2 bridge-domain baseline switching - with MAC learning, NDR throughput checking. - * *10ge2p1x520-ethip4-ip4base-ndrdisc.robot* => 2 ports of 10GE on Intel x520 - NIC, IPv4 baseline routed forwarding, NDR throughput discovery. - * *10ge2p1x520-ethip6-ip6scale200k-ndrdisc.robot* => 2 ports of 10GE on Intel - x520 NIC, IPv6 scaled up routed forwarding, NDR throughput discovery. - * *10ge2p1x520-ethip4-ip4base-iacldstbase-ndrdisc.robot* => 2 ports of 10GE on - Intel x520 NIC, IPv4 baseline routed forwarding, ingress Access Control - Lists baseline matching on destination, NDR throughput discovery. - * *40ge2p1vic1385-ethip4-ip4base-ndrdisc.robot* => 2 ports of 40GE on Cisco - vic1385 NIC, IPv4 baseline routed forwarding, NDR throughput discovery. - * *eth2p-ethip4-ip4base-func.robot* => 2 ports of Ethernet, IPv4 baseline - routed forwarding, functional tests. -2. **Physical port to VM (or VM chain) to physical port - a.k.a.
NIC2VM2NIC, - P2V2P, NIC2VMchain2NIC, P2V2V2P** - * *PortNICConfig-WireEncapsulation-PacketForwardingFunction- - PacketProcessingFunction1-...-PacketProcessingFunctionN-VirtEncapsulation- - VirtPortConfig-VMconfig-TestType* - * *10ge2p1x520-dot1q-l2bdbasemaclrn-eth-2vhost-1vm-ndrdisc.robot* => 2 ports - of 10GE on Intel x520 NIC, dot1q tagged Ethernet, L2 bridge-domain switching - to/from two vhost interfaces and one VM, NDR throughput discovery. - * *10ge2p1x520-ethip4vxlan-l2bdbasemaclrn-eth-2vhost-1vm-ndrdisc.robot* => 2 - ports of 10GE on Intel x520 NIC, IPv4 VXLAN Ethernet, L2 bridge-domain - switching to/from two vhost interfaces and one VM, NDR throughput discovery. - * *10ge2p1x520-ethip4vxlan-l2bdbasemaclrn-eth-4vhost-2vm-ndrdisc.robot* => 2 - ports of 10GE on Intel x520 NIC, IPv4 VXLAN Ethernet, L2 bridge-domain - switching to/from four vhost interfaces and two VMs, NDR throughput - discovery. - * *eth2p-ethip4vxlan-l2bdbasemaclrn-eth-2vhost-1vm-func.robot* => 2 ports of - Ethernet, IPv4 VXLAN Ethernet, L2 bridge-domain switching to/from two vhost - interfaces and one VM, functional tests. -3. **API CRUD tests - Create (Write), Read (Retrieve), Update (Modify), Delete - (Destroy) operations for configuration and operational data** - * *ManagementTestKeyword-ManagementOperation-ManagedFunction1-...- - ManagedFunctionN-ManagementAPI1-ManagementAPIN-TestType* - * *mgmt-cfg-lisp-apivat-func* => configuration of LISP with VAT API calls, - functional tests. - * *mgmt-cfg-l2bd-apihc-apivat-func* => configuration of L2 Bridge-Domain with - Honeycomb API and VAT API calls, functional tests. - * *mgmt-oper-int-apihcnc-func* => reading status and operational data of - interface with Honeycomb NetConf API calls, functional tests. - * *mgmt-cfg-int-tap-apihcnc-func* => configuration of tap interfaces with - Honeycomb NetConf API calls, functional tests. - * *mgmt-notif-int-subint-apihcnc-func* => notifications of interface and - sub-interface events with Honeycomb NetConf Notifications, functional tests. - -For a complete description of the CSIT test naming convention please refer to the -[CSIT test naming wiki page](https://wiki.fd.io/view/CSIT/csit-test-naming). diff --git a/docs/content/introduction/test_scenarios_overview.md b/docs/content/introduction/test_scenarios_overview.md deleted file mode 100644 index 415ee3403f..0000000000 --- a/docs/content/introduction/test_scenarios_overview.md +++ /dev/null @@ -1,61 +0,0 @@ ---- -title: "Test Scenarios Overview" -weight: 2 ---- - -# Test Scenarios Overview - -FD.io CSIT Dashboard includes multiple test scenarios of VPP -centric applications, topologies and use cases. In addition, it also -covers baseline tests of DPDK sample applications. Tests are executed in -physical (performance tests) and virtual environments (functional -tests). - -Brief overview of test scenarios covered in this documentation: - -1. **VPP Performance**: VPP performance tests are executed in physical - FD.io testbeds, focusing on VPP network data plane performance in - NIC-to-NIC switching topologies. VPP application runs in - bare-metal host user-mode handling NICs. TRex is used as a traffic generator. -2. **VPP Vhostuser Performance with KVM VMs**: VPP VM service switching - performance tests using vhostuser virtual interface for - interconnecting multiple NF-in-VM instances. VPP vswitch - instance runs in bare-metal user-mode handling NICs and connecting - over vhost-user interfaces to VM instances each running VPP with virtio - virtual interfaces.
Similarly to VPP Performance, tests are run across a - range of configurations. TRex is used as a traffic generator. -3. **VPP Memif Performance with LXC and Docker Containers**: VPP - Container service switching performance tests using memif virtual - interface for interconnecting multiple VPP-in-container instances. - VPP vswitch instance runs in bare-metal user-mode handling NICs and - connecting over memif (Slave side) interfaces to more instances of - VPP running in LXC or in Docker Containers, both with memif - interfaces (Master side). Similarly to VPP Performance, tests are - run across a range of configurations. TRex is used as a traffic - generator. -4. **DPDK Performance**: VPP uses DPDK to drive the NICs and physical - interfaces. DPDK performance tests are used as a baseline to - profile performance of the DPDK sub-system. Two DPDK applications - are tested: Testpmd and L3fwd. DPDK tests are executed in the same - testing environment as VPP tests. DPDK Testpmd and L3fwd - applications run in host user-mode. TRex is used as a traffic - generator. -5. **T-Rex Performance**: T-Rex performance tests are executed in physical - FD.io testbeds, focusing on T-Rex data plane performance in NIC-to-NIC - loopback topologies. -6. **VPP Functional**: VPP functional tests are executed in virtual - FD.io testbeds, focusing on VPP packet processing functionality, - including both network data plane and in-line control plane. Tests - cover vNIC-to-vNIC and vNIC-to-nestedVM-to-vNIC forwarding topologies. - Scapy is used as a traffic generator. - -All CSIT test data included in this report is auto-generated from Robot -Framework json output files produced by Linux Foundation FD.io Jenkins jobs -executed against {{< release_vpp >}} artifacts. - -FD.io CSIT system is developed using two main coding platforms: Robot -Framework and Python. {{< release_csit >}} source code for the executed test -suites is available in the corresponding CSIT branch in the directory -`./tests/`. A local copy of CSIT source code -can be obtained by cloning the CSIT git repository - `git clone -https://gerrit.fd.io/r/csit`. diff --git a/docs/content/introduction/test_tag_description.md b/docs/content/introduction/test_tag_description.md deleted file mode 100644 index 630afa864e..0000000000 --- a/docs/content/introduction/test_tag_description.md +++ /dev/null @@ -1,863 +0,0 @@ ---- -title: "Test Tags Descriptions" -weight: 5 ---- - -# Test Tags Descriptions - -All CSIT test cases are labelled with Robot Framework tags used to allow for -easy test case type identification, test case grouping and selection for -execution. The following sections list the currently used CSIT tags and their -descriptions. - -## Testbed Topology Tags - -**2_NODE_DOUBLE_LINK_TOPO** - - 2 nodes connected in a circular topology with two links interconnecting - the devices. - -**2_NODE_SINGLE_LINK_TOPO** - - 2 nodes connected in a circular topology with at least one link - interconnecting devices. - -**3_NODE_DOUBLE_LINK_TOPO** - - 3 nodes connected in a circular topology with two links interconnecting - the devices. - -**3_NODE_SINGLE_LINK_TOPO** - - 3 nodes connected in a circular topology with at least one link - interconnecting devices. - -## Objective Tags - -**SKIP_PATCH** - - Test case(s) marked to not run in case of vpp-csit-verify (i.e. VPP patch) - and csit-vpp-verify jobs (i.e. CSIT patch). - -**SKIP_VPP_PATCH** - - Test case(s) marked to not run in case of vpp-csit-verify (i.e. VPP patch).
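The AND/NOT tag expression semantics used for selection (also seen in the trigger expansion examples earlier) can be sketched in a few lines of Python. This is an illustration only, not CSIT code: Robot Framework implements this selection natively via its --include/--exclude options, and the function names below are invented for the example.

    # Illustrative sketch of AND/NOT tag selection semantics; not CSIT code.
    def matches(expression, test_tags):
        """True if a test labelled with test_tags satisfies an ANDed
        expression such as 'mrrANDnic_intel-x710AND1t1cAND64bANDip4base'."""
        tags = {tag.lower() for tag in test_tags}
        return all(term.lower() in tags for term in expression.split("AND"))

    def selected(expressions, exclusions, test_tags):
        """Select a test if any ANDed expression matches and no '!name'
        exclusion term names one of its tags."""
        tags = {tag.lower() for tag in test_tags}
        if any(term.lstrip("!").lower() in tags for term in exclusions):
            return False
        return any(matches(expression, test_tags) for expression in expressions)

    # Example: a test tagged MRR, NIC_Intel-X710, 1T1C, 64B, IP4BASE.
    print(selected(["mrrANDnic_intel-x710AND1t1cAND64bANDip4base"],
                   ["!feature"],
                   {"MRR", "NIC_Intel-X710", "1T1C", "64B", "IP4BASE"}))  # True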
- -## Environment Tags - -**HW_ENV** - - DUTs and TGs are running on bare metal. - -**VM_ENV** - - DUTs and TGs are running in a virtual environment. - -**VPP_VM_ENV** - - DUTs with VPP, capable of running a Virtual Machine. - -## NIC Model Tags - -**NIC_Intel-X520-DA2** - - Intel X520-DA2 NIC. - -**NIC_Intel-XL710** - - Intel XL710 NIC. - -**NIC_Intel-X710** - - Intel X710 NIC. - -**NIC_Intel-XXV710** - - Intel XXV710 NIC. - -**NIC_Cisco-VIC-1227** - - VIC-1227 by Cisco. - -**NIC_Cisco-VIC-1385** - - VIC-1385 by Cisco. - -**NIC_Amazon-Nitro-50G** - - Amazon EC2 ENA NIC. - -## Scaling Tags - -**FIB_20K** - - 2x10,000 entries in a single fib table - -**FIB_200K** - - 2x100,000 entries in a single fib table - -**FIB_1M** - - 2x500,000 entries in a single fib table - -**FIB_2M** - - 2x1,000,000 entries in a single fib table - -**L2BD_1** - - Test with 1 L2 bridge domain. - -**L2BD_10** - - Test with 10 L2 bridge domains. - -**L2BD_100** - - Test with 100 L2 bridge domains. - -**L2BD_1K** - - Test with 1000 L2 bridge domains. - -**VLAN_1** - - Test with 1 VLAN sub-interface. - -**VLAN_10** - - Test with 10 VLAN sub-interfaces. - -**VLAN_100** - - Test with 100 VLAN sub-interfaces. - -**VLAN_1K** - - Test with 1000 VLAN sub-interfaces. - -**VXLAN_1** - - Test with 1 VXLAN tunnel. - -**VXLAN_10** - - Test with 10 VXLAN tunnels. - -**VXLAN_100** - - Test with 100 VXLAN tunnels. - -**VXLAN_1K** - - Test with 1000 VXLAN tunnels. - -**TNL_{t}** - - IPSec in tunnel mode - {t} tunnels. - -**SRC_USER_{u}** - - Traffic flow with {u} unique IPs (users) in one direction. - {u}=(1,10,100,1000,2000,4000). - -**100_FLOWS** - - Traffic stream with 100 unique flows (10 IPs/users x 10 UDP ports) in one - direction. - -**10k_FLOWS** - - Traffic stream with 10 000 unique flows (10 IPs/users x 1000 UDP ports) in - one direction. - -**100k_FLOWS** - - Traffic stream with 100 000 unique flows (100 IPs/users x 1000 UDP ports) in - one direction. - -**HOSTS_{h}** - - Stateless or stateful traffic stream with {h} client source IP4 addresses, - usually with 63 flows differing in source port number. Could be UDP or TCP. - If NAT is used, the clients are inside. Outside IP range can differ. - {h}=(1024,4096,16384,65536,262144). - -**GENEVE4_{t}TUN** - - Test with {t} GENEVE IPv4 tunnel(s). {t}=(1,4,16,64,256,1024) - -## Test Category Tags - -**DEVICETEST** - - All vpp_device functional test cases. - -**PERFTEST** - - All performance test cases. - -## VPP Device Type Tags - -**SCAPY** - - All test cases that use Scapy for packet generation and validation. - -## Performance Type Tags - -**NDRPDR** - - Single test finding both No Drop Rate and Partial Drop Rate simultaneously. - The search is done by an optimized algorithm which performs - multiple trial runs at different durations and transmit rates. - The results come from the final trials, which have a duration of 30 seconds. - -**MRR** - - Performance tests where TG sends the traffic at maximum rate (line rate) - and reports total sent/received packets over the trial duration. - The result is an average of 10 trials of 1 second duration. - -**SOAK** - - Performance tests using PLRsearch to find the critical load. - -**RECONF** - - Performance tests aimed at measuring lost packets (time) when performing - reconfiguration while full throughput offered load is applied. - -## Ethernet Frame Size Tags - -These describe the traffic offered by the Traffic Generator, -the "primary" traffic in case of asymmetric load. -For traffic between DUTs, or for "secondary" traffic, see the ${overhead} value.
- -**{b}B** - - {b} Byte frames used for test. - -**IMIX** - - IMIX frame sequence (28x 64B, 16x 570B, 4x 1518B) used for test. - -## Test Type Tags - -**BASE** - - Baseline test cases, no encapsulation, no feature(s) configured in tests. - No scaling whatsoever, beyond the minimum needed for RSS. - -**IP4BASE** - - IPv4 baseline test cases, no encapsulation, no feature(s) configured in - tests. Minimal number of routes. Other quantities may be scaled. - -**IP6BASE** - - IPv6 baseline test cases, no encapsulation, no feature(s) configured in - tests. - -**L2XCBASE** - - L2XC baseline test cases, no encapsulation, no feature(s) configured in - tests. - -**L2BDBASE** - - L2BD baseline test cases, no encapsulation, no feature(s) configured in - tests. - -**L2PATCH** - - L2PATCH baseline test cases, no encapsulation, no feature(s) configured in - tests. - -**SCALE** - - Scale test cases. Other tags specify which quantities are scaled. - Also applies if scaling is set on TG only (e.g. DUT works as IP4BASE). - -**ENCAP** - - Test cases where encapsulation is used. Use also encapsulation tag(s). - -**FEATURE** - - At least one feature is configured in test cases. Use also feature tag(s). - -**UDP** - - Tests which use any kind of UDP traffic (STL or ASTF profile). - -**TCP** - - Tests which use any kind of TCP traffic (STL or ASTF profile). - -**TREX** - - Tests which test TRex traffic without any software DUTs in the traffic path. - -**UDP_UDIR** - - Tests which use unidirectional UDP traffic (STL profile only). - -**UDP_BIDIR** - - Tests which use bidirectional UDP traffic (STL profile only). - -**UDP_CPS** - - Tests which measure connections per second on minimal UDP pseudoconnections. - This implies ASTF traffic profile is used. - This tag selects specific output processing in PAL. - -**TCP_CPS** - - Tests which measure connections per second on empty TCP connections. - This implies ASTF traffic profile is used. - This tag selects specific output processing in PAL. - -**TCP_RPS** - - Tests which measure requests per second on empty TCP connections. - This implies ASTF traffic profile is used. - This tag selects specific output processing in PAL. - -**UDP_PPS** - - Tests which measure packets per second on lightweight UDP transactions. - This implies ASTF traffic profile is used. - This tag selects specific output processing in PAL. - -**TCP_PPS** - - Tests which measure packets per second on lightweight TCP transactions. - This implies ASTF traffic profile is used. - This tag selects specific output processing in PAL. - -**HTTP** - - Tests which use traffic formed of valid HTTP requests (and responses). - -**LDP_NGINX** - - LDP NGINX is unmodified NGINX with VPP via LD_PRELOAD. - -**NF_DENSITY** - - Performance tests that measure throughput of multiple VNF and CNF - service topologies at different service densities. - -## NF Service Density Tags - -**CHAIN** - - NF service density tests with VNF or CNF service chain topology(ies). - -**PIPE** - - NF service density tests with CNF service pipeline topology(ies). - -**NF_L3FWDIP4** - - NF service density tests with DPDK l3fwd IPv4 routing as NF workload. - -**NF_VPPIP4** - - NF service density tests with VPP IPv4 routing as NF workload. - -**{r}R{c}C** - - Service density matrix locator {r}R{c}C, {r}Row denoting number of - service instances, {c}Column denoting number of NFs per service - instance. {r}=(1,2,4,6,8,10), {c}=(1,2,4,6,8,10). - -**{n}VM{t}T** - - Service density {n}VM{t}T, {n}Number of NF Qemu VMs, {t}Number of threads - per NF.
- -**{n}DCR{t}T** - - Service density {n}DCR{t}T, {n}Number of NF Docker containers, {t}Number of - threads per NF. - -**{n}_ADDED_CHAINS** - - {n}Number of chains (or pipelines) added (and/or removed) - during RECONF test. - -## Forwarding Mode Tags - -**L2BDMACSTAT** - - VPP L2 bridge-domain, L2 MAC static. - -**L2BDMACLRN** - - VPP L2 bridge-domain, L2 MAC learning. - -**L2XCFWD** - - VPP L2 point-to-point cross-connect. - -**IP4FWD** - - VPP IPv4 routed forwarding. - -**IP6FWD** - - VPP IPv6 routed forwarding. - -**LOADBALANCER_MAGLEV** - - VPP Load balancer maglev mode. - -**LOADBALANCER_L3DSR** - - VPP Load balancer l3dsr mode. - -**LOADBALANCER_NAT4** - - VPP Load balancer nat4 mode. - -**N2N** - - Mode, where NICs from the same physical server are directly - connected with a cable. - -## Underlay Tags - -**IP4UNRLAY** - - IPv4 underlay. - -**IP6UNRLAY** - - IPv6 underlay. - -**MPLSUNRLAY** - - MPLS underlay. - -## Overlay Tags - -**L2OVRLAY** - - L2 overlay. - -**IP4OVRLAY** - - IPv4 overlay (IPv4 payload). - -**IP6OVRLAY** - - IPv6 overlay (IPv6 payload). - -## Tagging Tags - -**DOT1Q** - - All test cases with dot1q. - -**DOT1AD** - - All test cases with dot1ad. - -## Encapsulation Tags - -**ETH** - - All test cases with base Ethernet (no encapsulation). - -**LISP** - - All test cases with LISP. - -**LISPGPE** - - All test cases with LISP-GPE. - -**LISP_IP4o4** - - All test cases with LISP_IP4o4. - -**LISPGPE_IP4o4** - - All test cases with LISPGPE_IP4o4. - -**LISPGPE_IP6o4** - - All test cases with LISPGPE_IP6o4. - -**LISPGPE_IP4o6** - - All test cases with LISPGPE_IP4o6. - -**LISPGPE_IP6o6** - - All test cases with LISPGPE_IP6o6. - -**VXLAN** - - All test cases with VXLAN. - -**VXLANGPE** - - All test cases with VXLAN-GPE. - -**GRE** - - All test cases with GRE. - -**GTPU** - - All test cases with GTPU. - -**GTPU_HWACCEL** - - All test cases with GTPU_HWACCEL. - -**IPSEC** - - All test cases with IPSEC. - -**WIREGUARD** - - All test cases with WIREGUARD. - -**SRv6** - - All test cases with Segment routing over IPv6 dataplane. - -**SRv6_1SID** - - All SRv6 test cases with single SID. - -**SRv6_2SID_DECAP** - - All SRv6 test cases with two SIDs and with decapsulation. - -**SRv6_2SID_NODECAP** - - All SRv6 test cases with two SIDs and without decapsulation. - -**GENEVE** - - All test cases with GENEVE. - -**GENEVE_L3MODE** - - All test cases with GENEVE tunnel in L3 mode. - -**FLOW** - - All test cases with FLOW. - -**FLOW_DIR** - - All test cases with FLOW_DIR. - -**FLOW_RSS** - - All test cases with FLOW_RSS. - -**NTUPLE** - - All test cases with NTUPLE. - -**L2TPV3** - - All test cases with L2TPV3. - -## Interface Tags - -**PHY** - - All test cases which use physical interface(s). - -**GSO** - - All test cases which use Generic Segmentation Offload. - -**VHOST** - - All test cases which use VHOST. - -**VHOST_1024** - - All test cases which use the VHOST DPDK driver with qemu queue size set - to 1024. - -**VIRTIO** - - All test cases which use the VIRTIO native VPP driver. - -**VIRTIO_1024** - - All test cases which use the VIRTIO native VPP driver with qemu queue size set - to 1024. - -**CFS_OPT** - - All test cases which use a VM with optimised scheduler policy. - -**TUNTAP** - - All test cases which use TUN and TAP. - -**AFPKT** - - All test cases which use AFPKT. - -**NETMAP** - - All test cases which use Netmap. - -**MEMIF** - - All test cases which use Memif. - -**SINGLE_MEMIF** - - All test cases which use only a single Memif connection per DUT.
One DUT - instance is running in a container having one physical interface exposed to the - container. - -**LBOND** - - All test cases which use link bonding (BondEthernet interface). - -**LBOND_DPDK** - - All test cases which use DPDK link bonding. - -**LBOND_VPP** - - All test cases which use VPP link bonding. - -**LBOND_MODE_XOR** - - All test cases which use link bonding with mode XOR. - -**LBOND_MODE_LACP** - - All test cases which use link bonding with mode LACP. - -**LBOND_LB_L34** - - All test cases which use link bonding with load-balance mode l34. - -**LBOND_{n}L** - - All test cases which use {n} link(s) for link bonding. - -**DRV_{d}** - - All test cases where the NIC driver for the DUT is set to {d}. Default is VFIO_PCI. - {d}=(AVF, RDMA_CORE, VFIO_PCI, AF_XDP). - -**TG_DRV_{d}** - - All test cases where the NIC driver for the TG is set to {d}. Default is IGB_UIO. - {d}=(RDMA_CORE, IGB_UIO). - -**RXQ_SIZE_{n}** - - All test cases where the RXQ size (number of RX descriptors) is set to {n}. Default is 0, - which means VPP (API) default. - -**TXQ_SIZE_{n}** - - All test cases where the TXQ size (number of TX descriptors) is set to {n}. Default is 0, - which means VPP (API) default. - -## Feature Tags - -**IACLDST** - - iACL destination. - -**ADLALWLIST** - - ADL allowlist. - -**NAT44** - - NAT44 configured and tested. - -**NAT64** - - NAT64 configured and tested. - -**ACL** - - ACL plugin configured and tested. - -**IACL** - - ACL plugin configured and tested on input path. - -**OACL** - - ACL plugin configured and tested on output path. - -**ACL_STATELESS** - - ACL plugin configured and tested in stateless mode (permit action). - -**ACL_STATEFUL** - - ACL plugin configured and tested in stateful mode (permit+reflect action). - -**ACL1** - - ACL plugin configured and tested with 1 not-hitting ACE. - -**ACL10** - - ACL plugin configured and tested with 10 not-hitting ACEs. - -**ACL50** - - ACL plugin configured and tested with 50 not-hitting ACEs. - -**SRv6_PROXY** - - SRv6 endpoint to SR-unaware appliance via proxy. - -**SRv6_PROXY_STAT** - - SRv6 endpoint to SR-unaware appliance via static proxy. - -**SRv6_PROXY_DYN** - - SRv6 endpoint to SR-unaware appliance via dynamic proxy. - -**SRv6_PROXY_MASQ** - - SRv6 endpoint to SR-unaware appliance via masquerading proxy. - -## Encryption Tags - -**IPSECSW** - - Crypto in software. - -**IPSECHW** - - Crypto in hardware. - -**IPSECTRAN** - - IPSec in transport mode. - -**IPSECTUN** - - IPSec in tunnel mode. - -**IPSECINT** - - IPSec in interface mode. - -**AES** - - IPSec using AES algorithms. - -**AES_128_CBC** - - IPSec using AES 128 CBC algorithms. - -**AES_128_GCM** - - IPSec using AES 128 GCM algorithms. - -**AES_256_GCM** - - IPSec using AES 256 GCM algorithms. - -**HMAC** - - IPSec using HMAC integrity algorithms. - -**HMAC_SHA_256** - - IPSec using HMAC SHA 256 integrity algorithms. - -**HMAC_SHA_512** - - IPSec using HMAC SHA 512 integrity algorithms. - -**SCHEDULER** - - IPSec using crypto sw scheduler engine. - -**FASTPATH** - - IPSec policy mode with spd fast path enabled. - -## Client-Workload Tags - -**VM** - - All test cases which use at least one virtual machine. - -**LXC** - - All test cases which use Linux container and LXC utils. - -**DRC** - - All test cases which use at least one Docker container. - -**DOCKER** - - All test cases which use Docker as container manager. - -**APP** - - All test cases with specific APP use. - -## Container Orchestration Tags - -**{n}VSWITCH** - - {n} VPP running in {n} Docker container(s) acting as a VSWITCH. - {n}=(1).
- -**{n}VNF** - - {n} VPP running in {n} Docker container(s) acting as a VNF workload. - {n}=(1). - -## Multi-Threading Tags - -**STHREAD** - - *Dynamic tag*. - All test cases using a single poll mode thread. - -**MTHREAD** - - *Dynamic tag*. - All test cases using more than one poll mode driver thread. - -**{n}NUMA** - - All test cases with packet processing on {n} socket(s). {n}=(1,2). - -**{c}C** - - {c} worker thread pinned to {c} dedicated physical core; or if - HyperThreading is enabled, {c}*2 worker threads each pinned to a separate - logical core within 1 dedicated physical core. Main thread pinned to core 1. - {c}=(1,2,4). - -**{t}T{c}C** - - *Dynamic tag*. - {t} worker threads pinned to {c} dedicated physical cores. Main thread - pinned to core 1. By default CSIT configures the same number of receive - queues per interface as worker threads. {t}=(1,2,4,8), {c}=(1,2,4). diff --git a/docs/content/introduction/testing_in_vagrant.md b/docs/content/introduction/testing_in_vagrant.md deleted file mode 100644 index 34ca596d0a..0000000000 --- a/docs/content/introduction/testing_in_vagrant.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -bookHidden: true -title: "Running CSIT locally in Vagrant" ---- - -# Running CSIT locally in Vagrant - -## Install prerequisites - -Run all commands from the command line. - -1. Download and install virtualbox from the - [official page](https://www.virtualbox.org/wiki/Downloads). - To verify the installation, run VBoxManage - - - on windows - - "C:\Program Files\Oracle\VirtualBox\VBoxManage.exe" --version - - - on nix - - VBoxManage --version - Tested version: 6.1.16r140961 - -2. Download and install the latest vagrant from the - [official page](https://www.vagrantup.com/downloads.html). - To verify the installation, run - - vagrant -v - Tested version: Vagrant 2.2.15 - -3. Install vagrant plugins: - - vagrant plugin install vagrant-vbguest - vagrant plugin install vagrant-cachier - - If you are behind a proxy, install the proxyconf plugin and update proxy - settings in the Vagrantfile: - - vagrant plugin install vagrant-proxyconf - -## Set up and run Vagrant virtualbox - -Before running the following commands, change the working directory to the Vagrant specific directory -(from within the root CSIT directory) - - cd csit.infra.vagrant - -This allows Vagrant to automatically find the Vagrantfile and the corresponding Vagrant environment. - -Start the provisioning - - vagrant up --provider virtualbox - -Your new VPP Device virtualbox machine will be created and configured. -The master branch of the csit project will be cloned inside the virtual machine into -the /home/vagrant/csit folder.
- -Once the process is finished, you can log in to the box using - - vagrant ssh - -In case you need to completely rebuild the box and start from scratch, -run these commands - - vagrant destroy -f - vagrant up --provider virtualbox - -## Run tests - -From within the box run the tests using - - cd /home/vagrant/csit/resources/libraries/bash/entry - ./bootstrap_vpp_device.sh csit-vpp-device-master-ubuntu2004-1n-vbox - -To run only selected tests based on TAGS, export environment variables before -running the test suite - - export GERRIT_EVENT_TYPE="comment-added" - export GERRIT_EVENT_COMMENT_TEXT="devicetest memif" - - # now it will run tests, selected based on tags - ./bootstrap_vpp_device.sh csit-vpp-device-master-ubuntu2004-1n-vbox - - diff --git a/docs/content/methodology/_index.md b/docs/content/methodology/_index.md index 6f0dcae783..dbef64db94 100644 --- a/docs/content/methodology/_index.md +++ b/docs/content/methodology/_index.md @@ -1,6 +1,6 @@ --- -bookCollapseSection: true +bookCollapseSection: false bookFlatSection: true title: "Methodology" weight: 2 ---- \ No newline at end of file +--- diff --git a/docs/content/methodology/access_control_lists.md b/docs/content/methodology/access_control_lists.md deleted file mode 100644 index 9767d3f86a..0000000000 --- a/docs/content/methodology/access_control_lists.md +++ /dev/null @@ -1,70 +0,0 @@ ---- -title: "Access Control Lists" -weight: 12 ---- - -# Access Control Lists - -VPP is tested in a number of data plane feature configurations across -different forwarding modes. The following sections list the features tested. - -## ACL Security-Groups - -Both stateless and stateful access control lists (ACL), also known as -security-groups, are supported by VPP. - -Following ACL configurations are tested for MAC switching with L2 -bridge-domains: - -- *l2bdbasemaclrn-iacl{E}sl-{F}flows*: Input stateless ACL, with {E} - entries and {F} flows. -- *l2bdbasemaclrn-oacl{E}sl-{F}flows*: Output stateless ACL, with {E} - entries and {F} flows. -- *l2bdbasemaclrn-iacl{E}sf-{F}flows*: Input stateful ACL, with {E} - entries and {F} flows. -- *l2bdbasemaclrn-oacl{E}sf-{F}flows*: Output stateful ACL, with {E} - entries and {F} flows. - -Following ACL configurations are tested with IPv4 routing: - -- *ip4base-iacl{E}sl-{F}flows*: Input stateless ACL, with {E} entries - and {F} flows. -- *ip4base-oacl{E}sl-{F}flows*: Output stateless ACL, with {E} entries - and {F} flows. -- *ip4base-iacl{E}sf-{F}flows*: Input stateful ACL, with {E} entries and - {F} flows. -- *ip4base-oacl{E}sf-{F}flows*: Output stateful ACL, with {E} entries - and {F} flows. - -ACL tests are executed with the following combinations of ACL entries -and number of flows: - -- ACL entry definitions - - - flow non-matching deny entry: (src-ip4, dst-ip4, src-port, dst-port). - - flow matching permit ACL entry: (src-ip4, dst-ip4). - -- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50]. - -- {F} - number of UDP flows with different tuple (src-ip4, dst-ip4, - src-port, dst-port), {F} = [100, 10k, 100k]. - -- All {E}x{F} combinations are tested per ACL type, total of 9. - -## ACL MAC-IP - -MAC-IP binding ACLs are tested for MAC switching with L2 bridge-domains: - -- *l2bdbasemaclrn-macip-iacl{E}sl-{F}flows*: Input stateless ACL, with - {E} entries and {F} flows.
- -MAC-IP ACL tests are executed with the following combinations of ACL -entries and number of flows: - -- ACL entry definitions - - - flow non-matching deny entry: (dst-ip4, dst-mac, bit-mask) - - flow matching permit ACL entry: (dst-ip4, dst-mac, bit-mask) - -- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50] -- {F} - number of UDP flows with different tuple (dst-ip4, dst-mac), - {F} = [100, 10k, 100k] -- All {E}x{F} combinations are tested per ACL type, total of 9. diff --git a/docs/content/methodology/data_plane_throughput/_index.md b/docs/content/methodology/data_plane_throughput/_index.md deleted file mode 100644 index 5791438b3b..0000000000 --- a/docs/content/methodology/data_plane_throughput/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -bookCollapseSection: true -bookFlatSection: false -title: "Data Plane Throughput" -weight: 4 ---- \ No newline at end of file diff --git a/docs/content/methodology/data_plane_throughput/data_plane_throughput.md b/docs/content/methodology/data_plane_throughput/data_plane_throughput.md deleted file mode 100644 index 7ff1d38d17..0000000000 --- a/docs/content/methodology/data_plane_throughput/data_plane_throughput.md +++ /dev/null @@ -1,129 +0,0 @@ ---- -title: "Data Plane Throughput" -weight: 1 ---- - -# Data Plane Throughput - -Network data plane throughput is measured using multiple test methods in -order to obtain representative and repeatable results across the large -set of performance test cases implemented and executed within CSIT. - -The following throughput test methods are used: - -- MLRsearch - Multiple Loss Ratio search -- MRR - Maximum Receive Rate -- PLRsearch - Probabilistic Loss Ratio search - -The description of each test method is followed by generic test properties -shared by all methods. - -## MLRsearch Tests - -### Description - -Multiple Loss Ratio search (MLRsearch) tests discover multiple packet -throughput rates in a single search, reducing the overall test execution -time compared to a binary search. Each rate is associated with a -distinct Packet Loss Ratio (PLR) criterion. In FD.io CSIT two throughput -rates are discovered: Non-Drop Rate (NDR, with zero packet loss, PLR=0) -and Partial Drop Rate (PDR, with PLR<0.5%). MLRsearch is compliant with -RFC2544. - -### Usage - -MLRsearch tests are run to discover NDR and PDR rates for each VPP and -DPDK release covered by the CSIT report. Results for small frame sizes -(64B/78B, IMIX) are presented in packet throughput graphs -(Box-and-Whisker Plots) with NDR and PDR rates plotted against the test -cases covering popular VPP packet paths. - -Each test is executed at least 10 times to verify measurement -repeatability and results are compared between releases and test -environments. NDR and PDR packet and bandwidth throughput results for -all frame sizes and for all tests are presented in detailed results -tables. - -### Details - -See [MLRSearch]({{< ref "mlrsearch/#MLRsearch" >}}) section for more detail. -MLRsearch is being standardized in IETF in -[draft-ietf-bmwg-mlrsearch](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-01). - -## MRR Tests - -### Description - -Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests, -as they provide a maximum “raw” throughput benchmark for the development and -testing community. - -MRR tests measure the packet forwarding rate under the maximum load -offered by the traffic generator (dependent on link type and NIC model) over -a set trial duration, regardless of packet loss.
The maximum load for a -specified Ethernet frame size is set to the bi-directional link rate. - -### Usage - -MRR tests are much faster than MLRsearch as they rely on a single trial -or a small set of trials with very short duration. It is this property -that makes them suitable for continuous execution in daily performance -trending jobs enabling detection of performance anomalies (regressions, -progressions) resulting from data plane code changes. - -MRR tests are also used for VPP per patch performance jobs verifying -patch performance vs parent. CSIT reports include MRR throughput -comparisons between releases and test environments. Small frame sizes -only (64B/78B, IMIX). - -### Details - -See [MRR Throughput]({{< ref "mrr_throughput/#MRR Throughput" >}}) -section for more detail about MRR test configuration. - -FD.io CSIT performance dashboard includes complete description of -[daily performance trending tests](https://s3-docs.fd.io/csit/master/trending/methodology/performance_tests.html) -and [VPP per patch tests](https://s3-docs.fd.io/csit/master/trending/methodology/perpatch_performance_tests.html). - -## PLRsearch Tests - -### Description - -Probabilistic Loss Ratio search (PLRsearch) tests discover a packet -throughput rate associated with a configured Packet Loss Ratio (PLR) -criterion for tests run over an extended period of time, a.k.a. soak -testing. PLRsearch assumes that the system under test is probabilistic in -nature, and not deterministic. - -### Usage - -PLRsearch tests are run to discover a sustained throughput for PLR=10^-7 -(close to NDR) for the VPP release covered by the CSIT report. Results for small -frame sizes (64B/78B) are presented in packet throughput graphs (Box -Plots) for a small subset of baseline tests. - -Each soak test lasts 30 minutes and is executed at least twice. Results are -compared against NDR and PDR rates discovered with MLRsearch. - -### Details - -See [PLRSearch]({{< ref "plrsearch/#PLRsearch" >}}) methodology section for -more detail. PLRsearch is being standardized in IETF in -[draft-vpolak-bmwg-plrsearch](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch). - -## Generic Test Properties - -All data plane throughput test methodologies share the following generic -properties: - -- Tested L2 frame sizes (untagged Ethernet): - - - IPv4 payload: 64B, IMIX (28x64B, 16x570B, 4x1518B), 1518B, 9000B. - - IPv6 payload: 78B, IMIX (28x78B, 16x570B, 4x1518B), 1518B, 9000B. - - All quoted sizes include frame CRC, but exclude per frame - transmission overhead of 20B (preamble, inter frame gap). - -- Offered packet load is always bi-directional and symmetric. -- All measured and reported packet and bandwidth rates are aggregate - bi-directional rates reported from the external Traffic Generator - perspective. \ No newline at end of file diff --git a/docs/content/methodology/data_plane_throughput/mlrsearch.md b/docs/content/methodology/data_plane_throughput/mlrsearch.md deleted file mode 100644 index 73039c9b02..0000000000 --- a/docs/content/methodology/data_plane_throughput/mlrsearch.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -title: "MLRsearch" -weight: 2 ---- - -# MLRsearch - -## Overview - -Multiple Loss Ratio search (MLRsearch) tests use an optimized search algorithm -implemented in the FD.io CSIT project. MLRsearch discovers any number of -loss ratio loads in a single search. - -Two loss ratio goals are of interest in FD.io CSIT, leading to Non-Drop Rate -(NDR, loss ratio goal is exact zero) and Partial Drop Rate -(PDR, non-zero loss ratio goal, currently 0.5%).
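As a minimal illustration of these two goals (not the MLRsearch library API; the helper names below are invented for the example), a single trial can be classified against each loss ratio goal independently:

    # Illustrative sketch only: classify one trial against NDR and PDR goals.
    NDR_GOAL = 0.0    # exact zero loss ratio
    PDR_GOAL = 0.005  # currently 0.5%

    def loss_ratio(sent, received):
        return (sent - received) / sent if sent else 0.0

    def classify(sent, received, goal):
        # A load satisfying the goal is a (candidate) lower bound,
        # a load violating it is an upper bound.
        return "lower_bound" if loss_ratio(sent, received) <= goal else "upper_bound"

    print(classify(1_000_000, 999_990, NDR_GOAL))  # upper_bound (10 packets lost)
    print(classify(1_000_000, 999_990, PDR_GOAL))  # lower_bound (1e-5 <= 0.5%)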
- -MLRsearch discovers all the loads in a single pass, reducing the required -duration compared to separate `binary search`es[^1] for each rate. Overall -search time is reduced even further by relying on shorter trial -durations of intermediate steps, with only the final measurements -conducted at the specified final trial duration. This results in a -shorter overall execution time when compared to standard NDR/PDR binary -search, while guaranteeing similar results. - -**Note:** All throughput rates are *always* bi-directional - aggregates of two equal (symmetric) uni-directional packet rates - received and reported by an external traffic generator, - unless the test specifically requires unidirectional traffic. - -## Search Implementation - -A detailed description of the MLRsearch algorithm is included in the IETF -draft -[draft-ietf-bmwg-mlrsearch-02](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-02) -that is in the process of being standardized in the IETF Benchmarking -Methodology Working Group (BMWG). -(A newer version is published in the IETF, describing improvements not yet used -in CSIT production.) - -MLRsearch is also available as a -[PyPI (Python Package Index) library](https://pypi.org/project/MLRsearch/). - -## Algorithm highlights - -MRR and receive rate at MRR load are used as initial guesses for the search. - -All previously measured trials (except the very first one which can act -as a warm-up) are taken into consideration, unless superseded -by a trial at the same load but higher duration. - -For every loss ratio goal, the tightest upper and lower bounds -(from results of a large enough trial duration) form an interval. -The exit condition is given by that interval reaching a low enough relative width. -A small enough width is achieved by bisecting the current interval. -The bisection can be uneven, to save measurements based on information theory. - -Switching to a higher trial duration generally requires a re-measurement -at a load from the previous trial duration. -When the re-measurement does not confirm the previous bound classification -(e.g. the tightest lower bound at a shorter trial duration becomes -the newest tightest upper bound upon re-measurement), -external search is used to find a close enough bound of the lost type. -External search is a generalization of the first stage of -`exponential search`[^2]. - -Shorter trial durations use a double width goal, -because one bisection is always safe before risking external search. - -Within an iteration for a specific trial duration, smaller loss ratios (NDR) -are narrowed down first before the search continues with higher loss ratios (PDR). - -Other heuristics are present, aimed at preventing unnecessarily narrow intervals, -and at handling corner cases around min and max load. - -## Deviations from RFC 2544 - -CSIT does not have any explicit wait times before and after trial traffic. - -Small differences between intended and offered load are tolerated, -mainly due to various time overheads preventing precise measurement -of the traffic duration (and TRex can sometimes suffer from duration -stretching). - -The final trial duration is only 30s (10s for reconf tests).
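To make the interval narrowing concrete, here is a deliberately simplified, even-bisection sketch for a single loss ratio goal. It assumes a measure(load) stub returning a loss ratio; the production MLRsearch code differs (uneven splits, multiple durations and goals, external search), so this is an illustration, not the implementation:

    # Simplified sketch of MLRsearch-style interval narrowing for one goal.
    def narrow(measure, lower, upper, goal, width_goal=0.005):
        """Narrow (lower, upper) until the relative width is small enough."""
        while (upper - lower) / upper > width_goal:
            mid = (lower + upper) / 2.0  # real MLRsearch may split unevenly
            if measure(mid) <= goal:
                lower = mid  # goal met: new tightest lower bound
            else:
                upper = mid  # goal missed: new tightest upper bound
        return lower, upper

    # Toy SUT that starts losing packets above 7.2 Mpps.
    fake_sut = lambda load: 0.0 if load <= 7.2e6 else 0.01
    print(narrow(fake_sut, 1e6, 20e6, goal=0.0))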
- -[^1]: [binary search](https://en.wikipedia.org/wiki/Binary_search) -[^2]: [exponential search](https://en.wikipedia.org/wiki/Exponential_search) diff --git a/docs/content/methodology/data_plane_throughput/mrr_throughput.md b/docs/content/methodology/data_plane_throughput/mrr_throughput.md deleted file mode 100644 index 076946fb66..0000000000 --- a/docs/content/methodology/data_plane_throughput/mrr_throughput.md +++ /dev/null @@ -1,56 +0,0 @@ ---- -title: "MRR Throughput" -weight: 4 ---- - -# MRR Throughput - -Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests, -as they provide a maximum "raw" throughput benchmark for the development and -testing community. MRR tests measure the packet forwarding rate under -the maximum load offered by the traffic generator over a set trial duration, -regardless of packet loss. - -MRR tests are currently used for the following test jobs: - -- Report performance comparison: 64B, IMIX for vhost, memif. -- Daily performance trending: 64B, IMIX for vhost, memif. -- Per-patch performance verification: 64B. -- Initial iterations of MLRsearch and PLRsearch: 64B. - -The maximum offered load for a specific L2 Ethernet frame size is set to -either the maximum bi-directional link rate or the tested NIC model -capacity, as follows: - -- For 10GE NICs the maximum packet rate load is 2x14.88 Mpps for 64B, a - 10GE bi-directional link rate. -- For 25GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a - 25GE bi-directional link sub-rate limited by the 25GE NIC used on TRex TG, - XXV710. -- For 40GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a - 40GE bi-directional link sub-rate limited by the 40GE NIC used on TRex - TG, XL710. Packet rate for other tested frame sizes is limited by the - PCIe Gen3 x8 bandwidth limitation of ~50Gbps. - -MRR test code implements multiple bursts of offered packet load and has -two configurable burst parameters: individual trial duration and number -of trials in a single burst. This enables more precise performance -trending by providing more results data for analysis. - -Burst parameter settings vary between different tests using MRR: - -- MRR individual trial duration: - - - Report performance comparison: 1 sec. - - Daily performance trending: 1 sec. - - Per-patch performance verification: 10 sec. - - Initial iteration for MLRsearch: 1 sec. - - Initial iteration for PLRsearch: 5.2 sec. - -- Number of MRR trials per burst: - - - Report performance comparison: 10. - - Daily performance trending: 10. - - Per-patch performance verification: 5. - - Initial iteration for MLRsearch: 1. - - Initial iteration for PLRsearch: 1. \ No newline at end of file diff --git a/docs/content/methodology/data_plane_throughput/plrsearch.md b/docs/content/methodology/data_plane_throughput/plrsearch.md deleted file mode 100644 index 1facccc63b..0000000000 --- a/docs/content/methodology/data_plane_throughput/plrsearch.md +++ /dev/null @@ -1,383 +0,0 @@ ---- -title: "PLRsearch" -weight: 3 ---- - -# PLRsearch - -## Motivation for PLRsearch - -Network providers are interested in the throughput a system can sustain. - -`RFC 2544`[^3] assumes loss ratio is given by a deterministic function of -offered load. But NFV software systems are not deterministic enough. -This makes deterministic algorithms (such as `binary search`[^9] per RFC 2544 -and MLRsearch with a single trial) return results -which, when repeated, show relatively high standard deviation, -thus making it harder to tell what "the throughput" actually is.
- -We need another algorithm, which takes this indeterminism into account. - -## Generic Algorithm - -Detailed description of the PLRsearch algorithm is included in the IETF -draft `draft-vpolak-bmwg-plrsearch-02`[^1] that is in the process -of being standardized in the IETF Benchmarking Methodology Working Group (BMWG). - -### Terms - -The rest of this page assumes the reader is familiar with the following terms -defined in the IETF draft: - -+ Trial Order Independent System -+ Duration Independent System -+ Target Loss Ratio -+ Critical Load -+ Offered Load regions - - + Zero Loss Region - + Non-Deterministic Region - + Guaranteed Loss Region - -+ Fitting Function - - + Stretch Function - + Erf Function - -+ Bayesian Inference - - + Prior distribution - + Posterior Distribution - -+ Numeric Integration - - + Monte Carlo - + Importance Sampling - -## FD.io CSIT Implementation Specifics - -The search receives min_rate and max_rate values, to avoid measurements -at offered loads not supported by the traffic generator. - -The implemented test cases use bidirectional traffic. -The algorithm stores each rate as a bidirectional rate (internally, -the algorithm is agnostic to flows and directions, -it only cares about aggregate counts of packets sent and packets lost), -but debug output from the traffic generator lists unidirectional values. - -### Measurement Delay - -In the sample implementation in the FD.io CSIT project, there is roughly a 0.5 -second delay between trials due to restrictions imposed by the packet traffic -generator in use (T-Rex). - -As measurement results come in, the posterior distribution computation takes -more time (per sample), although there is a considerable constant part -(mostly for inverting the fitting functions). - -Also, the integrator needs a fair amount of samples to reach the region -the posterior distribution is concentrated at. - -And of course, the speed of the integrator depends on the computing power -of the CPU the algorithm is able to use. - -All those timing related effects are addressed by arithmetically increasing -trial durations with configurable coefficients -(currently 5.1 seconds for the first trial, -each subsequent trial being 0.1 second longer). - -### Rounding Errors and Underflows - -In order to avoid them, the current implementation tracks the natural logarithm -(instead of the original quantity) for any quantity which is never negative. -Logarithm of zero is minus infinity (not supported by Python), -so the special value "None" is used instead. -Specific functions for frequent operations (such as "logarithm -of sum of exponentials") are defined to handle None correctly. - -### Fitting Functions - -The current implementation uses two fitting functions, called "stretch" and "erf". -In general, their estimates for the critical rate differ, -which adds a simple source of systematic error, -on top of the randomness error reported by the integrator. -Otherwise the reported stdev of the critical rate estimate -would be unrealistically low. - -Both functions are not only increasing, but also convex -(meaning the rate of increase is also increasing). - -Both fitting functions have several mathematically equivalent formulas, -each can lead to an arithmetic overflow or underflow in different sub-terms. -Overflows can be eliminated by using different exact formulas -for different argument ranges. -Underflows can be avoided by using approximate formulas -in affected argument ranges, such ranges have their own formulas to compute.
-At the end, both fitting function implementations -contain multiple "if" branches, so discontinuities are a possibility -at range boundaries. - -### Prior Distributions - -The numeric integrator expects all the parameters to be distributed -(independently and) uniformly on an interval (-1, 1). - -As both "mrr" and "spread" parameters are positive and not dimensionless, -a transformation is needed. Dimensionality is inherited from the max_rate value. - -The "mrr" parameter follows a `Lomax distribution`[^4] -with alpha equal to one, but shifted so that mrr is always greater than 1 -packet per second. - -The "spread" parameter is generated simply as the "mrr" value -raised to a random power between zero and one; -thus it follows a `reciprocal distribution`[^5]. - -### Integrator - -After a few measurements, the posterior distribution of fitting function -arguments gets quite concentrated into a small area. -The integrator is using `Monte Carlo`[^6] with `importance sampling`[^7] -where the biased distribution is a `bivariate Gaussian`[^8] distribution, -with deliberately larger variance. -If the generated sample falls outside the (-1, 1) interval, -another sample is generated. - -The center and the covariance matrix for the biased distribution -are based on the first and second moments of samples seen so far -(within the computation). The center is used directly, -the covariance matrix is scaled up by a heuristic constant (8.0 by default). -The following additional features are applied, -designed to avoid hyper-focused distributions. - -Each computation starts with the biased distribution inherited -from the previous computation (a zero point and unit covariance matrix -are used in the first computation), but the overall weight of the data -is set to the weight of the first sample of the computation. -Also, the center is set to the first sample point. -When additional samples come, their weight (including the importance correction) -is compared to the sum of the weights of data seen so far (within the iteration). -If the new sample is more than one e-fold more impactful, both weight values -(for data so far and for the new sample) are set to the (geometric) average -of the two weights. - -This combination showed the best behavior, as the integrator usually follows -two phases. The first phase (where the inherited biased distribution -or a single big sample dominates) is mainly important -for locating the new area the posterior distribution is concentrated at. -The second phase (dominated by the whole sample population) -is actually relevant for the critical rate estimation. - -### Offered Load Selection - -The first two measurements are hardcoded to happen at the middle of the rate interval -and at max_rate. The next two measurements follow MRR-like logic: -offered load is decreased so that it would reach the target loss ratio -if the offered load decrease led to an equal decrease of loss rate. - -The rest of the measurements start directly in between -the erf and stretch estimate averages. -There is one workaround implemented, aimed at reducing the number of consecutive -zero loss measurements (per fitting function). The workaround first stores -every measurement result whose loss ratio was the target loss ratio or higher. -A sorted list (called lossy loads) of such results is maintained. - -When a sequence of one or more zero loss measurement results is encountered, -the smallest of the lossy loads is drained from the list.
-If the estimate average is smaller than the drained value, -a weighted average of this estimate and the drained value is used -as the next offered load. The weight of the estimate decreases exponentially -with the length of consecutive zero loss results. - -This behavior helps the algorithm with convergence speed, -as it does not need so many zero loss result to get near critical region. -Using the smallest (not drained yet) of lossy loads makes it sure -the new offered load is unlikely to result in big loss region. -Draining even if the estimate is large enough helps to discard -early measurements when loss hapened at too low offered load. -Current implementation adds 4 copies of lossy loads and drains 3 of them, -which leads to fairly stable behavior even for somewhat inconsistent SUTs. - -### Caveats - -As high loss count measurements add many bits of information, -they need a large amount of small loss count measurements to balance them, -making the algorithm converge quite slowly. Typically, this happens -when few initial measurements suggest spread way bigger then later measurements. -The workaround in offered load selection helps, -but more intelligent workarounds could get faster convergence still. - -Some systems evidently do not follow the assumption of repeated measurements -having the same average loss rate (when the offered load is the same). -The idea of estimating the trend is not implemented at all, -as the observed trends have varied characteristics. - -Probably, using a more realistic fitting functions -will give better estimates than trend analysis. - -## Bottom Line - -The notion of Throughput is easy to grasp, but it is harder to measure -with any accuracy for non-deterministic systems. - -Even though the notion of critical rate is harder to grasp than the notion -of throughput, it is easier to measure using probabilistic methods. - -In testing, the difference between througput measurements and critical -rate measurements is usually small. - -In pactice, rules of thumb such as "send at max 95% of purported throughput" -are common. The correct benchmarking analysis should ask "Which notion is -95% of throughput an approximation to?" before attempting to answer -"Is 95% of critical rate safe enough?". - -## Algorithmic Analysis - -### Motivation - -While the estimation computation is based on hard probability science; -the offered load selection part of PLRsearch logic is pure heuristics, -motivated by what would a human do based on measurement and computation results. - -The quality of any heuristic is not affected by soundness of its motivation, -just by its ability to achieve the intended goals. -In case of offered load selection, the goal is to help the search to converge -to the long duration estimates sooner. - -But even those long duration estimates could still be of poor quality. -Even though the estimate computation is Bayesian (so it is the best it could be -within the applied assumptions), it can still of poor quality when compared -to what a human would estimate. - -One possible source of poor quality is the randomnes inherently present -in Monte Carlo numeric integration, but that can be supressed -by tweaking the time related input parameters. - -The most likely source of poor quality then are the assumptions. -Most importantly, the number and the shape of fitting functions; -but also others, such as trial order independence and duration independence. - -The result can have poor quality in basically two ways. -One way is related to location. 
Both upper and lower bounds -can be overestimates or underestimates, meaning the entire estimated interval -between lower bound and upper bound lays above or below (respectively) -of human-estimated interval. -The other way is related to the estimation interval width. -The interval can be too wide or too narrow, compared to human estimation. - -An estimate from a particular fitting function can be classified -as an overestimate (or underestimate) just by looking at time evolution -(without human examining measurement results). Overestimates -decrease by time, underestimates increase by time (assuming -the system performance stays constant). - -Quality of the width of the estimation interval needs human evaluation, -and is unrelated to both rate of narrowing (both good and bad estimate intervals -get narrower at approximately the same relative rate) and relatative width -(depends heavily on the system being tested). - -### Graphical Examples - -The following pictures show the upper (red) and lower (blue) bound, -as well as average of Stretch (pink) and Erf (light green) estimate, -and offered load chosen (grey), as computed by PLRsearch, -after each trial measurement within the 30 minute duration of a test run. - -Both graphs are focusing on later estimates. Estimates computed from -few initial measurements are wildly off the y-axis range shown. - -The following analysis will rely on frequency of zero loss measurements -and magnitude of loss ratio if nonzero. - -The offered load selection strategy used implies zero loss measurements -can be gleaned from the graph by looking at offered load points. -When the points move up farther from lower estimate, it means -the previous measurement had zero loss. After non-zero loss, -the offered load starts again right between (the previous values of) -the estimate curves. - -The very big loss ratio results are visible as noticeable jumps -of both estimates downwards. Medium and small loss ratios are much harder -to distinguish just by looking at the estimate curves, -the analysis is based on raw loss ratio measurement results. - -The following descriptions should explain why the graphs seem to signal -low quality estimate at first sight, but a more detailed look -reveals the quality is good (considering the measurement results). - -#### L2 patch - -Both fitting functions give similar estimates, the graph shows -"stochasticity" of measurements (estimates increase and decrease -within small time regions), and an overall trend of decreasing estimates. - -On the first look, the final interval looks fairly narrow, -especially compared to the region the estimates have travelled -during the search. But the look at the frequency of zero loss results shows -this is not a case of overestimation. Measurements at around the same -offered load have higher probability of zero loss earlier -(when performed farther from upper bound), but smaller probability later -(when performed closer to upper bound). That means it is the performance -of the system under test that decreases (slightly) over time. - -With that in mind, the apparent narrowness of the interval -is not a sign of low quality, just a consequence of PLRsearch assuming -the performance stays constant. - -{{< figure src="/cdocs/PLR_patch.svg" >}} - -#### Vhost - -This test case shows what looks like a quite broad estimation interval, -compared to other test cases with similarly looking zero loss frequencies. 
-Notable features are infrequent high-loss measurement results -causing big drops of estimates, and lack of long-term convergence. - -Any convergence in medium-sized intervals (during zero loss results) -is reverted by the big loss results, as they happen quite far -from the critical load estimates, and the two fitting functions -extrapolate differently. - -In other words, human only seeing estimates from one fitting function -would expect narrower end interval, but human seeing the measured loss ratios -agrees that the interval should be wider than that. - -{{< figure src="/cdocs/PLR_vhost.svg" >}} - -#### Summary - -The two graphs show the behavior of PLRsearch algorithm applied to soaking test -when some of PLRsearch assumptions do not hold: - -+ L2 patch measurement results violate the assumption - of performance not changing over time. -+ Vhost measurement results violate the assumption - of Poisson distribution matching the loss counts. - -The reported upper and lower bounds can have distance larger or smaller -than a first look by a human would expect, but a more closer look reveals -the quality is good, considering the circumstances. - -The usefullness of the critical load estimate is of questionable value -when the assumptions are violated. - -Some improvements can be made via more specific workarounds, -for example long term limit of L2 patch performance could be estmated -by some heuristic. - -Other improvements can be achieved only by asking users -whether loss patterns matter. Is it better to have single digit losses -distributed fairly evenly over time (as Poisson distribution would suggest), -or is it better to have short periods of medium losses -mixed with long periods of zero losses (as happens in Vhost test) -with the same overall loss ratio? - -[^1]: [draft-vpolak-bmwg-plrsearch-02](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch-02) -[^2]: [plrsearch draft](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch-00) -[^3]: [RFC 2544](https://tools.ietf.org/html/rfc2544) -[^4]: [Lomax distribution](https://en.wikipedia.org/wiki/Lomax_distribution) -[^5]: [reciprocal distribution](https://en.wikipedia.org/wiki/Reciprocal_distribution) -[^6]: [Monte Carlo](https://en.wikipedia.org/wiki/Monte_Carlo_integration) -[^7]: [importance sampling](https://en.wikipedia.org/wiki/Importance_sampling) -[^8]: [bivariate Gaussian](https://en.wikipedia.org/wiki/Multivariate_normal_distribution) -[^9]: [binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm) \ No newline at end of file diff --git a/docs/content/methodology/dut_state_considerations.md b/docs/content/methodology/dut_state_considerations.md deleted file mode 100644 index 55e408f5f2..0000000000 --- a/docs/content/methodology/dut_state_considerations.md +++ /dev/null @@ -1,148 +0,0 @@ ---- -title: "DUT state considerations" -weight: 6 ---- - -# DUT state considerations - -This page discusses considerations for Device Under Test (DUT) state. -DUTs such as VPP require configuration, to be provided before the aplication -starts (via config files) or just after it starts (via API or CLI access). - -During operation DUTs gather various telemetry data, depending on configuration. -This internal state handling is part of normal operation, -so any performance impact is included in the test results. -Accessing telemetry data is additional load on DUT, -so we are not doing that in main trial measurements that affect results, -but we include separate trials specifically for gathering runtime telemetry. 
- -But there is one kind of state that needs specific handling. -This kind of DUT state is dynamically created based on incoming traffic, -it affects how DUT handles the traffic, and (unlike telemetry counters) -it has uneven impact on CPU load. -Typical example is NAT, where detecting new sessions takes more CPU than -forwarding packet on existing (open or recently closed) sessions. -We call DUT configurations with this kind of state "stateful", -and configurations without them "stateless". -(Even though stateless configurations contain state described in previous -paragraphs, and some configuration items may have "stateful" in their name, -such as stateful ACLs.) - -# Stateful DUT configurations - -Typically, the level of CPU impact of traffic depends on DUT state. -The first packets causing DUT state to change have higher impact, -subsequent packets matching that state have lower impact. - -From performance point of view, this is similar to traffic phases -for stateful protocols, see -[NGFW draft](https://tools.ietf.org/html/draft-ietf-bmwg-ngfw-performance-05#section-4.3.4). -In CSIT we borrow the terminology (even if it does not fit perfectly, -see discussion below). Ramp-up traffic causes the state change, -sustain traffic does not change the state. - -As the performance is different, each test has to choose which traffic -it wants to test, and manipulate the DUT state to achieve the intended impact. - -## Ramp-up trial - -Tests aiming at sustain performance need to make sure DUT state is created. -We achieve this via a ramp-up trial, specific purpose of which -is to create the state. - -Subsequent trials need no specific handling, as long as the state -remains the same. But some state can time-out, so additional ramp-up -trials are inserted whenever the code detects the state can time-out. -Note that a trial with zero loss refreshes the state, -so only the time since the last non-zero loss trial is tracked. - -For the state to be set completely, it is important both DUT and TG -do not lose any packets. We achieve this by setting the profile multiplier -(TPS from now on) to low enough value. - -It is also important each state-affecting packet is sent. -For size-limited traffic profile it is guaranteed by the size limit. -For continuous traffic, we set a long enough duration (based on TPS). - -At the end of the ramp-up trial, we check DUT state to confirm -it has been created as expected. -Test fails if the state is not (completely) created. - -## State Reset - -Tests aiming at ramp-up performance do not use ramp-up trial, -and they need to reset the DUT state before each trial measurement. -The way of resetting the state depends on test, -usually an API call is used to partially de-configure -the part that holds the state, and then re-configure it back. - -In CSIT we control the DUT state behavior via a test variable "resetter". -If it is not set, DUT state is not reset. -If it is set, each search algorithm (including MRR) will invoke it -before all trial measurements (both main and telemetry ones). -Any configuration keyword enabling a feature with DUT state -will check whether a test variable for ramp-up rate is present. -If it is present, resetter is not set. -If it is not present, the keyword sets the apropriate resetter value. -This logic makes sure either ramp-up or state reset are used. - -Notes: If both ramp-up and state reset were used, the DUT behavior -would be identical to just reset, while test would take longer to execute. 
-If neither were used, DUT will show different performance in subsequent trials, -violating assumptions of search algorithms. - -## DUT versus protocol ramp-up - -There are at least three different causes for bandwidth possibly increasing -within a single measurement trial. - -The first is DUT switching from state modification phase to constant phase, -it is the primary focus of this document. -Using ramp-up traffic before main trials eliminates this cause -for tests wishing to measure the performance of the next phase. -Using size-limited profiles eliminates the next phase -for tests wishing to measure performance of this phase. - -The second is protocol such as TCP ramping up their throughput to utilize -the bandwidth available. This is the original meaning of "ramp up" -in the NGFW draft (see above). -In existing tests we are not using this meaning of TCP ramp-up. -Instead we use only small transactions, and large enough initial window -so TCP acts as ramped-up already. - -The third is TCP increasing offered load due to retransmissions triggered by -packet loss. In CSIT we again try to avoid this behavior -by using small enough data to transfer, so overlap of multiple transactions -(primary cause of packet loss) is unlikely. -But in MRR tests, packet loss and non-constant offered load are still expected. - -# Stateless DUT configuratons - -These are simple configurations, which do not set any resetter value -(even if ramp-up duration is not configured). -Majority of existing tests are of this type, using continuous traffic profiles. - -In order to identify limits of Trex performance, -we have added suites with stateless DUT configuration (VPP ip4base) -subjected to size-limited ASTF traffic. -The discovered rates serve as a basis of comparison -for evaluating the results for stateful DUT configurations (VPP NAT44ed) -subjected to the same traffic profiles. - -# DUT versus TG state - -Traffic Generator profiles can be stateful (ASTF) or stateless (STL). -DUT configuration can be stateful or stateless (with respect to packet traffic). - -In CSIT we currently use all four possible configurations: - -- Regular stateless VPP tests use stateless traffic profiles. - -- Stateless VPP configuration with stateful profile is used as a base for - comparison. - -- Some stateful DUT configurations (NAT44DET, NAT44ED unidirectional) - are tested using stateless traffic profiles and continuous traffic. - -- The rest of stateful DUT configurations (NAT44ED bidirectional) - are tested using stateful traffic profiles and size limited traffic. diff --git a/docs/content/methodology/generic_segmentation_offload.md b/docs/content/methodology/generic_segmentation_offload.md deleted file mode 100644 index ddb19ba826..0000000000 --- a/docs/content/methodology/generic_segmentation_offload.md +++ /dev/null @@ -1,116 +0,0 @@ ---- -title: "Generic Segmentation Offload" -weight: 15 ---- - -# Generic Segmentation Offload - -## Overview - -Generic Segmentation Offload (GSO) reduces per-packet processing -overhead by enabling applications to pass a multi-packet buffer to -(v)NIC and process a smaller number of large packets (e.g. frame size of -64 KB), instead of processing higher numbers of small packets (e.g. -frame size of 1500 B), thus reducing per-packet overhead. - -GSO tests for VPP vhostuser and tapv2 interfaces. All tests cases use iPerf3 -client and server applications running TCP/IP as a traffic generator. For -performance comparison the same tests are run without GSO enabled. 
- -## GSO Test Topologies - -Two VPP GSO test topologies are implemented: - -1. iPerfC_GSOvirtio_LinuxVM --- GSOvhost_VPP_GSOvhost --- iPerfS_GSOvirtio_LinuxVM - - - Tests VPP GSO on vhostuser interfaces and interaction with Linux - virtio with GSO enabled. - -2. iPerfC_GSOtap_LinuxNspace --- GSOtapv2_VPP_GSOtapv2 --- iPerfS_GSOtap_LinuxNspace - - - Tests VPP GSO on tapv2 interfaces and interaction with Linux tap - with GSO enabled. - -Common configuration: - -- iPerfC (client) and iPerfS (server) run in TCP/IP mode without upper - bandwidth limit. -- Trial duration is set to 30 sec. -- iPerfC, iPerfS and VPP run in the single SUT node. - - -## VPP GSOtap Topology - -### VPP Configuration - -VPP GSOtap tests are executed without using hyperthreading. VPP worker runs on -a single core. Multi-core tests are not executed. Each interface belongs to -separate namespace. Following core pinning scheme is used: - -- 1t1c (rxq=1, rx_qsz=4096, tx_qsz=4096) - - system isolated: 0,28,56,84 - - vpp mt: 1 - - vpp wt: 2 - - vhost: 3-5 - - iperf-s: 6 - - iperf-c: 7 - -### iPerf3 Server Configuration - -iPerf3 version used 3.7 - - $ sudo -E -S ip netns exec tap1_namespace iperf3 \ - --server --daemon --pidfile /tmp/iperf3_server.pid --logfile /tmp/iperf3.log --port 5201 --affinity - -For the full iPerf3 reference please see: -[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst). - - -### iPerf3 Client Configuration - -iPerf3 version used 3.7 - - $ sudo -E -S ip netns exec tap1_namespace iperf3 \ - --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel --time 30.0 --affinity --zerocopy - -For the full iPerf3 reference please see: -[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst). - - -## VPP GSOvhost Topology - -### VPP Configuration - -VPP GSOvhost tests are executed without using hyperthreading. VPP worker runs -on a single core. Multi-core tests are not executed. Following core pinning -scheme is used: - -- 1t1c (rxq=1, rx_qsz=1024, tx_qsz=1024) - - system isolated: 0,28,56,84 - - vpp mt: 1 - - vpp wt: 2 - - vm-iperf-s: 3,4,5,6,7 - - vm-iperf-c: 8,9,10,11,12 - - iperf-s: 1 - - iperf-c: 1 - -### iPerf3 Server Configuration - -iPerf3 version used 3.7 - - $ sudo iperf3 \ - --server --daemon --pidfile /tmp/iperf3_server.pid --logfile /tmp/iperf3.log --port 5201 --affinity X - -For the full iPerf3 reference please see: -[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst). - - -### iPerf3 Client Configuration - -iPerf3 version used 3.7 - - $ sudo iperf3 \ - --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel --time 30.0 --affinity X --zerocopy - -For the full iPerf3 reference please see: -[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst). \ No newline at end of file diff --git a/docs/content/methodology/geneve.md b/docs/content/methodology/geneve.md deleted file mode 100644 index f4a0af92e7..0000000000 --- a/docs/content/methodology/geneve.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -title: "GENEVE" -weight: 21 ---- - -# GENEVE - -## GENEVE Prefix Bindings - -GENEVE prefix bindings should be representative to target applications, where -a packet flows of particular set of IPv4 addresses (L3 underlay network) is -routed via dedicated GENEVE interface by building an L2 overlay. 
- -Private address ranges to be used in tests: - -- East hosts ip address range: 10.0.1.0 - 10.127.255.255 (10.0/9 prefix) - - - Total of 2^23 - 256 (8 388 352) of usable IPv4 addresses - - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay networks) - -- West hosts ip address range: 10.128.1.0 - 10.255.255.255 (10.128/9 prefix) - - - Total of 2^23 - 256 (8 388 352) of usable IPv4 addresses - - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay networks) - -## GENEVE Tunnel Scale - -If N is a number of GENEVE tunnels (and IPv4 underlay networks) then TG sends -256 packet flows in every of N different sets: - -- i = 1,2,3, ... N - GENEVE tunnel index - -- East-West direction: GENEVE encapsulated packets - - - Outer IP header: - - - src ip: 1.1.1.1 - - - dst ip: 1.1.1.2 - - - GENEVE header: - - - vni: i - - - Inner IP header: - - - src_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i/255)).(0-to-255) - - - dst_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i/255)).(0-to-255) - -- West-East direction: non-encapsulated packets - - - IP header: - - - src_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i/255)).(0-to-255) - - - dst_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i/255)).(0-to-255) - - **geneve-tunnels** | **total-flows** --------------------:|----------------: - 1 | 256 - 4 | 1 024 - 16 | 4 096 - 64 | 16 384 - 256 | 65 536 - 1 024 | 262 144 \ No newline at end of file diff --git a/docs/content/methodology/hoststack_testing/_index.md b/docs/content/methodology/hoststack_testing/_index.md deleted file mode 100644 index b658313040..0000000000 --- a/docs/content/methodology/hoststack_testing/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -bookCollapseSection: true -bookFlatSection: false -title: "Hoststack Testing" -weight: 14 ---- \ No newline at end of file diff --git a/docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md b/docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md deleted file mode 100644 index c7d57a51b3..0000000000 --- a/docs/content/methodology/hoststack_testing/quicudpip_with_vppecho.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: "QUIC/UDP/IP with vpp_echo" -weight: 1 ---- - -# QUIC/UDP/IP with vpp_echo - -[vpp_echo performance testing tool](https://wiki.fd.io/view/VPP/HostStack#External_Echo_Server.2FClient_.28vpp_echo.29) -is a bespoke performance test application which utilizes the 'native -HostStack APIs' to verify performance and correct handling of -connection/stream events with uni-directional and bi-directional -streams of data. - -Because iperf3 does not support the QUIC transport protocol, vpp_echo -is used for measuring the maximum attainable goodput of the VPP Host -Stack connection utilizing the QUIC transport protocol across two -instances of VPP running on separate DUT nodes. The QUIC transport -protocol supports multiple streams per connection and test cases -utilize different combinations of QUIC connections and number of -streams per connection. - -The test configuration is as follows: - - DUT1 Network DUT2 - [ vpp_echo-client -> VPP1 ]=======[ VPP2 -> vpp_echo-server] - N-streams/connection - -where, - -1. vpp_echo server attaches to VPP2 and LISTENs on VPP2:TCP port 1234. -2. vpp_echo client creates one or more connections to VPP1 and opens - one or more stream per connection to VPP2:TCP port 1234. -3. vpp_echo client transmits a uni-directional stream as fast as the - VPP Host Stack allows to the vpp_echo server for the test duration. -4. 
At the end of the test the vpp_echo client emits the goodput - measurements for all streams and the sum of all streams. - -Test cases include - -1. 1 QUIC Connection with 1 Stream -2. 1 QUIC connection with 10 Streams -3. 10 QUIC connetions with 1 Stream -4. 10 QUIC connections with 10 Streams - -with stream sizes to provide reasonable test durations. The VPP Host -Stack QUIC transport is configured to utilize the picotls encryption -library. In the future, tests utilizing addtional encryption -algorithms will be added. diff --git a/docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md b/docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md deleted file mode 100644 index 7baa88ab50..0000000000 --- a/docs/content/methodology/hoststack_testing/tcpip_with_iperf3.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -title: "TCP/IP with iperf3" -weight: 2 ---- - -# TCP/IP with iperf3 - -[iperf3 goodput measurement tool](https://github.com/esnet/iperf) -is used for measuring the maximum attainable goodput of the VPP Host -Stack connection across two instances of VPP running on separate DUT -nodes. iperf3 is a popular open source tool for active measurements -of the maximum achievable goodput on IP networks. - -Because iperf3 utilizes the POSIX socket interface APIs, the current -test configuration utilizes the LD_PRELOAD mechanism in the linux -kernel to connect iperf3 to the VPP Host Stack using the VPP -Communications Library (VCL) LD_PRELOAD library (libvcl_ldpreload.so). - -In the future, a forked version of iperf3 which has been modified to -directly use the VCL application APIs may be added to determine the -difference in performance of 'VCL Native' applications versus utilizing -LD_PRELOAD which inherently has more overhead and other limitations. - -The test configuration is as follows: - - DUT1 Network DUT2 - [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server] - -where, - -1. iperf3 server attaches to VPP2 and LISTENs on VPP2:TCP port 5201. -2. iperf3 client attaches to VPP1 and opens one or more stream - connections to VPP2:TCP port 5201. -3. iperf3 client transmits a uni-directional stream as fast as the - VPP Host Stack allows to the iperf3 server for the test duration. -4. At the end of the test the iperf3 client emits the goodput - measurements for all streams and the sum of all streams. - -Test cases include 1 and 10 Streams with a 20 second test duration -with the VPP Host Stack configured to utilize the Cubic TCP -congestion algorithm. - -Note: iperf3 is single threaded, so it is expected that the 10 stream -test shows little or no performance improvement due to -multi-thread/multi-core execution. - -There are also variations of these test cases which use the VPP Network -Simulator (NSIM) plugin to test the VPP Hoststack goodput with 1 percent -of the traffic being dropped at the output interface of VPP1 thereby -simulating a lossy network. The NSIM tests are experimental and the -test results are not currently representative of typical results in a -lossy network. 
diff --git a/docs/content/methodology/hoststack_testing/udpip_with_iperf3.md b/docs/content/methodology/hoststack_testing/udpip_with_iperf3.md deleted file mode 100644 index 01ddf61269..0000000000 --- a/docs/content/methodology/hoststack_testing/udpip_with_iperf3.md +++ /dev/null @@ -1,44 +0,0 @@ ---- -title: "UDP/IP with iperf3" -weight: 3 ---- - -# UDP/IP with iperf3 - -[iperf3 goodput measurement tool](https://github.com/esnet/iperf) -is used for measuring the maximum attainable goodput of the VPP Host -Stack connection across two instances of VPP running on separate DUT -nodes. iperf3 is a popular open source tool for active measurements -of the maximum achievable goodput on IP networks. - -Because iperf3 utilizes the POSIX socket interface APIs, the current -test configuration utilizes the LD_PRELOAD mechanism in the linux -kernel to connect iperf3 to the VPP Host Stack using the VPP -Communications Library (VCL) LD_PRELOAD library (libvcl_ldpreload.so). - -In the future, a forked version of iperf3 which has been modified to -directly use the VCL application APIs may be added to determine the -difference in performance of 'VCL Native' applications versus utilizing -LD_PRELOAD which inherently has more overhead and other limitations. - -The test configuration is as follows: - - DUT1 Network DUT2 - [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server] - -where, - -1. iperf3 server attaches to VPP2 and LISTENs on VPP2:UDP port 5201. -2. iperf3 client attaches to VPP1 and transmits one or more streams - of packets to VPP2:UDP port 5201. -3. iperf3 client transmits a uni-directional stream as fast as the - VPP Host Stack allows to the iperf3 server for the test duration. -4. At the end of the test the iperf3 client emits the goodput - measurements for all streams and the sum of all streams. - -Test cases include 1 and 10 Streams with a 20 second test duration -with the VPP Host Stack using the UDP transport layer.. - -Note: iperf3 is single threaded, so it is expected that the 10 stream -test shows little or no performance improvement due to -multi-thread/multi-core execution. diff --git a/docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md b/docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md deleted file mode 100644 index 2dc4d2b7f9..0000000000 --- a/docs/content/methodology/hoststack_testing/vsap_ab_with_nginx.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "VSAP ab with nginx" -weight: 4 ---- - -# VSAP ab with nginx - -[VSAP (VPP Stack Acceleration Project)](https://wiki.fd.io/view/VSAP) -aims to establish an industry user space application ecosystem based on -the VPP hoststack. As a pre-requisite to adapting open source applications -using VPP Communications Library to accelerate performance, the VSAP team -has introduced baseline tests utilizing the LD_PRELOAD mechanism to capture -baseline performance data. - -[AB (Apache HTTP server benchmarking tool)](https://httpd.apache.org/docs/2.4/programs/ab.html) -is used for measuring the maximum connections-per-second and requests-per-second. - -[NGINX](https://www.nginx.com) is a popular open source HTTP server -application. Because NGINX utilizes the POSIX socket interface APIs, the test -configuration uses the LD_PRELOAD mechanism to connect NGINX to the VPP -Hoststack using the VPP Communications Library (VCL) LD_PRELOAD library -(libvcl_ldpreload.so). 
- -In the future, a version of NGINX which has been modified to -directly use the VCL application APIs will be added to determine the -difference in performance of 'VCL Native' applications versus utilizing -LD_PRELOAD which inherently has more overhead and other limitations. - -The test configuration is as follows: - - TG Network DUT - [ AB ]=============[ VPP -> nginx ] - -where, - -1. nginx attaches to VPP and listens on TCP port 80 -2. ab runs CPS and RPS tests with packets flowing from the Test Generator node, - across 100G NICs, through VPP hoststack to NGINX. -3. At the end of the tests, the results are reported by AB. diff --git a/docs/content/methodology/internet_protocol_security_ipsec.md b/docs/content/methodology/internet_protocol_security_ipsec.md deleted file mode 100644 index 711004f2c0..0000000000 --- a/docs/content/methodology/internet_protocol_security_ipsec.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -title: "Internet Protocol Security (IPsec)" -weight: 11 ---- - -# Internet Protocol Security (IPsec) - -VPP IPsec performance tests are executed for the following crypto -plugins: - -- `crypto_native`, used for software based crypto leveraging CPU - platform optimizations e.g. Intel's AES-NI instruction set. -- `crypto_ipsecmb`, used for hardware based crypto with Intel QAT PCIe - cards. - -## IPsec with VPP Native SW Crypto - -CSIT implements following IPsec test cases relying on VPP native crypto -(`crypto_native` plugin): - - **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested** -----------------------:|-------------------:|------------------:|-----------------: - crypto_native | AES[128\|256]-GCM | GCM | 1 to 60k tunnels - crypto_native | AES128-CBC | SHA[256\|512] | 1 to 60k tunnels - -VPP IPsec with SW crypto are executed in both tunnel and policy modes, -with tests running on 3-node testbeds: 3n-icx, 3n-tsh. - -## IPsec with Intel QAT HW - -CSIT implements following IPsec test cases relying on ipsecmb library -(`crypto_ipsecmb` plugin) and Intel QAT 8950 (50G HW crypto card): - -dpdk_cryptodev - - **VPP Crypto Engine** | **VPP Crypto Workers** | **ESP Encryption** | **ESP Integrity** | **Scale Tested** -----------------------:|-----------------------:|-------------------:|------------------:|-----------------: - crypto_ipsecmb | sync/all workers | AES[128\|256]-GCM | GCM | 1, 1k tunnels - crypto_ipsecmb | sync/all workers | AES[128]-CBC | SHA[256\|512] | 1, 1k tunnels - crypto_ipsecmb | async/crypto worker | AES[128\|256]-GCM | GCM | 1, 4, 1k tunnels - crypto_ipsecmb | async/crypto worker | AES[128]-CBC | SHA[256\|512] | 1, 4, 1k tunnels - -## IPsec with Async Crypto Feature Workers - -*TODO Description to be added* - -## IPsec Uni-Directional Tests with VPP Native SW Crypto - -CSIT implements following IPsec uni-directional test cases relying on VPP native -crypto (`crypto_native` plugin) in tunnel mode: - - **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested** -----------------------:|-------------------:|------------------:|-------------------: - crypto_native | AES[128\|256]-GCM | GCM | 4, 1k, 10k tunnels - crypto_native | AES128-CBC | SHA[512] | 4, 1k, 10k tunnels - -In policy mode: - - **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested** -----------------------:|-------------------:|------------------:|------------------: - crypto_native | AES[256]-GCM | GCM | 1, 40, 1k tunnels - -The tests are running on 2-node testbeds: 2n-tx2. 
The uni-directional tests -are partially addressing a weakness in 2-node testbed setups with T-Rex as -the traffic generator. With just one DUT node, we can either encrypt or decrypt -traffic in each direction. - -The testcases are only doing encryption - packets are encrypted on the DUT and -then arrive at TG where no additional packet processing is needed (just -counting packets). - -Decryption would require that the traffic generator generated encrypted packets -which the DUT then would decrypt. However, T-Rex does not have the capability -to encrypt packets. diff --git a/docs/content/methodology/measurements/_index.md b/docs/content/methodology/measurements/_index.md new file mode 100644 index 0000000000..9e9232969e --- /dev/null +++ b/docs/content/methodology/measurements/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: true +bookFlatSection: false +title: "Measurements" +weight: 2 +--- diff --git a/docs/content/methodology/measurements/data_plane_throughput/_index.md b/docs/content/methodology/measurements/data_plane_throughput/_index.md new file mode 100644 index 0000000000..8fc7f66f3e --- /dev/null +++ b/docs/content/methodology/measurements/data_plane_throughput/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: true +bookFlatSection: false +title: "Data Plane Throughput" +weight: 1 +--- \ No newline at end of file diff --git a/docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md b/docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md new file mode 100644 index 0000000000..865405ba2f --- /dev/null +++ b/docs/content/methodology/measurements/data_plane_throughput/data_plane_throughput.md @@ -0,0 +1,129 @@ +--- +title: "Overview" +weight: 1 +--- + +# Data Plane Throughput + +Network data plane throughput is measured using multiple test methods in +order to obtain representative and repeatable results across the large +set of performance test cases implemented and executed within CSIT. + +Following throughput test methods are used: + +- MLRsearch - Multiple Loss Ratio search +- PLRsearch - Probabilistic Loss Ratio search +- MRR - Maximum Receive Rate + +Description of each test method is followed by generic test properties +shared by all methods. + +## MLRsearch Tests + +### Description + +Multiple Loss Ratio search (MLRsearch) tests discover multiple packet +throughput rates in a single search, reducing the overall test execution +time compared to a binary search. Each rate is associated with a +distinct Packet Loss Ratio (PLR) criteria. In FD.io CSIT two throughput +rates are discovered: Non-Drop Rate (NDR, with zero packet loss, PLR=0) +and Partial Drop Rate (PDR, with PLR<0.5%). MLRsearch is compliant with +RFC2544. + +### Usage + +MLRsearch tests are run to discover NDR and PDR rates for each VPP and +DPDK release covered by CSIT report. Results for small frame sizes +(64B/78B, IMIX) are presented in packet throughput graphs +(Box-and-Whisker Plots) with NDR and PDR rates plotted against the test +cases covering popular VPP packet paths. + +Each test is executed at least 10 times to verify measurements +repeatability and results are compared between releases and test +environments. NDR and PDR packet and bandwidth throughput results for +all frame sizes and for all tests are presented in detailed results +tables. + +### Details + +See [MLRSearch]({{< ref "mlr_search/#MLRsearch" >}}) section for more detail. 
+MLRsearch is being standardized in IETF in
+[draft-ietf-bmwg-mlrsearch](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-01).
+
+## PLRsearch Tests
+
+### Description
+
+Probabilistic Loss Ratio search (PLRsearch) tests discover a packet
+throughput rate associated with configured Packet Loss Ratio (PLR)
+criteria for tests run over an extended period of time, a.k.a. soak
+testing. PLRsearch assumes that the system under test is probabilistic in
+nature, and not deterministic.
+
+### Usage
+
+PLRsearch tests are run to discover a sustained throughput for PLR=10^-7^
+(close to NDR) for each VPP release covered by the CSIT report. Results for
+small frame sizes (64B/78B) are presented in packet throughput graphs (Box
+Plots) for a small subset of baseline tests.
+
+Each soak test lasts 30 minutes and is executed at least twice. Results are
+compared against NDR and PDR rates discovered with MLRsearch.
+
+### Details
+
+See [PLRSearch]({{< ref "plr_search/#PLRsearch" >}}) methodology section for
+more detail. PLRsearch is being standardized in IETF in
+[draft-vpolak-bmwg-plrsearch](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch).
+
+## MRR Tests
+
+### Description
+
+Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests,
+as they provide a maximum “raw” throughput benchmark for the development
+and testing community.
+
+MRR tests measure the packet forwarding rate under the maximum load
+offered by the traffic generator (dependent on link type and NIC model)
+over a set trial duration, regardless of packet loss. Maximum load for
+a specified Ethernet frame size is set to the bi-directional link rate.
+
+### Usage
+
+MRR tests are much faster than MLRsearch as they rely on a single trial
+or a small set of trials with very short duration. It is this property
+that makes them suitable for continuous execution in daily performance
+trending jobs, enabling detection of performance anomalies (regressions,
+progressions) resulting from data plane code changes.
+
+MRR tests are also used for VPP per patch performance jobs verifying
+patch performance vs parent. CSIT reports include MRR throughput
+comparisons between releases and test environments. Only small frame
+sizes are used (64B/78B, IMIX).
+
+### Details
+
+See [MRR Throughput]({{< ref "mrr/#MRR" >}})
+section for more detail about MRR test configuration.
+
+FD.io CSIT performance dashboard includes a complete description of
+[daily performance trending tests]({{< ref "../../trending/analysis" >}})
+and [VPP per patch tests]({{< ref "../../per_patch_testing.md" >}}).
+
+## Generic Test Properties
+
+All data plane throughput test methodologies share the following generic
+properties:
+
+- Tested L2 frame sizes (untagged Ethernet):
+
+  - IPv4 payload: 64B, IMIX (28x64B, 16x570B, 4x1518B), 1518B, 9000B.
+  - IPv6 payload: 78B, IMIX (28x78B, 16x570B, 4x1518B), 1518B, 9000B.
+  - All quoted sizes include frame CRC, but exclude per frame
+    transmission overhead of 20B (preamble, inter frame gap).
+
+- Offered packet load is always bi-directional and symmetric.
+- All measured and reported packet and bandwidth rates are aggregate
+  bi-directional rates reported from external Traffic Generator
+  perspective.
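+
+As a quick sanity check of how frame sizes map to maximum loads, the
+sketch below (an editorial Python example, not part of the CSIT test
+code) derives the theoretical maximum packet rate from the frame size
+and the 20B per-frame transmission overhead quoted above:
+
+    # Editorial example: theoretical maximum packet rate per direction.
+    # frame_size_bytes includes frame CRC; 20B covers preamble and
+    # inter frame gap, which are excluded from the quoted frame sizes.
+    def max_pps(frame_size_bytes, link_rate_bps):
+        bits_on_wire = (frame_size_bytes + 20) * 8
+        return link_rate_bps / bits_on_wire
+
+    # 64B frames on a 10GE link: ~14.88 Mpps per direction, matching
+    # the 2x14.88 Mpps bi-directional maximum used in MRR tests.
+    print(max_pps(64, 10e9) / 1e6)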
diff --git a/docs/content/methodology/measurements/data_plane_throughput/mlr_search.md b/docs/content/methodology/measurements/data_plane_throughput/mlr_search.md
new file mode 100644
index 0000000000..93bdb51efe
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/mlr_search.md
@@ -0,0 +1,88 @@
+---
+title: "MLR Search"
+weight: 2
+---
+
+# MLR Search
+
+## Overview
+
+Multiple Loss Ratio search (MLRsearch) tests use an optimized search algorithm
+implemented in the FD.io CSIT project. MLRsearch discovers any number of
+loss ratio loads in a single search.
+
+Two loss ratio goals are of interest in FD.io CSIT, leading to Non-Drop Rate
+(NDR, loss ratio goal is exactly zero) and Partial Drop Rate
+(PDR, non-zero loss ratio goal, currently 0.5%).
+
+MLRsearch discovers all the loads in a single pass, reducing the required
+time duration compared to separate `binary search`es[^1] for each rate.
+Overall search time is reduced even further by relying on shorter trial
+durations of intermediate steps, with only the final measurements
+conducted at the specified final trial duration. This results in a
+shorter overall execution time when compared to standard NDR/PDR binary
+search, while guaranteeing similar results.
+
+ Note: All throughput rates are *always* bi-directional aggregates of two
+ equal (symmetric) uni-directional packet rates received and reported by an
+ external traffic generator, unless the test specifically requires
+ unidirectional traffic.
+
+## Search Implementation
+
+Detailed description of the MLRsearch algorithm is included in the IETF
+draft
+[draft-ietf-bmwg-mlrsearch-02](https://datatracker.ietf.org/doc/html/draft-ietf-bmwg-mlrsearch-02)
+that is in the process of being standardized in the IETF Benchmarking
+Methodology Working Group (BMWG).
+(A newer version is published in IETF, describing improvements not yet used
+in CSIT production.)
+
+MLRsearch is also available as a
+[PyPI (Python Package Index) library](https://pypi.org/project/MLRsearch/).
+
+## Algorithm highlights
+
+MRR and receive rate at MRR load are used as initial guesses for the search.
+
+All previously measured trials (except the very first one, which can act
+as a warm-up) are taken into consideration, unless superseded
+by a trial at the same load but higher duration.
+
+For every loss ratio goal, the tightest upper and lower bounds
+(from results of large enough trial duration) form an interval.
+The exit condition is given by that interval reaching a low enough relative
+width. Small enough width is achieved by bisecting the current interval.
+The bisection can be uneven, to save measurements based on information theory.
+
+Switching to a higher trial duration generally requires a re-measure
+at a load from the previous trial duration.
+When the re-measurement does not confirm the previous bound classification
+(e.g. the tightest lower bound at a shorter trial duration becomes
+the newest tightest upper bound upon re-measurement),
+external search is used to find a close enough bound of the lost type.
+External search is a generalization of the first stage of
+`exponential search`[^2].
+
+Shorter trial durations use a double width goal,
+because one bisection is always safe before risking external search.
+
+Within an iteration for a specific trial duration, smaller loss ratios (NDR)
+are narrowed down first before the search continues with higher loss ratios
+(PDR).
+
+Other heuristics are in place, aimed at preventing unnecessarily narrow
+intervals and at handling corner cases around min and max load.
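+
+To make the external search step above concrete, here is a minimal
+editorial sketch (with hypothetical helper names, not the production
+MLRsearch code) of re-discovering a lost lower bound by doubling the
+step away from the invalidated load, mirroring the first stage of
+exponential search:
+
+    # Editorial sketch: after a re-measurement turned the previous lower
+    # bound into an upper bound, search downward for a new lower bound.
+    # `measure(load)` is a stand-in for a trial returning a loss ratio.
+    def external_search_down(measure, failed_load, min_load, goal_ratio):
+        step = 1.0  # initial step, in the same units as the loads
+        load = failed_load
+        while load > min_load:
+            load = max(min_load, load - step)
+            if measure(load) <= goal_ratio:
+                return load  # a new valid lower bound
+            step *= 2.0  # double the step, as in exponential search
+        return None  # no valid lower bound found above min_load
+
+The production logic differs in details (step sizing, handling of both
+bound types), but the doubling idea is the same.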
+
+## Deviations from RFC 2544
+
+CSIT does not have any explicit wait times before and after trial traffic.
+
+Small differences between intended and offered load are tolerated,
+mainly due to various time overheads preventing precise measurement
+of the traffic duration (and TRex can sometimes suffer from duration
+stretching).
+
+The final trial duration is only 30s (10s for reconf tests).
+
+[^1]: [binary search](https://en.wikipedia.org/wiki/Binary_search)
+[^2]: [exponential search](https://en.wikipedia.org/wiki/Exponential_search)
diff --git a/docs/content/methodology/measurements/data_plane_throughput/mrr.md b/docs/content/methodology/measurements/data_plane_throughput/mrr.md
new file mode 100644
index 0000000000..e8c3e62eb6
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/mrr.md
@@ -0,0 +1,56 @@
+---
+title: "MRR"
+weight: 4
+---
+
+# MRR
+
+Maximum Receive Rate (MRR) tests are complementary to MLRsearch tests,
+as they provide a maximum "raw" throughput benchmark for the development
+and testing community. MRR tests measure the packet forwarding rate under
+the maximum load offered by the traffic generator over a set trial duration,
+regardless of packet loss.
+
+MRR tests are currently used for the following test jobs:
+
+- Report performance comparison: 64B, IMIX for vhost, memif.
+- Daily performance trending: 64B, IMIX for vhost, memif.
+- Per-patch performance verification: 64B.
+- Initial iterations of MLRsearch and PLRsearch: 64B.
+
+Maximum offered load for a specific L2 Ethernet frame size is set to
+either the maximum bi-directional link rate or the tested NIC model
+capacity, as follows:
+
+- For 10GE NICs the maximum packet rate load is 2x14.88 Mpps for 64B, a
+  10GE bi-directional link rate.
+- For 25GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a
+  25GE bi-directional link sub-rate limited by the 25GE NIC used on the
+  TRex TG, XXV710.
+- For 40GE NICs the maximum packet rate load is 2x18.75 Mpps for 64B, a
+  40GE bi-directional link sub-rate limited by the 40GE NIC used on the
+  TRex TG, XL710. Packet rate for other tested frame sizes is limited by
+  the PCIe Gen3 x8 bandwidth limitation of ~50Gbps.
+
+MRR test code implements multiple bursts of offered packet load and has
+two configurable burst parameters: individual trial duration and number
+of trials in a single burst. This enables more precise performance
+trending by providing more data for analysis.
+
+Burst parameter settings vary between different tests using MRR:
+
+- MRR individual trial duration:
+
+  - Report performance comparison: 1 sec.
+  - Daily performance trending: 1 sec.
+  - Per-patch performance verification: 10 sec.
+  - Initial iteration for MLRsearch: 1 sec.
+  - Initial iteration for PLRsearch: 5.2 sec.
+
+- Number of MRR trials per burst:
+
+  - Report performance comparison: 10.
+  - Daily performance trending: 10.
+  - Per-patch performance verification: 5.
+  - Initial iteration for MLRsearch: 1.
+  - Initial iteration for PLRsearch: 1.
diff --git a/docs/content/methodology/measurements/data_plane_throughput/plr_search.md b/docs/content/methodology/measurements/data_plane_throughput/plr_search.md
new file mode 100644
index 0000000000..529bac1f7f
--- /dev/null
+++ b/docs/content/methodology/measurements/data_plane_throughput/plr_search.md
@@ -0,0 +1,383 @@
+---
+title: "PLR Search"
+weight: 3
+---
+
+# PLR Search
+
+## Motivation for PLRsearch
+
+Network providers are interested in the throughput a system can sustain.
+
+`RFC 2544`[^1] assumes loss ratio is given by a deterministic function of
+offered load. But NFV software systems are not deterministic enough.
+This makes deterministic algorithms (such as `binary search`[^2] per RFC 2544
+and MLRsearch with a single trial) return results which,
+when repeated, show relatively high standard deviation,
+thus making it harder to tell what "the throughput" actually is.
+
+We need another algorithm, which takes this indeterminism into account.
+
+## Generic Algorithm
+
+Detailed description of the PLRsearch algorithm is included in the IETF
+draft `Probabilistic Loss Ratio Search for Packet Throughput`[^3] that is in
+the process of being standardized in the IETF Benchmarking Methodology Working
+Group (BMWG).
+
+### Terms
+
+The rest of this page assumes the reader is familiar with the following terms
+defined in the IETF draft:
+
++ Trial Order Independent System
++ Duration Independent System
++ Target Loss Ratio
++ Critical Load
++ Offered Load regions
+
+  + Zero Loss Region
+  + Non-Deterministic Region
+  + Guaranteed Loss Region
+
++ Fitting Function
+
+  + Stretch Function
+  + Erf Function
+
++ Bayesian Inference
+
+  + Prior distribution
+  + Posterior Distribution
+
++ Numeric Integration
+
+  + Monte Carlo
+  + Importance Sampling
+
+## FD.io CSIT Implementation Specifics
+
+The search receives min_rate and max_rate values, to avoid measurements
+at offered loads not supported by the traffic generator.
+
+The implemented test cases use bidirectional traffic.
+The algorithm stores each rate as a bidirectional rate (internally,
+the algorithm is agnostic to flows and directions,
+it only cares about aggregate counts of packets sent and packets lost),
+but debug output from the traffic generator lists unidirectional values.
+
+### Measurement Delay
+
+In the sample implementation in the FD.io CSIT project, there is roughly a
+0.5 second delay between trials due to restrictions imposed by the packet
+traffic generator in use (T-Rex).
+
+As measurement results come in, posterior distribution computation takes
+more time (per sample), although there is a considerable constant part
+(mostly for inverting the fitting functions).
+
+Also, the integrator needs a fair amount of samples to reach the region
+the posterior distribution is concentrated at.
+
+And of course, the speed of the integrator depends on the computing power
+of the CPU the algorithm is able to use.
+
+All those timing related effects are addressed by arithmetically increasing
+trial durations with configurable coefficients
+(currently 5.1 seconds for the first trial,
+each subsequent trial being 0.1 second longer).
+
+### Rounding Errors and Underflows
+
+In order to avoid them, the current implementation tracks the natural
+logarithm (instead of the original quantity) for any quantity which is never
+negative. Logarithm of zero is minus infinity (not supported by Python),
+so the special value "None" is used instead.
+Specific functions for frequent operations (such as "logarithm
+of sum of exponentials") are defined to handle None correctly.
+
+### Fitting Functions
+
+The current implementation uses two fitting functions, called "stretch" and
+"erf". In general, their estimates for the critical rate differ,
+which adds a simple source of systematic error,
+on top of the randomness error reported by the integrator.
+Otherwise the reported stdev of the critical rate estimate
+is unrealistically low.
+
+Both functions are not only increasing, but also convex
+(meaning the rate of increase is also increasing).
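+
+The log-domain bookkeeping described in the Rounding Errors and
+Underflows section, which both fitting function implementations rely
+on, can be sketched as follows (an editorial illustration, not the
+exact CSIT helper):
+
+    import math
+
+    # Editorial sketch of a None-aware "logarithm of sum of exponentials":
+    # None stands for log(0.0), i.e. minus infinity.
+    def log_plus(first, second):
+        if first is None:
+            return second
+        if second is None:
+            return first
+        # Factor out the larger exponent so exp() cannot overflow.
+        if second > first:
+            first, second = second, first
+        return first + math.log1p(math.exp(second - first))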
+
+Both fitting functions have several mathematically equivalent formulas,
+and each can lead to an arithmetic overflow or underflow in different
+sub-terms. Overflows can be eliminated by using different exact formulas
+for different argument ranges.
+Underflows can be avoided by using approximate formulas
+in affected argument ranges; each such range has its own formula to compute.
+In the end, both fitting function implementations
+contain multiple "if" branches, so discontinuities are a possibility
+at range boundaries.
+
+### Prior Distributions
+
+The numeric integrator expects all the parameters to be distributed
+(independently and) uniformly on an interval (-1, 1).
+
+As both "mrr" and "spread" parameters are positive and not dimensionless,
+a transformation is needed. Dimensionality is inherited from the max_rate
+value.
+
+The "mrr" parameter follows a `Lomax distribution`[^4]
+with alpha equal to one, but shifted so that mrr is always greater than 1
+packet per second.
+
+The "stretch" parameter is generated simply as the "mrr" value
+raised to a random power between zero and one;
+thus it follows a `reciprocal distribution`[^5].
+
+### Integrator
+
+After a few measurements, the posterior distribution of fitting function
+arguments gets quite concentrated into a small area.
+The integrator uses `Monte Carlo`[^6] with `importance sampling`[^7]
+where the biased distribution is a `bivariate Gaussian`[^8] distribution,
+with deliberately larger variance.
+If the generated sample falls outside the (-1, 1) interval,
+another sample is generated.
+
+The center and the covariance matrix for the biased distribution
+are based on the first and second moments of samples seen so far
+(within the computation). The center is used directly, while the
+covariance matrix is scaled up by a heuristic constant (8.0 by default).
+The following additional features, designed to avoid hyper-focused
+distributions, are applied.
+
+Each computation starts with the biased distribution inherited
+from the previous computation (a zero point and unit covariance matrix
+are used in the first computation), but the overall weight of the data
+is set to the weight of the first sample of the computation.
+Also, the center is set to the first sample point.
+When additional samples come, their weight (including the importance
+correction) is compared to the sum of the weights of data seen so far
+(within the iteration).
+If the new sample is more than one e-fold more impactful, both weight values
+(for data so far and for the new sample) are set to the (geometric) average
+of the two weights.
+
+This combination showed the best behavior, as the integrator usually follows
+two phases. The first phase (where the inherited biased distribution
+or a single big sample is dominating) is mainly important
+for locating the new area the posterior distribution is concentrated at.
+The second phase (dominated by the whole sample population)
+is actually relevant for the critical rate estimation.
+
+### Offered Load Selection
+
+The first two measurements are hardcoded to happen at the middle of the rate
+interval and at max_rate. The next two measurements follow MRR-like logic:
+the offered load is decreased so that it would reach the target loss ratio
+if the decrease in offered load led to an equal decrease in loss rate.
+
+The rest of the measurements start directly at the average of the
+erf and stretch estimates.
+There is one workaround implemented, aimed at reducing the number of
+consecutive zero loss measurements (per fitting function).
The workaround first stores
+every measurement result whose loss ratio was the target loss ratio or
+higher. A sorted list (called lossy loads) of such results is maintained.
+
+When a sequence of one or more zero loss measurement results is encountered,
+the smallest of the lossy loads is drained from the list.
+If the estimate average is smaller than the drained value,
+a weighted average of this estimate and the drained value is used
+as the next offered load. The weight of the estimate decreases exponentially
+with the length of consecutive zero loss results.
+
+This behavior helps the algorithm with convergence speed,
+as it does not need so many zero loss results to get near the critical region.
+Using the smallest (not yet drained) of the lossy loads makes it likely
+the new offered load will not land in the big loss region.
+Draining even if the estimate is large enough helps to discard
+early measurements where loss happened at a too low offered load.
+The current implementation adds 4 copies of lossy loads and drains 3 of them,
+which leads to fairly stable behavior even for somewhat inconsistent SUTs.
+
+### Caveats
+
+As high loss count measurements add many bits of information,
+they need a large amount of small loss count measurements to balance them,
+making the algorithm converge quite slowly. Typically, this happens
+when a few initial measurements suggest a spread way bigger than later
+measurements do. The workaround in offered load selection helps,
+but more intelligent workarounds could get faster convergence still.
+
+Some systems evidently do not follow the assumption of repeated measurements
+having the same average loss rate (when the offered load is the same).
+The idea of estimating the trend is not implemented at all,
+as the observed trends have varied characteristics.
+
+Probably, using more realistic fitting functions
+will give better estimates than trend analysis.
+
+## Bottom Line
+
+The notion of Throughput is easy to grasp, but it is harder to measure
+with any accuracy for non-deterministic systems.
+
+Even though the notion of critical rate is harder to grasp than the notion
+of throughput, it is easier to measure using probabilistic methods.
+
+In testing, the difference between throughput measurements and critical
+rate measurements is usually small.
+
+In practice, rules of thumb such as "send at max 95% of purported throughput"
+are common. The correct benchmarking analysis should ask "Which notion is
+95% of throughput an approximation to?" before attempting to answer
+"Is 95% of critical rate safe enough?".
+
+## Algorithmic Analysis
+
+### Motivation
+
+While the estimation computation is based on hard probability science,
+the offered load selection part of PLRsearch logic is pure heuristics,
+motivated by what a human would do based on measurement and computation
+results.
+
+The quality of any heuristic is not affected by the soundness of its
+motivation, just by its ability to achieve the intended goals.
+In case of offered load selection, the goal is to help the search to converge
+to the long duration estimates sooner.
+
+But even those long duration estimates could still be of poor quality.
+Even though the estimate computation is Bayesian (so it is the best it could
+be within the applied assumptions), it can still be of poor quality when
+compared to what a human would estimate.
+
+One possible source of poor quality is the randomness inherently present
+in Monte Carlo numeric integration, but that can be suppressed
+by tweaking the time related input parameters.
+
+The most likely source of poor quality then is the assumptions.
+Most importantly, the number and the shape of the fitting functions,
+but also others, such as trial order independence and duration independence.
+
+The result can have poor quality in basically two ways.
+One way is related to location. Both upper and lower bounds
+can be overestimates or underestimates, meaning the entire estimated interval
+between lower bound and upper bound lies above or below (respectively)
+the human-estimated interval.
+The other way is related to the estimation interval width.
+The interval can be too wide or too narrow, compared to human estimation.
+
+An estimate from a particular fitting function can be classified
+as an overestimate (or underestimate) just by looking at its time evolution
+(without a human examining measurement results). Overestimates
+decrease over time, underestimates increase over time (assuming
+the system performance stays constant).
+
+Quality of the width of the estimation interval needs human evaluation,
+and is unrelated to both the rate of narrowing (both good and bad estimate
+intervals get narrower at approximately the same relative rate) and the
+relative width (which depends heavily on the system being tested).
+
+### Graphical Examples
+
+The following pictures show the upper (red) and lower (blue) bound,
+as well as the average of the Stretch (pink) and Erf (light green) estimates,
+and the offered load chosen (grey), as computed by PLRsearch,
+after each trial measurement within the 30 minute duration of a test run.
+
+Both graphs focus on later estimates. Estimates computed from
+a few initial measurements are wildly off the y-axis range shown.
+
+The following analysis will rely on the frequency of zero loss measurements
+and the magnitude of the loss ratio if nonzero.
+
+The offered load selection strategy used implies that zero loss measurements
+can be gleaned from the graph by looking at the offered load points.
+When the points move up farther from the lower estimate, it means
+the previous measurement had zero loss. After a non-zero loss,
+the offered load starts again right between (the previous values of)
+the estimate curves.
+
+Results with very big loss ratios are visible as noticeable jumps
+of both estimates downwards. Medium and small loss ratios are much harder
+to distinguish just by looking at the estimate curves,
+so the analysis is based on raw loss ratio measurement results.
+
+The following descriptions should explain why the graphs seem to signal
+a low quality estimate at first sight, but a more detailed look
+reveals the quality is good (considering the measurement results).
+
+#### L2 patch
+
+Both fitting functions give similar estimates; the graph shows
+the "stochasticity" of measurements (estimates increase and decrease
+within small time regions), and an overall trend of decreasing estimates.
+
+At first look, the final interval looks fairly narrow,
+especially compared to the region the estimates have travelled
+during the search. But a look at the frequency of zero loss results shows
+this is not a case of overestimation. Measurements at around the same
+offered load have a higher probability of zero loss earlier
+(when performed farther from the upper bound), but a smaller probability later
+(when performed closer to the upper bound). That means it is the performance
+of the system under test that decreases (slightly) over time.
+
+With that in mind, the apparent narrowness of the interval
+is not a sign of low quality, just a consequence of PLRsearch assuming
+the performance stays constant.
+
+{{< figure src="/cdocs/PLR_patch.svg" >}}
+
+#### Vhost
+
+This test case shows what looks like a quite broad estimation interval,
+compared to other test cases with similarly looking zero loss frequencies.
+Notable features are infrequent high-loss measurement results
+causing big drops of estimates, and a lack of long-term convergence.
+
+Any convergence in medium-sized intervals (during zero loss results)
+is reverted by the big loss results, as they happen quite far
+from the critical load estimates, and the two fitting functions
+extrapolate differently.
+
+In other words, a human seeing only estimates from one fitting function
+would expect a narrower end interval, but a human seeing the measured loss
+ratios agrees that the interval should be wider than that.
+
+{{< figure src="/cdocs/PLR_vhost.svg" >}}
+
+#### Summary
+
+The two graphs show the behavior of the PLRsearch algorithm applied to a
+soaking test when some of the PLRsearch assumptions do not hold:
+
++ L2 patch measurement results violate the assumption
+  of performance not changing over time.
++ Vhost measurement results violate the assumption
+  of Poisson distribution matching the loss counts.
+
+The reported upper and lower bounds can be farther apart or closer together
+than a human would expect at first look, but a closer look reveals
+the quality is good, considering the circumstances.
+
+The usefulness of the critical load estimate is questionable
+when the assumptions are violated.
+
+Some improvements can be made via more specific workarounds,
+for example the long term limit of L2 patch performance could be estimated
+by some heuristic.
+
+Other improvements can be achieved only by asking users
+whether loss patterns matter. Is it better to have single digit losses
+distributed fairly evenly over time (as Poisson distribution would suggest),
+or is it better to have short periods of medium losses
+mixed with long periods of zero losses (as happens in the Vhost test)
+with the same overall loss ratio?
+
+[^1]: [RFC 2544: Benchmarking Methodology for Network Interconnect Devices](https://tools.ietf.org/html/rfc2544)
+[^2]: [Binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm)
+[^3]: [Probabilistic Loss Ratio Search for Packet Throughput](https://tools.ietf.org/html/draft-vpolak-bmwg-plrsearch-02)
+[^4]: [Lomax distribution](https://en.wikipedia.org/wiki/Lomax_distribution)
+[^5]: [Reciprocal distribution](https://en.wikipedia.org/wiki/Reciprocal_distribution)
+[^6]: [Monte Carlo](https://en.wikipedia.org/wiki/Monte_Carlo_integration)
+[^7]: [Importance sampling](https://en.wikipedia.org/wiki/Importance_sampling)
+[^8]: [Bivariate Gaussian](https://en.wikipedia.org/wiki/Multivariate_normal_distribution)
diff --git a/docs/content/methodology/measurements/packet_latency.md b/docs/content/methodology/measurements/packet_latency.md
new file mode 100644
index 0000000000..f3606b5ffb
--- /dev/null
+++ b/docs/content/methodology/measurements/packet_latency.md
@@ -0,0 +1,52 @@
+---
+title: "Packet Latency"
+weight: 2
+---
+
+# Packet Latency
+
+TRex Traffic Generator (TG) is used for measuring one-way latency in
+2-Node and 3-Node physical testbed topologies. TRex integrates
+[High Dynamic Range Histogram (HDRH)](http://hdrhistogram.org/)
+functionality and reports per packet latency distribution for latency
+streams sent in parallel to the main load packet streams.
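+
+The reported distributions can be post-processed with the `hdrhistogram`
+Python package. The sketch below only illustrates the principle; the
+sample latency values are made up.
+
+```python
+from hdrh.histogram import HdrHistogram
+
+# Track latencies from 1 usec to 1 second with 3 significant digits.
+histogram = HdrHistogram(1, 1_000_000, 3)
+for latency_usec in (25, 27, 29, 30, 31, 120):  # made-up sample values
+    histogram.record_value(latency_usec)
+
+# Query the distribution at selected percentiles.
+for percentile in (50.0, 90.0, 99.0, 99.9999):
+    value = histogram.get_value_at_percentile(percentile)
+    print(f"P{percentile}: {value} usec")
+```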
+
+The following methodology is used:
+
+- Only the NDRPDR test type measures latency, and only after the NDR and PDR
+  values are determined. Other test types do not involve latency
+  streams.
+
+- Latency is measured at different background load packet rates:
+
+  - No-Load: latency streams only.
+  - Low-Load: at 10% PDR.
+  - Mid-Load: at 50% PDR.
+  - High-Load: at 90% PDR.
+
+- Latency is measured for all tested packet sizes except IMIX due to
+  a TRex TG restriction.
+
+- TG sends dedicated latency streams, one per direction, each at the
+  rate of 9 kpps at the prescribed packet size; these are sent in
+  addition to the main load streams.
+
+- TG reports Min/Avg/Max and HDRH latency values distribution per stream
+  direction, hence two sets of latency values are reported per test case
+  (marked as E-W and W-E).
+
+- +/- 1 usec is the measurement accuracy of TRex TG, and the data in the HDRH
+  latency values distribution is rounded to microseconds.
+
+- TRex TG introduces a (background) always-on Tx + Rx latency bias of 4
+  usec on average per direction, resulting from TRex software writing and
+  reading packet timestamps on CPU cores. Quoted values are based on TG
+  back-to-back latency measurements.
+
+- Latency graphs are not smoothed; each latency value has its own
+  horizontal line across corresponding packet percentiles.
+
+- Percentiles are shown on the X-axis using a logarithmic scale, so the
+  maximal latency value (ending at the 100% percentile) would be at
+  infinity. The graphs are cut at 99.9999% (hover information still
+  lists 100%).
diff --git a/docs/content/methodology/measurements/telemetry.md b/docs/content/methodology/measurements/telemetry.md
new file mode 100644
index 0000000000..aed32d9e17
--- /dev/null
+++ b/docs/content/methodology/measurements/telemetry.md
@@ -0,0 +1,158 @@
+---
+title: "Telemetry"
+weight: 3
+---
+
+# Telemetry
+
+OpenMetrics specifies the de-facto standard for transmitting cloud-native
+metrics at scale, with support for both text representation and Protocol
+Buffers.
+
+## RFC
+
+- RFC2119
+- RFC5234
+- RFC8174
+- draft-richih-opsawg-openmetrics-00
+
+## Reference
+
+[OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md)
+
+## Metric Types
+
+- Gauge
+- Counter
+- StateSet
+- Info
+- Histogram
+- GaugeHistogram
+- Summary
+- Unknown
+
+The telemetry module in CSIT currently supports only Gauge, Counter and Info.
+
+## Anatomy of CSIT telemetry implementation
+
+The existing implementation consists of several measurement building blocks:
+the main measuring block running search algorithms (MLR, PLR, SOAK, MRR, ...),
+the latency measuring block, and several telemetry blocks, with or without
+traffic running in the background.
+
+The main measuring block must not be interrupted by any read operation that
+could impact data plane traffic processing during the throughput search. Thus
+operational reads are done before (pre-stat) and after (post-stat) that block.
+
+Some operational reads must be done while traffic is running and usually
+consist of two reads (pre-run-stat, post-run-stat) with a defined delay between
+them.
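+
+As an illustration, the ordering of these blocks for an MRR-style run could
+be sketched as follows. The names are illustrative only (the callables stand
+in for the stat programs listed in the diagrams below), not actual CSIT
+keywords.
+
+```python
+import time
+
+def run_mrr_test(start_traffic, stop_traffic, stats, measure, delay=1.0):
+    """Sketch of how stat blocks wrap the main measuring block."""
+    # Runtime reads happen in a dedicated traffic run, not during trials.
+    start_traffic()
+    stats["pre_run_stat"]()   # e.g. vpp-clear-runtime
+    time.sleep(delay)         # defined delay, traffic still running
+    stats["post_run_stat"]()  # e.g. vpp-show-runtime
+    stop_traffic()
+    # The main measuring block is never interrupted by read operations.
+    stats["pre_stat"]()       # e.g. vpp-clear-stats
+    result = measure()        # search algorithm runs its trials here
+    stats["post_stat"]()      # e.g. vpp-show-stats
+    return result
+```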
+ +## MRR measurement + + traffic_start(r=mrr) traffic_stop |< measure >| + | | | (r=mrr) | + | pre_run_stat post_run_stat | pre_stat | | post_stat + | | | | | | | | + o--------o---------------o-------o------o------+---------------+------o------> + t + Legend: + - pre_run_stat + - vpp-clear-runtime + - post_run_stat + - vpp-show-runtime + - bash-perf-stat // if extended_debug == True + - pre_stat + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - vpp-enable-elog + - post_stat + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True + - vpp-show-elog + + |< measure >| + | (r=mrr) | + | | + |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >| + | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) | + | | | | + o-----------------------o------------------------o------------------------o---> + t + + +## MLR measurement + + |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >| + | (r=mlr) | | | | | | .9/.5/.1/.0 | + | | | pre_run_stat post_run_stat | | pre_run_stat post_run_stat | | | + | | | | | | | | | | | | + +-------------+---o-------o---------------o--------o-------------o-------o---------------o--------o------------[-------------------]---> + t + Legend: + - pre_run_stat + - vpp-clear-runtime + - post_run_stat + - vpp-show-runtime + - bash-perf-stat // if extended_debug == True + - pre_stat + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - vpp-enable-elog + - post_stat + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True + - vpp-show-elog + +## MRR measurement + + traffic_start(r=mrr) traffic_stop |< measure >| + | | | (r=mrr) | + | |< stat_runtime >| | stat_pre_trial | | stat_post_trial + | | | | | | | | + o---+------------------+---o------o------------+-------------+----o------------> + t + Legend: + - stat_runtime + - vpp-runtime + - stat_pre_trial + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - stat_post_trial + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True + + |< measure >| + | (r=mrr) | + | | + |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >| + | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) | + | | | | + o------------------------o------------------------o------------------------o---> + t + + |< stat_runtime >| + | | + |< program0 >|< program1 >|< programN >| + | (@=params) | (@=params) | (@=params) | + | | | | + o------------------------o------------------------o------------------------o---> + t + +## MLR measurement + + |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >| + | (r=mlr) | | | | | | .9/.5/.1/.0 | + | | | |< stat_runtime >| | | |< stat_runtime >| | | | + | | | | | | | | | | | | + +-------------+---o---+------------------+---o--------------o---+------------------+---o-----------[-----------------]---> + t + Legend: + - stat_runtime + - vpp-runtime + - stat_pre_trial + - vpp-clear-stats + - vpp-enable-packettrace // if extended_debug == True + - stat_post_trial + - vpp-show-stats + - vpp-show-packettrace // if extended_debug == True diff --git a/docs/content/methodology/multi_core_speedup.md b/docs/content/methodology/multi_core_speedup.md deleted file mode 100644 index c0c9ae2570..0000000000 --- a/docs/content/methodology/multi_core_speedup.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -title: "Multi-Core Speedup" -weight: 13 ---- - -# Multi-Core Speedup - -All performance tests are executed with single 
physical core and with -multiple cores scenarios. - -## Intel Hyper-Threading (HT) - -Intel Xeon processors used in FD.io CSIT can operate either in HT -Disabled mode (single logical core per each physical core) or in HT -Enabled mode (two logical cores per each physical core). HT setting is -applied in BIOS and requires server SUT reload for it to take effect, -making it impractical for continuous changes of HT mode of operation. - -Performance tests are executed with server SUTs' Intel XEON processors -configured with Intel Hyper-Threading Enabled for all Xeon -Cascadelake and Xeon Icelake testbeds. - -## Multi-core Tests - -Multi-core tests are executed in the following VPP worker thread and physical -core configurations: - -#. Intel Xeon Icelake and Cascadelake testbeds (2n-icx, 3n-icx, 2n-clx) - with Intel HT enabled (2 logical CPU cores per each physical core): - - #. 2t1c - 2 VPP worker threads on 1 physical core. - #. 4t2c - 4 VPP worker threads on 2 physical cores. - #. 8t4c - 8 VPP worker threads on 4 physical cores. - -VPP worker threads are the data plane threads running on isolated -logical cores. With Intel HT enabled VPP workers are placed as sibling -threads on each used physical core. VPP control threads (main, stats) -are running on a separate non-isolated core together with other Linux -processes. - -In all CSIT tests care is taken to ensure that each VPP worker handles -the same amount of received packet load and does the same amount of -packet processing work. This is achieved by evenly distributing per -interface type (e.g. physical, virtual) receive queues over VPP workers -using default VPP round-robin mapping and by loading these queues with -the same amount of packet flows. - -If number of VPP workers is higher than number of physical or virtual -interfaces, multiple receive queues are configured on each interface. -NIC Receive Side Scaling (RSS) for physical interfaces and multi-queue -for virtual interfaces are used for this purpose. \ No newline at end of file diff --git a/docs/content/methodology/network_address_translation.md b/docs/content/methodology/network_address_translation.md deleted file mode 100644 index ef341dc892..0000000000 --- a/docs/content/methodology/network_address_translation.md +++ /dev/null @@ -1,445 +0,0 @@ ---- -title: "Network Address Translation" -weight: 7 ---- - -# Network Address Translation - -## NAT44 Prefix Bindings - -NAT44 prefix bindings should be representative to target applications, -where a number of private IPv4 addresses from the range defined by -RFC1918 is mapped to a smaller set of public IPv4 addresses from the -public range. - -Following quantities are used to describe inside to outside IP address -and port bindings scenarios: - -- Inside-addresses, number of inside source addresses - (representing inside hosts). -- Ports-per-inside-address, number of TCP/UDP source - ports per inside source address. -- Outside-addresses, number of outside (public) source addresses - allocated to NAT44. -- Ports-per-outside-address, number of TCP/UDP source - ports per outside source address. The maximal number of - ports-per-outside-address usable for NAT is 64 512 - (in non-reserved port range 1024-65535, RFC4787). -- Sharing-ratio, equal to inside-addresses divided by outside-addresses. - -CSIT NAT44 tests are designed to take into account the maximum number of -ports (sessions) required per inside host (inside-address) and at the -same time to maximize the use of outside-address range by using all -available outside ports. 
With this in mind, the following scheme of -NAT44 sharing ratios has been devised for use in CSIT: - - **ports-per-inside-address** | **sharing-ratio** ------------------------------:|------------------: - 63 | 1024 - 126 | 512 - 252 | 256 - 504 | 128 - -Initial CSIT NAT44 tests, including associated TG/TRex traffic profiles, -are based on ports-per-inside-address set to 63 and the sharing ratio of -1024. This approach is currently used for all NAT44 tests including -NAT44det (NAT44 deterministic used for Carrier Grade NAT applications) -and NAT44ed (Endpoint Dependent). - -Private address ranges to be used in tests: - -- 192.168.0.0 - 192.168.255.255 (192.168/16 prefix) - - - Total of 2^16 (65 536) of usable IPv4 addresses. - - Used in tests for up to 65 536 inside addresses (inside hosts). - -- 172.16.0.0 - 172.31.255.255 (172.16/12 prefix) - - - Total of 2^20 (1 048 576) of usable IPv4 addresses. - - Used in tests for up to 1 048 576 inside addresses (inside hosts). - -### NAT44 Session Scale - -NAT44 session scale tested is govern by the following logic: - -- Number of inside-addresses(hosts) H[i] = (H[i-1] x 2^2) with H(0)=1 024, - i = 1,2,3, ... - - - H[i] = 1 024, 4 096, 16 384, 65 536, 262 144, ... - -- Number of sessions S[i] = H[i] * ports-per-inside-address - - - ports-per-inside-address = 63 - - **i** | **hosts** | **sessions** -------:|----------:|-------------: - 0 | 1 024 | 64 512 - 1 | 4 096 | 258 048 - 2 | 16 384 | 1 032 192 - 3 | 65 536 | 4 128 768 - 4 | 262 144 | 16 515 072 - -### NAT44 Deterministic - -NAT44det performance tests are using TRex STL (Stateless) API and traffic -profiles, similar to all other stateless packet forwarding tests like -ip4, ip6 and l2, sending UDP packets in both directions -inside-to-outside and outside-to-inside. - -The inside-to-outside traffic uses single destination address (20.0.0.0) -and port (1024). -The inside-to-outside traffic covers whole inside address and port range, -the outside-to-inside traffic covers whole outside address and port range. - -NAT44det translation entries are created during the ramp-up phase, -followed by verification that all entries are present, -before proceeding to the main measurements of the test. -This ensures session setup does not impact the forwarding performance test. - -Associated CSIT test cases use the following naming scheme to indicate -NAT44det scenario tested: - -- ethip4udp-nat44det-h{H}-p{P}-s{S}-[mrr|ndrpdr|soak] - - - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144. - - {P}, number of ports per inside host, P = 63. - - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768, - 16515072. - - [mrr|ndrpdr|soak], MRR, NDRPDR or SOAK test. - -### NAT44 Endpoint-Dependent - -In order to excercise NAT44ed ability to translate based on both -source and destination address and port, the inside-to-outside traffic -varies also destination address and port. Destination port is the same -as source port, destination address has the same offset as the source address, -but applied to different subnet (starting with 20.0.0.0). - -As the mapping is not deterministic (for security reasons), -we cannot easily use stateless bidirectional traffic profiles. -Inside address and port range is fully covered, -but we do not know which outside-to-inside source address and port to use -to hit an open session. - -Therefore, NAT44ed is benchmarked using following methodologies: - -- Unidirectional throughput using *stateless* traffic profile. 
-- Connections-per-second (CPS) using *stateful* traffic profile. -- Bidirectional throughput (TPUT, see below) using *stateful* traffic profile. - -Unidirectional NAT44ed throughput tests are using TRex STL (Stateless) -APIs and traffic profiles, but with packets sent only in -inside-to-outside direction. -Similarly to NAT44det, NAT44ed unidirectional throughput tests include -a ramp-up phase to establish and verify the presence of required NAT44ed -binding entries. As the sessions have finite duration, the test code -keeps inserting ramp-up trials during the search, if it detects a risk -of sessions timing out. Any zero loss trial visits all sessions, -so it acts also as a ramp-up. - -Stateful NAT44ed tests are using TRex ASTF (Advanced Stateful) APIs and -traffic profiles, with packets sent in both directions. Tests are run -with both UDP and TCP sessions. -As NAT44ed CPS (connections-per-second) stateful tests -measure (also) session opening performance, -they use state reset instead of ramp-up trial. -NAT44ed TPUT (bidirectional throughput) tests prepend ramp-up trials -as in the unidirectional tests, -so the test results describe performance without translation entry -creation overhead. - -Associated CSIT test cases use the following naming scheme to indicate -NAT44det case tested: - -- Stateless: ethip4udp-nat44ed-h{H}-p{P}-s{S}-udir-[mrr|ndrpdr|soak] - - - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144. - - {P}, number of ports per inside host, P = 63. - - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768, - 16515072. - - udir-[mrr|ndrpdr|soak], unidirectional stateless tests MRR, NDRPDR - or SOAK. - -- Stateful: ethip4[udp|tcp]-nat44ed-h{H}-p{P}-s{S}-[cps|tput]-[mrr|ndrpdr|soak] - - - [udp|tcp], UDP or TCP sessions - - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144. - - {P}, number of ports per inside host, P = 63. - - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768, - 16515072. - - [cps|tput], connections-per-second session establishment rate or - packets-per-second average rate, or packets-per-second rate - without session establishment. - - [mrr|ndrpdr|soak], bidirectional stateful tests MRR, NDRPDR, or SOAK. - -## Stateful traffic profiles - -There are several important details which distinguish ASTF profiles -from stateless profiles. - -### General considerations - -#### Protocols - -ASTF profiles are limited to either UDP or TCP protocol. - -#### Programs - -Each template in the profile defines two "programs", one for the client side -and one for the server side. - -Each program specifies when that side has to wait until enough data is received -(counted in packets for UDP and in bytes for TCP) -and when to send additional data. Together, the two programs -define a single transaction. Due to packet loss, transaction may take longer, -use more packets (retransmission) or never finish in its entirety. - -#### Instances - -A client instance is created according to TPS parameter for the trial, -and sends the first packet of the transaction (in some cases more packets). -Each client instance uses a different source address (see sequencing below) -and some source port. The destination address also comes from a range, -but destination port has to be constant for a given program. - -TRex uses an opaque way to chose source ports, but as session counting shows, -next client with the same source address uses a different source port. - -Server instance is created when the first packet arrives to the server side. 
-Source address and port of the first packet are used as destination address -and port for the server responses. This is the ability we need -when outside surface is not predictable. - -When a program reaches its end, the instance is deleted. -This creates possible issues with server instances. If the server instance -does not read all the data client has sent, late data packets -can cause a second copy of server instance to be created, -which breaks assumptions on how many packet a transaction should have. - -The need for server instances to read all the data reduces the overall -bandwidth TRex is able to create in ASTF mode. - -Note that client instances are not created on packets, -so it is safe to end client program without reading all server data -(unless the definition of transaction success requires that). - -#### Sequencing - -ASTF profiles offer two modes for choosing source and destination IP addresses -for client programs: seqential and pseudorandom. -In current tests we are using sequential addressing only (if destination -address varies at all). - -For client destination UDP/TCP port, we use a single constant value. -(TRex can support multiple program pairs in the same traffic profile, -distinguished by the port number.) - -#### Transaction overlap - -If a transaction takes longer to finish, compared to period implied by TPS, -TRex will have multiple client or server instances active at a time. - -During calibration testing we have found this increases CPU utilization, -and for high TPS it can lead to TRex's Rx or Tx buffers becoming full. -This generally leads to duration stretching, and/or packet loss on TRex. - -Currently used transactions were chosen to be short, so risk of bad behavior -is decreased. But in MRR tests, where load is computed based on NIC ability, -not TRex ability, anomalous behavior is still possible -(e.g. MRR values being way lower than NDR). - -#### Delays - -TRex supports adding constant delays to ASTF programs. -This can be useful, for example if we want to separate connection establishment -from data transfer. - -But as TRex tracks delayed instances as active, this still results -in higher CPU utilization and reduced performance issues -(as other overlaping transactions). So the current tests do not use any delays. - -#### Keepalives - -Both UDP and TCP protocol implementations in TRex programs support keepalive -duration. That means there is a configurable period of keepalive time, -and TRex sends keepalive packets automatically (outside the program) -for the time the program is active (started, not ended yet) -but not sending any packets. - -For TCP this is generally not a big deal, as the other side usually -retransmits faster. But for UDP it means a packet loss may leave -the receiving program running. - -In order to avoid keepalive packets, keepalive value is set to a high number. -Here, "high number" means that even at maximum scale and minimum TPS, -there are still no keepalive packets sent within the corresponding -(computed) trial duration. This number is kept the same also for -smaller scale traffic profiles, to simplify maintenance. - -#### Transaction success - -The transaction is considered successful at Layer-7 (L7) level -when both program instances close. At this point, various L7 counters -(unofficial name) are updated on TRex. - -We found that proper close and L7 counter update can be CPU intensive, -whereas lower-level counters (ipackets, opackets) called L2 counters -can keep up with higher loads. 
- -For some tests, we do not need to confirm the whole transaction was successful. -CPS (connections per second) tests are a typical example. -We care only for NAT44ed creating a session (needs one packet -in inside-to-outside direction per session) and being able to use it -(needs one packet in outside-to-inside direction). - -Similarly in TPUT tests (packet throuput, counting both control -and data packets), we care about NAT44ed ability to forward packets, -we do not care whether aplications (TRex) can fully process them at that rate. - -Therefore each type of tests has its own formula (usually just one counter -already provided by TRex) to count "successful enough" transactions -and attempted transactions. Currently, all tests relying on L7 counters -use size-limited profiles, so they know what the count of attempted -transactions should be, but due to duration stretching -TRex might have been unable to send that many packets. -For search purposes, unattempted transactions are treated the same -as attempted but failed transactions. - -Sometimes even the number of transactions as tracked by search algorithm -does not match the transactions as defined by ASTF programs. -See TCP TPUT profile below. - -### UDP CPS - -This profile uses a minimalistic transaction to verify NAT44ed session has been -created and it allows outside-to-inside traffic. - -Client instance sends one packet and ends. -Server instance sends one packet upon creation and ends. - -In principle, packet size is configurable, -but currently used tests apply only one value (100 bytes frame). - -Transaction counts as attempted when opackets counter increases on client side. -Transaction counts as successful when ipackets counter increases on client side. - -### TCP CPS - -This profile uses a minimalistic transaction to verify NAT44ed session has been -created and it allows outside-to-inside traffic. - -Client initiates TCP connection. Client waits until connection is confirmed -(by reading zero data bytes). Client ends. -Server accepts the connection. Server waits for indirect confirmation -from client (by waiting for client to initiate close). Server ends. - -Without packet loss, the whole transaction takes 7 packets to finish -(4 and 3 per direction). -From NAT44ed point of view, only the first two are needed to verify -the session got created. - -Packet size is not configurable, but currently used tests report -frame size as 64 bytes. - -Transaction counts as attempted when tcps_connattempt counter increases -on client side. -Transaction counts as successful when tcps_connects counter increases -on client side. - -### UDP TPUT - -This profile uses a small transaction of "request-response" type, -with several packets simulating data payload. - -Client sends 5 packets and closes immediately. -Server reads all 5 packets (needed to avoid late packets creating new -server instances), then sends 5 packets and closes. -The value 5 was chosen to mirror what TCP TPUT (see below) choses. - -Packet size is configurable, currently we have tests for 100, -1518 and 9000 bytes frame (to match size of TCP TPUT data frames, see below). - -As this is a packet oriented test, we do not track the whole -10 packet transaction. Similarly to stateless tests, we treat each packet -as a "transaction" for search algorthm packet loss ratio purposes. -Therefore a "transaction" is attempted when opacket counter on client -or server side is increased. Transaction is successful if ipacket counter -on client or server side is increased. 
- -If one of 5 client packets is lost, server instance will get stuck -in the reading phase. This probably decreases TRex performance, -but it leads to more stable results then alternatives. - -### TCP TPUT - -This profile uses a small transaction of "request-response" type, -with some data amount to be transferred both ways. - -In CSIT release 22.06, TRex behavior changed, so we needed to edit -the traffic profile. Let us describe the pre-22.06 profile first. - -Client connects, sends 5 data packets worth of data, -receives 5 data packets worth of data and closes its side of the connection. -Server accepts connection, reads 5 data packets worth of data, -sends 5 data packets worth of data and closes its side of the connection. -As usual in TCP, sending side waits for ACK from the receiving side -before proceeding with next step of its program. - -Server read is needed to avoid premature close and second server instance. -Client read is not stricly needed, but ACKs allow TRex to close -the server instance quickly, thus saving CPU and improving performance. - -The number 5 of data packets was chosen so TRex is able to send them -in a single burst, even with 9000 byte frame size (TRex has a hard limit -on initial window size). -That leads to 16 packets (9 of them in c2s direction) to be exchanged -if no loss occurs. -The size of data packets is controlled by the traffic profile setting -the appropriate maximum segment size. Due to TRex restrictions, -the minimal size for IPv4 data frame achievable by this method is 70 bytes, -which is more than our usual minimum of 64 bytes. -For that reason, the data frame sizes available for testing are 100 bytes -(that allows room for eventually adding IPv6 ASTF tests), -1518 bytes and 9000 bytes. There is no control over control packet sizes. - -Exactly as in UDP TPUT, ipackets and opackets counters are used for counting -"transactions" (in fact packets). - -If packet loss occurs, there can be large transaction overlap, even if most -ASTF programs finish eventually. This can lead to big duration stretching -and somehow uneven rate of packets sent. This makes it hard to interpret -MRR results (frequently MRR is below NDR for this reason), -but NDR and PDR results tend to be stable enough. - -In 22.06, the "ACK from the receiving side" behavior changed, -the receiving side started sending ACK sometimes -also before receiving the full set of 5 data packets. -If the previous profile is understood as a "single challenge, single response" -where challenge (and also response) is sent as a burst of 5 data packets, -the new profile uses "bursts" of 1 packet instead, but issues -the challenge-response part 5 times sequentially -(waiting for receiving the response before sending next challenge). -This new profile happens to have the same overall packet count -(when no re-transmissions are needed). -Although it is possibly more taxing for TRex CPU, -the results are comparable to the old traffic profile. - -## Ip4base tests - -Contrary to stateless traffic profiles, we do not have a simple limit -that would guarantee TRex is able to send traffic at specified load. -For that reason, we have added tests where "nat44ed" is replaced by "ip4base". -Instead of NAT44ed processing, the tests set minimalistic IPv4 routes, -so that packets are forwarded in both inside-to-outside and outside-to-inside -directions. 
- -The packets arrive to server end of TRex with different source address&port -than in NAT44ed tests (no translation to outside values is done with ip4base), -but those are not specified in the stateful traffic profiles. -The server end (as always) uses the received address&port as destination -for outside-to-inside traffic. Therefore the same stateful traffic profile -works for both NAT44ed and ip4base test (of the same scale). - -The NAT44ed results are displayed together with corresponding ip4base results. -If they are similar, TRex is probably the bottleneck. -If NAT44ed result is visibly smaller, it describes the real VPP performance. diff --git a/docs/content/methodology/overview/_index.md b/docs/content/methodology/overview/_index.md new file mode 100644 index 0000000000..10f362013f --- /dev/null +++ b/docs/content/methodology/overview/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: true +bookFlatSection: false +title: "Overview" +weight: 1 +--- diff --git a/docs/content/methodology/overview/dut_state_considerations.md b/docs/content/methodology/overview/dut_state_considerations.md new file mode 100644 index 0000000000..eca10a22cd --- /dev/null +++ b/docs/content/methodology/overview/dut_state_considerations.md @@ -0,0 +1,148 @@ +--- +title: "DUT State Considerations" +weight: 5 +--- + +# DUT State Considerations + +This page discusses considerations for Device Under Test (DUT) state. +DUTs such as VPP require configuration, to be provided before the aplication +starts (via config files) or just after it starts (via API or CLI access). + +During operation DUTs gather various telemetry data, depending on configuration. +This internal state handling is part of normal operation, +so any performance impact is included in the test results. +Accessing telemetry data is additional load on DUT, +so we are not doing that in main trial measurements that affect results, +but we include separate trials specifically for gathering runtime telemetry. + +But there is one kind of state that needs specific handling. +This kind of DUT state is dynamically created based on incoming traffic, +it affects how DUT handles the traffic, and (unlike telemetry counters) +it has uneven impact on CPU load. +Typical example is NAT, where detecting new sessions takes more CPU than +forwarding packet on existing (open or recently closed) sessions. +We call DUT configurations with this kind of state "stateful", +and configurations without them "stateless". +(Even though stateless configurations contain state described in previous +paragraphs, and some configuration items may have "stateful" in their name, +such as stateful ACLs.) + +# Stateful DUT configurations + +Typically, the level of CPU impact of traffic depends on DUT state. +The first packets causing DUT state to change have higher impact, +subsequent packets matching that state have lower impact. + +From performance point of view, this is similar to traffic phases +for stateful protocols, see +[NGFW draft](https://tools.ietf.org/html/draft-ietf-bmwg-ngfw-performance-05#section-4.3.4). +In CSIT we borrow the terminology (even if it does not fit perfectly, +see discussion below). Ramp-up traffic causes the state change, +sustain traffic does not change the state. + +As the performance is different, each test has to choose which traffic +it wants to test, and manipulate the DUT state to achieve the intended impact. + +## Ramp-up trial + +Tests aiming at sustain performance need to make sure DUT state is created. 
+We achieve this via a ramp-up trial, whose specific purpose
+is to create the state.
+
+Subsequent trials need no specific handling, as long as the state
+remains the same. But some state can time out, so additional ramp-up
+trials are inserted whenever the code detects the state can time out.
+Note that a trial with zero loss refreshes the state,
+so only the time since the last non-zero loss trial is tracked.
+
+For the state to be set completely, it is important that both DUT and TG
+do not lose any packets. We achieve this by setting the profile multiplier
+(TPS from now on) to a low enough value.
+
+It is also important that each state-affecting packet is sent.
+For size-limited traffic profiles this is guaranteed by the size limit.
+For continuous traffic, we set a long enough duration (based on TPS).
+
+At the end of the ramp-up trial, we check the DUT state to confirm
+it has been created as expected.
+The test fails if the state is not (completely) created.
+
+## State Reset
+
+Tests aiming at ramp-up performance do not use a ramp-up trial,
+and they need to reset the DUT state before each trial measurement.
+The way of resetting the state depends on the test;
+usually an API call is used to partially de-configure
+the part that holds the state, and then re-configure it back.
+
+In CSIT we control the DUT state behavior via a test variable "resetter".
+If it is not set, DUT state is not reset.
+If it is set, each search algorithm (including MRR) will invoke it
+before all trial measurements (both main and telemetry ones).
+Any configuration keyword enabling a feature with DUT state
+will check whether a test variable for ramp-up rate is present.
+If it is present, the resetter is not set.
+If it is not present, the keyword sets the appropriate resetter value.
+This logic makes sure either ramp-up or state reset is used.
+
+Notes: If both ramp-up and state reset were used, the DUT behavior
+would be identical to just reset, while the test would take longer to execute.
+If neither were used, the DUT would show different performance in subsequent
+trials, violating assumptions of search algorithms.
+
+## DUT versus protocol ramp-up
+
+There are at least three different causes for bandwidth possibly increasing
+within a single measurement trial.
+
+The first is the DUT switching from the state modification phase to the
+constant phase; it is the primary focus of this document.
+Using ramp-up traffic before main trials eliminates this cause
+for tests wishing to measure the performance of the next phase.
+Using size-limited profiles eliminates the next phase
+for tests wishing to measure the performance of this phase.
+
+The second is protocols such as TCP ramping up their throughput to utilize
+the bandwidth available. This is the original meaning of "ramp up"
+in the NGFW draft (see above).
+In existing tests we are not using this meaning of TCP ramp-up.
+Instead we use only small transactions, and a large enough initial window
+so TCP acts as already ramped up.
+
+The third is TCP increasing offered load due to retransmissions triggered by
+packet loss. In CSIT we again try to avoid this behavior
+by using small enough data to transfer, so overlap of multiple transactions
+(the primary cause of packet loss) is unlikely.
+But in MRR tests, packet loss and non-constant offered load are still expected.
+
+# Stateless DUT configurations
+
+These are simple configurations, which do not set any resetter value
+(even if ramp-up duration is not configured).
+The majority of existing tests are of this type, using continuous traffic
+profiles.
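+
+To illustrate, the ramp-up versus reset decision described in the State Reset
+section above can be sketched as follows. The names are hypothetical, not the
+actual CSIT keywords or variables.
+
+```python
+def choose_state_handling(test_vars, set_resetter, make_resetter):
+    """Sketch: ensure either ramp-up or state reset is used, never both.
+
+    test_vars: test variables as seen by a configuration keyword.
+    make_resetter: builds a callable that de-configures and re-configures
+    the stateful feature; search algorithms invoke it before each trial.
+    """
+    if "ramp_up_rate" in test_vars:
+        # Ramp-up trials will create (and refresh) the DUT state,
+        # so trials must observe sustain performance: no resetter.
+        return
+    # No ramp-up requested: reset state before every trial measurement,
+    # so each trial observes the state-creation performance.
+    set_resetter(make_resetter(test_vars))
+```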
+
+In order to identify the limits of TRex performance,
+we have added suites with a stateless DUT configuration (VPP ip4base)
+subjected to size-limited ASTF traffic.
+The discovered rates serve as a basis of comparison
+for evaluating the results for stateful DUT configurations (VPP NAT44ed)
+subjected to the same traffic profiles.
+
+# DUT versus TG state
+
+Traffic Generator profiles can be stateful (ASTF) or stateless (STL).
+DUT configuration can be stateful or stateless (with respect to packet traffic).
+
+In CSIT we currently use all four possible configurations:
+
+- Regular stateless VPP tests use stateless traffic profiles.
+
+- Stateless VPP configuration with a stateful profile is used as a base for
+  comparison.
+
+- Some stateful DUT configurations (NAT44DET, NAT44ED unidirectional)
+  are tested using stateless traffic profiles and continuous traffic.
+
+- The rest of stateful DUT configurations (NAT44ED bidirectional)
+  are tested using stateful traffic profiles and size-limited traffic.
diff --git a/docs/content/methodology/overview/multi_core_speedup.md b/docs/content/methodology/overview/multi_core_speedup.md
new file mode 100644
index 0000000000..f438e8e996
--- /dev/null
+++ b/docs/content/methodology/overview/multi_core_speedup.md
@@ -0,0 +1,51 @@
+---
+title: "Multi-Core Speedup"
+weight: 3
+---
+
+# Multi-Core Speedup
+
+All performance tests are executed in both single physical core and
+multiple core scenarios.
+
+## Intel Hyper-Threading (HT)
+
+Intel Xeon processors used in FD.io CSIT can operate either in HT
+Disabled mode (single logical core per each physical core) or in HT
+Enabled mode (two logical cores per each physical core). The HT setting is
+applied in BIOS and requires a server SUT reload for it to take effect,
+making it impractical for continuous changes of the HT mode of operation.
+
+Performance tests are executed with server SUTs' Intel Xeon processors
+configured with Intel Hyper-Threading Enabled for all Xeon
+Cascadelake and Xeon Icelake testbeds.
+
+## Multi-core Tests
+
+Multi-core tests are executed in the following VPP worker thread and physical
+core configurations:
+
+1. Intel Xeon Icelake and Cascadelake testbeds (2n-icx, 3n-icx, 2n-clx)
+   with Intel HT enabled (2 logical CPU cores per each physical core):
+
+   1. 2t1c - 2 VPP worker threads on 1 physical core.
+   2. 4t2c - 4 VPP worker threads on 2 physical cores.
+   3. 8t4c - 8 VPP worker threads on 4 physical cores.
+
+VPP worker threads are the data plane threads running on isolated
+logical cores. With Intel HT enabled, VPP workers are placed as sibling
+threads on each used physical core. VPP control threads (main, stats)
+are running on a separate non-isolated core together with other Linux
+processes.
+
+In all CSIT tests, care is taken to ensure that each VPP worker handles
+the same amount of received packet load and does the same amount of
+packet processing work. This is achieved by evenly distributing per
+interface type (e.g. physical, virtual) receive queues over VPP workers
+using default VPP round-robin mapping and by loading these queues with
+the same amount of packet flows.
+
+If the number of VPP workers is higher than the number of physical or virtual
+interfaces, multiple receive queues are configured on each interface.
+NIC Receive Side Scaling (RSS) for physical interfaces and multi-queue
+for virtual interfaces are used for this purpose.
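+
+The sibling placement described above can be illustrated with a small
+sketch. The sibling_offset value is an assumption (it often equals the
+per-socket physical core count); the real mapping should be read from
+lscpu or /proc/cpuinfo.
+
+```python
+def worker_lcores(pcores, smt_enabled, sibling_offset=28):
+    """Expand allocated physical cores into VPP worker lcores.
+
+    Assumes a numbering where the SMT sibling of lcore N is
+    N + sibling_offset; this is an illustration, not actual CSIT code.
+    """
+    lcores = list(pcores)
+    if smt_enabled:
+        lcores.extend(core + sibling_offset for core in pcores)
+    return sorted(lcores)
+
+# 4t2c: 2 physical cores -> 4 worker threads with Intel HT enabled.
+print(worker_lcores([2, 3], smt_enabled=True))  # [2, 3, 30, 31]
+```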
diff --git a/docs/content/methodology/overview/per_thread_resources.md b/docs/content/methodology/overview/per_thread_resources.md
new file mode 100644
index 0000000000..c23efb50bd
--- /dev/null
+++ b/docs/content/methodology/overview/per_thread_resources.md
@@ -0,0 +1,101 @@
+---
+title: "Per Thread Resources"
+weight: 2
+---
+
+# Per Thread Resources
+
+The CSIT test framework manages the mapping of the following resources per
+thread:
+
+1. Cores, physical cores (pcores) allocated as pairs of sibling logical cores
+   (lcores) if the server is in HyperThreading/SMT mode, or as single lcores
+   if it is not. Note that if the server's processors are running in
+   HyperThreading/SMT mode, sibling lcores are always used.
+2. Receive Queues (RxQ), packet receive queues allocated on each
+   physical and logical interface tested.
+3. Transmit Queues (TxQ), packet transmit queues allocated on each
+   physical and logical interface tested.
+
+The approach to mapping per thread resources depends on the application/DUT
+tested (VPP or DPDK apps) and associated thread types, as follows:
+
+1. Data-plane workers, used for data-plane packet processing, when no
+   feature workers are present.
+
+   - Cores: data-plane workers are typically tested in 1, 2 and 4 pcore
+     configurations, running on a single lcore per pcore or on sibling
+     lcores per pcore. The result is a set of {T}t{C}c thread-core
+     configurations, where {T} stands for a total number of threads
+     (lcores), and {C} for a total number of pcores. Tested
+     configurations are encoded in CSIT test case names,
+     e.g. "1c", "2c", "4c", and test tags "2T1C" (or "1T1C"), "4T2C"
+     (or "2T2C"), "8T4C" (or "4T4C"); see the sketch after this list.
+   - Interface Receive Queues (RxQ): as of the CSIT-2106 release, the number
+     of RxQs used on each physical or virtual interface is equal to the
+     number of data-plane workers. In other words each worker has a
+     dedicated RxQ on each interface tested. This ensures that the packet
+     processing load is equal for each worker, subject to RSS flow
+     load balancing efficacy. Note: Before CSIT-2106, the total number of
+     RxQs across all interfaces of a specific type was equal to the
+     number of data-plane workers.
+   - Interface Transmit Queues (TxQ): the number of TxQs used on each
+     physical or virtual interface is equal to the number of data-plane
+     workers. In other words each worker has a dedicated TxQ on each
+     interface tested.
+   - Applies to VPP and DPDK Testpmd and L3Fwd.
+
+2. Data-plane and feature workers (e.g. IPsec async crypto workers), the
+   latter dedicated to specific feature processing.
+
+   - Cores: data-plane and feature workers are tested in 2, 3 and 4
+     pcore configurations, running on a single lcore per pcore or on
+     sibling lcores per pcore. This results in two sets of
+     thread-core combinations separated by "-", {T}t{C}c-{T}t{C}c, with
+     the leading set denoting the total number of threads (lcores) and
+     pcores used for data-plane workers, and the trailing set denoting
+     the total number of lcores and pcores used for feature workers.
+     Accordingly, tested configurations are encoded in CSIT test case
+     names, e.g. "1c-1c", "1c-2c", "1c-3c", and test tags "2T1C_2T1C"
+     (or "1T1C_1T1C"), "2T1C_4T2C" (or "1T1C_2T2C"), "2T1C_6T3C"
+     (or "1T1C_3T3C").
+   - RxQ and TxQ: no RxQs and no TxQs are used by feature workers.
+   - Applies to VPP only.
+
+3. Management/main worker, control plane and management.
+
+   - Cores: single lcore.
+   - RxQ: not used (VPP default behaviour).
+   - TxQ: single TxQ per interface, allocated but not used (VPP default
+     behaviour).
+   - Applies to VPP only.
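+
+As referenced in the list above, the thread-core naming can be sketched in a
+few lines (an illustration only, not actual CSIT code):
+
+```python
+def thread_core_tag(pcores, smt_enabled):
+    """Encode a worker configuration as its {T}t{C}c name and test tag."""
+    threads = pcores * (2 if smt_enabled else 1)
+    return f"{threads}t{pcores}c", f"{threads}T{pcores}C"
+
+print(thread_core_tag(2, smt_enabled=True))   # ('4t2c', '4T2C')
+print(thread_core_tag(4, smt_enabled=False))  # ('4t4c', '4T4C')
+```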
+
+## VPP Thread Configuration
+
+Mapping of cores and RxQs to VPP data-plane worker threads is done in
+the VPP startup.conf during test suite setup:
+
+1. `corelist-workers <list of cores>`: List of logical cores to run VPP
+   data-plane workers and feature workers. The actual lcore
+   allocation depends on the HyperThreading/SMT server configuration and
+   the per-test core configuration.
+
+   - For tests without feature workers, by default, all CPU cores
+     configured in startup.conf are used for data-plane workers.
+   - For tests with feature workers, CSIT code distributes lcores across
+     data-plane and feature workers.
+
+2. `num-rx-queues <value>`: Number of Rx queues used per interface.
+
+Mapping of TxQs to VPP data-plane worker threads uses the default VPP
+setting of one TxQ per interface per data-plane worker.
+
+## DPDK Thread Configuration
+
+Mapping of cores and RxQs to DPDK Testpmd/L3Fwd data-plane worker
+threads is done in the startup CLI:
+
+1. `-l <list of cores>` - List of logical cores to run the DPDK
+   application.
+2. `nb-cores=<N>` - Number of forwarding cores.
+3. `rxq=<N>` - Number of Rx queues used per interface.
diff --git a/docs/content/methodology/overview/terminology.md b/docs/content/methodology/overview/terminology.md
new file mode 100644
index 0000000000..c9115e9291
--- /dev/null
+++ b/docs/content/methodology/overview/terminology.md
@@ -0,0 +1,97 @@
+---
+title: "Terminology"
+weight: 1
+---
+
+# Terminology
+
+- **Frame size**: size of an Ethernet Layer-2 frame on the wire, including
+  any VLAN tags (dot1q, dot1ad) and Ethernet FCS, but excluding Ethernet
+  preamble and inter-frame gap. Measured in Bytes.
+
+- **Packet size**: same as frame size, both terms used interchangeably.
+
+- **Inner L2 size**: for tunneled L2 frames only, size of an encapsulated
+  Ethernet Layer-2 frame, preceded with a tunnel header, and followed by
+  a tunnel trailer. Measured in Bytes.
+
+- **Inner IP size**: for tunneled IP packets only, size of an encapsulated
+  IPv4 or IPv6 packet, preceded with a tunnel header, and followed by
+  a tunnel trailer. Measured in Bytes.
+
+- **Device Under Test (DUT)**: In software networking, "device" denotes a
+  specific piece of software tasked with packet processing. Such a device
+  is surrounded by other software components (such as the operating system
+  kernel). It is not possible to run devices without also running the
+  other components, and hardware resources are shared between both. For
+  purposes of testing, the whole set of hardware and software components
+  is called "System Under Test" (SUT). As the SUT is the part of the whole
+  test setup whose performance can be measured with RFC2544, this document
+  uses SUT instead of the RFC2544 DUT. The device under test
+  (DUT) can be re-introduced when analyzing test results using whitebox
+  techniques, but this document sticks to blackbox testing.
+
+- **System Under Test (SUT)**: System under test (SUT) is the part of the
+  whole test setup whose performance is to be benchmarked. The complete
+  methodology contains other parts, whose performance is either already
+  established, or not affecting the benchmarking result.
+
+- **Bi-directional throughput tests**: involve packets/frames flowing in
+  both east-west and west-east directions over every tested interface of
+  SUT/DUT. Packet flow metrics are measured per direction, and can be
+  reported as aggregate for both directions (i.e. throughput) and/or
+  separately for each measured direction (i.e. latency). In most cases
+  bi-directional tests use the same (symmetric) load in both directions.
+
+- **Uni-directional throughput tests**: involve packets/frames flowing in
+  only one direction, i.e. either east-west or west-east direction, over
+  every tested interface of SUT/DUT. Packet flow metrics are measured
+  and are reported for the measured direction.
+
+- **Packet Loss Ratio (PLR)**: ratio of packets received relative to packets
+  transmitted over the test trial duration, calculated using the formula:
+  PLR = ( pkts_transmitted - pkts_received ) / pkts_transmitted.
+  For bi-directional throughput tests, aggregate PLR is calculated based
+  on the aggregate number of packets transmitted and received.
+
+- **Packet Throughput Rate**: maximum packet offered load DUT/SUT forwards
+  within the specified Packet Loss Ratio (PLR). In many cases the rate
+  depends on the frame size processed by DUT/SUT. Hence packet
+  throughput rate MUST be quoted with the specific frame size as received by
+  DUT/SUT during the measurement. For bi-directional tests, packet
+  throughput rate should be reported as aggregate for both directions.
+  Measured in packets-per-second (pps) or frames-per-second (fps),
+  equivalent metrics.
+
+- **Bandwidth Throughput Rate**: a secondary metric calculated from packet
+  throughput rate using the formula: bw_rate = pkt_rate * (frame_size +
+  L1_overhead) * 8, where L1_overhead for Ethernet includes preamble (8
+  Bytes) and inter-frame gap (12 Bytes). For bi-directional tests,
+  bandwidth throughput rate should be reported as aggregate for both
+  directions. Expressed in bits-per-second (bps).
+
+- **Non Drop Rate (NDR)**: maximum packet/bandwidth throughput rate sustained
+  by DUT/SUT at a PLR equal to zero (zero packet loss), specific to the tested
+  frame size(s). MUST be quoted with the specific packet size as received by
+  DUT/SUT during the measurement. Packet NDR is measured in
+  packets-per-second (or fps), bandwidth NDR is expressed in
+  bits-per-second (bps).
+
+- **Partial Drop Rate (PDR)**: maximum packet/bandwidth throughput rate
+  sustained by DUT/SUT at a PLR greater than zero (non-zero packet loss),
+  specific to the tested frame size(s). MUST be quoted with the specific
+  packet size as received by DUT/SUT during the measurement. Packet PDR is
+  measured in packets-per-second (or fps), bandwidth PDR is expressed in
+  bits-per-second (bps).
+
+- **Maximum Receive Rate (MRR)**: packet/bandwidth rate, regardless of PLR,
+  sustained by DUT/SUT under the specified Maximum Transmit Rate (MTR)
+  packet load offered by the traffic generator. MUST be quoted with both
+  the specific packet size and MTR as received by DUT/SUT during the
+  measurement. Packet MRR is measured in packets-per-second (or fps),
+  bandwidth MRR is expressed in bits-per-second (bps).
+
+- **Trial**: a single measurement step.
+
+- **Trial duration**: amount of time over which packets are transmitted and
+  received in a single measurement step.
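+
+The PLR and bandwidth rate formulas above translate directly into code;
+a minimal sketch:
+
+```python
+def packet_loss_ratio(pkts_transmitted, pkts_received):
+    """PLR, as defined above."""
+    return (pkts_transmitted - pkts_received) / pkts_transmitted
+
+def bandwidth_rate_bps(pkt_rate_pps, frame_size_bytes):
+    """Bandwidth throughput rate; Ethernet L1 overhead is
+    preamble (8 B) plus inter-frame gap (12 B)."""
+    l1_overhead_bytes = 8 + 12
+    return pkt_rate_pps * (frame_size_bytes + l1_overhead_bytes) * 8
+
+# 10 Mpps of 64 B frames correspond to 6.72 Gbps on the wire.
+print(bandwidth_rate_bps(10_000_000, 64))  # 6720000000
+```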
diff --git a/docs/content/methodology/overview/vpp_forwarding_modes.md b/docs/content/methodology/overview/vpp_forwarding_modes.md
new file mode 100644
index 0000000000..b3c3bba984
--- /dev/null
+++ b/docs/content/methodology/overview/vpp_forwarding_modes.md
@@ -0,0 +1,104 @@
+---
+title: "VPP Forwarding Modes"
+weight: 4
+---
+
+# VPP Forwarding Modes
+
+VPP is tested in a number of L2, IPv4 and IPv6 packet lookup and forwarding
+modes. Within each mode baseline and scale tests are executed, the latter with
+a varying number of FIB entries.
+
+## L2 Ethernet Switching
+
+VPP is tested in three L2 forwarding modes:
+
+- *l2patch*: L2 patch, the fastest point-to-point L2 path that loops
+  packets between two interfaces without any Ethernet frame checks or
+  lookups.
+- *l2xc*: L2 cross-connect, point-to-point L2 path with all Ethernet
+  frame checks, but no MAC learning and no MAC lookup.
+- *l2bd*: L2 bridge-domain, multipoint-to-multipoint L2 path with all
+  Ethernet frame checks, with MAC learning (unless static MACs are used)
+  and MAC lookup.
+
+l2bd tests are executed in baseline and scale configurations:
+
+- *l2bdbase*: Two MAC FIB entries are learned by VPP to enable packet
+  switching between two interfaces in two directions. VPP L2 switching
+  is tested with 254 IPv4 unique flows per direction, varying IPv4
+  source address per flow in order to invoke RSS based packet
+  distribution across VPP workers. The same source and destination MAC
+  address is used for all flows per direction. IPv4 source address is
+  incremented for every packet.
+
+- *l2bdscale*: A high number of MAC FIB entries are learned by VPP to
+  enable packet switching between two interfaces in two directions.
+  Tested MAC FIB sizes include: i) 10k with 5k unique flows per
+  direction, ii) 100k with 2 x 50k flows and iii) 1M with 2 x 500k
+  flows. Unique flows are created by using distinct source and
+  destination MAC addresses that are changed for every packet using
+  incremental ordering, making VPP learn (or refresh) distinct src MAC
+  entries and look up distinct dst MAC entries for every packet. For
+  details, see
+  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
+
+Ethernet wire encapsulations tested include: untagged, dot1q, dot1ad.
+
+## IPv4 Routing
+
+IPv4 routing tests are executed in baseline and scale configurations:
+
+- *ip4base*: Two /32 IPv4 FIB entries are configured in VPP to enable
+  packet routing between two interfaces in two directions. VPP routing
+  is tested with 253 IPv4 unique flows per direction, varying IPv4
+  source address per flow in order to invoke RSS based packet
+  distribution across VPP workers. IPv4 source address is incremented
+  for every packet.
+
+- *ip4scale*: A high number of /32 IPv4 FIB entries are configured in
+  VPP. Tested IPv4 FIB sizes include: i) 20k with 10k unique flows per
+  direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M
+  flows. Unique flows are created by using distinct IPv4 destination
+  addresses that are changed for every packet, using incremental or
+  random ordering. For details, see
+  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
+
+## IPv6 Routing
+
+Similarly to IPv4, IPv6 routing tests are executed in baseline and scale
+configurations:
+
+- *ip6base*: Two /128 IPv6 FIB entries are configured in VPP to enable
+  packet routing between two interfaces in two directions. VPP routing
+  is tested with 253 IPv6 unique flows per direction, varying IPv6
+  source address per flow in order to invoke RSS based packet
+  distribution across VPP workers. IPv6 source address is incremented
+  for every packet.
+
+- *ip6scale*: A high number of /128 IPv6 FIB entries are configured in
+  VPP. Tested IPv6 FIB sizes include: i) 20k with 10k unique flows per
+  direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M
+  flows. Unique flows are created by using distinct IPv6 destination
+  addresses that are changed for every packet, using incremental or
+  random ordering.
For details, see + [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}). + +## SRv6 Routing + +SRv6 routing tests are executed in a number of baseline configurations; +in each case SR policy and steering policy are configured for one +direction and one (or two) SR behaviours (functions) in the other +direction: + +- *srv6enc1sid*: One SID (no SRH present), one SR function - End. +- *srv6enc2sids*: Two SIDs (SRH present), two SR functions - End and + End.DX6. +- *srv6enc2sids-nodecaps*: Two SIDs (SRH present) without decapsulation, + one SR function - End. +- *srv6proxy-dyn*: Dynamic SRv6 proxy, one SR function - End.AD. +- *srv6proxy-masq*: Masquerading SRv6 proxy, one SR function - End.AM. +- *srv6proxy-stat*: Static SRv6 proxy, one SR function - End.AS. + +In all listed cases a low number of IPv6 flows (253 per direction) is +routed by VPP. diff --git a/docs/content/methodology/packet_flow_ordering.md b/docs/content/methodology/packet_flow_ordering.md deleted file mode 100644 index d2b3bfb90c..0000000000 --- a/docs/content/methodology/packet_flow_ordering.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -title: "Packet Flow Ordering" -weight: 9 ---- - -# Packet Flow Ordering - -TRex Traffic Generator (TG) supports two main ways how to cover -address space (on allowed ranges) in scale tests. - -In most cases only one field value (e.g. IPv4 destination address) is -altered, in some cases two fields (e.g. IPv4 destination address and UDP -destination port) are altered. - -## Incremental Ordering - -This case is simpler to implement and offers greater control. - -When changing two fields, they can be incremented synchronously, or one -after another. In the latter case we can specify which one is -incremented each iteration and which is incremented by "carrying over" -only when the other "wraps around". This way also visits all -combinations once before the "carry" field also wraps around. - -It is possible to use increments other than 1. - -## Randomized Ordering - -This case chooses each field value at random (from the allowed range). -In case of two fields, they are treated independently. -TRex allows to set random seed to get deterministic numbers. -We use a different seed for each field and traffic direction. -The seed has to be a non-zero number, we use 1, 2, 3, and so on. - -The seeded random mode in TRex requires a "limit" value, -which acts as a cycle length limit (after this many iterations, -the seed resets to its initial value). -We use the maximal allowed limit value (computed as 2^24 - 1). - -Randomized profiles do not avoid duplicated values, -and do not guarantee each possible value is visited, -so it is not very useful for stateful tests. diff --git a/docs/content/methodology/packet_latency.md b/docs/content/methodology/packet_latency.md deleted file mode 100644 index fd7c0e00e8..0000000000 --- a/docs/content/methodology/packet_latency.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -title: "Packet Latency" -weight: 8 ---- - -# Packet Latency - -TRex Traffic Generator (TG) is used for measuring one-way latency in -2-Node and 3-Node physical testbed topologies. TRex integrates -[High Dynamic Range Histogram (HDRH)](http://hdrhistogram.org/) -functionality and reports per packet latency distribution for latency -streams sent in parallel to the main load packet streams. - -Following methodology is used: - -- Only NDRPDR test type measures latency and only after NDR and PDR - values are determined. Other test types do not involve latency - streams.
-- Latency is measured at different background load packet rates: - - - No-Load: latency streams only. - - Low-Load: at 10% PDR. - - Mid-Load: at 50% PDR. - - High-Load: at 90% PDR. - -- Latency is measured for all tested packet sizes except IMIX due to - TRex TG restriction. -- TG sends dedicated latency streams, one per direction, each at the - rate of 9 kpps at the prescribed packet size; these are sent in - addition to the main load streams. -- TG reports Min/Avg/Max and HDRH latency values distribution per stream - direction, hence two sets of latency values are reported per test case - (marked as E-W and W-E). -- +/- 1 usec is the measurement accuracy of TRex TG and the data in HDRH - latency values distribution is rounded to microseconds. -- TRex TG introduces a (background) always-on Tx + Rx latency bias of 4 - usec on average per direction resulting from TRex software writing and - reading packet timestamps on CPU cores. Quoted values are based on TG - back-to-back latency measurements. -- Latency graphs are not smoothed, each latency value has its own - horizontal line across corresponding packet percentiles. -- Percentiles are shown on X-axis using a logarithmic scale, so the - maximal latency value (ending at 100% percentile) would be in - infinity. The graphs are cut at 99.9999% (hover information still - lists 100%). \ No newline at end of file diff --git a/docs/content/methodology/per_patch_testing.md b/docs/content/methodology/per_patch_testing.md new file mode 100644 index 0000000000..a64a52caf6 --- /dev/null +++ b/docs/content/methodology/per_patch_testing.md @@ -0,0 +1,230 @@ +--- +title: "Per-patch Testing" +weight: 5 +--- + +# Per-patch Testing + +Updated for CSIT git commit id: 72b45cfe662107c8e1bb549df71ba51352a898ee. + +A methodology similar to trending analysis is used for comparing performance +before a DUT code change is merged. This can act as a verify job to disallow +changes which would decrease performance without a good reason. + +## Existing jobs + +VPP is the only project currently using such jobs. +They are not started automatically; they must be triggered on demand. +They allow full tag expressions, but some tags are enforced (such as MRR). + +There are jobs available for multiple types of testbeds, +based on various processors. +Their Gerrit trigger words are of the form "perftest-{node_arch}" +where the node_arch combinations currently supported are: +2n-clx, 2n-tx2, 2n-zn2, 3n-tsh. + +## Test selection + +A Gerrit trigger line without any additional arguments selects +a small set of test cases to run. +If additional arguments are added to the Gerrit trigger, they are treated +as Robot tag expressions to select tests to run. +While very flexible, this method of test selection also allows the user +to accidentally select too high a number of tests, blocking the testbed for days. + +What follows is a list of explanations and recommendations +to help users select the minimal set of test cases. + +### Verify cycles + +When Gerrit schedules multiple jobs to run for the same patch set, +it waits until all runs are complete. +While it is waiting, it is possible to trigger more jobs +(adding runs to the set Gerrit is waiting for), but it is not possible +to trigger more runs for the same job, until Gerrit is done waiting. +After Gerrit is done waiting, it becomes possible to trigger +the same job again. + +Example. A user triggers one set of tests on 2n-icx and immediately +also triggers another set of tests on 3n-icx.
Then the user notices +the 2n-icx run ended early because of a typo in the tag expression. +When the user tries to re-trigger 2n-icx (with a fixed tag expression), +that comment gets ignored by Jenkins. +Only when the 3n-icx job finishes can the user trigger 2n-icx again. + +### One comment many jobs + +In the past, the CSIT code which parses for perftest trigger comments +was buggy, which led to bad behavior (such as selecting all performance tests, +because "perftest" is also a robot tag) when a user included multiple +perftest trigger words in the same comment. + +The worst bugs have been fixed since then, but it is still recommended +to use just one trigger word per Gerrit comment, just to be safe. + +### Multiple test cases in run + +While Robot supports the OR operator, it does not support parentheses, +so the OR operator is not very useful. It is recommended +to use a space instead of the OR operator. + +Example template: +perftest-2n-icx {tag_expression_1} {tag_expression_2} + +See below for more concrete examples. + +### Suite tags + +Traditionally, CSIT maintains broad Robot tags that can be used to select tests. + +But it is not recommended to use them for test selection, +as it is not that easy to determine how many test cases are selected. + +The recommended way is to look into the CSIT repository first, +locate a specific suite the user is interested in, +and use its suite tag. For example, "ethip4-ip4base" is a suite tag +selecting just one suite in the CSIT git repository, +avoiding all scale, container, and other similar variants. + +Note that CSIT uses the "autogen" code generator, +so the robot running in Jenkins has access to more suites +than are visible just by looking into the CSIT git repository, +so a suite tag is not enough to select only the intended suite, +and the user probably still wants to narrow down +to a single test case within a suite. + +### Fully specified tag expressions + +Here is one template to select a single test case: +{test_type}AND{nic_model}AND{nic_driver}AND{cores}AND{frame_size}AND{suite_tag} +where the variables are all lower case (so the AND operator stands out). + +Currently only one test type is supported by the performance comparison jobs: +"mrr". +The nic_driver options depend on nic_model. For Intel cards "drv_avf" +(AVF plugin) and "drv_vfio_pci" (DPDK plugin) are popular, for Mellanox +"drv_rdma_core". Currently, the performance using "drv_af_xdp" is not reliable +enough, so do not use it unless you are specifically testing for AF_XDP. + +The most popular nic_model is "nic_intel-xxv710", but that is not available +on all testbed types. +It is safe to use "1c" for cores (unless you suspect multi-core +performance is affected differently) and "64b" for frame size ("78b" for ip6 +and more for dot1q and other encapsulated traffic; +"1518b" is popular for ipsec and other payload-bound tests). + +As there are more test cases than CSIT can periodically test, +it is possible to encounter an old test case that currently fails. +To avoid that, you can look at "job spec" files we use for periodic testing, +for example +[this one](https://github.com/FDio/csit/blob/master/resources/job_specs/report_iterative/2n-icx/vpp-mrr-00.md). + +### Shortening triggers + +Advanced users may use the following tricks to avoid writing long trigger +comments. + +Robot supports glob matching, which can be used to select multiple suite tags at +once. + +Not specifying one of the 6 parts of the recommended expression pattern +will select all available options.
For example not specifying nic_driver +for nic_intel-xxv710 will select all 3 applicable drivers. +You can use the NOT operator to reject some options (e.g. NOTdrv_af_xdp), +but beware, with NOT the order matters: +tag1ANDtag2NOTtag3 is not the same as tag1NOTtag3ANDtag2, +the latter is evaluated as tag1AND(NOT(tag3ANDtag2)). + +Beware when not specifying nic_model. As a precaution, +CSIT code will insert the default NIC model for the testbed used. +Example: Specifying drv_rdma_core without specifying nic_model +will fail, as the default nic_model is nic_intel-xxv710 +which does not support the RDMA core driver. + +### Complete example + +A user wants to test a VPP change which may affect load balancing with bonding. +Searching tag documentation for "bonding" finds the LBOND tag and its variants. +Searching the CSIT git repository (directory tests/) finds 8 suite files, +all suited only for 3-node testbeds. +All suites are using vhost, but differ by the forwarding app inside the VM +(DPDK or VPP), by the forwarding mode of VPP acting as a host level vswitch +(MAC learning or cross connect), and by the number of DUT1-DUT2 links +available (1 or 2). + +As not all NICs and testbeds offer enough ports for 2 parallel DUT-DUT links, +the user looks at +[testbed specifications](https://github.com/FDio/csit/tree/master/topologies/available) +and finds that only the xxv710 NIC on the 3n-icx testbed matches the requirements. +A quick look into the suites confirms the smallest frame size is 64 bytes +(despite the DOT1Q robot tag, as the encapsulation does not happen on TG-DUT links). +It is ok to use just 1 physical core, as 3n-icx has hyperthreading enabled, +so the VPP vswitch will use 2 worker threads. + +The user decides the vswitch forwarding mode is not important +(so chooses cross connect as that has less CPU overhead), +but wants to test both NIC drivers (not AF_XDP), both apps in VM, +and both 1 and 2 parallel links. + +After shortening, this is the trigger comment finally used: +perftest-3n-icx mrrANDnic_intel-xxv710AND1cAND64bAND?lbvpplacp-dot1q-l2xcbase-eth-2vhostvr1024-1vm*NOTdrv_af_xdp + +## Basic operation + +The job builds VPP .deb packages for both the patch under test +(called "current") and its parent patch (called "parent"). + +For each test (from a set defined by the tag expression), +both builds are subjected to several trial measurements (BMRR). +Measured samples are grouped into the "parent" sequence, +followed by the "current" sequence. The same Minimal Description Length +algorithm as in trending is used to decide whether it is one big group, +or two smaller groups. If it is one group, a "normal" result +is declared for the test. If it is two groups, and the current average +is less than the parent average, the test is declared a regression. +If it is two groups and the current average is larger or equal, +the test is declared a progression. + +The whole job fails (giving -1) if some trial measurement failed, +or if any test was declared a regression. + +## Temporary specifics + +The Minimal Description Length analysis is performed by +CSIT code equivalent to the jumpavg-0.1.3 library available on PyPI. + +In hopes of strengthening the signal (code performance) compared to noise +(all other factors influencing the measured values), several workarounds +are applied. + +In contrast to trending, trial duration is set to 10 seconds, +and only 5 samples are measured for each build. +Both parameters are set in ci-management. + +This decreases sensitivity to regressions, but also decreases +the probability of false positives.
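+
+To make the grouping decision concrete, here is a toy sketch of an
+MDL-style classification (illustrative only; this is not the jumpavg
+library, and the 16-bit per-group model cost is an arbitrary
+assumption):
+
+    import math
+
+    def bits(samples):
+        """Toy description length of samples modeled as one group."""
+        n = len(samples)
+        avg = sum(samples) / n
+        var = max(sum((x - avg) ** 2 for x in samples) / n, 1e-12)
+        # n/2 * log2(var) models the sample encoding cost,
+        # 16.0 is an arbitrary per-group model overhead.
+        return 0.5 * n * math.log2(var) + 16.0
+
+    def classify(parent, current):
+        if bits(parent + current) <= bits(parent) + bits(current):
+            return "normal"  # one group describes the data better
+        p_avg = sum(parent) / len(parent)
+        c_avg = sum(current) / len(current)
+        return "regression" if c_avg < p_avg else "progression"
+
+    # Current samples visibly lower than parent -> "regression".
+    print(classify([10.1, 10.0, 9.9, 10.2, 10.0],
+                   [9.0, 8.9, 9.1, 9.0, 8.8]))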
+ +## Console output + +The following information is visible towards the end of the Jenkins console +output, repeated for each analyzed test. + +The original 5 values are visible in the order they were measured. +The 5 values after processing are also visible in the output, +this time sorted by value (so people can see minimum and maximum). + +The next output is the difference of averages. It is the current average +minus the parent average, expressed as a percentage of the parent average. + +The next three outputs contain the jumpavg representation +of the two groups and a combined group. +Here, "bits" is the description length; for the "current" sequence +it includes the effect of the "parent" average value +(jumpavg-0.1.3 penalizes sequences with too close averages). + +Next, a sentence describing which grouping description is shorter, +and by how many bits. +Finally, the test result classification is visible. + +The algorithm does not track test case names, +so test cases are indexed (from 0). diff --git a/docs/content/methodology/per_thread_resources.md b/docs/content/methodology/per_thread_resources.md deleted file mode 100644 index cd862fa824..0000000000 --- a/docs/content/methodology/per_thread_resources.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -title: "Per Thread Resources" -weight: 2 ---- - -# Per Thread Resources - -CSIT test framework is managing mapping of the following resources per -thread: - -1. Cores, physical cores (pcores) allocated as pairs of sibling logical cores - (lcores) if server in HyperThreading/SMT mode, or as single lcores - if server not in HyperThreading/SMT mode. Note that if server's - processors are running in HyperThreading/SMT mode sibling lcores are - always used. -2. Receive Queues (RxQ), packet receive queues allocated on each - physical and logical interface tested. -3. Transmit Queues(TxQ), packet transmit queues allocated on each - physical and logical interface tested. - -Approach to mapping per thread resources depends on the application/DUT -tested (VPP or DPDK apps) and associated thread types, as follows: - -1. Data-plane workers, used for data-plane packet processing, when no - feature workers present. - - - Cores: data-plane workers are typically tested in 1, 2 and 4 pcore - configurations, running on single lcore per pcore or on sibling - lcores per pcore. Result is a set of {T}t{C}c thread-core - configurations, where{T} stands for a total number of threads - (lcores), and {C} for a total number of pcores. Tested - configurations are encoded in CSIT test case names, - e.g. "1c", "2c", "4c", and test tags "2T1C"(or "1T1C"), "4T2C" - (or "2T2C"), "8T4C" (or "4T4C"). - - Interface Receive Queues (RxQ): as of CSIT-2106 release, number of - RxQs used on each physical or virtual interface is equal to the - number of data-plane workers. In other words each worker has a - dedicated RxQ on each interface tested. This ensures packet - processing load to be equal for each worker, subject to RSS flow - load balancing efficacy. Note: Before CSIT-2106 total number of - RxQs across all interfaces of specific type was equal to the - number of data-plane workers. - - Interface Transmit Queues (TxQ): number of TxQs used on each - physical or virtual interface is equal to the number of data-plane - workers. In other words each worker has a dedicated TxQ on each - interface tested. - - Applies to VPP and DPDK Testpmd and L3Fwd. - -2. Data-plane and feature workers (e.g. IPsec async crypto workers), the - latter dedicated to specific feature processing.
- - - Cores: data-plane and feature workers are tested in 2, 3 and 4 - pcore configurations, running on single lcore per pcore or on - sibling lcores per pcore. This results in a two sets of - thread-core combinations separated by "-", {T}t{C}c-{T}t{C}c, with - the leading set denoting total number of threads (lcores) and - pcores used for data-plane workers, and the trailing set denoting - total number of lcores and pcores used for feature workers. - Accordingly, tested configurations are encoded in CSIT test case - names, e.g. "1c-1c", "1c-2c", "1c-3c", and test tags "2T1C_2T1C" - (or "1T1C_1T1C"), "2T1C_4T2C"(or "1T1C_2T2C"), "2T1C_6T3C" - (or "1T1C_3T3C"). - - RxQ and TxQ: no RxQs and no TxQs are used by feature workers. - - Applies to VPP only. - -3. Management/main worker, control plane and management. - - - Cores: single lcore. - - RxQ: not used (VPP default behaviour). - - TxQ: single TxQ per interface, allocated but not used - (VPP default behaviour). - - Applies to VPP only. - -## VPP Thread Configuration - -Mapping of cores and RxQs to VPP data-plane worker threads is done in -the VPP startup.conf during test suite setup: - -1. `corelist-workers `: List of logical cores to run VPP - data-plane workers and feature workers. The actual lcores' - allocations depends on HyperThreading/SMT server configuration and - per test core configuration. - - - For tests without feature workers, by default, all CPU cores - configured in startup.conf are used for data-plane workers. - - For tests with feature workers, CSIT code distributes lcores across - data-plane and feature workers. - -2. `num-rx-queues `: Number of Rx queues used per interface. - -Mapping of TxQs to VPP data-plane worker threads uses the default VPP -setting of one TxQ per interface per data-plane worker. - -## DPDK Thread Configuration - -Mapping of cores and RxQs to DPDK Testpmd/L3Fwd data-plane worker -threads is done in the startup CLI: - -1. `-l ` - List of logical cores to run DPDK - application. -2. `nb-cores=` - Number of forwarding cores. -3. `rxq=` - Number of Rx queues used per interface. diff --git a/docs/content/methodology/reconfiguration_tests.md b/docs/content/methodology/reconfiguration_tests.md deleted file mode 100644 index 837535526d..0000000000 --- a/docs/content/methodology/reconfiguration_tests.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "Reconfiguration Tests" -weight: 16 ---- - -# Reconfiguration Tests - -## Overview - -Reconf tests are designed to measure the impact of VPP re-configuration -on data plane traffic. -While VPP takes some measures against the traffic being -entirely stopped for a prolonged time, -the immediate forwarding rate varies during the re-configuration, -as some configurations steps need the active dataplane worker threads -to be stopped temporarily. - -As the usual methods of measuring throughput need multiple trial measurements -with somewhat long durations, and the re-configuration process can also be long, -finding an offered load which would result in zero loss -during the re-configuration process would be time-consuming. - -Instead, reconf tests first find a througput value (lower bound for NDR) -without re-configuration, and then maintain that ofered load -during re-configuration. The measured loss count is then assumed to be caused -by the re-configuration process. The result published by reconf tests -is the effective blocked time, that is -the loss count divided by the offered load. 
- -## Current Implementation - -Each reconf suite is based on a similar MLRsearch performance suite. - -MLRsearch parameters are changed to speed up the throughput discovery. -For example, PDR is not searched for, and the final trial duration is shorter. - -The MLRsearch suite has to contain a configuration parameter -that can be scaled up, e.g. number of tunnels or number of service chains. -Currently, only increasing the scale is supported -as the re-configuration operation. In future, scale decrease -or other operations can be implemented. - -The traffic profile is not changed, so the traffic present is processed -only by the smaller scale configuration. The added tunnels / chains -are not targetted by the traffic. - -For the re-configuration, the same Robot Framework and Python libraries -are used, as were used in the initial configuration, with the exception -of the final calls that do not interact with VPP (e.g. starting -virtual machines) being skipped to reduce the test overall duration. - -## Discussion - -Robot Framework introduces a certain overhead, which may affect timing -of individual VPP API calls, which in turn may affect -the number of packets lost. - -The exact calls executed may contain unnecessary info dumps, repeated commands, -or commands which change a value that do not need to be changed (e.g. MTU). -Thus, implementation details are affecting the results, even if their effect -on the corresponding MLRsearch suite is negligible. - -The lower bound for NDR is the only value safe to be used when zero packets lost -are expected without re-configuration. But different suites show different -"jitter" in that value. For some suites, the lower bound is not tight, -allowing full NIC buffers to drain quickly between worker pauses. -For other suites, lower bound for NDR still has quite a large probability -of non-zero packet loss even without re-configuration. diff --git a/docs/content/methodology/root_cause_analysis/_index.md b/docs/content/methodology/root_cause_analysis/_index.md deleted file mode 100644 index 79cfe73769..0000000000 --- a/docs/content/methodology/root_cause_analysis/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -bookCollapseSection: true -bookFlatSection: false -title: "Root Cause Analysis" -weight: 20 ---- \ No newline at end of file diff --git a/docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md b/docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md deleted file mode 100644 index 900ea0b874..0000000000 --- a/docs/content/methodology/root_cause_analysis/perpatch_performance_tests.md +++ /dev/null @@ -1,228 +0,0 @@ ---- -title: "Per-patch performance tests" -weight: 1 ---- - -# Per-patch performance tests - -Updated for CSIT git commit id: 72b45cfe662107c8e1bb549df71ba51352a898ee. - -A methodology similar to trending analysis is used for comparing performance -before a DUT code change is merged. This can act as a verify job to disallow -changes which would decrease performance without a good reason. - -## Existing jobs - -VPP is the only project currently using such jobs. -They are not started automatically, must be triggered on demand. -They allow full tag expressions, but some tags are enforced (such as MRR). - -There are jobs available for multiple types of testbeds, -based on various processors. -Their Gerrit triggers words are of the form "perftest-{node_arch}" -where the node_arch combinations currently supported are: -2n-clx, 2n-tx2, 2n-zn2, 3n-tsh. 
- -## Test selection - -Gerrit trigger line without any additional arguments selects -a small set of test cases to run. -If additional arguments are added to the Gerrit trigger, they are treated -as Robot tag expressions to select tests to run. -While very flexible, this method of test selection also allows the user -to accidentally select too high number of tests, blocking the testbed for days. - -What follows is a list of explanations and recommendations -to help users to select the minimal set of tests cases. - -### Verify cycles - -When Gerrit schedules multiple jobs to run for the same patch set, -it waits until all runs are complete. -While it is waiting, it is possible to trigger more jobs -(adding runs to the set Gerrit is waiting for), but it is not possible -to trigger more runs for the same job, until Gerrit is done waiting. -After Gerrit is done waiting, it becames possible to trigger -the same job again. - -Example. User triggers one set of tests on 2n-icx and immediately -also triggers other set of tests on 3n-icx. Then the user notices -2n-icx run end early because of a typo in tag expression. -When the user tries to re-trigger 2n-icx (with fixed tag expression), -that comment gets ignored by Jenkins. -Only when 3n-icx job finishes, the user can trigger 2n-icx. - -### One comment many jobs - -In the past, the CSIT code which parses for perftest trigger comments -was buggy, which lead to bad behavior (as in selection all performance test, -because "perftest" is also a robot tag) when user included multiple -perftest trigger words in the same comment. - -The worst bugs were fixed since then, but it is still recommended -to use just one trigger word per Gerrit comment, just to be safe. - -### Multiple test cases in run - -While Robot supports OR operator, it does not support parentheses, -so the OR operator is not very useful. It is recommended -to use space instead of OR operator. - -Example template: -perftest-2n-icx {tag_expression_1} {tag_expression_2} - -See below for more concrete examples. - -### Suite tags - -Traditionally, CSIT maintains broad Robot tags that can be used to select tests. - -But it is not recommended to use them for test selection, -as it is not that easy to determine how many test cases are selected. - -The recommended way is to look into CSIT repository first, -and locate a specific suite the user is interested in, -and use its suite tag. For example, "ethip4-ip4base" is a suite tag -selecting just one suite in CSIT git repository, -avoiding all scale, container, and other simialr variants. - -Note that CSIT uses "autogen" code generator, -so the robot running in Jenkins has access to more suites -than visible just by looking into CSIT git repository, -so suite tag is not enough to select even the intended suite, -and user still probably wants to narrow down -to a single test case within a suite. - -### Fully specified tag expressions - -Here is one template to select a single test case: -{test_type}AND{nic_model}AND{nic_driver}AND{cores}AND{frame_size}AND{suite_tag} -where the variables are all lower case (so AND operator stands out). - -Currently only one test type is supported by the performance comparison jobs: -"mrr". -The nic_driver options depend on nic_model. For Intel cards "drv_avf" (AVF plugin) -and "drv_vfio_pci" (DPDK plugin) are popular, for Mellanox "drv_rdma_core". -Currently, the performance using "drv_af_xdp" is not reliable enough, so do not use it -unless you are specifically testing for AF_XDP. 
- -The most popular nic_model is "nic_intel-xxv710", but that is not available -on all testbed types. -It is safe to use "1c" for cores (unless you are suspection multi-core performance -is affected differently) and "64b" for frame size ("78b" for ip6 -and more for dot1q and other encapsulated traffic; -"1518b" is popular for ipsec and other payload-bound tests). - -As there are more test cases than CSIT can periodically test, -it is possible to encounter an old test case that currently fails. -To avoid that, you can look at "job spec" files we use for periodic testing, -for example -[this one](https://github.com/FDio/csit/blob/master/resources/job_specs/report_iterative/2n-icx/vpp-mrr-00.md). - -### Shortening triggers - -Advanced users may use the following tricks to avoid writing long trigger comments. - -Robot supports glob matching, which can be used to select multiple suite tags at once. - -Not specifying one of 6 parts of the recommended expression pattern -will select all available options. For example not specifying nic_driver -for nic_intel-xxv710 will select all 3 applicable drivers. -You can use NOT operator to reject some options (e.g. NOTdrv_af_xdp), -but beware, with NOT the order matters: -tag1ANDtag2NOTtag3 is not the same as tag1NOTtag3ANDtag2, -the latter is evaluated as tag1AND(NOT(tag3ANDtag2)). - -Beware when not specifying nic_model. As a precaution, -CSIT code will insert the defailt NIC model for the tetsbed used. -Example: Specifying drv_rdma_core without specifying nic_model -will fail, as the default nic_model is nic_intel-xxv710 -which does not support RDMA core driver. - -### Complete example - -A user wants to test a VPP change which may affect load balance whith bonding. -Searching tag documentation for "bonding" finds LBOND tag and its variants. -Searching CSIT git repository (directory tests/) finds 8 suite files, -all suited only for 3-node testbeds. -All suites are using vhost, but differ by the forwarding app inside VM -(DPDK or VPP), by the forwarding mode of VPP acting as host level vswitch -(MAC learning or cross connect), and by the number of DUT1-DUT2 links -available (1 or 2). - -As not all NICs and testbeds offer enogh ports for 2 parallel DUT-DUT links, -the user looks at -[testbed specifications](https://github.com/FDio/csit/tree/master/topologies/available) -and finds that only xxv710 NIC on 3n-icx testbed matches the requirements. -Quick look into the suites confirm the smallest frame size is 64 bytes -(despite DOT1Q robot tag, as the encapsulation does not happen on TG-DUT links). -It is ok to use just 1 physical core, as 3n-icx has hyperthreading enabled, -so VPP vswitch will use 2 worker threads. - -The user decides the vswitch forwarding mode is not important -(so choses cross connect as that has less CPU overhead), -but wants to test both NIC drivers (not AF_XDP), both apps in VM, -and both 1 and 2 parallel links. - -After shortening, this is the trigger comment fianlly used: -perftest-3n-icx mrrANDnic_intel-x710AND1cAND64bAND?lbvpplacp-dot1q-l2xcbase-eth-2vhostvr1024-1vm*NOTdrv_af_xdp - -## Basic operation - -The job builds VPP .deb packages for both the patch under test -(called "current") and its parent patch (called "parent"). - -For each test (from a set defined by tag expression), -both builds are subjected to several trial measurements (BMRR). -Measured samples are grouped to "parent" sequence, -followed by "current" sequence. 
The same Minimal Description Length -algorithm as in trending is used to decide whether it is one big group, -or two smaller gropus. If it is one group, a "normal" result -is declared for the test. If it is two groups, and current average -is less then parent average, the test is declared a regression. -If it is two groups and current average is larger or equal, -the test is declared a progression. - -The whole job fails (giving -1) if some trial measurement failed, -or if any test was declared a regression. - -## Temporary specifics - -The Minimal Description Length analysis is performed by -CSIT code equivalent to jumpavg-0.1.3 library available on PyPI. - -In hopes of strengthening of signal (code performance) compared to noise -(all other factors influencing the measured values), several workarounds -are applied. - -In contrast to trending, trial duration is set to 10 seconds, -and only 5 samples are measured for each build. -Both parameters are set in ci-management. - -This decreases sensitivity to regressions, but also decreases -probability of false positives. - -## Console output - -The following information as visible towards the end of Jenkins console output, -repeated for each analyzed test. - -The original 5 values are visible in order they were measured. -The 5 values after processing are also visible in output, -this time sorted by value (so people can see minimum and maximum). - -The next output is difference of averages. It is the current average -minus the parent average, expressed as percentage of the parent average. - -The next three outputs contain the jumpavg representation -of the two groups and a combined group. -Here, "bits" is the description length; for "current" sequence -it includes effect from "parent" average value -(jumpavg-0.1.3 penalizes sequences with too close averages). - -Next, a sentence describing which grouping description is shorter, -and by how much bits. -Finally, the test result classification is visible. - -The algorithm does not track test case names, -so test cases are indexed (from 0). diff --git a/docs/content/methodology/suite_generation.md b/docs/content/methodology/suite_generation.md deleted file mode 100644 index 4fa9dee0ce..0000000000 --- a/docs/content/methodology/suite_generation.md +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: "Suite Generation" -weight: 19 ---- - -# Suite Generation - -CSIT uses robot suite files to define tests. -However, not all suite files available for Jenkins jobs -(or manually started bootstrap scripts) are present in CSIT git repository. -They are generated only when needed. - -## Autogen Library - -There is a code generation layer implemented as Python library called "autogen", -called by various bash scripts. - -It generates the full extent of CSIT suites, using the ones in git as templates. - -## Sources - -The generated suites (and their contents) are affected by multiple information -sources, listed below. - -### Git Suites - -The suites present in git repository act as templates for generating suites. -One of autogen design principles is that any template suite should also act -as a full suite (no placeholders). - -In practice, autogen always re-creates the template suite with exactly -the same content, it is one of checks that autogen works correctly. - -### Regenerate Script - -Not all suites present in CSIT git repository act as template for autogen. -The distinction is on per-directory level. 
Directories with -regenerate_testcases.py script usually consider all suites as templates -(unless possibly not included by the glob patten in the script). - -The script also specifies minimal frame size, indirectly, by specifying protocol -(protocol "ip4" is the default, leading to 64B frame size). - -### Constants - -Values in Constants.py are taken into consideration when generating suites. -The values are mostly related to different NIC models and NIC drivers. - -### Python Code - -Python code in resources/libraries/python/autogen contains several other -information sources. - -#### Testcase Templates - -The test case part of template suite is ignored, test case lines -are created according to text templates in Testcase.py file. - -#### Testcase Argument Lists - -Each testcase template has different number of "arguments", e.g. values -to put into various placeholders. Different test types need different -lists of the argument values, the lists are in regenerate_glob method -in Regenerator.py file. - -#### Iteration Over Values - -Python code detects the test type (usually by substrings of suite file name), -then iterates over different quantities based on type. -For example, only ndrpdr suite templates generate other types (mrr and soak). - -#### Hardcoded Exclusions - -Some combinations of values are known not to work, so they are excluded. -Examples: Density tests for too much CPUs; IMIX for ASTF. - -## Non-Sources - -Some information sources are available in CSIT repository, -but do not affect the suites generated by autogen. - -### Testbeds - -Overall, no information visible in topology yaml files is taken into account -by autogen. - -#### Testbed Architecture - -Historically, suite files are agnostic to testbed architecture, e.g. ICX or ALT. - -#### Testbed Size - -Historically, 2-node and 3-node suites have diferent names, and while -most of the code is common, the differences are not always simple enough. -Autogen treat 2-node and 3-node suites as independent templates. - -TRex suites are intended for a 1-node circuit of otherwise 2-node or 3-node -testbeds, so they support all 3 robot tags. -They are also detected and treated differently by autogen, -mainly because they need different testcase arguments (no CPU count). -Autogen does nothing specifically related to the fact they should run -only in testbeds/NICs with TG-TG line available. - -#### Other Topology Info - -Some bonding tests need two (parallel) links between DUTs. -Autogen does not care, as suites are agnostic. -Robot tag marks the difference, but the link presence is not explicitly checked. - -### Job specs - -Information in job spec files depend on generated suites (not the other way). -Autogen should generate more suites, as job spec is limited by time budget. -More suites should be available for manually triggered verify jobs, -so autogen covers that. - -### Bootstrap Scripts - -Historically, bootstrap scripts perform some logic, -perhaps adding exclusion options to Robot invocation -(e.g. skipping testbed+NIC combinations for tests that need parallel links). - -Once again, the logic here relies on what autogen generates, -autogen does not look into bootstrap scripts. 
diff --git a/docs/content/methodology/telemetry.md b/docs/content/methodology/telemetry.md deleted file mode 100644 index e7a2571573..0000000000 --- a/docs/content/methodology/telemetry.md +++ /dev/null @@ -1,167 +0,0 @@ ---- -title: "Telemetry" -weight: 20 ---- - -# Telemetry - -OpenMetrics specifies the de-facto standard for transmitting cloud-native -metrics at scale, with support for both text representation and Protocol -Buffers. - -## RFC - -- RFC2119 -- RFC5234 -- RFC8174 -- draft-richih-opsawg-openmetrics-00 - -## Reference - -[OpenMetrics](https://github.com/OpenObservability/OpenMetrics/blob/master/specification/OpenMetrics.md) - -## Metric Types - -- Gauge -- Counter -- StateSet -- Info -- Histogram -- GaugeHistogram -- Summary -- Unknown - -Telemetry module in CSIT currently support only Gauge, Counter and Info. - -## Anatomy of CSIT telemetry implementation - -Existing implementation consists of several measurment building blocks: -the main measuring block running search algorithms (MLR, PLR, SOAK, MRR, ...), -the latency measuring block and the several telemetry blocks with or without -traffic running on a background. - -The main measuring block must not be interrupted by any read operation that can -impact data plane traffic processing during throughput search algorithm. Thus -operational reads are done before (pre-stat) and after (post-stat) that block. - -Some operational reads must be done while traffic is running and usually -consists of two reads (pre-run-stat, post-run-stat) with defined delay between -them. - -## MRR measurement - - traffic_start(r=mrr) traffic_stop |< measure >| - | | | (r=mrr) | - | pre_run_stat post_run_stat | pre_stat | | post_stat - | | | | | | | | - --o--------o---------------o---------o-------o--------+-------------------+------o------------> - t - - Legend: - - pre_run_stat - - vpp-clear-runtime - - post_run_stat - - vpp-show-runtime - - bash-perf-stat // if extended_debug == True - - pre_stat - - vpp-clear-stats - - vpp-enable-packettrace // if extended_debug == True - - vpp-enable-elog - - post_stat - - vpp-show-stats - - vpp-show-packettrace // if extended_debug == True - - vpp-show-elog - - - |< measure >| - | (r=mrr) | - | | - |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >| - | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) | - | | | | - --o------------------------o------------------------o------------------------o---> - t - - -## MLR measurement - - |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >| - | (r=mlr) | | | | | | .9/.5/.1/.0 | - | | | pre_run_stat post_run_stat | | pre_run_stat post_run_stat | | | - | | | | | | | | | | | | - --+-------------------+----o--------o---------------o---------o--------------o--------o---------------o---------o------------[---------------------]---> - t - - Legend: - - pre_run_stat - - vpp-clear-runtime - - post_run_stat - - vpp-show-runtime - - bash-perf-stat // if extended_debug == True - - pre_stat - - vpp-clear-stats - - vpp-enable-packettrace // if extended_debug == True - - vpp-enable-elog - - post_stat - - vpp-show-stats - - vpp-show-packettrace // if extended_debug == True - - vpp-show-elog - - -## MRR measurement - - traffic_start(r=mrr) traffic_stop |< measure >| - | | | (r=mrr) | - | |< stat_runtime >| | stat_pre_trial | | stat_post_trial - | | | | | | | | - ----o---+--------------------------+---o-------------o------------+-------------------+-----o-------------> - t - - Legend: - - stat_runtime - - vpp-runtime - - 
stat_pre_trial - - vpp-clear-stats - - vpp-enable-packettrace // if extended_debug == True - - stat_post_trial - - vpp-show-stats - - vpp-show-packettrace // if extended_debug == True - - - |< measure >| - | (r=mrr) | - | | - |< traffic_trial0 >|< traffic_trial1 >|< traffic_trialN >| - | (i=0,t=duration) | (i=1,t=duration) | (i=N,t=duration) | - | | | | - --o------------------------o------------------------o------------------------o---> - t - - - |< stat_runtime >| - | | - |< program0 >|< program1 >|< programN >| - | (@=params) | (@=params) | (@=params) | - | | | | - --o------------------------o------------------------o------------------------o---> - t - - -## MLR measurement - - |< measure >| traffic_start(r=pdr) traffic_stop traffic_start(r=ndr) traffic_stop |< [ latency ] >| - | (r=mlr) | | | | | | .9/.5/.1/.0 | - | | | |< stat_runtime >| | | |< stat_runtime >| | | | - | | | | | | | | | | | | - --+-------------------+-----o---+--------------------------+---o--------------o---+--------------------------+---o-----------[---------------------]---> - t - - Legend: - - stat_runtime - - vpp-runtime - - stat_pre_trial - - vpp-clear-stats - - vpp-enable-packettrace // if extended_debug == True - - stat_post_trial - - vpp-show-stats - - vpp-show-packettrace // if extended_debug == True diff --git a/docs/content/methodology/terminology.md b/docs/content/methodology/terminology.md deleted file mode 100644 index 229db7d145..0000000000 --- a/docs/content/methodology/terminology.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -title: "Terminology" -weight: 1 ---- - -# Terminology - -- **Frame size**: size of an Ethernet Layer-2 frame on the wire, including - any VLAN tags (dot1q, dot1ad) and Ethernet FCS, but excluding Ethernet - preamble and inter-frame gap. Measured in Bytes. -- **Packet size**: same as frame size, both terms used interchangeably. -- **Inner L2 size**: for tunneled L2 frames only, size of an encapsulated - Ethernet Layer-2 frame, preceded with tunnel header, and followed by - tunnel trailer. Measured in Bytes. -- **Inner IP size**: for tunneled IP packets only, size of an encapsulated - IPv4 or IPv6 packet, preceded with tunnel header, and followed by - tunnel trailer. Measured in Bytes. -- **Device Under Test (DUT)**: In software networking, "device" denotes a - specific piece of software tasked with packet processing. Such device - is surrounded with other software components (such as operating system - kernel). It is not possible to run devices without also running the - other components, and hardware resources are shared between both. For - purposes of testing, the whole set of hardware and software components - is called "System Under Test" (SUT). As SUT is the part of the whole - test setup performance of which can be measured with RFC2544, using - SUT instead of RFC2544 DUT. Device under test - (DUT) can be re-introduced when analyzing test results using whitebox - techniques, but this document sticks to blackbox testing. -- **System Under Test (SUT)**: System under test (SUT) is a part of the - whole test setup whose performance is to be benchmarked. The complete - methodology contains other parts, whose performance is either already - established, or not affecting the benchmarking result. -- **Bi-directional throughput tests**: involve packets/frames flowing in - both east-west and west-east directions over every tested interface of - SUT/DUT. Packet flow metrics are measured per direction, and can be - reported as aggregate for both directions (i.e. 
throughput) and/or - separately for each measured direction (i.e. latency). In most cases - bi-directional tests use the same (symmetric) load in both directions. -- **Uni-directional throughput tests**: involve packets/frames flowing in - only one direction, i.e. either east-west or west-east direction, over - every tested interface of SUT/DUT. Packet flow metrics are measured - and are reported for measured direction. -- **Packet Loss Ratio (PLR)**: ratio of packets received relative to packets - transmitted over the test trial duration, calculated using formula: - PLR = ( pkts_transmitted - pkts_received ) / pkts_transmitted. - For bi-directional throughput tests aggregate PLR is calculated based - on the aggregate number of packets transmitted and received. -- **Packet Throughput Rate**: maximum packet offered load DUT/SUT forwards - within the specified Packet Loss Ratio (PLR). In many cases the rate - depends on the frame size processed by DUT/SUT. Hence packet - throughput rate MUST be quoted with specific frame size as received by - DUT/SUT during the measurement. For bi-directional tests, packet - throughput rate should be reported as aggregate for both directions. - Measured in packets-per-second (pps) or frames-per-second (fps), - equivalent metrics. -- **Bandwidth Throughput Rate**: a secondary metric calculated from packet - throughput rate using formula: bw_rate = pkt_rate * (frame_size + - L1_overhead) * 8, where L1_overhead for Ethernet includes preamble (8 - Bytes) and inter-frame gap (12 Bytes). For bi-directional tests, - bandwidth throughput rate should be reported as aggregate for both - directions. Expressed in bits-per-second (bps). -- **Non Drop Rate (NDR)**: maximum packet/bandwith throughput rate sustained - by DUT/SUT at PLR equal zero (zero packet loss) specific to tested - frame size(s). MUST be quoted with specific packet size as received by - DUT/SUT during the measurement. Packet NDR measured in - packets-per-second (or fps), bandwidth NDR expressed in - bits-per-second (bps). -- **Partial Drop Rate (PDR)**: maximum packet/bandwith throughput rate - sustained by DUT/SUT at PLR greater than zero (non-zero packet loss) - specific to tested frame size(s). MUST be quoted with specific packet - size as received by DUT/SUT during the measurement. Packet PDR - measured in packets-per-second (or fps), bandwidth PDR expressed in - bits-per-second (bps). -- **Maximum Receive Rate (MRR)**: packet/bandwidth rate regardless of PLR - sustained by DUT/SUT under specified Maximum Transmit Rate (MTR) - packet load offered by traffic generator. MUST be quoted with both - specific packet size and MTR as received by DUT/SUT during the - measurement. Packet MRR measured in packets-per-second (or fps), - bandwidth MRR expressed in bits-per-second (bps). -- **Trial**: a single measurement step. -- **Trial duration**: amount of time over which packets are transmitted and - received in a single measurement step. 
diff --git a/docs/content/methodology/test/_index.md b/docs/content/methodology/test/_index.md new file mode 100644 index 0000000000..857cc7b168 --- /dev/null +++ b/docs/content/methodology/test/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: true +bookFlatSection: false +title: "Test" +weight: 3 +--- diff --git a/docs/content/methodology/test/access_control_lists.md b/docs/content/methodology/test/access_control_lists.md new file mode 100644 index 0000000000..354e6b72bb --- /dev/null +++ b/docs/content/methodology/test/access_control_lists.md @@ -0,0 +1,66 @@ +--- +title: "Access Control Lists" +weight: 5 +--- + +# Access Control Lists + +VPP is tested in a number of data plane feature configurations across +different forwarding modes. Following sections list features tested. + +## ACL Security-Groups + +Both stateless and stateful access control lists (ACL), also known as +security-groups, are supported by VPP. + +Following ACL configurations are tested for MAC switching with L2 +bridge-domains: + +- *l2bdbasemaclrn-iacl{E}sl-{F}flows*: Input stateless ACL, with {E} + entries and {F} flows. +- *l2bdbasemaclrn-oacl{E}sl-{F}flows*: Output stateless ACL, with {E} + entries and {F} flows. +- *l2bdbasemaclrn-iacl{E}sf-{F}flows*: Input stateful ACL, with {E} + entries and {F} flows. +- *l2bdbasemaclrn-oacl{E}sf-{F}flows*: Output stateful ACL, with {E} + entries and {F} flows. + +Following ACL configurations are tested with IPv4 routing: + +- *ip4base-iacl{E}sl-{F}flows*: Input stateless ACL, with {E} entries + and {F} flows. +- *ip4base-oacl{E}sl-{F}flows*: Output stateless ACL, with {E} entries + and {F} flows. +- *ip4base-iacl{E}sf-{F}flows*: Input stateful ACL, with {E} entries and + {F} flows. +- *ip4base-oacl{E}sf-{F}flows*: Output stateful ACL, with {E} entries + and {F} flows. + +ACL tests are executed with the following combinations of ACL entries +and number of flows: + +- ACL entry definitions + - flow non-matching deny entry: (src-ip4, dst-ip4, src-port, dst-port). + - flow matching permit ACL entry: (src-ip4, dst-ip4). +- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50]. +- {F} - number of UDP flows with different tuple (src-ip4, dst-ip4, + src-port, dst-port), {F} = [100, 10k, 100k]. +- All {E}x{F} combinations are tested per ACL type, total of 9. + +## ACL MAC-IP + +MAC-IP binding ACLs are tested for MAC switching with L2 bridge-domains: + +- *l2bdbasemaclrn-macip-iacl{E}sl-{F}flows*: Input stateless ACL, with + {E} entries and {F} flows. + +MAC-IP ACL tests are executed with the following combinations of ACL +entries and number of flows: + +- ACL entry definitions + - flow non-matching deny entry: (dst-ip4, dst-mac, bit-mask) + - flow matching permit ACL entry: (dst-ip4, dst-mac, bit-mask) +- {E} - number of non-matching deny ACL entries, {E} = [1, 10, 50] +- {F} - number of UDP flows with different tuple (dst-ip4, dst-mac), + {F} = [100, 10k, 100k] +- All {E}x{F} combinations are tested per ACL type, total of 9. 
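+
+As a quick illustration of the resulting test matrix, the 9 {E}x{F}
+combinations for one of the variants above (input stateless ACL on L2
+bridge-domain) can be enumerated as follows; a sketch only, the printed
+names merely follow the naming pattern shown:
+
+    from itertools import product
+
+    entries = [1, 10, 50]           # {E}: non-matching deny ACL entries
+    flows = ["100", "10k", "100k"]  # {F}: UDP flows
+    for e, f in product(entries, flows):
+        print(f"l2bdbasemaclrn-iacl{e}sl-{f}flows")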
diff --git a/docs/content/methodology/test/generic_segmentation_offload.md b/docs/content/methodology/test/generic_segmentation_offload.md new file mode 100644 index 0000000000..0032d203de --- /dev/null +++ b/docs/content/methodology/test/generic_segmentation_offload.md @@ -0,0 +1,117 @@ +--- +title: "Generic Segmentation Offload" +weight: 7 +--- + +# Generic Segmentation Offload + +## Overview + +Generic Segmentation Offload (GSO) reduces per-packet processing +overhead by enabling applications to pass a multi-packet buffer to +(v)NIC and process a smaller number of large packets (e.g. frame size of +64 KB), instead of processing higher numbers of small packets (e.g. +frame size of 1500 B). + +GSO is tested for VPP vhostuser and tapv2 interfaces. All test cases use iPerf3 +client and server applications running TCP/IP as a traffic generator. For +performance comparison the same tests are run without GSO enabled. + +## GSO Test Topologies + +Two VPP GSO test topologies are implemented: + +1. iPerfC_GSOvirtio_LinuxVM --- GSOvhost_VPP_GSOvhost --- iPerfS_GSOvirtio_LinuxVM + - Tests VPP GSO on vhostuser interfaces and interaction with Linux + virtio with GSO enabled. +2. iPerfC_GSOtap_LinuxNspace --- GSOtapv2_VPP_GSOtapv2 --- iPerfS_GSOtap_LinuxNspace + - Tests VPP GSO on tapv2 interfaces and interaction with Linux tap + with GSO enabled. + +Common configuration: + +- iPerfC (client) and iPerfS (server) run in TCP/IP mode without upper + bandwidth limit. +- Trial duration is set to 30 sec. +- iPerfC, iPerfS and VPP run within the same SUT node. + + +## VPP GSOtap Topology + +### VPP Configuration + +VPP GSOtap tests are executed without using hyperthreading. VPP worker runs on +a single core. Multi-core tests are not executed. Each interface belongs to a +separate namespace. Following core pinning scheme is used: + +- 1t1c (rxq=1, rx_qsz=4096, tx_qsz=4096) + - system isolated: 0,28,56,84 + - vpp mt: 1 + - vpp wt: 2 + - vhost: 3-5 + - iperf-s: 6 + - iperf-c: 7 + +### iPerf3 Server Configuration + +iPerf3 version used 3.7 + + $ sudo -E -S ip netns exec tap1_namespace iperf3 \ + --server --daemon --pidfile /tmp/iperf3_server.pid \ + --logfile /tmp/iperf3.log --port 5201 --affinity + +For the full iPerf3 reference please see +[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst). + + +### iPerf3 Client Configuration + +iPerf3 version used 3.7 + + $ sudo -E -S ip netns exec tap1_namespace iperf3 \ + --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel \ + --time 30.0 --affinity --zerocopy + +For the full iPerf3 reference please see +[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst). + + +## VPP GSOvhost Topology + +### VPP Configuration + +VPP GSOvhost tests are executed without using hyperthreading. VPP worker runs +on a single core. Multi-core tests are not executed. Following core pinning +scheme is used: + +- 1t1c (rxq=1, rx_qsz=1024, tx_qsz=1024) + - system isolated: 0,28,56,84 + - vpp mt: 1 + - vpp wt: 2 + - vm-iperf-s: 3,4,5,6,7 + - vm-iperf-c: 8,9,10,11,12 + - iperf-s: 1 + - iperf-c: 1 + +### iPerf3 Server Configuration + +iPerf3 version used 3.7 + + $ sudo iperf3 \ + --server --daemon --pidfile /tmp/iperf3_server.pid \ + --logfile /tmp/iperf3.log --port 5201 --affinity X + +For the full iPerf3 reference please see +[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst).
+ + +### iPerf3 Client Configuration + +iPerf3 version used 3.7 + + $ sudo iperf3 \ + --client 2.2.2.2 --bind 1.1.1.1 --port 5201 --parallel \ + --time 30.0 --affinity X --zerocopy + +For the full iPerf3 reference please see +[iPerf3 docs](https://github.com/esnet/iperf/blob/master/docs/invoking.rst). diff --git a/docs/content/methodology/test/hoststack/_index.md b/docs/content/methodology/test/hoststack/_index.md new file mode 100644 index 0000000000..2ae872c54e --- /dev/null +++ b/docs/content/methodology/test/hoststack/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: true +bookFlatSection: false +title: "Hoststack" +weight: 6 +--- diff --git a/docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md b/docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md new file mode 100644 index 0000000000..c7d57a51b3 --- /dev/null +++ b/docs/content/methodology/test/hoststack/quicudpip_with_vppecho.md @@ -0,0 +1,48 @@ +--- +title: "QUIC/UDP/IP with vpp_echo" +weight: 1 +--- + +# QUIC/UDP/IP with vpp_echo + +[vpp_echo performance testing tool](https://wiki.fd.io/view/VPP/HostStack#External_Echo_Server.2FClient_.28vpp_echo.29) +is a bespoke performance test application which utilizes the 'native +HostStack APIs' to verify performance and correct handling of +connection/stream events with uni-directional and bi-directional +streams of data. + +Because iperf3 does not support the QUIC transport protocol, vpp_echo +is used for measuring the maximum attainable goodput of the VPP Host +Stack connection utilizing the QUIC transport protocol across two +instances of VPP running on separate DUT nodes. The QUIC transport +protocol supports multiple streams per connection and test cases +utilize different combinations of QUIC connections and number of +streams per connection. + +The test configuration is as follows: + + DUT1 Network DUT2 + [ vpp_echo-client -> VPP1 ]=======[ VPP2 -> vpp_echo-server] + N-streams/connection + +where, + +1. vpp_echo server attaches to VPP2 and LISTENs on VPP2:UDP port 1234. +2. vpp_echo client creates one or more connections to VPP1 and opens + one or more streams per connection to VPP2:UDP port 1234. +3. vpp_echo client transmits a uni-directional stream as fast as the + VPP Host Stack allows to the vpp_echo server for the test duration. +4. At the end of the test the vpp_echo client emits the goodput + measurements for all streams and the sum of all streams. + +Test cases include + +1. 1 QUIC Connection with 1 Stream +2. 1 QUIC Connection with 10 Streams +3. 10 QUIC Connections with 1 Stream +4. 10 QUIC Connections with 10 Streams + +with stream sizes to provide reasonable test durations. The VPP Host +Stack QUIC transport is configured to utilize the picotls encryption +library. In the future, tests utilizing additional encryption +algorithms will be added. diff --git a/docs/content/methodology/test/hoststack/tcpip_with_iperf3.md b/docs/content/methodology/test/hoststack/tcpip_with_iperf3.md new file mode 100644 index 0000000000..7baa88ab50 --- /dev/null +++ b/docs/content/methodology/test/hoststack/tcpip_with_iperf3.md @@ -0,0 +1,52 @@ +--- +title: "TCP/IP with iperf3" +weight: 2 +--- + +# TCP/IP with iperf3 + +[iperf3 goodput measurement tool](https://github.com/esnet/iperf) +is used for measuring the maximum attainable goodput of the VPP Host +Stack connection across two instances of VPP running on separate DUT +nodes. iperf3 is a popular open source tool for active measurements +of the maximum achievable goodput on IP networks.
+
+Because iperf3 utilizes the POSIX socket interface APIs, the current
+test configuration uses the Linux LD_PRELOAD mechanism to connect
+iperf3 to the VPP Host Stack via the VPP Communications Library (VCL)
+LD_PRELOAD library (libvcl_ldpreload.so).
+
+In the future, a forked version of iperf3 which has been modified to
+directly use the VCL application APIs may be added to determine the
+difference in performance of 'VCL Native' applications versus utilizing
+LD_PRELOAD, which inherently has more overhead and other limitations.
+
+The test configuration is as follows:
+
+        DUT1              Network             DUT2
+    [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server]
+
+where,
+
+1. iperf3 server attaches to VPP2 and LISTENs on VPP2:TCP port 5201.
+2. iperf3 client attaches to VPP1 and opens one or more stream
+   connections to VPP2:TCP port 5201.
+3. iperf3 client transmits a uni-directional stream as fast as the
+   VPP Host Stack allows to the iperf3 server for the test duration.
+4. At the end of the test the iperf3 client emits the goodput
+   measurements for all streams and the sum of all streams.
+
+Test cases include 1 and 10 Streams with a 20 second test duration,
+with the VPP Host Stack configured to utilize the Cubic TCP
+congestion algorithm.
+
+Note: iperf3 is single threaded, so it is expected that the 10 stream
+test shows little or no performance improvement due to
+multi-thread/multi-core execution.
+
+There are also variations of these test cases which use the VPP Network
+Simulator (NSIM) plugin to test the VPP Hoststack goodput with 1 percent
+of the traffic being dropped at the output interface of VPP1, thereby
+simulating a lossy network. The NSIM tests are experimental and the
+test results are not currently representative of typical results in a
+lossy network.
diff --git a/docs/content/methodology/test/hoststack/udpip_with_iperf3.md b/docs/content/methodology/test/hoststack/udpip_with_iperf3.md
new file mode 100644
index 0000000000..01ddf61269
--- /dev/null
+++ b/docs/content/methodology/test/hoststack/udpip_with_iperf3.md
@@ -0,0 +1,44 @@
+---
+title: "UDP/IP with iperf3"
+weight: 3
+---
+
+# UDP/IP with iperf3
+
+[iperf3 goodput measurement tool](https://github.com/esnet/iperf)
+is used for measuring the maximum attainable goodput of the VPP Host
+Stack connection across two instances of VPP running on separate DUT
+nodes. iperf3 is a popular open source tool for active measurements
+of the maximum achievable goodput on IP networks.
+
+Because iperf3 utilizes the POSIX socket interface APIs, the current
+test configuration uses the Linux LD_PRELOAD mechanism to connect
+iperf3 to the VPP Host Stack via the VPP Communications Library (VCL)
+LD_PRELOAD library (libvcl_ldpreload.so).
+
+In the future, a forked version of iperf3 which has been modified to
+directly use the VCL application APIs may be added to determine the
+difference in performance of 'VCL Native' applications versus utilizing
+LD_PRELOAD, which inherently has more overhead and other limitations.
+
+The test configuration is as follows:
+
+        DUT1              Network             DUT2
+    [ iperf3-client -> VPP1 ]=======[ VPP2 -> iperf3-server]
+
+where,
+
+1. iperf3 server attaches to VPP2 and LISTENs on VPP2:UDP port 5201.
+2. iperf3 client attaches to VPP1 and transmits one or more streams
+   of packets to VPP2:UDP port 5201.
+3. iperf3 client transmits a uni-directional stream as fast as the
+   VPP Host Stack allows to the iperf3 server for the test duration.
+4. At the end of the test the iperf3 client emits the goodput
+   measurements for all streams and the sum of all streams.
+
+Test cases include 1 and 10 Streams with a 20 second test duration,
+with the VPP Host Stack using the UDP transport layer.
+
+Note: iperf3 is single threaded, so it is expected that the 10 stream
+test shows little or no performance improvement due to
+multi-thread/multi-core execution.
diff --git a/docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md b/docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md
new file mode 100644
index 0000000000..2dc4d2b7f9
--- /dev/null
+++ b/docs/content/methodology/test/hoststack/vsap_ab_with_nginx.md
@@ -0,0 +1,39 @@
+---
+title: "VSAP ab with nginx"
+weight: 4
+---
+
+# VSAP ab with nginx
+
+[VSAP (VPP Stack Acceleration Project)](https://wiki.fd.io/view/VSAP)
+aims to establish an industry user space application ecosystem based on
+the VPP hoststack. As a pre-requisite to adapting open source applications
+using the VPP Communications Library to accelerate performance, the VSAP
+team has introduced baseline tests utilizing the LD_PRELOAD mechanism to
+capture baseline performance data.
+
+[AB (Apache HTTP server benchmarking tool)](https://httpd.apache.org/docs/2.4/programs/ab.html)
+is used for measuring the maximum connections-per-second and
+requests-per-second rates.
+
+[NGINX](https://www.nginx.com) is a popular open source HTTP server
+application. Because NGINX utilizes the POSIX socket interface APIs, the
+test configuration uses the LD_PRELOAD mechanism to connect NGINX to the
+VPP Hoststack using the VPP Communications Library (VCL) LD_PRELOAD
+library (libvcl_ldpreload.so).
+
+In the future, a version of NGINX which has been modified to
+directly use the VCL application APIs will be added to determine the
+difference in performance of 'VCL Native' applications versus utilizing
+LD_PRELOAD, which inherently has more overhead and other limitations.
+
+The test configuration is as follows:
+
+       TG            Network              DUT
+    [ AB ]=============[ VPP -> nginx ]
+
+where,
+
+1. nginx attaches to VPP and listens on TCP port 80.
+2. ab runs CPS and RPS tests with packets flowing from the Test Generator
+   node, across 100G NICs, through the VPP hoststack to NGINX.
+3. At the end of the tests, the results are reported by AB.
diff --git a/docs/content/methodology/test/internet_protocol_security.md b/docs/content/methodology/test/internet_protocol_security.md
new file mode 100644
index 0000000000..1a02c43a0a
--- /dev/null
+++ b/docs/content/methodology/test/internet_protocol_security.md
@@ -0,0 +1,73 @@
+---
+title: "Internet Protocol Security"
+weight: 4
+---
+
+# Internet Protocol Security
+
+VPP Internet Protocol Security (IPsec) performance tests are executed for
+the following crypto plugins:
+
+- `crypto_native`, used for software based crypto leveraging CPU
+  platform optimizations, e.g. Intel's AES-NI instruction set.
+- `crypto_ipsecmb`, used for hardware based crypto with Intel QAT PCIe
+  cards.
+
+## IPsec with VPP Native SW Crypto
+
+CSIT implements the following IPsec test cases relying on VPP native
+crypto (`crypto_native` plugin):
+
+ **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-------------------:|------------------:|-----------------:
+ crypto_native         | AES[128\|256]-GCM  | GCM               | 1 to 60k tunnels
+ crypto_native         | AES128-CBC         | SHA[256\|512]     | 1 to 60k tunnels
+
+VPP IPsec tests with SW crypto are executed in both tunnel and policy
+modes, with tests running on 3-node testbeds: 3n-icx, 3n-tsh.
+
+## IPsec with Intel QAT HW
+
+CSIT implements the following IPsec test cases relying on the ipsecmb
+library (`crypto_ipsecmb` plugin) and Intel QAT 8950 (50G HW crypto card,
+driven via the `dpdk_cryptodev` framework):
+
+ **VPP Crypto Engine** | **VPP Crypto Workers** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-----------------------:|-------------------:|------------------:|-----------------:
+ crypto_ipsecmb        | sync/all workers       | AES[128\|256]-GCM  | GCM               | 1, 1k tunnels
+ crypto_ipsecmb        | sync/all workers       | AES[128]-CBC       | SHA[256\|512]     | 1, 1k tunnels
+ crypto_ipsecmb        | async/crypto worker    | AES[128\|256]-GCM  | GCM               | 1, 4, 1k tunnels
+ crypto_ipsecmb        | async/crypto worker    | AES[128]-CBC       | SHA[256\|512]     | 1, 4, 1k tunnels
+
+## IPsec with Async Crypto Feature Workers
+
+*TODO Description to be added*
+
+## IPsec Uni-Directional Tests with VPP Native SW Crypto
+
+CSIT implements the following IPsec uni-directional test cases relying on
+VPP native crypto (`crypto_native` plugin) in tunnel mode:
+
+ **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-------------------:|------------------:|-------------------:
+ crypto_native         | AES[128\|256]-GCM  | GCM               | 4, 1k, 10k tunnels
+ crypto_native         | AES128-CBC         | SHA[512]          | 4, 1k, 10k tunnels
+
+In policy mode:
+
+ **VPP Crypto Engine** | **ESP Encryption** | **ESP Integrity** | **Scale Tested**
+----------------------:|-------------------:|------------------:|------------------:
+ crypto_native         | AES[256]-GCM       | GCM               | 1, 40, 1k tunnels
+
+The tests are running on 2-node testbeds: 2n-tx2. The uni-directional
+tests partially address a weakness of 2-node testbed setups with T-Rex as
+the traffic generator: with just one DUT node, we can either encrypt or
+decrypt traffic in each direction.
+
+The test cases only perform encryption - packets are encrypted on the DUT
+and then arrive at the TG, where no additional packet processing is
+needed (just counting packets).
+
+Decryption would require the traffic generator to generate encrypted
+packets which the DUT would then decrypt. However, T-Rex does not have
+the capability to encrypt packets.
diff --git a/docs/content/methodology/test/network_address_translation.md b/docs/content/methodology/test/network_address_translation.md
new file mode 100644
index 0000000000..f443eabc5f
--- /dev/null
+++ b/docs/content/methodology/test/network_address_translation.md
@@ -0,0 +1,445 @@
+---
+title: "Network Address Translation"
+weight: 1
+---
+
+# Network Address Translation
+
+## NAT44 Prefix Bindings
+
+NAT44 prefix bindings should be representative of target applications,
+where a number of private IPv4 addresses from the range defined by
+RFC1918 is mapped to a smaller set of public IPv4 addresses from the
+public range.
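+
+The binding quantities defined below combine by simple arithmetic; a
+short sketch (hypothetical helper names, values taken from the tables
+that follow):
+
+    MAX_PORTS_PER_OUTSIDE = 64512  # non-reserved port range 1024-65535
+
+    def sharing_ratio(ports_per_inside_address):
+        """Inside-addresses per outside-address, using all outside ports."""
+        return MAX_PORTS_PER_OUTSIDE // ports_per_inside_address
+
+    def sessions(inside_hosts, ports_per_inside_address=63):
+        """Total NAT44 sessions for a given number of inside hosts."""
+        return inside_hosts * ports_per_inside_address
+
+    assert sharing_ratio(63) == 1024      # first row of the ratio table
+    assert sessions(1024) == 64512        # first row of the scale table
+    assert sessions(262144) == 16515072   # last row of the scale table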
+
+The following quantities are used to describe inside-to-outside IP
+address and port binding scenarios:
+
+- Inside-addresses, the number of inside source addresses
+  (representing inside hosts).
+- Ports-per-inside-address, the number of TCP/UDP source
+  ports per inside source address.
+- Outside-addresses, the number of outside (public) source addresses
+  allocated to NAT44.
+- Ports-per-outside-address, the number of TCP/UDP source
+  ports per outside source address. The maximal number of
+  ports-per-outside-address usable for NAT is 64 512
+  (the non-reserved port range 1024-65535, RFC4787).
+- Sharing-ratio, equal to inside-addresses divided by outside-addresses.
+
+CSIT NAT44 tests are designed to take into account the maximum number of
+ports (sessions) required per inside host (inside-address) and at the
+same time to maximize the use of the outside-address range by using all
+available outside ports. With this in mind, the following scheme of
+NAT44 sharing ratios has been devised for use in CSIT:
+
+ **ports-per-inside-address** | **sharing-ratio**
+-----------------------------:|------------------:
+ 63                           | 1024
+ 126                          | 512
+ 252                          | 256
+ 504                          | 128
+
+Initial CSIT NAT44 tests, including associated TG/TRex traffic profiles,
+are based on ports-per-inside-address set to 63 and a sharing ratio of
+1024. This approach is currently used for all NAT44 tests, including
+NAT44det (NAT44 deterministic, used for Carrier Grade NAT applications)
+and NAT44ed (Endpoint Dependent).
+
+Private address ranges to be used in tests:
+
+- 192.168.0.0 - 192.168.255.255 (192.168/16 prefix)
+
+  - Total of 2^16 (65 536) usable IPv4 addresses.
+  - Used in tests for up to 65 536 inside addresses (inside hosts).
+
+- 172.16.0.0 - 172.31.255.255 (172.16/12 prefix)
+
+  - Total of 2^20 (1 048 576) usable IPv4 addresses.
+  - Used in tests for up to 1 048 576 inside addresses (inside hosts).
+
+### NAT44 Session Scale
+
+The NAT44 session scale tested is governed by the following logic:
+
+- Number of inside-addresses (hosts) H[i] = (H[i-1] x 2^2) with
+  H[0] = 1 024, i = 1,2,3, ...
+
+  - H[i] = 1 024, 4 096, 16 384, 65 536, 262 144, ...
+
+- Number of sessions S[i] = H[i] * ports-per-inside-address
+
+  - ports-per-inside-address = 63
+
+ **i** | **hosts** | **sessions**
+------:|----------:|-------------:
+ 0     | 1 024     | 64 512
+ 1     | 4 096     | 258 048
+ 2     | 16 384    | 1 032 192
+ 3     | 65 536    | 4 128 768
+ 4     | 262 144   | 16 515 072
+
+### NAT44 Deterministic
+
+NAT44det performance tests are using the TRex STL (Stateless) API and
+traffic profiles, similar to all other stateless packet forwarding tests
+like ip4, ip6 and l2, sending UDP packets in both directions,
+inside-to-outside and outside-to-inside.
+
+The inside-to-outside traffic uses a single destination address (20.0.0.0)
+and port (1024).
+The inside-to-outside traffic covers the whole inside address and port
+range, the outside-to-inside traffic covers the whole outside address
+and port range.
+
+NAT44det translation entries are created during the ramp-up phase,
+followed by verification that all entries are present,
+before proceeding to the main measurements of the test.
+This ensures session setup does not impact the forwarding performance
+test.
+
+Associated CSIT test cases use the following naming scheme to indicate
+the NAT44det scenario tested:
+
+- ethip4udp-nat44det-h{H}-p{P}-s{S}-[mrr|ndrpdr|soak]
+
+  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
+  - {P}, number of ports per inside host, P = 63.
+  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
+    16515072.
+  - [mrr|ndrpdr|soak], MRR, NDRPDR or SOAK test.
+
+### NAT44 Endpoint-Dependent
+
+In order to exercise the NAT44ed ability to translate based on both
+source and destination address and port, the inside-to-outside traffic
+varies also the destination address and port. The destination port is
+the same as the source port, the destination address has the same offset
+as the source address, but applied to a different subnet (starting with
+20.0.0.0).
+
+As the mapping is not deterministic (for security reasons),
+we cannot easily use stateless bidirectional traffic profiles.
+The inside address and port range is fully covered,
+but we do not know which outside-to-inside source address and port to use
+to hit an open session.
+
+Therefore, NAT44ed is benchmarked using the following methodologies:
+
+- Unidirectional throughput using a *stateless* traffic profile.
+- Connections-per-second (CPS) using a *stateful* traffic profile.
+- Bidirectional throughput (TPUT, see below) using a *stateful* traffic
+  profile.
+
+Unidirectional NAT44ed throughput tests are using TRex STL (Stateless)
+APIs and traffic profiles, but with packets sent only in the
+inside-to-outside direction.
+Similarly to NAT44det, NAT44ed unidirectional throughput tests include
+a ramp-up phase to establish and verify the presence of required NAT44ed
+binding entries. As the sessions have finite duration, the test code
+keeps inserting ramp-up trials during the search, if it detects a risk
+of sessions timing out. Any zero loss trial visits all sessions,
+so it acts also as a ramp-up.
+
+Stateful NAT44ed tests are using TRex ASTF (Advanced Stateful) APIs and
+traffic profiles, with packets sent in both directions. Tests are run
+with both UDP and TCP sessions.
+As NAT44ed CPS (connections-per-second) stateful tests
+measure (also) session opening performance,
+they use state reset instead of a ramp-up trial.
+NAT44ed TPUT (bidirectional throughput) tests prepend ramp-up trials
+as in the unidirectional tests,
+so the test results describe performance without translation entry
+creation overhead.
+
+Associated CSIT test cases use the following naming scheme to indicate
+the NAT44ed case tested:
+
+- Stateless: ethip4udp-nat44ed-h{H}-p{P}-s{S}-udir-[mrr|ndrpdr|soak]
+
+  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
+  - {P}, number of ports per inside host, P = 63.
+  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
+    16515072.
+  - udir-[mrr|ndrpdr|soak], unidirectional stateless tests MRR, NDRPDR
+    or SOAK.
+
+- Stateful: ethip4[udp|tcp]-nat44ed-h{H}-p{P}-s{S}-[cps|tput]-[mrr|ndrpdr|soak]
+
+  - [udp|tcp], UDP or TCP sessions.
+  - {H}, number of inside hosts, H = 1024, 4096, 16384, 65536, 262144.
+  - {P}, number of ports per inside host, P = 63.
+  - {S}, number of sessions, S = 64512, 258048, 1032192, 4128768,
+    16515072.
+  - [cps|tput], connections-per-second session establishment rate, or
+    packets-per-second average rate (throughput measured without session
+    establishment overhead).
+  - [mrr|ndrpdr|soak], bidirectional stateful tests MRR, NDRPDR, or SOAK.
+
+## Stateful traffic profiles
+
+There are several important details which distinguish ASTF profiles
+from stateless profiles.
+
+### General considerations
+
+#### Protocols
+
+ASTF profiles are limited to either the UDP or the TCP protocol.
+
+#### Programs
+
+Each template in the profile defines two "programs", one for the client
+side and one for the server side.
+
+Each program specifies when that side has to wait until enough data is
+received (counted in packets for UDP and in bytes for TCP)
+and when to send additional data. Together, the two programs
+define a single transaction. Due to packet loss, a transaction may take
+longer, use more packets (retransmission) or never finish in its
+entirety.
+
+#### Instances
+
+A client instance is created according to the TPS parameter for the
+trial, and sends the first packet of the transaction (in some cases more
+packets). Each client instance uses a different source address (see
+sequencing below) and some source port. The destination address also
+comes from a range, but the destination port has to be constant for a
+given program.
+
+TRex uses an opaque way to choose source ports, but as session counting
+shows, the next client with the same source address uses a different
+source port.
+
+A server instance is created when the first packet arrives to the server
+side. The source address and port of the first packet are used as the
+destination address and port for the server responses. This is the
+ability we need when the outside surface is not predictable.
+
+When a program reaches its end, the instance is deleted.
+This creates possible issues with server instances. If the server
+instance does not read all the data the client has sent, late data
+packets can cause a second copy of the server instance to be created,
+which breaks assumptions on how many packets a transaction should have.
+
+The need for server instances to read all the data reduces the overall
+bandwidth TRex is able to create in ASTF mode.
+
+Note that client instances are not created on packets,
+so it is safe to end the client program without reading all server data
+(unless the definition of transaction success requires that).
+
+#### Sequencing
+
+ASTF profiles offer two modes for choosing source and destination IP
+addresses for client programs: sequential and pseudorandom.
+In current tests we are using sequential addressing only (if the
+destination address varies at all).
+
+For the client destination UDP/TCP port, we use a single constant value.
+(TRex can support multiple program pairs in the same traffic profile,
+distinguished by the port number.)
+
+#### Transaction overlap
+
+If a transaction takes longer to finish, compared to the period implied
+by TPS, TRex will have multiple client or server instances active at a
+time.
+
+During calibration testing we have found this increases CPU utilization,
+and for high TPS it can lead to TRex's Rx or Tx buffers becoming full.
+This generally leads to duration stretching, and/or packet loss on TRex.
+
+Currently used transactions were chosen to be short, so the risk of bad
+behavior is decreased. But in MRR tests, where the load is computed
+based on NIC ability, not TRex ability, anomalous behavior is still
+possible (e.g. MRR values being way lower than NDR).
+
+#### Delays
+
+TRex supports adding constant delays to ASTF programs.
+This can be useful, for example if we want to separate connection
+establishment from data transfer.
+
+But as TRex tracks delayed instances as active, this still results
+in higher CPU utilization and reduced performance
+(as with other overlapping transactions). So the current tests do not
+use any delays.
+
+#### Keepalives
+
+Both UDP and TCP protocol implementations in TRex programs support a
+keepalive duration.
+That means there is a configurable period of keepalive time,
+and TRex sends keepalive packets automatically (outside the program)
+for the time the program is active (started, not ended yet)
+but not sending any packets.
+
+For TCP this is generally not a big deal, as the other side usually
+retransmits faster. But for UDP it means a packet loss may leave
+the receiving program running.
+
+In order to avoid keepalive packets, the keepalive value is set to a
+high number. Here, "high number" means that even at maximum scale and
+minimum TPS, there are still no keepalive packets sent within the
+corresponding (computed) trial duration. This number is kept the same
+also for smaller scale traffic profiles, to simplify maintenance.
+
+#### Transaction success
+
+A transaction is considered successful at the Layer-7 (L7) level
+when both program instances close. At this point, various L7 counters
+(unofficial name) are updated on TRex.
+
+We found that proper close and L7 counter updates can be CPU intensive,
+whereas lower-level counters (ipackets, opackets), called L2 counters,
+can keep up with higher loads.
+
+For some tests, we do not need to confirm the whole transaction was
+successful. CPS (connections per second) tests are a typical example.
+We care only about NAT44ed creating a session (needs one packet
+in the inside-to-outside direction per session) and being able to use it
+(needs one packet in the outside-to-inside direction).
+
+Similarly in TPUT tests (packet throughput, counting both control
+and data packets), we care about the NAT44ed ability to forward packets;
+we do not care whether applications (TRex) can fully process them at
+that rate.
+
+Therefore each type of test has its own formula (usually just one
+counter already provided by TRex) to count "successful enough"
+transactions and attempted transactions. Currently, all tests relying on
+L7 counters use size-limited profiles, so they know what the count of
+attempted transactions should be, but due to duration stretching
+TRex might have been unable to send that many packets.
+For search purposes, unattempted transactions are treated the same
+as attempted but failed transactions.
+
+Sometimes even the number of transactions as tracked by the search
+algorithm does not match the transactions as defined by ASTF programs.
+See the TCP TPUT profile below.
+
+### UDP CPS
+
+This profile uses a minimalistic transaction to verify a NAT44ed session
+has been created and that it allows outside-to-inside traffic.
+
+The client instance sends one packet and ends.
+The server instance sends one packet upon creation and ends.
+
+In principle, packet size is configurable,
+but currently used tests apply only one value (100 bytes frame).
+
+A transaction counts as attempted when the opackets counter increases on
+the client side.
+A transaction counts as successful when the ipackets counter increases
+on the client side.
+
+### TCP CPS
+
+This profile uses a minimalistic transaction to verify a NAT44ed session
+has been created and that it allows outside-to-inside traffic.
+
+The client initiates a TCP connection. The client waits until the
+connection is confirmed (by reading zero data bytes). The client ends.
+The server accepts the connection. The server waits for an indirect
+confirmation from the client (by waiting for the client to initiate
+close). The server ends.
+
+Without packet loss, the whole transaction takes 7 packets to finish
+(4 and 3 per direction).
+From the NAT44ed point of view, only the first two are needed to verify
+the session got created.
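+
+For illustration, a minimal ASTF traffic profile implementing the TCP
+CPS transaction described above might look as follows (a sketch only,
+with illustrative address ranges and port; not the actual CSIT profile,
+which also sets a very high keepalive value as discussed earlier):
+
+    from trex.astf.api import (
+        ASTFAssociationRule, ASTFIPGen, ASTFIPGenDist, ASTFIPGenGlobal,
+        ASTFProfile, ASTFProgram, ASTFTCPClientTemplate,
+        ASTFTCPServerTemplate, ASTFTemplate,
+    )
+
+    class TcpCpsProfile:
+        def get_profile(self, **kwargs):
+            # Client: wait for connection confirmation, then end
+            # (ending the program initiates the TCP close).
+            prog_c = ASTFProgram()  # TCP program
+            prog_c.connect()
+            # Server: accept is implicit; wait for the client to close.
+            prog_s = ASTFProgram()
+            prog_s.wait_for_peer_close()
+            ip_gen = ASTFIPGen(
+                glob=ASTFIPGenGlobal(ip_offset="1.0.0.0"),
+                dist_client=ASTFIPGenDist(
+                    ip_range=["192.168.0.0", "192.168.3.255"],
+                    distribution="seq"),
+                dist_server=ASTFIPGenDist(
+                    ip_range=["20.0.0.0", "20.0.3.255"],
+                    distribution="seq"),
+            )
+            return ASTFProfile(
+                default_ip_gen=ip_gen,
+                templates=ASTFTemplate(
+                    client_template=ASTFTCPClientTemplate(
+                        program=prog_c, ip_gen=ip_gen, port=8080),
+                    server_template=ASTFTCPServerTemplate(
+                        program=prog_s,
+                        assoc=ASTFAssociationRule(port=8080)),
+                ),
+            )
+
+    def register():
+        return TcpCpsProfile()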
+
+Packet size is not configurable, but currently used tests report the
+frame size as 64 bytes.
+
+A transaction counts as attempted when the tcps_connattempt counter
+increases on the client side.
+A transaction counts as successful when the tcps_connects counter
+increases on the client side.
+
+### UDP TPUT
+
+This profile uses a small transaction of "request-response" type,
+with several packets simulating a data payload.
+
+The client sends 5 packets and closes immediately.
+The server reads all 5 packets (needed to avoid late packets creating
+new server instances), then sends 5 packets and closes.
+The value 5 was chosen to mirror what TCP TPUT (see below) chooses.
+
+Packet size is configurable; currently we have tests for 100,
+1518 and 9000 bytes frames (to match the size of TCP TPUT data frames,
+see below).
+
+As this is a packet oriented test, we do not track the whole
+10 packet transaction. Similarly to stateless tests, we treat each
+packet as a "transaction" for search algorithm packet loss ratio
+purposes (see the counting sketch below).
+Therefore a "transaction" is attempted when the opackets counter on the
+client or server side is increased. A transaction is successful if the
+ipackets counter on the client or server side is increased.
+
+If one of the 5 client packets is lost, the server instance will get
+stuck in the reading phase. This probably decreases TRex performance,
+but it leads to more stable results than the alternatives.
+
+### TCP TPUT
+
+This profile uses a small transaction of "request-response" type,
+with some amount of data to be transferred both ways.
+
+In CSIT release 22.06, TRex behavior changed, so we needed to edit
+the traffic profile. Let us describe the pre-22.06 profile first.
+
+The client connects, sends 5 data packets worth of data,
+receives 5 data packets worth of data and closes its side of the
+connection.
+The server accepts the connection, reads 5 data packets worth of data,
+sends 5 data packets worth of data and closes its side of the
+connection.
+As usual in TCP, the sending side waits for an ACK from the receiving
+side before proceeding with the next step of its program.
+
+The server read is needed to avoid premature close and a second server
+instance. The client read is not strictly needed, but the ACKs allow
+TRex to close the server instance quickly, thus saving CPU and improving
+performance.
+
+The number 5 of data packets was chosen so TRex is able to send them
+in a single burst, even with 9000 byte frame size (TRex has a hard limit
+on the initial window size).
+That leads to 16 packets (9 of them in the c2s direction) being
+exchanged if no loss occurs.
+The size of the data packets is controlled by the traffic profile
+setting the appropriate maximum segment size. Due to TRex restrictions,
+the minimal size for an IPv4 data frame achievable by this method is
+70 bytes, which is more than our usual minimum of 64 bytes.
+For that reason, the data frame sizes available for testing are
+100 bytes (that allows room for eventually adding IPv6 ASTF tests),
+1518 bytes and 9000 bytes. There is no control over control packet
+sizes.
+
+Exactly as in UDP TPUT, the ipackets and opackets counters are used for
+counting "transactions" (in fact packets).
+
+If packet loss occurs, there can be a large transaction overlap, even if
+most ASTF programs finish eventually. This can lead to big duration
+stretching and a somewhat uneven rate of packets sent. This makes it
+hard to interpret MRR results (frequently MRR is below NDR for this
+reason), but NDR and PDR results tend to be stable enough.
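+
+A sketch of the packet-as-transaction bookkeeping shared by UDP TPUT and
+TCP TPUT (a hypothetical helper; stats are per-side dicts holding TRex's
+opackets/ipackets counters):
+
+    def tput_transactions(client_stats, server_stats):
+        """Return (attempted, successful, loss_ratio) for a TPUT trial."""
+        attempted = client_stats["opackets"] + server_stats["opackets"]
+        successful = client_stats["ipackets"] + server_stats["ipackets"]
+        # Loss ratio as used by the search algorithm:
+        loss_ratio = (attempted - successful) / attempted if attempted else 0.0
+        return attempted, successful, loss_ratio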
+
+In 22.06, the "ACK from the receiving side" behavior changed:
+the receiving side sometimes started sending the ACK
+also before receiving the full set of 5 data packets.
+If the previous profile is understood as a "single challenge, single
+response" exchange, where the challenge (and also the response) is sent
+as a burst of 5 data packets, the new profile uses "bursts" of 1 packet
+instead, but issues the challenge-response part 5 times sequentially
+(waiting to receive the response before sending the next challenge).
+This new profile happens to have the same overall packet count
+(when no re-transmissions are needed).
+Although it is possibly more taxing for the TRex CPU,
+the results are comparable to the old traffic profile.
+
+## Ip4base tests
+
+Contrary to stateless traffic profiles, we do not have a simple limit
+that would guarantee TRex is able to send traffic at the specified load.
+For that reason, we have added tests where "nat44ed" is replaced by
+"ip4base". Instead of NAT44ed processing, the tests set minimalistic
+IPv4 routes, so that packets are forwarded in both the inside-to-outside
+and outside-to-inside directions.
+
+The packets arrive at the server end of TRex with a different source
+address&port than in NAT44ed tests (no translation to outside values is
+done with ip4base), but those are not specified in the stateful traffic
+profiles.
+The server end (as always) uses the received address&port as the
+destination for outside-to-inside traffic. Therefore the same stateful
+traffic profile works for both NAT44ed and ip4base tests (of the same
+scale).
+
+The NAT44ed results are displayed together with the corresponding
+ip4base results. If they are similar, TRex is probably the bottleneck.
+If the NAT44ed result is visibly smaller, it describes the real VPP
+performance.
diff --git a/docs/content/methodology/test/packet_flow_ordering.md b/docs/content/methodology/test/packet_flow_ordering.md
new file mode 100644
index 0000000000..c2c87038d4
--- /dev/null
+++ b/docs/content/methodology/test/packet_flow_ordering.md
@@ -0,0 +1,42 @@
+---
+title: "Packet Flow Ordering"
+weight: 2
+---
+
+# Packet Flow Ordering
+
+The TRex Traffic Generator (TG) supports two main ways to cover the
+address space (within allowed ranges) in scale tests.
+
+In most cases only one field value (e.g. IPv4 destination address) is
+altered; in some cases two fields (e.g. IPv4 destination address and UDP
+destination port) are altered.
+
+## Incremental Ordering
+
+This case is simpler to implement and offers greater control.
+
+When changing two fields, they can be incremented synchronously, or one
+after another. In the latter case we can specify which one is
+incremented each iteration and which is incremented by "carrying over"
+only when the other "wraps around". This way also visits all
+combinations once before the "carry" field also wraps around.
+
+It is possible to use increments other than 1.
+
+## Randomized Ordering
+
+This case chooses each field value at random (from the allowed range).
+In the case of two fields, they are treated independently.
+TRex allows setting a random seed to get deterministic numbers.
+We use a different seed for each field and traffic direction.
+The seed has to be a non-zero number; we use 1, 2, 3, and so on.
+
+The seeded random mode in TRex requires a "limit" value,
+which acts as a cycle length limit (after this many iterations, the
+seed resets to its initial value).
+We use the maximal allowed limit value (computed as 2^24 - 1).
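+
+Both orderings are expressed with the TRex STL field engine. A minimal
+sketch (illustrative field names and ranges only; the real CSIT profiles
+are generated from suite parameters):
+
+    from trex.stl.api import (STLScVmRaw, STLVmFixIpv4, STLVmFlowVar,
+                              STLVmFlowVarRepeatableRandom, STLVmWrFlowVar)
+
+    vm = STLScVmRaw([
+        # Incremental ordering: visit destination addresses sequentially.
+        STLVmFlowVar(name="ip_dst", min_value="20.0.0.0",
+                     max_value="20.0.3.255", size=4, op="inc"),
+        STLVmWrFlowVar(fv_name="ip_dst", pkt_offset="IP.dst"),
+        # Randomized ordering: seeded, with the maximal cycle-length limit.
+        STLVmFlowVarRepeatableRandom(name="udp_dst", min_value=1024,
+                                     max_value=65535, size=2, seed=1,
+                                     limit=2**24 - 1),
+        STLVmWrFlowVar(fv_name="udp_dst", pkt_offset="UDP.dport"),
+        # Recompute the IPv4 checksum after the field rewrites.
+        STLVmFixIpv4(offset="IP"),
+    ])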
+
+Randomized profiles do not avoid duplicate values,
+and do not guarantee each possible value is visited,
+so they are not very useful for stateful tests.
diff --git a/docs/content/methodology/test/reconfiguration.md b/docs/content/methodology/test/reconfiguration.md
new file mode 100644
index 0000000000..6dec4d918b
--- /dev/null
+++ b/docs/content/methodology/test/reconfiguration.md
@@ -0,0 +1,68 @@
+---
+title: "Reconfiguration"
+weight: 8
+---
+
+# Reconfiguration
+
+## Overview
+
+Reconf tests are designed to measure the impact of VPP re-configuration
+on data plane traffic.
+While VPP takes some measures against the traffic being
+entirely stopped for a prolonged time,
+the immediate forwarding rate varies during the re-configuration,
+as some configuration steps need the active dataplane worker threads
+to be stopped temporarily.
+
+As the usual methods of measuring throughput need multiple trial
+measurements with somewhat long durations, and the re-configuration
+process can also be long, finding an offered load which would result in
+zero loss during the re-configuration process would be time-consuming.
+
+Instead, reconf tests first find a throughput value (the lower bound for
+NDR) without re-configuration, and then maintain that offered load
+during re-configuration. The measured loss count is then assumed to be
+caused by the re-configuration process. The result published by reconf
+tests is the effective blocked time, that is
+the loss count divided by the offered load.
+
+## Current Implementation
+
+Each reconf suite is based on a similar MLRsearch performance suite.
+
+MLRsearch parameters are changed to speed up the throughput discovery.
+For example, PDR is not searched for, and the final trial duration is
+shorter.
+
+The MLRsearch suite has to contain a configuration parameter
+that can be scaled up, e.g. number of tunnels or number of service
+chains. Currently, only increasing the scale is supported
+as the re-configuration operation. In future, scale decrease
+or other operations can be implemented.
+
+The traffic profile is not changed, so the traffic present is processed
+only by the smaller scale configuration. The added tunnels / chains
+are not targeted by the traffic.
+
+For the re-configuration, the same Robot Framework and Python libraries
+are used as were used in the initial configuration, with the exception
+that the final calls that do not interact with VPP (e.g. starting
+virtual machines) are skipped, to reduce the overall test duration.
+
+## Discussion
+
+Robot Framework introduces a certain overhead, which may affect the
+timing of individual VPP API calls, which in turn may affect
+the number of packets lost.
+
+The exact calls executed may contain unnecessary info dumps, repeated
+commands, or commands which change a value that does not need to be
+changed (e.g. MTU). Thus, implementation details affect the results,
+even if their effect on the corresponding MLRsearch suite is negligible.
+
+The lower bound for NDR is the only value safe to use when zero packet
+loss is expected without re-configuration. But different suites show
+different "jitter" in that value. For some suites, the lower bound is
+not tight, allowing full NIC buffers to drain quickly between worker
+pauses. For other suites, the lower bound for NDR still has quite a
+large probability of non-zero packet loss even without re-configuration.
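+
+The published result discussed in the Overview reduces to one division;
+a trivial sketch (hypothetical helper name):
+
+    def effective_blocked_time(loss_count, offered_load_pps):
+        """Reconf test result: loss count / offered load, in seconds."""
+        return loss_count / offered_load_pps
+
+    # E.g. 2,000,000 lost packets at an offered load of 5 Mpps
+    # correspond to 0.4 s of effectively blocked dataplane time.
+    assert effective_blocked_time(2_000_000, 5_000_000) == 0.4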
diff --git a/docs/content/methodology/test/tunnel_encapsulations.md b/docs/content/methodology/test/tunnel_encapsulations.md
new file mode 100644
index 0000000000..c047c43dfa
--- /dev/null
+++ b/docs/content/methodology/test/tunnel_encapsulations.md
@@ -0,0 +1,87 @@
+---
+title: "Tunnel Encapsulations"
+weight: 3
+---
+
+# Tunnel Encapsulations
+
+Tunnel encapsulations testing is grouped based on the type of outer
+header: IPv4 or IPv6.
+
+## IPv4 Tunnels
+
+VPP is tested in the following IPv4 tunnel baseline configurations:
+
+- *ip4vxlan-l2bdbase*: VXLAN over IPv4 tunnels with L2 bridge-domain MAC
+  switching.
+- *ip4vxlan-l2xcbase*: VXLAN over IPv4 tunnels with L2 cross-connect.
+- *ip4lispip4-ip4base*: LISP over IPv4 tunnels with IPv4 routing.
+- *ip4lispip6-ip6base*: LISP over IPv4 tunnels with IPv6 routing.
+- *ip4gtpusw-ip4base*: GTPU over IPv4 tunnels with IPv4 routing.
+
+In all cases listed above, a low number of MAC, IPv4 or IPv6 flows
+(253 or 254 per direction) is switched or routed by VPP.
+
+In addition, selected IPv4 tunnels are tested at scale:
+
+- *dot1q--ip4vxlanscale-l2bd*: VXLAN over IPv4 tunnels with L2 bridge-
+  domain MAC switching, with scaled up dot1q VLANs (10, 100, 1k),
+  mapped to scaled up L2 bridge-domains (10, 100, 1k), that are in turn
+  mapped to (10, 100, 1k) VXLAN tunnels. 64.5k flows are transmitted per
+  direction.
+
+## IPv6 Tunnels
+
+VPP is tested in the following IPv6 tunnel baseline configurations:
+
+- *ip6lispip4-ip4base*: LISP over IPv6 tunnels with IPv4 routing.
+- *ip6lispip6-ip6base*: LISP over IPv6 tunnels with IPv6 routing.
+
+In all cases listed above, a low number of IPv4 or IPv6 flows (253 or
+254 per direction) is routed by VPP.
+
+## GENEVE
+
+### GENEVE Prefix Bindings
+
+GENEVE prefix bindings should be representative of target applications,
+where packet flows of a particular set of IPv4 addresses (L3 underlay
+network) are routed via a dedicated GENEVE interface by building an L2
+overlay.
+
+Private address ranges to be used in tests:
+
+- East hosts IP address range: 10.0.1.0 - 10.127.255.255 (10.0/9 prefix)
+  - Total of 2^23 - 256 (8 388 352) usable IPv4 addresses
+  - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay
+    networks)
+- West hosts IP address range: 10.128.1.0 - 10.255.255.255 (10.128/9
+  prefix)
+  - Total of 2^23 - 256 (8 388 352) usable IPv4 addresses
+  - Usable in tests for up to 32 767 GENEVE tunnels (IPv4 underlay
+    networks)
+
+### GENEVE Tunnel Scale
+
+If N is the number of GENEVE tunnels (and IPv4 underlay networks), then
+the TG sends 256 packet flows in each of N different sets:
+
+- i = 1, 2, 3, ..., N - the GENEVE tunnel index
+- East-West direction: GENEVE encapsulated packets
+  - Outer IP header:
+    - src ip: 1.1.1.1
+    - dst ip: 1.1.1.2
+  - GENEVE header:
+    - vni: i
+  - Inner IP header:
+    - src_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i,255)).(0-to-255)
+    - dst_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i,255)).(0-to-255)
+- West-East direction: non-encapsulated packets
+  - IP header:
+    - src_ip_range(i) = 10.(128 + rounddown(i/255)).(modulo(i,255)).(0-to-255)
+    - dst_ip_range(i) = 10.(0 + rounddown(i/255)).(modulo(i,255)).(0-to-255)
+
+ **geneve-tunnels** | **total-flows**
+-------------------:|----------------:
+ 1                  | 256
+ 4                  | 1 024
+ 16                 | 4 096
+ 64                 | 16 384
+ 256                | 65 536
+ 1 024              | 262 144
diff --git a/docs/content/methodology/test/vpp_device.md b/docs/content/methodology/test/vpp_device.md
new file mode 100644
index 0000000000..0a5ee90308
--- /dev/null
+++ b/docs/content/methodology/test/vpp_device.md
@@ -0,0 +1,15 @@
+---
+title: "VPP Device"
+weight: 9
+---
+
+# VPP Device
+
+CSIT includes a VPP_Device test environment for functional VPP
+device tests integrated into the LFN CI/CD infrastructure. VPP_Device
+tests run on 1-Node testbeds (1n-skx, 1n-arm) and rely on Linux SRIOV
+Virtual Function (VF), dot1q VLAN tagging and external loopback cables
+to facilitate packet passing over external physical links. The initial
+focus is on a few baseline tests. New device tests can be added by small
+edits to an existing CSIT performance (2-node) test. RF test definition
+code stays unchanged, with the exception of traffic generator related
+L2 KWs.
diff --git a/docs/content/methodology/trending/_index.md b/docs/content/methodology/trending/_index.md
new file mode 100644
index 0000000000..4289e7ff96
--- /dev/null
+++ b/docs/content/methodology/trending/_index.md
@@ -0,0 +1,12 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Trending"
+weight: 4
+---
+
+# Trending
+
+This document describes a high-level design of a system for continuous
+performance measuring, trending and change detection for the FD.io VPP
+SW data plane (and other performance tests run within the CSIT
+sub-project).
diff --git a/docs/content/methodology/trending/analysis.md b/docs/content/methodology/trending/analysis.md
new file mode 100644
index 0000000000..fe952259ab
--- /dev/null
+++ b/docs/content/methodology/trending/analysis.md
@@ -0,0 +1,224 @@
+---
+title: "Analysis"
+weight: 1
+---
+
+# Trend Analysis
+
+All measured performance trend data is treated as time-series data,
+modeled as a concatenation of groups;
+within each group the samples come (independently) from
+the same normal distribution (with some center and standard deviation).
+
+The center of the normal distribution for the group (equal to the
+population average) is called the trend for the group.
+All the analysis is based on finding the right partition into groups
+and comparing their trends.
+
+## Anomalies in graphs
+
+In graphs, the start of the following group is marked as a regression
+(red circle) or progression (green circle), if the new trend is lower
+(or higher, respectively) than the previous group's.
+
+## Implementation details
+
+### Partitioning into groups
+
+While sometimes the samples within a group are far from being
+distributed normally, currently we do not have a better tractable model.
+
+Here, a "sample" should be the result of a single trial measurement,
+with group boundaries set only at test run granularity.
+But in order to avoid detecting causes unrelated to VPP performance,
+the current presentation takes the average of all trials within the run
+as the sample. Effectively, this acts as a single trial with aggregate
+duration.
+
+Performance graphs show the run average as a dot (not all individual
+trial results).
+
+The group boundaries are selected based on the
+`Minimum Description Length`[^1].
+
+### Minimum Description Length
+
+`Minimum Description Length`[^1] (MDL) is a particular formalization
+of the `Occam's razor`[^2] principle.
+
+The general formulation mandates evaluating a large set of models,
+but for anomaly detection purposes, it is useful to consider
+a smaller set of models, so that scoring and comparing them is easier.
+
+For each candidate model, the data should be compressed losslessly,
+which includes model definitions, encoded model parameters,
+and the raw data encoded based on probabilities computed by the model.
+The model resulting in the shortest compressed message is "the" correct
+model.
+
+For our model set (groups of normally distributed samples),
+we need to encode the group length (which penalizes too many groups),
+the group average (more on that later), the group stdev and then all
+the samples.
+
+Luckily, the "all the samples" part turns out to be quite easy to
+compute. If sample values are considered as coordinates in
+(multi-dimensional) Euclidean space, fixing the stdev means the point
+with allowed coordinates lies on a sphere. Fixing the average intersects
+the sphere with a (hyper)-plane, and the Gaussian probability density on
+the resulting sphere is constant.
+So the only contribution is the "area" of the sphere, which only depends
+on the number of samples and the stdev.
+
+A somewhat ambiguous part is choosing which encoding
+is used for group size, average and stdev.
+Different encodings cause different biases towards large or small
+values. In our implementation we have chosen a probability density
+corresponding to the uniform distribution (from zero to the maximal
+sample value) for the stdev and the average of the first group,
+but for averages of subsequent groups we have chosen a distribution
+which discourages delimiting groups with averages close together.
+
+Our implementation assumes that the measurement precision is 1.0 pps.
+Thus it is slightly wrong for trial durations other than 1.0 seconds.
+Also, all the calculations assume 1.0 pps is totally negligible,
+compared to the stdev value.
+
+The group selection algorithm currently has no parameters;
+all the aforementioned encodings and the handling of precision are
+hard-coded. In principle, every group selection is examined, and the one
+encodable with the least amount of bits is selected.
+As the bit amount for a selection is just the sum of bits for every
+group, finding the best selection takes a number of comparisons that
+increases quadratically with the size of the data, the overall time
+complexity probably being cubic.
+
+The resulting group distribution looks good
+if samples are distributed normally enough within a group.
+But for obviously different distributions (for example a
+`bimodal distribution`[^3]) the groups tend to focus on less relevant
+factors (such as "outlier" density).
+
+## Common Patterns
+
+When an anomaly is detected, it frequently falls into a few known
+patterns, each having its typical behavior over time.
+
+We are going to describe the behaviors,
+as they motivate our choice of trend compliance metrics.
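+
+Before turning to the individual patterns, here is a toy sketch of the
+MDL-style scoring described in the previous section (illustrative only;
+the production encodings are hard-coded as noted above, and the toy
+parameter costs below are made up):
+
+    import math
+
+    def group_bits(samples, precision=1.0):
+        """Toy description length (in bits) of one group of samples."""
+        n = len(samples)
+        avg = sum(samples) / n
+        var = sum((x - avg) ** 2 for x in samples) / n
+        stdev = max(math.sqrt(var), precision)  # keep the log finite
+        param_bits = 3 * 32.0  # toy fixed cost: length, average, stdev
+        # Gaussian entropy per sample, relative to measurement precision:
+        sample_bits = n * 0.5 * math.log2(
+            2 * math.pi * math.e * stdev ** 2 / precision ** 2)
+        return param_bits + sample_bits
+
+    def partition_bits(groups):
+        """Total cost of a partition; the best partition minimizes this."""
+        return sum(group_bits(g) for g in groups)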
+
+### Sample time and analysis time
+
+But first we need to distinguish the two roles time plays in the
+analysis, so it is clearer which role we are referring to.
+
+Sample time is the more obvious one.
+It is the time the sample is generated.
+It is the start time or the end time of the Jenkins job run;
+it does not really matter which (parallel runs are disabled,
+and the length of the gap between samples does not affect the metrics).
+
+Analysis time is the time the current analysis is computed.
+Again, the exact time does not usually matter;
+what matters is how many later (and how many fewer earlier) samples
+were considered in the computation.
+
+For some patterns, it is usual for a previously reported
+anomaly to "vanish", or a previously unseen anomaly to "appear late",
+as later samples change which partition into groups is more probable.
+
+Dashboard and graphs are always showing the latest analysis time;
+the compliance metrics are using an earlier sample time
+with the same latest analysis time.
+
+Alerting e-mails use the latest analysis time at the time of sending,
+so the values reported there are likely to be different
+from the later analysis time results shown in dashboards and graphs.
+
+### Ordinary regression
+
+The real performance changes from a previously stable value
+into a new stable value.
+
+For a medium to high magnitude of the change, one run
+is enough for anomaly detection to mark this regression.
+
+Ordinary progressions are detected in the same way.
+
+### Small regression
+
+The real performance changes from a previously stable value
+into a new stable value, but the difference is small.
+
+For the anomaly detection algorithm, this change is harder to detect,
+depending on the standard deviation of the previous group.
+
+If the new performance value stays stable, eventually
+the detection algorithm is able to detect this anomaly
+when there are enough samples around the new value.
+
+If the difference is too small, it may remain undetected
+(as a new performance change happens, or the full history of samples
+is still not enough for the detection).
+
+Small progressions have the same behavior.
+
+### Reverted regression
+
+This pattern can have two different causes.
+We would like to distinguish them, but that is usually
+not possible to do just by looking at the measured values (and not
+telemetry).
+
+In one cause, the real DUT performance has changed,
+but got restored immediately.
+In the other cause, no real performance change happened,
+just some temporary infrastructure issue
+has caused a wrong low value to be measured.
+
+For small measured changes, this pattern may remain undetected.
+For medium and big measured changes, this is detected when the
+regression happens on just the last sample.
+
+For big changes, the revert is also immediately detected
+as a subsequent progression. The trend is usually different
+from the previously stable trend (as the two population averages
+are not likely to be exactly equal), but the difference
+between the two trends is relatively small.
+
+For medium changes, the detection algorithm may need several new samples
+to detect a progression (as it dislikes single sample groups),
+in the meantime reporting regressions (with the difference decreasing
+with analysis time), until it stabilizes the same way as for big changes
+(regression followed by progression, small difference
+between the old stable trend and the last trend).
+
+As it is very hard for faulty code or an infrastructure issue
+to increase performance, the opposite (a temporary progression)
+almost never happens.
+
+### Summary
+
+There is a trade-off between detecting small regressions
+and not reporting the same old regressions for a long time.
+
+For people reading e-mails, a sudden regression with a big number of
+samples in the last group means this regression was hard for the
+algorithm to detect.
+
+If there is a big regression with just one run in the last group,
+we are not sure if it is real, or just a temporary issue.
+It is useful to wait some time before starting an investigation.
+
+With decreasing (absolute value of) difference, the number of expected
+runs increases. If there are not enough runs, we still cannot
+distinguish a real regression from a temporary regression just from the
+current metrics (although humans frequently can tell by looking at the
+graph).
+
+When there is a regression or progression with just a small difference,
+it is probably an artifact of a temporary regression.
+It is not worth examining, unless temporary regressions happen somewhat
+frequently.
+
+It is not easy for the metrics to locate the previous stable value,
+especially if multiple anomalies happened in the last few weeks.
+It is good to compare the last trend with the long term trend maximum,
+as it highlights the difference between "now" and "what could be".
+It is good to exclude the last week from the trend maximum,
+as including the last week would hide all real progressions.
+
+[^1]: [Minimum Description Length](https://en.wikipedia.org/wiki/Minimum_description_length)
+[^2]: [Occam's Razor](https://en.wikipedia.org/wiki/Occam%27s_razor)
+[^3]: [Bimodal Distribution](https://en.wikipedia.org/wiki/Bimodal_distribution)
diff --git a/docs/content/methodology/trending/presentation.md b/docs/content/methodology/trending/presentation.md
new file mode 100644
index 0000000000..84925b46c8
--- /dev/null
+++ b/docs/content/methodology/trending/presentation.md
@@ -0,0 +1,34 @@
+---
+title: "Presentation"
+weight: 2
+---
+
+# Trend Presentation
+
+## Failed tests
+
+The Failed tests tables list the tests which failed during the last test
+run. Separate tables are generated for each testbed.
+
+## Regressions and progressions
+
+These tables list tests which encountered a regression or progression
+during the specified time period, which is currently set to the last
+21 days.
+
+## Trendline Graphs
+
+Trendline graphs show measured per-run averages of MRR values, NDR or
+PDR values, group average values, and detected anomalies.
+The graphs are constructed as follows:
+
+- X-axis represents the date in the format MMDD.
+- Y-axis represents the run-average MRR value, or NDR or PDR values, in
+  Mpps. For PDR tests, a graph with the average latency at 50% PDR [us]
+  is also generated.
+- Markers indicate anomaly classification:
+  - Regression - red circle.
+  - Progression - green circle.
+- The line shows the average MRR value of each group.
+
+In addition, the graphs show dynamic labels while hovering over graph
+data points, presenting the CSIT build date, measured value, VPP
+reference, trend job build ID and the LF testbed ID.
diff --git a/docs/content/methodology/trending_methodology/_index.md b/docs/content/methodology/trending_methodology/_index.md deleted file mode 100644 index 551d950cc7..0000000000 --- a/docs/content/methodology/trending_methodology/_index.md +++ /dev/null @@ -1,6 +0,0 @@ ---- -bookCollapseSection: true -bookFlatSection: false -title: "Trending Methodology" -weight: 22 ---- \ No newline at end of file diff --git a/docs/content/methodology/trending_methodology/overview.md b/docs/content/methodology/trending_methodology/overview.md deleted file mode 100644 index 90d8a2507c..0000000000 --- a/docs/content/methodology/trending_methodology/overview.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: "Overview" -weight: 1 ---- - -# Overview - -This document describes a high-level design of a system for continuous -performance measuring, trending and change detection for FD.io VPP SW -data plane (and other performance tests run within CSIT sub-project). diff --git a/docs/content/methodology/trending_methodology/trend_analysis.md b/docs/content/methodology/trending_methodology/trend_analysis.md deleted file mode 100644 index 7f1870f577..0000000000 --- a/docs/content/methodology/trending_methodology/trend_analysis.md +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: "Trending Analysis" -weight: 2 ---- - -# Trend Analysis - -All measured performance trend data is treated as time-series data -that is modeled as a concatenation of groups, -within each group the samples come (independently) from -the same normal distribution (with some center and standard deviation). - -Center of the normal distribution for the group (equal to population average) -is called a trend for the group. -All the analysis is based on finding the right partition into groups -and comparing their trends. - -## Anomalies in graphs - -In graphs, the start of the following group is marked as a regression (red -circle) or progression (green circle), if the new trend is lower (or higher -respectively) then the previous group's. - -## Implementation details - -### Partitioning into groups - -While sometimes the samples within a group are far from being distributed -normally, currently we do not have a better tractable model. - -Here, "sample" should be the result of single trial measurement, with group -boundaries set only at test run granularity. But in order to avoid detecting -causes unrelated to VPP performance, the current presentation takes average of -all trials within the run as the sample. Effectively, this acts as a single -trial with aggregate duration. - -Performance graphs show the run average as a dot (not all individual trial -results). - -The group boundaries are selected based on `Minimum Description Length`[^1]. - -### Minimum Description Length - -`Minimum Description Length`[^1] (MDL) is a particular formalization -of `Occam's razor`[^2] principle. - -The general formulation mandates to evaluate a large set of models, -but for anomaly detection purposes, it is useful to consider -a smaller set of models, so that scoring and comparing them is easier. - -For each candidate model, the data should be compressed losslessly, -which includes model definitions, encoded model parameters, -and the raw data encoded based on probabilities computed by the model. -The model resulting in shortest compressed message is the "the" correct model. 
- -For our model set (groups of normally distributed samples), -we need to encode group length (which penalizes too many groups), -group average (more on that later), group stdev and then all the samples. - -Luckily, the "all the samples" part turns out to be quite easy to compute. -If sample values are considered as coordinates in (multi-dimensional) -Euclidean space, fixing stdev means the point with allowed coordinates -lays on a sphere. Fixing average intersects the sphere with a (hyper)-plane, -and Gaussian probability density on the resulting sphere is constant. -So the only contribution is the "area" of the sphere, which only depends -on the number of samples and stdev. - -A somehow ambiguous part is in choosing which encoding -is used for group size, average and stdev. -Different encodings cause different biases to large or small values. -In our implementation we have chosen probability density -corresponding to uniform distribution (from zero to maximal sample value) -for stdev and average of the first group, -but for averages of subsequent groups we have chosen a distribution -which discourages delimiting groups with averages close together. - -Our implementation assumes that measurement precision is 1.0 pps. -Thus it is slightly wrong for trial durations other than 1.0 seconds. -Also, all the calculations assume 1.0 pps is totally negligible, -compared to stdev value. - -The group selection algorithm currently has no parameters, -all the aforementioned encodings and handling of precision is hard-coded. -In principle, every group selection is examined, and the one encodable -with least amount of bits is selected. -As the bit amount for a selection is just sum of bits for every group, -finding the best selection takes number of comparisons -quadratically increasing with the size of data, -the overall time complexity being probably cubic. - -The resulting group distribution looks good -if samples are distributed normally enough within a group. -But for obviously different distributions (for example -`bimodal distribution`[^3]) the groups tend to focus on less relevant factors -(such as "outlier" density). - -## Common Patterns - -When an anomaly is detected, it frequently falls into few known patterns, -each having its typical behavior over time. - -We are going to describe the behaviors, -as they motivate our choice of trend compliance metrics. - -### Sample time and analysis time - -But first we need to distinguish two roles time plays in analysis, -so it is more clear which role we are referring to. - -Sample time is the more obvious one. -It is the time the sample is generated. -It is the start time or the end time of the Jenkins job run, -does not really matter which (parallel runs are disabled, -and length of gap between samples does not affect metrics). - -Analysis time is the time the current analysis is computed. -Again, the exact time does not usually matter, -what matters is how many later (and how fewer earlier) samples -were considered in the computation. - -For some patterns, it is usual for a previously reported -anomaly to "vanish", or previously unseen anomaly to "appear late", -as later samples change which partition into groups is more probable. - -Dashboard and graphs are always showing the latest analysis time, -the compliance metrics are using earlier sample time -with the same latest analysis time. 
- -Alerting e-mails use the latest analysis time at the time of sending, -so the values reported there are likely to be different -from the later analysis time results shown in dashboard and graphs. - -### Ordinary regression - -The real performance changes from previously stable value -into a new stable value. - -For medium to high magnitude of the change, one run -is enough for anomaly detection to mark this regression. - -Ordinary progressions are detected in the same way. - -### Small regression - -The real performance changes from previously stable value -into a new stable value, but the difference is small. - -For the anomaly detection algorithm, this change is harder to detect, -depending on the standard deviation of the previous group. - -If the new performance value stays stable, eventually -the detection algorithm is able to detect this anomaly -when there are enough samples around the new value. - -If the difference is too small, it may remain undetected -(as new performance change happens, or full history of samples -is still not enough for the detection). - -Small progressions have the same behavior. - -### Reverted regression - -This pattern can have two different causes. -We would like to distinguish them, but that is usually -not possible to do just by looking at the measured values (and not telemetry). - -In one cause, the real DUT performance has changed, -but got restored immediately. -In the other cause, no real performance change happened, -just some temporary infrastructure issue -has caused a wrong low value to be measured. - -For small measured changes, this pattern may remain undetected. -For medium and big measured changes, this is detected when the regression -happens on just the last sample. - -For big changes, the revert is also immediately detected -as a subsequent progression. The trend is usually different -from the previously stable trend (as the two population averages -are not likely to be exactly equal), but the difference -between the two trends is relatively small. - -For medium changes, the detection algorithm may need several new samples -to detect a progression (as it dislikes single sample groups), -in the meantime reporting regressions (difference decreasing -with analysis time), until it stabilizes the same way as for big changes -(regression followed by progression, small difference -between the old stable trend and last trend). - -As it is very hard for a fault code or an infrastructure issue -to increase performance, the opposite (temporary progression) -almost never happens. - -### Summary - -There is a trade-off between detecting small regressions -and not reporting the same old regressions for a long time. - -For people reading e-mails, a sudden regression with a big number of samples -in the last group means this regression was hard for the algorithm to detect. - -If there is a big regression with just one run in the last group, -we are not sure if it is real, or just a temporary issue. -It is useful to wait some time before starting an investigation. - -With decreasing (absolute value of) difference, the number of expected runs -increases. If there is not enough runs, we still cannot distinguish -real regression from temporary regression just from the current metrics -(although humans frequently can tell by looking at the graph). - -When there is a regression or progression with just a small difference, -it is probably an artifact of a temporary regression. -Not worth examining, unless temporary regressions happen somewhat frequently. 
-
-It is not easy for the metrics to locate the previous stable value,
-especially if multiple anomalies happened in the last few weeks.
-It is good to compare the last trend with the long-term trend maximum,
-as it highlights the difference between "now" and "what could be".
-It is good to exclude the last week from the trend maximum,
-as including the last week would hide all real progressions.
-
-[^1]: [Minimum Description Length](https://en.wikipedia.org/wiki/Minimum_description_length)
-[^2]: [Occam's razor](https://en.wikipedia.org/wiki/Occam%27s_razor)
-[^3]: [bimodal distribution](https://en.wikipedia.org/wiki/Bimodal_distribution)
diff --git a/docs/content/methodology/trending_methodology/trend_presentation.md b/docs/content/methodology/trending_methodology/trend_presentation.md
deleted file mode 100644
index 4c58589a0b..0000000000
--- a/docs/content/methodology/trending_methodology/trend_presentation.md
+++ /dev/null
@@ -1,36 +0,0 @@
----
-title: "Trending Presentation"
-weight: 3
----
-
-# Trend Presentation
-
-## Failed tests
-
-The Failed tests tables list the tests which failed during the last test run.
-Separate tables are generated for each testbed.
-
-## Regressions and progressions
-
-These tables list tests which encountered a regression or progression during
-the specified time period, which is currently set to the last 21 days.
-
-## Trendline Graphs
-
-Trendline graphs show measured per-run averages of MRR values, NDR or PDR
-values, group average values, and detected anomalies.
-The graphs are constructed as follows:
-
-- X-axis represents the date in the format MMDD.
-- Y-axis represents run-average MRR value, NDR or PDR values in Mpps. For PDR
-  tests also a graph with average latency at 50% PDR [us] is generated.
-- Markers to indicate anomaly classification:
-
-  - Regression - red circle.
-  - Progression - green circle.
-
-- The line shows the average MRR value of each group.
-
-In addition, the graphs show dynamic labels while hovering over graph data
-points, presenting the CSIT build date, measured value, VPP reference, trend
-job build ID and the LF testbed ID.
diff --git a/docs/content/methodology/trex_traffic_generator.md b/docs/content/methodology/trex_traffic_generator.md
deleted file mode 100644
index 4f62d91c47..0000000000
--- a/docs/content/methodology/trex_traffic_generator.md
+++ /dev/null
@@ -1,195 +0,0 @@
----
-title: "TRex Traffic Generator"
-weight: 5
----
-
-# TRex Traffic Generator
-
-## Usage
-
-The [TRex traffic generator](https://trex-tgn.cisco.com) is used for the
-majority of CSIT performance tests. TRex is used in multiple types of
-performance tests, see
-[Data Plane Throughput]({{< ref "data_plane_throughput/data_plane_throughput/#Data Plane Throughput" >}})
-for more detail.
-
-## Traffic modes
-
-TRex is primarily used in two (mutually incompatible) modes.
-
-### Stateless mode
-
-Sometimes abbreviated as STL.
-A mode with high performance, which is unable to react to incoming traffic.
-We use this mode whenever possible.
-A typical test where this mode is not applicable is NAT44ED,
-as the DUT does not assign deterministic outside address+port combinations,
-so we are unable to create traffic that does not lose packets
-in the out2in direction.
-
-Measurement results are based on simple L2 counters
-(opackets, ipackets) for each traffic direction.
-
-### Stateful mode
-
-A mode capable of reacting to incoming traffic.
-Contrary to the stateless mode, only UDP and TCP are supported
-(carried over IPv4 or IPv6 packets).
-Performance is limited, as TRex needs to do more CPU processing.
-TRex supports two subtypes of stateful traffic;
-CSIT uses ASTF (Advanced STateFul mode).
-
-This mode is suitable for NAT44ED tests, as clients send packets from inside,
-and servers react to them, so they see the outside address and port to
-respond to. Also, they do not send traffic before NAT44ED has created the
-corresponding translation entry.
-
-When possible, L2 counters (opackets, ipackets) are used.
-Some tests need L7 counters, which track protocol state (e.g. TCP),
-but those values are less than reliable at high loads.
-
-## Traffic Continuity
-
-Generated traffic is either continuous, or limited (by the number of
-transactions). In principle, both modes support both continuities.
-
-### Continuous traffic
-
-Traffic is started without any data size goal.
-Traffic is ended based on time duration, as hinted by the search algorithm.
-This is useful when the DUT behavior does not depend on the traffic duration.
-This is the default for stateless mode.
-
-### Limited traffic
-
-Traffic has a defined data size goal (given as a number of transactions);
-the duration is computed based on this goal.
-Traffic is ended when the size goal is reached,
-or when the computed duration is reached.
-This is useful when the DUT behavior depends on traffic size,
-e.g. a target number of NAT translation entries, each to be hit exactly once
-per direction.
-This is used mainly for stateful mode.
-
-## Traffic synchronicity
-
-Traffic can be generated synchronously (the test waits for the duration)
-or asynchronously (the test operates during traffic and stops the traffic
-explicitly).
-
-### Synchronous traffic
-
-Trial measurement is driven by a given (or precomputed) duration,
-with no activity from the test driver during the traffic.
-Used for most trials.
-
-### Asynchronous traffic
-
-Traffic is started, but then the test driver is free to perform
-other actions, before stopping the traffic explicitly.
-This is used mainly by reconf tests, but also by some trials
-used for runtime telemetry.
-
-## Traffic profiles
-
-TRex supports several ways to define the traffic.
-CSIT uses small Python modules based on Scapy as definitions.
-Details of traffic profiles depend on the mode (STL or ASTF),
-but some are common for both modes.
-
-Search algorithms are intentionally unaware of the traffic mode used,
-so CSIT defines some terms to use instead of mode-specific TRex terms.
-
-### Transactions
-
-A TRex traffic profile defines a small number of behaviors,
-in CSIT called transaction templates. Traffic profiles also instruct
-TRex how to create a large number of transactions based on the templates.
-
-Continuous traffic loops over the generated transactions.
-Limited traffic usually executes each transaction once
-(typically as a constant number of loops over source addresses,
-each loop with different source ports).
-
-Currently, ASTF profiles define one transaction template each.
-The number of packets expected per transaction varies based on profile
-details, as does the criterion for when a transaction is considered
-successful.
-
-Stateless transactions are just one packet (sent from one TG port,
-successful if received on the other TG port).
-Thus unidirectional stateless profiles define one transaction template,
-and bidirectional stateless profiles define two transaction templates.
-
-### TPS multiplier
-
-TRex aims to open transactions specified by the profile at a steady rate.
-While TRex allows the transaction template to define its intended "cps" value,
-CSIT does not specify it, so the default value of 1 is applied,
-meaning TRex will open one transaction per second (per transaction template)
-by default. But the CSIT invocation uses the "multiplier" (mult) argument
-when starting the traffic, which multiplies the cps value,
-meaning it acts as a TPS (transactions per second) input.
-
-With a slight abuse of nomenclature, bidirectional stateless tests
-set the "packets per transaction" value to 2, just to keep the TPS semantics
-as a unidirectional input value.
-
-### Duration stretching
-
-TRex can be IO-bound, CPU-bound, or have any other reason
-why it is not able to generate the traffic at the requested TPS.
-Some conditions are detected and lead to a TRex failure,
-for example when the bandwidth does not fit into the line capacity.
-But many reasons are not detected.
-
-Unfortunately, TRex frequently reacts by not honoring the duration
-in synchronous mode, taking longer to send the traffic,
-leading to a lower than requested load offered to the DUT.
-This usually breaks assumptions used in search algorithms,
-so it has to be avoided.
-
-For stateless traffic, the behavior is quite deterministic,
-so the workaround is to apply a fictional TPS limit (max_rate)
-to search algorithms, usually depending only on the NIC used.
-
-For stateful traffic, the behavior is not deterministic enough;
-for example, the limit for TCP traffic depends on DUT packet loss.
-In CSIT we decided to use logic similar to asynchronous traffic.
-The traffic driver sleeps for a time, then stops the traffic explicitly.
-The library that parses counters into measurement results
-then usually treats unsent packets/transactions as lost/failed.
-
-We have added an IP4base test for every NAT44ED test,
-so that users can compare results.
-If the results are very similar, it is probable that TRex was the bottleneck.
-
-### Startup delay
-
-By investigating TRex behavior, it was found that TRex does not start
-the traffic in ASTF mode immediately. There is a delay of zero traffic,
-after which the traffic rate ramps up to the defined TPS value.
-
-It is possible to poll for counters during the traffic
-(the first nonzero value means traffic has started),
-but that was found to influence the NDR results.
-
-Thus the "sleep and stop" strategy is used, which needs a correction
-to the computed duration so traffic is stopped after the intended
-duration of real traffic. Luckily, it turns out this correction
-does not depend on the traffic profile nor on the CPU used by TRex,
-so a fixed constant (0.112 seconds) works well.
-Unfortunately, the constant may depend on the TRex version,
-or on the execution environment (e.g. TRex in AWS).
-
-The result computations need a precise enough duration of the real traffic;
-luckily, the server side of TRex has a precise enough counter for that.
-
-It is unknown whether stateless traffic profiles also exhibit a startup delay.
-Unfortunately, stateless mode does not have a similarly precise duration
-counter, so some results (mostly MRR) are affected by a less precise duration
-measurement in the Python part of the CSIT code.
-
-## Measuring Latency
-
-If measurement of latency is requested, two more packet streams are
-created (one for each direction) with the TRex flow_stats parameter set to
-STLFlowLatencyStats. In that case, the returned statistics will also include
-min/avg/max latency values and encoded HDRHistogram data.
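-
-As an illustration, such a latency stream could be defined as follows.
-This is a minimal sketch assuming the TRex stateless Python API
-(`trex.stl.api`); the packet contents and the `pg_id` value are arbitrary
-placeholders, not the profiles CSIT actually uses.
-
-```python
-from trex.stl.api import (
-    STLFlowLatencyStats, STLPktBuilder, STLStream, STLTXCont,
-)
-from scapy.layers.inet import IP, UDP
-from scapy.layers.l2 import Ether
-
-# One direction only; a second stream with swapped addresses
-# would cover the other direction.
-base_pkt = Ether() / IP(src="16.0.0.1", dst="48.0.0.1") / UDP(dport=1234)
-latency_stream = STLStream(
-    packet=STLPktBuilder(pkt=base_pkt / (24 * "x")),
-    mode=STLTXCont(pps=1000),  # low fixed rate for the latency probes
-    flow_stats=STLFlowLatencyStats(pg_id=7),  # min/avg/max + HDRHistogram
-)
-```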
\ No newline at end of file
diff --git a/docs/content/methodology/tunnel_encapsulations.md b/docs/content/methodology/tunnel_encapsulations.md
deleted file mode 100644
index 52505b7efb..0000000000
--- a/docs/content/methodology/tunnel_encapsulations.md
+++ /dev/null
@@ -1,41 +0,0 @@
----
-title: "Tunnel Encapsulations"
-weight: 10
----
-
-# Tunnel Encapsulations
-
-Tunnel encapsulations testing is grouped based on the type of outer
-header: IPv4 or IPv6.
-
-## IPv4 Tunnels
-
-VPP is tested in the following IPv4 tunnel baseline configurations:
-
-- *ip4vxlan-l2bdbase*: VXLAN over IPv4 tunnels with L2 bridge-domain MAC
-  switching.
-- *ip4vxlan-l2xcbase*: VXLAN over IPv4 tunnels with L2 cross-connect.
-- *ip4lispip4-ip4base*: LISP over IPv4 tunnels with IPv4 routing.
-- *ip4lispip6-ip6base*: LISP over IPv4 tunnels with IPv6 routing.
-- *ip4gtpusw-ip4base*: GTPU over IPv4 tunnels with IPv4 routing.
-
-In all cases listed above, a low number of MAC, IPv4, IPv6 flows (253 or 254
-per direction) is switched or routed by VPP.
-
-In addition, selected IPv4 tunnels are tested at scale:
-
-- *dot1q--ip4vxlanscale-l2bd*: VXLAN over IPv4 tunnels with L2 bridge-
-  domain MAC switching, with scaled up dot1q VLANs (10, 100, 1k),
-  mapped to scaled up L2 bridge-domains (10, 100, 1k), that are in turn
-  mapped to (10, 100, 1k) VXLAN tunnels. 64.5k flows are transmitted per
-  direction.
-
-## IPv6 Tunnels
-
-VPP is tested in the following IPv6 tunnel baseline configurations:
-
-- *ip6lispip4-ip4base*: LISP over IPv6 tunnels with IPv4 routing.
-- *ip6lispip6-ip6base*: LISP over IPv6 tunnels with IPv6 routing.
-
-In all cases listed above, a low number of IPv4, IPv6 flows (253 or 254 per
-direction) is routed by VPP.
diff --git a/docs/content/methodology/vpp_device_functional.md b/docs/content/methodology/vpp_device_functional.md
deleted file mode 100644
index 2bad5973b6..0000000000
--- a/docs/content/methodology/vpp_device_functional.md
+++ /dev/null
@@ -1,15 +0,0 @@
----
-title: "VPP_Device Functional"
-weight: 18
----
-
-# VPP_Device Functional
-
-Includes the VPP_Device test environment for functional VPP
-device tests integrated into the LFN CI/CD infrastructure. VPP_Device tests
-run on 1-Node testbeds (1n-skx, 1n-arm) and rely on Linux SRIOV Virtual
-Function (VF), dot1q VLAN tagging and external loopback cables to
-facilitate packet passing over external physical links. Initial focus is
-on a few baseline tests. New device tests can be added by small edits
-to an existing CSIT Performance (2-node) test. RF test definition code
-stays unchanged with the exception of traffic generator related L2 KWs.
diff --git a/docs/content/methodology/vpp_forwarding_modes.md b/docs/content/methodology/vpp_forwarding_modes.md
deleted file mode 100644
index 1cc199c607..0000000000
--- a/docs/content/methodology/vpp_forwarding_modes.md
+++ /dev/null
@@ -1,104 +0,0 @@
----
-title: "VPP Forwarding Modes"
-weight: 3
----
-
-# VPP Forwarding Modes
-
-VPP is tested in a number of L2, IPv4 and IPv6 packet lookup and
-forwarding modes. Within each mode, baseline and scale tests are
-executed, the latter with a varying number of FIB entries.
-
-## L2 Ethernet Switching
-
-VPP is tested in three L2 forwarding modes:
-
-- *l2patch*: L2 patch, the fastest point-to-point L2 path that loops
-  packets between two interfaces without any Ethernet frame checks or
-  lookups.
-- *l2xc*: L2 cross-connect, point-to-point L2 path with all Ethernet
-  frame checks, but no MAC learning and no MAC lookup.
-- *l2bd*: L2 bridge-domain, multipoint-to-multipoint L2 path with all
-  Ethernet frame checks, with MAC learning (unless static MACs are used)
-  and MAC lookup.
-
-l2bd tests are executed in baseline and scale configurations:
-
-- *l2bdbase*: Two MAC FIB entries are learned by VPP to enable packet
-  switching between two interfaces in two directions. VPP L2 switching
-  is tested with 254 unique IPv4 flows per direction, varying the IPv4
-  source address per flow in order to invoke RSS based packet
-  distribution across VPP workers. The same source and destination MAC
-  address is used for all flows per direction. The IPv4 source address is
-  incremented for every packet.
-
-- *l2bdscale*: A high number of MAC FIB entries are learned by VPP to
-  enable packet switching between two interfaces in two directions.
-  Tested MAC FIB sizes include: i) 10k with 5k unique flows per
-  direction, ii) 100k with 2 x 50k flows and iii) 1M with 2 x 500k
-  flows. Unique flows are created by using distinct source and
-  destination MAC addresses that are changed for every packet using
-  incremental ordering, making VPP learn (or refresh) distinct src MAC
-  entries and look up distinct dst MAC entries for every packet. For
-  details, see
-  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
-
-Ethernet wire encapsulations tested include: untagged, dot1q, dot1ad.
-
-## IPv4 Routing
-
-IPv4 routing tests are executed in baseline and scale configurations:
-
-- *ip4base*: Two /32 IPv4 FIB entries are configured in VPP to enable
-  packet routing between two interfaces in two directions. VPP routing
-  is tested with 253 unique IPv4 flows per direction, varying the IPv4
-  source address per flow in order to invoke RSS based packet
-  distribution across VPP workers. The IPv4 source address is incremented
-  for every packet.
-
-- *ip4scale*: A high number of /32 IPv4 FIB entries are configured in
-  VPP. Tested IPv4 FIB sizes include: i) 20k with 10k unique flows per
-  direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M
-  flows. Unique flows are created by using distinct IPv4 destination
-  addresses that are changed for every packet, using incremental or
-  random ordering. For details, see
-  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
-
-## IPv6 Routing
-
-Similarly to IPv4, IPv6 routing tests are executed in baseline and scale
-configurations:
-
-- *ip6base*: Two /128 IPv6 FIB entries are configured in VPP to enable
-  packet routing between two interfaces in two directions. VPP routing
-  is tested with 253 unique IPv6 flows per direction, varying the IPv6
-  source address per flow in order to invoke RSS based packet
-  distribution across VPP workers. The IPv6 source address is incremented
-  for every packet.
-
-- *ip6scale*: A high number of /128 IPv6 FIB entries are configured in
-  VPP. Tested IPv6 FIB sizes include: i) 20k with 10k unique flows per
-  direction, ii) 200k with 2 * 100k flows and iii) 2M with 2 * 1M
-  flows. Unique flows are created by using distinct IPv6 destination
-  addresses that are changed for every packet, using incremental or
-  random ordering. For details, see
-  [Packet Flow Ordering]({{< ref "packet_flow_ordering#Packet Flow Ordering" >}}).
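-
-The incremental ordering mentioned above can be illustrated with a short
-Python sketch. This is only an illustration of how distinct destination
-addresses can be derived (using the standard `ipaddress` module and a
-hypothetical helper name); the actual CSIT traffic profiles are
-TRex/Scapy based.
-
-```python
-import ipaddress
-
-def incremental_dst_addresses(first, count):
-    """Return `count` IPv6 destination addresses in incremental order."""
-    base = ipaddress.IPv6Address(first)
-    return [str(base + i) for i in range(count)]
-
-# E.g. 10k unique flows per direction for the 20k FIB scale test:
-flows = incremental_dst_addresses("2001:db8::1", 10_000)
-```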
-
-## SRv6 Routing
-
-SRv6 routing tests are executed in a number of baseline configurations;
-in each case an SR policy and a steering policy are configured for one
-direction, and one (or two) SR behaviours (functions) in the other
-direction:
-
-- *srv6enc1sid*: One SID (no SRH present), one SR function - End.
-- *srv6enc2sids*: Two SIDs (SRH present), two SR functions - End and
-  End.DX6.
-- *srv6enc2sids-nodecaps*: Two SIDs (SRH present) without decapsulation,
-  one SR function - End.
-- *srv6proxy-dyn*: Dynamic SRv6 proxy, one SR function - End.AD.
-- *srv6proxy-masq*: Masquerading SRv6 proxy, one SR function - End.AM.
-- *srv6proxy-stat*: Static SRv6 proxy, one SR function - End.AS.
-
-In all listed cases, a low number of IPv6 flows (253 per direction) is
-routed by VPP.
diff --git a/docs/content/methodology/vpp_startup_settings.md b/docs/content/methodology/vpp_startup_settings.md
deleted file mode 100644
index 6e40091a6c..0000000000
--- a/docs/content/methodology/vpp_startup_settings.md
+++ /dev/null
@@ -1,44 +0,0 @@
----
-title: "VPP Startup Settings"
-weight: 17
----
-
-# VPP Startup Settings
-
-CSIT code manipulates a number of VPP settings in startup.conf for
-optimized performance. A list of common settings applied to all tests and
-of test-dependent settings follows.
-
-## Common Settings
-
-List of VPP startup.conf settings applied to all tests:
-
-1. heap-size - set separately for ip4, ip6, stats, main
-   depending on the scale tested.
-2. no-tx-checksum-offload - disables UDP / TCP TX checksum offload in
-   DPDK. Typically needed to use faster vector PMDs (together with
-   no-multi-seg).
-3. buffers-per-numa - sets the number of memory buffers allocated
-   to VPP per CPU socket. The VPP default is 16384. It needs to be increased
-   for scenarios with a large number of interfaces and worker threads. To
-   accommodate scale tests, CSIT sets it to the maximum possible
-   value corresponding to the limit of DPDK memory mappings (currently
-   256). For Xeon Skylake platforms configured with 2MB hugepages and VPP
-   data-size and buffer-size defaults (2048B and 2496B respectively), this
-   results in a value of 215040 (256 * 840 = 215040; 840 buffers of 2496B
-   fit in one 2MB hugepage).
-
-## Per Test Settings
-
-List of VPP startup.conf settings applied dynamically per test:
-
-1. corelist-workers - list of logical cores to run VPP
-   worker data plane threads. Depends on HyperThreading and the per-test
-   core configuration.
-2. num-rx-queues - depends on the number of VPP threads and NIC
-   interfaces.
-3. no-multi-seg - disables multi-segment buffers in DPDK, which improves
-   packet throughput but disables Jumbo MTU support. Applied to all
-   tests apart from the ones that require Jumbo 9000B frame support.
-4. UIO driver - depends on the topology file definition.
-5. QAT VFs - depends on NRThreads, each thread = 1 QAT VF.
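-
-For illustration, the settings above combine into a startup.conf fragment
-along the following lines. This is a hand-written sketch with illustrative
-values (2 worker cores, scale-test sized buffers and heaps), not a file
-generated by CSIT:
-
-```
-cpu {
-  main-core 1
-  corelist-workers 2-3
-}
-dpdk {
-  no-multi-seg
-  no-tx-checksum-offload
-  dev default {
-    num-rx-queues 2
-  }
-}
-buffers {
-  buffers-per-numa 215040
-}
-statseg {
-  size 2G
-}
-ip {
-  heap-size 4G
-}
-```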
diff --git a/docs/content/overview/_index.md b/docs/content/overview/_index.md new file mode 100644 index 0000000000..97fb5dec78 --- /dev/null +++ b/docs/content/overview/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: false +bookFlatSection: true +title: "Overview" +weight: 1 +--- diff --git a/docs/content/overview/c_dash/_index.md b/docs/content/overview/c_dash/_index.md new file mode 100644 index 0000000000..97b351006f --- /dev/null +++ b/docs/content/overview/c_dash/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: true +bookFlatSection: false +title: "C-Dash" +weight: 1 +--- diff --git a/docs/content/overview/c_dash/design.md b/docs/content/overview/c_dash/design.md new file mode 100644 index 0000000000..ef8c62ab88 --- /dev/null +++ b/docs/content/overview/c_dash/design.md @@ -0,0 +1,6 @@ +--- +title: "Design" +weight: 1 +--- + +# Design diff --git a/docs/content/overview/c_dash/releases.md b/docs/content/overview/c_dash/releases.md new file mode 100644 index 0000000000..1e51c2978a --- /dev/null +++ b/docs/content/overview/c_dash/releases.md @@ -0,0 +1,8 @@ +--- +title: "Releases" +weight: 3 +--- + +# Releases + +## C-Dash v1 diff --git a/docs/content/overview/c_dash/structure.md b/docs/content/overview/c_dash/structure.md new file mode 100644 index 0000000000..ba427f1ee3 --- /dev/null +++ b/docs/content/overview/c_dash/structure.md @@ -0,0 +1,20 @@ +--- +title: "Structure" +weight: 2 +--- + +# Structure + +## Performance Trending + +## Per Release Performance + +## Per Release Performance Comparisons + +## Per Release Coverage Data + +## Test Job Statistics + +## Failures and Anomalies + +## Documentation diff --git a/docs/content/overview/csit/_index.md b/docs/content/overview/csit/_index.md new file mode 100644 index 0000000000..959348d2ae --- /dev/null +++ b/docs/content/overview/csit/_index.md @@ -0,0 +1,6 @@ +--- +bookCollapseSection: true +bookFlatSection: false +title: "CSIT" +weight: 2 +--- diff --git a/docs/content/overview/csit/design.md b/docs/content/overview/csit/design.md new file mode 100644 index 0000000000..53b764f5bb --- /dev/null +++ b/docs/content/overview/csit/design.md @@ -0,0 +1,148 @@ +--- +title: "Design" +weight: 1 +--- + +# Design + +FD.io CSIT system design needs to meet continuously expanding requirements of +FD.io projects including VPP, related sub-systems (e.g. plugin applications, +DPDK drivers) and FD.io applications (e.g. DPDK applications), as well as +growing number of compute platforms running those applications. With CSIT +project scope and charter including both FD.io continuous testing AND +performance trending/comparisons, those evolving requirements further amplify +the need for CSIT framework modularity, flexibility and usability. + +## Design Hierarchy + +CSIT follows a hierarchical system design with SUTs and DUTs at the bottom level +of the hierarchy, presentation level at the top level and a number of functional +layers in-between. The current CSIT system design including CSIT framework is +depicted in the figure below. + +{{< figure src="/cdocs/csit_design_picture.svg" title="CSIT Design" >}} + +A brief bottom-up description is provided here: + +1. SUTs, DUTs, TGs + - SUTs - Systems Under Test; + - DUTs - Devices Under Test; + - TGs - Traffic Generators; +2. 
Level-1 libraries - Robot and Python
+   - Lowest level CSIT libraries abstracting underlying test environment, SUT,
+     DUT and TG specifics;
+   - Used commonly across multiple L2 KWs;
+   - Performance and functional tests:
+     - L1 KWs (KeyWords) are implemented as RF libraries and Python
+       libraries;
+   - Performance TG L1 KWs:
+     - All L1 KWs are implemented as Python libraries:
+       - Support for TRex only today;
+       - CSIT IXIA drivers in progress;
+   - Performance data plane traffic profiles:
+     - TG-specific stream profiles provide full control of:
+       - Packet definition - layers, MACs, IPs, ports, combinations thereof,
+         e.g. IPs and UDP ports;
+       - Stream definitions - different streams can run together, delayed,
+         one after another;
+       - Stream profiles are independent of the CSIT framework and can be used
+         in any TRex setup, can be sent anywhere to repeat tests with
+         exactly the same setup;
+       - Easily extensible - one can create a new stream profile that meets
+         test requirements;
+       - The same stream profile can be used for different tests with the same
+         traffic needs;
+   - Functional data plane traffic scripts:
+     - Scapy specific traffic scripts;
+3. Level-2 libraries - Robot resource files:
+   - Higher level CSIT libraries abstracting required functions for executing
+     tests;
+   - L2 KWs are classified into the following functional categories:
+     - Configuration, test, verification, state report;
+     - Suite setup, suite teardown;
+     - Test setup, test teardown;
+4. Tests - Robot:
+   - Test suites with test cases;
+   - Performance tests using a physical testbed environment:
+     - VPP;
+     - DPDK-Testpmd;
+     - DPDK-L3Fwd;
+   - Tools:
+     - Documentation generator;
+     - Report generator;
+     - Testbed environment setup ansible playbooks;
+     - Operational debugging scripts;
+
+5. Test Lifecycle Abstraction
+
+A well coded test must follow a disciplined abstraction of the test
+lifecycle that includes setup, configuration, test and verification. In
+addition, to improve test execution efficiency, the common aspects of
+test setup and configuration shared across multiple test cases should be
+done only once. Translating these high-level guidelines into the Robot
+Framework, one arrives at the definition of well coded RF tests for FD.io
+CSIT. Anatomy of Good Tests for CSIT:
+
+1. Suite Setup - Suite startup Configuration common to all Test Cases in suite:
+   uses Configuration KWs, Verification KWs, StateReport KWs;
+2. Test Setup - Test startup Configuration common to multiple Test Cases: uses
+   Configuration KWs, StateReport KWs;
+3. Test Case - uses L2 KWs with RF Gherkin style:
+   - prefixed with {Given} - Verification of Test setup, reading state: uses
+     Configuration KWs, Verification KWs, StateReport KWs;
+   - prefixed with {When} - Test execution: Configuration KWs, Test KWs;
+   - prefixed with {Then} - Verification of Test execution, reading state: uses
+     Verification KWs, StateReport KWs;
+4. Test Teardown - post Test teardown with Configuration cleanup and
+   Verification common to multiple Test Cases - uses: Configuration KWs,
+   Verification KWs, StateReport KWs;
+5. Suite Teardown - Suite post-test Configuration cleanup: uses Configuration
+   KWs, Verification KWs, StateReport KWs;
+
+## RF Keywords Functional Classification
+
+CSIT RF KWs are classified into the functional categories matching the test
+lifecycle events described earlier. All CSIT RF L2 and L1 KWs have been grouped
+into the following functional categories:
+
+1. Configuration;
+2. Test;
+3. Verification;
+4. StateReport;
+5. SuiteSetup;
+6.
TestSetup;
+7. SuiteTeardown;
+8. TestTeardown;
+
+## RF Keywords Naming Guidelines
+
+Readability counts: "...code is read much more often than it is written."
+Hence following a good and consistent grammar practice is important when
+writing Robot Framework KeyWords and Tests. All CSIT test cases
+are coded using the Gherkin style and include only L2 KW references. L2 KWs
+are coded using a simple style and include L2 KW, L1 KW, and L1 Python
+references. To improve readability, the proposal is to use the same grammar
+for both Robot Framework KW styles, and to formalize the grammar of English
+sentences used for naming the Robot Framework KWs. Robot
+Framework KW names are short sentences expressing a functional description of
+the command. They must follow English sentence grammar in one of the following
+forms:
+
+1. **Imperative** - verb-object(s): *"Do something"*, verb in base form.
+2. **Declarative** - subject-verb-object(s): *"Subject does something"*, verb in
+   a third-person singular present tense form.
+3. **Affirmative** - modal_verb-verb-object(s): *"Subject should be something"*,
+   *"Object should exist"*, verb in base form.
+4. **Negative** - modal_verb-Not-verb-object(s): *"Subject should not be
+   something"*, *"Object should not exist"*, verb in base form.
+
+Passive form MUST NOT be used. However, usage of a past participle as an
+adjective is okay. See the usage examples provided in the Coding guidelines
+section below. The following sections list the applicability of the above
+grammar forms to different Robot Framework KW categories. Usage
+examples are provided, both good and bad.
+
+## Coding Guidelines
+
+Coding guidelines can be found on the
+[Design optimizations wiki page](https://wiki.fd.io/view/CSIT/Design_Optimizations).
diff --git a/docs/content/overview/csit/suite_generation.md b/docs/content/overview/csit/suite_generation.md
new file mode 100644
index 0000000000..84a19b8ab9
--- /dev/null
+++ b/docs/content/overview/csit/suite_generation.md
@@ -0,0 +1,123 @@
+---
+title: "Suite Generation"
+weight: 5
+---
+
+# Suite Generation
+
+CSIT uses Robot suite files to define tests. However, not all suite files
+available for Jenkins jobs (or manually started bootstrap scripts) are present
+in the CSIT git repository. They are generated only when needed.
+
+## Autogen Library
+
+There is a code generation layer implemented as a Python library called
+"autogen", called by various bash scripts.
+
+It generates the full extent of CSIT suites, using the ones in git as templates.
+
+## Sources
+
+The generated suites (and their contents) are affected by multiple information
+sources, listed below.
+
+### Git Suites
+
+The suites present in the git repository act as templates for generating
+suites. One of autogen's design principles is that any template suite should
+also act as a full suite (no placeholders).
+
+In practice, autogen always re-creates the template suite with exactly
+the same content; this is one of the checks that autogen works correctly.
+
+### Regenerate Script
+
+Not all suites present in the CSIT git repository act as templates for autogen.
+The distinction is on a per-directory level. Directories with a
+`regenerate_testcases.py` script usually consider all suites as templates
+(unless possibly not included by the glob pattern in the script).
+
+The script also specifies the minimal frame size, indirectly, by specifying
+the protocol (protocol "ip4" is the default, leading to 64B frame size).
+
+### Constants
+
+Values in `Constants.py` are taken into consideration when generating suites.
+The values are mostly related to different NIC models and NIC drivers.
+
+### Python Code
+
+Python code in `resources/libraries/python/autogen` contains several other
+information sources.
+
+#### Testcase Templates
+
+The test case part of a template suite is ignored; test case lines
+are created according to text templates in the `Testcase.py` file.
+
+#### Testcase Argument Lists
+
+Each testcase template has a different number of "arguments", e.g. values
+to put into various placeholders. Different test types need different
+lists of the argument values; the lists are in the `regenerate_glob` method
+in the `Regenerator.py` file.
+
+#### Iteration Over Values
+
+Python code detects the test type (usually by substrings of the suite file
+name), then iterates over different quantities based on the type.
+For example, only ndrpdr suite templates generate other types (mrr and soak).
+
+#### Hardcoded Exclusions
+
+Some combinations of values are known not to work, so they are excluded.
+Examples: Density tests for too many CPUs; IMIX for ASTF.
+
+## Non-Sources
+
+Some information sources are available in the CSIT repository,
+but do not affect the suites generated by autogen.
+
+### Testbeds
+
+Overall, no information visible in topology yaml files is taken into account
+by autogen.
+
+#### Testbed Architecture
+
+Historically, suite files are agnostic to testbed architecture, e.g. ICX or ALT.
+
+#### Testbed Size
+
+Historically, 2-node and 3-node suites have different names, and while
+most of the code is common, the differences are not always simple enough.
+Autogen treats 2-node and 3-node suites as independent templates.
+
+TRex suites are intended for a 1-node circuit of otherwise 2-node or 3-node
+testbeds, so they support all 3 robot tags.
+They are also detected and treated differently by autogen,
+mainly because they need different testcase arguments (no CPU count).
+Autogen does nothing specifically related to the fact they should run
+only in testbeds/NICs with a TG-TG line available.
+
+#### Other Topology Info
+
+Some bonding tests need two (parallel) links between DUTs. Autogen does not
+care, as suites are agnostic. A Robot tag marks the difference, but the link
+presence is not explicitly checked.
+
+### Job Specs
+
+Information in job spec files depends on generated suites (not the other way
+around). Autogen should generate more suites, as the job spec is limited by a
+time budget. More suites should be available for manually triggered verify
+jobs, so autogen covers that.
+
+### Bootstrap Scripts
+
+Historically, bootstrap scripts perform some logic,
+perhaps adding exclusion options to the Robot invocation
+(e.g. skipping testbed+NIC combinations for tests that need parallel links).
+
+Once again, the logic here relies on what autogen generates;
+autogen does not look into bootstrap scripts.
diff --git a/docs/content/overview/csit/test_naming.md b/docs/content/overview/csit/test_naming.md
new file mode 100644
index 0000000000..d7a32518e5
--- /dev/null
+++ b/docs/content/overview/csit/test_naming.md
@@ -0,0 +1,112 @@
+---
+title: "Test Naming"
+weight: 3
+---
+
+# Test Naming
+
+## Background
+
+{{< release_csit >}} follows a common structured naming convention for all
+performance and system functional tests, introduced in CSIT 17.01.
+
+The naming should be intuitive for the majority of the tests. A complete
+description of the CSIT test naming convention is provided on the
+[CSIT test naming wiki page](https://wiki.fd.io/view/CSIT/csit-test-naming).
+Below are a few illustrative examples of the naming usage for test suites
+across CSIT performance, functional and Honeycomb management test areas.
+
+## Naming Convention
+
+The CSIT approach is to use a tree naming convention and to encode the
+following testing information into test suite and test case names:
+
+1. packet network port configuration
+   * port type, physical or virtual;
+   * number of ports;
+   * NIC model, if applicable;
+   * port-NIC locality, if applicable;
+2. packet encapsulations;
+3. VPP packet processing
+   * packet forwarding mode;
+   * packet processing function(s);
+4. packet forwarding path
+   * if present, network functions (processes, containers, VMs) and their
+     topology within the computer;
+5. main measured variable, type of test.
+
+The proposed convention is to encode ports and NICs on the left (underlay),
+followed by the outer-most frame header, then other stacked headers up to the
+header processed by vSwitch-VPP, then the VPP forwarding function, then encap
+on the vhost interface, number of vhost interfaces, number of VMs. If chained
+VMs are present, they get added on the right. Test topology is expected to be
+symmetric, in other words packets enter and leave the SUT through ports
+specified on the left of the test name. Here are some examples to illustrate
+the convention, followed by the complete legend, and tables mapping the new
+test filenames to old ones.
+
+## Naming Examples
+
+CSIT test suite naming examples (filename.robot) for common tested VPP
+topologies:
+
+1. **Physical port to physical port - a.k.a. NIC-to-NIC, Phy-to-Phy, P2P**
+   * *PortNICConfig-WireEncapsulation-PacketForwardingFunction-
+     PacketProcessingFunction1-...-PacketProcessingFunctionN-TestType*
+   * *10ge2p1x520-dot1q-l2bdbasemaclrn-ndrdisc.robot* => 2 ports of 10GE on
+     Intel x520 NIC, dot1q tagged Ethernet, L2 bridge-domain baseline switching
+     with MAC learning, NDR throughput discovery.
+   * *10ge2p1x520-ethip4vxlan-l2bdbasemaclrn-ndrchk.robot* => 2 ports of 10GE on
+     Intel x520 NIC, IPv4 VXLAN Ethernet, L2 bridge-domain baseline switching
+     with MAC learning, NDR throughput discovery.
+   * *10ge2p1x520-ethip4-ip4base-ndrdisc.robot* => 2 ports of 10GE on Intel x520
+     NIC, IPv4 baseline routed forwarding, NDR throughput discovery.
+   * *10ge2p1x520-ethip6-ip6scale200k-ndrdisc.robot* => 2 ports of 10GE on Intel
+     x520 NIC, IPv6 scaled up routed forwarding, NDR throughput discovery.
+   * *10ge2p1x520-ethip4-ip4base-iacldstbase-ndrdisc.robot* => 2 ports of 10GE
+     on Intel x520 NIC, IPv4 baseline routed forwarding, ingress Access Control
+     Lists baseline matching on destination, NDR throughput discovery.
+   * *40ge2p1vic1385-ethip4-ip4base-ndrdisc.robot* => 2 ports of 40GE on Cisco
+     vic1385 NIC, IPv4 baseline routed forwarding, NDR throughput discovery.
+   * *eth2p-ethip4-ip4base-func.robot* => 2 ports of Ethernet, IPv4 baseline
+     routed forwarding, functional tests.
+
+2. **Physical port to VM (or VM chain) to physical port - a.k.a. NIC2VM2NIC,
+   P2V2P, NIC2VMchain2NIC, P2V2V2P**
+   * *PortNICConfig-WireEncapsulation-PacketForwardingFunction-
+     PacketProcessingFunction1-...-PacketProcessingFunctionN-VirtEncapsulation-
+     VirtPortConfig-VMconfig-TestType*
+   * *10ge2p1x520-dot1q-l2bdbasemaclrn-eth-2vhost-1vm-ndrdisc.robot* => 2 ports
+     of 10GE on Intel x520 NIC, dot1q tagged Ethernet, L2 bridge-domain
+     switching to/from two vhost interfaces and one VM, NDR throughput
+     discovery.
+ * *10ge2p1x520-ethip4vxlan-l2bdbasemaclrn-eth-2vhost-1vm-ndrdisc.robot* => 2 + ports of 10GE on Intel x520 NIC, IPv4 VXLAN Ethernet, L2 bridge-domain + switching to/from two vhost interfaces and one VM, NDR throughput + discovery. + * *10ge2p1x520-ethip4vxlan-l2bdbasemaclrn-eth-4vhost-2vm-ndrdisc.robot* => 2 + ports of 10GE on Intel x520 NIC, IPv4 VXLAN Ethernet, L2 bridge-domain + switching to/from four vhost interfaces and two VMs, NDR throughput + discovery. + * *eth2p-ethip4vxlan-l2bdbasemaclrn-eth-2vhost-1vm-func.robot* => 2 ports of + Ethernet, IPv4 VXLAN Ethernet, L2 bridge-domain switching to/from two vhost + interfaces and one VM, functional tests. + +3. **API CRUD tests - Create (Write), Read (Retrieve), Update (Modify), Delete + (Destroy) operations for configuration and operational data** + * *ManagementTestKeyword-ManagementOperation-ManagedFunction1-...- + ManagedFunctionN-ManagementAPI1-ManagementAPIN-TestType* + * *mgmt-cfg-lisp-apivat-func* => configuration of LISP with VAT API calls, + functional tests. + * *mgmt-cfg-l2bd-apihc-apivat-func* => configuration of L2 Bridge-Domain with + Honeycomb API and VAT API calls, functional tests. + * *mgmt-oper-int-apihcnc-func* => reading status and operational data of + interface with Honeycomb NetConf API calls, functional tests. + * *mgmt-cfg-int-tap-apihcnc-func* => configuration of tap interfaces with + Honeycomb NetConf API calls, functional tests. + * *mgmt-notif-int-subint-apihcnc-func* => notifications of interface and + sub-interface events with Honeycomb NetConf Notifications, functional + tests. + +For complete description of CSIT test naming convention please refer to +[CSIT test naming wiki page](https://wiki.fd.io/view/CSIT/csit-test-naming). diff --git a/docs/content/overview/csit/test_scenarios.md b/docs/content/overview/csit/test_scenarios.md new file mode 100644 index 0000000000..1f06765eae --- /dev/null +++ b/docs/content/overview/csit/test_scenarios.md @@ -0,0 +1,66 @@ +--- +title: "Test Scenarios" +weight: 2 +--- + +# Test Scenarios + +FD.io CSIT Dashboard includes multiple test scenarios of VPP +centric applications, topologies and use cases. In addition it also +covers baseline tests of DPDK sample applications. Tests are executed in +physical (performance tests) and virtual environments (functional +tests). + +Brief overview of test scenarios covered in this documentation: + +1. **VPP Performance**: VPP performance tests are executed in physical + FD.io testbeds, focusing on VPP network data plane performance in + NIC-to-NIC switching topologies. VPP application runs in + bare-metal host user-mode handling NICs. TRex is used as a traffic generator. + +2. **VPP Vhostuser Performance with KVM VMs**: VPP VM service switching + performance tests using vhostuser virtual interface for + interconnecting multiple NF-in-VM instances. VPP vswitch + instance runs in bare-metal user-mode handling NICs and connecting + over vhost-user interfaces to VM instances each running VPP with virtio + virtual interfaces. Similarly to VPP Performance, tests are run across a + range of configurations. TRex is used as a traffic generator. + +3. **VPP Memif Performance with LXC and Docker Containers**: VPP + Container service switching performance tests using memif virtual + interface for interconnecting multiple VPP-in-container instances. 
+   VPP vswitch instance runs in bare-metal user-mode handling NICs and
+   connecting over memif (Slave side) interfaces to more instances of
+   VPP running in LXC or in Docker Containers, both with memif
+   interfaces (Master side). Similarly to VPP Performance, tests are
+   run across a range of configurations. TRex is used as a traffic
+   generator.
+
+4. **DPDK Performance**: VPP uses DPDK to drive the NICs and physical
+   interfaces. DPDK performance tests are used as a baseline to
+   profile performance of the DPDK sub-system. Two DPDK applications
+   are tested: Testpmd and L3fwd. DPDK tests are executed in the same
+   testing environment as VPP tests. DPDK Testpmd and L3fwd
+   applications run in host user-mode. TRex is used as a traffic
+   generator.
+
+5. **T-Rex Performance**: T-Rex performance tests are executed in physical
+   FD.io testbeds, focusing on T-Rex data plane performance in NIC-to-NIC
+   loopback topologies.
+
+6. **VPP Functional**: VPP functional tests are executed in virtual
+   FD.io testbeds, focusing on VPP packet processing functionality,
+   including both network data plane and in-line control plane. Tests
+   cover vNIC-to-vNIC and vNIC-to-nestedVM-to-vNIC forwarding topologies.
+   Scapy is used as a traffic generator.
+
+All CSIT test data included in this report is auto-generated from Robot
+Framework json output files produced by Linux Foundation FD.io Jenkins jobs
+executed against {{< release_vpp >}} artifacts.
+
+The FD.io CSIT system is developed using two main coding platforms: Robot
+Framework and Python. {{< release_csit >}} source code for the executed test
+suites is available in the corresponding CSIT branch in the directory
+`./tests/`. A local copy of the CSIT source code
+can be obtained by cloning the CSIT git repository - `git clone
+https://gerrit.fd.io/r/csit`.
diff --git a/docs/content/overview/csit/test_tags.md b/docs/content/overview/csit/test_tags.md
new file mode 100644
index 0000000000..8fc3021d6f
--- /dev/null
+++ b/docs/content/overview/csit/test_tags.md
@@ -0,0 +1,863 @@
+---
+title: "Test Tags"
+weight: 4
+---
+
+# Test Tags
+
+All CSIT test cases are labelled with Robot Framework tags used to allow for
+easy test case type identification, test case grouping and selection for
+execution. The following sections list currently used CSIT tags and their
+descriptions.
+
+## Testbed Topology Tags
+
+**2_NODE_DOUBLE_LINK_TOPO**
+
+    2 nodes connected in a circular topology with two links interconnecting
+    the devices.
+
+**2_NODE_SINGLE_LINK_TOPO**
+
+    2 nodes connected in a circular topology with at least one link
+    interconnecting devices.
+
+**3_NODE_DOUBLE_LINK_TOPO**
+
+    3 nodes connected in a circular topology with two links interconnecting
+    the devices.
+
+**3_NODE_SINGLE_LINK_TOPO**
+
+    3 nodes connected in a circular topology with at least one link
+    interconnecting devices.
+
+## Objective Tags
+
+**SKIP_PATCH**
+
+    Test case(s) marked not to run in case of vpp-csit-verify (i.e. VPP patch)
+    and csit-vpp-verify jobs (i.e. CSIT patch).
+
+**SKIP_VPP_PATCH**
+
+    Test case(s) marked not to run in case of vpp-csit-verify (i.e. VPP patch).
+
+## Environment Tags
+
+**HW_ENV**
+
+    DUTs and TGs are running on bare metal.
+
+**VM_ENV**
+
+    DUTs and TGs are running in a virtual environment.
+
+**VPP_VM_ENV**
+
+    DUTs with VPP and capable of running a Virtual Machine.
+
+## NIC Model Tags
+
+**NIC_Intel-X520-DA2**
+
+    Intel X520-DA2 NIC.
+
+**NIC_Intel-XL710**
+
+    Intel XL710 NIC.
+
+**NIC_Intel-X710**
+
+    Intel X710 NIC.
+
+**NIC_Intel-XXV710**
+
+    Intel XXV710 NIC.
+ +**NIC_Cisco-VIC-1227** + + VIC-1227 by Cisco. + +**NIC_Cisco-VIC-1385** + + VIC-1385 by Cisco. + +**NIC_Amazon-Nitro-50G** + + Amazon EC2 ENA NIC. + +## Scaling Tags + +**FIB_20K** + + 2x10,000 entries in single fib table + +**FIB_200K** + + 2x100,000 entries in single fib table + +**FIB_1M** + + 2x500,000 entries in single fib table + +**FIB_2M** + + 2x1,000,000 entries in single fib table + +**L2BD_1** + + Test with 1 L2 bridge domain. + +**L2BD_10** + + Test with 10 L2 bridge domains. + +**L2BD_100** + + Test with 100 L2 bridge domains. + +**L2BD_1K** + + Test with 1000 L2 bridge domains. + +**VLAN_1** + + Test with 1 VLAN sub-interface. + +**VLAN_10** + + Test with 10 VLAN sub-interfaces. + +**VLAN_100** + + Test with 100 VLAN sub-interfaces. + +**VLAN_1K** + + Test with 1000 VLAN sub-interfaces. + +**VXLAN_1** + + Test with 1 VXLAN tunnel. + +**VXLAN_10** + + Test with 10 VXLAN tunnels. + +**VXLAN_100** + + Test with 100 VXLAN tunnels. + +**VXLAN_1K** + + Test with 1000 VXLAN tunnels. + +**TNL_{t}** + + IPSec in tunnel mode - {t} tunnels. + +**SRC_USER_{u}** + + Traffic flow with {u} unique IPs (users) in one direction. + {u}=(1,10,100,1000,2000,4000). + +**100_FLOWS** + + Traffic stream with 100 unique flows (10 IPs/users x 10 UDP ports) in one + direction. + +**10k_FLOWS** + + Traffic stream with 10 000 unique flows (10 IPs/users x 1000 UDP ports) in + one direction. + +**100k_FLOWS** + + Traffic stream with 100 000 unique flows (100 IPs/users x 1000 UDP ports) in + one direction. + +**HOSTS_{h}** + + Stateless or stateful traffic stream with {h} client source IP4 addresses, + usually with 63 flow differing in source port number. Could be UDP or TCP. + If NAT is used, the clients are inside. Outside IP range can differ. + {h}=(1024,4096,16384,65536,262144). + +**GENEVE4_{t}TUN** + + Test with {t} GENEVE IPv4 tunnel. {t}=(1,4,16,64,256,1024) + +## Test Category Tags + +**DEVICETEST** + + All vpp_device functional test cases. + +**PERFTEST** + + All performance test cases. + +## VPP Device Type Tags + +**SCAPY** + + All test cases that uses Scapy for packet generation and validation. + +## Performance Type Tags + +**NDRPDR** + + Single test finding both No Drop Rate and Partial Drop Rate simultaneously. + The search is done by optimized algorithm which performs + multiple trial runs at different durations and transmit rates. + The results come from the final trials, which have duration of 30 seconds. + +**MRR** + + Performance tests where TG sends the traffic at maximum rate (line rate) + and reports total sent/received packets over trial duration. + The result is an average of 10 trials of 1 second duration. + +**SOAK** + + Performance tests using PLRsearch to find the critical load. + +**RECONF** + + Performance tests aimed to measure lost packets (time) when performing + reconfiguration while full throughput offered load is applied. + +## Ethernet Frame Size Tags + +These are describing the traffic offered by Traffic Generator, +"primary" traffic in case of asymmetric load. +For traffic between DUTs, or for "secondary" traffic, see ${overhead} value. + +**{b}B** + + {b} Bytes frames used for test. + +**IMIX** + + IMIX frame sequence (28x 64B, 16x 570B, 4x 1518B) used for test. + +## Test Type Tags + +**BASE** + + Baseline test cases, no encapsulation, no feature(s) configured in tests. + No scaling whatsoever, beyond minimum needed for RSS. + +**IP4BASE** + + IPv4 baseline test cases, no encapsulation, no feature(s) configured in + tests. Minimal number of routes. 
Other quantities may be scaled. + +**IP6BASE** + + IPv6 baseline test cases, no encapsulation, no feature(s) configured in + tests. + +**L2XCBASE** + + L2XC baseline test cases, no encapsulation, no feature(s) configured in + tests. + +**L2BDBASE** + + L2BD baseline test cases, no encapsulation, no feature(s) configured in + tests. + +**L2PATCH** + + L2PATCH baseline test cases, no encapsulation, no feature(s) configured in + tests. + +**SCALE** + + Scale test cases. Other tags specify which quantities are scaled. + Also applies if scaling is set on TG only (e.g. DUT works as IP4BASE). + +**ENCAP** + + Test cases where encapsulation is used. Use also encapsulation tag(s). + +**FEATURE** + + At least one feature is configured in test cases. Use also feature tag(s). + +**UDP** + + Tests which use any kind of UDP traffic (STL or ASTF profile). + +**TCP** + + Tests which use any kind of TCP traffic (STL or ASTF profile). + +**TREX** + + Tests which test trex traffic without any software DUTs in the traffic path. + +**UDP_UDIR** + + Tests which use unidirectional UDP traffic (STL profile only). + +**UDP_BIDIR** + + Tests which use bidirectional UDP traffic (STL profile only). + +**UDP_CPS** + + Tests which measure connections per second on minimal UDP pseudoconnections. + This implies ASTF traffic profile is used. + This tag selects specific output processing in PAL. + +**TCP_CPS** + + Tests which measure connections per second on empty TCP connections. + This implies ASTF traffic profile is used. + This tag selects specific output processing in PAL. + +**TCP_RPS** + + Tests which measure requests per second on empty TCP connections. + This implies ASTF traffic profile is used. + This tag selects specific output processing in PAL. + +**UDP_PPS** + + Tests which measure packets per second on lightweight UDP transactions. + This implies ASTF traffic profile is used. + This tag selects specific output processing in PAL. + +**TCP_PPS** + + Tests which measure packets per second on lightweight TCP transactions. + This implies ASTF traffic profile is used. + This tag selects specific output processing in PAL. + +**HTTP** + + Tests which use traffic formed of valid HTTP requests (and responses). + +**LDP_NGINX** + + LDP NGINX is un-modified NGINX with VPP via LD_PRELOAD. + +**NF_DENSITY** + + Performance tests that measure throughput of multiple VNF and CNF + service topologies at different service densities. + +## NF Service Density Tags + +**CHAIN** + + NF service density tests with VNF or CNF service chain topology(ies). + +**PIPE** + + NF service density tests with CNF service pipeline topology(ies). + +**NF_L3FWDIP4** + + NF service density tests with DPDK l3fwd IPv4 routing as NF workload. + +**NF_VPPIP4** + + NF service density tests with VPP IPv4 routing as NF workload. + +**{r}R{c}C** + + Service density matrix locator {r}R{c}C, {r}Row denoting number of + service instances, {c}Column denoting number of NFs per service + instance. {r}=(1,2,4,6,8,10), {c}=(1,2,4,6,8,10). + +**{n}VM{t}T** + + Service density {n}VM{t}T, {n}Number of NF Qemu VMs, {t}Number of threads + per NF. + +**{n}DCR{t}T** + + Service density {n}DCR{t}T, {n}Number of NF Docker containers, {t}Number of + threads per NF. + +**{n}_ADDED_CHAINS** + + {n}Number of chains (or pipelines) added (and/or removed) + during RECONF test. + +## Forwarding Mode Tags + +**L2BDMACSTAT** + + VPP L2 bridge-domain, L2 MAC static. + +**L2BDMACLRN** + + VPP L2 bridge-domain, L2 MAC learning. + +**L2XCFWD** + + VPP L2 point-to-point cross-connect. 
+ +**IP4FWD** + + VPP IPv4 routed forwarding. + +**IP6FWD** + + VPP IPv6 routed forwarding. + +**LOADBALANCER_MAGLEV** + + VPP Load balancer maglev mode. + +**LOADBALANCER_L3DSR** + + VPP Load balancer l3dsr mode. + +**LOADBALANCER_NAT4** + + VPP Load balancer nat4 mode. + +**N2N** + + Mode, where NICs from the same physical server are directly + connected with a cable. + +## Underlay Tags + +**IP4UNRLAY** + + IPv4 underlay. + +**IP6UNRLAY** + + IPv6 underlay. + +**MPLSUNRLAY** + + MPLS underlay. + +## Overlay Tags + +**L2OVRLAY** + + L2 overlay. + +**IP4OVRLAY** + + IPv4 overlay (IPv4 payload). + +**IP6OVRLAY** + + IPv6 overlay (IPv6 payload). + +## Tagging Tags + +**DOT1Q** + + All test cases with dot1q. + +**DOT1AD** + + All test cases with dot1ad. + +## Encapsulation Tags + +**ETH** + + All test cases with base Ethernet (no encapsulation). + +**LISP** + + All test cases with LISP. + +**LISPGPE** + + All test cases with LISP-GPE. + +**LISP_IP4o4** + + All test cases with LISP_IP4o4. + +**LISPGPE_IP4o4** + + All test cases with LISPGPE_IP4o4. + +**LISPGPE_IP6o4** + + All test cases with LISPGPE_IP6o4. + +**LISPGPE_IP4o6** + + All test cases with LISPGPE_IP4o6. + +**LISPGPE_IP6o6** + + All test cases with LISPGPE_IP6o6. + +**VXLAN** + + All test cases with Vxlan. + +**VXLANGPE** + + All test cases with VXLAN-GPE. + +**GRE** + + All test cases with GRE. + +**GTPU** + + All test cases with GTPU. + +**GTPU_HWACCEL** + + All test cases with GTPU_HWACCEL. + +**IPSEC** + + All test cases with IPSEC. + +**WIREGUARD** + + All test cases with WIREGUARD. + +**SRv6** + + All test cases with Segment routing over IPv6 dataplane. + +**SRv6_1SID** + + All SRv6 test cases with single SID. + +**SRv6_2SID_DECAP** + + All SRv6 test cases with two SIDs and with decapsulation. + +**SRv6_2SID_NODECAP** + + All SRv6 test cases with two SIDs and without decapsulation. + +**GENEVE** + + All test cases with GENEVE. + +**GENEVE_L3MODE** + + All test cases with GENEVE tunnel in L3 mode. + +**FLOW** + + All test cases with FLOW. + +**FLOW_DIR** + + All test cases with FLOW_DIR. + +**FLOW_RSS** + + All test cases with FLOW_RSS. + +**NTUPLE** + + All test cases with NTUPLE. + +**L2TPV3** + + All test cases with L2TPV3. + +## Interface Tags + +**PHY** + + All test cases which use physical interface(s). + +**GSO** + + All test cases which uses Generic Segmentation Offload. + +**VHOST** + + All test cases which uses VHOST. + +**VHOST_1024** + + All test cases which uses VHOST DPDK driver with qemu queue size set + to 1024. + +**VIRTIO** + + All test cases which uses VIRTIO native VPP driver. + +**VIRTIO_1024** + + All test cases which uses VIRTIO native VPP driver with qemu queue size set + to 1024. + +**CFS_OPT** + + All test cases which uses VM with optimised scheduler policy. + +**TUNTAP** + + All test cases which uses TUN and TAP. + +**AFPKT** + + All test cases which uses AFPKT. + +**NETMAP** + + All test cases which uses Netmap. + +**MEMIF** + + All test cases which uses Memif. + +**SINGLE_MEMIF** + + All test cases which uses only single Memif connection per DUT. One DUT + instance is running in container having one physical interface exposed to + container. + +**LBOND** + + All test cases which uses link bonding (BondEthernet interface). + +**LBOND_DPDK** + + All test cases which uses DPDK link bonding. + +**LBOND_VPP** + + All test cases which uses VPP link bonding. + +**LBOND_MODE_XOR** + + All test cases which uses link bonding with mode XOR. 
+
+**LBOND_MODE_LACP**
+
+    All test cases which use link bonding with mode LACP.
+
+**LBOND_LB_L34**
+
+    All test cases which use link bonding with load-balance mode l34.
+
+**LBOND_{n}L**
+
+    All test cases which use {n} link(s) for link bonding.
+
+**DRV_{d}**
+
+    All test cases where the NIC driver for the DUT is set to {d}.
+    Default is VFIO_PCI. {d}=(AVF, RDMA_CORE, VFIO_PCI, AF_XDP).
+
+**TG_DRV_{d}**
+
+    All test cases where the NIC driver for the TG is set to {d}.
+    Default is IGB_UIO. {d}=(RDMA_CORE, IGB_UIO).
+
+**RXQ_SIZE_{n}**
+
+    All test cases where the RXQ size (RX descriptors) is set to {n}.
+    Default is 0, which means the VPP (API) default.
+
+**TXQ_SIZE_{n}**
+
+    All test cases where the TXQ size (TX descriptors) is set to {n}.
+    Default is 0, which means the VPP (API) default.
+
+## Feature Tags
+
+**IACLDST**
+
+    iACL destination.
+
+**ADLALWLIST**
+
+    ADL allowlist.
+
+**NAT44**
+
+    NAT44 configured and tested.
+
+**NAT64**
+
+    NAT64 configured and tested.
+
+**ACL**
+
+    ACL plugin configured and tested.
+
+**IACL**
+
+    ACL plugin configured and tested on input path.
+
+**OACL**
+
+    ACL plugin configured and tested on output path.
+
+**ACL_STATELESS**
+
+    ACL plugin configured and tested in stateless mode (permit action).
+
+**ACL_STATEFUL**
+
+    ACL plugin configured and tested in stateful mode (permit+reflect action).
+
+**ACL1**
+
+    ACL plugin configured and tested with 1 not-hitting ACE.
+
+**ACL10**
+
+    ACL plugin configured and tested with 10 not-hitting ACEs.
+
+**ACL50**
+
+    ACL plugin configured and tested with 50 not-hitting ACEs.
+
+**SRv6_PROXY**
+
+    SRv6 endpoint to SR-unaware appliance via proxy.
+
+**SRv6_PROXY_STAT**
+
+    SRv6 endpoint to SR-unaware appliance via static proxy.
+
+**SRv6_PROXY_DYN**
+
+    SRv6 endpoint to SR-unaware appliance via dynamic proxy.
+
+**SRv6_PROXY_MASQ**
+
+    SRv6 endpoint to SR-unaware appliance via masquerading proxy.
+
+## Encryption Tags
+
+**IPSECSW**
+
+    Crypto in software.
+
+**IPSECHW**
+
+    Crypto in hardware.
+
+**IPSECTRAN**
+
+    IPSec in transport mode.
+
+**IPSECTUN**
+
+    IPSec in tunnel mode.
+
+**IPSECINT**
+
+    IPSec in interface mode.
+
+**AES**
+
+    IPSec using AES algorithms.
+
+**AES_128_CBC**
+
+    IPSec using AES 128 CBC algorithms.
+
+**AES_128_GCM**
+
+    IPSec using AES 128 GCM algorithms.
+
+**AES_256_GCM**
+
+    IPSec using AES 256 GCM algorithms.
+
+**HMAC**
+
+    IPSec using HMAC integrity algorithms.
+
+**HMAC_SHA_256**
+
+    IPSec using HMAC SHA 256 integrity algorithms.
+
+**HMAC_SHA_512**
+
+    IPSec using HMAC SHA 512 integrity algorithms.
+
+**SCHEDULER**
+
+    IPSec using the crypto sw scheduler engine.
+
+**FASTPATH**
+
+    IPSec policy mode with SPD fast path enabled.
+
+## Client-Workload Tags
+
+**VM**
+
+    All test cases which use at least one virtual machine.
+
+**LXC**
+
+    All test cases which use a Linux container and LXC utils.
+
+**DRC**
+
+    All test cases which use at least one Docker container.
+
+**DOCKER**
+
+    All test cases which use Docker as the container manager.
+
+**APP**
+
+    All test cases with specific APP use.
+
+## Container Orchestration Tags
+
+**{n}VSWITCH**
+
+    {n} VPP running in {n} Docker container(s) acting as a VSWITCH.
+    {n}=(1).
+
+**{n}VNF**
+
+    {n} VPP running in {n} Docker container(s) acting as a VNF workload.
+    {n}=(1).
+
+## Multi-Threading Tags
+
+**STHREAD**
+
+    *Dynamic tag*.
+    All test cases using a single poll mode thread.
+
+**MTHREAD**
+
+    *Dynamic tag*.
+    All test cases using more than one poll mode driver thread.
+
+**{n}NUMA**
+
+    All test cases with packet processing on {n} socket(s). {n}=(1,2).
+
+**{c}C**
+
+    {c} worker thread(s) pinned to {c} dedicated physical core(s); or, if
+    HyperThreading is enabled, {c}*2 worker threads each pinned to a separate
+    logical core within 1 dedicated physical core. Main thread pinned to
+    core 1. {c}=(1,2,4).
+
+**{t}T{c}C**
+
+    *Dynamic tag*.
+    {t} worker threads pinned to {c} dedicated physical cores. Main thread
+    pinned to core 1. By default, CSIT configures the same number of receive
+    queues per interface as worker threads. {t}=(1,2,4,8), {c}=(1,2,4).
diff --git a/docs/content/release_notes/_index.md b/docs/content/release_notes/_index.md
index c08254e068..3a8318d09f 100644
--- a/docs/content/release_notes/_index.md
+++ b/docs/content/release_notes/_index.md
@@ -1,5 +1,6 @@
 ---
+bookCollapseSection: false
 bookFlatSection: true
-title: "Release notes"
-weight: 2
----
\ No newline at end of file
+title: "Release Notes"
+weight: 3
+---
diff --git a/docs/content/release_notes/csit_rls2306/_index.md b/docs/content/release_notes/csit_rls2306/_index.md
new file mode 100644
index 0000000000..27abbb79a6
--- /dev/null
+++ b/docs/content/release_notes/csit_rls2306/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "CSIT rls2306"
+weight: 1
+---
diff --git a/docs/content/release_notes/csit_rls2306/dpdk_performance.md b/docs/content/release_notes/csit_rls2306/dpdk_performance.md
new file mode 100644
index 0000000000..3d3172c7c9
--- /dev/null
+++ b/docs/content/release_notes/csit_rls2306/dpdk_performance.md
@@ -0,0 +1,27 @@
+---
+title: "DPDK Performance"
+weight: 2
+---
+
+# CSIT 23.06 - DPDK Performance
+
+1. TEST FRAMEWORK
+2. DPDK PERFORMANCE TESTS
+3. DPDK RELEASE VERSION CHANGE
+
+# Known Issues
+
+List of known issues in CSIT 23.06 for DPDK performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
+
+
+## New
+
+List of new issues in CSIT 23.06 for DPDK performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
diff --git a/docs/content/release_notes/csit_rls2306/trex_performance.md b/docs/content/release_notes/csit_rls2306/trex_performance.md
new file mode 100644
index 0000000000..02f7c68102
--- /dev/null
+++ b/docs/content/release_notes/csit_rls2306/trex_performance.md
@@ -0,0 +1,24 @@
+---
+title: "TRex Performance"
+weight: 3
+---
+
+# CSIT 23.06 - TRex Performance
+
+1. TEST FRAMEWORK
+
+# Known Issues
+
+List of known issues in CSIT 23.06 for TRex performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
+
+## New
+
+List of new issues in CSIT 23.06 for TRex performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
diff --git a/docs/content/release_notes/csit_rls2306/vpp_device.md b/docs/content/release_notes/csit_rls2306/vpp_device.md
new file mode 100644
index 0000000000..c5d544b598
--- /dev/null
+++ b/docs/content/release_notes/csit_rls2306/vpp_device.md
@@ -0,0 +1,24 @@
+---
+title: "VPP Device"
+weight: 4
+---
+
+# CSIT 23.06 - VPP Device
+
+1. TEST FRAMEWORK
+
+# Known Issues
+
+List of known issues in CSIT 23.06 for VPP functional tests in VPP Device:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
+
+## New
+
+List of new issues in CSIT 23.06 for VPP functional tests in VPP Device:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
diff --git a/docs/content/release_notes/csit_rls2306/vpp_performance.md b/docs/content/release_notes/csit_rls2306/vpp_performance.md
new file mode 100644
index 0000000000..686420fc0f
--- /dev/null
+++ b/docs/content/release_notes/csit_rls2306/vpp_performance.md
@@ -0,0 +1,42 @@
+---
+title: "VPP Performance"
+weight: 1
+---
+
+# CSIT 23.06 - VPP Performance
+
+1. VPP PERFORMANCE TESTS
+2. TEST FRAMEWORK
+3. PRESENTATION AND ANALYTICS LAYER
+
+# Known Issues
+
+## New
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
+
+## Previous
+
+Issues reported in previous releases which still affect the current results:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
+
+## Fixed
+
+Issues reported in previous releases which were fixed in this release:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
+
+# Root Cause Analysis for Performance Changes
+
+List of RCAs in CSIT 23.06 for VPP performance changes:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
diff --git a/docs/content/release_notes/dpdk.md b/docs/content/release_notes/dpdk.md
deleted file mode 100644
index facefe4b23..0000000000
--- a/docs/content/release_notes/dpdk.md
+++ /dev/null
@@ -1,31 +0,0 @@
----
-title: "DPDK Performance"
-weight: 2
----
-
-# Changes in {{< release_csit >}}
-
-1. TEST FRAMEWORK
-   - **CSIT test environment** version has been updated to ver. 11, see
-     [Environment Versioning]({{< ref "infrastructure#Release Notes" >}}).
-2. DPDK PERFORMANCE TESTS
-   - No updates
-3. DPDK RELEASE VERSION CHANGE
-   - {{< release_csit >}} tested {{< release_dpdk >}}, as used by
-     {{< release_vpp >}}.
-
-# Known Issues
-
-List of known issues in {{< release_csit >}} for DPDK performance tests:
-
- **#** | **JiraID** | **Issue Description**
--------|--------------------------------------------------|---------------------------------------------------------------------------
- 1 | [CSIT-1848](https://jira.fd.io/browse/CSIT-1848) | 2n-clx, 3n-alt: sporadic testpmd/l3fwd tests fail with no or low traffic.
-
-
-## New
-
-List of new issues in {{< release_csit >}} for DPDK performance tests:
-
- **#** | **JiraID** | **Issue Description**
--------|--------------------------------------------------|---------------------------------------------------------------------------
\ No newline at end of file
diff --git a/docs/content/release_notes/previous/_index.md b/docs/content/release_notes/previous/_index.md
new file mode 100644
index 0000000000..40716f8315
--- /dev/null
+++ b/docs/content/release_notes/previous/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: true
+bookFlatSection: false
+title: "Previous"
+weight: 2
+---
diff --git a/docs/content/release_notes/previous/csit_rls2302/_index.md b/docs/content/release_notes/previous/csit_rls2302/_index.md
new file mode 100644
index 0000000000..aac03a946d
--- /dev/null
+++ b/docs/content/release_notes/previous/csit_rls2302/_index.md
@@ -0,0 +1,6 @@
+---
+bookCollapseSection: false
+bookFlatSection: false
+title: "CSIT rls2302"
+weight: 1
+---
diff --git a/docs/content/release_notes/previous/csit_rls2302/dpdk_performance.md b/docs/content/release_notes/previous/csit_rls2302/dpdk_performance.md
new file mode 100644
index 0000000000..320dccf746
--- /dev/null
+++ b/docs/content/release_notes/previous/csit_rls2302/dpdk_performance.md
@@ -0,0 +1,31 @@
+---
+title: "DPDK Performance"
+weight: 2
+---
+
+# CSIT 23.02 - DPDK Performance
+
+1. TEST FRAMEWORK
+   - **CSIT test environment** version has been updated to ver. 11, see
+     [Environment Versioning]({{< ref "../../../infrastructure/fdio_csit_testbed_versioning" >}}).
+2. DPDK PERFORMANCE TESTS
+   - No updates
+3. DPDK RELEASE VERSION CHANGE
+   - CSIT 23.02 tested DPDK 22.07, as used by VPP 23.02.
+
+# Known Issues
+
+List of known issues in CSIT 23.02 for DPDK performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    | [CSIT-1848](https://jira.fd.io/browse/CSIT-1848) | 2n-clx, 3n-alt: sporadic testpmd/l3fwd tests fail with no or low traffic.
+
+
+## New
+
+List of new issues in CSIT 23.02 for DPDK performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
diff --git a/docs/content/release_notes/previous/csit_rls2302/trex_performance.md b/docs/content/release_notes/previous/csit_rls2302/trex_performance.md
new file mode 100644
index 0000000000..67f2947891
--- /dev/null
+++ b/docs/content/release_notes/previous/csit_rls2302/trex_performance.md
@@ -0,0 +1,26 @@
+---
+title: "TRex Performance"
+weight: 3
+---
+
+# CSIT 23.02 - TRex Performance
+
+1. TEST FRAMEWORK
+   - **CSIT test environment** version has been updated to ver. 11, see
+     [Environment Versioning]({{< ref "../../../infrastructure/fdio_csit_testbed_versioning" >}}).
+
+# Known Issues
+
+List of known issues in CSIT 23.02 for TRex performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    | [CSIT-1876](https://jira.fd.io/browse/CSIT-1876) | 1n-aws: TRex NDR PDR ALL IP4 scale and L2 scale tests failing with 50% packet loss. CSIT removed ip4scale and l2scale except ip4scale2m where it's still failing.
+
+## New
+
+List of new issues in CSIT 23.02 for TRex performance tests:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
diff --git a/docs/content/release_notes/previous/csit_rls2302/vpp_device.md b/docs/content/release_notes/previous/csit_rls2302/vpp_device.md
new file mode 100644
index 0000000000..44ba9f5ce5
--- /dev/null
+++ b/docs/content/release_notes/previous/csit_rls2302/vpp_device.md
@@ -0,0 +1,26 @@
+---
+title: "VPP Device"
+weight: 4
+---
+
+# CSIT 23.02 - VPP Device
+
+1. TEST FRAMEWORK
+   - **CSIT test environment** version has been updated to ver. 11, see
+     [Environment Versioning]({{< ref "../../../infrastructure/fdio_csit_testbed_versioning" >}}).
+
+# Known Issues
+
+List of known issues in CSIT 23.02 for VPP functional tests in VPP Device:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
+
+## New
+
+List of new issues in CSIT 23.02 for VPP functional tests in VPP Device:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    |            |
diff --git a/docs/content/release_notes/previous/csit_rls2302/vpp_performance.md b/docs/content/release_notes/previous/csit_rls2302/vpp_performance.md
new file mode 100644
index 0000000000..072c55f14e
--- /dev/null
+++ b/docs/content/release_notes/previous/csit_rls2302/vpp_performance.md
@@ -0,0 +1,93 @@
+---
+title: "VPP Performance"
+weight: 1
+---
+
+# CSIT 23.02 - VPP Performance
+
+1. VPP PERFORMANCE TESTS
+   - **Enhanced and added VPP hoststack tests** to daily and weekly
+     trending, including: QUIC VPP Echo, UDP+TCP LD_PRELOAD iPerf3,
+     LD_PRELOAD NGINX.
+   - **Added Nvidia/Mellanox DPDK tests** to daily and weekly trending
+     and report, in addition to the RDMA_CORE ones that were already
+     there.
+   - **Jumbo frames tests** got fixed, and a number of them were
+     re-added to report coverage tests.
+   - **Intel Xeon SKX performance testbeds** got decommissioned and
+     removed from the FD.io performance lab.
+2. TEST FRAMEWORK
+   - **CSIT test environment** version has not changed from ver. 11 used
+     in the previous release, see
+     [Environment Versioning]({{< ref "../../../infrastructure/fdio_csit_testbed_versioning" >}}).
+   - **CSIT PAPI optimizations for scale** got applied, improving PAPI
+     programming speed especially for large scale tests. VAT has now
+     been completely deprecated in CSIT.
+   - **General Code Housekeeping**: Ongoing code optimizations and bug
+     fixes.
+3. PRESENTATION AND ANALYTICS LAYER
+   - [Performance dashboard](https://csit.fd.io/) got updated with the
+     addition of VPP telemetry trending across all VPP tests. A number
+     of code and AWS resource usage optimizations got applied to the
+     data processing pipeline and UI frontend and backend.
+   - Examples of release iterative data visualisation:
+     - [Packet throughput 2n-icx-e810cq-ip4-base-scale-pdr](https://csit.fd.io/report/#eNrdVcluwjAQ_Zr0ggbZDml64QDkP5BxhhJlwYxNVPr1OAhpYiGO7cEHb3pv1qeRnT8T7h1266zYZuU2U2VThy3LN4twUOdULhSM1oLKl-FG2KF2CGqAxvyAFOIblZX4JYW5gB6P0NgVfK4OIA2gP02vsA6Tja1pcq12T9cvcRitr57RED1CRiQGo7SYZk-3GeddsszXhJoNQsYMeXSzZOKamHUk3aNrfpGpoQuMm9BohqSJ_fubnaHPRpXVg_F3qjijO1RCtEBDnZo8UXFJ6NQmKlGbgjp9ujPU_8cEFdXHcKb-8Q8V1R2PI8PX)
+     - [Speedup Multi-Core throughput graph for 2n-icx-e810cq-ip4-base-pdr](https://csit.fd.io/report/#eNrtlM8OgjAMxp8GL6aGFRAvHlTew8xRhAR1bpOoT-8wJIUYEg8mXjjsX35fu65fMusuhvaW6nWQbIN0G2Ba5X4Kos3cL6a2GIUIjdaA0cLvDNUkLQGeoVJ3EGF4JNSCViJUV5BNAZWOYRkfQCggV7YnPw5tjM5Nmxp3XeqPe5jmN8fU3z4gDRmGg7JYpstHTzNWLOulIckBvmJGjmyvmOGbWFUYeSJbPYmlvgvMlW80I6GG-d1D92jXqDR7K37qCk6ujLuC_3IlnlwZdyX-0pUkm50v5vT-yZLsBXP6Swk)
+     - [MRR, NDR and PDR comparison for 2n-icx-e810cq-ip4-base](https://csit.fd.io/report/#eNrtVMsOgjAQ_Bq8mDW0gHjxoPIfppZVSQDrthLx6y2GuBBj4kVPHvrKzG6nM0mtOxFuLZbLIFkH6TqQaZH7KYhWU79QaWUUSmiMARnN_I6wRGURZA2FvoIIwwNKI3AhQn0G1eyhMDHM4x0IDeiO3cmPXVdTEXWt5aZv_XIPo_nFMepvHyENEoMjWUwzx3bAeSeW-YpQcYFXzJBDOxAzfhOz9qQqtMUNmepdYFx7oxkSetzftWaA9kal2YPx5VTq_J_KR6n0Rv0mFfNP5bNUzDOVJJvUJ6oeP1mS3QG2H0sT)
+     - [Normalized throughput architecture comparison for 2n-[icx|clx]-e810cq-ip4-base-pdr](https://csit.fd.io/report/#eNrVk00OgjAQhU-DGzOGFhA3LlTuYUoZhKRibSsRT28hJANRF-500b98rzOvM6l1F4NHi2obJPsg3Qc8rQs_BdFu6RejLI9CDq3WwKOV3xlUKCwCb0CqO7AwPCHXDDcslFcQbQm1jmEd58AkoKv6kx95f0cXpg_ND2PolzxEi5sj6rPPSIuG4MwWyXTVTTSfzJJeGBR0wTsm5NBOzMzfRKrSiDPa-oEk9VUgLn2hCTE5j-86PaFjodJsUHzXlVr-UVfem_35riTZormY8_BneNpvhRpzJNkT6FzkMw)
+     - [NICs comparison for 2n-icx-ip4-base-pdr](https://csit.fd.io/report/#eNrll99ugyAUh5_G3SxnESx1N7to53s0FI6rmbYMnKF7-qFrcmRmV7vReuG__A74wSckuvZi8eCwfknEPsn3Cc8rHU5JtnsMF1s7nqUcOmOAZ0_hzmKN0iHwM6jaA0vTN-SGKS_EVkJTewGV2cB2cwSmANtT_xSOY9_IaNv3zV9vfU9eRKn-bCkNr4-SDi2FEReVmdN1VPMnLTWQFiW1CMgUtehGNPGgqKq0skFXfSGVhmmgXIWppoipuP_2akbpbabyYqj4txerG7kcLz3tnXvBZ5aqD5BduQAtBLsOK9ro9-Vo6Wnv1sswUJ-zdPZLJSJdgY_ZL5IY9U6NcPEzTN8NX14JXpsZW_mNewi46zAz691rwroKJzPfwaaws7ciiofzxTbDv6QovgETwNPp)
+
+# Known Issues
+
+Editing note: the known issues listed below need to be updated to reflect
+the current state as tracked on the
+[CSIT TestFailuresTracking wiki](https://wiki.fd.io/view/CSIT/TestFailuresTracking).
+
+## New
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    | [CSIT-1890](https://jira.fd.io/browse/CSIT-1890) | 3n-alt: Tests failing until 40GE interface comes up.
+
+## Previous
+
+Issues reported in previous releases which still affect the current results:
+
+**#** | **JiraID** | **Issue Description**
+------|-------------------------------------------------------------------------------------------------|---------------
+ 1    | [CSIT-1782](https://jira.fd.io/browse/CSIT-1782) | Multicore AVF tests are failing when trying to create interface. Frequency is reduced by CSIT workaround, but occasional failures do still happen.
+ 2    | [CSIT-1785](https://jira.fd.io/browse/CSIT-1785) [VPP-1972](https://jira.fd.io/browse/VPP-1972) | NAT44ED tests failing to establish all TCP sessions, at least for max scale, in allotted time (limited by the session 500s timeout), due to worse slow path performance than previously measured and calibrated for. CSIT removed the max scale NAT tests to avoid this issue.
+ 3    | [CSIT-1799](https://jira.fd.io/browse/CSIT-1799) | All NAT44-ED 16M sessions CPS scale tests fail while setting NAT44 address range.
+ 4    | [CSIT-1800](https://jira.fd.io/browse/CSIT-1800) | All Geneve L3 mode scale tests (1024 tunnels) are failing.
+ 5    | [CSIT-1801](https://jira.fd.io/browse/CSIT-1801) | 9000B payload frames not forwarded over tunnels due to violating supported Max Frame Size (VxLAN, LISP,
+ 6    | [CSIT-1802](https://jira.fd.io/browse/CSIT-1802) | all testbeds: AF-XDP - NDR tests failing from time to time.
+ 7    | [CSIT-1804](https://jira.fd.io/browse/CSIT-1804) | All testbeds: NDR tests failing from time to time.
+ 8    | [CSIT-1808](https://jira.fd.io/browse/CSIT-1808) | All tests with 9000B payload frames not forwarded over memif interfaces.
+ 9    | [CSIT-1827](https://jira.fd.io/browse/CSIT-1827) | 3n-icx, 3n-skx: all AVF crypto tests sporadically fail. 1518B with no traffic, IMIX with excessive
+ 10   | [CSIT-1835](https://jira.fd.io/browse/CSIT-1835) | 3n-icx: QUIC vppecho BPS tests failing on timeout when checking hoststack finished.
+ 11   | [CSIT-1849](https://jira.fd.io/browse/CSIT-1849) | 2n-skx, 2n-clx, 2n-icx: UDP 16m TPUT tests fail to create all sessions.
+ 12   | [CSIT-1864](https://jira.fd.io/browse/CSIT-1864) | 2n-clx: half of the packets lost on PDR tests.
+ 13   | [CSIT-1877](https://jira.fd.io/browse/CSIT-1877) | 3n-tsh: all VM tests failing to boot VM.
+ 14   | [CSIT-1883](https://jira.fd.io/browse/CSIT-1883) | 3n-snr: All hwasync wireguard tests failing when trying to verify device.
+ 15   | [CSIT-1884](https://jira.fd.io/browse/CSIT-1884) | 2n-clx, 2n-icx: All NAT44DET NDR PDR IMIX over 1M sessions BIDIR tests failing to create enough sessions.
+ 16   | [CSIT-1885](https://jira.fd.io/browse/CSIT-1885) | 3n-icx: 9000b ip4 ip6 l2 NDRPDR AVF tests are failing to forward traffic.
+ 17   | [CSIT-1886](https://jira.fd.io/browse/CSIT-1886) | 3n-icx: Wireguard tests with 100 and more tunnels are failing PDR criteria.
+
+## Fixed
+
+Issues reported in previous releases which were fixed in this release:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    | [CSIT-1868](https://jira.fd.io/browse/CSIT-1868) | 2n-clx: ALL ldpreload-nginx tests fail when trying to start nginx.
+ 2    | [CSIT-1871](https://jira.fd.io/browse/CSIT-1871) | 3n-snr: 25GE interface between SUT and TG/TRex goes down randomly.
+
+# Root Cause Analysis for Performance Changes
+
+List of RCAs in CSIT 23.02 for VPP performance changes:
+
+**#** | **JiraID** | **Issue Description**
+------|--------------------------------------------------|--------------------------------------------------------------
+ 1    | [CSIT-1887](https://jira.fd.io/browse/CSIT-1887) | rls2210 RCA: ASTF tests: TRex upgrade decreased TRex performance. NAT results not affected, except on Denverton due to interference from VPP-2010.
+ 2    | [CSIT-1888](https://jira.fd.io/browse/CSIT-1888) | rls2210 RCA: testbed differences, especially for ipsec. Not caused by VPP code or CSIT code. Most probable cause is clang-14 behavior.
+ 3    | [CSIT-1889](https://jira.fd.io/browse/CSIT-1889) | rls2210 RCA: policy-outbound-nocrypto. When VPP added SPD fast path matching (Gerrit 36097), it decreased MRR of the corresponding tests, at least on 3n-alt.
diff --git a/docs/content/release_notes/trex.md b/docs/content/release_notes/trex.md deleted file mode 100644 index 3794dc159c..0000000000 --- a/docs/content/release_notes/trex.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -title: "TRex Performance" -weight: 3 ---- - -# Changes in {{< release_csit >}} - -1. TEST FRAMEWORK - - **CSIT test environment** version has been updated to ver. 11, see - [Environment Versioning]({{< ref "infrastructure#Release Notes" >}}). - -# Known Issues - -List of known issues in {{< release_csit >}} for TRex performance tests - - **#** | **JiraID** | **Issue Description** --------|--------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 1 | [CSIT-1876](https://jira.fd.io/browse/CSIT-1876) | 1n-aws: TRex NDR PDR ALL IP4 scale and L2 scale tests failing with 50% packet loss. CSIT removed ip4scale and l2scale except ip4scale2m where it's still failing. - - -## New - -List of new issues in {{< release_csit >}} for TRex performance tests: - - **#** | **JiraID** | **Issue Description** --------|--------------------------------------------------|--------------------------------------------------------------------------- \ No newline at end of file diff --git a/docs/content/release_notes/vpp.md b/docs/content/release_notes/vpp.md deleted file mode 100644 index 48805ba574..0000000000 --- a/docs/content/release_notes/vpp.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: "VPP Performance" -weight: 1 ---- - -# Changes in {{< release_csit >}} - -1. VPP PERFORMANCE TESTS - - **Enhanced and added VPP hoststack tests** to daily and weekly - trending including: Quic VPP Echo, UDP+TCP LD_PRELOAD iPerf3, - LD_PRELOAD NGINX. - - **Added Nvidia/Mellanox DPDK tests** to daily and weekly trending - and report, in addition to RDMA_CORE ones that were already - there. - - **Jumbo frames tests** got fixed and re-added number of to report - coverage tests. - - **Intel Xeon SKX performance testbeds** got decommissioned and - removed from FD.io performance lab. -2. TEST FRAMEWORK - - **CSIT test environment** version has not changed from ver. 11 used - in previous release, see - [Environment Versioning]({{< ref "infrastructure#Release Notes" >}}). - - **CSIT PAPI optimizations for scale** got applied improving PAPI - programming speed especially for large scale tests. VAT has been - now completely deprecated from CSIT. - - **General Code Housekeeping**: Ongoing code optimizations and bug - fixes. -3. PRESENTATION AND ANALYTICS LAYER - - [Performance dashboard](https://csit.fd.io/) got updated with - addition of VPP telemetry trending across all VPP tests. A number - of code and AWS resource usage optimizations got applied to the - data processing pipeline and UI frontend and backend. 
- - Examples of release iterative data visualisation: - - - [Packet throughput 2n-icx-e810cq-ip4-base-scale-pdr](https://csit.fd.io/report/#eNrdVcluwjAQ_Zr0ggbZDml64QDkP5BxhhJlwYxNVPr1OAhpYiGO7cEHb3pv1qeRnT8T7h1266zYZuU2U2VThy3LN4twUOdULhSM1oLKl-FG2KF2CGqAxvyAFOIblZX4JYW5gB6P0NgVfK4OIA2gP02vsA6Tja1pcq12T9cvcRitr57RED1CRiQGo7SYZk-3GeddsszXhJoNQsYMeXSzZOKamHUk3aNrfpGpoQuMm9BohqSJ_fubnaHPRpXVg_F3qjijO1RCtEBDnZo8UXFJ6NQmKlGbgjp9ujPU_8cEFdXHcKb-8Q8V1R2PI8PX) - - [Speedup Multi-Core throughput graph for 2n-icx-e810cq-ip4-base-pdr](https://csit.fd.io/report/#eNrtlM8OgjAMxp8GL6aGFRAvHlTew8xRhAR1bpOoT-8wJIUYEg8mXjjsX35fu65fMusuhvaW6nWQbIN0G2Ba5X4Kos3cL6a2GIUIjdaA0cLvDNUkLQGeoVJ3EGF4JNSCViJUV5BNAZWOYRkfQCggV7YnPw5tjM5Nmxp3XeqPe5jmN8fU3z4gDRmGg7JYpstHTzNWLOulIckBvmJGjmyvmOGbWFUYeSJbPYmlvgvMlW80I6GG-d1D92jXqDR7K37qCk6ujLuC_3IlnlwZdyX-0pUkm50v5vT-yZLsBXP6Swk>) - - [MRR, NDR and PDR comparison for 2n-icx-e810cq-ip4-base](https://csit.fd.io/report/#eNrtVMsOgjAQ_Bq8mDW0gHjxoPIfppZVSQDrthLx6y2GuBBj4kVPHvrKzG6nM0mtOxFuLZbLIFkH6TqQaZH7KYhWU79QaWUUSmiMARnN_I6wRGURZA2FvoIIwwNKI3AhQn0G1eyhMDHM4x0IDeiO3cmPXVdTEXWt5aZv_XIPo_nFMepvHyENEoMjWUwzx3bAeSeW-YpQcYFXzJBDOxAzfhOz9qQqtMUNmepdYFx7oxkSetzftWaA9kal2YPx5VTq_J_KR6n0Rv0mFfNP5bNUzDOVJJvUJ6oeP1mS3QG2H0sT>) - - [Normalized throughput architecture comparison for 2n-[icx|clx]-e810cq-ip4-base-pdr](https://csit.fd.io/report/#eNrVk00OgjAQhU-DGzOGFhA3LlTuYUoZhKRibSsRT28hJANRF-500b98rzOvM6l1F4NHi2obJPsg3Qc8rQs_BdFu6RejLI9CDq3WwKOV3xlUKCwCb0CqO7AwPCHXDDcslFcQbQm1jmEd58AkoKv6kx95f0cXpg_ND2PolzxEi5sj6rPPSIuG4MwWyXTVTTSfzJJeGBR0wTsm5NBOzMzfRKrSiDPa-oEk9VUgLn2hCTE5j-86PaFjodJsUHzXlVr-UVfem_35riTZormY8_BneNpvhRpzJNkT6FzkMw>) - - [NICs comparison for 2n-icx-ip4-base-pdr](https://csit.fd.io/report/#eNrll99ugyAUh5_G3SxnESx1N7to53s0FI6rmbYMnKF7-qFrcmRmV7vReuG__A74wSckuvZi8eCwfknEPsn3Cc8rHU5JtnsMF1s7nqUcOmOAZ0_hzmKN0iHwM6jaA0vTN-SGKS_EVkJTewGV2cB2cwSmANtT_xSOY9_IaNv3zV9vfU9eRKn-bCkNr4-SDi2FEReVmdN1VPMnLTWQFiW1CMgUtehGNPGgqKq0skFXfSGVhmmgXIWppoipuP_2akbpbabyYqj4txerG7kcLz3tnXvBZ5aqD5BduQAtBLsOK9ro9-Vo6Wnv1sswUJ-zdPZLJSJdgY_ZL5IY9U6NcPEzTN8NX14JXpsZW_mNewi46zAz691rwroKJzPfwaaws7ciiofzxTbDv6QovgETwNPp>) - -# Known Issues - -Editing Note: below listed known issues need to be updated to reflect the -current state as tracked on -[CSIT TestFailuresTracking wiki](https://wiki.fd.io/view/CSIT/TestFailuresTracking). - -## New - - **#** | **JiraID** | **Issue Description** --------|--------------------------------------------------|------------------------------------------------------ - 1 | [CSIT-1890](https://jira.fd.io/browse/CSIT-1890) | 3n-alt: Tests failing until 40Ge Interface comes up. - - -## Previous - -Issues reported in previous releases which still affect the current results. - - **#** | **JiraID** | **Issue Description** --------|-------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 1 | [CSIT-1782](https://jira.fd.io/browse/CSIT-1782) | Multicore AVF tests are failing when trying to create interface. Frequency is reduced by CSIT workaround, but occasional failures do still happen. - 2 | [CSIT-1785](https://jira.fd.io/browse/CSIT-1785) [VPP-1972](https://jira.fd.io/browse/VPP-1972) | NAT44ED tests failing to establish all TCP sessions. 
At least for max scale, in allotted time (limited by session 500s timeout) due to worse slow path performance than previously measured and calibrated for. CSIT removed the max scale NAT tests to avoid this issue. - 3 | [CSIT-1799](https://jira.fd.io/browse/CSIT-1799) | All NAT44-ED 16M sessions CPS scale tests fail while setting NAT44 address range. - 4 | [CSIT-1800](https://jira.fd.io/browse/CSIT-1800) | All Geneve L3 mode scale tests (1024 tunnels) are failing. - 5 | [CSIT-1801](https://jira.fd.io/browse/CSIT-1801) | 9000B payload frames not forwarded over tunnels due to violating supported Max Frame Size (VxLAN, LISP, - 6 | [CSIT-1802](https://jira.fd.io/browse/CSIT-1802) | all testbeds: AF-XDP - NDR tests failing from time to time. - 7 | [CSIT-1804](https://jira.fd.io/browse/CSIT-1804) | All testbeds: NDR tests failing from time to time. - 8 | [CSIT-1808](https://jira.fd.io/browse/CSIT-1808) | All tests with 9000B payload frames not forwarded over memif interfaces. - 9 | [CSIT-1827](https://jira.fd.io/browse/CSIT-1827) | 3n-icx, 3n-skx: all AVF crypto tests sporadically fail. 1518B with no traffic, IMIX with excessive - 10 | [CSIT-1835](https://jira.fd.io/browse/CSIT-1835) | 3n-icx: QUIC vppecho BPS tests failing on timeout when checking hoststack finished. - 11 | [CSIT-1849](https://jira.fd.io/browse/CSIT-1849) | 2n-skx, 2n-clx, 2n-icx: UDP 16m TPUT tests fail to create all sessions. - 12 | [CSIT-1864](https://jira.fd.io/browse/CSIT-1864) | 2n-clx: half of the packets lost on PDR tests. - 13 | [CSIT-1877](https://jira.fd.io/browse/CSIT-1877) | 3n-tsh: all VM tests failing to boot VM. - 14 | [CSIT-1883](https://jira.fd.io/browse/CSIT-1883) | 3n-snr: All hwasync wireguard tests failing when trying to verify device. - 15 | [CSIT-1884](https://jira.fd.io/browse/CSIT-1884) | 2n-clx, 2n-icx: All NAT44DET NDR PDR IMIX over 1M sessions BIDIR tests failing to create enough sessions. - 16 | [CSIT-1885](https://jira.fd.io/browse/CSIT-1885) | 3n-icx: 9000b ip4 ip6 l2 NDRPDR AVF tests are failing to forward traffic. - 17 | [CSIT-1886](https://jira.fd.io/browse/CSIT-1886) | 3n-icx: Wireguard tests with 100 and more tunnels are failing PDR criteria. - -## Fixed - -Issues reported in previous releases which were fixed in this release: - - **#** | **JiraID** | **Issue Description** --------|--------------------------------------------------|--------------------------------------------------------------------- - 1 | [CSIT-1868](https://jira.fd.io/browse/CSIT-1868) | 2n-clx: ALL ldpreload-nginx tests fails when trying to start nginx. - 2 | [CSIT-1871](https://jira.fd.io/browse/CSIT-1871) | 3n-snr: 25GE interface between SUT and TG/TRex goes down randomly. - -# Root Cause Analysis for Performance Changes - -List of RCAs in {{< release_csit >}} for VPP performance changes: - - **#** | **JiraID** | **Issue Description** --------|--------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------- - 1 | [CSIT-1887](https://jira.fd.io/browse/CSIT-1887) | rls2210 RCA: ASTF tests TRex upgrade decreased TRex performance. NAT results not affected, except on Denverton due to interference from VPP-2010. - 2 | [CSIT-1888](https://jira.fd.io/browse/CSIT-1888) | rls2210 RCA: testbed differences, especially for ipsec. Not caused by VPP code nor CSIT code. Most probable cause is clang-14 behavior. 
- 3 | [CSIT-1889](https://jira.fd.io/browse/CSIT-1889) | rls2210 RCA: policy-outbound-nocrypto. When VPP added spd fast path matching (Gerrit 36097), it decreased MRR of the corresponding tests, at least on 3-alt. diff --git a/docs/content/release_notes/vpp_device.md b/docs/content/release_notes/vpp_device.md deleted file mode 100644 index 2f1f6d34b5..0000000000 --- a/docs/content/release_notes/vpp_device.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -title: "VPP Device" -weight: 4 ---- - -# Changes in {{< release_csit >}} - -1. TEST FRAMEWORK - - **CSIT test environment** version has been updated to ver. 11, see - [Environment Versioning]({{< ref "infrastructure#Release Notes" >}}). - -# Known Issues - -List of known issues in {{< release_csit >}} for VPP functional tests in VPP Device: - - **#** | **JiraID** | **Issue Description** --------|--------------------------------------------------|--------------------------------------------------------------------------- - -## New - -List of new issues in {{< release_csit >}} for VPP functional tests in VPP Device: - - **#** | **JiraID** | **Issue Description** --------|--------------------------------------------------|--------------------------------------------------------------------------- \ No newline at end of file -- cgit 1.2.3-korg