From 5bd1b14228f73af4c50412ebbc02cf5d6515c0a3 Mon Sep 17 00:00:00 2001 From: Ido Barnea Date: Tue, 7 Feb 2017 14:41:58 +0200 Subject: VF interfaces documentation and benchmark Signed-off-by: Ido Barnea --- doc/trex_book.asciidoc | 60 ++++++++++++++++++++++++++ doc/trex_vm_bench.asciidoc | 102 +++++++++++++++++++++++++++++++++++++++++++++ doc/ws_main.py | 3 ++ 3 files changed, 165 insertions(+) create mode 100644 doc/trex_vm_bench.asciidoc diff --git a/doc/trex_book.asciidoc b/doc/trex_book.asciidoc index e2c2e5a2..3179f8c0 100755 --- a/doc/trex_book.asciidoc +++ b/doc/trex_book.asciidoc @@ -127,6 +127,7 @@ TRex curretly works on x86 architecture and can operate well on Cisco UCS hardwa | Chipset | Bandwidth (Gb/sec) | Example | Intel I350 | 1 | Intel 4x1GE 350-T4 NIC | Intel 82599 | 10 | Cisco part ID:N2XX-AIPCI01 Intel x520-D2, Intel X520 Dual Port 10Gb SFP+ Adapter +| Intel 82599 VF | x | | Intel X710 | 10 | Cisco part ID:UCSC-PCIE-IQ10GF link:https://en.wikipedia.org/wiki/Small_form-factor_pluggable_transceiver[SFP+], *Preferred* support per stream stats in hardware link:http://www.silicom-usa.com/PE310G4i71L_Quad_Port_Fiber_SFP+_10_Gigabit_Ethernet_PCI_Express_Server_Adapter_49[Silicom PE310G4i71L] | Intel XL710 | 40 | Cisco part ID:UCSC-PCIE-ID40GF, link:https://en.wikipedia.org/wiki/QSFP[QSFP+] (copper/optical) | Intel XL710/X710 VF | x | @@ -2144,6 +2145,65 @@ sudo arp -s 10.0.0.100 sudo arp -s 172.168.0.100 /dev/null 2>&1 +exec /sbin/modprobe i40e >/dev/null 2>&1 +---- + +==== x710 specific instructions +For x710 (i40e driver), we needed to download the latest kernel driver. On all distributions we were using, the existing driver was not new enough. 
+ +To make the system use your new compiled driver with the correct parameters: + +Copy the .ko file to /lib/modules/<your kernel version, as shown by `uname -r`>/kernel/drivers/net/ethernet/intel/i40e/i40e.ko + + +==== 82599 specific instructions +In order to make VF interfaces work correctly, we had to increase the MTU on the related PF interfaces. + +For example, if you run with max_vfs=1,1 (one VF per PF), you will have something like this: + +[source,bash] +---- +sudo ./dpdk_nic_bind.py -s +Network devices using DPDK-compatible driver +============================================ +0000:03:10.0 '82599 Ethernet Controller Virtual Function' drv=igb_uio unused= +0000:03:10.1 '82599 Ethernet Controller Virtual Function' drv=igb_uio unused= + +Network devices using kernel driver +=================================== +0000:01:00.0 'I350 Gigabit Network Connection' if=eth0 drv=igb unused=igb_uio *Active* +0000:03:00.0 '82599ES 10-Gigabit SFI/SFP+ Network Connection' if=eth2 drv=ixgbe unused=igb_uio +0000:03:00.1 '82599ES 10-Gigabit SFI/SFP+ Network Connection' if=eth3 drv=ixgbe unused=igb_uio +---- + +In order to work with 0000:03:10.0 and 0000:03:10.1, you will have to run the following: + +[source,bash] +---- +sudo ifconfig eth3 up mtu 9000 +sudo ifconfig eth2 up mtu 9000 +---- + +==== Performance +See the performance tests we did link:trex_vm_bench.html[here]. === Mellanox ConnectX-4 support diff --git a/doc/trex_vm_bench.asciidoc b/doc/trex_vm_bench.asciidoc new file mode 100644 index 00000000..6084fda3 --- /dev/null +++ b/doc/trex_vm_bench.asciidoc @@ -0,0 +1,102 @@ +TRex VM benchmark howto +======================= +:email: trex.tgen@gmail.com +:quotes.++: +:numbered: +:web_server_url: https://trex-tgn.cisco.com/trex +:local_web_server_url: csi-wiki-01:8181/trex +:toclevels: 6 +:tabledef-default.subs: normal,callouts + +include::trex_ga.asciidoc[] + +// PDF version - image width variable +ifdef::backend-docbook[] +:p_width: 450 +endif::backend-docbook[] + +// HTML version - image 
width variable +ifdef::backend-xhtml11[] +:p_width: 800 +endif::backend-xhtml11[] + + +== Purpose of this document + +The purpose of this document is to describe the performance of TRex on virtual machines with virtual NICs, and on VF interfaces. +Test setup and methodology are described, so users can repeat the test. + +== Test setup + +All tests were done by connecting two ports in loopback. + +For the purpose of the test, the TRex server is run with the ``-c 1'' command line option. This +makes TRex use one core for TX (in addition to 1 core for RX, and one core for control). + +=== Setup details + +[cols="1,5"] +|================= +| Server: | UCSC-C240-M4SX +| CPU: | 2 x Intel(R) Xeon(R) CPU E5-2667 v3 @ 3.20GHz +| RAM: | 65536 @ 2133 MHz +| OS: | Fedora 18 for all tests, except the X710 which was done on CentOS 6. x710/82599 tests were done on bare metal. For other NICs ESXi was used. +| Switch: | Cisco Nexus 3172 Chassis, System version: 6.0(2)U5(2). +| TRex: | v2.16 with patches for using dpdk 1702 (will get into v2.17) +|================= + +=== Topology + +Two ports connected in loopback. + +=== Test commands +Run TRex stateless using: ``./t-rex-64 -i -c 1'' + +In stateless console (``trex-console'') we do the following tests: + +var1: start -f stl/bench.py -t size=64,vm=var1 -m --port 0 --force + +cached: start -f stl/bench.py -t size=64,vm=cached -m --port 0 --force + +latency: start -f stl/udp_1pkt_src_ip_split_latency.py -m --port 0 --force + + +=== Results + +==== Throughput tests + +.64 bytes with 1 variable field engine +[cols="2,2^,2^,2^,2", options="header"] +|================= +| NIC/driver | Max PPS at NDR <1> | TX core CPU <2> | RX core CPU <3> | Max possible TX <4> +| i40evf | 10M | 85% | 75% | 11.76 +| ixgbevf | 9M | 98% | 63% | 9.18 +| vmxnet3 | 0.9M | 17% | 1.63% | 5.29 +| virtio | 0.28M | 5.3% | 0.3% | 5.28 +| e1000 | 0.7M | 19.4% | 1.45% | 3.6 +|================= + +<1> Maximum packets per second rate we can send until we see packet drops. 
+<2> TX CPU utilization at this point. +<3> RX CPU utilization at this point. +<4> Theoretical maximum packets per second (in millions) with TX core at 100% (extrapolated from 1 and 2) + +.64 bytes with mbuf cache feature +[cols="2,2^,2^,2^,2", options="header"] +|================= +| NIC/driver | Max PPS at NDR | TX core CPU | RX core CPU | Max possible TX +| i40evf | 10M | 40% | 77% | 25 +| ixgbevf | 9M | 48% | 59% | 18.75 +| vmxnet3 | 0.9M | 8% | 1.7% | 11.25 +| virtio | 0.31M | 3.9% | 0.35% | 7.9 +| e1000 | 0.7M | 9.4% | 1.45% | 7.44 +|================= + + +==== Latency test + +.Latency test results +[cols="2,2^,2^,2^,2", options="header"] +|================= +| NIC/driver | Rate (pps) | Average latency (usec) | TX core CPU | RX core CPU +| i40evf | 7M | 8 | 28.6% | 79% +| ixgbevf | 8.9M | 16 | 49% | 81.5% +| vmxnet3 | 0.9M | 80-120 with spikes | 8% | 2.15% +| virtio | 0.26M | 37-40 with spikes | 4% | 0.36% +| e1000 | 0.4M | 100 | 7.7% | 0.85% +|================= diff --git a/doc/ws_main.py b/doc/ws_main.py index 4b972a43..c07464ad 100755 --- a/doc/ws_main.py +++ b/doc/ws_main.py @@ -974,6 +974,9 @@ def build(bld): bld(rule=convert_to_html_toc_book, source='trex_vm_manual.asciidoc waf.css', target='trex_vm_manual.html',scan=ascii_doc_scan) + bld(rule=convert_to_html_toc_book, + source='trex_vm_bench.asciidoc waf.css', target='trex_vm_bench.html',scan=ascii_doc_scan) + bld(rule=convert_to_html_toc_book, source='trex_stateless.asciidoc waf.css', target='trex_stateless.html',scan=ascii_doc_scan); -- cgit 1.2.3-korg