aboutsummaryrefslogtreecommitdiffstats
path: root/docs/report/vpp_performance_tests
diff options
context:
space:
mode:
authorTibor Frank <tifrank@cisco.com>2018-05-04 13:41:43 +0200
committerTibor Frank <tifrank@cisco.com>2018-05-04 13:42:28 +0200
commit4bfd1dcd604dac12d1dd00ba63c0d5e4170a1f2b (patch)
tree82b4fffb72fd2771d3b77be606219371ba2b8dda /docs/report/vpp_performance_tests
parent202f199be94d87144384f3ea9c6d50e9766b0d73 (diff)
Report: Add TSA plots
Change-Id: Ibb8da96518848fef2fc5b0002a84391dea81c573 Signed-off-by: Tibor Frank <tifrank@cisco.com>
Diffstat (limited to 'docs/report/vpp_performance_tests')
-rw-r--r--docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_memif.rst90
-rw-r--r--docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_orchestrated.rst158
-rw-r--r--docs/report/vpp_performance_tests/throughput_speedup_multi_core/index.rst6
-rw-r--r--docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip4_tunnels.rst90
-rw-r--r--docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip6_tunnels.rst90
-rw-r--r--docs/report/vpp_performance_tests/throughput_speedup_multi_core/ipsec.rst95
-rw-r--r--docs/report/vpp_performance_tests/throughput_speedup_multi_core/vm_vhost.rst289
7 files changed, 818 insertions, 0 deletions
diff --git a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_memif.rst b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_memif.rst
new file mode 100644
index 0000000000..c95b3f57ee
--- /dev/null
+++ b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_memif.rst
@@ -0,0 +1,90 @@
+Container memif Connections
+===========================
+
+The following sections include Throughput Speedup Analysis for VPP
+multi-core multi-thread configurations with no Hyper-Threading, specifically
+for tested 2t2c (2 threads, 2 cores) and 4t4c scenarios. 1t1c throughput
+results are used as a reference for reported speedup ratio.
+Performance is reported for VPP
+running in multiple configurations of VPP worker thread(s), a.k.a. VPP
+data plane thread(s), and their physical CPU core(s) placement.
+
+NDR Throughput
+--------------
+
+VPP NDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 network interface card.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-container-memif-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-container-memif-tsa-ndrdisc}
+ \label{fig:10ge2p1x520-64B-container-memif-tsa-ndrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/container_memif && grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-(l2xcbase|l2bdbasemaclrn)-.*ndrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/container_memif
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-(l2xcbase|l2bdbasemaclrn)-.*ndrdisc" *
+
+*Figure 1. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy L2 Ethernet Switching (base).*
+
+PDR Throughput
+--------------
+
+VPP PDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 network interface card.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-container-memif-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-container-memif-tsa-pdrdisc}
+ \label{fig:10ge2p1x520-64B-container-memif-tsa-pdrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/container_memif && grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-(l2xcbase|l2bdbasemaclrn)-.*pdrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/container_memif
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-(l2xcbase|l2bdbasemaclrn)-.*pdrdisc" *
+
+*Figure 2. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy L2 Ethernet Switching (base).*
\ No newline at end of file
diff --git a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_orchestrated.rst b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_orchestrated.rst
new file mode 100644
index 0000000000..5bc49a7d97
--- /dev/null
+++ b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/container_orchestrated.rst
@@ -0,0 +1,158 @@
+Container Orchestrated Topologies
+=================================
+
+The following sections include Throughput Speedup Analysis for VPP
+multi-core multi-thread configurations with no Hyper-Threading, specifically
+for tested 2t2c (2 threads, 2 cores) and 4t4c scenarios. 1t1c throughput
+results are used as a reference for reported speedup ratio.
+Performance is reported for VPP
+running in multiple configurations of VPP worker thread(s), a.k.a. VPP
+data plane thread(s), and their physical CPU core(s) placement.
+
+NDR Throughput
+--------------
+
+VPP NDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 and 10ge2p1x710 network interface cards.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-container-orchestrated-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-container-orchestrated-tsa-ndrdisc}
+ \label{fig:10ge2p1x520-64B-container-orchestrated-tsa-ndrdisc}
+ \end{figure}
+
+*Figure 1. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy L2 Ethernet Switching (base).*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/kubernetes/perf/container_memif && grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*ndrdisc" 10ge2p1x520*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/kubernetes/perf/container_memif
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*ndrdisc" 10ge2p1x520*
+
+NIC 10ge2p1x710
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x710-64B-container-orchestrated-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x710-64B-container-orchestrated-tsa-ndrdisc}
+ \label{fig:10ge2p1x710-64B-container-orchestrated-tsa-ndrdisc}
+ \end{figure}
+
+*Figure 2. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy L2 Ethernet Switching (base).*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/kubernetes/perf/container_memif && grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*ndrdisc" 10ge2p1x710*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/kubernetes/perf/container_memif
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*ndrdisc" 10ge2p1x710*
+
+PDR Throughput
+--------------
+
+VPP PDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 and 10ge2p1x710 network interface cards.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-container-orchestrated-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-container-orchestrated-tsa-pdrdisc}
+ \label{fig:10ge2p1x520-64B-container-orchestrated-tsa-pdrdisc}
+ \end{figure}
+
+*Figure 3. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy L2 Ethernet Switching (base).*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/kubernetes/perf/container_memif && grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*pdrdisc" 10ge2p1x520*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/kubernetes/perf/container_memif
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*pdrdisc" 10ge2p1x520*
+
+NIC 10ge2p1x710
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x710-64B-container-orchestrated-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x710-64B-container-orchestrated-tsa-pdrdisc}
+ \label{fig:10ge2p1x710-64B-container-orchestrated-tsa-pdrdisc}
+ \end{figure}
+
+*Figure 4. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy L2 Ethernet Switching (base).*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/kubernetes/perf/container_memif && grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*pdrdisc" 10ge2p1x710*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/kubernetes/perf/container_memif
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-(eth|dot1q|dot1ad)-[1-9]drc(l2xcbase|l2bdbasemaclrn)-.*pdrdisc" 10ge2p1x710*
diff --git a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/index.rst b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/index.rst
index ddce548b6c..10b5f1644a 100644
--- a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/index.rst
+++ b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/index.rst
@@ -18,3 +18,9 @@ threaded VPP configurations relative to 1-core configurations.
l2
ip4
ip6
+ ip4_tunnels
+ ip6_tunnels
+ vm_vhost
+ container_memif
+ container_orchestrated
+ ipsec
diff --git a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip4_tunnels.rst b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip4_tunnels.rst
new file mode 100644
index 0000000000..222046e4c2
--- /dev/null
+++ b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip4_tunnels.rst
@@ -0,0 +1,90 @@
+IPv4 Overlay Tunnels
+====================
+
+The following sections include Throughput Speedup Analysis for VPP
+multi-core multi-thread configurations with no Hyper-Threading, specifically
+for tested 2t2c (2 threads, 2 cores) and 4t4c scenarios. 1t1c throughput
+results are used as a reference for reported speedup ratio.
+Performance is reported for VPP
+running in multiple configurations of VPP worker thread(s), a.k.a. VPP
+data plane thread(s), and their physical CPU core(s) placement.
+
+NDR Throughput
+--------------
+
+VPP NDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 network interface card.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-ethip4-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-ethip4-tsa-ndrdisc}
+ \label{fig:10ge2p1x520-64B-ethip4-tsa-ndrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/ip4_tunnels && grep -E "64B-(1t1c|2t2c|4t4c)-ethip4[a-z0-9]+-[a-z0-9]*-ndrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/ip4_tunnels
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-ethip4[a-z0-9]+-[a-z0-9]*-ndrdisc" *
+
+*Figure 1. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy IPv4 Overlay Tunnels.*
+
+PDR Throughput
+--------------
+
+VPP PDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 network interface card.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-ethip4-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-ethip4-tsa-pdrdisc}
+ \label{fig:10ge2p1x520-64B-ethip4-tsa-pdrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/ip4_tunnels && grep -E "64B-(1t1c|2t2c|4t4c)-ethip4[a-z0-9]+-[a-z0-9]*-pdrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/ip4_tunnels
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-ethip4[a-z0-9]+-[a-z0-9]*-pdrdisc" *
+
+*Figure 2. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy IPv4 Overlay Tunnels.*
diff --git a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip6_tunnels.rst b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip6_tunnels.rst
new file mode 100644
index 0000000000..eec062faef
--- /dev/null
+++ b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ip6_tunnels.rst
@@ -0,0 +1,90 @@
+IPv6 Overlay Tunnels
+====================
+
+The following sections include Throughput Speedup Analysis for VPP
+multi-core multi-thread configurations with no Hyper-Threading, specifically
+for tested 2t2c (2 threads, 2 cores) and 4t4c scenarios. 1t1c throughput
+results are used as a reference for reported speedup ratio.
+Performance is reported for VPP
+running in multiple configurations of VPP worker thread(s), a.k.a. VPP
+data plane thread(s), and their physical CPU core(s) placement.
+
+NDR Throughput
+--------------
+
+VPP NDR 78B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 network interface card.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-78B-ethip6-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-78B-ethip6-tsa-ndrdisc}
+ \label{fig:10ge2p1x520-78B-ethip6-tsa-ndrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/ip6_tunnels && grep -E "78B-(1t1c|2t2c|4t4c)-ethip6[a-z0-9]+-[a-z0-9]*-ndrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/ip6_tunnels
+ $ grep -E "78B-(1t1c|2t2c|4t4c)-ethip6[a-z0-9]+-[a-z0-9]*-ndrdisc" *
+
+*Figure 1. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy IPv6 Overlay Tunnels.*
+
+PDR Throughput
+--------------
+
+VPP PDR 78B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520 network interface card.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-78B-ethip6-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-78B-ethip6-tsa-pdrdisc}
+ \label{fig:10ge2p1x520-78B-ethip6-tsa-pdrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/ip6_tunnels && grep -E "78B-(1t1c|2t2c|4t4c)-ethip6[a-z0-9]+-[a-z0-9]*-pdrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/ip6_tunnels
+ $ grep -E "78B-(1t1c|2t2c|4t4c)-ethip6[a-z0-9]+-[a-z0-9]*-pdrdisc" *
+
+*Figure 2. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy IPv6 Overlay Tunnels.*
diff --git a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ipsec.rst b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ipsec.rst
new file mode 100644
index 0000000000..678e00fec5
--- /dev/null
+++ b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/ipsec.rst
@@ -0,0 +1,95 @@
+IPSec Crypto HW: IP4 Routed-Forwarding
+======================================
+
+The following sections include Throughput Speedup Analysis for VPP
+multi-core multi-thread configurations with no Hyper-Threading, specifically
+for tested 2t2c (2 threads, 2 cores) and 4t4c scenarios. 1t1c throughput
+results are used as a reference for reported speedup ratio.
+VPP IPSec encryption is accelerated using DPDK cryptodev
+library driving Intel Quick Assist (QAT) crypto PCIe hardware cards.
+Performance is reported for VPP running in multiple configurations of
+VPP worker thread(s), a.k.a. VPP data plane thread(s), and their
+physical CPU core(s) placement.
+
+NDR Throughput
+--------------
+
+VPP NDR 64B packet throughput speedup ratio is presented in the graphs
+below for 40ge2p1xl710 network interface card.
+
+NIC 40ge2p1xl710
+~~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/40ge2p1xl710-64B-ipsechw-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{40ge2p1xl710-64B-ipsechw-tsa-ndrdisc}
+ \label{fig:40ge2p1xl710-64B-ipsechw-tsa-ndrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/crypto && grep -E "64B-(1t1c|2t2c|4t4c)-.*ipsec.*-ndrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/crypto
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*ipsec.*-ndrdisc" *
+
+*Figure 1. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy IPSEC HW.*
+
+PDR Throughput
+--------------
+
+VPP PDR 64B packet throughput speedup ratio is presented in the graphs
+below for 40ge2p1xl710 network interface card.
+
+NIC 40ge2p1xl710
+~~~~~~~~~~~~~~~~
+
+VPP PDR 64B packet throughput speedup ratio, relative to the 1t1c setup
+(1 thread, 1 core), is presented in the graph below. PDR is measured for 0.5% packet loss ratio.
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/40ge2p1xl710-64B-ipsechw-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{40ge2p1xl710-64B-ipsechw-tsa-pdrdisc}
+ \label{fig:40ge2p1xl710-64B-ipsechw-tsa-pdrdisc}
+ \end{figure}
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/crypto && grep -E "64B-(1t1c|2t2c|4t4c)-.*ipsec.*-pdrdisc" *
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/crypto
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*ipsec.*-pdrdisc" *
+
+*Figure 2. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy IPSEC HW.*
diff --git a/docs/report/vpp_performance_tests/throughput_speedup_multi_core/vm_vhost.rst b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/vm_vhost.rst
new file mode 100644
index 0000000000..9a1c3df0eb
--- /dev/null
+++ b/docs/report/vpp_performance_tests/throughput_speedup_multi_core/vm_vhost.rst
@@ -0,0 +1,289 @@
+VM vhost Connections
+====================
+The following sections include Throughput Speedup Analysis for VPP
+multi-core multi-thread configurations with no Hyper-Threading, specifically
+for tested 2t2c (2 threads, 2 cores) and 4t4c scenarios. 1t1c throughput
+results are used as a reference for reported speedup ratio. Input data
+used for the graphs comes from Phy-to-Phy 64B performance tests with
+VM vhost-user, including NDR throughput (zero packet loss) and
+PDR throughput (<0.5% packet loss).
+
+NDR Throughput
+--------------
+
+VPP NDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520, 10ge2p1x710 and 40ge2p1xl710 network interface cards.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-vhost-sel1-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-vhost-sel1-tsa-ndrdisc}
+ \label{fig:10ge2p1x520-64B-vhost-sel1-tsa-ndrdisc}
+ \end{figure}
+
+*Figure 1a. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-vhost-sel2-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-vhost-sel2-tsa-ndrdisc}
+ \label{fig:10ge2p1x520-64B-vhost-sel2-tsa-ndrdisc}
+ \end{figure}
+
+*Figure 1b. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/vm_vhost && grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-ndrdisc" 10ge2p1x520*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/vm_vhost
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-ndrdisc" 10ge2p1x520*
+
+NIC 10ge2p1x710
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x710-64B-vhost-sel2-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x710-64B-vhost-sel2-tsa-ndrdisc}
+ \label{fig:10ge2p1x710-64B-vhost-sel2-tsa-ndrdisc}
+ \end{figure}
+
+*Figure 2. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/vm_vhost && grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-ndrdisc" 10ge2p1x710*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/vm_vhost
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-ndrdisc" 10ge2p1x710*
+
+NIC 40ge2p1xl710
+~~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/40ge2p1xl710-64B-vhost-sel1-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{40ge2p1xl710-64B-vhost-sel1-tsa-ndrdisc}
+ \label{fig:40ge2p1xl710-64B-vhost-sel1-tsa-ndrdisc}
+ \end{figure}
+
+*Figure 3a. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/40ge2p1xl710-64B-vhost-sel2-tsa-ndrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{40ge2p1xl710-64B-vhost-sel2-tsa-ndrdisc}
+ \label{fig:40ge2p1xl710-64B-vhost-sel2-tsa-ndrdisc}
+ \end{figure}
+
+*Figure 3b. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+NDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/vm_vhost && grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-ndrdisc" 40ge2p1xl710*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/vm_vhost
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-ndrdisc" 40ge2p1xl710*
+
+PDR Throughput
+--------------
+
+VPP PDR 64B packet throughput speedup ratio is presented in the graphs
+below for 10ge2p1x520, 10ge2p1x710 and 40ge2p1xl710 network interface cards.
+
+NIC 10ge2p1x520
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-vhost-sel1-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-vhost-sel1-tsa-pdrdisc}
+ \label{fig:10ge2p1x520-64B-vhost-sel1-tsa-pdrdisc}
+ \end{figure}
+
+*Figure 4a. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x520-64B-vhost-sel2-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x520-64B-vhost-sel2-tsa-pdrdisc}
+ \label{fig:10ge2p1x520-64B-vhost-sel2-tsa-pdrdisc}
+ \end{figure}
+
+*Figure 4b. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/vm_vhost && grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-pdrdisc" 10ge2p1x520*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/vm_vhost
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-pdrdisc" 10ge2p1x520*
+
+NIC 10ge2p1x710
+~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/10ge2p1x710-64B-vhost-sel2-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{10ge2p1x710-64B-vhost-sel2-tsa-pdrdisc}
+ \label{fig:10ge2p1x710-64B-vhost-sel2-tsa-pdrdisc}
+ \end{figure}
+
+*Figure 5. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/vm_vhost && grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-pdrdisc" 10ge2p1x710*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/vm_vhost
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-pdrdisc" 10ge2p1x710*
+
+NIC 40ge2p1xl710
+~~~~~~~~~~~~~~~~
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/40ge2p1xl710-64B-vhost-sel1-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{40ge2p1xl710-64B-vhost-sel1-tsa-pdrdisc}
+ \label{fig:40ge2p1xl710-64B-vhost-sel1-tsa-pdrdisc}
+ \end{figure}
+
+*Figure 6a. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+.. raw:: html
+
+ <iframe width="700" height="1000" frameborder="0" scrolling="no" src="../../_static/vpp/40ge2p1xl710-64B-vhost-sel2-tsa-pdrdisc.html"></iframe>
+
+.. raw:: latex
+
+ \begin{figure}[H]
+ \centering
+ \graphicspath{{../_build/_static/vpp/}}
+ \includegraphics[clip, trim=0cm 8cm 5cm 0cm, width=0.70\textwidth]{40ge2p1xl710-64B-vhost-sel2-tsa-pdrdisc}
+ \label{fig:40ge2p1xl710-64B-vhost-sel2-tsa-pdrdisc}
+ \end{figure}
+
+*Figure 6b. Throughput Speedup Analysis - Multi-Core Speedup Ratio - Normalized
+PDR Throughput for Phy-to-Phy VM vhost-user selected TCs.*
+
+CSIT source code for the test cases used for above plots can be found in CSIT
+git repository:
+
+.. only:: html
+
+ .. program-output:: cd ../../../../../ && set +x && cd tests/vpp/perf/vm_vhost && grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-pdrdisc" 40ge2p1xl710*
+ :shell:
+
+.. only:: latex
+
+ .. code-block:: bash
+
+ $ cd tests/vpp/perf/vm_vhost
+ $ grep -E "64B-(1t1c|2t2c|4t4c)-.*vhost.*-pdrdisc" 40ge2p1xl710*