aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZhiyong Yang <zhiyong.yang@intel.com>2019-03-20 03:35:39 -0400
committerDamjan Marion <dmarion@me.com>2019-04-23 07:13:46 +0000
commit2c1904fa8efffabfadd7f09edb4da98c2e172cd6 (patch)
tree73c17d9d0ec7ff6eb25d6e581829034b0d6f0809
parentf92211672646dccdaf50ec7cea9197e15dca03ed (diff)
dpdk_esp_encrypt: add to prefetch data
The memory areas storing vlib_buffer_t and ip4|6_and_esp_header_t are not prefetched. This patch helps dpdk_esp_encrypt save 18 clocks/pkt (from 149 down to 131) on Haswell when running IPsec in tunnel mode. Change-Id: I4f4e9e2b3982a4b7810cab8ed828a5e4631f8f8c Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
-rw-r--r--src/plugins/dpdk/ipsec/esp_encrypt.c16
1 files changed, 13 insertions, 3 deletions
diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c
index 908f846e315..653c158fb94 100644
--- a/src/plugins/dpdk/ipsec/esp_encrypt.c
+++ b/src/plugins/dpdk/ipsec/esp_encrypt.c
@@ -155,8 +155,8 @@ dpdk_esp_encrypt_inline (vlib_main_t * vm,
while (n_left_from > 0 && n_left_to_next > 0)
{
clib_error_t *error;
- u32 bi0;
- vlib_buffer_t *b0 = 0;
+ u32 bi0, bi1;
+ vlib_buffer_t *b0, *b1;
u32 sa_index0;
ip4_and_esp_header_t *ih0, *oh0 = 0;
ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
@@ -169,7 +169,7 @@ dpdk_esp_encrypt_inline (vlib_main_t * vm,
u8 trunc_size;
u16 rewrite_len;
u16 udp_encap_adv = 0;
- struct rte_mbuf *mb0 = 0;
+ struct rte_mbuf *mb0;
struct rte_crypto_op *op;
u16 res_idx;
@@ -188,6 +188,16 @@ dpdk_esp_encrypt_inline (vlib_main_t * vm,
/* mb0 */
CLIB_PREFETCH (mb0, CLIB_CACHE_LINE_BYTES, STORE);
+ if (n_left_from > 1)
+ {
+ bi1 = from[1];
+ b1 = vlib_get_buffer (vm, bi1);
+
+ CLIB_PREFETCH (b1, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b1->data - CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
op = ops[0];
ops += 1;
ASSERT (op->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
710-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr.robot'>
path: root/tests/vpp/perf/l2/10ge2p1x710-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr.robot
blob: 7a29f318d573407040b2398bcc20725ad8087c37 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Copyright (c) 2019 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

*** Settings ***
| Resource | resources/libraries/robot/performance/performance_setup.robot
| ...
| Force Tags | 3_NODE_SINGLE_LINK_TOPO | PERFTEST | HW_ENV | NDRPDR
| ... | NIC_Intel-X710 | ETH | L2BDMACLRN | FEATURE | ACL | ACL_STATEFUL
| ... | IACL | ACL1 | 10K_FLOWS
| ...
| Suite Setup | Run Keywords
| ... | Set up 3-node performance topology with DUT's NIC model | L2
| ... | ${nic_name}
| ... | AND | Set up performance test suite with ACL
| Suite Teardown | Tear down 3-node performance topology
| ...
| Test Setup | Set up performance test
| Test Teardown | Tear down performance test
| ...
| Test Template | Local Template
| ...
| Documentation | *RFC2544: Pkt throughput L2BD test cases with ACL*
| ...
| ... | *[Top] Network Topologies:* TG-DUT1-DUT2-TG 3-node circular topology\
| ... | with single links between nodes.
| ... | *[Enc] Packet Encapsulations:* Eth-IPv4-UDP for L2 switching of IPv4.
| ... | *[Cfg] DUT configuration:* DUT1 is configured with L2 bridge domain\
| ... | and MAC learning enabled. DUT2 is configured with L2 cross-connects.\
| ... | Required ACL rules are applied to input paths of both DUT1 interfaces.\
| ... | DUT1 and DUT2 are tested with ${nic_name}.\
| ... | *[Ver] TG verification:* TG finds and reports throughput NDR (Non Drop\
| ... | Rate) with zero packet loss tolerance and throughput PDR (Partial Drop\
| ... | Rate) with non-zero packet loss tolerance (LT) expressed in percentage\
| ... | of packets transmitted. NDR and PDR are discovered for different\
| ... | Ethernet L2 frame sizes using MLRsearch library.\
| ... | Test packets are generated by TG on\
| ... | links to DUTs. TG traffic profile contains two L3 flow-groups\
| ... | (flow-group per direction, ${flows_per_dir} flows per flow-group) with\
| ... | all packets containing Ethernet header, IPv4 header with UDP header and\
| ... | static payload. MAC addresses are matching MAC addresses of the TG node\
| ... | interfaces.
| ... | *[Ref] Applicable standard specifications:* RFC2544.

*** Variables ***
# Suite-level constants. Only nic_name and flows_per_dir are referenced
# directly in this file; NOTE(review): the rest are presumably consumed by
# keywords from the imported resource file - verify there.
# NIC model under test; passed to Suite Setup and shown in the documentation.
| ${nic_name}= | Intel-X710
| ${overhead}= | ${0}
# ACL test setup
| ${acl_action}= | permit+reflect
| ${acl_apply_type}= | input
| ${no_hit_aces_number}= | 1
# Flows per direction; also interpolated into the suite documentation.
| ${flows_per_dir}= | 10k
# starting points for non-hitting ACLs
| ${src_ip_start}= | 30.30.30.1
| ${dst_ip_start}= | 40.40.40.1
| ${ip_step}= | ${1}
| ${sport_start}= | ${1000}
| ${dport_start}= | ${1000}
| ${port_step}= | ${1}
| ${trex_stream1_subnet}= | 10.10.10.0/24
| ${trex_stream2_subnet}= | 20.20.20.0/24
# Traffic profile:
| ${traffic_profile}= | trex-sl-3n-ethip4udp-10u1000p-conc

*** Keywords ***
# Keyword named by the Test Template setting: every test case in this suite
# runs it with that test case's frame_size and phy_cores arguments.
| Local Template
| | [Documentation]
| | ... | [Cfg] DUT runs L2BD config with ACLs with ${phy_cores} phy
| | ... | core(s).
| | ... | [Ver] Measure NDR and PDR values using MLRsearch algorithm.\
| | ...
| | ... | *Arguments:*
| | ... | - frame_size - Framesize in Bytes in integer or string (IMIX_v4_1).
| | ... | Type: integer, string
| | ... | - phy_cores - Number of physical cores. Type: integer
| | ... | - rxq - Number of RX queues, default value: ${None}. Type: integer
| | ...
| | [Arguments] | ${frame_size} | ${phy_cores} | ${rxq}=${None}
| | ...
| | # Re-publish the frame_size argument at test scope, so it is visible
| | # outside this keyword's local scope.
| | Set Test Variable | \${frame_size}
| | ...
| | # The steps below call keywords defined outside this file.
| | # Given/And: build the DUT startup configuration and apply it.
| | Given Add worker threads and rxqueues to all DUTs | ${phy_cores} | ${rxq}
| | And Add PCI devices to all DUTs
| | Set Max Rate And Jumbo And Handle Multi Seg
| | And Apply startup configuration on all VPP DUTs
| | # When: configure the L2 bridge domain with ACLs; Then: run the NDR/PDR search.
| | When Initialize L2 bridge domain with IPv4 ACLs on DUT1 in 3-node circular topology
| | Then Find NDR and PDR intervals using optimized search

*** Test Cases ***
# Data-driven cases: each row of named arguments is passed to the suite's
# Test Template (Local Template).
# Matrix: frame sizes 64B, 1518B, 9000B and IMIX_v4_1, each with 1, 2 and 4
# physical cores.
| tc01-64B-1c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 64B | 1C
| | frame_size=${64} | phy_cores=${1}

| tc02-64B-2c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 64B | 2C
| | frame_size=${64} | phy_cores=${2}

| tc03-64B-4c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 64B | 4C
| | frame_size=${64} | phy_cores=${4}

| tc04-1518B-1c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 1518B | 1C
| | frame_size=${1518} | phy_cores=${1}

| tc05-1518B-2c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 1518B | 2C
| | frame_size=${1518} | phy_cores=${2}

| tc06-1518B-4c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 1518B | 4C
| | frame_size=${1518} | phy_cores=${4}

| tc07-9000B-1c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 9000B | 1C
| | frame_size=${9000} | phy_cores=${1}

| tc08-9000B-2c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 9000B | 2C
| | frame_size=${9000} | phy_cores=${2}

| tc09-9000B-4c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | 9000B | 4C
| | frame_size=${9000} | phy_cores=${4}

| tc10-IMIX-1c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | IMIX | 1C
| | frame_size=IMIX_v4_1 | phy_cores=${1}

| tc11-IMIX-2c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | IMIX | 2C
| | frame_size=IMIX_v4_1 | phy_cores=${2}

| tc12-IMIX-4c-eth-l2bdbasemaclrn-iacl1sf-10kflows-ndrpdr
| | [Tags] | IMIX | 4C
| | frame_size=IMIX_v4_1 | phy_cores=${4}