summaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorKingwel Xie <kingwel.xie@ericsson.com>2018-12-22 03:17:15 -0500
committerDamjan Marion <dmarion@me.com>2018-12-22 11:50:14 +0000
commitb9894ee69f6ba9c6a8eeb531d528e656b2a07812 (patch)
tree35d3d55bdf3474e2ba210cacd0adc46c98fea34b /src/plugins
parent66a1003f1707c16b183aafb19f7208c4f5641965 (diff)
crypto-input improvement:
1. multi-loop, and new style with vlib_buffer_enqueue_to_next 2. add error counter for AUTH-FAILURE 3. buffer trace changed. now it supports 'trace add dpdk-crypto-input 10' just like the other input nodes Actual measurement shows >10 clocks per packets are saved, under QAT or openssl PMD case Change-Id: I6ea34e4ae3b08c381219ff6bc8adda2d927fbfd5 Signed-off-by: Kingwel Xie <kingwel.xie@ericsson.com>
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/dpdk/ipsec/crypto_node.c237
1 files changed, 163 insertions, 74 deletions
diff --git a/src/plugins/dpdk/ipsec/crypto_node.c b/src/plugins/dpdk/ipsec/crypto_node.c
index 5fbaaf3316a..a5f8d8e805e 100644
--- a/src/plugins/dpdk/ipsec/crypto_node.c
+++ b/src/plugins/dpdk/ipsec/crypto_node.c
@@ -28,6 +28,7 @@
#define foreach_dpdk_crypto_input_error \
_(DQ_COPS, "Crypto ops dequeued") \
+ _(AUTH_FAILED, "Crypto verification failed") \
_(STATUS, "Crypto operation failed")
typedef enum
@@ -88,90 +89,178 @@ format_dpdk_crypto_input_trace (u8 * s, va_list * args)
return s;
}
+/*
+ * Inspect the completion status of one dequeued crypto op.
+ *
+ * A successful op leaves next[0] untouched. Any other status rewrites
+ * next[0] to the drop next and bumps the STATUS error counter; an
+ * authentication failure additionally bumps the AUTH_FAILED counter.
+ */
+static_always_inline void
+dpdk_crypto_check_check_op (vlib_main_t * vm, vlib_node_runtime_t * node,
+			    struct rte_crypto_op *op0, u16 * next)
+{
+  /* Expected case: nothing to do, keep the next index chosen earlier. */
+  if (PREDICT_TRUE (op0->status == RTE_CRYPTO_OP_STATUS_SUCCESS))
+    return;
+
+  next[0] = DPDK_CRYPTO_INPUT_NEXT_DROP;
+  vlib_node_increment_counter (vm, node->node_index,
+			       DPDK_CRYPTO_INPUT_ERROR_STATUS, 1);
+
+  /* Only authentication failures get the second, more specific counter. */
+  if (op0->status != RTE_CRYPTO_OP_STATUS_AUTH_FAILED)
+    return;
+
+  vlib_node_increment_counter (vm, node->node_index,
+			       DPDK_CRYPTO_INPUT_ERROR_AUTH_FAILED, 1);
+}
+
+/*
+ * Add trace records for the first dequeued ops, bounded by both the node's
+ * remaining trace count and n_deq. Each traced buffer records the op status.
+ * The remaining trace count is written back to the node afterwards.
+ */
+always_inline void
+dpdk_crypto_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
+			 struct rte_crypto_op **ops, u32 n_deq)
+{
+  u32 traces_left = vlib_get_trace_count (vm, node);
+  u32 ops_left = n_deq;
+
+  /* Tracing is normally off; leave without touching the ops. */
+  if (PREDICT_TRUE (traces_left == 0))
+    return;
+
+  for (; traces_left && ops_left; traces_left--, ops_left--, ops++)
+    {
+      struct rte_crypto_op *op = ops[0];
+      u16 next_index = crypto_op_get_priv (op)->next;
+      vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (op->sym[0].m_src);
+      dpdk_crypto_input_trace_t *tr;
+
+      vlib_trace_buffer (vm, node, next_index, b, /* follow_chain */ 0);
+
+      tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+      tr->status = op->status;
+    }
+
+  vlib_set_trace_count (vm, node, traces_left);
+}
+
+/*
+ * Dequeue up to VLIB_FRAME_SIZE completed crypto ops from queue pair
+ * (res->qp_id + outbound) of device res->dev_id and forward their buffers.
+ *
+ * For every op: the next index is read from the op private data, overridden
+ * with the drop next if the op status is not SUCCESS, the buffer index is
+ * derived from the source mbuf, and the op status is reset to NOT_PROCESSED.
+ * All buffers are then enqueued to their next nodes in one call and the ops
+ * are freed.
+ *
+ * Returns the number of ops dequeued.
+ */
static_always_inline u32
dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node,
crypto_resource_t * res, u8 outbound)
{
- u32 n_deq, total_n_deq = 0, *to_next = 0, n_ops, next_index;
u32 thread_idx = vlib_get_thread_index ();
- dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
u8 numa = rte_socket_id ();
+
+ dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
crypto_worker_main_t *cwm =
vec_elt_at_index (dcm->workers_main, thread_idx);
+
+ u32 n_ops, n_deq;
+ u32 bis[VLIB_FRAME_SIZE], *bi;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
struct rte_crypto_op **ops;
- next_index = node->cached_next_index;
+ bi = bis;
+ next = nexts;
+ ops = cwm->ops;
- {
- ops = cwm->ops;
- n_ops = rte_cryptodev_dequeue_burst (res->dev_id,
- res->qp_id + outbound,
- ops, VLIB_FRAME_SIZE);
- res->inflights[outbound] -= n_ops;
- ASSERT (res->inflights >= 0);
-
- n_deq = n_ops;
- total_n_deq += n_ops;
-
- while (n_ops > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_ops > 0 && n_left_to_next > 0)
- {
- u32 bi0, next0;
- vlib_buffer_t *b0 = 0;
- struct rte_crypto_op *op;
-
- op = ops[0];
- ops += 1;
- n_ops -= 1;
- n_left_to_next -= 1;
-
- dpdk_op_priv_t *priv = crypto_op_get_priv (op);
- next0 = priv->next;
-
- if (PREDICT_FALSE (op->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
- {
- next0 = DPDK_CRYPTO_INPUT_NEXT_DROP;
- vlib_node_increment_counter (vm,
- dpdk_crypto_input_node.index,
- DPDK_CRYPTO_INPUT_ERROR_STATUS,
- 1);
- }
-
- /* XXX store bi0 and next0 in op private? */
-
- b0 = vlib_buffer_from_rte_mbuf (op->sym[0].m_src);
- bi0 = vlib_get_buffer_index (vm, b0);
-
- to_next[0] = bi0;
- to_next += 1;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vlib_trace_next_frame (vm, node, next0);
- dpdk_crypto_input_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->status = op->status;
- }
-
- op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- crypto_free_ops (numa, cwm->ops, n_deq);
- }
-
- vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
- DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, total_n_deq);
- return total_n_deq;
+ /* n_ops is consumed by the loops below; n_deq keeps the burst size. */
+ n_ops = n_deq = rte_cryptodev_dequeue_burst (res->dev_id,
+ res->qp_id + outbound,
+ ops, VLIB_FRAME_SIZE);
+
+ res->inflights[outbound] -= n_ops;
+
+ /* Trace before the loops, which reset op->status. */
+ dpdk_crypto_input_trace (vm, node, ops, n_deq);
+
+ while (n_ops >= 4)
+ {
+ struct rte_crypto_op *op0, *op1, *op2, *op3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+
+ /* Prefetch next iteration. */
+ if (n_ops >= 8)
+ {
+ CLIB_PREFETCH (ops[4], CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (ops[5], CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (ops[6], CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (ops[7], CLIB_CACHE_LINE_BYTES, LOAD);
+
+ CLIB_PREFETCH (crypto_op_get_priv (ops[4]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ CLIB_PREFETCH (crypto_op_get_priv (ops[5]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ CLIB_PREFETCH (crypto_op_get_priv (ops[6]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ CLIB_PREFETCH (crypto_op_get_priv (ops[7]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ }
+
+ op0 = ops[0];
+ op1 = ops[1];
+ op2 = ops[2];
+ op3 = ops[3];
+
+ next[0] = crypto_op_get_priv (op0)->next;
+ next[1] = crypto_op_get_priv (op1)->next;
+ next[2] = crypto_op_get_priv (op2)->next;
+ next[3] = crypto_op_get_priv (op3)->next;
+
+ /* Each op is checked against its own next slot, so a failure in one */
+ /* op drops only that packet and failures in ops 1..3 are not missed. */
+ dpdk_crypto_check_check_op (vm, node, op0, next + 0);
+ dpdk_crypto_check_check_op (vm, node, op1, next + 1);
+ dpdk_crypto_check_check_op (vm, node, op2, next + 2);
+ dpdk_crypto_check_check_op (vm, node, op3, next + 3);
+
+ b0 = vlib_buffer_from_rte_mbuf (op0->sym[0].m_src);
+ b1 = vlib_buffer_from_rte_mbuf (op1->sym[0].m_src);
+ b2 = vlib_buffer_from_rte_mbuf (op2->sym[0].m_src);
+ b3 = vlib_buffer_from_rte_mbuf (op3->sym[0].m_src);
+
+ bi[0] = vlib_get_buffer_index (vm, b0);
+ bi[1] = vlib_get_buffer_index (vm, b1);
+ bi[2] = vlib_get_buffer_index (vm, b2);
+ bi[3] = vlib_get_buffer_index (vm, b3);
+
+ op0->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+ op1->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+ op2->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+ op3->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+
+ /* next */
+ next += 4;
+ n_ops -= 4;
+ ops += 4;
+ bi += 4;
+ }
+ /* Remaining 0..3 ops, one at a time. */
+ while (n_ops > 0)
+ {
+ struct rte_crypto_op *op0;
+ vlib_buffer_t *b0;
+
+ op0 = ops[0];
+
+ next[0] = crypto_op_get_priv (op0)->next;
+
+ dpdk_crypto_check_check_op (vm, node, op0, next + 0);
+
+ /* XXX store bi0 and next0 in op0 private? */
+ b0 = vlib_buffer_from_rte_mbuf (op0->sym[0].m_src);
+ bi[0] = vlib_get_buffer_index (vm, b0);
+
+ op0->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+
+ /* next */
+ next += 1;
+ n_ops -= 1;
+ ops += 1;
+ bi += 1;
+ }
+
+ vlib_node_increment_counter (vm, node->node_index,
+ DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq);
+
+ vlib_buffer_enqueue_to_next (vm, node, bis, nexts, n_deq);
+
+ /* ops was advanced by the loops; free from the start of the array. */
+ crypto_free_ops (numa, cwm->ops, n_deq);
+
+ return n_deq;
}
static_always_inline uword
@@ -197,7 +286,7 @@ dpdk_crypto_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (res->inflights[1])
n_deq += dpdk_crypto_dequeue (vm, node, res, 1);
- if (unlikely(res->remove && !(res->inflights[0] || res->inflights[1])))
+ if (PREDICT_FALSE (res->remove && !(res->inflights[0] || res->inflights[1])))
vec_add1 (remove, res_idx[0]);
}
/* *INDENT-ON* */
ght .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */ .highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */ .highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */ .highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */ .highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */ .highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */ .highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */ .highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */ .highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */ .highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */ .highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */ .highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */ .highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */ .highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */ .highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */ .highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */ .highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */ .highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */ .highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */ .highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */ .highlight .vc { color: #336699 } /* Name.Variable.Class */ .highlight .vg { color: #dd7700 } /* Name.Variable.Global */ .highlight .vi { color: #3333bb } /* Name.Variable.Instance */ .highlight .vm { color: #336699 } /* Name.Variable.Magic */ .highlight .il { color: #0000DD; font-weight: bold } /* 
Literal.Number.Integer.Long */ }
# Copyright (c) 2018 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

*** Settings ***
| Resource | resources/libraries/robot/performance/performance_setup.robot
| ...
| Force Tags | 2_NODE_SINGLE_LINK_TOPO | PERFTEST | HW_ENV | MRR
| ... | NIC_Intel-XXV710 | ETH | IP4FWD | FEATURE | ACL | ACL_STATEFUL
| ... | IACL | ACL50 | 10k_FLOWS
| ...
| Suite Setup | Run Keywords
| ... | Set up 2-node performance topology with DUT's NIC model | L3
| ... | Intel-XXV710
| ... | AND | Set up performance test suite with ACL
| Suite Teardown | Tear down 2-node performance topology
| ...
| Test Setup | Set up performance test
| ...
| Test Teardown | Tear down performance mrr test
| ...
| Test Template | Local Template
| ...
| Documentation | *Raw results IPv4 test cases with ACL*
| ...
| ... | *[Top] Network Topologies:* TG-DUT1-TG 2-node circular topology\
| ... | with single links between nodes.
| ... | *[Enc] Packet Encapsulations:* Eth-IPv4-UDP for IPv4 routing.
| ... | *[Cfg] DUT configuration:* DUT1 is configured with IPv4 routing.\
| ... | Required ACL rules are applied to input paths of both DUT1 interfaces.\
| ... | DUT1 is tested with 2p25GE NIC XXV710 by Intel.
| ... | *[Ver] TG verification:* In MaxReceivedRate test TG sends traffic\
| ... | at line rate and reports total received/sent packets over trial period.\
| ... | Test packets are generated by TG on links to DUT1. TG traffic profile\
| ... | contains two L3 flow-groups (flow-group per direction, ${flows_per_dir}\
| ... | flows per flow-group) with all packets containing Ethernet header, IPv4\
| ... | header with IP protocol=61 and static payload. MAC addresses are\
| ... | matching MAC addresses of the TG node interfaces.
| ... | *[Ref] Applicable standard specifications:* RFC2544.

*** Variables ***
# XXV710-DA2 bandwidth limit ~49Gbps/2=24.5Gbps
| ${s_24.5G} | ${24500000000}
# XXV710-DA2 Mpps limit 37.5Mpps/2=18.75Mpps
| ${s_18.75Mpps} | ${18750000}

# ACL test setup
# NOTE(review): the ACL variables below are not referenced by name in this
# suite; presumably they are read by the ACL setup keywords from the shared
# resource file - confirm there before renaming or removing any of them.
| ${acl_action}= | permit+reflect
| ${acl_apply_type}= | input
| ${no_hit_aces_number}= | 50
# ${flows_per_dir} is interpolated into the suite Documentation text.
| ${flows_per_dir}= | 10k

# starting points for non-hitting ACLs
| ${src_ip_start}= | 30.30.30.1
| ${dst_ip_start}= | 40.40.40.1
| ${ip_step}= | ${1}
| ${sport_start}= | ${1000}
| ${dport_start}= | ${1000}
| ${port_step}= | ${1}
| ${trex_stream1_subnet}= | 10.10.10.0/24
| ${trex_stream2_subnet}= | 20.20.20.0/24

# Traffic profile name that Local Template passes to the traffic check step.
| ${traffic_profile}= | trex-sl-2n-ethip4udp-10u1000p-conc

*** Keywords ***
| Local Template
| | ...
| | [Documentation]
| | ... | [Cfg] DUT runs IPv4 routing config.
| | ... | Each DUT uses ${phy_cores} physical core(s) for worker threads.
| | ... | [Ver] Measure MaxReceivedRate for ${framesize}B frames using single\
| | ... | trial throughput test.
| | ...
| | ... | *Arguments:*
| | ... | - framesize - Framesize in Bytes in integer or string (IMIX_v4_1).
| | ... | Type: integer, string
| | ... | - phy_cores - Number of physical cores. Type: integer
| | ... | - rxq - Number of RX queues, default value: ${None}. Type: integer
| | ...
| | [Arguments] | ${framesize} | ${phy_cores} | ${rxq}=${None}
| | ...
| | # Worker/RX-queue and PCI settings are added before the startup
| | # configuration is applied further below.
| | Given Add worker threads and rxqueues to all DUTs | ${phy_cores} | ${rxq}
| | And Add PCI devices to all DUTs
| | # The rate is derived from the suite-level bandwidth and pps limits;
| | # ${jumbo} is returned here but not used by the remaining steps.
| | ${max_rate} | ${jumbo} = | Get Max Rate And Jumbo And Handle Multi Seg
| | ... | ${s_24.5G} | ${framesize} | pps_limit=${s_18.75Mpps}
| | And Apply startup configuration on all VPP DUTs
| | # Number of addresses embedded into the routing/ACL keyword name below.
| | ${ip_nr}= | Set Variable | 10
| | When Initialize IPv4 routing for '${ip_nr}' addresses with IPv4 ACLs on DUT1 in circular topology
| | Then Traffic should pass with maximum rate
| | ... | ${max_rate}pps | ${framesize} | ${traffic_profile}

*** Test Cases ***
# Each test case is a data row for the suite's Test Template (Local Template):
# it supplies framesize and phy_cores, and its tags repeat the frame size and
# core count that are encoded in the test name.
| tc01-64B-1c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 64B | 1C
| | framesize=${64} | phy_cores=${1}

| tc02-64B-2c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 64B | 2C
| | framesize=${64} | phy_cores=${2}

| tc03-64B-4c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 64B | 4C
| | framesize=${64} | phy_cores=${4}

| tc04-1518B-1c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 1518B | 1C
| | framesize=${1518} | phy_cores=${1}

| tc05-1518B-2c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 1518B | 2C
| | framesize=${1518} | phy_cores=${2}

| tc06-1518B-4c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 1518B | 4C
| | framesize=${1518} | phy_cores=${4}

| tc07-9000B-1c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 9000B | 1C
| | framesize=${9000} | phy_cores=${1}

| tc08-9000B-2c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 9000B | 2C
| | framesize=${9000} | phy_cores=${2}

| tc09-9000B-4c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | 9000B | 4C
| | framesize=${9000} | phy_cores=${4}

| tc10-IMIX-1c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | IMIX | 1C
| | framesize=IMIX_v4_1 | phy_cores=${1}

| tc11-IMIX-2c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | IMIX | 2C
| | framesize=IMIX_v4_1 | phy_cores=${2}

| tc12-IMIX-4c-ethip4udp-ip4base-iacl50sf-10kflows-mrr
| | [Tags] | IMIX | 4C
| | framesize=IMIX_v4_1 | phy_cores=${4}