summaryrefslogtreecommitdiffstats
path: root/src/vnet
diff options
context:
space:
mode:
authorGovindarajan Mohandoss <govindarajan.mohandoss@arm.com>2021-03-19 19:20:49 +0000
committerDamjan Marion <dmarion@me.com>2021-10-12 16:43:18 +0000
commit6d7dfcbfa4bc05f1308fc677f19ade44ea699da1 (patch)
treeeb17ffe94db34644ccfb870732a8c6e3d6ba58b7 /src/vnet
parentd9e9870dd941bfb826530815e3196ced0b544b5d (diff)
ipsec: Performance improvement of ipsec4_output_node using flow cache
Adding flow cache support to improve outbound IPv4/IPSec SPD lookup performance.

Details about flow cache:
Mechanism:
1. The first packet of a flow will undergo a linear search in the SPD table. Once a policy match is found, a new entry will be added into the flow cache. From the 2nd packet onwards, the policy lookup will happen in the flow cache.
2. The flow cache is implemented using bihash without collision handling. This avoids the logic to age out or recycle the old flows in the flow cache. Whenever a collision occurs, the old entry will be overwritten by the new entry. The worst case is when all the 256 packets in a batch result in a collision and fall back to linear search. The average and best case will be O(1).
3. The size of the flow cache is fixed and decided based on the number of flows to be supported. The default is set to 1 million flows. This can be made a configurable option as a next step.
4. Whenever an SPD rule is added/deleted by the control plane, the flow cache entries will be completely deleted (reset) in the control plane. The assumption here is that SPD rule add/del is not a frequent operation from the control plane. Flow cache reset is done by putting the data plane in fall back mode, to bypass the flow cache and do a linear search until the SPD rule add/delete operation is complete. Once the rule is successfully added/deleted, the data plane will be allowed to make use of the flow cache. The flow cache will be reset only after flushing out the inflight packets from all the worker cores using vlib_worker_wait_one_loop().

Details about bihash usage:
1. A new bihash template (16_8) is added to support the IPv4 5-tuple. BIHASH_KVP_PER_PAGE and BIHASH_KVP_AT_BUCKET_LEVEL are set to 1 in the new template. It means only one KVP is supported per bucket.
2. Collision handling is avoided by calling the BV (clib_bihash_add_or_overwrite_stale) function. Through the stale callback function pointer, the KVP entry will be overwritten during a collision.
3. Flow cache reset is done using the BV (clib_bihash_foreach_key_value_pair) function. Through the callback function pointer, the KVP value is reset to ~0ULL.

MRR performance numbers with 1 core, 1 ESP Tunnel, null-encrypt, 64B for different SPD policy matching indices:

SPD Policy index    : 1          10         100        1000
Throughput          : MPPS/MPPS  MPPS/MPPS  MPPS/MPPS  KPPS/MPPS
(Baseline/Optimized)
ARM Neoverse N1     : 5.2/4.84   4.55/4.84  2.11/4.84  329.5/4.84
ARM TX2             : 2.81/2.6   2.51/2.6   1.27/2.6   176.62/2.6
INTEL SKX           : 4.93/4.48  4.29/4.46  2.05/4.48  336.79/4.47

Next Steps:
Following can be made as a configurable option through startup conf at IPSec level:
1. Enable/Disable Flow cache.
2. Bihash configuration like number of buckets and memory size.
3. Dual/Quad loop unroll can be applied around bihash to further improve the performance.
4. The same flow cache logic can be applied for IPv6 as well as in the IPSec inbound direction. A deeper and wider flow cache using bihash_40_8 can replace the existing bihash_16_8, to make it common for both IPv4 and IPv6 in both outbound and inbound directions.

Following changes are made based on the review comments:
1. ON/OFF flow cache through startup conf. Default: OFF
2. Flow cache stale entry detection using epoch counter.
3. Avoid host order endianness conversion during flow cache lookup.
4. Move IPSec startup conf to a common file.
5. Added SPD flow cache unit test case
6. Replaced bihash with vectors to implement flow cache.
7. ipsec_add_del_policy API is not mpsafe. Cleaned up inflight packets check in control plane.

Type: improvement
Signed-off-by: mgovind <govindarajan.Mohandoss@arm.com>
Signed-off-by: Zachary Leaf <zachary.leaf@arm.com>
Tested-by: Jieqiang Wang <jieqiang.wang@arm.com>
Change-Id: I62b4d6625fbc6caf292427a5d2046aa5672b2006
Diffstat (limited to 'src/vnet')
-rw-r--r--src/vnet/ipsec/ipsec.c32
-rw-r--r--src/vnet/ipsec/ipsec.h71
-rw-r--r--src/vnet/ipsec/ipsec_cli.c5
-rw-r--r--src/vnet/ipsec/ipsec_format.c11
-rw-r--r--src/vnet/ipsec/ipsec_output.c137
-rw-r--r--src/vnet/ipsec/ipsec_spd.h2
-rw-r--r--src/vnet/ipsec/ipsec_spd_policy.c23
7 files changed, 264 insertions, 17 deletions
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index d154b519ecb..30774ec10ff 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -26,6 +26,10 @@
#include <vnet/ipsec/ah.h>
#include <vnet/ipsec/ipsec_tun.h>
+/* Default number of buckets in the IPv4 outbound SPD flow cache.
+ * Flow cache is sized for 1 million flows with a load factor of .25,
+ * i.e. 1 << 22 (4194304) buckets.  ipsec_init () installs this value in
+ * ipsec4_out_spd_hash_num_buckets; the "ipv4-outbound-spd-hash-buckets"
+ * startup option parsed in ipsec_config () replaces it.
+ */
+#define IPSEC4_OUT_SPD_DEFAULT_HASH_NUM_BUCKETS (1 << 22)
+
ipsec_main_t ipsec_main;
esp_async_post_next_t esp_encrypt_async_next;
esp_async_post_next_t esp_decrypt_async_next;
@@ -545,6 +549,13 @@ ipsec_init (vlib_main_t * vm)
im->async_mode = 0;
crypto_engine_backend_register_post_node (vm);
+ im->ipsec4_out_spd_hash_tbl = NULL;
+ im->flow_cache_flag = 0;
+ im->ipsec4_out_spd_flow_cache_entries = 0;
+ im->epoch_count = 0;
+ im->ipsec4_out_spd_hash_num_buckets =
+ IPSEC4_OUT_SPD_DEFAULT_HASH_NUM_BUCKETS;
+
return 0;
}
@@ -553,11 +564,25 @@ VLIB_INIT_FUNCTION (ipsec_init);
static clib_error_t *
ipsec_config (vlib_main_t *vm, unformat_input_t *input)
{
+ ipsec_main_t *im = &ipsec_main;
unformat_input_t sub_input;
+ u32 ipsec4_out_spd_hash_num_buckets;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "ip4 %U", unformat_vlib_cli_sub_input, &sub_input))
+ if (unformat (input, "ipv4-outbound-spd-flow-cache on"))
+ im->flow_cache_flag = 1;
+ else if (unformat (input, "ipv4-outbound-spd-flow-cache off"))
+ im->flow_cache_flag = 0;
+ else if (unformat (input, "ipv4-outbound-spd-hash-buckets %d",
+ &ipsec4_out_spd_hash_num_buckets))
+ {
+ /* Size of hash is power of 2 >= number of buckets */
+ im->ipsec4_out_spd_hash_num_buckets =
+ 1ULL << max_log2 (ipsec4_out_spd_hash_num_buckets);
+ }
+ else if (unformat (input, "ip4 %U", unformat_vlib_cli_sub_input,
+ &sub_input))
{
uword table_size = ~0;
u32 n_buckets = ~0;
@@ -594,6 +619,11 @@ ipsec_config (vlib_main_t *vm, unformat_input_t *input)
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
+ if (im->flow_cache_flag)
+ {
+ vec_add2 (im->ipsec4_out_spd_hash_tbl, im->ipsec4_out_spd_hash_tbl,
+ im->ipsec4_out_spd_hash_num_buckets);
+ }
return 0;
}
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 0245c5575e4..968d377cea0 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -36,6 +36,26 @@ typedef clib_error_t *(*enable_disable_cb_t) (int is_enable);
typedef struct
{
+ u64 key[2]; /* 16-byte lookup key; ipsec4_spd_5tuple_t overlays the
+ * 5-tuple fields on the start of this entry */
+ u64 value; /* (policy index << 32) | epoch count, as written by
+ * ipsec4_out_spd_add_flow_cache_entry () */
+ i32 bucket_lock; /* lock word passed to ipsec_spinlock_lock () / _unlock () */
+ u32 un_used; /* no visible user; presumably pads the entry to 32 bytes -
+ * TODO confirm */
+} ipsec4_hash_kv_16_8_t;
+
+typedef union
+{
+ struct
+ {
+ ip4_address_t ip4_addr[2]; /* { la, ra } as passed to the flow cache helpers */
+ u16 port[2]; /* { lp, rp } as passed to the flow cache helpers */
+ u8 proto; /* IP protocol number (pr) */
+ u8 pad[3]; /* not named by the designated initializers in
+ * ipsec_output.c, so it is zero-initialized there;
+ * presumably rounds the tuple up to the 16 bytes of
+ * kv_16_8.key - TODO confirm */
+ };
+ ipsec4_hash_kv_16_8_t kv_16_8; /* same storage viewed as a flow cache entry */
+} ipsec4_spd_5tuple_t;
+
+typedef struct
+{
u8 *name;
/* add/del callback */
add_del_sa_sess_cb_t add_del_sa_sess_cb;
@@ -130,6 +150,7 @@ typedef struct
uword *ipsec_if_real_dev_by_show_dev;
uword *ipsec_if_by_sw_if_index;
+ ipsec4_hash_kv_16_8_t *ipsec4_out_spd_hash_tbl;
clib_bihash_8_16_t tun4_protect_by_key;
clib_bihash_24_16_t tun6_protect_by_key;
@@ -206,8 +227,13 @@ typedef struct
u32 esp4_dec_tun_fq_index;
u32 esp6_dec_tun_fq_index;
+ /* Number of buckets for flow cache */
+ u32 ipsec4_out_spd_hash_num_buckets;
+ u32 ipsec4_out_spd_flow_cache_entries;
+ u32 epoch_count;
u8 async_mode;
u16 msg_id_base;
+ u8 flow_cache_flag;
} ipsec_main_t;
typedef enum ipsec_format_flags_t_
@@ -247,6 +273,51 @@ get_next_output_feature_node_index (vlib_buffer_t * b,
return node->next_nodes[next];
}
+/* Hash the 16-byte key of a flow cache entry.
+ *
+ * When clib_crc32c_uses_intrinsics is defined the raw key bytes are fed to
+ * clib_crc32c (); otherwise the two 64-bit key words are XOR-ed together
+ * and passed to clib_xxhash ().  Only v->key is read; callers mask the
+ * result down to a bucket index.
+ */
+static_always_inline u64
+ipsec4_hash_16_8 (ipsec4_hash_kv_16_8_t *v)
+{
+#ifndef clib_crc32c_uses_intrinsics
+  return clib_xxhash (v->key[0] ^ v->key[1]);
+#else
+  u8 *key_bytes = (u8 *) v->key;
+
+  return clib_crc32c (key_bytes, 16);
+#endif
+}
+
+/* Compare two 16-byte flow cache keys.
+ *
+ * a, b: pointers to two consecutive u64 words each.
+ * Returns non-zero when both words match, 0 otherwise.  A 128-bit
+ * unaligned vector compare is used when the build provides one.
+ */
+static_always_inline int
+ipsec4_hash_key_compare_16_8 (u64 *a, u64 *b)
+{
+#if !defined(CLIB_HAVE_VEC128) ||                                            \
+  !defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+  return (a[0] == b[0]) && (a[1] == b[1]);
+#else
+  u64x2 lhs = u64x2_load_unaligned (a);
+  u64x2 rhs = u64x2_load_unaligned (b);
+
+  return u64x2_is_all_zero (lhs ^ rhs);
+#endif
+}
+
+/* Acquire the lock word embedded in a flow cache bucket.
+ * clib_spinlock_lock is not used to save another memory indirection.
+ * Spins until the word pointed to by lock is swapped from 0 to 1.
+ * NOTE(review): the acq_relax suffix suggests acquire ordering on success
+ * and relaxed ordering on failure - confirm against the vppinfra macro. */
+static_always_inline void
+ipsec_spinlock_lock (i32 *lock)
+{
+ i32 free = 0;
+ while (!clib_atomic_cmp_and_swap_acq_relax_n (lock, &free, 1, 0))
+ {
+ /* atomic load limits number of compare_exchange executions:
+ * wait until the lock word reads as zero before retrying the swap */
+ while (clib_atomic_load_relax_n (lock))
+ CLIB_PAUSE ();
+ /* on failure, compare_exchange writes lock into free, so put the
+ * expected "unlocked" value back before the next attempt */
+ free = 0;
+ }
+}
+
+static_always_inline void
+ipsec_spinlock_unlock (i32 *lock)
+{
+ /* Releases a lock word taken with ipsec_spinlock_lock ().
+ * Make sure all reads/writes are complete before releasing the lock.
+ * NOTE(review): presumably clib_atomic_release stores 0 with release
+ * ordering - confirm against the vppinfra macro. */
+ clib_atomic_release (lock);
+}
+
u32 ipsec_register_ah_backend (vlib_main_t * vm, ipsec_main_t * im,
const char *name,
const char *ah4_encrypt_node_name,
diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c
index bdb9c7bf698..95e8145fe92 100644
--- a/src/vnet/ipsec/ipsec_cli.c
+++ b/src/vnet/ipsec/ipsec_cli.c
@@ -427,6 +427,11 @@ ipsec_spd_show_all (vlib_main_t * vm, ipsec_main_t * im)
pool_foreach_index (spdi, im->spds) {
vlib_cli_output(vm, "%U", format_ipsec_spd, spdi);
}
+
+ if (im->flow_cache_flag)
+ {
+ vlib_cli_output (vm, "%U", format_ipsec_spd_flow_cache);
+ }
/* *INDENT-ON* */
}
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
index ec644a7dca6..751d098bcdd 100644
--- a/src/vnet/ipsec/ipsec_format.c
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -232,6 +232,17 @@ done:
}
+/* format () callback that appends the current number of IPv4 outbound
+ * SPD flow cache entries to s.  No arguments are consumed from args.
+ * Returns the (possibly reallocated) vector s. */
u8 *
+format_ipsec_spd_flow_cache (u8 *s, va_list *args)
+{
+  ipsec_main_t *ipm = &ipsec_main;
+  u32 n_entries = ipm->ipsec4_out_spd_flow_cache_entries;
+
+  return format (s, "\nip4-outbound-spd-flow-cache-entries: %u", n_entries);
+}
+
+u8 *
format_ipsec_key (u8 * s, va_list * args)
{
ipsec_key_t *key = va_arg (*args, ipsec_key_t *);
diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c
index 8fb9566fa38..84927debaca 100644
--- a/src/vnet/ipsec/ipsec_output.c
+++ b/src/vnet/ipsec/ipsec_output.c
@@ -63,9 +63,90 @@ format_ipsec_output_trace (u8 * s, va_list * args)
return s;
}
+/* Insert (or overwrite) the flow cache entry for an IPv4 outbound 5-tuple.
+ *
+ * im      IPsec main structure holding the flow cache table.
+ * pr      IP protocol number.
+ * la, ra  local / remote address, in the same byte order the lookup uses
+ *         (the caller in ipsec_output_policy_match () converts them to
+ *         network order).
+ * lp, rp  local / remote port, same byte order rule.  The lookup zeroes
+ *         the ports for protocols other than TCP/UDP/SCTP, so the caller
+ *         must pass 0 for those protocols or the entry is never found.
+ * pol_id  policy index stored in the upper 32 bits of the entry value;
+ *         the current epoch count goes in the lower 32 bits.
+ *
+ * There is no collision handling: whatever occupies the bucket is
+ * replaced.  The active entry counter is bumped when the bucket was empty
+ * or held an entry from an older epoch.
+ */
+always_inline void
+ipsec4_out_spd_add_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
+                                     u16 lp, u16 rp, u32 pol_id)
+{
+  u64 hash;
+  u8 overwrite = 0, stale_overwrite = 0;
+  /* Read the epoch once so the stored tag and the stale check agree */
+  u32 epoch = im->epoch_count;
+  ipsec4_hash_kv_16_8_t *bucket;
+  ipsec4_spd_5tuple_t ip4_5tuple = { .ip4_addr = { (ip4_address_t) la,
+                                                   (ip4_address_t) ra },
+                                     .port = { lp, rp },
+                                     .proto = pr };
+
+  ip4_5tuple.kv_16_8.value = (((u64) pol_id) << 32) | ((u64) epoch);
+
+  hash = ipsec4_hash_16_8 (&ip4_5tuple.kv_16_8);
+  hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
+  bucket = &im->ipsec4_out_spd_hash_tbl[hash];
+
+  ipsec_spinlock_lock (&bucket->bucket_lock);
+  /* Check if we are overwriting an existing entry so we know
+     whether to increment the flow cache counter. Since flow
+     cache counter is reset on any policy add/remove, but
+     hash table values are not, we also need to check if the entry
+     we are overwriting is stale or not. If it's a stale entry
+     overwrite, we still want to increment flow cache counter */
+  overwrite = (bucket->value != 0);
+  /* Check for stale entry by comparing with current epoch count */
+  if (PREDICT_FALSE (overwrite))
+    stale_overwrite = (epoch != ((u32) (bucket->value & 0xFFFFFFFF)));
+  /* Copy key and value only.  Copying the whole entry would also write
+     the stack tuple's bucket_lock field over the lock word we currently
+     hold, releasing it with a plain store before the data is published */
+  bucket->key[0] = ip4_5tuple.kv_16_8.key[0];
+  bucket->key[1] = ip4_5tuple.kv_16_8.key[1];
+  bucket->value = ip4_5tuple.kv_16_8.value;
+  ipsec_spinlock_unlock (&bucket->bucket_lock);
+
+  /* Increment the counter to track active flow cache entries
+     when entering a fresh entry or overwriting a stale one */
+  if (!overwrite || stale_overwrite)
+    clib_atomic_fetch_add_relax (&im->ipsec4_out_spd_flow_cache_entries, 1);
+
+  return;
+}
+
+/* Look up an IPv4 outbound 5-tuple in the flow cache.
+ *
+ * im      IPsec main structure holding the flow cache table and policies.
+ * pr      IP protocol number.
+ * la, ra  local / remote address exactly as the caller read them from the
+ *         IP header (no byte order conversion is done here).
+ * lp, rp  local / remote port, same rule; ignored (treated as 0) unless
+ *         pr is TCP, UDP or SCTP.
+ *
+ * Returns the cached policy when the bucket holds the same key and was
+ * written in the current epoch; NULL otherwise, in which case the caller
+ * falls back to the linear SPD search.
+ */
+always_inline ipsec_policy_t *
+ipsec4_out_spd_find_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
+                                      u16 lp, u16 rp)
+{
+  ipsec4_hash_kv_16_8_t *bucket, snapshot;
+  u64 bucket_index;
+  u32 entry_epoch, policy_index;
+
+  /* Ports only take part in the key for TCP, UDP and SCTP */
+  if (PREDICT_FALSE (!(pr == IP_PROTOCOL_TCP || pr == IP_PROTOCOL_UDP ||
+                       pr == IP_PROTOCOL_SCTP)))
+    lp = rp = 0;
+
+  ipsec4_spd_5tuple_t lookup_key = { .ip4_addr = { (ip4_address_t) la,
+                                                   (ip4_address_t) ra },
+                                     .port = { lp, rp },
+                                     .proto = pr };
+
+  bucket_index = ipsec4_hash_16_8 (&lookup_key.kv_16_8) &
+                 (im->ipsec4_out_spd_hash_num_buckets - 1);
+  bucket = &im->ipsec4_out_spd_hash_tbl[bucket_index];
+
+  /* Take a private copy of the bucket under its lock, then work on it */
+  ipsec_spinlock_lock (&bucket->bucket_lock);
+  snapshot = *bucket;
+  ipsec_spinlock_unlock (&bucket->bucket_lock);
+
+  if (!ipsec4_hash_key_compare_16_8 ((u64 *) &lookup_key.kv_16_8,
+                                     (u64 *) &snapshot))
+    return NULL;
+
+  /* Lower 32 bits of the value carry the epoch the entry was written in */
+  entry_epoch = (u32) (snapshot.value & 0xFFFFFFFF);
+  if (im->epoch_count != entry_epoch)
+    return NULL;
+
+  /* Upper 32 bits of the value carry the policy index */
+  policy_index = (u32) (snapshot.value >> 32);
+  return pool_elt_at_index (im->policies, policy_index);
+}
+
always_inline ipsec_policy_t *
-ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp,
- u16 rp)
+ipsec_output_policy_match (ipsec_spd_t *spd, u8 pr, u32 la, u32 ra, u16 lp,
+ u16 rp, u8 flow_cache_enabled)
{
ipsec_main_t *im = &ipsec_main;
ipsec_policy_t *p;
@@ -92,10 +173,13 @@ ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp,
if (la > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
continue;
- if (PREDICT_FALSE
- ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)
- && (pr != IP_PROTOCOL_SCTP)))
- return p;
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ lp = 0;
+ rp = 0;
+ goto add_flow_cache;
+ }
if (lp < p->lport.start)
continue;
@@ -109,6 +193,15 @@ ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp,
if (rp > p->rport.stop)
continue;
+ add_flow_cache:
+ if (flow_cache_enabled)
+ {
+ /* Add an Entry in Flow cache */
+ ipsec4_out_spd_add_flow_cache_entry (
+ im, pr, clib_host_to_net_u32 (la), clib_host_to_net_u32 (ra),
+ clib_host_to_net_u16 (lp), clib_host_to_net_u16 (rp), *i);
+ }
+
return p;
}
return 0;
@@ -185,6 +278,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ipsec_spd_t *spd0 = 0;
int bogus;
u64 nc_protect = 0, nc_bypass = 0, nc_discard = 0, nc_nomatch = 0;
+ u8 flow_cache_enabled = im->flow_cache_flag;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -194,7 +288,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 bi0, pi0, bi1;
vlib_buffer_t *b0, *b1;
- ipsec_policy_t *p0;
+ ipsec_policy_t *p0 = NULL;
ip4_header_t *ip0;
ip6_header_t *ip6_0 = 0;
udp_header_t *udp0;
@@ -262,15 +356,26 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
sw_if_index0, spd_index0, spd0->id);
#endif
- p0 = ipsec_output_policy_match (spd0, ip0->protocol,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- clib_net_to_host_u16
- (udp0->src_port),
- clib_net_to_host_u16
- (udp0->dst_port));
+ /*
+ * Check whether flow cache is enabled.
+ */
+ if (flow_cache_enabled)
+ {
+ p0 = ipsec4_out_spd_find_flow_cache_entry (
+ im, ip0->protocol, ip0->src_address.as_u32,
+ ip0->dst_address.as_u32, udp0->src_port, udp0->dst_port);
+ }
+
+ /* Fall back to linear search if flow cache lookup fails */
+ if (p0 == NULL)
+ {
+ p0 = ipsec_output_policy_match (
+ spd0, ip0->protocol,
+ clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ clib_net_to_host_u16 (udp0->src_port),
+ clib_net_to_host_u16 (udp0->dst_port), flow_cache_enabled);
+ }
}
tcp0 = (void *) udp0;
diff --git a/src/vnet/ipsec/ipsec_spd.h b/src/vnet/ipsec/ipsec_spd.h
index 3637c27287d..5bfc6ae56f6 100644
--- a/src/vnet/ipsec/ipsec_spd.h
+++ b/src/vnet/ipsec/ipsec_spd.h
@@ -64,6 +64,8 @@ extern int ipsec_set_interface_spd (vlib_main_t * vm,
extern u8 *format_ipsec_spd (u8 * s, va_list * args);
+extern u8 *format_ipsec_spd_flow_cache (u8 *s, va_list *args);
+
#endif /* __IPSEC_SPD_H__ */
/*
diff --git a/src/vnet/ipsec/ipsec_spd_policy.c b/src/vnet/ipsec/ipsec_spd_policy.c
index 05cfdf0a671..85acf7aea7b 100644
--- a/src/vnet/ipsec/ipsec_spd_policy.c
+++ b/src/vnet/ipsec/ipsec_spd_policy.c
@@ -156,6 +156,29 @@ ipsec_add_del_policy (vlib_main_t * vm,
if (!spd)
return VNET_API_ERROR_SYSCALL_ERROR_1;
+ if (im->flow_cache_flag && !policy->is_ipv6 &&
+ policy->type == IPSEC_SPD_POLICY_IP4_OUTBOUND)
+ {
+ /*
+ * Flow cache entry is valid only when epoch_count value in control
+ * plane and data plane match. Otherwise, flow cache entry is considered
+ * stale. To avoid the race condition of using old epoch_count value
+ * in data plane after the roll over of epoch_count in control plane,
+ * entire flow cache is reset.
+ */
+ if (im->epoch_count == 0xFFFFFFFF)
+ {
+ /* Reset all the entries in flow cache */
+ clib_memset_u8 (im->ipsec4_out_spd_hash_tbl, 0,
+ im->ipsec4_out_spd_hash_num_buckets *
+ (sizeof (*(im->ipsec4_out_spd_hash_tbl))));
+ }
+ /* Increment epoch counter by 1 */
+ clib_atomic_fetch_add_relax (&im->epoch_count, 1);
+ /* Reset spd flow cache counter since all old entries are stale */
+ clib_atomic_store_relax_n (&im->ipsec4_out_spd_flow_cache_entries, 0);
+ }
+
if (is_add)
{
u32 policy_index;