summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNathan Skrzypczak <nathan.skrzypczak@gmail.com>2020-11-04 11:41:05 +0100
committerBeno�t Ganne <bganne@cisco.com>2023-08-09 08:23:45 +0000
commit6d733a93b2eb9c16196ee17d5cdc77db21589571 (patch)
tree7e23016a4ceafedfe85974b58d56a4007780c52b
parent897844d1e96797f69e3e7e11d237d847fe4e46bf (diff)
cnat: remove rwlock on ts
Type: improvement Remove rwlock contention on timestamps. ~10% pps with 10k sessions. Use fixed-size-pools of increasing sizes starting with 4K, and with a x2 step each time. We don't free/shrink allocated pools. Change-Id: I5fea51faba40430106c823275a6356e81709d118 Signed-off-by: Nathan Skrzypczak <nathan.skrzypczak@gmail.com>
-rw-r--r--src/plugins/cnat/cnat_client.c7
-rw-r--r--src/plugins/cnat/cnat_client.h2
-rw-r--r--src/plugins/cnat/cnat_inline.h104
-rw-r--r--src/plugins/cnat/cnat_scanner.c1
-rw-r--r--src/plugins/cnat/cnat_session.c40
-rw-r--r--src/plugins/cnat/cnat_types.c17
-rw-r--r--src/plugins/cnat/cnat_types.h23
7 files changed, 133 insertions, 61 deletions
diff --git a/src/plugins/cnat/cnat_client.c b/src/plugins/cnat/cnat_client.c
index 73835b093f0..5f0590efbf8 100644
--- a/src/plugins/cnat/cnat_client.c
+++ b/src/plugins/cnat/cnat_client.c
@@ -20,10 +20,9 @@
#include <cnat/cnat_translation.h>
cnat_client_t *cnat_client_pool;
-
cnat_client_db_t cnat_client_db;
-
dpo_type_t cnat_client_dpo;
+fib_source_t cnat_fib_source;
static_always_inline u8
cnat_client_is_clone (cnat_client_t * cc)
@@ -302,7 +301,6 @@ cnat_client_show (vlib_main_t * vm,
vlib_cli_output(vm, "%U", format_cnat_client, cci, 0);
vlib_cli_output (vm, "%d clients", pool_elts (cnat_client_pool));
- vlib_cli_output (vm, "%d timestamps", pool_elts (cnat_timestamps));
}
else
{
@@ -389,6 +387,9 @@ cnat_client_init (vlib_main_t * vm)
clib_bihash_init_16_8 (&cnat_client_db.cc_ip_id_hash, "CNat client DB",
cm->client_hash_buckets, cm->client_hash_memory);
+ cnat_fib_source = fib_source_allocate ("cnat", CNAT_FIB_SOURCE_PRIORITY,
+ FIB_SOURCE_BH_SIMPLE);
+
clib_spinlock_init (&cnat_client_db.throttle_lock);
cnat_client_db.throttle_mem =
hash_create_mem (0, sizeof (ip_address_t), sizeof (uword));
diff --git a/src/plugins/cnat/cnat_client.h b/src/plugins/cnat/cnat_client.h
index db6933c3b95..4dc6b754b2f 100644
--- a/src/plugins/cnat/cnat_client.h
+++ b/src/plugins/cnat/cnat_client.h
@@ -86,8 +86,6 @@ extern void cnat_client_free_by_ip (ip46_address_t * addr, u8 af);
extern cnat_client_t *cnat_client_pool;
extern dpo_type_t cnat_client_dpo;
-#define CC_INDEX_INVALID ((u32)(~0))
-
static_always_inline cnat_client_t *
cnat_client_get (index_t i)
{
diff --git a/src/plugins/cnat/cnat_inline.h b/src/plugins/cnat/cnat_inline.h
index 5a55ecbf3c0..2986b3497a9 100644
--- a/src/plugins/cnat/cnat_inline.h
+++ b/src/plugins/cnat/cnat_inline.h
@@ -19,72 +19,122 @@
#include <cnat/cnat_types.h>
+always_inline int
+cnat_ts_is_free_index (u32 index)
+{
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ return pool_is_free_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline cnat_timestamp_t *
+cnat_timestamp_get (u32 index)
+{
+ /* 6 top bits for choosing pool */
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline cnat_timestamp_t *
+cnat_timestamp_get_if_valid (u32 index)
+{
+ /* 6 top bits for choosing pool */
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ if (pidx >= cnat_timestamps.next_empty_pool_idx)
+ return (NULL);
+ if (pool_is_free_index (cnat_timestamps.ts_pools[pidx], index))
+ return (NULL);
+ return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline index_t
+cnat_timestamp_alloc ()
+{
+ cnat_timestamp_t *ts;
+ u32 index, pool_sz;
+ uword pidx;
+
+ clib_spinlock_lock (&cnat_timestamps.ts_lock);
+ pidx = clib_bitmap_first_set (cnat_timestamps.ts_free);
+ pool_sz = 1 << (CNAT_TS_BASE_SIZE + pidx);
+ ASSERT (pidx <= cnat_timestamps.next_empty_pool_idx);
+ if (pidx == cnat_timestamps.next_empty_pool_idx)
+ pool_init_fixed (
+ cnat_timestamps.ts_pools[cnat_timestamps.next_empty_pool_idx++],
+ pool_sz);
+ pool_get (cnat_timestamps.ts_pools[pidx], ts);
+ if (pool_elts (cnat_timestamps.ts_pools[pidx]) == pool_sz)
+ clib_bitmap_set (cnat_timestamps.ts_free, pidx, 0);
+ clib_spinlock_unlock (&cnat_timestamps.ts_lock);
+
+ index = (u32) pidx << (32 - CNAT_TS_MPOOL_BITS);
+ return index | (ts - cnat_timestamps.ts_pools[pidx]);
+}
+
+always_inline void
+cnat_timestamp_destroy (u32 index)
+{
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ clib_spinlock_lock (&cnat_timestamps.ts_lock);
+ pool_put_index (cnat_timestamps.ts_pools[pidx], index);
+ clib_bitmap_set (cnat_timestamps.ts_free, pidx, 1);
+ clib_spinlock_unlock (&cnat_timestamps.ts_lock);
+}
+
always_inline u32
cnat_timestamp_new (f64 t)
{
- u32 index;
- cnat_timestamp_t *ts;
- clib_rwlock_writer_lock (&cnat_main.ts_lock);
- pool_get (cnat_timestamps, ts);
+ index_t index = cnat_timestamp_alloc ();
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
ts->lifetime = cnat_main.session_max_age;
ts->refcnt = CNAT_TIMESTAMP_INIT_REFCNT;
- index = ts - cnat_timestamps;
- clib_rwlock_writer_unlock (&cnat_main.ts_lock);
return index;
}
always_inline void
cnat_timestamp_inc_refcnt (u32 index)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
- ts->refcnt++;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
+ clib_atomic_add_fetch (&ts->refcnt, 1);
}
always_inline void
cnat_timestamp_update (u32 index, f64 t)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline void
cnat_timestamp_set_lifetime (u32 index, u16 lifetime)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->lifetime = lifetime;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline f64
cnat_timestamp_exp (u32 index)
{
f64 t;
- if (INDEX_INVALID == index)
+ cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
+ if (NULL == ts)
return -1;
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
t = ts->last_seen + (f64) ts->lifetime;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
return t;
}
always_inline void
cnat_timestamp_free (u32 index)
{
- if (INDEX_INVALID == index)
+ cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
+ if (NULL == ts)
return;
- clib_rwlock_writer_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
- ts->refcnt--;
- if (0 == ts->refcnt)
- pool_put (cnat_timestamps, ts);
- clib_rwlock_writer_unlock (&cnat_main.ts_lock);
+ if (0 == clib_atomic_sub_fetch (&ts->refcnt, 1))
+ cnat_timestamp_destroy (index);
}
/*
diff --git a/src/plugins/cnat/cnat_scanner.c b/src/plugins/cnat/cnat_scanner.c
index b3591f7e8b0..2f982711581 100644
--- a/src/plugins/cnat/cnat_scanner.c
+++ b/src/plugins/cnat/cnat_scanner.c
@@ -14,6 +14,7 @@
*/
#include <cnat/cnat_session.h>
+#include <vlibmemory/api.h>
#include <cnat/cnat_client.h>
static uword
diff --git a/src/plugins/cnat/cnat_session.c b/src/plugins/cnat/cnat_session.c
index ea34d913217..0f1cd43f501 100644
--- a/src/plugins/cnat/cnat_session.c
+++ b/src/plugins/cnat/cnat_session.c
@@ -94,7 +94,8 @@ format_cnat_session (u8 * s, va_list * args)
cnat_session_t *sess = va_arg (*args, cnat_session_t *);
CLIB_UNUSED (int verbose) = va_arg (*args, int);
f64 ts = 0;
- if (!pool_is_free_index (cnat_timestamps, sess->value.cs_ts_index))
+
+ if (!cnat_ts_is_free_index (sess->value.cs_ts_index))
ts = cnat_timestamp_exp (sess->value.cs_ts_index);
s = format (
@@ -279,6 +280,12 @@ cnat_session_init (vlib_main_t * vm)
cm->session_hash_memory);
BV (clib_bihash_set_kvp_format_fn) (&cnat_session_db, format_cnat_session);
+ cnat_timestamps.next_empty_pool_idx = 0;
+ clib_bitmap_alloc (cnat_timestamps.ts_free, 1 << CNAT_TS_MPOOL_BITS);
+ clib_bitmap_set_region (cnat_timestamps.ts_free, 0, 1,
+ 1 << CNAT_TS_MPOOL_BITS);
+ clib_spinlock_init (&cnat_timestamps.ts_lock);
+
return (NULL);
}
@@ -289,21 +296,38 @@ cnat_timestamp_show (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
cnat_timestamp_t *ts;
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- pool_foreach (ts, cnat_timestamps)
+ int ts_cnt = 0, cnt;
+ u8 verbose = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
- ts - cnat_timestamps, ts->last_seen, ts->lifetime,
- ts->refcnt);
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input));
+ }
+
+ for (int i = 0; i < cnat_timestamps.next_empty_pool_idx; i++)
+ {
+ cnt = pool_elts (cnat_timestamps.ts_pools[i]);
+ ts_cnt += cnt;
+ vlib_cli_output (vm, "-- Pool %d [%d/%d]", i, cnt,
+ pool_header (cnat_timestamps.ts_pools[i])->max_elts);
+ if (!verbose)
+ continue;
+ pool_foreach (ts, cnat_timestamps.ts_pools[i])
+ vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
+ ts - cnat_timestamps.ts_pools[i], ts->last_seen,
+ ts->lifetime, ts->refcnt);
}
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+ vlib_cli_output (vm, "Total timestamps %d", ts_cnt);
return (NULL);
}
VLIB_CLI_COMMAND (cnat_timestamp_show_cmd, static) = {
.path = "show cnat timestamp",
.function = cnat_timestamp_show,
- .short_help = "show cnat timestamp",
+ .short_help = "show cnat timestamp [verbose]",
.is_mp_safe = 1,
};
diff --git a/src/plugins/cnat/cnat_types.c b/src/plugins/cnat/cnat_types.c
index b09459d8a14..084a03da968 100644
--- a/src/plugins/cnat/cnat_types.c
+++ b/src/plugins/cnat/cnat_types.c
@@ -16,8 +16,7 @@
#include <cnat/cnat_types.h>
cnat_main_t cnat_main;
-fib_source_t cnat_fib_source;
-cnat_timestamp_t *cnat_timestamps;
+cnat_timestamp_mpool_t cnat_timestamps;
char *cnat_error_strings[] = {
#define cnat_error(n,s) s,
@@ -152,19 +151,6 @@ format_cnat_endpoint (u8 * s, va_list * args)
return (s);
}
-static clib_error_t *
-cnat_types_init (vlib_main_t * vm)
-{
- cnat_fib_source = fib_source_allocate ("cnat",
- CNAT_FIB_SOURCE_PRIORITY,
- FIB_SOURCE_BH_SIMPLE);
-
-
- clib_rwlock_init (&cnat_main.ts_lock);
-
- return (NULL);
-}
-
void
cnat_enable_disable_scanner (cnat_scanner_cmd_t event_type)
{
@@ -258,7 +244,6 @@ cnat_get_main ()
}
VLIB_EARLY_CONFIG_FUNCTION (cnat_config, "cnat");
-VLIB_INIT_FUNCTION (cnat_types_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h
index abae83af59a..d229d21adae 100644
--- a/src/plugins/cnat/cnat_types.h
+++ b/src/plugins/cnat/cnat_types.h
@@ -148,9 +148,6 @@ typedef struct cnat_main_
/* delay in seconds between two scans of session/clients tables */
f64 scanner_timeout;
- /* Lock for the timestamp pool */
- clib_rwlock_t ts_lock;
-
/* Index of the scanner process node */
uword scanner_node_index;
@@ -175,6 +172,23 @@ typedef struct cnat_timestamp_t_
u16 refcnt;
} cnat_timestamp_t;
+/* Create the first pool with 1 << CNAT_TS_BASE_SIZE elts */
+#define CNAT_TS_BASE_SIZE (8)
+/* reserve the top CNAT_TS_MPOOL_BITS bits for finding the pool */
+#define CNAT_TS_MPOOL_BITS (6)
+
+typedef struct cnat_timestamp_mpool_t_
+{
+ /* Increasing fixed size pools of timestamps */
+ cnat_timestamp_t *ts_pools[1 << CNAT_TS_MPOOL_BITS];
+ /* Bitmap of pools with free space */
+ uword *ts_free;
+ /* Index of next pool to init */
+ u8 next_empty_pool_idx;
+ /* ts creation lock */
+ clib_spinlock_t ts_lock;
+} cnat_timestamp_mpool_t;
+
typedef struct cnat_node_ctx_
{
f64 now;
@@ -188,8 +202,7 @@ extern u8 *format_cnat_endpoint (u8 * s, va_list * args);
extern uword unformat_cnat_ep_tuple (unformat_input_t * input,
va_list * args);
extern uword unformat_cnat_ep (unformat_input_t * input, va_list * args);
-extern cnat_timestamp_t *cnat_timestamps;
-extern fib_source_t cnat_fib_source;
+extern cnat_timestamp_mpool_t cnat_timestamps;
extern cnat_main_t cnat_main;
extern char *cnat_error_strings[];