From 6d733a93b2eb9c16196ee17d5cdc77db21589571 Mon Sep 17 00:00:00 2001 From: Nathan Skrzypczak Date: Wed, 4 Nov 2020 11:41:05 +0100 Subject: cnat: remove rwlock on ts Type: improvement Remove rwlock contention on timestamps. ~10% pps with 10k sessions. Use fixed-size-pools of increasing sizes starting with 4K, and with a x2 step each time. We don't free/shrink allocated pools. Change-Id: I5fea51faba40430106c823275a6356e81709d118 Signed-off-by: Nathan Skrzypczak --- src/plugins/cnat/cnat_client.c | 7 +-- src/plugins/cnat/cnat_client.h | 2 - src/plugins/cnat/cnat_inline.h | 104 +++++++++++++++++++++++++++++----------- src/plugins/cnat/cnat_scanner.c | 1 + src/plugins/cnat/cnat_session.c | 40 ++++++++++++---- src/plugins/cnat/cnat_types.c | 17 +------ src/plugins/cnat/cnat_types.h | 23 +++++++-- 7 files changed, 133 insertions(+), 61 deletions(-) diff --git a/src/plugins/cnat/cnat_client.c b/src/plugins/cnat/cnat_client.c index 73835b093f0..5f0590efbf8 100644 --- a/src/plugins/cnat/cnat_client.c +++ b/src/plugins/cnat/cnat_client.c @@ -20,10 +20,9 @@ #include cnat_client_t *cnat_client_pool; - cnat_client_db_t cnat_client_db; - dpo_type_t cnat_client_dpo; +fib_source_t cnat_fib_source; static_always_inline u8 cnat_client_is_clone (cnat_client_t * cc) @@ -302,7 +301,6 @@ cnat_client_show (vlib_main_t * vm, vlib_cli_output(vm, "%U", format_cnat_client, cci, 0); vlib_cli_output (vm, "%d clients", pool_elts (cnat_client_pool)); - vlib_cli_output (vm, "%d timestamps", pool_elts (cnat_timestamps)); } else { @@ -389,6 +387,9 @@ cnat_client_init (vlib_main_t * vm) clib_bihash_init_16_8 (&cnat_client_db.cc_ip_id_hash, "CNat client DB", cm->client_hash_buckets, cm->client_hash_memory); + cnat_fib_source = fib_source_allocate ("cnat", CNAT_FIB_SOURCE_PRIORITY, + FIB_SOURCE_BH_SIMPLE); + clib_spinlock_init (&cnat_client_db.throttle_lock); cnat_client_db.throttle_mem = hash_create_mem (0, sizeof (ip_address_t), sizeof (uword)); diff --git a/src/plugins/cnat/cnat_client.h b/src/plugins/cnat/cnat_client.h index db6933c3b95..4dc6b754b2f 100644 --- a/src/plugins/cnat/cnat_client.h +++ b/src/plugins/cnat/cnat_client.h @@ -86,8 +86,6 @@ extern void cnat_client_free_by_ip (ip46_address_t * addr, u8 af); extern cnat_client_t *cnat_client_pool; extern dpo_type_t cnat_client_dpo; -#define CC_INDEX_INVALID ((u32)(~0)) - static_always_inline cnat_client_t * cnat_client_get (index_t i) { diff --git a/src/plugins/cnat/cnat_inline.h b/src/plugins/cnat/cnat_inline.h index 5a55ecbf3c0..2986b3497a9 100644 --- a/src/plugins/cnat/cnat_inline.h +++ b/src/plugins/cnat/cnat_inline.h @@ -19,72 +19,122 @@ #include +always_inline int +cnat_ts_is_free_index (u32 index) +{ + u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS); + index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS); + return pool_is_free_index (cnat_timestamps.ts_pools[pidx], index); +} + +always_inline cnat_timestamp_t * +cnat_timestamp_get (u32 index) +{ + /* 6 top bits for choosing pool */ + u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS); + index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS); + return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index); +} + +always_inline cnat_timestamp_t * +cnat_timestamp_get_if_valid (u32 index) +{ + /* 6 top bits for choosing pool */ + u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS); + index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS); + if (pidx >= cnat_timestamps.next_empty_pool_idx) + return (NULL); + if (pool_is_free_index (cnat_timestamps.ts_pools[pidx], index)) + return (NULL); + return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index); +} + +always_inline index_t +cnat_timestamp_alloc () +{ + cnat_timestamp_t *ts; + u32 index, pool_sz; + uword pidx; + + clib_spinlock_lock (&cnat_timestamps.ts_lock); + pidx = clib_bitmap_first_set (cnat_timestamps.ts_free); + pool_sz = 1 << (CNAT_TS_BASE_SIZE + pidx); + ASSERT (pidx <= cnat_timestamps.next_empty_pool_idx); + if (pidx == cnat_timestamps.next_empty_pool_idx) + pool_init_fixed ( + cnat_timestamps.ts_pools[cnat_timestamps.next_empty_pool_idx++], + pool_sz); + pool_get (cnat_timestamps.ts_pools[pidx], ts); + if (pool_elts (cnat_timestamps.ts_pools[pidx]) == pool_sz) + clib_bitmap_set (cnat_timestamps.ts_free, pidx, 0); + clib_spinlock_unlock (&cnat_timestamps.ts_lock); + + index = (u32) pidx << (32 - CNAT_TS_MPOOL_BITS); + return index | (ts - cnat_timestamps.ts_pools[pidx]); +} + +always_inline void +cnat_timestamp_destroy (u32 index) +{ + u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS); + index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS); + clib_spinlock_lock (&cnat_timestamps.ts_lock); + pool_put_index (cnat_timestamps.ts_pools[pidx], index); + clib_bitmap_set (cnat_timestamps.ts_free, pidx, 1); + clib_spinlock_unlock (&cnat_timestamps.ts_lock); +} + always_inline u32 cnat_timestamp_new (f64 t) { - u32 index; - cnat_timestamp_t *ts; - clib_rwlock_writer_lock (&cnat_main.ts_lock); - pool_get (cnat_timestamps, ts); + index_t index = cnat_timestamp_alloc (); + cnat_timestamp_t *ts = cnat_timestamp_get (index); ts->last_seen = t; ts->lifetime = cnat_main.session_max_age; ts->refcnt = CNAT_TIMESTAMP_INIT_REFCNT; - index = ts - cnat_timestamps; - clib_rwlock_writer_unlock (&cnat_main.ts_lock); return index; } always_inline void cnat_timestamp_inc_refcnt (u32 index) { - clib_rwlock_reader_lock (&cnat_main.ts_lock); - cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); - ts->refcnt++; - clib_rwlock_reader_unlock (&cnat_main.ts_lock); + cnat_timestamp_t *ts = cnat_timestamp_get (index); + clib_atomic_add_fetch (&ts->refcnt, 1); } always_inline void cnat_timestamp_update (u32 index, f64 t) { - clib_rwlock_reader_lock (&cnat_main.ts_lock); - cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); + cnat_timestamp_t *ts = cnat_timestamp_get (index); ts->last_seen = t; - clib_rwlock_reader_unlock (&cnat_main.ts_lock); } always_inline void cnat_timestamp_set_lifetime (u32 index, u16 lifetime) { - clib_rwlock_reader_lock (&cnat_main.ts_lock); - cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); + cnat_timestamp_t *ts = cnat_timestamp_get (index); ts->lifetime = lifetime; - clib_rwlock_reader_unlock (&cnat_main.ts_lock); } always_inline f64 cnat_timestamp_exp (u32 index) { f64 t; - if (INDEX_INVALID == index) + cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index); + if (NULL == ts) return -1; - clib_rwlock_reader_lock (&cnat_main.ts_lock); - cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); t = ts->last_seen + (f64) ts->lifetime; - clib_rwlock_reader_unlock (&cnat_main.ts_lock); return t; } always_inline void cnat_timestamp_free (u32 index) { - if (INDEX_INVALID == index) + cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index); + if (NULL == ts) return; - clib_rwlock_writer_lock (&cnat_main.ts_lock); - cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index); - ts->refcnt--; - if (0 == ts->refcnt) - pool_put (cnat_timestamps, ts); - clib_rwlock_writer_unlock (&cnat_main.ts_lock); + if (0 == clib_atomic_sub_fetch (&ts->refcnt, 1)) + cnat_timestamp_destroy (index); } /* diff --git a/src/plugins/cnat/cnat_scanner.c b/src/plugins/cnat/cnat_scanner.c index b3591f7e8b0..2f982711581 100644 --- a/src/plugins/cnat/cnat_scanner.c +++ b/src/plugins/cnat/cnat_scanner.c @@ -14,6 +14,7 @@ */ #include +#include #include static uword diff --git a/src/plugins/cnat/cnat_session.c b/src/plugins/cnat/cnat_session.c index ea34d913217..0f1cd43f501 100644 --- a/src/plugins/cnat/cnat_session.c +++ b/src/plugins/cnat/cnat_session.c @@ -94,7 +94,8 @@ format_cnat_session (u8 * s, va_list * args) cnat_session_t *sess = va_arg (*args, cnat_session_t *); CLIB_UNUSED (int verbose) = va_arg (*args, int); f64 ts = 0; - if (!pool_is_free_index (cnat_timestamps, sess->value.cs_ts_index)) + + if (!cnat_ts_is_free_index (sess->value.cs_ts_index)) ts = cnat_timestamp_exp (sess->value.cs_ts_index); s = format ( @@ -279,6 +280,12 @@ cnat_session_init (vlib_main_t * vm) cm->session_hash_memory); BV (clib_bihash_set_kvp_format_fn) (&cnat_session_db, format_cnat_session); + cnat_timestamps.next_empty_pool_idx = 0; + clib_bitmap_alloc (cnat_timestamps.ts_free, 1 << CNAT_TS_MPOOL_BITS); + clib_bitmap_set_region (cnat_timestamps.ts_free, 0, 1, + 1 << CNAT_TS_MPOOL_BITS); + clib_spinlock_init (&cnat_timestamps.ts_lock); + return (NULL); } @@ -289,21 +296,38 @@ cnat_timestamp_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { cnat_timestamp_t *ts; - clib_rwlock_reader_lock (&cnat_main.ts_lock); - pool_foreach (ts, cnat_timestamps) + int ts_cnt = 0, cnt; + u8 verbose = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { - vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u", - ts - cnat_timestamps, ts->last_seen, ts->lifetime, - ts->refcnt); + if (unformat (input, "verbose")) + verbose = 1; + else + return (clib_error_return (0, "unknown input '%U'", + format_unformat_error, input)); + } + + for (int i = 0; i < cnat_timestamps.next_empty_pool_idx; i++) + { + cnt = pool_elts (cnat_timestamps.ts_pools[i]); + ts_cnt += cnt; + vlib_cli_output (vm, "-- Pool %d [%d/%d]", i, cnt, + pool_header (cnat_timestamps.ts_pools[i])->max_elts); + if (!verbose) + continue; + pool_foreach (ts, cnat_timestamps.ts_pools[i]) + vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u", + ts - cnat_timestamps.ts_pools[i], ts->last_seen, + ts->lifetime, ts->refcnt); } - clib_rwlock_reader_unlock (&cnat_main.ts_lock); + vlib_cli_output (vm, "Total timestamps %d", ts_cnt); return (NULL); } VLIB_CLI_COMMAND (cnat_timestamp_show_cmd, static) = { .path = "show cnat timestamp", .function = cnat_timestamp_show, - .short_help = "show cnat timestamp", + .short_help = "show cnat timestamp [verbose]", .is_mp_safe = 1, }; diff --git a/src/plugins/cnat/cnat_types.c b/src/plugins/cnat/cnat_types.c index b09459d8a14..084a03da968 100644 --- a/src/plugins/cnat/cnat_types.c +++ b/src/plugins/cnat/cnat_types.c @@ -16,8 +16,7 @@ #include cnat_main_t cnat_main; -fib_source_t cnat_fib_source; -cnat_timestamp_t *cnat_timestamps; +cnat_timestamp_mpool_t cnat_timestamps; char *cnat_error_strings[] = { #define cnat_error(n,s) s, @@ -152,19 +151,6 @@ format_cnat_endpoint (u8 * s, va_list * args) return (s); } -static clib_error_t * -cnat_types_init (vlib_main_t * vm) -{ - cnat_fib_source = fib_source_allocate ("cnat", - CNAT_FIB_SOURCE_PRIORITY, - FIB_SOURCE_BH_SIMPLE); - - - clib_rwlock_init (&cnat_main.ts_lock); - - return (NULL); -} - void cnat_enable_disable_scanner (cnat_scanner_cmd_t event_type) { @@ -258,7 +244,6 @@ cnat_get_main () } VLIB_EARLY_CONFIG_FUNCTION (cnat_config, "cnat"); -VLIB_INIT_FUNCTION (cnat_types_init); /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h index abae83af59a..d229d21adae 100644 --- a/src/plugins/cnat/cnat_types.h +++ b/src/plugins/cnat/cnat_types.h @@ -148,9 +148,6 @@ typedef struct cnat_main_ /* delay in seconds between two scans of session/clients tables */ f64 scanner_timeout; - /* Lock for the timestamp pool */ - clib_rwlock_t ts_lock; - /* Index of the scanner process node */ uword scanner_node_index; @@ -175,6 +172,23 @@ typedef struct cnat_timestamp_t_ u16 refcnt; } cnat_timestamp_t; +/* Create the first pool with 1 << CNAT_TS_BASE_SIZE elts */ +#define CNAT_TS_BASE_SIZE (8) +/* reserve the top CNAT_TS_MPOOL_BITS bits for finding the pool */ +#define CNAT_TS_MPOOL_BITS (6) + +typedef struct cnat_timestamp_mpool_t_ +{ + /* Increasing fixed size pools of timestamps */ + cnat_timestamp_t *ts_pools[1 << CNAT_TS_MPOOL_BITS]; + /* Bitmap of pools with free space */ + uword *ts_free; + /* Index of next pool to init */ + u8 next_empty_pool_idx; + /* ts creation lock */ + clib_spinlock_t ts_lock; +} cnat_timestamp_mpool_t; + typedef struct cnat_node_ctx_ { f64 now; @@ -188,8 +202,7 @@ extern u8 *format_cnat_endpoint (u8 * s, va_list * args); extern uword unformat_cnat_ep_tuple (unformat_input_t * input, va_list * args); extern uword unformat_cnat_ep (unformat_input_t * input, va_list * args); -extern cnat_timestamp_t *cnat_timestamps; -extern fib_source_t cnat_fib_source; +extern cnat_timestamp_mpool_t cnat_timestamps; extern cnat_main_t cnat_main; extern char *cnat_error_strings[]; -- cgit 1.2.3-korg