From e211ac4886d0ae51c08f77c76ed37b76f01f7629 Mon Sep 17 00:00:00 2001 From: Benoît Ganne Date: Tue, 21 Feb 2023 16:09:47 +0100 Subject: fib: fix load-balance and replicate dpos buckets overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit load-balance and replicate dpos both store their number of buckets as u16, which can overflow if too many paths are configured. For load-balance it can happens quite quickly because of weights normalization. Type: fix Change-Id: I0c78c39fc3d40626dfc58b49e7d99d71f9852b50 Signed-off-by: Benoît Ganne --- src/plugins/unittest/fib_test.c | 123 ++++++++++++++++++++++++++-------------- src/vnet/dpo/load_balance.c | 28 ++++++--- src/vnet/dpo/load_balance.h | 10 ++++ src/vnet/dpo/replicate_dpo.c | 16 +++++- src/vnet/dpo/replicate_dpo.h | 8 +++ 5 files changed, 132 insertions(+), 53 deletions(-) diff --git a/src/plugins/unittest/fib_test.c b/src/plugins/unittest/fib_test.c index d417d5ff6ea..fbac809d726 100644 --- a/src/plugins/unittest/fib_test.c +++ b/src/plugins/unittest/fib_test.c @@ -780,6 +780,69 @@ fib_test_validate_entry (fib_node_index_t fei, return (res); } +static int +fib_test_multipath_v4 (const test_main_t *tm, const u32 fib_index, + const fib_prefix_t *pfx, const int n_paths, + const int expected_n_buckets) +{ + const int path_list_pool_size = fib_path_list_pool_size(); + const int path_list_db_size = fib_path_list_db_size(); + const int entry_pool_size = fib_entry_pool_size(); + fib_route_path_t *r_paths = NULL; + const load_balance_t *lb; + const dpo_id_t *dpo; + u32 fei; + int res = 0; + int i; + + for (i = 0; i < n_paths; i++) + { + fib_route_path_t r_path = { + .frp_proto = DPO_PROTO_IP4, + .frp_addr = { + .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + i), + }, + .frp_sw_if_index = tm->hw[0]->sw_if_index, + .frp_weight = 1, + .frp_fib_index = ~0, + .frp_flags = FIB_ROUTE_PATH_ATTACHED, + }; + vec_add1(r_paths, r_path); + } + + fib_table_entry_update(fib_index, + pfx, + FIB_SOURCE_API, + FIB_ENTRY_FLAG_NONE, + r_paths); + + fei = fib_table_lookup_exact_match(fib_index, pfx); + FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "prefix present"); + dpo = fib_entry_contribute_ip_forwarding(fei); + + lb = load_balance_get(dpo->dpoi_index); + FIB_TEST((lb->lb_n_buckets == expected_n_buckets), + "prefix lb over %d paths", lb->lb_n_buckets); + + fib_table_entry_delete(fib_index, + pfx, + FIB_SOURCE_API); + FIB_TEST(FIB_NODE_INDEX_INVALID == + fib_table_lookup_exact_match(fib_index, pfx), "prefix removed"); + vec_free(r_paths); + + /* + * add-remove test. no change. + */ + FIB_TEST((path_list_db_size == fib_path_list_db_size()), + "path list DB population:%d", fib_path_list_db_size()); + FIB_TEST((path_list_pool_size == fib_path_list_pool_size()), + "path list pool size is %d", fib_path_list_pool_size()); + FIB_TEST((entry_pool_size == fib_entry_pool_size()), + "entry pool size is %d", fib_entry_pool_size()); + return res; +} + static int fib_test_v4 (void) { @@ -3614,52 +3677,26 @@ fib_test_v4 (void) /* * A route with multiple paths at once */ - fib_route_path_t *r_paths = NULL; + FIB_TEST(0 == + fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32, 4, 4), + "multipath with 4 nexthops"); - for (ii = 0; ii < 4; ii++) - { - fib_route_path_t r_path = { - .frp_proto = DPO_PROTO_IP4, - .frp_addr = { - .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii), - }, - .frp_sw_if_index = tm->hw[0]->sw_if_index, - .frp_weight = 1, - .frp_fib_index = ~0, - }; - vec_add1(r_paths, r_path); - } - - fib_table_entry_update(fib_index, - &pfx_4_4_4_4_s_32, - FIB_SOURCE_API, - FIB_ENTRY_FLAG_NONE, - r_paths); - - fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32); - FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present"); - dpo = fib_entry_contribute_ip_forwarding(fei); - - lb = load_balance_get(dpo->dpoi_index); - FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets); - - fib_table_entry_delete(fib_index, - &pfx_4_4_4_4_s_32, - FIB_SOURCE_API); - FIB_TEST(FIB_NODE_INDEX_INVALID == - fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), - "4.4.4.4/32 removed"); - vec_free(r_paths); + /* + * A route with lots of multiple paths that will overflow max supported + * lb buckets because of normalization + */ + FIB_TEST(0 == + fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32, + LB_MAX_BUCKETS / 2 + 23, LB_MAX_BUCKETS), + "multipath with too many nexthops"); /* - * add-remove test. no change. + * A route with more paths than max supported lb buckets */ - FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d", - fib_path_list_db_size()); - FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d", - fib_path_list_pool_size()); - FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d", - fib_entry_pool_size()); + FIB_TEST(0 == + fib_test_multipath_v4 (tm, fib_index, &pfx_4_4_4_4_s_32, + LB_MAX_BUCKETS + 13, LB_MAX_BUCKETS), + "multipath with too many nexthops"); /* * A route deag route @@ -3698,7 +3735,6 @@ fib_test_v4 (void) FIB_TEST(FIB_NODE_INDEX_INVALID == fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), "4.4.4.4/32 removed"); - vec_free(r_paths); /* * A route deag route in a source lookup table @@ -3737,7 +3773,6 @@ fib_test_v4 (void) FIB_TEST(FIB_NODE_INDEX_INVALID == fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32), "4.4.4.4/32 removed"); - vec_free(r_paths); /* * add-remove test. no change. diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c index ff46d56e3e2..fae7c1db94a 100644 --- a/src/vnet/dpo/load_balance.c +++ b/src/vnet/dpo/load_balance.c @@ -244,6 +244,8 @@ load_balance_create_i (u32 num_buckets, { load_balance_t *lb; + ASSERT (num_buckets <= LB_MAX_BUCKETS); + lb = load_balance_alloc_i(); lb->lb_hash_config = fhc; lb->lb_n_buckets = num_buckets; @@ -455,8 +457,9 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops, /* Try larger and larger power of 2 sized adjacency blocks until we find one where traffic flows to within 1% of specified weights. */ - for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2) + for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2) { + ASSERT (n_adj <= LB_MAX_BUCKETS); error = 0; norm = n_adj / ((f64) sum_weight); @@ -487,12 +490,22 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops, nhs[0].path_weight += n_adj_left; - /* Less than 5% average error per adjacency with this size adjacency block? */ - if (error <= multipath_next_hop_error_tolerance*n_adj) + /* Less than 1% average error per adjacency with this size adjacency block, + * or did we reached the maximum number of buckets we support? */ + if (error <= multipath_next_hop_error_tolerance*n_adj || + n_adj >= LB_MAX_BUCKETS) { - /* Truncate any next hops with zero weight. */ - vec_set_len (nhs, i); - break; + if (i < n_nhs) + { + /* Truncate any next hops in excess */ + vlib_log_err(load_balance_logger, + "Too many paths for load-balance, truncating %d -> %d", + n_nhs, i); + for (int j = i; j < n_nhs; j++) + dpo_reset (&vec_elt(nhs, j).path_dpo); + } + vec_set_len (nhs, i); + break; } } @@ -622,6 +635,7 @@ static inline void load_balance_set_n_buckets (load_balance_t *lb, u32 n_buckets) { + ASSERT (n_buckets <= LB_MAX_BUCKETS); lb->lb_n_buckets = n_buckets; lb->lb_n_buckets_minus_1 = n_buckets-1; } @@ -651,8 +665,6 @@ load_balance_multipath_update (const dpo_id_t *dpo, &sum_of_weights, multipath_next_hop_error_tolerance); - ASSERT (n_buckets >= vec_len (raw_nhs)); - /* * Save the old load-balance map used, and get a new one if required. */ diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h index 5428e20e981..3605d82a207 100644 --- a/src/vnet/dpo/load_balance.h +++ b/src/vnet/dpo/load_balance.h @@ -49,6 +49,12 @@ typedef struct load_balance_main_t_ extern load_balance_main_t load_balance_main; +/** + * The maximum number of buckets that a load-balance object can have + * This must not overflow the lb_n_buckets field + */ +#define LB_MAX_BUCKETS 8192 + /** * The number of buckets that a load-balance object can have and still * fit in one cache-line @@ -176,6 +182,10 @@ typedef struct load_balance_t_ { STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES, "A load_balance object size exceeds one cacheline"); +STATIC_ASSERT (LB_MAX_BUCKETS <= CLIB_U16_MAX, + "Too many buckets for load_balance object"); +STATIC_ASSERT (LB_MAX_BUCKETS && !(LB_MAX_BUCKETS & (LB_MAX_BUCKETS - 1)), + "LB_MAX_BUCKETS must be a power of 2"); /** * Flags controlling load-balance formatting/display diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c index 5f88f12b910..0474fd82984 100644 --- a/src/vnet/dpo/replicate_dpo.c +++ b/src/vnet/dpo/replicate_dpo.c @@ -172,6 +172,8 @@ replicate_create_i (u32 num_buckets, { replicate_t *rep; + ASSERT (num_buckets <= REP_MAX_BUCKETS); + rep = replicate_alloc_i(); rep->rep_n_buckets = num_buckets; rep->rep_proto = rep_proto; @@ -311,7 +313,8 @@ static inline void replicate_set_n_buckets (replicate_t *rep, u32 n_buckets) { - rep->rep_n_buckets = n_buckets; + ASSERT (n_buckets <= REP_MAX_BUCKETS); + rep->rep_n_buckets = n_buckets; } void @@ -331,6 +334,17 @@ replicate_multipath_update (const dpo_id_t *dpo, rep->rep_proto); n_buckets = vec_len(nhs); + if (n_buckets > REP_MAX_BUCKETS) + { + vlib_log_err (replicate_logger, + "Too many paths for replicate, truncating %d -> %d", + n_buckets, REP_MAX_BUCKETS); + for (int i = REP_MAX_BUCKETS; i < n_buckets; i++) + dpo_reset (&vec_elt (nhs, i).path_dpo); + vec_set_len (nhs, REP_MAX_BUCKETS); + n_buckets = REP_MAX_BUCKETS; + } + if (0 == rep->rep_n_buckets) { /* diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h index 908c20c1d56..d21f52a4833 100644 --- a/src/vnet/dpo/replicate_dpo.h +++ b/src/vnet/dpo/replicate_dpo.h @@ -40,6 +40,12 @@ typedef struct replicate_main_t_ extern replicate_main_t replicate_main; +/** + * The number of buckets that a replicate object can have + * This must not overflow the rep_n_buckets field + */ +#define REP_MAX_BUCKETS 1024 + /** * The number of buckets that a load-balance object can have and still * fit in one cache-line @@ -108,6 +114,8 @@ typedef struct replicate_t_ { STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES, "A replicate object size exceeds one cacheline"); +STATIC_ASSERT (REP_MAX_BUCKETS <= CLIB_U16_MAX, + "Too many buckets for replicate object"); /** * Flags controlling load-balance formatting/display -- cgit 1.2.3-korg