From 8e8f98c9d46788438bd176f7c2bfde0a5837cad9 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Fri, 3 Feb 2017 11:58:53 -0500 Subject: Wheel-timer infra Change-Id: I5499dd6b768425a56936afae50bd578620c83d30 Signed-off-by: Dave Barach --- src/vppinfra/tw_timer_template.c | 341 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 341 insertions(+) create mode 100644 src/vppinfra/tw_timer_template.c (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c new file mode 100644 index 00000000..9aa5624f --- /dev/null +++ b/src/vppinfra/tw_timer_template.c @@ -0,0 +1,341 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** @file + * @brief TW timer implementation TEMPLATE ONLY, do not compile directly + * + * + */ + +static inline u32 +TW (make_internal_timer_handle) (u32 pool_index, u32 timer_id) +{ + u32 handle; + + ASSERT (timer_id < TW_TIMERS_PER_OBJECT); + ASSERT (pool_index < (1 << (32 - LOG2_TW_TIMERS_PER_OBJECT))); + + handle = (timer_id << (32 - LOG2_TW_TIMERS_PER_OBJECT)) | (pool_index); + return handle; +} + +static inline void +timer_addhead (TWT (tw_timer) * pool, u32 head_index, u32 new_index) +{ + TWT (tw_timer) * head = pool_elt_at_index (pool, head_index); + TWT (tw_timer) * old_first; + u32 old_first_index; + TWT (tw_timer) * new; + + new = pool_elt_at_index (pool, new_index); + + if (PREDICT_FALSE (head->next == head_index)) + { + head->next = head->prev = new_index; + new->next = new->prev = head_index; + return; + } + + old_first_index = head->next; + old_first = pool_elt_at_index (pool, old_first_index); + + new->next = old_first_index; + new->prev = old_first->prev; + old_first->prev = new_index; + head->next = new_index; +} + +static inline void +timer_remove (TWT (tw_timer) * pool, u32 index) +{ + TWT (tw_timer) * elt = pool_elt_at_index (pool, index); + TWT (tw_timer) * next_elt, *prev_elt; + + ASSERT (elt->user_handle != ~0); + + next_elt = pool_elt_at_index (pool, elt->next); + prev_elt = pool_elt_at_index (pool, elt->prev); + + next_elt->prev = elt->prev; + prev_elt->next = elt->next; + + elt->prev = elt->next = ~0; +} + +/** + * @brief Start a Tw Timer + * @param tw_timer_wheel_t * tw timer wheel object pointer + * @param u32 pool_index user pool index, presumably for a tw session + * @param u32 timer_id app-specific timer ID. 4 bits. 
+ * @param u32 interval timer interval in ticks + * @returns handle needed to cancel the timer + */ +u32 +TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, + u32 interval) +{ +#if TW_TIMER_WHEELS > 1 + u16 slow_ring_offset; + u32 carry; +#endif + u16 fast_ring_offset; + tw_timer_wheel_slot_t *ts; + TWT (tw_timer) * t; + + ASSERT (interval); + + pool_get (tw->timers, t); + t->next = t->prev = ~0; +#if TW_TIMER_WHEELS > 1 + t->fast_ring_offset = ~0; +#endif + t->user_handle = TW (make_internal_timer_handle) (pool_index, timer_id); + + fast_ring_offset = interval & TW_RING_MASK; + fast_ring_offset += tw->current_index[TW_TIMER_RING_FAST]; +#if TW_TIMER_WHEELS > 1 + carry = fast_ring_offset >= TW_SLOTS_PER_RING ? 1 : 0; + fast_ring_offset %= TW_SLOTS_PER_RING; + slow_ring_offset = (interval >> TW_RING_SHIFT) + carry; + + /* Timer duration exceeds ~7 hrs? Oops */ + ASSERT (slow_ring_offset < TW_SLOTS_PER_RING); + + /* Timer expires more than 51.2 seconds from now? */ + if (slow_ring_offset) + { + slow_ring_offset += tw->current_index[TW_TIMER_RING_SLOW]; + slow_ring_offset %= TW_SLOTS_PER_RING; + + /* We'll want the fast ring offset later... 
*/ + t->fast_ring_offset = fast_ring_offset; + ASSERT (t->fast_ring_offset < TW_SLOTS_PER_RING); + + ts = &tw->w[TW_TIMER_RING_SLOW][slow_ring_offset]; + + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + + return t - tw->timers; + } +#else + fast_ring_offset %= TW_SLOTS_PER_RING; + ASSERT (interval < TW_SLOTS_PER_RING); +#endif + + /* Timer expires less than one fast-ring revolution from now */ + ts = &tw->w[TW_TIMER_RING_FAST][fast_ring_offset]; + + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + return t - tw->timers; +} + +/** + * @brief Stop a tw timer + * @param tw_timer_wheel_t * tw timer wheel object pointer + * @param u32 pool_index user pool index, passed for consistency checking only + * @param u32 timer_id 4 bit timer ID, passed for consistency checking only + * @param u32 handle timer cancellation returned by tw_timer_start + */ + +void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle) +{ + TWT (tw_timer) * t; + + t = pool_elt_at_index (tw->timers, handle); + + /* in case of idiotic handle (e.g. passing a listhead index) */ + ASSERT (t->user_handle != ~0); + + timer_remove (tw->timers, handle); + + pool_put_index (tw->timers, handle); +} + +/** + * @brief Initialize a tw timer wheel template instance + * @param tw_timer_wheel_t * tw timer wheel object pointer + * @param void * expired_timer_callback. Passed a u32 * vector of + * expired timer handles. 
+ * @param f64 timer_interval_in_seconds + */ +void +TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, + void *expired_timer_callback, + f64 timer_interval_in_seconds) +{ + int ring, slot; + tw_timer_wheel_slot_t *ts; + TWT (tw_timer) * t; + memset (tw, 0, sizeof (*tw)); + tw->expired_timer_callback = expired_timer_callback; + if (timer_interval_in_seconds == 0.0) + { + clib_warning ("timer interval is zero"); + abort (); + } + tw->timer_interval = timer_interval_in_seconds; + tw->ticks_per_second = 1.0 / timer_interval_in_seconds; + + for (ring = 0; ring < TW_TIMER_WHEELS; ring++) + { + for (slot = 0; slot < TW_SLOTS_PER_RING; slot++) + { + ts = &tw->w[ring][slot]; + pool_get (tw->timers, t); + memset (t, 0xff, sizeof (*t)); + t->next = t->prev = t - tw->timers; + ts->head_index = t - tw->timers; + } + } +} + +/** + * @brief Free a tw timer wheel template instance + * @param tw_timer_wheel_t * tw timer wheel object pointer + */ +void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw) +{ + int i, j; + tw_timer_wheel_slot_t *ts; + TWT (tw_timer) * head, *t; + u32 next_index; + + for (i = 0; i < TW_TIMER_WHEELS; i++) + { + for (j = 0; j < TW_SLOTS_PER_RING; j++) + { + ts = &tw->w[i][j]; + head = pool_elt_at_index (tw->timers, ts->head_index); + next_index = head->next; + + while (next_index != ts->head_index) + { + t = pool_elt_at_index (tw->timers, next_index); + next_index = t->next; + pool_put (tw->timers, t); + } + pool_put (tw->timers, head); + } + } + memset (tw, 0, sizeof (*tw)); +} + +/** + * @brief Advance a tw timer wheel. Calls the expired timer callback + * as needed. This routine should be called once every timer_interval seconds + * @param tw_timer_wheel_t * tw timer wheel template instance pointer + * @param f64 now the current time, e.g. 
from vlib_time_now(vm) + */ +void TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) +{ + u32 nticks, i; + tw_timer_wheel_slot_t *ts; + TWT (tw_timer) * t, *head; + u32 fast_wheel_index; + u32 next_index; +#if TW_TIMER_WHEELS > 1 + u32 slow_wheel_index; +#endif + + /* Shouldn't happen */ + if (PREDICT_FALSE (now < tw->next_run_time)) + return; + + /* Number of ticks which have occurred */ + nticks = tw->ticks_per_second * (now - tw->last_run_time); + if (nticks == 0) + return; + + /* Remember when we ran, compute next runtime */ + tw->next_run_time = (now + tw->timer_interval); + tw->last_run_time = now; + + for (i = 0; i < nticks; i++) + { + fast_wheel_index = tw->current_index[TW_TIMER_RING_FAST]; + + /* + * If we've been around the fast ring once, + * process one slot in the slow ring before we handle + * the fast ring. + */ + if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING)) + { + fast_wheel_index = tw->current_index[TW_TIMER_RING_FAST] = 0; + +#if TW_TIMER_WHEELS > 1 + tw->current_index[TW_TIMER_RING_SLOW]++; + tw->current_index[TW_TIMER_RING_SLOW] %= TW_SLOTS_PER_RING; + slow_wheel_index = tw->current_index[TW_TIMER_RING_SLOW]; + + ts = &tw->w[TW_TIMER_RING_SLOW][slow_wheel_index]; + + head = pool_elt_at_index (tw->timers, ts->head_index); + next_index = head->next; + + /* Make slot empty */ + head->next = head->prev = ts->head_index; + + /* traverse slot, deal timers into fast ring */ + while (next_index != head - tw->timers) + { + t = pool_elt_at_index (tw->timers, next_index); + next_index = t->next; + + /* Remove from slow ring slot (hammer) */ + t->next = t->prev = ~0; + ASSERT (t->fast_ring_offset < TW_SLOTS_PER_RING); + /* Add to fast ring */ + ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + } +#endif + } + + /* Handle the fast ring */ + vec_reset_length (tw->expired_timer_handles); + + ts = &tw->w[TW_TIMER_RING_FAST][fast_wheel_index]; + + head = 
pool_elt_at_index (tw->timers, ts->head_index); + next_index = head->next; + + /* Make slot empty */ + head->next = head->prev = ts->head_index; + + /* Construct vector of expired timer handles to give the user */ + while (next_index != ts->head_index) + { + t = pool_elt_at_index (tw->timers, next_index); + next_index = t->next; + vec_add1 (tw->expired_timer_handles, t->user_handle); + pool_put (tw->timers, t); + } + + /* If any timers expired, tell the user */ + if (vec_len (tw->expired_timer_handles)) + tw->expired_timer_callback (tw->expired_timer_handles); + tw->current_index[TW_TIMER_RING_FAST]++; + tw->current_tick++; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg From 581b072bab3af281b0475168cce8f5c4c4666f49 Mon Sep 17 00:00:00 2001 From: Gabriel Ganne Date: Mon, 13 Feb 2017 10:27:15 +0100 Subject: tw_timer_expire_timers() return the number of expirations to be used for node statistics Also fix tw_timer_stop() description Change-Id: I84b529e330c4534fd55487e7e2b8b089ee68ca11 Signed-off-by: Gabriel Ganne --- src/vppinfra/tw_timer_template.c | 21 +++++++++++++-------- src/vppinfra/tw_timer_template.h | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index 9aa5624f..139d27ca 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -144,11 +144,8 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, /** * @brief Stop a tw timer * @param tw_timer_wheel_t * tw timer wheel object pointer - * @param u32 pool_index user pool index, passed for consistency checking only - * @param u32 timer_id 4 bit timer ID, passed for consistency checking only * @param u32 handle timer cancellation returned by tw_timer_start */ - void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle) { 
TWT (tw_timer) * t; @@ -238,30 +235,32 @@ void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw) * @param tw_timer_wheel_t * tw timer wheel template instance pointer * @param f64 now the current time, e.g. from vlib_time_now(vm) */ -void TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) +u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) { u32 nticks, i; tw_timer_wheel_slot_t *ts; TWT (tw_timer) * t, *head; u32 fast_wheel_index; u32 next_index; + u32 nexpirations, total_nexpirations; #if TW_TIMER_WHEELS > 1 u32 slow_wheel_index; #endif /* Shouldn't happen */ if (PREDICT_FALSE (now < tw->next_run_time)) - return; + return 0; /* Number of ticks which have occurred */ nticks = tw->ticks_per_second * (now - tw->last_run_time); if (nticks == 0) - return; + return 0; /* Remember when we ran, compute next runtime */ tw->next_run_time = (now + tw->timer_interval); tw->last_run_time = now; + total_nexpirations = 0; for (i = 0; i < nticks; i++) { fast_wheel_index = tw->current_index[TW_TIMER_RING_FAST]; @@ -325,11 +324,17 @@ void TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) } /* If any timers expired, tell the user */ - if (vec_len (tw->expired_timer_handles)) - tw->expired_timer_callback (tw->expired_timer_handles); + nexpirations = vec_len (tw->expired_timer_handles); + if (nexpirations) + { + tw->expired_timer_callback (tw->expired_timer_handles); + total_nexpirations += nexpirations; + } tw->current_index[TW_TIMER_RING_FAST]++; tw->current_tick++; } + + return total_nexpirations; } /* diff --git a/src/vppinfra/tw_timer_template.h b/src/vppinfra/tw_timer_template.h index cf15ab8a..2e41bcac 100644 --- a/src/vppinfra/tw_timer_template.h +++ b/src/vppinfra/tw_timer_template.h @@ -188,7 +188,7 @@ void TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw); -void TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now); +u32 TW (tw_timer_expire_timers) (TWT 
(tw_timer_wheel) * tw, f64 now); /* * fd.io coding-style-patch-verification: ON -- cgit 1.2.3-korg From 83ed1f4f1bc3f09de0e7ca1e163bf1d7dd4b8be2 Mon Sep 17 00:00:00 2001 From: Gabriel Ganne Date: Wed, 15 Feb 2017 16:55:30 +0100 Subject: tw_timer_expire_timers() - add a maximum to the number of expiration per call The idea is to prevent a huge processing burst if, say, the network goes down 10' for some reason, and so that we don't need to expire 1M timer sessions on the first call. The maximum is not an exact value, but a value after which the expiration process is postponed until the next call. That way, we don't have to process the same tick twice, nor to unlink timers once at a time when processing a tick. The fact that a timer slot could contain many entries should be dealt with by changing the number of ticks per second. Change-Id: I892d07f965094102a3d53e7dbf4e6f5ad22d4967 Signed-off-by: Gabriel Ganne --- src/vppinfra/tw_timer_template.c | 8 ++++++-- src/vppinfra/tw_timer_template.h | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index 139d27ca..436dd4e1 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -170,13 +170,14 @@ void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle) void TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, void *expired_timer_callback, - f64 timer_interval_in_seconds) + f64 timer_interval_in_seconds, u32 max_expirations) { int ring, slot; tw_timer_wheel_slot_t *ts; TWT (tw_timer) * t; memset (tw, 0, sizeof (*tw)); tw->expired_timer_callback = expired_timer_callback; + tw->max_expirations = max_expirations; if (timer_interval_in_seconds == 0.0) { clib_warning ("timer interval is zero"); @@ -258,7 +259,6 @@ u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) /* Remember when we ran, compute next runtime */ tw->next_run_time = (now + 
tw->timer_interval); - tw->last_run_time = now; total_nexpirations = 0; for (i = 0; i < nticks; i++) @@ -332,8 +332,12 @@ u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) } tw->current_index[TW_TIMER_RING_FAST]++; tw->current_tick++; + + if (total_nexpirations >= tw->max_expirations) + break; } + tw->last_run_time += i * tw->ticks_per_second; return total_nexpirations; } diff --git a/src/vppinfra/tw_timer_template.h b/src/vppinfra/tw_timer_template.h index 2e41bcac..6b61e424 100644 --- a/src/vppinfra/tw_timer_template.h +++ b/src/vppinfra/tw_timer_template.h @@ -175,6 +175,9 @@ typedef struct /** vector of expired timers */ u32 *expired_timer_handles; + + /** maximum expirations */ + u32 max_expirations; } TWT (tw_timer_wheel); u32 TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, @@ -184,7 +187,7 @@ void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle); void TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, void *expired_timer_callback, - f64 timer_interval); + f64 timer_interval, u32 max_expirations); void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw); -- cgit 1.2.3-korg From 954898f9453032e3d08326b946f6d7007cf39610 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 21 Feb 2017 19:26:51 -0800 Subject: Fix last run time update for timer wheel Change-Id: I9ac04b15440297c154ed1e3fba888915044cb245 Signed-off-by: Florin Coras --- src/vppinfra/tw_timer_template.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index 436dd4e1..e3f44500 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -337,7 +337,7 @@ u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) break; } - tw->last_run_time += i * tw->ticks_per_second; + tw->last_run_time += i * tw->timer_interval; return total_nexpirations; } -- cgit 1.2.3-korg From 
4af9ba1dabe3dbd4a2dd3d8c71434477c5ea81b9 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Wed, 7 Jun 2017 15:18:23 -0400 Subject: three-level timer wheel implementation w/ overflow vector prep work for s/timing_wheel/tw_timer/ in the vlib process model Change-Id: I763f4968a8fce1764a3778b12def0afbd30086b1 Signed-off-by: Dave Barach --- src/vppinfra.am | 6 + src/vppinfra/test_tw_timer.c | 913 ++++++++++++++++++++++++++++---- src/vppinfra/tw_timer_16t_1w_2048sl.h | 1 + src/vppinfra/tw_timer_16t_2w_512sl.h | 1 + src/vppinfra/tw_timer_1t_3w_1024sl_ov.c | 26 + src/vppinfra/tw_timer_1t_3w_1024sl_ov.h | 48 ++ src/vppinfra/tw_timer_2t_1w_2048sl.h | 1 + src/vppinfra/tw_timer_4t_3w_256sl.c | 26 + src/vppinfra/tw_timer_4t_3w_256sl.h | 47 ++ src/vppinfra/tw_timer_4t_3w_4sl_ov.c | 32 ++ src/vppinfra/tw_timer_4t_3w_4sl_ov.h | 48 ++ src/vppinfra/tw_timer_template.c | 405 ++++++++++++-- src/vppinfra/tw_timer_template.h | 48 +- 13 files changed, 1428 insertions(+), 174 deletions(-) create mode 100644 src/vppinfra/tw_timer_1t_3w_1024sl_ov.c create mode 100644 src/vppinfra/tw_timer_1t_3w_1024sl_ov.h create mode 100644 src/vppinfra/tw_timer_4t_3w_256sl.c create mode 100644 src/vppinfra/tw_timer_4t_3w_256sl.h create mode 100644 src/vppinfra/tw_timer_4t_3w_4sl_ov.c create mode 100644 src/vppinfra/tw_timer_4t_3w_4sl_ov.h (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vppinfra.am b/src/vppinfra.am index 3939d3ce..ff2b8ea4 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -211,6 +211,8 @@ nobase_include_HEADERS = \ vppinfra/tw_timer_2t_1w_2048sl.h \ vppinfra/tw_timer_16t_2w_512sl.h \ vppinfra/tw_timer_16t_1w_2048sl.h \ + vppinfra/tw_timer_4t_3w_256sl.h \ + vppinfra/tw_timer_1t_3w_1024sl_ov.h \ vppinfra/tw_timer_template.h \ vppinfra/tw_timer_template.c \ vppinfra/types.h \ @@ -268,6 +270,10 @@ CLIB_CORE = \ vppinfra/tw_timer_16t_2w_512sl.c \ vppinfra/tw_timer_16t_1w_2048sl.h \ vppinfra/tw_timer_16t_1w_2048sl.c \ + vppinfra/tw_timer_4t_3w_256sl.h \ + 
vppinfra/tw_timer_4t_3w_256sl.c \ + vppinfra/tw_timer_1t_3w_1024sl_ov.h \ + vppinfra/tw_timer_1t_3w_1024sl_ov.c \ vppinfra/unformat.c \ vppinfra/vec.c \ vppinfra/vector.c \ diff --git a/src/vppinfra/test_tw_timer.c b/src/vppinfra/test_tw_timer.c index 26499509..ec0baa07 100644 --- a/src/vppinfra/test_tw_timer.c +++ b/src/vppinfra/test_tw_timer.c @@ -3,6 +3,8 @@ #include #include #include +#include +#include typedef struct { @@ -10,7 +12,7 @@ typedef struct u32 stop_timer_handle; /** Test item should expire at this clock tick */ - u32 expected_to_expire; + u64 expected_to_expire; } tw_timer_test_elt_t; typedef struct @@ -24,8 +26,14 @@ typedef struct /** The double-wheel */ tw_timer_wheel_16t_2w_512sl_t double_wheel; + /* The triple wheel */ + tw_timer_wheel_4t_3w_256sl_t triple_wheel; + + /* The triple wheel with overflow vector */ + tw_timer_wheel_1t_3w_1024sl_ov_t triple_ov_wheel; + /** random number seed */ - u32 seed; + u64 seed; /** number of timers */ u32 ntimers; @@ -68,6 +76,32 @@ run_double_wheel (tw_timer_wheel_16t_2w_512sl_t * tw, u32 n_ticks) } } +static void +run_triple_wheel (tw_timer_wheel_4t_3w_256sl_t * tw, u32 n_ticks) +{ + u32 i; + f64 now = tw->last_run_time + 1.01; + + for (i = 0; i < n_ticks; i++) + { + tw_timer_expire_timers_4t_3w_256sl (tw, now); + now += 1.01; + } +} + +static void +run_triple_ov_wheel (tw_timer_wheel_1t_3w_1024sl_ov_t * tw, u32 n_ticks) +{ + u32 i; + f64 now = tw->last_run_time + 1.01; + + for (i = 0; i < n_ticks; i++) + { + tw_timer_expire_timers_1t_3w_1024sl_ov (tw, now); + now += 1.01; + } +} + static void expired_timer_single_callback (u32 * expired_timers) { @@ -87,7 +121,7 @@ expired_timer_single_callback (u32 * expired_timers) if (e->expected_to_expire != tm->single_wheel.current_tick) { - fformat (stdout, "[%d] expired at %d not %d\n", + fformat (stdout, "[%d] expired at %lld not %lld\n", e - tm->test_elts, tm->single_wheel.current_tick, e->expected_to_expire); } @@ -114,7 +148,7 @@ expired_timer_double_callback 
(u32 * expired_timers) if (e->expected_to_expire != tm->double_wheel.current_tick) { - fformat (stdout, "[%d] expired at %d not %d\n", + fformat (stdout, "[%d] expired at %lld not %lld\n", e - tm->test_elts, tm->double_wheel.current_tick, e->expected_to_expire); } @@ -122,13 +156,64 @@ expired_timer_double_callback (u32 * expired_timers) } } +static void +expired_timer_triple_callback (u32 * expired_timers) +{ + int i; + u32 pool_index, timer_id; + tw_timer_test_elt_t *e; + tw_timer_test_main_t *tm = &tw_timer_test_main; + + for (i = 0; i < vec_len (expired_timers); i++) + { + pool_index = expired_timers[i] & 0x3FFFFFFF; + timer_id = expired_timers[i] >> 30; + + ASSERT (timer_id == 3); + + e = pool_elt_at_index (tm->test_elts, pool_index); + + if (e->expected_to_expire != tm->triple_wheel.current_tick) + { + fformat (stdout, "[%d] expired at %lld not %lld\n", + e - tm->test_elts, tm->triple_wheel.current_tick, + e->expected_to_expire); + } + pool_put (tm->test_elts, e); + } +} + +static void +expired_timer_triple_ov_callback (u32 * expired_timers) +{ + int i; + u32 pool_index; + tw_timer_test_elt_t *e; + tw_timer_test_main_t *tm = &tw_timer_test_main; + + for (i = 0; i < vec_len (expired_timers); i++) + { + pool_index = expired_timers[i]; + + e = pool_elt_at_index (tm->test_elts, pool_index); + + if (e->expected_to_expire != tm->triple_ov_wheel.current_tick) + { + fformat (stdout, "[%d] expired at %lld not %lld\n", + e - tm->test_elts, tm->triple_ov_wheel.current_tick, + e->expected_to_expire); + } + pool_put (tm->test_elts, e); + } +} + static clib_error_t * test2_single (tw_timer_test_main_t * tm) { u32 i, j; tw_timer_test_elt_t *e; u32 initial_wheel_offset; - u32 expiration_time; + u64 expiration_time; u32 max_expiration_time = 0; u32 *deleted_indices = 0; u32 adds = 0, deletes = 0; @@ -145,7 +230,14 @@ test2_single (tw_timer_test_main_t * tm) run_single_wheel (&tm->single_wheel, initial_wheel_offset); - fformat (stdout, "test %d timers, %d iter, %d ticks per 
iter, 0x%x seed\n", + fformat (stdout, "initial wheel time %d, fast index %d\n", + tm->single_wheel.current_tick, + tm->single_wheel.current_index[TW_TIMER_RING_FAST]); + + initial_wheel_offset = tm->single_wheel.current_tick; + + fformat (stdout, + "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n", tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed); before = clib_time_now (&tm->clib_time); @@ -158,7 +250,7 @@ test2_single (tw_timer_test_main_t * tm) do { - expiration_time = random_u32 (&tm->seed) & (2047); + expiration_time = random_u64 (&tm->seed) & (2047); } while (expiration_time == 0); @@ -192,7 +284,9 @@ test2_single (tw_timer_test_main_t * tm) del_and_re_add: for (j = 0; j < vec_len (deleted_indices); j++) - pool_put_index (tm->test_elts, deleted_indices[j]); + { + pool_put_index (tm->test_elts, deleted_indices[j]); + } deletes += j; @@ -203,7 +297,7 @@ test2_single (tw_timer_test_main_t * tm) do { - expiration_time = random_u32 (&tm->seed) & (2047); + expiration_time = random_u64 (&tm->seed) & (2047); } while (expiration_time == 0); @@ -269,11 +363,19 @@ test2_double (tw_timer_test_main_t * tm) 1.0 /* timer interval */ , ~0); /* Prime offset */ - initial_wheel_offset = 757; + initial_wheel_offset = 7577; run_double_wheel (&tm->double_wheel, initial_wheel_offset); - fformat (stdout, "test %d timers, %d iter, %d ticks per iter, 0x%x seed\n", + fformat (stdout, "initial wheel time %d, fast index %d slow index %d\n", + tm->double_wheel.current_tick, + tm->double_wheel.current_index[TW_TIMER_RING_FAST], + tm->double_wheel.current_index[TW_TIMER_RING_SLOW]); + + initial_wheel_offset = tm->double_wheel.current_tick; + + fformat (stdout, + "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n", tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed); before = clib_time_now (&tm->clib_time); @@ -286,7 +388,7 @@ test2_double (tw_timer_test_main_t * tm) do { - expiration_time = random_u32 (&tm->seed) & ((1 << 17) - 1); + expiration_time = random_u64 
(&tm->seed) & ((1 << 17) - 1); } while (expiration_time == 0); @@ -294,6 +396,7 @@ test2_double (tw_timer_test_main_t * tm) max_expiration_time = expiration_time; e->expected_to_expire = expiration_time + initial_wheel_offset; + e->stop_timer_handle = tw_timer_start_16t_2w_512sl (&tm->double_wheel, e - tm->test_elts, 14 /* timer id */ , @@ -331,7 +434,7 @@ test2_double (tw_timer_test_main_t * tm) do { - expiration_time = random_u32 (&tm->seed) & ((1 << 17) - 1); + expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1); } while (expiration_time == 0); @@ -340,6 +443,7 @@ test2_double (tw_timer_test_main_t * tm) e->expected_to_expire = expiration_time + tm->double_wheel.current_tick; + e->stop_timer_handle = tw_timer_start_16t_2w_512sl (&tm->double_wheel, e - tm->test_elts, 14 /* timer id */ , expiration_time); @@ -379,106 +483,126 @@ test2_double (tw_timer_test_main_t * tm) } static clib_error_t * -test1_single (tw_timer_test_main_t * tm) +test2_triple (tw_timer_test_main_t * tm) { - u32 i; + u32 i, j; tw_timer_test_elt_t *e; - u32 offset; + u32 initial_wheel_offset = 0; + u32 expiration_time; + u32 max_expiration_time = 0; + u32 *deleted_indices = 0; + u32 adds = 0, deletes = 0; + f64 before, after; - tw_timer_wheel_init_2t_1w_2048sl (&tm->single_wheel, - expired_timer_single_callback, - 1.0 /* timer interval */ , ~0); + clib_time_init (&tm->clib_time); - /* - * Prime offset, to make sure that the wheel starts in a - * non-trivial position - */ - offset = 123; + tw_timer_wheel_init_4t_3w_256sl (&tm->triple_wheel, + expired_timer_triple_callback, + 1.0 /* timer interval */ , ~0); - run_single_wheel (&tm->single_wheel, offset); - fformat (stdout, "initial wheel time %d, fast index %d\n", - tm->single_wheel.current_tick, - tm->single_wheel.current_index[TW_TIMER_RING_FAST]); + /* Prime offset */ + initial_wheel_offset = 75700; + run_triple_wheel (&tm->triple_wheel, initial_wheel_offset); - for (i = 0; i < tm->ntimers; i++) - { - u32 expected_to_expire; - u32 
timer_arg; + fformat (stdout, + "initial wheel time %d, fi %d si %d gi %d\n", + tm->triple_wheel.current_tick, + tm->triple_wheel.current_index[TW_TIMER_RING_FAST], + tm->triple_wheel.current_index[TW_TIMER_RING_SLOW], + tm->triple_wheel.current_index[TW_TIMER_RING_GLACIER]); - timer_arg = 1 + i; - timer_arg &= 2047; - if (timer_arg == 0) - timer_arg = 1; + initial_wheel_offset = tm->triple_wheel.current_tick; - expected_to_expire = timer_arg + offset; + fformat (stdout, + "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n", + tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed); + + before = clib_time_now (&tm->clib_time); + /* Prime the pump */ + for (i = 0; i < tm->ntimers; i++) + { pool_get (tm->test_elts, e); memset (e, 0, sizeof (*e)); - e->expected_to_expire = expected_to_expire; - e->stop_timer_handle = tw_timer_start_2t_1w_2048sl - (&tm->single_wheel, e - tm->test_elts, 1 /* timer id */ , - timer_arg); - } - run_single_wheel (&tm->single_wheel, tm->ntimers + 3); - if (pool_elts (tm->test_elts)) - fformat (stdout, "Note: %d elements remain in pool\n", - pool_elts (tm->test_elts)); + do + { + expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1); + } + while (expiration_time == 0); - /* *INDENT-OFF* */ - pool_foreach (e, tm->test_elts, - ({ - fformat(stdout, "[%d] expected to expire %d\n", - e - tm->test_elts, - e->expected_to_expire); - })); - /* *INDENT-ON* */ + if (expiration_time > max_expiration_time) + max_expiration_time = expiration_time; - fformat (stdout, - "final wheel time %d, fast index %d\n", - tm->single_wheel.current_tick, - tm->single_wheel.current_index[TW_TIMER_RING_FAST]); + e->expected_to_expire = expiration_time + initial_wheel_offset; - pool_free (tm->test_elts); - tw_timer_wheel_free_2t_1w_2048sl (&tm->single_wheel); - return 0; -} + e->stop_timer_handle = + tw_timer_start_4t_3w_256sl (&tm->triple_wheel, e - tm->test_elts, + 3 /* timer id */ , + expiration_time); + } -static clib_error_t * -test1_double 
(tw_timer_test_main_t * tm) -{ - u32 i; - tw_timer_test_elt_t *e; - u32 offset; + adds += i; - tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel, - expired_timer_double_callback, - 1.0 /* timer interval */ , ~0); + for (i = 0; i < tm->niter; i++) + { + run_triple_wheel (&tm->triple_wheel, tm->ticks_per_iter); - /* - * Prime offset, to make sure that the wheel starts in a - * non-trivial position - */ - offset = 227989; + j = 0; + vec_reset_length (deleted_indices); + /* *INDENT-OFF* */ + pool_foreach (e, tm->test_elts, + ({ + tw_timer_stop_4t_3w_256sl (&tm->triple_wheel, e->stop_timer_handle); + vec_add1 (deleted_indices, e - tm->test_elts); + if (++j >= tm->ntimers / 4) + goto del_and_re_add; + })); + /* *INDENT-ON* */ - run_double_wheel (&tm->double_wheel, offset); + del_and_re_add: + for (j = 0; j < vec_len (deleted_indices); j++) + pool_put_index (tm->test_elts, deleted_indices[j]); - fformat (stdout, "initial wheel time %d, fast index %d\n", - tm->double_wheel.current_tick, - tm->double_wheel.current_index[TW_TIMER_RING_FAST]); + deletes += j; - for (i = 0; i < tm->ntimers; i++) - { - pool_get (tm->test_elts, e); - memset (e, 0, sizeof (*e)); + for (j = 0; j < tm->ntimers / 4; j++) + { + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); - e->expected_to_expire = i + offset + 1; - e->stop_timer_handle = tw_timer_start_16t_2w_512sl - (&tm->double_wheel, e - tm->test_elts, 14 /* timer id */ , - i + 1); + do + { + expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1); + } + while (expiration_time == 0); + + if (expiration_time > max_expiration_time) + max_expiration_time = expiration_time; + + e->expected_to_expire = expiration_time + + tm->triple_wheel.current_tick; + + e->stop_timer_handle = tw_timer_start_4t_3w_256sl + (&tm->triple_wheel, e - tm->test_elts, 3 /* timer id */ , + expiration_time); + } + adds += j; } - run_double_wheel (&tm->double_wheel, tm->ntimers + 3); + + vec_free (deleted_indices); + + run_triple_wheel (&tm->triple_wheel, 
max_expiration_time + 1); + + after = clib_time_now (&tm->clib_time); + + fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes, + tm->triple_wheel.current_tick); + fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n", + (after - before), + ((f64) adds + (f64) deletes + + (f64) tm->triple_wheel.current_tick) / (after - before)); if (pool_elts (tm->test_elts)) fformat (stdout, "Note: %d elements remain in pool\n", @@ -487,45 +611,567 @@ test1_double (tw_timer_test_main_t * tm) /* *INDENT-OFF* */ pool_foreach (e, tm->test_elts, ({ - fformat(stdout, "[%d] expected to expire %d\n", - e - tm->test_elts, - e->expected_to_expire); + fformat (stdout, "[%d] expected to expire %d\n", + e - tm->test_elts, + e->expected_to_expire); })); /* *INDENT-ON* */ - fformat (stdout, - "final wheel time %d, fast index %d\n", - tm->double_wheel.current_tick, - tm->double_wheel.current_index[TW_TIMER_RING_FAST]); - pool_free (tm->test_elts); - tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel); + tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel); return 0; } static clib_error_t * -timer_test_command_fn (tw_timer_test_main_t * tm, unformat_input_t * input) +test2_triple_ov (tw_timer_test_main_t * tm) { + u32 i, j; + tw_timer_test_elt_t *e; + u32 initial_wheel_offset = 0; + u32 expiration_time; + u32 max_expiration_time = 0; + u32 *deleted_indices = 0; + u32 adds = 0, deletes = 0; + f64 before, after; - int is_test1 = 0; - int num_wheels = 1; - int is_test2 = 0; + clib_time_init (&tm->clib_time); - memset (tm, 0, sizeof (*tm)); - /* Default values */ - tm->ntimers = 100000; - tm->seed = 0xDEADDABE; - tm->niter = 1000; - tm->ticks_per_iter = 727; + tw_timer_wheel_init_1t_3w_1024sl_ov (&tm->triple_ov_wheel, + expired_timer_triple_ov_callback, + 1.0 /* timer interval */ , ~0); - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "seed %d", &tm->seed)) - ; + + /* Prime offset */ + initial_wheel_offset = 75700; + run_triple_ov_wheel 
(&tm->triple_ov_wheel, initial_wheel_offset); + + fformat (stdout, + "initial wheel time %d, fi %d si %d gi %d\n", + tm->triple_ov_wheel.current_tick, + tm->triple_ov_wheel.current_index[TW_TIMER_RING_FAST], + tm->triple_ov_wheel.current_index[TW_TIMER_RING_SLOW], + tm->triple_ov_wheel.current_index[TW_TIMER_RING_GLACIER]); + + initial_wheel_offset = tm->triple_ov_wheel.current_tick; + + fformat (stdout, + "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n", + tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed); + + before = clib_time_now (&tm->clib_time); + + /* Prime the pump */ + for (i = 0; i < tm->ntimers; i++) + { + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); + + do + { + expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1); + } + while (expiration_time == 0); + + if (expiration_time > max_expiration_time) + max_expiration_time = expiration_time; + + e->expected_to_expire = expiration_time + initial_wheel_offset; + + e->stop_timer_handle = + tw_timer_start_1t_3w_1024sl_ov (&tm->triple_ov_wheel, + e - tm->test_elts, 0 /* timer id */ , + expiration_time); + } + + adds += i; + + for (i = 0; i < tm->niter; i++) + { + run_triple_ov_wheel (&tm->triple_ov_wheel, tm->ticks_per_iter); + + j = 0; + vec_reset_length (deleted_indices); + /* *INDENT-OFF* */ + pool_foreach (e, tm->test_elts, + ({ + tw_timer_stop_1t_3w_1024sl_ov (&tm->triple_ov_wheel, + e->stop_timer_handle); + vec_add1 (deleted_indices, e - tm->test_elts); + if (++j >= tm->ntimers / 4) + goto del_and_re_add; + })); + /* *INDENT-ON* */ + + del_and_re_add: + for (j = 0; j < vec_len (deleted_indices); j++) + pool_put_index (tm->test_elts, deleted_indices[j]); + + deletes += j; + + for (j = 0; j < tm->ntimers / 4; j++) + { + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); + + do + { + expiration_time = random_u64 (&tm->seed) & ((1 << 17) - 1); + } + while (expiration_time == 0); + + if (expiration_time > max_expiration_time) + max_expiration_time = expiration_time; + + 
e->expected_to_expire = expiration_time + + tm->triple_ov_wheel.current_tick; + + e->stop_timer_handle = tw_timer_start_1t_3w_1024sl_ov + (&tm->triple_ov_wheel, e - tm->test_elts, 0 /* timer id */ , + expiration_time); + } + adds += j; + } + + vec_free (deleted_indices); + + run_triple_ov_wheel (&tm->triple_ov_wheel, max_expiration_time + 1); + + after = clib_time_now (&tm->clib_time); + + fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes, + tm->triple_ov_wheel.current_tick); + fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n", + (after - before), + ((f64) adds + (f64) deletes + + (f64) tm->triple_ov_wheel.current_tick) / (after - before)); + + if (pool_elts (tm->test_elts)) + fformat (stdout, "Note: %d elements remain in pool\n", + pool_elts (tm->test_elts)); + + /* *INDENT-OFF* */ + pool_foreach (e, tm->test_elts, + ({ + TWT (tw_timer) * t; + + fformat (stdout, "[%d] expected to expire %d\n", + e - tm->test_elts, + e->expected_to_expire); + t = pool_elt_at_index (tm->triple_ov_wheel.timers, e->stop_timer_handle); + fformat (stdout, " expiration_time %lld\n", t->expiration_time); + })); + /* *INDENT-ON* */ + + pool_free (tm->test_elts); + tw_timer_wheel_free_1t_3w_1024sl_ov (&tm->triple_ov_wheel); + return 0; +} + +static clib_error_t * +test1_single (tw_timer_test_main_t * tm) +{ + u32 i; + tw_timer_test_elt_t *e; + u32 offset; + + tw_timer_wheel_init_2t_1w_2048sl (&tm->single_wheel, + expired_timer_single_callback, + 1.0 /* timer interval */ , ~0); + + /* + * Prime offset, to make sure that the wheel starts in a + * non-trivial position + */ + offset = 123; + + run_single_wheel (&tm->single_wheel, offset); + + fformat (stdout, "initial wheel time %d, fast index %d\n", + tm->single_wheel.current_tick, + tm->single_wheel.current_index[TW_TIMER_RING_FAST]); + + offset = tm->single_wheel.current_tick; + + for (i = 0; i < tm->ntimers; i++) + { + u32 expected_to_expire; + u32 timer_arg; + + timer_arg = 1 + i; + timer_arg &= 2047; + if 
(timer_arg == 0) + timer_arg = 1; + + expected_to_expire = timer_arg + offset; + + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); + e->expected_to_expire = expected_to_expire; + e->stop_timer_handle = tw_timer_start_2t_1w_2048sl + (&tm->single_wheel, e - tm->test_elts, 1 /* timer id */ , + timer_arg); + } + run_single_wheel (&tm->single_wheel, tm->ntimers + 3); + + if (pool_elts (tm->test_elts)) + fformat (stdout, "Note: %d elements remain in pool\n", + pool_elts (tm->test_elts)); + + /* *INDENT-OFF* */ + pool_foreach (e, tm->test_elts, + ({ + fformat(stdout, "[%d] expected to expire %d\n", + e - tm->test_elts, + e->expected_to_expire); + })); + /* *INDENT-ON* */ + + fformat (stdout, + "final wheel time %d, fast index %d\n", + tm->single_wheel.current_tick, + tm->single_wheel.current_index[TW_TIMER_RING_FAST]); + + pool_free (tm->test_elts); + tw_timer_wheel_free_2t_1w_2048sl (&tm->single_wheel); + return 0; +} + +static clib_error_t * +test1_double (tw_timer_test_main_t * tm) +{ + u32 i; + tw_timer_test_elt_t *e; + u32 offset; + + tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel, + expired_timer_double_callback, + 1.0 /* timer interval */ , ~0); + + /* + * Prime offset, to make sure that the wheel starts in a + * non-trivial position + */ + offset = 227989; + + run_double_wheel (&tm->double_wheel, offset); + + fformat (stdout, "initial wheel time %d, fast index %d\n", + tm->double_wheel.current_tick, + tm->double_wheel.current_index[TW_TIMER_RING_FAST]); + + for (i = 0; i < tm->ntimers; i++) + { + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); + + e->expected_to_expire = i + offset + 1; + e->stop_timer_handle = tw_timer_start_16t_2w_512sl + (&tm->double_wheel, e - tm->test_elts, 14 /* timer id */ , + i + 1); + } + run_double_wheel (&tm->double_wheel, tm->ntimers + 3); + + if (pool_elts (tm->test_elts)) + fformat (stdout, "Note: %d elements remain in pool\n", + pool_elts (tm->test_elts)); + + /* *INDENT-OFF* */ + pool_foreach (e, 
tm->test_elts, + ({ + fformat(stdout, "[%d] expected to expire %d\n", + e - tm->test_elts, + e->expected_to_expire); + })); + /* *INDENT-ON* */ + + fformat (stdout, + "final wheel time %d, fast index %d\n", + tm->double_wheel.current_tick, + tm->double_wheel.current_index[TW_TIMER_RING_FAST]); + + pool_free (tm->test_elts); + tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel); + return 0; +} + +static clib_error_t * +test3_triple_double (tw_timer_test_main_t * tm) +{ + tw_timer_test_elt_t *e; + u32 initial_wheel_offset = 0; + u32 expiration_time; + u32 max_expiration_time = 0; + u32 adds = 0, deletes = 0; + f64 before, after; + + clib_time_init (&tm->clib_time); + + tw_timer_wheel_init_4t_3w_256sl (&tm->triple_wheel, + expired_timer_triple_callback, + 1.0 /* timer interval */ , ~0); + + initial_wheel_offset = 0; + run_triple_wheel (&tm->triple_wheel, initial_wheel_offset); + + fformat (stdout, + "initial wheel time %d, fi %d si %d gi %d\n", + tm->triple_wheel.current_tick, + tm->triple_wheel.current_index[TW_TIMER_RING_FAST], + tm->triple_wheel.current_index[TW_TIMER_RING_SLOW], + tm->triple_wheel.current_index[TW_TIMER_RING_GLACIER]); + + initial_wheel_offset = tm->triple_wheel.current_tick; + + fformat (stdout, "Create a timer which expires at wheel-time (1, 0, 0)\n"); + + before = clib_time_now (&tm->clib_time); + + /* Prime the pump */ + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); + + /* 1 glacier ring tick from now */ + expiration_time = TW_SLOTS_PER_RING * TW_SLOTS_PER_RING; + e->expected_to_expire = expiration_time + initial_wheel_offset; + max_expiration_time = expiration_time; + + e->stop_timer_handle = + tw_timer_start_4t_3w_256sl (&tm->triple_wheel, e - tm->test_elts, + 3 /* timer id */ , + expiration_time); + + run_triple_wheel (&tm->triple_wheel, max_expiration_time + 1); + + after = clib_time_now (&tm->clib_time); + + fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes, + tm->triple_wheel.current_tick); + fformat 
(stdout, "test ran %.2f seconds, %.2f ops/second\n", + (after - before), + ((f64) adds + (f64) deletes + + (f64) tm->triple_wheel.current_tick) / (after - before)); + + if (pool_elts (tm->test_elts)) + fformat (stdout, "Note: %d elements remain in pool\n", + pool_elts (tm->test_elts)); + + /* *INDENT-OFF* */ + pool_foreach (e, tm->test_elts, + ({ + fformat (stdout, "[%d] expected to expire %d\n", + e - tm->test_elts, + e->expected_to_expire); + })); + /* *INDENT-ON* */ + + pool_free (tm->test_elts); + tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel); + return 0; +} + +static clib_error_t * +test4_double_double (tw_timer_test_main_t * tm) +{ + u32 i; + tw_timer_test_elt_t *e; + u32 initial_wheel_offset; + u32 expiration_time; + u32 max_expiration_time = 0; + u32 *deleted_indices = 0; + u32 adds = 0, deletes = 0; + f64 before, after; + + clib_time_init (&tm->clib_time); + + tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel, + expired_timer_double_callback, + 1.0 /* timer interval */ , ~0); + /* Prime offset */ + initial_wheel_offset = 0; + + run_double_wheel (&tm->double_wheel, initial_wheel_offset); + + fformat (stdout, "initial wheel time %d, fast index %d slow index %d\n", + tm->double_wheel.current_tick, + tm->double_wheel.current_index[TW_TIMER_RING_FAST], + tm->double_wheel.current_index[TW_TIMER_RING_SLOW]); + + initial_wheel_offset = tm->double_wheel.current_tick; + + fformat (stdout, "test timer which expires at 512 ticks\n"); + + before = clib_time_now (&tm->clib_time); + + /* Prime the pump */ + for (i = 0; i < tm->ntimers; i++) + { + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); + + expiration_time = 512; + + if (expiration_time > max_expiration_time) + max_expiration_time = expiration_time; + + e->expected_to_expire = expiration_time + initial_wheel_offset; + e->stop_timer_handle = + tw_timer_start_16t_2w_512sl (&tm->double_wheel, e - tm->test_elts, + 14 /* timer id */ , + expiration_time); + } + + adds = 1; + + vec_free 
(deleted_indices); + + run_double_wheel (&tm->double_wheel, max_expiration_time + 1); + + after = clib_time_now (&tm->clib_time); + + fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes, + tm->double_wheel.current_tick); + fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n", + (after - before), + ((f64) adds + (f64) deletes + + (f64) tm->double_wheel.current_tick) / (after - before)); + + if (pool_elts (tm->test_elts)) + fformat (stdout, "Note: %d elements remain in pool\n", + pool_elts (tm->test_elts)); + + /* *INDENT-OFF* */ + pool_foreach (e, tm->test_elts, + ({ + fformat (stdout, "[%d] expected to expire %d\n", + e - tm->test_elts, + e->expected_to_expire); + })); + /* *INDENT-ON* */ + + pool_free (tm->test_elts); + tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel); + return 0; +} + +static clib_error_t * +test5_double (tw_timer_test_main_t * tm) +{ + u32 i; + tw_timer_test_elt_t *e; + u32 initial_wheel_offset; + u32 expiration_time; + u32 max_expiration_time = 0; + u32 adds = 0, deletes = 0; + f64 before, after; + + clib_time_init (&tm->clib_time); + + tw_timer_wheel_init_16t_2w_512sl (&tm->double_wheel, + expired_timer_double_callback, + 1.0 /* timer interval */ , ~0); + + /* Prime offset */ + initial_wheel_offset = 7567; + + run_double_wheel (&tm->double_wheel, initial_wheel_offset); + + fformat (stdout, "initial wheel time %d, fast index %d slow index %d\n", + tm->double_wheel.current_tick, + tm->double_wheel.current_index[TW_TIMER_RING_FAST], + tm->double_wheel.current_index[TW_TIMER_RING_SLOW]); + + initial_wheel_offset = tm->double_wheel.current_tick; + + fformat (stdout, + "test %d timers, %d iter, %d ticks per iter, 0x%llx seed\n", + tm->ntimers, tm->niter, tm->ticks_per_iter, tm->seed); + + before = clib_time_now (&tm->clib_time); + + /* Prime the pump */ + for (i = 0; i < tm->ntimers; i++) + { + pool_get (tm->test_elts, e); + memset (e, 0, sizeof (*e)); + + expiration_time = i + 1; + + if (expiration_time > 
max_expiration_time) + max_expiration_time = expiration_time; + + e->expected_to_expire = expiration_time + initial_wheel_offset; + e->stop_timer_handle = + tw_timer_start_16t_2w_512sl (&tm->double_wheel, e - tm->test_elts, + 14 /* timer id */ , + expiration_time); + } + + adds += i; + + run_double_wheel (&tm->double_wheel, max_expiration_time + 1); + + after = clib_time_now (&tm->clib_time); + + fformat (stdout, "%d adds, %d deletes, %d ticks\n", adds, deletes, + tm->double_wheel.current_tick); + fformat (stdout, "test ran %.2f seconds, %.2f ops/second\n", + (after - before), + ((f64) adds + (f64) deletes + + (f64) tm->double_wheel.current_tick) / (after - before)); + + if (pool_elts (tm->test_elts)) + fformat (stdout, "Note: %d elements remain in pool\n", + pool_elts (tm->test_elts)); + + /* *INDENT-OFF* */ + pool_foreach (e, tm->test_elts, + ({ + fformat (stdout, "[%d] expected to expire %d\n", + e - tm->test_elts, + e->expected_to_expire); + })); + /* *INDENT-ON* */ + + pool_free (tm->test_elts); + tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel); + return 0; +} + +static clib_error_t * +timer_test_command_fn (tw_timer_test_main_t * tm, unformat_input_t * input) +{ + + int is_test1 = 0; + int num_wheels = 1; + int is_test2 = 0; + int is_test3 = 0; + int is_test4 = 0; + int is_test5 = 0; + int overflow = 0; + + memset (tm, 0, sizeof (*tm)); + /* Default values */ + tm->ntimers = 100000; + tm->seed = 0xDEADDABEB00BFACE; + tm->niter = 1000; + tm->ticks_per_iter = 727; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "seed %lld", &tm->seed)) + ; else if (unformat (input, "test1")) is_test1 = 1; else if (unformat (input, "test2")) is_test2 = 1; + else if (unformat (input, "overflow")) + overflow = 1; + else if (unformat (input, "lebron")) + is_test3 = 1; + else if (unformat (input, "wilt")) + is_test4 = 1; + else if (unformat (input, "linear")) + is_test5 = 1; else if (unformat (input, "wheels %d", &num_wheels)) ; else 
if (unformat (input, "ntimers %d", &tm->ntimers)) @@ -534,12 +1180,14 @@ timer_test_command_fn (tw_timer_test_main_t * tm, unformat_input_t * input) ; else if (unformat (input, "ticks_per_iter %d", &tm->ticks_per_iter)) ; + else + break; } - if (is_test1 + is_test2 == 0) + if (is_test1 + is_test2 + is_test3 + is_test4 + is_test5 == 0) return clib_error_return (0, "No test specified [test1..n]"); - if (num_wheels < 1 || num_wheels > 2) - return clib_error_return (0, "unsupported... 1 or 2 wheels only"); + if (num_wheels < 1 || num_wheels > 3) + return clib_error_return (0, "unsupported... 1, 2 or 3 wheels only"); if (is_test1) @@ -553,9 +1201,25 @@ timer_test_command_fn (tw_timer_test_main_t * tm, unformat_input_t * input) { if (num_wheels == 1) return test2_single (tm); - else + else if (num_wheels == 2) return test2_double (tm); + else if (num_wheels == 3) + { + if (overflow == 0) + return test2_triple (tm); + else + return test2_triple_ov (tm); + } } + if (is_test3) + return test3_triple_double (tm); + + if (is_test4) + return test4_double_double (tm); + + if (is_test5) + return test5_double (tm); + /* NOTREACHED */ return 0; } @@ -583,6 +1247,25 @@ main (int argc, char *argv[]) } #endif /* CLIB_UNIX */ +/* For debugging... 
*/ +int +pifi (void *p, u32 index) +{ + return pool_is_free_index (p, index); +} + +u32 +vl (void *p) +{ + return vec_len (p); +} + +uword +pe (void *v) +{ + return (pool_elts (v)); +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h index 685ac31e..6edef17b 100644 --- a/src/vppinfra/tw_timer_16t_1w_2048sl.h +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h @@ -24,6 +24,7 @@ #undef TW_TIMERS_PER_OBJECT #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX +#undef TW_OVERFLOW_VECTOR #define TW_TIMER_WHEELS 1 #define TW_SLOTS_PER_RING 2048 diff --git a/src/vppinfra/tw_timer_16t_2w_512sl.h b/src/vppinfra/tw_timer_16t_2w_512sl.h index 93b26d29..2497b31c 100644 --- a/src/vppinfra/tw_timer_16t_2w_512sl.h +++ b/src/vppinfra/tw_timer_16t_2w_512sl.h @@ -24,6 +24,7 @@ #undef TW_TIMERS_PER_OBJECT #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX +#undef TW_OVERFLOW_VECTOR #define TW_TIMER_WHEELS 2 #define TW_SLOTS_PER_RING 512 diff --git a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.c b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.c new file mode 100644 index 00000000..8a65752c --- /dev/null +++ b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vppinfra/error.h> +#include "tw_timer_1t_3w_1024sl_ov.h" +#include "tw_timer_template.c" + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h new file mode 100644 index 00000000..7327f87b --- /dev/null +++ b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tw_timer_1t_3w_1024sl_ov_h__ +#define __included_tw_timer_1t_3w_1024sl_ov_h__ + +/* ... 
So that a client app can create multiple wheel geometries */ +#undef TW_TIMER_WHEELS +#undef TW_SLOTS_PER_RING +#undef TW_RING_SHIFT +#undef TW_RING_MASK +#undef TW_TIMERS_PER_OBJECT +#undef LOG2_TW_TIMERS_PER_OBJECT +#undef TW_SUFFIX +#undef TW_OVERFLOW_VECTOR + +#define TW_TIMER_WHEELS 3 +#define TW_SLOTS_PER_RING 1024 +#define TW_RING_SHIFT 10 +#define TW_RING_MASK (TW_SLOTS_PER_RING -1) +#define TW_TIMERS_PER_OBJECT 1 +#define LOG2_TW_TIMERS_PER_OBJECT 0 +#define TW_SUFFIX _1t_3w_1024sl_ov +#define TW_OVERFLOW_VECTOR 1 + +#include <vppinfra/tw_timer_template.h> + +#endif /* __included_tw_timer_1t_3w_1024sl_ov_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_2t_1w_2048sl.h b/src/vppinfra/tw_timer_2t_1w_2048sl.h index d1cf6d07..33b74405 100644 --- a/src/vppinfra/tw_timer_2t_1w_2048sl.h +++ b/src/vppinfra/tw_timer_2t_1w_2048sl.h @@ -24,6 +24,7 @@ #undef TW_TIMERS_PER_OBJECT #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX +#undef TW_OVERFLOW_VECTOR #define TW_TIMER_WHEELS 1 #define TW_SLOTS_PER_RING 2048 diff --git a/src/vppinfra/tw_timer_4t_3w_256sl.c b/src/vppinfra/tw_timer_4t_3w_256sl.c new file mode 100644 index 00000000..73bb34b2 --- /dev/null +++ b/src/vppinfra/tw_timer_4t_3w_256sl.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vppinfra/error.h> +#include "tw_timer_4t_3w_256sl.h" +#include "tw_timer_template.c" + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_4t_3w_256sl.h b/src/vppinfra/tw_timer_4t_3w_256sl.h new file mode 100644 index 00000000..89adb7a2 --- /dev/null +++ b/src/vppinfra/tw_timer_4t_3w_256sl.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tw_timer_4t_3w_256sl_h__ +#define __included_tw_timer_4t_3w_256sl_h__ + +/* ... 
So that a client app can create multiple wheel geometries */ +#undef TW_TIMER_WHEELS +#undef TW_SLOTS_PER_RING +#undef TW_RING_SHIFT +#undef TW_RING_MASK +#undef TW_TIMERS_PER_OBJECT +#undef LOG2_TW_TIMERS_PER_OBJECT +#undef TW_SUFFIX +#undef TW_OVERFLOW_VECTOR + +#define TW_TIMER_WHEELS 3 +#define TW_SLOTS_PER_RING 256 +#define TW_RING_SHIFT 8 +#define TW_RING_MASK (TW_SLOTS_PER_RING -1) +#define TW_TIMERS_PER_OBJECT 4 +#define LOG2_TW_TIMERS_PER_OBJECT 2 +#define TW_SUFFIX _4t_3w_256sl + +#include <vppinfra/tw_timer_template.h> + +#endif /* __included_tw_timer_4t_3w_256sl_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_4t_3w_4sl_ov.c b/src/vppinfra/tw_timer_4t_3w_4sl_ov.c new file mode 100644 index 00000000..e2af7b5d --- /dev/null +++ b/src/vppinfra/tw_timer_4t_3w_4sl_ov.c @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This wheel geometry is not prima facie useful, except for testing + */ + +#if TW_TIMER_TEST_GEOMETRY > 0 +#include <vppinfra/error.h> +#include "tw_timer_4t_3w_4sl_ov.h" +#include "tw_timer_template.c" +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_4t_3w_4sl_ov.h b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h new file mode 100644 index 00000000..0f76164d --- /dev/null +++ b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __included_tw_timer_4t_3w_4sl_ov_h__ +#define __included_tw_timer_4t_3w_4sl_ov_h__ + +/* ... 
So that a client app can create multiple wheel geometries */ +#undef TW_TIMER_WHEELS +#undef TW_SLOTS_PER_RING +#undef TW_RING_SHIFT +#undef TW_RING_MASK +#undef TW_TIMERS_PER_OBJECT +#undef LOG2_TW_TIMERS_PER_OBJECT +#undef TW_SUFFIX +#undef TW_OVERFLOW_VECTOR + +#define TW_TIMER_WHEELS 3 +#define TW_SLOTS_PER_RING 4 +#define TW_RING_SHIFT 2 +#define TW_RING_MASK (TW_SLOTS_PER_RING -1) +#define TW_TIMERS_PER_OBJECT 4 +#define LOG2_TW_TIMERS_PER_OBJECT 2 +#define TW_SUFFIX _4t_3w_4sl_ov +#define TW_OVERFLOW_VECTOR 1 + +#include <vppinfra/tw_timer_template.h> + +#endif /* __included_tw_timer_4t_3w_4sl_ov_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index e3f44500..a0c407ae 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -18,16 +18,19 @@ * * */ - static inline u32 TW (make_internal_timer_handle) (u32 pool_index, u32 timer_id) { u32 handle; ASSERT (timer_id < TW_TIMERS_PER_OBJECT); +#if LOG2_TW_TIMERS_PER_OBJECT > 0 ASSERT (pool_index < (1 << (32 - LOG2_TW_TIMERS_PER_OBJECT))); handle = (timer_id << (32 - LOG2_TW_TIMERS_PER_OBJECT)) | (pool_index); +#else + handle = pool_index; +#endif return handle; } @@ -79,16 +82,22 @@ timer_remove (TWT (tw_timer) * pool, u32 index) * @param tw_timer_wheel_t * tw timer wheel object pointer * @param u32 pool_index user pool index, presumably for a tw session * @param u32 timer_id app-specific timer ID. 4 bits. 
- * @param u32 interval timer interval in ticks + * @param u64 interval timer interval in ticks * @returns handle needed to cancel the timer */ u32 TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, - u32 interval) + u64 interval) { #if TW_TIMER_WHEELS > 1 u16 slow_ring_offset; u32 carry; +#endif +#if TW_TIMER_WHEELS > 2 + u16 glacier_ring_offset; +#endif +#if TW_OVERFLOW_VECTOR > 0 + u64 interval_plus_time_to_wrap, triple_wrap_mask; #endif u16 fast_ring_offset; tw_timer_wheel_slot_t *ts; @@ -97,31 +106,89 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, ASSERT (interval); pool_get (tw->timers, t); - t->next = t->prev = ~0; -#if TW_TIMER_WHEELS > 1 - t->fast_ring_offset = ~0; -#endif + memset (t, 0xff, sizeof (*t)); + t->user_handle = TW (make_internal_timer_handle) (pool_index, timer_id); + /* Factor interval into 1..3 wheel offsets */ +#if TW_TIMER_WHEELS > 2 +#if TW_OVERFLOW_VECTOR > 0 + /* + * This is tricky. Put a timer onto the overflow + * vector if the interval PLUS the time + * until the next triple-wrap exceeds one full revolution + * of all three wheels. 
+ */ + triple_wrap_mask = (1 << (3 * TW_RING_SHIFT)) - 1; + interval_plus_time_to_wrap = + interval + (tw->current_tick & triple_wrap_mask); + if ((interval_plus_time_to_wrap >= 1 << (3 * TW_RING_SHIFT))) + { + t->expiration_time = tw->current_tick + interval; + ts = &tw->overflow; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + return t - tw->timers; + } +#endif + + glacier_ring_offset = interval >> (2 * TW_RING_SHIFT); + ASSERT (glacier_ring_offset < TW_SLOTS_PER_RING); + interval -= (glacier_ring_offset << (2 * TW_RING_SHIFT)); +#endif +#if TW_TIMER_WHEELS > 1 + slow_ring_offset = interval >> TW_RING_SHIFT; + ASSERT (slow_ring_offset < TW_SLOTS_PER_RING); + interval -= (slow_ring_offset << TW_RING_SHIFT); +#endif fast_ring_offset = interval & TW_RING_MASK; - fast_ring_offset += tw->current_index[TW_TIMER_RING_FAST]; + + /* + * Account for the current wheel positions(s) + * This is made slightly complicated by the fact that the current + * index vector will contain (TW_SLOTS_PER_RING, ...) when + * the actual position is (0, ...) + */ + + fast_ring_offset += tw->current_index[TW_TIMER_RING_FAST] & TW_RING_MASK; + #if TW_TIMER_WHEELS > 1 carry = fast_ring_offset >= TW_SLOTS_PER_RING ? 1 : 0; fast_ring_offset %= TW_SLOTS_PER_RING; - slow_ring_offset = (interval >> TW_RING_SHIFT) + carry; + slow_ring_offset += (tw->current_index[TW_TIMER_RING_SLOW] & TW_RING_MASK) + + carry; + carry = slow_ring_offset >= TW_SLOTS_PER_RING ? 1 : 0; + slow_ring_offset %= TW_SLOTS_PER_RING; +#endif - /* Timer duration exceeds ~7 hrs? Oops */ - ASSERT (slow_ring_offset < TW_SLOTS_PER_RING); +#if TW_TIMER_WHEELS > 2 + glacier_ring_offset += + (tw->current_index[TW_TIMER_RING_GLACIER] & TW_RING_MASK) + carry; + glacier_ring_offset %= TW_SLOTS_PER_RING; +#endif - /* Timer expires more than 51.2 seconds from now? 
*/ - if (slow_ring_offset) +#if TW_TIMER_WHEELS > 2 + if (glacier_ring_offset != + (tw->current_index[TW_TIMER_RING_GLACIER] & TW_RING_MASK)) { - slow_ring_offset += tw->current_index[TW_TIMER_RING_SLOW]; - slow_ring_offset %= TW_SLOTS_PER_RING; + /* We'll need slow and fast ring offsets later */ + t->slow_ring_offset = slow_ring_offset; + t->fast_ring_offset = fast_ring_offset; - /* We'll want the fast ring offset later... */ + ts = &tw->w[TW_TIMER_RING_GLACIER][glacier_ring_offset]; + + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + + return t - tw->timers; + } +#endif + +#if TW_TIMER_WHEELS > 1 + /* Timer expires more than 51.2 seconds from now? */ + if (slow_ring_offset != + (tw->current_index[TW_TIMER_RING_SLOW] & TW_RING_MASK)) + { + /* We'll need the fast ring offset later... */ t->fast_ring_offset = fast_ring_offset; - ASSERT (t->fast_ring_offset < TW_SLOTS_PER_RING); ts = &tw->w[TW_TIMER_RING_SLOW][slow_ring_offset]; @@ -131,7 +198,6 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, } #else fast_ring_offset %= TW_SLOTS_PER_RING; - ASSERT (interval < TW_SLOTS_PER_RING); #endif /* Timer expires less than one fast-ring revolution from now */ @@ -141,6 +207,41 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, return t - tw->timers; } +#if TW_TIMER_SCAN_FOR_HANDLE > 0 +int TW (scan_for_handle) (TWT (tw_timer_wheel) * tw, u32 handle) +{ + int i, j; + tw_timer_wheel_slot_t *ts; + TWT (tw_timer) * t, *head; + u32 next_index; + int rv = 0; + + for (i = 0; i < TW_TIMER_WHEELS; i++) + { + for (j = 0; j < TW_SLOTS_PER_RING; j++) + { + ts = &tw->w[i][j]; + head = pool_elt_at_index (tw->timers, ts->head_index); + next_index = head->next; + + while (next_index != ts->head_index) + { + t = pool_elt_at_index (tw->timers, next_index); + if (next_index == handle) + { + clib_warning ("handle %d found in ring %d slot %d", + handle, i, j); + clib_warning ("user handle 0x%x", t->user_handle); + rv = 1; 
+ } + next_index = t->next; + } + } + } + return rv; +} +#endif /* TW_TIMER_SCAN_FOR_HANDLE */ + /** * @brief Stop a tw timer * @param tw_timer_wheel_t * tw timer wheel object pointer @@ -164,7 +265,7 @@ void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle) * @brief Initialize a tw timer wheel template instance * @param tw_timer_wheel_t * tw timer wheel object pointer * @param void * expired_timer_callback. Passed a u32 * vector of - * expired timer handles. + * expired timer handles. The callback is optional. * @param f64 timer_interval_in_seconds */ void @@ -185,6 +286,9 @@ TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, } tw->timer_interval = timer_interval_in_seconds; tw->ticks_per_second = 1.0 / timer_interval_in_seconds; + tw->first_expires_tick = ~0ULL; + vec_validate (tw->expired_timer_handles, 0); + _vec_len (tw->expired_timer_handles) = 0; for (ring = 0; ring < TW_TIMER_WHEELS; ring++) { @@ -197,6 +301,14 @@ TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, ts->head_index = t - tw->timers; } } + +#if TW_OVERFLOW_VECTOR > 0 + ts = &tw->overflow; + pool_get (tw->timers, t); + memset (t, 0xff, sizeof (*t)); + t->next = t->prev = t - tw->timers; + ts->head_index = t - tw->timers; +#endif } /** @@ -227,6 +339,21 @@ void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw) pool_put (tw->timers, head); } } + +#if TW_OVERFLOW_VECTOR > 0 /* return overflow-list timers, then the list head, to the pool */ + ts = &tw->overflow; + head = pool_elt_at_index (tw->timers, ts->head_index); + next_index = head->next; + + while (next_index != ts->head_index) + { + t = pool_elt_at_index (tw->timers, next_index); + next_index = t->next; + pool_put (tw->timers, t); + } + pool_put (tw->timers, head); +#endif + memset (tw, 0, sizeof (*tw)); } @@ -235,50 +362,185 @@ void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw) * as needed. This routine should be called once every timer_interval seconds * @param tw_timer_wheel_t * tw timer wheel template instance pointer * @param f64 now the current time, e.g. 
from vlib_time_now(vm) + * @returns u32 * vector of expired user handles */ -u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) +static inline + u32 * TW (tw_timer_expire_timers_internal) (TWT (tw_timer_wheel) * tw, + f64 now, + u32 * callback_vector_arg) { u32 nticks, i; tw_timer_wheel_slot_t *ts; TWT (tw_timer) * t, *head; + u32 *callback_vector; u32 fast_wheel_index; u32 next_index; - u32 nexpirations, total_nexpirations; -#if TW_TIMER_WHEELS > 1 - u32 slow_wheel_index; -#endif + u32 slow_wheel_index __attribute__ ((unused)); + u32 glacier_wheel_index __attribute__ ((unused)); /* Shouldn't happen */ if (PREDICT_FALSE (now < tw->next_run_time)) - return 0; + return callback_vector_arg; /* Number of ticks which have occurred */ nticks = tw->ticks_per_second * (now - tw->last_run_time); if (nticks == 0) - return 0; + return callback_vector_arg; /* Remember when we ran, compute next runtime */ tw->next_run_time = (now + tw->timer_interval); - total_nexpirations = 0; + if (callback_vector_arg == 0) + { + _vec_len (tw->expired_timer_handles) = 0; + callback_vector = tw->expired_timer_handles; + } + else + callback_vector = callback_vector_arg; + for (i = 0; i < nticks; i++) { fast_wheel_index = tw->current_index[TW_TIMER_RING_FAST]; + if (TW_TIMER_WHEELS > 1) + slow_wheel_index = tw->current_index[TW_TIMER_RING_SLOW]; + if (TW_TIMER_WHEELS > 2) + glacier_wheel_index = tw->current_index[TW_TIMER_RING_GLACIER]; + +#if TW_OVERFLOW_VECTOR > 0 + /* Triple odometer-click? Process the overflow vector... 
*/ + if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING + && slow_wheel_index == TW_SLOTS_PER_RING + && glacier_wheel_index == TW_SLOTS_PER_RING)) + { + u64 interval; + u32 new_glacier_ring_offset, new_slow_ring_offset; + u32 new_fast_ring_offset; + ts = &tw->overflow; + head = pool_elt_at_index (tw->timers, ts->head_index); + next_index = head->next; + + /* Make slot empty */ + head->next = head->prev = ts->head_index; + + /* traverse slot, place timers wherever they go */ + while (next_index != head - tw->timers) + { + t = pool_elt_at_index (tw->timers, next_index); + next_index = t->next; + + /* Remove from the overflow vector (hammer) */ + t->next = t->prev = ~0; + + ASSERT (t->expiration_time >= tw->current_tick); + + interval = t->expiration_time - tw->current_tick; + + /* Right back onto the overflow vector? */ + if (interval >= (1 << (3 * TW_RING_SHIFT))) + { + ts = &tw->overflow; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + continue; + } + /* Compute ring offsets */ + new_glacier_ring_offset = interval >> (2 * TW_RING_SHIFT); + + interval -= (new_glacier_ring_offset << (2 * TW_RING_SHIFT)); + + /* Note: the wheels are at (0,0,0), no add-with-carry needed */ + new_slow_ring_offset = interval >> TW_RING_SHIFT; + interval -= (new_slow_ring_offset << TW_RING_SHIFT); + new_fast_ring_offset = interval & TW_RING_MASK; + t->slow_ring_offset = new_slow_ring_offset; + t->fast_ring_offset = new_fast_ring_offset; + + /* Timer expires Right Now */ + if (PREDICT_FALSE (t->slow_ring_offset == 0 && + t->fast_ring_offset == 0 && + new_glacier_ring_offset == 0)) + { + vec_add1 (callback_vector, t->user_handle); + pool_put (tw->timers, t); + } + /* Timer moves to the glacier ring */ + else if (new_glacier_ring_offset) + { + ts = &tw->w[TW_TIMER_RING_GLACIER][new_glacier_ring_offset]; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + } + /* Timer moves to the slow ring */ + else if (t->slow_ring_offset) + { + /* Add to slow ring */ + ts 
= &tw->w[TW_TIMER_RING_SLOW][t->slow_ring_offset]; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + } + /* Timer moves to the fast ring */ + else + { + ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + } + } + } +#endif + +#if TW_TIMER_WHEELS > 2 /* - * If we've been around the fast ring once, - * process one slot in the slow ring before we handle - * the fast ring. + * Double odometer-click? Process one slot in the glacier ring... */ - if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING)) + if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING + && slow_wheel_index == TW_SLOTS_PER_RING)) { - fast_wheel_index = tw->current_index[TW_TIMER_RING_FAST] = 0; + glacier_wheel_index %= TW_SLOTS_PER_RING; + ts = &tw->w[TW_TIMER_RING_GLACIER][glacier_wheel_index]; -#if TW_TIMER_WHEELS > 1 - tw->current_index[TW_TIMER_RING_SLOW]++; - tw->current_index[TW_TIMER_RING_SLOW] %= TW_SLOTS_PER_RING; - slow_wheel_index = tw->current_index[TW_TIMER_RING_SLOW]; + head = pool_elt_at_index (tw->timers, ts->head_index); + next_index = head->next; + + /* Make slot empty */ + head->next = head->prev = ts->head_index; + + /* traverse slot, deal timers into slow ring */ + while (next_index != head - tw->timers) + { + t = pool_elt_at_index (tw->timers, next_index); + next_index = t->next; + + /* Remove from glacier ring slot (hammer) */ + t->next = t->prev = ~0; + + /* Timer expires Right Now */ + if (PREDICT_FALSE (t->slow_ring_offset == 0 && + t->fast_ring_offset == 0)) + { + vec_add1 (callback_vector, t->user_handle); + pool_put (tw->timers, t); + } + /* Timer expires during slow-wheel tick 0 */ + else if (PREDICT_FALSE (t->slow_ring_offset == 0)) + { + ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + } + else /* typical case */ + { + /* Add to slow ring */ + ts = &tw->w[TW_TIMER_RING_SLOW][t->slow_ring_offset]; + timer_addhead
(tw->timers, ts->head_index, t - tw->timers); + } + } + } +#endif +#if TW_TIMER_WHEELS > 1 + /* + * Single odometer-click? Process a slot in the slow ring... + */ + if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING)) + { + slow_wheel_index %= TW_SLOTS_PER_RING; ts = &tw->w[TW_TIMER_RING_SLOW][slow_wheel_index]; head = pool_elt_at_index (tw->timers, ts->head_index); @@ -293,19 +555,27 @@ u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) t = pool_elt_at_index (tw->timers, next_index); next_index = t->next; - /* Remove from slow ring slot (hammer) */ + /* Remove from slow ring slot (hammer) */ t->next = t->prev = ~0; - ASSERT (t->fast_ring_offset < TW_SLOTS_PER_RING); - /* Add to fast ring */ - ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; - timer_addhead (tw->timers, ts->head_index, t - tw->timers); + + /* Timer expires Right Now */ + if (PREDICT_FALSE (t->fast_ring_offset == 0)) + { + vec_add1 (callback_vector, t->user_handle); + pool_put (tw->timers, t); + } + else /* typical case */ + { + /* Add to fast ring */ + ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; + timer_addhead (tw->timers, ts->head_index, t - tw->timers); + } } -#endif } +#endif /* Handle the fast ring */ - vec_reset_length (tw->expired_timer_handles); - + fast_wheel_index %= TW_SLOTS_PER_RING; ts = &tw->w[TW_TIMER_RING_FAST][fast_wheel_index]; head = pool_elt_at_index (tw->timers, ts->head_index); @@ -319,26 +589,57 @@ u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) { t = pool_elt_at_index (tw->timers, next_index); next_index = t->next; - vec_add1 (tw->expired_timer_handles, t->user_handle); + vec_add1 (callback_vector, t->user_handle); pool_put (tw->timers, t); } /* If any timers expired, tell the user */ - nexpirations = vec_len (tw->expired_timer_handles); - if (nexpirations) + if (callback_vector_arg == 0 && vec_len (callback_vector)) { - tw->expired_timer_callback (tw->expired_timer_handles); - total_nexpirations += nexpirations; + /*
The callback is optional. We return the u32 * handle vector */ + if (tw->expired_timer_callback) + { + tw->expired_timer_callback (callback_vector); + _vec_len (callback_vector) = 0; + } + tw->expired_timer_handles = callback_vector; } - tw->current_index[TW_TIMER_RING_FAST]++; tw->current_tick++; + fast_wheel_index++; + tw->current_index[TW_TIMER_RING_FAST] = fast_wheel_index; + +#if TW_TIMER_WHEELS > 1 + if (PREDICT_FALSE (fast_wheel_index == TW_SLOTS_PER_RING)) + slow_wheel_index++; + tw->current_index[TW_TIMER_RING_SLOW] = slow_wheel_index; +#endif + +#if TW_TIMER_WHEELS > 2 + if (PREDICT_FALSE (slow_wheel_index == TW_SLOTS_PER_RING)) + glacier_wheel_index++; + tw->current_index[TW_TIMER_RING_GLACIER] = glacier_wheel_index; +#endif - if (total_nexpirations >= tw->max_expirations) + if (vec_len (callback_vector) >= tw->max_expirations) break; } + if (callback_vector_arg == 0) + tw->expired_timer_handles = callback_vector; + tw->last_run_time += i * tw->timer_interval; - return total_nexpirations; + return callback_vector; +} + +u32 *TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now) +{ + return TW (tw_timer_expire_timers_internal) (tw, now, 0 /* no vector */ ); +} + +u32 *TW (tw_timer_expire_timers_vec) (TWT (tw_timer_wheel) * tw, f64 now, + u32 * vec) +{ + return TW (tw_timer_expire_timers_internal) (tw, now, vec); } /* diff --git a/src/vppinfra/tw_timer_template.h b/src/vppinfra/tw_timer_template.h index 6b61e424..76755609 100644 --- a/src/vppinfra/tw_timer_template.h +++ b/src/vppinfra/tw_timer_template.h @@ -110,16 +110,39 @@ Expired timer callback: } */ +#if (TW_TIMER_WHEELS != 1 && TW_TIMER_WHEELS != 2 && TW_TIMER_WHEELS != 3) +#error TW_TIMER_WHEELS must be 1, 2 or 3 +#endif + typedef struct { /** next, previous pool indices */ u32 next; u32 prev; -#if TW_TIMER_WHEELS > 0 - /** fast ring offset, only valid in the slow ring */ - u16 fast_ring_offset; - u16 pad; + + union + { + struct + { +#if (TW_TIMER_WHEELS == 3) + /** fast ring offset, 
only valid in the slow ring */ + u16 fast_ring_offset; + /** slow ring offset, only valid in the glacier ring */ + u16 slow_ring_offset; +#endif +#if (TW_TIMER_WHEELS == 2) + /** fast ring offset, only valid in the slow ring */ + u16 fast_ring_offset; + /** padding */ + u16 pad; #endif + }; + +#if (TW_OVERFLOW_VECTOR > 0) + u64 expiration_time; +#endif + }; + /** user timer handle */ u32 user_handle; } TWT (tw_timer); @@ -141,6 +164,8 @@ typedef enum TW_TIMER_RING_FAST, /** Slow timer ring ID */ TW_TIMER_RING_SLOW, + /** Glacier ring ID */ + TW_TIMER_RING_GLACIER, } tw_ring_index_t; #endif /* __defined_tw_timer_wheel_slot__ */ @@ -162,7 +187,10 @@ typedef struct f64 timer_interval; /** current tick */ - u32 current_tick; + u64 current_tick; + + /** first expiration time */ + u64 first_expires_tick; /** current wheel indices */ u32 current_index[TW_TIMER_WHEELS]; @@ -170,6 +198,10 @@ typedef struct /** wheel arrays */ tw_timer_wheel_slot_t w[TW_TIMER_WHEELS][TW_SLOTS_PER_RING]; +#if TW_OVERFLOW_VECTOR > 0 + tw_timer_wheel_slot_t overflow; +#endif + /** expired timer callback, receives a vector of handles */ void (*expired_timer_callback) (u32 * expired_timer_handles); @@ -181,7 +213,7 @@ typedef struct } TWT (tw_timer_wheel); u32 TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, - u32 pool_index, u32 timer_id, u32 interval); + u32 pool_index, u32 timer_id, u64 interval); void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle); @@ -191,7 +223,9 @@ void TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw); -u32 TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now); +u32 *TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now); +u32 *TW (tw_timer_expire_timers_vec) (TWT (tw_timer_wheel) * tw, f64 now, + u32 * vec); /* * fd.io coding-style-patch-verification: ON -- cgit 1.2.3-korg From 28b70af8e5b4bea004138cfca74e79b44803c5db Mon Sep 17 00:00:00
2001 From: Dave Barach Date: Tue, 13 Jun 2017 17:04:28 -0400 Subject: VPP-874: fix coverity warnings in tw_timer_template.c Best guess, tested carefully, should do no harm. Clang doesn't complain either way; it's not certain that this patch will make the coverity warnings in tw_timer_template.c disappear. Change-Id: I75aa0cfd8970751e823a1165df2a755e947c4cf9 Signed-off-by: Dave Barach --- src/vppinfra/tw_timer_template.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index a0c407ae..9253488c 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -132,13 +132,13 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, #endif glacier_ring_offset = interval >> (2 * TW_RING_SHIFT); - ASSERT (glacier_ring_offset < TW_SLOTS_PER_RING); - interval -= (glacier_ring_offset << (2 * TW_RING_SHIFT)); + ASSERT ((u64) glacier_ring_offset < TW_SLOTS_PER_RING); + interval -= (((u64) glacier_ring_offset) << (2 * TW_RING_SHIFT)); #endif #if TW_TIMER_WHEELS > 1 slow_ring_offset = interval >> TW_RING_SHIFT; - ASSERT (slow_ring_offset < TW_SLOTS_PER_RING); - interval -= (slow_ring_offset << TW_RING_SHIFT); + ASSERT ((u64) slow_ring_offset < TW_SLOTS_PER_RING); + interval -= (((u64) slow_ring_offset) << TW_RING_SHIFT); #endif fast_ring_offset = interval & TW_RING_MASK; -- cgit 1.2.3-korg From 5c20a0131a6a2516c14d5ccfc6db90fd13ec8a33 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 13 Jun 2017 08:48:31 -0400 Subject: switch vlib process model to tw_timer_template timer impl Change-Id: I36bb47faea55a6fea7af7ee58d87d8f6dd28f93d Signed-off-by: Dave Barach --- src/vlib/main.c | 62 +++++++++++++++++---------- src/vlib/node.h | 15 ++++--- src/vlib/node_funcs.h | 28 +++++++------ src/vlib/unix/input.c | 61 +++++++++++---------------- src/vnet/lisp-cp/control.h | 1 + 
src/vppinfra/tw_timer_16t_1w_2048sl.h | 4 ++ src/vppinfra/tw_timer_16t_2w_512sl.h | 4 ++ src/vppinfra/tw_timer_1t_3w_1024sl_ov.h | 4 ++ src/vppinfra/tw_timer_2t_1w_2048sl.h | 4 ++ src/vppinfra/tw_timer_4t_3w_256sl.h | 4 ++ src/vppinfra/tw_timer_4t_3w_4sl_ov.h | 4 ++ src/vppinfra/tw_timer_template.c | 74 +++++++++++++++++++++++++++++++++ src/vppinfra/tw_timer_template.h | 9 ++++ 13 files changed, 197 insertions(+), 77 deletions(-) (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vlib/main.c b/src/vlib/main.c index 14f680e6..19d70232 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -1341,9 +1342,16 @@ dispatch_process (vlib_main_t * vm, p->suspended_process_frame_index = pf - nm->suspended_process_frames; if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK) - timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index)); + { + TWT (tw_timer_wheel) * tw = + (TWT (tw_timer_wheel) *) nm->timing_wheel; + p->stop_timer_handle = + TW (tw_timer_start) (tw, + vlib_timing_wheel_data_set_suspended_process + (node->runtime_index) /* [sic] pool idex */ , + 0 /* timer_id */ , + p->resume_clock_interval); + } } else p->flags &= ~VLIB_PROCESS_IS_RUNNING; @@ -1416,9 +1424,14 @@ dispatch_suspended_process (vlib_main_t * vm, n_vectors = 0; p->n_suspends += 1; if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK) - timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time, - vlib_timing_wheel_data_set_suspended_process - (node->runtime_index)); + { + p->stop_timer_handle = + TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + vlib_timing_wheel_data_set_suspended_process + (node->runtime_index) /* [sic] pool idex */ , + 0 /* timer_id */ , + p->resume_clock_interval); + } } else { @@ -1465,17 +1478,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) else cpu_time_now = clib_cpu_time_now (); - /* 
Arrange for first level of timing wheel to cover times we care - most about. */ - if (is_main) - { - nm->timing_wheel.min_sched_time = 10e-6; - nm->timing_wheel.max_sched_time = 10e-3; - timing_wheel_init (&nm->timing_wheel, - cpu_time_now, vm->clib_time.clocks_per_second); - vec_alloc (nm->data_from_advancing_timing_wheel, 32); - } - /* Pre-allocate interupt runtime indices and lock. */ vec_alloc (nm->pending_interrupt_node_runtime_indices, 32); vec_alloc (last_node_runtime_indices, 32); @@ -1561,12 +1563,15 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (is_main) { /* Check if process nodes have expired from timing wheel. */ - nm->data_from_advancing_timing_wheel - = timing_wheel_advance (&nm->timing_wheel, cpu_time_now, - nm->data_from_advancing_timing_wheel, - &nm->cpu_time_next_process_ready); + ASSERT (nm->data_from_advancing_timing_wheel != 0); + + nm->data_from_advancing_timing_wheel = + TW (tw_timer_expire_timers_vec) + ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm), + nm->data_from_advancing_timing_wheel); ASSERT (nm->data_from_advancing_timing_wheel != 0); + if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0)) { @@ -1612,8 +1617,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) dispatch_suspended_process (vm, di, cpu_time_now); } } - - /* Reset vector. 
*/ _vec_len (nm->data_from_advancing_timing_wheel) = 0; } } @@ -1692,6 +1695,7 @@ int vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) { clib_error_t *volatile error; + vlib_node_main_t *nm = &vm->node_main; vm->queue_signal_callback = dummy_queue_signal_callback; @@ -1746,6 +1750,18 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, "default"); + nm->timing_wheel = clib_mem_alloc_aligned (sizeof (TWT (tw_timer_wheel)), + CLIB_CACHE_LINE_BYTES); + + vec_validate (nm->data_from_advancing_timing_wheel, 10); + _vec_len (nm->data_from_advancing_timing_wheel) = 0; + + /* Create the process timing wheel */ + TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + 0 /* no callback */ , + 10e-6 /* timer period 10us */ , + ~0 /* max expirations per call */ ); + switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE)) { case VLIB_MAIN_LOOP_EXIT_NONE: diff --git a/src/vlib/node.h b/src/vlib/node.h index 906d795f..77914272 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -43,7 +43,6 @@ #include #include #include -#include #include /* for vlib_trace_filter_t */ /* Forward declaration. */ @@ -542,8 +541,14 @@ typedef struct /* Pool of currently valid event types. */ vlib_process_event_type_t *event_type_pool; - /* When suspending saves cpu cycle counter when process is to be resumed. */ - u64 resume_cpu_time; + /* + * When suspending saves clock time (10us ticks) when process + * is to be resumed. + */ + u64 resume_clock_interval; + + /* Handle from timer code, to cancel an unexpired timer */ + u32 stop_timer_handle; /* Default output function and its argument for any CLI outputs within the process. */ @@ -664,7 +669,7 @@ typedef struct vlib_pending_frame_t *pending_frames; /* Timing wheel for scheduling time-based node dispatch. 
*/ - timing_wheel_t timing_wheel; + void *timing_wheel; vlib_signal_timed_event_data_t *signal_timed_event_data_pool; @@ -672,7 +677,7 @@ typedef struct u32 *data_from_advancing_timing_wheel; /* CPU time of next process to be ready on timing wheel. */ - u64 cpu_time_next_process_ready; + f64 time_next_process_ready; /* Vector of process nodes. One for each node of type VLIB_NODE_TYPE_PROCESS. */ diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index 4d7cc192..d6588a74 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -46,6 +46,7 @@ #define included_vlib_node_funcs_h #include +#include /** \brief Get vlib node by index. @warning This function will ASSERT if @c i is out of range. @@ -428,14 +429,14 @@ vlib_current_process (vlib_main_t * vm) return vlib_get_current_process (vm)->node_runtime.node_index; } -/** Returns TRUE if a process suspend time is less than 1us +/** Returns TRUE if a process suspend time is less than 10us @param dt - remaining poll time in seconds - @returns 1 if dt < 1e-6, 0 otherwise + @returns 1 if dt < 10e-6, 0 otherwise */ always_inline uword vlib_process_suspend_time_is_zero (f64 dt) { - return dt < 1e-6; + return dt < 10e-6; } /** Suspend a vlib cooperative multi-tasking thread for a period of time @@ -450,7 +451,6 @@ vlib_process_suspend (vlib_main_t * vm, f64 dt) uword r; vlib_node_main_t *nm = &vm->node_main; vlib_process_t *p = vec_elt (nm->processes, nm->current_process_index); - u64 dt_cpu = dt * vm->clib_time.clocks_per_second; if (vlib_process_suspend_time_is_zero (dt)) return VLIB_PROCESS_RESUME_LONGJMP_RESUME; @@ -459,7 +459,8 @@ vlib_process_suspend (vlib_main_t * vm, f64 dt) r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND); if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) { - p->resume_cpu_time = clib_cpu_time_now () + dt_cpu; + /* expiration time in 10us ticks */ + p->resume_clock_interval = dt * 1e5; clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); } @@ 
-718,8 +719,7 @@ vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt) r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND); if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) { - p->resume_cpu_time = (clib_cpu_time_now () - + (dt * vm->clib_time.clocks_per_second)); + p->resume_clock_interval = dt * 1e5; clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); } @@ -834,7 +834,8 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, p->flags = p_flags | VLIB_PROCESS_RESUME_PENDING; vec_add1 (nm->data_from_advancing_timing_wheel, x); if (delete_from_wheel) - timing_wheel_delete (&nm->timing_wheel, x); + TW (tw_timer_stop) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + p->stop_timer_handle); } return data_to_be_written_by_caller; @@ -895,7 +896,6 @@ vlib_process_signal_event_at_time (vlib_main_t * vm, else { vlib_signal_timed_event_data_t *te; - u64 dt_cpu = dt * vm->clib_time.clocks_per_second; pool_get_aligned (nm->signal_timed_event_data_pool, te, sizeof (te[0])); @@ -911,10 +911,12 @@ vlib_process_signal_event_at_time (vlib_main_t * vm, te->process_node_index = n->runtime_index; te->event_type_index = t; - timing_wheel_insert (&nm->timing_wheel, clib_cpu_time_now () + dt_cpu, - vlib_timing_wheel_data_set_timed_event (te - - nm-> - signal_timed_event_data_pool)); + p->stop_timer_handle = + TW (tw_timer_start) ((TWT (tw_timer_wheel) *) nm->timing_wheel, + vlib_timing_wheel_data_set_timed_event + (te - nm->signal_timed_event_data_pool), + 0 /* timer_id */ , + (vlib_time_now (vm) + dt) * 1e5); /* Inline data big enough to hold event? 
*/ if (te->n_data_bytes < sizeof (te->inline_event_data)) diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 73783d13..515dae94 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -40,6 +40,7 @@ #include #include #include +#include /* FIXME autoconf */ #define HAVE_LINUX_EPOLL @@ -113,56 +114,44 @@ linux_epoll_input (vlib_main_t * vm, { vlib_node_main_t *nm = &vm->node_main; - u64 t = nm->cpu_time_next_process_ready; + u32 ticks_until_expiration; f64 timeout; - int timeout_ms, max_timeout_ms = 10; + int timeout_ms = 0, max_timeout_ms = 10; f64 vector_rate = vlib_last_vectors_per_main_loop (vm); - if (t == ~0ULL) + /* If we're not working very hard, decide how long to sleep */ + if (vector_rate < 2 && vm->api_queue_nonempty == 0 + && nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] == 0) { - timeout = 10e-3; - timeout_ms = max_timeout_ms; - } - else - { - timeout = - (((i64) t - (i64) clib_cpu_time_now ()) - * vm->clib_time.seconds_per_clock) - /* subtract off some slop time */ - 50e-6; + ticks_until_expiration = TW (tw_timer_first_expires_in_ticks) + ((TWT (tw_timer_wheel) *) nm->timing_wheel); - if (timeout < 1e-3) + /* Nothing on the fast wheel, sleep 10ms */ + if (ticks_until_expiration == TW_SLOTS_PER_RING) { - /* We have event happenning in less than 1 ms so - don't allow epoll to wait */ - timeout_ms = 0; + timeout = 10e-3; + timeout_ms = max_timeout_ms; } else { - timeout_ms = timeout * 1e3; - - /* Must be between 1 and 10 ms. */ - timeout_ms = clib_max (1, timeout_ms); - timeout_ms = clib_min (max_timeout_ms, timeout_ms); + timeout = (f64) ticks_until_expiration *1e-5; + if (timeout < 1e-3) + timeout_ms = 0; + else + { + timeout_ms = timeout * 1e3; + /* Must be between 1 and 10 ms. */ + timeout_ms = clib_max (1, timeout_ms); + timeout_ms = clib_min (max_timeout_ms, timeout_ms); + } } + node->input_main_loops_per_call = 0; } - - /* If we still have input nodes polling (e.g. vnet packet generator) - don't sleep. 
*/ - if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] > 0) - timeout_ms = 0; - - /* - * When busy: don't wait & only epoll for input - * every 1024 times through main loop. - */ - if (vector_rate > 1 || vm->api_queue_nonempty) + else /* busy */ { - timeout_ms = 0; + /* Don't come back for a respectable number of dispatch cycles */ node->input_main_loops_per_call = 1024; } - else - /* We're not busy; go to sleep for a while. */ - node->input_main_loops_per_call = 0; /* Allow any signal to wakeup our sleep. */ { diff --git a/src/vnet/lisp-cp/control.h b/src/vnet/lisp-cp/control.h index 577035c4..0e63b3c7 100644 --- a/src/vnet/lisp-cp/control.h +++ b/src/vnet/lisp-cp/control.h @@ -19,6 +19,7 @@ #include #include #include +#include #define NUMBER_OF_RETRIES 1 #define PENDING_MREQ_EXPIRATION_TIME 3.0 /* seconds */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h index 6edef17b..66cf7d37 100644 --- a/src/vppinfra/tw_timer_16t_1w_2048sl.h +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h @@ -25,6 +25,8 @@ #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX #undef TW_OVERFLOW_VECTOR +#undef TW_FAST_WHEEL_BITMAP +#undef TW_TIMER_ALLOW_DUPLICATE_STOP #define TW_TIMER_WHEELS 1 #define TW_SLOTS_PER_RING 2048 @@ -33,6 +35,8 @@ #define TW_TIMERS_PER_OBJECT 16 #define LOG2_TW_TIMERS_PER_OBJECT 4 #define TW_SUFFIX _16t_1w_2048sl +#define TW_FAST_WHEEL_BITMAP 0 +#define TW_TIMER_ALLOW_DUPLICATE_STOP 0 #include diff --git a/src/vppinfra/tw_timer_16t_2w_512sl.h b/src/vppinfra/tw_timer_16t_2w_512sl.h index 2497b31c..00587b8e 100644 --- a/src/vppinfra/tw_timer_16t_2w_512sl.h +++ b/src/vppinfra/tw_timer_16t_2w_512sl.h @@ -25,6 +25,8 @@ #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX #undef TW_OVERFLOW_VECTOR +#undef TW_FAST_WHEEL_BITMAP +#undef TW_TIMER_ALLOW_DUPLICATE_STOP #define TW_TIMER_WHEELS 2 #define TW_SLOTS_PER_RING 512 @@ -33,6 +35,8 @@ #define TW_TIMERS_PER_OBJECT 16 #define LOG2_TW_TIMERS_PER_OBJECT 4 #define TW_SUFFIX 
_16t_2w_512sl +#define TW_FAST_WHEEL_BITMAP 0 +#define TW_TIMER_ALLOW_DUPLICATE_STOP 0 #include diff --git a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h index 7327f87b..e5e4cc19 100644 --- a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h +++ b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h @@ -25,6 +25,8 @@ #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX #undef TW_OVERFLOW_VECTOR +#undef TW_FAST_WHEEL_BITMAP +#undef TW_TIMER_ALLOW_DUPLICATE_STOP #define TW_TIMER_WHEELS 3 #define TW_SLOTS_PER_RING 1024 @@ -34,6 +36,8 @@ #define LOG2_TW_TIMERS_PER_OBJECT 0 #define TW_SUFFIX _1t_3w_1024sl_ov #define TW_OVERFLOW_VECTOR 1 +#define TW_FAST_WHEEL_BITMAP 1 +#define TW_TIMER_ALLOW_DUPLICATE_STOP 1 #include diff --git a/src/vppinfra/tw_timer_2t_1w_2048sl.h b/src/vppinfra/tw_timer_2t_1w_2048sl.h index 33b74405..98b548b3 100644 --- a/src/vppinfra/tw_timer_2t_1w_2048sl.h +++ b/src/vppinfra/tw_timer_2t_1w_2048sl.h @@ -25,6 +25,8 @@ #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX #undef TW_OVERFLOW_VECTOR +#undef TW_FAST_WHEEL_BITMAP +#undef TW_TIMER_ALLOW_DUPLICATE_STOP #define TW_TIMER_WHEELS 1 #define TW_SLOTS_PER_RING 2048 @@ -33,6 +35,8 @@ #define TW_TIMERS_PER_OBJECT 2 #define LOG2_TW_TIMERS_PER_OBJECT 1 #define TW_SUFFIX _2t_1w_2048sl +#define TW_FAST_WHEEL_BITMAP 0 +#define TW_TIMER_ALLOW_DUPLICATE_STOP 0 #include diff --git a/src/vppinfra/tw_timer_4t_3w_256sl.h b/src/vppinfra/tw_timer_4t_3w_256sl.h index 89adb7a2..07203de8 100644 --- a/src/vppinfra/tw_timer_4t_3w_256sl.h +++ b/src/vppinfra/tw_timer_4t_3w_256sl.h @@ -25,6 +25,8 @@ #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX #undef TW_OVERFLOW_VECTOR +#undef TW_FAST_WHEEL_BITMAP +#undef TW_TIMER_ALLOW_DUPLICATE_STOP #define TW_TIMER_WHEELS 3 #define TW_SLOTS_PER_RING 256 @@ -33,6 +35,8 @@ #define TW_TIMERS_PER_OBJECT 4 #define LOG2_TW_TIMERS_PER_OBJECT 2 #define TW_SUFFIX _4t_3w_256sl +#define TW_FAST_WHEEL_BITMAP 0 +#define TW_TIMER_ALLOW_DUPLICATE_STOP 0 #include diff --git 
a/src/vppinfra/tw_timer_4t_3w_4sl_ov.h b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h index 0f76164d..20a01d05 100644 --- a/src/vppinfra/tw_timer_4t_3w_4sl_ov.h +++ b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h @@ -25,6 +25,8 @@ #undef LOG2_TW_TIMERS_PER_OBJECT #undef TW_SUFFIX #undef TW_OVERFLOW_VECTOR +#undef TW_FAST_WHEEL_BITMAP +#undef TW_TIMER_ALLOW_DUPLICATE_STOP #define TW_TIMER_WHEELS 3 #define TW_SLOTS_PER_RING 4 @@ -34,6 +36,8 @@ #define LOG2_TW_TIMERS_PER_OBJECT 2 #define TW_SUFFIX _4t_3w_4sl_ov #define TW_OVERFLOW_VECTOR 1 +#define TW_FAST_WHEEL_BITMAP 0 +#define TW_TIMER_ALLOW_DUPLICATE_STOP 0 #include diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index 9253488c..c0a9685a 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -204,6 +204,11 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, ts = &tw->w[TW_TIMER_RING_FAST][fast_ring_offset]; timer_addhead (tw->timers, ts->head_index, t - tw->timers); + +#if TW_FAST_WHEEL_BITMAP + tw->fast_slot_bitmap = clib_bitmap_set (tw->fast_slot_bitmap, + fast_ring_offset, 1); +#endif return t - tw->timers; } @@ -251,6 +256,16 @@ void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle) { TWT (tw_timer) * t; +#if TW_TIMER_ALLOW_DUPLICATE_STOP + /* + * A vlib process may have its timer expire, and receive + * an event before the expiration is processed. + * That results in a duplicate tw_timer_stop. + */ + if (pool_is_free_index (tw->timers, handle)) + return; +#endif + t = pool_elt_at_index (tw->timers, handle); /* in case of idiotic handle (e.g. 
passing a listhead index) */ @@ -481,6 +496,11 @@ static inline { ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; timer_addhead (tw->timers, ts->head_index, t - tw->timers); +#if TW_FAST_WHEEL_BITMAP + tw->fast_slot_bitmap = + clib_bitmap_set (tw->fast_slot_bitmap, + t->fast_ring_offset, 1); +#endif } } } @@ -523,6 +543,11 @@ static inline { ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; timer_addhead (tw->timers, ts->head_index, t - tw->timers); +#if TW_FAST_WHEEL_BITMAP + tw->fast_slot_bitmap = + clib_bitmap_set (tw->fast_slot_bitmap, + t->fast_ring_offset, 1); +#endif } else /* typical case */ { @@ -569,6 +594,11 @@ static inline /* Add to fast ring */ ts = &tw->w[TW_TIMER_RING_FAST][t->fast_ring_offset]; timer_addhead (tw->timers, ts->head_index, t - tw->timers); +#if TW_FAST_WHEEL_BITMAP + tw->fast_slot_bitmap = + clib_bitmap_set (tw->fast_slot_bitmap, + t->fast_ring_offset, 1); +#endif } } } @@ -604,6 +634,12 @@ static inline } tw->expired_timer_handles = callback_vector; } + +#if TW_FAST_WHEEL_BITMAP + tw->fast_slot_bitmap = clib_bitmap_set (tw->fast_slot_bitmap, + fast_wheel_index, 0); +#endif + tw->current_tick++; fast_wheel_index++; tw->current_index[TW_TIMER_RING_FAST] = fast_wheel_index; @@ -642,6 +678,44 @@ u32 *TW (tw_timer_expire_timers_vec) (TWT (tw_timer_wheel) * tw, f64 now, return TW (tw_timer_expire_timers_internal) (tw, now, vec); } +#if TW_FAST_WHEEL_BITMAP +/** Returns an approximation to the first timer expiration in + * timer-ticks from "now". To avoid wasting an unjustifiable + * amount of time on the problem, we maintain an approximate fast-wheel slot + * occupancy bitmap. We don't worry about clearing fast wheel bits + * when timers are removed from fast wheel slots. 
+ */ + +u32 TW (tw_timer_first_expires_in_ticks) (TWT (tw_timer_wheel) * tw) +{ + u32 first_expiring_index, fast_ring_index; + i32 delta; + + if (clib_bitmap_is_zero (tw->fast_slot_bitmap)) + return TW_SLOTS_PER_RING; + + fast_ring_index = tw->current_index[TW_TIMER_RING_FAST]; + if (fast_ring_index == TW_SLOTS_PER_RING) + fast_ring_index = 0; + + first_expiring_index = clib_bitmap_next_set (tw->fast_slot_bitmap, + fast_ring_index); + if (first_expiring_index == ~0 && fast_ring_index != 0) + first_expiring_index = clib_bitmap_first_set (tw->fast_slot_bitmap); + + ASSERT (first_expiring_index != ~0); + + delta = (i32) first_expiring_index - (i32) fast_ring_index; + if (delta < 0) + delta += TW_SLOTS_PER_RING; + + ASSERT (delta >= 0); + + return (u32) delta; +} + +#endif + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vppinfra/tw_timer_template.h b/src/vppinfra/tw_timer_template.h index 76755609..0404e3f4 100644 --- a/src/vppinfra/tw_timer_template.h +++ b/src/vppinfra/tw_timer_template.h @@ -19,6 +19,7 @@ #include #include +#include #ifndef _twt #define _twt(a,b) a##b##_t @@ -202,6 +203,11 @@ typedef struct tw_timer_wheel_slot_t overflow; #endif +#if TW_FAST_WHEEL_BITMAP > 0 + /** Fast wheel slot occupancy bitmap */ + uword *fast_slot_bitmap; +#endif + /** expired timer callback, receives a vector of handles */ void (*expired_timer_callback) (u32 * expired_timer_handles); @@ -226,6 +232,9 @@ void TW (tw_timer_wheel_free) (TWT (tw_timer_wheel) * tw); u32 *TW (tw_timer_expire_timers) (TWT (tw_timer_wheel) * tw, f64 now); u32 *TW (tw_timer_expire_timers_vec) (TWT (tw_timer_wheel) * tw, f64 now, u32 * vec); +#if TW_FAST_WHEEL_BITMAP +u32 TW (tw_timer_first_expires_in_ticks) (TWT (tw_timer_wheel) * tw); +#endif /* * fd.io coding-style-patch-verification: ON -- cgit 1.2.3-korg From b7f1faa7fbd4575f28766e552a73810c6de0ace3 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 29 Aug 2017 11:43:37 -0400 Subject: Add fixed-size, preallocated pool support 
Simply call pool_init_fixed(...) before using the pool. Note that fixed, preallocated pools live in individually-mmap'ed address segments, except for the free element bitmap. A large fixed pool can exceed 4gb. Fix tcp buffer allocator leak, remove broken assert Change-Id: I4421082e12a77c41c6e20f7747f3150dcd01fc26 Signed-off-by: Dave Barach --- src/svm/svm_fifo_segment.c | 2 +- src/vnet/session/application_interface.c | 19 +++++ src/vnet/session/session.c | 32 +++----- src/vnet/session/session_cli.c | 4 +- src/vnet/tcp/tcp.c | 24 +++--- src/vnet/tcp/tcp_output.c | 42 ++++++---- src/vppinfra.am | 26 +++--- src/vppinfra/bihash_24_8.h | 2 +- src/vppinfra/pool.c | 131 +++++++++++++++++++++++++++++++ src/vppinfra/pool.h | 119 +++++++++++++++++++++------- src/vppinfra/test_fpool.c | 69 ++++++++++++++++ src/vppinfra/tw_timer_16t_1w_2048sl.h | 1 + src/vppinfra/tw_timer_16t_2w_512sl.h | 3 +- src/vppinfra/tw_timer_1t_3w_1024sl_ov.h | 1 + src/vppinfra/tw_timer_2t_1w_2048sl.h | 1 + src/vppinfra/tw_timer_4t_3w_256sl.h | 1 + src/vppinfra/tw_timer_4t_3w_4sl_ov.h | 1 + src/vppinfra/tw_timer_template.c | 116 +++++++++++++++++++++++++-- src/vppinfra/tw_timer_template.h | 24 +++++- 19 files changed, 520 insertions(+), 98 deletions(-) create mode 100644 src/vppinfra/pool.c create mode 100644 src/vppinfra/test_fpool.c (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/svm/svm_fifo_segment.c b/src/svm/svm_fifo_segment.c index 6600a423..97d9976b 100644 --- a/src/svm/svm_fifo_segment.c +++ b/src/svm/svm_fifo_segment.c @@ -461,7 +461,7 @@ svm_fifo_segment_free_fifo (svm_fifo_segment_private_t * s, svm_fifo_t * f, freelist_index = f->freelist_index; - ASSERT (freelist_index > 0 && freelist_index < vec_len (fsh->free_fifos)); + ASSERT (freelist_index < vec_len (fsh->free_fifos)); ssvm_lock_non_recursive (sh, 2); oldheap = ssvm_push_heap (sh); diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index 566a52d7..8dbc3a1a 100644 --- 
a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -207,11 +207,22 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args) return 0; } +static u8 *cache_uri; +static session_type_t cache_sst; +static transport_endpoint_t *cache_tep; + int parse_uri (char *uri, session_type_t * sst, transport_endpoint_t * tep) { unformat_input_t _input, *input = &_input; + if (cache_uri && !strncmp (uri, (char *) cache_uri, vec_len (cache_uri))) + { + *sst = cache_sst; + *tep = *cache_tep; + return 0; + } + /* Make sure */ uri = (char *) format (0, "%s%c", uri, 0); @@ -224,6 +235,14 @@ parse_uri (char *uri, session_type_t * sst, transport_endpoint_t * tep) } unformat_free (input); + vec_free (cache_uri); + cache_uri = (u8 *) uri; + cache_sst = *sst; + if (cache_tep) + clib_mem_free (cache_tep); + cache_tep = clib_mem_alloc (sizeof (*tep)); + *cache_tep = *tep; + return 0; } diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index dcd141f1..17644e29 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -889,32 +889,24 @@ session_manager_main_enable (vlib_main_t * vm) session_vpp_event_queue_allocate (smm, i); /* Preallocate sessions */ - if (num_threads == 1) + if (smm->preallocated_sessions) { - for (i = 0; i < smm->preallocated_sessions; i++) + if (num_threads == 1) { - stream_session_t *ss __attribute__ ((unused)); - pool_get_aligned (smm->sessions[0], ss, CLIB_CACHE_LINE_BYTES); + pool_init_fixed (smm->sessions[0], smm->preallocated_sessions); } - - for (i = 0; i < smm->preallocated_sessions; i++) - pool_put_index (smm->sessions[0], i); - } - else - { - int j; - preallocated_sessions_per_worker = smm->preallocated_sessions / - (num_threads - 1); - - for (j = 1; j < num_threads; j++) + else { - for (i = 0; i < preallocated_sessions_per_worker; i++) + int j; + preallocated_sessions_per_worker = + (1.1 * (f64) smm->preallocated_sessions / + (f64) (num_threads - 1)); + + for (j = 1; j < 
num_threads; j++) { - stream_session_t *ss __attribute__ ((unused)); - pool_get_aligned (smm->sessions[j], ss, CLIB_CACHE_LINE_BYTES); + pool_init_fixed (smm->sessions[j], + preallocated_sessions_per_worker); } - for (i = 0; i < preallocated_sessions_per_worker; i++) - pool_put_index (smm->sessions[j], i); } } diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c index 028dc9d8..d9f516be 100755 --- a/src/vnet/session/session_cli.c +++ b/src/vnet/session/session_cli.c @@ -115,8 +115,8 @@ unformat_stream_session_id (unformat_input_t * input, va_list * args) { *proto = TRANSPORT_PROTO_UDP; } - else if (unformat (input, "%U:%d->%U:%d", unformat_ip4_address, &lcl->ip4, - lcl_port, unformat_ip4_address, &rmt->ip4, rmt_port)) + if (unformat (input, "%U:%d->%U:%d", unformat_ip4_address, &lcl->ip4, + lcl_port, unformat_ip4_address, &rmt->ip4, rmt_port)) { *is_ip4 = 1; tuple_is_set = 1; diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 0a826a52..a4c13084 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -1150,6 +1150,10 @@ tcp_timer_establish_handler (u32 conn_index) else { tc = tcp_connection_get (conn_index, vlib_get_thread_index ()); + /* note: the connection may have already disappeared */ + if (PREDICT_FALSE (tc == 0)) + return; + ASSERT (tc->state == TCP_STATE_SYN_RCVD); } tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID; @@ -1244,7 +1248,7 @@ tcp_main_enable (vlib_main_t * vm) vlib_thread_main_t *vtm = vlib_get_thread_main (); clib_error_t *error = 0; u32 num_threads; - int i, thread; + int thread; tcp_connection_t *tc __attribute__ ((unused)); u32 preallocated_connections_per_thread; @@ -1297,21 +1301,17 @@ tcp_main_enable (vlib_main_t * vm) } for (; thread < num_threads; thread++) { - for (i = 0; i < preallocated_connections_per_thread; i++) - pool_get (tm->connections[thread], tc); - - for (i = 0; i < preallocated_connections_per_thread; i++) - pool_put_index (tm->connections[thread], i); + if 
(preallocated_connections_per_thread) + pool_init_fixed (tm->connections[thread], + preallocated_connections_per_thread); } /* - * Preallocate half-open connections + * Use a preallocated half-open connection pool? */ - for (i = 0; i < tm->preallocated_half_open_connections; i++) - pool_get (tm->half_open_connections, tc); - - for (i = 0; i < tm->preallocated_half_open_connections; i++) - pool_put_index (tm->half_open_connections, i); + if (tm->preallocated_half_open_connections) + pool_init_fixed (tm->half_open_connections, + tm->preallocated_half_open_connections); /* Initialize per worker thread tx buffers (used for control messages) */ vec_validate (tm->tx_buffers, num_threads - 1); diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 02555513..15a9dcb4 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -440,13 +440,16 @@ tcp_init_mss (tcp_connection_t * tc) always_inline int tcp_alloc_tx_buffers (tcp_main_t * tm, u8 thread_index, u32 n_free_buffers) { + u32 current_length = vec_len (tm->tx_buffers[thread_index]); + vec_validate (tm->tx_buffers[thread_index], - vec_len (tm->tx_buffers[thread_index]) + n_free_buffers - 1); + current_length + n_free_buffers - 1); _vec_len (tm->tx_buffers[thread_index]) = - vlib_buffer_alloc_from_free_list (vlib_get_main (), - tm->tx_buffers[thread_index], - n_free_buffers, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + current_length + vlib_buffer_alloc_from_free_list (vlib_get_main (), + tm->tx_buffers + [thread_index], + n_free_buffers, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); /* buffer shortage, report failure */ if (vec_len (tm->tx_buffers[thread_index]) == 0) { @@ -1293,11 +1296,17 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (is_syn) { tc = tcp_half_open_connection_get (index); + /* Note: the connection may have transitioned to ESTABLISHED... 
*/ + if (PREDICT_FALSE (tc == 0)) + return; tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID; } else { tc = tcp_connection_get (index, thread_index); + /* Note: the connection may have been closed and pool_put */ + if (PREDICT_FALSE (tc == 0)) + return; tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; } @@ -1332,25 +1341,27 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1); - /* Send one segment */ + /* Send one segment. Note that n_bytes may be zero due to buffer shortfall */ n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b); - ASSERT (n_bytes); - bi = vlib_get_buffer_index (vm, b); + /* TODO be less aggressive about this */ scoreboard_clear (&tc->sack_sb); if (n_bytes == 0) { - clib_warning ("could not retransmit anything"); - clib_warning ("%U", format_tcp_connection, tc, 2); - + if (b) + { + clib_warning ("retransmit fail: %U", format_tcp_connection, tc, + 2); + ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion); + } /* Try again eventually */ tcp_retransmit_timer_set (tc); - ASSERT (0 || (tc->rto_boff > 1 - && tc->snd_una == tc->snd_congestion)); return; } + bi = vlib_get_buffer_index (vm, b); + /* For first retransmit, record timestamp (Eifel detection RFC3522) */ if (tc->rto_boff == 1) tc->snd_rxt_ts = tcp_time_now (); @@ -1378,7 +1389,10 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) - return; + { + clib_warning ("tcp_get_free_buffer_index FAIL"); + return; + } b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); tcp_push_hdr_i (tc, b, tc->state, 1); diff --git a/src/vppinfra.am b/src/vppinfra.am index 533bacd6..8f01114c 100644 --- a/src/vppinfra.am +++ b/src/vppinfra.am @@ -18,10 +18,11 @@ TESTS = if ENABLE_TESTS TESTS += test_bihash_template \ test_dlist \ - test_elog \ test_elf \ + test_elog \ test_fifo \ test_format \ + test_fpool \ test_hash 
\ test_heap \ test_longjmp \ @@ -42,17 +43,16 @@ TESTS += test_bihash_template \ test_zvec endif -TESTS += test_bihash_template - noinst_PROGRAMS = $(TESTS) check_PROGRAMS = $(TESTS) test_bihash_template_SOURCES = vppinfra/test_bihash_template.c test_dlist_SOURCES = vppinfra/test_dlist.c -test_elog_SOURCES = vppinfra/test_elog.c test_elf_SOURCES = vppinfra/test_elf.c +test_elog_SOURCES = vppinfra/test_elog.c test_fifo_SOURCES = vppinfra/test_fifo.c test_format_SOURCES = vppinfra/test_format.c +test_fpool_SOURCES = vppinfra/test_fpool.c test_hash_SOURCES = vppinfra/test_hash.c test_heap_SOURCES = vppinfra/test_heap.c test_longjmp_SOURCES = vppinfra/test_longjmp.c @@ -61,8 +61,8 @@ test_md5_SOURCES = vppinfra/test_md5.c test_mheap_SOURCES = vppinfra/test_mheap.c test_pool_iterate_SOURCES = vppinfra/test_pool_iterate.c test_ptclosure_SOURCES = vppinfra/test_ptclosure.c -test_random_SOURCES = vppinfra/test_random.c test_random_isaac_SOURCES = vppinfra/test_random_isaac.c +test_random_SOURCES = vppinfra/test_random.c test_serialize_SOURCES = vppinfra/test_serialize.c test_slist_SOURCES = vppinfra/test_slist.c test_socket_SOURCES = vppinfra/test_socket.c @@ -76,10 +76,11 @@ test_zvec_SOURCES = vppinfra/test_zvec.c # So we'll need -DDEBUG to enable ASSERTs test_bihash_template_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_dlist_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG -test_elog_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_elf_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG +test_elog_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_fifo_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_format_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG +test_fpool_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_hash_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_heap_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_longjmp_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG @@ -90,9 +91,9 @@ test_pool_iterate_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_ptclosure_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_random_CPPFLAGS = $(AM_CPPFLAGS) 
-DCLIB_DEBUG test_random_isaac_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG -test_socket_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_serialize_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_slist_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG +test_socket_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_time_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_timing_wheel_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_tw_timer_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG @@ -101,10 +102,11 @@ test_zvec_CPPFLAGS = $(AM_CPPFLAGS) -DCLIB_DEBUG test_bihash_template_LDADD = libvppinfra.la test_dlist_LDADD = libvppinfra.la -test_elog_LDADD = libvppinfra.la test_elf_LDADD = libvppinfra.la +test_elog_LDADD = libvppinfra.la test_fifo_LDADD = libvppinfra.la test_format_LDADD = libvppinfra.la +test_fpool_LDADD = libvppinfra.la test_hash_LDADD = libvppinfra.la test_heap_LDADD = libvppinfra.la test_longjmp_LDADD = libvppinfra.la @@ -113,8 +115,8 @@ test_md5_LDADD = libvppinfra.la test_mheap_LDADD = libvppinfra.la test_pool_iterate_LDADD = libvppinfra.la test_ptclosure_LDADD = libvppinfra.la -test_random_LDADD = libvppinfra.la test_random_isaac_LDADD = libvppinfra.la +test_random_LDADD = libvppinfra.la test_serialize_LDADD = libvppinfra.la test_slist_LDADD = libvppinfra.la test_socket_LDADD = libvppinfra.la @@ -126,10 +128,11 @@ test_zvec_LDADD = libvppinfra.la test_bihash_template_LDFLAGS = -static test_dlist_LDFLAGS = -static -test_elog_LDFLAGS = -static test_elf_LDFLAGS = -static +test_elog_LDFLAGS = -static test_fifo_LDFLAGS = -static test_format_LDFLAGS = -static +test_fpool_LDFLAGS = -static test_hash_LDFLAGS = -static test_heap_LDFLAGS = -static test_longjmp_LDFLAGS = -static @@ -138,8 +141,8 @@ test_md5_LDFLAGS = -static test_mheap_LDFLAGS = -static test_pool_iterate_LDFLAGS = -static test_ptclosure_LDFLAGS = -static -test_random_LDFLAGS = -static test_random_isaac_LDFLAGS = -static +test_random_LDFLAGS = -static test_serialize_LDFLAGS = -static test_slist_LDFLAGS = -static test_socket_LDFLAGS = -static @@ 
-247,6 +250,7 @@ CLIB_CORE = \ vppinfra/fifo.c \ vppinfra/fheap.c \ vppinfra/format.c \ + vppinfra/pool.c \ vppinfra/graph.c \ vppinfra/hash.c \ vppinfra/heap.c \ diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h index d0be028c..173168fe 100644 --- a/src/vppinfra/bihash_24_8.h +++ b/src/vppinfra/bihash_24_8.h @@ -18,7 +18,7 @@ #define BIHASH_TYPE _24_8 #define BIHASH_KVP_PER_PAGE 4 -#define BIHASH_KVP_CACHE_SIZE 3 +#define BIHASH_KVP_CACHE_SIZE 0 #ifndef __included_bihash_24_8_h__ #define __included_bihash_24_8_h__ diff --git a/src/vppinfra/pool.c b/src/vppinfra/pool.c new file mode 100644 index 00000000..ed83b41a --- /dev/null +++ b/src/vppinfra/pool.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + Copyright (c) 2001, 2002, 2003, 2004 Eliot Dresselhaus + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +#include + +void +_pool_init_fixed (void **pool_ptr, u32 elt_size, u32 max_elts) +{ + u8 *mmap_base; + u64 vector_size; + u64 free_index_size; + u64 total_size; + u64 page_size; + pool_header_t *fh; + vec_header_t *vh; + u8 *v; + u32 *fi; + u32 i; + u32 set_bits; + + ASSERT (elt_size); + ASSERT (max_elts); + + vector_size = pool_aligned_header_bytes + vec_header_bytes (0) + + (u64) elt_size *max_elts; + + free_index_size = vec_header_bytes (0) + sizeof (u32) * max_elts; + + /* Round up to a cache line boundary */ + vector_size = (vector_size + CLIB_CACHE_LINE_BYTES - 1) + & ~(CLIB_CACHE_LINE_BYTES - 1); + + free_index_size = (free_index_size + CLIB_CACHE_LINE_BYTES - 1) + & ~(CLIB_CACHE_LINE_BYTES - 1); + + total_size = vector_size + free_index_size; + + /* Round up to an even number of pages */ + page_size = clib_mem_get_page_size (); + total_size = (total_size + page_size - 1) & ~(page_size - 1); + + /* mmap demand zero memory */ + + mmap_base = mmap (0, total_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + if (mmap_base == MAP_FAILED) + { + clib_unix_warning ("mmap"); + *pool_ptr = 0; + } + + /* First comes the pool header */ + fh = (pool_header_t *) mmap_base; + /* Find the user vector pointer */ + v = (u8 *) (mmap_base + pool_aligned_header_bytes); + /* Finally, the vector header */ + vh = _vec_find (v); + + fh->free_bitmap = 0; /* No free elts (yet) */ + fh->max_elts = max_elts; + fh->mmap_base = mmap_base; + fh->mmap_size = total_size; + + vh->len = max_elts; 
+ + /* Build the free-index vector */ + vh = (vec_header_t *) (v + vector_size); + vh->len = max_elts; + fi = (u32 *) (vh + 1); + + fh->free_indices = fi; + + /* Set the entire free bitmap */ + clib_bitmap_alloc (fh->free_bitmap, max_elts); + memset (fh->free_bitmap, 0xff, vec_len (fh->free_bitmap) * sizeof (uword)); + + /* Clear any extraneous set bits */ + set_bits = vec_len (fh->free_bitmap) * BITS (uword); + + for (i = max_elts; i < set_bits; i++) + fh->free_bitmap = clib_bitmap_set (fh->free_bitmap, i, 0); + + /* Create the initial free vector */ + for (i = 0; i < max_elts; i++) + fi[i] = (max_elts - 1) - i; + + *pool_ptr = v; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h index 56536b77..62d5b54e 100644 --- a/src/vppinfra/pool.h +++ b/src/vppinfra/pool.h @@ -56,6 +56,16 @@ typedef struct /** Vector of free indices. One element for each set bit in bitmap. */ u32 *free_indices; + + /* The following fields are set for fixed-size, preallocated pools */ + + /** Maximum size of the pool, in elements */ + u32 max_elts; + + /** mmap segment info: base + length */ + u8 *mmap_base; + u64 mmap_size; + } pool_header_t; /** Align pool header so that pointers are naturally aligned. 
*/ @@ -69,6 +79,15 @@ pool_header (void *v) return vec_aligned_header (v, sizeof (pool_header_t), sizeof (void *)); } +extern void _pool_init_fixed (void **, u32, u32); +extern void fpool_free (void *); + +/** initialize a fixed-size, preallocated pool */ +#define pool_init_fixed(pool,max_elts) \ +{ \ + _pool_init_fixed((void **)&(pool),sizeof(pool[0]),max_elts); \ +} + /** Validate a pool */ always_inline void pool_validate (void *v) @@ -98,7 +117,7 @@ pool_header_validate_index (void *v, uword index) do { \ uword __pool_validate_index = (i); \ vec_validate_ha ((v), __pool_validate_index, \ - pool_aligned_header_bytes, /* align */ 0); \ + pool_aligned_header_bytes, /* align */ 0); \ pool_header_validate_index ((v), __pool_validate_index); \ } while (0) @@ -166,34 +185,40 @@ pool_free_elts (void *v) First search free list. If nothing is free extend vector of objects. */ -#define pool_get_aligned(P,E,A) \ -do { \ - pool_header_t * _pool_var (p) = pool_header (P); \ - uword _pool_var (l); \ - \ - _pool_var (l) = 0; \ - if (P) \ - _pool_var (l) = vec_len (_pool_var (p)->free_indices); \ - \ - if (_pool_var (l) > 0) \ - { \ - /* Return free element from free list. */ \ +#define pool_get_aligned(P,E,A) \ +do { \ + pool_header_t * _pool_var (p) = pool_header (P); \ + uword _pool_var (l); \ + \ + _pool_var (l) = 0; \ + if (P) \ + _pool_var (l) = vec_len (_pool_var (p)->free_indices); \ + \ + if (_pool_var (l) > 0) \ + { \ + /* Return free element from free list. */ \ uword _pool_var (i) = _pool_var (p)->free_indices[_pool_var (l) - 1]; \ - (E) = (P) + _pool_var (i); \ - _pool_var (p)->free_bitmap = \ + (E) = (P) + _pool_var (i); \ + _pool_var (p)->free_bitmap = \ clib_bitmap_andnoti (_pool_var (p)->free_bitmap, _pool_var (i)); \ - _vec_len (_pool_var (p)->free_indices) = _pool_var (l) - 1; \ - } \ - else \ - { \ - /* Nothing on free list, make a new element and return it. 
*/ \ - P = _vec_resize (P, \ - /* length_increment */ 1, \ + _vec_len (_pool_var (p)->free_indices) = _pool_var (l) - 1; \ + } \ + else \ + { \ + /* fixed-size, preallocated pools cannot expand */ \ + if ((P) && _pool_var(p)->max_elts) \ + { \ + clib_warning ("can't expand fixed-size pool"); \ + os_out_of_memory(); \ + } \ + /* Nothing on free list, make a new element and return it. */ \ + P = _vec_resize (P, \ + /* length_increment */ 1, \ /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \ - pool_aligned_header_bytes, \ - /* align */ (A)); \ - E = vec_end (P) - 1; \ - } \ + pool_aligned_header_bytes, \ + /* align */ (A)); \ + E = vec_end (P) - 1; \ + } \ } while (0) /** Allocate an object E from a pool P (unspecified alignment). */ @@ -207,7 +232,11 @@ do { \ \ _pool_var (l) = 0; \ if (P) \ + { \ + if (_pool_var (p)->max_elts) \ + return 0; \ _pool_var (l) = vec_len (_pool_var (p)->free_indices); \ + } \ \ /* Free elements, certainly won't expand */ \ if (_pool_var (l) > 0) \ @@ -248,7 +277,16 @@ do { \ /* Add element to free bitmap and to free list. */ \ _pool_var (p)->free_bitmap = \ clib_bitmap_ori (_pool_var (p)->free_bitmap, _pool_var (l)); \ - vec_add1 (_pool_var (p)->free_indices, _pool_var (l)); \ + /* Preallocated pool? */ \ + if (_pool_var (p)->max_elts) \ + { \ + ASSERT(_pool_var(l) < _pool_var (p)->max_elts); \ + _pool_var(p)->free_indices[_vec_len(_pool_var(p)->free_indices)] = \ + _pool_var(l); \ + _vec_len(_pool_var(p)->free_indices) += 1; \ + } \ + else \ + vec_add1 (_pool_var (p)->free_indices, _pool_var (l)); \ } while (0) /** Free pool element with given index. 
*/ @@ -262,6 +300,17 @@ do { \ #define pool_alloc_aligned(P,N,A) \ do { \ pool_header_t * _p; \ + \ + if ((P)) \ + { \ + _p = pool_header (P); \ + if (_p->max_elts) \ + { \ + clib_warning ("Can't expand fixed-size pool"); \ + os_out_of_memory(); \ + } \ + } \ + \ (P) = _vec_resize ((P), 0, (vec_len (P) + (N)) * sizeof (P[0]), \ pool_aligned_header_bytes, \ (A)); \ @@ -281,8 +330,20 @@ _pool_free (void *v) if (!v) return v; clib_bitmap_free (p->free_bitmap); - vec_free (p->free_indices); - vec_free_h (v, pool_aligned_header_bytes); + + if (p->max_elts) + { + int rv; + + rv = munmap (p->mmap_base, p->mmap_size); + if (rv) + clib_unix_warning ("munmap"); + } + else + { + vec_free (p->free_indices); + vec_free_h (v, pool_aligned_header_bytes); + } return 0; } diff --git a/src/vppinfra/test_fpool.c b/src/vppinfra/test_fpool.c new file mode 100644 index 00000000..e2d67f16 --- /dev/null +++ b/src/vppinfra/test_fpool.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +#include + +/* can be a very large size */ +#define NELTS 1024 + +int +main (int argc, char *argv[]) +{ + u32 *junk = 0; + int i; + u32 *tp = 0; + u32 *indices = 0; + + clib_mem_init (0, 3ULL << 30); + + vec_validate (indices, NELTS - 1); + _vec_len (indices) = 0; + + pool_init_fixed (tp, NELTS); + + for (i = 0; i < NELTS; i++) + { + pool_get (tp, junk); + vec_add1 (indices, junk - tp); + *junk = i; + } + + for (i = 0; i < NELTS; i++) + { + junk = pool_elt_at_index (tp, indices[i]); + ASSERT (*junk == i); + } + + fformat (stdout, "%d pool elts before deletes\n", pool_elts (tp)); + + pool_put_index (tp, indices[12]); + pool_put_index (tp, indices[43]); + + fformat (stdout, "%d pool elts after deletes\n", pool_elts (tp)); + + pool_validate (tp); + + pool_free (tp); + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vppinfra/tw_timer_16t_1w_2048sl.h b/src/vppinfra/tw_timer_16t_1w_2048sl.h index 66cf7d37..761646b3 100644 --- a/src/vppinfra/tw_timer_16t_1w_2048sl.h +++ b/src/vppinfra/tw_timer_16t_1w_2048sl.h @@ -27,6 +27,7 @@ #undef TW_OVERFLOW_VECTOR #undef TW_FAST_WHEEL_BITMAP #undef TW_TIMER_ALLOW_DUPLICATE_STOP +#undef TW_START_STOP_TRACE_SIZE #define TW_TIMER_WHEELS 1 #define TW_SLOTS_PER_RING 2048 diff --git a/src/vppinfra/tw_timer_16t_2w_512sl.h b/src/vppinfra/tw_timer_16t_2w_512sl.h index 00587b8e..029f529d 100644 --- a/src/vppinfra/tw_timer_16t_2w_512sl.h +++ b/src/vppinfra/tw_timer_16t_2w_512sl.h @@ -27,6 +27,7 @@ #undef TW_OVERFLOW_VECTOR #undef TW_FAST_WHEEL_BITMAP #undef TW_TIMER_ALLOW_DUPLICATE_STOP +#undef TW_START_STOP_TRACE_SIZE #define TW_TIMER_WHEELS 2 #define TW_SLOTS_PER_RING 512 @@ -36,7 +37,7 @@ #define LOG2_TW_TIMERS_PER_OBJECT 4 #define TW_SUFFIX _16t_2w_512sl #define TW_FAST_WHEEL_BITMAP 0 -#define TW_TIMER_ALLOW_DUPLICATE_STOP 0 +#define TW_TIMER_ALLOW_DUPLICATE_STOP 1 #include diff --git a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h 
b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h index e5e4cc19..0b455e02 100644 --- a/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h +++ b/src/vppinfra/tw_timer_1t_3w_1024sl_ov.h @@ -27,6 +27,7 @@ #undef TW_OVERFLOW_VECTOR #undef TW_FAST_WHEEL_BITMAP #undef TW_TIMER_ALLOW_DUPLICATE_STOP +#undef TW_START_STOP_TRACE_SIZE #define TW_TIMER_WHEELS 3 #define TW_SLOTS_PER_RING 1024 diff --git a/src/vppinfra/tw_timer_2t_1w_2048sl.h b/src/vppinfra/tw_timer_2t_1w_2048sl.h index 98b548b3..6ae86688 100644 --- a/src/vppinfra/tw_timer_2t_1w_2048sl.h +++ b/src/vppinfra/tw_timer_2t_1w_2048sl.h @@ -27,6 +27,7 @@ #undef TW_OVERFLOW_VECTOR #undef TW_FAST_WHEEL_BITMAP #undef TW_TIMER_ALLOW_DUPLICATE_STOP +#undef TW_START_STOP_TRACE_SIZE #define TW_TIMER_WHEELS 1 #define TW_SLOTS_PER_RING 2048 diff --git a/src/vppinfra/tw_timer_4t_3w_256sl.h b/src/vppinfra/tw_timer_4t_3w_256sl.h index 07203de8..16c41bcd 100644 --- a/src/vppinfra/tw_timer_4t_3w_256sl.h +++ b/src/vppinfra/tw_timer_4t_3w_256sl.h @@ -27,6 +27,7 @@ #undef TW_OVERFLOW_VECTOR #undef TW_FAST_WHEEL_BITMAP #undef TW_TIMER_ALLOW_DUPLICATE_STOP +#undef TW_START_STOP_TRACE_SIZE #define TW_TIMER_WHEELS 3 #define TW_SLOTS_PER_RING 256 diff --git a/src/vppinfra/tw_timer_4t_3w_4sl_ov.h b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h index 20a01d05..845ffeac 100644 --- a/src/vppinfra/tw_timer_4t_3w_4sl_ov.h +++ b/src/vppinfra/tw_timer_4t_3w_4sl_ov.h @@ -27,6 +27,7 @@ #undef TW_OVERFLOW_VECTOR #undef TW_FAST_WHEEL_BITMAP #undef TW_TIMER_ALLOW_DUPLICATE_STOP +#undef TW_START_STOP_TRACE_SIZE #define TW_TIMER_WHEELS 3 #define TW_SLOTS_PER_RING 4 diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index c0a9685a..aba00142 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -18,6 +18,87 @@ * * */ +#if TW_START_STOP_TRACE_SIZE > 0 + +void TW (tw_timer_trace) (TWT (tw_timer_wheel) * tw, u32 timer_id, + u32 pool_index, u32 handle) +{ + TWT (trace) * t = &tw->traces[tw->trace_index]; + + t->timer_id = 
timer_id; + t->pool_index = pool_index; + t->handle = handle; + + tw->trace_index++; + if (tw->trace_index == TW_START_STOP_TRACE_SIZE) + { + tw->trace_index = 0; + tw->trace_wrapped++; + } +} + +void TW (tw_search_trace) (TWT (tw_timer_wheel) * tw, u32 handle) +{ + u32 i, start_pos; + TWT (trace) * t; + char *s = "bogus!"; + + /* reverse search for the supplied handle */ + + start_pos = tw->trace_index; + if (start_pos == 0) + start_pos = TW_START_STOP_TRACE_SIZE - 1; + else + start_pos--; + + for (i = start_pos; i > 0; i--) + { + t = &tw->traces[i]; + if (t->handle == handle) + { + switch (t->timer_id) + { + case 0xFF: + s = "stopped"; + break; + case 0xFE: + s = "expired"; + break; + default: + s = "started"; + break; + } + fformat (stderr, "handle 0x%x (%d) %s at trace %d\n", + handle, handle, s, i); + } + } + if (tw->trace_wrapped > 0) + { + for (i = TW_START_STOP_TRACE_SIZE; i >= tw->trace_index; i--) + { + t = &tw->traces[i]; + if (t->handle == handle) + { + switch (t->timer_id) + { + case 0xFF: + s = "stopped"; + break; + case 0xFE: + s = "expired"; + break; + default: + s = "started"; + break; + } + fformat (stderr, "handle 0x%x (%d) %s at trace %d\n", + handle, handle, s, i); + } + } + } +} +#endif /* TW_START_STOP_TRACE_SIZE > 0 */ + static inline u32 TW (make_internal_timer_handle) (u32 pool_index, u32 timer_id) { @@ -127,6 +208,9 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, t->expiration_time = tw->current_tick + interval; ts = &tw->overflow; timer_addhead (tw->timers, ts->head_index, t - tw->timers); +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, timer_id, pool_index, t - tw->timers); +#endif return t - tw->timers; } #endif @@ -177,7 +261,9 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, ts = &tw->w[TW_TIMER_RING_GLACIER][glacier_ring_offset]; timer_addhead (tw->timers, ts->head_index, t - tw->timers); - +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, 
timer_id, pool_index, t - tw->timers); +#endif return t - tw->timers; } #endif @@ -193,7 +279,9 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, ts = &tw->w[TW_TIMER_RING_SLOW][slow_ring_offset]; timer_addhead (tw->timers, ts->head_index, t - tw->timers); - +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, timer_id, pool_index, t - tw->timers); +#endif return t - tw->timers; } #else @@ -208,6 +296,9 @@ TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, u32 pool_index, u32 timer_id, #if TW_FAST_WHEEL_BITMAP tw->fast_slot_bitmap = clib_bitmap_set (tw->fast_slot_bitmap, fast_ring_offset, 1); +#endif +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, timer_id, pool_index, t - tw->timers); #endif return t - tw->timers; } @@ -265,6 +356,9 @@ void TW (tw_timer_stop) (TWT (tw_timer_wheel) * tw, u32 handle) if (pool_is_free_index (tw->timers, handle)) return; #endif +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, ~0, ~0, handle); +#endif t = pool_elt_at_index (tw->timers, handle); @@ -302,6 +396,7 @@ TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, tw->timer_interval = timer_interval_in_seconds; tw->ticks_per_second = 1.0 / timer_interval_in_seconds; tw->first_expires_tick = ~0ULL; + vec_validate (tw->expired_timer_handles, 0); _vec_len (tw->expired_timer_handles) = 0; @@ -476,6 +571,9 @@ static inline new_glacier_ring_offset == 0)) { vec_add1 (callback_vector, t->user_handle); +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); +#endif pool_put (tw->timers, t); } /* Timer moves to the glacier ring */ @@ -536,6 +634,9 @@ static inline t->fast_ring_offset == 0)) { vec_add1 (callback_vector, t->user_handle); +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); +#endif pool_put (tw->timers, t); } /* Timer expires during slow-wheel tick 0 */ @@ -587,6 +688,9 @@ static inline if (PREDICT_FALSE (t->fast_ring_offset == 0)) { vec_add1 
(callback_vector, t->user_handle); +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); +#endif pool_put (tw->timers, t); } else /* typical case */ @@ -620,6 +724,9 @@ static inline t = pool_elt_at_index (tw->timers, next_index); next_index = t->next; vec_add1 (callback_vector, t->user_handle); +#if TW_START_STOP_TRACE_SIZE > 0 + TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); +#endif pool_put (tw->timers, t); } @@ -628,10 +735,7 @@ static inline { /* The callback is optional. We return the u32 * handle vector */ if (tw->expired_timer_callback) - { - tw->expired_timer_callback (callback_vector); - _vec_len (callback_vector) = 0; - } + tw->expired_timer_callback (callback_vector); tw->expired_timer_handles = callback_vector; } diff --git a/src/vppinfra/tw_timer_template.h b/src/vppinfra/tw_timer_template.h index 0404e3f4..0217644d 100644 --- a/src/vppinfra/tw_timer_template.h +++ b/src/vppinfra/tw_timer_template.h @@ -170,6 +170,13 @@ typedef enum } tw_ring_index_t; #endif /* __defined_tw_timer_wheel_slot__ */ +typedef CLIB_PACKED (struct + { + u8 timer_id; + u32 pool_index; + u32 handle; + }) TWT (trace); + typedef struct { /** Timer pool */ @@ -211,11 +218,20 @@ typedef struct /** expired timer callback, receives a vector of handles */ void (*expired_timer_callback) (u32 * expired_timer_handles); - /** vector of expired timers */ + /** vectors of expired timers */ u32 *expired_timer_handles; /** maximum expirations */ u32 max_expirations; + + /** current trace index */ +#if TW_START_STOP_TRACE_SIZE > 0 + /* Start/stop/expire tracing */ + u32 trace_index; + u32 trace_wrapped; + TWT (trace) traces[TW_START_STOP_TRACE_SIZE]; +#endif + } TWT (tw_timer_wheel); u32 TW (tw_timer_start) (TWT (tw_timer_wheel) * tw, @@ -236,6 +252,12 @@ u32 *TW (tw_timer_expire_timers_vec) (TWT (tw_timer_wheel) * tw, f64 now, u32 TW (tw_timer_first_expires_in_ticks) (TWT (tw_timer_wheel) * tw); #endif +#if TW_START_STOP_TRACE_SIZE > 0 +void TW 
(tw_search_trace) (TWT (tw_timer_wheel) * tw, u32 handle); +void TW (tw_timer_trace) (TWT (tw_timer_wheel) * tw, u32 timer_id, + u32 pool_index, u32 handle); +#endif + /* * fd.io coding-style-patch-verification: ON * -- cgit 1.2.3-korg From 4eeeaaf5e822718eb222e6c49abd82e1bcb566fd Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 5 Sep 2017 14:03:37 -0400 Subject: tcp: horizontal scaling improvments - do not scale syn-ack window - fix the max number of outstanding syns in builtin client - fix syn-sent ack validation to use modulo arithmetic - improve retransmit timer handler - fix output buffer allocator leakeage - improved debugging Change-Id: Iac3bc0eadf7d0b494a93e22d210a3153b61b3273 Signed-off-by: Florin Coras --- src/vnet/session/session.c | 21 ++-- src/vnet/session/session_node.c | 6 +- src/vnet/tcp/builtin_client.c | 5 + src/vnet/tcp/tcp.c | 26 ++++- src/vnet/tcp/tcp.h | 7 +- src/vnet/tcp/tcp_debug.h | 49 +++++---- src/vnet/tcp/tcp_error.def | 3 +- src/vnet/tcp/tcp_input.c | 204 ++++++++++++++++++++++++++----------- src/vnet/tcp/tcp_output.c | 212 ++++++++++++++++++--------------------- src/vppinfra/tw_timer_template.c | 11 +- 10 files changed, 335 insertions(+), 209 deletions(-) (limited to 'src/vppinfra/tw_timer_template.c') diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index 17644e29..4544f9a0 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -456,13 +456,16 @@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail) st); if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE) { - clib_warning ("This can't be good!"); + clib_warning ("half-open was removed!"); return -1; } + /* Cleanup half-open table */ + stream_session_half_open_table_del (tc); + /* Get the app's index from the handle we stored when opening connection * and the opaque (api_context for external apps) from transport session - * index*/ + * index */ app = application_get_if_valid (handle >> 32); if (!app) return -1; @@ -499,9 +502,6 
@@ stream_session_connect_notify (transport_connection_t * tc, u8 is_fail) new_s->session_state = SESSION_STATE_READY; } - /* Cleanup session lookup */ - stream_session_half_open_table_del (tc); - return error; } @@ -535,7 +535,7 @@ stream_session_disconnect_notify (transport_connection_t * tc) } /** - * Cleans up session and associated app if needed. + * Cleans up session and lookup table. */ void stream_session_delete (stream_session_t * s) @@ -559,9 +559,10 @@ stream_session_delete (stream_session_t * s) /** * Notification from transport that connection is being deleted * - * This should be called only on previously fully established sessions. For - * instance failed connects should call stream_session_connect_notify and - * indicate that the connect has failed. + * This removes the session if it is still valid. It should be called only on + * previously fully established sessions. For instance failed connects should + * call stream_session_connect_notify and indicate that the connect has + * failed. 
*/ void stream_session_delete_notify (transport_connection_t * tc) @@ -748,7 +749,7 @@ session_send_session_evt_to_thread (u64 session_handle, if (PREDICT_TRUE (q->cursize < q->maxsize)) { if (unix_shared_memory_queue_add (q, (u8 *) & evt, - 1 /* do wait for mutex */ )) + 0 /* do wait for mutex */ )) { clib_warning ("failed to enqueue evt"); } diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index dec6d13c..09687687 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -267,7 +267,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); b0->error = 0; - b0->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b0->current_data = 0; b0->total_length_not_including_first_buffer = 0; @@ -321,8 +321,10 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, })); /* *INDENT-ON* */ - VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); + if (VLIB_BUFFER_TRACE_TRAJECTORY) + b0->pre_data[1] = 3; + if (PREDICT_FALSE (n_trace > 0)) { session_queue_trace_t *t0; diff --git a/src/vnet/tcp/builtin_client.c b/src/vnet/tcp/builtin_client.c index 94e6b4ae..5b4c8679 100644 --- a/src/vnet/tcp/builtin_client.c +++ b/src/vnet/tcp/builtin_client.c @@ -509,6 +509,11 @@ clients_connect (vlib_main_t * vm, u8 * uri, u32 n_clients) /* Crude pacing for call setups */ if ((i % 4) == 0) vlib_process_suspend (vm, 10e-6); + ASSERT (i + 1 >= tm->ready_connections); + while (i + 1 - tm->ready_connections > 8000) + { + vlib_process_suspend (vm, 100e-6); + } } } diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index a4c13084..04f1e068 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -160,6 +160,7 @@ tcp_half_open_connection_new (void) { tcp_main_t *tm = vnet_get_tcp_main (); tcp_connection_t *tc = 0; + ASSERT (vlib_get_thread_index () == 0); pool_get (tm->half_open_connections, tc); memset (tc, 0, sizeof (*tc)); 
tc->c_c_index = tc - tm->half_open_connections; @@ -561,6 +562,22 @@ tcp_connection_fib_attach (tcp_connection_t * tc) } #endif /* 0 */ +/** + * Initialize connection send variables. + */ +void +tcp_init_snd_vars (tcp_connection_t * tc) +{ + u32 time_now; + + /* Set random initial sequence */ + time_now = tcp_time_now (); + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_nxt = tc->iss + 1; + tc->snd_una_max = tc->snd_nxt; +} + /** Initialize tcp connection variables * * Should be called after having received a msg from the peer, i.e., a SYN or @@ -572,6 +589,9 @@ tcp_connection_init_vars (tcp_connection_t * tc) tcp_init_mss (tc); scoreboard_init (&tc->sack_sb); tcp_cc_init (tc); + if (tc->state == TCP_STATE_SYN_RCVD) + tcp_init_snd_vars (tc); + // tcp_connection_fib_attach (tc); } @@ -691,6 +711,7 @@ tcp_connection_open (transport_endpoint_t * rmt) TCP_EVT_DBG (TCP_EVT_OPEN, tc); tc->state = TCP_STATE_SYN_SENT; + tcp_init_snd_vars (tc); tcp_send_syn (tc); clib_spinlock_unlock_if_init (&tm->half_open_lock); @@ -784,7 +805,7 @@ format_tcp_vars (u8 * s, va_list * args) tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs, tc->snd_wl2 - tc->iss); s = format (s, " flight size %u send space %u rcv_wnd_av %d\n", - tcp_flight_size (tc), tcp_available_snd_space (tc), + tcp_flight_size (tc), tcp_available_output_snd_space (tc), tcp_rcv_wnd_available (tc)); s = format (s, " cong %U ", format_tcp_congestion_status, tc); s = format (s, "cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n", @@ -1155,6 +1176,9 @@ tcp_timer_establish_handler (u32 conn_index) return; ASSERT (tc->state == TCP_STATE_SYN_RCVD); + /* Start cleanup. App wasn't notified yet so use delete notify as + * opposed to delete to cleanup session layer state. 
*/ + stream_session_delete_notify (&tc->connection); } tc->timers[TCP_TIMER_ESTABLISH] = TCP_TIMER_HANDLE_INVALID; tcp_connection_cleanup (tc); diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 11d61f5d..6020a3de 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -97,7 +97,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; * ticks to timer units */ #define TCP_DELACK_TIME 1 /* 0.1s */ #define TCP_ESTABLISH_TIME 750 /* 75s */ -#define TCP_SYN_RCVD_TIME 100 /* 10s */ +#define TCP_SYN_RCVD_TIME 600 /* 60s */ #define TCP_2MSL_TIME 300 /* 30s */ #define TCP_CLOSEWAIT_TIME 20 /* 0.1s */ #define TCP_CLEANUP_TIME 5 /* 0.5s Time to wait before cleanup */ @@ -676,6 +676,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, void tcp_connection_timers_init (tcp_connection_t * tc); void tcp_connection_timers_reset (tcp_connection_t * tc); +void tcp_init_snd_vars (tcp_connection_t * tc); void tcp_connection_init_vars (tcp_connection_t * tc); always_inline void @@ -690,6 +691,7 @@ always_inline void tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval) { ASSERT (tc->c_thread_index == vlib_get_thread_index ()); + ASSERT (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID); tc->timers[timer_id] = tw_timer_start_16t_2w_512sl (&tcp_main.timer_wheels[tc->c_thread_index], tc->c_c_index, timer_id, interval); @@ -722,6 +724,7 @@ tcp_timer_update (tcp_connection_t * tc, u8 timer_id, u32 interval) always_inline void tcp_retransmit_timer_set (tcp_connection_t * tc) { + ASSERT (tc->snd_una != tc->snd_una_max); tcp_timer_set (tc, TCP_TIMER_RETRANSMIT, clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); } @@ -769,7 +772,7 @@ tcp_retransmit_timer_update (tcp_connection_t * tc) { tcp_retransmit_timer_reset (tc); if (tc->snd_wnd < tc->snd_mss) - tcp_persist_timer_set (tc); + tcp_persist_timer_update (tc); } else tcp_timer_update (tc, TCP_TIMER_RETRANSMIT, diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 
fc36eb29..cf77e6e6 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -197,9 +197,10 @@ typedef enum _tcp_dbg_evt ed->data[0] = _tc->c_c_index; \ } -#define TCP_EVT_SYN_RCVD_HANDLER(_tc, ...) \ +#define TCP_EVT_SYN_RCVD_HANDLER(_tc,_init, ...) \ { \ - TCP_EVT_INIT_HANDLER(_tc, 0); \ + if (_init) \ + TCP_EVT_INIT_HANDLER(_tc, 0); \ ELOG_TYPE_DECLARE (_e) = \ { \ .format = "syn-rx: irs %u", \ @@ -275,11 +276,14 @@ typedef enum _tcp_dbg_evt { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "syn-tx: iss %u", \ - .format_args = "i4", \ + .format = "syn-tx: iss %u snd_una %u snd_una_max %u snd_nxt %u", \ + .format_args = "i4i4i4i4", \ }; \ - DECLARE_ETD(_tc, _e, 1); \ + DECLARE_ETD(_tc, _e, 4); \ ed->data[0] = _tc->iss; \ + ed->data[1] = _tc->snd_una - _tc->iss; \ + ed->data[2] = _tc->snd_una_max - _tc->iss; \ + ed->data[3] = _tc->snd_nxt - _tc->iss; \ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \ } @@ -287,24 +291,30 @@ typedef enum _tcp_dbg_evt { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "synack-tx: iss %u irs %u", \ - .format_args = "i4i4", \ + .format = "synack-tx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\ + .format_args = "i4i4i4i4i4", \ }; \ - DECLARE_ETD(_tc, _e, 2); \ + DECLARE_ETD(_tc, _e, 5); \ ed->data[0] = _tc->iss; \ ed->data[1] = _tc->irs; \ + ed->data[2] = _tc->snd_una - _tc->iss; \ + ed->data[3] = _tc->snd_nxt - _tc->iss; \ + ed->data[4] = _tc->rcv_nxt - _tc->irs; \ } #define TCP_EVT_SYNACK_RCVD_HANDLER(_tc, ...) 
\ { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "synack-rx: iss %u irs %u", \ - .format_args = "i4i4", \ + .format = "synack-rx: iss %u irs %u snd_una %u snd_nxt %u rcv_nxt %u",\ + .format_args = "i4i4i4i4i4", \ }; \ - DECLARE_ETD(_tc, _e, 2); \ + DECLARE_ETD(_tc, _e, 5); \ ed->data[0] = _tc->iss; \ ed->data[1] = _tc->irs; \ + ed->data[2] = _tc->snd_una - _tc->iss; \ + ed->data[3] = _tc->snd_nxt - _tc->iss; \ + ed->data[4] = _tc->rcv_nxt - _tc->irs; \ TCP_EVT_STATE_CHANGE_HANDLER(_tc); \ } @@ -361,17 +371,20 @@ typedef enum _tcp_dbg_evt { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "%s-rxt: iss %u", \ - .format_args = "t4i4", \ + .format = "%s-rxt: iss %u irs %u snd_nxt %u rcv_nxt %u", \ + .format_args = "t4i4i4i4i4", \ .n_enum_strings = 2, \ .enum_strings = { \ "syn", \ "syn-ack", \ }, \ }; \ - DECLARE_ETD(_tc, _e, 2); \ + DECLARE_ETD(_tc, _e, 5); \ ed->data[0] = _type; \ ed->data[1] = _tc->iss; \ + ed->data[2] = _tc->irs; \ + ed->data[3] = _tc->snd_nxt - _tc->iss; \ + ed->data[4] = _tc->rcv_nxt - _tc->irs; \ } #else @@ -414,7 +427,7 @@ typedef enum _tcp_dbg_evt ed->data[0] = _tc->rcv_nxt - _tc->irs; \ ed->data[1] = _tc->rcv_wnd; \ ed->data[2] = _tc->snd_nxt - _tc->iss; \ - ed->data[3] = tcp_available_wnd(_tc); \ + ed->data[3] = tcp_available_snd_wnd(_tc); \ ed->data[4] = _tc->snd_wnd; \ } @@ -422,7 +435,7 @@ typedef enum _tcp_dbg_evt { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "acked: %u snd_una %u snd_wnd %u cwnd %u inflight %u", \ + .format = "ack-rx: %u snd_una %u snd_wnd %u cwnd %u inflight %u", \ .format_args = "i4i4i4i4i4", \ }; \ DECLARE_ETD(_tc, _e, 5); \ @@ -452,13 +465,13 @@ typedef enum _tcp_dbg_evt { \ ELOG_TYPE_DECLARE (_e) = \ { \ - .format = "pktize: una %u snd_nxt %u space %u flight %u rcv_wnd %u",\ + .format = "tx: una %u snd_nxt %u space %u flight %u rcv_wnd %u",\ .format_args = "i4i4i4i4i4", \ }; \ DECLARE_ETD(_tc, _e, 5); \ ed->data[0] = _tc->snd_una - _tc->iss; \ ed->data[1] = _tc->snd_nxt - _tc->iss; \ - ed->data[2] = 
tcp_available_snd_space (_tc); \ + ed->data[2] = tcp_available_output_snd_space (_tc); \ ed->data[3] = tcp_flight_size (_tc); \ ed->data[4] = _tc->rcv_wnd; \ } diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def index a4e46d64..08922315 100644 --- a/src/vnet/tcp/tcp_error.def +++ b/src/vnet/tcp/tcp_error.def @@ -38,4 +38,5 @@ tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs") tcp_error (RST_SENT, "Resets sent") tcp_error (INVALID_CONNECTION, "Invalid connection") tcp_error (NO_WND, "No window") -tcp_error (CONNECTION_CLOSED, "Connection closed") \ No newline at end of file +tcp_error (CONNECTION_CLOSED, "Connection closed") +tcp_error (CREATE_EXISTS, "Connection already exists") \ No newline at end of file diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 1d903453..841e72a5 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -275,6 +275,7 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, if (PREDICT_FALSE (tcp_options_parse (th0, &tc0->rcv_opts))) { + clib_warning ("options parse error"); return -1; } @@ -350,9 +351,12 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, if (tcp_syn (th0)) { /* TODO implement RFC 5961 */ - tcp_make_ack (tc0, b0); + if (tc0->state != TCP_STATE_SYN_RCVD) + tcp_make_ack (tc0, b0); + else + tcp_make_synack (tc0, b0); *next0 = tcp_next_output (tc0->c_is_ip4); - TCP_EVT_DBG (TCP_EVT_SYN_RCVD, tc0); + TCP_EVT_DBG (TCP_EVT_SYN_RCVD, tc0, 0); return -1; } @@ -1842,6 +1846,74 @@ VLIB_NODE_FUNCTION_MULTIARCH (tcp6_established_node, tcp6_established); vlib_node_registration_t tcp4_syn_sent_node; vlib_node_registration_t tcp6_syn_sent_node; +static u8 +tcp_lookup_is_valid (tcp_connection_t * tc, tcp_header_t * hdr) +{ + transport_connection_t *tmp; + if (!tc) + return 1; + + u8 is_valid = (tc->c_lcl_port == hdr->dst_port + && (tc->state == TCP_STATE_LISTEN + || tc->c_rmt_port == hdr->src_port)); + + if (!is_valid) + { + if ((tmp = + 
stream_session_half_open_lookup (&tc->c_lcl_ip, &tc->c_rmt_ip, + tc->c_lcl_port, tc->c_rmt_port, + tc->c_transport_proto))) + { + if (tmp->lcl_port == hdr->dst_port + && tmp->rmt_port == hdr->src_port) + { + clib_warning ("half-open is valid!"); + } + } + } + return is_valid; +} + +/** + * Lookup transport connection + */ +static tcp_connection_t * +tcp_lookup_connection (vlib_buffer_t * b, u8 thread_index, u8 is_ip4) +{ + tcp_header_t *tcp; + transport_connection_t *tconn; + tcp_connection_t *tc; + if (is_ip4) + { + ip4_header_t *ip4; + ip4 = vlib_buffer_get_current (b); + tcp = ip4_next_header (ip4); + tconn = stream_session_lookup_transport_wt4 (&ip4->dst_address, + &ip4->src_address, + tcp->dst_port, + tcp->src_port, + SESSION_TYPE_IP4_TCP, + thread_index); + tc = tcp_get_connection_from_transport (tconn); + ASSERT (tcp_lookup_is_valid (tc, tcp)); + } + else + { + ip6_header_t *ip6; + ip6 = vlib_buffer_get_current (b); + tcp = ip6_next_header (ip6); + tconn = stream_session_lookup_transport_wt6 (&ip6->dst_address, + &ip6->src_address, + tcp->dst_port, + tcp->src_port, + SESSION_TYPE_IP6_TCP, + thread_index); + tc = tcp_get_connection_from_transport (tconn); + ASSERT (tcp_lookup_is_valid (tc, tcp)); + } + return tc; +} + always_inline uword tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) @@ -1888,6 +1960,15 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto drop; } + /* Half-open completed recently but the connection was't removed + * yet by the owning thread */ + if (PREDICT_FALSE (tc0->flags & TCP_CONN_HALF_OPEN_DONE)) + { + /* Make sure the connection actually exists */ + ASSERT (tcp_lookup_connection (b0, my_thread_index, is_ip4)); + goto drop; + } + ack0 = vnet_buffer (b0)->tcp.ack_number; seq0 = vnet_buffer (b0)->tcp.seq_number; tcp0 = tcp_buffer_hdr (b0); @@ -1914,16 +1995,20 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, */ if (tcp_ack (tcp0)) { 
- if (ack0 <= tc0->iss || ack0 > tc0->snd_nxt) + if (seq_leq (ack0, tc0->iss) || seq_gt (ack0, tc0->snd_nxt)) { + clib_warning ("ack not in rcv wnd"); if (!tcp_rst (tcp0)) tcp_send_reset_w_pkt (tc0, b0, is_ip4); goto drop; } /* Make sure ACK is valid */ - if (tc0->snd_una > ack0) - goto drop; + if (seq_gt (tc0->snd_una, ack0)) + { + clib_warning ("ack invalid"); + goto drop; + } } /* @@ -1949,11 +2034,17 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* No SYN flag. Drop. */ if (!tcp_syn (tcp0)) - goto drop; + { + clib_warning ("not synack"); + goto drop; + } /* Parse options */ if (tcp_options_parse (tcp0, &tc0->rcv_opts)) - goto drop; + { + clib_warning ("options parse fail"); + goto drop; + } /* Valid SYN or SYN-ACK. Move connection from half-open pool to * current thread pool. */ @@ -1981,8 +2072,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (tcp_opts_wscale (&new_tc0->rcv_opts)) new_tc0->snd_wscale = new_tc0->rcv_opts.wscale; - new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window) - << new_tc0->snd_wscale; + /* RFC1323: SYN and SYN-ACK wnd not scaled */ + new_tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window); new_tc0->snd_wl1 = seq0; new_tc0->snd_wl2 = ack0; @@ -2004,6 +2095,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * allocate session send reset */ if (stream_session_connect_notify (&new_tc0->connection, 0)) { + clib_warning ("connect notify fail"); tcp_send_reset_w_pkt (new_tc0, b0, is_ip4); tcp_connection_cleanup (new_tc0); goto drop; @@ -2032,6 +2124,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, } tc0->rtt_ts = 0; + tcp_init_snd_vars (tc0); tcp_make_synack (new_tc0, b0); next0 = tcp_next_output (is_ip4); @@ -2196,6 +2289,18 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + tcp_is_syn (tcp0) + tcp_is_fin (tcp0) + vnet_buffer (b0)->tcp.data_len; + if (CLIB_DEBUG) + { + tcp_connection_t *tmp; + tmp = 
tcp_lookup_connection (b0, my_thread_index, is_ip4); + if (tmp->state != tc0->state) + { + clib_warning ("state changed"); + ASSERT (0); + goto drop; + } + } + /* * Special treatment for CLOSED */ @@ -2211,8 +2316,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, */ /* 1-4: check SEQ, RST, SYN */ - if (PREDICT_FALSE - (tcp_segment_validate (vm, tc0, b0, tcp0, &next0))) + if (PREDICT_FALSE (tcp_segment_validate (vm, tc0, b0, tcp0, + &next0))) { error0 = TCP_ERROR_SEGMENT_INVALID; goto drop; @@ -2230,6 +2335,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, */ if (!tcp_rcv_ack_is_acceptable (tc0, b0)) { + clib_warning ("connection not accepted"); tcp_send_reset_w_pkt (tc0, b0, is_ip4); goto drop; } @@ -2252,6 +2358,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Reset SYN-ACK retransmit and SYN_RCV establish timers */ tcp_retransmit_timer_reset (tc0); tcp_timer_reset (tc0, TCP_TIMER_ESTABLISH); + TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0); break; case TCP_STATE_ESTABLISHED: /* We can get packets in established state here because they @@ -2400,6 +2507,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Send FIN-ACK notify app and enter CLOSE-WAIT */ tcp_connection_timers_reset (tc0); tcp_make_fin (tc0, b0); + tc0->snd_nxt += 1; next0 = tcp_next_output (tc0->c_is_ip4); stream_session_disconnect_notify (&tc0->connection); tc0->state = TCP_STATE_CLOSE_WAIT; @@ -2598,6 +2706,14 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* 3. 
check for a SYN (did that already) */ + /* Make sure connection wasn't just created */ + child0 = tcp_lookup_connection (b0, my_thread_index, is_ip4); + if (PREDICT_FALSE (child0->state != TCP_STATE_LISTEN)) + { + error0 = TCP_ERROR_CREATE_EXISTS; + goto drop; + } + /* Create child session and send SYN-ACK */ child0 = tcp_connection_new (my_thread_index); child0->c_lcl_port = lc0->c_lcl_port; @@ -2621,12 +2737,15 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (stream_session_accept (&child0->connection, lc0->c_s_index, sst, 0 /* notify */ )) { + clib_warning ("session accept fail"); + tcp_connection_cleanup (child0); error0 = TCP_ERROR_CREATE_SESSION_FAIL; goto drop; } if (tcp_options_parse (th0, &child0->rcv_opts)) { + clib_warning ("options parse fail"); goto drop; } @@ -2651,7 +2770,7 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node, child0->snd_wl2 = vnet_buffer (b0)->tcp.ack_number; tcp_connection_init_vars (child0); - TCP_EVT_DBG (TCP_EVT_SYN_RCVD, child0); + TCP_EVT_DBG (TCP_EVT_SYN_RCVD, child0, 1); /* Reuse buffer to make syn-ack and send */ tcp_make_synack (child0, b0); @@ -2768,34 +2887,6 @@ typedef enum _tcp_input_next #define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN) -static u8 -tcp_lookup_is_valid (tcp_connection_t * tc, tcp_header_t * hdr) -{ - transport_connection_t *tmp; - if (!tc) - return 1; - - u8 is_valid = (tc->c_lcl_port == hdr->dst_port - && (tc->state == TCP_STATE_LISTEN - || tc->c_rmt_port == hdr->src_port)); - - if (!is_valid) - { - if ((tmp = - stream_session_half_open_lookup (&tc->c_lcl_ip, &tc->c_rmt_ip, - tc->c_lcl_port, tc->c_rmt_port, - tc->c_transport_proto))) - { - if (tmp->lcl_port == hdr->dst_port - && tmp->rmt_port == hdr->src_port) - { - clib_warning ("half-open is valid!"); - } - } - } - return is_valid; -} - always_inline uword tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) @@ -2822,6 +2913,7 @@ 
tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b0; tcp_header_t *tcp0 = 0; tcp_connection_t *tc0; + transport_connection_t *tconn; ip4_header_t *ip40; ip6_header_t *ip60; u32 error0 = TCP_ERROR_NO_LISTENER, next0 = TCP_INPUT_NEXT_DROP; @@ -2847,15 +2939,13 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + tcp_header_bytes (tcp0)); n_data_bytes0 = clib_net_to_host_u16 (ip40->length) - n_advance_bytes0; - - tc0 = - (tcp_connection_t *) - stream_session_lookup_transport_wt4 (&ip40->dst_address, - &ip40->src_address, - tcp0->dst_port, - tcp0->src_port, - SESSION_TYPE_IP4_TCP, - my_thread_index); + tconn = stream_session_lookup_transport_wt4 (&ip40->dst_address, + &ip40->src_address, + tcp0->dst_port, + tcp0->src_port, + SESSION_TYPE_IP4_TCP, + my_thread_index); + tc0 = tcp_get_connection_from_transport (tconn); ASSERT (tcp_lookup_is_valid (tc0, tcp0)); } else @@ -2866,15 +2956,13 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length) - n_advance_bytes0; n_advance_bytes0 += sizeof (ip60[0]); - - tc0 = - (tcp_connection_t *) - stream_session_lookup_transport_wt6 (&ip60->dst_address, - &ip60->src_address, - tcp0->dst_port, - tcp0->src_port, - SESSION_TYPE_IP6_TCP, - my_thread_index); + tconn = stream_session_lookup_transport_wt6 (&ip60->dst_address, + &ip60->src_address, + tcp0->dst_port, + tcp0->src_port, + SESSION_TYPE_IP6_TCP, + my_thread_index); + tc0 = tcp_get_connection_from_transport (tconn); ASSERT (tcp_lookup_is_valid (tc0, tcp0)); } diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 15a9dcb4..9cb3e779 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -66,11 +66,10 @@ format_tcp_tx_trace (u8 * s, va_list * args) } static u8 -tcp_window_compute_scale (u32 available_space) +tcp_window_compute_scale (u32 window) { u8 wnd_scale = 0; - while (wnd_scale < TCP_MAX_WND_SCALE - && (available_space >> 
wnd_scale) > TCP_WND_MAX) + while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX) wnd_scale++; return wnd_scale; } @@ -444,12 +443,10 @@ tcp_alloc_tx_buffers (tcp_main_t * tm, u8 thread_index, u32 n_free_buffers) vec_validate (tm->tx_buffers[thread_index], current_length + n_free_buffers - 1); - _vec_len (tm->tx_buffers[thread_index]) = - current_length + vlib_buffer_alloc_from_free_list (vlib_get_main (), - tm->tx_buffers - [thread_index], - n_free_buffers, - VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + _vec_len (tm->tx_buffers[thread_index]) = current_length + + vlib_buffer_alloc (vlib_get_main (), + &tm->tx_buffers[thread_index][current_length], + n_free_buffers); /* buffer shortage, report failure */ if (vec_len (tm->tx_buffers[thread_index]) == 0) { @@ -470,7 +467,7 @@ tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx) return -1; } my_tx_buffers = tm->tx_buffers[thread_index]; - *bidx = my_tx_buffers[_vec_len (my_tx_buffers) - 1]; + *bidx = my_tx_buffers[vec_len (my_tx_buffers) - 1]; _vec_len (my_tx_buffers) -= 1; return 0; } @@ -478,10 +475,7 @@ tcp_get_free_buffer_index (tcp_main_t * tm, u32 * bidx) always_inline void tcp_return_buffer (tcp_main_t * tm) { - u32 *my_tx_buffers; - u32 thread_index = vlib_get_thread_index (); - my_tx_buffers = tm->tx_buffers[thread_index]; - _vec_len (my_tx_buffers) += 1; + _vec_len (tm->tx_buffers[vlib_get_thread_index ()]) += 1; } always_inline void * @@ -489,7 +483,8 @@ tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) { if (b->flags & VLIB_BUFFER_NEXT_PRESENT) vlib_buffer_free_one (vm, b->next_buffer); - b->flags = 0; + /* Zero all flags but free list index and trace flag */ + b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1; b->current_data = 0; b->current_length = 0; b->total_length_not_including_first_buffer = 0; @@ -503,7 +498,8 @@ always_inline void * tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) { ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); - b->flags = 
VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; b->total_length_not_including_first_buffer = 0; vnet_buffer (b)->tcp.flags = 0; @@ -567,8 +563,34 @@ tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b) /* Reset flags, make sure ack is sent */ vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; +} + +/** + * Convert buffer to SYN + */ +void +tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b) +{ + u8 tcp_hdr_opts_len, tcp_opts_len; + tcp_header_t *th; + u16 initial_wnd; + tcp_options_t snd_opts; + + initial_wnd = tcp_initial_window_to_advertise (tc); - tc->snd_nxt += 1; + /* Make and write options */ + memset (&snd_opts, 0, sizeof (snd_opts)); + tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale); + tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); + + th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, + tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN, + initial_wnd); + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + tcp_options_write ((u8 *) (th + 1), &snd_opts); + + tcp_timer_update (tc, TCP_TIMER_RETRANSMIT_SYN, + tc->rto * TCP_TO_TIMER_TICK); } /** @@ -582,37 +604,25 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) u8 tcp_opts_len, tcp_hdr_opts_len; tcp_header_t *th; u16 initial_wnd; - u32 time_now; memset (snd_opts, 0, sizeof (*snd_opts)); - tcp_reuse_buffer (vm, b); - /* Set random initial sequence */ - time_now = tcp_time_now (); - - tc->iss = random_u32 (&time_now); - tc->snd_una = tc->iss; - tc->snd_nxt = tc->iss + 1; - tc->snd_una_max = tc->snd_nxt; - initial_wnd = tcp_initial_window_to_advertise (tc); - - /* Make and write options */ tcp_opts_len = tcp_make_synack_options (tc, snd_opts); tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd); - tcp_options_write ((u8 
*) (th + 1), snd_opts); vnet_buffer (b)->tcp.connection_index = tc->c_c_index; vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; - /* Init retransmit timer */ - tcp_retransmit_timer_set (tc); + /* Init retransmit timer. Use update instead of set because of + * retransmissions */ + tcp_retransmit_timer_force_update (tc); TCP_EVT_DBG (TCP_EVT_SYNACK_SENT, tc); } @@ -918,44 +928,17 @@ tcp_send_syn (tcp_connection_t * tc) u32 bi; tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = vlib_get_main (); - u8 tcp_hdr_opts_len, tcp_opts_len; - tcp_header_t *th; - u32 time_now; - u16 initial_wnd; - tcp_options_t snd_opts; if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) return; b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); - - /* Set random initial sequence */ - time_now = tcp_time_now (); - - tc->iss = random_u32 (&time_now); - tc->snd_una = tc->iss; - tc->snd_una_max = tc->snd_nxt = tc->iss + 1; - - initial_wnd = tcp_initial_window_to_advertise (tc); - - /* Make and write options */ - memset (&snd_opts, 0, sizeof (snd_opts)); - tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale); - tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t); - - th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss, - tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN, - initial_wnd); - - tcp_options_write ((u8 *) (th + 1), &snd_opts); + tcp_make_syn (tc, b); /* Measure RTT with this */ tc->rtt_ts = tcp_time_now (); tc->rtt_seq = tc->snd_nxt; - - /* Start retransmit trimer */ - tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK); tc->rto_boff = 0; /* Set the connection establishment timer */ @@ -1010,8 +993,12 @@ tcp_send_fin (tcp_connection_t * tc) /* buffer will be initialized by in tcp_make_fin */ tcp_make_fin (tc, b); tcp_enqueue_to_output_now (vm, b, bi, tc->c_is_ip4); - tc->flags |= TCP_CONN_FINSNT; - tc->flags &= ~TCP_CONN_FINPNDG; + if (!(tc->flags & TCP_CONN_FINSNT)) + { + tc->flags |= TCP_CONN_FINSNT; + tc->flags &= 
~TCP_CONN_FINPNDG; + tc->snd_nxt += 1; + } tcp_retransmit_timer_force_update (tc); TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc); } @@ -1146,6 +1133,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, * Make sure we can retransmit something */ available_bytes = stream_session_tx_fifo_max_dequeue (&tc->connection); + ASSERT (available_bytes >= offset); available_bytes -= offset; if (!available_bytes) return 0; @@ -1209,6 +1197,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, VLIB_FRAME_SIZE - available_bufs)) { tcp_return_buffer (tm); + *b = 0; return 0; } } @@ -1236,7 +1225,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, ASSERT (n_peeked == len_to_deq); n_bytes += n_peeked; chain_b->current_length = n_peeked; - chain_b->flags = 0; + chain_b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK; chain_b->next_buffer = 0; /* update previous buffer */ @@ -1310,19 +1299,6 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID; } - if (!tcp_in_recovery (tc) && tc->rto_boff > 0 - && tc->state >= TCP_STATE_ESTABLISHED) - { - tc->rto_boff = 0; - tcp_update_rto (tc); - } - - /* Increment RTO backoff (also equal to number of retries) */ - tc->rto_boff += 1; - - /* Go back to first un-acked byte */ - tc->snd_nxt = tc->snd_una; - if (tc->state >= TCP_STATE_ESTABLISHED) { /* Lost FIN, retransmit and return */ @@ -1332,6 +1308,18 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) return; } + /* We're not in recovery so make sure rto_boff is 0 */ + if (!tcp_in_recovery (tc) && tc->rto_boff > 0) + { + tc->rto_boff = 0; + tcp_update_rto (tc); + } + + /* Increment RTO backoff (also equal to number of retries) and go back + * to first un-acked byte */ + tc->rto_boff += 1; + tc->snd_nxt = tc->snd_una; + /* First retransmit timeout */ if (tc->rto_boff == 1) tcp_rtx_timeout_cc (tc); @@ -1349,12 +1337,11 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) if (n_bytes == 
0) { - if (b) - { - clib_warning ("retransmit fail: %U", format_tcp_connection, tc, - 2); - ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion); - } + ASSERT (!b); + if (tc->snd_una == tc->snd_una_max) + return; + ASSERT (tc->rto_boff > 1 && tc->snd_una == tc->snd_congestion); + clib_warning ("retransmit fail: %U", format_tcp_connection, tc, 2); /* Try again eventually */ tcp_retransmit_timer_set (tc); return; @@ -1365,16 +1352,18 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) /* For first retransmit, record timestamp (Eifel detection RFC3522) */ if (tc->rto_boff == 1) tc->snd_rxt_ts = tcp_time_now (); + + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + tcp_retransmit_timer_update (tc); } - /* Retransmit for SYN/SYNACK */ - else if (tc->state == TCP_STATE_SYN_RCVD || tc->state == TCP_STATE_SYN_SENT) + /* Retransmit for SYN */ + else if (tc->state == TCP_STATE_SYN_SENT) { /* Half-open connection actually moved to established but we were * waiting for syn retransmit to pop to call cleanup from the right * thread. */ if (tc->flags & TCP_CONN_HALF_OPEN_DONE) { - ASSERT (tc->state == TCP_STATE_SYN_SENT); if (tcp_half_open_connection_cleanup (tc)) { clib_warning ("could not remove half-open connection"); @@ -1385,49 +1374,46 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) /* Try without increasing RTO a number of times. If this fails, * start growing RTO exponentially */ + tc->rto_boff += 1; if (tc->rto_boff > TCP_RTO_SYN_RETRIES) tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) - { - clib_warning ("tcp_get_free_buffer_index FAIL"); - return; - } + return; + b = vlib_get_buffer (vm, bi); tcp_init_buffer (vm, b); - tcp_push_hdr_i (tc, b, tc->state, 1); + tcp_make_syn (tc, b); - /* Account for the SYN */ - tc->snd_nxt += 1; tc->rtt_ts = 0; - TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc, - (tc->state == TCP_STATE_SYN_SENT ? 
0 : 1)); + TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc, 0); + + /* This goes straight to ipx_lookup. Retransmit timer set already */ + tcp_push_ip_hdr (tm, tc, b); + tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); } - else + /* Retransmit SYN-ACK */ + else if (tc->state == TCP_STATE_SYN_RCVD) { - ASSERT (tc->state == TCP_STATE_CLOSED); - clib_warning ("connection closed ..."); - return; - } + tc->rto_boff += 1; + tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX); + tc->rtt_ts = 0; - if (!is_syn) - { - tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + if (PREDICT_FALSE (tcp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + tcp_make_synack (tc, b); + TCP_EVT_DBG (TCP_EVT_SYN_RXT, tc, 1); - /* Re-enable retransmit timer */ - tcp_retransmit_timer_set (tc); + /* Retransmit timer already updated, just enqueue to output */ + tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); } else { - ASSERT (tc->state == TCP_STATE_SYN_SENT); - - /* This goes straight to ipx_lookup */ - tcp_push_ip_hdr (tm, tc, b); - tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4); - - /* Re-enable retransmit timer */ - tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, - tc->rto * TCP_TO_TIMER_TICK); + ASSERT (tc->state == TCP_STATE_CLOSED); + clib_warning ("connection closed ..."); + return; } } diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index aba00142..abad3718 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -572,7 +572,8 @@ static inline { vec_add1 (callback_vector, t->user_handle); #if TW_START_STOP_TRACE_SIZE > 0 - TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); + TW (tw_timer_trace) (tw, 0xfe, t->user_handle, + t - tw->timers); #endif pool_put (tw->timers, t); } @@ -635,7 +636,8 @@ static inline { vec_add1 (callback_vector, t->user_handle); #if TW_START_STOP_TRACE_SIZE > 0 - TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); + TW (tw_timer_trace) (tw, 0xfe, t->user_handle, + t - tw->timers); #endif 
pool_put (tw->timers, t); } @@ -689,7 +691,8 @@ static inline { vec_add1 (callback_vector, t->user_handle); #if TW_START_STOP_TRACE_SIZE > 0 - TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); + TW (tw_timer_trace) (tw, 0xfe, t->user_handle, + t - tw->timers); #endif pool_put (tw->timers, t); } @@ -725,7 +728,7 @@ static inline next_index = t->next; vec_add1 (callback_vector, t->user_handle); #if TW_START_STOP_TRACE_SIZE > 0 - TW (tw_timer_trace) (tw, 0xfe, ~0, t - tw->timers); + TW (tw_timer_trace) (tw, 0xfe, t->user_handle, t - tw->timers); #endif pool_put (tw->timers, t); } -- cgit 1.2.3-korg