diff options
author | Dave Barach <dave@barachs.net> | 2020-03-27 11:14:59 -0400 |
---|---|---|
committer | Dave Barach <dave@barachs.net> | 2020-03-27 12:03:19 -0400 |
commit | b70fd4a545cf8e3bb1c3ddbac3fff6a112a8cbbd (patch) | |
tree | 9885466351e09265d0ad3434c6bf2898cabe3dd6 /src | |
parent | ffe2caf9da1105c4fedd7c152deea456816eed17 (diff) |
vppinfra: manual cherrypick timebase fixes
Gerrit Headline
------ --------
25729 fix corner cases in tw_timer_expire
25763 improve timebase resilience
25837 remove time jump workaround
Automatic cherry-picking failed miserably.
Type: fix
Ticket: VPP-1852
Signed-off-by: Dave Barach <dave@barachs.net>
Change-Id: I460effb2b5f4f19ae4548ef01ca3726b7412bd7f
Diffstat (limited to 'src')
-rw-r--r-- | src/vppinfra/time.c | 185 | ||||
-rw-r--r-- | src/vppinfra/time.h | 10 | ||||
-rw-r--r-- | src/vppinfra/tw_timer_template.c | 11 |
3 files changed, 150 insertions, 56 deletions
diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c index bfc48ce0671..0b00a11421d 100644 --- a/src/vppinfra/time.c +++ b/src/vppinfra/time.c @@ -39,6 +39,7 @@ #include <vppinfra/time.h> #include <vppinfra/format.h> #include <vppinfra/cpu.h> +#include <math.h> #ifdef CLIB_UNIX @@ -51,11 +52,8 @@ static f64 estimate_clock_frequency (f64 sample_time) { - /* Round to nearest 100KHz. */ - const f64 round_to_units = 100e5; - f64 time_now, time_start, time_limit, freq; - u64 ifreq, t[2]; + u64 t[2]; time_start = time_now = unix_time_now (); time_limit = time_now + sample_time; @@ -65,8 +63,6 @@ estimate_clock_frequency (f64 sample_time) t[1] = clib_cpu_time_now (); freq = (t[1] - t[0]) / (time_now - time_start); - ifreq = flt_round_nearest (freq / round_to_units); - freq = ifreq * round_to_units; return freq; } @@ -153,10 +149,43 @@ os_cpu_clock_frequency (void) #endif f64 cpu_freq; +#ifdef __x86_64__ + u32 __clib_unused eax = 0, ebx = 0, ecx = 0, edx = 0; + clib_get_cpuid (0x00, &eax, &ebx, &ecx, &edx); + if (eax >= 0x15) + { + u32 max_leaf = eax; + /* + CPUID Leaf 0x15 - Time Stamp Counter and Nominal Core Crystal Clock Info + eax - denominator of the TSC/”core crystal clock” ratio + ebx - numerator of the TSC/”core crystal clock” ratio + ecx - nominal frequency of the core crystal clock in Hz + edx - reseved + */ + + clib_get_cpuid (0x15, &eax, &ebx, &ecx, &edx); + if (ebx && ecx) + return (u64) ecx *ebx / eax; + + if (max_leaf >= 0x16) + { + /* + CPUID Leaf 0x16 - Processor Frequency Information Leaf + eax - Bits 15 - 00: Processor Base Frequency (in MHz). + */ + + clib_get_cpuid (0x16, &eax, &ebx, &ecx, &edx); + if (eax) + return 1e6 * (eax & 0xffff); + } + } +#endif + + /* If we have an invariant TSC, use it to estimate the clock frequency */ if (clib_cpu_supports_invariant_tsc ()) return estimate_clock_frequency (1e-3); - /* First try /sys version. */ + /* Next, try /sys version. */ cpu_freq = clock_frequency_from_sys_filesystem (); if (cpu_freq != 0) return cpu_freq; @@ -179,26 +208,76 @@ clib_time_init (clib_time_t * c) { clib_memset (c, 0, sizeof (c[0])); c->clocks_per_second = os_cpu_clock_frequency (); + /* + * Sporadic reports of os_cpu_clock_frequency() returning 0.0 + * in highly parallel container environments. + * To avoid immediate division by zero: + * Step 1: try estimate_clock_frequency(). + * Step 2: give up. Pretend we have a 2gHz clock. + */ + if (PREDICT_FALSE (c->clocks_per_second == 0.0)) + { + c->clocks_per_second = estimate_clock_frequency (1e-3); + if (c->clocks_per_second == 0.0) + { + clib_warning ("os_cpu_clock_frequency() returned 0.0, use 2e9..."); + c->clocks_per_second = 2e9; + } + } c->seconds_per_clock = 1 / c->clocks_per_second; c->log2_clocks_per_second = min_log2_u64 ((u64) c->clocks_per_second); - /* Initially verify frequency every sec */ - c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second; + /* Verify frequency every 16 sec */ + c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second + 4; c->last_verify_reference_time = unix_time_now (); + c->init_reference_time = c->last_verify_reference_time; c->last_cpu_time = clib_cpu_time_now (); c->init_cpu_time = c->last_verify_cpu_time = c->last_cpu_time; + c->total_cpu_time = 0ULL; + + /* + * Use exponential smoothing, with a half-life of 1 minute + * reported_rate(t) = reported_rate(t-1) * K + rate(t)*(1-K) + * where K = e**(-1.0/3.75); + * 15 samples in 4 minutes + * 7.5 samples in 2 minutes, + * 3.75 samples in 1 minute, etc. + */ + c->damping_constant = exp (-1.0 / 3.75); } void clib_time_verify_frequency (clib_time_t * c) { - f64 now_reference = unix_time_now (); - f64 dtr = now_reference - c->last_verify_reference_time; - f64 dtr_max; - u64 dtc = c->last_cpu_time - c->last_verify_cpu_time; + f64 now_reference, delta_reference, delta_reference_max; + f64 delta_clock_in_seconds; + u64 now_clock, delta_clock; f64 new_clocks_per_second, delta; - f64 save_total_cpu_time_in_seconds; + + /* Ask the kernel and the CPU what time it is... */ + now_reference = unix_time_now (); + now_clock = clib_cpu_time_now (); + + /* Compute change in the reference clock */ + delta_reference = now_reference - c->last_verify_reference_time; + + /* And change in the CPU clock */ + delta_clock_in_seconds = (f64) (now_clock - c->last_verify_cpu_time) * + c->seconds_per_clock; + + /* + * Recompute vpp start time reference, and total clocks + * using the current clock rate + */ + c->init_reference_time += (delta_reference - delta_clock_in_seconds); + c->total_cpu_time = (now_reference - c->init_reference_time) + * c->clocks_per_second; + + c->last_cpu_time = now_clock; + + /* Calculate a new clock rate sample */ + delta_clock = c->last_cpu_time - c->last_verify_cpu_time; c->last_verify_cpu_time = c->last_cpu_time; c->last_verify_reference_time = now_reference; @@ -208,68 +287,70 @@ clib_time_verify_frequency (clib_time_t * c) * or off by a factor of two - or 8 seconds - whichever is larger? * Someone reset the clock behind our back. */ - dtr_max = (f64) (2ULL << c->log2_clocks_per_frequency_verify) / + delta_reference_max = (f64) (2ULL << c->log2_clocks_per_frequency_verify) / (f64) (1ULL << c->log2_clocks_per_second); - dtr_max = dtr_max > 8.0 ? dtr_max : 8.0; + delta_reference_max = delta_reference_max > 8.0 ? delta_reference_max : 8.0; - if (dtr <= 0.0 || dtr > dtr_max) - { - c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second; - return; - } - - if (PREDICT_FALSE (c->round_to_units == 0.0)) - { - f64 next_pow10, est_round_to_units; - /* - * Compute the first power of ten which is greater than - * 0.1% of the new clock rate. Save the result, and use it - * to round future results, so we don't end up calculating - * silly-looking clock rates. - */ - est_round_to_units = ((f64) dtc / dtr) * 0.001; - next_pow10 = ceil (log10 (est_round_to_units)); - c->round_to_units = pow (10.0, next_pow10); - } + /* Ignore this sample */ + if (delta_reference <= 0.0 || delta_reference > delta_reference_max) + return; /* * Reject large frequency changes, another consequence of * system clock changes particularly with old kernels. */ - new_clocks_per_second = - flt_round_nearest ((f64) dtc / (dtr * c->round_to_units)) - * c->round_to_units; + new_clocks_per_second = ((f64) delta_clock) / delta_reference; /* Compute abs(rate change) */ delta = new_clocks_per_second - c->clocks_per_second; if (delta < 0.0) delta = -delta; - /* If rate change > 1%, reject it and try again */ + /* If rate change > 1%, reject this sample */ if (PREDICT_FALSE ((delta / c->clocks_per_second) > .01)) { clib_warning ("Rejecting large frequency change of %.2f%%", (delta / c->clocks_per_second) * 100.0); - c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second; return; } - /* Save total cpu time in seconds */ - save_total_cpu_time_in_seconds = c->total_cpu_time * c->seconds_per_clock; - - /* Recalculate clock rate */ - c->clocks_per_second = new_clocks_per_second; - c->seconds_per_clock = 1 / c->clocks_per_second; + /* Add sample to the exponentially-smoothed rate */ + c->clocks_per_second = c->clocks_per_second * c->damping_constant + + (1.0 - c->damping_constant) * new_clocks_per_second; + c->seconds_per_clock = 1.0 / c->clocks_per_second; /* - * Restore total cpu time in seconds. Otherwise, if c->clocks_per_second - * has decreased, time may appear to flow backwards. + * Recalculate total_cpu_time based on the kernel timebase, and + * the calculated clock rate */ - c->total_cpu_time = save_total_cpu_time_in_seconds * c->clocks_per_second; + c->total_cpu_time = + (now_reference - c->init_reference_time) * c->clocks_per_second; +} + + +u8 * +format_clib_time (u8 * s, va_list * args) +{ + clib_time_t *c = va_arg (*args, clib_time_t *); + int verbose = va_arg (*args, int); + f64 now, reftime, delta_reftime_in_seconds, error; + + /* Compute vpp elapsed time from the CPU clock */ + reftime = unix_time_now (); + now = clib_time_now (c); + + s = format (s, "Time now %.6f", now); + if (verbose == 0) + return s; + + /* And also from the kernel */ + delta_reftime_in_seconds = reftime - c->init_reference_time; + + error = now - delta_reftime_in_seconds; - /* Double time between verifies; max at 64 secs ~ 1 minute. */ - if (c->log2_clocks_per_frequency_verify < c->log2_clocks_per_second + 6) - c->log2_clocks_per_frequency_verify += 1; + s = format (s, ", reftime %.6f, error %.6f, clocks/sec %.6f", + delta_reftime_in_seconds, error, c->clocks_per_second); + return (s); } /* diff --git a/src/vppinfra/time.h b/src/vppinfra/time.h index b2d749256d3..4d8997f0a9e 100644 --- a/src/vppinfra/time.h +++ b/src/vppinfra/time.h @@ -39,6 +39,7 @@ #define included_time_h #include <vppinfra/clib.h> +#include <vppinfra/format.h> typedef struct { @@ -56,10 +57,9 @@ typedef struct from clock cycles into seconds. */ f64 seconds_per_clock; - f64 round_to_units; - /* Time stamp of call to clib_time_init call. */ u64 init_cpu_time; + f64 init_reference_time; u64 last_verify_cpu_time; @@ -67,8 +67,14 @@ typedef struct f64 last_verify_reference_time; u32 log2_clocks_per_second, log2_clocks_per_frequency_verify; + + /* Damping constant */ + f64 damping_constant; + } clib_time_t; +format_function_t format_clib_time; + /* Return CPU time stamp as 64bit number. */ #if defined(__x86_64__) || defined(i386) always_inline u64 diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c index 07e300f5eac..174ffaceb47 100644 --- a/src/vppinfra/tw_timer_template.c +++ b/src/vppinfra/tw_timer_template.c @@ -421,7 +421,6 @@ TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw, } tw->timer_interval = timer_interval_in_seconds; tw->ticks_per_second = 1.0 / timer_interval_in_seconds; - tw->first_expires_tick = ~0ULL; vec_validate (tw->expired_timer_handles, 0); _vec_len (tw->expired_timer_handles) = 0; @@ -514,7 +513,7 @@ static inline u32 slow_wheel_index __attribute__ ((unused)); u32 glacier_wheel_index __attribute__ ((unused)); - /* Shouldn't happen */ + /* Called too soon to process new timer expirations? */ if (PREDICT_FALSE (now < tw->next_run_time)) return callback_vector_arg; @@ -526,6 +525,14 @@ static inline /* Remember when we ran, compute next runtime */ tw->next_run_time = (now + tw->timer_interval); + /* First call, or time jumped backwards? */ + if (PREDICT_FALSE + ((tw->last_run_time == 0.0) || (now <= tw->last_run_time))) + { + tw->last_run_time = now; + return callback_vector_arg; + } + if (callback_vector_arg == 0) { _vec_len (tw->expired_timer_handles) = 0; |