summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Barach <dave@barachs.net>2020-03-27 11:14:59 -0400
committerDave Barach <dave@barachs.net>2020-03-27 12:03:19 -0400
commitb70fd4a545cf8e3bb1c3ddbac3fff6a112a8cbbd (patch)
tree9885466351e09265d0ad3434c6bf2898cabe3dd6
parentffe2caf9da1105c4fedd7c152deea456816eed17 (diff)
vppinfra: manual cherrypick timebase fixes
Gerrit Headline ------ -------- 25729 fix corner cases in tw_timer_expire 25763 improve timebase resilience 25837 remove time jump workaround Automatic cherry-picking failed miserably. Type: fix Ticket: VPP-1852 Signed-off-by: Dave Barach <dave@barachs.net> Change-Id: I460effb2b5f4f19ae4548ef01ca3726b7412bd7f
-rw-r--r--src/vppinfra/time.c185
-rw-r--r--src/vppinfra/time.h10
-rw-r--r--src/vppinfra/tw_timer_template.c11
3 files changed, 150 insertions, 56 deletions
diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c
index bfc48ce0671..0b00a11421d 100644
--- a/src/vppinfra/time.c
+++ b/src/vppinfra/time.c
@@ -39,6 +39,7 @@
#include <vppinfra/time.h>
#include <vppinfra/format.h>
#include <vppinfra/cpu.h>
+#include <math.h>
#ifdef CLIB_UNIX
@@ -51,11 +52,8 @@
static f64
estimate_clock_frequency (f64 sample_time)
{
- /* Round to nearest 100KHz. */
- const f64 round_to_units = 100e5;
-
f64 time_now, time_start, time_limit, freq;
- u64 ifreq, t[2];
+ u64 t[2];
time_start = time_now = unix_time_now ();
time_limit = time_now + sample_time;
@@ -65,8 +63,6 @@ estimate_clock_frequency (f64 sample_time)
t[1] = clib_cpu_time_now ();
freq = (t[1] - t[0]) / (time_now - time_start);
- ifreq = flt_round_nearest (freq / round_to_units);
- freq = ifreq * round_to_units;
return freq;
}
@@ -153,10 +149,43 @@ os_cpu_clock_frequency (void)
#endif
f64 cpu_freq;
+#ifdef __x86_64__
+ u32 __clib_unused eax = 0, ebx = 0, ecx = 0, edx = 0;
+ clib_get_cpuid (0x00, &eax, &ebx, &ecx, &edx);
+ if (eax >= 0x15)
+ {
+ u32 max_leaf = eax;
+ /*
+ CPUID Leaf 0x15 - Time Stamp Counter and Nominal Core Crystal Clock Info
+ eax - denominator of the TSC/”core crystal clock” ratio
+ ebx - numerator of the TSC/”core crystal clock” ratio
+ ecx - nominal frequency of the core crystal clock in Hz
+ edx - reseved
+ */
+
+ clib_get_cpuid (0x15, &eax, &ebx, &ecx, &edx);
+ if (ebx && ecx)
+ return (u64) ecx *ebx / eax;
+
+ if (max_leaf >= 0x16)
+ {
+ /*
+ CPUID Leaf 0x16 - Processor Frequency Information Leaf
+ eax - Bits 15 - 00: Processor Base Frequency (in MHz).
+ */
+
+ clib_get_cpuid (0x16, &eax, &ebx, &ecx, &edx);
+ if (eax)
+ return 1e6 * (eax & 0xffff);
+ }
+ }
+#endif
+
+ /* If we have an invariant TSC, use it to estimate the clock frequency */
if (clib_cpu_supports_invariant_tsc ())
return estimate_clock_frequency (1e-3);
- /* First try /sys version. */
+ /* Next, try /sys version. */
cpu_freq = clock_frequency_from_sys_filesystem ();
if (cpu_freq != 0)
return cpu_freq;
@@ -179,26 +208,76 @@ clib_time_init (clib_time_t * c)
{
clib_memset (c, 0, sizeof (c[0]));
c->clocks_per_second = os_cpu_clock_frequency ();
+ /*
+ * Sporadic reports of os_cpu_clock_frequency() returning 0.0
+ * in highly parallel container environments.
+ * To avoid immediate division by zero:
+ * Step 1: try estimate_clock_frequency().
+ * Step 2: give up. Pretend we have a 2gHz clock.
+ */
+ if (PREDICT_FALSE (c->clocks_per_second == 0.0))
+ {
+ c->clocks_per_second = estimate_clock_frequency (1e-3);
+ if (c->clocks_per_second == 0.0)
+ {
+ clib_warning ("os_cpu_clock_frequency() returned 0.0, use 2e9...");
+ c->clocks_per_second = 2e9;
+ }
+ }
c->seconds_per_clock = 1 / c->clocks_per_second;
c->log2_clocks_per_second = min_log2_u64 ((u64) c->clocks_per_second);
- /* Initially verify frequency every sec */
- c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second;
+ /* Verify frequency every 16 sec */
+ c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second + 4;
c->last_verify_reference_time = unix_time_now ();
+ c->init_reference_time = c->last_verify_reference_time;
c->last_cpu_time = clib_cpu_time_now ();
c->init_cpu_time = c->last_verify_cpu_time = c->last_cpu_time;
+ c->total_cpu_time = 0ULL;
+
+ /*
+ * Use exponential smoothing, with a half-life of 1 minute
+ * reported_rate(t) = reported_rate(t-1) * K + rate(t)*(1-K)
+ * where K = e**(-1.0/3.75);
+ * 15 samples in 4 minutes
+ * 7.5 samples in 2 minutes,
+ * 3.75 samples in 1 minute, etc.
+ */
+ c->damping_constant = exp (-1.0 / 3.75);
}
void
clib_time_verify_frequency (clib_time_t * c)
{
- f64 now_reference = unix_time_now ();
- f64 dtr = now_reference - c->last_verify_reference_time;
- f64 dtr_max;
- u64 dtc = c->last_cpu_time - c->last_verify_cpu_time;
+ f64 now_reference, delta_reference, delta_reference_max;
+ f64 delta_clock_in_seconds;
+ u64 now_clock, delta_clock;
f64 new_clocks_per_second, delta;
- f64 save_total_cpu_time_in_seconds;
+
+ /* Ask the kernel and the CPU what time it is... */
+ now_reference = unix_time_now ();
+ now_clock = clib_cpu_time_now ();
+
+ /* Compute change in the reference clock */
+ delta_reference = now_reference - c->last_verify_reference_time;
+
+ /* And change in the CPU clock */
+ delta_clock_in_seconds = (f64) (now_clock - c->last_verify_cpu_time) *
+ c->seconds_per_clock;
+
+ /*
+ * Recompute vpp start time reference, and total clocks
+ * using the current clock rate
+ */
+ c->init_reference_time += (delta_reference - delta_clock_in_seconds);
+ c->total_cpu_time = (now_reference - c->init_reference_time)
+ * c->clocks_per_second;
+
+ c->last_cpu_time = now_clock;
+
+ /* Calculate a new clock rate sample */
+ delta_clock = c->last_cpu_time - c->last_verify_cpu_time;
c->last_verify_cpu_time = c->last_cpu_time;
c->last_verify_reference_time = now_reference;
@@ -208,68 +287,70 @@ clib_time_verify_frequency (clib_time_t * c)
* or off by a factor of two - or 8 seconds - whichever is larger?
* Someone reset the clock behind our back.
*/
- dtr_max = (f64) (2ULL << c->log2_clocks_per_frequency_verify) /
+ delta_reference_max = (f64) (2ULL << c->log2_clocks_per_frequency_verify) /
(f64) (1ULL << c->log2_clocks_per_second);
- dtr_max = dtr_max > 8.0 ? dtr_max : 8.0;
+ delta_reference_max = delta_reference_max > 8.0 ? delta_reference_max : 8.0;
- if (dtr <= 0.0 || dtr > dtr_max)
- {
- c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second;
- return;
- }
-
- if (PREDICT_FALSE (c->round_to_units == 0.0))
- {
- f64 next_pow10, est_round_to_units;
- /*
- * Compute the first power of ten which is greater than
- * 0.1% of the new clock rate. Save the result, and use it
- * to round future results, so we don't end up calculating
- * silly-looking clock rates.
- */
- est_round_to_units = ((f64) dtc / dtr) * 0.001;
- next_pow10 = ceil (log10 (est_round_to_units));
- c->round_to_units = pow (10.0, next_pow10);
- }
+ /* Ignore this sample */
+ if (delta_reference <= 0.0 || delta_reference > delta_reference_max)
+ return;
/*
* Reject large frequency changes, another consequence of
* system clock changes particularly with old kernels.
*/
- new_clocks_per_second =
- flt_round_nearest ((f64) dtc / (dtr * c->round_to_units))
- * c->round_to_units;
+ new_clocks_per_second = ((f64) delta_clock) / delta_reference;
/* Compute abs(rate change) */
delta = new_clocks_per_second - c->clocks_per_second;
if (delta < 0.0)
delta = -delta;
- /* If rate change > 1%, reject it and try again */
+ /* If rate change > 1%, reject this sample */
if (PREDICT_FALSE ((delta / c->clocks_per_second) > .01))
{
clib_warning ("Rejecting large frequency change of %.2f%%",
(delta / c->clocks_per_second) * 100.0);
- c->log2_clocks_per_frequency_verify = c->log2_clocks_per_second;
return;
}
- /* Save total cpu time in seconds */
- save_total_cpu_time_in_seconds = c->total_cpu_time * c->seconds_per_clock;
-
- /* Recalculate clock rate */
- c->clocks_per_second = new_clocks_per_second;
- c->seconds_per_clock = 1 / c->clocks_per_second;
+ /* Add sample to the exponentially-smoothed rate */
+ c->clocks_per_second = c->clocks_per_second * c->damping_constant +
+ (1.0 - c->damping_constant) * new_clocks_per_second;
+ c->seconds_per_clock = 1.0 / c->clocks_per_second;
/*
- * Restore total cpu time in seconds. Otherwise, if c->clocks_per_second
- * has decreased, time may appear to flow backwards.
+ * Recalculate total_cpu_time based on the kernel timebase, and
+ * the calculated clock rate
*/
- c->total_cpu_time = save_total_cpu_time_in_seconds * c->clocks_per_second;
+ c->total_cpu_time =
+ (now_reference - c->init_reference_time) * c->clocks_per_second;
+}
+
+
+u8 *
+format_clib_time (u8 * s, va_list * args)
+{
+ clib_time_t *c = va_arg (*args, clib_time_t *);
+ int verbose = va_arg (*args, int);
+ f64 now, reftime, delta_reftime_in_seconds, error;
+
+ /* Compute vpp elapsed time from the CPU clock */
+ reftime = unix_time_now ();
+ now = clib_time_now (c);
+
+ s = format (s, "Time now %.6f", now);
+ if (verbose == 0)
+ return s;
+
+ /* And also from the kernel */
+ delta_reftime_in_seconds = reftime - c->init_reference_time;
+
+ error = now - delta_reftime_in_seconds;
- /* Double time between verifies; max at 64 secs ~ 1 minute. */
- if (c->log2_clocks_per_frequency_verify < c->log2_clocks_per_second + 6)
- c->log2_clocks_per_frequency_verify += 1;
+ s = format (s, ", reftime %.6f, error %.6f, clocks/sec %.6f",
+ delta_reftime_in_seconds, error, c->clocks_per_second);
+ return (s);
}
/*
diff --git a/src/vppinfra/time.h b/src/vppinfra/time.h
index b2d749256d3..4d8997f0a9e 100644
--- a/src/vppinfra/time.h
+++ b/src/vppinfra/time.h
@@ -39,6 +39,7 @@
#define included_time_h
#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
typedef struct
{
@@ -56,10 +57,9 @@ typedef struct
from clock cycles into seconds. */
f64 seconds_per_clock;
- f64 round_to_units;
-
/* Time stamp of call to clib_time_init call. */
u64 init_cpu_time;
+ f64 init_reference_time;
u64 last_verify_cpu_time;
@@ -67,8 +67,14 @@ typedef struct
f64 last_verify_reference_time;
u32 log2_clocks_per_second, log2_clocks_per_frequency_verify;
+
+ /* Damping constant */
+ f64 damping_constant;
+
} clib_time_t;
+format_function_t format_clib_time;
+
/* Return CPU time stamp as 64bit number. */
#if defined(__x86_64__) || defined(i386)
always_inline u64
diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c
index 07e300f5eac..174ffaceb47 100644
--- a/src/vppinfra/tw_timer_template.c
+++ b/src/vppinfra/tw_timer_template.c
@@ -421,7 +421,6 @@ TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw,
}
tw->timer_interval = timer_interval_in_seconds;
tw->ticks_per_second = 1.0 / timer_interval_in_seconds;
- tw->first_expires_tick = ~0ULL;
vec_validate (tw->expired_timer_handles, 0);
_vec_len (tw->expired_timer_handles) = 0;
@@ -514,7 +513,7 @@ static inline
u32 slow_wheel_index __attribute__ ((unused));
u32 glacier_wheel_index __attribute__ ((unused));
- /* Shouldn't happen */
+ /* Called too soon to process new timer expirations? */
if (PREDICT_FALSE (now < tw->next_run_time))
return callback_vector_arg;
@@ -526,6 +525,14 @@ static inline
/* Remember when we ran, compute next runtime */
tw->next_run_time = (now + tw->timer_interval);
+ /* First call, or time jumped backwards? */
+ if (PREDICT_FALSE
+ ((tw->last_run_time == 0.0) || (now <= tw->last_run_time)))
+ {
+ tw->last_run_time = now;
+ return callback_vector_arg;
+ }
+
if (callback_vector_arg == 0)
{
_vec_len (tw->expired_timer_handles) = 0;