diff options
author | Dave Barach <dave@barachs.net> | 2017-04-01 11:07:40 -0400 |
---|---|---|
committer | Damjan Marion <dmarion.lists@gmail.com> | 2017-04-01 17:22:31 +0000 |
commit | 903fd513e32a37e55aec0cfb4cf30e000680e0c3 (patch) | |
tree | c8e85da35fe9d9107cb3737537ccb4957fddc9e0 /src/vppinfra | |
parent | a3af337e06a79f7d1dacf42a319f241c907122fc (diff) |
Clean up event log merge code
Fix a decade-old ridiculous qsort function bug. The comparison function
subtracted floating-point timestamps as if they were integers, leading to
manufactured time-paradoxes. That completely confuses g2, leading to
the summary disappearance of entire tracks' worth of data at high zoom
levels.
Add a manual alignment tweak parameter to elog_merge so that users can
dial out time paradoxes caused by NTP-grade clock synchronization.
The event-logger has a precision of O(100ns), whereas NTP
synchronization is O(1ms).
Change-Id: I69dedabaa314f69f9df74ec9ee66e21e6c87f703
Signed-off-by: Dave Barach <dave@barachs.net>
Diffstat (limited to 'src/vppinfra')
-rw-r--r-- | src/vppinfra/elog.c | 81 | ||||
-rw-r--r-- | src/vppinfra/elog.h | 206 | ||||
-rw-r--r-- | src/vppinfra/test_elog.c | 59 |
3 files changed, 276 insertions, 70 deletions
diff --git a/src/vppinfra/elog.c b/src/vppinfra/elog.c index e9f06d0948c..12e3f5d4bd8 100644 --- a/src/vppinfra/elog.c +++ b/src/vppinfra/elog.c @@ -77,6 +77,7 @@ new_event_type (elog_main_t * em, uword i) em->event_type_by_format = hash_create_vec ( /* size */ 0, sizeof (u8), sizeof (uword)); + t->type_index_plus_one = i + 1; hash_set_mem (em->event_type_by_format, t->format, i); } @@ -400,14 +401,15 @@ void elog_time_now (elog_time_stamp_t * et) { u64 cpu_time_now, os_time_now_nsec; + struct timespec ts; #ifdef CLIB_UNIX { #include <sys/syscall.h> - struct timespec ts; syscall (SYS_clock_gettime, CLOCK_REALTIME, &ts); cpu_time_now = clib_cpu_time_now (); - os_time_now_nsec = 1e9 * ts.tv_sec + ts.tv_nsec; + /* Subtract 3/30/2017's worth of seconds to retain precision */ + os_time_now_nsec = 1e9 * (ts.tv_sec - 1490885108) + ts.tv_nsec; } #else cpu_time_now = clib_cpu_time_now (); @@ -600,11 +602,21 @@ elog_cmp (void *a1, void *a2) elog_event_t *e1 = a1; elog_event_t *e2 = a2; - return e1->time - e2->time; + if (e1->time < e2->time) + return -1; + + if (e1->time > e2->time) + return 1; + + return 0; } +/* + * merge two event logs. Complicated and cranky. 
+ */ void -elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag) +elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag, + f64 align_tweak) { elog_event_t *e; uword l; @@ -615,6 +627,7 @@ elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag) memset (&newt, 0, sizeof (newt)); + /* Acquire src and dst events */ elog_get_events (src); elog_get_events (dst); @@ -622,7 +635,7 @@ elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag) vec_append (dst->string_table, src->string_table); l = vec_len (dst->events); - vec_add (dst->events, src->events, vec_len (src->events)); + vec_append (dst->events, src->events); /* Prepend the supplied tag (if any) to all dst track names */ if (dst_tag) @@ -638,6 +651,9 @@ elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag) } } + /* + * Remember where we started allocating new tracks while merging + */ track_offset_for_src_tracks = vec_len (dst->tracks); /* Copy / tag source tracks */ @@ -688,10 +704,18 @@ elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag) (elog_time_stamp_diff_cpu (&src->init_time, &dst->init_time) * .5 * (dst->nsec_per_cpu_clock + src->nsec_per_cpu_clock)); - /* Heuristic to see if src/dst came from same time source. - If frequencies are "the same" and os clock and cpu clock agree - to within 100e-9 secs about time difference between src/dst - init_time, then we use cpu clock. Otherwise we use OS clock. */ + /* + * Heuristic to see if src/dst came from same time source. + * If frequencies are "the same" and os clock and cpu clock agree + * to within 100e-9 secs about time difference between src/dst + * init_time, then we use cpu clock. Otherwise we use OS clock. + * + * When merging event logs from different systems, time paradoxes + * at the O(1ms) level are to be expected. Hence, the "align_tweak" + * parameter. 
If two events logged on different processors are known + * to occur in a specific order - and with a reasonably-estimated + * interval - supply a non-zero "align_tweak" parameter + */ if (fabs (src->nsec_per_cpu_clock - dst->nsec_per_cpu_clock) < 1e-2 && fabs (dt_os_nsec - dt_clock_nsec) < 100) dt_event = dt_clock_nsec; @@ -699,23 +723,45 @@ elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag) /* Convert to seconds. */ dt_event *= 1e-9; + /* + * Move the earlier set of events later, to avoid creating + * events which preceed the Big Bang (aka have negative timestamps). + * + * Not to any scale, we have something like the following picture: + * + * DST capture start point + * ^ + * +--- dt_event --+ + * v + * SRC capture start point + * + * In this case dt_event is positive, src started after dst, + * to put src events onto a common timebase we have to move them + * forward in time. Naturally, the opposite case is + * possible, too: dt_event will be negative, and so we have to + * move dst events forward in time by the |dt_event|. + * In both cases, we add align_tweak. + */ if (dt_event > 0) { /* Src started after dst. */ for (e = dst->events + l; e < vec_end (dst->events); e++) - e->time += dt_event; + e->time += dt_event + align_tweak; } else { /* Dst started after src. */ + dt_event = -dt_event; for (e = dst->events + 0; e < dst->events + l; e++) - e->time += dt_event; + e->time += dt_event + align_tweak; } } /* Sort events by increasing time. 
*/ vec_sort_with_function (dst->events, elog_cmp); + dst->n_total_events = vec_len (dst->events); + /* Recreate the event ring or the results won't serialize */ { int i; @@ -731,12 +777,7 @@ elog_merge (elog_main_t * dst, u8 * dst_tag, elog_main_t * src, u8 * src_tag) ed = dst->event_ring + i; ed[0] = es[0]; - - /* Invert elog_peek_events calculation */ - ed->time_cycles = - (es->time / dst->cpu_timer.seconds_per_clock) + dst->init_time.cpu; } - dst->n_total_events = vec_len (dst->events); } } @@ -990,6 +1031,7 @@ void serialize_elog_main (serialize_main_t * m, va_list * va) { elog_main_t *em = va_arg (*va, elog_main_t *); + int flush_ring = va_arg (*va, int); elog_event_t *e; serialize_magic (m, elog_serialize_magic, strlen (elog_serialize_magic)); @@ -1005,8 +1047,11 @@ serialize_elog_main (serialize_main_t * m, va_list * va) vec_serialize (m, em->string_table, serialize_vec_8); /* Free old events (cached) in case they have changed. */ - vec_free (em->events); - elog_get_events (em); + if (flush_ring) + { + vec_free (em->events); + elog_get_events (em); + } serialize_integer (m, vec_len (em->events), sizeof (u32)); diff --git a/src/vppinfra/elog.h b/src/vppinfra/elog.h index 9756fb83a8d..359868dd0f0 100644 --- a/src/vppinfra/elog.h +++ b/src/vppinfra/elog.h @@ -35,7 +35,16 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -/* High speed event logging with much thanks to Dave Barach. */ +/* High speed event logger */ + +/** \file + The fine-grained event logger allows lightweight, thread-safe + event logging at minimum cost. In typical operation, logging + a single event costs around 80ns on x86_64. It's appropriate + for at-least per-frame event-logging in vector packet processing. + + See https://wiki.fd.io/view/VPP/elog for more information. +*/ #ifndef included_clib_elog_h #define included_clib_elog_h @@ -50,38 +59,38 @@ typedef struct { union { - /* Absolute time stamp in CPU clock cycles. */ + /** Absolute time stamp in CPU clock cycles. 
*/ u64 time_cycles; - /* Absolute time as floating point number in seconds. */ + /** Absolute time as floating point number in seconds. */ f64 time; }; - /* Event type index. */ + /** Event type index. */ u16 type; - /* Track for this event. Tracks allow events to be sorted and + /** Track for this event. Tracks allow events to be sorted and displayed by track. Think of 2 dimensional display with time and track being the x and y axes. */ u16 track; - /* 20-bytes of data follows and pads to 32 bytes. */ + /** 20-bytes of data follows, pads to 32 bytes. */ u8 data[20]; } elog_event_t; typedef struct { - /* Type index plus one assigned to this type. + /** Type index plus one assigned to this type. This is used to mark type as seen. */ u32 type_index_plus_one; - /* String table as a vector constructed when type is registered. */ + /** String table as a vector constructed when type is registered. */ char **enum_strings_vector; - /* Format string. (example: "my-event (%d,%d)"). */ + /** Format string. (example: "my-event (%d,%d)"). */ char *format; - /* Specifies how arguments to format are parsed from event data. + /** Specifies how arguments to format are parsed from event data. String of characters '0' '1' or '2' '3' to specify log2 size of data (e.g. for u8, u16, u32 or u64), 's' means a null-terminated C string @@ -90,97 +99,110 @@ typedef struct 'f' is a double. */ char *format_args; - /* Function name generating event. */ + /** Function name generating event. */ char *function; - /* Number of elements in string enum table. */ + /** Number of elements in string enum table. */ u32 n_enum_strings; - /* String table for enum/number to string formatting. */ + /** String table for enum/number to string formatting. */ char *enum_strings[]; } elog_event_type_t; typedef struct { - /* Track name vector. */ + /** Track name vector. */ char *name; - /* Set to one when track has been added to + /** Set to one when track has been added to main structure. 
*/ u32 track_index_plus_one; } elog_track_t; typedef struct { - /* CPU cycle counter. */ + /** CPU cycle counter. */ u64 cpu; - /* OS timer in nano secs since epoch Jan 1 1970. */ + /** OS timer in nano secs since epoch 3/30/2017, see elog_time_now() */ u64 os_nsec; } elog_time_stamp_t; typedef struct { - /* Total number of events in buffer. */ + /** Total number of events in buffer. */ u32 n_total_events; - /* When count reaches limit logging is disabled. This is + /** When count reaches limit logging is disabled. This is used for event triggers. */ u32 n_total_events_disable_limit; - /* Dummy event to use when logger is disabled. */ + /** Dummy event to use when logger is disabled. */ elog_event_t dummy_event; - /* Power of 2 number of elements in ring. */ + /** Power of 2 number of elements in ring. */ uword event_ring_size; - /* Vector of events (circular buffer). Power of 2 size. - Used when events are being collected. */ + /** Vector of events (circular buffer). Power of 2 size. + Used when events are being collected. */ elog_event_t *event_ring; - /* Vector of event types. */ + /** Vector of event types. */ elog_event_type_t *event_types; - /* Hash table mapping type format to type index. */ + /** Hash table mapping type format to type index. */ uword *event_type_by_format; - /* Events may refer to strings in string table. */ + /** Events may refer to strings in string table. */ char *string_table; - /* Vector of tracks. */ + /** Vector of tracks. */ elog_track_t *tracks; - /* Default track. */ + /** Default track. */ elog_track_t default_track; - /* Place holder for CPU clock frequency. */ + /** Place holder for CPU clock frequency. */ clib_time_t cpu_timer; + /** Timestamps */ elog_time_stamp_t init_time, serialize_time; - /* SMP lock, non-zero means locking required */ + /** SMP lock, non-zero means locking required */ uword *lock; - /* Use serialize_time and init_time to give estimate for - cpu clock frequency. 
*/ + /** Use serialize_time and init_time to give estimate for + cpu clock frequency. */ f64 nsec_per_cpu_clock; - /* Vector of events converted to generic form after collection. */ + /** Vector of events converted to generic form after collection. */ elog_event_t *events; } elog_main_t; +/** @brief Return number of events in the event-log buffer + @param em elog_main_t * + @return number of events in the buffer +*/ + always_inline uword elog_n_events_in_buffer (elog_main_t * em) { return clib_min (em->n_total_events, em->event_ring_size); } +/** @brief Return number of events which can fit in the event buffer + @param em elog_main_t * + @return number of events which can fit in the buffer +*/ always_inline uword elog_buffer_capacity (elog_main_t * em) { return em->event_ring_size; } +/** @brief Reset the event buffer + @param em elog_main_t * +*/ always_inline void elog_reset_buffer (elog_main_t * em) { @@ -188,6 +210,9 @@ elog_reset_buffer (elog_main_t * em) em->n_total_events_disable_limit = ~0; } +/** @brief Enable or disable event logging + @param em elog_main_t * +*/ always_inline void elog_enable_disable (elog_main_t * em, int is_enabled) { @@ -195,18 +220,27 @@ elog_enable_disable (elog_main_t * em, int is_enabled) em->n_total_events_disable_limit = is_enabled ? ~0 : 0; } -/* Disable logging after specified number of ievents have been logged. +/** @brief disable logging after specified number of ievents have been logged. + This is used as a "debug trigger" when a certain event has occurred. Events will be logged both before and after the "event" but the - event will not be lost as long as N < RING_SIZE. */ + event will not be lost as long as N < RING_SIZE. + + @param em elog_main_t * + @param n uword number of events before disabling event logging +*/ always_inline void elog_disable_after_events (elog_main_t * em, uword n) { em->n_total_events_disable_limit = em->n_total_events + n; } -/* Signal a trigger. 
We do this when we encounter an event that we want to save - context around (before and after). */ +/* @brief mid-buffer logic-analyzer trigger + + Currently, only midpoint triggering is supported, but it's pretty obvious + how to generalize the scheme. + @param em elog_main_t * +*/ always_inline void elog_disable_trigger (elog_main_t * em) { @@ -214,18 +248,44 @@ elog_disable_trigger (elog_main_t * em) em->n_total_events + vec_len (em->event_ring) / 2; } -/* External function to register types/tracks. */ +/** @brief register an event type + @param em elog_main_t * + @param t elog_event_type_t * event to register + @return type index + @warning Typically not called directly +*/ + word elog_event_type_register (elog_main_t * em, elog_event_type_t * t); + +/** @brief register an event track + @param em elog_main_t * + @param t elog_track_t * track to register + @return track index + @note this function is often called directly +*/ word elog_track_register (elog_main_t * em, elog_track_t * t); +/** @brief event logging enabled predicate + @param em elog_main_t * + @return 1 if enabled, 0 if not enabled +*/ always_inline uword elog_is_enabled (elog_main_t * em) { return em->n_total_events < em->n_total_events_disable_limit; } -/* Add an event to the log. Returns a pointer to the - data for caller to write into. */ +/** @brief Allocate an event to be filled in by the caller + + Not normally called directly; this function underlies the + ELOG_DATA and ELOG_TRACK_DATA macros + + @param em elog_main_t * + @param type elog_event_type_t * type + @param track elog_track_t * track + @param cpu_time u64 current cpu tick value + @returns event to be filled in +*/ always_inline void * elog_event_data_inline (elog_main_t * em, elog_event_type_t * type, @@ -274,7 +334,17 @@ void *elog_event_data (elog_main_t * em, elog_event_type_t * type, elog_track_t * track, u64 cpu_time); -/* Non-inline version. 
*/ +/** @brief Allocate an event to be filled in by the caller, non-inline + + Not normally called directly; this function underlies the + ELOG_DATA and ELOG_TRACK_DATA macros + + @param em elog_main_t * + @param type elog_event_type_t * type + @param track elog_track_t * track + @param cpu_time u64 current cpu tick value + @returns event to be filled in +*/ always_inline void * elog_event_data_not_inline (elog_main_t * em, elog_event_type_t * type, @@ -286,7 +356,11 @@ elog_event_data_not_inline (elog_main_t * em, return elog_event_data (em, type, track, cpu_time); } -/* Most common forms: log a single 32 bit datum, w / w-out track */ +/** @brief Log a single-datum event + @param em elog_main_t * + @param type elog_event_type_t * type + @param data u32 single datum to capture +*/ always_inline void elog (elog_main_t * em, elog_event_type_t * type, u32 data) { @@ -297,7 +371,11 @@ elog (elog_main_t * em, elog_event_type_t * type, u32 data) d[0] = data; } -/* Inline version of above. */ +/** @brief Log a single-datum event, inline version + @param em elog_main_t * + @param type elog_event_type_t * type + @param data u32 single datum to capture +*/ always_inline void elog_inline (elog_main_t * em, elog_event_type_t * type, u32 data) { @@ -308,6 +386,12 @@ elog_inline (elog_main_t * em, elog_event_type_t * type, u32 data) d[0] = data; } +/** @brief Log a single-datum event to a specific track, non-inline version + @param em elog_main_t * + @param type elog_event_type_t * type + @param type elog_event_track_t * track + @param data u32 single datum to capture +*/ always_inline void elog_track (elog_main_t * em, elog_event_type_t * type, elog_track_t * track, u32 data) @@ -319,6 +403,12 @@ elog_track (elog_main_t * em, elog_event_type_t * type, elog_track_t * track, d[0] = data; } +/** @brief Log a single-datum event to a specific track + @param em elog_main_t * + @param type elog_event_type_t * type + @param type elog_event_track_t * track + @param data u32 single datum 
to capture +*/ always_inline void elog_track_inline (elog_main_t * em, elog_event_type_t * type, elog_track_t * track, u32 data) @@ -392,19 +482,37 @@ elog_data_inline (elog_main_t * em, elog_event_type_t * type, #define ELOG_DATA(em,f) elog_data ((em), &__ELOG_TYPE_VAR (f), &(em)->default_track) #define ELOG_DATA_INLINE(em,f) elog_data_inline ((em), &__ELOG_TYPE_VAR (f), &(em)->default_track) +/** @brief add a string to the event-log string table + + Often combined with hashing and the T4 elog format specifier to + display complex strings in offline tooling + + @param em elog_main_t * + @param format char * + @param VARARGS + @return u32 index to add to event log +*/ u32 elog_string (elog_main_t * em, char *format, ...); + void elog_time_now (elog_time_stamp_t * et); -/* Convert ievents to events and return them as a vector. - Sets em->events to resulting vector. */ +/** @brief convert event ring events to events, and return them as a vector. + @param em elog_main_t * + @return event vector with timestamps in f64 seconds + @note sets em->events to resulting vector. +*/ elog_event_t *elog_get_events (elog_main_t * em); -/* Convert ievents to events and return them as a vector with no side effects. */ +/** @brief convert event ring events to events, and return them as a vector. + @param em elog_main_t * + @return event vector with timestamps in f64 seconds + @note no side effects +*/ elog_event_t *elog_peek_events (elog_main_t * em); /* Merge two logs, add supplied track tags. */ void elog_merge (elog_main_t * dst, u8 * dst_tag, - elog_main_t * src, u8 * src_tag); + elog_main_t * src, u8 * src_tag, f64 align_tweak); /* 2 arguments elog_main_t and elog_event_t to format event or track name. 
*/ u8 *format_elog_event (u8 * s, va_list * va); @@ -418,7 +526,7 @@ void elog_alloc (elog_main_t * em, u32 n_events); #ifdef CLIB_UNIX always_inline clib_error_t * -elog_write_file (elog_main_t * em, char *unix_file) +elog_write_file (elog_main_t * em, char *unix_file, int flush_ring) { serialize_main_t m; clib_error_t *error; @@ -426,7 +534,7 @@ elog_write_file (elog_main_t * em, char *unix_file) error = serialize_open_unix_file (&m, unix_file); if (error) return error; - error = serialize (&m, serialize_elog_main, em); + error = serialize (&m, serialize_elog_main, em, flush_ring); if (!error) serialize_close (&m); return error; diff --git a/src/vppinfra/test_elog.c b/src/vppinfra/test_elog.c index 89905adb4be..1cf5ba1f75c 100644 --- a/src/vppinfra/test_elog.c +++ b/src/vppinfra/test_elog.c @@ -52,6 +52,8 @@ test_elog_main (unformat_input_t * input) f64 min_sample_time; char *dump_file, *load_file, *merge_file, **merge_files; u8 *tag, **tags; + f64 align_tweak; + f64 *align_tweaks; n_iter = 100; max_events = 100000; @@ -61,6 +63,7 @@ test_elog_main (unformat_input_t * input) load_file = 0; merge_files = 0; tags = 0; + align_tweaks = 0; min_sample_time = 2; while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { @@ -83,6 +86,8 @@ test_elog_main (unformat_input_t * input) ; else if (unformat (input, "sample-time %f", &min_sample_time)) ; + else if (unformat (input, "align-tweak %f", &align_tweak)) + vec_add1 (align_tweaks, align_tweak); else { error = clib_error_create ("unknown input `%U'\n", @@ -102,9 +107,15 @@ test_elog_main (unformat_input_t * input) { uword i; elog_main_t *ems; - vec_clone (ems, merge_files); + /* Supply default tags as needed */ + if (vec_len (tags) < vec_len (ems)) + { + for (i = vec_len (tags); i < vec_len (ems); i++) + vec_add1 (tags, format (0, "F%d%c", i, 0)); + } + elog_init (em, max_events); for (i = 0; i < vec_len (ems); i++) { @@ -113,7 +124,10 @@ test_elog_main (unformat_input_t * input) goto done; if (i > 0) { - elog_merge 
(em, tags[0], &ems[i], tags[i]); + align_tweak = 0.0; + if (i <= vec_len (align_tweaks)) + align_tweak = align_tweaks[i - 1]; + elog_merge (em, tags[0], &ems[i], tags[i], align_tweak); tags[0] = 0; } } @@ -217,7 +231,8 @@ test_elog_main (unformat_input_t * input) #ifdef CLIB_UNIX if (dump_file) { - if ((error = elog_write_file (em, dump_file))) + if ((error = + elog_write_file (em, dump_file, 0 /* do not flush ring */ ))) goto done; } #endif @@ -246,6 +261,8 @@ main (int argc, char *argv[]) unformat_input_t i; int r; + clib_mem_init (0, 3ULL << 30); + unformat_init_command_line (&i, argv); r = test_elog_main (&i); unformat_free (&i); @@ -253,6 +270,42 @@ main (int argc, char *argv[]) } #endif +/** + * @brief GDB callable function: vl - Return vector length of vector + * + * @param *p - void - address of vector + * + * @return length - u32 + * + */ +u32 +vl (void *p) +{ + return vec_len (p); +} + +/** + * @brief GDB callable function: pe - call pool_elts - number of elements in a pool + * + * @param *v - void - address of pool + * + * @return number - uword + * + */ +#include <vppinfra/pool.h> +uword +pe (void *v) +{ + return (pool_elts (v)); +} + +#include <vppinfra/hash.h> +uword +he (void *v) +{ + return (hash_elts (v)); +} + /* * fd.io coding-style-patch-verification: ON * |