diff options
Diffstat (limited to 'src/vppinfra/unix-kelog.c')
-rw-r--r-- | src/vppinfra/unix-kelog.c | 415 |
1 files changed, 415 insertions, 0 deletions
diff --git a/src/vppinfra/unix-kelog.c b/src/vppinfra/unix-kelog.c new file mode 100644 index 00000000000..88428ee8f2e --- /dev/null +++ b/src/vppinfra/unix-kelog.c @@ -0,0 +1,415 @@ +/* + Copyright (c) 2010 Cisco and/or its affiliates. + + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +#include <vppinfra/error.h> +#include <vppinfra/unix.h> +#include <vppinfra/elog.h> +#include <vppinfra/format.h> +#include <vppinfra/os.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +typedef enum +{ + RUNNING = 0, + WAKEUP, +} sched_event_type_t; + +typedef struct +{ + u32 cpu; + u8 *task; + u32 pid; + f64 timestamp; + sched_event_type_t type; +} sched_event_t; + +void +kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events) +{ + int enable_fd, current_tracer_fd, data_fd; + int len; + struct timespec ts, ts2; + char *trace_enable = "/debug/tracing/tracing_enabled"; + char *current_tracer = "/debug/tracing/current_tracer"; + char *trace_data = "/debug/tracing/trace"; + f64 realtime, monotonic; + f64 freq, secs_per_clock; + + ASSERT (kernel_tracer); + + /*$$$$ fixme */ + n_events = 1 << 18; + + /* init first so we won't hurt ourselves if we bail */ + elog_init (em, n_events); + + enable_fd = open (trace_enable, O_RDWR); + if (enable_fd < 0) + { + clib_warning ("Couldn't open %s", trace_enable); + return; + } + /* disable kernel tracing */ + if (write (enable_fd, "0\n", 2) != 2) + { + clib_unix_warning ("disable tracing"); + close (enable_fd); + return; + } + + /* + * open + clear the data buffer. + * see .../linux/kernel/trace/trace.c:tracing_open() + */ + data_fd = open (trace_data, O_RDWR | O_TRUNC); + if (data_fd < 0) + { + clib_warning ("Couldn't open+clear %s", trace_data); + return; + } + close (data_fd); + + /* configure tracing */ + current_tracer_fd = open (current_tracer, O_RDWR); + + if (current_tracer_fd < 0) + { + clib_warning ("Couldn't open %s", current_tracer); + close (enable_fd); + return; + } + + len = strlen (kernel_tracer); + + if (write (current_tracer_fd, kernel_tracer, len) != len) + { + clib_unix_warning ("configure trace"); + close (current_tracer_fd); + close (enable_fd); + return; + } + + close (current_tracer_fd); + + /* + * The kernel event log uses CLOCK_MONOTONIC timestamps, + * not CLOCK_REALTIME timestamps. These differ by a constant + * but the constant is not available in user mode. + * This estimate will be off by one syscall round-trip. + */ + clib_time_init (&em->cpu_timer); + em->init_time.cpu = em->cpu_timer.init_cpu_time; + syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts); + + /* enable kernel tracing */ + if (write (enable_fd, "1\n", 2) != 2) + { + clib_unix_warning ("enable tracing"); + close (enable_fd); + return; + } + + close (enable_fd); +} + + +u8 * +format_sched_event (u8 * s, va_list * va) +{ + sched_event_t *e = va_arg (*va, sched_event_t *); + + s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n", + e->cpu, e->task, e->type ? "WAKEUP " : "RUNNING", e->timestamp); + + return s; +} + +sched_event_t * +parse_sched_switch_trace (u8 * tdata, u32 * index) +{ + u8 *cp = tdata + *index; + u8 *limit = tdata + vec_len (tdata); + int colons; + static sched_event_t event; + sched_event_t *e = &event; + static u8 *task_name; + u32 secs, usecs; + int i; + +again: + /* eat leading w/s */ + while (cp < limit && (*cp == ' ' && *cp == '\t')) + cp++; + if (cp == limit) + return 0; + + /* header line */ + if (*cp == '#') + { + while (cp < limit && (*cp != '\n')) + cp++; + if (*cp == '\n') + { + cp++; + goto again; + } + clib_warning ("bugger 0"); + return 0; + } + + while (cp < limit && *cp != ']') + cp++; + + if (*cp == 0) + return 0; + + if (*cp != ']') + { + clib_warning ("bugger 0.1"); + return 0; + } + + cp++; + while (cp < limit && (*cp == ' ' && *cp == '\t')) + cp++; + if (cp == limit) + { + clib_warning ("bugger 0.2"); + return 0; + } + + secs = atoi (cp); + + while (cp < limit && (*cp != '.')) + cp++; + + if (cp == limit) + { + clib_warning ("bugger 0.3"); + return 0; + } + + cp++; + + usecs = atoi (cp); + + e->timestamp = ((f64) secs) + ((f64) usecs) * 1e-6; + + /* eat up to third colon */ + for (i = 0; i < 3; i++) + { + while (cp < limit && *cp != ':') + cp++; + cp++; + } + --cp; + if (*cp != ':') + { + clib_warning ("bugger 1"); + return 0; + } + /* aim at '>' (switch-to) / '+' (wakeup) */ + cp += 5; + if (cp >= limit) + { + clib_warning ("bugger 2"); + return 0; + } + if (*cp == '>') + e->type = RUNNING; + else if (*cp == '+') + e->type = WAKEUP; + else + { + clib_warning ("bugger 3"); + return 0; + } + + cp += 3; + if (cp >= limit) + { + clib_warning ("bugger 4"); + return 0; + } + + e->cpu = atoi (cp); + cp += 4; + + if (cp >= limit) + { + clib_warning ("bugger 4"); + return 0; + } + while (cp < limit && (*cp == ' ' || *cp == '\t')) + cp++; + + e->pid = atoi (cp); + + for (i = 0; i < 2; i++) + { + while (cp < limit && *cp != ':') + cp++; + cp++; + } + --cp; + if (*cp != ':') + { + clib_warning ("bugger 5"); + return 0; + } + + cp += 3; + if (cp >= limit) + { + clib_warning ("bugger 6"); + return 0; + } + while (cp < limit && (*cp != ' ' && *cp != '\n')) + { + vec_add1 (task_name, *cp); + cp++; + } + vec_add1 (task_name, 0); + /* _vec_len() = 0 in caller */ + e->task = task_name; + + if (cp < limit) + cp++; + + *index = cp - tdata; + return e; +} + +static u32 +elog_id_for_pid (elog_main_t * em, u8 * name, u32 pid) +{ + uword *p, r; + mhash_t *h = &em->string_table_hash; + + if (!em->string_table_hash.hash) + mhash_init (h, sizeof (uword), sizeof (pid)); + + p = mhash_get (h, &pid); + if (p) + return p[0]; + r = elog_string (em, "%s(%d)", name, pid); + mhash_set (h, &pid, r, /* old_value */ 0); + return r; +} + +void +kelog_collect_sched_switch_trace (elog_main_t * em) +{ + int enable_fd, data_fd; + char *trace_enable = "/debug/tracing/tracing_enabled"; + char *trace_data = "/debug/tracing/trace"; + u8 *data = 0; + u8 *dp; + int bytes, total_bytes; + u32 pos; + sched_event_t *evt; + u64 nsec_to_add; + u32 index; + f64 clocks_per_sec; + + enable_fd = open (trace_enable, O_RDWR); + if (enable_fd < 0) + { + clib_warning ("Couldn't open %s", trace_enable); + return; + } + /* disable kernel tracing */ + if (write (enable_fd, "0\n", 2) != 2) + { + clib_unix_warning ("disable tracing"); + close (enable_fd); + return; + } + close (enable_fd); + + /* Read the trace data */ + data_fd = open (trace_data, O_RDWR); + if (data_fd < 0) + { + clib_warning ("Couldn't open %s", trace_data); + return; + } + + /* + * Extract trace into a vector. Note that seq_printf() [kernel] + * is not guaranteed to produce 4096 bytes at a time. + */ + vec_validate (data, 4095); + total_bytes = 0; + pos = 0; + while (1) + { + bytes = read (data_fd, data + pos, 4096); + if (bytes <= 0) + break; + + total_bytes += bytes; + _vec_len (data) = total_bytes; + + pos = vec_len (data); + vec_validate (data, vec_len (data) + 4095); + } + vec_add1 (data, 0); + + /* Synthesize events */ + em->is_enabled = 1; + + index = 0; + while ((evt = parse_sched_switch_trace (data, &index))) + { + u64 fake_cpu_clock; + + fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second; + { + ELOG_TYPE_DECLARE (e) = + { + .format = "%d: %s %s",.format_args = "i4T4t4",.n_enum_strings = + 2,.enum_strings = + { + "running", "wakeup",} + ,}; + struct + { + u32 cpu, string_table_offset, which; + } *ed; + + ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR (e), + &em->default_track, fake_cpu_clock); + ed->cpu = evt->cpu; + ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid); + ed->which = evt->type; + } + _vec_len (evt->task) = 0; + } + em->is_enabled = 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |