/* Copyright (c) 2010 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include #include #include #include typedef enum { RUNNING = 0, WAKEUP, } sched_event_type_t; typedef struct { u32 cpu; u8 *task; u32 pid; f64 timestamp; sched_event_type_t type; } sched_event_t; void kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events) { int enable_fd, current_tracer_fd, data_fd; int len; struct timespec ts, ts2; char *trace_enable = "/debug/tracing/tracing_enabled"; char *current_tracer = "/debug/tracing/current_tracer"; char *trace_data = "/debug/tracing/trace"; f64 realtime, monotonic; f64 freq, secs_per_clock; ASSERT (kernel_tracer); /*$$$$ fixme */ n_events = 1 << 18; /* init first so we won't hurt ourselves if we bail */ elog_init (em, n_events); enable_fd = open (trace_enable, O_RDWR); if (enable_fd < 0) { clib_warning ("Couldn't open %s", trace_enable); return; } /* disable kernel tracing */ if (write (enable_fd, "0\n", 2) != 2) { clib_unix_warning ("disable tracing"); close (enable_fd); return; } /* * open + clear the data buffer. * see .../linux/kernel/trace/trace.c:tracing_open() */ data_fd = open (trace_data, O_RDWR | O_TRUNC); if (data_fd < 0) { clib_warning ("Couldn't open+clear %s", trace_data); return; } close (data_fd); /* configure tracing */ current_tracer_fd = open (current_tracer, O_RDWR); if (current_tracer_fd < 0) { clib_warning ("Couldn't open %s", current_tracer); close (enable_fd); return; } len = strlen (kernel_tracer); if (write (current_tracer_fd, kernel_tracer, len) != len) { clib_unix_warning ("configure trace"); close (current_tracer_fd); close (enable_fd); return; } close (current_tracer_fd); /* * The kernel event log uses CLOCK_MONOTONIC timestamps, * not CLOCK_REALTIME timestamps. These differ by a constant * but the constant is not available in user mode. * This estimate will be off by one syscall round-trip. */ clib_time_init (&em->cpu_timer); em->init_time.cpu = em->cpu_timer.init_cpu_time; syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts); /* enable kernel tracing */ if (write (enable_fd, "1\n", 2) != 2) { clib_unix_warning ("enable tracing"); close (enable_fd); return; } close (enable_fd); } u8 * format_sched_event (u8 * s, va_list * va) { sched_event_t *e = va_arg (*va, sched_event_t *); s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n", e->cpu, e->task, e->type ? "WAKEUP " : "RUNNING", e->timestamp); return s; } sched_event_t * parse_sched_switch_trace (u8 * tdata, u32 * index) { u8 *cp = tdata + *index; u8 *limit = tdata + vec_len (tdata); int colons; static sched_event_t event; sched_event_t *e = &event; static u8 *task_name; u32 secs, usecs; int i; again: /* eat leading w/s */ while (cp < limit && (*cp == ' ' && *cp == '\t')) cp++; if (cp == limit) return 0; /* header line */ if (*cp == '#') { while (cp < limit && (*cp != '\n')) cp++; if (*cp == '\n') { cp++; goto again; } clib_warning ("bugger 0"); return 0; } while (cp < limit && *cp != ']') cp++; if (*cp == 0) return 0; if (*cp != ']') { clib_warning ("bugger 0.1"); return 0; } cp++; while (cp < limit && (*cp == ' ' && *cp == '\t')) cp++; if (cp == limit) { clib_warning ("bugger 0.2"); return 0; } secs = atoi (cp); while (cp < limit && (*cp != '.')) cp++; if (cp == limit) { clib_warning ("bugger 0.3"); return 0; } cp++; usecs = atoi (cp); e->timestamp = ((f64) secs) + ((f64) usecs) * 1e-6; /* eat up to third colon */ for (i = 0; i < 3; i++) { while (cp < limit && *cp != ':') cp++; cp++; } --cp; if (*cp != ':') { clib_warning ("bugger 1"); return 0; } /* aim at '>' (switch-to) / '+' (wakeup) */ cp += 5; if (cp >= limit) { clib_warning ("bugger 2"); return 0; } if (*cp == '>') e->type = RUNNING; else if (*cp == '+') e->type = WAKEUP; else { clib_warning ("bugger 3"); return 0; } cp += 3; if (cp >= limit) { clib_warning ("bugger 4"); return 0; } e->cpu = atoi (cp); cp += 4; if (cp >= limit) { clib_warning ("bugger 4"); return 0; } while (cp < limit && (*cp == ' ' || *cp == '\t')) cp++; e->pid = atoi (cp); for (i = 0; i < 2; i++) { while (cp < limit && *cp != ':') cp++; cp++; } --cp; if (*cp != ':') { clib_warning ("bugger 5"); return 0; } cp += 3; if (cp >= limit) { clib_warning ("bugger 6"); return 0; } while (cp < limit && (*cp != ' ' && *cp != '\n')) { vec_add1 (task_name, *cp); cp++; } vec_add1 (task_name, 0); /* _vec_len() = 0 in caller */ e->task = task_name; if (cp < limit) cp++; *index = cp - tdata; return e; } static u32 elog_id_for_pid (elog_main_t * em, u8 * name, u32 pid) { uword *p, r; mhash_t *h = &em->string_table_hash; if (!em->string_table_hash.hash) mhash_init (h, sizeof (uword), sizeof (pid)); p = mhash_get (h, &pid); if (p) return p[0]; r = elog_string (em, "%s(%d)", name, pid); mhash_set (h, &pid, r, /* old_value */ 0); return r; } void kelog_collect_sched_switch_trace (elog_main_t * em) { int enable_fd, data_fd; char *trace_enable = "/debug/tracing/tracing_enabled"; char *trace_data = "/debug/tracing/trace"; u8 *data = 0; u8 *dp; int bytes, total_bytes; u32 pos; sched_event_t *evt; u64 nsec_to_add; u32 index; f64 clocks_per_sec; enable_fd = open (trace_enable, O_RDWR); if (enable_fd < 0) { clib_warning ("Couldn't open %s", trace_enable); return; } /* disable kernel tracing */ if (write (enable_fd, "0\n", 2) != 2) { clib_unix_warning ("disable tracing"); close (enable_fd); return; } close (enable_fd); /* Read the trace data */ data_fd = open (trace_data, O_RDWR); if (data_fd < 0) { clib_warning ("Couldn't open %s", trace_data); return; } /* * Extract trace into a vector. Note that seq_printf() [kernel] * is not guaranteed to produce 4096 bytes at a time. */ vec_validate (data, 4095); total_bytes = 0; pos = 0; while (1) { bytes = read (data_fd, data + pos, 4096); if (bytes <= 0) break; total_bytes += bytes; _vec_len (data) = total_bytes; pos = vec_len (data); vec_validate (data, vec_len (data) + 4095); } vec_add1 (data, 0); /* Synthesize events */ em->is_enabled = 1; index = 0; while ((evt = parse_sched_switch_trace (data, &index))) { u64 fake_cpu_clock; fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second; { ELOG_TYPE_DECLARE (e) = { .format = "%d: %s %s",.format_args = "i4T4t4",.n_enum_strings = 2,.enum_strings = { "running", "wakeup",} ,}; struct { u32 cpu, string_table_offset, which; } *ed; ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR (e), &em->default_track, fake_cpu_clock); ed->cpu = evt->cpu; ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid); ed->which = evt->type; } _vec_len (evt->task) = 0; } em->is_enabled = 0; } /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */