aboutsummaryrefslogtreecommitdiffstats
path: root/vppinfra/vppinfra/unix-kelog.c
diff options
context:
space:
mode:
Diffstat (limited to 'vppinfra/vppinfra/unix-kelog.c')
-rw-r--r--vppinfra/vppinfra/unix-kelog.c400
1 files changed, 400 insertions, 0 deletions
diff --git a/vppinfra/vppinfra/unix-kelog.c b/vppinfra/vppinfra/unix-kelog.c
new file mode 100644
index 00000000000..21ac9921083
--- /dev/null
+++ b/vppinfra/vppinfra/unix-kelog.c
@@ -0,0 +1,400 @@
+/*
+ Copyright (c) 2010 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/error.h>
+#include <vppinfra/unix.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/format.h>
+#include <vppinfra/os.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+typedef enum
+ {
+ RUNNING = 0,
+ WAKEUP,
+ } sched_event_type_t;
+
+typedef struct
+{
+ u32 cpu;
+ u8 *task;
+ u32 pid;
+ f64 timestamp;
+ sched_event_type_t type;
+} sched_event_t;
+
+void kelog_init (elog_main_t * em, char * kernel_tracer, u32 n_events)
+{
+ int enable_fd, current_tracer_fd, data_fd;
+ int len;
+ struct timespec ts, ts2;
+ char *trace_enable = "/debug/tracing/tracing_enabled";
+ char *current_tracer = "/debug/tracing/current_tracer";
+ char *trace_data = "/debug/tracing/trace";
+ f64 realtime, monotonic;
+ f64 freq, secs_per_clock;
+
+ ASSERT (kernel_tracer);
+
+ /*$$$$ fixme */
+ n_events = 1<<18;
+
+ /* init first so we won't hurt ourselves if we bail */
+ elog_init (em, n_events);
+
+ enable_fd = open (trace_enable, O_RDWR);
+ if (enable_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_enable);
+ return;
+ }
+ /* disable kernel tracing */
+ if (write (enable_fd, "0\n", 2) != 2)
+ {
+ clib_unix_warning ("disable tracing");
+ close(enable_fd);
+ return;
+ }
+
+ /*
+ * open + clear the data buffer.
+ * see .../linux/kernel/trace/trace.c:tracing_open()
+ */
+ data_fd = open (trace_data, O_RDWR | O_TRUNC);
+ if (data_fd < 0)
+ {
+ clib_warning ("Couldn't open+clear %s", trace_data);
+ return;
+ }
+ close(data_fd);
+
+ /* configure tracing */
+ current_tracer_fd = open (current_tracer, O_RDWR);
+
+ if (current_tracer_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", current_tracer);
+ close(enable_fd);
+ return;
+ }
+
+ len = strlen(kernel_tracer);
+
+ if (write (current_tracer_fd, kernel_tracer, len) != len)
+ {
+ clib_unix_warning ("configure trace");
+ close(current_tracer_fd);
+ close(enable_fd);
+ return;
+ }
+
+ close(current_tracer_fd);
+
+ /*
+ * The kernel event log uses CLOCK_MONOTONIC timestamps,
+ * not CLOCK_REALTIME timestamps. These differ by a constant
+ * but the constant is not available in user mode.
+ * This estimate will be off by one syscall round-trip.
+ */
+ clib_time_init (&em->cpu_timer);
+ em->init_time.cpu = em->cpu_timer.init_cpu_time;
+ syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts);
+
+ /* enable kernel tracing */
+ if (write (enable_fd, "1\n", 2) != 2)
+ {
+ clib_unix_warning ("enable tracing");
+ close(enable_fd);
+ return;
+ }
+
+ close(enable_fd);
+}
+
+
+u8 *format_sched_event (u8 * s, va_list * va)
+{
+ sched_event_t *e = va_arg (*va, sched_event_t *);
+
+ s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n",
+ e->cpu, e->task, e->type ? "WAKEUP " : "RUNNING", e->timestamp);
+
+ return s;
+}
+
+sched_event_t *parse_sched_switch_trace (u8 *tdata, u32 *index)
+{
+ u8 *cp = tdata + *index;
+ u8 *limit = tdata + vec_len(tdata);
+ int colons;
+ static sched_event_t event;
+ sched_event_t *e = &event;
+ static u8 *task_name;
+ u32 secs, usecs;
+ int i;
+
+ again:
+ /* eat leading w/s */
+ while (cp < limit && (*cp == ' ' && *cp == '\t'))
+ cp++;
+ if (cp == limit)
+ return 0;
+
+ /* header line */
+ if (*cp == '#')
+ {
+ while (cp < limit && (*cp != '\n'))
+ cp++;
+ if (*cp == '\n')
+ {
+ cp++;
+ goto again;
+ }
+ clib_warning ("bugger 0");
+ return 0;
+ }
+
+ while (cp < limit && *cp != ']')
+ cp++;
+
+ if (*cp == 0)
+ return 0;
+
+ if (*cp != ']')
+ {
+ clib_warning ("bugger 0.1");
+ return 0;
+ }
+
+ cp++;
+ while (cp < limit && (*cp == ' ' && *cp == '\t'))
+ cp++;
+ if (cp == limit)
+ {
+ clib_warning ("bugger 0.2");
+ return 0;
+ }
+
+ secs = atoi(cp);
+
+ while (cp < limit && (*cp != '.'))
+ cp++;
+
+ if (cp == limit)
+ {
+ clib_warning ("bugger 0.3");
+ return 0;
+ }
+
+ cp++;
+
+ usecs = atoi (cp);
+
+ e->timestamp = ((f64)secs) + ((f64)usecs)*1e-6;
+
+ /* eat up to third colon */
+ for (i = 0; i < 3; i++)
+ {
+ while (cp < limit && *cp != ':')
+ cp++;
+ cp++;
+ }
+ --cp;
+ if (*cp != ':')
+ {
+ clib_warning ("bugger 1");
+ return 0;
+ }
+ /* aim at '>' (switch-to) / '+' (wakeup) */
+ cp += 5;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 2");
+ return 0;
+ }
+ if (*cp == '>')
+ e->type = RUNNING;
+ else if (*cp == '+')
+ e->type = WAKEUP;
+ else
+ {
+ clib_warning ("bugger 3");
+ return 0;
+ }
+
+ cp += 3;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 4");
+ return 0;
+ }
+
+ e->cpu = atoi (cp);
+ cp += 4;
+
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 4");
+ return 0;
+ }
+ while (cp < limit && (*cp == ' ' || *cp == '\t'))
+ cp++;
+
+ e->pid = atoi (cp);
+
+ for (i = 0; i < 2; i++)
+ {
+ while (cp < limit && *cp != ':')
+ cp++;
+ cp++;
+ }
+ --cp;
+ if (*cp != ':')
+ {
+ clib_warning ("bugger 5");
+ return 0;
+ }
+
+ cp += 3;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 6");
+ return 0;
+ }
+ while (cp < limit && (*cp != ' ' && *cp != '\n'))
+ {
+ vec_add1(task_name, *cp);
+ cp++;
+ }
+ vec_add1(task_name, 0);
+ /* _vec_len() = 0 in caller */
+ e->task = task_name;
+
+ if (cp < limit)
+ cp++;
+
+ *index = cp - tdata;
+ return e;
+}
+
+static u32 elog_id_for_pid (elog_main_t *em, u8 *name, u32 pid)
+{
+ uword * p, r;
+ mhash_t * h = &em->string_table_hash;
+
+ if (! em->string_table_hash.hash)
+ mhash_init (h, sizeof (uword), sizeof (pid));
+
+ p = mhash_get (h, &pid);
+ if (p)
+ return p[0];
+ r = elog_string (em, "%s(%d)", name, pid);
+ mhash_set (h, &pid, r, /* old_value */ 0);
+ return r;
+}
+
+void kelog_collect_sched_switch_trace (elog_main_t *em)
+{
+ int enable_fd, data_fd;
+ char *trace_enable = "/debug/tracing/tracing_enabled";
+ char *trace_data = "/debug/tracing/trace";
+ u8 *data = 0;
+ u8 *dp;
+ int bytes, total_bytes;
+ u32 pos;
+ sched_event_t *evt;
+ u64 nsec_to_add;
+ u32 index;
+ f64 clocks_per_sec;
+
+ enable_fd = open (trace_enable, O_RDWR);
+ if (enable_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_enable);
+ return;
+ }
+ /* disable kernel tracing */
+ if (write (enable_fd, "0\n", 2) != 2)
+ {
+ clib_unix_warning ("disable tracing");
+ close(enable_fd);
+ return;
+ }
+ close(enable_fd);
+
+ /* Read the trace data */
+ data_fd = open (trace_data, O_RDWR);
+ if (data_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_data);
+ return;
+ }
+
+ /*
+ * Extract trace into a vector. Note that seq_printf() [kernel]
+ * is not guaranteed to produce 4096 bytes at a time.
+ */
+ vec_validate (data, 4095);
+ total_bytes = 0;
+ pos = 0;
+ while (1)
+ {
+ bytes = read(data_fd, data+pos, 4096);
+ if (bytes <= 0)
+ break;
+
+ total_bytes += bytes;
+ _vec_len(data) = total_bytes;
+
+ pos = vec_len(data);
+ vec_validate(data, vec_len(data)+4095);
+ }
+ vec_add1(data, 0);
+
+ /* Synthesize events */
+ em->is_enabled = 1;
+
+ index = 0;
+ while ((evt = parse_sched_switch_trace (data, &index)))
+ {
+ u64 fake_cpu_clock;
+
+ fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second;
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "%d: %s %s",
+ .format_args = "i4T4t4",
+ .n_enum_strings = 2,
+ .enum_strings = { "running", "wakeup", },
+ };
+ struct { u32 cpu, string_table_offset, which; } * ed;
+
+ ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR(e),
+ &em->default_track,
+ fake_cpu_clock);
+ ed->cpu = evt->cpu;
+ ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid);
+ ed->which = evt->type;
+ }
+ _vec_len(evt->task) = 0;
+ }
+ em->is_enabled = 0;
+}