summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/unix-kelog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/vppinfra/unix-kelog.c')
-rw-r--r--src/vppinfra/unix-kelog.c415
1 files changed, 415 insertions, 0 deletions
diff --git a/src/vppinfra/unix-kelog.c b/src/vppinfra/unix-kelog.c
new file mode 100644
index 00000000..88428ee8
--- /dev/null
+++ b/src/vppinfra/unix-kelog.c
@@ -0,0 +1,415 @@
+/*
+ Copyright (c) 2010 Cisco and/or its affiliates.
+
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#include <vppinfra/error.h>
+#include <vppinfra/unix.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/format.h>
+#include <vppinfra/os.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+typedef enum
+{
+ RUNNING = 0,
+ WAKEUP,
+} sched_event_type_t;
+
+typedef struct
+{
+ u32 cpu;
+ u8 *task;
+ u32 pid;
+ f64 timestamp;
+ sched_event_type_t type;
+} sched_event_t;
+
+void
+kelog_init (elog_main_t * em, char *kernel_tracer, u32 n_events)
+{
+ int enable_fd, current_tracer_fd, data_fd;
+ int len;
+ struct timespec ts, ts2;
+ char *trace_enable = "/debug/tracing/tracing_enabled";
+ char *current_tracer = "/debug/tracing/current_tracer";
+ char *trace_data = "/debug/tracing/trace";
+ f64 realtime, monotonic;
+ f64 freq, secs_per_clock;
+
+ ASSERT (kernel_tracer);
+
+ /*$$$$ fixme */
+ n_events = 1 << 18;
+
+ /* init first so we won't hurt ourselves if we bail */
+ elog_init (em, n_events);
+
+ enable_fd = open (trace_enable, O_RDWR);
+ if (enable_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_enable);
+ return;
+ }
+ /* disable kernel tracing */
+ if (write (enable_fd, "0\n", 2) != 2)
+ {
+ clib_unix_warning ("disable tracing");
+ close (enable_fd);
+ return;
+ }
+
+ /*
+ * open + clear the data buffer.
+ * see .../linux/kernel/trace/trace.c:tracing_open()
+ */
+ data_fd = open (trace_data, O_RDWR | O_TRUNC);
+ if (data_fd < 0)
+ {
+ clib_warning ("Couldn't open+clear %s", trace_data);
+ return;
+ }
+ close (data_fd);
+
+ /* configure tracing */
+ current_tracer_fd = open (current_tracer, O_RDWR);
+
+ if (current_tracer_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", current_tracer);
+ close (enable_fd);
+ return;
+ }
+
+ len = strlen (kernel_tracer);
+
+ if (write (current_tracer_fd, kernel_tracer, len) != len)
+ {
+ clib_unix_warning ("configure trace");
+ close (current_tracer_fd);
+ close (enable_fd);
+ return;
+ }
+
+ close (current_tracer_fd);
+
+ /*
+ * The kernel event log uses CLOCK_MONOTONIC timestamps,
+ * not CLOCK_REALTIME timestamps. These differ by a constant
+ * but the constant is not available in user mode.
+ * This estimate will be off by one syscall round-trip.
+ */
+ clib_time_init (&em->cpu_timer);
+ em->init_time.cpu = em->cpu_timer.init_cpu_time;
+ syscall (SYS_clock_gettime, CLOCK_MONOTONIC, &ts);
+
+ /* enable kernel tracing */
+ if (write (enable_fd, "1\n", 2) != 2)
+ {
+ clib_unix_warning ("enable tracing");
+ close (enable_fd);
+ return;
+ }
+
+ close (enable_fd);
+}
+
+
+u8 *
+format_sched_event (u8 * s, va_list * va)
+{
+ sched_event_t *e = va_arg (*va, sched_event_t *);
+
+ s = format (s, "cpu %d task %10s type %s timestamp %12.6f\n",
+ e->cpu, e->task, e->type ? "WAKEUP " : "RUNNING", e->timestamp);
+
+ return s;
+}
+
+sched_event_t *
+parse_sched_switch_trace (u8 * tdata, u32 * index)
+{
+ u8 *cp = tdata + *index;
+ u8 *limit = tdata + vec_len (tdata);
+ int colons;
+ static sched_event_t event;
+ sched_event_t *e = &event;
+ static u8 *task_name;
+ u32 secs, usecs;
+ int i;
+
+again:
+ /* eat leading w/s */
+ while (cp < limit && (*cp == ' ' && *cp == '\t'))
+ cp++;
+ if (cp == limit)
+ return 0;
+
+ /* header line */
+ if (*cp == '#')
+ {
+ while (cp < limit && (*cp != '\n'))
+ cp++;
+ if (*cp == '\n')
+ {
+ cp++;
+ goto again;
+ }
+ clib_warning ("bugger 0");
+ return 0;
+ }
+
+ while (cp < limit && *cp != ']')
+ cp++;
+
+ if (*cp == 0)
+ return 0;
+
+ if (*cp != ']')
+ {
+ clib_warning ("bugger 0.1");
+ return 0;
+ }
+
+ cp++;
+ while (cp < limit && (*cp == ' ' && *cp == '\t'))
+ cp++;
+ if (cp == limit)
+ {
+ clib_warning ("bugger 0.2");
+ return 0;
+ }
+
+ secs = atoi (cp);
+
+ while (cp < limit && (*cp != '.'))
+ cp++;
+
+ if (cp == limit)
+ {
+ clib_warning ("bugger 0.3");
+ return 0;
+ }
+
+ cp++;
+
+ usecs = atoi (cp);
+
+ e->timestamp = ((f64) secs) + ((f64) usecs) * 1e-6;
+
+ /* eat up to third colon */
+ for (i = 0; i < 3; i++)
+ {
+ while (cp < limit && *cp != ':')
+ cp++;
+ cp++;
+ }
+ --cp;
+ if (*cp != ':')
+ {
+ clib_warning ("bugger 1");
+ return 0;
+ }
+ /* aim at '>' (switch-to) / '+' (wakeup) */
+ cp += 5;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 2");
+ return 0;
+ }
+ if (*cp == '>')
+ e->type = RUNNING;
+ else if (*cp == '+')
+ e->type = WAKEUP;
+ else
+ {
+ clib_warning ("bugger 3");
+ return 0;
+ }
+
+ cp += 3;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 4");
+ return 0;
+ }
+
+ e->cpu = atoi (cp);
+ cp += 4;
+
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 4");
+ return 0;
+ }
+ while (cp < limit && (*cp == ' ' || *cp == '\t'))
+ cp++;
+
+ e->pid = atoi (cp);
+
+ for (i = 0; i < 2; i++)
+ {
+ while (cp < limit && *cp != ':')
+ cp++;
+ cp++;
+ }
+ --cp;
+ if (*cp != ':')
+ {
+ clib_warning ("bugger 5");
+ return 0;
+ }
+
+ cp += 3;
+ if (cp >= limit)
+ {
+ clib_warning ("bugger 6");
+ return 0;
+ }
+ while (cp < limit && (*cp != ' ' && *cp != '\n'))
+ {
+ vec_add1 (task_name, *cp);
+ cp++;
+ }
+ vec_add1 (task_name, 0);
+ /* _vec_len() = 0 in caller */
+ e->task = task_name;
+
+ if (cp < limit)
+ cp++;
+
+ *index = cp - tdata;
+ return e;
+}
+
+static u32
+elog_id_for_pid (elog_main_t * em, u8 * name, u32 pid)
+{
+ uword *p, r;
+ mhash_t *h = &em->string_table_hash;
+
+ if (!em->string_table_hash.hash)
+ mhash_init (h, sizeof (uword), sizeof (pid));
+
+ p = mhash_get (h, &pid);
+ if (p)
+ return p[0];
+ r = elog_string (em, "%s(%d)", name, pid);
+ mhash_set (h, &pid, r, /* old_value */ 0);
+ return r;
+}
+
+void
+kelog_collect_sched_switch_trace (elog_main_t * em)
+{
+ int enable_fd, data_fd;
+ char *trace_enable = "/debug/tracing/tracing_enabled";
+ char *trace_data = "/debug/tracing/trace";
+ u8 *data = 0;
+ u8 *dp;
+ int bytes, total_bytes;
+ u32 pos;
+ sched_event_t *evt;
+ u64 nsec_to_add;
+ u32 index;
+ f64 clocks_per_sec;
+
+ enable_fd = open (trace_enable, O_RDWR);
+ if (enable_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_enable);
+ return;
+ }
+ /* disable kernel tracing */
+ if (write (enable_fd, "0\n", 2) != 2)
+ {
+ clib_unix_warning ("disable tracing");
+ close (enable_fd);
+ return;
+ }
+ close (enable_fd);
+
+ /* Read the trace data */
+ data_fd = open (trace_data, O_RDWR);
+ if (data_fd < 0)
+ {
+ clib_warning ("Couldn't open %s", trace_data);
+ return;
+ }
+
+ /*
+ * Extract trace into a vector. Note that seq_printf() [kernel]
+ * is not guaranteed to produce 4096 bytes at a time.
+ */
+ vec_validate (data, 4095);
+ total_bytes = 0;
+ pos = 0;
+ while (1)
+ {
+ bytes = read (data_fd, data + pos, 4096);
+ if (bytes <= 0)
+ break;
+
+ total_bytes += bytes;
+ _vec_len (data) = total_bytes;
+
+ pos = vec_len (data);
+ vec_validate (data, vec_len (data) + 4095);
+ }
+ vec_add1 (data, 0);
+
+ /* Synthesize events */
+ em->is_enabled = 1;
+
+ index = 0;
+ while ((evt = parse_sched_switch_trace (data, &index)))
+ {
+ u64 fake_cpu_clock;
+
+ fake_cpu_clock = evt->timestamp * em->cpu_timer.clocks_per_second;
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .format = "%d: %s %s",.format_args = "i4T4t4",.n_enum_strings =
+ 2,.enum_strings =
+ {
+ "running", "wakeup",}
+ ,};
+ struct
+ {
+ u32 cpu, string_table_offset, which;
+ } *ed;
+
+ ed = elog_event_data_not_inline (em, &__ELOG_TYPE_VAR (e),
+ &em->default_track, fake_cpu_clock);
+ ed->cpu = evt->cpu;
+ ed->string_table_offset = elog_id_for_pid (em, evt->task, evt->pid);
+ ed->which = evt->type;
+ }
+ _vec_len (evt->task) = 0;
+ }
+ em->is_enabled = 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */