Diffstat (limited to 'test/packetdrill/run.c')
-rw-r--r--  test/packetdrill/run.c  695
1 file changed, 695 insertions, 0 deletions
diff --git a/test/packetdrill/run.c b/test/packetdrill/run.c
new file mode 100644
index 0000000..37ec449
--- /dev/null
+++ b/test/packetdrill/run.c
@@ -0,0 +1,695 @@
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+/*
+ * Author: ncardwell@google.com (Neal Cardwell)
+ *
+ * Implementation for the test script execution module.
+ */
+
+#include "run.h"
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/times.h>
+#include <unistd.h>
+#include "ip.h"
+#include "logging.h"
+#include "netdev.h"
+#include "wire_client_netdev.h"
+#include "parse.h"
+#include "run_command.h"
+#include "run_packet.h"
+#include "run_system_call.h"
+#include "script.h"
+#include "so_testing.h"
+#include "socket.h"
+#include "system.h"
+#include "tcp.h"
+#include "tcp_options.h"
+
+/* MAX_SPIN_USECS is the maximum amount of time (in microseconds) to
+ * spin waiting for an event. We sleep up until this many microseconds
+ * before a script event. We get the best results on tickless
+ * (CONFIG_NO_HZ=y) kernels when we try to sleep until the exact jiffy
+ * of a script event; this reduces the staleness/noise we see in
+ * jiffies values on tickless kernels, since the kernel updates the
+ * jiffies value at the time we wake, and then we execute the test
+ * event shortly thereafter. The value below was chosen experimentally
+ * based on experience with a 2.2GHz machine, for which there was a
+ * measured overhead of roughly 15 usec for the unlock/usleep/lock
+ * sequence that wait_for_event() must execute while waiting
+ * for the next event.
+ */
+const int MAX_SPIN_USECS = 20;
+/* Whether the script's init command has been executed; the cleanup
+ * command only runs if it has (see run_cleanup_command()).
+ */
+bool init_cmd_exed = false;
+
+/* Final command to always execute at end of script, in order to clean up: */
+const char *cleanup_cmd;
+
+/* Path of currently-executing script, for use in cleanup command errors: */
+const char *script_path;
+
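+/* Allocate and initialize the run-time state for executing the given
+ * script against the given netdev. The returned state holds its run
+ * lock; state_free() releases it.
+ */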
+struct state *state_new(struct config *config,
+ struct script *script,
+ struct netdev *netdev)
+{
+ struct state *state = calloc(1, sizeof(struct state));
+
+ if (pthread_mutex_init(&state->mutex, NULL) != 0)
+ die_perror("pthread_mutex_init");
+
+ run_lock(state);
+
+ state->config = config;
+ state->script = script;
+ state->netdev = netdev;
+ state->packets = packets_new(state);
+ state->syscalls = syscalls_new(state);
+ state->code = code_new(config);
+ state->fds = NULL;
+ state->num_events = 0;
+ return state;
+}
+
+/* Add the file descriptor to the list of run-time file descriptors. */
+void state_add_fd(struct state *state, struct fd_state *fd)
+{
+ fd->next = state->fds;
+ state->fds = fd;
+}
+
+/* Close all sockets, free all the socket structs, and send a RST
+ * packet to clean up kernel state for each connection.
+ * TODO(ncardwell): centralize error handling and ensure test errors
+ * always result in a call to these clean-up functions, so we can make
+ * sure to reset connections in all cases.
+ */
+static void close_all_fds(struct state *state)
+{
+ struct fd_state *fd = state->fds, *dead_fd = NULL;
+
+ while (fd != NULL) {
+ dead_fd = fd;
+ fd = fd->next;
+ dead_fd->ops->close(state, dead_fd);
+ }
+}
+
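+/* Tear down all run-time state: stop the system call thread, close and
+ * reset all connections, then release the netdev and free everything
+ * the state owns.
+ */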
+void state_free(struct state *state)
+{
+ /* We have to stop the system call thread first, since it's using
+ * sockets that we want to close and reset.
+ */
+ syscalls_free(state, state->syscalls);
+
+ /* Then we close the sockets and reset the connections, while
+ * we still have a netdev for injecting reset packets to free
+ * per-connection kernel state.
+ */
+ close_all_fds(state);
+
+ netdev_free(state->netdev);
+ packets_free(state->packets);
+ code_free(state->code);
+
+ if (state->wire_client)
+ wire_client_free(state->wire_client);
+
+ if (state->so_instance)
+ so_instance_free(state->so_instance);
+
+ run_unlock(state);
+ if (pthread_mutex_destroy(&state->mutex) != 0)
+ die_perror("pthread_mutex_destroy");
+
+ memset(state, 0, sizeof(*state)); /* paranoia to help catch bugs */
+ free(state);
+}
+
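+/* Return the current wall clock time in microseconds, using the shared
+ * object stack's gettimeofday when one is loaded, and the system's
+ * otherwise.
+ */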
+s64 now_usecs(struct state *state)
+{
+ struct timeval tv;
+ if (state->so_instance) {
+ if (state->so_instance->ifc.gettimeofday(
+ state->so_instance->ifc.userdata, &tv, NULL) < 0)
+ die_perror("gettimeofday");
+ } else {
+ if (gettimeofday(&tv, NULL) < 0)
+ die_perror("gettimeofday");
+ }
+ return timeval_to_usecs(&tv);
+}
+
+/*
+ * Verify that something happened at the expected time.
+ * WARNING: verify_time() should not be looking at state->event
+ * because in some cases (checking the finish time for blocking system
+ * calls) we call verify_time() at a time when state->event
+ * points at an event other than the one whose time we're currently
+ * checking.
+ */
+int verify_time(struct state *state, enum event_time_t time_type,
+ s64 script_usecs, s64 script_usecs_end,
+ s64 live_usecs, const char *description, char **error)
+{
+ s64 expected_usecs = script_usecs - state->script_start_time_usecs;
+ s64 expected_usecs_end = script_usecs_end -
+ state->script_start_time_usecs;
+ s64 actual_usecs = live_usecs - state->live_start_time_usecs;
+ int tolerance_usecs = state->config->tolerance_usecs;
+
+ DEBUGP("expected: %.3f actual: %.3f (secs)\n",
+ usecs_to_secs(script_usecs), usecs_to_secs(actual_usecs));
+
+ if (time_type == ANY_TIME)
+ return STATUS_OK;
+
+ if (time_type == ABSOLUTE_RANGE_TIME ||
+ time_type == RELATIVE_RANGE_TIME) {
+ DEBUGP("expected_usecs_end %.3f\n",
+ usecs_to_secs(script_usecs_end));
+ if (actual_usecs < (expected_usecs - tolerance_usecs) ||
+ actual_usecs > (expected_usecs_end + tolerance_usecs)) {
+ if (time_type == ABSOLUTE_RANGE_TIME) {
+ asprintf(error,
+ "timing error: expected "
+ "%s in time range %.6f~%.6f sec "
+ "but happened at %.6f sec",
+ description,
+ usecs_to_secs(script_usecs),
+ usecs_to_secs(script_usecs_end),
+ usecs_to_secs(actual_usecs));
+ } else if (time_type == RELATIVE_RANGE_TIME) {
+ s64 offset_usecs = state->event->offset_usecs;
+ asprintf(error,
+ "timing error: expected "
+ "%s in relative time range +%.6f~+%.6f "
+ "sec but happened at %+.6f sec",
+ description,
+ usecs_to_secs(script_usecs -
+ offset_usecs),
+ usecs_to_secs(script_usecs_end -
+ offset_usecs),
+ usecs_to_secs(actual_usecs -
+ offset_usecs));
+ }
+ return STATUS_ERR;
+ } else {
+ return STATUS_OK;
+ }
+ }
+
+ if ((actual_usecs < (expected_usecs - tolerance_usecs)) ||
+ (actual_usecs > (expected_usecs + tolerance_usecs))) {
+ asprintf(error,
+ "timing error: "
+ "expected %s at %.6f sec but happened at %.6f sec; "
+ "tolerance %.6f sec",
+ description,
+ usecs_to_secs(script_usecs),
+ usecs_to_secs(actual_usecs),
+ usecs_to_secs(tolerance_usecs));
+ return STATUS_ERR;
+ } else {
+ return STATUS_OK;
+ }
+}
+
+/* Return a static string describing the given event, for error messages. */
+static const char *event_description(struct event *event)
+{
+ enum direction_t direction = DIRECTION_INVALID;
+
+ if ((event->type <= INVALID_EVENT) ||
+ (event->type >= NUM_EVENT_TYPES)) {
+ die("bogus event type: %d", event->type);
+ }
+ switch (event->type) {
+ case PACKET_EVENT:
+ direction = packet_direction(event->event.packet);
+ if (direction == DIRECTION_INBOUND)
+ return "inbound packet";
+ else if (direction == DIRECTION_OUTBOUND)
+ return "outbound packet";
+ else
+ assert(!"bad direction");
+ break;
+ case SYSCALL_EVENT:
+ return "system call start";
+ case COMMAND_EVENT:
+ return "command";
+ case CODE_EVENT:
+ return "data collection for code";
+ case INVALID_EVENT:
+ case NUM_EVENT_TYPES:
+ assert(!"bogus type");
+ break;
+ /* We omit default case so compiler catches missing values. */
+ }
+ return "invalid event";
+}
+
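+/* Verify that the current event fired at its expected script time; on
+ * a timing error, die with a message pointing at the script line.
+ */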
+void check_event_time(struct state *state, s64 live_usecs)
+{
+ char *error = NULL;
+ const char *description = event_description(state->event);
+ if (verify_time(state,
+ state->event->time_type,
+ state->event->time_usecs,
+ state->event->time_usecs_end, live_usecs,
+ description, &error)) {
+ die("%s:%d: %s\n",
+ state->config->script_path,
+ state->event->line_number,
+ error);
+ }
+}
+
+/* Consecutive relative inbound packets should be anchored relative to the
+ * packet start times, to avoid accumulating errors from CPU processing
+ * overheads on consecutive packets.
+ */
+bool is_event_start_time_anchored(struct event *event)
+{
+ return (event->type == PACKET_EVENT &&
+ packet_direction(event->event.packet) == DIRECTION_INBOUND);
+}
+
+/* Set the start (and end time, if applicable) for the event if it
+ * uses wildcard or relative timing.
+ */
+void adjust_relative_event_times(struct state *state, struct event *event)
+{
+ s64 offset_usecs = 0;
+
+ if (event->time_type != ANY_TIME &&
+ event->time_type != RELATIVE_TIME &&
+ event->time_type != RELATIVE_RANGE_TIME)
+ return;
+
+ if (state->last_event &&
+ is_event_start_time_anchored(state->last_event) &&
+ is_event_start_time_anchored(event))
+ offset_usecs = state->last_event->time_usecs;
+ else
+ offset_usecs = now_usecs(state) - state->live_start_time_usecs;
+ event->offset_usecs = offset_usecs;
+
+ event->time_usecs += offset_usecs;
+ if (event->time_type == RELATIVE_RANGE_TIME)
+ event->time_usecs_end += offset_usecs;
+
+ /* Adjust the end time of blocking system calls using relative times. */
+ if (event->time_type == RELATIVE_TIME &&
+ event->type == SYSCALL_EVENT &&
+ is_blocking_syscall(event->event.syscall)) {
+ event->event.syscall->end_usecs += offset_usecs;
+ }
+}
+
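+/* Sleep and/or spin until the live time at which the current event is
+ * scheduled, then verify that we hit that time within tolerance.
+ */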
+void wait_for_event(struct state *state)
+{
+ s64 event_usecs =
+ script_time_to_live_time_usecs(
+ state, state->event->time_usecs);
+ DEBUGP("waiting until %lld -- now is %lld\n",
+ event_usecs, now_usecs(state));
+ while (1) {
+ const s64 wait_usecs = event_usecs - now_usecs(state);
+ if (wait_usecs <= 0)
+ break;
+
+ /* If we're waiting a long time, and we are on an OS
+ * that we know has a fine-grained usleep(), then
+ * usleep() instead of spinning on the CPU.
+ */
+#ifdef linux
+ /* Since the scheduler may not wake us up precisely
+ * when we tell it to, sleep until just before the
+ * event we're waiting for and then spin.
+ */
+ if (state->num_events > 0 && wait_usecs > MAX_SPIN_USECS) {
+ run_unlock(state);
+ if (state->so_instance) {
+ state->so_instance->ifc.usleep(
+ state->so_instance->ifc.userdata,
+ wait_usecs - MAX_SPIN_USECS);
+ } else {
+ usleep(wait_usecs - MAX_SPIN_USECS);
+ }
+ run_lock(state);
+ }
+#endif
+
+ /* At this point we should only have a millisecond or
+ * two to wait, so we spin.
+ */
+ }
+
+ if (state->num_events > 0)
+ check_event_time(state, now_usecs(state));
+}
+
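+/* Advance state->event to the next event in the script, verifying that
+ * the first event is at time 0 and that absolute times never go
+ * backward. On success returns STATUS_OK, with state->event set to
+ * NULL once the script is done.
+ */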
+int get_next_event(struct state *state, char **error)
+{
+ DEBUGP("gettimeofday: %.6f\n", now_usecs(state)/1000000.0);
+
+ if (state->event == NULL) {
+ /* First event. */
+ state->event = state->script->event_list;
+ state->script_start_time_usecs = state->event->time_usecs;
+ if (state->event->time_usecs != 0) {
+ asprintf(error,
+ "%s:%d: first event should be at time 0\n",
+ state->config->script_path,
+ state->event->line_number);
+ return STATUS_ERR;
+ }
+ } else {
+ /* Move to the next event. */
+ state->script_last_time_usecs = state->event->time_usecs;
+ state->last_event = state->event;
+ state->event = state->event->next;
+ }
+
+ if (state->event == NULL)
+ return STATUS_OK; /* script is done */
+
+ assert((state->event->type > INVALID_EVENT) &&
+ (state->event->type < NUM_EVENT_TYPES));
+
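+	/* Absolute script times must never go backward between events. */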
+ if (state->last_event &&
+ is_event_time_absolute(state->last_event) &&
+ is_event_time_absolute(state->event) &&
+ state->event->time_usecs < state->script_last_time_usecs) {
+ asprintf(error,
+ "%s:%d: time goes backward in script "
+ "from %lld usec to %lld usec\n",
+ state->config->script_path,
+ state->event->line_number,
+ state->script_last_time_usecs,
+ state->event->time_usecs);
+ return STATUS_ERR;
+ }
+ return STATUS_OK;
+}
+
+/* Run the given packet event; print warnings/errors, and exit on error. */
+static void run_local_packet_event(struct state *state, struct event *event,
+ struct packet *packet)
+{
+ char *error = NULL;
+ int result = STATUS_OK;
+
+ result = run_packet_event(state, event, packet, &error);
+ if (result == STATUS_WARN) {
+ fprintf(stderr, "%s", error);
+ free(error);
+ } else if (result == STATUS_ERR) {
+ die("%s", error);
+ }
+}
+
+/* For more consistent timing, if there's more than one CPU on this
+ * machine then use a real-time priority. We skip this if there's only
+ * 1 CPU because we do not want to risk making the machine
+ * unresponsive.
+ */
+void set_scheduling_priority(void)
+{
+ /* Get the CPU count and skip this if we only have 1 CPU. */
+ int num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (num_cpus < 0)
+ die_perror("sysconf(_SC_NPROCESSORS_ONLN)");
+ if (num_cpus <= 1)
+ return;
+
+#if !defined(__OpenBSD__)
+	/* Choose a real-time policy, but use SCHED_RR instead of
+ * SCHED_FIFO, so that we round-robin among real-time threads
+ * of the same priority. In practice this shouldn't matter,
+ * since there will not likely be other realtime threads.
+ */
+ int policy = SCHED_RR;
+
+ /* Use the minimum priority, to be nice. */
+ int priority = sched_get_priority_min(policy);
+ if (priority < 0)
+ die_perror("sched_get_priority_min");
+
+ /* Set the policy and priority for our threads. */
+ struct sched_param param;
+ memset(&param, 0, sizeof(param));
+ param.sched_priority = priority;
+ if (sched_setscheduler(0, policy, &param))
+ DEBUGP("sched_setscheduler failed: %s\n", strerror(errno));
+#endif /* !defined(__OpenBSD__) */
+}
+
+/* To ensure timing that's as consistent as possible, pull all our
+ * pages to RAM and pin them there.
+ */
+void lock_memory(void)
+{
+ if (mlockall(MCL_CURRENT | MCL_FUTURE))
+		die_perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
+}
+
+/* Wait for and return the wall time at which we should start the
+ * test, in microseconds. To make test results more reproducible, we
+ * wait for a start time that is well into the middle of a Linux jiffy
+ * (JIFFY_OFFSET_USECS into the jiffy). If you try to run a test
+ * script starting at a time that is too near the edge of a jiffy, and
+ * the test tries (as most do) to schedule events at 1-millisecond
+ * boundaries relative to this start time, then slight CPU or
+ * scheduling variations cause the kernel to record time measurements
+ * that are 1 jiffy too big or too small, so the kernel gets
+ * unexpected RTT and RTT variance values, leading to unexpected RTO
+ * and delayed ACK timer behavior.
+ *
+ * To try to find the edge of a jiffy, we spin and watch the output of
+ * times(2), which increments every time the jiffies clock has
+ * advanced another 10ms. We wait for a few ticks
+ * (TARGET_JIFFY_TICKS) to go by, to reduce noise from warm-up
+ * effects. We could do fancier measuring and filtering here, but so
+ * far this level of complexity seems sufficient.
+ */
+static s64 schedule_start_time_usecs(struct state *state)
+{
+#ifdef linux
+ s64 start_usecs = 0;
+ clock_t last_jiffies = times(NULL);
+ int jiffy_ticks = 0;
+ const int TARGET_JIFFY_TICKS = 10;
+ while (jiffy_ticks < TARGET_JIFFY_TICKS) {
+ clock_t jiffies = times(NULL);
+ if (jiffies != last_jiffies) {
+ start_usecs = now_usecs(state);
+ ++jiffy_ticks;
+ }
+ last_jiffies = jiffies;
+ }
+ const int JIFFY_OFFSET_USECS = 250;
+ start_usecs += JIFFY_OFFSET_USECS;
+ return start_usecs;
+#else
+ return now_usecs(state);
+#endif
+}
+
+/* Run the final command that we always execute at the end of a script, to
+ * clean up. If a packetdrill script ends with a cleanup command, we execute
+ * it whether the test passes or fails. This makes the cleanup command a good
+ * place to undo any sysctl settings the script changed, for example.
+ */
+int run_cleanup_command(void)
+{
+ if (cleanup_cmd != NULL && init_cmd_exed) {
+ char *error = NULL;
+
+ if (safe_system(cleanup_cmd, &error)) {
+ fprintf(stderr,
+ "%s: error executing cleanup command: %s\n",
+ script_path, error);
+ free(error);
+ return STATUS_ERR;
+ }
+ }
+ return STATUS_OK;
+}
+
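+/* Interpreter top level for local and wire client runs: set up the
+ * netdev and run-time state, run the init command, execute each script
+ * event in turn, and finally run the cleanup command and any
+ * post-processing code.
+ */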
+void run_script(struct config *config, struct script *script)
+{
+ char *error = NULL;
+ struct state *state = NULL;
+ struct netdev *netdev = NULL;
+ struct event *event = NULL;
+
+ DEBUGP("run_script: running script\n");
+
+ set_scheduling_priority();
+ lock_memory();
+
+ /* This interpreter loop runs for local mode or wire client mode. */
+ assert(!config->is_wire_server);
+
+ script_path = config->script_path;
+
+	/* How we use the network is of course a little different in
+	 * each of these modes: wire client, shared-object stack, or
+	 * plain local testing.
+	 */
+ if (config->is_wire_client)
+ netdev = wire_client_netdev_new(config);
+ else if (config->so_filename)
+ netdev = so_netdev_new(config);
+ else
+ netdev = local_netdev_new(config);
+
+ state = state_new(config, script, netdev);
+
+ if (config->is_wire_client) {
+ state->wire_client = wire_client_new();
+ wire_client_init(state->wire_client, config, script);
+ }
+
+ if (config->so_filename) {
+ state->so_instance = so_instance_new();
+ so_instance_init(state->so_instance, config, script, state);
+ }
+
+ init_cmd_exed = false;
+ if (script->init_command != NULL) {
+ if (safe_system(script->init_command->command_line,
+ &error)) {
+			fprintf(stderr,
+				"%s: error executing init command: %s\n",
+				config->script_path, error);
+ free(error);
+ exit(EXIT_FAILURE);
+ }
+ init_cmd_exed = true;
+ }
+
+ signal(SIGPIPE, SIG_IGN); /* ignore EPIPE */
+
+ state->live_start_time_usecs = schedule_start_time_usecs(state);
+ DEBUGP("live_start_time_usecs is %lld\n",
+ state->live_start_time_usecs);
+
+ if (state->wire_client != NULL)
+ wire_client_send_client_starting(state->wire_client);
+
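+	/* Main event loop: fetch each script event and dispatch on its type. */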
+ while (1) {
+ if (get_next_event(state, &error))
+ die("%s", error);
+ event = state->event;
+ if (event == NULL)
+ break;
+
+ if (state->wire_client != NULL)
+ wire_client_next_event(state->wire_client, event);
+
+ /* In wire mode, we adjust relative times after
+ * getting notification that previous packet events
+ * have completed, if any.
+ */
+ adjust_relative_event_times(state, event);
+
+ switch (event->type) {
+ case PACKET_EVENT:
+ /* For wire clients, the server handles packets. */
+ if (!config->is_wire_client) {
+ run_local_packet_event(state, event,
+ event->event.packet);
+ }
+ break;
+ case SYSCALL_EVENT:
+ run_system_call_event(state, event,
+ event->event.syscall);
+ break;
+ case COMMAND_EVENT:
+ run_command_event(state, event,
+ event->event.command);
+ break;
+ case CODE_EVENT:
+ run_code_event(state, event,
+ event->event.code->text);
+ break;
+ case INVALID_EVENT:
+ case NUM_EVENT_TYPES:
+ assert(!"bogus type");
+ break;
+ /* We omit default case so compiler catches missing values. */
+ }
+ state->num_events++;
+ }
+
+ /* Wait for any outstanding packet events we requested on the server. */
+ if (state->wire_client != NULL)
+ wire_client_next_event(state->wire_client, NULL);
+
+ if (run_cleanup_command() == STATUS_ERR)
+ exit(EXIT_FAILURE);
+
+ if (code_execute(state->code, &error)) {
+ die("%s: error executing code: %s\n",
+ state->config->script_path, error);
+ free(error);
+ }
+
+ state_free(state);
+
+ DEBUGP("run_script: done running\n");
+}
+
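+/* Initialize the script and config, read the script text from the
+ * given buffer (if non-NULL) or path, and parse it using the given
+ * command line invocation.
+ */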
+int parse_script_and_set_config(int argc, char *argv[],
+ struct config *config,
+ struct script *script,
+ const char *script_path,
+ const char *script_buffer)
+{
+ struct invocation invocation = {
+ .argc = argc,
+ .argv = argv,
+ .config = config,
+ .script = script,
+ };
+
+	DEBUGP("parse_script_and_set_config: %s\n", script_path);
+ assert(script_path != NULL);
+
+ init_script(script);
+
+ set_default_config(config);
+ config->script_path = strdup(script_path);
+
+ if (script_buffer != NULL)
+ copy_script(script_buffer, script);
+ else
+ read_script(script_path, script);
+
+ return parse_script(config, script, &invocation);
+}