From 78c896b3b3127515478090c19447e27dc406427e Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Mon, 18 Nov 2019 06:59:50 +0000 Subject: TLDKv2 Signed-off-by: Jianfeng Tan Signed-off-by: Jielong Zhou Signed-off-by: Jian Zhang Signed-off-by: Chen Zhao Change-Id: I55c39de4c6cd30f991f35631eb507f770230f08e --- test/packetdrill/run.c | 695 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 695 insertions(+) create mode 100644 test/packetdrill/run.c (limited to 'test/packetdrill/run.c') diff --git a/test/packetdrill/run.c b/test/packetdrill/run.c new file mode 100644 index 0000000..37ec449 --- /dev/null +++ b/test/packetdrill/run.c @@ -0,0 +1,695 @@ +/* + * Copyright 2013 Google Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ +/* + * Author: ncardwell@google.com (Neal Cardwell) + * + * Implementation for the test script execution module. + */ + +#include "run.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "ip.h" +#include "logging.h" +#include "netdev.h" +#include "wire_client_netdev.h" +#include "parse.h" +#include "run_command.h" +#include "run_packet.h" +#include "run_system_call.h" +#include "script.h" +#include "so_testing.h" +#include "socket.h" +#include "system.h" +#include "tcp.h" +#include "tcp_options.h" + +/* MAX_SPIN_USECS is the maximum amount of time (in microseconds) to + * spin waiting for an event. We sleep up until this many microseconds + * before a script event. We get the best results on tickless + * (CONFIG_NO_HZ=y) kernels when we try to sleep until the exact jiffy + * of a script event; this reduces the staleness/noise we see in + * jiffies values on tickless kernels, since the kernel updates the + * jiffies value at the time we wake, and then we execute the test + * event shortly thereafter. The value below was chosen experimentally + * based on experiences on a 2.2GHz machine for which there was a + * measured overhead of roughly 15 usec for the unlock/usleep/lock + * sequence that wait_for_event() must execute while waiting + * for the next event. + */ +const int MAX_SPIN_USECS = 20; +/* Global bool init_cmd_exed */ +bool init_cmd_exed = false; + +/* Final command to always execute at end of script, in order to clean up: */ +const char *cleanup_cmd; + +/* Path of currently-executing script, for use in cleanup command errors: */ +const char *script_path; + +struct state *state_new(struct config *config, + struct script *script, + struct netdev *netdev) +{ + struct state *state = calloc(1, sizeof(struct state)); + + if (pthread_mutex_init(&state->mutex, NULL) != 0) + die_perror("pthread_mutex_init"); + + run_lock(state); + + state->config = config; + state->script = script; + state->netdev = netdev; + state->packets = packets_new(state); + state->syscalls = syscalls_new(state); + state->code = code_new(config); + state->fds = NULL; + state->num_events = 0; + return state; +} + +/* Add the file descriptor to the list of run-time file descriptors. */ +void state_add_fd(struct state *state, struct fd_state *fd) +{ + fd->next = state->fds; + state->fds = fd; +} + +/* Close all sockets, free all the socket structs, and send a RST + * packet to clean up kernel state for each connection. + * TODO(ncardwell): centralize error handling and ensure test errors + * always result in a call to these clean-up functions, so we can make + * sure to reset connections in all cases. + */ +static void close_all_fds(struct state *state) +{ + struct fd_state *fd = state->fds, *dead_fd = NULL; + + while (fd != NULL) { + dead_fd = fd; + fd = fd->next; + dead_fd->ops->close(state, dead_fd); + } +} + +void state_free(struct state *state) +{ + /* We have to stop the system call thread first, since it's using + * sockets that we want to close and reset. + */ + syscalls_free(state, state->syscalls); + + /* Then we close the sockets and reset the connections, while + * we still have a netdev for injecting reset packets to free + * per-connection kernel state. + */ + close_all_fds(state); + + netdev_free(state->netdev); + packets_free(state->packets); + code_free(state->code); + + if (state->wire_client) + wire_client_free(state->wire_client); + + if (state->so_instance) + so_instance_free(state->so_instance); + + run_unlock(state); + if (pthread_mutex_destroy(&state->mutex) != 0) + die_perror("pthread_mutex_destroy"); + + memset(state, 0, sizeof(*state)); /* paranoia to help catch bugs */ + free(state); +} + +s64 now_usecs(struct state *state) +{ + struct timeval tv; + if (state->so_instance) { + if (state->so_instance->ifc.gettimeofday( + state->so_instance->ifc.userdata, &tv, NULL) < 0) + die_perror("gettimeofday"); + } else { + if (gettimeofday(&tv, NULL) < 0) + die_perror("gettimeofday"); + } + return timeval_to_usecs(&tv); +} + +/* + * Verify that something happened at the expected time. + * WARNING: verify_time() should not be looking at state->event + * because in some cases (checking the finish time for blocking system + * calls) we call verify_time() at a time when state->event + * points at an event other than the one whose time we're currently + * checking. + */ +int verify_time(struct state *state, enum event_time_t time_type, + s64 script_usecs, s64 script_usecs_end, + s64 live_usecs, const char *description, char **error) +{ + s64 expected_usecs = script_usecs - state->script_start_time_usecs; + s64 expected_usecs_end = script_usecs_end - + state->script_start_time_usecs; + s64 actual_usecs = live_usecs - state->live_start_time_usecs; + int tolerance_usecs = state->config->tolerance_usecs; + + DEBUGP("expected: %.3f actual: %.3f (secs)\n", + usecs_to_secs(script_usecs), usecs_to_secs(actual_usecs)); + + if (time_type == ANY_TIME) + return STATUS_OK; + + if (time_type == ABSOLUTE_RANGE_TIME || + time_type == RELATIVE_RANGE_TIME) { + DEBUGP("expected_usecs_end %.3f\n", + usecs_to_secs(script_usecs_end)); + if (actual_usecs < (expected_usecs - tolerance_usecs) || + actual_usecs > (expected_usecs_end + tolerance_usecs)) { + if (time_type == ABSOLUTE_RANGE_TIME) { + asprintf(error, + "timing error: expected " + "%s in time range %.6f~%.6f sec " + "but happened at %.6f sec", + description, + usecs_to_secs(script_usecs), + usecs_to_secs(script_usecs_end), + usecs_to_secs(actual_usecs)); + } else if (time_type == RELATIVE_RANGE_TIME) { + s64 offset_usecs = state->event->offset_usecs; + asprintf(error, + "timing error: expected " + "%s in relative time range +%.6f~+%.6f " + "sec but happened at %+.6f sec", + description, + usecs_to_secs(script_usecs - + offset_usecs), + usecs_to_secs(script_usecs_end - + offset_usecs), + usecs_to_secs(actual_usecs - + offset_usecs)); + } + return STATUS_ERR; + } else { + return STATUS_OK; + } + } + + if ((actual_usecs < (expected_usecs - tolerance_usecs)) || + (actual_usecs > (expected_usecs + tolerance_usecs))) { + asprintf(error, + "timing error: " + "expected %s at %.6f sec but happened at %.6f sec; " + "tolerance %.6f sec", + description, + usecs_to_secs(script_usecs), + usecs_to_secs(actual_usecs), + usecs_to_secs(tolerance_usecs)); + return STATUS_ERR; + } else { + return STATUS_OK; + } +} + +/* Return a static string describing the given event, for error messages. */ +static const char *event_description(struct event *event) +{ + enum direction_t direction = DIRECTION_INVALID; + + if ((event->type <= INVALID_EVENT) || + (event->type >= NUM_EVENT_TYPES)) { + die("bogus event type: %d", event->type); + } + switch (event->type) { + case PACKET_EVENT: + direction = packet_direction(event->event.packet); + if (direction == DIRECTION_INBOUND) + return "inbound packet"; + else if (direction == DIRECTION_OUTBOUND) + return "outbound packet"; + else + assert(!"bad direction"); + break; + case SYSCALL_EVENT: + return "system call start"; + case COMMAND_EVENT: + return "command"; + case CODE_EVENT: + return "data collection for code"; + case INVALID_EVENT: + case NUM_EVENT_TYPES: + assert(!"bogus type"); + break; + /* We omit default case so compiler catches missing values. */ + } + return "invalid event"; +} + +void check_event_time(struct state *state, s64 live_usecs) +{ + char *error = NULL; + const char *description = event_description(state->event); + if (verify_time(state, + state->event->time_type, + state->event->time_usecs, + state->event->time_usecs_end, live_usecs, + description, &error)) { + die("%s:%d: %s\n", + state->config->script_path, + state->event->line_number, + error); + } +} + +/* Consecutive relative inbound packets should be anchored relative to the + * packet start times, to avoid accumulating errors from CPU processing + * overheads on consecutive packets. + */ +bool is_event_start_time_anchored(struct event *event) +{ + return (event->type == PACKET_EVENT && + packet_direction(event->event.packet) == DIRECTION_INBOUND); +} + +/* Set the start (and end time, if applicable) for the event if it + * uses wildcard or relative timing. + */ +void adjust_relative_event_times(struct state *state, struct event *event) +{ + s64 offset_usecs = 0; + + if (event->time_type != ANY_TIME && + event->time_type != RELATIVE_TIME && + event->time_type != RELATIVE_RANGE_TIME) + return; + + if (state->last_event && + is_event_start_time_anchored(state->last_event) && + is_event_start_time_anchored(event)) + offset_usecs = state->last_event->time_usecs; + else + offset_usecs = now_usecs(state) - state->live_start_time_usecs; + event->offset_usecs = offset_usecs; + + event->time_usecs += offset_usecs; + if (event->time_type == RELATIVE_RANGE_TIME) + event->time_usecs_end += offset_usecs; + + /* Adjust the end time of blocking system calls using relative times. */ + if (event->time_type == RELATIVE_TIME && + event->type == SYSCALL_EVENT && + is_blocking_syscall(event->event.syscall)) { + event->event.syscall->end_usecs += offset_usecs; + } +} + +void wait_for_event(struct state *state) +{ + s64 event_usecs = + script_time_to_live_time_usecs( + state, state->event->time_usecs); + DEBUGP("waiting until %lld -- now is %lld\n", + event_usecs, now_usecs(state)); + while (1) { + const s64 wait_usecs = event_usecs - now_usecs(state); + if (wait_usecs <= 0) + break; + + /* If we're waiting a long time, and we are on an OS + * that we know has a fine-grained usleep(), then + * usleep() instead of spinning on the CPU. + */ +#ifdef linux + /* Since the scheduler may not wake us up precisely + * when we tell it to, sleep until just before the + * event we're waiting for and then spin. + */ + if (state->num_events > 0 && wait_usecs > MAX_SPIN_USECS) { + run_unlock(state); + if (state->so_instance) { + state->so_instance->ifc.usleep( + state->so_instance->ifc.userdata, + wait_usecs - MAX_SPIN_USECS); + } else { + usleep(wait_usecs - MAX_SPIN_USECS); + } + run_lock(state); + } +#endif + + /* At this point we should only have a millisecond or + * two to wait, so we spin. + */ + } + + if (state->num_events > 0) + check_event_time(state, now_usecs(state)); +} + +int get_next_event(struct state *state, char **error) +{ + DEBUGP("gettimeofday: %.6f\n", now_usecs(state)/1000000.0); + + if (state->event == NULL) { + /* First event. */ + state->event = state->script->event_list; + state->script_start_time_usecs = state->event->time_usecs; + if (state->event->time_usecs != 0) { + asprintf(error, + "%s:%d: first event should be at time 0\n", + state->config->script_path, + state->event->line_number); + return STATUS_ERR; + } + } else { + /* Move to the next event. */ + state->script_last_time_usecs = state->event->time_usecs; + state->last_event = state->event; + state->event = state->event->next; + } + + if (state->event == NULL) + return STATUS_OK; /* script is done */ + + assert((state->event->type > INVALID_EVENT) && + (state->event->type < NUM_EVENT_TYPES)); + + if (state->last_event && + is_event_time_absolute(state->last_event) && + is_event_time_absolute(state->event) && + state->event->time_usecs < state->script_last_time_usecs) { + asprintf(error, + "%s:%d: time goes backward in script " + "from %lld usec to %lld usec\n", + state->config->script_path, + state->event->line_number, + state->script_last_time_usecs, + state->event->time_usecs); + return STATUS_ERR; + } + return STATUS_OK; +} + +/* Run the given packet event; print warnings/errors, and exit on error. */ +static void run_local_packet_event(struct state *state, struct event *event, + struct packet *packet) +{ + char *error = NULL; + int result = STATUS_OK; + + result = run_packet_event(state, event, packet, &error); + if (result == STATUS_WARN) { + fprintf(stderr, "%s", error); + free(error); + } else if (result == STATUS_ERR) { + die("%s", error); + } +} + +/* For more consistent timing, if there's more than one CPU on this + * machine then use a real-time priority. We skip this if there's only + * 1 CPU because we do not want to risk making the machine + * unresponsive. + */ +void set_scheduling_priority(void) +{ + /* Get the CPU count and skip this if we only have 1 CPU. */ + int num_cpus = sysconf(_SC_NPROCESSORS_ONLN); + if (num_cpus < 0) + die_perror("sysconf(_SC_NPROCESSORS_ONLN)"); + if (num_cpus <= 1) + return; + +#if !defined(__OpenBSD__) + /* Chose a real-time policy, but use SCHED_RR instead of + * SCHED_FIFO, so that we round-robin among real-time threads + * of the same priority. In practice this shouldn't matter, + * since there will not likely be other realtime threads. + */ + int policy = SCHED_RR; + + /* Use the minimum priority, to be nice. */ + int priority = sched_get_priority_min(policy); + if (priority < 0) + die_perror("sched_get_priority_min"); + + /* Set the policy and priority for our threads. */ + struct sched_param param; + memset(¶m, 0, sizeof(param)); + param.sched_priority = priority; + if (sched_setscheduler(0, policy, ¶m)) + DEBUGP("sched_setscheduler failed: %s\n", strerror(errno)); +#endif /* !defined(__OpenBSD__) */ +} + +/* To ensure timing that's as consistent as possible, pull all our + * pages to RAM and pin them there. + */ +void lock_memory(void) +{ + if (mlockall(MCL_CURRENT | MCL_FUTURE)) + die_perror("lockall(MCL_CURRENT | MCL_FUTURE)"); +} + +/* Wait for and return the wall time at which we should start the + * test, in microseconds. To make test results more reproducible, we + * wait for a start time that is well into the middle of a Linux jiffy + * (JIFFY_OFFSET_USECS into the jiffy). If you try to run a test + * script starting at a time that is too near the edge of a jiffy, and + * the test tries (as most do) to schedule events at 1-millisecond + * boundaries relative to this start time, then slight CPU or + * scheduling variations cause the kernel to record time measurements + * that are 1 jiffy too big or too small, so the kernel gets + * unexpected RTT and RTT variance values, leading to unexpected RTO + * and delayed ACK timer behavior. + * + * To try to find the edge of a jiffy, we spin and watch the output of + * times(2), which increments every time the jiffies clock has + * advanced another 10ms. We wait for a few ticks + * (TARGET_JIFFY_TICKS) to go by, to reduce noise from warm-up + * effects. We could do fancier measuring and filtering here, but so + * far this level of complexity seems sufficient. + */ +static s64 schedule_start_time_usecs(struct state *state) +{ +#ifdef linux + s64 start_usecs = 0; + clock_t last_jiffies = times(NULL); + int jiffy_ticks = 0; + const int TARGET_JIFFY_TICKS = 10; + while (jiffy_ticks < TARGET_JIFFY_TICKS) { + clock_t jiffies = times(NULL); + if (jiffies != last_jiffies) { + start_usecs = now_usecs(state); + ++jiffy_ticks; + } + last_jiffies = jiffies; + } + const int JIFFY_OFFSET_USECS = 250; + start_usecs += JIFFY_OFFSET_USECS; + return start_usecs; +#else + return now_usecs(state); +#endif +} + +/* Run final command we always execute at end of script, to clean up. If there + * is a cleanup command at the end of a packetdrill script, we execute that no + * matter whether the test passes or fails. This makes the cleanup command a + * good place to undo any sysctl settings the script changed, for example. + */ +int run_cleanup_command(void) +{ + if (cleanup_cmd != NULL && init_cmd_exed) { + char *error = NULL; + + if (safe_system(cleanup_cmd, &error)) { + fprintf(stderr, + "%s: error executing cleanup command: %s\n", + script_path, error); + free(error); + return STATUS_ERR; + } + } + return STATUS_OK; +} + +void run_script(struct config *config, struct script *script) +{ + char *error = NULL; + struct state *state = NULL; + struct netdev *netdev = NULL; + struct event *event = NULL; + + DEBUGP("run_script: running script\n"); + + set_scheduling_priority(); + lock_memory(); + + /* This interpreter loop runs for local mode or wire client mode. */ + assert(!config->is_wire_server); + + script_path = config->script_path; + + /* How we use the network is of course a little different in + * each of the two cases.... + */ + if (config->is_wire_client) + netdev = wire_client_netdev_new(config); + else if (config->so_filename) + netdev = so_netdev_new(config); + else + netdev = local_netdev_new(config); + + state = state_new(config, script, netdev); + + if (config->is_wire_client) { + state->wire_client = wire_client_new(); + wire_client_init(state->wire_client, config, script); + } + + if (config->so_filename) { + state->so_instance = so_instance_new(); + so_instance_init(state->so_instance, config, script, state); + } + + init_cmd_exed = false; + if (script->init_command != NULL) { + if (safe_system(script->init_command->command_line, + &error)) { + asprintf(&error, "%s: error executing init command: %s\n", + config->script_path, error); + free(error); + exit(EXIT_FAILURE); + } + init_cmd_exed = true; + } + + signal(SIGPIPE, SIG_IGN); /* ignore EPIPE */ + + state->live_start_time_usecs = schedule_start_time_usecs(state); + DEBUGP("live_start_time_usecs is %lld\n", + state->live_start_time_usecs); + + if (state->wire_client != NULL) + wire_client_send_client_starting(state->wire_client); + + while (1) { + if (get_next_event(state, &error)) + die("%s", error); + event = state->event; + if (event == NULL) + break; + + if (state->wire_client != NULL) + wire_client_next_event(state->wire_client, event); + + /* In wire mode, we adjust relative times after + * getting notification that previous packet events + * have completed, if any. + */ + adjust_relative_event_times(state, event); + + switch (event->type) { + case PACKET_EVENT: + /* For wire clients, the server handles packets. */ + if (!config->is_wire_client) { + run_local_packet_event(state, event, + event->event.packet); + } + break; + case SYSCALL_EVENT: + run_system_call_event(state, event, + event->event.syscall); + break; + case COMMAND_EVENT: + run_command_event(state, event, + event->event.command); + break; + case CODE_EVENT: + run_code_event(state, event, + event->event.code->text); + break; + case INVALID_EVENT: + case NUM_EVENT_TYPES: + assert(!"bogus type"); + break; + /* We omit default case so compiler catches missing values. */ + } + state->num_events++; + } + + /* Wait for any outstanding packet events we requested on the server. */ + if (state->wire_client != NULL) + wire_client_next_event(state->wire_client, NULL); + + if (run_cleanup_command() == STATUS_ERR) + exit(EXIT_FAILURE); + + if (code_execute(state->code, &error)) { + die("%s: error executing code: %s\n", + state->config->script_path, error); + free(error); + } + + state_free(state); + + DEBUGP("run_script: done running\n"); +} + +int parse_script_and_set_config(int argc, char *argv[], + struct config *config, + struct script *script, + const char *script_path, + const char *script_buffer) +{ + struct invocation invocation = { + .argc = argc, + .argv = argv, + .config = config, + .script = script, + }; + + DEBUGP("parse_and_run_script: %s\n", script_path); + assert(script_path != NULL); + + init_script(script); + + set_default_config(config); + config->script_path = strdup(script_path); + + if (script_buffer != NULL) + copy_script(script_buffer, script); + else + read_script(script_path, script); + + return parse_script(config, script, &invocation); +} -- cgit 1.2.3-korg