From 78c896b3b3127515478090c19447e27dc406427e Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Mon, 18 Nov 2019 06:59:50 +0000 Subject: TLDKv2 Signed-off-by: Jianfeng Tan Signed-off-by: Jielong Zhou Signed-off-by: Jian Zhang Signed-off-by: Chen Zhao Change-Id: I55c39de4c6cd30f991f35631eb507f770230f08e --- test/packetdrill/run_system_call.c | 3561 ++++++++++++++++++++++++++++++++++++ 1 file changed, 3561 insertions(+) create mode 100644 test/packetdrill/run_system_call.c (limited to 'test/packetdrill/run_system_call.c') diff --git a/test/packetdrill/run_system_call.c b/test/packetdrill/run_system_call.c new file mode 100644 index 0000000..8c70a27 --- /dev/null +++ b/test/packetdrill/run_system_call.c @@ -0,0 +1,3561 @@ +/* + * Copyright 2013 Google Inc. + * Copyright 2016 Red Hat Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ +/* + * Author: ncardwell@google.com (Neal Cardwell) + * + * A module to execute a system call from a test script. 
+ */ + +#include "run_system_call.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "assert.h" +#include "file.h" +#include "epoll.h" +#include "pipe.h" +#include "logging.h" +#include "run.h" +#include "script.h" +#include "icmp.h" +#include "icmpv6.h" +#include "capability.h" + +static int to_live_fd(struct state *state, int script_fd, int *live_fd, + char **error); + +static int syscall_icmp_sendto(struct state *state, + struct syscall_spec *syscall, + struct expression_list *args, char **error); + +/* Provide a wrapper for the Linux gettid() system call (glibc does not). */ +static pid_t gettid(void) +{ +#ifdef linux + return syscall(__NR_gettid); +#endif +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) + /* TODO(ncardwell): Implement me. XXX */ + return 0; +#endif /* defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)*/ +} + +/* Read a whole file into the given buffer of the given length. */ +static void read_whole_file(const char *path, char *buffer, int max_bytes) +{ + int fd = open(path, O_RDONLY); + if (fd < 0) + die_perror("open"); + + int bytes = read(fd, buffer, max_bytes); + if (bytes < 0) + die_perror("read"); + else if (bytes == max_bytes) + die("%s file too large to read\n", path); + + if (close(fd) < 0) + die_perror("close"); +} + +/* Return true iff the given thread is sleeping. */ +static bool is_thread_sleeping(pid_t process_id, pid_t thread_id) +{ + /* Read the entire thread state file, using the buffer size ps uses. 
*/ + char *proc_path = NULL; + asprintf(&proc_path, "/proc/%d/task/%d/stat", process_id, thread_id); + const int STATE_BUFFER_BYTES = 1023; + char *state = calloc(STATE_BUFFER_BYTES, 1); + read_whole_file(proc_path, state, STATE_BUFFER_BYTES - 1); + state[STATE_BUFFER_BYTES - 1] = '\0'; + + /* Parse the thread state from the third space-delimited field. */ + const int THREAD_STATE_INDEX = 3; + const char *field = state; + int i = 0; + for (i = 0; i < THREAD_STATE_INDEX - 1; i++) { + field = strchr(field, ' '); + if (field == NULL) + die("unable to parse %s\n", proc_path); + ++field; + } + bool is_sleeping = (field[0] == 'S'); + + free(proc_path); + free(state); + + return is_sleeping; +} + +/* Returns number of expressions in the list. */ +static int expression_list_length(struct expression_list *list) +{ + int count = 0; + while (list != NULL) { + list = list->next; + ++count; + } + return count; +} + +static int get_arg_count(struct expression_list *args) +{ + return expression_list_length(args); +} + +/* Verify that the expression list has the expected number of + * expressions. Returns STATUS_OK on success; on failure returns + * STATUS_ERR and sets error message. + */ +static int check_arg_count(struct expression_list *args, int expected, + char **error) +{ + assert(expected >= 0); + int actual = get_arg_count(args); + if (actual != expected) { + asprintf(error, "Expected %d args but got %d", expected, + actual); + return STATUS_ERR; + } + return STATUS_OK; +} + +/* Returns the argument with the given index. Returns the argument on + * success; on failure returns NULL and sets error message. 
+ */ +static struct expression *get_arg(struct expression_list *args, + int index, char **error) +{ + assert(index >= 0); + int current = 0; + while ((args != NULL) && (current < index)) { + args = args->next; + ++current; + } + if ((args != NULL) && (current == index)) { + return args->expression; + } else { + asprintf(error, "Argument list too short"); + return NULL; + } +} + +/* Return STATUS_OK if the expression is of the expected + * type. Otherwise fill in the error with a human-readable error + * message about the mismatch and return STATUS_ERR. + */ +static int check_type(const struct expression *expression, + enum expression_t expected_type, + char **error) +{ + if (expression->type == expected_type) { + return STATUS_OK; + } else { + asprintf(error, "Bad type; actual: %s expected: %s", + expression_type_to_string(expression->type), + expression_type_to_string(expected_type)); + return STATUS_ERR; + } +} + +/* Sets the value from the expression argument, checking that it is a + * valid s32 or u32, and matches the expected type. Returns STATUS_OK on + * success; on failure returns STATUS_ERR and sets error message. + */ +static int get_s32(struct expression *expression, + s32 *value, char **error) +{ + if (check_type(expression, EXPR_INTEGER, error)) + return STATUS_ERR; + if ((expression->value.num > UINT_MAX) || + (expression->value.num < INT_MIN)) { + asprintf(error, + "Value out of range for 32-bit integer: %lld", + expression->value.num); + return STATUS_ERR; + } + *value = expression->value.num; + return STATUS_OK; +} + +/* Sets the value from the expression argument, checking that it matches the + * expected type. Returns STATUS_OK on success; on failure returns STATUS_ERR + * and sets error message. 
+ */ +static int get_s64(struct expression *expression, + s64 *value, char **error) +{ + if (check_type(expression, EXPR_INTEGER, error)) + return STATUS_ERR; + *value = expression->value.num; + return STATUS_OK; +} + +/* Return the value of the argument with the given index, and verify + * that it has the expected type. + */ +static int s32_arg(struct expression_list *args, + int index, s32 *value, char **error) +{ + struct expression *expression = get_arg(args, index, error); + if (expression == NULL) + return STATUS_ERR; + return get_s32(expression, value, error); +} + +/* Return the value of the argument with the given index, and verify + * that it has the expected type. + */ +static int s64_arg(struct expression_list *args, + int index, s64 *value, char **error) +{ + struct expression *expression = get_arg(args, index, error); + if (expression == NULL) + return STATUS_ERR; + return get_s64(expression, value, error); +} + +/* Return the value of the argument with the given index, and verify + * that it has the expected type: a list with a single integer. + */ +static int bracketed_arg(struct expression_list *args, + int index, struct expression **elt, char **error) +{ + struct expression_list *list; + struct expression *expression; + + *elt = NULL; + expression = get_arg(args, index, error); + if (expression == NULL) + return STATUS_ERR; + if (check_type(expression, EXPR_LIST, error)) + return STATUS_ERR; + list = expression->value.list; + if (expression_list_length(list) != 1) { + asprintf(error, + "Expected [] but got multiple elements"); + return STATUS_ERR; + } + *elt = list->expression; + return STATUS_OK; +} + +/* Return the value of the argument with the given index, and verify + * that it has the expected type: a list with a single s32. 
+ */ +static int s32_bracketed_arg(struct expression_list *args, + int index, s32 *value, char **error) +{ + struct expression *expression = NULL; + + if (bracketed_arg(args, index, &expression, error)) + return STATUS_ERR; + return get_s32(expression, value, error); +} + +/* Return the value of the argument with the given index, and verify + * that it has the expected type: a list with a single s64. + */ +static int s64_bracketed_arg(struct expression_list *args, + int index, s64 *value, char **error) +{ + struct expression *expression = NULL; + + if (bracketed_arg(args, index, &expression, error)) + return STATUS_ERR; + return get_s64(expression, value, error); +} + +/* Return STATUS_OK iff the argument with the given index is an + * ellipsis (...). + */ +static int ellipsis_arg(struct expression_list *args, int index, char **error) +{ + struct expression *expression = get_arg(args, index, error); + if (expression == NULL) + return STATUS_ERR; + if (check_type(expression, EXPR_ELLIPSIS, error)) + return STATUS_ERR; + return STATUS_OK; +} + +/* Free all the space used by the given iovec. */ +static void iovec_free(struct iovec *iov, size_t iov_len) +{ + int i; + + if (iov == NULL) + return; + + for (i = 0; i < iov_len; ++i) + free(iov[i].iov_base); + free(iov); +} + +/* Allocate and fill in an iovec described by the given expression. + * Return STATUS_OK if the expression is a valid iovec. Otherwise + * fill in the error with a human-readable error message and return + * STATUS_ERR. 
+ */ +static int iovec_new(struct expression *expression, + struct iovec **iov_ptr, size_t *iov_len_ptr, + char **error) +{ + int status = STATUS_ERR; + int i; + struct expression_list *list; /* input expression from script */ + size_t iov_len = 0; + struct iovec *iov = NULL; /* live output */ + + if (check_type(expression, EXPR_LIST, error)) + goto error_out; + + list = expression->value.list; + + iov_len = expression_list_length(list); + iov = calloc(iov_len, sizeof(struct iovec)); + + for (i = 0; i < iov_len; ++i, list = list->next) { + size_t len; + struct iovec_expr *iov_expr; + + if (check_type(list->expression, EXPR_IOVEC, error)) + goto error_out; + + iov_expr = list->expression->value.iovec; + + assert(iov_expr->iov_base->type == EXPR_ELLIPSIS); + assert(iov_expr->iov_len->type == EXPR_INTEGER); + + len = iov_expr->iov_len->value.num; + + iov[i].iov_len = len; + iov[i].iov_base = calloc(len, 1); + } + + status = STATUS_OK; + +error_out: + *iov_ptr = iov; + *iov_len_ptr = iov_len; + return status; +} + +static bool sendcall_may_free(struct state *state) +{ + return !state->config->send_omit_free; +} + +static void sendcall_free(struct state *state, void *ptr) +{ + if (sendcall_may_free(state)) + free(ptr); +} + +static inline int list_length(struct expression_list *list) +{ + int length = 0; + while (list) { + length++; + list = list->next; + } + return length; +} + +int add_nla(void *dst, int type, int len, const void *data) +{ + struct nlattr *nla = (struct nlattr *) dst; + int attr_size = NLA_HDRLEN + len; + int total_size = NLA_ALIGN(attr_size); + + nla->nla_type = type; + nla->nla_len = attr_size; + memcpy(dst + NLA_HDRLEN, data, len); + memset(dst + attr_size, 0, total_size - attr_size); + + return total_size; +} + +/* Returns whether the NLA value is valid. 
*/ +static bool nla_value_is_valid(enum expression_t type) +{ + return type == EXPR_INTEGER || type == EXPR_ELLIPSIS; +} + +#define OPT_NLA_IGNORE_VAL (~0U) +#define OPT_NLA_IGNORE_VAL_U32 ((u32) OPT_NLA_IGNORE_VAL) +#define OPT_NLA_IGNORE_VAL_U8 ((u8) OPT_NLA_IGNORE_VAL) + +/* Fills in the value of a TLV expression. */ +static void get_nla_value(const struct expression *expr, void *out_buf, + int num_bytes) +{ + u64 val; + + val = (expr->type == EXPR_INTEGER) ? expr->value.num + : OPT_NLA_IGNORE_VAL; + memcpy(out_buf, &val, num_bytes); +} + +/* Fill in the expected values of from 'expr', which is a list of binary + * expressions of the form: key = val. + */ +static int nla_expr_list_to_nla(struct expression_list *list, + void *dst, int *len, + struct nla_type_info *nla_info, char **error) +{ + struct expression *element, *key, *value; + void *start = dst; + u64 val; /* each value uses some prefix of this space */ + s64 key_num, val_num; + int num_bytes; + + for (; list; list = list->next) { + element = list->expression; + + if (check_type(element, EXPR_BINARY, error)) + return STATUS_ERR; + + if (strcmp("=", element->value.binary->op) != 0) + return STATUS_ERR; + + key = element->value.binary->lhs; + value = element->value.binary->rhs; + if (check_type(key, EXPR_INTEGER, error)) + return STATUS_ERR; + if (!nla_value_is_valid(value->type)) { + asprintf(error, + "values must be numeric or ellipsis"); + return STATUS_ERR; + } + + key_num = key->value.num; + val_num = value->value.num; + num_bytes = nla_info[key_num].length; + if (num_bytes == sizeof(u8) && + value->type == EXPR_INTEGER && !is_valid_u8(val_num)) + die("out of bound u8 value specified\n"); + else if (num_bytes == sizeof(u32) && + value->type == EXPR_INTEGER && !is_valid_u32(val_num)) + die("out of bound u32 value specified\n"); + + get_nla_value(value, &val, num_bytes); + dst += add_nla(dst, key_num, nla_info[key_num].length, &val); + } + + *len = dst - start; + return STATUS_OK; +} + +/* Fill in the 
values of sock_extended_err structure from the expression. */ +static int new_extended_err(const struct sock_extended_err_expr *expr, + struct sock_extended_err *ee, char **error) +{ + if (get_s32(expr->ee_errno, (s32 *)&ee->ee_errno, error)) + return STATUS_ERR; + if (get_s32(expr->ee_origin, (s32 *)&ee->ee_origin, error)) + return STATUS_ERR; + if (get_s32(expr->ee_type, (s32 *)&ee->ee_type, error)) + return STATUS_ERR; + if (get_s32(expr->ee_code, (s32 *)&ee->ee_code, error)) + return STATUS_ERR; + if (get_s32(expr->ee_info, (s32 *)&ee->ee_info, error)) + return STATUS_ERR; + if (get_s32(expr->ee_data, (s32 *)&ee->ee_data, error)) + return STATUS_ERR; + + return STATUS_OK; +} + +/* Info for various TCP NLAs */ +struct nla_type_info tcp_nla[] = { + [_TCP_NLA_PAD] = {"TCP_NLA_PAD", sizeof(u32)}, + [_TCP_NLA_BUSY] = {"TCP_NLA_BUSY", sizeof(u64)}, + [_TCP_NLA_RWND_LIMITED] = {"TCP_NLA_RWND_LIMITED", sizeof(u64)}, + [_TCP_NLA_SNDBUF_LIMITED] = {"TCP_NLA_SNDBUF_LIMITED", sizeof(u64)}, + [_TCP_NLA_DATA_SEGS_OUT] = {"TCP_NLA_DATA_SEGS_OUT", sizeof(u64)}, + [_TCP_NLA_TOTAL_RETRANS] = {"TCP_NLA_TOTAL_RETRANS", sizeof(u64)}, + [_TCP_NLA_PACING_RATE] = {"TCP_NLA_PACING_RATE", sizeof(u64)}, + [_TCP_NLA_DELIVERY_RATE] = {"TCP_NLA_DELIVERY_RATE", sizeof(u64)}, + [_TCP_NLA_SND_CWND] = {"TCP_NLA_SND_CWND", sizeof(u32)}, + [_TCP_NLA_REORDERING] = {"TCP_NLA_REORDERING", sizeof(u32)}, + [_TCP_NLA_MIN_RTT] = {"TCP_NLA_MIN_RTT", sizeof(u32)}, + [_TCP_NLA_RECUR_RETRANS] = {"TCP_NLA_RECUR_RETRANS", sizeof(u8)}, + [_TCP_NLA_DELIVERY_RATE_APP_LMT] = {"TCP_NLA_DELIVERY_RATE_APP_LMT", + sizeof(u8)}, + [_TCP_NLA_SNDQ_SIZE] = {"TCP_NLA_SNDQ_SIZE", sizeof(u32)}, + [_TCP_NLA_CA_STATE] = {"TCP_NLA_CA_STATE", sizeof(u8)}, +}; + +/* Allocate and fill a msg_control described by the given expression. + * Return STATUS_OK if the expression is a valid msg_control. + * Otherwise fill in the error with a human-readable error message and + * return STATUS_ERR. 
+ */ +static int cmsg_new(const struct expression *expr, struct msghdr *msg, + char **error) +{ + int status = STATUS_ERR; + int len, sum = 0; + const struct expression_list *list; + const struct cmsg_expr *cmsg_expr; + struct sock_extended_err_expr *ee_expr; + struct expression_list *stats_expr; + struct cmsghdr *cmsg; + void *data; + + assert(expr->type == EXPR_LIST); + + msg->msg_control = calloc(1, MSGHDR_MAX_CONTROLLEN); + msg->msg_controllen = MSGHDR_MAX_CONTROLLEN; + + cmsg = CMSG_FIRSTHDR(msg); + + for (list = expr->value.list; list; list = list->next) { + expr = list->expression; + if (check_type(expr, EXPR_CMSG, error)) + goto error_out; + + cmsg_expr = expr->value.cmsg; + if (get_s32(cmsg_expr->cmsg_level, &cmsg->cmsg_level, error)) + goto error_out; + if (get_s32(cmsg_expr->cmsg_type, &cmsg->cmsg_type, error)) + goto error_out; + + data = CMSG_DATA(cmsg); + + switch (cmsg_expr->cmsg_data->type) { + case EXPR_INTEGER: + len = sizeof(int); + if (get_s32(cmsg_expr->cmsg_data, data, error)) + goto error_out; + break; + + case EXPR_SCM_TIMESTAMPING: + len = sizeof(struct scm_timestamping); + memcpy(data, + cmsg_expr->cmsg_data->value.scm_timestamping, + len); + break; + + case EXPR_LIST: + stats_expr = cmsg_expr->cmsg_data->value.list; + if (nla_expr_list_to_nla(stats_expr, data, &len, + tcp_nla, error)) + goto error_out; + break; + + case EXPR_SOCK_EXTENDED_ERR: + /* ip(v6)_recv_error returns a struct defined in + * function scope that appends a sockaddr. 
+ */ + len = sizeof(struct sock_extended_err); + if (cmsg->cmsg_level == SOL_IP) + len += sizeof(struct sockaddr_in); + else + len += sizeof(struct sockaddr_in6); + + ee_expr = cmsg_expr->cmsg_data->value.sock_extended_err; + if (new_extended_err(ee_expr, + (struct sock_extended_err *)data, + error)) + goto error_out; + break; + + default: + asprintf(error, "Unrecognized type for cmsg_data"); + goto error_out; + } + + cmsg->cmsg_len = CMSG_LEN(len); + sum += CMSG_SPACE(len); + + cmsg = CMSG_NXTHDR(msg, cmsg); + } + + status = STATUS_OK; + +error_out: + msg->msg_controllen = sum; + + return status; +} + +/* Check if the sock_extended_err structure is the same as expected. */ +static bool sock_ee_expect_eq(struct sock_extended_err *expected, + struct sock_extended_err *actual, int index, + char **error) { + if (actual->ee_errno != expected->ee_errno) { + asprintf(error, + "Bad errno in extended err %d: " + "expected=%u actual=%u", + index, expected->ee_errno, actual->ee_errno); + return false; + } + if (actual->ee_origin != expected->ee_origin) { + asprintf(error, + "Bad origin in extended err %d: " + "expected=%u actual=%u", + index, expected->ee_origin, actual->ee_origin); + return false; + } + if (actual->ee_type != expected->ee_type) { + asprintf(error, + "Bad type in extended err %d: " + "expected=%u actual=%u", + index, expected->ee_type, actual->ee_type); + return false; + } + if (actual->ee_code != expected->ee_code) { + asprintf(error, + "Bad code in extended err %d: " + "expected=%u actual=%u", + index, expected->ee_code, actual->ee_code); + return false; + } + if (actual->ee_info != expected->ee_info) { + asprintf(error, + "Bad info in extended err %d: " + "expected=%u actual=%u", + index, expected->ee_info, actual->ee_info); + return false; + } + if (actual->ee_data != expected->ee_data) { + asprintf(error, + "Bad data in extended err %d: " + "expected=%u actual=%u", + index, expected->ee_data, actual->ee_data); + return false; + } + return true; +} + +/* 
Convert a timespec to usecs. */ +static s64 timespec_to_usecs(struct timespec *ts) +{ + if (ts == NULL) + return -1; + return (s64)ts->tv_sec * 1000000 + ts->tv_nsec / 1000; +} + +/* Check if the scm_timestamping is the same as expected. */ +static bool scm_timestamping_expect_eq(struct state *state, + struct scm_timestamping *expected, + struct scm_timestamping *actual, + int index, char **error) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(expected->ts); i++) { + /* ignore the timestamps, if expected is 0. */ + if (!expected->ts[i].tv_sec && !expected->ts[i].tv_nsec) + continue; + + s64 exp_usecs = script_time_to_live_time_usecs(state, + timespec_to_usecs(&expected->ts[i])); + s64 actual_usecs = timespec_to_usecs(&actual->ts[i]); + /* difference exceeds configured timing tolerance */ + if (llabs(exp_usecs - actual_usecs) > + state->config->tolerance_usecs) { + asprintf(error, + "Bad timestamp %d in scm_timestamping %d: " + "expected=%lld (%lld) actual=%lld (%lld) " + "start=%lld", + i, index, + exp_usecs, + exp_usecs - state->live_start_time_usecs, + actual_usecs, + actual_usecs - state->live_start_time_usecs, + state->live_start_time_usecs); + return false; + } + } + return true; +} + +/* Check the stats of SCM_TIMESTAMPING_OPT_STATS */ +static bool scm_opt_stats_expect_eq(struct state *state, + void *expected, + void *actual, + int len, + int index, char **error) +{ + int tolerance_us = state->config->tolerance_usecs; + int offset = 0; + u64 ev, av; + u32 ev_u32, av_u32; + u8 ev_u8, av_u8; + + while (offset < len) { + struct nlattr *enla = (struct nlattr *) (expected + offset); + struct nlattr *anla = (struct nlattr *) (actual + offset); + + if (enla->nla_type != anla->nla_type) { + asprintf(error, + "Bad nla_type %d: " + "expected=%u actual=%u", index, + enla->nla_type, anla->nla_type); + return false; + } + + if (enla->nla_len != anla->nla_len) { + asprintf(error, + "Bad nla_len %d: " + "expected=%u actual=%u", index, + enla->nla_len, anla->nla_len); + return 
false; + } + + switch (enla->nla_type) { + case _TCP_NLA_BUSY: + case _TCP_NLA_RWND_LIMITED: + case _TCP_NLA_SNDBUF_LIMITED: + ev = *(u64 *) ((void *) enla + NLA_HDRLEN); + av = *(u64 *) ((void *) anla + NLA_HDRLEN); + + if (ev == OPT_NLA_IGNORE_VAL) { + break; + } else if (ev) { + if (llabs((s64)(ev - av)) <= tolerance_us) + break; + } else if (!av) { /* Be precise about 0s */ + break; + } + + asprintf(error, "Bad %s: expected=%llu actual=%llu", + tcp_nla[enla->nla_type].name, ev, av); + return false; + case _TCP_NLA_DATA_SEGS_OUT: + case _TCP_NLA_TOTAL_RETRANS: + case _TCP_NLA_PACING_RATE: + case _TCP_NLA_DELIVERY_RATE: + ev = *(u64 *) ((void *) enla + NLA_HDRLEN); + av = *(u64 *) ((void *) anla + NLA_HDRLEN); + if (ev == av || ev == OPT_NLA_IGNORE_VAL) + break; + + asprintf(error, "Bad %s: expected=%llu actual=%llu", + tcp_nla[enla->nla_type].name, ev, av); + return false; + case _TCP_NLA_SND_CWND: + case _TCP_NLA_REORDERING: + case _TCP_NLA_MIN_RTT: + case _TCP_NLA_SNDQ_SIZE: + ev_u32 = *(u32 *) ((void *) enla + NLA_HDRLEN); + av_u32 = *(u32 *) ((void *) anla + NLA_HDRLEN); + if (ev_u32 == av_u32 || + ev_u32 == OPT_NLA_IGNORE_VAL_U32) + break; + + asprintf(error, "Bad %s: expected=%u actual=%u", + tcp_nla[enla->nla_type].name, ev_u32, av_u32); + return false; + + case _TCP_NLA_RECUR_RETRANS: + case _TCP_NLA_DELIVERY_RATE_APP_LMT: + case _TCP_NLA_CA_STATE: + ev_u8 = *(u8 *) ((void *) enla + NLA_HDRLEN); + av_u8 = *(u8 *) ((void *) anla + NLA_HDRLEN); + if (ev_u8 == av_u8 || + ev_u8 == OPT_NLA_IGNORE_VAL_U8) + break; + + asprintf(error, "Bad %s: expected=%u actual=%u", + tcp_nla[enla->nla_type].name, ev_u8, av_u8); + return false; + + default: + return false; + } + + offset += NLA_ALIGN(enla->nla_len); + } + + return true; +} + +/* Check if the cmsg in actual is the same as the one in expected. 
*/ +static bool cmsg_expect_eq(struct state *state, struct msghdr *expect, + struct msghdr *actual, char **error) +{ + int i = 0; + const size_t hdr_len = CMSG_ALIGN(sizeof(struct cmsghdr)); + struct cmsghdr *acm = NULL, *ecm = NULL; + void *adata = NULL, *edata = NULL; + + for (acm = CMSG_FIRSTHDR(actual), ecm = CMSG_FIRSTHDR(expect); + acm && ecm && acm->cmsg_len && ecm->cmsg_len; + acm = CMSG_NXTHDR(actual, acm), ecm = CMSG_NXTHDR(expect, ecm), + i++) { + if (acm->cmsg_level != ecm->cmsg_level) { + asprintf(error, + "Bad level in cmsg %d: expected=%d actual=%d", + i, ecm->cmsg_level, acm->cmsg_level); + return false; + } + if (acm->cmsg_type != ecm->cmsg_type) { + asprintf(error, + "Bad type in cmsg %d: expected=%d actual=%d", + i, ecm->cmsg_type, acm->cmsg_type); + return false; + } + if (acm->cmsg_len != ecm->cmsg_len) { + asprintf(error, + "Bad len in cmsg %d: expected=%lu actual=%lu", + i, ecm->cmsg_len, acm->cmsg_len); + return false; + } + + edata = CMSG_DATA(ecm); + adata = CMSG_DATA(acm); + if (!edata && !adata) + continue; + + if (!edata) { + asprintf(error, + "Bad data in cmsg %d: " + "expected is null, actual is not null", i); + return false; + } else if (!adata) { + asprintf(error, + "Bad data in cmsg %d: " + "expected is not null, actual is null", i); + return false; + } + + if ((acm->cmsg_level == SOL_IP && + acm->cmsg_type == IP_RECVERR) || + (acm->cmsg_level == SOL_IPV6 && + acm->cmsg_type == IPV6_RECVERR)) { + struct sock_extended_err *eee = edata; + struct sock_extended_err *aee = adata; + if (!sock_ee_expect_eq(eee, aee, i, error)) + return false; + } else if (acm->cmsg_level == SOL_SOCKET && + acm->cmsg_type == SCM_TIMESTAMPING) { + struct scm_timestamping *ets = edata; + struct scm_timestamping *ats = adata; + if (!scm_timestamping_expect_eq(state, ets, ats, i, + error)) + return false; + } else if (acm->cmsg_level == SOL_SOCKET && + acm->cmsg_type == SCM_TIMESTAMPING_OPT_STATS) { + if (!scm_opt_stats_expect_eq(state, edata, adata, + 
acm->cmsg_len - hdr_len, + i, error)) + return false; + } else if (memcmp((char *)adata, /* byte-to-byte */ + (char *)edata, acm->cmsg_len - hdr_len)) { + asprintf(error, + "Bad data in cmsg %d: expected=%s actual=%s", + i, (char *)edata, (char *)adata); + return false; + } + } + + if (!acm && !ecm) + return true; + if (acm && !ecm) { + asprintf(error, "received more than %d cmsgs", i); + return false; + } + if (!acm && ecm) { + asprintf(error, "received only %d cmsgs", i); + return false; + } + asprintf(error, "cmsgs do not match"); + return false; +} + +/* Free all the space used by the given msghdr. */ +static void msghdr_free(struct msghdr *msg, size_t iov_len) +{ + if (msg == NULL) + return; + + free(msg->msg_name); + iovec_free(msg->msg_iov, iov_len); + free(msg->msg_control); +} + +/* Allocate and fill in a msghdr described by the given expression. */ +static int msghdr_new(struct expression *expression, + struct msghdr **msg_ptr, size_t *iov_len_ptr, + char **error) +{ + int status = STATUS_ERR; + s32 s32_val = 0; + struct msghdr_expr *msg_expr; /* input expression from script */ + socklen_t name_len = sizeof(struct sockaddr_storage); + struct msghdr *msg = NULL; /* live output */ + + if (check_type(expression, EXPR_MSGHDR, error)) + goto error_out; + + msg_expr = expression->value.msghdr; + + msg = calloc(1, sizeof(struct msghdr)); + + if (msg_expr->msg_name != NULL) { + assert(msg_expr->msg_name->type == EXPR_ELLIPSIS); + msg->msg_name = calloc(1, name_len); + } + + if (msg_expr->msg_namelen != NULL) { + assert(msg_expr->msg_namelen->type == EXPR_ELLIPSIS); + msg->msg_namelen = name_len; + } + + if (msg_expr->msg_iov != NULL) { + if (iovec_new(msg_expr->msg_iov, &msg->msg_iov, iov_len_ptr, + error)) + goto error_out; + } + + if (msg_expr->msg_iovlen != NULL) { + if (get_s32(msg_expr->msg_iovlen, &s32_val, error)) + goto error_out; + msg->msg_iovlen = s32_val; + } + + if (msg->msg_iovlen != *iov_len_ptr) { + asprintf(error, + "msg_iovlen %d does not match 
%d-element iovec array", + (int)msg->msg_iovlen, (int)*iov_len_ptr); + goto error_out; + } + + if (msg_expr->msg_control != NULL) { + if (cmsg_new(msg_expr->msg_control, msg, error)) + goto error_out; + } + + if (msg_expr->msg_flags != NULL) { + if (get_s32(msg_expr->msg_flags, &s32_val, error)) + goto error_out; + msg->msg_flags = s32_val; + } + + status = STATUS_OK; + +error_out: + *msg_ptr = msg; + return status; +} + +/* Allocate and fill in a pollfds array described by the given + * fds_expression. Return STATUS_OK if the expression is a valid + * pollfd struct array. Otherwise fill in the error with a + * human-readable error message and return STATUS_ERR. + */ +static int pollfds_new(struct state *state, + struct expression *fds_expression, + struct pollfd **fds_ptr, size_t *fds_len_ptr, + char **error) +{ + int status = STATUS_ERR; + int i; + struct expression_list *list; /* input expression from script */ + size_t fds_len = 0; + struct pollfd *fds = NULL; /* live output */ + + if (check_type(fds_expression, EXPR_LIST, error)) + goto error_out; + + list = fds_expression->value.list; + + fds_len = expression_list_length(list); + fds = calloc(fds_len, sizeof(struct pollfd)); + + for (i = 0; i < fds_len; ++i, list = list->next) { + struct pollfd_expr *fds_expr; + + if (check_type(list->expression, EXPR_POLLFD, error)) + goto error_out; + + fds_expr = list->expression->value.pollfd; + + if (check_type(fds_expr->fd, EXPR_INTEGER, error)) + goto error_out; + if (check_type(fds_expr->events, EXPR_INTEGER, error)) + goto error_out; + if (check_type(fds_expr->revents, EXPR_INTEGER, error)) + goto error_out; + + if (to_live_fd(state, fds_expr->fd->value.num, + &fds[i].fd, error)) + goto error_out; + + fds[i].events = fds_expr->events->value.num; + fds[i].revents = fds_expr->revents->value.num; + } + + status = STATUS_OK; + +error_out: + *fds_ptr = fds; + *fds_len_ptr = fds_len; + return status; +} + +/* Check the results of a poll() system call: check that the output 
+ * revents fields in the fds array match those in the script. Return + * STATUS_OK if they match. Otherwise fill in the error with a + * human-readable error message and return STATUS_ERR. + */ +static int pollfds_check(struct expression *fds_expression, + const struct pollfd *fds, size_t fds_len, + char **error) +{ + struct expression_list *list; /* input expression from script */ + int i; + + assert(fds_expression->type == EXPR_LIST); + list = fds_expression->value.list; + + for (i = 0; i < fds_len; ++i, list = list->next) { + struct pollfd_expr *fds_expr; + int expected_revents, actual_revents; + + assert(list->expression->type == EXPR_POLLFD); + fds_expr = list->expression->value.pollfd; + + assert(fds_expr->fd->type == EXPR_INTEGER); + assert(fds_expr->events->type == EXPR_INTEGER); + assert(fds_expr->revents->type == EXPR_INTEGER); + + expected_revents = fds_expr->revents->value.num; + actual_revents = fds[i].revents; + if (actual_revents != expected_revents) { + char *expected_revents_string = + flags_to_string(poll_flags, + expected_revents); + char *actual_revents_string = + flags_to_string(poll_flags, + actual_revents); + asprintf(error, + "Expected revents of %s but got %s " + "for pollfd %d", + expected_revents_string, + actual_revents_string, + i); + free(expected_revents_string); + free(actual_revents_string); + return STATUS_ERR; + } + } + return STATUS_OK; +} + +/* For blocking system calls, give up the global lock and wake the + * main thread so it can continue test execution. Callers should call + * this function immediately before calling a system call in order to + * release the global lock immediately before a system call that the + * script expects to block. 
+ */ +static void begin_syscall(struct state *state, struct syscall_spec *syscall) +{ + if (is_blocking_syscall(syscall)) { + assert(state->syscalls->state == SYSCALL_ENQUEUED); + state->syscalls->state = SYSCALL_RUNNING; + run_unlock(state); + DEBUGP("syscall thread: begin_syscall signals dequeued\n"); + if (pthread_cond_signal(&state->syscalls->dequeued) != 0) + die_perror("pthread_cond_signal"); + } +} + +/* Verify that the system call returned the expected result code and + * errno value. Returns STATUS_OK on success; on failure returns + * STATUS_ERR and sets error message. Callers should call this function + * immediately after returning from a system call in order to immediately + * re-grab the global lock if this is a blocking call. + */ +enum result_check_t { + CHECK_EXACT, /* check that result matches exactly */ + CHECK_FD, /* check that result is fd or matching error */ +}; +static int end_syscall(struct state *state, struct syscall_spec *syscall, + enum result_check_t mode, int actual, char **error) +{ + int actual_errno = errno; /* in case we clobber this later */ + s32 expected = 0; + + /* For blocking calls, advance state and reacquire the global lock. 
*/ + if (is_blocking_syscall(syscall)) { + s64 live_end_usecs = now_usecs(state); + DEBUGP("syscall thread: end_syscall grabs lock\n"); + run_lock(state); + state->syscalls->live_end_usecs = live_end_usecs; + assert(state->syscalls->state == SYSCALL_RUNNING); + state->syscalls->state = SYSCALL_DONE; + } + if (state->config->verbose) { + printf("%s syscall: %9.6f\n", syscall->name, + usecs_to_secs(now_usecs(state))); + } + + + /* Compare actual vs expected return value */ + if (get_s32(syscall->result, &expected, error)) + return STATUS_ERR; + if (mode == CHECK_FD && expected >= 0) { + if (actual < 0) { + asprintf(error, + "Expected non-negative result but got %d " + "with errno %d (%s)", + actual, actual_errno, strerror(actual_errno)); + return STATUS_ERR; + } + } else if (mode == CHECK_FD || mode == CHECK_EXACT) { + if (actual != expected) { + asprintf(error, + "Expected result %d but got %d " + "with errno %d (%s)", + expected, + actual, actual_errno, strerror(actual_errno)); + return STATUS_ERR; + } + } else { + assert(!"bad mode"); + } + + /* Compare actual vs expected errno */ + if (syscall->error != NULL) { + s64 expected_errno = 0; + if (symbol_to_int(syscall->error->errno_macro, + &expected_errno, error)) + return STATUS_ERR; + if (actual_errno != expected_errno) { + asprintf(error, + "Expected errno %d (%s) but got %d (%s)", + (int)expected_errno, strerror(expected_errno), + actual_errno, strerror(actual_errno)); + return STATUS_ERR; + } + } + + return STATUS_OK; +} + +/* Return a pointer to the fd with the given script fd, or NULL. */ +static struct fd_state *find_by_script_fd( + struct state *state, int script_fd) +{ + struct fd_state *fd = NULL; + + for (fd = state->fds; fd != NULL; fd = fd->next) + if (!fd->is_closed && (fd->script_fd == script_fd)) { + assert(fd->live_fd >= 0); + assert(fd->script_fd >= 0); + return fd; + } + return NULL; +} + +/* Return a pointer to the fd with the given live fd, or NULL. 
*/ +static struct fd_state *find_by_live_fd( + struct state *state, int live_fd) +{ + struct fd_state *fd = NULL; + + for (fd = state->fds; fd != NULL; fd = fd->next) + if (!fd->is_closed & (fd->live_fd == live_fd)) { + assert(fd->live_fd >= 0); + assert(fd->script_fd >= 0); + return fd; + } + return NULL; +} + +/* Find the live fd corresponding to the fd in a script. Returns + * STATUS_OK on success; on failure returns STATUS_ERR and sets + * error message. + */ +static int to_live_fd(struct state *state, int script_fd, int *live_fd, + char **error) +{ + struct fd_state *fd = find_by_script_fd(state, script_fd); + + if (fd != NULL) { + *live_fd = fd->live_fd; + return STATUS_OK; + } else { + *live_fd = -1; + asprintf(error, "unable to find fd with script fd %d", + script_fd); + return STATUS_ERR; + } +} + +/* Look for conflicting fds. Should not happen if the script is valid and this + * program is bug-free. + */ +static int check_duplicate_fd(struct state *state, int script_fd, int live_fd, + char **error) +{ + if (find_by_script_fd(state, script_fd)) { + asprintf(error, "duplicate fd %d in script", + script_fd); + return STATUS_ERR; + } + if (find_by_live_fd(state, live_fd)) { + asprintf(error, "duplicate live fd %d", live_fd); + return STATUS_ERR; + } + + return STATUS_OK; +} + +/* Parse the argument with the given index + * Set *is_null to true if arg is 0 (NULL) + * Set *is_null to false if arg is ellipsis (...) + * Return error if arg is neither of the above + */ +static int buffer_arg(struct expression_list *args, int index, + bool *is_null, char **error) +{ + struct expression *expression = get_arg(args, index, error); + + if (expression && expression->type == EXPR_ELLIPSIS) { + *is_null = false; + return STATUS_OK; + } + if (expression && expression->type == EXPR_INTEGER && + expression->value.num == 0) { + *is_null = true; + return STATUS_OK; + } + asprintf(error, "Expected ... 
/* Allocate the scratch buffer passed to a live read/write-style call.
 *
 * is_null:  the script passed NULL as the buffer; return NULL.
 * count:    requested size in bytes; must be >= 0.
 * set_zero: zero-fill the buffer (calloc) rather than leaving it
 *           uninitialized (malloc).
 *
 * Returns NULL only when is_null is true; otherwise returns a heap
 * buffer that the caller must free().
 */
static void *alloc_buffer(bool is_null, int count, bool set_zero)
{
	size_t size;
	void *buf;

	if (is_null)
		return NULL;

	/* A negative count used to be caught only because the resulting
	 * huge allocation failed; reject it explicitly.
	 */
	assert(count >= 0);

	/* malloc(0)/calloc(0, 1) may legitimately return NULL, which
	 * would be indistinguishable from the "script passed NULL" case
	 * and would trip the assertion below. Always allocate >= 1 byte.
	 */
	size = (count > 0) ? (size_t)count : 1;

	if (set_zero)
		buf = calloc(size, 1);
	else
		buf = malloc(size);
	assert(buf != NULL);
	return buf;
}
*/ + struct socket *socket = socket_new(state); + socket->state = SOCKET_NEW; + socket->address_family = address_family; + socket->type = type; + socket->protocol = protocol; + socket->fd.script_fd = script_fd; + socket->fd.live_fd = live_fd; + + /* Any later packets in the test script will now be mapped here. */ + state->socket_under_test = socket; + + DEBUGP("socket() creating new socket: script_fd: %d live_fd: %d\n", + socket->fd.script_fd, socket->fd.live_fd); + return STATUS_OK; +} + +/* Handle a close() call for the given fd. + * Returns STATUS_OK on success; on failure returns STATUS_ERR and + * sets error message. + */ +static int run_syscall_close(struct state *state, int script_fd, + int live_fd, char **error) +{ + struct fd_state *fd = find_by_script_fd(state, script_fd); + if ((fd == NULL) || (fd->live_fd != live_fd)) + goto error_out; + + fd->is_closed = true; + return STATUS_OK; + +error_out: + asprintf(error, + "unable to find fd with script fd %d and live fd %d", + script_fd, live_fd); + return STATUS_ERR; +} + +/* Fill in the live_addr and live_addrlen for a bind() call. + * Returns STATUS_OK on success; on failure returns STATUS_ERR and + * sets error message. + */ +static int run_syscall_bind(struct state *state, + struct sockaddr *live_addr, + socklen_t *live_addrlen, char **error) +{ + DEBUGP("run_syscall_bind\n"); + + /* Fill in the live address we want to bind to */ + ip_to_sockaddr(&state->config->live_bind_ip, + state->config->live_bind_port, + live_addr, live_addrlen); + + return STATUS_OK; +} + +/* Handle a listen() call for the given socket. + * Returns STATUS_OK on success; on failure returns STATUS_ERR and + * sets error message. 
+ */ +static int run_syscall_listen(struct state *state, int script_fd, + int live_fd, char **error) +{ + struct socket *socket = NULL; + + socket = fd_to_socket(find_by_script_fd(state, script_fd)); + if (socket != NULL) { + assert(socket->fd.script_fd == script_fd); + assert(socket->fd.live_fd == live_fd); + socket->state = SOCKET_PASSIVE_LISTENING; + return STATUS_OK; + } else { + asprintf(error, "unable to find socket with script fd %d", + script_fd); + return STATUS_ERR; + } +} + +/* Handle an accept() call creating a new socket with the given file + * descriptors. + * Returns STATUS_OK on success; on failure returns STATUS_ERR and + * sets error message. + */ +static int run_syscall_accept(struct state *state, + int script_accepted_fd, + int live_accepted_fd, + struct sockaddr *live_addr, + int live_addrlen, char **error) +{ + struct socket *socket = NULL; + struct fd_state *fd = NULL; + struct ip_address ip; + u16 port = 0; + DEBUGP("run_syscall_accept\n"); + + /* Parse the sockaddr into a nice multi-protocol ip_address struct. */ + ip_from_sockaddr(live_addr, live_addrlen, &ip, &port); + + /* For ipv4-mapped-ipv6: if ip is IPv4-mapped IPv6, map it to IPv4. 
*/ + if (ip.address_family == AF_INET6) { + struct ip_address ipv4; + if (ipv6_map_to_ipv4(ip, &ipv4) == STATUS_OK) + ip = ipv4; + } + + for (fd = state->fds; fd != NULL; fd = fd->next) { + if (fd->ops->type != FD_SOCKET) + continue; + socket = fd_to_socket(fd); + if (DEBUG_LOGGING) { + char remote_string[ADDR_STR_LEN]; + DEBUGP("socket state=%d script addr: %s:%d\n", + socket->state, + ip_to_string(&socket->script.remote.ip, + remote_string), + socket->script.remote.port); + } + + if ((socket->state == SOCKET_PASSIVE_SYNACK_SENT) || /* TFO */ + (socket->state == SOCKET_PASSIVE_SYNACK_ACKED)) { + assert(is_equal_ip(&socket->live.remote.ip, &ip)); + assert(is_equal_port(socket->live.remote.port, + htons(port))); + socket->fd.script_fd = script_accepted_fd; + socket->fd.live_fd = live_accepted_fd; + return STATUS_OK; + } + } + + if (!state->config->is_wire_client) { + asprintf(error, "unable to find socket matching accept() call"); + return STATUS_ERR; + } + + /* If this is a wire client, then this process just + * sees the system call action for this socket. Create a child + * passive socket for this accept call, and fill in what we + * know about the socket. Any further packets in the test + * script will be directed to this child socket. 
+ */ + socket = socket_new(state); + state->socket_under_test = socket; + assert(socket->state == SOCKET_INIT); + socket->address_family = ip.address_family; + + socket->live.remote.ip = ip; + socket->live.remote.port = port; + socket->live.local.ip = state->config->live_local_ip; + socket->live.local.port = htons(state->config->live_bind_port); + + socket->fd.live_fd = live_accepted_fd; + socket->fd.script_fd = script_accepted_fd; + + if (DEBUG_LOGGING) { + char local_string[ADDR_STR_LEN]; + char remote_string[ADDR_STR_LEN]; + DEBUGP("live: local: %s.%d\n", + ip_to_string(&socket->live.local.ip, local_string), + ntohs(socket->live.local.port)); + DEBUGP("live: remote: %s.%d\n", + ip_to_string(&socket->live.remote.ip, remote_string), + ntohs(socket->live.remote.port)); + } + return STATUS_OK; +} + +/* Handle an connect() or sendto() call initiating a connect to a + * remote address. Fill in the live_addr and live_addrlen for the live + * connect(). Returns STATUS_OK on success; on failure returns + * STATUS_ERR and sets error message. + */ +static int run_syscall_connect(struct state *state, + int script_fd, + bool must_be_new_socket, + struct sockaddr *live_addr, + socklen_t *live_addrlen, + int sa_family, + char **error) +{ + struct socket *socket = NULL; + DEBUGP("run_syscall_connect\n"); + + if (sa_family != -1) { + sa_family_t sa_fa = (sa_family_t) sa_family; + memset(live_addr, 0, sizeof(*live_addr)); + live_addr->sa_family = sa_fa; + } else { + /* Fill in the live address we want to connect to */ + ip_to_sockaddr(&state->config->live_connect_ip, + state->config->live_connect_port, + live_addr, live_addrlen); + } + + socket = fd_to_socket(find_by_script_fd(state, script_fd)); + assert(socket != NULL); + /* Reset socket state to NEW if we are about to disconnect + * the socket so that later connect will succeed. 
+ */ + if (live_addr->sa_family == AF_UNSPEC) { + socket->state = SOCKET_NEW; + return STATUS_OK; + } + + if (socket->state != SOCKET_NEW) { + if (must_be_new_socket) { + asprintf(error, "socket is not new"); + return STATUS_ERR; + } else { + return STATUS_OK; + } + } + + socket->state = SOCKET_ACTIVE_CONNECTING; + ip_reset(&socket->script.remote.ip); + ip_reset(&socket->script.local.ip); + socket->script.remote.port = 0; + socket->script.local.port = 0; + socket->live.remote.ip = state->config->live_remote_ip; + socket->live.remote.port = htons(state->config->live_connect_port); + DEBUGP("success: setting socket to state %d\n", socket->state); + return STATUS_OK; +} + +/* The app called epoll_create(). Create a struct epoll to track this new + * epoll event. + * Returns STATUS_OK on success; on failure returns STATUS_ERR and sets + * error message. + */ +static int run_syscall_epoll_create(struct state *state, int epfd_script, + int epfd_live, char **error) +{ + struct epoll *epoll = NULL; + + if (check_duplicate_fd(state, epfd_script, epfd_live, error)) + return STATUS_ERR; + + epoll = epoll_new(state); + epoll->fd.script_fd = epfd_script; + epoll->fd.live_fd = epfd_live; + return STATUS_OK; +} + +/* The app called pipe(). Create a struct pipe to track this new pipe event. + * Note: both pfd_script and pfd_live point to 2-integer arrays. + * Returns STATUS_OK on success; on failure returns STATUS_ERR and sets + * error message. 
+ */ +static int run_syscall_pipe(struct state *state, int *pfd_script, int *pfd_live, + char **error) +{ + struct pipe *pipe = NULL; + int i; + + for (i = 0; i < 2; i++) { + if (check_duplicate_fd(state, pfd_script[i], + pfd_live[i], error)) + return STATUS_ERR; + } + + for (i = 0; i < 2; i++) { + pipe = pipe_new(state); + pipe->fd.script_fd = pfd_script[i]; + pipe->fd.live_fd = pfd_live[i]; + } + return STATUS_OK; +} + +/**************************************************************************** + * Here we have the parsing and invocation of the system calls that + * we support... + */ + +static int syscall_socket(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int domain, type, protocol, live_fd, script_fd, result; + + if (check_arg_count(args, 3, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 0, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &type, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &protocol, error)) + return STATUS_ERR; + + domain = state->config->socket_domain; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.socket( + state->so_instance->ifc.userdata, + domain, type, protocol); + } else { + result = socket(domain, type, protocol); + } + + if (end_syscall(state, syscall, CHECK_FD, result, error)) + return STATUS_ERR; + + if (result >= 0) { + live_fd = result; + if (get_s32(syscall->result, &script_fd, error)) + return STATUS_ERR; + if (run_syscall_socket(state, domain, type, protocol, + script_fd, live_fd, error)) + return STATUS_ERR; + } + + return STATUS_OK; +} + +static int syscall_bind(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, result; + struct sockaddr_storage live_addr; + socklen_t live_addrlen; + + if (check_arg_count(args, 3, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if 
(to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 1, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 2, error)) + return STATUS_ERR; + if (run_syscall_bind( + state, + (struct sockaddr *)&live_addr, &live_addrlen, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.bind( + state->so_instance->ifc.userdata, + live_fd, (struct sockaddr *)&live_addr, + live_addrlen); + } else { + result = bind(live_fd, (struct sockaddr *)&live_addr, + live_addrlen); + } + + return end_syscall(state, syscall, CHECK_EXACT, result, error); +} + +static int syscall_listen(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, backlog, result; + + if (check_arg_count(args, 2, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &backlog, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.listen( + state->so_instance->ifc.userdata, + live_fd, backlog); + } else { + result = listen(live_fd, backlog); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + return STATUS_ERR; + + if (run_syscall_listen(state, script_fd, live_fd, error)) + return STATUS_ERR; + + return STATUS_OK; +} + +static int syscall_accept(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, live_accepted_fd, script_accepted_fd, result; + struct sockaddr_storage live_addr; + socklen_t live_addrlen = sizeof(live_addr); + if (check_arg_count(args, 3, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if 
(ellipsis_arg(args, 1, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 2, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.accept( + state->so_instance->ifc.userdata, + live_fd, (struct sockaddr *)&live_addr, + &live_addrlen); + } else { + result = accept(live_fd, (struct sockaddr *)&live_addr, + &live_addrlen); + } + + if (end_syscall(state, syscall, CHECK_FD, result, error)) + return STATUS_ERR; + + if (result >= 0) { + live_accepted_fd = result; + if (get_s32(syscall->result, &script_accepted_fd, error)) + return STATUS_ERR; + if (run_syscall_accept( + state, script_accepted_fd, live_accepted_fd, + (struct sockaddr *)&live_addr, live_addrlen, + error)) + return STATUS_ERR; + } + + return STATUS_OK; +} + +static int syscall_connect(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, result; + struct sockaddr_storage live_addr; + socklen_t live_addrlen = sizeof(live_addr); + int sa_family = -1; + if (check_arg_count(args, 3, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 1, error) && + s32_arg(args, 1, &sa_family, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 2, error)) + return STATUS_ERR; + + if (run_syscall_connect( + state, script_fd, false, + (struct sockaddr *)&live_addr, &live_addrlen, + sa_family, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.connect( + state->so_instance->ifc.userdata, + live_fd, (struct sockaddr *)&live_addr, + live_addrlen); + } else { + result = connect(live_fd, (struct sockaddr *)&live_addr, + live_addrlen); + } + + return end_syscall(state, syscall, CHECK_EXACT, result, error); +} + +static int syscall_read(struct state *state, struct 
syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, count, result; + char *buf = NULL; + bool is_null; + + if (check_arg_count(args, 3, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (buffer_arg(args, 1, &is_null, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &count, error)) + return STATUS_ERR; + buf = alloc_buffer(is_null, count, false); + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.read( + state->so_instance->ifc.userdata, + live_fd, buf, count); + } else { + result = read(live_fd, buf, count); + } + + int status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + free(buf); + return status; +} + +static int syscall_readv(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, iov_count, result; + struct expression *iov_expression = NULL; + struct iovec *iov = NULL; + size_t iov_len = 0; + int status = STATUS_ERR; + + if (check_arg_count(args, 3, error)) + goto error_out; + + if (s32_arg(args, 0, &script_fd, error)) + goto error_out; + if (to_live_fd(state, script_fd, &live_fd, error)) + goto error_out; + + iov_expression = get_arg(args, 1, error); + if (iov_expression == NULL) + goto error_out; + if (iovec_new(iov_expression, &iov, &iov_len, error)) + goto error_out; + + if (s32_arg(args, 2, &iov_count, error)) + goto error_out; + + if (iov_count != iov_len) { + asprintf(error, + "iov_count %d does not match %d-element iovec array", + iov_count, (int)iov_len); + goto error_out; + } + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.readv( + state->so_instance->ifc.userdata, + live_fd, iov, iov_count); + } else { + result = readv(live_fd, iov, iov_count); + } + + status = end_syscall(state, syscall, 
CHECK_EXACT, result, error); + +error_out: + iovec_free(iov, iov_len); + return status; +} + +static int syscall_recv(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, count, flags, result; + char *buf = NULL; + bool is_null; + + if (check_arg_count(args, 4, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (buffer_arg(args, 1, &is_null, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &count, error)) + return STATUS_ERR; + if (s32_arg(args, 3, &flags, error)) + return STATUS_ERR; + buf = alloc_buffer(is_null, count, false); + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.recv( + state->so_instance->ifc.userdata, + live_fd, buf, count, flags); + } else { + result = recv(live_fd, buf, count, flags); + } + + int status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + free(buf); + return status; +} + +static int syscall_recvfrom(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, count, flags, result; + struct sockaddr_storage live_addr; + socklen_t live_addrlen = sizeof(live_addr); + char *buf = NULL; + bool is_null; + + if (check_arg_count(args, 6, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (buffer_arg(args, 1, &is_null, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &count, error)) + return STATUS_ERR; + if (s32_arg(args, 3, &flags, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 4, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 5, error)) + return STATUS_ERR; + buf = alloc_buffer(is_null, count, false); + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = 
state->so_instance->ifc.recvfrom( + state->so_instance->ifc.userdata, + live_fd, buf, count, flags, + (struct sockaddr *)&live_addr, &live_addrlen); + } else { + result = recvfrom(live_fd, buf, count, flags, + (struct sockaddr *)&live_addr, &live_addrlen); + } + + int status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + free(buf); + return status; +} + +static int syscall_recvmsg(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, flags, result; + struct expression *msg_expression = NULL; + struct msghdr *msg = NULL, *expected_msg = NULL; + size_t iov_len = 0; + int status = STATUS_ERR; + + if (check_arg_count(args, 3, error)) + goto error_out; + if (s32_arg(args, 0, &script_fd, error)) + goto error_out; + if (to_live_fd(state, script_fd, &live_fd, error)) + goto error_out; + + msg_expression = get_arg(args, 1, error); + if (msg_expression == NULL) + goto error_out; + if (msghdr_new(msg_expression, &msg, &iov_len, error)) + goto error_out; + if (msghdr_new(msg_expression, &expected_msg, &iov_len, error)) + goto error_out; + + if (s32_arg(args, 2, &flags, error)) + goto error_out; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.recvmsg( + state->so_instance->ifc.userdata, + live_fd, msg, flags); + } else { + result = recvmsg(live_fd, msg, flags); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + goto error_out; + + if (msg->msg_flags != expected_msg->msg_flags) { + asprintf(error, "Expected msg_flags 0x%08X but got 0x%08X", + expected_msg->msg_flags, msg->msg_flags); + goto error_out; + } + + if (!cmsg_expect_eq(state, expected_msg, msg, error)) + goto error_out; + + status = STATUS_OK; + +error_out: + msghdr_free(msg, iov_len); + msghdr_free(expected_msg, iov_len); + return status; +} + +static int syscall_write(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char 
**error) +{ + int live_fd, script_fd, count, result; + char *buf = NULL; + bool is_null; + + if (check_arg_count(args, 3, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (buffer_arg(args, 1, &is_null, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &count, error)) + return STATUS_ERR; + buf = alloc_buffer(is_null, count, true); + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.write( + state->so_instance->ifc.userdata, + live_fd, buf, count); + } else { + result = write(live_fd, buf, count); + } + + int status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + free(buf); + return status; +} + +static int syscall_writev(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, iov_count, result; + struct expression *iov_expression = NULL; + struct iovec *iov = NULL; + size_t iov_len = 0; + int status = STATUS_ERR; + + if (check_arg_count(args, 3, error)) + goto error_out; + + if (s32_arg(args, 0, &script_fd, error)) + goto error_out; + if (to_live_fd(state, script_fd, &live_fd, error)) + goto error_out; + + iov_expression = get_arg(args, 1, error); + if (iov_expression == NULL) + goto error_out; + if (iovec_new(iov_expression, &iov, &iov_len, error)) + goto error_out; + + if (s32_arg(args, 2, &iov_count, error)) + goto error_out; + + if (iov_count != iov_len) { + asprintf(error, + "iov_count %d does not match %d-element iovec array", + iov_count, (int)iov_len); + goto error_out; + } + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.writev( + state->so_instance->ifc.userdata, + live_fd, iov, iov_count); + } else { + result = writev(live_fd, iov, iov_count); + } + + status = end_syscall(state, syscall, CHECK_EXACT, result, error); + +error_out: + iovec_free(iov, 
iov_len); + return status; +} + +static int syscall_send(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, count, flags, result; + char *buf = NULL; + bool is_null; + + if (check_arg_count(args, 4, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (buffer_arg(args, 1, &is_null, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &count, error)) + return STATUS_ERR; + if (s32_arg(args, 3, &flags, error)) + return STATUS_ERR; + buf = alloc_buffer(is_null, count, true); + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.send( + state->so_instance->ifc.userdata, + live_fd, buf, count, flags); + } else { + result = send(live_fd, buf, count, flags); + } + + int status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + sendcall_free(state, buf); + + return status; +} + +static int syscall_sendto(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, count, flags, result; + struct sockaddr_storage live_addr; + socklen_t live_addrlen = sizeof(live_addr); + struct socket *socket = NULL; + char *buf = NULL; + int sa_family = -1; + bool is_null; + + if (check_arg_count(args, 6, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (buffer_arg(args, 1, &is_null, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &count, error)) + return STATUS_ERR; + if (s32_arg(args, 3, &flags, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 4, error) && + s32_arg(args, 4, &sa_family, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 5, error)) + return STATUS_ERR; + + /* ICMP sockets need special handling. 
*/ + socket = fd_to_socket(find_by_script_fd(state, script_fd)); + if (socket != NULL && socket->type == SOCK_DGRAM && + ((socket->address_family == AF_INET && + socket->protocol == IPPROTO_ICMP) || + (socket->address_family == AF_INET6 && + socket->protocol == IPPROTO_ICMPV6))) + return syscall_icmp_sendto(state, syscall, args, error); + + if (run_syscall_connect( + state, script_fd, false, + (struct sockaddr *)&live_addr, &live_addrlen, sa_family, + error)) + return STATUS_ERR; + + buf = alloc_buffer(is_null, count, true); + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.sendto( + state->so_instance->ifc.userdata, + live_fd, buf, count, flags, + (struct sockaddr *)&live_addr, live_addrlen); + } else { + result = sendto(live_fd, buf, count, flags, + (struct sockaddr *)&live_addr, live_addrlen); + } + + int status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + sendcall_free(state, buf); + + return status; +} + +static int syscall_sendmsg(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, flags, result; + struct expression *msg_expression = NULL; + struct msghdr *msg = NULL; + size_t iov_len = 0; + int status = STATUS_ERR; + + if (check_arg_count(args, 3, error)) + goto error_out; + if (s32_arg(args, 0, &script_fd, error)) + goto error_out; + if (to_live_fd(state, script_fd, &live_fd, error)) + goto error_out; + + msg_expression = get_arg(args, 1, error); + if (msg_expression == NULL) + goto error_out; + if (msghdr_new(msg_expression, &msg, &iov_len, error)) + goto error_out; + + if (s32_arg(args, 2, &flags, error)) + goto error_out; + + if ((msg->msg_name != NULL) && + run_syscall_connect(state, script_fd, false, + msg->msg_name, &msg->msg_namelen, -1, error)) + goto error_out; + if (msg->msg_flags != 0) { + asprintf(error, "sendmsg ignores msg_flags field in msghdr"); + goto error_out; + } + + begin_syscall(state, syscall); 
+ + if (state->so_instance) { + result = state->so_instance->ifc.sendmsg( + state->so_instance->ifc.userdata, + live_fd, msg, flags); + } else { + result = sendmsg(live_fd, msg, flags); + } + + status = end_syscall(state, syscall, CHECK_EXACT, result, error); + +error_out: + if (sendcall_may_free(state)) + msghdr_free(msg, iov_len); + return status; +} + +/* + * Send echo request using ICMP socket. + * Note: Kernel will reject and fail the sendto() call if the data sent does not + * have room for a proper ICMP header. And ICMP type must be 8 (ICMP_ECHO) and + * ICMP code must be 0. + */ +static int syscall_icmp_sendto(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, count, flags, result; + struct sockaddr_storage live_addr; + socklen_t live_addrlen = sizeof(live_addr); + char *buf = NULL; + bool is_null; + + if (check_arg_count(args, 6, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (buffer_arg(args, 1, &is_null, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &count, error)) + return STATUS_ERR; + if (s32_arg(args, 3, &flags, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 4, error)) + return STATUS_ERR; + if (ellipsis_arg(args, 5, error)) + return STATUS_ERR; + + if (run_syscall_connect( + state, script_fd, false, + (struct sockaddr *)&live_addr, &live_addrlen, -1, error)) + return STATUS_ERR; + + buf = alloc_buffer(is_null, count, true); + if (state->config->wire_protocol == AF_INET && + count >= sizeof(struct icmpv4)) { + struct icmpv4 *icmp = (struct icmpv4 *)buf; + icmp->type = ICMP_ECHO; + } else if (state->config->wire_protocol == AF_INET6 && + count >= sizeof(struct icmpv6)) { + struct icmpv6 *icmpv6 = (struct icmpv6 *)buf; + icmpv6->type = ICMPV6_ECHO_REQUEST; + } + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = 
state->so_instance->ifc.sendto( + state->so_instance->ifc.userdata, + live_fd, buf, count, flags, + (struct sockaddr *)&live_addr, live_addrlen); + } else { + result = sendto(live_fd, buf, count, flags, + (struct sockaddr *)&live_addr, live_addrlen); + } + + int status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + free(buf); + return status; +} + +static int syscall_fcntl(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, command, result; + + /* fcntl is an odd system call - it can take either 2 or 3 args. */ + int actual_arg_count = get_arg_count(args); + if ((actual_arg_count != 2) && (actual_arg_count != 3)) { + asprintf(error, "fcntl expected 2-3 args but got %d", + actual_arg_count); + return STATUS_ERR; + } + + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &command, error)) + return STATUS_ERR; + + if (actual_arg_count == 2) { + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.fcntl( + state->so_instance->ifc.userdata, + live_fd, command); + } else { + result = fcntl(live_fd, command); + } + } else if (actual_arg_count == 3) { + s32 arg; + if (s32_arg(args, 2, &arg, error)) + return STATUS_ERR; + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.fcntl( + state->so_instance->ifc.userdata, + live_fd, command, arg); + } else { + result = fcntl(live_fd, command, arg); + } + } else { + assert(0); /* not reached */ + } + + return end_syscall(state, syscall, CHECK_EXACT, result, error); +} + +static int syscall_ioctl(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, command, result; + + /* ioctl is an odd system call - it can take either 2 or 3 args. 
*/ + int actual_arg_count = get_arg_count(args); + if ((actual_arg_count != 2) && (actual_arg_count != 3)) { + asprintf(error, "ioctl expected 2-3 args but got %d", + actual_arg_count); + return STATUS_ERR; + } + + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &command, error)) + return STATUS_ERR; + + if (actual_arg_count == 2) { + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.ioctl( + state->so_instance->ifc.userdata, + live_fd, command); + } else { + result = ioctl(live_fd, command); + } + + return end_syscall(state, syscall, CHECK_EXACT, result, error); + + } else if (actual_arg_count == 3) { + s32 script_optval, live_optval; + + if (s32_bracketed_arg(args, 2, &script_optval, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.ioctl( + state->so_instance->ifc.userdata, + live_fd, command, &live_optval); + } else { + result = ioctl(live_fd, command, &live_optval); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + return STATUS_ERR; + + if (live_optval != script_optval) { + asprintf(error, + "Bad ioctl optval: expected: %d actual: %d", + (int)script_optval, (int)live_optval); + return STATUS_ERR; + } + + return STATUS_OK; + } else { + assert(0); /* not reached */ + } + return STATUS_ERR; +} + +static int syscall_close(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, result; + if (check_arg_count(args, 1, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.close( + state->so_instance->ifc.userdata, + live_fd); + } else { 
+ result = close(live_fd); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + return STATUS_ERR; + + if (run_syscall_close(state, script_fd, live_fd, error)) + return STATUS_ERR; + + return STATUS_OK; +} + +static int syscall_shutdown(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int live_fd, script_fd, how, result; + if (check_arg_count(args, 2, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &how, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.shutdown( + state->so_instance->ifc.userdata, + live_fd, how); + } else { + result = shutdown(live_fd, how); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + return STATUS_ERR; + + return STATUS_OK; +} + +static int syscall_getsockopt(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int script_fd, live_fd, level, optname, result; + void *live_optval = NULL, *script_optval = NULL; + s32 script_optlen, script_optval_s32; + socklen_t live_optlen; + struct expression *val_expression = NULL; + int status = STATUS_ERR; + + if (check_arg_count(args, 5, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &level, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &optname, error)) + return STATUS_ERR; + val_expression = get_arg(args, 3, error); + if (val_expression == NULL) + return STATUS_ERR; + if (s32_bracketed_arg(args, 4, &script_optlen, error)) + return STATUS_ERR; + + /* Allocate space for getsockopt output. 
*/ + live_optlen = script_optlen; + live_optval = calloc(1, live_optlen + 1); + assert(live_optval != NULL); + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.getsockopt( + state->so_instance->ifc.userdata, + live_fd, level, optname, + live_optval, &live_optlen); + } else { + result = getsockopt(live_fd, level, optname, + live_optval, &live_optlen); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + goto error_out; + + if ((int)live_optlen != script_optlen) { + asprintf(error, + "Bad getsockopt optlen: expected: %d actual: %d", + (int)script_optlen, (int)live_optlen); + goto error_out; + } + + if (val_expression->type == EXPR_STRING) { + script_optval = val_expression->value.string; + + if (strcmp(live_optval, script_optval) != 0) { + asprintf(error, + "Bad getsockopt optval: " + "expected: '%s' actual: '%s'", + (char *)script_optval, (char *)live_optval); + goto error_out; + } + } else if (val_expression->type == EXPR_LIST) { + if (script_optlen != 4) { + asprintf(error, "Unsupported getsockopt optlen: %d", + (int)script_optlen); + goto error_out; + } + + if (s32_bracketed_arg(args, 3, &script_optval_s32, error)) + goto error_out; + + if (*(s32 *)live_optval != script_optval_s32) { + asprintf(error, + "Bad getsockopt optval: " + "expected: %d actual: %d", + script_optval_s32, *(s32 *)live_optval); + goto error_out; + } + } else if (val_expression->type == EXPR_GRE) { + struct gre *live_gre = (struct gre *)live_optval; + struct gre *script_gre = &val_expression->value.gre; + + if (script_optlen != sizeof(struct gre)) { + asprintf(error, "Unsupported getsockopt optlen: %d", + (int)script_optlen); + goto error_out; + } + + if (live_gre->flags != script_gre->flags || + live_gre->be16[0] != script_gre->be16[0] || + live_gre->be16[1] != script_gre->be16[1] || + live_gre->be32[1] != script_gre->be32[1] || + live_gre->be32[2] != script_gre->be32[2]) { + asprintf(error, "Bad getsockopt optval."); + /* 
TODO: Populate this with a GRE header dump. */ + goto error_out; + } + } else if (val_expression->type == EXPR_IN6_ADDR) { + struct in6_addr *live_ipv6 = (struct in6_addr *)live_optval; + struct in6_addr *script_ipv6 = &val_expression->value.address_ipv6; + + if (script_optlen != sizeof(struct in6_addr)) { // != 16 + asprintf(error, "Unsupported getsockopt optlen: %d", + (int)script_optlen); + goto error_out; + } + + if (memcmp(live_ipv6, script_ipv6, sizeof(struct in6_addr))) { + char live_buf[INET6_ADDRSTRLEN]; + char script_buf[INET6_ADDRSTRLEN]; + inet_ntop(AF_INET6, live_ipv6, live_buf, sizeof(live_buf)); + inet_ntop(AF_INET6, script_ipv6, script_buf, sizeof(script_buf)); + asprintf(error, + "Bad getsockopt optval: " + "expected: %s " + "actual: %s ", + script_buf, live_buf); + goto error_out; + } + } else { + asprintf(error, "unsupported getsockopt value type: %s", + expression_type_to_string( + val_expression->type)); + goto error_out; + } + + status = STATUS_OK; + +error_out: + free(live_optval); + return status; +} + +static int syscall_setsockopt(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int script_fd, live_fd, level, optname, optval_s32, optlen, result; + void *optval = NULL; + struct expression *val_expression; + + if (check_arg_count(args, 5, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &level, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &optname, error)) + return STATUS_ERR; + if (s32_arg(args, 4, &optlen, error)) + return STATUS_ERR; + + val_expression = get_arg(args, 3, error); + if (val_expression == NULL) + return STATUS_ERR; + if (val_expression->type == EXPR_LINGER) { + optval = &val_expression->value.linger; + } else if (val_expression->type == EXPR_GRE) { + optval = &val_expression->value.gre; + } else if (val_expression->type == 
EXPR_IN6_ADDR) { + optval = &val_expression->value.address_ipv6; + } else if (val_expression->type == EXPR_MPLS_STACK) { + optval = val_expression->value.mpls_stack; + } else if (val_expression->type == EXPR_STRING) { + optval = val_expression->value.string; + } else if (val_expression->type == EXPR_LIST) { + if (s32_bracketed_arg(args, 3, &optval_s32, error)) + return STATUS_ERR; + optval = &optval_s32; + } else { + asprintf(error, "unsupported setsockopt value type: %s", + expression_type_to_string( + val_expression->type)); + return STATUS_ERR; + } + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.setsockopt( + state->so_instance->ifc.userdata, + live_fd, level, optname, optval, optlen); + } else { + result = setsockopt(live_fd, level, optname, optval, optlen); + } + + return end_syscall(state, syscall, CHECK_EXACT, result, error); +} + +static int syscall_poll(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + struct expression *fds_expression = NULL; + struct pollfd *fds = NULL; + size_t fds_len; + int nfds, timeout, result; + int status = STATUS_ERR; + + if (check_arg_count(args, 3, error)) + goto error_out; + + fds_expression = get_arg(args, 0, error); + if (fds_expression == NULL) + goto error_out; + if (pollfds_new(state, fds_expression, &fds, &fds_len, error)) + goto error_out; + + if (s32_arg(args, 1, &nfds, error)) + goto error_out; + if (s32_arg(args, 2, &timeout, error)) + goto error_out; + + if (nfds != fds_len) { + asprintf(error, + "nfds %d does not match %d-element pollfd array", + nfds, (int)fds_len); + goto error_out; + } + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.poll( + state->so_instance->ifc.userdata, + fds, nfds, timeout); + } else { + result = poll(fds, nfds, timeout); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + goto error_out; + + if 
(pollfds_check(fds_expression, fds, fds_len, error)) + goto error_out; + + status = STATUS_OK; + +error_out: + free(fds); + return status; +} + +static int syscall_cap_set(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int cap_flag, cap_value, cap_op; + int result; + cap_t caps; + + if (check_arg_count(args, 3, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &cap_flag, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &cap_value, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &cap_op, error)) + return STATUS_ERR; + + caps = cap_get_proc(); + if (caps == NULL) + die("Error in cap_get_proc()\n"); + + if (cap_set_flag(caps, cap_flag, 1, &cap_value, + cap_op) == -1) + die("Error in cap_set_flag()\n"); + + begin_syscall(state, syscall); + + result = cap_set_proc(caps); + + if (end_syscall(state, syscall, CHECK_FD, result, error)) + return STATUS_ERR; + + if (cap_free(caps) == -1) + die("Error in cap_free()\n"); + + return STATUS_OK; +} + +static int syscall_open(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int script_fd, live_fd, result; + struct expression *name_expression; + char *name; + int flags; + + if (check_arg_count(args, 2, error)) + return STATUS_ERR; + name_expression = get_arg(args, 0, error); + if (check_type(name_expression, EXPR_STRING, error)) + return STATUS_ERR; + name = name_expression->value.string; + if (s32_arg(args, 1, &flags, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + result = open(name, flags); + + if (end_syscall(state, syscall, CHECK_FD, result, error)) + return STATUS_ERR; + + if (result >= 0) { + live_fd = result; + if (get_s32(syscall->result, &script_fd, error)) + return STATUS_ERR; + if (run_syscall_open(state, script_fd, live_fd, error)) + return STATUS_ERR; + } + + return STATUS_OK; +} + +static int syscall_sendfile(struct state *state, struct syscall_spec *syscall, + struct expression_list 
*args, char **error) +{ + int live_outfd, script_outfd; + int live_infd, script_infd; + s64 script_offset = 0; + off_t live_offset; + int count, result; + int status = STATUS_ERR; + + if (check_arg_count(args, 4, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &script_outfd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_outfd, &live_outfd, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &script_infd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_infd, &live_infd, error)) + return STATUS_ERR; + if (s64_bracketed_arg(args, 2, &script_offset, error)) + return STATUS_ERR; + if (s32_arg(args, 3, &count, error)) + return STATUS_ERR; + + live_offset = script_offset; + + begin_syscall(state, syscall); + + result = sendfile(live_outfd, live_infd, &live_offset, count); + + status = end_syscall(state, syscall, CHECK_EXACT, result, error); + + return status; +} + +/* Translate epoll_event expression into epoll_event data structure + * epoll_data specifies the type of epoll_event->data + */ +static int get_epoll_event_from_expr(struct state *state, + struct expression *epollev, + struct epoll_event *event, + enum epoll_data_type_t *epoll_data, + int script_fd, + int live_fd, + char **error) +{ + struct epollev_expr *epollev_expr = NULL; + + if (epollev == NULL) + return STATUS_ERR; + if (check_type(epollev, EXPR_EPOLLEV, error)) + return STATUS_ERR; + epollev_expr = epollev->value.epollev; + if (!epollev_expr) + return STATUS_ERR; + if (check_type(epollev_expr->events, EXPR_INTEGER, error)) + return STATUS_ERR; + event->events = epollev_expr->events->value.num; + if (epollev_expr->ptr) { + if (check_type(epollev_expr->ptr, EXPR_INTEGER, error)) + return STATUS_ERR; + event->data.ptr = (void *)epollev_expr->ptr->value.num; + *epoll_data = EPOLL_DATA_PTR; + } else if (epollev_expr->fd) { + if (check_type(epollev_expr->fd, EXPR_INTEGER, error)) + return STATUS_ERR; + /* script_fd = -1 means we don't have a specific socket fd + * So we find 
live_fd directly from passed in event->data.fd + */ + if (script_fd == -1) { + script_fd = epollev_expr->fd->value.num; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + } else { + if (epollev_expr->fd->value.num != script_fd) { + asprintf(error, + "wrong fd specified in epoll_event\n"); + return STATUS_ERR; + } + } + event->data.fd = live_fd; + *epoll_data = EPOLL_DATA_FD; + } else if (epollev_expr->u32) { + if (check_type(epollev_expr->u32, EXPR_INTEGER, error)) + return STATUS_ERR; + event->data.u32 = epollev_expr->u32->value.num; + *epoll_data = EPOLL_DATA_U32; + } else if (epollev_expr->u64) { + if (check_type(epollev_expr->u64, EXPR_INTEGER, error)) + return STATUS_ERR; + event->data.u64 = epollev_expr->u64->value.num; + *epoll_data = EPOLL_DATA_U64; + } else { + asprintf(error, "epoll_event specified incorrectly"); + return STATUS_ERR; + } + + return STATUS_OK; +} + +static int syscall_epoll_create(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int size, result, script_fd, live_fd; + if (check_arg_count(args, 1, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &size, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.epoll_create( + state->so_instance->ifc.userdata, + size); + } else { + result = epoll_create(size); + } + + if (end_syscall(state, syscall, CHECK_FD, result, error)) + return STATUS_ERR; + + if (result >= 0) { + live_fd = result; + if (get_s32(syscall->result, &script_fd, error)) + return STATUS_ERR; + if (run_syscall_epoll_create(state, script_fd, live_fd, error)) + return STATUS_ERR; + } + + return STATUS_OK; +} + +static int syscall_epoll_ctl(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int epfd_script, epfd_live, op, script_fd, live_fd, result; + struct expression *epollev = NULL; + struct epoll_event event; + enum 
epoll_data_type_t epoll_data; + + if (check_arg_count(args, 4, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &epfd_script, error)) + return STATUS_ERR; + if (to_live_fd(state, epfd_script, &epfd_live, error)) + return STATUS_ERR; + if (s32_arg(args, 1, &op, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &script_fd, error)) + return STATUS_ERR; + if (to_live_fd(state, script_fd, &live_fd, error)) + return STATUS_ERR; + epollev = get_arg(args, 3, error); + if (get_epoll_event_from_expr(state, epollev, &event, &epoll_data, + script_fd, live_fd, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.epoll_ctl( + state->so_instance->ifc.userdata, + epfd_live, op, live_fd, &event); + } else { + result = epoll_ctl(epfd_live, op, live_fd, &event); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + return STATUS_ERR; + + return STATUS_OK; +} + +static int syscall_epoll_wait(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int epfd_script, epfd_live, maxevents, timeout; + struct expression *epollev = NULL; + struct epoll_event event_script = {0}; + struct epoll_event *event_live; + enum epoll_data_type_t epoll_data; + int status = STATUS_ERR; + int result; + + if (check_arg_count(args, 4, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &epfd_script, error)) + return STATUS_ERR; + if (to_live_fd(state, epfd_script, &epfd_live, error)) + return STATUS_ERR; + epollev = get_arg(args, 1, error); + if (get_epoll_event_from_expr(state, epollev, &event_script, + &epoll_data, -1, -1, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &maxevents, error)) + return STATUS_ERR; + if (s32_arg(args, 3, &timeout, error)) + return STATUS_ERR; + + event_live = calloc(maxevents, sizeof(struct epoll_event)); + if (!event_live) { + asprintf(error, "Failed to calloc %d struct epoll_event\n", + maxevents); + goto error_out; + } + + 
begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.epoll_wait( + state->so_instance->ifc.userdata, + epfd_live, event_live, maxevents, timeout); + } else { + result = epoll_wait(epfd_live, event_live, maxevents, timeout); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + goto error_out; + + if (event_script.events != event_live->events) { + asprintf(error, + "epoll_event->events does not match script: " + "expected: 0x%x " + "actual: 0x%x\n", + event_script.events, event_live->events); + goto error_out; + } + + switch(epoll_data) { + case EPOLL_DATA_PTR: + if (event_script.data.ptr != event_live->data.ptr) { + asprintf(error, + "epoll_event->data does not match script: " + "expected: %p " + "actual: %p\n", + event_script.data.ptr, + event_live->data.ptr); + goto error_out; + } + break; + case EPOLL_DATA_FD: + if (event_script.data.fd != event_live->data.fd) { + asprintf(error, + "epoll_event->data does not match script: " + "expected: %d " + "actual: %d\n", + event_script.data.fd, + event_live->data.fd); + goto error_out; + } + break; + case EPOLL_DATA_U32: + if (event_script.data.u32 != event_live->data.u32) { + asprintf(error, + "epoll_event->data does not match script: " + "expected: %u " + "actual: %u\n", + event_script.data.u32, + event_live->data.u32); + goto error_out; + } + break; + case EPOLL_DATA_U64: + if (event_script.data.u64 != event_live->data.u64) { + asprintf(error, + "epoll_event->data does not match script: " + "expected: %lu " + "actual: %lu\n", + event_script.data.u64, + event_live->data.u64); + goto error_out; + } + break; + default: + asprintf(error, "wrong event->data type\n"); + goto error_out; + } + + status = STATUS_OK; + +error_out: + free(event_live); + return status; +} + +static int get_pipe_expression(struct state *state, + struct expression *pipe_expr, + int *pipefd_script, + char **error) +{ + struct expression_list *list; + int i = 0; + int list_len; + + if 
(check_type(pipe_expr, EXPR_LIST, error)) + return STATUS_ERR; + list = pipe_expr->value.list; + list_len = list_length(list); + if (list_len != 2) { + asprintf(error, "%d pipe file descriptors instead of 2\n", + list_len); + return STATUS_ERR; + } + for (i = 0; i < 2; i++) { + if (check_type(list->expression, EXPR_INTEGER, error)) + return STATUS_ERR; + pipefd_script[i] = list->expression->value.num; + list = list->next; + } + + return STATUS_OK; +} + +static int syscall_pipe(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + struct expression *pipe_expr = NULL; + int pipefd_script[2]; + int pipefd_live[2]; + int result; + + if (check_arg_count(args, 1, error)) + return STATUS_ERR; + pipe_expr = get_arg(args, 0, error); + if (pipe_expr == NULL) + return STATUS_ERR; + if (get_pipe_expression(state, pipe_expr, pipefd_script, error)) + return STATUS_ERR; + + begin_syscall(state, syscall); + + if (state->so_instance) { + result = state->so_instance->ifc.pipe( + state->so_instance->ifc.userdata, + pipefd_live); + } else { + result = pipe(pipefd_live); + } + + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + return STATUS_ERR; + + if (result >= 0) { + if (run_syscall_pipe(state, pipefd_script, pipefd_live, error)) + return STATUS_ERR; + } + + return STATUS_OK; +} + +static int syscall_splice(struct state *state, struct syscall_spec *syscall, + struct expression_list *args, char **error) +{ + int fd_in_script, fd_in_live; + int fd_out_script, fd_out_live; + s64 off_in, off_out; + int len, flags; + int result; + + if (check_arg_count(args, 6, error)) + return STATUS_ERR; + if (s32_arg(args, 0, &fd_in_script, error)) + return STATUS_ERR; + if (to_live_fd(state, fd_in_script, &fd_in_live, error)) + return STATUS_ERR; + if (s64_arg(args, 1, &off_in, error)) + return STATUS_ERR; + if (s32_arg(args, 2, &fd_out_script, error)) + return STATUS_ERR; + if (to_live_fd(state, fd_out_script, &fd_out_live, error)) + 
return STATUS_ERR; + if (s64_arg(args, 3, &off_out, error)) + return STATUS_ERR; + if (s32_arg(args, 4, &len, error)) + return STATUS_ERR; + if (s32_arg(args, 5, &flags, error)) + return STATUS_ERR; + + if (state->so_instance) { + result = state->so_instance->ifc.splice( + state->so_instance->ifc.userdata, + fd_in_live, (loff_t *) &off_in, + fd_out_live, (loff_t *) &off_out, + len, flags); + } else { + result = splice(fd_in_live, (loff_t *) off_in, fd_out_live, + (loff_t *) off_out, len, flags); + } + if (end_syscall(state, syscall, CHECK_EXACT, result, error)) + return STATUS_ERR; + + return STATUS_OK; +} + +/* A dispatch table with all the system calls that we support... */ +struct system_call_entry { + const char *name; + int (*function) (struct state *state, + struct syscall_spec *syscall, + struct expression_list *args, + char **error); +}; +struct system_call_entry system_call_table[] = { + {"socket", syscall_socket}, + {"bind", syscall_bind}, + {"listen", syscall_listen}, + {"accept", syscall_accept}, + {"connect", syscall_connect}, + {"read", syscall_read}, + {"readv", syscall_readv}, + {"recv", syscall_recv}, + {"recvfrom", syscall_recvfrom}, + {"recvmsg", syscall_recvmsg}, + {"write", syscall_write}, + {"writev", syscall_writev}, + {"send", syscall_send}, + {"sendto", syscall_sendto}, + {"sendmsg", syscall_sendmsg}, + {"fcntl", syscall_fcntl}, + {"ioctl", syscall_ioctl}, + {"close", syscall_close}, + {"shutdown", syscall_shutdown}, + {"getsockopt", syscall_getsockopt}, + {"setsockopt", syscall_setsockopt}, + {"poll", syscall_poll}, + {"cap_set", syscall_cap_set}, + {"open", syscall_open}, + {"sendfile", syscall_sendfile}, + {"epoll_create", syscall_epoll_create}, + {"epoll_ctl", syscall_epoll_ctl}, + {"epoll_wait", syscall_epoll_wait}, + {"pipe", syscall_pipe}, + {"splice", syscall_splice}, +}; + +/* Evaluate the system call arguments and invoke the system call. 
*/ +static void invoke_system_call( + struct state *state, struct event *event, struct syscall_spec *syscall) +{ + DEBUGP("%d: invoke call: %s\n", event->line_number, syscall->name); + + char *error = NULL; + const char *name = syscall->name; + struct expression_list *args = NULL; + int i = 0; + int result = 0; + + /* Wait for the right time before firing off this event. */ + wait_for_event(state); + + /* Find and invoke the handler for this system call. */ + for (i = 0; i < ARRAY_SIZE(system_call_table); ++i) + if (strcmp(name, system_call_table[i].name) == 0) + break; + if (i == ARRAY_SIZE(system_call_table)) { + asprintf(&error, "Unknown system call: '%s'", name); + goto error_out; + } + + /* Evaluate script symbolic expressions to get live numeric args for + * system calls. + */ + if (evaluate_expression_list(syscall->arguments, &args, &error)) + goto error_out; + + /* Run the system call. */ + result = system_call_table[i].function(state, syscall, args, &error); + + free_expression_list(args); + + if (result == STATUS_ERR) + goto error_out; + return; + +error_out: + die("%s:%d: runtime error in %s call: %s\n", + state->config->script_path, event->line_number, + syscall->name, error); + free(error); +} + +/* Wait for the system call thread to go idle. To avoid mystifying + * hangs when scripts specify overlapping time ranges for blocking + * system calls, we limit the duration of our waiting to 1 second. + */ +static int await_idle_thread(struct state *state) +{ + struct timespec end_time = { .tv_sec = 0, .tv_nsec = 0 }; + const int MAX_WAIT_SECS = 1; + while (state->syscalls->state != SYSCALL_IDLE) { + /* On the first time through the loop, calculate end time. */ + if (end_time.tv_sec == 0) { + if (clock_gettime(CLOCK_REALTIME, &end_time) != 0) + die_perror("clock_gettime"); + end_time.tv_sec += MAX_WAIT_SECS; + } + /* Wait for a signal or our timeout end_time to arrive. 
/* Give up the CPU so that another runnable thread can be scheduled.
 *
 * Uses POSIX sched_yield() on every platform.  The previous
 * per-platform ladder keyed on the `linux` macro (not defined in strict
 * ISO C modes) and had no fallback branch, so it could fall off the end
 * without returning a value; it also relied on the non-standard
 * pthread_yield().
 *
 * Returns 0 on success, or -1 with errno set on failure.
 */
static int yield(void)
{
	return sched_yield();
}
+ */ + DEBUGP("main thread: unlocking and yielding\n"); + pid_t thread_id = state->syscalls->thread_id; + run_unlock(state); + if (yield() != 0) + die_perror("yield"); + + DEBUGP("main thread: checking syscall thread state\n"); + if (is_thread_sleeping(getpid(), thread_id)) + done = true; + + /* Grab the lock again and see if the thread is idle. */ + DEBUGP("main thread: locking and reading state\n"); + run_lock(state); + if (state->syscalls->state == SYSCALL_IDLE) + done = true; + } + DEBUGP("main thread: continuing after syscall\n"); + return; + +error_out: + die("%s:%d: runtime error in %s call: %s\n", + state->config->script_path, event->line_number, + syscall->name, error); + free(error); +} + +void run_system_call_event( + struct state *state, struct event *event, struct syscall_spec *syscall) +{ + DEBUGP("%d: system call: %s\n", event->line_number, syscall->name); + + if (is_blocking_syscall(syscall)) + enqueue_system_call(state, event, syscall); + else { + await_idle_thread(state); + invoke_system_call(state, event, syscall); + } +} + +/* The code executed by our system call thread, which executes + * blocking system calls. 
+ */ +static void *system_call_thread(void *arg) +{ + struct state *state = (struct state *)arg; + char *error = NULL; + struct event *event = NULL; + struct syscall_spec *syscall = NULL; + bool done = false; + + DEBUGP("syscall thread: starting and locking\n"); + run_lock(state); + + state->syscalls->thread_id = gettid(); + if (state->syscalls->thread_id < 0) + die_perror("gettid"); + + while (!done) { + DEBUGP("syscall thread: in state %d\n", + state->syscalls->state); + + switch (state->syscalls->state) { + case SYSCALL_IDLE: + DEBUGP("syscall thread: waiting\n"); + if (pthread_cond_wait(&state->syscalls->enqueued, + &state->mutex)) { + die_perror("pthread_cond_wait"); + } + break; + + case SYSCALL_RUNNING: + case SYSCALL_DONE: + assert(0); /* should not be reached */ + break; + + case SYSCALL_ENQUEUED: + DEBUGP("syscall thread: invoking syscall\n"); + /* Remember the syscall event, since below we + * release the global lock and the main thread + * will move on to other, later events. + */ + event = state->event; + syscall = event->event.syscall; + assert(event->type == SYSCALL_EVENT); + state->syscalls->event = event; + state->syscalls->live_end_usecs = -1; + + /* Make the system call. Note that our callees + * here will release the global lock before + * making the actual system call and then + * re-acquire it after the system call returns + * and before returning to us. + */ + invoke_system_call(state, event, syscall); + + /* Check end time for the blocking system call. */ + assert(state->syscalls->live_end_usecs >= 0); + if (verify_time(state, + event->time_type, + syscall->end_usecs, 0, + state->syscalls->live_end_usecs, + "system call return", &error)) { + die("%s:%d: %s\n", + state->config->script_path, + event->line_number, + error); + } + + /* Mark our thread idle and wake the main + * thread if it's waiting for this call to + * finish. 
+ */ + assert(state->syscalls->state == SYSCALL_DONE); + state->syscalls->state = SYSCALL_IDLE; + state->syscalls->event = NULL; + state->syscalls->live_end_usecs = -1; + DEBUGP("syscall thread: now idle\n"); + if (pthread_cond_signal(&state->syscalls->idle) != 0) + die_perror("pthread_cond_signal"); + break; + + case SYSCALL_EXITING: + done = true; + break; + /* omitting default so compiler will catch missing cases */ + } + } + DEBUGP("syscall thread: unlocking and exiting\n"); + run_unlock(state); + + return NULL; +} + +struct syscalls *syscalls_new(struct state *state) +{ + struct syscalls *syscalls = calloc(1, sizeof(struct syscalls)); + + syscalls->state = SYSCALL_IDLE; + + if (pthread_create(&syscalls->thread, NULL, system_call_thread, + state) != 0) { + die_perror("pthread_create"); + } + + if ((pthread_cond_init(&syscalls->idle, NULL) != 0) || + (pthread_cond_init(&syscalls->enqueued, NULL) != 0) || + (pthread_cond_init(&syscalls->dequeued, NULL) != 0)) { + die_perror("pthread_cond_init"); + } + + return syscalls; +} + +void syscalls_free(struct state *state, struct syscalls *syscalls) +{ + /* Wait a bit for the thread to go idle. */ + if (await_idle_thread(state)) { + die("%s:%d: runtime error: exiting while " + "a blocking system call is in progress\n", + state->config->script_path, + syscalls->event->line_number); + } + + /* Send a request to terminate the thread. */ + DEBUGP("main thread: signaling syscall thread to exit\n"); + syscalls->state = SYSCALL_EXITING; + if (pthread_cond_signal(&syscalls->enqueued) != 0) + die_perror("pthread_cond_signal"); + + /* Release the lock briefly and wait for syscall thread to finish. 
*/ + run_unlock(state); + DEBUGP("main thread: unlocking, waiting for syscall thread exit\n"); + void *thread_result = NULL; + if (pthread_join(syscalls->thread, &thread_result) != 0) + die_perror("pthread_cancel"); + DEBUGP("main thread: joined syscall thread; relocking\n"); + run_lock(state); + + if ((pthread_cond_destroy(&syscalls->idle) != 0) || + (pthread_cond_destroy(&syscalls->enqueued) != 0) || + (pthread_cond_destroy(&syscalls->dequeued) != 0)) { + die_perror("pthread_cond_destroy"); + } + + memset(syscalls, 0, sizeof(*syscalls)); /* to help catch bugs */ + free(syscalls); +} -- cgit 1.2.3-korg