From 78c896b3b3127515478090c19447e27dc406427e Mon Sep 17 00:00:00 2001 From: Jianfeng Tan Date: Mon, 18 Nov 2019 06:59:50 +0000 Subject: TLDKv2 Signed-off-by: Jianfeng Tan Signed-off-by: Jielong Zhou Signed-off-by: Jian Zhang Signed-off-by: Chen Zhao Change-Id: I55c39de4c6cd30f991f35631eb507f770230f08e --- test/packetdrill/parser.y | 1739 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1739 insertions(+) create mode 100644 test/packetdrill/parser.y (limited to 'test/packetdrill/parser.y') diff --git a/test/packetdrill/parser.y b/test/packetdrill/parser.y new file mode 100644 index 0000000..70219bd --- /dev/null +++ b/test/packetdrill/parser.y @@ -0,0 +1,1739 @@ +%{ +/* + * Copyright 2013 Google Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ +/* + * Author: Author: ncardwell@google.com (Neal Cardwell) + * + * This is the parser for the packetdrill script language. It is + * processed by the bison parser generator. + * + * For full documentation see: http://www.gnu.org/software/bison/manual/ + * + * Here is a quick and dirty tutorial on bison: + * + * A bison parser specification is basically a BNF grammar for the + * language you are parsing. 
Each rule specifies a nonterminal symbol + * on the left-hand side and a sequence of terminal symbols (lexical + * tokens) and or nonterminal symbols on the right-hand side that can + * "reduce" to the symbol on the left hand side. When the parser sees + * the sequence of symbols on the right where it "wants" to see a + * nonterminal on the left, the rule fires, executing the semantic + * action code in curly {} braces as it reduces the right hand side to + * the left hand side. + * + * The semantic action code for a rule produces an output, which it + * can reference using the $$ token. The set of possible types + * returned in output expressions is given in the %union section of + * the .y file. The specific type of the output for a terminal or + * nonterminal symbol (corresponding to a field in the %union) is + * given by the %type directive in the .y file. The action code can + * access the outputs of the symbols on the right hand side by using + * the notation $1 for the first symbol, $2 for the second symbol, and + * so on. + * + * The lexer (generated by flex from lexer.l) feeds a stream of + * terminal symbols up to this parser. Parser semantic actions can + * access the lexer output for a terminal symbol with the same + * notation they use for nonterminals. + * + * Here's an example rule with its semantic action in {} braces: + * + * tcp_option + * ... + * | MSS INTEGER { + * $$ = tcp_option_new(...); + * ... + * $$->data.mss.bytes = htons($2); + * } + * + * This rule basically says: + * + * When the parser wants to see a tcp_option, if it sees an MSS from + * the lexer followed by an INTEGER from the lexer then run the + * action code that (a) stores in the output $$ a pointer to a + * struct tcp_option object, and then (b) stores in that object the + * value of the INTEGER token (accessed with $2). + * + */ + +/* The first part of the .y file consists of C code that bison copies + * directly into the top of the .c file it generates. 
+ */ + +#include "types.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "gre_packet.h" +#include "ip.h" +#include "ip_packet.h" +#include "icmp_packet.h" +#include "logging.h" +#include "mpls.h" +#include "mpls_packet.h" +#include "tcp_packet.h" +#include "udp_packet.h" +#include "parse.h" +#include "script.h" +#include "tcp.h" +#include "tcp_options.h" + +/* This include of the bison-generated .h file must go last so that we + * can first include all of the declarations on which it depends. + */ +#include "parser.h" + +/* Change this YYDEBUG to 1 to get verbose debug output for parsing: */ +#define YYDEBUG 0 +#if YYDEBUG +extern int yydebug; +#endif + +extern FILE *yyin; +extern int yylineno; +extern char *yytext; +extern int yylex(void); +extern int yyparse(void); +extern int yywrap(void); +extern const char *cleanup_cmd; + +/* This mutex guards all parser global variables declared in this file. */ +pthread_mutex_t parser_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* The input to the parser: the path name of the script file to parse. */ +static const char* current_script_path = NULL; + +/* The starting line number of the input script statement that we're + * currently parsing. This may be different than yylineno if bison had + * to look ahead and lexically scan a token on the following line to + * decide that the current statement is done. + */ +static int current_script_line = -1; + +/* + * We uses this object to look up configuration info needed during + * parsing (such as whether packets are IPv4 or IPv6). + */ +struct config *in_config = NULL; + +/* The output of the parser: an output script containing + * 1) a linked list of options + * 2) a linked list of events + */ +static struct script *out_script = NULL; + +/* The test invocation to pass back to parse_and_finalize_config(). */ +struct invocation *invocation; + +/* Copy the script contents into our single linear buffer. 
*/ +void copy_script(const char *script_buffer, struct script *script) +{ + DEBUGP("copy_script\n"); + + free(script->buffer); + script->length = strlen(script_buffer); + script->buffer = strdup(script_buffer); + assert(script->buffer != NULL); + + DEBUGP("copy_script: %d bytes\n", script->length); +} + +/* Read the script file into a single linear buffer. */ +void read_script(const char *script_path, struct script *script) +{ + int size = 0; + + DEBUGP("read_script(%s)\n", script_path); + + while (script->buffer == NULL) { + struct stat script_info; + int fd = -1; + + /* Allocate a buffer big enough for the whole file. */ + if (stat(script_path, &script_info) != 0) + die("parse error: stat() of script file '%s': %s\n", + script_path, strerror(errno)); + + /* Pick a buffer size larger than the file, so we'll + * know if the file grew. + */ + size = max((int)script_info.st_size, size) + 1; + + script->buffer = malloc(size); + assert(script->buffer != NULL); + + /* Read the file into our buffer. */ + fd = open(script_path, O_RDONLY); + if (fd < 0) + die("parse error opening script file '%s': %s\n", + script_path, strerror(errno)); + + script->length = read(fd, script->buffer, size); + if (script->length < 0) + die("parse error reading script file '%s': %s\n", + script_path, strerror(errno)); + + /* If we filled the buffer, then probably another + * process wrote more to the file since our stat call, + * so we should try again. + */ + if (script->length == size) { + free(script->buffer); + script->buffer = NULL; + script->length = 0; + } + + if (close(fd)) + die_perror("close"); + } + DEBUGP("read_script: %d bytes\n", script->length); +} + + +/* The public entry point for the script parser. Parses the + * text script file with the given path name and fills in the script + * object with the parsed representation. 
+ */ +int parse_script(struct config *config, + struct script *script, + struct invocation *callback_invocation) +{ + /* This bison-generated parser is not multi-thread safe, so we + * have a lock to prevent more than one thread using the + * parser at the same time. This is useful in the wire server + * context, where in general we may have more than one test + * thread running at the same time. + */ + if (pthread_mutex_lock(&parser_mutex) != 0) + die_perror("pthread_mutex_lock"); + +#if YYDEBUG + yydebug = 1; +#endif + + /* Now parse the script from our buffer. */ + yyin = fmemopen(script->buffer, script->length, "r"); + if (yyin == NULL) + die_perror("fmemopen: parse error opening script buffer"); + + current_script_path = config->script_path; + in_config = config; + out_script = script; + invocation = callback_invocation; + + /* We have to reset the line number here since the wire server + * can do more than one yyparse(). + */ + yylineno = 1; + + int result = yyparse(); /* invoke bison-generated parser */ + current_script_path = NULL; + + if (fclose(yyin)) + die_perror("fclose: error closing script buffer"); + + /* Unlock parser. */ + if (pthread_mutex_unlock(&parser_mutex) != 0) + die_perror("pthread_mutex_unlock"); + + return result ? STATUS_ERR : STATUS_OK; +} + +/* Bison emits code to call this method when there's a parse-time error. + * We print the line number and the error message. + */ +static void yyerror(const char *message) +{ + fprintf(stderr, "%s:%d: parse error at '%s': %s\n", + current_script_path, yylineno, yytext, message); +} + +/* After we finish parsing each line of a script, we analyze the + * semantics of the line. If we encounter an error then we print the + * error message to stderr and exit with an error. 
+ */ +static void semantic_error(const char* message) +{ + assert(current_script_line >= 0); + die("%s:%d: semantic error: %s\n", + current_script_path, current_script_line, message); +} + +/* This standard callback is invoked by flex when it encounters + * the end of a file. We return 1 to tell flex to return EOF. + */ +int yywrap(void) +{ + return 1; +} + +/* Create and initalize a new expression. */ +static struct expression *new_expression(enum expression_t type) +{ + struct expression *expression = calloc(1, sizeof(struct expression)); + expression->type = type; + return expression; +} + +/* Create and initalize a new integer expression with the given + * literal value and format string. + */ +static struct expression *new_integer_expression(s64 num, const char *format) +{ + struct expression *expression = new_expression(EXPR_INTEGER); + expression->value.num = num; + expression->format = format; + return expression; +} + +/* Create and initalize a new one-element expression_list. */ +static struct expression_list *new_expression_list( + struct expression *expression) +{ + struct expression_list *list; + list = calloc(1, sizeof(struct expression_list)); + list->expression = expression; + list->next = NULL; + return list; +} + +/* Add the expression to the end of the list. */ +static void expression_list_append(struct expression_list *list, + struct expression *expression) +{ + while (list->next != NULL) { + list = list->next; + } + list->next = new_expression_list(expression); +} + +/* Create and initialize a new option. */ +static struct option_list *new_option(char *name, char *value) +{ + struct option_list *opt = calloc(1, sizeof(struct option_list)); + opt->name = name; + opt->value = value; + return opt; +} + +/* Create and initialize a new event. 
*/
/* The event starts zeroed except for its type and for the two time
 * fields below, which are set to NO_TIME_RANGE. Allocation failure is
 * reported through die_perror() instead of dereferencing NULL.
 */
static struct event *new_event(enum event_t type)
{
	struct event *e = calloc(1, sizeof(*e));

	if (e == NULL)
		die_perror("calloc");
	e->type = type;
	e->time_usecs_end = NO_TIME_RANGE;
	e->offset_usecs = NO_TIME_RANGE;
	return e;
}

/* Convert the two hex digits at hex[0] and hex[1] into one byte stored
 * in *byte. Returns STATUS_OK on success, or STATUS_ERR if either
 * character is not a hex digit (which includes the string ending after
 * a single digit).
 */
static int parse_hex_byte(const char *hex, u8 *byte)
{
	/* The <ctype.h> classifiers are only defined for values
	 * representable as unsigned char (or EOF), so a plain char
	 * that may be negative must be converted first.
	 */
	if (!isxdigit((unsigned char)hex[0]) ||
	    !isxdigit((unsigned char)hex[1])) {
		return STATUS_ERR;	/* need two hex digits per byte */
	}
	char buf[] = { hex[0], hex[1], '\0' };
	char *buf_end = NULL;
	u32 byte_value = strtoul(buf, &buf_end, 16);
	assert(byte_value <= 0xff);
	assert(buf_end == buf + 2);
	*byte = byte_value;
	return STATUS_OK;
}

/* Converts a hex string in 'hex' into bytes and stores them in a
 * buffer 'buf' of length 'buf_len' bytes; returns number of bytes in
 * out_len. Works for hex strings of arbitrary size, such as very long
 * TCP Fast Open cookies.
 *
 * Returns STATUS_OK on success. Returns STATUS_ERR, leaving *out_len
 * unwritten, if the output buffer is too small or the string contains
 * anything other than complete pairs of hex digits.
 */
static int parse_hex_string(const char *hex, u8 *buf, int buf_len,
			    int *out_len)
{
	u8 *out = buf;
	u8 *buf_end = buf + buf_len;
	while (hex[0] != '\0') {
		if (out >= buf_end) {
			return STATUS_ERR;	/* ran out of output space */
		}
		if (parse_hex_byte(hex, out))
			return STATUS_ERR;	/* bad character */
		hex += 2;
		out += 1;
	}
	*out_len = out - buf;
	assert(*out_len <= buf_len);
	return STATUS_OK;
}

/* Build a TCP Fast Open option from an ASCII hex cookie string. 'exp'
 * selects the experimental encoding (TCPOPT_EXP with a magic number)
 * instead of TCPOPT_FASTOPEN.
 *
 * Returns the new option, or NULL on failure, in which case *error is
 * set to an asprintf()-allocated message. Exactly one message is
 * allocated per failure, so nothing is leaked when the cookie is too
 * long.
 */
static struct tcp_option *new_tcp_fast_open_option(const char *cookie_string,
						   char **error, bool exp)
{
	int cookie_string_len = strlen(cookie_string);
	if (cookie_string_len & 1) {
		asprintf(error,
			 "TCP fast open cookie has an odd number of digits");
		return NULL;
	}
	int cookie_bytes = cookie_string_len / 2;  /* 2 hex chars per byte */
	int max_bytes = exp ? MAX_TCP_FAST_OPEN_EXP_COOKIE_BYTES :
			      MAX_TCP_FAST_OPEN_COOKIE_BYTES;
	if (cookie_bytes > max_bytes) {
		asprintf(error, "TCP fast open cookie of %d bytes "
			 "exceeds maximum cookie length of %d bytes",
			 cookie_bytes, max_bytes);
		return NULL;
	}
	u8 option_bytes = cookie_bytes + (exp ? TCPOLEN_EXP_FASTOPEN_BASE :
						TCPOLEN_FASTOPEN_BASE);
	struct tcp_option *option;
	option = tcp_option_new(exp ? TCPOPT_EXP : TCPOPT_FASTOPEN,
				option_bytes);
	if (exp)
		option->data.fast_open_exp.magic = htons(TCPOPT_FASTOPEN_MAGIC);

	int parsed_bytes = 0;
	/* Parse cookie. This should be an ASCII hex string
	 * representing an even number of bytes (4-16 bytes). But we
	 * do not enforce this, since we want to allow test cases that
	 * supply invalid cookies.
	 */
	if (parse_hex_string(cookie_string,
			     exp ? option->data.fast_open_exp.cookie :
				   option->data.fast_open.cookie,
			     exp ? sizeof(option->data.fast_open_exp.cookie):
				   sizeof(option->data.fast_open.cookie),
			     &parsed_bytes)) {
		free(option);
		asprintf(error,
			 "TCP fast open cookie '%s' is not a valid hex string",
			 cookie_string);
		return NULL;
	}
	assert(parsed_bytes == cookie_bytes);
	return option;
}

/* Build a TCP MD5 signature option from an ASCII hex digest string.
 *
 * Returns the new option, or NULL on failure, in which case *error is
 * set to an asprintf()-allocated message.
 */
static struct tcp_option *new_md5_option(const char *digest_string,
					 char **error)
{
	struct tcp_option *option;
	int digest_string_len = strlen(digest_string);
	int digest_bytes = digest_string_len / 2;
	int parsed_bytes = 0;

	if (digest_bytes > TCP_MD5_DIGEST_LEN) {
		asprintf(error, "TCP MD5 digest longer than 16 bytes");
		return NULL;
	}

	option = tcp_option_new(TCPOPT_MD5SIG, TCPOLEN_MD5_BASE + digest_bytes);

	/* Parse MD5 digest. This should be an ASCII hex string representing 16
	 * bytes. But we allow smaller buffers, since we want to allow test
	 * cases that supply invalid digests.
	 */
	if (parse_hex_string(digest_string,
			     option->data.md5.digest,
			     sizeof(option->data.md5.digest),
			     &parsed_bytes)) {
		free(option);
		asprintf(error, "TCP MD5 digest is not a valid hex string");
		return NULL;
	}
	assert(parsed_bytes <= digest_bytes);
	return option;
}

/* Append the GRE header described by 'expr' to 'packet' and free the
 * expression. A failure in gre_header_append() is reported through
 * semantic_error(). Returns 'packet'.
 */
static struct packet *append_gre(struct packet *packet, struct expression *expr)
{
	struct gre *gre = &expr->value.gre;
	char *error = NULL;
	if (gre_header_append(packet, gre, &error))
		semantic_error(error);
	free(expr);
	return packet;
}

%}

%locations
%expect 3  /* we expect shift/reduce conflicts */
/* The %union section specifies the set of possible types for values
 * for all nonterminal and terminal symbols in the grammar.
 */
%union {
	s64 integer;
	double floating;
	char *string;
	char *reserved;
	s64 time_usecs;
	enum direction_t direction;
	enum ip_ecn_t ip_ecn;
	struct tos_spec tos_spec;
	struct ip_info ip_info;
	struct mpls_stack *mpls_stack;
	struct mpls mpls_stack_entry;
	u16 port;
	s32 window;
	u16 urg_ptr;
	u32 sequence_number;
	struct {
		int protocol;		/* IPPROTO_TCP or IPPROTO_UDP */
		u32 start_sequence;
		u16 payload_bytes;
	} tcp_sequence_info;
	struct option_list *option;
	struct event *event;
	struct packet *packet;
	struct syscall_spec *syscall;
	struct command_spec *command;
	struct code_spec *code;
	struct tcp_option *tcp_option;
	struct tcp_options *tcp_options;
	struct expression *expression;
	struct expression_list *expression_list;
	struct errno_spec *errno_info;
	struct {
		u16 src_port;
		u16 dst_port;
	} port_info;
}

/* The specific type of the output for a symbol is given by the %type
 * directive. By convention terminal symbols returned from the lexer
 * have ALL_CAPS names, and nonterminal symbols have lower_case names.
+ */ +%token ELLIPSIS +%token SA_FAMILY SIN_PORT SIN_ADDR _HTONS_ INET_ADDR INET6_ADDR +%token MSG_NAME MSG_IOV MSG_FLAGS MSG_CONTROL +%token CMSG_LEVEL CMSG_TYPE CMSG_DATA +%token FD EVENTS REVENTS ONOFF LINGER +%token U32 U64 PTR +%token ACK ECR EOL MSS NOP SACK SACKOK TIMESTAMP VAL WIN WSCALE +%token URG MD5 FAST_OPEN FAST_OPEN_EXP +%token TOS FLAGS FLOWLABEL +%token ECT0 ECT1 CE ECT01 NO_ECN +%token IPV4 IPV6 ICMP UDP RAW GRE MTU ID +%token MPLS LABEL TC TTL +%token OPTION +%token SUM OFF KEY SEQ +%token NONE CHECKSUM SEQUENCE PRESENT +%token EE_ERRNO EE_CODE EE_DATA EE_INFO EE_ORIGIN EE_TYPE +%token SCM_SEC SCM_NSEC +%token FLOAT +%token INTEGER HEX_INTEGER +%token WORD STRING BACK_QUOTED CODE IPV4_ADDR IPV6_ADDR +%type direction +%type ip_info opt_ip_info +%type tos_spec +%type ip_ecn +%type