aboutsummaryrefslogtreecommitdiffstats
path: root/test/packetdrill/lexer.l
diff options
context:
space:
mode:
Diffstat (limited to 'test/packetdrill/lexer.l')
-rw-r--r--test/packetdrill/lexer.l280
1 files changed, 280 insertions, 0 deletions
diff --git a/test/packetdrill/lexer.l b/test/packetdrill/lexer.l
new file mode 100644
index 0000000..7d063d3
--- /dev/null
+++ b/test/packetdrill/lexer.l
@@ -0,0 +1,280 @@
+%{
+/*
+ * Copyright 2013 Google Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+/*
+ * Author: ncardwell@google.com (Neal Cardwell)
+ *
+ * This is the specification for the lexical scanner for the packetdrill
+ * script language. It is processed by the flex lexical scanner
+ * generator.
+ *
+ * For full documentation see: http://flex.sourceforge.net/manual/
+ *
+ * Here is a quick and dirty tutorial on flex:
+ *
+ * A flex lexical scanner specification is basically a list of rules,
+ * where each rule is a regular expression for a lexical token to
+ * match, followed by a C fragment to execute when the scanner sees
+ * that pattern.
+ *
+ * The lexer feeds a stream of terminal symbols up to the parser,
+ * passing up a FOO token for each "return FOO" in the lexer spec. The
+ * lexer specifies what value to pass up to the parser by setting a
+ * yylval.fooval field, where fooval is a field in the %union in the
+ * .y file.
+ *
+ * TODO: detect overflow in numeric literals.
+ */
+
+#include "types.h"
+
+#include <netinet/in.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "script.h"
+#include "tcp_options.h"
+#include "parse.h"
+#include "config.h"
+
+/* This include of the bison-generated .h file must go last so that we
+ * can first include all of the declarations on which it depends.
+ */
+#include "parser.h"
+
+/* Suppress flex's generation of an uncalled static input() function, which
+ * leads to a compiler warning:
+ * warning: ‘input’ defined but not used
+ */
+#define YY_NO_INPUT
+
+/* Return a newly allocated copy of the option name in a "--name" string,
+ * i.e. everything that follows the two leading dashes. The copy is made
+ * with strndup(), so the caller is responsible for freeing it.
+ */
+static char *option(const char *s)
+{
+	const char *name = s + (sizeof("--") - 1);
+
+	return strndup(name, strlen(name));
+}
+
+/* Return a newly allocated copy of a delimited string with its first and
+ * last characters (the opening and closing delimiters) removed. The copy
+ * is made with strndup(), so the caller is responsible for freeing it.
+ */
+static char *quoted(const char *s)
+{
+	const size_t total_len = strlen(s);
+	const char *body = s + 1;	/* skip the opening delimiter */
+
+	/* Drop one delimiter from each end. */
+	return strndup(body, total_len - 2);
+}
+
+/* Classify the word currently held in yytext and store its string value
+ * in yylval.string.
+ *
+ * If the word names a user-defined symbol, the symbol's value is used:
+ * a value starting with a double quote yields STRING, a value starting
+ * with a back quote yields BACK_QUOTED (both with their delimiters
+ * stripped), and any other value yields WORD. A word with no definition
+ * is passed through unchanged as WORD.
+ */
+int word(void)
+{
+	/* Look in symbol table for matching user-defined symbol->value map. */
+	char *definition = definition_get(in_config->defines, yytext);
+
+	if (!definition) {
+		/* A literal word (e.g. system call name or socket option name). */
+		yylval.string = strdup(yytext);
+		return WORD;
+	}
+
+	switch (definition[0]) {
+	case '"':		/* SYM="val" */
+		yylval.string = quoted(definition);
+		return STRING;
+	case '`':		/* SYM=`val` */
+		yylval.string = quoted(definition);
+		return BACK_QUOTED;
+	default:		/* SYM=val */
+		yylval.string = strdup(definition);
+		return WORD;
+	}
+}
+
+/* Copy the code inside a code snippet that is enclosed in %{ }% after
+ * first stripping the space and tab characters from either end of the
+ * snippet. We strip leading and trailing whitespace for Python users
+ * to remain sane, since Python is sensitive to whitespace. To summarize,
+ * given an input %{<space><code><space>}% we return: <code>
+ *
+ * Only spaces and tabs are stripped; newlines are kept. A snippet that
+ * holds nothing but spaces and tabs yields an empty string. The result is
+ * allocated with strndup(), so the caller is responsible for freeing it.
+ */
+static char *code(const char *s)
+{
+	const size_t delim_len = sizeof("%{") - 1;
+	const size_t total_len = strlen(s);
+
+	/* Too short to hold both delimiters: there is no snippet to copy. */
+	if (total_len < 2 * delim_len)
+		return strndup(s, 0);
+
+	const char *start = s + delim_len;
+	/* One past the last snippet character: the '}' of the closing "}%". */
+	const char *end = s + total_len - delim_len;
+
+	/* The closing '}' is neither space nor tab, so this scan stops there. */
+	while ((*start == ' ') || (*start == '\t'))
+		++start;
+
+	/* Trim trailing blanks, but never back up past 'start'. Without the
+	 * bound, an all-blank snippet would produce a negative length, which
+	 * strndup() would treat as a huge count and copy the closing
+	 * delimiter instead of an empty string.
+	 */
+	while ((end > start) && ((end[-1] == ' ') || (end[-1] == '\t')))
+		--end;
+
+	return strndup(start, (size_t)(end - start));
+}
+
+/* Convert a hex string prefixed by "0x" to an integer value.
+ *
+ * The digits are read from the argument 's' (not from the global yytext),
+ * starting just past the two-character prefix. strtoll() is used so that
+ * the conversion is done in a type at least 64 bits wide even on
+ * platforms where long is 32 bits. Out-of-range input is clamped by
+ * strtoll(); no error is reported to the caller.
+ */
+static s64 hextol(const char *s)
+{
+	const size_t prefix_len = sizeof("0x") - 1;
+
+	return strtoll(s + prefix_len, NULL, 16);
+}
+
+%}
+
+%{
+/* flex runs YY_USER_ACTION just before the action of every matched rule.
+ * Here it stamps both ends of the token's location record (yylloc) with
+ * the current input line number.
+ */
+#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno;
+%}
+/* yylineno: have flex maintain the current line number in yylineno.
+ * nounput: do not generate the unput() function.
+ */
+%option yylineno
+%option nounput
+
+/* A regexp for C++ comments: "//" through the end of the line. The
+ * terminating newline is deliberately not part of the match (the
+ * whitespace rule consumes it), so a comment on the last line of a
+ * script that lacks a trailing newline is still recognized.
+ */
+cpp_comment \/\/[^\n]*
+
+/* Here is a summary of the regexp for C comments:
+ * open-comment
+ * any number of:
+ * (non-star) or (one or more stars then a char that is neither star nor slash)
+ * one or more stars, then slash (close comment)
+ *
+ * Allowing a run of stars before the closing slash lets a comment whose
+ * terminator is preceded by extra stars (as in a banner line) end where
+ * it should, instead of failing to match or running on to a later
+ * comment terminator.
+ */
+c_comment \/\*([^*]|\*+[^*\/])*\*+\/
+
+/* The regexp for code snippets is analogous to that for C comments.
+ * Here is a summary of the regexp for code snippets:
+ * %{
+ * any number of:
+ * (non-}) or (one or more } then a char that is neither } nor %)
+ * one or more }, then %
+ *
+ * Allowing a run of } before the closing % lets a snippet whose last
+ * statement ends in a closing brace (e.g. a Python dict literal) end at
+ * the right place.
+ */
+code \%\{([^}]|\}+[^}\%])*\}+\%
+
+/* IPv4: a regular expression for an IPv4 address: four runs of decimal
+ * digits separated by dots. The numeric range of each field is not
+ * checked by this pattern.
+ */
+ipv4_addr [0-9]+[.][0-9]+[.][0-9]+[.][0-9]+
+
+/* IPv6: a regular expression for an IPv6 address. The complexity is
+ * unfortunate, but we can't use a super-simple approach because TCP
+ * sequence number ranges like 1:1001 can look like IPv6 addresses if
+ * we use a naive approach.
+ *
+ * seg is one group of 1 to 4 hex digits. The alternatives are:
+ * v0: a bare double colon
+ * v1: the full form, eight groups separated by single colons
+ * v2: 1 to 7 groups followed by a double colon
+ * v3-v8: groups, a double colon, then more groups
+ * v9: a double colon followed by 1 to 7 groups
+ * v10-v12: forms that end in a dotted IPv4 address
+ * Every alternative other than v1 contains a double colon, which is what
+ * keeps a plain range such as 1:1001 from matching.
+ */
+seg [0-9a-fA-F]{1,4}
+v0 [:][:]
+v1 ({seg}[:]){7,7}{seg}
+v2 ({seg}[:]){1,7}[:]
+v3 ({seg}[:]){1,6}[:]{seg}
+v4 ({seg}[:]){1,5}([:]{seg}){1,2}
+v5 ({seg}[:]){1,4}([:]{seg}){1,3}
+v6 ({seg}[:]){1,3}([:]{seg}){1,4}
+v7 ({seg}[:]){1,2}([:]{seg}){1,5}
+v8 {seg}[:](([:]{seg}){1,6})
+v9 [:]([:]{seg}){1,7}
+/* IPv4-mapped IPv6 address: */
+v10 [:][:]ffff[:]{ipv4_addr}
+/* IPv4-translated IPv6 address: */
+v11 [:][:]ffff[:](0){1,4}[:]{ipv4_addr}
+/* IPv4-embedded IPv6 addresses: */
+v12 ({seg}[:]){1,4}[:]{ipv4_addr}
+ipv6_addr ({v0}|{v1}|{v2}|{v3}|{v4}|{v5}|{v6}|{v7}|{v8}|{v9}|{v10}|{v11}|{v12})
+
+%%
+ /* Keywords. flex prefers the longest match and, on a tie, the rule  */
+ /* listed first, so each keyword here wins over the generic word rule */
+ /* further down only when the whole word is exactly that keyword.     */
+sa_family return SA_FAMILY;
+sin_port return SIN_PORT;
+sin_addr return SIN_ADDR;
+msg_name return MSG_NAME;
+msg_iov return MSG_IOV;
+msg_flags return MSG_FLAGS;
+msg_control return MSG_CONTROL;
+cmsg_data return CMSG_DATA;
+cmsg_level return CMSG_LEVEL;
+cmsg_type return CMSG_TYPE;
+ee_errno return EE_ERRNO;
+ee_origin return EE_ORIGIN;
+ee_type return EE_TYPE;
+ee_code return EE_CODE;
+ee_info return EE_INFO;
+ee_data return EE_DATA;
+scm_sec return SCM_SEC;
+scm_nsec return SCM_NSEC;
+fd return FD;
+u32 return U32;
+u64 return U64;
+ptr return PTR;
+events return EVENTS;
+revents return REVENTS;
+onoff return ONOFF;
+linger return LINGER;
+htons return _HTONS_;
+ipv4 return IPV4;
+ipv6 return IPV6;
+icmp return ICMP;
+udp return UDP;
+GREv0 return GRE;
+gre return GRE;
+raw return RAW;
+sum return SUM;
+off return OFF;
+key return KEY;
+seq return SEQ;
+none return NONE;
+checksum return CHECKSUM;
+sequence# return SEQUENCE;
+present return PRESENT;
+mpls return MPLS;
+label return LABEL;
+tc return TC;
+ttl return TTL;
+inet_addr return INET_ADDR;
+inet6_addr return INET6_ADDR;
+ack return ACK;
+eol return EOL;
+ecr return ECR;
+mss return MSS;
+mtu return MTU;
+nop return NOP;
+sack return SACK;
+sackOK return SACKOK;
+md5 return MD5;
+TS return TIMESTAMP;
+FO return FAST_OPEN;
+FOEXP return FAST_OPEN_EXP;
+tos return TOS;
+flowlabel return FLOWLABEL;
+flags return FLAGS;
+Flags return FLAGS;
+val return VAL;
+win return WIN;
+urg return URG;
+wscale return WSCALE;
+ect01 return ECT01;
+ect0 return ECT0;
+ect1 return ECT1;
+noecn return NO_ECN;
+ce return CE;
+id return ID;
+ /* Punctuation, options, numeric literals, words and quoted strings. */
+ /* The numeric rules precede the word rule so that an all-digit or    */
+ /* 0x-prefixed token of equal length is returned as a number.         */
+[.][.][.] return ELLIPSIS;
+--[a-zA-Z0-9_]+ yylval.string = option(yytext); return OPTION;
+[-]?[0-9]*[.][0-9]+ yylval.floating = atof(yytext); return FLOAT;
+[-]?[0-9]+ yylval.integer = atoll(yytext); return INTEGER;
+0x[0-9a-fA-F]+ yylval.integer = hextol(yytext); return HEX_INTEGER;
+[a-zA-Z0-9_]+ return word();
+\"(\\.|[^"])*\" yylval.string = quoted(yytext); return STRING;
+\`(\\.|[^`])*\` yylval.string = quoted(yytext); return BACK_QUOTED;
+ /* Any other single non-blank character is returned as its own token. */
+ /* Being one character long, this rule only wins when nothing longer  */
+ /* (comment, code snippet, address) matches at the same position.     */
+[^ \t\n] return (int) yytext[0];
+[ \t\n]+ /* ignore whitespace */;
+{cpp_comment} /* ignore C++-style comment */;
+{c_comment} /* ignore C-style comment */;
+{code} yylval.string = code(yytext); return CODE;
+{ipv4_addr} yylval.string = strdup(yytext); return IPV4_ADDR;
+{ipv6_addr} yylval.string = strdup(yytext); return IPV6_ADDR;
+%%