aboutsummaryrefslogtreecommitdiffstats
path: root/test/packetdrill/lexer.l
blob: 7d063d37fee581ac865e401c7bc78796133005c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
%{
/*
 * Copyright 2013 Google Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
/*
 * Author: ncardwell@google.com (Neal Cardwell)
 *
 * This is the specification for the lexical scanner for the packetdrill
 * script language. It is processed by the flex lexical scanner
 * generator.
 *
 * For full documentation see: http://flex.sourceforge.net/manual/
 *
 * Here is a quick and dirty tutorial on flex:
 *
 * A flex lexical scanner specification is basically a list of rules,
 * where each rule is a regular expression for a lexical token to
 * match, followed by a C fragment to execute when the scanner sees
 * that pattern.
 *
 * The lexer feeds a stream of terminal symbols up to the parser,
 * passing up a FOO token for each "return FOO" in the lexer spec. The
 * lexer specifies what value to pass up to the parser by setting a
 * yylval.fooval field, where fooval is a field in the %union in the
 * .y file.
 *
 * TODO: detect overflow in numeric literals.
 */

#include "types.h"

#include <netinet/in.h>
#include <stdlib.h>
#include <stdio.h>
#include "script.h"
#include "tcp_options.h"
#include "parse.h"
#include "config.h"

/* This include of the bison-generated .h file must go last so that we
 * can first include all of the declarations on which it depends.
 */
#include "parser.h"

/* Suppress flex's generation of an uncalled static input() function, which
 * leads to a compiler warning:
 *    warning: ‘input’ defined but not used
 */
#define YY_NO_INPUT

/* Return a newly allocated copy of the option name in a "--foo" string,
 * i.e. everything that follows the leading "--".
 */
static char *option(const char *s)
{
	static const char prefix[] = "--";
	const size_t skip = sizeof(prefix) - 1;	/* characters in "--" */
	const char *name = s + skip;

	return strndup(name, strlen(s) - skip);
}

/* Return a newly allocated copy of the text between the delimiters of
 * a quoted string: the first and the last character of s are dropped.
 */
static char *quoted(const char *s)
{
	const size_t total = strlen(s);
	const char *inner = s + 1;	/* skip the opening delimiter */

	/* The copy excludes both the opening and the closing delimiter. */
	return strndup(inner, total - 2);
}

/* Classify the word currently held in yytext.
 *
 * If the word has an entry in in_config->defines, the entry's value is
 * passed up instead of the word, and the first character of the value
 * selects the token type:
 *   SYM="val"  ->  STRING       (delimiters stripped)
 *   SYM=`val`  ->  BACK_QUOTED  (delimiters stripped)
 *   SYM=val    ->  WORD
 * A word with no such entry is passed up unchanged as a WORD (e.g. a
 * system call name or socket option name).
 *
 * In every case yylval.string is set to a newly allocated string.
 */
int word(void)
{
	char *defined = definition_get(in_config->defines, yytext);

	/* Not a user-defined symbol: the literal word is the value. */
	if (defined == NULL) {
		yylval.string = strdup(yytext);
		return WORD;
	}

	switch (defined[0]) {
	case '"':
		yylval.string = quoted(defined);
		return STRING;
	case '`':
		yylval.string = quoted(defined);
		return BACK_QUOTED;
	default:
		yylval.string = strdup(defined);
		return WORD;
	}
}

/* Copy the code inside a code snippet that is enclosed in %{ }% after
 * first stripping the space and tab characters from either end of the
 * snippet. We strip leading and trailing whitespace for Python users
 * to remain sane, since Python is sensitive to whitespace. To summarize,
 * given an input %{<space><code><space>}% we return: <code>
 *
 * Only spaces and tabs are stripped; newlines are kept. A snippet that
 * holds nothing but spaces and tabs yields the empty string: the two
 * trimming scans are bounded by each other, so neither can walk into a
 * delimiter or past the other end of the snippet.
 *
 * Returns a newly allocated string, or NULL if the allocation fails.
 */
static char *code(const char *s)
{
	const size_t delim_len = sizeof("%{") - 1;
	const size_t len = strlen(s);

	/* Defensive: too short to hold both delimiters, so no body at all. */
	if (len < 2 * delim_len)
		return strndup(s, 0);

	/* [start, end) is the snippet body between the two delimiters. */
	const char *start = s + delim_len;
	const char *end = s + len - delim_len;

	while ((start < end) && ((*start == ' ') || (*start == '\t')))
		++start;

	while ((end > start) && ((end[-1] == ' ') || (end[-1] == '\t')))
		--end;

	return strndup(start, (size_t)(end - start));
}

/* Convert a hex string prefixed by "0x" to an integer value.
 *
 * The conversion reads from the argument s rather than from the global
 * yytext, and uses strtoll() so that the result is not limited to the
 * width of long on platforms where long is narrower than long long.
 * Out-of-range input is not reported (see the TODO in the file header
 * about overflow in numeric literals).
 * NOTE(review): s64 is declared outside this file; presumably a signed
 * 64-bit type -- confirm.
 */
static s64 hextol(const char *s)
{
	const size_t prefix_len = sizeof("0x") - 1;

	return strtoll(s + prefix_len, NULL, 16);
}

%}

%{
/* flex runs YY_USER_ACTION before the action of every matched rule.
 * Here it stamps the token location: both the first and the last line
 * of yylloc are set to the current value of yylineno.
 */
#define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno;
%}
/* Ask flex to maintain the current input line number in yylineno. */
%option yylineno
/* Do not generate the unput() function. */
%option nounput

/* A regexp for C++ comments: two slashes and the rest of the line.
 * The terminating newline is deliberately not part of the match, so
 * that a comment on the last line of a script that lacks a trailing
 * newline is still recognized. The newline itself, when present, is
 * consumed by the whitespace rule.
 */
cpp_comment	\/\/[^\n]*

/* Here is a summary of the regexp for C comments:
 *   open-comment
 *   any number of:
 *     (non-stars) or (one or more stars, then a non-star non-slash)
 *   one or more stars, then a slash (close comment)
 *
 * A run of stars is always consumed as a unit. This way a comment whose
 * closing slash is preceded by more than one star is closed right
 * there, instead of being missed or extended to a later close-comment.
 */
c_comment	\/\*([^*]|\*+[^*\/])*\*+\/

/* The regexp for code snippets is analogous to that for C comments.
 * Here is a summary of the regexp for code snippets:
 *   %{
 *   any number of:
 *     (non-}) or (one or more }, then a character that is neither } nor %)
 *   one or more }, then %
 *
 * A run of } characters is always consumed as a unit. This way a snippet
 * whose code ends in } immediately before the closing delimiter is
 * closed right there, instead of being missed or extended to a later
 * closing delimiter. Any extra } before the final }% belongs to the
 * snippet body.
 */
code		\%\{([^}]|\}+[^}\%])*\}+\%

/* IPv4: a regular expression for an IPv4 address: four runs of decimal
 * digits separated by dots. The numeric range of each run is not
 * checked here.
 */
ipv4_addr		[0-9]+[.][0-9]+[.][0-9]+[.][0-9]+

/* IPv6: a regular expression for an IPv6 address. The complexity is
 * unfortunate, but we can't use a super-simple approach because TCP
 * sequence number ranges like 1:1001 can look like IPv6 addresses if
 * we use a naive approach.
 *
 * Each vN below is one accepted shape; ipv6_addr is their union.
 */
/* One group of 1 to 4 hex digits: */
seg	[0-9a-fA-F]{1,4}
/* "::" on its own: */
v0	[:][:]
/* Eight groups, no "::": */
v1	({seg}[:]){7,7}{seg}
/* 1 to 7 groups followed by a trailing "::": */
v2	({seg}[:]){1,7}[:]
/* Groups, then "::", then more groups; v3 through v8 trade groups
 * before the "::" for groups after it:
 */
v3	({seg}[:]){1,6}[:]{seg}
v4	({seg}[:]){1,5}([:]{seg}){1,2}
v5	({seg}[:]){1,4}([:]{seg}){1,3}
v6	({seg}[:]){1,3}([:]{seg}){1,4}
v7	({seg}[:]){1,2}([:]{seg}){1,5}
v8	{seg}[:](([:]{seg}){1,6})
/* A leading "::" followed by 1 to 7 groups: */
v9	[:]([:]{seg}){1,7}
/* IPv4-mapped IPv6 address: */
v10	[:][:]ffff[:]{ipv4_addr}
/* IPv4-translated IPv6 address: */
v11	[:][:]ffff[:](0){1,4}[:]{ipv4_addr}
/* IPv4-embedded IPv6 addresses: */
v12	({seg}[:]){1,4}[:]{ipv4_addr}
ipv6_addr ({v0}|{v1}|{v2}|{v3}|{v4}|{v5}|{v6}|{v7}|{v8}|{v9}|{v10}|{v11}|{v12})

%%
sa_family		return SA_FAMILY;	/* first of the fixed keywords: each literal spelling returns one parser token and sets no yylval */
	/* flex takes the longest match and, between matches of equal
	 * length, the rule listed first. Every keyword here also matches
	 * the generic word rule further down, so the keywords win only
	 * because they are listed before it.
	 */
sin_port		return SIN_PORT;
sin_addr		return SIN_ADDR;
msg_name		return MSG_NAME;
msg_iov			return MSG_IOV;
msg_flags		return MSG_FLAGS;
msg_control		return MSG_CONTROL;
cmsg_data		return CMSG_DATA;
cmsg_level		return CMSG_LEVEL;
cmsg_type		return CMSG_TYPE;
ee_errno		return EE_ERRNO;
ee_origin		return EE_ORIGIN;
ee_type			return EE_TYPE;
ee_code			return EE_CODE;
ee_info			return EE_INFO;
ee_data			return EE_DATA;
scm_sec			return SCM_SEC;
scm_nsec		return SCM_NSEC;
fd			return FD;
u32			return U32;
u64			return U64;
ptr			return PTR;
events			return EVENTS;
revents			return REVENTS;
onoff			return ONOFF;
linger			return LINGER;
htons			return _HTONS_;
ipv4			return IPV4;
ipv6			return IPV6;
icmp			return ICMP;
udp			return UDP;
	/* Two spellings for the same GRE token. */
GREv0			return GRE;
gre			return GRE;
raw			return RAW;
sum			return SUM;
off			return OFF;
key			return KEY;
seq			return SEQ;
none			return NONE;
checksum		return CHECKSUM;
sequence#		return SEQUENCE;
present			return PRESENT;
mpls			return MPLS;
label			return LABEL;
tc			return TC;
ttl			return TTL;
inet_addr		return INET_ADDR;
inet6_addr		return INET6_ADDR;
ack			return ACK;
eol			return EOL;
ecr			return ECR;
mss			return MSS;
mtu			return MTU;
nop			return NOP;
sack			return SACK;
sackOK			return SACKOK;
md5			return MD5;
TS			return TIMESTAMP;
FO			return FAST_OPEN;
FOEXP			return FAST_OPEN_EXP;
tos			return TOS;
flowlabel		return FLOWLABEL;
	/* Two spellings for the same FLAGS token. */
flags			return FLAGS;
Flags			return FLAGS;
val			return VAL;
win			return WIN;
urg			return URG;
wscale			return WSCALE;
ect01			return ECT01;
ect0			return ECT0;
ect1			return ECT1;
noecn			return NO_ECN;
ce			return CE;
id			return ID;
[.][.][.]		return ELLIPSIS;
	/* A "--name" option; the token value is a copy of "name". */
--[a-zA-Z0-9_]+		yylval.string	= option(yytext); return OPTION;
	/* Numeric literals. atof() and atoll() report no errors, so an
	 * out-of-range literal is not detected here (see the TODO in the
	 * file header).
	 */
[-]?[0-9]*[.][0-9]+	yylval.floating	= atof(yytext);   return FLOAT;
[-]?[0-9]+		yylval.integer	= atoll(yytext);  return INTEGER;
	/* The integer and hex rules must stay ahead of the generic word
	 * rule: all-digit and 0x... text also matches the word pattern with
	 * the same length, and flex breaks such ties in favor of the rule
	 * listed first.
	 */
0x[0-9a-fA-F]+		yylval.integer	= hextol(yytext); return HEX_INTEGER;
	/* Any other word: word() returns STRING, BACK_QUOTED or WORD. */
[a-zA-Z0-9_]+		return word();
	/* Quoted strings. A backslash followed by any character (other than
	 * a newline) is skipped as a pair when looking for the closing
	 * delimiter. quoted() strips the two delimiters and copies the
	 * rest as written.
	 */
\"(\\.|[^"])*\"		yylval.string	= quoted(yytext); return STRING;
\`(\\.|[^`])*\`		yylval.string	= quoted(yytext); return BACK_QUOTED;
	/* Any other single non-whitespace character is returned as its own
	 * character code. Longer matches from other rules take precedence.
	 */
[^ \t\n]		return (int) yytext[0];
	/* Whitespace and comments produce no token. */
[ \t\n]+		/* ignore whitespace */;
{cpp_comment}		/* ignore C++-style comment */;
{c_comment}		/* ignore C-style comment */;
	/* The token value is the snippet body as returned by code(). */
{code}			yylval.string = code(yytext);   return CODE;
	/* The token value is a copy of the matched address text. */
{ipv4_addr}		yylval.string = strdup(yytext); return IPV4_ADDR;
{ipv6_addr}		yylval.string = strdup(yytext); return IPV6_ADDR;
%%