3 * Copyright 2013 Google Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 * Author: ncardwell@google.com (Neal Cardwell)
23 * This is the specification for the lexical scanner for the packetdrill
24 * script language. It is processed by the flex lexical scanner
27 * For full documentation see: http://flex.sourceforge.net/manual/
29 * Here is a quick and dirty tutorial on flex:
31 * A flex lexical scanner specification is basically a list of rules,
32 * where each rule is a regular expression for a lexical token to
33 * match, followed by a C fragment to execute when the scanner sees
36 * The lexer feeds a stream of terminal symbols up to this parser,
37 * passing up a FOO token for each "return FOO" in the lexer spec. The
38 * lexer specifies what value to pass up to the parser by setting a
39 * yylval.fooval field, where fooval is a field in the %union in the
42 * TODO: detect overflow in numeric literals.
47 #include <netinet/in.h>
51 #include "tcp_options.h"
55 /* This include of the bison-generated .h file must go last so that we
56 * can first include all of the declarations on which it depends.
60 /* Suppress flex's generation of an uncalled static input() function, which
61 * leads to a compiler warning:
62 * warning: ‘input’ defined but not used
/* Copy the option name "foo" that follows the "--" of a "--foo" option.
 *
 * s: NUL-terminated option text as matched by the scanner, e.g. "--foo".
 *
 * Returns a newly allocated string holding everything after the leading
 * "--", or NULL if allocation fails. The caller owns the result and must
 * free() it. If s is shorter than the "--" prefix, an empty string is
 * returned instead of reading past the end of s.
 */
static char *option(const char *s)
{
	const size_t dash_dash_len = 2;
	const size_t len = strlen(s);

	/* Guard the subtraction below: size_t arithmetic would wrap. */
	if (len < dash_dash_len)
		return strdup("");

	return strndup(s + dash_dash_len, len - dash_dash_len);
}
/* Copy the text inside a quoted string.
 *
 * s: NUL-terminated text that starts and ends with a one-character
 *    delimiter, e.g. "\"abc\"" or "`ls`". The delimiter characters
 *    themselves are not inspected; the first and last bytes are simply
 *    dropped. Escape sequences inside the string are copied verbatim.
 *
 * Returns a newly allocated copy of the enclosed text, or NULL if
 * allocation fails. The caller owns the result and must free() it. If s
 * is too short to hold both delimiters, an empty string is returned
 * instead of reading past the end of s.
 */
static char *quoted(const char *s)
{
	const size_t delim_len = 1;
	const size_t len = strlen(s);

	/* Guard the subtraction below: size_t arithmetic would wrap. */
	if (len < 2 * delim_len)
		return strdup("");

	return strndup(s + delim_len, len - 2 * delim_len);
}
80 /* Check to see if the word in yytext is a user-defined symbol, and if so then
81 * return its value. Otherwise return the word itself.
88 /* Look in symbol table for matching user-defined symbol->value map. */
89 value = definition_get(in_config->defines, word);
91 if (value[0] == '"') {
92 yylval.string = quoted(value); /* SYM="val" */
94 } else if (value[0] == '`') {
95 yylval.string = quoted(value); /* SYM=`val` */
98 yylval.string = strdup(value); /* SYM=val */
102 /* A literal word (e.g. system call name or socket option name). */
103 yylval.string = strdup(word);
/* Copy the code inside a code snippet that is enclosed in %{ }% after
 * first stripping the space and tab characters from either end of the
 * snippet. We strip leading and trailing whitespace for Python users
 * to remain sane, since Python is sensitive to whitespace. To summarize,
 * given an input %{<space><code><space>}% we return: <code>
 *
 * Only ' ' and '\t' are stripped; newlines are kept.
 *
 * s: NUL-terminated snippet text including both two-character delimiters.
 *
 * Returns a newly allocated string, or NULL if allocation fails. The
 * caller owns the result and must free() it. A snippet that contains
 * nothing but spaces and tabs (or is too short to hold both delimiters)
 * yields an empty string.
 */
static char *code(const char *s)
{
	const size_t delim_len = sizeof("%{") - 1;
	const size_t len = strlen(s);

	if (len < 2 * delim_len)
		return strdup("");

	const char *start = s + delim_len;
	const char *end = s + len - delim_len;	/* one past the last code byte */

	/* Both scans are bounded by each other so that an all-blank snippet
	 * cannot make them cross and produce a negative length.
	 */
	while (start < end && (*start == ' ' || *start == '\t'))
		++start;
	while (end > start && (end[-1] == ' ' || end[-1] == '\t'))
		--end;

	return strndup(start, (size_t)(end - start));
}
129 /* Convert a hex string prefixed by "0x" to an integer value. */
130 static s64 hextol(const char *s)
132 return strtol(yytext + 2, NULL, 16);
/* flex expands YY_USER_ACTION immediately before the action of every
 * matched rule. Use that hook to stamp the location of the token being
 * returned: both ends of yylloc are set to the scanner's current line
 * number, yylineno.
 */
138 #define YY_USER_ACTION yylloc.first_line = yylloc.last_line = yylineno;
/* A regexp for C++ comments: "//" and everything up to, but not
 * including, the end of the line. The newline itself is left for the
 * whitespace rule, so a comment on the final line of a script is still
 * recognized when the file does not end in a newline.
 */
cpp_comment	\/\/[^\n]*
/* Here is a summary of the regexp for C comments:
 *   open-comment
 *   any number of:
 *     (non-star) or (one or more stars, then a non-star non-slash)
 *   one or more stars, then a slash
 *
 * Runs of stars are consumed as a unit so that a comment whose closing
 * delimiter is preceded by extra stars still terminates at that
 * delimiter instead of failing to match.
 */
c_comment	\/\*([^*]|\*+[^*\/])*\*+\/
/* The regexp for code snippets is analogous to that for C comments.
 * Here is a summary of the regexp for code snippets:
 *   %{
 *   any number of:
 *     (non-}) or (one or more }, then a character that is neither } nor %)
 *   one or more }, then %
 *
 * Runs of } are consumed as a unit so that a snippet whose code ends in
 * a closing brace (for example "%{ d = {1: 2}}%") terminates at the
 * first "}%" rather than failing to match or running on into a later
 * snippet.
 */
code		\%\{([^}]|\}+[^}%])*\}+\%
163 /* IPv4: a regular expression for an IPv4 address */
164 ipv4_addr [0-9]+[.][0-9]+[.][0-9]+[.][0-9]+
166 /* IPv6: a regular expression for an IPv6 address. The complexity is
167 * unfortunate, but we can't use a super-simple approach because TCP
168 * sequence number ranges like 1:1001 can look like IPv6 addresses if
169 * we use a naive approach.
/* Alternatives that together make up {ipv6_addr}. {seg} and {v0} are
 * defined elsewhere in this file (presumably {seg} is a single hex
 * group -- confirm against its definition).
 *
 *   v1     eight groups separated by single colons, no "::"
 *   v2     one to seven groups followed by a trailing "::"
 *   v3-v8  a "::" in the middle: the first repetition count bounds the
 *          groups before it, the second bounds the groups after it
 *   v9     a leading "::" followed by one to seven groups
 *   v10+   forms that end in a dotted-quad {ipv4_addr}
 */
173 v1 ({seg}[:]){7,7}{seg}
174 v2 ({seg}[:]){1,7}[:]
175 v3 ({seg}[:]){1,6}[:]{seg}
176 v4 ({seg}[:]){1,5}([:]{seg}){1,2}
177 v5 ({seg}[:]){1,4}([:]{seg}){1,3}
178 v6 ({seg}[:]){1,3}([:]{seg}){1,4}
179 v7 ({seg}[:]){1,2}([:]{seg}){1,5}
180 v8 {seg}[:](([:]{seg}){1,6})
181 v9 [:]([:]{seg}){1,7}
182 /* IPv4-mapped IPv6 address: */
183 v10 [:][:]ffff[:]{ipv4_addr}
184 /* IPv4-translated IPv6 address: */
185 v11 [:][:]ffff[:](0){1,4}[:]{ipv4_addr}
186 /* IPv4-embedded IPv6 addresses: */
187 v12 ({seg}[:]){1,4}[:]{ipv4_addr}
/* The complete IPv6 pattern: any one of the alternatives above. */
188 ipv6_addr ({v0}|{v1}|{v2}|{v3}|{v4}|{v5}|{v6}|{v7}|{v8}|{v9}|{v10}|{v11}|{v12})
191 sa_family return SA_FAMILY;
	/* Keyword rules (the one above and those that follow): each
	 * literal pattern returns a fixed token code to the parser and
	 * sets no yylval field. When two rules match the same length of
	 * input, flex uses the one listed first, so these rules must stay
	 * ahead of the generic [a-zA-Z0-9_]+ word rule. Comments in the
	 * rules section have to be indented, hence the layout here.
	 */
192 sin_port return SIN_PORT;
193 sin_addr return SIN_ADDR;
194 msg_name return MSG_NAME;
195 msg_iov return MSG_IOV;
196 msg_flags return MSG_FLAGS;
197 msg_control return MSG_CONTROL;
198 cmsg_data return CMSG_DATA;
199 cmsg_level return CMSG_LEVEL;
200 cmsg_type return CMSG_TYPE;
201 ee_errno return EE_ERRNO;
202 ee_origin return EE_ORIGIN;
203 ee_type return EE_TYPE;
204 ee_code return EE_CODE;
205 ee_info return EE_INFO;
206 ee_data return EE_DATA;
207 scm_sec return SCM_SEC;
208 scm_nsec return SCM_NSEC;
213 events return EVENTS;
214 revents return REVENTS;
216 linger return LINGER;
	/* NOTE(review): the token is spelled _HTONS_ rather than HTONS,
	 * presumably to avoid clashing with a system htons definition --
	 * confirm against the parser's %token list.
	 */
217 htons return _HTONS_;
230 checksum return CHECKSUM;
231 sequence# return SEQUENCE;
232 present return PRESENT;
237 inet_addr return INET_ADDR;
238 inet6_addr return INET6_ADDR;
246 sackOK return SACKOK;
	/* The script keyword FOEXP maps to the longer token name. */
250 FOEXP return FAST_OPEN_EXP;
252 flowlabel return FLOWLABEL;
258 wscale return WSCALE;
[.][.][.]		return ELLIPSIS;
	/* Pattern rules. flex takes the longest match and breaks ties in
	 * favor of the rule listed first, so the INTEGER and HEX_INTEGER
	 * rules must stay ahead of the generic word rule: text such as
	 * "123" or "0x1f" matches both at the same length.
	 *
	 * Numeric literals are converted with strtod()/strtoll() rather
	 * than atof()/atoll(): the ato*() functions have undefined
	 * behavior when the value is out of range, whereas the strto*()
	 * functions return a defined, clamped result. In-range literals
	 * convert identically.
	 */
--[a-zA-Z0-9_]+		yylval.string = option(yytext); return OPTION;
[-]?[0-9]*[.][0-9]+	yylval.floating = strtod(yytext, NULL); return FLOAT;
[-]?[0-9]+		yylval.integer = strtoll(yytext, NULL, 10); return INTEGER;
0x[0-9a-fA-F]+		yylval.integer = hextol(yytext); return HEX_INTEGER;
[a-zA-Z0-9_]+		return word();
\"(\\.|[^"])*\"		yylval.string = quoted(yytext); return STRING;
\`(\\.|[^`])*\`		yylval.string = quoted(yytext); return BACK_QUOTED;
	/* Any other single non-blank character is passed to the parser as
	 * its own character code.
	 */
[^ \t\n]		return (int) yytext[0];
[ \t\n]+		/* ignore whitespace */;
{cpp_comment}		/* ignore C++-style comment */;
{c_comment}		/* ignore C-style comment */;
	/* The strings set below are heap copies; yytext itself is only
	 * valid until the next token is scanned.
	 */
{code}			yylval.string = code(yytext); return CODE;
{ipv4_addr}		yylval.string = strdup(yytext); return IPV4_ADDR;
{ipv6_addr}		yylval.string = strdup(yytext); return IPV6_ADDR;