2 *------------------------------------------------------------------
3 * lex.c - API generator lexical analyzer
5 * Copyright (c) 1996-2009 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
/*
 * Streams used by the generator. main() opens ifp for --input and
 * ofp, pythonfp and jsonfp for --output, --python and --json.
 */
31 FILE *ifp, *ofp, *pythonfp, *jsonfp;
/* Application name; defaults to "vpp" and is replaced by the --app option. */
32 char *vlib_app_name = "vpp";
/*
 * Name of the file being lexed, as printed by yyerror().
 * current_filename_allocated is nonzero when current_filename points at
 * storage that is free()d before the name is replaced.
 */
36 char *current_filename;
37 int current_filename_allocated;
/* CRC accumulators; the token-level CRC code reads both for each token. */
38 unsigned long input_crc;
39 unsigned long message_crc;
43 * lexer variable definitions
/* Tool version string, printed by the --verbose option. */
46 static const char *version = "0.1";
/*
 * Current input line number, starting at 1. It is incremented by the lexer
 * states, overwritten when a "# <line> ..." pragma is parsed, and reported
 * by yyerror() and the "unknown token" diagnostics.
 */
47 static int the_lexer_linenumber = 1;
/* Current state of the yylex_1() state machine. */
48 static enum lex_state the_lexer_state = START_STATE;
/* Forward declarations for helpers defined later in this file. */
53 static void usage (char *);
54 static int name_check (const char *, YYSTYPE *);
55 static int name_compare (const char *, const char *);
/*
 * Semantic value of the most recent token; set by the lexer.
 * NOTE(review): presumably defined by the generated parser - confirm.
 */
57 extern YYSTYPE yylval;
/*
 * 256-entry lookup table used by CRC8(), indexed by the low byte of
 * (crc ^ data).
 * NOTE(review): the name suggests the CRC-32C (Castagnoli) polynomial in
 * reflected form - confirm against a reference table before relying on it.
 */
59 unsigned int crc32c_table[256] = {
60 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
61 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
62 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
63 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
64 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
65 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
66 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
67 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
68 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
69 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
70 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
71 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
72 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
73 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
74 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
75 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
76 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
77 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
78 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
79 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
80 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
81 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
82 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
83 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
84 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
85 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
86 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
87 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
88 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
89 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
90 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
91 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
92 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
93 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
94 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
95 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
96 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
97 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
98 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
99 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
100 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
101 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
102 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
103 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
104 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
105 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
106 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
107 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
108 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
109 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
110 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
111 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
112 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
113 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
114 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
115 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
116 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
117 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
118 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
119 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
120 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
121 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
122 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
123 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
/*
 * Fold one byte d into the running CRC: the accumulator is shifted right by
 * eight bits and XORed with the crc32c_table entry selected by the low byte
 * of (crc ^ d). Returns the updated accumulator.
 */
126 static inline unsigned long CRC8 (unsigned long crc,
129 return ((crc >> 8) ^ crc32c_table[(crc ^ d) & 0xFF]);
/*
 * Fold d into the running CRC one byte at a time using CRC8(); each step
 * consumes the low eight bits of d.
 * NOTE(review): two CRC8 steps are made, presumably with d shifted right by
 * eight bits between them - confirm.
 */
131 static inline unsigned long CRC16 (unsigned long crc,
134 crc = CRC8 (crc, d & 0xff);
136 crc = CRC8 (crc, d & 0xff);
/*
 * Accumulate a CRC over the text in buf, starting from crc, while leaving
 * out C and C++ style comments and whitespace that is outside string and
 * character literals. Callers assign the result back to their CRC
 * accumulator.
 *
 * The scan is a state machine over ss: the state records whether the scanner
 * is inside a string or character literal, just after a backslash within
 * one, just after a '/', or inside a comment. In the per-character switches
 * below, "continue" skips the character and "break" lets it through.
 * NOTE(review): presumably the character is folded into the CRC after the
 * switch - confirm.
 */
142 crc_eliding_c_comments (const char *buf, unsigned long crc)
/* Scanner states; the trailing comment shows the text that leads to each. */
147 cSBACKSLASH, /* "...\ */
149 cCBACKSLASH, /* '...\ */
151 cSLASH_SLASH, /* //... */
152 cSLASH_STAR, /* / *... */
157 unsigned char c = *p++;
/*
 * These states are reported as an unexpected end of input.
 * NOTE(review): presumably reached on the terminating NUL - confirm.
 */
164 case cSTRING: case cSBACKSLASH:
165 case cCHAR: case cCBACKSLASH:
166 case cSLASH: case cSLASH_SLASH: case cSLASH_STAR: case cSTAR:
167 fprintf (stderr, "Inopportune EOF: %s\n", buf);
173 case cOTHER: ss = cSTRING; break; /* start string */
174 case cSTRING: ss = cOTHER; break; /* end string */
175 case cSBACKSLASH: ss = cSTRING; break;
177 case cCBACKSLASH: ss = cCHAR; break;
/* A pending '/' that did not start a comment is data: fold it into the CRC. */
178 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
179 case cSLASH_SLASH: continue; /* in comment */
180 case cSLASH_STAR: continue; /* in comment */
181 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
187 case cSTRING: ss = cSBACKSLASH; break;
188 case cSBACKSLASH: ss = cSTRING; break;
189 case cCHAR: ss = cCBACKSLASH; break;
190 case cCBACKSLASH: ss = cCHAR; break;
191 case cSLASH: crc = CRC8 (crc, '/'); ; ss = cOTHER; break;
192 case cSLASH_SLASH: continue; /* in comment */
193 case cSLASH_STAR: continue; /* in comment */
194 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
199 case cOTHER: ss = cSLASH; continue; /* potential comment */
201 case cSBACKSLASH: ss = cSTRING; break;
203 case cCBACKSLASH: ss = cCHAR; break;
204 case cSLASH: ss = cSLASH_SLASH; continue; /* start comment */
205 case cSLASH_SLASH: continue; /* in comment */
206 case cSLASH_STAR: continue; /* in comment */
207 case cSTAR: ss = cOTHER; continue; /* end of comment */
214 case cSBACKSLASH: ss = cSTRING; break;
216 case cCBACKSLASH: ss = cCHAR; break;
217 case cSLASH: ss = cSLASH_STAR; continue; /* start comment */
218 case cSLASH_SLASH: continue; /* in comment */
219 case cSLASH_STAR: ss = cSTAR; continue; /* potential end */
220 case cSTAR: continue; /* still potential end of comment */
/* Whitespace: newline, carriage return, space, tab and form feed. */
223 case '\n': case '\r': case ' ': case '\t': case '\014':
225 case cOTHER: continue; /* ignore all whitespace */
227 case cSBACKSLASH: ss = cSTRING; break;
229 case cCBACKSLASH: ss = cCHAR; break;
/* Whitespace after a lone '/': the pending '/' replaces the whitespace in c. */
230 case cSLASH: c = '/'; ss = cOTHER; break;
232 if (c == '\n' || c == '\r') ss = cOTHER; /* end comment */
234 case cSLASH_STAR: continue; /* in comment */
235 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
241 case cSBACKSLASH: ss = cSTRING; break;
243 case cCBACKSLASH: ss = cCHAR; break;
244 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
245 case cSLASH_SLASH: continue; /* in comment */
246 case cSLASH_STAR: continue; /* in comment */
247 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/*
 * Program entry point: parse the command line, open the requested streams,
 * run the parser and report the result.
 *
 * Options recognised: --verbose, --yydebug, --dump, --show-name <name>,
 * --input <file>, --output <file>, --python <file>, --json <file> and
 * --app <name>. Options are matched with strncmp(), so only a prefix of the
 * argument is compared (see the notes below).
 */
257 int main (int argc, char **argv)
265 while (curarg < argc) {
/*
 * Only the first 3 characters are compared for this and the following
 * options, so any argument starting with "--v" selects --verbose, "--i"
 * selects --input, and so on.
 */
266 if (!strncmp (argv [curarg], "--verbose", 3)) {
267 fprintf (stderr, "%s version %s\n", argv [0], version);
272 if (!strncmp (argv [curarg], "--yydebug", 3)) {
278 if (!strncmp (argv [curarg], "--dump", 3)) {
284 if (!strncmp (argv[curarg], "--show-name", 3)) {
287 show_name = argv[curarg];
291 fprintf(stderr, "Missing filename after --show-name \n");
296 if (!strncmp (argv [curarg], "--input", 3)) {
299 input_filename = argv[curarg];
/* NOTE(review): "-" is special-cased, presumably meaning stdin - confirm. */
300 if (!strcmp (argv [curarg], "-"))
303 ifp = fopen (argv [curarg], "r");
305 fprintf (stderr, "Couldn't open input file %s\n",
311 fprintf(stderr, "Missing filename after --input\n");
316 if (!strncmp (argv [curarg], "--output", 3)) {
319 ofp = fopen (argv[curarg], "w");
321 fprintf (stderr, "Couldn't open output file %s\n",
325 ofile = argv[curarg];
328 fprintf(stderr, "Missing filename after --output\n");
/*
 * --python and --json are compared over the full option name (8 and 6
 * characters); longer arguments that merely start with the name still match.
 */
333 if (!strncmp (argv [curarg], "--python", 8)) {
/* NOTE(review): "-" is special-cased, presumably meaning stdout - confirm. */
336 if (!strcmp(argv[curarg], "-")) {
339 pythonfp = fopen(argv[curarg], "w");
340 pythonfile = argv[curarg];
342 if (pythonfp == NULL) {
343 fprintf (stderr, "Couldn't open python output file %s\n",
349 fprintf(stderr, "Missing filename after --python\n");
354 if (!strncmp (argv [curarg], "--json", 6)) {
357 if (!strcmp(argv[curarg], "-")) {
360 jsonfp = fopen(argv[curarg], "w");
361 jsonfile = argv[curarg];
363 if (jsonfp == NULL) {
364 fprintf (stderr, "Couldn't open JSON output file %s\n",
370 fprintf(stderr, "Missing filename after --json\n");
/* Only the first 4 characters ("--ap") are compared for --app. */
375 if (!strncmp (argv [curarg], "--app", 4)) {
378 vlib_app_name = argv[curarg];
381 fprintf(stderr, "Missing app name after --app\n");
393 if (pythonfp == NULL) {
396 if (jsonfp == NULL) {
400 fprintf(stderr, "No input file specified...\n");
/* The name given with --show-name replaces the input filename. */
404 input_filename = show_name;
407 starttime = time (0);
/* yyparse() returning 0 is treated as success: report the files written. */
409 if (yyparse() == 0) {
413 printf ("Output written to %s\n", ofile);
417 printf ("Python bindings written to %s\n", pythonfile);
421 printf ("JSON bindings written to %s\n", jsonfile);
/*
 * Parse failed: announce removal of each output file.
 * NOTE(review): presumably the removal follows each message - confirm.
 */
430 printf ("Removing %s\n", ofile);
434 printf ("Removing %s\n", pythonfile);
438 printf ("Removing %s\n", jsonfile);
/*
 * Emit the command-line synopsis for progname.
 * NOTE(review): main() also parses --app, --show-name and --verbose, which
 * are not listed in the synopsis text visible here - confirm and add them.
 */
449 static void usage (char *progname)
452 "usage: %s --input <filename> [--output <filename>] "
453 "[--json <filename>] [--python <filename>]\n%s",
455 " [--yydebug] [--dump-tree]\n");
/*
 * Parser error hook: print message s to stderr as "<file>:<line> <message>",
 * using the current filename and the lexer's current line number.
 */
462 void yyerror (char *s)
464 fprintf (stderr, "%s:%d %s\n", current_filename, the_lexer_linenumber, s);
/*
 * Scratch buffer in which the lexer accumulates the text of the current
 * token (names, numbers, strings, and line-pragma fields); MAXNAME bytes.
 */
467 static char namebuf [MAXNAME];
/*
 * Read one character from ifp and return it masked to seven bits.
 * NOTE(review): the mask is applied to getc()'s int result before any EOF
 * test, so an EOF of -1 comes back as 0x7f and cannot be recognised from
 * the return value; callers must detect end of input another way (for
 * example feof()) - confirm they do.
 */
470 getc_char (FILE *ifp)
472 return ((char)(getc(ifp) & 0x7f));
476 * yylex (well, yylex_1: The real yylex below does crc-hackery)
/*
 * Core tokenizer: a state machine driven by the_lexer_state. Token text is
 * accumulated in namebuf and the semantic value is stored in yylval. The
 * states handled are START, NAME, NUMBER, C_COMMENT, CPP_COMMENT, STRING,
 * HELPER and LINE_PRAGMA; the last has its own substate, lp_substate.
 */
478 static int yylex_1 (void)
482 enum { LP_INITIAL_WHITESPACE, LP_LINE_NUMBER,
483 LP_PRE_FILENAME_WHITESPACE, LP_FILENAME,
486 } lp_substate = LP_INITIAL_WHITESPACE;
489 switch (the_lexer_state) {
491 * START state -- looking for something interesting
500 the_lexer_linenumber++;
504 the_lexer_state = LINE_PRAGMA_STATE;
505 lp_substate = LP_INITIAL_WHITESPACE;
539 the_lexer_state = STRING_STATE;
544 the_lexer_state = HELPER_STATE;
553 the_lexer_state = CPP_COMMENT_STATE;
555 } else if (c == '*') {
556 the_lexer_state = C_COMMENT_STATE;
559 fprintf (stderr, "unknown token /%c at line %d\n",
560 c, the_lexer_linenumber);
569 /* Note fallthrough... */
572 if (isalpha (c) || c == '_') {
575 the_lexer_state = NAME_STATE;
577 } else if (isdigit(c)) {
580 the_lexer_state = NUMBER_STATE;
584 fprintf (stderr, "unknown token %c at line %d\n",
585 c, the_lexer_linenumber);
590 * NAME state -- eat the rest of a name
597 if (!isalnum (c) && c != '_') {
599 namebuf [nameidx] = 0;
600 the_lexer_state = START_STATE;
601 return (name_check (namebuf, &yylval));
/* Refuse to fill the last slot of namebuf; it is kept for the terminating NUL. */
603 if (nameidx >= (MAXNAME-1)) {
604 fprintf(stderr, "lex input buffer overflow...\n");
607 namebuf [nameidx++] = c;
611 * NUMBER state -- eat the rest of a number
620 namebuf [nameidx] = 0;
621 the_lexer_state = START_STATE;
/* The numeric value itself is carried in yylval, cast to a pointer. */
622 yylval = (void *) atol(namebuf);
625 if (nameidx >= (MAXNAME-1)) {
626 fprintf(stderr, "lex input buffer overflow...\n");
629 namebuf [nameidx++] = c;
633 * C_COMMENT state -- eat a peach
635 case C_COMMENT_STATE:
644 the_lexer_state = START_STATE;
649 the_lexer_linenumber++;
653 * CPP_COMMENT state -- eat a plum
656 case CPP_COMMENT_STATE:
661 the_lexer_linenumber++;
662 the_lexer_state = START_STATE;
676 namebuf[nameidx++] = c;
680 namebuf[nameidx] = 0;
/* The token text is handed to the parser as a heap copy made by sxerox(). */
681 yylval = (YYSTYPE) sxerox (namebuf);
682 the_lexer_state = START_STATE;
687 the_lexer_linenumber++;
689 if (nameidx >= (MAXNAME-1)) {
690 fprintf(stderr, "lex input buffer overflow...\n");
693 namebuf[nameidx++] = c;
/*
 * NOTE(review): this store does not advance nameidx, unlike the other
 * namebuf[nameidx++] writes in this function, so the character would be
 * overwritten by the next write - confirm this is intended.
 */
707 namebuf[nameidx] = c;
711 namebuf[nameidx] = 0;
712 yylval = (YYSTYPE) sxerox (namebuf);
713 the_lexer_state = START_STATE;
714 return (HELPER_STRING);
718 the_lexer_linenumber++;
721 * CPP makes it approximately impossible to
722 * type "#define FOO 123", so we provide a
723 * lexical trick to achieve that result
729 if (nameidx >= (MAXNAME-1)) {
730 fprintf(stderr, "lex input buffer overflow...\n");
733 namebuf[nameidx++] = c;
738 case LINE_PRAGMA_STATE:
739 /* We're only interested in lines of the form # 259 "foo.c" 17 */
/*
 * NOTE(review): none of the namebuf writes visible in the substates below
 * are preceded by the MAXNAME bound check used by the NAME and NUMBER
 * states - confirm that an over-long line number or filename cannot
 * overflow namebuf.
 */
741 switch (lp_substate) {
743 case LP_INITIAL_WHITESPACE: /* no number seen yet */
747 if (c >= '0' && c <= '9') {
748 namebuf[nameidx++] = c;
749 lp_substate = LP_LINE_NUMBER;
750 } else if (c == '\n') {
752 } else if (c != ' ' && c != '\t') {
755 lp_substate = LP_OTHER;
759 case LP_LINE_NUMBER: /* eating linenumber */
763 if (c >= '0' && c <= '9') {
764 namebuf[nameidx++] = c;
765 } else if (c == ' ' || c == '\t') {
/* Terminate the digits and adopt the pragma's number as the current line. */
766 namebuf[nameidx++] = 0;
767 the_lexer_linenumber = atol(namebuf);
768 lp_substate = LP_PRE_FILENAME_WHITESPACE;
769 } else if (c == '\n') {
772 lp_substate = LP_OTHER;
776 case LP_PRE_FILENAME_WHITESPACE: /* awaiting filename */
782 lp_substate = LP_FILENAME;
784 } else if (c == ' ' || c == '\t') {
786 } else if (c == '\n') {
789 lp_substate = LP_OTHER;
793 case LP_FILENAME: /* eating filename */
799 lp_substate = LP_POST_FILENAME;
800 namebuf[nameidx] = 0;
801 } else if (c == '\n') {
802 goto lp_end_of_line; /* syntax error... */
804 namebuf[nameidx++] = c;
808 case LP_POST_FILENAME: /* ignoring rest of line */
/*
 * A complete pragma was seen: release any previously allocated filename,
 * then either point at input_filename (for "<stdin>") or keep a heap copy
 * of the new name and mark it as allocated.
 */
815 if (lp_substate == LP_POST_FILENAME) {
816 if (current_filename_allocated) {
817 current_filename_allocated = 0;
818 free(current_filename);
821 if (!strcmp(namebuf, "<stdin>")) {
822 current_filename = input_filename;
824 current_filename = sxerox(namebuf);
825 current_filename_allocated = 1;
829 the_lexer_state = START_STATE;
/* Diagnostic for a lexer state that should never occur. */
836 fprintf (stderr, "LEXER BUG!\n");
843 * Parse a token and side-effect input_crc
844 * in a whitespace- and comment-insensitive fashion.
849 * Accumulate a crc32-based signature while processing the
850 * input file. The goal is to come up with a magic number
851 * which changes precisely when the original input file changes
852 * but which ignores whitespace changes.
/*
 * Work on local copies of the two CRC accumulators and fetch the next token
 * from yylex_1().
 */
854 unsigned long crc = input_crc;
855 int node_type = yylex_1 ();
856 unsigned long crc2 = message_crc;
857 int use_helper_string = 0;
/* Set for token types whose text in namebuf is folded into the CRCs below. */
866 use_helper_string = 1;
869 /* Other node types have no "substate" */
870 /* This code is written in this curious fashion because we
871 * want the generated CRC to be independent of the particular
872 * values a particular version of lex/bison assigned to various states.
875 case RPAR: code = 258; break;
876 case LPAR: code = 259; break;
877 case SEMI: code = 260; break;
878 case LBRACK: code = 261; break;
879 case RBRACK: code = 262; break;
880 case BARF: code = 265; break;
881 case TPACKED: code = 266; break;
882 case DEFINE: code = 267; break;
883 case LCURLY: code = 268; break;
884 case RCURLY: code = 269; break;
885 case UNION: code = 271; break;
886 case COMMA: code = 273; break;
887 case NOVERSION: code = 274; break;
888 case MANUAL_PRINT: code = 275; break;
889 case MANUAL_ENDIAN: code = 276; break;
890 case TYPEONLY: code = 278; break;
891 case DONT_TRACE: code = 279; break;
893 case EOF: code = ~0; break; /* hysterical compatibility */
/* Reported when a token type has no fixed CRC code assigned above. */
896 fprintf(stderr, "yylex: node_type %d missing state CRC cookie\n",
/*
 * Text-bearing tokens contribute their text (with C comments elided) to both
 * CRCs; the fixed per-token code is folded in with CRC16().
 * NOTE(review): presumably the CRC16 calls form the else branch - confirm.
 */
901 if (use_helper_string)
903 /* We know these types accumulated token text into namebuf */
904 /* HELPER_STRING may still contain C comments. Argh. */
905 crc = crc_eliding_c_comments (namebuf, crc);
906 crc2 = crc_eliding_c_comments (namebuf, crc2);
909 crc = CRC16 (crc, code);
910 crc2 = CRC16 (crc2, code);
919 * name_check -- see if the name we just ate
920 * matches a known keyword. If so, set yylval
921 * to a new instance of <subclass of node>, and return PARSER_MACRO
923 * Otherwise, set yylval to sxerox (s) and return NAME
/*
 * Keyword table: maps each reserved word to its node subclass id.
 * name_check() searches it by repeatedly halving the range, so the entries
 * must stay sorted in the order name_compare() defines.
 */
926 static struct keytab {
928 enum node_subclass subclass_id;
930 /* Keep the table sorted, binary search used below! */
932 {"define", NODE_DEFINE},
933 {"dont_trace", NODE_DONT_TRACE},
939 {"manual_endian", NODE_MANUAL_ENDIAN},
940 {"manual_print", NODE_MANUAL_PRINT},
941 {"noversion", NODE_NOVERSION},
942 {"packed", NODE_PACKED},
943 {"typeonly", NODE_TYPEONLY},
948 {"union", NODE_UNION},
949 {"uword", NODE_UWORD},
/*
 * Classify the identifier s. keytab is binary-searched using name_compare();
 * on a keyword hit *token_value receives either a node from make_node() or
 * a NODE_FLAG_* value and the keyword's token code is returned. When s is
 * not a keyword, *token_value receives a heap copy of s made by sxerox().
 */
952 static int name_check (const char *s, YYSTYPE *token_value)
954 enum node_subclass subclass_id;
/* Binary search over the whole table: top and bot bracket the candidates. */
958 for (top = 0, bot = (sizeof(keytab) / sizeof(struct keytab))-1;
960 mid = (top + bot) / 2;
961 result = name_compare (s, keytab[mid].name);
967 subclass_id = keytab[mid].subclass_id;
969 switch (subclass_id) {
980 *token_value = make_node(subclass_id);
984 *token_value = make_node(subclass_id);
989 *token_value = make_node(subclass_id);
/* Flag keywords carry a NODE_FLAG_* constant rather than a node. */
992 case NODE_MANUAL_PRINT:
993 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_PRINT;
994 return (MANUAL_PRINT);
996 case NODE_MANUAL_ENDIAN:
997 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_ENDIAN;
998 return (MANUAL_ENDIAN);
1001 *token_value = (YYSTYPE) NODE_FLAG_TYPEONLY;
1004 case NODE_DONT_TRACE:
1005 *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
1008 case NODE_NOVERSION:
/* A keytab entry whose subclass id is not handled by the switch above. */
1015 fprintf (stderr, "fatal: keytab botch!\n");
/* Not a keyword: hand the parser its own copy of the name. */
1020 *token_value = (YYSTYPE) sxerox (s);
/*
 * Duplicate the string s into storage obtained from malloc(); strlen(s)+1
 * bytes are requested. The lexer later releases such copies with free()
 * (see the current_filename handling).
 * NOTE(review): the out-of-memory message has no trailing newline.
 */
1028 char *sxerox (const char *s)
1030 int len = strlen (s);
1033 rv = (char *) malloc (len+1);
1035 fprintf(stderr, "Out of memory...");
1047 int name_compare (const char *s1, const char *s2)
1051 while (*s1 && *s2) {