2 *------------------------------------------------------------------
3 * lex.c - API generator lexical analyzer
5 * Copyright (c) 1996-2009 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
/* Global streams: ifp is opened for reading by --input, ofp and pythonfp
 * are opened for writing by --output and --python (see main). */
31 FILE *ifp, *ofp, *pythonfp;
/* Application name; defaults to "vpp" and is replaced by the --app argument. */
32 char *vlib_app_name = "vpp";
/* File name reported in diagnostics by yyerror; replaced when the lexer
 * consumes a line pragma that names a file. */
36 char *current_filename;
/* Set to 1 when current_filename holds an sxerox() copy; the lexer clears it
 * and frees that copy before installing a new name. */
37 int current_filename_allocated;
/* Input signature; yylex reads it as the starting CRC value for each token. */
38 unsigned long input_crc;
42 * lexer variable definitions
/* Tool version string, printed to stderr by the --verbose option. */
45 static const char *version = "0.1";
/* Current input line, starting at 1; incremented by the lexer, overwritten
 * from line pragmas, and included in error messages. */
46 static int the_lexer_linenumber = 1;
/* Current state of the yylex_1 state machine. */
47 static enum lex_state the_lexer_state = START_STATE;
/* Forward declarations for helpers defined later in this file. */
52 static void usage (char *);
53 static int name_check (const char *, YYSTYPE *);
54 static int name_compare (const char *, const char *);
/* Semantic value of the current token; defined elsewhere (presumably by the
 * generated parser -- TODO confirm) and filled in by yylex_1 and name_check. */
56 extern YYSTYPE yylval;
/* 256-entry CRC lookup table, indexed in CRC8 by the low byte of (crc ^ d).
 * NOTE(review): the name suggests the CRC-32C (Castagnoli) polynomial; the
 * entries were not re-derived during review. */
58 unsigned int crc32c_table[256] = {
59 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
60 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
61 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
62 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
63 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
64 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
65 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
66 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
67 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
68 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
69 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
70 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
71 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
72 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
73 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
74 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
75 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
76 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
77 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
78 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
79 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
80 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
81 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
82 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
83 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
84 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
85 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
86 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
87 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
88 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
89 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
90 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
91 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
92 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
93 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
94 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
95 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
96 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
97 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
98 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
99 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
100 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
101 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
102 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
103 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
104 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
105 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
106 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
107 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
108 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
109 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
110 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
111 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
112 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
113 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
114 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
115 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
116 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
117 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
118 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
119 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
120 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
121 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
122 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
/* Fold one byte d into crc: shift the CRC right by 8 bits and XOR in the
 * crc32c_table entry selected by the low byte of (crc ^ d). */
125 static inline unsigned long CRC8 (unsigned long crc,
128 return ((crc >> 8) ^ crc32c_table[(crc ^ d) & 0xFF]);
/* Fold d into crc as two successive CRC8 steps. */
130 static inline unsigned long CRC16 (unsigned long crc,
/* First step: low 8 bits of d. */
133 crc = CRC8 (crc, d & 0xff);
/* Second step; d is presumably shifted right by 8 between the two calls
 * (that statement is not visible in this excerpt -- TODO confirm). */
135 crc = CRC8 (crc, d & 0xff);
/*
 * Walk the text in buf with a small state machine that tracks string
 * literals, character literals and C / C++ style comments while
 * accumulating a CRC. Arms that end in "continue" skip the current
 * character (comment text, whitespace outside literals); arms that end in
 * "break" leave the inner switch, presumably to reach a CRC update that is
 * not visible in this excerpt.
 *
 * buf: text to checksum; crc: starting CRC value.
 */
141 crc_eliding_c_comments (const char *buf, unsigned long crc)
/* Scanner states; each trailing comment shows the input seen so far. */
146 cSBACKSLASH, /* "...\ */
148 cCBACKSLASH, /* '...\ */
150 cSLASH_SLASH, /* //... */
151 cSLASH_STAR, /* / *... */
/* Fetch the next character as an unsigned value and advance. */
156 unsigned char c = *p++;
/* End of text reached while inside a literal, inside a comment, or after a
 * pending slash: report the offending text on stderr. */
163 case cSTRING: case cSBACKSLASH:
164 case cCHAR: case cCBACKSLASH:
165 case cSLASH: case cSLASH_SLASH: case cSLASH_STAR: case cSTAR:
166 fprintf (stderr, "Inopportune EOF: %s\n", buf);
/* Transitions for the string delimiter, judging by the inline comments
 * (the case label is not visible in this excerpt). */
172 case cOTHER: ss = cSTRING; break; /* start string */
173 case cSTRING: ss = cOTHER; break; /* end string */
174 case cSBACKSLASH: ss = cSTRING; break;
176 case cCBACKSLASH: ss = cCHAR; break;
/* A pending slash turned out not to start a comment: checksum it now. */
177 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
178 case cSLASH_SLASH: continue; /* in comment */
179 case cSLASH_STAR: continue; /* in comment */
180 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Transitions that enter or leave the escape states; evidently the backslash
 * character (the case label is not visible in this excerpt). */
186 case cSTRING: ss = cSBACKSLASH; break;
187 case cSBACKSLASH: ss = cSTRING; break;
188 case cCHAR: ss = cCBACKSLASH; break;
189 case cCBACKSLASH: ss = cCHAR; break;
190 case cSLASH: crc = CRC8 (crc, '/'); ; ss = cOTHER; break;
191 case cSLASH_SLASH: continue; /* in comment */
192 case cSLASH_STAR: continue; /* in comment */
193 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Transitions for a slash, per the inline comments: a first slash is held
 * back, a second one starts a line comment, and a slash after a star ends a
 * block comment. */
198 case cOTHER: ss = cSLASH; continue; /* potential comment */
200 case cSBACKSLASH: ss = cSTRING; break;
202 case cCBACKSLASH: ss = cCHAR; break;
203 case cSLASH: ss = cSLASH_SLASH; continue; /* start comment */
204 case cSLASH_SLASH: continue; /* in comment */
205 case cSLASH_STAR: continue; /* in comment */
206 case cSTAR: ss = cOTHER; continue; /* end of comment */
/* Transitions for a star, per the inline comments: after a pending slash it
 * opens a block comment, inside one it marks a possible end. */
213 case cSBACKSLASH: ss = cSTRING; break;
215 case cCBACKSLASH: ss = cCHAR; break;
216 case cSLASH: ss = cSLASH_STAR; continue; /* start comment */
217 case cSLASH_SLASH: continue; /* in comment */
218 case cSLASH_STAR: ss = cSTAR; continue; /* potential end */
219 case cSTAR: continue; /* still potential end of comment */
/* Whitespace characters (newline, carriage return, space, tab, form feed). */
222 case '\n': case '\r': case ' ': case '\t': case '\014':
224 case cOTHER: continue; /* ignore all whitespace */
226 case cSBACKSLASH: ss = cSTRING; break;
228 case cCBACKSLASH: ss = cCHAR; break;
/* A slash followed by whitespace was not a comment start: substitute the
 * pending slash for the whitespace character and resume normal scanning. */
229 case cSLASH: c = '/'; ss = cOTHER; break;
/* A line comment ends at the end of the line. */
231 if (c == '\n' || c == '\r') ss = cOTHER; /* end comment */
233 case cSLASH_STAR: continue; /* in comment */
234 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Remaining transitions, presumably for any other character (the case label
 * is not visible in this excerpt). */
240 case cSBACKSLASH: ss = cSTRING; break;
242 case cCBACKSLASH: ss = cCHAR; break;
243 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
244 case cSLASH_SLASH: continue; /* in comment */
245 case cSLASH_STAR: continue; /* in comment */
246 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/*
 * Program entry point: parse the command-line options, open the requested
 * files, run the parser, and print which outputs were written or are being
 * removed.
 *
 * Options are matched with strncmp on a prefix only: the first 3 characters
 * for most options, 8 for --python and 4 for --app, so any argument sharing
 * that prefix selects the option.
 */
256 int main (int argc, char **argv)
263 while (curarg < argc) {
/* --verbose: print the program name and version on stderr. */
264 if (!strncmp (argv [curarg], "--verbose", 3)) {
265 fprintf (stderr, "%s version %s\n", argv [0], version);
270 if (!strncmp (argv [curarg], "--yydebug", 3)) {
276 if (!strncmp (argv [curarg], "--dump", 3)) {
/* --show-name <name>: remember the following argument as show_name. */
282 if (!strncmp (argv[curarg], "--show-name", 3)) {
285 show_name = argv[curarg];
289 fprintf(stderr, "Missing filename after --show-name \n");
/* --input <file>: record the name; "-" is handled separately (presumably
 * standard input -- that branch is not visible here), anything else is
 * opened for reading. */
294 if (!strncmp (argv [curarg], "--input", 3)) {
297 input_filename = argv[curarg];
298 if (!strcmp (argv [curarg], "-"))
301 ifp = fopen (argv [curarg], "r");
303 fprintf (stderr, "Couldn't open input file %s\n",
309 fprintf(stderr, "Missing filename after --input\n");
/* --output <file>: open for writing and remember the name for later messages. */
314 if (!strncmp (argv [curarg], "--output", 3)) {
317 ofp = fopen (argv[curarg], "w");
319 fprintf (stderr, "Couldn't open output file %s\n",
323 ofile = argv[curarg];
326 fprintf(stderr, "Missing filename after --output\n");
/* --python <file>: open for writing and remember the name for later messages. */
331 if (!strncmp (argv [curarg], "--python", 8)) {
334 pythonfp = fopen (argv[curarg], "w");
335 if (pythonfp == NULL) {
336 fprintf (stderr, "Couldn't open python output file %s\n",
340 pythonfile = argv[curarg];
343 fprintf(stderr, "Missing filename after --python\n");
/* --app <name>: override the default application name. */
348 if (!strncmp (argv [curarg], "--app", 4)) {
351 vlib_app_name = argv[curarg];
354 fprintf(stderr, "Missing app name after --app\n");
366 if (pythonfp == NULL) {
370 fprintf(stderr, "No input file specified...\n");
/* Substitute show_name for the input file name (the guarding condition is
 * not visible in this excerpt). */
374 input_filename = show_name;
/* Record the start time, then parse; yyparse returning 0 is the success path. */
377 starttime = time (0);
379 if (yyparse() == 0) {
383 printf ("Output written to %s\n", ofile);
387 printf ("Python bindings written to %s\n", pythonfile);
/* Messages for the path on which the outputs are discarded (presumably the
 * parse-failure branch -- TODO confirm). */
396 printf ("Removing %s\n", ofile);
400 printf ("Removing %s\n", pythonfile);
/*
 * Emit the command-line synopsis; the format's first %s is presumably filled
 * with progname and the trailing %s with the continuation line below.
 * NOTE(review): the text omits --show-name, --app and --verbose, which main
 * accepts, and spells the dump option --dump-tree while main compares
 * against --dump (first 3 characters only).
 */
411 static void usage (char *progname)
414 "usage: %s --input <filename> [--output <filename>] [--python <filename>]\n%s",
416 " [--yydebug] [--dump-tree]\n");
/* Report the message s on stderr, prefixed with the lexer's current file
 * name and line number. */
423 void yyerror (char *s)
425 fprintf (stderr, "%s:%d %s\n", current_filename, the_lexer_linenumber, s);
/* Shared scratch buffer of MAXNAME bytes in which yylex_1 accumulates the
 * text of the token being scanned; yylex checksums it for text-bearing tokens. */
428 static char namebuf [MAXNAME];
/*
 * Read one character from ifp and keep only its low 7 bits.
 * The parameter shadows the file-scope ifp.
 * NOTE(review): because of the mask, an EOF return from getc (a negative
 * int) cannot be recognised in the result; callers need another way to
 * detect end of input (e.g. feof) -- confirm at the call sites.
 */
431 getc_char (FILE *ifp)
433 return ((char)(getc(ifp) & 0x7f));
437 * yylex (well, yylex_1: The real yylex below does crc-hackery)
/*
 * Core tokenizer: a state machine driven by the_lexer_state. Token text is
 * accumulated in namebuf; semantic values are delivered through yylval.
 * Takes no arguments and returns a token code.
 */
439 static int yylex_1 (void)
/* Sub-states used while consuming a line pragma. */
443 enum { LP_INITIAL_WHITESPACE, LP_LINE_NUMBER,
444 LP_PRE_FILENAME_WHITESPACE, LP_FILENAME,
447 } lp_substate = LP_INITIAL_WHITESPACE;
/* Dispatch on the current lexer state. */
450 switch (the_lexer_state) {
452 * START state -- looking for something interesting
461 the_lexer_linenumber++;
/* A line pragma begins (the triggering character is not visible in this
 * excerpt): switch state and reset the pragma sub-state. */
465 the_lexer_state = LINE_PRAGMA_STATE;
466 lp_substate = LP_INITIAL_WHITESPACE;
500 the_lexer_state = STRING_STATE;
505 the_lexer_state = HELPER_STATE;
/* After a slash: a second slash starts a line comment, a star starts a block
 * comment, anything else is reported as an unknown token. */
514 the_lexer_state = CPP_COMMENT_STATE;
516 } else if (c == '*') {
517 the_lexer_state = C_COMMENT_STATE;
520 fprintf (stderr, "unknown token /%c at line %d\n",
521 c, the_lexer_linenumber);
530 /* Note fallthrough... */
/* A letter or underscore starts a name; a digit starts a number. */
533 if (isalpha (c) || c == '_') {
536 the_lexer_state = NAME_STATE;
538 } else if (isdigit(c)) {
541 the_lexer_state = NUMBER_STATE;
545 fprintf (stderr, "unknown token %c at line %d\n",
546 c, the_lexer_linenumber);
551 * NAME state -- eat the rest of a name
/* The first character that is neither alphanumeric nor an underscore ends
 * the name: terminate it, return to START and let name_check classify it. */
558 if (!isalnum (c) && c != '_') {
560 namebuf [nameidx] = 0;
561 the_lexer_state = START_STATE;
562 return (name_check (namebuf, &yylval));
/* Keep one byte free for the terminator. */
564 if (nameidx >= (MAXNAME-1)) {
565 fprintf(stderr, "lex input buffer overflow...\n");
568 namebuf [nameidx++] = c;
572 * NUMBER state -- eat the rest of a number
581 namebuf [nameidx] = 0;
582 the_lexer_state = START_STATE;
/* The numeric value itself is stored in yylval via a pointer cast, rather
 * than a pointer to storage. */
583 yylval = (void *) atol(namebuf);
586 if (nameidx >= (MAXNAME-1)) {
587 fprintf(stderr, "lex input buffer overflow...\n");
590 namebuf [nameidx++] = c;
594 * C_COMMENT state -- eat a peach
596 case C_COMMENT_STATE:
605 the_lexer_state = START_STATE;
610 the_lexer_linenumber++;
614 * CPP_COMMENT state -- eat a plum
617 case CPP_COMMENT_STATE:
622 the_lexer_linenumber++;
623 the_lexer_state = START_STATE;
637 namebuf[nameidx++] = c;
/* Token text complete: terminate it and hand an sxerox() copy to the parser. */
641 namebuf[nameidx] = 0;
642 yylval = (YYSTYPE) sxerox (namebuf);
643 the_lexer_state = START_STATE;
648 the_lexer_linenumber++;
650 if (nameidx >= (MAXNAME-1)) {
651 fprintf(stderr, "lex input buffer overflow...\n");
654 namebuf[nameidx++] = c;
/* NOTE(review): this store does not advance nameidx, unlike the otherwise
 * similar store a few statements above, so the character appears to be
 * overwritten by the next store -- confirm whether nameidx++ was intended. */
668 namebuf[nameidx] = c;
672 namebuf[nameidx] = 0;
673 yylval = (YYSTYPE) sxerox (namebuf);
674 the_lexer_state = START_STATE;
675 return (HELPER_STRING);
679 the_lexer_linenumber++;
682 * CPP makes it approximately impossible to
683 * type "#define FOO 123", so we provide a
684 * lexical trick to achieve that result
690 if (nameidx >= (MAXNAME-1)) {
691 fprintf(stderr, "lex input buffer overflow...\n");
694 namebuf[nameidx++] = c;
699 case LINE_PRAGMA_STATE:
700 /* We're only interested in lines of the form # 259 "foo.c" 17 */
/* NOTE(review): none of the namebuf stores in the pragma sub-states below
 * has a visible MAXNAME bound check -- confirm that over-long line numbers
 * or file names cannot overrun the buffer. */
702 switch (lp_substate) {
704 case LP_INITIAL_WHITESPACE: /* no number seen yet */
708 if (c >= '0' && c <= '9') {
709 namebuf[nameidx++] = c;
710 lp_substate = LP_LINE_NUMBER;
711 } else if (c == '\n') {
713 } else if (c != ' ' && c != '\t') {
716 lp_substate = LP_OTHER;
720 case LP_LINE_NUMBER: /* eating linenumber */
724 if (c >= '0' && c <= '9') {
725 namebuf[nameidx++] = c;
726 } else if (c == ' ' || c == '\t') {
/* Digits complete: terminate them and adopt the value as the current line. */
727 namebuf[nameidx++] = 0;
728 the_lexer_linenumber = atol(namebuf);
729 lp_substate = LP_PRE_FILENAME_WHITESPACE;
730 } else if (c == '\n') {
733 lp_substate = LP_OTHER;
737 case LP_PRE_FILENAME_WHITESPACE: /* awaiting filename */
743 lp_substate = LP_FILENAME;
745 } else if (c == ' ' || c == '\t') {
747 } else if (c == '\n') {
750 lp_substate = LP_OTHER;
754 case LP_FILENAME: /* eating filename */
760 lp_substate = LP_POST_FILENAME;
761 namebuf[nameidx] = 0;
762 } else if (c == '\n') {
763 goto lp_end_of_line; /* syntax error... */
765 namebuf[nameidx++] = c;
769 case LP_POST_FILENAME: /* ignoring rest of line */
/* End of the pragma line: if a complete file name was parsed, release any
 * previously allocated name, then either reuse input_filename (when the
 * pragma names "<stdin>") or keep an sxerox() copy and mark it allocated. */
776 if (lp_substate == LP_POST_FILENAME) {
777 if (current_filename_allocated) {
778 current_filename_allocated = 0;
779 free(current_filename);
782 if (!strcmp(namebuf, "<stdin>")) {
783 current_filename = input_filename;
785 current_filename = sxerox(namebuf);
786 current_filename_allocated = 1;
790 the_lexer_state = START_STATE;
/* Diagnostic for an unexpected state (the enclosing label is not visible in
 * this excerpt). */
797 fprintf (stderr, "LEXER BUG!\n");
804 * Parse a token and side-effect input_crc
805 * in a whitespace- and comment-insensitive fashion.
810 * Accumulate a crc32-based signature while processing the
811 * input file. The goal is to come up with a magic number
812 * which changes precisely when the original input file changes
813 * but which ignores whitespace changes.
/* Start from the signature accumulated so far and fetch the next token. */
815 unsigned long crc = input_crc;
816 int node_type = yylex_1 ();
/* Text-bearing tokens: checksum the token text left in namebuf, skipping
 * any C comments it contains. */
823 case HELPER_STRING: {
824 /* We know these types accumulated token text into namebuf */
825 /* HELPER_STRING may still contain C comments. Argh. */
826 crc = crc_eliding_c_comments (namebuf, crc);
830 /* Other node types have no "substate" */
831 /* This code is written in this curious fashion because we
832 * want the generated CRC to be independent of the particular
833 * values a particular version of lex/bison assigned to various states.
836 /* case NAME: crc = CRC16 (crc, 257); break; */
/* Punctuation and keyword tokens: fold in a fixed cookie per token type. */
837 case RPAR: crc = CRC16 (crc, 258); break;
838 case LPAR: crc = CRC16 (crc, 259); break;
839 case SEMI: crc = CRC16 (crc, 260); break;
840 case LBRACK: crc = CRC16 (crc, 261); break;
841 case RBRACK: crc = CRC16 (crc, 262); break;
842 /* case NUMBER: crc = CRC16 (crc, 263); break; */
843 /* case PRIMTYPE: crc = CRC16 (crc, 264); break; */
844 case BARF: crc = CRC16 (crc, 265); break;
845 case TPACKED: crc = CRC16 (crc, 266); break;
846 case DEFINE: crc = CRC16 (crc, 267); break;
847 case LCURLY: crc = CRC16 (crc, 268); break;
848 case RCURLY: crc = CRC16 (crc, 269); break;
849 /* case STRING: crc = CRC16 (crc, 270); break; */
850 case UNION: crc = CRC16 (crc, 271); break;
851 /* case HELPER_STRING: crc = CRC16 (crc, 272); break; */
852 case COMMA: crc = CRC16 (crc, 273); break;
853 case NOVERSION: crc = CRC16 (crc, 274); break;
854 case MANUAL_PRINT: crc = CRC16 (crc, 275); break;
855 case MANUAL_ENDIAN: crc = CRC16 (crc, 276); break;
/* NOTE(review): cookie 277 is not used by any case visible here; the
 * sequence jumps from 276 to 278. */
856 case TYPEONLY: crc = CRC16 (crc, 278); break;
857 case DONT_TRACE: crc = CRC16 (crc, 279); break;
859 case EOF: crc = CRC16 (crc, ~0); break; /* hysterical compatibility */
/* A token type with no cookie above is reported on stderr. */
862 fprintf(stderr, "yylex: node_type %d missing state CRC cookie\n",
873 * name_check -- see if the name we just ate
874 * matches a known keyword. If so, set yylval
875 * to a new instance of <subclass of node>, and return PARSER_MACRO
877 * Otherwise, set yylval to sxerox (s) and return NAME
/* Keyword table mapping a keyword spelling to its node subclass. name_check
 * binary-searches it with name_compare, so new entries must be inserted in
 * that order. */
880 static struct keytab {
882 enum node_subclass subclass_id;
884 /* Keep the table sorted, binary search used below! */
886 {"define", NODE_DEFINE},
887 {"dont_trace", NODE_DONT_TRACE},
893 {"manual_endian", NODE_MANUAL_ENDIAN},
894 {"manual_print", NODE_MANUAL_PRINT},
895 {"noversion", NODE_NOVERSION},
896 {"packed", NODE_PACKED},
897 {"typeonly", NODE_TYPEONLY},
902 {"union", NODE_UNION},
903 {"uword", NODE_UWORD},
/*
 * Classify the identifier s. Binary-search keytab; on a hit, store the
 * keyword's semantic value in *token_value (a node from make_node, or a
 * NODE_FLAG_* constant cast to YYSTYPE for the flag keywords) and return the
 * matching token code. On a miss, store an sxerox() copy of s instead.
 */
906 static int name_check (const char *s, YYSTYPE *token_value)
908 enum node_subclass subclass_id;
/* Search bounds start at the first and last table entries. */
912 for (top = 0, bot = (sizeof(keytab) / sizeof(struct keytab))-1;
914 mid = (top + bot) / 2;
915 result = name_compare (s, keytab[mid].name);
/* Keyword found: dispatch on its subclass to build the token value. */
921 subclass_id = keytab[mid].subclass_id;
923 switch (subclass_id) {
934 *token_value = make_node(subclass_id);
938 *token_value = make_node(subclass_id);
942 *token_value = make_node(subclass_id);
945 case NODE_MANUAL_PRINT:
946 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_PRINT;
947 return (MANUAL_PRINT);
949 case NODE_MANUAL_ENDIAN:
950 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_ENDIAN;
951 return (MANUAL_ENDIAN);
954 *token_value = (YYSTYPE) NODE_FLAG_TYPEONLY;
957 case NODE_DONT_TRACE:
958 *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
/* A table entry whose subclass has no case above is treated as fatal. */
968 fprintf (stderr, "fatal: keytab botch!\n");
/* Not a keyword: the token value is a private copy of the name. */
973 *token_value = (YYSTYPE) sxerox (s);
/*
 * Allocate strlen(s)+1 bytes with malloc for a copy of the string s; the
 * copy and return statements are not visible in this excerpt. Callers are
 * responsible for freeing the result (the line-pragma code in yylex_1 does).
 * "Out of memory..." is written to stderr, presumably when malloc fails.
 * NOTE(review): the size_t result of strlen is narrowed to int here.
 */
981 char *sxerox (const char *s)
983 int len = strlen (s);
986 rv = (char *) malloc (len+1);
988 fprintf(stderr, "Out of memory...");
1000 int name_compare (const char *s1, const char *s2)
1004 while (*s1 && *s2) {