2 *------------------------------------------------------------------
3 * lex.c - API generator lexical analyzer
5 * Copyright (c) 1996-2009 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
/*
 * Global file handles and generator-wide settings.
 */
/* Input, generated-output and Python-bindings streams; main() sets them up from the --input, --output and --python options. */
31 FILE *ifp, *ofp, *pythonfp;
/* Application name; defaults to "vpp" and is replaced by the argument of --app in main(). */
32 char *vlib_app_name = "vpp";
/* File name printed by yyerror(); updated by the line-pragma handling in the lexer. */
36 char *current_filename;
/* Set to 1 when current_filename was obtained from sxerox() and has to be free()d before being replaced; 0 otherwise. */
37 int current_filename_allocated;
/* Running signature of the input; the CRC-accumulating lexer wrapper starts each token from this value. */
38 unsigned long input_crc;
42 * lexer variable definitions
/* Version string printed by main() for the --verbose option. */
45 static const char *version = "0.1";
/* Current input line number; starts at 1, is bumped by the lexer and can be reset by a line pragma. */
46 static int the_lexer_linenumber = 1;
/* Current state of the hand-written lexer state machine in yylex_1(). */
47 static enum lex_state the_lexer_state = START_STATE;
/* Forward declarations of file-local helpers. */
52 static void usage (char *);
53 static int name_check (const char *, YYSTYPE *);
54 static int name_compare (const char *, const char *);
/* Semantic value of the most recent token; defined outside this file. */
56 extern YYSTYPE yylval;
/*
 * Lookup table of 32-bit constants used by CRC8(), which indexes it with
 * the low byte of (crc ^ data).
 * NOTE(review): the name suggests the byte-wise CRC-32C (Castagnoli)
 * table -- confirm against a reference before relying on that.
 */
58 unsigned int crc32c_table[256] = {
59 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
60 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
61 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
62 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
63 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
64 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
65 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
66 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
67 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
68 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
69 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
70 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
71 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
72 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
73 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
74 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
75 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
76 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
77 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
78 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
79 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
80 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
81 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
82 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
83 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
84 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
85 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
86 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
87 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
88 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
89 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
90 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
91 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
92 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
93 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
94 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
95 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
96 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
97 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
98 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
99 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
100 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
101 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
102 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
103 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
104 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
105 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
106 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
107 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
108 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
109 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
110 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
111 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
112 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
113 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
114 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
115 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
116 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
117 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
118 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
119 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
120 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
121 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
122 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
/*
 * CRC8 -- fold one data value into a running CRC.
 * Returns crc shifted right by 8, XORed with the crc32c_table entry
 * selected by the low 8 bits of (crc ^ d).
 * NOTE(review): the declaration of d is on a line not visible here;
 * presumably a byte-sized type -- confirm.
 */
125 static inline unsigned long CRC8 (unsigned long crc,
128 return ((crc >> 8) ^ crc32c_table[(crc ^ d) & 0xFF]);
/*
 * CRC16 -- fold a wider value into a running CRC using two CRC8() steps,
 * each fed with d & 0xff.
 * NOTE(review): presumably d is shifted right by 8 between the two calls
 * (that statement is not visible here), so the low byte is folded first
 * and the high byte second -- confirm.
 */
130 static inline unsigned long CRC16 (unsigned long crc,
133 crc = CRC8 (crc, d & 0xff);
135 crc = CRC8 (crc, d & 0xff);
/*
 * crc_eliding_c_comments -- accumulate a CRC over the text in buf while
 * leaving out C comments and, outside of string and character literals,
 * whitespace.
 *
 * buf: NUL-terminated text to scan.
 * crc: running CRC to continue from.
 *
 * A small state machine (variable ss) tracks whether the scanner is in
 * plain code, inside a string or character literal (with a separate state
 * right after a backslash), just after a single slash, inside a
 * double-slash comment, inside a slash-star comment, or just after a star
 * within such a comment.  Branches that end in "continue" skip the current
 * character; branches that end in "break" presumably fall through to a
 * CRC8() call on the current character (not visible here -- confirm).
 */
141 crc_eliding_c_comments (const char *buf, unsigned long crc)
146 cSBACKSLASH, /* "...\ */
148 cCBACKSLASH, /* '...\ */
150 cSLASH_SLASH, /* //... */
151 cSLASH_STAR, /* / *... */
156 unsigned char c = *p++;
/* End of buffer reached in the middle of a literal or comment: report it on stderr together with the whole buffer. */
163 case cSTRING: case cSBACKSLASH:
164 case cCHAR: case cCBACKSLASH:
165 case cSLASH: case cSLASH_SLASH: case cSLASH_STAR: case cSTAR:
166 fprintf (stderr, "Inopportune EOF: %s\n", buf);
/* Presumably the double-quote character (per the start/end string comments) -- confirm. */
172 case cOTHER: ss = cSTRING; break; /* start string */
173 case cSTRING: ss = cOTHER; break; /* end string */
174 case cSBACKSLASH: ss = cSTRING; break;
176 case cCBACKSLASH: ss = cCHAR; break;
/* A pending single slash turned out not to start a comment: fold the slash into the CRC now, then handle the current character. */
177 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
178 case cSLASH_SLASH: continue; /* in comment */
179 case cSLASH_STAR: continue; /* in comment */
180 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Presumably the backslash character: toggles between a literal state and its after-backslash state -- confirm. */
186 case cSTRING: ss = cSBACKSLASH; break;
187 case cSBACKSLASH: ss = cSTRING; break;
188 case cCHAR: ss = cCBACKSLASH; break;
189 case cCBACKSLASH: ss = cCHAR; break;
190 case cSLASH: crc = CRC8 (crc, '/'); ; ss = cOTHER; break;
191 case cSLASH_SLASH: continue; /* in comment */
192 case cSLASH_STAR: continue; /* in comment */
193 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Presumably the slash character: may open a comment, or close a slash-star comment when it follows a star -- confirm. */
198 case cOTHER: ss = cSLASH; continue; /* potential comment */
200 case cSBACKSLASH: ss = cSTRING; break;
202 case cCBACKSLASH: ss = cCHAR; break;
203 case cSLASH: ss = cSLASH_SLASH; continue; /* start comment */
204 case cSLASH_SLASH: continue; /* in comment */
205 case cSLASH_STAR: continue; /* in comment */
206 case cSTAR: ss = cOTHER; continue; /* end of comment */
/* Presumably the star character: after a single slash it opens a slash-star comment; inside one it marks a possible end -- confirm. */
213 case cSBACKSLASH: ss = cSTRING; break;
215 case cCBACKSLASH: ss = cCHAR; break;
216 case cSLASH: ss = cSLASH_STAR; continue; /* start comment */
217 case cSLASH_SLASH: continue; /* in comment */
218 case cSLASH_STAR: ss = cSTAR; continue; /* potential end */
219 case cSTAR: continue; /* still potential end of comment */
/* Whitespace: newline, carriage return, space, tab and form feed (octal 014). */
222 case '\n': case '\r': case ' ': case '\t': case '\014':
224 case cOTHER: continue; /* ignore all whitespace */
226 case cSBACKSLASH: ss = cSTRING; break;
228 case cCBACKSLASH: ss = cCHAR; break;
/* Whitespace right after a single slash: the slash is substituted for the current character. */
229 case cSLASH: c = '/'; ss = cOTHER; break;
231 if (c == '\n' || c == '\r') ss = cOTHER; /* end comment */
233 case cSLASH_STAR: continue; /* in comment */
234 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Presumably the default branch, covering every other character -- confirm. */
240 case cSBACKSLASH: ss = cSTRING; break;
242 case cCBACKSLASH: ss = cCHAR; break;
243 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
244 case cSLASH_SLASH: continue; /* in comment */
245 case cSLASH_STAR: continue; /* in comment */
246 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/*
 * main -- command-line driver for the API generator.
 * Walks argv, matching each option with strncmp() on a short prefix,
 * opens the requested files, then runs yyparse() and prints where the
 * output went.
 */
256 int main (int argc, char **argv)
263 while (curarg < argc) {
/* Most options are compared on their first 3 characters only, so any argument that begins with e.g. "--v" selects --verbose. */
264 if (!strncmp (argv [curarg], "--verbose", 3)) {
265 fprintf (stderr, "%s version %s\n", argv [0], version);
270 if (!strncmp (argv [curarg], "--yydebug", 3)) {
276 if (!strncmp (argv [curarg], "--dump", 3)) {
/* --show-name takes a following argument that is remembered in show_name. */
282 if (!strncmp (argv[curarg], "--show-name", 3)) {
285 show_name = argv[curarg];
289 fprintf(stderr, "Missing filename after --show-name \n");
/* --input: the argument "-" is tested for separately; any other name is opened for reading. */
294 if (!strncmp (argv [curarg], "--input", 3)) {
297 input_filename = argv[curarg];
298 if (!strcmp (argv [curarg], "-"))
301 ifp = fopen (argv [curarg], "r");
303 fprintf (stderr, "Couldn't open input file %s\n",
309 fprintf(stderr, "Missing filename after --input\n");
/* --output: opened for writing; the name is kept in ofile for the messages below. */
314 if (!strncmp (argv [curarg], "--output", 3)) {
317 ofp = fopen (argv[curarg], "w");
319 fprintf (stderr, "Couldn't open output file %s\n",
323 ofile = argv[curarg];
326 fprintf(stderr, "Missing filename after --output\n");
/* Unlike the options above, --python is compared on 8 characters and --app on 4. */
331 if (!strncmp (argv [curarg], "--python", 8)) {
334 if (!strcmp(argv[curarg], "-")) {
337 pythonfp = fopen(argv[curarg], "w");
338 pythonfile = argv[curarg];
340 if (pythonfp == NULL) {
341 fprintf (stderr, "Couldn't open python output file %s\n",
347 fprintf(stderr, "Missing filename after --python\n");
352 if (!strncmp (argv [curarg], "--app", 4)) {
355 vlib_app_name = argv[curarg];
358 fprintf(stderr, "Missing app name after --app\n");
/* Post-parse checks on the collected options; the statements guarded by these tests are mostly not visible here. */
370 if (pythonfp == NULL) {
374 fprintf(stderr, "No input file specified...\n");
/* The name given with --show-name replaces input_filename (presumably only when one was supplied -- confirm). */
378 input_filename = show_name;
381 starttime = time (0);
/* yyparse() returning 0 means the parse succeeded. */
383 if (yyparse() == 0) {
387 printf ("Output written to %s\n", ofile);
391 printf ("Python bindings written to %s\n", pythonfile);
/* Presumably the parse-failure path, where partial outputs are deleted -- confirm. */
400 printf ("Removing %s\n", ofile);
404 printf ("Removing %s\n", pythonfile);
/*
 * usage -- print the command-line synopsis.
 * progname: program name substituted into the message.
 * NOTE(review): the synopsis does not mention --verbose, --show-name or
 * --app, all of which main() accepts.
 */
415 static void usage (char *progname)
418 "usage: %s --input <filename> [--output <filename>] [--python <filename>]\n%s",
420 " [--yydebug] [--dump-tree]\n");
/*
 * yyerror -- error reporting hook.
 * Writes "<current_filename>:<line> <message>" to stderr, using the
 * lexer's current line number.
 * s: message text to print.
 */
427 void yyerror (char *s)
429 fprintf (stderr, "%s:%d %s\n", current_filename, the_lexer_linenumber, s);
/* Buffer in which the lexer accumulates the text of the current token; the lexer states check their write index against MAXNAME-1. */
432 static char namebuf [MAXNAME];
/*
 * getc_char -- read one character from ifp and keep only its low 7 bits.
 * NOTE(review): the mask also discards getc()'s EOF indicator, so end of
 * file cannot be detected from the return value; presumably callers test
 * feof(ifp) afterwards -- confirm.
 */
435 getc_char (FILE *ifp)
437 return ((char)(getc(ifp) & 0x7f));
441 * yylex (well, yylex_1: The real yylex below does crc-hackery)
/*
 * yylex_1 -- hand-written lexer state machine.
 * Dispatches on the_lexer_state, accumulates token text in namebuf and
 * returns a token code; semantic values are passed through yylval.
 * Newlines seen along the way bump the_lexer_linenumber.
 */
443 static int yylex_1 (void)
/* Sub-states used while scanning a preprocessor line marker (see LINE_PRAGMA_STATE below). */
447 enum { LP_INITIAL_WHITESPACE, LP_LINE_NUMBER,
448 LP_PRE_FILENAME_WHITESPACE, LP_FILENAME,
451 } lp_substate = LP_INITIAL_WHITESPACE;
454 switch (the_lexer_state) {
456 * START state -- looking for something interesting
465 the_lexer_linenumber++;
469 the_lexer_state = LINE_PRAGMA_STATE;
470 lp_substate = LP_INITIAL_WHITESPACE;
504 the_lexer_state = STRING_STATE;
509 the_lexer_state = HELPER_STATE;
518 the_lexer_state = CPP_COMMENT_STATE;
520 } else if (c == '*') {
521 the_lexer_state = C_COMMENT_STATE;
524 fprintf (stderr, "unknown token /%c at line %d\n",
525 c, the_lexer_linenumber);
534 /* Note fallthrough... */
/* A letter or underscore starts a name; a digit starts a number; anything else is reported as an unknown token. */
537 if (isalpha (c) || c == '_') {
540 the_lexer_state = NAME_STATE;
542 } else if (isdigit(c)) {
545 the_lexer_state = NUMBER_STATE;
549 fprintf (stderr, "unknown token %c at line %d\n",
550 c, the_lexer_linenumber);
555 * NAME state -- eat the rest of a name
/* The first character that is neither alphanumeric nor underscore ends the name, which is then classified by name_check(). */
562 if (!isalnum (c) && c != '_') {
564 namebuf [nameidx] = 0;
565 the_lexer_state = START_STATE;
566 return (name_check (namebuf, &yylval));
568 if (nameidx >= (MAXNAME-1)) {
569 fprintf(stderr, "lex input buffer overflow...\n");
572 namebuf [nameidx++] = c;
576 * NUMBER state -- eat the rest of a number
/* The numeric value from atol() is carried in yylval as a pointer-typed value. */
585 namebuf [nameidx] = 0;
586 the_lexer_state = START_STATE;
587 yylval = (void *) atol(namebuf);
590 if (nameidx >= (MAXNAME-1)) {
591 fprintf(stderr, "lex input buffer overflow...\n");
594 namebuf [nameidx++] = c;
598 * C_COMMENT state -- eat a peach
600 case C_COMMENT_STATE:
609 the_lexer_state = START_STATE;
614 the_lexer_linenumber++;
618 * CPP_COMMENT state -- eat a plum
621 case CPP_COMMENT_STATE:
626 the_lexer_linenumber++;
627 the_lexer_state = START_STATE;
641 namebuf[nameidx++] = c;
/* The accumulated text is copied with sxerox() and handed to the parser through yylval. */
645 namebuf[nameidx] = 0;
646 yylval = (YYSTYPE) sxerox (namebuf);
647 the_lexer_state = START_STATE;
652 the_lexer_linenumber++;
654 if (nameidx >= (MAXNAME-1)) {
655 fprintf(stderr, "lex input buffer overflow...\n");
658 namebuf[nameidx++] = c;
/* NOTE(review): unlike the other character stores in this function, the next store does not advance nameidx, so the following store would overwrite it -- confirm whether that is intended. */
672 namebuf[nameidx] = c;
676 namebuf[nameidx] = 0;
677 yylval = (YYSTYPE) sxerox (namebuf);
678 the_lexer_state = START_STATE;
679 return (HELPER_STRING);
683 the_lexer_linenumber++;
686 * CPP makes it approximately impossible to
687 * type "#define FOO 123", so we provide a
688 * lexical trick to achieve that result
694 if (nameidx >= (MAXNAME-1)) {
695 fprintf(stderr, "lex input buffer overflow...\n");
698 namebuf[nameidx++] = c;
703 case LINE_PRAGMA_STATE:
704 /* We're only interested in lines of the form # 259 "foo.c" 17 */
706 switch (lp_substate) {
708 case LP_INITIAL_WHITESPACE: /* no number seen yet */
/* NOTE(review): no MAXNAME bounds check is visible around the namebuf stores in the line-pragma substates, unlike the NAME and NUMBER states -- confirm. */
712 if (c >= '0' && c <= '9') {
713 namebuf[nameidx++] = c;
714 lp_substate = LP_LINE_NUMBER;
715 } else if (c == '\n') {
717 } else if (c != ' ' && c != '\t') {
720 lp_substate = LP_OTHER;
724 case LP_LINE_NUMBER: /* eating linenumber */
728 if (c >= '0' && c <= '9') {
729 namebuf[nameidx++] = c;
730 } else if (c == ' ' || c == '\t') {
/* Whitespace ends the number: terminate the digits and adopt them as the current line number. */
731 namebuf[nameidx++] = 0;
732 the_lexer_linenumber = atol(namebuf);
733 lp_substate = LP_PRE_FILENAME_WHITESPACE;
734 } else if (c == '\n') {
737 lp_substate = LP_OTHER;
741 case LP_PRE_FILENAME_WHITESPACE: /* awaiting filename */
747 lp_substate = LP_FILENAME;
749 } else if (c == ' ' || c == '\t') {
751 } else if (c == '\n') {
754 lp_substate = LP_OTHER;
758 case LP_FILENAME: /* eating filename */
764 lp_substate = LP_POST_FILENAME;
765 namebuf[nameidx] = 0;
766 } else if (c == '\n') {
767 goto lp_end_of_line; /* syntax error... */
769 namebuf[nameidx++] = c;
773 case LP_POST_FILENAME: /* ignoring rest of line */
/* A complete file name was seen: release the previous heap copy if there was one, then install the new name. */
780 if (lp_substate == LP_POST_FILENAME) {
781 if (current_filename_allocated) {
782 current_filename_allocated = 0;
783 free(current_filename);
/* The name "<stdin>" is mapped back to the input file name given on the command line; any other name is copied with sxerox(). */
786 if (!strcmp(namebuf, "<stdin>")) {
787 current_filename = input_filename;
789 current_filename = sxerox(namebuf);
790 current_filename_allocated = 1;
794 the_lexer_state = START_STATE;
801 fprintf (stderr, "LEXER BUG!\n");
808 * Parse a token and side-effect input_crc
809 * in a whitespace- and comment-insensitive fashion.
814 * Accumulate a crc32-based signature while processing the
815 * input file. The goal is to come up with a magic number
816 * which changes precisely when the original input file changes
817 * but which ignores whitespace changes.
/* Start from the signature accumulated so far and fetch the next token from the real lexer. */
819 unsigned long crc = input_crc;
820 int node_type = yylex_1 ();
/* Tokens that carry text: fold the text left in namebuf into the CRC, skipping comments and whitespace. */
827 case HELPER_STRING: {
828 /* We know these types accumulated token text into namebuf */
829 /* HELPER_STRING may still contain C comments. Argh. */
830 crc = crc_eliding_c_comments (namebuf, crc);
834 /* Other node types have no "substate" */
835 /* This code is written in this curious fashion because we
836 * want the generated CRC to be independent of the particular
837 * values a particular version of lex/bison assigned to various states.
/* Each token kind below folds in a fixed numeric cookie. NOTE(review): cookie 277 is not used by any case visible here. */
840 /* case NAME: crc = CRC16 (crc, 257); break; */
841 case RPAR: crc = CRC16 (crc, 258); break;
842 case LPAR: crc = CRC16 (crc, 259); break;
843 case SEMI: crc = CRC16 (crc, 260); break;
844 case LBRACK: crc = CRC16 (crc, 261); break;
845 case RBRACK: crc = CRC16 (crc, 262); break;
846 /* case NUMBER: crc = CRC16 (crc, 263); break; */
847 /* case PRIMTYPE: crc = CRC16 (crc, 264); break; */
848 case BARF: crc = CRC16 (crc, 265); break;
849 case TPACKED: crc = CRC16 (crc, 266); break;
850 case DEFINE: crc = CRC16 (crc, 267); break;
851 case LCURLY: crc = CRC16 (crc, 268); break;
852 case RCURLY: crc = CRC16 (crc, 269); break;
853 /* case STRING: crc = CRC16 (crc, 270); break; */
854 case UNION: crc = CRC16 (crc, 271); break;
855 /* case HELPER_STRING: crc = CRC16 (crc, 272); break; */
856 case COMMA: crc = CRC16 (crc, 273); break;
857 case NOVERSION: crc = CRC16 (crc, 274); break;
858 case MANUAL_PRINT: crc = CRC16 (crc, 275); break;
859 case MANUAL_ENDIAN: crc = CRC16 (crc, 276); break;
860 case TYPEONLY: crc = CRC16 (crc, 278); break;
861 case DONT_TRACE: crc = CRC16 (crc, 279); break;
863 case EOF: crc = CRC16 (crc, ~0); break; /* hysterical compatibility */
/* A token kind with no cookie assigned is reported on stderr. */
866 fprintf(stderr, "yylex: node_type %d missing state CRC cookie\n",
877 * name_check -- see if the name we just ate
878 * matches a known keyword. If so, set yylval
879 * to a new instance of <subclass of node>, and return PARSER_MACRO
881 * Otherwise, set yylval to sxerox (s) and return NAME
/*
 * Keyword table: maps a keyword spelling to its node subclass id.
 * name_check() searches it by bisection.
 */
884 static struct keytab {
886 enum node_subclass subclass_id;
888 /* Keep the table sorted, binary search used below! */
890 {"define", NODE_DEFINE},
891 {"dont_trace", NODE_DONT_TRACE},
897 {"manual_endian", NODE_MANUAL_ENDIAN},
898 {"manual_print", NODE_MANUAL_PRINT},
899 {"noversion", NODE_NOVERSION},
900 {"packed", NODE_PACKED},
901 {"typeonly", NODE_TYPEONLY},
906 {"union", NODE_UNION},
907 {"uword", NODE_UWORD},
/*
 * name_check -- classify a name that the lexer has just collected.
 * s:           NUL-terminated name text.
 * token_value: receives the semantic value for the token.
 * Returns the token code for the parser.
 */
910 static int name_check (const char *s, YYSTYPE *token_value)
912 enum node_subclass subclass_id;
/* Bisection over keytab: top and bot bound the remaining range, and mid is probed with name_compare(). */
916 for (top = 0, bot = (sizeof(keytab) / sizeof(struct keytab))-1;
918 mid = (top + bot) / 2;
919 result = name_compare (s, keytab[mid].name);
925 subclass_id = keytab[mid].subclass_id;
/* Keyword found: some keywords yield a freshly made node, the flag keywords yield a flag constant cast to YYSTYPE. */
927 switch (subclass_id) {
938 *token_value = make_node(subclass_id);
942 *token_value = make_node(subclass_id);
946 *token_value = make_node(subclass_id);
949 case NODE_MANUAL_PRINT:
950 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_PRINT;
951 return (MANUAL_PRINT);
953 case NODE_MANUAL_ENDIAN:
954 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_ENDIAN;
955 return (MANUAL_ENDIAN);
958 *token_value = (YYSTYPE) NODE_FLAG_TYPEONLY;
961 case NODE_DONT_TRACE:
962 *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
/* A subclass id that the switch does not handle means the table and the switch disagree. */
972 fprintf (stderr, "fatal: keytab botch!\n");
/* Not a keyword: the token value becomes a heap copy of the name. */
977 *token_value = (YYSTYPE) sxerox (s);
/*
 * sxerox -- allocate strlen(s)+1 bytes with malloc() for a copy of s.
 * Presumably copies s into the new buffer and returns it (those statements
 * are not visible here -- confirm).  The lexer free()s such a copy when it
 * replaces current_filename.
 * NOTE(review): strlen() returns size_t but the length is stored in an int,
 * and the out-of-memory message has no trailing newline.
 */
985 char *sxerox (const char *s)
987 int len = strlen (s);
990 rv = (char *) malloc (len+1);
992 fprintf(stderr, "Out of memory...");
1004 int name_compare (const char *s1, const char *s2)
1008 while (*s1 && *s2) {