2 *------------------------------------------------------------------
3 * lex.c - API generator lexical analyzer
5 * Copyright (c) 1996-2009 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
/* ifp is the input stream main() opens for reading for --input; ofp and
 * pythonfp are the streams main() opens for writing for --output and
 * --python. */
31 FILE *ifp, *ofp, *pythonfp;
/* Application name; has a built-in default and is replaced by the argument
 * that follows --app. */
32 char *vlib_app_name = "vpp";
/* File name printed by yyerror(). The line-pragma handling in the lexer
 * repoints it at input_filename or at a copy made with sxerox(). */
36 char *current_filename;
/* Set to 1 when current_filename was obtained from sxerox(); checked (and
 * cleared) before current_filename is passed to free(). */
37 int current_filename_allocated;
/* Input signature; the CRC-accumulating lexer wrapper starts each step
 * from this value. */
38 unsigned long input_crc;
42 * lexer variable definitions
/* Version string printed together with argv[0] by the --verbose option. */
45 static const char *version = "0.1";
/* Line number used in diagnostics: starts at 1, is incremented by the lexer
 * and is overwritten when a line pragma supplies a new number. */
46 static int the_lexer_linenumber = 1;
/* Current state of the lexer state machine; lexing begins in START_STATE. */
47 static enum lex_state the_lexer_state = START_STATE;
/* Forward declarations for helpers that are defined later in this file. */
52 static void usage (char *);
53 static int name_check (const char *, YYSTYPE *);
54 static int name_compare (const char *, const char *);
/* Semantic value of the current token. The lexer assigns to it; the object
 * itself is defined in another translation unit. */
56 extern YYSTYPE yylval;
/*
 * 256-entry lookup table indexed by one byte; CRC8() below uses it for each
 * table-driven CRC step. The name says CRC-32C, and entry 128 is 0x82F63B78,
 * the bit-reflected form of the Castagnoli polynomial.
 */
58 unsigned int crc32c_table[256] = {
59 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
60 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
61 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
62 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
63 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
64 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
65 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
66 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
67 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
68 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
69 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
70 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
71 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
72 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
73 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
74 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
75 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
76 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
77 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
78 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
79 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
80 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
81 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
82 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
83 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
84 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
85 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
86 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
87 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
88 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
89 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
90 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
91 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
92 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
93 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
94 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
95 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
96 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
97 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
98 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
99 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
100 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
101 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
102 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
103 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
104 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
105 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
106 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
107 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
108 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
109 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
110 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
111 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
112 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
113 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
114 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
115 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
116 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
117 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
118 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
119 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
120 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
121 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
122 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
/*
 * One table-driven CRC step: shift crc right by 8 bits and XOR the result
 * with the crc32c_table entry selected by the low byte of (crc ^ d).
 * Returns the updated crc.
 * NOTE(review): the declaration of the second parameter d is on a line that
 * is not part of this excerpt -- confirm its type against the full file.
 */
125 static inline unsigned long CRC8 (unsigned long crc,
128 return ((crc >> 8) ^ crc32c_table[(crc ^ d) & 0xFF]);
/*
 * Fold a value d into crc with two CRC8() steps, each consuming the low
 * byte of d at that point.
 * NOTE(review): the statement between the two CRC8() calls and the return
 * are not part of this excerpt; presumably d is shifted right by 8 bits in
 * between so that the second step consumes the high byte -- confirm.
 */
130 static inline unsigned long CRC16 (unsigned long crc,
133 crc = CRC8 (crc, d & 0xff);
135 crc = CRC8 (crc, d & 0xff);
/*
 * Walk the NUL-terminated text in buf one character at a time and fold it
 * into crc, driven by a small state machine (state variable ss) that tracks
 * whether the scan is in ordinary text, a string or character literal
 * (including a pending backslash escape), a possible comment start, or a
 * // or slash-star comment. Characters seen while in a comment state are
 * skipped with continue, and whitespace seen in the cOTHER state is
 * skipped as well. A lone slash that turns out not to start a comment is
 * folded into the crc retroactively.
 * Reaching the end of buf in any state listed in the EOF check below prints
 * an Inopportune EOF diagnostic to stderr.
 * NOTE(review): the case labels that select each group of state transitions
 * and the final CRC/return statements are not part of this excerpt.
 */
141 crc_eliding_c_comments (const char *buf, unsigned long crc)
146 cSBACKSLASH, /* "...\ */
148 cCBACKSLASH, /* '...\ */
150 cSLASH_SLASH, /* //... */
151 cSLASH_STAR, /* / *... */
156 unsigned char c = *p++;
/* End of input while inside a literal, escape or comment is reported. */
163 case cSTRING: case cSBACKSLASH:
164 case cCHAR: case cCBACKSLASH:
165 case cSLASH: case cSLASH_SLASH: case cSLASH_STAR: case cSTAR:
166 fprintf (stderr, "Inopportune EOF: %s\n", buf);
/* Transitions for the character that opens and closes a string literal. */
172 case cOTHER: ss = cSTRING; break; /* start string */
173 case cSTRING: ss = cOTHER; break; /* end string */
174 case cSBACKSLASH: ss = cSTRING; break;
176 case cCBACKSLASH: ss = cCHAR; break;
177 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
178 case cSLASH_SLASH: continue; /* in comment */
179 case cSLASH_STAR: continue; /* in comment */
180 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Transitions that enter or leave the backslash-escape states. */
186 case cSTRING: ss = cSBACKSLASH; break;
187 case cSBACKSLASH: ss = cSTRING; break;
188 case cCHAR: ss = cCBACKSLASH; break;
189 case cCBACKSLASH: ss = cCHAR; break;
190 case cSLASH: crc = CRC8 (crc, '/'); ; ss = cOTHER; break;
191 case cSLASH_SLASH: continue; /* in comment */
192 case cSLASH_STAR: continue; /* in comment */
193 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Transitions for a slash: may begin a comment, or end a block comment. */
198 case cOTHER: ss = cSLASH; continue; /* potential comment */
200 case cSBACKSLASH: ss = cSTRING; break;
202 case cCBACKSLASH: ss = cCHAR; break;
203 case cSLASH: ss = cSLASH_SLASH; continue; /* start comment */
204 case cSLASH_SLASH: continue; /* in comment */
205 case cSLASH_STAR: continue; /* in comment */
206 case cSTAR: ss = cOTHER; continue; /* end of comment */
/* Transitions for a star: after a slash it opens a block comment. */
213 case cSBACKSLASH: ss = cSTRING; break;
215 case cCBACKSLASH: ss = cCHAR; break;
216 case cSLASH: ss = cSLASH_STAR; continue; /* start comment */
217 case cSLASH_SLASH: continue; /* in comment */
218 case cSLASH_STAR: ss = cSTAR; continue; /* potential end */
219 case cSTAR: continue; /* still potential end of comment */
/* Whitespace: dropped in ordinary text; a newline or carriage return ends
 * a // comment. After a pending slash, c is replaced by the slash so that
 * the slash rather than the whitespace is what reaches the CRC. */
222 case '\n': case '\r': case ' ': case '\t': case '\014':
224 case cOTHER: continue; /* ignore all whitespace */
226 case cSBACKSLASH: ss = cSTRING; break;
228 case cCBACKSLASH: ss = cCHAR; break;
229 case cSLASH: c = '/'; ss = cOTHER; break;
231 if (c == '\n' || c == '\r') ss = cOTHER; /* end comment */
233 case cSLASH_STAR: continue; /* in comment */
234 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Remaining group of transitions (presumably the default character case). */
240 case cSBACKSLASH: ss = cSTRING; break;
242 case cCBACKSLASH: ss = cCHAR; break;
243 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
244 case cSLASH_SLASH: continue; /* in comment */
245 case cSLASH_STAR: continue; /* in comment */
246 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/*
 * Program entry point: walk argv, handle the options below, run yyparse(),
 * then report the files that were written or removed.
 *
 * Options are matched with strncmp() on a prefix only (3 characters for most
 * of them, 8 for --python, 4 for --app), so abbreviated spellings that share
 * the prefix are accepted too.
 * NOTE(review): argument-advance logic, exit paths and the conditions around
 * several statements are on lines that are not part of this excerpt.
 */
256 int main (int argc, char **argv)
263 while (curarg < argc) {
/* --verbose: print program name and version string to stderr. */
264 if (!strncmp (argv [curarg], "--verbose", 3)) {
265 fprintf (stderr, "%s version %s\n", argv [0], version);
270 if (!strncmp (argv [curarg], "--yydebug", 3)) {
276 if (!strncmp (argv [curarg], "--dump", 3)) {
/* --show-name <name>: remember the name to show in place of the input. */
282 if (!strncmp (argv[curarg], "--show-name", 3)) {
285 show_name = argv[curarg];
289 fprintf(stderr, "Missing filename after --show-name \n");
/* --input <file>: a file name of - is special-cased; otherwise the file is
 * opened for reading. */
294 if (!strncmp (argv [curarg], "--input", 3)) {
297 input_filename = argv[curarg];
298 if (!strcmp (argv [curarg], "-"))
301 ifp = fopen (argv [curarg], "r");
303 fprintf (stderr, "Couldn't open input file %s\n",
309 fprintf(stderr, "Missing filename after --input\n");
/* --output <file>: opened for writing; the name is kept for the final
 * report. */
314 if (!strncmp (argv [curarg], "--output", 3)) {
317 ofp = fopen (argv[curarg], "w");
319 fprintf (stderr, "Couldn't open output file %s\n",
323 ofile = argv[curarg];
326 fprintf(stderr, "Missing filename after --output\n");
/* --python <file>: opened for writing; matched on all 8 characters. */
331 if (!strncmp (argv [curarg], "--python", 8)) {
334 pythonfp = fopen (argv[curarg], "w");
335 if (pythonfp == NULL) {
336 fprintf (stderr, "Couldn't open python output file %s\n",
340 pythonfile = argv[curarg];
343 fprintf(stderr, "Missing filename after --python\n");
/* --app <name>: overrides the default application name. */
348 if (!strncmp (argv [curarg], "--app", 4)) {
351 vlib_app_name = argv[curarg];
354 fprintf(stderr, "Missing app name after --app\n");
366 if (pythonfp == NULL) {
370 fprintf(stderr, "No input file specified...\n");
374 input_filename = show_name;
377 starttime = time (0);
/* A zero return from yyparse() leads to the success messages; the Removing
 * messages below belong to the other outcome. */
379 if (yyparse() == 0) {
383 printf ("Output written to %s\n", ofile);
387 printf ("Python bindings written to %s\n", pythonfile);
395 printf ("Removing %s\n", ofile);
399 printf ("Removing %s\n", pythonfile);
/*
 * Emit the command-line usage summary for progname.
 * NOTE(review): the call that prints these strings is on lines that are not
 * part of this excerpt. The visible text does not mention --verbose,
 * --show-name or --app, which main() accepts, and it spells the dump option
 * as --dump-tree while main() matches on the --dump prefix -- consider
 * bringing the message in line with the option parser.
 */
410 static void usage (char *progname)
413 "usage: %s --input <filename> [--output <filename>] [--python <filename>]\n%s",
415 " [--yydebug] [--dump-tree]\n");
/*
 * Error callback: print message s to stderr, prefixed with the current file
 * name and the current lexer line number.
 */
422 void yyerror (char *s)
424 fprintf (stderr, "%s:%d %s\n", current_filename, the_lexer_linenumber, s);
/* Scratch buffer of MAXNAME bytes in which the lexer accumulates the text of
 * the token being scanned (names, numbers, strings, helper strings and the
 * fields of a line pragma); the CRC wrapper also reads helper-string text
 * from it. */
427 static char namebuf [MAXNAME];
430 * yylex (well, yylex_1: The real yylex below does crc-hackery)
/*
 * Hand-written lexer core: a state machine switched on the_lexer_state.
 * The visible states are START (dispatch on the first character of a token),
 * NAME, NUMBER, C comment, C++ comment, string, helper string and line
 * pragma. Token text is collected in namebuf and token values are stored in
 * yylval; names are classified through name_check().
 * NOTE(review): the lines that read characters, most case labels and several
 * return statements are not part of this excerpt, so the notes below about
 * missing checks need to be confirmed against the full file.
 */
432 static int yylex_1 (void)
/* Sub-state used only while parsing a line pragma. */
437 enum { LP_INITIAL_WHITESPACE, LP_LINE_NUMBER,
438 LP_PRE_FILENAME_WHITESPACE, LP_FILENAME,
441 } lp_substate = LP_INITIAL_WHITESPACE;
444 switch (the_lexer_state) {
446 * START state -- looking for something interesting
455 the_lexer_linenumber++;
461 fprintf (stderr, "unknown token /%c at line %d\n",
462 c, the_lexer_linenumber);
466 the_lexer_state = LINE_PRAGMA_STATE;
467 lp_substate = LP_INITIAL_WHITESPACE;
501 the_lexer_state = STRING_STATE;
506 the_lexer_state = HELPER_STATE;
515 the_lexer_state = CPP_COMMENT_STATE;
517 } else if (c == '*') {
518 the_lexer_state = C_COMMENT_STATE;
521 fprintf (stderr, "unknown token /%c at line %d\n",
522 c, the_lexer_linenumber);
531 /* Note fallthrough... */
/* NOTE(review): c is handed to isalpha/isdigit/isalnum; if c can hold a
 * negative char value it should go through unsigned char first. */
534 if (isalpha (c) || c == '_') {
537 the_lexer_state = NAME_STATE;
539 } else if (isdigit(c)) {
542 the_lexer_state = NUMBER_STATE;
546 fprintf (stderr, "unknown token %c at line %d\n",
547 c, the_lexer_linenumber);
552 * NAME state -- eat the rest of a name
559 if (!isalnum (c) && c != '_') {
561 namebuf [nameidx] = 0;
562 the_lexer_state = START_STATE;
563 return (name_check (namebuf, &yylval));
565 if (nameidx >= (MAXNAME-1)) {
566 fprintf(stderr, "lex input buffer overflow...\n");
569 namebuf [nameidx++] = c;
573 * NUMBER state -- eat the rest of a number
582 namebuf [nameidx] = 0;
583 the_lexer_state = START_STATE;
/* The numeric value travels to the parser inside the pointer-typed yylval. */
584 yylval = (void *) atol(namebuf);
587 if (nameidx >= (MAXNAME-1)) {
588 fprintf(stderr, "lex input buffer overflow...\n");
591 namebuf [nameidx++] = c;
595 * C_COMMENT state -- eat a peach
597 case C_COMMENT_STATE:
606 the_lexer_state = START_STATE;
611 the_lexer_linenumber++;
615 * CPP_COMMENT state -- eat a plum
618 case CPP_COMMENT_STATE:
623 the_lexer_linenumber++;
624 the_lexer_state = START_STATE;
/* NOTE(review): presumably the escaped-character path of the string state.
 * No MAXNAME-1 check is visible before this store, unlike the ordinary
 * character path a few lines below -- confirm. */
638 namebuf[nameidx++] = c;
642 namebuf[nameidx] = 0;
643 yylval = (YYSTYPE) sxerox (namebuf);
644 the_lexer_state = START_STATE;
649 the_lexer_linenumber++;
651 if (nameidx >= (MAXNAME-1)) {
652 fprintf(stderr, "lex input buffer overflow...\n");
655 namebuf[nameidx++] = c;
/* NOTE(review): presumably the escaped-character path of the helper-string
 * state. Unlike the matching store in the string state this one does not
 * advance nameidx, so the next stored character would overwrite it --
 * looks like a missing ++; confirm. */
669 namebuf[nameidx] = c;
673 namebuf[nameidx] = 0;
674 yylval = (YYSTYPE) sxerox (namebuf);
675 the_lexer_state = START_STATE;
676 return (HELPER_STRING);
680 the_lexer_linenumber++;
683 * CPP makes it approximately impossible to
684 * type "#define FOO 123", so we provide a
685 * lexical trick to achieve that result
691 if (nameidx >= (MAXNAME-1)) {
692 fprintf(stderr, "lex input buffer overflow...\n");
695 namebuf[nameidx++] = c;
/* NOTE(review): none of the namebuf stores in the line-pragma state below
 * has a visible check against MAXNAME; a long number or file name could run
 * past the buffer unless a check sits on a line outside this excerpt. */
700 case LINE_PRAGMA_STATE:
701 /* We're only interested in lines of the form # 259 "foo.c" 17 */
703 switch (lp_substate) {
705 case LP_INITIAL_WHITESPACE: /* no number seen yet */
709 if (c >= '0' && c <= '9') {
710 namebuf[nameidx++] = c;
711 lp_substate = LP_LINE_NUMBER;
712 } else if (c == '\n') {
714 } else if (c != ' ' && c != '\t') {
717 lp_substate = LP_OTHER;
721 case LP_LINE_NUMBER: /* eating linenumber */
725 if (c >= '0' && c <= '9') {
726 namebuf[nameidx++] = c;
727 } else if (c == ' ' || c == '\t') {
728 namebuf[nameidx++] = 0;
/* The pragma line number replaces the running line counter. */
729 the_lexer_linenumber = atol(namebuf);
730 lp_substate = LP_PRE_FILENAME_WHITESPACE;
731 } else if (c == '\n') {
734 lp_substate = LP_OTHER;
738 case LP_PRE_FILENAME_WHITESPACE: /* awaiting filename */
744 lp_substate = LP_FILENAME;
746 } else if (c == ' ' || c == '\t') {
748 } else if (c == '\n') {
751 lp_substate = LP_OTHER;
755 case LP_FILENAME: /* eating filename */
761 lp_substate = LP_POST_FILENAME;
762 namebuf[nameidx] = 0;
763 } else if (c == '\n') {
764 goto lp_end_of_line; /* syntax error... */
766 namebuf[nameidx++] = c;
770 case LP_POST_FILENAME: /* ignoring rest of line */
/* End of a pragma line: if a complete file name was read, release the old
 * heap copy (when there is one) and install the new name. The name <stdin>
 * maps back to the input file name given on the command line. */
777 if (lp_substate == LP_POST_FILENAME) {
778 if (current_filename_allocated) {
779 current_filename_allocated = 0;
780 free(current_filename);
783 if (!strcmp(namebuf, "<stdin>")) {
784 current_filename = input_filename;
786 current_filename = sxerox(namebuf);
787 current_filename_allocated = 1;
791 the_lexer_state = START_STATE;
799 fprintf (stderr, "LEXER BUG!\n");
806 * Parse a token and side-effect input_crc
807 * in a whitespace- and comment-insensitive fashion.
812 * Accumulate a crc32-based signature while processing the
813 * input file. The goal is to come up with a magic number
814 * which changes precisely when the original input file changes
815 * but which ignores whitespace changes.
/* Body of the CRC-accumulating lexer wrapper: fetch the next token from
 * yylex_1(), then fold something token-specific into crc. HELPER_STRING
 * folds the token text collected in namebuf with comments elided; the
 * tokens listed below fold a fixed 16-bit cookie each; a token type with no
 * entry produces the missing-cookie diagnostic.
 * NOTE(review): the function header, the switch statement line, the store
 * back into input_crc and the return are not part of this excerpt. */
817 unsigned long crc = input_crc;
818 int node_type = yylex_1 ();
825 case HELPER_STRING: {
826 /* We know these types accumulated token text into namebuf */
827 /* HELPER_STRING may still contain C comments. Argh. */
828 crc = crc_eliding_c_comments (namebuf, crc);
832 /* Other node types have no "substate" */
833 /* This code is written in this curious fashion because we
834 * want the generated CRC to be independent of the particular
835 * values a particular version of lex/bison assigned to various states.
838 /* case NAME: crc = CRC16 (crc, 257); break; */
839 case RPAR: crc = CRC16 (crc, 258); break;
840 case LPAR: crc = CRC16 (crc, 259); break;
841 case SEMI: crc = CRC16 (crc, 260); break;
842 case LBRACK: crc = CRC16 (crc, 261); break;
843 case RBRACK: crc = CRC16 (crc, 262); break;
844 /* case NUMBER: crc = CRC16 (crc, 263); break; */
845 /* case PRIMTYPE: crc = CRC16 (crc, 264); break; */
846 case BARF: crc = CRC16 (crc, 265); break;
847 case TPACKED: crc = CRC16 (crc, 266); break;
848 case DEFINE: crc = CRC16 (crc, 267); break;
849 case LCURLY: crc = CRC16 (crc, 268); break;
850 case RCURLY: crc = CRC16 (crc, 269); break;
851 /* case STRING: crc = CRC16 (crc, 270); break; */
852 case UNION: crc = CRC16 (crc, 271); break;
853 /* case HELPER_STRING: crc = CRC16 (crc, 272); break; */
854 case COMMA: crc = CRC16 (crc, 273); break;
855 case NOVERSION: crc = CRC16 (crc, 274); break;
856 case MANUAL_PRINT: crc = CRC16 (crc, 275); break;
857 case MANUAL_ENDIAN: crc = CRC16 (crc, 276); break;
/* Cookie 277 is not used by any visible case; the cookies are fixed
 * values and must not be renumbered, or every signature would change. */
858 case TYPEONLY: crc = CRC16 (crc, 278); break;
859 case DONT_TRACE: crc = CRC16 (crc, 279); break;
/* ~0 is an int here; CRC16() only consumes the low bytes of its argument. */
861 case EOF: crc = CRC16 (crc, ~0); break; /* hysterical compatibility */
864 fprintf(stderr, "yylex: node_type %d missing state CRC cookie\n",
875 * name_check -- see if the name we just ate
876 * matches a known keyword. If so, store the keyword's value (a new node
877 * instance or a NODE_FLAG_* constant) in *token_value and return its token.
879 * Otherwise, set *token_value to sxerox (s) and return NAME
/* Keyword table: each entry pairs a keyword spelling with the node subclass
 * id that name_check() acts on. name_check() locates entries with a binary
 * search driven by name_compare(), so entries must stay in the order that
 * name_compare() defines. */
882 static struct keytab {
884 enum node_subclass subclass_id;
886 /* Keep the table sorted, binary search used below! */
888 {"define", NODE_DEFINE},
889 {"dont_trace", NODE_DONT_TRACE},
895 {"manual_endian", NODE_MANUAL_ENDIAN},
896 {"manual_print", NODE_MANUAL_PRINT},
897 {"noversion", NODE_NOVERSION},
898 {"packed", NODE_PACKED},
899 {"typeonly", NODE_TYPEONLY},
904 {"union", NODE_UNION},
905 {"uword", NODE_UWORD},
/*
 * Classify the identifier s.
 * A binary search over keytab (compared with name_compare) looks for a
 * keyword match. On a match, *token_value receives either a node created by
 * make_node() or, for the flag keywords, a NODE_FLAG_* constant cast to
 * YYSTYPE, and a keyword-specific token is returned. When nothing matches,
 * *token_value receives a copy of s made by sxerox().
 * NOTE(review): the loop condition, several case labels and most return
 * statements are on lines that are not part of this excerpt.
 */
908 static int name_check (const char *s, YYSTYPE *token_value)
910 enum node_subclass subclass_id;
/* top and bot bracket the part of keytab that is still being searched. */
914 for (top = 0, bot = (sizeof(keytab) / sizeof(struct keytab))-1;
916 mid = (top + bot) / 2;
917 result = name_compare (s, keytab[mid].name);
923 subclass_id = keytab[mid].subclass_id;
925 switch (subclass_id) {
936 *token_value = make_node(subclass_id);
940 *token_value = make_node(subclass_id);
944 *token_value = make_node(subclass_id);
/* Flag keywords carry a flag constant instead of a node. */
947 case NODE_MANUAL_PRINT:
948 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_PRINT;
949 return (MANUAL_PRINT);
951 case NODE_MANUAL_ENDIAN:
952 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_ENDIAN;
953 return (MANUAL_ENDIAN);
956 *token_value = (YYSTYPE) NODE_FLAG_TYPEONLY;
959 case NODE_DONT_TRACE:
960 *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
/* A subclass id with no case of its own is reported as a keytab botch. */
970 fprintf (stderr, "fatal: keytab botch!\n");
/* Not a keyword: hand the parser its own copy of the name. */
975 *token_value = (YYSTYPE) sxerox (s);
/*
 * Allocate strlen(s)+1 bytes with malloc() for a copy of the string s.
 * Callers in this file keep the result as a string and release it with
 * free().
 * NOTE(review): the copy itself, any allocation-failure handling and the
 * return statement are on lines that are not part of this excerpt.
 * NOTE(review): strlen() returns size_t but the length is kept in an int;
 * size_t would avoid the narrowing conversion. The cast on malloc() is not
 * needed in C.
 */
983 char *sxerox (const char *s)
985 int len = strlen (s);
988 rv = (char *) malloc (len+1);
997 int name_compare (const char *s1, const char *s2)
1001 while (*s1 && *s2) {