2 *------------------------------------------------------------------
3 * lex.c - API generator lexical analyzer
5 * Copyright (c) 1996-2009 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
/* Stream handles: main() opens ifp for reading and ofp/javafp/jnifp for writing. */
31 FILE *ifp, *ofp, *javafp, *jnifp;
/* Java class name; this default is replaced by the argument of --class. */
32 char *java_class = "vppApi";
/* Application name; this default is replaced by the argument of --app. */
33 char *vlib_app_name = "vpp";
/* File name printed by yyerror(); reassigned when the lexer finishes a line pragma. */
37 char *current_filename;
/* Nonzero while current_filename points at an sxerox() copy that must be freed. */
38 int current_filename_allocated;
/* Running input signature; read as the starting crc when the next token is folded in. */
39 unsigned long input_crc;
43 * lexer variable definitions
/* Version string printed by the --verbose option. */
46 static const char *version = "0.1";
/* Current input line number, starting at 1; used in lexer diagnostics and by yyerror(). */
47 static int the_lexer_linenumber = 1;
/* Current state of the lexer state machine; the lexer begins in START_STATE. */
48 static enum lex_state the_lexer_state = START_STATE;
/* Forward declarations of file-local helpers. */
53 static void usage (char *);
54 static int name_check (const char *, YYSTYPE *);
55 static int name_compare (const char *, const char *);
/* Semantic value of the current token; only declared (extern) in this file. */
57 extern YYSTYPE yylval;
/*
 * 256-entry lookup table used by CRC8(), which indexes it with the low
 * byte of (crc ^ data).
 * NOTE(review): the name suggests the CRC-32C polynomial; the values
 * were not re-derived during this review -- confirm before relying on it.
 */
59 unsigned int crc32c_table[256] = {
60 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
61 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
62 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
63 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
64 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
65 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
66 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
67 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
68 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
69 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
70 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
71 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
72 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
73 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
74 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
75 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
76 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
77 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
78 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
79 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
80 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
81 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
82 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
83 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
84 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
85 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
86 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
87 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
88 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
89 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
90 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
91 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
92 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
93 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
94 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
95 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
96 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
97 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
98 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
99 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
100 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
101 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
102 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
103 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
104 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
105 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
106 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
107 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
108 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
109 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
110 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
111 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
112 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
113 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
114 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
115 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
116 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
117 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
118 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
119 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
120 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
121 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
122 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
123 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
/*
 * CRC8 -- fold one data value d into the running crc.
 * Returns crc shifted right by 8 bits, XORed with the crc32c_table entry
 * selected by the low byte of (crc ^ d).
 * NOTE(review): the declaration of d is not visible in this excerpt.
 */
126 static inline unsigned long CRC8 (unsigned long crc,
129 return ((crc >> 8) ^ crc32c_table[(crc ^ d) & 0xFF]);
/*
 * CRC16 -- fold d into the running crc with two CRC8 steps, each applied
 * to (d & 0xff).
 * NOTE(review): the statement between the two calls is not visible here;
 * presumably d is shifted right by 8 so the second step sees the high
 * byte -- confirm against the full file.
 */
131 static inline unsigned long CRC16 (unsigned long crc,
134 crc = CRC8 (crc, d & 0xff);
136 crc = CRC8 (crc, d & 0xff);
/*
 * crc_eliding_c_comments -- fold the text in buf into crc while skipping
 * C and C++ style comments and whitespace that lies outside string and
 * character literals.
 *
 * The scanner keeps a state ss.  The states visible in this excerpt are
 * cOTHER (plain text), cSTRING / cSBACKSLASH (inside "..." and just after
 * a backslash in it), cCHAR / cCBACKSLASH (the same for '...'), cSLASH
 * (one '/' seen, may start a comment), cSLASH_SLASH and cSLASH_STAR
 * (inside a comment) and cSTAR ('*' seen inside a block comment, may end
 * it).
 *
 * Branches that "continue" drop the current character; branches that
 * "break" leave the inner switch.
 * NOTE(review): the code after the inner switch is not visible here;
 * presumably it folds c into crc -- confirm.
 * A '/' that turns out not to start a comment is folded in late, either
 * with CRC8 (crc, '/') or by replacing c with '/'.
 *
 * Reaching the end of buf in any state other than plain text prints
 * "Inopportune EOF" together with the buffer.
 * NOTE(review): the case labels and the return statement are missing
 * from this excerpt; the per-group notes below are inferred from the
 * existing trailing comments and should be confirmed.
 */
142 crc_eliding_c_comments (const char *buf, unsigned long crc)
147 cSBACKSLASH, /* "...\ */
149 cCBACKSLASH, /* '...\ */
151 cSLASH_SLASH, /* //... */
152 cSLASH_STAR, /* / *... */
157 unsigned char c = *p++;
164 case cSTRING: case cSBACKSLASH:
165 case cCHAR: case cCBACKSLASH:
166 case cSLASH: case cSLASH_SLASH: case cSLASH_STAR: case cSTAR:
167 fprintf (stderr, "Inopportune EOF: %s\n", buf);
/* Group below: presumably the double-quote character (label not visible). */
173 case cOTHER: ss = cSTRING; break; /* start string */
174 case cSTRING: ss = cOTHER; break; /* end string */
175 case cSBACKSLASH: ss = cSTRING; break;
177 case cCBACKSLASH: ss = cCHAR; break;
178 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
179 case cSLASH_SLASH: continue; /* in comment */
180 case cSLASH_STAR: continue; /* in comment */
181 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Group below: presumably the backslash character (label not visible). */
187 case cSTRING: ss = cSBACKSLASH; break;
188 case cSBACKSLASH: ss = cSTRING; break;
189 case cCHAR: ss = cCBACKSLASH; break;
190 case cCBACKSLASH: ss = cCHAR; break;
191 case cSLASH: crc = CRC8 (crc, '/'); ; ss = cOTHER; break;
192 case cSLASH_SLASH: continue; /* in comment */
193 case cSLASH_STAR: continue; /* in comment */
194 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Group below: presumably the '/' character (label not visible). */
199 case cOTHER: ss = cSLASH; continue; /* potential comment */
201 case cSBACKSLASH: ss = cSTRING; break;
203 case cCBACKSLASH: ss = cCHAR; break;
204 case cSLASH: ss = cSLASH_SLASH; continue; /* start comment */
205 case cSLASH_SLASH: continue; /* in comment */
206 case cSLASH_STAR: continue; /* in comment */
207 case cSTAR: ss = cOTHER; continue; /* end of comment */
/* Group below: presumably the '*' character (label not visible). */
214 case cSBACKSLASH: ss = cSTRING; break;
216 case cCBACKSLASH: ss = cCHAR; break;
217 case cSLASH: ss = cSLASH_STAR; continue; /* start comment */
218 case cSLASH_SLASH: continue; /* in comment */
219 case cSLASH_STAR: ss = cSTAR; continue; /* potential end */
220 case cSTAR: continue; /* still potential end of comment */
/* Whitespace: dropped in plain text, and a newline or CR ends a // comment. */
223 case '\n': case '\r': case ' ': case '\t': case '\014':
225 case cOTHER: continue; /* ignore all whitespace */
227 case cSBACKSLASH: ss = cSTRING; break;
229 case cCBACKSLASH: ss = cCHAR; break;
/* A pending '/' followed by whitespace: account for the '/' instead of the whitespace. */
230 case cSLASH: c = '/'; ss = cOTHER; break;
232 if (c == '\n' || c == '\r') ss = cOTHER; /* end comment */
234 case cSLASH_STAR: continue; /* in comment */
235 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Group below: presumably every other character (label not visible). */
241 case cSBACKSLASH: ss = cSTRING; break;
243 case cCBACKSLASH: ss = cCHAR; break;
244 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
245 case cSLASH_SLASH: continue; /* in comment */
246 case cSLASH_STAR: continue; /* in comment */
247 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/*
 * main -- command-line driver for the API generator.
 *
 * Recognized options (each matched on a short prefix, see below):
 *   --verbose            print the program name and version to stderr
 *   --yydebug, --dump    (handling not visible in this excerpt)
 *   --show-name <name>   name stored in show_name; later copied into
 *                        input_filename
 *   --input <file>       input file, opened for reading; "-" is
 *                        special-cased (branch body not visible)
 *   --output <file>      output file, opened for writing
 *   --java <file>        Java output file, opened for writing
 *   --jni <file>         JNI output file, opened for writing
 *   --app <name>         overrides vlib_app_name
 *   --class <name>       overrides java_class
 *
 * After option parsing, yyparse() is run.  The messages that follow
 * report where output was written or that output files are being
 * removed.
 * NOTE(review): the conditions selecting those messages, and the exit
 * paths, are not visible in this excerpt.
 */
257 int main (int argc, char **argv)
/*
 * Options are compared with strncmp over only the first 3 or 4
 * characters, so any argument sharing that prefix selects the option
 * (for example "--v" acts as "--verbose").
 */
265 while (curarg < argc) {
266 if (!strncmp (argv [curarg], "--verbose", 3)) {
267 fprintf (stderr, "%s version %s\n", argv [0], version);
272 if (!strncmp (argv [curarg], "--yydebug", 3)) {
278 if (!strncmp (argv [curarg], "--dump", 3)) {
284 if (!strncmp (argv[curarg], "--show-name", 3)) {
287 show_name = argv[curarg];
291 fprintf(stderr, "Missing filename after --show-name \n");
296 if (!strncmp (argv [curarg], "--input", 3)) {
299 input_filename = argv[curarg];
300 if (!strcmp (argv [curarg], "-"))
303 ifp = fopen (argv [curarg], "r");
305 fprintf (stderr, "Couldn't open input file %s\n",
311 fprintf(stderr, "Missing filename after --input\n");
316 if (!strncmp (argv [curarg], "--output", 3)) {
319 ofp = fopen (argv[curarg], "w");
321 fprintf (stderr, "Couldn't open output file %s\n",
325 ofile = argv[curarg];
328 fprintf(stderr, "Missing filename after --output\n");
/* --java and --jni share the prefix "--j", so these two compare 4 characters. */
333 if (!strncmp (argv [curarg], "--java", 4)) {
336 javafp = fopen (argv[curarg], "w");
337 if (javafp == NULL) {
338 fprintf (stderr, "Couldn't open java output file %s\n",
342 jofile = argv[curarg];
345 fprintf(stderr, "Missing filename after --java\n");
350 if (!strncmp (argv [curarg], "--jni", 4)) {
353 jnifp = fopen (argv[curarg], "w");
355 fprintf (stderr, "Couldn't open jni output file %s\n",
359 jnifile = argv[curarg];
362 fprintf(stderr, "Missing filename after --jni\n");
367 if (!strncmp (argv [curarg], "--app", 4)) {
370 vlib_app_name = argv[curarg];
373 fprintf(stderr, "Missing app name after --app\n");
378 if (!strncmp (argv [curarg], "--class", 3)) {
381 java_class = argv[curarg];
384 fprintf(stderr, "Missing class name after --class\n");
396 if (javafp == NULL) {
403 fprintf(stderr, "No input file specified...\n");
/* The --show-name value replaces the input file name from here on. */
407 input_filename = show_name;
410 starttime = time (0);
/* A zero return from yyparse() selects the branch that starts here. */
412 if (yyparse() == 0) {
416 printf ("Output written to %s\n", ofile);
420 printf ("Java class defn written to %s\n", jofile);
424 printf ("Java native bindings written to %s\n", jnifile);
432 printf ("Removing %s\n", ofile);
437 printf ("Removing %s\n", jofile);
441 printf ("Removing %s\n", jnifile);
/*
 * usage -- print the command-line synopsis.
 * The format string takes the program name and a second string with the
 * optional flags.
 * NOTE(review): the output stream and what happens after printing are
 * not visible in this excerpt.
 */
452 static void usage (char *progname)
455 "usage: %s --input <filename> [--output <filename>]\n%s",
457 " [--yydebug] [--dump-tree]\n");
/*
 * yyerror -- report error message s on stderr, prefixed with
 * current_filename and the lexer's current line number.
 */
464 void yyerror (char *s)
466 fprintf (stderr, "%s:%d %s\n", current_filename, the_lexer_linenumber, s);
/*
 * Scratch buffer in which yylex_1() accumulates the text of the current
 * token and the fields of a line pragma; MAXNAME is defined outside this
 * excerpt.
 */
469 static char namebuf [MAXNAME];
472 * yylex (well, yylex_1: The real yylex below does crc-hackery)
/*
 * yylex_1 -- hand-written lexer state machine, driven by the file-level
 * the_lexer_state.
 *
 * States set or labelled in this excerpt:
 *   START_STATE        dispatches to the states below and reports
 *                      unknown tokens on stderr
 *   NAME_STATE         accumulates [A-Za-z0-9_] into namebuf, then
 *                      returns name_check (namebuf, &yylval)
 *   NUMBER_STATE       accumulates text, then stores atol (namebuf) in
 *                      yylval
 *   C_COMMENT_STATE,
 *   CPP_COMMENT_STATE  skip comment text while counting lines
 *   STRING_STATE,
 *   HELPER_STATE       accumulate text and hand yylval an sxerox() copy;
 *                      the helper path returns HELPER_STRING
 *   LINE_PRAGMA_STATE  parses lines of the form  # 259 "foo.c" 17,
 *                      updating the_lexer_linenumber and
 *                      current_filename
 *
 * The name, number, string and helper states print
 * "lex input buffer overflow..." once nameidx reaches MAXNAME-1.
 *
 * NOTE(review): many lines (case labels, character reads, most returns)
 * are missing from this excerpt, so some state attributions above are
 * inferred from the existing comments and should be confirmed.
 */
474 static int yylex_1 (void)
479 enum { LP_INITIAL_WHITESPACE, LP_LINE_NUMBER,
480 LP_PRE_FILENAME_WHITESPACE, LP_FILENAME,
483 } lp_substate = LP_INITIAL_WHITESPACE;
486 switch (the_lexer_state) {
488 * START state -- looking for something interesting
497 the_lexer_linenumber++;
503 fprintf (stderr, "unknown token /%c at line %d\n",
504 c, the_lexer_linenumber);
508 the_lexer_state = LINE_PRAGMA_STATE;
509 lp_substate = LP_INITIAL_WHITESPACE;
543 the_lexer_state = STRING_STATE;
548 the_lexer_state = HELPER_STATE;
557 the_lexer_state = CPP_COMMENT_STATE;
559 } else if (c == '*') {
560 the_lexer_state = C_COMMENT_STATE;
563 fprintf (stderr, "unknown token /%c at line %d\n",
564 c, the_lexer_linenumber);
573 /* Note fallthrough... */
576 if (isalpha (c) || c == '_') {
579 the_lexer_state = NAME_STATE;
581 } else if (isdigit(c)) {
584 the_lexer_state = NUMBER_STATE;
588 fprintf (stderr, "unknown token %c at line %d\n",
589 c, the_lexer_linenumber);
594 * NAME state -- eat the rest of a name
601 if (!isalnum (c) && c != '_') {
603 namebuf [nameidx] = 0;
604 the_lexer_state = START_STATE;
605 return (name_check (namebuf, &yylval));
607 if (nameidx >= (MAXNAME-1)) {
608 fprintf(stderr, "lex input buffer overflow...\n");
611 namebuf [nameidx++] = c;
615 * NUMBER state -- eat the rest of a number
624 namebuf [nameidx] = 0;
625 the_lexer_state = START_STATE;
626 yylval = (void *) atol(namebuf);
629 if (nameidx >= (MAXNAME-1)) {
630 fprintf(stderr, "lex input buffer overflow...\n");
633 namebuf [nameidx++] = c;
637 * C_COMMENT state -- eat a peach
639 case C_COMMENT_STATE:
648 the_lexer_state = START_STATE;
653 the_lexer_linenumber++;
657 * CPP_COMMENT state -- eat a plum
660 case CPP_COMMENT_STATE:
665 the_lexer_linenumber++;
666 the_lexer_state = START_STATE;
680 namebuf[nameidx++] = c;
684 namebuf[nameidx] = 0;
685 yylval = (YYSTYPE) sxerox (namebuf);
686 the_lexer_state = START_STATE;
691 the_lexer_linenumber++;
693 if (nameidx >= (MAXNAME-1)) {
694 fprintf(stderr, "lex input buffer overflow...\n");
697 namebuf[nameidx++] = c;
/*
 * NOTE(review): the store below does not advance nameidx, unlike every
 * other store of c in this function; the character would be overwritten
 * by the next store -- confirm whether nameidx++ was intended.
 */
711 namebuf[nameidx] = c;
715 namebuf[nameidx] = 0;
716 yylval = (YYSTYPE) sxerox (namebuf);
717 the_lexer_state = START_STATE;
718 return (HELPER_STRING);
722 the_lexer_linenumber++;
725 * CPP makes it approximately impossible to
726 * type "#define FOO 123", so we provide a
727 * lexical trick to achieve that result
733 if (nameidx >= (MAXNAME-1)) {
734 fprintf(stderr, "lex input buffer overflow...\n");
737 namebuf[nameidx++] = c;
/*
 * NOTE(review): none of the namebuf stores visible in the line-pragma
 * substates below is accompanied by a visible MAXNAME bounds check --
 * confirm against the full file that an over-long line number or file
 * name cannot overrun namebuf.
 */
742 case LINE_PRAGMA_STATE:
743 /* We're only interested in lines of the form # 259 "foo.c" 17 */
745 switch (lp_substate) {
747 case LP_INITIAL_WHITESPACE: /* no number seen yet */
751 if (c >= '0' && c <= '9') {
752 namebuf[nameidx++] = c;
753 lp_substate = LP_LINE_NUMBER;
754 } else if (c == '\n') {
756 } else if (c != ' ' && c != '\t') {
759 lp_substate = LP_OTHER;
763 case LP_LINE_NUMBER: /* eating linenumber */
767 if (c >= '0' && c <= '9') {
768 namebuf[nameidx++] = c;
769 } else if (c == ' ' || c == '\t') {
770 namebuf[nameidx++] = 0;
771 the_lexer_linenumber = atol(namebuf);
772 lp_substate = LP_PRE_FILENAME_WHITESPACE;
773 } else if (c == '\n') {
776 lp_substate = LP_OTHER;
780 case LP_PRE_FILENAME_WHITESPACE: /* awaiting filename */
786 lp_substate = LP_FILENAME;
788 } else if (c == ' ' || c == '\t') {
790 } else if (c == '\n') {
793 lp_substate = LP_OTHER;
797 case LP_FILENAME: /* eating filename */
803 lp_substate = LP_POST_FILENAME;
804 namebuf[nameidx] = 0;
805 } else if (c == '\n') {
806 goto lp_end_of_line; /* syntax error... */
808 namebuf[nameidx++] = c;
812 case LP_POST_FILENAME: /* ignoring rest of line */
/*
 * End of a pragma line: if a complete file name was read, free any
 * previously allocated current_filename, then either reuse
 * input_filename (when the pragma names "<stdin>") or keep an sxerox()
 * copy of the new name and mark it as allocated.
 */
819 if (lp_substate == LP_POST_FILENAME) {
820 if (current_filename_allocated) {
821 current_filename_allocated = 0;
822 free(current_filename);
825 if (!strcmp(namebuf, "<stdin>")) {
826 current_filename = input_filename;
828 current_filename = sxerox(namebuf);
829 current_filename_allocated = 1;
833 the_lexer_state = START_STATE;
841 fprintf (stderr, "LEXER BUG!\n");
848 * Parse a token and side-effect input_crc
849 * in a whitespace- and comment-insensitive fashion.
854 * Accumulate a crc32-based signature while processing the
855 * input file. The goal is to come up with a magic number
856 * which changes precisely when the original input file changes
857 * but which ignores whitespace changes.
/*
 * Body of the CRC-accumulating lexer wrapper.
 * NOTE(review): the function header is not visible in this excerpt; the
 * diagnostic at the end of the body names the function "yylex".
 *
 * Starts from the current input_crc, obtains the next token from
 * yylex_1(), and folds a token-dependent contribution into crc:
 *   - tokens whose text was accumulated in namebuf (HELPER_STRING among
 *     them) contribute that text through crc_eliding_c_comments();
 *   - punctuation and keyword tokens contribute a fixed constant
 *     (258..279) through CRC16, so the signature does not depend on the
 *     token numbers assigned by the parser generator;
 *   - EOF contributes ~0.
 * A token with no entry is reported on stderr as missing its CRC cookie.
 * NOTE(review): the write-back to input_crc and the return statement are
 * not visible here.
 */
859 unsigned long crc = input_crc;
860 int node_type = yylex_1 ();
867 case HELPER_STRING: {
868 /* We know these types accumulated token text into namebuf */
869 /* HELPER_STRING may still contain C comments. Argh. */
870 crc = crc_eliding_c_comments (namebuf, crc);
874 /* Other node types have no "substate" */
875 /* This code is written in this curious fashion because we
876 * want the generated CRC to be independent of the particular
877 * values a particular version of lex/bison assigned to various states.
880 /* case NAME: crc = CRC16 (crc, 257); break; */
881 case RPAR: crc = CRC16 (crc, 258); break;
882 case LPAR: crc = CRC16 (crc, 259); break;
883 case SEMI: crc = CRC16 (crc, 260); break;
884 case LBRACK: crc = CRC16 (crc, 261); break;
885 case RBRACK: crc = CRC16 (crc, 262); break;
886 /* case NUMBER: crc = CRC16 (crc, 263); break; */
887 /* case PRIMTYPE: crc = CRC16 (crc, 264); break; */
888 case BARF: crc = CRC16 (crc, 265); break;
889 case TPACKED: crc = CRC16 (crc, 266); break;
890 case DEFINE: crc = CRC16 (crc, 267); break;
891 case LCURLY: crc = CRC16 (crc, 268); break;
892 case RCURLY: crc = CRC16 (crc, 269); break;
893 /* case STRING: crc = CRC16 (crc, 270); break; */
894 case UNION: crc = CRC16 (crc, 271); break;
895 /* case HELPER_STRING: crc = CRC16 (crc, 272); break; */
896 case COMMA: crc = CRC16 (crc, 273); break;
897 case NOVERSION: crc = CRC16 (crc, 274); break;
898 case MANUAL_PRINT: crc = CRC16 (crc, 275); break;
899 case MANUAL_ENDIAN: crc = CRC16 (crc, 276); break;
900 case MANUAL_JAVA: crc = CRC16 (crc, 277); break;
901 case TYPEONLY: crc = CRC16 (crc, 278); break;
902 case DONT_TRACE: crc = CRC16 (crc, 279); break;
904 case EOF: crc = CRC16 (crc, ~0); break; /* hysterical compatibility */
907 fprintf(stderr, "yylex: node_type %d missing state CRC cookie\n",
918 * name_check -- see if the name we just ate
919 * matches a known keyword. If so, set yylval
920 * to a new instance of <subclass of node>, and return PARSER_MACRO
922 * Otherwise, set yylval to sxerox (s) and return NAME
/*
 * Keyword table consulted by name_check(): each entry pairs a keyword
 * spelling with its node_subclass id.  name_check() binary-searches the
 * table using name_compare(), so the entries must stay in the order that
 * name_compare() defines.
 * NOTE(review): several entries and the name field declaration are not
 * visible in this excerpt.
 */
925 static struct keytab {
927 enum node_subclass subclass_id;
929 /* Keep the table sorted, binary search used below! */
931 {"define", NODE_DEFINE},
932 {"dont_trace", NODE_DONT_TRACE},
938 {"manual_endian", NODE_MANUAL_ENDIAN},
939 {"manual_java", NODE_MANUAL_JAVA},
940 {"manual_print", NODE_MANUAL_PRINT},
941 {"noversion", NODE_NOVERSION},
942 {"packed", NODE_PACKED},
943 {"typeonly", NODE_TYPEONLY},
948 {"union", NODE_UNION},
949 {"uword", NODE_UWORD},
/*
 * name_check -- classify the identifier s.
 *
 * Binary-searches keytab (top starts at 0, bot at the last index, mid is
 * their midpoint) comparing with name_compare().  On a match the
 * entry's subclass_id selects the result:
 *   - some subclasses store make_node (subclass_id) in *token_value;
 *   - the flag keywords (manual_print, manual_endian, manual_java,
 *     typeonly, dont_trace) store the matching NODE_FLAG_* value cast to
 *     YYSTYPE, and the visible returns yield the matching token
 *     (MANUAL_PRINT, MANUAL_ENDIAN, MANUAL_JAVA);
 *   - an unexpected subclass prints "fatal: keytab botch!".
 * If the search fails, *token_value receives an sxerox() copy of s.
 * NOTE(review): loop conditions, several case labels and most return
 * statements are not visible in this excerpt.
 */
952 static int name_check (const char *s, YYSTYPE *token_value)
954 enum node_subclass subclass_id;
958 for (top = 0, bot = (sizeof(keytab) / sizeof(struct keytab))-1;
960 mid = (top + bot) / 2;
961 result = name_compare (s, keytab[mid].name);
967 subclass_id = keytab[mid].subclass_id;
969 switch (subclass_id) {
980 *token_value = make_node(subclass_id);
984 *token_value = make_node(subclass_id);
988 *token_value = make_node(subclass_id);
991 case NODE_MANUAL_PRINT:
992 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_PRINT;
993 return (MANUAL_PRINT);
995 case NODE_MANUAL_ENDIAN:
996 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_ENDIAN;
997 return (MANUAL_ENDIAN);
999 case NODE_MANUAL_JAVA:
1000 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_JAVA;
1001 return (MANUAL_JAVA);
1004 *token_value = (YYSTYPE) NODE_FLAG_TYPEONLY;
1007 case NODE_DONT_TRACE:
1008 *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
1011 case NODE_NOVERSION:
1018 fprintf (stderr, "fatal: keytab botch!\n");
1023 *token_value = (YYSTYPE) sxerox (s);
/*
 * sxerox -- allocate strlen (s) + 1 bytes with malloc for a copy of s.
 * NOTE(review): the copy itself, any handling of a failed malloc, and
 * the return statement are not visible in this excerpt; callers in this
 * file treat the result as a heap string (current_filename is released
 * with free).
 */
1031 char *sxerox (const char *s)
1033 int len = strlen (s);
1036 rv = (char *) malloc (len+1);
1045 int name_compare (const char *s1, const char *s2)
1049 while (*s1 && *s2) {