2 *------------------------------------------------------------------
3 * lex.c - API generator lexical analyzer
5 * Copyright (c) 1996-2009 Cisco and/or its affiliates.
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at:
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *------------------------------------------------------------------
/*
 * Stream handles shared across the tool. main() opens ifp for --input,
 * ofp for --output, javafp for --java and jnifp for --jni.
 */
31 FILE *ifp, *ofp, *javafp, *jnifp;
/* Default Java class name; main() replaces it with the --class argument. */
32 char *java_class = "vppApi";
/* Default application name; main() replaces it with the --app argument. */
33 char *vlib_app_name = "vpp";
/* File name printed by yyerror(); reassigned by the line-pragma handling in the lexer. */
37 char *current_filename;
/* Nonzero when current_filename came from sxerox() and has to be freed before it is replaced. */
38 int current_filename_allocated;
/* Running input signature; the token wrapper around yylex_1() starts each token from this value. */
39 unsigned long input_crc;
43 * lexer variable definitions
/* Version string printed on stderr when --verbose is given. */
46 static const char *version = "0.1";
/*
 * Current input line, starting at 1. The lexer increments it on newlines
 * and overwrites it from the number found in a line pragma.
 */
47 static int the_lexer_linenumber = 1;
/* State selected by the switch at the top of yylex_1(); persists between calls. */
48 static enum lex_state the_lexer_state = START_STATE;
/* Forward declarations for helpers that are defined further down in this file. */
53 static void usage (char *);
54 static int name_check (const char *, YYSTYPE *);
55 static int name_compare (const char *, const char *);
/*
 * 256-entry lookup table consumed by CRC8(), which indexes it with the low
 * byte of (crc ^ data).
 * NOTE(review): entry 128 is 0x82F63B78, which matches the reflected
 * CRC-32C (Castagnoli) polynomial and agrees with the table name -- confirm
 * against a reference table before relying on it.
 */
58 unsigned int crc32c_table[256] = {
59 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4,
60 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB,
61 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B,
62 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24,
63 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B,
64 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384,
65 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54,
66 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B,
67 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A,
68 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35,
69 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5,
70 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA,
71 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45,
72 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A,
73 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A,
74 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595,
75 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48,
76 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957,
77 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687,
78 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198,
79 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927,
80 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38,
81 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8,
82 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7,
83 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096,
84 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789,
85 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859,
86 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46,
87 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9,
88 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6,
89 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36,
90 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829,
91 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C,
92 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93,
93 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043,
94 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C,
95 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3,
96 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC,
97 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C,
98 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033,
99 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652,
100 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D,
101 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D,
102 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982,
103 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D,
104 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622,
105 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2,
106 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED,
107 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530,
108 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F,
109 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF,
110 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0,
111 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F,
112 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540,
113 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90,
114 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F,
115 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE,
116 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1,
117 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321,
118 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E,
119 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81,
120 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E,
121 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E,
122 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351
/*
 * CRC8 -- fold one data value d into the running crc and return the result.
 * The low byte of (crc ^ d) selects a crc32c_table entry, which is XORed
 * with crc shifted right by 8 bits.
 * NOTE(review): the declaration of d is not visible in this excerpt;
 * presumably it is a single byte -- confirm.
 */
125 static inline unsigned long CRC8 (unsigned long crc,
128 return ((crc >> 8) ^ crc32c_table[(crc ^ d) & 0xFF]);
/*
 * CRC16 -- fold a wider value d into the running crc by calling CRC8 twice,
 * each time with the low 8 bits of d.
 * NOTE(review): the statements between and after the two calls are not
 * visible here; presumably d is shifted right by 8 before the second call
 * and the updated crc is returned -- confirm.
 */
130 static inline unsigned long CRC16 (unsigned long crc,
133 crc = CRC8 (crc, d & 0xff);
135 crc = CRC8 (crc, d & 0xff);
/*
 * crc_eliding_c_comments -- fold the text in buf into crc while leaving
 * out comment text and, outside of literals, whitespace.
 *
 * The scan is a state machine over the characters of buf. The state names
 * visible here cover: inside a string or character literal, just after a
 * backslash inside either kind of literal, after a lone slash, inside a
 * slash-slash comment, inside a slash-star comment, and after a star
 * inside a slash-star comment. Branches ending in continue skip the
 * current character; a pending slash that turns out not to start a
 * comment is folded in explicitly with CRC8.
 *
 * NOTE(review): the outer case labels, the code following the inner
 * switches and the return statement are not visible in this excerpt.
 * Presumably a character that leaves a switch via break is folded in
 * with CRC8 and the final crc is returned -- confirm.
 */
141 crc_eliding_c_comments (const char *buf, unsigned long crc)
146 cSBACKSLASH, /* "...\ */
148 cCBACKSLASH, /* '...\ */
150 cSLASH_SLASH, /* //... */
151 cSLASH_STAR, /* / *... */
/* Next input character, read as unsigned so it can be fed to CRC8 directly. */
156 unsigned char c = *p++;
/*
 * States in which the input is not expected to end: the whole buffer is
 * echoed to stderr with a diagnostic.
 */
163 case cSTRING: case cSBACKSLASH:
164 case cCHAR: case cCBACKSLASH:
165 case cSLASH: case cSLASH_SLASH: case cSLASH_STAR: case cSTAR:
166 fprintf (stderr, "Inopportune EOF: %s\n", buf);
/* Presumably the double-quote character: opens or closes a string literal. */
172 case cOTHER: ss = cSTRING; break; /* start string */
173 case cSTRING: ss = cOTHER; break; /* end string */
174 case cSBACKSLASH: ss = cSTRING; break;
176 case cCBACKSLASH: ss = cCHAR; break;
177 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
178 case cSLASH_SLASH: continue; /* in comment */
179 case cSLASH_STAR: continue; /* in comment */
180 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Presumably the backslash character: toggles the escaped sub-state of a literal. */
186 case cSTRING: ss = cSBACKSLASH; break;
187 case cSBACKSLASH: ss = cSTRING; break;
188 case cCHAR: ss = cCBACKSLASH; break;
189 case cCBACKSLASH: ss = cCHAR; break;
/* NOTE(review): the doubled semicolon below is a harmless empty statement. */
190 case cSLASH: crc = CRC8 (crc, '/'); ; ss = cOTHER; break;
191 case cSLASH_SLASH: continue; /* in comment */
192 case cSLASH_STAR: continue; /* in comment */
193 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/*
 * Presumably a slash: the first one is held back as a potential comment
 * start, a second one opens a slash-slash comment, and one following a
 * star closes a slash-star comment.
 */
198 case cOTHER: ss = cSLASH; continue; /* potential comment */
200 case cSBACKSLASH: ss = cSTRING; break;
202 case cCBACKSLASH: ss = cCHAR; break;
203 case cSLASH: ss = cSLASH_SLASH; continue; /* start comment */
204 case cSLASH_SLASH: continue; /* in comment */
205 case cSLASH_STAR: continue; /* in comment */
206 case cSTAR: ss = cOTHER; continue; /* end of comment */
/*
 * Presumably a star: after a held-back slash it opens a slash-star
 * comment; inside such a comment it marks a possible end.
 */
213 case cSBACKSLASH: ss = cSTRING; break;
215 case cCBACKSLASH: ss = cCHAR; break;
216 case cSLASH: ss = cSLASH_STAR; continue; /* start comment */
217 case cSLASH_SLASH: continue; /* in comment */
218 case cSLASH_STAR: ss = cSTAR; continue; /* potential end */
219 case cSTAR: continue; /* still potential end of comment */
/* Whitespace characters (newline, carriage return, space, tab, form feed). */
222 case '\n': case '\r': case ' ': case '\t': case '\014':
224 case cOTHER: continue; /* ignore all whitespace */
226 case cSBACKSLASH: ss = cSTRING; break;
228 case cCBACKSLASH: ss = cCHAR; break;
/*
 * A held-back slash followed by whitespace was not a comment start: the
 * current character is replaced by the slash so that the slash, not the
 * whitespace, is what leaves the switch.
 */
229 case cSLASH: c = '/'; ss = cOTHER; break;
/* Only a newline or carriage return terminates a slash-slash comment. */
231 if (c == '\n' || c == '\r') ss = cOTHER; /* end comment */
233 case cSLASH_STAR: continue; /* in comment */
234 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/* Presumably the default branch, reached for every other character. */
240 case cSBACKSLASH: ss = cSTRING; break;
242 case cCBACKSLASH: ss = cCHAR; break;
243 case cSLASH: crc = CRC8 (crc, '/'); ss = cOTHER; break;
244 case cSLASH_SLASH: continue; /* in comment */
245 case cSLASH_STAR: continue; /* in comment */
246 case cSTAR: ss = cSLASH_STAR; continue; /* in comment */
/*
 * main -- command-line driver for the API generator.
 *
 * Walks argv one option at a time. Each option is recognised by comparing
 * only its first 3 (or, where two options share a longer prefix, 4)
 * characters with strncmp, so abbreviated spellings are accepted. Options
 * that take a value print a diagnostic on stderr when the value is absent.
 * Once the options are consumed the input is parsed with yyparse(); when
 * it returns 0 the names of the files that were written are printed.
 *
 * NOTE(review): because only a prefix is compared, any argument that
 * merely starts with the same 3 or 4 characters is treated as that
 * option -- confirm this is intended.
 * NOTE(review): the handling around the Removing messages is not visible
 * in this excerpt; presumably they belong to the parse-failure path and
 * the named files are deleted there -- confirm.
 */
256 int main (int argc, char **argv)
264 while (curarg < argc) {
/* --verbose: report the program name and version on stderr. */
265 if (!strncmp (argv [curarg], "--verbose", 3)) {
266 fprintf (stderr, "%s version %s\n", argv [0], version);
271 if (!strncmp (argv [curarg], "--yydebug", 3)) {
277 if (!strncmp (argv [curarg], "--dump", 3)) {
/* --show-name: remember an alternative name for the input. */
283 if (!strncmp (argv[curarg], "--show-name", 3)) {
286 show_name = argv[curarg];
290 fprintf(stderr, "Missing filename after --show-name \n");
/* --input: a single dash is special-cased, any other value is opened for reading. */
295 if (!strncmp (argv [curarg], "--input", 3)) {
298 input_filename = argv[curarg];
299 if (!strcmp (argv [curarg], "-"))
302 ifp = fopen (argv [curarg], "r");
304 fprintf (stderr, "Couldn't open input file %s\n",
310 fprintf(stderr, "Missing filename after --input\n");
/* --output: open the output file for writing and remember its name. */
315 if (!strncmp (argv [curarg], "--output", 3)) {
318 ofp = fopen (argv[curarg], "w");
320 fprintf (stderr, "Couldn't open output file %s\n",
324 ofile = argv[curarg];
327 fprintf(stderr, "Missing filename after --output\n");
/* --java and --jni share the first three characters, hence the 4-character compare. */
332 if (!strncmp (argv [curarg], "--java", 4)) {
335 javafp = fopen (argv[curarg], "w");
336 if (javafp == NULL) {
337 fprintf (stderr, "Couldn't open java output file %s\n",
341 jofile = argv[curarg];
344 fprintf(stderr, "Missing filename after --java\n");
349 if (!strncmp (argv [curarg], "--jni", 4)) {
352 jnifp = fopen (argv[curarg], "w");
354 fprintf (stderr, "Couldn't open jni output file %s\n",
358 jnifile = argv[curarg];
361 fprintf(stderr, "Missing filename after --jni\n");
/* --app: replace the default application name. */
366 if (!strncmp (argv [curarg], "--app", 4)) {
369 vlib_app_name = argv[curarg];
372 fprintf(stderr, "Missing app name after --app\n");
/* --class: replace the default Java class name. */
377 if (!strncmp (argv [curarg], "--class", 3)) {
380 java_class = argv[curarg];
383 fprintf(stderr, "Missing class name after --class\n");
395 if (javafp == NULL) {
402 fprintf(stderr, "No input file specified...\n");
/* The name given with --show-name replaces the real input file name from here on. */
406 input_filename = show_name;
409 starttime = time (0);
/* A zero return from the parser is the success path. */
411 if (yyparse() == 0) {
415 printf ("Output written to %s\n", ofile);
419 printf ("Java class defn written to %s\n", jofile);
423 printf ("Java native bindings written to %s\n", jnifile);
431 printf ("Removing %s\n", ofile);
436 printf ("Removing %s\n", jofile);
440 printf ("Removing %s\n", jnifile);
/*
 * usage -- print the command-line synopsis for progname.
 * NOTE(review): the visible synopsis lists only --input, --output,
 * --yydebug and --dump-tree, while main() also accepts --verbose,
 * --show-name, --java, --jni, --app and --class; the text looks out of
 * date -- confirm and extend it.
 */
451 static void usage (char *progname)
454 "usage: %s --input <filename> [--output <filename>]\n%s",
456 " [--yydebug] [--dump-tree]\n");
/*
 * yyerror -- error hook for the parser: writes the message s to stderr,
 * prefixed with the current file name and the lexer line number.
 */
463 void yyerror (char *s)
465 fprintf (stderr, "%s:%d %s\n", current_filename, the_lexer_linenumber, s);
/*
 * Token text accumulator shared by the lexer states in yylex_1(); the CRC
 * wrapper also reads it after a token has been returned. Holds at most
 * MAXNAME bytes including the terminator.
 */
468 static char namebuf [MAXNAME];
471 * yylex (well, yylex_1: The real yylex below does crc-hackery)
/*
 * yylex_1 -- the tokenizer proper, a state machine keyed on
 * the_lexer_state. Token text is accumulated in namebuf and handed to the
 * parser through yylval.
 *
 * Visible return values: the result of name_check() when a name ends and
 * HELPER_STRING when a helper string ends; the remaining returns are on
 * lines not visible in this excerpt.
 */
473 static int yylex_1 (void)
/* Sub-states used only while a preprocessor line pragma is being read. */
478 enum { LP_INITIAL_WHITESPACE, LP_LINE_NUMBER,
479 LP_PRE_FILENAME_WHITESPACE, LP_FILENAME,
482 } lp_substate = LP_INITIAL_WHITESPACE;
485 switch (the_lexer_state) {
487 * START state -- looking for something interesting
496 the_lexer_linenumber++;
502 fprintf (stderr, "unknown token /%c at line %d\n",
503 c, the_lexer_linenumber);
507 the_lexer_state = LINE_PRAGMA_STATE;
508 lp_substate = LP_INITIAL_WHITESPACE;
542 the_lexer_state = STRING_STATE;
547 the_lexer_state = HELPER_STATE;
556 the_lexer_state = CPP_COMMENT_STATE;
558 } else if (c == '*') {
559 the_lexer_state = C_COMMENT_STATE;
562 fprintf (stderr, "unknown token /%c at line %d\n",
563 c, the_lexer_linenumber);
572 /* Note fallthrough... */
/* A letter or underscore starts a name, a digit starts a number. */
575 if (isalpha (c) || c == '_') {
578 the_lexer_state = NAME_STATE;
580 } else if (isdigit(c)) {
583 the_lexer_state = NUMBER_STATE;
587 fprintf (stderr, "unknown token %c at line %d\n",
588 c, the_lexer_linenumber);
593 * NAME state -- eat the rest of a name
/* First character that cannot be part of a name: terminate and classify the text. */
600 if (!isalnum (c) && c != '_') {
602 namebuf [nameidx] = 0;
603 the_lexer_state = START_STATE;
604 return (name_check (namebuf, &yylval));
/* Keep one byte free for the terminator. */
606 if (nameidx >= (MAXNAME-1)) {
607 fprintf(stderr, "lex input buffer overflow...\n");
610 namebuf [nameidx++] = c;
614 * NUMBER state -- eat the rest of a number
623 namebuf [nameidx] = 0;
624 the_lexer_state = START_STATE;
/*
 * NOTE(review): atol gives no indication of overflow or malformed input,
 * and the integer result is carried in yylval through a pointer cast --
 * consider strtol with error checking.
 */
625 yylval = (void *) atol(namebuf);
628 if (nameidx >= (MAXNAME-1)) {
629 fprintf(stderr, "lex input buffer overflow...\n");
632 namebuf [nameidx++] = c;
636 * C_COMMENT state -- eat a peach
638 case C_COMMENT_STATE:
647 the_lexer_state = START_STATE;
652 the_lexer_linenumber++;
656 * CPP_COMMENT state -- eat a plum
659 case CPP_COMMENT_STATE:
664 the_lexer_linenumber++;
665 the_lexer_state = START_STATE;
/*
 * NOTE(review): this store appears to be the escaped-character path of
 * the string state; no bounds check against MAXNAME is visible before
 * it, unlike the ordinary-character path a few lines further down.
 */
679 namebuf[nameidx++] = c;
/* End of string: terminate the text and hand a heap copy to the parser. */
683 namebuf[nameidx] = 0;
684 yylval = (YYSTYPE) sxerox (namebuf);
685 the_lexer_state = START_STATE;
690 the_lexer_linenumber++;
692 if (nameidx >= (MAXNAME-1)) {
693 fprintf(stderr, "lex input buffer overflow...\n");
696 namebuf[nameidx++] = c;
/*
 * NOTE(review): unlike the otherwise parallel store in the string
 * handling above, this one does not advance nameidx, so the character
 * written here is overwritten by the next store or by the terminator.
 * This looks like a missing post-increment -- confirm and fix.
 */
710 namebuf[nameidx] = c;
/* End of helper string: terminate, copy, and return the HELPER_STRING token. */
714 namebuf[nameidx] = 0;
715 yylval = (YYSTYPE) sxerox (namebuf);
716 the_lexer_state = START_STATE;
717 return (HELPER_STRING);
721 the_lexer_linenumber++;
724 * CPP makes it approximately impossible to
725 * type "#define FOO 123", so we provide a
726 * lexical trick to achieve that result
732 if (nameidx >= (MAXNAME-1)) {
733 fprintf(stderr, "lex input buffer overflow...\n");
736 namebuf[nameidx++] = c;
/*
 * NOTE(review): none of the stores into namebuf in the line-pragma
 * handling below is preceded by a visible bounds check against MAXNAME;
 * a very long line number or file name could overrun the buffer --
 * confirm.
 */
741 case LINE_PRAGMA_STATE:
742 /* We're only interested in lines of the form # 259 "foo.c" 17 */
744 switch (lp_substate) {
746 case LP_INITIAL_WHITESPACE: /* no number seen yet */
750 if (c >= '0' && c <= '9') {
751 namebuf[nameidx++] = c;
752 lp_substate = LP_LINE_NUMBER;
753 } else if (c == '\n') {
755 } else if (c != ' ' && c != '\t') {
758 lp_substate = LP_OTHER;
762 case LP_LINE_NUMBER: /* eating linenumber */
766 if (c >= '0' && c <= '9') {
767 namebuf[nameidx++] = c;
768 } else if (c == ' ' || c == '\t') {
/* Whitespace ends the number: terminate it and adopt it as the current line. */
769 namebuf[nameidx++] = 0;
770 the_lexer_linenumber = atol(namebuf);
771 lp_substate = LP_PRE_FILENAME_WHITESPACE;
772 } else if (c == '\n') {
775 lp_substate = LP_OTHER;
779 case LP_PRE_FILENAME_WHITESPACE: /* awaiting filename */
785 lp_substate = LP_FILENAME;
787 } else if (c == ' ' || c == '\t') {
789 } else if (c == '\n') {
792 lp_substate = LP_OTHER;
796 case LP_FILENAME: /* eating filename */
802 lp_substate = LP_POST_FILENAME;
803 namebuf[nameidx] = 0;
804 } else if (c == '\n') {
805 goto lp_end_of_line; /* syntax error... */
807 namebuf[nameidx++] = c;
811 case LP_POST_FILENAME: /* ignoring rest of line */
/*
 * A complete file name was seen: release the previous heap copy if there
 * is one, then either fall back to the command-line input name (for the
 * stdin marker) or keep a fresh heap copy of the new name.
 */
818 if (lp_substate == LP_POST_FILENAME) {
819 if (current_filename_allocated) {
820 current_filename_allocated = 0;
821 free(current_filename);
824 if (!strcmp(namebuf, "<stdin>")) {
825 current_filename = input_filename;
827 current_filename = sxerox(namebuf);
828 current_filename_allocated = 1;
832 the_lexer_state = START_STATE;
/* Reached only when the_lexer_state holds a value no case handles. */
840 fprintf (stderr, "LEXER BUG!\n");
847 * Parse a token and side-effect input_crc
848 * in a whitespace- and comment-insensitive fashion.
853 * Accumulate a crc32-based signature while processing the
854 * input file. The goal is to come up with a magic number
855 * which changes precisely when the original input file changes
856 * but which ignores whitespace changes.
/*
 * Start from the signature accumulated so far and fetch the next token
 * from the real tokenizer.
 * NOTE(review): the function header and the statement that stores crc
 * back are not visible in this excerpt; presumably this is the body of
 * yylex and the result is written to input_crc before returning --
 * confirm.
 */
858 unsigned long crc = input_crc;
859 int node_type = yylex_1 ();
/* Tokens that carry text: the text itself, minus comments, feeds the signature. */
866 case HELPER_STRING: {
867 /* We know these types accumulated token text into namebuf */
868 /* HELPER_STRING may still contain C comments. Argh. */
869 crc = crc_eliding_c_comments (namebuf, crc);
873 /* Other node types have no "substate" */
874 /* This code is written in this curious fashion because we
875 * want the generated CRC to be independent of the particular
876 * values a particular version of lex/bison assigned to various states.
879 /* case NAME: crc = CRC16 (crc, 257); break; */
880 case RPAR: crc = CRC16 (crc, 258); break;
881 case LPAR: crc = CRC16 (crc, 259); break;
882 case SEMI: crc = CRC16 (crc, 260); break;
883 case LBRACK: crc = CRC16 (crc, 261); break;
884 case RBRACK: crc = CRC16 (crc, 262); break;
885 /* case NUMBER: crc = CRC16 (crc, 263); break; */
886 /* case PRIMTYPE: crc = CRC16 (crc, 264); break; */
887 case BARF: crc = CRC16 (crc, 265); break;
888 case TPACKED: crc = CRC16 (crc, 266); break;
889 case DEFINE: crc = CRC16 (crc, 267); break;
890 case LCURLY: crc = CRC16 (crc, 268); break;
891 case RCURLY: crc = CRC16 (crc, 269); break;
892 /* case STRING: crc = CRC16 (crc, 270); break; */
893 case UNION: crc = CRC16 (crc, 271); break;
894 /* case HELPER_STRING: crc = CRC16 (crc, 272); break; */
895 case COMMA: crc = CRC16 (crc, 273); break;
896 case NOVERSION: crc = CRC16 (crc, 274); break;
897 case MANUAL_PRINT: crc = CRC16 (crc, 275); break;
898 case MANUAL_ENDIAN: crc = CRC16 (crc, 276); break;
899 case MANUAL_JAVA: crc = CRC16 (crc, 277); break;
900 case TYPEONLY: crc = CRC16 (crc, 278); break;
901 case DONT_TRACE: crc = CRC16 (crc, 279); break;
/* End of input is folded in as an all-ones value rather than a numbered cookie. */
903 case EOF: crc = CRC16 (crc, ~0); break; /* hysterical compatibility */
/* A token type with no fixed cookie above is reported so that one can be assigned. */
906 fprintf(stderr, "yylex: node_type %d missing state CRC cookie\n",
917 * name_check -- see if the name we just ate
918 * matches a known keyword. If so, set yylval
919 * to a new instance of <subclass of node>, and return PARSER_MACRO
921 * Otherwise, set yylval to sxerox (s) and return NAME
/*
 * Keyword table: maps a reserved word to the node subclass that
 * name_check() acts on. name_check() searches it by bisection using
 * name_compare(), so new entries must be inserted in sorted order.
 */
924 static struct keytab {
926 enum node_subclass subclass_id;
928 /* Keep the table sorted, binary search used below! */
930 {"define", NODE_DEFINE},
931 {"dont_trace", NODE_DONT_TRACE},
937 {"manual_endian", NODE_MANUAL_ENDIAN},
938 {"manual_java", NODE_MANUAL_JAVA},
939 {"manual_print", NODE_MANUAL_PRINT},
940 {"noversion", NODE_NOVERSION},
941 {"packed", NODE_PACKED},
942 {"typeonly", NODE_TYPEONLY},
947 {"union", NODE_UNION},
948 {"uword", NODE_UWORD},
/*
 * name_check -- classify the identifier s.
 *
 * Bisects keytab with name_compare(). On a match the action depends on
 * the entry's subclass: some keywords store a new node from make_node()
 * in *token_value, the flag keywords store the corresponding NODE_FLAG_*
 * value and return their own token code. When s is not a keyword,
 * *token_value receives a heap copy of s made by sxerox().
 *
 * NOTE(review): several case labels and return statements are not
 * visible in this excerpt; the token returned for a plain identifier is
 * presumably NAME -- confirm.
 */
951 static int name_check (const char *s, YYSTYPE *token_value)
953 enum node_subclass subclass_id;
/* top and bot bracket the part of keytab still under consideration. */
957 for (top = 0, bot = (sizeof(keytab) / sizeof(struct keytab))-1;
959 mid = (top + bot) / 2;
960 result = name_compare (s, keytab[mid].name);
/* Keyword found: act on its subclass. */
966 subclass_id = keytab[mid].subclass_id;
968 switch (subclass_id) {
979 *token_value = make_node(subclass_id);
983 *token_value = make_node(subclass_id);
987 *token_value = make_node(subclass_id);
/* Flag keywords carry their flag value to the parser instead of a node. */
990 case NODE_MANUAL_PRINT:
991 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_PRINT;
992 return (MANUAL_PRINT);
994 case NODE_MANUAL_ENDIAN:
995 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_ENDIAN;
996 return (MANUAL_ENDIAN);
998 case NODE_MANUAL_JAVA:
999 *token_value = (YYSTYPE) NODE_FLAG_MANUAL_JAVA;
1000 return (MANUAL_JAVA);
1003 *token_value = (YYSTYPE) NODE_FLAG_TYPEONLY;
1006 case NODE_DONT_TRACE:
1007 *token_value = (YYSTYPE) NODE_FLAG_DONT_TRACE;
1010 case NODE_NOVERSION:
/* A keytab entry whose subclass has no case above indicates a table/switch mismatch. */
1017 fprintf (stderr, "fatal: keytab botch!\n");
/* Not a keyword: give the parser its own copy of the identifier text. */
1022 *token_value = (YYSTYPE) sxerox (s);
/*
 * sxerox -- make a heap copy of the string s; the buffer is sized for
 * the text plus its terminator.
 * NOTE(review): the copy and return statements are not visible in this
 * excerpt, and no check of the malloc result is visible either --
 * confirm that allocation failure is handled.
 * NOTE(review): strlen returns size_t; storing it in an int narrows the
 * value.
 */
1030 char *sxerox (const char *s)
1032 int len = strlen (s);
1035 rv = (char *) malloc (len+1);
1044 int name_compare (const char *s1, const char *s2)
1048 while (*s1 && *s2) {