Add a LALR grammar for Rust with testing support

2025-04-13 04:26:48 +00:00 · 2015-01-20 18:46:44 -08:00 · 2015-01-20 18:46:44 -08:00 · f39297f991
commit f39297f991
parent 29bd9a06ef
7 changed files with 2663 additions and 0 deletions
--- a/2
+++ b/2
@ -647,6 +647,8 @@ probe CFG_ISCC             iscc
 probe CFG_JAVAC            javac
 probe CFG_ANTLR4           antlr4
 probe CFG_GRUN             grun
+probe CFG_FLEX             flex
+probe CFG_BISON            bison
 probe CFG_PANDOC           pandoc
 probe CFG_PDFLATEX         pdflatex
 probe CFG_XELATEX          xelatex
--- a/mk/grammar.mk
+++ b/mk/grammar.mk
@ -14,6 +14,11 @@ B = $(CFG_BUILD_DIR)/$(CFG_BUILD)/stage2/
 L = $(B)lib/rustlib/$(CFG_BUILD)/lib
 LD = $(CFG_BUILD)/stage2/lib/rustlib/$(CFG_BUILD)/lib/
 RUSTC = $(STAGE2_T_$(CFG_BUILD)_H_$(CFG_BUILD))
+ifeq ($(CFG_OSTYPE),apple-darwin)
+	FLEX_LDFLAGS=-ll
+else
+	FLEX_LDFLAGS=-lfl
+endif

 # Run the reference lexer against libsyntax and compare the tokens and spans.
 # If "// ignore-lexer-test" is present in the file, it will be ignored.
@ -67,3 +72,46 @@ $(info cfg: javac not available, skipping lexer test...)
 check-lexer:

 endif
+
+$(BG)lex.yy.c: $(SG)lexer.l $(BG)
+	@$(call E, flex: $@)
+	$(Q)$(CFG_FLEX) -o $@ $<
+
+$(BG)lexer-lalr.o: $(BG)lex.yy.c $(BG)parser-lalr.tab.h
+	@$(call E, cc: $@)
+	$(Q)$(CFG_CC) -include $(BG)parser-lalr.tab.h -c -o $@ $<
+
+$(BG)parser-lalr.tab.c $(BG)parser-lalr.tab.h: $(SG)parser-lalr.y
+	@$(call E, bison: $@)
+	$(Q)$(CFG_BISON) $< --output=$(BG)parser-lalr.tab.c --defines=$(BG)parser-lalr.tab.h \
+		--name-prefix=rs --warnings=error=all
+
+$(BG)parser-lalr.o: $(BG)parser-lalr.tab.c
+	@$(call E, cc: $@)
+	$(Q)$(CFG_CC) -c -o $@ $<
+
+$(BG)parser-lalr-main.o: $(SG)parser-lalr-main.c
+	@$(call E, cc: $@)
+	$(Q)$(CFG_CC) -std=c99 -c -o $@ $<
+
+$(BG)parser-lalr: $(BG)parser-lalr.o $(BG)parser-lalr-main.o $(BG)lexer-lalr.o
+	@$(call E, cc: $@)
+	$(Q)$(CFG_CC) -o $@ $^ $(FLEX_LDFLAGS)
+
+
+ifdef CFG_FLEX
+ifdef CFG_BISON
+check-grammar: $(BG) $(BG)parser-lalr
+	$(info Verifying grammar ...)
+	$(SG)testparser.py -p $(BG)parser-lalr -s $(S)src
+
+else
+$(info cfg: bison not available, skipping parser test...)
+check-grammar:
+
+endif
+else
+$(info cfg: flex not available, skipping parser test...)
+check-grammar:
+
+endif
--- a/src/grammar/lexer.l
+++ b/src/grammar/lexer.l
@ -0,0 +1,342 @@
+%{
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#include <stdio.h>
+#include <ctype.h>
+
+static int num_hashes;
+static int end_hashes;
+static int saw_non_hash;
+
+%}
+
+%option stack
+%option yylineno
+
+%x str
+%x rawstr
+%x rawstr_esc_begin
+%x rawstr_esc_body
+%x rawstr_esc_end
+%x byte
+%x bytestr
+%x rawbytestr
+%x rawbytestr_nohash
+%x pound
+%x shebang_or_attr
+%x ltorchar
+%x linecomment
+%x doc_line
+%x blockcomment
+%x doc_block
+%x suffix
+
+ident [a-zA-Z\x80-\xff_][a-zA-Z0-9\x80-\xff_]*
+
+%%
+
+<suffix>{ident}            { BEGIN(INITIAL); }
+<suffix>(.|\n)  { yyless(0); BEGIN(INITIAL); }
+
+[ \n\t\r]             { }
+
+\xef\xbb\xbf {
+  // UTF-8 byte order mark (BOM), ignore if in line 1, error otherwise
+  if (yyget_lineno() != 1) {
+    return -1;
+  }
+}
+
+\/\/(\/|\!)           { BEGIN(doc_line); yymore(); }
+<doc_line>\n          { BEGIN(INITIAL);
+                        yyleng--;
+                        yytext[yyleng] = 0;
+                        return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
+                      }
+<doc_line>[^\n]*      { yymore(); }
+
+\/\/|\/\/\/\/         { BEGIN(linecomment); }
+<linecomment>\n       { BEGIN(INITIAL); }
+<linecomment>[^\n]*   { }
+
+\/\*(\*|\!)[^*]       { yy_push_state(INITIAL); yy_push_state(doc_block); yymore(); }
+<doc_block>\/\*       { yy_push_state(doc_block); yymore(); }
+<doc_block>\*\/       {
+    yy_pop_state();
+    if (yy_top_state() == doc_block) {
+        yymore();
+    } else {
+        return ((yytext[2] == '!') ? INNER_DOC_COMMENT : OUTER_DOC_COMMENT);
+    }
+}
+<doc_block>(.|\n)     { yymore(); }
+
+\/\*                  { yy_push_state(blockcomment); }
+<blockcomment>\/\*    { yy_push_state(blockcomment); }
+<blockcomment>\*\/    { yy_pop_state(); }
+<blockcomment>(.|\n)   { }
+
+_        { return UNDERSCORE; }
+as       { return AS; }
+box      { return BOX; }
+break    { return BREAK; }
+const    { return CONST; }
+continue { return CONTINUE; }
+crate    { return CRATE; }
+else     { return ELSE; }
+enum     { return ENUM; }
+extern   { return EXTERN; }
+false    { return FALSE; }
+fn       { return FN; }
+for      { return FOR; }
+if       { return IF; }
+impl     { return IMPL; }
+in       { return IN; }
+let      { return LET; }
+loop     { return LOOP; }
+match    { return MATCH; }
+mod      { return MOD; }
+move     { return MOVE; }
+mut      { return MUT; }
+priv     { return PRIV; }
+proc     { return PROC; }
+pub      { return PUB; }
+ref      { return REF; }
+return   { return RETURN; }
+self     { return SELF; }
+static   { return STATIC; }
+struct   { return STRUCT; }
+trait    { return TRAIT; }
+true     { return TRUE; }
+type     { return TYPE; }
+typeof   { return TYPEOF; }
+unsafe   { return UNSAFE; }
+use      { return USE; }
+where    { return WHERE; }
+while    { return WHILE; }
+
+{ident}  { return IDENT; }
+
+0x[0-9a-fA-F_]+                                    { BEGIN(suffix); return LIT_INTEGER; }
+0o[0-8_]+                                          { BEGIN(suffix); return LIT_INTEGER; }
+0b[01_]+                                           { BEGIN(suffix); return LIT_INTEGER; }
+[0-9][0-9_]*                                       { BEGIN(suffix); return LIT_INTEGER; }
+[0-9][0-9_]*\.(\.|[a-zA-Z])    { yyless(yyleng - 2); BEGIN(suffix); return LIT_INTEGER; }
+
+[0-9][0-9_]*\.[0-9_]*([eE][-\+]?[0-9_]+)?          { BEGIN(suffix); return LIT_FLOAT; }
+[0-9][0-9_]*(\.[0-9_]*)?[eE][-\+]?[0-9_]+          { BEGIN(suffix); return LIT_FLOAT; }
+
+;      { return ';'; }
+,      { return ','; }
+\.\.\. { return DOTDOTDOT; }
+\.\.   { return DOTDOT; }
+\.     { return '.'; }
+\(     { return '('; }
+\)     { return ')'; }
+\{     { return '{'; }
+\}     { return '}'; }
+\[     { return '['; }
+\]     { return ']'; }
+@      { return '@'; }
+#      { BEGIN(pound); yymore(); }
+<pound>\! { BEGIN(shebang_or_attr); yymore(); }
+<shebang_or_attr>\[ {
+  BEGIN(INITIAL);
+  yyless(2);
+  return SHEBANG;
+}
+<shebang_or_attr>[^\[\n]*\n {
+  // Since the \n was eaten as part of the token, yylineno will have
+  // been incremented to the value 2 if the shebang was on the first
+  // line. This yyless undoes that, setting yylineno back to 1.
+  yyless(yyleng - 1);
+  if (yyget_lineno() == 1) {
+    BEGIN(INITIAL);
+    return SHEBANG_LINE;
+  } else {
+    BEGIN(INITIAL);
+    yyless(2);
+    return SHEBANG;
+  }
+}
+<pound>. { BEGIN(INITIAL); yyless(1); return '#'; }
+
+\~     { return '~'; }
+::     { return MOD_SEP; }
+:      { return ':'; }
+\$     { return '$'; }
+\?     { return '?'; }
+
+==    { return EQEQ; }
+=>    { return FAT_ARROW; }
+=     { return '='; }
+\!=   { return NE; }
+\!    { return '!'; }
+\<=   { return LE; }
+\<\<  { return SHL; }
+\<\<= { return SHLEQ; }
+\<    { return '<'; }
+\>=   { return GE; }
+\>\>  { return SHR; }
+\>\>= { return SHREQ; }
+\>    { return '>'; }
+
+\x27                                  { BEGIN(ltorchar); yymore(); }
+<ltorchar>static                      { BEGIN(INITIAL); return STATIC_LIFETIME; }
+<ltorchar>{ident}                     { BEGIN(INITIAL); return LIFETIME; }
+<ltorchar>\\[nrt\\\x27\x220]\x27      { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>\\x[0-9a-fA-F]{2}\x27       { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>\\u\{[0-9a-fA-F]?{6}\}\x27  { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>.\x27                       { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar>[\x80-\xff]{2,4}\x27        { BEGIN(suffix); return LIT_CHAR; }
+<ltorchar><<EOF>>                     { BEGIN(INITIAL); return -1; }
+
+b\x22              { BEGIN(bytestr); yymore(); }
+<bytestr>\x22      { BEGIN(suffix); return LIT_BINARY; }
+
+<bytestr><<EOF>>                { return -1; }
+<bytestr>\\[n\nrt\\\x27\x220]   { yymore(); }
+<bytestr>\\x[0-9a-fA-F]{2}      { yymore(); }
+<bytestr>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
+<bytestr>\\[^n\nrt\\\x27\x220]  { return -1; }
+<bytestr>(.|\n)                 { yymore(); }
+
+br\x22                      { BEGIN(rawbytestr_nohash); yymore(); }
+<rawbytestr_nohash>\x22     { BEGIN(suffix); return LIT_BINARY_RAW; }
+<rawbytestr_nohash>(.|\n)   { yymore(); }
+<rawbytestr_nohash><<EOF>>  { return -1; }
+
+br/# {
+    BEGIN(rawbytestr);
+    yymore();
+    num_hashes = 0;
+    saw_non_hash = 0;
+    end_hashes = 0;
+}
+<rawbytestr># {
+    if (!saw_non_hash) {
+        num_hashes++;
+    } else if (end_hashes != 0) {
+        end_hashes++;
+        if (end_hashes == num_hashes) {
+            BEGIN(INITIAL);
+            return LIT_BINARY_RAW;
+        }
+    }
+    yymore();
+}
+<rawbytestr>\x22# {
+    end_hashes = 1;
+    if (end_hashes == num_hashes) {
+        BEGIN(INITIAL);
+        return LIT_BINARY_RAW;
+    }
+    yymore();
+}
+<rawbytestr>(.|\n) {
+    if (!saw_non_hash) {
+        saw_non_hash = 1;
+    }
+    if (end_hashes != 0) {
+        end_hashes = 0;
+    }
+    yymore();
+}
+<rawbytestr><<EOF>> { return -1; }
+
+b\x27                        { BEGIN(byte); yymore(); }
+<byte>\\[nrt\\\x27\x220]\x27 { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>\\x[0-9a-fA-F]{2}\x27  { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>\\u[0-9a-fA-F]{4}\x27  { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>\\U[0-9a-fA-F]{8}\x27  { BEGIN(INITIAL); return LIT_BYTE; }
+<byte>.\x27                  { BEGIN(INITIAL); return LIT_BYTE; }
+<byte><<EOF>>                { BEGIN(INITIAL); return -1; }
+
+r\x22           { BEGIN(rawstr); yymore(); }
+<rawstr>\x22    { BEGIN(suffix); return LIT_STR_RAW; }
+<rawstr>(.|\n)  { yymore(); }
+<rawstr><<EOF>> { return -1; }
+
+r/#             {
+    BEGIN(rawstr_esc_begin);
+    yymore();
+    num_hashes = 0;
+    saw_non_hash = 0;
+    end_hashes = 0;
+}
+
+<rawstr_esc_begin># {
+    num_hashes++;
+    yymore();
+}
+<rawstr_esc_begin>\x22 {
+    BEGIN(rawstr_esc_body);
+    yymore();
+}
+<rawstr_esc_begin>(.|\n) { return -1; }
+
+<rawstr_esc_body>\x22/# {
+  BEGIN(rawstr_esc_end);
+  yymore();
+ }
+<rawstr_esc_body>(.|\n) {
+  yymore();
+ }
+
+<rawstr_esc_end># {
+  end_hashes++;
+  if (end_hashes == num_hashes) {
+    BEGIN(INITIAL);
+    return LIT_STR_RAW;
+  }
+  yymore();
+ }
+<rawstr_esc_end>[^#] {
+  end_hashes = 0;
+  BEGIN(rawstr_esc_body);
+  yymore();
+ }
+
+<rawstr_esc_begin,rawstr_esc_body,rawstr_esc_end><<EOF>> { return -1; }
+
+\x22                     { BEGIN(str); yymore(); }
+<str>\x22                { BEGIN(suffix); return LIT_STR; }
+
+<str><<EOF>>                { return -1; }
+<str>\\[n\nrt\\\x27\x220]   { yymore(); }
+<str>\\x[0-9a-fA-F]{2}      { yymore(); }
+<str>\\u\{[0-9a-fA-F]?{6}\} { yymore(); }
+<str>\\[^n\nrt\\\x27\x220]  { return -1; }
+<str>(.|\n)                 { yymore(); }
+
+-\>  { return RARROW; }
+-    { return '-'; }
+-=   { return MINUSEQ; }
+&&   { return ANDAND; }
+&    { return '&'; }
+&=   { return ANDEQ; }
+\|\| { return OROR; }
+\|   { return '|'; }
+\|=  { return OREQ; }
+\+   { return '+'; }
+\+=  { return PLUSEQ; }
+\*   { return '*'; }
+\*=  { return STAREQ; }
+\/   { return '/'; }
+\/=  { return SLASHEQ; }
+\^   { return '^'; }
+\^=  { return CARETEQ; }
+%    { return '%'; }
+%=   { return PERCENTEQ; }
+
+<<EOF>> { return 0; }
+
+%%
--- a/src/grammar/parser-lalr-main.c
+++ b/src/grammar/parser-lalr-main.c
@ -0,0 +1,203 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+
+extern int yylex();
+extern int rsparse();
+
+#define PUSHBACK_LEN 4
+
+static char pushback[PUSHBACK_LEN];
+static int verbose;
+
+void print(const char* format, ...) {
+  va_list args;
+  va_start(args, format);
+  if (verbose) {
+    vprintf(format, args);
+  }
+  va_end(args);
+}
+
+// If there is a non-null char at the head of the pushback queue,
+// dequeue it and shift the rest of the queue forwards. Otherwise,
+// return the token from calling yylex.
+int rslex() {
+  if (pushback[0] == '\0') {
+    return yylex();
+  } else {
+    char c = pushback[0];
+    memmove(pushback, pushback + 1, PUSHBACK_LEN - 1);
+    pushback[PUSHBACK_LEN - 1] = '\0';
+    return c;
+  }
+}
+
+// Note: this does nothing if the pushback queue is full. As long as
+// there aren't more than PUSHBACK_LEN consecutive calls to push_back
+// in an action, this shouldn't be a problem.
+void push_back(char c) {
+  for (int i = 0; i < PUSHBACK_LEN; ++i) {
+    if (pushback[i] == '\0') {
+      pushback[i] = c;
+      break;
+    }
+  }
+}
+
+extern int rsdebug;
+
+struct node {
+  struct node *next;
+  struct node *prev;
+  int own_string;
+  char const *name;
+  int n_elems;
+  struct node *elems[];
+};
+
+struct node *nodes = NULL;
+int n_nodes;
+
+struct node *mk_node(char const *name, int n, ...) {
+  va_list ap;
+  int i = 0;
+  unsigned sz = sizeof(struct node) + (n * sizeof(struct node *));
+  struct node *nn, *nd = (struct node *)malloc(sz);
+
+  print("# New %d-ary node: %s = %p\n", n, name, nd);
+
+  nd->own_string = 0;
+  nd->prev = NULL;
+  nd->next = nodes;
+  if (nodes) {
+    nodes->prev = nd;
+  }
+  nodes = nd;
+
+  nd->name = name;
+  nd->n_elems = n;
+
+  va_start(ap, n);
+  while (i < n) {
+    nn = va_arg(ap, struct node *);
+    print("#   arg[%d]: %p\n", i, nn);
+    print("#            (%s ...)\n", nn->name);
+    nd->elems[i++] = nn;
+  }
+  va_end(ap);
+  n_nodes++;
+  return nd;
+}
+
+struct node *mk_atom(char *name) {
+  struct node *nd = mk_node((char const *)strdup(name), 0);
+  nd->own_string = 1;
+  return nd;
+}
+
+struct node *mk_none() {
+  return mk_atom("<none>");
+}
+
+struct node *ext_node(struct node *nd, int n, ...) {
+  va_list ap;
+  int i = 0, c = nd->n_elems + n;
+  unsigned sz = sizeof(struct node) + (c * sizeof(struct node *));
+  struct node *nn;
+
+  print("# Extending %d-ary node by %d nodes: %s = %p",
+        nd->n_elems, c, nd->name, nd);
+
+  if (nd->next) {
+    nd->next->prev = nd->prev;
+  }
+  if (nd->prev) {
+    nd->prev->next = nd->next;
+  }
+  nd = realloc(nd, sz);
+  nd->prev = NULL;
+  nd->next = nodes;
+  nodes->prev = nd;
+  nodes = nd;
+
+  print(" ==> %p\n", nd);
+
+  va_start(ap, n);
+  while (i < n) {
+    nn = va_arg(ap, struct node *);
+    print("#   arg[%d]: %p\n", i, nn);
+    print("#            (%s ...)\n", nn->name);
+    nd->elems[nd->n_elems++] = nn;
+    ++i;
+  }
+  va_end(ap);
+  return nd;
+}
+
+int const indent_step = 4;
+
+void print_indent(int depth) {
+  while (depth) {
+    if (depth-- % indent_step == 0) {
+      print("|");
+    } else {
+      print(" ");
+    }
+  }
+}
+
+void print_node(struct node *n, int depth) {
+  int i = 0;
+  print_indent(depth);
+  if (n->n_elems == 0) {
+    print("%s\n", n->name);
+  } else {
+    print("(%s\n", n->name);
+    for (i = 0; i < n->n_elems; ++i) {
+      print_node(n->elems[i], depth + indent_step);
+    }
+    print_indent(depth);
+    print(")\n");
+  }
+}
+
+int main(int argc, char **argv) {
+  if (argc == 2 && strcmp(argv[1], "-v") == 0) {
+    verbose = 1;
+  } else {
+    verbose = 0;
+  }
+  int ret = 0;
+  struct node *tmp;
+  memset(pushback, '\0', PUSHBACK_LEN);
+  ret = rsparse();
+  print("--- PARSE COMPLETE: ret:%d, n_nodes:%d ---\n", ret, n_nodes);
+  if (nodes) {
+    print_node(nodes, 0);
+  }
+  while (nodes) {
+    tmp = nodes;
+    nodes = tmp->next;
+    if (tmp->own_string) {
+      free((void*)tmp->name);
+    }
+    free(tmp);
+  }
+  return ret;
+}
+
+void rserror(char const *s) {
+  fprintf(stderr, "%s\n", s);
+}
--- a/src/grammar/parser-lalr.y
+++ b/src/grammar/parser-lalr.y
--- a/src/grammar/testparser.py
+++ b/src/grammar/testparser.py
@ -0,0 +1,65 @@
+#!/usr/bin/env python
+#
+# Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+import sys
+
+import os
+import subprocess
+import argparse
+
+# usage: testparser.py [-h] [-p PARSER [PARSER ...]] -s SOURCE_DIR
+
+# Parsers should read from stdin and return exit status 0 for a
+# successful parse, and nonzero for an unsuccessful parse
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-p', '--parser', nargs='+')
+parser.add_argument('-s', '--source-dir', nargs=1, required=True)
+args = parser.parse_args(sys.argv[1:])
+
+total = 0
+ok = {}
+bad = {}
+for parser in args.parser:
+    ok[parser] = 0
+    bad[parser] = []
+devnull = open(os.devnull, 'w')
+print "\n"
+
+for base, dirs, files in os.walk(args.source_dir[0]):
+    for f in filter(lambda p: p.endswith('.rs'), files):
+        p = os.path.join(base, f)
+        compile_fail = 'compile-fail' in p
+        ignore = any('ignore-test' in line or 'ignore-lexer-test' in line
+                     for line in open(p).readlines())
+        if compile_fail or ignore:
+            continue
+        total += 1
+        for parser in args.parser:
+            if subprocess.call(parser, stdin=open(p), stderr=subprocess.STDOUT, stdout=devnull) == 0:
+                ok[parser] += 1
+            else:
+                bad[parser].append(p)
+        parser_stats = ', '.join(['{}: {}'.format(parser, ok[parser]) for parser in args.parser])
+        sys.stdout.write("\033[K\r total: {}, {}, scanned {}"
+                         .format(total, os.path.relpath(parser_stats), os.path.relpath(p)))
+
+devnull.close()
+
+print "\n"
+
+for parser in args.parser:
+    filename = os.path.basename(parser) + '.bad'
+    print("writing {} files that failed to parse with {} to {}".format(len(bad[parser]), parser, filename))
+    with open(filename, "w") as f:
+          for p in bad[parser]:
+              f.write(p)
+              f.write("\n")
--- a/src/grammar/tokens.h
+++ b/src/grammar/tokens.h
@ -0,0 +1,91 @@
+// Copyright 2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+enum Token {
+  SHL = 257, // Parser generators reserve 0-256 for char literals
+  SHR,
+  LE,
+  EQEQ,
+  NE,
+  GE,
+  ANDAND,
+  OROR,
+  SHLEQ,
+  SHREQ,
+  MINUSEQ,
+  ANDEQ,
+  OREQ,
+  PLUSEQ,
+  STAREQ,
+  SLASHEQ,
+  CARETEQ,
+  PERCENTEQ,
+  DOTDOT,
+  DOTDOTDOT,
+  MOD_SEP,
+  RARROW,
+  FAT_ARROW,
+  LIT_BYTE,
+  LIT_CHAR,
+  LIT_INTEGER,
+  LIT_FLOAT,
+  LIT_STR,
+  LIT_STR_RAW,
+  LIT_BINARY,
+  LIT_BINARY_RAW,
+  IDENT,
+  UNDERSCORE,
+  LIFETIME,
+
+  // keywords
+  SELF,
+  STATIC,
+  AS,
+  BREAK,
+  CRATE,
+  ELSE,
+  ENUM,
+  EXTERN,
+  FALSE,
+  FN,
+  FOR,
+  IF,
+  IMPL,
+  IN,
+  LET,
+  LOOP,
+  MATCH,
+  MOD,
+  MOVE,
+  MUT,
+  PRIV,
+  PUB,
+  REF,
+  RETURN,
+  STRUCT,
+  TRUE,
+  TRAIT,
+  TYPE,
+  UNSAFE,
+  USE,
+  WHILE,
+  CONTINUE,
+  PROC,
+  BOX,
+  CONST,
+  WHERE,
+  TYPEOF,
+  INNER_DOC_COMMENT,
+  OUTER_DOC_COMMENT,
+
+  SHEBANG,
+  SHEBANG_LINE,
+  STATIC_LIFETIME
+};