Add a regex crate to the Rust distribution.

Also adds a regex_macros crate, which provides natively compiled
regular expressions with a syntax extension.

Closes #3591.

RFC: 0007-regexps
Andrew Gallant 2014-04-25 00:27:24 -04:00
parent 66486518d5
commit b8b7484703
23 changed files with 11102 additions and 2 deletions


@@ -51,8 +51,8 @@
TARGET_CRATES := libc std green rustuv native flate arena glob term semver \
uuid serialize sync getopts collections num test time rand \
-             workcache url log
-HOST_CRATES := syntax rustc rustdoc fourcc hexfloat
+             workcache url log regex
+HOST_CRATES := syntax rustc rustdoc fourcc hexfloat regex_macros
CRATES := $(TARGET_CRATES) $(HOST_CRATES)
TOOLS := compiletest rustdoc rustc
@@ -84,6 +84,8 @@ DEPS_rand := std
DEPS_url := std collections
DEPS_workcache := std serialize collections log
DEPS_log := std sync
DEPS_regex := std collections
DEPS_regex_macros = syntax std regex
TOOL_DEPS_compiletest := test green rustuv getopts
TOOL_DEPS_rustdoc := rustdoc native


@@ -19,6 +19,7 @@ Source layout:
| `libfourcc/` | Data format identifier library |
| `libgetopts/` | Get command-line-options library |
| `libglob/` | Unix glob patterns library |
| `libregex/` | Regular expressions |
| `libsemver/` | Rust's semantic versioning library |
| `libserialize/` | Encode-Decode types library |
| `libsync/` | Concurrency mechanisms and primitives |


@@ -41,6 +41,7 @@ li {list-style-type: none; }
* [The `native` 1:1 threading runtime](native/index.html)
* [The `num` arbitrary precision numerics library](num/index.html)
* [The `rand` library for random numbers and distributions](rand/index.html)
* [The `regex` library for regular expressions](regex/index.html)
* [The `rustc` compiler](rustc/index.html)
* [The `rustuv` M:N I/O library](rustuv/index.html)
* [The `semver` version collation library](semver/index.html)

109
src/etc/regex-match-tests.py Executable file

@@ -0,0 +1,109 @@
#!/usr/bin/env python2
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
from __future__ import absolute_import, division, print_function
import argparse
import datetime
import os.path as path
def print_tests(tests):
print('\n'.join([test_tostr(t) for t in tests]))
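# read_tests parses an AT&T POSIX .dat test file. Each tab-separated line
# looks roughly like this (illustrative, not a real entry):
#   E   a(b)c   abc   (0,3)(1,2)
# where the last field is either a ')('-separated list of spans or NOMATCH.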
def read_tests(f):
basename, _ = path.splitext(path.basename(f))
tests = []
for lineno, line in enumerate(open(f), 1):
fields = filter(None, map(str.strip, line.split('\t')))
if not (4 <= len(fields) <= 5) \
or 'E' not in fields[0] or fields[0][0] == '#':
continue
opts, pat, text, sgroups = fields[0:4]
groups = [] # groups as integer ranges
if sgroups == 'NOMATCH':
groups = [None]
elif ',' in sgroups:
noparen = map(lambda s: s.strip('()'), sgroups.split(')('))
for g in noparen:
s, e = map(str.strip, g.split(','))
if s == '?' and e == '?':
groups.append(None)
else:
groups.append((int(s), int(e)))
else:
# This skips tests that should result in an error.
# There aren't many, so I think we can just capture those
# manually. Possibly fix this in future.
continue
if pat == 'SAME':
pat = tests[-1][1]
if '$' in opts:
pat = pat.decode('string_escape')
text = text.decode('string_escape')
if 'i' in opts:
pat = '(?i)%s' % pat
name = '%s_%d' % (basename, lineno)
tests.append((name, pat, text, groups))
return tests
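# test_tostr renders a single test as one line of Rust source, e.g.
# (illustrative): mat!(match_basic_3, r"a+", r"aaa", Some((0, 3)))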
def test_tostr(t):
lineno, pat, text, groups = t
options = map(group_tostr, groups)
return 'mat!(match_%s, r"%s", r"%s", %s)' \
% (lineno, pat, '' if text == "NULL" else text, ', '.join(options))
def group_tostr(g):
if g is None:
return 'None'
else:
return 'Some((%d, %d))' % (g[0], g[1])
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Generate match tests from an AT&T POSIX test file.')
aa = parser.add_argument
aa('files', nargs='+',
help='A list of AT&T POSIX .dat test files. See src/libregex/testdata')
args = parser.parse_args()
tests = []
for f in args.files:
tests += read_tests(f)
tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-tidy-linelength
// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests.py'
// on {date}.
'''
print(tpl.format(date=str(datetime.datetime.now())))
for f in args.files:
print('// Tests from %s' % path.basename(f))
print_tests(read_tests(f))
print('')

183
src/etc/regex-unicode-tables.py Executable file

@@ -0,0 +1,183 @@
#!/usr/bin/env python2
# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
# file at the top-level directory of this distribution and at
# http://rust-lang.org/COPYRIGHT.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.
from __future__ import absolute_import, division, print_function
import argparse
from collections import defaultdict
import csv
import datetime
import urllib2
BASE_URL = 'http://www.unicode.org/Public/6.3.0/ucd/'
DATA = 'UnicodeData.txt'
SCRIPTS = 'Scripts.txt'
# Mapping taken from Table 12 from:
# http://www.unicode.org/reports/tr44/#General_Category_Values
expanded_categories = {
'Lu': ['LC', 'L'], 'Ll': ['LC', 'L'], 'Lt': ['LC', 'L'],
'Lm': ['L'], 'Lo': ['L'],
'Mn': ['M'], 'Mc': ['M'], 'Me': ['M'],
'Nd': ['N'], 'Nl': ['N'], 'No': ['N'],
'Pc': ['P'], 'Pd': ['P'], 'Ps': ['P'], 'Pe': ['P'],
'Pi': ['P'], 'Pf': ['P'], 'Po': ['P'],
'Sm': ['S'], 'Sc': ['S'], 'Sk': ['S'], 'So': ['S'],
'Zs': ['Z'], 'Zl': ['Z'], 'Zp': ['Z'],
'Cc': ['C'], 'Cf': ['C'], 'Cs': ['C'], 'Co': ['C'], 'Cn': ['C'],
}
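# as_4byte_uni renders a codepoint as a Rust '\U00000000'-style escape;
# e.g. (illustrative) as_4byte_uni(0x41) produces the text \U00000041.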
def as_4byte_uni(n):
s = hex(n)[2:]
return '\\U%s%s' % ('0' * (8 - len(s)), s)
def expand_cat(c):
return expanded_categories.get(c, []) + [c]
def is_valid_unicode(n):
return 0 <= n <= 0xD7FF or 0xE000 <= n <= 0x10FFFF
def read_cats(f):
assigned = defaultdict(list)
for row in csv.reader(f, delimiter=';'):
(hex, cats) = (int(row[0], 16), expand_cat(row[2]))
if not is_valid_unicode(hex):
continue
for cat in cats:
assigned[cat].append(hex)
return assigned
def read_scripts(f):
assigned = defaultdict(list)
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
hexes, name = map(str.strip, line.split(';'))[:2]
name = name[:name.index('#')].strip()
if '..' not in hexes:
hex = int(hexes, 16)
if is_valid_unicode(hex):
assigned[name].append(hex)
else:
hex1, hex2 = map(lambda s: int(s, 16), hexes.split('..'))
for hex in xrange(hex1, hex2 + 1):
if is_valid_unicode(hex):
assigned[name].append(hex)
return assigned
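# group collapses a list of codepoints into contiguous ranges;
# e.g. (illustrative) group([0x41, 0x42, 0x43, 0x61]) yields
# [(0x41, 0x43), (0x61, 0x61)].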
def group(letters):
letters = sorted(set(letters))
grouped = []
cur_start = letters.pop(0)
cur_end = cur_start
for letter in letters:
assert letter > cur_end, \
'cur_end: %s, letter: %s' % (hex(cur_end), hex(letter))
if letter == cur_end + 1:
cur_end = letter
else:
grouped.append((cur_start, cur_end))
cur_start, cur_end = letter, letter
grouped.append((cur_start, cur_end))
return grouped
def ranges_to_rust(rs):
rs = ("('%s', '%s')" % (as_4byte_uni(s), as_4byte_uni(e)) for s, e in rs)
return ',\n '.join(rs)
def groups_to_rust(groups):
rust_groups = []
for group_name in sorted(groups):
rust_groups.append('("%s", &[\n %s\n ]),'
% (group_name, ranges_to_rust(groups[group_name])))
return '\n'.join(rust_groups)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
description='Generate Unicode character class tables.')
aa = parser.add_argument
aa('--local', action='store_true',
help='When set, Scripts.txt and UnicodeData.txt will be read from '
'the CWD.')
aa('--base-url', type=str, default=BASE_URL,
help='The base URL to use for downloading Unicode data files.')
args = parser.parse_args()
if args.local:
cats = read_cats(open(DATA))
scripts = read_scripts(open(SCRIPTS))
else:
cats = read_cats(urllib2.urlopen(args.base_url + '/' + DATA))
scripts = read_scripts(urllib2.urlopen(args.base_url + '/' + SCRIPTS))
# Get Rust code for all Unicode general categories and scripts.
combined = dict(cats, **scripts)
unigroups = groups_to_rust({k: group(letters)
for k, letters in combined.items()})
# Now get Perl character classes that are Unicode friendly.
perld = range(ord('0'), ord('9') + 1)
dgroups = ranges_to_rust(group(perld + cats['Nd'][:]))
perls = map(ord, ['\t', '\n', '\x0C', '\r', ' '])
sgroups = ranges_to_rust(group(perls + cats['Z'][:]))
low, up = (range(ord('a'), ord('z') + 1), range(ord('A'), ord('Z') + 1))
perlw = [ord('_')] + perld + low + up
wgroups = ranges_to_rust(group(perlw + cats['L'][:]))
tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// DO NOT EDIT. Automatically generated by 'src/etc/regex-unicode-tables.py'
// on {date}.
use parse::{{Class, NamedClasses}};
pub static UNICODE_CLASSES: NamedClasses = &[
{groups}
];
pub static PERLD: Class = &[
{dgroups}
];
pub static PERLS: Class = &[
{sgroups}
];
pub static PERLW: Class = &[
{wgroups}
];
'''
now = datetime.datetime.now()
print(tpl.format(date=str(now), groups=unigroups,
dgroups=dgroups, sgroups=sgroups, wgroups=wgroups))

274
src/libregex/compile.rs Normal file

@@ -0,0 +1,274 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Enable this to squash warnings due to exporting pieces of the representation
// for use with the regex! macro. See lib.rs for explanation.
#![allow(visible_private_types)]
use std::cmp;
use std::iter;
use parse;
use parse::{
Flags, FLAG_EMPTY,
Nothing, Literal, Dot, Class, Begin, End, WordBoundary, Capture, Cat, Alt,
Rep,
ZeroOne, ZeroMore, OneMore,
};
type InstIdx = uint;
#[deriving(Show, Clone)]
pub enum Inst {
// When a Match instruction is executed, the current thread is successful.
Match,
// The OneChar instruction matches a literal character.
// The flags indicate whether to do a case insensitive match.
OneChar(char, Flags),
// The CharClass instruction tries to match one input character against
// the range of characters given.
// The flags indicate whether to do a case insensitive match and whether
// the character class is negated or not.
CharClass(Vec<(char, char)>, Flags),
// Matches any character except new lines.
// The flags indicate whether to include the '\n' character.
Any(Flags),
// Matches the beginning of the string, consumes no characters.
// The flags indicate whether it matches if the preceding character
// is a new line.
EmptyBegin(Flags),
// Matches the end of the string, consumes no characters.
// The flags indicate whether it matches if the following character
// is a new line.
EmptyEnd(Flags),
// Matches a word boundary (\w on one side and \W, \A, or \z on the other),
// and consumes no character.
// The flags indicate whether this matches a word boundary or something
// that isn't a word boundary.
EmptyWordBoundary(Flags),
// Saves the current position in the input string to the Nth save slot.
Save(uint),
// Jumps to the instruction at the index given.
Jump(InstIdx),
// Jumps to the instruction at the first index given. If that leads to
// a failing state, then the instruction at the second index given is
// tried.
Split(InstIdx, InstIdx),
}
/// Program represents a compiled regular expression. Once an expression is
/// compiled, its representation is immutable and will never change.
///
/// All of the data in a compiled expression is wrapped in "MaybeStatic" or
/// "MaybeOwned" types so that a `Program` can be represented as static data.
/// (This makes it convenient and efficient for use with the `regex!` macro.)
#[deriving(Clone)]
pub struct Program {
/// A sequence of instructions.
pub insts: Vec<Inst>,
/// If the regular expression requires a literal prefix in order to have a
/// match, that prefix is stored here. (It's used in the VM to implement
/// an optimization.)
pub prefix: ~str,
}
impl Program {
/// Compiles a Regex given its AST.
pub fn new(ast: ~parse::Ast) -> (Program, ~[Option<~str>]) {
let mut c = Compiler {
insts: Vec::with_capacity(100),
names: Vec::with_capacity(10),
};
c.insts.push(Save(0));
c.compile(ast);
c.insts.push(Save(1));
c.insts.push(Match);
// Try to discover a literal string prefix.
// This is a bit hacky since we have to skip over the initial
// 'Save' instruction.
let mut pre = StrBuf::with_capacity(5);
for i in iter::range(1, c.insts.len()) {
match *c.insts.get(i) {
OneChar(c, FLAG_EMPTY) => pre.push_char(c),
_ => break
}
}
let names = c.names.as_slice().into_owned();
let prog = Program {
insts: c.insts,
prefix: pre.into_owned(),
};
(prog, names)
}
/// Returns the total number of capture groups in the regular expression.
/// This includes the zeroth capture.
pub fn num_captures(&self) -> uint {
let mut n = 0;
for inst in self.insts.iter() {
match *inst {
Save(c) => n = cmp::max(n, c+1),
_ => {}
}
}
// There's exactly 2 Save slots for every capture.
n / 2
}
}
struct Compiler<'r> {
insts: Vec<Inst>,
names: Vec<Option<~str>>,
}
// The compiler implemented here is extremely simple. Most of the complexity
// in this crate is in the parser or the VM.
// The only tricky thing here is patching jump/split instructions to point to
// the right instruction.
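//
// For example (an illustrative sketch of the layout, following the scheme
// used below), the expression `a|b` compiles to roughly:
//
//   0: Save(0)
//   1: Split(2, 4)
//   2: OneChar('a', FLAG_EMPTY)
//   3: Jump(5)
//   4: OneChar('b', FLAG_EMPTY)
//   5: Save(1)
//   6: Match
//
// where the targets of the Split and Jump are patched in only after both
// branches have been emitted.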
impl<'r> Compiler<'r> {
fn compile(&mut self, ast: ~parse::Ast) {
match ast {
~Nothing => {},
~Literal(c, flags) => self.push(OneChar(c, flags)),
~Dot(nl) => self.push(Any(nl)),
~Class(ranges, flags) =>
self.push(CharClass(ranges, flags)),
~Begin(flags) => self.push(EmptyBegin(flags)),
~End(flags) => self.push(EmptyEnd(flags)),
~WordBoundary(flags) => self.push(EmptyWordBoundary(flags)),
~Capture(cap, name, x) => {
let len = self.names.len();
if cap >= len {
self.names.grow(10 + cap - len, &None)
}
*self.names.get_mut(cap) = name;
self.push(Save(2 * cap));
self.compile(x);
self.push(Save(2 * cap + 1));
}
~Cat(xs) => {
for x in xs.move_iter() {
self.compile(x)
}
}
~Alt(x, y) => {
let split = self.empty_split(); // push: split 0, 0
let j1 = self.insts.len();
self.compile(x); // push: insts for x
let jmp = self.empty_jump(); // push: jmp 0
let j2 = self.insts.len();
self.compile(y); // push: insts for y
let j3 = self.insts.len();
self.set_split(split, j1, j2); // split 0, 0 -> split j1, j2
self.set_jump(jmp, j3); // jmp 0 -> jmp j3
}
~Rep(x, ZeroOne, g) => {
let split = self.empty_split();
let j1 = self.insts.len();
self.compile(x);
let j2 = self.insts.len();
if g.is_greedy() {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
~Rep(x, ZeroMore, g) => {
let j1 = self.insts.len();
let split = self.empty_split();
let j2 = self.insts.len();
self.compile(x);
let jmp = self.empty_jump();
let j3 = self.insts.len();
self.set_jump(jmp, j1);
if g.is_greedy() {
self.set_split(split, j2, j3);
} else {
self.set_split(split, j3, j2);
}
}
~Rep(x, OneMore, g) => {
let j1 = self.insts.len();
self.compile(x);
let split = self.empty_split();
let j2 = self.insts.len();
if g.is_greedy() {
self.set_split(split, j1, j2);
} else {
self.set_split(split, j2, j1);
}
}
}
}
/// Appends the given instruction to the program.
#[inline]
fn push(&mut self, x: Inst) {
self.insts.push(x)
}
/// Appends an *empty* `Split` instruction to the program and returns
/// the index of that instruction. (The index can then be used to "patch"
/// the actual locations of the split in later.)
#[inline]
fn empty_split(&mut self) -> InstIdx {
self.insts.push(Split(0, 0));
self.insts.len() - 1
}
/// Sets the left and right locations of a `Split` instruction at index
/// `i` to `pc1` and `pc2`, respectively.
/// If the instruction at index `i` isn't a `Split` instruction, then
/// `fail!` is called.
#[inline]
fn set_split(&mut self, i: InstIdx, pc1: InstIdx, pc2: InstIdx) {
let split = self.insts.get_mut(i);
match *split {
Split(_, _) => *split = Split(pc1, pc2),
_ => fail!("BUG: Invalid split index."),
}
}
/// Appends an *empty* `Jump` instruction to the program and returns the
/// index of that instruction.
#[inline]
fn empty_jump(&mut self) -> InstIdx {
self.insts.push(Jump(0));
self.insts.len() - 1
}
/// Sets the location of a `Jump` instruction at index `i` to `pc`.
/// If the instruction at index `i` isn't a `Jump` instruction, then
/// `fail!` is called.
#[inline]
fn set_jump(&mut self, i: InstIdx, pc: InstIdx) {
let jmp = self.insts.get_mut(i);
match *jmp {
Jump(_) => *jmp = Jump(pc),
_ => fail!("BUG: Invalid jump index."),
}
}
}

425
src/libregex/lib.rs Normal file

@@ -0,0 +1,425 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This crate provides a native implementation of regular expressions that is
//! heavily based on RE2 both in syntax and in implementation. Notably,
//! backreferences and arbitrary lookahead/lookbehind assertions are not
//! provided. In return, regular expression searching provided by this package
//! has excellent worst case performance. The specific syntax supported is
//! documented further down.
//!
//! This crate's documentation provides some simple examples, describes Unicode
//! support and exhaustively lists the supported syntax. For more specific
//! details on the API, please see the documentation for the `Regex` type.
//!
//! # First example: find a date
//!
//! General use of regular expressions in this package involves compiling an
//! expression and then using it to search, split or replace text. For example,
//! to confirm that some text resembles a date:
//!
//! ```rust
//! use regex::Regex;
//! let re = match Regex::new(r"^\d{4}-\d{2}-\d{2}$") {
//! Ok(re) => re,
//! Err(err) => fail!("{}", err),
//! };
//! assert_eq!(re.is_match("2014-01-01"), true);
//! ```
//!
//! Notice the use of the `^` and `$` anchors. In this crate, every expression
//! is executed with an implicit `.*?` at the beginning and end, which allows
//! it to match anywhere in the text. Anchors can be used to ensure that the
//! full text matches an expression.
//!
//! This example also demonstrates the utility of raw strings in Rust, which
//! are just like regular strings except they are prefixed with an `r` and do
//! not process any escape sequences. For example, `"\\d"` is the same
//! expression as `r"\d"`.
//!
//! # The `regex!` macro
//!
//! Rust's compile time meta-programming facilities provide a way to write a
//! `regex!` macro which compiles regular expressions *when your program
//! compiles*. Said differently, if you only use `regex!` to build regular
//! expressions in your program, then your program cannot compile with an
//! invalid regular expression. Moreover, the `regex!` macro compiles the
//! given expression to native Rust code, which makes it much faster for
//! searching text.
//!
//! Since `regex!` provides compiled regular expressions that are both safer
//! and faster to use, you should use them whenever possible. The only
//! requirement for using them is that you have a string literal corresponding
//! to your expression. Otherwise, it is indistinguishable from an expression
//! compiled at runtime with `Regex::new`.
//!
//! To use the `regex!` macro, you must enable the `phase` feature and import
//! the `regex_macros` crate as a syntax extension:
//!
//! ```rust
//! #![feature(phase)]
//! #[phase(syntax)]
//! extern crate regex_macros;
//! extern crate regex;
//!
//! fn main() {
//! let re = regex!(r"^\d{4}-\d{2}-\d{2}$");
//! assert_eq!(re.is_match("2014-01-01"), true);
//! }
//! ```
//!
//! There are a few things worth mentioning about using the `regex!` macro.
//! Firstly, the `regex!` macro *only* accepts string *literals*.
//! Secondly, the `regex` crate *must* be linked with the name `regex` since
//! the generated code depends on finding symbols in the `regex` crate.
//!
//! The only downside of using the `regex!` macro is that it can increase the
//! size of your program's binary since it generates specialized Rust code.
//! The extra size probably won't be significant for a small number of
//! expressions, but 100+ calls to `regex!` will probably result in a
//! noticeably bigger binary.
//!
//! # Example: iterating over capture groups
//!
//! This crate provides convenient iterators for matching an expression
//! repeatedly against a search string to find successive non-overlapping
//! matches. For example, to find all dates in a string and be able to access
//! them by their component pieces:
//!
//! ```rust
//! # #![feature(phase)]
//! # extern crate regex; #[phase(syntax)] extern crate regex_macros;
//! # fn main() {
//! let re = regex!(r"(\d{4})-(\d{2})-(\d{2})");
//! let text = "2012-03-14, 2013-01-01 and 2014-07-05";
//! for cap in re.captures_iter(text) {
//! println!("Month: {} Day: {} Year: {}", cap.at(2), cap.at(3), cap.at(1));
//! }
//! // Output:
//! // Month: 03 Day: 14 Year: 2012
//! // Month: 01 Day: 01 Year: 2013
//! // Month: 07 Day: 05 Year: 2014
//! # }
//! ```
//!
//! Notice that the year is in the capture group indexed at `1`. This is
//! because the *entire match* is stored in the capture group at index `0`.
//!
//! # Example: replacement with named capture groups
//!
//! Building on the previous example, perhaps we'd like to rearrange the date
//! formats. This can be done with text replacement. But to make the code
//! clearer, we can *name* our capture groups and use those names as variables
//! in our replacement text:
//!
//! ```rust
//! # #![feature(phase)]
//! # extern crate regex; #[phase(syntax)] extern crate regex_macros;
//! # fn main() {
//! let re = regex!(r"(?P<y>\d{4})-(?P<m>\d{2})-(?P<d>\d{2})");
//! let before = "2012-03-14, 2013-01-01 and 2014-07-05";
//! let after = re.replace_all(before, "$m/$d/$y");
//! assert_eq!(after.as_slice(), "03/14/2012, 01/01/2013 and 07/05/2014");
//! # }
//! ```
//!
//! The `replace` methods are actually polymorphic in the replacement, which
//! provides more flexibility than is seen here. (See the documentation for
//! `Regex::replace` for more details.)
//!
//! # Pay for what you use
//!
//! With respect to searching text with a regular expression, there are three
//! questions that can be asked:
//!
//! 1. Does the text match this expression?
//! 2. If so, where does it match?
//! 3. Where are the submatches?
//!
//! Generally speaking, this crate could provide a function to answer only #3,
//! which would subsume #1 and #2 automatically. However, it can be
//! significantly more expensive to compute the location of submatches, so it's
//! best not to do it if you don't need to.
//!
//! Therefore, only use what you need. For example, don't use `find` if you
//! only need to test if an expression matches a string. (Use `is_match`
//! instead.)
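//!
//! For instance (a small illustrative sketch; the text is made up):
//!
//! ```rust
//! # #![feature(phase)]
//! # extern crate regex; #[phase(syntax)] extern crate regex_macros;
//! # fn main() {
//! let re = regex!(r"\bcat\b");
//! // Question 1 only: the cheapest check.
//! assert!(re.is_match("the cat sat"));
//! // Question 2: the location of the leftmost-first match.
//! assert_eq!(re.find("the cat sat"), Some((4, 7)));
//! # }
//! ```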
//!
//! # Unicode
//!
//! This implementation executes regular expressions **only** on sequences of
//! UTF8 codepoints while exposing match locations as byte indices.
//!
//! Currently, only naive case folding is supported. Namely, when matching
//! case insensitively, the characters are first converted to their uppercase
//! forms and then compared.
//!
//! Regular expressions themselves are also **only** interpreted as a sequence
//! of UTF8 codepoints. This means you can embed Unicode characters directly
//! into your expression:
//!
//! ```rust
//! # #![feature(phase)]
//! # extern crate regex; #[phase(syntax)] extern crate regex_macros;
//! # fn main() {
//! let re = regex!(r"(?i)Δ+");
//! assert_eq!(re.find("ΔδΔ"), Some((0, 6)));
//! # }
//! ```
//!
//! Finally, Unicode general categories and scripts are available as character
//! classes. For example, you can match a sequence of numerals, Greek or
//! Cherokee letters:
//!
//! ```rust
//! # #![feature(phase)]
//! # extern crate regex; #[phase(syntax)] extern crate regex_macros;
//! # fn main() {
//! let re = regex!(r"[\pN\p{Greek}\p{Cherokee}]+");
//! assert_eq!(re.find("abcΔβγδⅡxyz"), Some((3, 23)));
//! # }
//! ```
//!
//! # Syntax
//!
//! The syntax supported in this crate is almost in an exact correspondence
//! with the syntax supported by RE2.
//!
//! ## Matching one character
//!
//! <pre class="rust">
//! . any character except new line (includes new line with s flag)
//! [xyz] A character class matching either x, y or z.
//! [^xyz] A character class matching any character except x, y and z.
//! [a-z] A character class matching any character in range a-z.
//! \d Perl character class ([0-9])
//! \D Negated Perl character class ([^0-9])
//! [:alpha:] ASCII character class ([A-Za-z])
//! [:^alpha:] Negated ASCII character class ([^A-Za-z])
//! \pN One letter name Unicode character class
//! \p{Greek} Unicode character class (general category or script)
//! \PN Negated one letter name Unicode character class
//! \P{Greek} negated Unicode character class (general category or script)
//! </pre>
//!
//! Any named character class may appear inside a bracketed `[...]` character
//! class. For example, `[\p{Greek}\pN]` matches any Greek or numeral
//! character.
//!
//! ## Composites
//!
//! <pre class="rust">
//! xy concatenation (x followed by y)
//! x|y alternation (x or y, prefer x)
//! </pre>
//!
//! ## Repetitions
//!
//! <pre class="rust">
//! x* zero or more of x (greedy)
//! x+ one or more of x (greedy)
//! x? zero or one of x (greedy)
//! x*? zero or more of x (ungreedy)
//! x+? one or more of x (ungreedy)
//! x?? zero or one of x (ungreedy)
//! x{n,m} at least n x and at most m x (greedy)
//! x{n,} at least n x (greedy)
//! x{n} exactly n x
//! x{n,m}? at least n x and at most m x (ungreedy)
//! x{n,}? at least n x (ungreedy)
//! x{n}? exactly n x
//! </pre>
//!
//! ## Empty matches
//!
//! <pre class="rust">
//! ^ the beginning of text (or start-of-line with multi-line mode)
//! $ the end of text (or end-of-line with multi-line mode)
//! \A only the beginning of text (even with multi-line mode enabled)
//! \z only the end of text (even with multi-line mode enabled)
//! \b a Unicode word boundary (\w on one side and \W, \A, or \z on other)
//! \B not a Unicode word boundary
//! </pre>
//!
//! ## Grouping and flags
//!
//! <pre class="rust">
//! (exp) numbered capture group (indexed by opening parenthesis)
//! (?P&lt;name&gt;exp) named (also numbered) capture group (allowed chars: [_0-9a-zA-Z])
//! (?:exp) non-capturing group
//! (?flags) set flags within current group
//! (?flags:exp) set flags for exp (non-capturing)
//! </pre>
//!
//! Flags are each a single character. For example, `(?x)` sets the flag `x`
//! and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at
//! the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets
//! the `x` flag and clears the `y` flag.
//!
//! All flags are by default disabled. They are:
//!
//! <pre class="rust">
//! i case insensitive
//! m multi-line mode: ^ and $ match begin/end of line
//! s allow . to match \n
//! U swap the meaning of x* and x*?
//! </pre>
//!
//! Here's an example that matches case insensitively for only part of the
//! expression:
//!
//! ```rust
//! # #![feature(phase)]
//! # extern crate regex; #[phase(syntax)] extern crate regex_macros;
//! # fn main() {
//! let re = regex!(r"(?i)a+(?-i)b+");
//! let cap = re.captures("AaAaAbbBBBb").unwrap();
//! assert_eq!(cap.at(0), "AaAaAbb");
//! # }
//! ```
//!
//! Notice that the `a+` matches either `a` or `A`, but the `b+` only matches
//! `b`.
//!
//! ## Escape sequences
//!
//! <pre class="rust">
//! \* literal *, works for any punctuation character: \.+*?()|[]{}^$
//! \a bell (\x07)
//! \f form feed (\x0C)
//! \t horizontal tab
//! \n new line
//! \r carriage return
//! \v vertical tab (\x0B)
//! \123 octal character code (up to three digits)
//! \x7F hex character code (exactly two digits)
//! \x{10FFFF} any hex character code corresponding to a valid UTF8 codepoint
//! </pre>
//!
//! ## Perl character classes (Unicode friendly)
//!
//! <pre class="rust">
//! \d digit ([0-9] + \p{Nd})
//! \D not digit
//! \s whitespace ([\t\n\f\r ] + \p{Z})
//! \S not whitespace
//! \w word character ([0-9A-Za-z_] + \p{L})
//! \W not word character
//! </pre>
//!
//! ## ASCII character classes
//!
//! <pre class="rust">
//! [:alnum:] alphanumeric ([0-9A-Za-z])
//! [:alpha:] alphabetic ([A-Za-z])
//! [:ascii:] ASCII ([\x00-\x7F])
//! [:blank:] blank ([\t ])
//! [:cntrl:] control ([\x00-\x1F\x7F])
//! [:digit:] digits ([0-9])
//! [:graph:] graphical ([!-~])
//! [:lower:] lower case ([a-z])
//! [:print:] printable ([ -~])
//! [:punct:] punctuation ([!-/:-@[-`{-~])
//! [:space:] whitespace ([\t\n\v\f\r ])
//! [:upper:] upper case ([A-Z])
//! [:word:] word characters ([0-9A-Za-z_])
//! [:xdigit:] hex digit ([0-9A-Fa-f])
//! </pre>
//!
//! # Untrusted input
//!
//! There are two factors to consider here: untrusted regular expressions and
//! untrusted search text.
//!
//! Currently, there are no counter-measures in place to prevent a malicious
//! user from writing an expression that may use a lot of resources. One such
//! example is to repeat counted repetitions: `((a{100}){100}){100}` will try
//! to repeat the `a` instruction `100^3` times. Essentially, this means it's
//! very easy for an attacker to exhaust your system's memory if they are
//! allowed to execute arbitrary regular expressions. A possible solution to
//! this is to impose a hard limit on the size of a compiled expression, but it
//! does not yet exist.
//!
//! The story is a bit better with untrusted search text, since this crate's
//! implementation provides `O(nm)` search where `n` is the number of
//! characters in the search text and `m` is the number of instructions in a
//! compiled expression.
#![crate_id = "regex#0.11-pre"]
#![crate_type = "rlib"]
#![crate_type = "dylib"]
#![experimental]
#![license = "MIT/ASL2"]
#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
html_favicon_url = "http://www.rust-lang.org/favicon.ico",
html_root_url = "http://static.rust-lang.org/doc/master")]
#![feature(macro_rules, phase)]
#![deny(missing_doc)]
extern crate collections;
#[cfg(test)]
extern crate stdtest = "test";
#[cfg(test)]
extern crate rand;
// During tests, this links with the `regex` crate so that the `regex!` macro
// can be tested.
#[cfg(test)]
extern crate regex;
pub use parse::Error;
pub use re::{Regex, Captures, SubCaptures, SubCapturesPos};
pub use re::{FindCaptures, FindMatches};
pub use re::{Replacer, NoExpand, RegexSplits, RegexSplitsN};
pub use re::{quote, is_match};
mod compile;
mod parse;
mod re;
mod vm;
#[cfg(test)]
mod test;
/// The `program` module exists to support the `regex!` macro. Do not use.
#[doc(hidden)]
pub mod native {
// Exporting this stuff is bad form, but it's necessary for two reasons.
// Firstly, the `regex!` syntax extension is in a different crate and
// requires access to the representation of a regex (particularly the
// instruction set) in order to compile to native Rust. This could be
// mitigated if `regex!` was defined in the same crate, but this has
// undesirable consequences (such as requiring a dependency on
// `libsyntax`).
//
// Secondly, the code generated by `regex!` must *also* be able
// to access various functions in this crate to reduce code duplication
// and to provide a value with precisely the same `Regex` type in this
// crate. This, AFAIK, is impossible to mitigate.
//
// On the bright side, `rustdoc` lets us hide this from the public API
// documentation.
pub use compile::{
Program,
OneChar, CharClass, Any, Save, Jump, Split,
Match, EmptyBegin, EmptyEnd, EmptyWordBoundary,
};
pub use parse::{
FLAG_EMPTY, FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL,
FLAG_SWAP_GREED, FLAG_NEGATED,
};
pub use re::{Dynamic, Native};
pub use vm::{
MatchKind, Exists, Location, Submatches,
StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
CharReader, find_prefix,
};
}

1028
src/libregex/parse.rs Normal file

File diff suppressed because it is too large

870
src/libregex/re.rs Normal file

@@ -0,0 +1,870 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use collections::HashMap;
use std::fmt;
use std::from_str::from_str;
use std::str::{MaybeOwned, Owned, Slice};
use compile::Program;
use parse;
use vm;
use vm::{CaptureLocs, MatchKind, Exists, Location, Submatches};
/// Escapes all regular expression meta characters in `text` so that it may be
/// safely used in a regular expression as a literal string.
pub fn quote(text: &str) -> ~str {
let mut quoted = StrBuf::with_capacity(text.len());
for c in text.chars() {
if parse::is_punct(c) {
quoted.push_char('\\')
}
quoted.push_char(c);
}
quoted.into_owned()
}
/// Tests if the given regular expression matches somewhere in the text given.
///
/// If there was a problem compiling the regular expression, an error is
/// returned.
///
/// To find submatches, split or replace text, you'll need to compile an
/// expression first.
///
/// Note that you should prefer the `regex!` macro when possible. For example,
/// `regex!("...").is_match("...")`.
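///
/// # Example
///
/// A quick illustrative check (pattern and text are made up):
///
/// ```rust
/// assert!(regex::is_match(r"^\d+$", "2014").unwrap());
/// ```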
pub fn is_match(regex: &str, text: &str) -> Result<bool, parse::Error> {
Regex::new(regex).map(|r| r.is_match(text))
}
/// Regex is a compiled regular expression, represented as either a sequence
/// of bytecode instructions (dynamic) or as a specialized Rust function
/// (native). It can be used to search, split
/// or replace text. All searching is done with an implicit `.*?` at the
/// beginning and end of an expression. To force an expression to match the
/// whole string (or a prefix or a suffix), you must use an anchor like `^` or
/// `$` (or `\A` and `\z`).
///
/// While this crate will handle Unicode strings (whether in the regular
/// expression or in the search text), all positions returned are **byte
/// indices**. Every byte index is guaranteed to be at a UTF8 codepoint
/// boundary.
///
/// The lifetimes `'r` and `'t` in this crate correspond to the lifetime of a
/// compiled regular expression and text to search, respectively.
///
/// The only methods that allocate new strings are the string replacement
/// methods. All other methods (searching and splitting) return borrowed
/// pointers into the string given.
///
/// # Examples
///
/// Find the location of a US phone number:
///
/// ```rust
/// # use regex::Regex;
/// let re = match Regex::new("[0-9]{3}-[0-9]{3}-[0-9]{4}") {
/// Ok(re) => re,
/// Err(err) => fail!("{}", err),
/// };
/// assert_eq!(re.find("phone: 111-222-3333"), Some((7, 19)));
/// ```
///
/// You can also use the `regex!` macro to compile a regular expression when
/// you compile your program:
///
/// ```rust
/// #![feature(phase)]
/// extern crate regex;
/// #[phase(syntax)] extern crate regex_macros;
///
/// fn main() {
/// let re = regex!(r"\d+");
/// assert_eq!(re.find("123 abc"), Some((0, 3)));
/// }
/// ```
///
/// Given an incorrect regular expression, `regex!` will cause the Rust
/// compiler to produce a compile time error.
/// Note that `regex!` will compile the expression to native Rust code, which
/// makes it much faster when searching text.
/// More details about the `regex!` macro can be found in the `regex` crate
/// documentation.
#[deriving(Clone)]
#[allow(visible_private_types)]
pub struct Regex {
/// The representation of `Regex` is exported to support the `regex!`
/// syntax extension. Do not rely on it.
///
/// See the comments for the `program` module in `lib.rs` for a more
/// detailed explanation for what `regex!` requires.
#[doc(hidden)]
pub original: ~str,
#[doc(hidden)]
pub names: ~[Option<~str>],
#[doc(hidden)]
pub p: MaybeNative,
}
impl fmt::Show for Regex {
/// Shows the original regular expression.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f.buf, "{}", self.original)
}
}
pub enum MaybeNative {
Dynamic(Program),
Native(fn(MatchKind, &str, uint, uint) -> Vec<Option<uint>>),
}
impl Clone for MaybeNative {
fn clone(&self) -> MaybeNative {
match *self {
Dynamic(ref p) => Dynamic(p.clone()),
Native(fp) => Native(fp),
}
}
}
impl Regex {
/// Compiles a dynamic regular expression. Once compiled, it can be
/// used repeatedly to search, split or replace text in a string.
///
/// When possible, you should prefer the `regex!` macro since it is
/// safer and always faster.
///
/// If an invalid expression is given, then an error is returned.
pub fn new(re: &str) -> Result<Regex, parse::Error> {
let ast = try!(parse::parse(re));
let (prog, names) = Program::new(ast);
Ok(Regex { original: re.to_owned(), names: names, p: Dynamic(prog) })
}
/// Returns true if and only if the regex matches the string given.
///
/// # Example
///
/// Test if some text contains at least one word with exactly 13
/// characters:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let matched = regex!(r"\b\w{13}\b").is_match(text);
/// assert!(matched);
/// # }
/// ```
pub fn is_match(&self, text: &str) -> bool {
has_match(&exec(self, Exists, text))
}
/// Returns the start and end byte range of the leftmost-first match in
/// `text`. If no match exists, then `None` is returned.
///
/// Note that this should only be used if you want to discover the position
/// of the match. Testing the existence of a match is faster if you use
/// `is_match`.
///
/// # Example
///
/// Find the start and end location of every word with exactly 13
/// characters:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let text = "I categorically deny having triskaidekaphobia.";
/// let pos = regex!(r"\b\w{13}\b").find(text);
/// assert_eq!(pos, Some((2, 15)));
/// # }
/// ```
pub fn find(&self, text: &str) -> Option<(uint, uint)> {
let caps = exec(self, Location, text);
if has_match(&caps) {
Some((caps.get(0).unwrap(), caps.get(1).unwrap()))
} else {
None
}
}
/// Returns an iterator for each successive non-overlapping match in
/// `text`, returning the start and end byte indices with respect to
/// `text`.
///
/// # Example
///
/// Find the start and end location of the first word with exactly 13
/// characters:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let text = "Retroactively relinquishing remunerations is reprehensible.";
/// for pos in regex!(r"\b\w{13}\b").find_iter(text) {
/// println!("{}", pos);
/// }
/// // Output:
/// // (0, 13)
/// // (14, 27)
/// // (28, 41)
/// // (45, 58)
/// # }
/// ```
pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> FindMatches<'r, 't> {
FindMatches {
re: self,
search: text,
last_end: 0,
last_match: None,
}
}
/// Returns the capture groups corresponding to the leftmost-first
/// match in `text`. Capture group `0` always corresponds to the entire
/// match. If no match is found, then `None` is returned.
///
/// You should only use `captures` if you need access to submatches.
/// Otherwise, `find` is faster for discovering the location of the overall
/// match.
///
/// # Examples
///
/// Say you have some text with movie names and their release years,
/// like "'Citizen Kane' (1941)". It'd be nice if we could search for text
/// looking like that, while also extracting the movie name and its release
/// year separately.
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"'([^']+)'\s+\((\d{4})\)");
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(text).unwrap();
/// assert_eq!(caps.at(1), "Citizen Kane");
/// assert_eq!(caps.at(2), "1941");
/// assert_eq!(caps.at(0), "'Citizen Kane' (1941)");
/// # }
/// ```
///
/// Note that the full match is at capture group `0`. Each subsequent
/// capture group is indexed by the order of its opening `(`.
///
/// We can make this example a bit clearer by using *named* capture groups:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let text = "Not my favorite movie: 'Citizen Kane' (1941).";
/// let caps = re.captures(text).unwrap();
/// assert_eq!(caps.name("title"), "Citizen Kane");
/// assert_eq!(caps.name("year"), "1941");
/// assert_eq!(caps.at(0), "'Citizen Kane' (1941)");
/// # }
/// ```
///
/// Here we name the capture groups, which we can access with the `name`
/// method. Note that the named capture groups are still accessible with
/// `at`.
///
/// The `0`th capture group is always unnamed, so it must always be
/// accessed with `at(0)`.
pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
let caps = exec(self, Submatches, text);
Captures::new(self, text, caps)
}
/// Returns an iterator over all the non-overlapping capture groups matched
/// in `text`. This is operationally the same as `find_iter` (except it
/// yields information about submatches).
///
/// # Example
///
/// We can use this to find all movie titles and their release years in
/// some text, where the movie is formatted like "'Title' (xxxx)":
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"'(?P<title>[^']+)'\s+\((?P<year>\d{4})\)");
/// let text = "'Citizen Kane' (1941), 'The Wizard of Oz' (1939), 'M' (1931).";
/// for caps in re.captures_iter(text) {
/// println!("Movie: {}, Released: {}", caps.name("title"), caps.name("year"));
/// }
/// // Output:
/// // Movie: Citizen Kane, Released: 1941
/// // Movie: The Wizard of Oz, Released: 1939
/// // Movie: M, Released: 1931
/// # }
/// ```
pub fn captures_iter<'r, 't>(&'r self, text: &'t str)
-> FindCaptures<'r, 't> {
FindCaptures {
re: self,
search: text,
last_match: None,
last_end: 0,
}
}
/// Returns an iterator of substrings of `text` delimited by a match
/// of the regular expression.
/// Namely, each element of the iterator corresponds to text that *isn't*
/// matched by the regular expression.
///
/// This method will *not* copy the text given.
///
/// # Example
///
/// To split a string delimited by arbitrary amounts of spaces or tabs:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"[ \t]+");
/// let fields: Vec<&str> = re.split("a b \t c\td e").collect();
/// assert_eq!(fields, vec!("a", "b", "c", "d", "e"));
/// # }
/// ```
pub fn split<'r, 't>(&'r self, text: &'t str) -> RegexSplits<'r, 't> {
RegexSplits {
finder: self.find_iter(text),
last: 0,
}
}
/// Returns an iterator of at most `limit` substrings of `text` delimited
/// by a match of the regular expression. (A `limit` of `0` will return no
/// substrings.)
/// Namely, each element of the iterator corresponds to text that *isn't*
/// matched by the regular expression.
/// The remainder of the string that is not split will be the last element
/// in the iterator.
///
/// This method will *not* copy the text given.
///
/// # Example
///
/// Get the first two words in some text:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"\W+");
/// let fields: Vec<&str> = re.splitn("Hey! How are you?", 3).collect();
/// assert_eq!(fields, vec!("Hey", "How", "are you?"));
/// # }
/// ```
pub fn splitn<'r, 't>(&'r self, text: &'t str, limit: uint)
-> RegexSplitsN<'r, 't> {
RegexSplitsN {
splits: self.split(text),
cur: 0,
limit: limit,
}
}
/// Replaces the leftmost-first match with the replacement provided.
/// The replacement can be a regular string (where `$N` and `$name` are
/// expanded to match capture groups) or a function that takes the matches'
/// `Captures` and returns the replaced string.
///
/// If no match is found, then a copy of the string is returned unchanged.
///
/// # Examples
///
/// Note that this function is polymorphic with respect to the replacement.
/// In typical usage, this can just be a normal string:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!("[^01]+");
/// assert_eq!(re.replace("1078910", "").as_slice(), "1010");
/// # }
/// ```
///
/// But anything satisfying the `Replacer` trait will work. For example,
/// a closure of type `|&Captures| -> ~str` provides direct access to the
/// captures corresponding to a match. This allows one to access
/// submatches easily:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # use regex::Captures; fn main() {
/// let re = regex!(r"([^,\s]+),\s+(\S+)");
/// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
/// format!("{} {}", caps.at(2), caps.at(1))
/// });
/// assert_eq!(result.as_slice(), "Bruce Springsteen");
/// # }
/// ```
///
/// But this is a bit cumbersome to use all the time. Instead, a simple
/// syntax is supported that expands `$name` into the corresponding capture
/// group. Here's the last example, but using this expansion technique
/// with named capture groups:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)");
/// let result = re.replace("Springsteen, Bruce", "$first $last");
/// assert_eq!(result.as_slice(), "Bruce Springsteen");
/// # }
/// ```
///
/// Note that using `$2` instead of `$first` or `$1` instead of `$last`
/// would produce the same result. To write a literal `$` use `$$`.
///
/// Finally, sometimes you just want to replace a literal string with no
/// submatch expansion. This can be done by wrapping a string with
/// `NoExpand`:
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// use regex::NoExpand;
///
/// let re = regex!(r"(?P<last>[^,\s]+),\s+(\S+)");
/// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
/// assert_eq!(result.as_slice(), "$2 $last");
/// # }
/// ```
pub fn replace<R: Replacer>(&self, text: &str, rep: R) -> StrBuf {
self.replacen(text, 1, rep)
}
/// Replaces all non-overlapping matches in `text` with the
/// replacement provided. This is the same as calling `replacen` with
/// `limit` set to `0`.
///
/// See the documentation for `replace` for details on how to access
/// submatches in the replacement string.
pub fn replace_all<R: Replacer>(&self, text: &str, rep: R) -> StrBuf {
self.replacen(text, 0, rep)
}
/// Replaces at most `limit` non-overlapping matches in `text` with the
/// replacement provided. If `limit` is 0, then all non-overlapping matches
/// are replaced.
///
/// See the documentation for `replace` for details on how to access
/// submatches in the replacement string.
pub fn replacen<R: Replacer>
(&self, text: &str, limit: uint, mut rep: R) -> StrBuf {
let mut new = StrBuf::with_capacity(text.len());
let mut last_match = 0u;
let mut i = 0;
for cap in self.captures_iter(text) {
// It'd be nicer to use the 'take' iterator instead, but it seemed
// awkward given that '0' => no limit.
if limit > 0 && i >= limit {
break
}
i += 1;
let (s, e) = cap.pos(0).unwrap(); // captures only reports matches
new.push_str(text.slice(last_match, s));
new.push_str(rep.reg_replace(&cap).as_slice());
last_match = e;
}
new.append(text.slice(last_match, text.len()))
}
}
/// NoExpand indicates literal string replacement.
///
/// It can be used with `replace` and `replace_all` to do a literal
/// string replacement without expanding `$name` to their corresponding
/// capture groups.
///
/// `'r` is the lifetime of the literal text.
pub struct NoExpand<'t>(pub &'t str);
/// Replacer describes types that can be used to replace matches in a string.
pub trait Replacer {
/// Returns a possibly owned string that is used to replace the match
/// corresponding to the `caps` capture group.
///
/// The `'a` lifetime refers to the lifetime of a borrowed string when
/// a new owned string isn't needed (e.g., for `NoExpand`).
fn reg_replace<'a>(&'a mut self, caps: &Captures) -> MaybeOwned<'a>;
}
impl<'t> Replacer for NoExpand<'t> {
fn reg_replace<'a>(&'a mut self, _: &Captures) -> MaybeOwned<'a> {
let NoExpand(s) = *self;
Slice(s)
}
}
impl<'t> Replacer for &'t str {
fn reg_replace<'a>(&'a mut self, caps: &Captures) -> MaybeOwned<'a> {
Owned(caps.expand(*self).into_owned())
}
}
impl<'a> Replacer for |&Captures|: 'a -> ~str {
fn reg_replace<'r>(&'r mut self, caps: &Captures) -> MaybeOwned<'r> {
Owned((*self)(caps).into_owned())
}
}
/// Yields all substrings delimited by a regular expression match.
///
/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
/// of the string being split.
pub struct RegexSplits<'r, 't> {
finder: FindMatches<'r, 't>,
last: uint,
}
impl<'r, 't> Iterator<&'t str> for RegexSplits<'r, 't> {
fn next(&mut self) -> Option<&'t str> {
let text = self.finder.search;
match self.finder.next() {
None => {
if self.last >= text.len() {
None
} else {
let s = text.slice(self.last, text.len());
self.last = text.len();
Some(s)
}
}
Some((s, e)) => {
let matched = text.slice(self.last, s);
self.last = e;
Some(matched)
}
}
}
}
/// Yields at most `N` substrings delimited by a regular expression match.
///
/// The last substring will be whatever remains after splitting.
///
/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
/// of the string being split.
pub struct RegexSplitsN<'r, 't> {
splits: RegexSplits<'r, 't>,
cur: uint,
limit: uint,
}
impl<'r, 't> Iterator<&'t str> for RegexSplitsN<'r, 't> {
fn next(&mut self) -> Option<&'t str> {
let text = self.splits.finder.search;
if self.cur >= self.limit {
None
} else {
self.cur += 1;
if self.cur >= self.limit {
Some(text.slice(self.splits.last, text.len()))
} else {
self.splits.next()
}
}
}
}
/// Captures represents a group of captured strings for a single match.
///
/// The 0th capture always corresponds to the entire match. Each subsequent
/// index corresponds to the next capture group in the regex.
/// If a capture group is named, then the matched string is *also* available
/// via the `name` method. (Note that the 0th capture is always unnamed and so
/// must be accessed with the `at` method.)
///
/// Positions returned from a capture group are always byte indices.
///
/// `'t` is the lifetime of the matched text.
pub struct Captures<'t> {
text: &'t str,
locs: CaptureLocs,
named: Option<HashMap<~str, uint>>,
}
impl<'t> Captures<'t> {
fn new(re: &Regex, search: &'t str, locs: CaptureLocs)
-> Option<Captures<'t>> {
if !has_match(&locs) {
return None
}
let named =
if re.names.len() == 0 {
None
} else {
let mut named = HashMap::new();
for (i, name) in re.names.iter().enumerate() {
match name {
&None => {},
&Some(ref name) => {
named.insert(name.to_owned(), i);
}
}
}
Some(named)
};
Some(Captures {
text: search,
locs: locs,
named: named,
})
}
/// Returns the start and end positions of the Nth capture group.
/// Returns `None` if `i` is not a valid capture group or if the capture
/// group did not match anything.
/// The positions returned are *always* byte indices with respect to the
/// original string matched.
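///
/// # Example
///
/// An illustrative sketch (the text and spans are made up):
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"(\d{4})-(\d{2})");
/// let caps = re.captures("2014-04").unwrap();
/// // The second capture group ("04") spans bytes 5..7.
/// assert_eq!(caps.pos(2), Some((5, 7)));
/// # }
/// ```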
pub fn pos(&self, i: uint) -> Option<(uint, uint)> {
let (s, e) = (i * 2, i * 2 + 1);
if e >= self.locs.len() || self.locs.get(s).is_none() {
// VM guarantees that each pair of locations are both Some or None.
return None
}
Some((self.locs.get(s).unwrap(), self.locs.get(e).unwrap()))
}
/// Returns the matched string for the capture group `i`.
/// If `i` isn't a valid capture group or didn't match anything, then the
/// empty string is returned.
pub fn at(&self, i: uint) -> &'t str {
match self.pos(i) {
None => "",
Some((s, e)) => {
self.text.slice(s, e)
}
}
}
/// Returns the matched string for the capture group named `name`.
/// If `name` isn't a valid capture group or didn't match anything, then
/// the empty string is returned.
pub fn name(&self, name: &str) -> &'t str {
match self.named {
None => "",
Some(ref h) => {
match h.find_equiv(&name) {
None => "",
Some(i) => self.at(*i),
}
}
}
}
/// Creates an iterator of all the capture groups in order of appearance
/// in the regular expression.
pub fn iter(&'t self) -> SubCaptures<'t> {
SubCaptures { idx: 0, caps: self, }
}
/// Creates an iterator of all the capture group positions in order of
/// appearance in the regular expression. Positions are byte indices
/// in terms of the original string matched.
pub fn iter_pos(&'t self) -> SubCapturesPos<'t> {
SubCapturesPos { idx: 0, caps: self, }
}
/// Expands all instances of `$name` in `text` to the corresponding capture
/// group `name`.
///
/// `name` may be an integer corresponding to the index of the
/// capture group (counted by order of opening parenthesis where `0` is the
/// entire match) or it can be a name (consisting of letters, digits or
/// underscores) corresponding to a named capture group.
///
/// If `name` isn't a valid capture group (whether the name doesn't exist or
/// isn't a valid index), then it is replaced with the empty string.
///
/// To write a literal `$` use `$$`.
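///
/// # Example
///
/// An illustrative sketch using named groups (the text is made up):
///
/// ```rust
/// # #![feature(phase)]
/// # extern crate regex; #[phase(syntax)] extern crate regex_macros;
/// # fn main() {
/// let re = regex!(r"(?P<y>\d{4})-(?P<m>\d{2})");
/// let caps = re.captures("1941-12").unwrap();
/// assert_eq!(caps.expand("$m/$y").as_slice(), "12/1941");
/// # }
/// ```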
pub fn expand(&self, text: &str) -> StrBuf {
// How evil can you get?
// FIXME: Don't use regexes for this. It's completely unnecessary.
let re = Regex::new(r"(^|[^$]|\b)\$(\w+)").unwrap();
let text = re.replace_all(text, |refs: &Captures| -> ~str {
let (pre, name) = (refs.at(1), refs.at(2));
pre + match from_str::<uint>(name) {
None => self.name(name).to_owned(),
Some(i) => self.at(i).to_owned(),
}
});
let re = Regex::new(r"\$\$").unwrap();
re.replace_all(text.as_slice(), NoExpand("$"))
}
}
impl<'t> Container for Captures<'t> {
/// Returns the number of captured groups.
#[inline]
fn len(&self) -> uint {
self.locs.len() / 2
}
}
/// An iterator over capture groups for a particular match of a regular
/// expression.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCaptures<'t> {
idx: uint,
caps: &'t Captures<'t>,
}
impl<'t> Iterator<&'t str> for SubCaptures<'t> {
fn next(&mut self) -> Option<&'t str> {
if self.idx < self.caps.len() {
self.idx += 1;
Some(self.caps.at(self.idx - 1))
} else {
None
}
}
}
/// An iterator over capture group positions for a particular match of a
/// regular expression.
///
/// Positions are byte indices in terms of the original string matched.
///
/// `'t` is the lifetime of the matched text.
pub struct SubCapturesPos<'t> {
idx: uint,
caps: &'t Captures<'t>,
}
impl<'t> Iterator<Option<(uint, uint)>> for SubCapturesPos<'t> {
fn next(&mut self) -> Option<Option<(uint, uint)>> {
if self.idx < self.caps.len() {
self.idx += 1;
Some(self.caps.pos(self.idx - 1))
} else {
None
}
}
}
/// An iterator that yields the capture groups for each successive,
/// non-overlapping match of a particular regular expression. The iterator
/// stops when no more matches can be found.
///
/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
/// of the matched string.
pub struct FindCaptures<'r, 't> {
re: &'r Regex,
search: &'t str,
last_match: Option<uint>,
last_end: uint,
}
impl<'r, 't> Iterator<Captures<'t>> for FindCaptures<'r, 't> {
fn next(&mut self) -> Option<Captures<'t>> {
if self.last_end > self.search.len() {
return None
}
let caps = exec_slice(self.re, Submatches, self.search,
self.last_end, self.search.len());
let (s, e) =
if !has_match(&caps) {
return None
} else {
(caps.get(0).unwrap(), caps.get(1).unwrap())
};
// Don't accept empty matches immediately following a match.
// i.e., no infinite loops please.
if e - s == 0 && Some(self.last_end) == self.last_match {
self.last_end += 1;
return self.next()
}
self.last_end = e;
self.last_match = Some(self.last_end);
Captures::new(self.re, self.search, caps)
}
}
/// An iterator over all non-overlapping matches for a particular string.
///
/// The iterator yields a tuple of integers corresponding to the start and end
/// of the match. The indices are byte offsets. The iterator stops when no more
/// matches can be found.
///
/// `'r` is the lifetime of the compiled expression and `'t` is the lifetime
/// of the matched string.
pub struct FindMatches<'r, 't> {
re: &'r Regex,
search: &'t str,
last_match: Option<uint>,
last_end: uint,
}
impl<'r, 't> Iterator<(uint, uint)> for FindMatches<'r, 't> {
fn next(&mut self) -> Option<(uint, uint)> {
if self.last_end > self.search.len() {
return None
}
let caps = exec_slice(self.re, Location, self.search,
self.last_end, self.search.len());
let (s, e) =
if !has_match(&caps) {
return None
} else {
(caps.get(0).unwrap(), caps.get(1).unwrap())
};
// Don't accept empty matches immediately following a match.
// i.e., no infinite loops please.
if e - s == 0 && Some(self.last_end) == self.last_match {
self.last_end += 1;
return self.next()
}
self.last_end = e;
self.last_match = Some(self.last_end);
Some((s, e))
}
}
fn exec(re: &Regex, which: MatchKind, input: &str) -> CaptureLocs {
exec_slice(re, which, input, 0, input.len())
}
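// Dispatches the search over input[s..e]: a dynamically compiled regex runs
// on the matching VM (`vm::run`), while a regex produced by the `regex!`
// syntax extension calls its natively compiled matching function directly.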
fn exec_slice(re: &Regex, which: MatchKind,
input: &str, s: uint, e: uint) -> CaptureLocs {
match re.p {
Dynamic(ref prog) => vm::run(which, prog, input, s, e),
Native(exec) => exec(which, input, s, e),
}
}
#[inline]
fn has_match(caps: &CaptureLocs) -> bool {
caps.len() >= 2 && caps.get(0).is_some() && caps.get(1).is_some()
}
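A minimal usage sketch of the capture API above (illustrative only, not part of this commit; it relies on `Regex::new` and the `captures` method exercised by the tests later in this patch):

    let re = Regex::new(r"(?P<first>\w+)\s+(?P<last>\w+)").unwrap();
    let caps = re.captures("Andrew Gallant").unwrap();
    assert_eq!(caps.at(0), "Andrew Gallant");   // group 0 is the whole match
    assert_eq!(caps.name("last"), "Gallant");   // access by group name
    assert_eq!(caps.pos(1), Some((0u, 6u)));    // byte offsets of group 1
    assert_eq!(caps.expand("$last, $first"), StrBuf::from_str("Gallant, Andrew"));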

179
src/libregex/test/bench.rs Normal file

@ -0,0 +1,179 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use rand::{Rng, task_rng};
use stdtest::Bencher;
use std::str;
use regex::{Regex, NoExpand};
fn bench_assert_match(b: &mut Bencher, re: Regex, text: &str) {
b.iter(|| if !re.is_match(text) { fail!("no match") });
}
#[bench]
fn no_exponential(b: &mut Bencher) {
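// Pathological case: "a?" repeated n times followed by "a" repeated n times.
// A backtracking engine takes exponential time on this input; this
// implementation is expected to stay roughly linear.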
let n = 100;
let re = Regex::new("a?".repeat(n) + "a".repeat(n)).unwrap();
let text = "a".repeat(n);
bench_assert_match(b, re, text);
}
#[bench]
fn literal(b: &mut Bencher) {
let re = regex!("y");
let text = "x".repeat(50) + "y";
bench_assert_match(b, re, text);
}
#[bench]
fn not_literal(b: &mut Bencher) {
let re = regex!(".y");
let text = "x".repeat(50) + "y";
bench_assert_match(b, re, text);
}
#[bench]
fn match_class(b: &mut Bencher) {
let re = regex!("[abcdw]");
let text = "xxxx".repeat(20) + "w";
bench_assert_match(b, re, text);
}
#[bench]
fn match_class_in_range(b: &mut Bencher) {
// 'b' is between 'a' and 'c', so the class range checking doesn't help.
let re = regex!("[ac]");
let text = "bbbb".repeat(20) + "c";
bench_assert_match(b, re, text);
}
#[bench]
fn replace_all(b: &mut Bencher) {
let re = regex!("[cjrw]");
let text = "abcdefghijklmnopqrstuvwxyz";
// FIXME: This isn't using the $name expand stuff.
// It's possible RE2/Go is using it, but currently, the expand in this
// crate is actually compiling a regex, so it's incredibly slow.
b.iter(|| re.replace_all(text, NoExpand("")));
}
#[bench]
fn anchored_literal_short_non_match(b: &mut Bencher) {
let re = regex!("^zbc(d|e)");
let text = "abcdefghijklmnopqrstuvwxyz";
b.iter(|| re.is_match(text));
}
#[bench]
fn anchored_literal_long_non_match(b: &mut Bencher) {
let re = regex!("^zbc(d|e)");
let text = "abcdefghijklmnopqrstuvwxyz".repeat(15);
b.iter(|| re.is_match(text));
}
#[bench]
fn anchored_literal_short_match(b: &mut Bencher) {
let re = regex!("^.bc(d|e)");
let text = "abcdefghijklmnopqrstuvwxyz";
b.iter(|| re.is_match(text));
}
#[bench]
fn anchored_literal_long_match(b: &mut Bencher) {
let re = regex!("^.bc(d|e)");
let text = "abcdefghijklmnopqrstuvwxyz".repeat(15);
b.iter(|| re.is_match(text));
}
#[bench]
fn one_pass_short_a(b: &mut Bencher) {
let re = regex!("^.bc(d|e)*$");
let text = "abcddddddeeeededd";
b.iter(|| re.is_match(text));
}
#[bench]
fn one_pass_short_a_not(b: &mut Bencher) {
let re = regex!(".bc(d|e)*$");
let text = "abcddddddeeeededd";
b.iter(|| re.is_match(text));
}
#[bench]
fn one_pass_short_b(b: &mut Bencher) {
let re = regex!("^.bc(?:d|e)*$");
let text = "abcddddddeeeededd";
b.iter(|| re.is_match(text));
}
#[bench]
fn one_pass_short_b_not(b: &mut Bencher) {
let re = regex!(".bc(?:d|e)*$");
let text = "abcddddddeeeededd";
b.iter(|| re.is_match(text));
}
#[bench]
fn one_pass_long_prefix(b: &mut Bencher) {
let re = regex!("^abcdefghijklmnopqrstuvwxyz.*$");
let text = "abcdefghijklmnopqrstuvwxyz";
b.iter(|| re.is_match(text));
}
#[bench]
fn one_pass_long_prefix_not(b: &mut Bencher) {
let re = regex!("^.bcdefghijklmnopqrstuvwxyz.*$");
let text = "abcdefghijklmnopqrstuvwxyz";
b.iter(|| re.is_match(text));
}
macro_rules! throughput(
($name:ident, $regex:expr, $size:expr) => (
#[bench]
fn $name(b: &mut Bencher) {
let text = gen_text($size);
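// Recording bytes per iteration lets the bench harness report
// throughput (MB/s) rather than just time per iteration.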
b.bytes = $size;
b.iter(|| if $regex.is_match(text) { fail!("match") });
}
);
)
fn easy0() -> Regex { regex!("ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
fn easy1() -> Regex { regex!("A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$") }
fn medium() -> Regex { regex!("[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
fn hard() -> Regex { regex!("[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$") }
fn gen_text(n: uint) -> ~str {
let mut rng = task_rng();
let mut bytes = rng.gen_ascii_str(n).into_bytes();
for (i, b) in bytes.mut_iter().enumerate() {
if i % 20 == 0 {
*b = '\n' as u8
}
}
str::from_utf8(bytes).unwrap().to_owned()
}
throughput!(easy0_32, easy0(), 32)
throughput!(easy0_1K, easy0(), 1<<10)
throughput!(easy0_32K, easy0(), 32<<10)
throughput!(easy1_32, easy1(), 32)
throughput!(easy1_1K, easy1(), 1<<10)
throughput!(easy1_32K, easy1(), 32<<10)
throughput!(medium_32, medium(), 32)
throughput!(medium_1K, medium(), 1<<10)
throughput!(medium_32K,medium(), 32<<10)
throughput!(hard_32, hard(), 32)
throughput!(hard_1K, hard(), 1<<10)
throughput!(hard_32K,hard(), 32<<10)

373
src/libregex/test/matches.rs Normal file

@ -0,0 +1,373 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-tidy-linelength
// DO NOT EDIT. Automatically generated by 'src/etc/regex-match-tests'
// on 2014-04-23 01:33:36.539280.
// Tests from basic.dat
mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)))
mat!(match_basic_4, r"a...b", r"abababbb", Some((2, 7)))
mat!(match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8)))
mat!(match_basic_6, r"\)", r"()", Some((1, 2)))
mat!(match_basic_7, r"a]", r"a]a", Some((0, 2)))
mat!(match_basic_9, r"\}", r"}", Some((0, 1)))
mat!(match_basic_10, r"\]", r"]", Some((0, 1)))
mat!(match_basic_12, r"]", r"]", Some((0, 1)))
mat!(match_basic_15, r"^a", r"ax", Some((0, 1)))
mat!(match_basic_16, r"\^a", r"a^a", Some((1, 3)))
mat!(match_basic_17, r"a\^", r"a^", Some((0, 2)))
mat!(match_basic_18, r"a$", r"aa", Some((1, 2)))
mat!(match_basic_19, r"a\$", r"a$", Some((0, 2)))
mat!(match_basic_20, r"^$", r"", Some((0, 0)))
mat!(match_basic_21, r"$^", r"", Some((0, 0)))
mat!(match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2)))
mat!(match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1)))
mat!(match_basic_24, r"(..)*(...)*", r"a", Some((0, 0)))
mat!(match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4)))
mat!(match_basic_26, r"(ab|a)(bc|c)", r"abc", Some((0, 3)), Some((0, 2)), Some((2, 3)))
mat!(match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2)))
mat!(match_basic_28, r"a{0}b", r"ab", Some((1, 2)))
mat!(match_basic_29, r"(a*)(b?)(b+)b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7)))
mat!(match_basic_30, r"(a*)(b{0,1})(b{1,})b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7)))
mat!(match_basic_32, r"((a|a)|a)", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)))
mat!(match_basic_33, r"(a*)(a|aa)", r"aaaa", Some((0, 4)), Some((0, 3)), Some((3, 4)))
mat!(match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4)))
mat!(match_basic_35, r"a(b)|c(d)|a(e)f", r"aef", Some((0, 3)), None, None, Some((1, 2)))
mat!(match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1)))
mat!(match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1)))
mat!(match_basic_38, r"(a|b)c|a(b|c)", r"ab", Some((0, 2)), None, Some((1, 2)))
mat!(match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2)))
mat!(match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2)))
mat!(match_basic_41, r"(.a|.b).*|.*(.a|.b)", r"xa", Some((0, 2)), Some((0, 2)))
mat!(match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2)))
mat!(match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2)))
mat!(match_basic_44, r"ab|abab", r"abbabab", Some((0, 2)))
mat!(match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8)))
mat!(match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9)))
mat!(match_basic_47, r"(aa|aaa)*|(a|aaaaa)", r"aa", Some((0, 2)), Some((0, 2)))
mat!(match_basic_48, r"(a.|.a.)*|(a|.a...)", r"aa", Some((0, 2)), Some((0, 2)))
mat!(match_basic_49, r"ab|a", r"xabc", Some((1, 3)))
mat!(match_basic_50, r"ab|a", r"xxabc", Some((2, 4)))
mat!(match_basic_51, r"(?i)(Ab|cD)*", r"aBcD", Some((0, 4)), Some((2, 4)))
mat!(match_basic_52, r"[^-]", r"--a", Some((2, 3)))
mat!(match_basic_53, r"[a-]*", r"--a", Some((0, 3)))
mat!(match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4)))
mat!(match_basic_55, r":::1:::0:|:::1:1:0:", r":::0:::1:::1:::0:", Some((8, 17)))
mat!(match_basic_56, r":::1:::0:|:::1:1:1:", r":::0:::1:::1:::0:", Some((8, 17)))
mat!(match_basic_57, r"[[:upper:]]", r"A", Some((0, 1)))
mat!(match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3)))
mat!(match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3)))
mat!(match_basic_65, r"
", r"
", Some((0, 1)))
mat!(match_basic_66, r"
", r"
", Some((0, 1)))
mat!(match_basic_67, r"[^a]", r"
", Some((0, 1)))
mat!(match_basic_68, r"
a", r"
a", Some((0, 2)))
mat!(match_basic_69, r"(a)(b)(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((2, 3)))
mat!(match_basic_70, r"xxx", r"xxx", Some((0, 3)))
mat!(match_basic_71, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 6,", Some((0, 6)))
mat!(match_basic_72, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"2/7", Some((0, 3)))
mat!(match_basic_73, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 1,Feb 6", Some((5, 11)))
mat!(match_basic_74, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", r"x", Some((0, 1)), Some((0, 1)), Some((0, 1)))
mat!(match_basic_75, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", r"xx", Some((0, 2)), Some((1, 2)), Some((1, 2)))
mat!(match_basic_76, r"a?(ab|ba)*", r"ababababababababababababababababababababababababababababababababababababababababa", Some((0, 81)), Some((79, 81)))
mat!(match_basic_77, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabbbbaa", Some((18, 25)))
mat!(match_basic_78, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabaa", Some((18, 22)))
mat!(match_basic_79, r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", r"baaabbbabac", Some((7, 11)))
mat!(match_basic_80, r".*", r"", Some((0, 2)))
mat!(match_basic_81, r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", Some((53, 57)))
mat!(match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10)))
mat!(match_basic_84, r"^", r"", Some((0, 0)))
mat!(match_basic_85, r"$", r"", Some((0, 0)))
mat!(match_basic_86, r"^$", r"", Some((0, 0)))
mat!(match_basic_87, r"^a$", r"a", Some((0, 1)))
mat!(match_basic_88, r"abc", r"abc", Some((0, 3)))
mat!(match_basic_89, r"abc", r"xabcy", Some((1, 4)))
mat!(match_basic_90, r"abc", r"ababc", Some((2, 5)))
mat!(match_basic_91, r"ab*c", r"abc", Some((0, 3)))
mat!(match_basic_92, r"ab*bc", r"abc", Some((0, 3)))
mat!(match_basic_93, r"ab*bc", r"abbc", Some((0, 4)))
mat!(match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6)))
mat!(match_basic_95, r"ab+bc", r"abbc", Some((0, 4)))
mat!(match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6)))
mat!(match_basic_97, r"ab?bc", r"abbc", Some((0, 4)))
mat!(match_basic_98, r"ab?bc", r"abc", Some((0, 3)))
mat!(match_basic_99, r"ab?c", r"abc", Some((0, 3)))
mat!(match_basic_100, r"^abc$", r"abc", Some((0, 3)))
mat!(match_basic_101, r"^abc", r"abcc", Some((0, 3)))
mat!(match_basic_102, r"abc$", r"aabc", Some((1, 4)))
mat!(match_basic_103, r"^", r"abc", Some((0, 0)))
mat!(match_basic_104, r"$", r"abc", Some((3, 3)))
mat!(match_basic_105, r"a.c", r"abc", Some((0, 3)))
mat!(match_basic_106, r"a.c", r"axc", Some((0, 3)))
mat!(match_basic_107, r"a.*c", r"axyzc", Some((0, 5)))
mat!(match_basic_108, r"a[bc]d", r"abd", Some((0, 3)))
mat!(match_basic_109, r"a[b-d]e", r"ace", Some((0, 3)))
mat!(match_basic_110, r"a[b-d]", r"aac", Some((1, 3)))
mat!(match_basic_111, r"a[-b]", r"a-", Some((0, 2)))
mat!(match_basic_112, r"a[b-]", r"a-", Some((0, 2)))
mat!(match_basic_113, r"a]", r"a]", Some((0, 2)))
mat!(match_basic_114, r"a[]]b", r"a]b", Some((0, 3)))
mat!(match_basic_115, r"a[^bc]d", r"aed", Some((0, 3)))
mat!(match_basic_116, r"a[^-b]c", r"adc", Some((0, 3)))
mat!(match_basic_117, r"a[^]b]c", r"adc", Some((0, 3)))
mat!(match_basic_118, r"ab|cd", r"abc", Some((0, 2)))
mat!(match_basic_119, r"ab|cd", r"abcd", Some((0, 2)))
mat!(match_basic_120, r"a\(b", r"a(b", Some((0, 3)))
mat!(match_basic_121, r"a\(*b", r"ab", Some((0, 2)))
mat!(match_basic_122, r"a\(*b", r"a((b", Some((0, 4)))
mat!(match_basic_123, r"((a))", r"abc", Some((0, 1)), Some((0, 1)), Some((0, 1)))
mat!(match_basic_124, r"(a)b(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((2, 3)))
mat!(match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7)))
mat!(match_basic_126, r"a*", r"aaa", Some((0, 3)))
mat!(match_basic_128, r"(a*)*", r"-", Some((0, 0)), None)
mat!(match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0)))
mat!(match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None)
mat!(match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2)))
mat!(match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2)))
mat!(match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1)))
mat!(match_basic_135, r"[^ab]*", r"cde", Some((0, 3)))
mat!(match_basic_137, r"(^)*", r"-", Some((0, 0)), None)
mat!(match_basic_138, r"a*", r"", Some((0, 0)))
mat!(match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5)))
mat!(match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1)))
mat!(match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1)))
mat!(match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1)))
mat!(match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None)
mat!(match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7)))
mat!(match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3)))
mat!(match_basic_147, r"ab*", r"xayabbbz", Some((1, 2)))
mat!(match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4)))
mat!(match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3)))
mat!(match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2)))
mat!(match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1)))
mat!(match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3)))
mat!(match_basic_153, r"a([bc]*)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4)))
mat!(match_basic_154, r"a([bc]+)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4)))
mat!(match_basic_155, r"a([bc]*)(c+d)", r"abcd", Some((0, 4)), Some((1, 2)), Some((2, 4)))
mat!(match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7)))
mat!(match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2)))
mat!(match_basic_158, r"((a)(b)c)(d)", r"abcd", Some((0, 4)), Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((3, 4)))
mat!(match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5)))
mat!(match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3)))
mat!(match_basic_161, r"(bc+d$|ef*g.|h?i(j|k))", r"effgz", Some((0, 5)), Some((0, 5)))
mat!(match_basic_162, r"(bc+d$|ef*g.|h?i(j|k))", r"ij", Some((0, 2)), Some((0, 2)), Some((1, 2)))
mat!(match_basic_163, r"(bc+d$|ef*g.|h?i(j|k))", r"reffgz", Some((1, 6)), Some((1, 6)))
mat!(match_basic_164, r"(((((((((a)))))))))", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)))
mat!(match_basic_165, r"multiple words", r"multiple words yeah", Some((0, 14)))
mat!(match_basic_166, r"(.*)c(.*)", r"abcde", Some((0, 5)), Some((0, 2)), Some((3, 5)))
mat!(match_basic_167, r"abcd", r"abcd", Some((0, 4)))
mat!(match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3)))
mat!(match_basic_169, r"a[-]?c", r"ac", Some((0, 3)))
mat!(match_basic_170, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qaddafi", Some((0, 15)), None, Some((10, 12)))
mat!(match_basic_171, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mo'ammar Gadhafi", Some((0, 16)), None, Some((11, 13)))
mat!(match_basic_172, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Kaddafi", Some((0, 15)), None, Some((10, 12)))
mat!(match_basic_173, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qadhafi", Some((0, 15)), None, Some((10, 12)))
mat!(match_basic_174, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gadafi", Some((0, 14)), None, Some((10, 11)))
mat!(match_basic_175, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadafi", Some((0, 15)), None, Some((11, 12)))
mat!(match_basic_176, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moamar Gaddafi", Some((0, 14)), None, Some((9, 11)))
mat!(match_basic_177, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadhdhafi", Some((0, 18)), None, Some((13, 15)))
mat!(match_basic_178, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Khaddafi", Some((0, 16)), None, Some((11, 13)))
mat!(match_basic_179, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafy", Some((0, 16)), None, Some((11, 13)))
mat!(match_basic_180, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghadafi", Some((0, 15)), None, Some((11, 12)))
mat!(match_basic_181, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafi", Some((0, 16)), None, Some((11, 13)))
mat!(match_basic_182, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muamar Kaddafi", Some((0, 14)), None, Some((9, 11)))
mat!(match_basic_183, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Quathafi", Some((0, 16)), None, Some((11, 13)))
mat!(match_basic_184, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gheddafi", Some((0, 16)), None, Some((11, 13)))
mat!(match_basic_185, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Khadafy", Some((0, 15)), None, Some((11, 12)))
mat!(match_basic_186, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Qudhafi", Some((0, 15)), None, Some((10, 12)))
mat!(match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4)))
mat!(match_basic_188, r"^.+$", r"vivi", Some((0, 4)))
mat!(match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4)))
mat!(match_basic_190, r"^([^!.]+).att.com!(.+)$", r"gryphon.att.com!eby", Some((0, 19)), Some((0, 7)), Some((16, 19)))
mat!(match_basic_191, r"^([^!]+!)?([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3)))
mat!(match_basic_192, r"^([^!]+!)?([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)))
mat!(match_basic_193, r"^([^!]+!)?([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)))
mat!(match_basic_194, r"^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), Some((4, 8)), Some((8, 11)))
mat!(match_basic_195, r"((foo)|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), None, Some((0, 3)))
mat!(match_basic_196, r"((foo)|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), None, Some((4, 7)))
mat!(match_basic_197, r"((foo)|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3)))
mat!(match_basic_198, r"((foo)|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3)))
mat!(match_basic_199, r"((foo)|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)))
mat!(match_basic_200, r"((foo)|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3)))
mat!(match_basic_201, r"(foo|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), Some((0, 3)))
mat!(match_basic_202, r"(foo|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), Some((4, 7)))
mat!(match_basic_203, r"(foo|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)))
mat!(match_basic_204, r"(foo|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3)))
mat!(match_basic_205, r"(foo|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)))
mat!(match_basic_206, r"(foo|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)))
mat!(match_basic_207, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11)))
mat!(match_basic_208, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3)))
mat!(match_basic_209, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)))
mat!(match_basic_210, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), None, None, Some((4, 8)), Some((8, 11)))
mat!(match_basic_211, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)))
mat!(match_basic_212, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bas", Some((0, 3)), Some((0, 3)), None, Some((0, 3)))
mat!(match_basic_213, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bar!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7)))
mat!(match_basic_214, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11)))
mat!(match_basic_215, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7)))
mat!(match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4)))
mat!(match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4)))
mat!(match_basic_218, r"\\XXX", r"\XXX", Some((0, 4)))
mat!(match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4)))
mat!(match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4)))
mat!(match_basic_221, r"\\000", r"\000", Some((0, 4)))
// Tests from nullsubexpr.dat
mat!(match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None)
mat!(match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0)))
mat!(match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0)))
mat!(match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_17, r"(a+)+", r"x", None)
mat!(match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None)
mat!(match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0)))
mat!(match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None)
mat!(match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_34, r"([^b]*)*", r"aaaaaab", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_41, r"([ab]*)*", r"aaaabcde", Some((0, 5)), Some((0, 5)))
mat!(match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1)))
mat!(match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None)
mat!(match_nullsubexpr_46, r"([^ab]*)*", r"ccccxx", Some((0, 6)), Some((0, 6)))
mat!(match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None)
mat!(match_nullsubexpr_50, r"((z)+|a)*", r"zabcde", Some((0, 2)), Some((1, 2)))
mat!(match_nullsubexpr_69, r"(a*)*(x)", r"x", Some((0, 1)), None, Some((0, 1)))
mat!(match_nullsubexpr_70, r"(a*)*(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2)))
mat!(match_nullsubexpr_71, r"(a*)*(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2)))
mat!(match_nullsubexpr_73, r"(a*)+(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1)))
mat!(match_nullsubexpr_74, r"(a*)+(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2)))
mat!(match_nullsubexpr_75, r"(a*)+(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2)))
mat!(match_nullsubexpr_77, r"(a*){2}(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1)))
mat!(match_nullsubexpr_78, r"(a*){2}(x)", r"ax", Some((0, 2)), Some((1, 1)), Some((1, 2)))
mat!(match_nullsubexpr_79, r"(a*){2}(x)", r"axa", Some((0, 2)), Some((1, 1)), Some((1, 2)))
// Tests from repetition.dat
mat!(match_repetition_10, r"((..)|(.))", r"", None)
mat!(match_repetition_11, r"((..)|(.))((..)|(.))", r"", None)
mat!(match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None)
mat!(match_repetition_14, r"((..)|(.)){1}", r"", None)
mat!(match_repetition_15, r"((..)|(.)){2}", r"", None)
mat!(match_repetition_16, r"((..)|(.)){3}", r"", None)
mat!(match_repetition_18, r"((..)|(.))*", r"", Some((0, 0)))
mat!(match_repetition_20, r"((..)|(.))", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1)))
mat!(match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None)
mat!(match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None)
mat!(match_repetition_24, r"((..)|(.)){1}", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1)))
mat!(match_repetition_25, r"((..)|(.)){2}", r"a", None)
mat!(match_repetition_26, r"((..)|(.)){3}", r"a", None)
mat!(match_repetition_28, r"((..)|(.))*", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1)))
mat!(match_repetition_30, r"((..)|(.))", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_31, r"((..)|(.))((..)|(.))", r"aa", Some((0, 2)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)))
mat!(match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None)
mat!(match_repetition_34, r"((..)|(.)){1}", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_35, r"((..)|(.)){2}", r"aa", Some((0, 2)), Some((1, 2)), None, Some((1, 2)))
mat!(match_repetition_36, r"((..)|(.)){3}", r"aa", None)
mat!(match_repetition_38, r"((..)|(.))*", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_40, r"((..)|(.))", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_41, r"((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)))
mat!(match_repetition_42, r"((..)|(.))((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)), Some((2, 3)), None, Some((2, 3)))
mat!(match_repetition_44, r"((..)|(.)){1}", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_46, r"((..)|(.)){2}", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3)))
mat!(match_repetition_47, r"((..)|(.)){3}", r"aaa", Some((0, 3)), Some((2, 3)), None, Some((2, 3)))
mat!(match_repetition_50, r"((..)|(.))*", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3)))
mat!(match_repetition_52, r"((..)|(.))", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_53, r"((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None)
mat!(match_repetition_54, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)), Some((3, 4)), None, Some((3, 4)))
mat!(match_repetition_56, r"((..)|(.)){1}", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_57, r"((..)|(.)){2}", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None)
mat!(match_repetition_59, r"((..)|(.)){3}", r"aaaa", Some((0, 4)), Some((3, 4)), Some((0, 2)), Some((3, 4)))
mat!(match_repetition_61, r"((..)|(.))*", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None)
mat!(match_repetition_63, r"((..)|(.))", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_64, r"((..)|(.))((..)|(.))", r"aaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None)
mat!(match_repetition_65, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaa", Some((0, 5)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 5)), None, Some((4, 5)))
mat!(match_repetition_67, r"((..)|(.)){1}", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_68, r"((..)|(.)){2}", r"aaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None)
mat!(match_repetition_70, r"((..)|(.)){3}", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5)))
mat!(match_repetition_73, r"((..)|(.))*", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5)))
mat!(match_repetition_75, r"((..)|(.))", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_76, r"((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None)
mat!(match_repetition_77, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 6)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 6)), Some((4, 6)), None)
mat!(match_repetition_79, r"((..)|(.)){1}", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None)
mat!(match_repetition_80, r"((..)|(.)){2}", r"aaaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None)
mat!(match_repetition_81, r"((..)|(.)){3}", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None)
mat!(match_repetition_83, r"((..)|(.))*", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None)
mat!(match_repetition_90, r"X(.?){0,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_91, r"X(.?){1,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_92, r"X(.?){2,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_93, r"X(.?){3,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_94, r"X(.?){4,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_95, r"X(.?){5,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_96, r"X(.?){6,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_97, r"X(.?){7,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)))
mat!(match_repetition_98, r"X(.?){8,}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_100, r"X(.?){0,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_102, r"X(.?){1,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_104, r"X(.?){2,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_106, r"X(.?){3,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_108, r"X(.?){4,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_110, r"X(.?){5,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_112, r"X(.?){6,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_114, r"X(.?){7,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_115, r"X(.?){8,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)))
mat!(match_repetition_126, r"(a|ab|c|bcd){0,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)))
mat!(match_repetition_127, r"(a|ab|c|bcd){1,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)))
mat!(match_repetition_128, r"(a|ab|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)))
mat!(match_repetition_129, r"(a|ab|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)))
mat!(match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None)
mat!(match_repetition_131, r"(a|ab|c|bcd){0,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)))
mat!(match_repetition_132, r"(a|ab|c|bcd){1,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)))
mat!(match_repetition_133, r"(a|ab|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)))
mat!(match_repetition_134, r"(a|ab|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)))
mat!(match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None)
mat!(match_repetition_136, r"(a|ab|c|bcd)*(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)))
mat!(match_repetition_137, r"(a|ab|c|bcd)+(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)))
mat!(match_repetition_143, r"(ab|a|c|bcd){0,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_145, r"(ab|a|c|bcd){1,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_147, r"(ab|a|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_149, r"(ab|a|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None)
mat!(match_repetition_152, r"(ab|a|c|bcd){0,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_154, r"(ab|a|c|bcd){1,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_156, r"(ab|a|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_158, r"(ab|a|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None)
mat!(match_repetition_161, r"(ab|a|c|bcd)*(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))
mat!(match_repetition_163, r"(ab|a|c|bcd)+(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)))

29
src/libregex/test/mod.rs Normal file

@ -0,0 +1,29 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
#[cfg(not(stage1))]
#[phase(syntax)]
extern crate regex_macros;
// Dirty hack: during stage1, test dynamic regexes; during stage2, test
// native regexes.
#[cfg(stage1)]
macro_rules! regex(
($re:expr) => (
match ::regex::Regex::new($re) {
Ok(re) => re,
Err(err) => fail!("{}", err),
}
);
)
mod bench;
mod tests;
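With this arrangement, a test written as `let re = regex!(r"\d+");` is compiled at build time by the `regex_macros` syntax extension in stage2, while in stage1 the fallback macro above expands it to roughly the following (a sketch of the expansion, not code in this file):

    let re = match ::regex::Regex::new(r"\d+") {
        Ok(re) => re,
        Err(err) => fail!("{}", err),
    };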

199
src/libregex/test/tests.rs Normal file

@ -0,0 +1,199 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-tidy-linelength
use regex::{Regex, NoExpand};
#[test]
fn splitn() {
let re = regex!(r"\d+");
let text = "cauchy123plato456tyler789binx";
let subs: Vec<&str> = re.splitn(text, 2).collect();
assert_eq!(subs, vec!("cauchy", "plato456tyler789binx"));
}
#[test]
fn split() {
let re = regex!(r"\d+");
let text = "cauchy123plato456tyler789binx";
let subs: Vec<&str> = re.split(text).collect();
assert_eq!(subs, vec!("cauchy", "plato", "tyler", "binx"));
}
macro_rules! replace(
($name:ident, $which:ident, $re:expr,
$search:expr, $replace:expr, $result:expr) => (
#[test]
fn $name() {
let re = regex!($re);
assert_eq!(re.$which($search, $replace), StrBuf::from_str($result));
}
);
)
replace!(rep_first, replace, r"\d", "age: 26", "Z", "age: Z6")
replace!(rep_plus, replace, r"\d+", "age: 26", "Z", "age: Z")
replace!(rep_all, replace_all, r"\d", "age: 26", "Z", "age: ZZ")
replace!(rep_groups, replace, r"(\S+)\s+(\S+)", "w1 w2", "$2 $1", "w2 w1")
replace!(rep_double_dollar, replace,
r"(\S+)\s+(\S+)", "w1 w2", "$2 $$1", "w2 $1")
replace!(rep_no_expand, replace,
r"(\S+)\s+(\S+)", "w1 w2", NoExpand("$2 $1"), "$2 $1")
replace!(rep_named, replace_all,
r"(?P<first>\S+)\s+(?P<last>\S+)(?P<space>\s*)",
"w1 w2 w3 w4", "$last $first$space", "w2 w1 w4 w3")
replace!(rep_trim, replace_all, "^[ \t]+|[ \t]+$", " \t trim me\t \t",
"", "trim me")
macro_rules! noparse(
($name:ident, $re:expr) => (
#[test]
fn $name() {
let re = $re;
match Regex::new(re) {
Err(_) => {},
Ok(_) => fail!("Regex '{}' should cause a parse error.", re),
}
}
);
)
noparse!(fail_double_repeat, "a**")
noparse!(fail_no_repeat_arg, "*")
noparse!(fail_no_repeat_arg_begin, "^*")
noparse!(fail_incomplete_escape, "\\")
noparse!(fail_class_incomplete, "[A-")
noparse!(fail_class_not_closed, "[A")
noparse!(fail_class_no_begin, r"[\A]")
noparse!(fail_class_no_end, r"[\z]")
noparse!(fail_class_no_boundary, r"[\b]")
noparse!(fail_open_paren, "(")
noparse!(fail_close_paren, ")")
noparse!(fail_invalid_range, "[a-Z]")
noparse!(fail_empty_capture_name, "(?P<>a)")
noparse!(fail_empty_capture_exp, "(?P<name>)")
noparse!(fail_bad_capture_name, "(?P<na-me>)")
noparse!(fail_bad_flag, "(?a)a")
noparse!(fail_empty_alt_before, "|a")
noparse!(fail_empty_alt_after, "a|")
noparse!(fail_counted_big_exact, "a{1001}")
noparse!(fail_counted_big_min, "a{1001,}")
noparse!(fail_counted_no_close, "a{1001")
noparse!(fail_unfinished_cap, "(?")
noparse!(fail_unfinished_escape, "\\")
noparse!(fail_octal_digit, r"\8")
noparse!(fail_hex_digit, r"\xG0")
noparse!(fail_hex_short, r"\xF")
noparse!(fail_hex_long_digits, r"\x{fffg}")
noparse!(fail_flag_bad, "(?a)")
noparse!(fail_flag_empty, "(?)")
noparse!(fail_double_neg, "(?-i-i)")
noparse!(fail_neg_empty, "(?i-)")
noparse!(fail_empty_group, "()")
noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)")
macro_rules! mat(
($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
#[test]
fn $name() {
let text = $text;
let expected: Vec<Option<(uint, uint)>> = vec!($($loc)+);
let r = regex!($re);
let got = match r.captures(text) {
Some(c) => c.iter_pos().collect::<Vec<Option<(uint, uint)>>>(),
None => vec!(None),
};
// The test set sometimes leaves out capture groups, so truncate the
// actual capture groups to match the test set.
let (sexpect, mut sgot) = (expected.as_slice(), got.as_slice());
if sgot.len() > sexpect.len() {
sgot = sgot.slice(0, sexpect.len())
}
if sexpect != sgot {
fail!("For RE '{}' against '{}', expected '{}' but got '{}'",
$re, text, sexpect, sgot);
}
}
);
)
// Some crazy expressions from regular-expressions.info.
mat!(match_ranges,
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 255", Some((5, 8)))
mat!(match_ranges_not,
r"\b(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\b",
"num: 256", None)
mat!(match_float1, r"[-+]?[0-9]*\.?[0-9]+", "0.1", Some((0, 3)))
mat!(match_float2, r"[-+]?[0-9]*\.?[0-9]+", "0.1.2", Some((0, 3)))
mat!(match_float3, r"[-+]?[0-9]*\.?[0-9]+", "a1.2", Some((1, 4)))
mat!(match_float4, r"^[-+]?[0-9]*\.?[0-9]+$", "1.a", None)
mat!(match_email, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail.com ", Some((8, 26)))
mat!(match_email_not, r"(?i)\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4}\b",
"mine is jam.slam@gmail ", None)
mat!(match_email_big, r"[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?",
"mine is jam.slam@gmail.com ", Some((8, 26)))
mat!(match_date1,
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-01-01", Some((0, 10)))
mat!(match_date2,
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-00-01", None)
mat!(match_date3,
r"^(19|20)\d\d[- /.](0[1-9]|1[012])[- /.](0[1-9]|[12][0-9]|3[01])$",
"1900-13-01", None)
// Exercise the flags.
mat!(match_flag_case, "(?i)abc", "ABC", Some((0, 3)))
mat!(match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3)))
mat!(match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None)
mat!(match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2)))
mat!(match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4)))
mat!(match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None)
mat!(match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2)))
mat!(match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11)))
mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1)))
mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2)))
mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2)))
// Some Unicode tests.
mat!(uni_literal, r"", "", Some((0, 3)))
mat!(uni_one, r"\pN", "", Some((0, 3)))
mat!(uni_mixed, r"\pN+", "1Ⅱ2", Some((0, 8)))
mat!(uni_not, r"\PN+", "ab", Some((0, 2)))
mat!(uni_not_class, r"[\PN]+", "ab", Some((0, 2)))
mat!(uni_not_class_neg, r"[^\PN]+", "ab", Some((2, 5)))
mat!(uni_case, r"(?i)Δ", "δ", Some((0, 2)))
mat!(uni_case_not, r"Δ", "δ", None)
mat!(uni_case_upper, r"\p{Lu}+", "ΛΘΓΔα", Some((0, 8)))
mat!(uni_case_upper_nocase_flag, r"(?i)\p{Lu}+", "ΛΘΓΔα", Some((0, 10)))
mat!(uni_case_upper_nocase, r"\p{L}+", "ΛΘΓΔα", Some((0, 10)))
mat!(uni_case_lower, r"\p{Ll}+", "ΛΘΓΔα", Some((8, 10)))
// Test the Unicode friendliness of Perl character classes.
mat!(uni_perl_w, r"\w+", "dδd", Some((0, 4)))
mat!(uni_perl_w_not, r"\w+", "", None)
mat!(uni_perl_w_neg, r"\W+", "", Some((0, 3)))
mat!(uni_perl_d, r"\d+", "1२३9", Some((0, 8)))
mat!(uni_perl_d_not, r"\d+", "", None)
mat!(uni_perl_d_neg, r"\D+", "", Some((0, 3)))
mat!(uni_perl_s, r"\s+", "", Some((0, 3)))
mat!(uni_perl_s_not, r"\s+", "", None)
mat!(uni_perl_s_neg, r"\S+", "", Some((0, 3)))
// And do the same for word boundaries.
mat!(uni_boundary_none, r"\d\b", "", None)
mat!(uni_boundary_ogham, r"\d\b", "6", Some((0, 1)))
// A whole mess of tests from Glenn Fowler's regex test suite.
// Generated by the 'src/etc/regex-match-tests' program.
mod matches;

19
src/libregex/testdata/LICENSE vendored Normal file

@ -0,0 +1,19 @@
The following license covers testregex.c and all associated test data.
Permission is hereby granted, free of charge, to any person obtaining a
copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of the
Software, and to permit persons to whom the Software is furnished to do
so, subject to the following disclaimer:
THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

17
src/libregex/testdata/README vendored Normal file

@ -0,0 +1,17 @@
Test data was taken from the Go distribution, which was in turn taken from the
testregex test suite:
http://www2.research.att.com/~astopen/testregex/testregex.html
The LICENSE in this directory corresponds to the LICENSE that the data was
released under.
The tests themselves were modified for RE2/Go. A couple were modified further
by me, Andrew Gallant (only in repetition.dat), so that RE2/Go would pass them.
(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
have been a bad idea, but I think being consistent with an established regex
library is worth something.
Note that these files are read by 'src/etc/regex-match-tests' and turned into
Rust tests found in 'src/libregex/test/matches.rs'.
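For example, the first entry of basic.dat below,

    BE abracadabra$ abracadabracadabra (7,18)

is the line that produces the generated test shown earlier in matches.rs:

    mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)))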

221
src/libregex/testdata/basic.dat vendored Normal file

@ -0,0 +1,221 @@
NOTE all standard compliant implementations should pass these : 2002-05-31
BE abracadabra$ abracadabracadabra (7,18)
BE a...b abababbb (2,7)
BE XXXXXX ..XXXXXX (2,8)
E \) () (1,2)
BE a] a]a (0,2)
B } } (0,1)
E \} } (0,1)
BE \] ] (0,1)
B ] ] (0,1)
E ] ] (0,1)
B { { (0,1)
B } } (0,1)
BE ^a ax (0,1)
BE \^a a^a (1,3)
BE a\^ a^ (0,2)
BE a$ aa (1,2)
BE a\$ a$ (0,2)
BE ^$ NULL (0,0)
E $^ NULL (0,0)
E a($) aa (1,2)(2,2)
E a*(^a) aa (0,1)(0,1)
E (..)*(...)* a (0,0)
E (..)*(...)* abcd (0,4)(2,4)
E (ab|a)(bc|c) abc (0,3)(0,2)(2,3)
E (ab)c|abc abc (0,3)(0,2)
E a{0}b ab (1,2)
E (a*)(b?)(b+)b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E (a*)(b{0,1})(b{1,})b{3} aaabbbbbbb (0,10)(0,3)(3,4)(4,7)
E a{9876543210} NULL BADBR
E ((a|a)|a) a (0,1)(0,1)(0,1)
E (a*)(a|aa) aaaa (0,4)(0,3)(3,4)
E a*(a.|aa) aaaa (0,4)(2,4)
E a(b)|c(d)|a(e)f aef (0,3)(?,?)(?,?)(1,2)
E (a|b)?.* b (0,1)(0,1)
E (a|b)c|a(b|c) ac (0,2)(0,1)
E (a|b)c|a(b|c) ab (0,2)(?,?)(1,2)
E (a|b)*c|(a|ab)*c abc (0,3)(1,2)
E (a|b)*c|(a|ab)*c xc (1,2)
E (.a|.b).*|.*(.a|.b) xa (0,2)(0,2)
E a?(ab|ba)ab abab (0,4)(0,2)
E a?(ac{0}b|ba)ab abab (0,4)(0,2)
E ab|abab abbabab (0,2)
E aba|bab|bba baaabbbaba (5,8)
E aba|bab baaabbbaba (6,9)
E (aa|aaa)*|(a|aaaaa) aa (0,2)(0,2)
E (a.|.a.)*|(a|.a...) aa (0,2)(0,2)
E ab|a xabc (1,3)
E ab|a xxabc (2,4)
Ei (Ab|cD)* aBcD (0,4)(2,4)
BE [^-] --a (2,3)
BE [a-]* --a (0,3)
BE [a-m-]* --amoma-- (0,4)
E :::1:::0:|:::1:1:0: :::0:::1:::1:::0: (8,17)
E :::1:::0:|:::1:1:1: :::0:::1:::1:::0: (8,17)
{E [[:upper:]] A (0,1) [[<element>]] not supported
E [[:lower:]]+ `az{ (1,3)
E [[:upper:]]+ @AZ[ (1,3)
# No collation in Go
#BE [[-]] [[-]] (2,4)
#BE [[.NIL.]] NULL ECOLLATE
#BE [[=aleph=]] NULL ECOLLATE
}
BE$ \n \n (0,1)
BEn$ \n \n (0,1)
BE$ [^a] \n (0,1)
BE$ \na \na (0,2)
E (a)(b)(c) abc (0,3)(0,1)(1,2)(2,3)
BE xxx xxx (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 6, (0,6)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) 2/7 (0,3)
E1 (^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$) feb 1,Feb 6 (5,11)
E3 ((((((((((((((((((((((((((((((x)))))))))))))))))))))))))))))) x (0,1)(0,1)(0,1)
E3 ((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))* xx (0,2)(1,2)(1,2)
E a?(ab|ba)* ababababababababababababababababababababababababababababababababababababababababa (0,81)(79,81)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabbbbaa (18,25)
E abaa|abbaa|abbbaa|abbbbaa ababbabbbabbbabbbbabaa (18,22)
E aaac|aabc|abac|abbc|baac|babc|bbac|bbbc baaabbbabac (7,11)
BE$ .* \x01\x7f (0,2)
E aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa (53,57)
L aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa NOMATCH
E a*a*a*a*a*b aaaaaaaaab (0,10)
BE ^ NULL (0,0)
BE $ NULL (0,0)
BE ^$ NULL (0,0)
BE ^a$ a (0,1)
BE abc abc (0,3)
BE abc xabcy (1,4)
BE abc ababc (2,5)
BE ab*c abc (0,3)
BE ab*bc abc (0,3)
BE ab*bc abbc (0,4)
BE ab*bc abbbbc (0,6)
E ab+bc abbc (0,4)
E ab+bc abbbbc (0,6)
E ab?bc abbc (0,4)
E ab?bc abc (0,3)
E ab?c abc (0,3)
BE ^abc$ abc (0,3)
BE ^abc abcc (0,3)
BE abc$ aabc (1,4)
BE ^ abc (0,0)
BE $ abc (3,3)
BE a.c abc (0,3)
BE a.c axc (0,3)
BE a.*c axyzc (0,5)
BE a[bc]d abd (0,3)
BE a[b-d]e ace (0,3)
BE a[b-d] aac (1,3)
BE a[-b] a- (0,2)
BE a[b-] a- (0,2)
BE a] a] (0,2)
BE a[]]b a]b (0,3)
BE a[^bc]d aed (0,3)
BE a[^-b]c adc (0,3)
BE a[^]b]c adc (0,3)
E ab|cd abc (0,2)
E ab|cd abcd (0,2)
E a\(b a(b (0,3)
E a\(*b ab (0,2)
E a\(*b a((b (0,4)
E ((a)) abc (0,1)(0,1)(0,1)
E (a)b(c) abc (0,3)(0,1)(2,3)
E a+b+c aabbabc (4,7)
E a* aaa (0,3)
#E (a*)* - (0,0)(0,0)
E (a*)* - (0,0)(?,?) RE2/Go
E (a*)+ - (0,0)(0,0)
#E (a*|b)* - (0,0)(0,0)
E (a*|b)* - (0,0)(?,?) RE2/Go
E (a+|b)* ab (0,2)(1,2)
E (a+|b)+ ab (0,2)(1,2)
E (a+|b)? ab (0,1)(0,1)
BE [^ab]* cde (0,3)
#E (^)* - (0,0)(0,0)
E (^)* - (0,0)(?,?) RE2/Go
BE a* NULL (0,0)
E ([abc])*d abbbcd (0,6)(4,5)
E ([abc])*bcd abcd (0,4)(0,1)
E a|b|c|d|e e (0,1)
E (a|b|c|d|e)f ef (0,2)(0,1)
#E ((a*|b))* - (0,0)(0,0)(0,0)
E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
BE abcd*efg abcdefg (0,7)
BE ab* xabyabbbz (1,3)
BE ab* xayabbbz (1,2)
E (ab|cd)e abcde (2,5)(2,4)
BE [abhgefdc]ij hij (0,3)
E (a|b)c*d abcd (1,4)(1,2)
E (ab|ab*)bc abc (0,3)(0,1)
E a([bc]*)c* abc (0,3)(1,3)
E a([bc]*)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]+)(c*d) abcd (0,4)(1,3)(3,4)
E a([bc]*)(c+d) abcd (0,4)(1,2)(2,4)
E a[bcd]*dcdcde adcdcde (0,7)
E (ab|a)b*c abc (0,3)(0,2)
E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4)
BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5)
E ^a(bc+|b[eh])g|.h$ abh (1,3)
E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5)
E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2)
E (bc+d$|ef*g.|h?i(j|k)) reffgz (1,6)(1,6)
E (((((((((a))))))))) a (0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
BE multiple words multiple words yeah (0,14)
E (.*)c(.*) abcde (0,5)(0,2)(3,5)
BE abcd abcd (0,4)
E a(bc)d abcd (0,4)(1,3)
E a[-]?c ac (0,3)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mo'ammar Gadhafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Kaddafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Qadhafi (0,15)(?,?)(10,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gadafi (0,14)(?,?)(10,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moamar Gaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Mu'ammar Qadhdhafi (0,18)(?,?)(13,15)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Khaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafy (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghadafi (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Ghaddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muamar Kaddafi (0,14)(?,?)(9,11)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Quathafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Muammar Gheddafi (0,16)(?,?)(11,13)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Khadafy (0,15)(?,?)(11,12)
E M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy] Moammar Qudhafi (0,15)(?,?)(10,12)
E a+(b|c)*d+ aabcdd (0,6)(3,4)
E ^.+$ vivi (0,4)
E ^(.+)$ vivi (0,4)(0,4)
E ^([^!.]+).att.com!(.+)$ gryphon.att.com!eby (0,19)(0,7)(16,19)
E ^([^!]+!)?([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(4,8)(8,11)
E ((foo)|(bar))!bas bar!bas (0,7)(0,3)(?,?)(0,3)
E ((foo)|(bar))!bas foo!bar!bas (4,11)(4,7)(?,?)(4,7)
E ((foo)|(bar))!bas foo!bas (0,7)(0,3)(0,3)
E ((foo)|bar)!bas bar!bas (0,7)(0,3)
E ((foo)|bar)!bas foo!bar!bas (4,11)(4,7)
E ((foo)|bar)!bas foo!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas bar!bas (0,7)(0,3)(0,3)
E (foo|(bar))!bas foo!bar!bas (4,11)(4,7)(4,7)
E (foo|(bar))!bas foo!bas (0,7)(0,3)
E (foo|bar)!bas bar!bas (0,7)(0,3)
E (foo|bar)!bas foo!bar!bas (4,11)(4,7)
E (foo|bar)!bas foo!bas (0,7)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bas (0,3)(?,?)(0,3)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ bar!bas (0,7)(0,4)(4,7)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bar!bas (0,11)(?,?)(?,?)(4,8)(8,11)
E ^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$ foo!bas (0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bas (0,3)(0,3)(?,?)(0,3)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ bar!bas (0,7)(0,7)(0,4)(4,7)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bar!bas (0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
E ^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$ foo!bas (0,7)(0,7)(0,4)(4,7)
E .*(/XXX).* /XXX (0,4)(0,4)
E .*(\\XXX).* \XXX (0,4)(0,4)
E \\XXX \XXX (0,4)
E .*(/000).* /000 (0,4)(0,4)
E .*(\\000).* \000 (0,4)(0,4)
E \\000 \000 (0,4)

79
src/libregex/testdata/nullsubexpr.dat vendored Normal file

@ -0,0 +1,79 @@
NOTE null subexpression matches : 2002-06-06
E (a*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)* a (0,1)(0,1)
E SAME x (0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a+)+ a (0,1)(0,1)
E SAME x NOMATCH
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([^b]*)* a (0,1)(0,1)
#E SAME b (0,0)(0,0)
E SAME b (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaab (0,6)(0,6)
E ([ab]*)* a (0,1)(0,1)
E SAME aaaaaa (0,6)(0,6)
E SAME ababab (0,6)(0,6)
E SAME bababa (0,6)(0,6)
E SAME b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
E SAME aaaabcde (0,5)(0,5)
E ([^a]*)* b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
#E SAME aaaaaa (0,0)(0,0)
E SAME aaaaaa (0,0)(?,?) RE2/Go
E ([^ab]*)* ccccxx (0,6)(0,6)
#E SAME ababab (0,0)(0,0)
E SAME ababab (0,0)(?,?) RE2/Go
E ((z)+|a)* zabcde (0,2)(1,2)
#{E a+? aaaaaa (0,1) no *? +? mimimal match ops
#E (a) aaa (0,1)(0,1)
#E (a*?) aaa (0,0)(0,0)
#E (a)*? aaa (0,0)
#E (a*?)*? aaa (0,0)
#}
B \(a*\)*\(x\) x (0,1)(0,0)(0,1)
B \(a*\)*\(x\) ax (0,2)(0,1)(1,2)
B \(a*\)*\(x\) axa (0,2)(0,1)(1,2)
B \(a*\)*\(x\)\(\1\) x (0,1)(0,0)(0,1)(1,1)
B \(a*\)*\(x\)\(\1\) ax (0,2)(1,1)(1,2)(2,2)
B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)
#E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
E (a*)*(x) ax (0,2)(0,1)(1,2)
E (a*)*(x) axa (0,2)(0,1)(1,2)
E (a*)+(x) x (0,1)(0,0)(0,1)
E (a*)+(x) ax (0,2)(0,1)(1,2)
E (a*)+(x) axa (0,2)(0,1)(1,2)
E (a*){2}(x) x (0,1)(0,0)(0,1)
E (a*){2}(x) ax (0,2)(1,1)(1,2)
E (a*){2}(x) axa (0,2)(1,1)(1,2)

163
src/libregex/testdata/repetition.dat vendored Normal file
View File

@ -0,0 +1,163 @@
NOTE implicit vs. explicit repetitions : 2009-02-02
# Glenn Fowler <gsf@research.att.com>
# conforming matches (column 4) must match one of the following BREs
# NOMATCH
# (0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
# (0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
# i.e., each 3-tuple has two identical elements and one (?,?)
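# Illustrative reading of the format (this comment is not part of the
# original AT&T data): a line such as
#   E  ((..)|(.))  a  (0,1)(0,1)(?,?)(0,1)
# asserts that the ERE matches "a" over [0,1), group 1 spans [0,1),
# group 2 does not participate (?,?) and group 3 spans [0,1).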
E ((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) NULL NOMATCH
E ((..)|(.)){1} NULL NOMATCH
E ((..)|(.)){2} NULL NOMATCH
E ((..)|(.)){3} NULL NOMATCH
E ((..)|(.))* NULL (0,0)
E ((..)|(.)) a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.))((..)|(.))((..)|(.)) a NOMATCH
E ((..)|(.)){1} a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)){2} a NOMATCH
E ((..)|(.)){3} a NOMATCH
E ((..)|(.))* a (0,1)(0,1)(?,?)(0,1)
E ((..)|(.)) aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aa (0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
E ((..)|(.))((..)|(.))((..)|(.)) aa NOMATCH
E ((..)|(.)){1} aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aa (0,2)(1,2)(?,?)(1,2)
E ((..)|(.)){3} aa NOMATCH
E ((..)|(.))* aa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)) aaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaa (0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
E ((..)|(.))((..)|(.))((..)|(.)) aaa (0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
E ((..)|(.)){1} aaa (0,2)(0,2)(0,2)(?,?)
#E ((..)|(.)){2} aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.)){2} aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)){3} aaa (0,3)(2,3)(?,?)(2,3)
#E ((..)|(.))* aaa (0,3)(2,3)(?,?)(2,3)
E ((..)|(.))* aaa (0,3)(2,3)(0,2)(2,3) RE2/Go
E ((..)|(.)) aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaa (0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
E ((..)|(.)){1} aaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaa (0,4)(3,4)(?,?)(3,4)
E ((..)|(.)){3} aaaa (0,4)(3,4)(0,2)(3,4) RE2/Go
E ((..)|(.))* aaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)) aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaa (0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
E ((..)|(.)){1} aaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaa (0,4)(2,4)(2,4)(?,?)
#E ((..)|(.)){3} aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.)){3} aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
#E ((..)|(.))* aaaaa (0,5)(4,5)(?,?)(4,5)
E ((..)|(.))* aaaaa (0,5)(4,5)(2,4)(4,5) RE2/Go
E ((..)|(.)) aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.))((..)|(.)) aaaaaa (0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
E ((..)|(.))((..)|(.))((..)|(.)) aaaaaa (0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
E ((..)|(.)){1} aaaaaa (0,2)(0,2)(0,2)(?,?)
E ((..)|(.)){2} aaaaaa (0,4)(2,4)(2,4)(?,?)
E ((..)|(.)){3} aaaaaa (0,6)(4,6)(4,6)(?,?)
E ((..)|(.))* aaaaaa (0,6)(4,6)(4,6)(?,?)
NOTE additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
# These test a bug in OS X / FreeBSD / NetBSD, and libtree.
# Linux/GLIBC gets the {8,} and {8,8} wrong.
:HA#100:E X(.?){0,}Y X1234567Y (0,9)(7,8)
:HA#101:E X(.?){1,}Y X1234567Y (0,9)(7,8)
:HA#102:E X(.?){2,}Y X1234567Y (0,9)(7,8)
:HA#103:E X(.?){3,}Y X1234567Y (0,9)(7,8)
:HA#104:E X(.?){4,}Y X1234567Y (0,9)(7,8)
:HA#105:E X(.?){5,}Y X1234567Y (0,9)(7,8)
:HA#106:E X(.?){6,}Y X1234567Y (0,9)(7,8)
:HA#107:E X(.?){7,}Y X1234567Y (0,9)(7,8)
:HA#108:E X(.?){8,}Y X1234567Y (0,9)(8,8)
#:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(7,8)
:HA#110:E X(.?){0,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(7,8)
:HA#111:E X(.?){1,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(7,8)
:HA#112:E X(.?){2,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(7,8)
:HA#113:E X(.?){3,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(7,8)
:HA#114:E X(.?){4,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(7,8)
:HA#115:E X(.?){5,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(7,8)
:HA#116:E X(.?){6,8}Y X1234567Y (0,9)(8,8) RE2/Go
#:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(7,8)
:HA#117:E X(.?){7,8}Y X1234567Y (0,9)(8,8) RE2/Go
:HA#118:E X(.?){8,8}Y X1234567Y (0,9)(8,8)
# These test a fixed bug in my regex-tdfa that did not keep the expanded
# form properly grouped, so right association did the wrong thing with
# these ambiguous patterns (crafted just to test my code when I became
# suspicious of my implementation). The first subexpression should use
# "ab" then "a" then "bcd".
# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
# results like (0,6)(4,5)(6,6).
:HA#260:E (a|ab|c|bcd){0,}(d*) ababcd (0,1)(0,1)(1,1)
:HA#261:E (a|ab|c|bcd){1,}(d*) ababcd (0,1)(0,1)(1,1)
:HA#262:E (a|ab|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#263:E (a|ab|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#264:E (a|ab|c|bcd){4,}(d*) ababcd NOMATCH
:HA#265:E (a|ab|c|bcd){0,10}(d*) ababcd (0,1)(0,1)(1,1)
:HA#266:E (a|ab|c|bcd){1,10}(d*) ababcd (0,1)(0,1)(1,1)
:HA#267:E (a|ab|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#268:E (a|ab|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#269:E (a|ab|c|bcd){4,10}(d*) ababcd NOMATCH
:HA#270:E (a|ab|c|bcd)*(d*) ababcd (0,1)(0,1)(1,1)
:HA#271:E (a|ab|c|bcd)+(d*) ababcd (0,1)(0,1)(1,1)
# The above worked on Linux/GLIBC but the following often fail.
# They also trip up OS X / FreeBSD / NetBSD:
#:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#280:E (ab|a|c|bcd){0,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#281:E (ab|a|c|bcd){1,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#282:E (ab|a|c|bcd){2,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(3,6)(6,6)
:HA#283:E (ab|a|c|bcd){3,}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#284:E (ab|a|c|bcd){4,}(d*) ababcd NOMATCH
#:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#285:E (ab|a|c|bcd){0,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#286:E (ab|a|c|bcd){1,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#287:E (ab|a|c|bcd){2,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(3,6)(6,6)
:HA#288:E (ab|a|c|bcd){3,10}(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
:HA#289:E (ab|a|c|bcd){4,10}(d*) ababcd NOMATCH
#:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(3,6)(6,6)
:HA#290:E (ab|a|c|bcd)*(d*) ababcd (0,6)(4,5)(5,6) RE2/Go
#:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(3,6)(6,6)
:HA#291:E (ab|a|c|bcd)+(d*) ababcd (0,6)(4,5)(5,6) RE2/Go

5537
src/libregex/unicode.rs Normal file

File diff suppressed because it is too large

587
src/libregex/vm.rs Normal file
View File

@ -0,0 +1,587 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// FIXME: Currently, the VM simulates an NFA. It would be nice to have another
// VM that simulates a DFA.
//
// According to Russ Cox[1], a DFA performs better than an NFA, principally
// because it reuses states previously computed by the machine *and* doesn't
// keep track of capture groups. The drawback of a DFA (aside from its
// complexity) is that it can't accurately return the locations of submatches.
// The NFA *can* do that. (This is my understanding anyway.)
//
// Cox suggests that a DFA ought to be used to answer "does this match" and
// "where does it match" questions. (In the latter, the starting position of
// the match is computed by executing the regex backwards.) Cox also suggests
// that a DFA should be run when asking "where are the submatches", which can
// 1) quickly answer "no" if there's no match and 2) discover the substring
// that matches, which means running the NFA on smaller input.
//
// Currently, the NFA simulation implemented below does some dirty tricks to
// avoid tracking capture groups when they aren't needed (which only works
// for 'is_match', not 'find'). This is a half-measure, but does provide some
// perf improvement.
//
// AFAIK, the DFA/NFA approach is implemented in RE2/C++ but *not* in RE2/Go.
//
// [1] - http://swtch.com/~rsc/regex/regex3.html
use std::cmp;
use std::mem;
use std::slice::MutableVector;
use compile::{
Program,
Match, OneChar, CharClass, Any, EmptyBegin, EmptyEnd, EmptyWordBoundary,
Save, Jump, Split,
};
use parse::{FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED};
use parse::unicode::PERLW;
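/// Locations of submatches as a flat list of optional byte offsets: for
/// capture group `k` (where group 0 is the whole match), index `2k` holds the
/// start and `2k + 1` the end, and `None` means the group did not participate
/// in the match.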
pub type CaptureLocs = Vec<Option<uint>>;
/// Indicates the type of match to be performed by the VM.
pub enum MatchKind {
/// Only checks if a match exists or not. Does not return location.
Exists,
/// Returns the start and end indices of the entire match in the input
/// given.
Location,
/// Returns the start and end indices of each submatch in the input given.
Submatches,
}
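// What `run` returns for each kind (see below): `Exists` produces a dummy
// pair (`[Some(0), Some(0)]` on a match, `[None, None]` otherwise),
// `Location` produces the start and end of the whole match and `Submatches`
// produces `2 * num_captures()` offsets laid out as in `CaptureLocs`.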
/// Runs an NFA simulation on the compiled expression given on the search text
/// `input`. The search begins at byte index `start` and ends at byte index
/// `end`. (The range is specified here so that zero-width assertions will work
/// correctly when searching for successive non-overlapping matches.)
///
/// The `which` parameter indicates what kind of capture information the caller
/// wants. There are three choices: match existence only, the location of the
/// entire match, or the location of the entire match along with the locations
/// of each submatch.
pub fn run<'r, 't>(which: MatchKind, prog: &'r Program, input: &'t str,
start: uint, end: uint) -> CaptureLocs {
Nfa {
which: which,
prog: prog,
input: input,
start: start,
end: end,
ic: 0,
chars: CharReader::new(input),
}.run()
}
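// A minimal usage sketch (hypothetical caller; in practice this is reached
// through the `Regex` type):
//
//     let caps = run(Submatches, &prog, "abc", 0, 3);
//     let whole = (*caps.get(0), *caps.get(1)); // Some(start)/Some(end) on a match
//
// where `&prog` is a compiled `Program` for the pattern being searched.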
struct Nfa<'r, 't> {
which: MatchKind,
prog: &'r Program,
input: &'t str,
start: uint,
end: uint,
ic: uint,
chars: CharReader<'t>,
}
/// Indicates the next action to take after a single non-empty instruction
/// is processed.
pub enum StepState {
/// This is returned if and only if a Match instruction is reached and
/// we only care about the existence of a match. It instructs the VM to
/// quit early.
StepMatchEarlyReturn,
/// Indicates that a match was found. Thus, the rest of the states in the
/// *current* queue should be dropped (i.e., leftmost-first semantics).
/// States in the "next" queue can still be processed.
StepMatch,
/// No match was found. Continue with the next state in the queue.
StepContinue,
}
impl<'r, 't> Nfa<'r, 't> {
fn run(&mut self) -> CaptureLocs {
let ncaps = match self.which {
Exists => 0,
Location => 1,
Submatches => self.prog.num_captures(),
};
let mut matched = false;
let ninsts = self.prog.insts.len();
let mut clist = &mut Threads::new(self.which, ninsts, ncaps);
let mut nlist = &mut Threads::new(self.which, ninsts, ncaps);
let mut groups = Vec::from_elem(ncaps * 2, None);
// Determine if the expression starts with a '^' so we can avoid
// simulating .*?
// Make sure multi-line mode isn't enabled for it, otherwise we can't
// drop the initial .*?
let prefix_anchor =
match *self.prog.insts.get(1) {
EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true,
_ => false,
};
self.ic = self.start;
let mut next_ic = self.chars.set(self.start);
while self.ic <= self.end {
if clist.size == 0 {
// We have a match and we're done exploring alternatives.
// Time to quit.
if matched {
break
}
// If there are no threads to try, then we'll have to start
// over at the beginning of the regex.
// BUT, if there's a literal prefix for the program, try to
// jump ahead quickly. If it can't be found, then we can bail
// out early.
if self.prog.prefix.len() > 0 && clist.size == 0 {
let needle = self.prog.prefix.as_slice().as_bytes();
let haystack = self.input.as_bytes().slice_from(self.ic);
match find_prefix(needle, haystack) {
None => break,
Some(i) => {
self.ic += i;
next_ic = self.chars.set(self.ic);
}
}
}
}
// This simulates a preceding '.*?' for every regex: unless we already
// have a match, add a thread for the start of the program at the
// current position in the input.
if clist.size == 0 || (!prefix_anchor && !matched) {
self.add(clist, 0, groups.as_mut_slice())
}
// Now we try to read the next character.
// As a result, the 'step' method will look at the previous
// character.
self.ic = next_ic;
next_ic = self.chars.advance();
let mut i = 0;
while i < clist.size {
let pc = clist.pc(i);
let step_state = self.step(groups.as_mut_slice(), nlist,
clist.groups(i), pc);
match step_state {
StepMatchEarlyReturn => return vec![Some(0), Some(0)],
StepMatch => { matched = true; clist.empty() },
StepContinue => {},
}
i += 1;
}
mem::swap(&mut clist, &mut nlist);
nlist.empty();
}
match self.which {
Exists if matched => vec![Some(0), Some(0)],
Exists => vec![None, None],
Location | Submatches => groups,
}
}
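// Processes the instruction at `pc` for one thread: consuming instructions
// (OneChar, CharClass, Any) are compared against the previously read
// character and, on success, schedule their successor in `nlist`; Match
// reports a result according to `self.which`; zero-width instructions are
// followed in `add`, so they are no-ops here.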
fn step(&self, groups: &mut [Option<uint>], nlist: &mut Threads,
caps: &mut [Option<uint>], pc: uint)
-> StepState {
match *self.prog.insts.get(pc) {
Match => {
match self.which {
Exists => {
return StepMatchEarlyReturn
}
Location => {
groups[0] = caps[0];
groups[1] = caps[1];
return StepMatch
}
Submatches => {
for (slot, val) in groups.mut_iter().zip(caps.iter()) {
*slot = *val;
}
return StepMatch
}
}
}
OneChar(c, flags) => {
if self.char_eq(flags & FLAG_NOCASE > 0, self.chars.prev, c) {
self.add(nlist, pc+1, caps);
}
}
CharClass(ref ranges, flags) => {
if self.chars.prev.is_some() {
let c = self.chars.prev.unwrap();
let negate = flags & FLAG_NEGATED > 0;
let casei = flags & FLAG_NOCASE > 0;
let found = ranges.as_slice();
let found = found.bsearch(|&rc| class_cmp(casei, c, rc));
let found = found.is_some();
if (found && !negate) || (!found && negate) {
self.add(nlist, pc+1, caps);
}
}
}
Any(flags) => {
if flags & FLAG_DOTNL > 0
|| !self.char_eq(false, self.chars.prev, '\n') {
self.add(nlist, pc+1, caps)
}
}
EmptyBegin(_) | EmptyEnd(_) | EmptyWordBoundary(_)
| Save(_) | Jump(_) | Split(_, _) => {},
}
StepContinue
}
fn add(&self, nlist: &mut Threads, pc: uint, groups: &mut [Option<uint>]) {
if nlist.contains(pc) {
return
}
// We have to add states to the threads list even if they're empty.
// TL;DR - It prevents cycles.
// If we didn't care about cycles, we'd *only* add threads that
// correspond to non-jumping instructions (OneChar, Any, Match, etc.).
// But, it's possible for valid regexes (like '(a*)*') to result in
// a cycle in the instruction list. e.g., we'll keep chasing the Split
// instructions forever.
// So we add these instructions to our thread queue, but in the main
// VM loop, we look for them but simply ignore them.
// Adding them to the queue prevents them from being revisited so we
// can avoid cycles (and the inevitable stack overflow).
//
// We make a minor optimization by indicating that the state is "empty"
// so that its capture groups are not filled in.
match *self.prog.insts.get(pc) {
EmptyBegin(flags) => {
let multi = flags & FLAG_MULTI > 0;
nlist.add(pc, groups, true);
if self.chars.is_begin()
|| (multi && self.char_is(self.chars.prev, '\n')) {
self.add(nlist, pc + 1, groups)
}
}
EmptyEnd(flags) => {
let multi = flags & FLAG_MULTI > 0;
nlist.add(pc, groups, true);
if self.chars.is_end()
|| (multi && self.char_is(self.chars.cur, '\n')) {
self.add(nlist, pc + 1, groups)
}
}
EmptyWordBoundary(flags) => {
nlist.add(pc, groups, true);
if self.chars.is_word_boundary() == !(flags & FLAG_NEGATED > 0) {
self.add(nlist, pc + 1, groups)
}
}
Save(slot) => {
nlist.add(pc, groups, true);
match self.which {
Location if slot <= 1 => {
let old = groups[slot];
groups[slot] = Some(self.ic);
self.add(nlist, pc + 1, groups);
groups[slot] = old;
}
Submatches => {
let old = groups[slot];
groups[slot] = Some(self.ic);
self.add(nlist, pc + 1, groups);
groups[slot] = old;
}
Exists | Location => self.add(nlist, pc + 1, groups),
}
}
Jump(to) => {
nlist.add(pc, groups, true);
self.add(nlist, to, groups)
}
Split(x, y) => {
nlist.add(pc, groups, true);
self.add(nlist, x, groups);
self.add(nlist, y, groups);
}
Match | OneChar(_, _) | CharClass(_, _) | Any(_) => {
nlist.add(pc, groups, false);
}
}
}
// FIXME: For case-insensitive comparisons, this uses the uppercase form of
// each character and tests for equality. IIUC, this does not generalize to
// all of Unicode. I believe we need to check the entire fold for each
// character. This will be easy to add if and when it gets added to Rust's
// standard library.
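// As one concrete case (assuming standard Unicode case mappings): U+212A
// KELVIN SIGN case-folds to 'k', but `to_uppercase` maps 'k' to 'K' (U+004B)
// and leaves U+212A unchanged, so the uppercase comparison below never
// equates the two.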
#[inline]
fn char_eq(&self, casei: bool, textc: Option<char>, regc: char) -> bool {
match textc {
None => false,
Some(textc) => {
regc == textc
|| (casei && regc.to_uppercase() == textc.to_uppercase())
}
}
}
#[inline]
fn char_is(&self, textc: Option<char>, regc: char) -> bool {
textc == Some(regc)
}
}
/// CharReader is responsible for maintaining a "previous" and a "current"
/// character. This one-character lookahead is necessary for assertions that
/// look one character before or after the current position.
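///
/// For example, over the input "ab": after `set(0)` the previous character
/// is `None` and the current is `Some('a')`; one `advance()` later they are
/// `Some('a')` and `Some('b')`; after a second `advance()` the current
/// character becomes `None`, which is what `is_begin`/`is_end` key off.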
pub struct CharReader<'t> {
/// The previous character read. It is None only when processing the first
/// character of the input.
pub prev: Option<char>,
/// The current character.
pub cur: Option<char>,
input: &'t str,
next: uint,
}
impl<'t> CharReader<'t> {
/// Returns a new CharReader that advances through the input given.
/// Note that a CharReader has no knowledge of the range in which to search
/// the input.
pub fn new(input: &'t str) -> CharReader<'t> {
CharReader {
prev: None,
cur: None,
input: input,
next: 0,
}
}
/// Sets the previous and current character given an arbitrary byte
/// index (which must lie on a Unicode codepoint boundary).
#[inline]
pub fn set(&mut self, ic: uint) -> uint {
self.prev = None;
self.cur = None;
self.next = 0;
if self.input.len() == 0 {
return 1
}
if ic > 0 {
let i = cmp::min(ic, self.input.len());
let prev = self.input.char_range_at_reverse(i);
self.prev = Some(prev.ch);
}
if ic < self.input.len() {
let cur = self.input.char_range_at(ic);
self.cur = Some(cur.ch);
self.next = cur.next;
self.next
} else {
self.input.len() + 1
}
}
/// Does the same as `set`, except it always advances to the next
/// character in the input (and therefore does half as many UTF-8 decodings).
#[inline]
pub fn advance(&mut self) -> uint {
self.prev = self.cur;
if self.next < self.input.len() {
let cur = self.input.char_range_at(self.next);
self.cur = Some(cur.ch);
self.next = cur.next;
} else {
self.cur = None;
self.next = self.input.len() + 1;
}
self.next
}
/// Returns true if and only if this is the beginning of the input
/// (ignoring the range of the input to search).
#[inline]
pub fn is_begin(&self) -> bool { self.prev.is_none() }
/// Returns true if and only if this is the end of the input
/// (ignoring the range of the input to search).
#[inline]
pub fn is_end(&self) -> bool { self.cur.is_none() }
/// Returns true if and only if the current position is a word boundary.
/// (Ignoring the range of the input to search.)
pub fn is_word_boundary(&self) -> bool {
if self.is_begin() {
return is_word(self.cur)
}
if self.is_end() {
return is_word(self.prev)
}
(is_word(self.cur) && !is_word(self.prev))
|| (is_word(self.prev) && !is_word(self.cur))
}
}
struct Thread {
pc: uint,
groups: Vec<Option<uint>>,
}
struct Threads {
which: MatchKind,
queue: Vec<Thread>,
sparse: Vec<uint>,
size: uint,
}
impl Threads {
// This is using a wicked neat trick to provide constant time lookup
// for threads in the queue using a sparse set. A queue of threads is
// allocated once with maximal size when the VM initializes and is reused
// throughout execution. That is, there should be zero allocation during
// the execution of a VM.
//
// See http://research.swtch.com/sparse for the deets.
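// Concretely: `add(5, ..)` when `size` is 0 writes the thread into
// `queue[0]`, sets `sparse[5] = 0` and bumps `size` to 1; `contains(5)` then
// checks `sparse[5] < size && queue[sparse[5]].pc == 5`, so stale entries in
// `sparse` left over from a previous `empty()` are harmless.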
fn new(which: MatchKind, num_insts: uint, ncaps: uint) -> Threads {
Threads {
which: which,
queue: Vec::from_fn(num_insts, |_| {
Thread { pc: 0, groups: Vec::from_elem(ncaps * 2, None) }
}),
sparse: Vec::from_elem(num_insts, 0u),
size: 0,
}
}
fn add(&mut self, pc: uint, groups: &[Option<uint>], empty: bool) {
let t = self.queue.get_mut(self.size);
t.pc = pc;
match (empty, self.which) {
(_, Exists) | (true, _) => {},
(false, Location) => {
*t.groups.get_mut(0) = groups[0];
*t.groups.get_mut(1) = groups[1];
}
(false, Submatches) => {
for (slot, val) in t.groups.mut_iter().zip(groups.iter()) {
*slot = *val;
}
}
}
*self.sparse.get_mut(pc) = self.size;
self.size += 1;
}
#[inline]
fn contains(&self, pc: uint) -> bool {
let s = *self.sparse.get(pc);
s < self.size && self.queue.get(s).pc == pc
}
#[inline]
fn empty(&mut self) {
self.size = 0;
}
#[inline]
fn pc(&self, i: uint) -> uint {
self.queue.get(i).pc
}
#[inline]
fn groups<'r>(&'r mut self, i: uint) -> &'r mut [Option<uint>] {
self.queue.get_mut(i).groups.as_mut_slice()
}
}
/// Returns true if the character is a word character, according to the
/// (Unicode friendly) Perl character class '\w'.
/// Note that this is only used for testing word boundaries. The actual '\w'
/// is encoded as a CharClass instruction.
pub fn is_word(c: Option<char>) -> bool {
let c = match c {
None => return false,
Some(c) => c,
};
// Try the common ASCII case before invoking binary search.
match c {
'_' | '0' .. '9' | 'a' .. 'z' | 'A' .. 'Z' => true,
_ => PERLW.bsearch(|&(start, end)| {
if c >= start && c <= end {
Equal
} else if start > c {
Greater
} else {
Less
}
}).is_some()
}
}
/// Given a character and a single character class range, return the ordering
/// of the range relative to the character: `Greater` if the range starts after
/// the character, `Equal` if the character falls within the range (inclusive),
/// and `Less` if the range ends before the character.
///
/// If `casei` is `true`, then this ordering is computed case insensitively.
///
/// This function is meant to be used with a binary search.
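///
/// For instance, `class_cmp(false, 'c', ('a', 'f'))` is `Equal`,
/// `class_cmp(false, 'c', ('d', 'f'))` is `Greater` (the range lies entirely
/// after the character) and `class_cmp(false, 'z', ('a', 'f'))` is `Less`,
/// matching the orientation that `bsearch` expects from its comparator.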
#[inline]
fn class_cmp(casei: bool, mut textc: char,
(mut start, mut end): (char, char)) -> Ordering {
if casei {
// FIXME: This is pretty ridiculous. All of this case conversion
// can be moved outside this function:
// 1) textc should be uppercased outside the bsearch.
// 2) the character class itself should be uppercased either in the
// parser or the compiler.
// FIXME: This is too simplistic for correct Unicode support.
// See also: char_eq
textc = textc.to_uppercase();
start = start.to_uppercase();
end = end.to_uppercase();
}
if textc >= start && textc <= end {
Equal
} else if start > textc {
Greater
} else {
Less
}
}
/// Returns the starting location of `needle` in `haystack`.
/// If `needle` is not in `haystack`, then `None` is returned.
///
/// Note that this is using a naive substring algorithm.
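///
/// For example, `find_prefix("bc".as_bytes(), "abcd".as_bytes())` returns
/// `Some(1)`, while an empty needle or a needle longer than the haystack
/// always yields `None`.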
#[inline]
pub fn find_prefix(needle: &[u8], haystack: &[u8]) -> Option<uint> {
let (hlen, nlen) = (haystack.len(), needle.len());
if nlen > hlen || nlen == 0 {
return None
}
let mut hayi = 0u;
'HAYSTACK: loop {
if hayi > hlen - nlen {
break
}
let mut nedi = 0;
while nedi < nlen {
if haystack[hayi+nedi] != needle[nedi] {
hayi += 1;
continue 'HAYSTACK
}
nedi += 1;
}
return Some(hayi)
}
None
}

684
src/libregex_macros/lib.rs Normal file
View File

@ -0,0 +1,684 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//! This crate provides the `regex!` macro. Its use is documented in the
//! `regex` crate.
#![crate_id = "regex_macros#0.11-pre"]
#![crate_type = "dylib"]
#![experimental]
#![license = "MIT/ASL2"]
#![doc(html_logo_url = "http://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
html_favicon_url = "http://www.rust-lang.org/favicon.ico",
html_root_url = "http://static.rust-lang.org/doc/master")]
#![feature(macro_registrar, managed_boxes, quote)]
extern crate regex;
extern crate syntax;
use syntax::ast;
use syntax::codemap;
use syntax::ext::base::{
SyntaxExtension, ExtCtxt, MacResult, MacExpr, DummyResult,
NormalTT, BasicMacroExpander,
};
use syntax::parse;
use syntax::parse::token;
use syntax::print::pprust;
use regex::Regex;
use regex::native::{
OneChar, CharClass, Any, Save, Jump, Split,
Match, EmptyBegin, EmptyEnd, EmptyWordBoundary,
Program, Dynamic, Native,
FLAG_NOCASE, FLAG_MULTI, FLAG_DOTNL, FLAG_NEGATED,
};
/// For the `regex!` syntax extension. Do not use.
#[macro_registrar]
#[doc(hidden)]
pub fn macro_registrar(register: |ast::Name, SyntaxExtension|) {
let expander = ~BasicMacroExpander { expander: native, span: None };
register(token::intern("regex"), NormalTT(expander, None))
}
/// Generates specialized code for the Pike VM for a particular regular
/// expression.
///
/// There are two primary differences between the code generated here and the
/// general code in vm.rs.
///
/// 1. All heap allocation is removed. Sized vector types are used instead.
/// Care must be taken to make sure that these vectors are not copied
/// gratuitously. (If you're not sure, run the benchmarks. They will yell
/// at you if you do.)
/// 2. The main `match instruction { ... }` expressions are replaced with more
/// direct `match pc { ... }`. The generators can be found in
/// `step_insts` and `add_insts`.
///
/// Other more minor changes include eliding code when possible (although this
/// isn't completely thorough at the moment), and translating character class
/// matching from using a binary search to a simple `match` expression (see
/// `match_class`).
///
/// It is strongly recommended to read the dynamic implementation in vm.rs
/// first before trying to understand the code generator. The implementation
/// strategy is identical and vm.rs has comments and will be easier to follow.
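// As a rough illustration (the exact instruction numbering depends on the
// `compile` module): for `regex!("a")`, `step_insts` below emits a
// literal-pc arm along the lines of
// `N => { if self.chars.prev == Some('a') { self.add(nlist, N + 1, caps); } }`
// in place of the generic `OneChar(..)` arm that vm.rs matches at runtime.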
fn native(cx: &mut ExtCtxt, sp: codemap::Span, tts: &[ast::TokenTree])
-> ~MacResult {
let regex = match parse(cx, tts) {
Some(r) => r,
// error is logged in 'parse' with cx.span_err
None => return DummyResult::any(sp),
};
let re = match Regex::new(regex.to_owned()) {
Ok(re) => re,
Err(err) => {
cx.span_err(sp, err.to_str());
return DummyResult::any(sp)
}
};
let prog = match re.p {
Dynamic(ref prog) => prog.clone(),
Native(_) => unreachable!(),
};
let mut gen = NfaGen {
cx: &*cx, sp: sp, prog: prog,
names: re.names.clone(), original: re.original.clone(),
};
MacExpr::new(gen.code())
}
struct NfaGen<'a> {
cx: &'a ExtCtxt<'a>,
sp: codemap::Span,
prog: Program,
names: ~[Option<~str>],
original: ~str,
}
impl<'a> NfaGen<'a> {
fn code(&mut self) -> @ast::Expr {
// Most or all of the following things are used in the quasiquoted
// expression returned.
let num_cap_locs = 2 * self.prog.num_captures();
let num_insts = self.prog.insts.len();
let cap_names = self.vec_expr(self.names,
|cx, name| match name {
&Some(ref name) => {
let name = name.as_slice();
quote_expr!(cx, Some(~$name))
}
&None => quote_expr!(cx, None),
}
);
let prefix_anchor =
match self.prog.insts.as_slice()[1] {
EmptyBegin(flags) if flags & FLAG_MULTI == 0 => true,
_ => false,
};
let init_groups = self.vec_from_fn(num_cap_locs,
|cx| quote_expr!(cx, None));
let prefix_bytes = self.vec_expr(self.prog.prefix.as_slice().as_bytes(),
|cx, b| quote_expr!(cx, $b));
let check_prefix = self.check_prefix();
let step_insts = self.step_insts();
let add_insts = self.add_insts();
let regex = self.original.as_slice();
quote_expr!(self.cx, {
fn exec<'t>(which: ::regex::native::MatchKind, input: &'t str,
start: uint, end: uint) -> Vec<Option<uint>> {
#![allow(unused_imports)]
use regex::native::{
MatchKind, Exists, Location, Submatches,
StepState, StepMatchEarlyReturn, StepMatch, StepContinue,
CharReader, find_prefix,
};
return Nfa {
which: which,
input: input,
ic: 0,
chars: CharReader::new(input),
}.run(start, end);
type Captures = [Option<uint>, ..$num_cap_locs];
struct Nfa<'t> {
which: MatchKind,
input: &'t str,
ic: uint,
chars: CharReader<'t>,
}
impl<'t> Nfa<'t> {
#[allow(unused_variable)]
fn run(&mut self, start: uint, end: uint) -> Vec<Option<uint>> {
let mut matched = false;
let prefix_bytes: &[u8] = &$prefix_bytes;
let mut clist = &mut Threads::new(self.which);
let mut nlist = &mut Threads::new(self.which);
let mut groups = $init_groups;
self.ic = start;
let mut next_ic = self.chars.set(start);
while self.ic <= end {
if clist.size == 0 {
if matched {
break
}
$check_prefix
}
if clist.size == 0 || (!$prefix_anchor && !matched) {
self.add(clist, 0, &mut groups)
}
self.ic = next_ic;
next_ic = self.chars.advance();
let mut i = 0;
while i < clist.size {
let pc = clist.pc(i);
let step_state = self.step(&mut groups, nlist,
clist.groups(i), pc);
match step_state {
StepMatchEarlyReturn =>
return vec![Some(0u), Some(0u)],
StepMatch => { matched = true; clist.empty() },
StepContinue => {},
}
i += 1;
}
::std::mem::swap(&mut clist, &mut nlist);
nlist.empty();
}
match self.which {
Exists if matched => vec![Some(0u), Some(0u)],
Exists => vec![None, None],
Location | Submatches => groups.iter().map(|x| *x).collect(),
}
}
// Sometimes `nlist` is never used (for empty regexes).
#[allow(unused_variable)]
#[inline]
fn step(&self, groups: &mut Captures, nlist: &mut Threads,
caps: &mut Captures, pc: uint) -> StepState {
$step_insts
StepContinue
}
fn add(&self, nlist: &mut Threads, pc: uint,
groups: &mut Captures) {
if nlist.contains(pc) {
return
}
$add_insts
}
}
struct Thread {
pc: uint,
groups: Captures,
}
struct Threads {
which: MatchKind,
queue: [Thread, ..$num_insts],
sparse: [uint, ..$num_insts],
size: uint,
}
impl Threads {
fn new(which: MatchKind) -> Threads {
Threads {
which: which,
// These unsafe blocks are used for performance reasons, as it
// gives us a zero-cost initialization of a sparse set. The
// trick is described in more detail here:
// http://research.swtch.com/sparse
// The idea here is to avoid initializing threads that never
// need to be initialized, particularly for larger regexes with
// a lot of instructions.
queue: unsafe { ::std::mem::uninit() },
sparse: unsafe { ::std::mem::uninit() },
size: 0,
}
}
#[inline]
fn add(&mut self, pc: uint, groups: &Captures) {
let t = &mut self.queue[self.size];
t.pc = pc;
match self.which {
Exists => {},
Location => {
t.groups[0] = groups[0];
t.groups[1] = groups[1];
}
Submatches => {
for (slot, val) in t.groups.mut_iter().zip(groups.iter()) {
*slot = *val;
}
}
}
self.sparse[pc] = self.size;
self.size += 1;
}
#[inline]
fn add_empty(&mut self, pc: uint) {
self.queue[self.size].pc = pc;
self.sparse[pc] = self.size;
self.size += 1;
}
#[inline]
fn contains(&self, pc: uint) -> bool {
let s = self.sparse[pc];
s < self.size && self.queue[s].pc == pc
}
#[inline]
fn empty(&mut self) {
self.size = 0;
}
#[inline]
fn pc(&self, i: uint) -> uint {
self.queue[i].pc
}
#[inline]
fn groups<'r>(&'r mut self, i: uint) -> &'r mut Captures {
&'r mut self.queue[i].groups
}
}
}
::regex::Regex {
original: ~$regex,
names: ~$cap_names,
p: ::regex::native::Native(exec),
}
})
}
// Generates code for the `add` method, which is responsible for adding
// zero-width states to the next queue of states to visit.
fn add_insts(&self) -> @ast::Expr {
let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| {
let nextpc = pc + 1;
let body = match *inst {
EmptyBegin(flags) => {
let nl = '\n';
let cond =
if flags & FLAG_MULTI > 0 {
quote_expr!(self.cx,
self.chars.is_begin()
|| self.chars.prev == Some($nl)
)
} else {
quote_expr!(self.cx, self.chars.is_begin())
};
quote_expr!(self.cx, {
nlist.add_empty($pc);
if $cond { self.add(nlist, $nextpc, &mut *groups) }
})
}
EmptyEnd(flags) => {
let nl = '\n';
let cond =
if flags & FLAG_MULTI > 0 {
quote_expr!(self.cx,
self.chars.is_end()
|| self.chars.cur == Some($nl)
)
} else {
quote_expr!(self.cx, self.chars.is_end())
};
quote_expr!(self.cx, {
nlist.add_empty($pc);
if $cond { self.add(nlist, $nextpc, &mut *groups) }
})
}
EmptyWordBoundary(flags) => {
let cond =
if flags & FLAG_NEGATED > 0 {
quote_expr!(self.cx, !self.chars.is_word_boundary())
} else {
quote_expr!(self.cx, self.chars.is_word_boundary())
};
quote_expr!(self.cx, {
nlist.add_empty($pc);
if $cond { self.add(nlist, $nextpc, &mut *groups) }
})
}
Save(slot) => {
let save = quote_expr!(self.cx, {
let old = groups[$slot];
groups[$slot] = Some(self.ic);
self.add(nlist, $nextpc, &mut *groups);
groups[$slot] = old;
});
let add = quote_expr!(self.cx, {
self.add(nlist, $nextpc, &mut *groups);
});
// If this is saving a submatch location but we request
// existence or only full match location, then we can skip
// right over it every time.
if slot > 1 {
quote_expr!(self.cx, {
nlist.add_empty($pc);
match self.which {
Submatches => $save,
Exists | Location => $add,
}
})
} else {
quote_expr!(self.cx, {
nlist.add_empty($pc);
match self.which {
Submatches | Location => $save,
Exists => $add,
}
})
}
}
Jump(to) => {
quote_expr!(self.cx, {
nlist.add_empty($pc);
self.add(nlist, $to, &mut *groups);
})
}
Split(x, y) => {
quote_expr!(self.cx, {
nlist.add_empty($pc);
self.add(nlist, $x, &mut *groups);
self.add(nlist, $y, &mut *groups);
})
}
// For Match, OneChar, CharClass, Any
_ => quote_expr!(self.cx, nlist.add($pc, &*groups)),
};
self.arm_inst(pc, body)
}).collect::<Vec<ast::Arm>>();
self.match_insts(arms)
}
// Generates the code for the `step` method, which processes all states
// in the current queue that consume a single character.
fn step_insts(&self) -> @ast::Expr {
let arms = self.prog.insts.iter().enumerate().map(|(pc, inst)| {
let nextpc = pc + 1;
let body = match *inst {
Match => {
quote_expr!(self.cx, {
match self.which {
Exists => {
return StepMatchEarlyReturn
}
Location => {
groups[0] = caps[0];
groups[1] = caps[1];
return StepMatch
}
Submatches => {
for (slot, val) in groups.mut_iter().zip(caps.iter()) {
*slot = *val;
}
return StepMatch
}
}
})
}
OneChar(c, flags) => {
if flags & FLAG_NOCASE > 0 {
let upc = c.to_uppercase();
quote_expr!(self.cx, {
let upc = self.chars.prev.map(|c| c.to_uppercase());
if upc == Some($upc) {
self.add(nlist, $nextpc, caps);
}
})
} else {
quote_expr!(self.cx, {
if self.chars.prev == Some($c) {
self.add(nlist, $nextpc, caps);
}
})
}
}
CharClass(ref ranges, flags) => {
let negate = flags & FLAG_NEGATED > 0;
let casei = flags & FLAG_NOCASE > 0;
let get_char =
if casei {
quote_expr!(self.cx, self.chars.prev.unwrap().to_uppercase())
} else {
quote_expr!(self.cx, self.chars.prev.unwrap())
};
let negcond =
if negate {
quote_expr!(self.cx, !found)
} else {
quote_expr!(self.cx, found)
};
let mranges = self.match_class(casei, ranges.as_slice());
quote_expr!(self.cx, {
if self.chars.prev.is_some() {
let c = $get_char;
let found = $mranges;
if $negcond {
self.add(nlist, $nextpc, caps);
}
}
})
}
Any(flags) => {
if flags & FLAG_DOTNL > 0 {
quote_expr!(self.cx, self.add(nlist, $nextpc, caps))
} else {
let nl = '\n'; // no char lits allowed? wtf?
quote_expr!(self.cx, {
if self.chars.prev != Some($nl) {
self.add(nlist, $nextpc, caps)
}
})
}
}
// EmptyBegin, EmptyEnd, EmptyWordBoundary, Save, Jump, Split
_ => quote_expr!(self.cx, {}),
};
self.arm_inst(pc, body)
}).collect::<Vec<ast::Arm>>();
self.match_insts(arms)
}
// Translates a character class into a match expression.
// This avoids a binary search (and the match is hopefully compiled down to
// a jump table).
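// For example, the ranges [('a', 'c'), ('x', 'z')] become, roughly,
// `match c { 'a' .. 'c' => true, 'x' .. 'z' => true, _ => false }`.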
fn match_class(&self, casei: bool, ranges: &[(char, char)]) -> @ast::Expr {
let mut arms = ranges.iter().map(|&(mut start, mut end)| {
if casei {
start = start.to_uppercase();
end = end.to_uppercase();
}
ast::Arm {
attrs: vec!(),
pats: vec!(@ast::Pat{
id: ast::DUMMY_NODE_ID,
span: self.sp,
node: ast::PatRange(quote_expr!(self.cx, $start),
quote_expr!(self.cx, $end)),
}),
guard: None,
body: quote_expr!(self.cx, true),
}
}).collect::<Vec<ast::Arm>>();
arms.push(self.wild_arm_expr(quote_expr!(self.cx, false)));
let match_on = quote_expr!(self.cx, c);
self.dummy_expr(ast::ExprMatch(match_on, arms))
}
// Generates code for checking a literal prefix of the search string.
// The code is only generated if the regex *has* a literal prefix.
// Otherwise, a no-op is returned.
fn check_prefix(&self) -> @ast::Expr {
if self.prog.prefix.len() == 0 {
quote_expr!(self.cx, {})
} else {
quote_expr!(self.cx,
if clist.size == 0 {
let haystack = self.input.as_bytes().slice_from(self.ic);
match find_prefix(prefix_bytes, haystack) {
None => break,
Some(i) => {
self.ic += i;
next_ic = self.chars.set(self.ic);
}
}
}
)
}
}
// Builds a `match pc { ... }` expression from a list of arms, specifically
// for matching the current program counter with an instruction.
// A wild-card arm is automatically added that executes a no-op. It will
// never be used, but is added to satisfy the compiler complaining about
// non-exhaustive patterns.
fn match_insts(&self, mut arms: Vec<ast::Arm>) -> @ast::Expr {
let mat_pc = quote_expr!(self.cx, pc);
arms.push(self.wild_arm_expr(quote_expr!(self.cx, {})));
self.dummy_expr(ast::ExprMatch(mat_pc, arms))
}
// Creates a match arm for the instruction at `pc` with the expression
// `body`.
fn arm_inst(&self, pc: uint, body: @ast::Expr) -> ast::Arm {
ast::Arm {
attrs: vec!(),
pats: vec!(@ast::Pat{
id: ast::DUMMY_NODE_ID,
span: self.sp,
node: ast::PatLit(quote_expr!(self.cx, $pc)),
}),
guard: None,
body: body,
}
}
// Creates a wild-card match arm with the expression `body`.
fn wild_arm_expr(&self, body: @ast::Expr) -> ast::Arm {
ast::Arm {
attrs: vec!(),
pats: vec!(@ast::Pat{
id: ast::DUMMY_NODE_ID,
span: self.sp,
node: ast::PatWild,
}),
guard: None,
body: body,
}
}
// Builds a `[a, b, .., len]` expression where each element is the result
// of executing `to_expr`.
fn vec_from_fn(&self, len: uint, to_expr: |&ExtCtxt| -> @ast::Expr)
-> @ast::Expr {
self.vec_expr(Vec::from_elem(len, ()).as_slice(),
|cx, _| to_expr(cx))
}
// Converts `xs` to a `[x1, x2, .., xN]` expression by calling `to_expr`
// on each element in `xs`.
fn vec_expr<T>(&self, xs: &[T], to_expr: |&ExtCtxt, &T| -> @ast::Expr)
-> @ast::Expr {
let mut exprs = vec!();
for x in xs.iter() {
exprs.push(to_expr(self.cx, x))
}
let vec_exprs = self.dummy_expr(ast::ExprVec(exprs));
quote_expr!(self.cx, $vec_exprs)
}
// Creates an expression with a dummy node ID given an underlying
// `ast::Expr_`.
fn dummy_expr(&self, e: ast::Expr_) -> @ast::Expr {
@ast::Expr {
id: ast::DUMMY_NODE_ID,
node: e,
span: self.sp,
}
}
}
// This trait is defined in the quote module in the syntax crate, but I
// don't think it's exported.
// Interestingly, quote_expr! only requires that a 'to_tokens' method be
// defined rather than satisfying a particular trait.
#[doc(hidden)]
trait ToTokens {
fn to_tokens(&self, cx: &ExtCtxt) -> Vec<ast::TokenTree>;
}
impl ToTokens for char {
fn to_tokens(&self, _: &ExtCtxt) -> Vec<ast::TokenTree> {
vec!(ast::TTTok(codemap::DUMMY_SP, token::LIT_CHAR((*self) as u32)))
}
}
impl ToTokens for bool {
fn to_tokens(&self, _: &ExtCtxt) -> Vec<ast::TokenTree> {
let ident = token::IDENT(token::str_to_ident(self.to_str()), false);
vec!(ast::TTTok(codemap::DUMMY_SP, ident))
}
}
/// Looks for a single string literal and returns it.
/// Otherwise, logs an error with cx.span_err and returns None.
fn parse(cx: &mut ExtCtxt, tts: &[ast::TokenTree]) -> Option<~str> {
let mut parser = parse::new_parser_from_tts(cx.parse_sess(), cx.cfg(),
Vec::from_slice(tts));
let entry = cx.expand_expr(parser.parse_expr());
let regex = match entry.node {
ast::ExprLit(lit) => {
match lit.node {
ast::LitStr(ref s, _) => s.to_str(),
_ => {
cx.span_err(entry.span, format!(
"expected string literal but got `{}`",
pprust::lit_to_str(lit)));
return None
}
}
}
_ => {
cx.span_err(entry.span, format!(
"expected string literal but got `{}`",
pprust::expr_to_str(entry)));
return None
}
};
if !parser.eat(&token::EOF) {
cx.span_err(parser.span, "only one string literal allowed");
return None;
}
Some(regex)
}

View File

@ -0,0 +1,94 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-stage1
// ignore-cross-compile #12102
#![feature(macro_rules, phase)]
extern crate regex;
#[phase(syntax)] extern crate regex_macros;
extern crate sync;
use std::io;
use regex::{NoExpand, Regex};
use sync::Arc;
fn count_matches(seq: &str, variant: &Regex) -> int {
let mut n = 0;
for _ in variant.find_iter(seq) {
n += 1;
}
n
}
fn main() {
let mut rdr = if std::os::getenv("RUST_BENCH").is_some() {
let fd = io::File::open(&Path::new("shootout-k-nucleotide.data"));
~io::BufferedReader::new(fd) as ~io::Reader
} else {
~io::stdin() as ~io::Reader
};
let mut seq = StrBuf::from_str(rdr.read_to_str().unwrap());
let ilen = seq.len();
seq = regex!(">[^\n]*\n|\n").replace_all(seq.as_slice(), NoExpand(""));
let seq_arc = Arc::new(seq.clone()); // copy before it moves
let clen = seq.len();
let mut seqlen = sync::Future::spawn(proc() {
let substs = ~[
(regex!("B"), "(c|g|t)"),
(regex!("D"), "(a|g|t)"),
(regex!("H"), "(a|c|t)"),
(regex!("K"), "(g|t)"),
(regex!("M"), "(a|c)"),
(regex!("N"), "(a|c|g|t)"),
(regex!("R"), "(a|g)"),
(regex!("S"), "(c|g)"),
(regex!("V"), "(a|c|g)"),
(regex!("W"), "(a|t)"),
(regex!("Y"), "(c|t)"),
];
let mut seq = seq;
for (re, replacement) in substs.move_iter() {
seq = re.replace_all(seq.as_slice(), NoExpand(replacement));
}
seq.len()
});
let variants = ~[
regex!("agggtaaa|tttaccct"),
regex!("[cgt]gggtaaa|tttaccc[acg]"),
regex!("a[act]ggtaaa|tttacc[agt]t"),
regex!("ag[act]gtaaa|tttac[agt]ct"),
regex!("agg[act]taaa|ttta[agt]cct"),
regex!("aggg[acg]aaa|ttt[cgt]ccct"),
regex!("agggt[cgt]aa|tt[acg]accct"),
regex!("agggta[cgt]a|t[acg]taccct"),
regex!("agggtaa[cgt]|[acg]ttaccct"),
];
let (mut variant_strs, mut counts) = (vec!(), vec!());
for variant in variants.move_iter() {
let seq_arc_copy = seq_arc.clone();
variant_strs.push(variant.to_str().to_owned());
counts.push(sync::Future::spawn(proc() {
count_matches(seq_arc_copy.as_slice(), &variant)
}));
}
for (i, variant) in variant_strs.iter().enumerate() {
println!("{} {}", variant, counts.get_mut(i).get());
}
println!("");
println!("{}", ilen);
println!("{}", clen);
println!("{}", seqlen.get());
}

View File

@ -0,0 +1,26 @@
// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// ignore-stage1
#![feature(phase)]
extern crate regex;
#[phase(syntax)] extern crate regex_macros;
// Tests to make sure that `regex!` will produce a compile error when given
// an invalid regular expression.
// More exhaustive failure tests for the parser are done with the traditional
// unit testing infrastructure, since both dynamic and native regexes use the
// same parser.
fn main() {
let _ = regex!("("); //~ ERROR Regex syntax error
}