From dad95578b0f000807f23fe2d37ca37ea8bd8522c Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Fri, 12 Jul 2024 09:22:16 +1000 Subject: [PATCH] Add unit tests for `Parser::look_ahead`. It's currently buggy, so some of the test results are surprising, as described in the `FIXME` comments. The bugs will be fixed in subsequent commits. --- compiler/rustc_parse/src/parser/tests.rs | 116 +++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/compiler/rustc_parse/src/parser/tests.rs b/compiler/rustc_parse/src/parser/tests.rs index 42392ad2163..538ec6b8cfd 100644 --- a/compiler/rustc_parse/src/parser/tests.rs +++ b/compiler/rustc_parse/src/parser/tests.rs @@ -1376,6 +1376,122 @@ fn ttdelim_span() { }); } +// Uses a macro rather than a function so that failure messages mention the +// correct line in the test function. +macro_rules! look { + ($p:ident, $dist:literal, $kind:expr) => { + $p.look_ahead($dist, |tok| assert_eq!($kind, tok.kind)); + }; +} + +#[test] +fn look_ahead() { + create_default_session_globals_then(|| { + let sym_f = Symbol::intern("f"); + let sym_x = Symbol::intern("x"); + #[allow(non_snake_case)] + let sym_S = Symbol::intern("S"); + let raw_no = IdentIsRaw::No; + + let psess = psess(); + let mut p = string_to_parser(&psess, "fn f(x: u32) { x } struct S;".to_string()); + + // Current position is the `fn`. + look!(p, 0, token::Ident(kw::Fn, raw_no)); + look!(p, 1, token::Ident(sym_f, raw_no)); + look!(p, 2, token::OpenDelim(Delimiter::Parenthesis)); + look!(p, 3, token::Ident(sym_x, raw_no)); + look!(p, 4, token::Colon); + look!(p, 5, token::Ident(sym::u32, raw_no)); + look!(p, 6, token::CloseDelim(Delimiter::Parenthesis)); + look!(p, 7, token::OpenDelim(Delimiter::Brace)); + look!(p, 8, token::Ident(sym_x, raw_no)); + look!(p, 9, token::CloseDelim(Delimiter::Brace)); + look!(p, 10, token::Ident(kw::Struct, raw_no)); + look!(p, 11, token::Ident(sym_S, raw_no)); + look!(p, 12, token::Semi); + // Any lookahead past the end of the token stream returns `Eof`. + look!(p, 13, token::Eof); + look!(p, 14, token::Eof); + look!(p, 15, token::Eof); + look!(p, 100, token::Eof); + + // Move forward to the first `x`. + for _ in 0..3 { + p.bump(); + } + look!(p, 0, token::Ident(sym_x, raw_no)); + look!(p, 1, token::Colon); + look!(p, 2, token::Ident(sym::u32, raw_no)); + look!(p, 3, token::CloseDelim(Delimiter::Parenthesis)); + // FIXME(nnethercote) If we lookahead any distance past a close delim + // we currently return that close delim. + look!(p, 4, token::CloseDelim(Delimiter::Parenthesis)); + look!(p, 5, token::CloseDelim(Delimiter::Parenthesis)); + look!(p, 6, token::CloseDelim(Delimiter::Parenthesis)); + look!(p, 100, token::CloseDelim(Delimiter::Parenthesis)); + + // Move forward to the `;`. + for _ in 0..9 { + p.bump(); + } + look!(p, 0, token::Semi); + // Any lookahead past the end of the token stream returns `Eof`. + look!(p, 1, token::Eof); + look!(p, 100, token::Eof); + + // Move one past the `;`, i.e. past the end of the token stream. + p.bump(); + look!(p, 0, token::Eof); + look!(p, 1, token::Eof); + look!(p, 100, token::Eof); + + // Bumping after Eof is idempotent. + p.bump(); + look!(p, 0, token::Eof); + look!(p, 1, token::Eof); + look!(p, 100, token::Eof); + }); +} + +/// FIXME(nnethercote) Currently there is some buggy behaviour when using +/// `look_ahead` not within the outermost token stream, as this test shows. +#[test] +fn look_ahead_non_outermost_stream() { + create_default_session_globals_then(|| { + let sym_f = Symbol::intern("f"); + #[allow(non_snake_case)] + let sym_S = Symbol::intern("S"); + let raw_no = IdentIsRaw::No; + + let psess = psess(); + let mut p = string_to_parser(&psess, "mod m { fn f(x: u32) { x } struct S; }".to_string()); + + // Move forward to the `fn`, which is not within the outermost token + // stream (because it's inside the `mod { ... }`). + for _ in 0..3 { + p.bump(); + } + look!(p, 0, token::Ident(kw::Fn, raw_no)); + look!(p, 1, token::Ident(sym_f, raw_no)); + look!(p, 2, token::OpenDelim(Delimiter::Parenthesis)); + // FIXME(nnethercote) The current code incorrectly skips the `x: u32)` + // to the next token tree. + look!(p, 3, token::OpenDelim(Delimiter::Brace)); + // FIXME(nnethercote) The current code incorrectly skips the `x }` + // to the next token tree. + look!(p, 4, token::Ident(kw::Struct, raw_no)); + look!(p, 5, token::Ident(sym_S, raw_no)); + look!(p, 6, token::Semi); + // FIXME(nnethercote) If we lookahead any distance past a close delim + // we currently return that close delim. + look!(p, 7, token::CloseDelim(Delimiter::Brace)); + look!(p, 8, token::CloseDelim(Delimiter::Brace)); + look!(p, 9, token::CloseDelim(Delimiter::Brace)); + look!(p, 100, token::CloseDelim(Delimiter::Brace)); + }); +} + // This tests that when parsing a string (rather than a file) we don't try // and read in a file for a module declaration and just parse a stub. // See `recurse_into_file_modules` in the parser.