mirror of
https://github.com/rust-lang/rust.git
synced 2024-12-23 22:14:15 +00:00
rustc: Forbid interpolated tokens in the HIR
Right now the HIR contains the raw `syntax::ast::Attribute` structure, but nowadays attributes can contain arbitrary tokens. One variant of the `Token` enum is an "interpolated" token, which basically means to shove all the tokens for a nonterminal in this position. A "nonterminal" in this case is roughly analogous to a macro argument:

    macro_rules! foo {
        ($a:expr) => {
            // $a is a nonterminal as an expression
        }
    }

Currently nonterminals include, notably, items and expressions, and this poses a problem for incremental compilation! With incremental we want a stable hash of all HIR items, but this means we may transitively need a stable hash *of the entire AST*, which is certainly not stable w/ node ids and whatnot. Hence today there's a "bug" where the "stable hash" of an AST is just the raw hash value of the AST, and this only arises with interpolated nonterminals. The downside of this approach, however, is that a bunch of warnings get spewed out during compilation about how this isn't a great idea.

This PR is focused on fixing these warnings, basically deleting them from the compiler. The implementation here is to alter attributes as they're lowered from the AST to HIR, expanding all nonterminals in-place as we see them. This code for expanding a nonterminal to a token stream already exists for the `proc_macro` crate, so we basically just reuse the same implementation there.

After this PR it's considered a bug to have an `Interpolated` token in the HIR, and hence the stable hash implementation simply uses `bug!` in this location.

Closes #40946
This commit is contained in:
parent
0701b37d97
commit
0694e4fde4
@ -54,8 +54,7 @@ use std::str::FromStr;
|
||||
|
||||
use syntax::ast;
|
||||
use syntax::errors::DiagnosticBuilder;
|
||||
use syntax::parse::{self, token, parse_stream_from_source_str};
|
||||
use syntax::print::pprust;
|
||||
use syntax::parse::{self, token};
|
||||
use syntax::symbol::Symbol;
|
||||
use syntax::tokenstream;
|
||||
use syntax_pos::DUMMY_SP;
|
||||
@ -525,47 +524,10 @@ impl TokenTree {
|
||||
Ident(ident) | Lifetime(ident) => TokenNode::Term(Term(ident.name)),
|
||||
Literal(..) | DocComment(..) => TokenNode::Literal(self::Literal(token)),
|
||||
|
||||
Interpolated(ref nt) => {
|
||||
// An `Interpolated` token means that we have a `Nonterminal`
|
||||
// which is often a parsed AST item. At this point we now need
|
||||
// to convert the parsed AST to an actual token stream, e.g.
|
||||
// un-parse it basically.
|
||||
//
|
||||
// Unfortunately there's not really a great way to do that in a
|
||||
// guaranteed lossless fashion right now. The fallback here is
|
||||
// to just stringify the AST node and reparse it, but this loses
|
||||
// all span information.
|
||||
//
|
||||
// As a result, some AST nodes are annotated with the token
|
||||
// stream they came from. Attempt to extract these lossless
|
||||
// token streams before we fall back to the stringification.
|
||||
let mut tokens = None;
|
||||
|
||||
match nt.0 {
|
||||
Nonterminal::NtItem(ref item) => {
|
||||
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
|
||||
}
|
||||
Nonterminal::NtTraitItem(ref item) => {
|
||||
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
|
||||
}
|
||||
Nonterminal::NtImplItem(ref item) => {
|
||||
tokens = prepend_attrs(&item.attrs, item.tokens.as_ref(), span);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
tokens.map(|tokens| {
|
||||
TokenNode::Group(Delimiter::None,
|
||||
TokenStream(tokens.clone()))
|
||||
}).unwrap_or_else(|| {
|
||||
__internal::with_sess(|(sess, _)| {
|
||||
TokenNode::Group(Delimiter::None, TokenStream(nt.1.force(|| {
|
||||
// FIXME(jseyfried): Avoid this pretty-print + reparse hack
|
||||
let name = "<macro expansion>".to_owned();
|
||||
let source = pprust::token_to_string(&token);
|
||||
parse_stream_from_source_str(name, source, sess, Some(span))
|
||||
})))
|
||||
})
|
||||
Interpolated(_) => {
|
||||
__internal::with_sess(|(sess, _)| {
|
||||
let tts = token.interpolated_to_tokenstream(sess, span);
|
||||
TokenNode::Group(Delimiter::None, TokenStream(tts))
|
||||
})
|
||||
}
|
||||
|
||||
@ -631,34 +593,6 @@ impl TokenTree {
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a token stream made of the tokens of every attribute in `attrs`
// followed by `tokens`.
//
// Returns `None` when `tokens` is `None`, and a plain clone of `tokens` when
// `attrs` is empty. Asserts that every attribute has `AttrStyle::Outer`.
// Each attribute is turned into tokens by pretty-printing it and re-parsing
// the resulting string with `span` passed along to the parser.
fn prepend_attrs(attrs: &[ast::Attribute],
|
||||
tokens: Option<&tokenstream::TokenStream>,
|
||||
span: syntax_pos::Span)
|
||||
-> Option<tokenstream::TokenStream>
|
||||
{
|
||||
let tokens = match tokens {
|
||||
Some(tokens) => tokens,
|
||||
None => return None,
|
||||
};
|
||||
if attrs.len() == 0 {
|
||||
return Some(tokens.clone())
|
||||
|
||||
}
|
||||
let mut builder = tokenstream::TokenStreamBuilder::new();
|
||||
for attr in attrs {
|
||||
assert_eq!(attr.style, ast::AttrStyle::Outer,
|
||||
"inner attributes should prevent cached tokens from existing");
|
||||
let stream = __internal::with_sess(|(sess, _)| {
|
||||
// FIXME: Avoid this pretty-print + reparse hack as above
|
||||
let name = "<macro expansion>".to_owned();
|
||||
let source = pprust::attr_to_string(attr);
|
||||
parse_stream_from_source_str(name, source, sess, Some(span))
|
||||
});
|
||||
builder.push(stream);
|
||||
}
|
||||
// The attribute streams go first, then the item's own tokens.
builder.push(tokens.clone());
|
||||
Some(builder.build())
|
||||
}
|
||||
|
||||
/// Permanently unstable internal implementation details of this crate. This
|
||||
/// should not be used.
|
||||
///
|
||||
|
@ -64,6 +64,8 @@ use syntax::ptr::P;
|
||||
use syntax::codemap::{self, respan, Spanned, CompilerDesugaringKind};
|
||||
use syntax::std_inject;
|
||||
use syntax::symbol::{Symbol, keywords};
|
||||
use syntax::tokenstream::{TokenStream, TokenTree, Delimited};
|
||||
use syntax::parse::token::{Token, DelimToken};
|
||||
use syntax::util::small_vector::SmallVector;
|
||||
use syntax::visit::{self, Visitor};
|
||||
use syntax_pos::Span;
|
||||
@ -589,7 +591,50 @@ impl<'a> LoweringContext<'a> {
|
||||
}
|
||||
|
||||
fn lower_attrs(&mut self, attrs: &Vec<Attribute>) -> hir::HirVec<Attribute> {
|
||||
attrs.clone().into()
|
||||
attrs.iter().map(|a| self.lower_attr(a)).collect::<Vec<_>>().into()
|
||||
}
|
||||
|
||||
/// Lowers a single attribute: every field is copied (or cloned) from `attr`
/// except `tokens`, which is passed through `lower_token_stream`.
fn lower_attr(&mut self, attr: &Attribute) -> Attribute {
|
||||
Attribute {
|
||||
id: attr.id,
|
||||
style: attr.style,
|
||||
path: attr.path.clone(),
|
||||
tokens: self.lower_token_stream(attr.tokens.clone()),
|
||||
is_sugared_doc: attr.is_sugared_doc,
|
||||
span: attr.span,
|
||||
}
|
||||
}
|
||||
|
||||
/// Lowers a token stream by lowering each of its token trees with
/// `lower_token_tree` and collecting the results into a new stream.
fn lower_token_stream(&mut self, tokens: TokenStream) -> TokenStream {
|
||||
tokens.into_trees().map(|tree| self.lower_token_tree(tree)).collect()
|
||||
}
|
||||
|
||||
/// Lowers one token tree.
///
/// A single token is handed to `lower_token`. A delimited group keeps its
/// span and delimiter, and its contents are lowered recursively through
/// `lower_token_stream`.
fn lower_token_tree(&mut self, tree: TokenTree) -> TokenTree {
|
||||
match tree {
|
||||
TokenTree::Token(span, token) => {
|
||||
self.lower_token(token, span)
|
||||
}
|
||||
TokenTree::Delimited(span, delimited) => {
|
||||
TokenTree::Delimited(span, Delimited {
|
||||
delim: delimited.delim,
|
||||
tts: self.lower_token_stream(delimited.tts.into()).into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lowers a single token found at `span`.
///
/// Any token other than `Token::Interpolated` is returned unchanged as a
/// `TokenTree::Token`. An interpolated token is expanded into a token stream
/// with `interpolated_to_tokenstream`, that stream is itself lowered, and the
/// result is wrapped in a `Delimited` group using `DelimToken::NoDelim`.
fn lower_token(&mut self, token: Token, span: Span) -> TokenTree {
|
||||
match token {
|
||||
// Fall through to the expansion below.
Token::Interpolated(_) => {}
|
||||
other => return TokenTree::Token(span, other),
|
||||
}
|
||||
|
||||
let tts = token.interpolated_to_tokenstream(&self.sess.parse_sess, span);
|
||||
// Lower the expansion too, so interpolated tokens inside it are expanded as well.
let tts = self.lower_token_stream(tts);
|
||||
TokenTree::Delimited(span, Delimited {
|
||||
delim: DelimToken::NoDelim,
|
||||
tts: tts.into(),
|
||||
})
|
||||
}
|
||||
|
||||
fn lower_arm(&mut self, arm: &Arm) -> hir::Arm {
|
||||
@ -1625,13 +1670,14 @@ impl<'a> LoweringContext<'a> {
|
||||
let attrs = self.lower_attrs(&i.attrs);
|
||||
if let ItemKind::MacroDef(ref def) = i.node {
|
||||
if !def.legacy || i.attrs.iter().any(|attr| attr.path == "macro_export") {
|
||||
let body = self.lower_token_stream(def.stream());
|
||||
self.exported_macros.push(hir::MacroDef {
|
||||
name,
|
||||
vis,
|
||||
attrs,
|
||||
id: i.id,
|
||||
span: i.span,
|
||||
body: def.stream(),
|
||||
body,
|
||||
legacy: def.legacy,
|
||||
});
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ use syntax::ast;
|
||||
use syntax::parse::token;
|
||||
use syntax::symbol::InternedString;
|
||||
use syntax::tokenstream;
|
||||
use syntax_pos::{Span, FileMap};
|
||||
use syntax_pos::FileMap;
|
||||
|
||||
use hir::def_id::{DefId, CrateNum, CRATE_DEF_INDEX};
|
||||
|
||||
@ -228,7 +228,7 @@ for tokenstream::TokenTree {
|
||||
match *self {
|
||||
tokenstream::TokenTree::Token(span, ref token) => {
|
||||
span.hash_stable(hcx, hasher);
|
||||
hash_token(token, hcx, hasher, span);
|
||||
hash_token(token, hcx, hasher);
|
||||
}
|
||||
tokenstream::TokenTree::Delimited(span, ref delimited) => {
|
||||
span.hash_stable(hcx, hasher);
|
||||
@ -254,8 +254,7 @@ for tokenstream::TokenStream {
|
||||
|
||||
fn hash_token<'gcx, W: StableHasherResult>(token: &token::Token,
|
||||
hcx: &mut StableHashingContext<'gcx>,
|
||||
hasher: &mut StableHasher<W>,
|
||||
error_reporting_span: Span) {
|
||||
hasher: &mut StableHasher<W>) {
|
||||
mem::discriminant(token).hash_stable(hcx, hasher);
|
||||
match *token {
|
||||
token::Token::Eq |
|
||||
@ -318,20 +317,8 @@ fn hash_token<'gcx, W: StableHasherResult>(token: &token::Token,
|
||||
token::Token::Ident(ident) |
|
||||
token::Token::Lifetime(ident) => ident.name.hash_stable(hcx, hasher),
|
||||
|
||||
token::Token::Interpolated(ref non_terminal) => {
|
||||
// FIXME(mw): This could be implemented properly. It's just a
|
||||
// lot of work, since we would need to hash the AST
|
||||
// in a stable way, in addition to the HIR.
|
||||
// Since this is hardly used anywhere, just emit a
|
||||
// warning for now.
|
||||
if hcx.sess().opts.debugging_opts.incremental.is_some() {
|
||||
let msg = format!("Quasi-quoting might make incremental \
|
||||
compilation very inefficient: {:?}",
|
||||
non_terminal);
|
||||
hcx.sess().span_warn(error_reporting_span, &msg[..]);
|
||||
}
|
||||
|
||||
std_hash::Hash::hash(non_terminal, hasher);
|
||||
token::Token::Interpolated(_) => {
|
||||
bug!("interpolated tokens should not be present in the HIR")
|
||||
}
|
||||
|
||||
token::Token::DocComment(val) |
|
||||
|
@ -15,10 +15,15 @@ pub use self::Lit::*;
|
||||
pub use self::Token::*;
|
||||
|
||||
use ast::{self};
|
||||
use parse::ParseSess;
|
||||
use print::pprust;
|
||||
use ptr::P;
|
||||
use serialize::{Decodable, Decoder, Encodable, Encoder};
|
||||
use symbol::keywords;
|
||||
use syntax::parse::parse_stream_from_source_str;
|
||||
use syntax_pos::{self, Span};
|
||||
use tokenstream::{TokenStream, TokenTree};
|
||||
use tokenstream;
|
||||
|
||||
use std::cell::Cell;
|
||||
use std::{cmp, fmt};
|
||||
@ -421,6 +426,59 @@ impl Token {
|
||||
pub fn is_reserved_ident(&self) -> bool {
|
||||
self.is_special_ident() || self.is_used_keyword() || self.is_unused_keyword()
|
||||
}
|
||||
|
||||
/// Converts an `Interpolated` token into the token stream for its
/// nonterminal.
///
/// Items, trait items and impl items use the tokens stored on the AST node
/// (with attribute tokens prepended by `prepend_attrs`) when those are
/// present; an identifier becomes a single `Ident` token at the identifier's
/// span; a token tree is cloned as-is. In every other case the stream comes
/// from `nt.1.force(..)`, whose closure pretty-prints this token and
/// re-parses the string with `sess` and `span`.
///
/// # Panics
///
/// Panics if `self` is not `Token::Interpolated`.
pub fn interpolated_to_tokenstream(&self, sess: &ParseSess, span: Span)
|
||||
-> TokenStream
|
||||
{
|
||||
let nt = match *self {
|
||||
Token::Interpolated(ref nt) => nt,
|
||||
_ => panic!("only works on interpolated tokens"),
|
||||
};
|
||||
|
||||
// An `Interpolated` token means that we have a `Nonterminal`
|
||||
// which is often a parsed AST item. At this point we now need
|
||||
// to convert the parsed AST to an actual token stream, e.g.
|
||||
// un-parse it basically.
|
||||
//
|
||||
// Unfortunately there's not really a great way to do that in a
|
||||
// guaranteed lossless fashion right now. The fallback here is
|
||||
// to just stringify the AST node and reparse it, but this loses
|
||||
// all span information.
|
||||
//
|
||||
// As a result, some AST nodes are annotated with the token
|
||||
// stream they came from. Attempt to extract these lossless
|
||||
// token streams before we fall back to the stringification.
|
||||
let mut tokens = None;
|
||||
|
||||
match nt.0 {
|
||||
Nonterminal::NtItem(ref item) => {
|
||||
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
|
||||
}
|
||||
Nonterminal::NtTraitItem(ref item) => {
|
||||
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
|
||||
}
|
||||
Nonterminal::NtImplItem(ref item) => {
|
||||
tokens = prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span);
|
||||
}
|
||||
Nonterminal::NtIdent(ident) => {
|
||||
let token = Token::Ident(ident.node);
|
||||
tokens = Some(TokenTree::Token(ident.span, token).into());
|
||||
}
|
||||
Nonterminal::NtTT(ref tt) => {
|
||||
tokens = Some(tt.clone().into());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// No direct token stream was found above: fall back to the
// pretty-print + reparse path.
// NOTE(review): `nt.1` looks like a lazily computed, cached stream — confirm.
tokens.unwrap_or_else(|| {
|
||||
nt.1.force(|| {
|
||||
// FIXME(jseyfried): Avoid this pretty-print + reparse hack
|
||||
let name = "<macro expansion>".to_owned();
|
||||
let source = pprust::token_to_string(self);
|
||||
parse_stream_from_source_str(name, source, sess, Some(span))
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, RustcEncodable, RustcDecodable, PartialEq, Eq, Hash)]
|
||||
@ -533,3 +591,30 @@ impl Decodable for LazyTokenStream {
|
||||
impl ::std::hash::Hash for LazyTokenStream {
|
||||
fn hash<H: ::std::hash::Hasher>(&self, _hasher: &mut H) {}
|
||||
}
|
||||
|
||||
// Builds a token stream made of the tokens of every attribute in `attrs`
// followed by `tokens`.
//
// Returns `None` when `tokens` is `None`, and a plain clone of `tokens` when
// `attrs` is empty. Asserts that every attribute has `AttrStyle::Outer`.
// Each attribute is turned into tokens by pretty-printing it and re-parsing
// the resulting string with `sess`, passing `span` along to the parser.
fn prepend_attrs(sess: &ParseSess,
|
||||
attrs: &[ast::Attribute],
|
||||
tokens: Option<&tokenstream::TokenStream>,
|
||||
span: syntax_pos::Span)
|
||||
-> Option<tokenstream::TokenStream>
|
||||
{
|
||||
let tokens = match tokens {
|
||||
Some(tokens) => tokens,
|
||||
None => return None,
|
||||
};
|
||||
if attrs.len() == 0 {
|
||||
return Some(tokens.clone())
|
||||
}
|
||||
let mut builder = tokenstream::TokenStreamBuilder::new();
|
||||
for attr in attrs {
|
||||
assert_eq!(attr.style, ast::AttrStyle::Outer,
|
||||
"inner attributes should prevent cached tokens from existing");
|
||||
// FIXME: Avoid this pretty-print + reparse hack as above
|
||||
let name = "<macro expansion>".to_owned();
|
||||
let source = pprust::attr_to_string(attr);
|
||||
let stream = parse_stream_from_source_str(name, source, sess, Some(span));
|
||||
builder.push(stream);
|
||||
}
|
||||
// The attribute streams go first, then the item's own tokens.
builder.push(tokens.clone());
|
||||
Some(builder.build())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user