Add some comments.

This commit is contained in:
Nicholas Nethercote 2024-05-20 12:54:38 +10:00
parent a1b6d46e04
commit 4d513cb4bf
3 changed files with 46 additions and 11 deletions

View File

@ -681,22 +681,40 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
} }
} }
// The easiest way to implement token stream pretty printing would be to
// print each token followed by a single space. But that would produce ugly
// output, so we go to some effort to do better.
//
// First, we track whether each token that appears in source code is
// followed by a space, with `Spacing`, and reproduce that in the output.
// This works well in a lot of cases. E.g. `stringify!(x + y)` produces
// "x + y" and `stringify!(x+y)` produces "x+y".
//
// But this doesn't work for code produced by proc macros (which have no
// original source text representation) nor for code produced by decl
// macros (which are tricky because the whitespace after tokens appearing
// in macro rules isn't always what you want in the produced output). For
// these we mostly use `Spacing::Alone`, which is the conservative choice.
//
// So we have a backup mechanism for when `Spacing::Alone` occurs between a
// pair of tokens: we check if that pair of tokens can obviously go
// together without a space between them. E.g. token `x` followed by token
// `,` is better printed as `x,` than `x ,`. (Even if the original source
// code was `x ,`.)
//
// Finally, we must be careful about changing the output. Token pretty
// printing is used by `stringify!` and `impl Display for
// proc_macro::TokenStream`, and some programs rely on the output having a
// particular form, even though they shouldn't. In particular, some proc
// macros do `format!({stream})` on a token stream and then "parse" the
// output with simple string matching that can't handle whitespace changes.
// E.g. we have seen cases where a proc macro can handle `a :: b` but not
// `a::b`. See #117433 for some examples.
fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) { fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) {
let mut iter = tts.trees().peekable(); let mut iter = tts.trees().peekable();
while let Some(tt) = iter.next() { while let Some(tt) = iter.next() {
let spacing = self.print_tt(tt, convert_dollar_crate); let spacing = self.print_tt(tt, convert_dollar_crate);
if let Some(next) = iter.peek() { if let Some(next) = iter.peek() {
// Should we print a space after `tt`? There are two guiding
// factors.
// - `spacing` is the more important and accurate one. Most
// tokens have good spacing information, and
// `Joint`/`JointHidden` get used a lot.
// - `space_between` is the backup. Code produced by proc
// macros has worse spacing information, with no
// `JointHidden` usage and too much `Alone` usage, which
// would result in over-spaced output such as
// `( x () , y . z )`. `space_between` avoids some of the
// excess whitespace.
if spacing == Spacing::Alone && space_between(tt, next) { if spacing == Spacing::Alone && space_between(tt, next) {
self.space(); self.space();
} }

View File

@ -68,6 +68,8 @@ pub(crate) enum KleeneOp {
/// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros. /// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros.
#[derive(Debug, PartialEq, Encodable, Decodable)] #[derive(Debug, PartialEq, Encodable, Decodable)]
enum TokenTree { enum TokenTree {
/// A token. Unlike `tokenstream::TokenTree::Token` this lacks a `Spacing`.
/// See the comments about `Spacing` in the `transcribe` function.
Token(Token), Token(Token),
/// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS). /// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS).
Delimited(DelimSpan, DelimSpacing, Delimited), Delimited(DelimSpan, DelimSpacing, Delimited),

View File

@ -253,8 +253,23 @@ pub(super) fn transcribe<'a>(
mbe::TokenTree::MetaVar(mut sp, mut original_ident) => { mbe::TokenTree::MetaVar(mut sp, mut original_ident) => {
// Find the matched nonterminal from the macro invocation, and use it to replace // Find the matched nonterminal from the macro invocation, and use it to replace
// the meta-var. // the meta-var.
//
// We use `Spacing::Alone` everywhere here, because that's the conservative choice
// and spacing of declarative macros is tricky. E.g. in this macro:
// ```
// macro_rules! idents {
// ($($a:ident,)*) => { stringify!($($a)*) }
// }
// ```
// `$a` has no whitespace after it and will be marked `JointHidden`. If you then
// call `idents!(x,y,z,)`, each of `x`, `y`, and `z` will be marked as `Joint`. So
// if you choose to use `$x`'s spacing or the identifier's spacing, you'll end up
// producing "xyz", which is bad because it effectively merges tokens.
// `Spacing::Alone` is the safer option. Fortunately, `space_between` will avoid
// some of the unnecessary whitespace.
let ident = MacroRulesNormalizedIdent::new(original_ident); let ident = MacroRulesNormalizedIdent::new(original_ident);
if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) { if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
// njn: explain the use of alone here
let tt = match cur_matched { let tt = match cur_matched {
MatchedSingle(ParseNtResult::Tt(tt)) => { MatchedSingle(ParseNtResult::Tt(tt)) => {
// `tt`s are emitted into the output stream directly as "raw tokens", // `tt`s are emitted into the output stream directly as "raw tokens",