Auto merge of #119105 - dtolnay:paren, r=WaffleLapkin

Fix parenthesization of subexprs containing statement boundary

This PR fixes a multitude of false negatives and false positives in the AST pretty printer's parenthesis insertion related to statement boundaries — statements which terminate unexpectedly early if there aren't parentheses.

Without this fix, the AST pretty printer (including both `stringify!` and `rustc -Zunpretty=expanded`) is prone to producing output which is not syntactically valid Rust. Invalid output is problematic because it means Rustfmt is unable to parse the output of `cargo expand`, for example, causing friction by forcing someone trying to debug a macro into reading poorly formatted code.

I believe the set of bugs fixed in this PR account for the most prevalent reason that `cargo expand` produces invalid output in real-world usage.

Fixes #98790.

## False negatives

The following is a correct program — `cargo check` succeeds.

```rust
macro_rules! m {
    ($e:expr) => {
        match () { _ => $e }
    };
}

fn main() {
    m!({ 1 } - 1);
}
```

But `rustc -Zunpretty=expanded main.rs` produces output that is invalid Rust syntax, because parenthesization is needed and not being done by the pretty printer.

```rust
fn main() { match () { _ => { 1 } - 1, }; }
```

Piping this expanded code to rustfmt, it fails to parse.

```console
error: unexpected `,` in pattern
 --> <stdin>:1:38
  |
1 | fn main() { match () { _ => { 1 } - 1, }; }
  |                                      ^
  |
help: try adding parentheses to match on a tuple...
  |
1 | fn main() { match () { _ => { 1 } (- 1,) }; }
  |                                   +    +
help: ...or a vertical bar to match on multiple alternatives
  |
1 | fn main() { match () { _ => { 1 } - 1 | }; }
  |                                   ~~~~~
```

Fixed output after this PR:

```rust
fn main() { match () { _ => ({ 1 }) - 1, }; }
```

## False positives

Less problematic, but worth fixing (just like #118726).

```rust
fn main() {
    let _ = match () { _ => 1 } - 1;
}
```

Output of `rustc -Zunpretty=expanded lib.rs` before this PR. There is no reason parentheses would need to be inserted there.

```rust
fn main() { let _ = (match () { _ => 1, }) - 1; }
```

After this PR:

```rust
fn main() { let _ = match () { _ => 1, } - 1; }
```

## Alternatives considered

In this PR I opted to parenthesize only the leading subexpression causing the statement boundary, rather than the entire statement. Example:

```rust
macro_rules! m {
    ($e:expr) => {
        $e
    };
}

fn main() {
    m!(loop { break [1]; }[0] - 1);
}
```

This PR produces the following pretty-printed contents for fn main:

```rust
(loop { break [1]; })[0] - 1;
```

A different equally correct output would be:

```rust
(loop { break [1]; }[0] - 1);
```

I chose the one I did because it is the *only* approach used by handwritten code in the standard library and compiler. There are 4 places where parenthesization is being used to prevent a statement boundary, and in all 4, the developer has chosen to parenthesize the smallest subexpression rather than the whole statement:

b37d43efd9/compiler/rustc_codegen_cranelift/example/alloc_system.rs (L102)

b37d43efd9/compiler/rustc_parse/src/errors.rs (L1021-L1029)

b37d43efd9/library/core/src/future/poll_fn.rs (L151)

b37d43efd9/library/core/src/ops/range.rs (L824-L828)
This commit is contained in:
bors 2023-12-27 21:27:26 +00:00
commit 89e2160c4c
3 changed files with 358 additions and 44 deletions

View File

@ -1096,14 +1096,22 @@ impl<'a> State<'a> {
ast::StmtKind::Item(item) => self.print_item(item),
ast::StmtKind::Expr(expr) => {
self.space_if_not_bol();
self.print_expr_outer_attr_style(expr, false, FixupContext::default());
self.print_expr_outer_attr_style(
expr,
false,
FixupContext { stmt: true, ..FixupContext::default() },
);
if classify::expr_requires_semi_to_be_stmt(expr) {
self.word(";");
}
}
ast::StmtKind::Semi(expr) => {
self.space_if_not_bol();
self.print_expr_outer_attr_style(expr, false, FixupContext::default());
self.print_expr_outer_attr_style(
expr,
false,
FixupContext { stmt: true, ..FixupContext::default() },
);
self.word(";");
}
ast::StmtKind::Empty => {
@ -1155,7 +1163,11 @@ impl<'a> State<'a> {
ast::StmtKind::Expr(expr) if i == blk.stmts.len() - 1 => {
self.maybe_print_comment(st.span.lo());
self.space_if_not_bol();
self.print_expr_outer_attr_style(expr, false, FixupContext::default());
self.print_expr_outer_attr_style(
expr,
false,
FixupContext { stmt: true, ..FixupContext::default() },
);
self.maybe_print_trailing_comment(expr.span, Some(blk.span.hi()));
}
_ => self.print_stmt(st),

View File

@ -4,6 +4,7 @@ use ast::ForLoopKind;
use itertools::{Itertools, Position};
use rustc_ast::ptr::P;
use rustc_ast::token;
use rustc_ast::util::classify;
use rustc_ast::util::literal::escape_byte_str_symbol;
use rustc_ast::util::parser::{self, AssocOp, Fixity};
use rustc_ast::{self as ast, BlockCheckMode};
@ -15,6 +16,61 @@ use std::fmt::Write;
#[derive(Copy, Clone, Debug)]
pub(crate) struct FixupContext {
/// Print expression such that it can be parsed back as a statement
/// consisting of the original expression.
///
/// The effect of this is for binary operators in statement position to set
/// `leftmost_subexpression_in_stmt` when printing their left-hand operand.
///
/// ```ignore (illustrative)
/// (match x {}) - 1; // match needs parens when LHS of binary operator
///
/// match x {}; // not when its own statement
/// ```
pub stmt: bool,
/// This is the difference between:
///
/// ```ignore (illustrative)
/// (match x {}) - 1; // subexpression needs parens
///
/// let _ = match x {} - 1; // no parens
/// ```
///
/// There are 3 distinguishable contexts in which `print_expr` might be
/// called with the expression `$match` as its argument, where `$match`
/// represents an expression of kind `ExprKind::Match`:
///
/// - stmt=false leftmost_subexpression_in_stmt=false
///
/// Example: `let _ = $match - 1;`
///
/// No parentheses required.
///
/// - stmt=false leftmost_subexpression_in_stmt=true
///
/// Example: `$match - 1;`
///
/// Must parenthesize `($match)`, otherwise parsing back the output as a
/// statement would terminate the statement after the closing brace of
/// the match, parsing `-1;` as a separate statement.
///
/// - stmt=true leftmost_subexpression_in_stmt=false
///
/// Example: `$match;`
///
/// No parentheses required.
pub leftmost_subexpression_in_stmt: bool,
/// This is the difference between:
///
/// ```ignore (illustrative)
/// if let _ = (Struct {}) {} // needs parens
///
/// match () {
/// () if let _ = Struct {} => {} // no parens
/// }
/// ```
pub parenthesize_exterior_struct_lit: bool,
}
@ -22,7 +78,11 @@ pub(crate) struct FixupContext {
/// in a targetted fashion where needed.
impl Default for FixupContext {
fn default() -> Self {
FixupContext { parenthesize_exterior_struct_lit: false }
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: false,
parenthesize_exterior_struct_lit: false,
}
}
}
@ -76,7 +136,8 @@ impl<'a> State<'a> {
/// Prints an expr using syntax that's acceptable in a condition position, such as the `cond` in
/// `if cond { ... }`.
fn print_expr_as_cond(&mut self, expr: &ast::Expr) {
let fixup = FixupContext { parenthesize_exterior_struct_lit: true };
let fixup =
FixupContext { parenthesize_exterior_struct_lit: true, ..FixupContext::default() };
self.print_expr_cond_paren(expr, Self::cond_needs_par(expr), fixup)
}
@ -99,26 +160,25 @@ impl<'a> State<'a> {
&mut self,
expr: &ast::Expr,
needs_par: bool,
fixup: FixupContext,
mut fixup: FixupContext,
) {
if needs_par {
self.popen();
// If we are surrounding the whole cond in parentheses, such as:
//
// if (return Struct {}) {}
//
// then there is no need for parenthesizing the individual struct
// expressions within. On the other hand if the whole cond is not
// parenthesized, then print_expr must parenthesize exterior struct
// literals.
//
// if x == (Struct {}) {}
//
fixup = FixupContext::default();
}
// If we are surrounding the whole cond in parentheses, such as:
//
// if (return Struct {}) {}
//
// then there is no need for parenthesizing the individual struct
// expressions within. On the other hand if the whole cond is not
// parenthesized, then print_expr must parenthesize exterior struct
// literals.
//
// if x == (Struct {}) {}
//
let fixup = FixupContext {
parenthesize_exterior_struct_lit: fixup.parenthesize_exterior_struct_lit && !needs_par,
};
self.print_expr(expr, fixup);
if needs_par {
@ -234,7 +294,32 @@ impl<'a> State<'a> {
_ => parser::PREC_POSTFIX,
};
self.print_expr_maybe_paren(func, prec, fixup);
// Independent of parenthesization related to precedence, we must
// parenthesize `func` if this is a statement context in which without
// parentheses, a statement boundary would occur inside `func` or
// immediately after `func`.
//
// Suppose `func` represents `match () { _ => f }`. We must produce:
//
// (match () { _ => f })();
//
// instead of:
//
// match () { _ => f } ();
//
// because the latter is valid syntax but with the incorrect meaning.
// It's a match-expression followed by tuple-expression, not a function
// call.
self.print_expr_maybe_paren(
func,
prec,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: fixup.stmt || fixup.leftmost_subexpression_in_stmt,
..fixup
},
);
self.print_call_post(args)
}
@ -245,7 +330,17 @@ impl<'a> State<'a> {
base_args: &[P<ast::Expr>],
fixup: FixupContext,
) {
// Unlike in `print_expr_call`, no change to fixup here because
// statement boundaries never occur in front of a `.` (or `?`) token.
//
// match () { _ => f }.method();
//
// Parenthesizing only for precedence and not with regard to statement
// boundaries, `$receiver.method()` can be parsed back as a statement
// containing an expression if and only if `$receiver` can be parsed as
// a statement containing an expression.
self.print_expr_maybe_paren(receiver, parser::PREC_POSTFIX, fixup);
self.word(".");
self.print_ident(segment.ident);
if let Some(args) = &segment.args {
@ -289,22 +384,36 @@ impl<'a> State<'a> {
(&ast::ExprKind::Let { .. }, _) if !parser::needs_par_as_let_scrutinee(prec) => {
parser::PREC_FORCE_PAREN
}
// For a binary expression like `(match () { _ => a }) OP b`, the parens are required
// otherwise the parser would interpret `match () { _ => a }` as a statement,
// with the remaining `OP b` not making sense. So we force parens.
(&ast::ExprKind::Match(..), _) => parser::PREC_FORCE_PAREN,
_ => left_prec,
};
self.print_expr_maybe_paren(lhs, left_prec, fixup);
self.print_expr_maybe_paren(
lhs,
left_prec,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: fixup.stmt || fixup.leftmost_subexpression_in_stmt,
..fixup
},
);
self.space();
self.word_space(op.node.as_str());
self.print_expr_maybe_paren(rhs, right_prec, fixup)
self.print_expr_maybe_paren(
rhs,
right_prec,
FixupContext { stmt: false, leftmost_subexpression_in_stmt: false, ..fixup },
);
}
fn print_expr_unary(&mut self, op: ast::UnOp, expr: &ast::Expr, fixup: FixupContext) {
self.word(op.as_str());
self.print_expr_maybe_paren(expr, parser::PREC_PREFIX, fixup)
self.print_expr_maybe_paren(
expr,
parser::PREC_PREFIX,
FixupContext { stmt: false, leftmost_subexpression_in_stmt: false, ..fixup },
);
}
fn print_expr_addr_of(
@ -322,7 +431,11 @@ impl<'a> State<'a> {
self.print_mutability(mutability, true);
}
}
self.print_expr_maybe_paren(expr, parser::PREC_PREFIX, fixup)
self.print_expr_maybe_paren(
expr,
parser::PREC_PREFIX,
FixupContext { stmt: false, leftmost_subexpression_in_stmt: false, ..fixup },
);
}
pub(super) fn print_expr(&mut self, expr: &ast::Expr, fixup: FixupContext) {
@ -333,7 +446,7 @@ impl<'a> State<'a> {
&mut self,
expr: &ast::Expr,
is_inline: bool,
fixup: FixupContext,
mut fixup: FixupContext,
) {
self.maybe_print_comment(expr.span.lo());
@ -345,7 +458,27 @@ impl<'a> State<'a> {
}
self.ibox(INDENT_UNIT);
// The Match subexpression in `match x {} - 1` must be parenthesized if
// it is the leftmost subexpression in a statement:
//
// (match x {}) - 1;
//
// But not otherwise:
//
// let _ = match x {} - 1;
//
// Same applies to a small set of other expression kinds which eagerly
// terminate a statement which opens with them.
let needs_par =
fixup.leftmost_subexpression_in_stmt && !classify::expr_requires_semi_to_be_stmt(expr);
if needs_par {
self.popen();
fixup = FixupContext::default();
}
self.ann.pre(self, AnnNode::Expr(expr));
match &expr.kind {
ast::ExprKind::Array(exprs) => {
self.print_expr_vec(exprs);
@ -386,7 +519,16 @@ impl<'a> State<'a> {
}
ast::ExprKind::Cast(expr, ty) => {
let prec = AssocOp::As.precedence() as i8;
self.print_expr_maybe_paren(expr, prec, fixup);
self.print_expr_maybe_paren(
expr,
prec,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: fixup.stmt
|| fixup.leftmost_subexpression_in_stmt,
..fixup
},
);
self.space();
self.word_space("as");
self.print_type(ty);
@ -508,31 +650,71 @@ impl<'a> State<'a> {
self.print_block_with_attrs(blk, attrs);
}
ast::ExprKind::Await(expr, _) => {
// Same fixups as ExprKind::MethodCall.
self.print_expr_maybe_paren(expr, parser::PREC_POSTFIX, fixup);
self.word(".await");
}
ast::ExprKind::Assign(lhs, rhs, _) => {
// Same fixups as ExprKind::Binary.
let prec = AssocOp::Assign.precedence() as i8;
self.print_expr_maybe_paren(lhs, prec + 1, fixup);
self.print_expr_maybe_paren(
lhs,
prec + 1,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: fixup.stmt
|| fixup.leftmost_subexpression_in_stmt,
..fixup
},
);
self.space();
self.word_space("=");
self.print_expr_maybe_paren(rhs, prec, fixup);
self.print_expr_maybe_paren(
rhs,
prec,
FixupContext { stmt: false, leftmost_subexpression_in_stmt: false, ..fixup },
);
}
ast::ExprKind::AssignOp(op, lhs, rhs) => {
// Same fixups as ExprKind::Binary.
let prec = AssocOp::Assign.precedence() as i8;
self.print_expr_maybe_paren(lhs, prec + 1, fixup);
self.print_expr_maybe_paren(
lhs,
prec + 1,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: fixup.stmt
|| fixup.leftmost_subexpression_in_stmt,
..fixup
},
);
self.space();
self.word(op.node.as_str());
self.word_space("=");
self.print_expr_maybe_paren(rhs, prec, fixup);
self.print_expr_maybe_paren(
rhs,
prec,
FixupContext { stmt: false, leftmost_subexpression_in_stmt: false, ..fixup },
);
}
ast::ExprKind::Field(expr, ident) => {
// Same fixups as ExprKind::MethodCall.
self.print_expr_maybe_paren(expr, parser::PREC_POSTFIX, fixup);
self.word(".");
self.print_ident(*ident);
}
ast::ExprKind::Index(expr, index, _) => {
self.print_expr_maybe_paren(expr, parser::PREC_POSTFIX, fixup);
// Same fixups as ExprKind::Call.
self.print_expr_maybe_paren(
expr,
parser::PREC_POSTFIX,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: fixup.stmt
|| fixup.leftmost_subexpression_in_stmt,
..fixup
},
);
self.word("[");
self.print_expr(index, FixupContext::default());
self.word("]");
@ -544,14 +726,31 @@ impl<'a> State<'a> {
// a "normal" binop gets parenthesized. (`LOr` is the lowest-precedence binop.)
let fake_prec = AssocOp::LOr.precedence() as i8;
if let Some(e) = start {
self.print_expr_maybe_paren(e, fake_prec, fixup);
self.print_expr_maybe_paren(
e,
fake_prec,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: fixup.stmt
|| fixup.leftmost_subexpression_in_stmt,
..fixup
},
);
}
match limits {
ast::RangeLimits::HalfOpen => self.word(".."),
ast::RangeLimits::Closed => self.word("..="),
}
if let Some(e) = end {
self.print_expr_maybe_paren(e, fake_prec, fixup);
self.print_expr_maybe_paren(
e,
fake_prec,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: false,
..fixup
},
);
}
}
ast::ExprKind::Underscore => self.word("_"),
@ -565,7 +764,15 @@ impl<'a> State<'a> {
}
if let Some(expr) = opt_expr {
self.space();
self.print_expr_maybe_paren(expr, parser::PREC_JUMP, fixup);
self.print_expr_maybe_paren(
expr,
parser::PREC_JUMP,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: false,
..fixup
},
);
}
}
ast::ExprKind::Continue(opt_label) => {
@ -579,7 +786,15 @@ impl<'a> State<'a> {
self.word("return");
if let Some(expr) = result {
self.word(" ");
self.print_expr_maybe_paren(expr, parser::PREC_JUMP, fixup);
self.print_expr_maybe_paren(
expr,
parser::PREC_JUMP,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: false,
..fixup
},
);
}
}
ast::ExprKind::Yeet(result) => {
@ -588,13 +803,25 @@ impl<'a> State<'a> {
self.word("yeet");
if let Some(expr) = result {
self.word(" ");
self.print_expr_maybe_paren(expr, parser::PREC_JUMP, fixup);
self.print_expr_maybe_paren(
expr,
parser::PREC_JUMP,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: false,
..fixup
},
);
}
}
ast::ExprKind::Become(result) => {
self.word("become");
self.word(" ");
self.print_expr_maybe_paren(result, parser::PREC_JUMP, fixup);
self.print_expr_maybe_paren(
result,
parser::PREC_JUMP,
FixupContext { stmt: false, leftmost_subexpression_in_stmt: false, ..fixup },
);
}
ast::ExprKind::InlineAsm(a) => {
// FIXME: This should have its own syntax, distinct from a macro invocation.
@ -644,10 +871,19 @@ impl<'a> State<'a> {
if let Some(expr) = e {
self.space();
self.print_expr_maybe_paren(expr, parser::PREC_JUMP, fixup);
self.print_expr_maybe_paren(
expr,
parser::PREC_JUMP,
FixupContext {
stmt: false,
leftmost_subexpression_in_stmt: false,
..fixup
},
);
}
}
ast::ExprKind::Try(e) => {
// Same fixups as ExprKind::MethodCall.
self.print_expr_maybe_paren(e, parser::PREC_POSTFIX, fixup);
self.word("?")
}
@ -663,7 +899,13 @@ impl<'a> State<'a> {
self.pclose()
}
}
self.ann.post(self, AnnNode::Expr(expr));
if needs_par {
self.pclose();
}
self.end();
}
@ -704,7 +946,7 @@ impl<'a> State<'a> {
}
_ => {
self.end(); // Close the ibox for the pattern.
self.print_expr(body, FixupContext::default());
self.print_expr(body, FixupContext { stmt: true, ..FixupContext::default() });
self.word(",");
}
}

View File

@ -204,6 +204,16 @@ fn test_expr() {
} ],
"match self { Ok => 1, Err => 0, }"
);
macro_rules! c2_match_arm {
([ $expr:expr ], $expr_expected:expr, $tokens_expected:expr $(,)?) => {
c2!(expr, [ match () { _ => $expr } ], $expr_expected, $tokens_expected);
};
}
c2_match_arm!(
[ { 1 } - 1 ],
"match () { _ => ({ 1 }) - 1, }",
"match() { _ => { 1 } - 1 }",
);
// ExprKind::Closure
c1!(expr, [ || {} ], "|| {}");
@ -651,6 +661,16 @@ fn test_stmt() {
"let (a, b): (u32, u32) = (1, 2);",
"let(a, b): (u32, u32) = (1, 2)" // FIXME
);
macro_rules! c2_let_expr_minus_one {
([ $expr:expr ], $stmt_expected:expr, $tokens_expected:expr $(,)?) => {
c2!(stmt, [ let _ = $expr - 1 ], $stmt_expected, $tokens_expected);
};
}
c2_let_expr_minus_one!(
[ match void {} ],
"let _ = match void {} - 1;",
"let _ = match void {} - 1",
);
// StmtKind::Item
c1!(stmt, [ struct S; ], "struct S;");
@ -661,6 +681,46 @@ fn test_stmt() {
// StmtKind::Semi
c2!(stmt, [ 1 + 1 ], "1 + 1;", "1 + 1");
macro_rules! c2_expr_as_stmt {
// Parse as expr, then reparse as stmt.
//
// The c2_minus_one macro below can't directly call `c2!(stmt, ...)`
// because `$expr - 1` cannot be parsed directly as a stmt. A statement
// boundary occurs after the `match void {}`, after which the `-` token
// hits "no rules expected this token in macro call".
//
// The unwanted statement boundary is exactly why the pretty-printer is
// injecting parentheses around the subexpression, which is the behavior
// we are interested in testing.
([ $expr:expr ], $stmt_expected:expr, $tokens_expected:expr $(,)?) => {
c2!(stmt, [ $expr ], $stmt_expected, $tokens_expected);
};
}
macro_rules! c2_minus_one {
([ $expr:expr ], $stmt_expected:expr, $tokens_expected:expr $(,)?) => {
c2_expr_as_stmt!([ $expr - 1 ], $stmt_expected, $tokens_expected);
};
}
c2_minus_one!(
[ match void {} ],
"(match void {}) - 1;",
"match void {} - 1",
);
c2_minus_one!(
[ match void {}() ],
"(match void {})() - 1;",
"match void {}() - 1",
);
c2_minus_one!(
[ match void {}[0] ],
"(match void {})[0] - 1;",
"match void {}[0] - 1",
);
c2_minus_one!(
[ loop { break 1; } ],
"(loop { break 1; }) - 1;",
"loop { break 1; } - 1",
);
// StmtKind::Empty
c1!(stmt, [ ; ], ";");