Auto merge of #103812 - clubby789:improve-include-bytes, r=petrochenkov

Delay `include_bytes` to AST lowering

Hopefully addresses #65818.
This PR introduces a new `ExprKind::IncludedBytes` variant which stores the bytes of a file included with `include_bytes!()`. We can then create a literal from the bytes during AST lowering, which means we don't need to escape the bytes into valid UTF-8; that escaping is the cause of most of the overhead of embedding large binary blobs.
This commit is contained in:
bors 2022-11-12 14:30:34 +00:00
commit 8ef2485bd5
19 changed files with 78 additions and 15 deletions

View File

@ -1208,7 +1208,7 @@ impl Expr {
ExprKind::Tup(_) => ExprPrecedence::Tup,
ExprKind::Binary(op, ..) => ExprPrecedence::Binary(op.node),
ExprKind::Unary(..) => ExprPrecedence::Unary,
ExprKind::Lit(_) => ExprPrecedence::Lit,
ExprKind::Lit(_) | ExprKind::IncludedBytes(..) => ExprPrecedence::Lit,
ExprKind::Type(..) | ExprKind::Cast(..) => ExprPrecedence::Cast,
ExprKind::Let(..) => ExprPrecedence::Let,
ExprKind::If(..) => ExprPrecedence::If,
@ -1446,6 +1446,12 @@ pub enum ExprKind {
/// with an optional value to be returned.
Yeet(Option<P<Expr>>),
/// Bytes included via `include_bytes!`
/// Added for optimization purposes to avoid the need to escape
/// large binary blobs - should always behave like [`ExprKind::Lit`]
/// with a `ByteStr` literal.
IncludedBytes(Lrc<[u8]>),
/// Placeholder for an expression that wasn't syntactically well formed in some way.
Err,
}

View File

@ -1428,7 +1428,7 @@ pub fn noop_visit_expr<T: MutVisitor>(
}
ExprKind::Try(expr) => vis.visit_expr(expr),
ExprKind::TryBlock(body) => vis.visit_block(body),
ExprKind::Lit(_) | ExprKind::Err => {}
ExprKind::Lit(_) | ExprKind::IncludedBytes(..) | ExprKind::Err => {}
}
vis.visit_id(id);
vis.visit_span(span);

View File

@ -2,6 +2,7 @@
use crate::ast::{self, Lit, LitKind};
use crate::token::{self, Token};
use rustc_data_structures::sync::Lrc;
use rustc_lexer::unescape::{byte_from_char, unescape_byte, unescape_char, unescape_literal, Mode};
use rustc_span::symbol::{kw, sym, Symbol};
use rustc_span::Span;
@ -231,6 +232,13 @@ impl Lit {
Lit { token_lit: kind.to_token_lit(), kind, span }
}
/// Recovers an AST literal from a string of bytes produced by `include_bytes!`.
///
/// The bytes are wrapped in a `LitKind::ByteStr` and passed to `from_lit_kind`,
/// which also derives the token form of the literal from that kind. Producing the
/// token form requires ASCII-escaping the bytes, which can result in poor
/// performance for very large strings of bytes, so prefer working with the raw
/// bytes directly when the token form is not needed.
///
/// `span` becomes the span of the returned literal.
pub fn from_included_bytes(bytes: &Lrc<[u8]>, span: Span) -> Lit {
// Cloning the `Lrc` shares the existing buffer (presumably a reference-count
// bump rather than a copy of the bytes — confirm against the `Lrc` definition).
Self::from_lit_kind(LitKind::ByteStr(bytes.clone()), span)
}
/// Losslessly convert an AST literal into a token.
pub fn to_token(&self) -> Token {
let kind = match self.token_lit.kind {

View File

@ -901,7 +901,7 @@ pub fn walk_expr<'a, V: Visitor<'a>>(visitor: &mut V, expression: &'a Expr) {
}
ExprKind::Try(ref subexpression) => visitor.visit_expr(subexpression),
ExprKind::TryBlock(ref body) => visitor.visit_block(body),
ExprKind::Lit(_) | ExprKind::Err => {}
ExprKind::Lit(_) | ExprKind::IncludedBytes(..) | ExprKind::Err => {}
}
visitor.visit_expr_post(expression)

View File

@ -87,6 +87,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
ExprKind::Lit(ref l) => {
hir::ExprKind::Lit(respan(self.lower_span(l.span), l.kind.clone()))
}
ExprKind::IncludedBytes(ref bytes) => hir::ExprKind::Lit(respan(
self.lower_span(e.span),
LitKind::ByteStr(bytes.clone()),
)),
ExprKind::Cast(ref expr, ref ty) => {
let expr = self.lower_expr(expr);
let ty =

View File

@ -323,7 +323,10 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
// ```
fn lower_expr_within_pat(&mut self, expr: &Expr, allow_paths: bool) -> &'hir hir::Expr<'hir> {
match expr.kind {
ExprKind::Lit(..) | ExprKind::ConstBlock(..) | ExprKind::Err => {}
ExprKind::Lit(..)
| ExprKind::ConstBlock(..)
| ExprKind::IncludedBytes(..)
| ExprKind::Err => {}
ExprKind::Path(..) if allow_paths => {}
ExprKind::Unary(UnOp::Neg, ref inner) if matches!(inner.kind, ExprKind::Lit(_)) => {}
_ => {

View File

@ -322,6 +322,10 @@ impl<'a> State<'a> {
ast::ExprKind::Lit(ref lit) => {
self.print_literal(lit);
}
ast::ExprKind::IncludedBytes(ref bytes) => {
let lit = ast::Lit::from_included_bytes(bytes, expr.span);
self.print_literal(&lit)
}
ast::ExprKind::Cast(ref expr, ref ty) => {
let prec = AssocOp::As.precedence() as i8;
self.print_expr_maybe_paren(expr, prec);

View File

@ -303,6 +303,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
| ExprKind::Field(_, _)
| ExprKind::ForLoop(_, _, _, _)
| ExprKind::If(_, _, _)
| ExprKind::IncludedBytes(..)
| ExprKind::InlineAsm(_)
| ExprKind::Let(_, _, _)
| ExprKind::Lit(_)

View File

@ -43,6 +43,9 @@ pub fn expand_concat(
has_errors = true;
}
},
ast::ExprKind::IncludedBytes(..) => {
cx.span_err(e.span, "cannot concatenate a byte string literal")
}
ast::ExprKind::Err => {
has_errors = true;
}

View File

@ -108,6 +108,16 @@ fn handle_array_element(
None
}
},
ast::ExprKind::IncludedBytes(..) => {
if !*has_errors {
cx.struct_span_err(expr.span, "cannot concatenate doubly nested array")
.note("byte strings are treated as arrays of bytes")
.help("try flattening the array")
.emit();
}
*has_errors = true;
None
}
_ => {
missing_literals.push(expr.span);
None
@ -167,6 +177,9 @@ pub fn expand_concat_bytes(
has_errors = true;
}
},
ast::ExprKind::IncludedBytes(ref bytes) => {
accumulator.extend_from_slice(bytes);
}
ast::ExprKind::Err => {
has_errors = true;
}

View File

@ -216,7 +216,10 @@ pub fn expand_include_bytes(
}
};
match cx.source_map().load_binary_file(&file) {
Ok(bytes) => base::MacEager::expr(cx.expr_byte_str(sp, bytes)),
Ok(bytes) => {
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes.into()));
base::MacEager::expr(expr)
}
Err(e) => {
cx.span_err(sp, &format!("couldn't read {}: {}", file.display(), e));
DummyResult::any(sp)

View File

@ -525,6 +525,13 @@ impl server::TokenStream for Rustc<'_, '_> {
ast::ExprKind::Lit(l) => {
Ok(tokenstream::TokenStream::token_alone(token::Literal(l.token_lit), l.span))
}
ast::ExprKind::IncludedBytes(bytes) => {
let lit = ast::Lit::from_included_bytes(bytes, expr.span);
Ok(tokenstream::TokenStream::token_alone(
token::TokenKind::Literal(lit.token_lit),
expr.span,
))
}
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {
ast::ExprKind::Lit(l) => match l.token_lit {
token::Lit { kind: token::Integer | token::Float, .. } => {

View File

@ -631,7 +631,9 @@ impl<'a> Parser<'a> {
/// - A single-segment path.
pub(super) fn expr_is_valid_const_arg(&self, expr: &P<rustc_ast::Expr>) -> bool {
match &expr.kind {
ast::ExprKind::Block(_, _) | ast::ExprKind::Lit(_) => true,
ast::ExprKind::Block(_, _)
| ast::ExprKind::Lit(_)
| ast::ExprKind::IncludedBytes(..) => true,
ast::ExprKind::Unary(ast::UnOp::Neg, expr) => {
matches!(expr.kind, ast::ExprKind::Lit(_))
}

View File

@ -560,13 +560,14 @@ impl<'v> ast_visit::Visitor<'v> for StatCollector<'v> {
}
fn visit_expr(&mut self, e: &'v ast::Expr) {
#[rustfmt::skip]
record_variants!(
(self, e, e.kind, Id::None, ast, Expr, ExprKind),
[
Box, Array, ConstBlock, Call, MethodCall, Tup, Binary, Unary, Lit, Cast, Type, Let,
If, While, ForLoop, Loop, Match, Closure, Block, Async, Await, TryBlock, Assign,
AssignOp, Field, Index, Range, Underscore, Path, AddrOf, Break, Continue, Ret,
InlineAsm, MacCall, Struct, Repeat, Paren, Try, Yield, Yeet, Err
InlineAsm, MacCall, Struct, Repeat, Paren, Try, Yield, Yeet, IncludedBytes, Err
]
);
ast_visit::walk_expr(self, e)

View File

@ -1,5 +1,5 @@
// aux-build:expand-expr.rs
#![feature(concat_bytes)]
extern crate expand_expr;
use expand_expr::{
@ -23,6 +23,11 @@ expand_expr_is!(
concat!("contents: ", include_str!("auxiliary/included-file.txt"))
);
expand_expr_is!(
b"contents: Included file contents\n",
concat_bytes!(b"contents: ", include_bytes!("auxiliary/included-file.txt"))
);
// Correct value is checked for multiple sources.
check_expand_expr_file!(file!());

View File

@ -1,29 +1,29 @@
error: expected one of `.`, `?`, or an operator, found `;`
--> $DIR/expand-expr.rs:101:27
--> $DIR/expand-expr.rs:106:27
|
LL | expand_expr_fail!("string"; hello);
| ^ expected one of `.`, `?`, or an operator
error: expected expression, found `$`
--> $DIR/expand-expr.rs:104:19
--> $DIR/expand-expr.rs:109:19
|
LL | expand_expr_fail!($);
| ^ expected expression
error: expected expression, found `$`
--> $DIR/expand-expr.rs:33:23
--> $DIR/expand-expr.rs:38:23
|
LL | ($($t:tt)*) => { $($t)* };
| ^^^^ expected expression
error: expected expression, found `$`
--> $DIR/expand-expr.rs:106:28
--> $DIR/expand-expr.rs:111:28
|
LL | expand_expr_fail!(echo_pm!($));
| ^ expected expression
error: macro expansion ignores token `hello` and any following
--> $DIR/expand-expr.rs:110:47
--> $DIR/expand-expr.rs:115:47
|
LL | expand_expr_is!("string", echo_tts!("string"; hello));
| --------------------^^^^^-- help: you might be missing a semicolon here: `;`
@ -33,7 +33,7 @@ LL | expand_expr_is!("string", echo_tts!("string"; hello));
= note: the usage of `echo_tts!` is likely invalid in expression context
error: macro expansion ignores token `;` and any following
--> $DIR/expand-expr.rs:111:44
--> $DIR/expand-expr.rs:116:44
|
LL | expand_expr_is!("string", echo_pm!("string"; hello));
| -----------------^-------- help: you might be missing a semicolon here: `;`
@ -43,7 +43,7 @@ LL | expand_expr_is!("string", echo_pm!("string"; hello));
= note: the usage of `echo_pm!` is likely invalid in expression context
error: recursion limit reached while expanding `recursive_expand!`
--> $DIR/expand-expr.rs:119:16
--> $DIR/expand-expr.rs:124:16
|
LL | const _: u32 = recursive_expand!();
| ^^^^^^^^^^^^^^^^^^^

View File

@ -207,6 +207,7 @@ impl<'a> Sugg<'a> {
| ast::ExprKind::InlineAsm(..)
| ast::ExprKind::ConstBlock(..)
| ast::ExprKind::Lit(..)
| ast::ExprKind::IncludedBytes(..)
| ast::ExprKind::Loop(..)
| ast::ExprKind::MacCall(..)
| ast::ExprKind::MethodCall(..)

View File

@ -399,6 +399,7 @@ pub(crate) fn format_expr(
}
}
ast::ExprKind::Underscore => Some("_".to_owned()),
ast::ExprKind::IncludedBytes(..) => unreachable!(),
ast::ExprKind::Err => None,
};

View File

@ -496,6 +496,7 @@ pub(crate) fn is_block_expr(context: &RewriteContext<'_>, expr: &ast::Expr, repr
| ast::ExprKind::Continue(..)
| ast::ExprKind::Err
| ast::ExprKind::Field(..)
| ast::ExprKind::IncludedBytes(..)
| ast::ExprKind::InlineAsm(..)
| ast::ExprKind::Let(..)
| ast::ExprKind::Path(..)