internal: more production-ready proc-macro RPC deserialization

* avoid arbitrarily nested JSON trees (danger of stack overflow)
* use a more compact representation.
This commit is contained in:
Aleksey Kladov 2021-08-28 20:36:41 +03:00
parent 9ea3c4d53b
commit 55e9476e4b
6 changed files with 365 additions and 214 deletions

View File

@ -21,7 +21,9 @@ use tt::{SmolStr, Subtree};
use crate::process::ProcMacroProcessSrv;
pub use rpc::{ExpansionResult, ExpansionTask, ListMacrosResult, ListMacrosTask, ProcMacroKind};
pub use rpc::{
ExpansionResult, ExpansionTask, flat::FlatTree, ListMacrosResult, ListMacrosTask, ProcMacroKind,
};
pub use version::{read_dylib_info, RustCInfo};
#[derive(Debug, Clone)]
@ -58,9 +60,9 @@ impl ProcMacroProcessExpander {
env: Vec<(String, String)>,
) -> Result<Subtree, tt::ExpansionError> {
let task = ExpansionTask {
macro_body: subtree.clone(),
macro_body: FlatTree::new(subtree),
macro_name: self.name.to_string(),
attributes: attr.cloned(),
attributes: attr.map(FlatTree::new),
lib: self.dylib_path.to_path_buf(),
env,
};
@ -70,7 +72,7 @@ impl ProcMacroProcessExpander {
.lock()
.unwrap_or_else(|e| e.into_inner())
.send_task(msg::Request::ExpansionMacro(task))?;
Ok(result.expansion)
Ok(result.expansion.to_subtree())
}
}

View File

@ -12,13 +12,13 @@ use crate::{
ExpansionResult, ExpansionTask,
};
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize)]
pub enum Request {
ListMacro(ListMacrosTask),
ExpansionMacro(ExpansionTask),
}
#[derive(Debug, Serialize, Deserialize, Clone)]
#[derive(Debug, Serialize, Deserialize)]
pub enum Response {
Error(ResponseError),
ListMacro(ListMacrosResult),

View File

@ -5,13 +5,12 @@
//! Although adding `Serialize` and `Deserialize` traits to `tt` directly seems
//! to be much easier, we deliberately duplicate `tt` structs with `#[serde(with = "XXDef")]`
//! for separation of code responsibility.
pub(crate) mod flat;
use paths::AbsPathBuf;
use serde::{Deserialize, Serialize};
use tt::{
Delimiter, DelimiterKind, Ident, Leaf, Literal, Punct, SmolStr, Spacing, Subtree, TokenId,
TokenTree,
};
use crate::rpc::flat::FlatTree;
#[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize)]
pub struct ListMacrosTask {
@ -30,14 +29,13 @@ pub struct ListMacrosResult {
pub macros: Vec<(String, ProcMacroKind)>,
}
#[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize)]
pub struct ExpansionTask {
/// Argument of macro call.
///
/// In custom derive this will be a struct or enum; in attribute-like macro - underlying
/// item; in function-like macro - the macro body.
#[serde(with = "SubtreeDef")]
pub macro_body: Subtree,
pub macro_body: FlatTree,
/// Name of macro to expand.
///
@ -46,8 +44,7 @@ pub struct ExpansionTask {
pub macro_name: String,
/// Possible attributes for the attribute-like macros.
#[serde(with = "opt_subtree_def")]
pub attributes: Option<Subtree>,
pub attributes: Option<FlatTree>,
pub lib: AbsPathBuf,
@ -55,199 +52,15 @@ pub struct ExpansionTask {
pub env: Vec<(String, String)>,
}
#[derive(Clone, Eq, PartialEq, Debug, Default, Serialize, Deserialize)]
#[derive(Debug, Serialize, Deserialize)]
pub struct ExpansionResult {
#[serde(with = "SubtreeDef")]
pub expansion: Subtree,
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "DelimiterKind")]
enum DelimiterKindDef {
Parenthesis,
Brace,
Bracket,
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "TokenId")]
struct TokenIdDef(u32);
#[derive(Serialize, Deserialize)]
#[serde(remote = "Delimiter")]
struct DelimiterDef {
#[serde(
with = "TokenIdDef",
default = "tt::TokenId::unspecified",
skip_serializing_if = "token_id_def::skip_if"
)]
id: TokenId,
#[serde(with = "DelimiterKindDef")]
kind: DelimiterKind,
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "Subtree")]
struct SubtreeDef {
#[serde(default, with = "opt_delimiter_def")]
delimiter: Option<Delimiter>,
#[serde(with = "vec_token_tree")]
token_trees: Vec<TokenTree>,
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "TokenTree")]
enum TokenTreeDef {
#[serde(with = "LeafDef")]
Leaf(Leaf),
#[serde(with = "SubtreeDef")]
Subtree(Subtree),
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "Leaf")]
enum LeafDef {
#[serde(with = "LiteralDef")]
Literal(Literal),
#[serde(with = "PunctDef")]
Punct(Punct),
#[serde(with = "IdentDef")]
Ident(Ident),
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "Literal")]
struct LiteralDef {
text: SmolStr,
#[serde(
with = "TokenIdDef",
default = "tt::TokenId::unspecified",
skip_serializing_if = "token_id_def::skip_if"
)]
id: TokenId,
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "Punct")]
struct PunctDef {
char: char,
#[serde(with = "SpacingDef")]
spacing: Spacing,
#[serde(
with = "TokenIdDef",
default = "tt::TokenId::unspecified",
skip_serializing_if = "token_id_def::skip_if"
)]
id: TokenId,
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "Spacing")]
enum SpacingDef {
Alone,
Joint,
}
#[derive(Serialize, Deserialize)]
#[serde(remote = "Ident")]
struct IdentDef {
text: SmolStr,
#[serde(
with = "TokenIdDef",
default = "tt::TokenId::unspecified",
skip_serializing_if = "token_id_def::skip_if"
)]
id: TokenId,
}
mod token_id_def {
pub(super) fn skip_if(value: &tt::TokenId) -> bool {
*value == tt::TokenId::unspecified()
}
}
mod opt_delimiter_def {
use super::{Delimiter, DelimiterDef};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
pub(super) fn serialize<S>(value: &Option<Delimiter>, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
#[derive(Serialize)]
struct Helper<'a>(#[serde(with = "DelimiterDef")] &'a Delimiter);
value.as_ref().map(Helper).serialize(serializer)
}
pub(super) fn deserialize<'de, D>(deserializer: D) -> Result<Option<Delimiter>, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
struct Helper(#[serde(with = "DelimiterDef")] Delimiter);
let helper = Option::deserialize(deserializer)?;
Ok(helper.map(|Helper(external)| external))
}
}
mod opt_subtree_def {
use super::{Subtree, SubtreeDef};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
pub(super) fn serialize<S>(value: &Option<Subtree>, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
#[derive(Serialize)]
struct Helper<'a>(#[serde(with = "SubtreeDef")] &'a Subtree);
value.as_ref().map(Helper).serialize(serializer)
}
pub(super) fn deserialize<'de, D>(deserializer: D) -> Result<Option<Subtree>, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
struct Helper(#[serde(with = "SubtreeDef")] Subtree);
let helper = Option::deserialize(deserializer)?;
Ok(helper.map(|Helper(external)| external))
}
}
mod vec_token_tree {
use super::{TokenTree, TokenTreeDef};
use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer};
pub(super) fn serialize<S>(value: &[TokenTree], serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
#[derive(Serialize)]
struct Helper<'a>(#[serde(with = "TokenTreeDef")] &'a TokenTree);
let items: Vec<_> = value.iter().map(Helper).collect();
let mut seq = serializer.serialize_seq(Some(items.len()))?;
for element in items {
seq.serialize_element(&element)?;
}
seq.end()
}
pub(super) fn deserialize<'de, D>(deserializer: D) -> Result<Vec<TokenTree>, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
struct Helper(#[serde(with = "TokenTreeDef")] TokenTree);
let helper = Vec::deserialize(deserializer)?;
Ok(helper.into_iter().map(|Helper(external)| external).collect())
}
pub expansion: FlatTree,
}
#[cfg(test)]
mod tests {
use super::*;
use tt::*;
fn fixture_token_tree() -> Subtree {
let mut subtree = Subtree::default();
@ -257,6 +70,15 @@ mod tests {
subtree
.token_trees
.push(TokenTree::Leaf(Ident { text: "Foo".into(), id: TokenId(1) }.into()));
subtree.token_trees.push(TokenTree::Leaf(Leaf::Literal(Literal {
text: "Foo".into(),
id: TokenId::unspecified(),
})));
subtree.token_trees.push(TokenTree::Leaf(Leaf::Punct(Punct {
char: '@',
id: TokenId::unspecified(),
spacing: Spacing::Joint,
})));
subtree.token_trees.push(TokenTree::Subtree(Subtree {
delimiter: Some(Delimiter { id: TokenId(2), kind: DelimiterKind::Brace }),
token_trees: vec![],
@ -268,7 +90,7 @@ mod tests {
fn test_proc_macro_rpc_works() {
let tt = fixture_token_tree();
let task = ExpansionTask {
macro_body: tt.clone(),
macro_body: FlatTree::new(&tt),
macro_name: Default::default(),
attributes: None,
lib: AbsPathBuf::assert(std::env::current_dir().unwrap()),
@ -276,14 +98,9 @@ mod tests {
};
let json = serde_json::to_string(&task).unwrap();
println!("{}", json);
let back: ExpansionTask = serde_json::from_str(&json).unwrap();
assert_eq!(task.macro_body, back.macro_body);
let result = ExpansionResult { expansion: tt };
let json = serde_json::to_string(&result).unwrap();
let back: ExpansionResult = serde_json::from_str(&json).unwrap();
assert_eq!(result, back);
assert_eq!(tt, back.macro_body.to_subtree());
}
}

View File

@ -0,0 +1,328 @@
//! Serialization-friendly representation of `tt::Subtree`.
//!
//! It is possible to serialize `Subtree` as is, as a tree, but using
//! arbitrary-nested trees in JSON is problematic, as they can cause the JSON
//! parser to overflow the stack.
//!
//! Additionally, such implementation would be pretty verbose, and we do care
//! about performance here a bit.
//!
//! So what this module does is dump a `tt::Subtree` into a bunch of flat
//! arrays of numbers. See the test in the parent module to get an example
//! output.
//!
//! ```json
//! {
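//! // Array of subtrees, each subtree is represented by 4 numbers:
//! // id of delimiter, delimiter kind (0 = none, 1 = parenthesis, 2 = brace,
//! // 3 = bracket), index of first child in `token_tree`, and index one past
//! // the last child in `token_tree` (the child range is half-open)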
//! "subtree":[4294967295,0,0,5,2,2,5,5],
//! // 2 ints per literal: [token id, index into `text`]
//! "literal":[4294967295,1],
//! // 3 ints per punct: [token id, char, spacing]
//! "punct":[4294967295,64,1],
//! // 2 ints per ident: [token id, index into `text`]
//! "ident": [0,0,1,1],
//! // children of all subtrees, concatenated. Each child is represented as `index << 2 | tag`
//! // where tag denotes one of subtree, literal, punct or ident.
//! "token_tree":[3,7,1,2,4],
//! // Strings shared by idents and literals
//! "text": ["struct","Foo"]
//! }
//! ```
//!
//! We should probably replace most of the code here with bincode someday, but,
//! as we don't have bincode in Cargo.toml yet, let's stick with serde_json for
//! the time being.
use std::{
collections::{HashMap, VecDeque},
convert::TryInto,
};
use serde::{Deserialize, Serialize};
use tt::TokenId;
/// Flat, serialization-friendly encoding of a `tt::Subtree`.
///
/// Instead of a nested tree, every kind of node is stored in its own flat
/// array of `u32`s, and nodes refer to each other by index. Build one with
/// `FlatTree::new` and turn it back into a tree with `FlatTree::to_subtree`.
#[derive(Serialize, Deserialize, Debug)]
pub struct FlatTree {
/// Subtree records, 4 numbers each: delimiter token id, delimiter kind
/// (0 = none, 1 = parenthesis, 2 = brace, 3 = bracket), start of the child
/// range in `token_tree`, and exclusive end of that range.
subtree: Vec<u32>,
/// Literal records, 2 numbers each: token id, index into `text`.
literal: Vec<u32>,
/// Punct records, 3 numbers each: token id, character as `u32`, spacing
/// (0 = alone, 1 = joint).
punct: Vec<u32>,
/// Ident records, 2 numbers each: token id, index into `text`.
ident: Vec<u32>,
/// Children of all subtrees, concatenated. Each entry is `index << 2 | tag`,
/// where the tag is 0b00 for subtree, 0b01 for literal, 0b10 for punct and
/// 0b11 for ident, and the index points into the corresponding record list.
token_tree: Vec<u32>,
/// Deduplicated strings referenced by literal and ident records.
text: Vec<String>,
}
/// In-memory form of one subtree record (4 `u32`s on the wire).
struct SubtreeRepr {
// Token id of the delimiter; the writer stores `TokenId::unspecified()`
// here when the subtree has no delimiter.
id: tt::TokenId,
// Delimiter kind, or `None` for a subtree without a delimiter.
kind: Option<tt::DelimiterKind>,
// Half-open range `[start, end)` of this subtree's children in `token_tree`.
tt: [u32; 2],
}
/// In-memory form of one literal record (2 `u32`s on the wire).
struct LiteralRepr {
// Token id of the literal.
id: tt::TokenId,
// Index of the literal's text in the shared `text` table.
text: u32,
}
/// In-memory form of one punct record (3 `u32`s on the wire).
struct PunctRepr {
// Token id of the punctuation token.
id: tt::TokenId,
// The punctuation character itself; written to the wire as its `u32` value.
char: char,
// Spacing of the token; written to the wire as 0 (alone) or 1 (joint).
spacing: tt::Spacing,
}
/// In-memory form of one ident record (2 `u32`s on the wire).
struct IdentRepr {
// Token id of the identifier.
id: tt::TokenId,
// Index of the identifier's text in the shared `text` table.
text: u32,
}
impl FlatTree {
    /// Flattens `subtree` (and everything nested in it) into a `FlatTree`.
    ///
    /// The tree is first walked by a `Writer`, which collects typed records;
    /// those records are then packed into plain `u32` arrays.
    pub fn new(subtree: &tt::Subtree) -> FlatTree {
        /// Packs every record of `xs` into `N` consecutive `u32`s using `f`.
        fn write_vec<T, F: Fn(T) -> [u32; N], const N: usize>(xs: Vec<T>, f: F) -> Vec<u32> {
            let mut packed = Vec::with_capacity(xs.len() * N);
            for record in xs {
                packed.extend_from_slice(&f(record));
            }
            packed
        }

        let mut writer = Writer {
            string_table: HashMap::new(),
            work: VecDeque::new(),
            subtree: Vec::new(),
            literal: Vec::new(),
            punct: Vec::new(),
            ident: Vec::new(),
            token_tree: Vec::new(),
            text: Vec::new(),
        };
        writer.write(subtree);

        // Only the collected records are needed from here on; the work queue
        // and the string lookup table are dropped.
        let Writer { subtree: subtrees, literal, punct, ident, token_tree, text, .. } = writer;
        FlatTree {
            subtree: write_vec(subtrees, SubtreeRepr::write),
            literal: write_vec(literal, LiteralRepr::write),
            punct: write_vec(punct, PunctRepr::write),
            ident: write_vec(ident, IdentRepr::write),
            token_tree,
            text,
        }
    }

    /// Rebuilds the `tt::Subtree` encoded by this `FlatTree`.
    ///
    /// # Panics
    ///
    /// Panics if the data is malformed, e.g. if the length of a record array
    /// is not a multiple of its record size, or if a record holds an invalid
    /// kind, spacing or character value.
    pub fn to_subtree(self) -> tt::Subtree {
        /// Splits `xs` into records of `N` numbers and decodes each with `f`.
        fn read_vec<T, F: Fn([u32; N]) -> T, const N: usize>(xs: Vec<u32>, f: F) -> Vec<T> {
            let mut chunks = xs.chunks_exact(N);
            let mut records = Vec::with_capacity(chunks.len());
            for chunk in chunks.by_ref() {
                // `chunks_exact` only yields slices of length `N`, so this
                // conversion cannot fail.
                let raw: [u32; N] = chunk.try_into().unwrap();
                records.push(f(raw));
            }
            // Leftover numbers mean the array was not made of whole records.
            assert!(chunks.remainder().is_empty());
            records
        }

        let FlatTree { subtree, literal, punct, ident, token_tree, text } = self;
        let reader = Reader {
            subtree: read_vec(subtree, SubtreeRepr::read),
            literal: read_vec(literal, LiteralRepr::read),
            punct: read_vec(punct, PunctRepr::read),
            ident: read_vec(ident, IdentRepr::read),
            token_tree,
            text,
        };
        reader.read()
    }
}
impl SubtreeRepr {
    /// Packs this record as `[delimiter id, kind code, start, end]`.
    ///
    /// Kind codes: 0 = no delimiter, 1 = parenthesis, 2 = brace, 3 = bracket.
    fn write(self) -> [u32; 4] {
        let SubtreeRepr { id, kind, tt: [lo, hi] } = self;
        let kind_code = match kind {
            Some(tt::DelimiterKind::Bracket) => 3,
            Some(tt::DelimiterKind::Brace) => 2,
            Some(tt::DelimiterKind::Parenthesis) => 1,
            None => 0,
        };
        [id.0, kind_code, lo, hi]
    }

    /// Decodes a record produced by `write`.
    ///
    /// # Panics
    ///
    /// Panics if the kind code is not one of 0..=3.
    fn read(raw: [u32; 4]) -> SubtreeRepr {
        // `hi` is the exclusive end of the child range, not a length.
        let [id, kind_code, lo, hi] = raw;
        let kind = match kind_code {
            0 => None,
            1 => Some(tt::DelimiterKind::Parenthesis),
            2 => Some(tt::DelimiterKind::Brace),
            3 => Some(tt::DelimiterKind::Bracket),
            other => panic!("bad kind {}", other),
        };
        SubtreeRepr { id: TokenId(id), kind, tt: [lo, hi] }
    }
}
impl LiteralRepr {
    /// Packs this record as `[token id, text index]`.
    fn write(self) -> [u32; 2] {
        let LiteralRepr { id, text } = self;
        [id.0, text]
    }

    /// Decodes a record produced by `write`.
    fn read(raw: [u32; 2]) -> LiteralRepr {
        LiteralRepr { id: TokenId(raw[0]), text: raw[1] }
    }
}
impl PunctRepr {
    /// Packs this record as `[token id, character code, spacing code]`.
    ///
    /// Spacing codes: 0 = alone, 1 = joint.
    fn write(self) -> [u32; 3] {
        let spacing_code = match self.spacing {
            tt::Spacing::Joint => 1,
            tt::Spacing::Alone => 0,
        };
        [self.id.0, u32::from(self.char), spacing_code]
    }

    /// Decodes a record produced by `write`.
    ///
    /// # Panics
    ///
    /// Panics if the spacing code is neither 0 nor 1, or if the character
    /// code is not a valid `char`.
    fn read(raw: [u32; 3]) -> PunctRepr {
        let [id, code, spacing_code] = raw;
        let spacing = match spacing_code {
            0 => tt::Spacing::Alone,
            1 => tt::Spacing::Joint,
            other => panic!("bad spacing {}", other),
        };
        let ch: char = code.try_into().unwrap();
        PunctRepr { id: TokenId(id), char: ch, spacing }
    }
}
impl IdentRepr {
fn write(self) -> [u32; 2] {
[self.id.0, self.text]
}
fn read(data: [u32; 2]) -> IdentRepr {
IdentRepr { id: TokenId(data[0]), text: data[1] }
}
}
/// Accumulates the flat records while a `tt::Subtree` is being walked.
struct Writer<'a> {
// Queue of subtrees whose children still have to be written, each paired
// with the index of its already-reserved record in `subtree`.
work: VecDeque<(usize, &'a tt::Subtree)>,
// Maps a string to its index in `text`, so each distinct string is stored once.
string_table: HashMap<&'a str, u32>,
// One record per subtree, in the order the subtrees were enqueued.
subtree: Vec<SubtreeRepr>,
// One record per literal leaf.
literal: Vec<LiteralRepr>,
// One record per punct leaf.
punct: Vec<PunctRepr>,
// One record per ident leaf.
ident: Vec<IdentRepr>,
// Children of all subtrees, each encoded as `index << 2 | tag`.
token_tree: Vec<u32>,
// Interned strings referenced by literal and ident records.
text: Vec<String>,
}
impl<'a> Writer<'a> {
fn write(&mut self, root: &'a tt::Subtree) {
self.enqueue(root);
while let Some((idx, subtree)) = self.work.pop_front() {
self.subtree(idx, subtree);
}
}
fn subtree(&mut self, idx: usize, subtree: &'a tt::Subtree) {
let mut first_tt = self.token_tree.len();
let n_tt = subtree.token_trees.len();
self.token_tree.resize(first_tt + n_tt, !0);
self.subtree[idx].tt = [first_tt as u32, (first_tt + n_tt) as u32];
for child in &subtree.token_trees {
let idx_tag = match child {
tt::TokenTree::Subtree(it) => {
let idx = self.enqueue(it);
idx << 2 | 0b00
}
tt::TokenTree::Leaf(leaf) => match leaf {
tt::Leaf::Literal(lit) => {
let idx = self.literal.len() as u32;
let text = self.intern(&lit.text);
self.literal.push(LiteralRepr { id: lit.id, text });
idx << 2 | 0b01
}
tt::Leaf::Punct(punct) => {
let idx = self.punct.len() as u32;
self.punct.push(PunctRepr {
char: punct.char,
spacing: punct.spacing,
id: punct.id,
});
idx << 2 | 0b10
}
tt::Leaf::Ident(ident) => {
let idx = self.ident.len() as u32;
let text = self.intern(&ident.text);
self.ident.push(IdentRepr { id: ident.id, text });
idx << 2 | 0b11
}
},
};
self.token_tree[first_tt] = idx_tag;
first_tt += 1;
}
}
fn enqueue(&mut self, subtree: &'a tt::Subtree) -> u32 {
let idx = self.subtree.len();
let delimiter_id = subtree.delimiter.map(|it| it.id).unwrap_or(TokenId::unspecified());
let delimiter_kind = subtree.delimiter.map(|it| it.kind);
self.subtree.push(SubtreeRepr { id: delimiter_id, kind: delimiter_kind, tt: [!0, !0] });
self.work.push_back((idx, subtree));
idx as u32
}
pub(crate) fn intern(&mut self, text: &'a str) -> u32 {
let table = &mut self.text;
*self.string_table.entry(text).or_insert_with(|| {
let idx = table.len();
table.push(text.to_string());
idx as u32
})
}
}
/// Decoded records of a `FlatTree`, ready to be assembled back into a
/// `tt::Subtree` by `Reader::read`.
struct Reader {
// Subtree records; `read` returns the tree rebuilt from the record at index 0.
subtree: Vec<SubtreeRepr>,
// Literal records, referenced from `token_tree` with tag 0b01.
literal: Vec<LiteralRepr>,
// Punct records, referenced from `token_tree` with tag 0b10.
punct: Vec<PunctRepr>,
// Ident records, referenced from `token_tree` with tag 0b11.
ident: Vec<IdentRepr>,
// Children of all subtrees, each encoded as `index << 2 | tag`.
token_tree: Vec<u32>,
// Strings referenced by literal and ident records.
text: Vec<String>,
}
impl Reader {
    /// Assembles the decoded records into a `tt::Subtree` and returns the
    /// subtree stored at index 0.
    ///
    /// # Panics
    ///
    /// Panics if the records are inconsistent: an index is out of bounds, a
    /// subtree is referenced before it has been built or more than once, or
    /// there are no subtree records at all.
    pub(crate) fn read(self) -> tt::Subtree {
        let n_subtrees = self.subtree.len();
        let mut built: Vec<Option<tt::Subtree>> = vec![None; n_subtrees];

        // Go from the last record to the first, so that a subtree stored
        // after its parent is already built when the parent needs it.
        for i in (0..n_subtrees).rev() {
            let repr = &self.subtree[i];
            let children = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize];

            let mut token_trees = Vec::with_capacity(children.len());
            for &idx_tag in children {
                // Low two bits select the record list, the rest is the index.
                let idx = (idx_tag >> 2) as usize;
                let child = match idx_tag & 0b11 {
                    // XXX: the reverse iteration above is what guarantees
                    // that this unwrap doesn't fire.
                    0b00 => tt::TokenTree::Subtree(built[idx].take().unwrap()),
                    0b01 => {
                        let lit = &self.literal[idx];
                        tt::TokenTree::Leaf(tt::Leaf::Literal(tt::Literal {
                            text: self.text[lit.text as usize].as_str().into(),
                            id: lit.id,
                        }))
                    }
                    0b10 => {
                        let punct = &self.punct[idx];
                        tt::TokenTree::Leaf(tt::Leaf::Punct(tt::Punct {
                            char: punct.char,
                            spacing: punct.spacing,
                            id: punct.id,
                        }))
                    }
                    0b11 => {
                        let ident = &self.ident[idx];
                        tt::TokenTree::Leaf(tt::Leaf::Ident(tt::Ident {
                            text: self.text[ident.text as usize].as_str().into(),
                            id: ident.id,
                        }))
                    }
                    other => panic!("bad tag: {}", other),
                };
                token_trees.push(child);
            }

            let delimiter = repr.kind.map(|kind| tt::Delimiter { id: repr.id, kind });
            built[i] = Some(tt::Subtree { delimiter, token_trees });
        }

        built[0].take().unwrap()
    }
}

View File

@ -12,7 +12,7 @@ pub fn run() -> io::Result<()> {
let res = match req {
msg::Request::ListMacro(task) => srv.list_macros(&task).map(msg::Response::ListMacro),
msg::Request::ExpansionMacro(task) => {
srv.expand(&task).map(msg::Response::ExpansionMacro)
srv.expand(task).map(msg::Response::ExpansionMacro)
}
};

View File

@ -15,7 +15,7 @@ mod dylib;
mod abis;
use proc_macro_api::{ExpansionResult, ExpansionTask, ListMacrosResult, ListMacrosTask};
use proc_macro_api::{ExpansionResult, ExpansionTask, FlatTree, ListMacrosResult, ListMacrosTask};
use std::{
collections::{hash_map::Entry, HashMap},
env, fs,
@ -29,7 +29,7 @@ pub(crate) struct ProcMacroSrv {
}
impl ProcMacroSrv {
pub fn expand(&mut self, task: &ExpansionTask) -> Result<ExpansionResult, String> {
pub fn expand(&mut self, task: ExpansionTask) -> Result<ExpansionResult, String> {
let expander = self.expander(task.lib.as_ref())?;
let mut prev_env = HashMap::new();
@ -38,7 +38,11 @@ impl ProcMacroSrv {
env::set_var(k, v);
}
let result = expander.expand(&task.macro_name, &task.macro_body, task.attributes.as_ref());
let macro_body = task.macro_body.to_subtree();
let attributes = task.attributes.map(|it| it.to_subtree());
let result = expander
.expand(&task.macro_name, &macro_body, attributes.as_ref())
.map(|it| FlatTree::new(&it));
for (k, _) in &task.env {
match &prev_env[k.as_str()] {