diff --git a/crates/proc_macro_api/src/lib.rs b/crates/proc_macro_api/src/lib.rs index bbd26caf76d..41dfcc541a8 100644 --- a/crates/proc_macro_api/src/lib.rs +++ b/crates/proc_macro_api/src/lib.rs @@ -21,7 +21,9 @@ use tt::{SmolStr, Subtree}; use crate::process::ProcMacroProcessSrv; -pub use rpc::{ExpansionResult, ExpansionTask, ListMacrosResult, ListMacrosTask, ProcMacroKind}; +pub use rpc::{ + ExpansionResult, ExpansionTask, flat::FlatTree, ListMacrosResult, ListMacrosTask, ProcMacroKind, +}; pub use version::{read_dylib_info, RustCInfo}; #[derive(Debug, Clone)] @@ -58,9 +60,9 @@ impl ProcMacroProcessExpander { env: Vec<(String, String)>, ) -> Result { let task = ExpansionTask { - macro_body: subtree.clone(), + macro_body: FlatTree::new(subtree), macro_name: self.name.to_string(), - attributes: attr.cloned(), + attributes: attr.map(FlatTree::new), lib: self.dylib_path.to_path_buf(), env, }; @@ -70,7 +72,7 @@ impl ProcMacroProcessExpander { .lock() .unwrap_or_else(|e| e.into_inner()) .send_task(msg::Request::ExpansionMacro(task))?; - Ok(result.expansion) + Ok(result.expansion.to_subtree()) } } diff --git a/crates/proc_macro_api/src/msg.rs b/crates/proc_macro_api/src/msg.rs index 89989557868..fd10d87f08c 100644 --- a/crates/proc_macro_api/src/msg.rs +++ b/crates/proc_macro_api/src/msg.rs @@ -12,13 +12,13 @@ use crate::{ ExpansionResult, ExpansionTask, }; -#[derive(Debug, Serialize, Deserialize, Clone)] +#[derive(Debug, Serialize, Deserialize)] pub enum Request { ListMacro(ListMacrosTask), ExpansionMacro(ExpansionTask), } -#[derive(Debug, Serialize, Deserialize, Clone)] +#[derive(Debug, Serialize, Deserialize)] pub enum Response { Error(ResponseError), ListMacro(ListMacrosResult), diff --git a/crates/proc_macro_api/src/rpc.rs b/crates/proc_macro_api/src/rpc.rs index 9fc220cf3fe..10aa24d00c2 100644 --- a/crates/proc_macro_api/src/rpc.rs +++ b/crates/proc_macro_api/src/rpc.rs @@ -5,13 +5,12 @@ //! Although adding `Serialize` and `Deserialize` traits to `tt` directly seems //! to be much easier, we deliberately duplicate `tt` structs with `#[serde(with = "XXDef")]` //! for separation of code responsibility. +pub(crate) mod flat; use paths::AbsPathBuf; use serde::{Deserialize, Serialize}; -use tt::{ - Delimiter, DelimiterKind, Ident, Leaf, Literal, Punct, SmolStr, Spacing, Subtree, TokenId, - TokenTree, -}; + +use crate::rpc::flat::FlatTree; #[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] pub struct ListMacrosTask { @@ -30,14 +29,13 @@ pub struct ListMacrosResult { pub macros: Vec<(String, ProcMacroKind)>, } -#[derive(Clone, Eq, PartialEq, Debug, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct ExpansionTask { /// Argument of macro call. /// /// In custom derive this will be a struct or enum; in attribute-like macro - underlying /// item; in function-like macro - the macro body. - #[serde(with = "SubtreeDef")] - pub macro_body: Subtree, + pub macro_body: FlatTree, /// Name of macro to expand. /// @@ -46,8 +44,7 @@ pub struct ExpansionTask { pub macro_name: String, /// Possible attributes for the attribute-like macros. - #[serde(with = "opt_subtree_def")] - pub attributes: Option, + pub attributes: Option, pub lib: AbsPathBuf, @@ -55,199 +52,15 @@ pub struct ExpansionTask { pub env: Vec<(String, String)>, } -#[derive(Clone, Eq, PartialEq, Debug, Default, Serialize, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct ExpansionResult { - #[serde(with = "SubtreeDef")] - pub expansion: Subtree, -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "DelimiterKind")] -enum DelimiterKindDef { - Parenthesis, - Brace, - Bracket, -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "TokenId")] -struct TokenIdDef(u32); - -#[derive(Serialize, Deserialize)] -#[serde(remote = "Delimiter")] -struct DelimiterDef { - #[serde( - with = "TokenIdDef", - default = "tt::TokenId::unspecified", - skip_serializing_if = "token_id_def::skip_if" - )] - id: TokenId, - #[serde(with = "DelimiterKindDef")] - kind: DelimiterKind, -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "Subtree")] -struct SubtreeDef { - #[serde(default, with = "opt_delimiter_def")] - delimiter: Option, - #[serde(with = "vec_token_tree")] - token_trees: Vec, -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "TokenTree")] -enum TokenTreeDef { - #[serde(with = "LeafDef")] - Leaf(Leaf), - #[serde(with = "SubtreeDef")] - Subtree(Subtree), -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "Leaf")] -enum LeafDef { - #[serde(with = "LiteralDef")] - Literal(Literal), - #[serde(with = "PunctDef")] - Punct(Punct), - #[serde(with = "IdentDef")] - Ident(Ident), -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "Literal")] -struct LiteralDef { - text: SmolStr, - #[serde( - with = "TokenIdDef", - default = "tt::TokenId::unspecified", - skip_serializing_if = "token_id_def::skip_if" - )] - id: TokenId, -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "Punct")] -struct PunctDef { - char: char, - #[serde(with = "SpacingDef")] - spacing: Spacing, - #[serde( - with = "TokenIdDef", - default = "tt::TokenId::unspecified", - skip_serializing_if = "token_id_def::skip_if" - )] - id: TokenId, -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "Spacing")] -enum SpacingDef { - Alone, - Joint, -} - -#[derive(Serialize, Deserialize)] -#[serde(remote = "Ident")] -struct IdentDef { - text: SmolStr, - #[serde( - with = "TokenIdDef", - default = "tt::TokenId::unspecified", - skip_serializing_if = "token_id_def::skip_if" - )] - id: TokenId, -} - -mod token_id_def { - pub(super) fn skip_if(value: &tt::TokenId) -> bool { - *value == tt::TokenId::unspecified() - } -} - -mod opt_delimiter_def { - use super::{Delimiter, DelimiterDef}; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - - pub(super) fn serialize(value: &Option, serializer: S) -> Result - where - S: Serializer, - { - #[derive(Serialize)] - struct Helper<'a>(#[serde(with = "DelimiterDef")] &'a Delimiter); - value.as_ref().map(Helper).serialize(serializer) - } - - pub(super) fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - { - #[derive(Deserialize)] - struct Helper(#[serde(with = "DelimiterDef")] Delimiter); - let helper = Option::deserialize(deserializer)?; - Ok(helper.map(|Helper(external)| external)) - } -} - -mod opt_subtree_def { - use super::{Subtree, SubtreeDef}; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; - - pub(super) fn serialize(value: &Option, serializer: S) -> Result - where - S: Serializer, - { - #[derive(Serialize)] - struct Helper<'a>(#[serde(with = "SubtreeDef")] &'a Subtree); - value.as_ref().map(Helper).serialize(serializer) - } - - pub(super) fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - { - #[derive(Deserialize)] - struct Helper(#[serde(with = "SubtreeDef")] Subtree); - let helper = Option::deserialize(deserializer)?; - Ok(helper.map(|Helper(external)| external)) - } -} - -mod vec_token_tree { - use super::{TokenTree, TokenTreeDef}; - use serde::{ser::SerializeSeq, Deserialize, Deserializer, Serialize, Serializer}; - - pub(super) fn serialize(value: &[TokenTree], serializer: S) -> Result - where - S: Serializer, - { - #[derive(Serialize)] - struct Helper<'a>(#[serde(with = "TokenTreeDef")] &'a TokenTree); - - let items: Vec<_> = value.iter().map(Helper).collect(); - let mut seq = serializer.serialize_seq(Some(items.len()))?; - for element in items { - seq.serialize_element(&element)?; - } - seq.end() - } - - pub(super) fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - { - #[derive(Deserialize)] - struct Helper(#[serde(with = "TokenTreeDef")] TokenTree); - - let helper = Vec::deserialize(deserializer)?; - Ok(helper.into_iter().map(|Helper(external)| external).collect()) - } + pub expansion: FlatTree, } #[cfg(test)] mod tests { use super::*; + use tt::*; fn fixture_token_tree() -> Subtree { let mut subtree = Subtree::default(); @@ -257,6 +70,15 @@ mod tests { subtree .token_trees .push(TokenTree::Leaf(Ident { text: "Foo".into(), id: TokenId(1) }.into())); + subtree.token_trees.push(TokenTree::Leaf(Leaf::Literal(Literal { + text: "Foo".into(), + id: TokenId::unspecified(), + }))); + subtree.token_trees.push(TokenTree::Leaf(Leaf::Punct(Punct { + char: '@', + id: TokenId::unspecified(), + spacing: Spacing::Joint, + }))); subtree.token_trees.push(TokenTree::Subtree(Subtree { delimiter: Some(Delimiter { id: TokenId(2), kind: DelimiterKind::Brace }), token_trees: vec![], @@ -268,7 +90,7 @@ mod tests { fn test_proc_macro_rpc_works() { let tt = fixture_token_tree(); let task = ExpansionTask { - macro_body: tt.clone(), + macro_body: FlatTree::new(&tt), macro_name: Default::default(), attributes: None, lib: AbsPathBuf::assert(std::env::current_dir().unwrap()), @@ -276,14 +98,9 @@ mod tests { }; let json = serde_json::to_string(&task).unwrap(); + println!("{}", json); let back: ExpansionTask = serde_json::from_str(&json).unwrap(); - assert_eq!(task.macro_body, back.macro_body); - - let result = ExpansionResult { expansion: tt }; - let json = serde_json::to_string(&result).unwrap(); - let back: ExpansionResult = serde_json::from_str(&json).unwrap(); - - assert_eq!(result, back); + assert_eq!(tt, back.macro_body.to_subtree()); } } diff --git a/crates/proc_macro_api/src/rpc/flat.rs b/crates/proc_macro_api/src/rpc/flat.rs new file mode 100644 index 00000000000..d427fa87d23 --- /dev/null +++ b/crates/proc_macro_api/src/rpc/flat.rs @@ -0,0 +1,328 @@ +//! Serialization-friendly representation of `tt::Subtree`. +//! +//! It is possible to serialize `Subtree` as is, as a tree, but using +//! arbitrary-nested trees in JSON is problematic, as they can cause the JSON +//! parser to overflow the stack. +//! +//! Additionally, such implementation would be pretty verbose, and we do care +//! about performance here a bit. +//! +//! So what this module does is dumping a `tt::Subtree` into a bunch of flat +//! array of numbers. See the test in the parent module to get an example +//! output. +//! +//! ```json +//! { +//! // Array of subtrees, each subtree is represented by 4 numbers: +//! // id of delimiter, delimiter kind, index of first child in `token_tree`, +//! // index of last child in `token_tree` +//! "subtree":[4294967295,0,0,5,2,2,5,5], +//! // 2 ints per literal: [token id, index into `text`] +//! "literal":[4294967295,1], +//! // 3 ints per punct: [token id, char, spacing] +//! "punct":[4294967295,64,1], +//! // 2 ints per ident: [token id, index into `text`] +//! "ident": [0,0,1,1], +//! // children of all subtrees, concatenated. Each child is represented as `index << 2 | tag` +//! // where tag denotes one of subtree, literal, punct or ident. +//! "token_tree":[3,7,1,4], +//! // Strings shared by idents and literals +//! "text": ["struct","Foo"] +//! } +//! ``` +//! +//! We probably should replace most of the code here with bincode someday, but, +//! as we don't have bincode in Cargo.toml yet, lets stick with serde_json for +//! the time being. + +use std::{ + collections::{HashMap, VecDeque}, + convert::TryInto, +}; + +use serde::{Deserialize, Serialize}; +use tt::TokenId; + +#[derive(Serialize, Deserialize, Debug)] +pub struct FlatTree { + subtree: Vec, + literal: Vec, + punct: Vec, + ident: Vec, + token_tree: Vec, + text: Vec, +} + +struct SubtreeRepr { + id: tt::TokenId, + kind: Option, + tt: [u32; 2], +} + +struct LiteralRepr { + id: tt::TokenId, + text: u32, +} + +struct PunctRepr { + id: tt::TokenId, + char: char, + spacing: tt::Spacing, +} + +struct IdentRepr { + id: tt::TokenId, + text: u32, +} + +impl FlatTree { + pub fn new(subtree: &tt::Subtree) -> FlatTree { + let mut w = Writer { + string_table: HashMap::new(), + work: VecDeque::new(), + + subtree: Vec::new(), + literal: Vec::new(), + punct: Vec::new(), + ident: Vec::new(), + token_tree: Vec::new(), + text: Vec::new(), + }; + w.write(subtree); + + return FlatTree { + subtree: write_vec(w.subtree, SubtreeRepr::write), + literal: write_vec(w.literal, LiteralRepr::write), + punct: write_vec(w.punct, PunctRepr::write), + ident: write_vec(w.ident, IdentRepr::write), + token_tree: w.token_tree, + text: w.text, + }; + + fn write_vec [u32; N], const N: usize>(xs: Vec, f: F) -> Vec { + xs.into_iter().flat_map(f).collect() + } + } + + pub fn to_subtree(self) -> tt::Subtree { + return Reader { + subtree: read_vec(self.subtree, SubtreeRepr::read), + literal: read_vec(self.literal, LiteralRepr::read), + punct: read_vec(self.punct, PunctRepr::read), + ident: read_vec(self.ident, IdentRepr::read), + token_tree: self.token_tree, + text: self.text, + } + .read(); + + fn read_vec T, const N: usize>(xs: Vec, f: F) -> Vec { + let mut chunks = xs.chunks_exact(N); + let res = chunks.by_ref().map(|chunk| f(chunk.try_into().unwrap())).collect(); + assert!(chunks.remainder().is_empty()); + res + } + } +} + +impl SubtreeRepr { + fn write(self) -> [u32; 4] { + let kind = match self.kind { + None => 0, + Some(tt::DelimiterKind::Parenthesis) => 1, + Some(tt::DelimiterKind::Brace) => 2, + Some(tt::DelimiterKind::Bracket) => 3, + }; + [self.id.0, kind, self.tt[0], self.tt[1]] + } + fn read([id, kind, lo, len]: [u32; 4]) -> SubtreeRepr { + let kind = match kind { + 0 => None, + 1 => Some(tt::DelimiterKind::Parenthesis), + 2 => Some(tt::DelimiterKind::Brace), + 3 => Some(tt::DelimiterKind::Bracket), + other => panic!("bad kind {}", other), + }; + SubtreeRepr { id: TokenId(id), kind, tt: [lo, len] } + } +} + +impl LiteralRepr { + fn write(self) -> [u32; 2] { + [self.id.0, self.text] + } + fn read([id, text]: [u32; 2]) -> LiteralRepr { + LiteralRepr { id: TokenId(id), text } + } +} + +impl PunctRepr { + fn write(self) -> [u32; 3] { + let spacing = match self.spacing { + tt::Spacing::Alone => 0, + tt::Spacing::Joint => 1, + }; + [self.id.0, self.char as u32, spacing] + } + fn read([id, char, spacing]: [u32; 3]) -> PunctRepr { + let spacing = match spacing { + 0 => tt::Spacing::Alone, + 1 => tt::Spacing::Joint, + other => panic!("bad spacing {}", other), + }; + PunctRepr { id: TokenId(id), char: char.try_into().unwrap(), spacing } + } +} + +impl IdentRepr { + fn write(self) -> [u32; 2] { + [self.id.0, self.text] + } + fn read(data: [u32; 2]) -> IdentRepr { + IdentRepr { id: TokenId(data[0]), text: data[1] } + } +} + +struct Writer<'a> { + work: VecDeque<(usize, &'a tt::Subtree)>, + string_table: HashMap<&'a str, u32>, + + subtree: Vec, + literal: Vec, + punct: Vec, + ident: Vec, + token_tree: Vec, + text: Vec, +} + +impl<'a> Writer<'a> { + fn write(&mut self, root: &'a tt::Subtree) { + self.enqueue(root); + while let Some((idx, subtree)) = self.work.pop_front() { + self.subtree(idx, subtree); + } + } + + fn subtree(&mut self, idx: usize, subtree: &'a tt::Subtree) { + let mut first_tt = self.token_tree.len(); + let n_tt = subtree.token_trees.len(); + self.token_tree.resize(first_tt + n_tt, !0); + + self.subtree[idx].tt = [first_tt as u32, (first_tt + n_tt) as u32]; + + for child in &subtree.token_trees { + let idx_tag = match child { + tt::TokenTree::Subtree(it) => { + let idx = self.enqueue(it); + idx << 2 | 0b00 + } + tt::TokenTree::Leaf(leaf) => match leaf { + tt::Leaf::Literal(lit) => { + let idx = self.literal.len() as u32; + let text = self.intern(&lit.text); + self.literal.push(LiteralRepr { id: lit.id, text }); + idx << 2 | 0b01 + } + tt::Leaf::Punct(punct) => { + let idx = self.punct.len() as u32; + self.punct.push(PunctRepr { + char: punct.char, + spacing: punct.spacing, + id: punct.id, + }); + idx << 2 | 0b10 + } + tt::Leaf::Ident(ident) => { + let idx = self.ident.len() as u32; + let text = self.intern(&ident.text); + self.ident.push(IdentRepr { id: ident.id, text }); + idx << 2 | 0b11 + } + }, + }; + self.token_tree[first_tt] = idx_tag; + first_tt += 1; + } + } + + fn enqueue(&mut self, subtree: &'a tt::Subtree) -> u32 { + let idx = self.subtree.len(); + let delimiter_id = subtree.delimiter.map(|it| it.id).unwrap_or(TokenId::unspecified()); + let delimiter_kind = subtree.delimiter.map(|it| it.kind); + self.subtree.push(SubtreeRepr { id: delimiter_id, kind: delimiter_kind, tt: [!0, !0] }); + self.work.push_back((idx, subtree)); + idx as u32 + } + + pub(crate) fn intern(&mut self, text: &'a str) -> u32 { + let table = &mut self.text; + *self.string_table.entry(text).or_insert_with(|| { + let idx = table.len(); + table.push(text.to_string()); + idx as u32 + }) + } +} + +struct Reader { + subtree: Vec, + literal: Vec, + punct: Vec, + ident: Vec, + token_tree: Vec, + text: Vec, +} + +impl Reader { + pub(crate) fn read(self) -> tt::Subtree { + let mut res: Vec> = vec![None; self.subtree.len()]; + for i in (0..self.subtree.len()).rev() { + let repr = &self.subtree[i]; + let token_trees = &self.token_tree[repr.tt[0] as usize..repr.tt[1] as usize]; + let s = tt::Subtree { + delimiter: repr.kind.map(|kind| tt::Delimiter { id: repr.id, kind }), + token_trees: token_trees + .iter() + .copied() + .map(|idx_tag| { + let tag = idx_tag & 0b11; + let idx = (idx_tag >> 2) as usize; + match tag { + // XXX: we iterate subtrees in reverse to guarantee + // that this unwrap doesn't fire. + 0b00 => res[idx].take().unwrap().into(), + 0b01 => { + let repr = &self.literal[idx]; + tt::Leaf::Literal(tt::Literal { + text: self.text[repr.text as usize].as_str().into(), + id: repr.id, + }) + .into() + } + 0b10 => { + let repr = &self.punct[idx]; + tt::Leaf::Punct(tt::Punct { + char: repr.char, + spacing: repr.spacing, + id: repr.id, + }) + .into() + } + 0b11 => { + let repr = &self.ident[idx]; + tt::Leaf::Ident(tt::Ident { + text: self.text[repr.text as usize].as_str().into(), + id: repr.id, + }) + .into() + } + other => panic!("bad tag: {}", other), + } + }) + .collect(), + }; + res[i] = Some(s.into()) + } + + res[0].take().unwrap() + } +} diff --git a/crates/proc_macro_srv/src/cli.rs b/crates/proc_macro_srv/src/cli.rs index bc48f1c436c..fe3665110db 100644 --- a/crates/proc_macro_srv/src/cli.rs +++ b/crates/proc_macro_srv/src/cli.rs @@ -12,7 +12,7 @@ pub fn run() -> io::Result<()> { let res = match req { msg::Request::ListMacro(task) => srv.list_macros(&task).map(msg::Response::ListMacro), msg::Request::ExpansionMacro(task) => { - srv.expand(&task).map(msg::Response::ExpansionMacro) + srv.expand(task).map(msg::Response::ExpansionMacro) } }; diff --git a/crates/proc_macro_srv/src/lib.rs b/crates/proc_macro_srv/src/lib.rs index 1c39455112c..c60dd2efc53 100644 --- a/crates/proc_macro_srv/src/lib.rs +++ b/crates/proc_macro_srv/src/lib.rs @@ -15,7 +15,7 @@ mod dylib; mod abis; -use proc_macro_api::{ExpansionResult, ExpansionTask, ListMacrosResult, ListMacrosTask}; +use proc_macro_api::{ExpansionResult, ExpansionTask, FlatTree, ListMacrosResult, ListMacrosTask}; use std::{ collections::{hash_map::Entry, HashMap}, env, fs, @@ -29,7 +29,7 @@ pub(crate) struct ProcMacroSrv { } impl ProcMacroSrv { - pub fn expand(&mut self, task: &ExpansionTask) -> Result { + pub fn expand(&mut self, task: ExpansionTask) -> Result { let expander = self.expander(task.lib.as_ref())?; let mut prev_env = HashMap::new(); @@ -38,7 +38,11 @@ impl ProcMacroSrv { env::set_var(k, v); } - let result = expander.expand(&task.macro_name, &task.macro_body, task.attributes.as_ref()); + let macro_body = task.macro_body.to_subtree(); + let attributes = task.attributes.map(|it| it.to_subtree()); + let result = expander + .expand(&task.macro_name, ¯o_body, attributes.as_ref()) + .map(|it| FlatTree::new(&it)); for (k, _) in &task.env { match &prev_env[k.as_str()] {