From b34dbe9e09474821af068f2a33a8108614915edf Mon Sep 17 00:00:00 2001 From: stefnotch Date: Mon, 4 Mar 2024 19:53:07 +0100 Subject: [PATCH] Nom nom (#2480) * Replace formats.rs and features.rs regex with nom * Fix missing eof in format rs * Refactor parsing logic in extensions.rs * Replace regex in mod.rs * Replace regex in spriv_reqs.rs * Improve nom usage for parsing get_header_version * Remove stray dbg * Remove final usage of regex * Replace tag("single character") with char(...) * Remove unused import * Undo my testing changes to vk.xml (I shouldn't have committed that) * Sort cargo toml alphabetically * Use nom for parse_depends * Simplify parser again * Parser cleanup * Inline parser logic for shorter code Thanks to marc for suggesting this * Fix clippy violation * Remove useless parse prefix --- Cargo.lock | 56 +++++---------- Cargo.toml | 2 +- vulkano/Cargo.toml | 2 +- vulkano/autogen/extensions.rs | 129 ++++++++-------------------------- vulkano/autogen/features.rs | 16 ++++- vulkano/autogen/formats.rs | 23 +++--- vulkano/autogen/mod.rs | 85 +++++++++++++++++----- vulkano/autogen/properties.rs | 39 +++++++--- vulkano/autogen/spirv_reqs.rs | 30 ++++---- 9 files changed, 190 insertions(+), 192 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 018d92ef..e2168959 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -46,15 +46,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "aho-corasick" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" -dependencies = [ - "memchr", -] - [[package]] name = "android-activity" version = "0.5.1" @@ -1195,6 +1186,12 @@ dependencies = [ "winit 0.29.9", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.1" @@ -1375,6 +1372,16 @@ dependencies = [ "memoffset 0.7.1", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num_enum" version = "0.5.11" @@ -1725,35 +1732,6 @@ dependencies = [ "bitflags 1.3.2", ] -[[package]] -name = "regex" -version = "1.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" - [[package]] name = "ron" version = "0.8.1" @@ -2364,13 +2342,13 @@ dependencies = [ "heck", "indexmap", "libloading 0.8.1", + "nom", "objc", "once_cell", "parking_lot", "proc-macro2", "quote", "raw-window-handle 0.6.0", - "regex", "serde", "serde_json", "smallvec", diff --git a/Cargo.toml b/Cargo.toml index c1782a20..7d5f14ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ half = "2.0" heck = "0.4" indexmap = "2.0" libloading = "0.8" +nom = "7.1" objc = "0.2.5" once_cell = "1.17" parking_lot = "0.12" @@ -46,7 +47,6 @@ proc-macro2 = "1.0" proc-macro-crate = "2.0" quote = "1.0" raw-window-handle = "0.6" -regex = "1.8" serde = "1.0" serde_json = "1.0" shaderc = "0.8" diff --git a/vulkano/Cargo.toml b/vulkano/Cargo.toml index a1d2727d..3c69d9cb 100644 --- a/vulkano/Cargo.toml +++ b/vulkano/Cargo.toml @@ -36,10 +36,10 @@ core-graphics-types = { workspace = true } ahash = { workspace = true } heck = { workspace = true } indexmap = { workspace = true } +nom = { workspace = true } once_cell = { workspace = true } proc-macro2 = { workspace = true } quote = { workspace = true } -regex = { workspace = true } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } vk-parse = { workspace = true } diff --git a/vulkano/autogen/extensions.rs b/vulkano/autogen/extensions.rs index 735fbc31..ece6b1d5 100644 --- a/vulkano/autogen/extensions.rs +++ b/vulkano/autogen/extensions.rs @@ -1,9 +1,11 @@ use super::{write_file, IndexMap, RequiresOneOf, VkRegistryData}; use heck::ToSnakeCase; -use once_cell::sync::Lazy; +use nom::{ + branch::alt, bytes::complete::take_while1, character::complete, combinator::all_consuming, + multi::separated_list1, sequence::delimited, IResult, Parser, +}; use proc_macro2::{Ident, Literal, TokenStream}; use quote::{format_ident, quote}; -use regex::Regex; use std::fmt::Write as _; use vk_parse::Extension; @@ -968,109 +970,40 @@ enum DependsExpression<'a> { AllOf(Vec), } -fn parse_depends(mut depends: &str) -> Result, String> { - #[derive(Debug, PartialEq)] - enum Token<'a> { - Name(&'a str), - Plus, - Comma, - POpen, - PClose, +fn parse_depends(depends: &str) -> Result, String> { + fn name(input: &str) -> IResult<&str, &str> { + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '_')(input) } - static NAME: Lazy = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_]+").unwrap()); + fn term(input: &str) -> IResult<&str, DependsExpression> { + alt(( + name.map(DependsExpression::Name), + delimited(complete::char('('), expression, complete::char(')')), + ))(input) + } - let mut next_token = move || { - if let Some(m) = NAME.find(depends) { - depends = &depends[m.len()..]; - Ok(Some(Token::Name(m.as_str()))) + fn expression(input: &str) -> IResult<&str, DependsExpression> { + let (input, first) = term(input)?; + + if let Some(input) = input.strip_prefix('+') { + let (input, mut all_of) = separated_list1(complete::char('+'), term)(input)?; + all_of.insert(0, first); + + Ok((input, DependsExpression::AllOf(all_of))) + } else if let Some(input) = input.strip_prefix(',') { + let (input, mut one_of) = separated_list1(complete::char(','), term)(input)?; + one_of.insert(0, first); + + Ok((input, DependsExpression::OneOf(one_of))) } else { - depends - .chars() - .next() - .map(|c| { - let token = match c { - '+' => Token::Plus, - ',' => Token::Comma, - '(' => Token::POpen, - ')' => Token::PClose, - _ => return Err(format!("unexpected character: {}", c)), - }; - depends = &depends[1..]; - Ok(token) - }) - .transpose() - } - }; - - fn parse_expression<'a>( - next_token: &mut impl FnMut() -> Result>, String>, - expect_pclose: bool, - ) -> Result, String> { - let first = match next_token()? { - Some(Token::Name(name)) => DependsExpression::Name(name), - Some(Token::POpen) => parse_expression(next_token, true)?, - Some(token) => return Err(format!("unexpected token: {:?}", token)), - None => return Err("unexpected end of string".into()), - }; - - match next_token()? { - Some(separator @ (Token::Plus | Token::Comma)) => { - let mut subexpr = vec![first]; - - loop { - match next_token()? { - Some(Token::Name(name)) => subexpr.push(DependsExpression::Name(name)), - Some(Token::POpen) => subexpr.push(parse_expression(next_token, true)?), - Some(token) => return Err(format!("unexpected token: {:?}", token)), - None => return Err("unexpected end of string".into()), - } - - match next_token()? { - Some(Token::PClose) => { - if expect_pclose { - break; - } else { - return Err(format!("unexpected token: {:?}", Token::PClose)); - } - } - Some(token) if token == separator => (), - Some(token) => return Err(format!("unexpected token: {:?}", token)), - None => { - if expect_pclose { - return Err("unexpected end of string".into()); - } else { - break; - } - } - } - } - - Ok(match separator { - Token::Plus => DependsExpression::AllOf(subexpr), - Token::Comma => DependsExpression::OneOf(subexpr), - _ => unreachable!(), - }) - } - Some(Token::PClose) => { - if expect_pclose { - Ok(first) - } else { - Err(format!("unexpected token: {:?}", Token::PClose)) - } - } - Some(token) => Err(format!("unexpected token: {:?}", token)), - None => { - if expect_pclose { - Err("unexpected end of string".into()) - } else { - Ok(first) - } - } + Ok((input, first)) } } - parse_expression(&mut next_token, false) + match all_consuming(expression)(depends) { + Ok((_, expr)) => Ok(expr), + Err(err) => Err(format!("{:?}", err)), + } } fn make_doc(ext: &mut ExtensionsMember) { diff --git a/vulkano/autogen/features.rs b/vulkano/autogen/features.rs index 209d7e26..6a423e03 100644 --- a/vulkano/autogen/features.rs +++ b/vulkano/autogen/features.rs @@ -1,9 +1,9 @@ use super::{write_file, IndexMap, VkRegistryData}; use ahash::HashMap; use heck::ToSnakeCase; +use nom::{bytes::complete::tag, character::complete::digit1, combinator::eof, sequence::tuple}; use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; -use regex::Regex; use std::{collections::hash_map::Entry, fmt::Write as _}; use vk_parse::{Extension, Type, TypeMember, TypeMemberMarkup, TypeSpec}; @@ -740,11 +740,21 @@ fn sorted_structs<'a>( ty.structextends.as_deref() == Some("VkPhysicalDeviceFeatures2,VkDeviceCreateInfo") }) .collect(); - let regex = Regex::new(r"^VkPhysicalDeviceVulkan\d+Features$").unwrap(); + + fn is_physical_device_features(name: &str) -> bool { + tuple(( + tag::<_, &str, ()>("VkPhysicalDeviceVulkan"), + digit1, + tag("Features"), + eof, + ))(name) + .is_ok() + } + structs.sort_unstable_by_key(|&(ty, provided_by)| { let name = ty.name.as_ref().unwrap(); ( - !regex.is_match(name), + !is_physical_device_features(name), if let Some(version) = provided_by .iter() .find_map(|s| s.strip_prefix("VK_VERSION_")) diff --git a/vulkano/autogen/formats.rs b/vulkano/autogen/formats.rs index a03bbbbd..7f7274df 100644 --- a/vulkano/autogen/formats.rs +++ b/vulkano/autogen/formats.rs @@ -1,9 +1,8 @@ use super::{write_file, IndexMap, RequiresOneOf, VkRegistryData}; use heck::ToSnakeCase; -use once_cell::sync::Lazy; +use nom::{character::complete, combinator::eof, sequence::tuple}; use proc_macro2::{Ident, Literal, TokenStream}; use quote::{format_ident, quote}; -use regex::Regex; use std::iter; use vk_parse::{ Enum, EnumSpec, Extension, ExtensionChild, Feature, Format, FormatChild, InterfaceItem, @@ -606,8 +605,17 @@ fn formats_members( features: &IndexMap<&str, &Feature>, extensions: &IndexMap<&str, &Extension>, ) -> Vec { - static BLOCK_EXTENT_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^(\d+),(\d+),(\d+)$").unwrap()); + fn parse_block_extent(input: &str) -> Result<[u32; 3], nom::Err<()>> { + tuple(( + complete::u32::<_, ()>, + complete::char(','), + complete::u32, + complete::char(','), + complete::u32, + eof, + ))(input) + .map(|(_, (a, _, b, _, c, _))| [a, b, c]) + } iter::once( FormatMember { @@ -763,12 +771,7 @@ fn formats_members( } if let Some(block_extent) = format.blockExtent.as_ref() { - let captures = BLOCK_EXTENT_REGEX.captures(block_extent).unwrap(); - member.block_extent = [ - captures.get(1).unwrap().as_str().parse().unwrap(), - captures.get(2).unwrap().as_str().parse().unwrap(), - captures.get(3).unwrap().as_str().parse().unwrap(), - ]; + member.block_extent = parse_block_extent(block_extent).unwrap(); } else { match format.chroma.as_deref() { Some("420") => member.block_extent = [2, 2, 1], diff --git a/vulkano/autogen/mod.rs b/vulkano/autogen/mod.rs index 455cc7da..52e30d7a 100644 --- a/vulkano/autogen/mod.rs +++ b/vulkano/autogen/mod.rs @@ -1,7 +1,12 @@ use self::spirv_grammar::SpirvGrammar; use ahash::HashMap; -use once_cell::sync::Lazy; -use regex::Regex; +use nom::{ + bytes::complete::{tag, take_until}, + character::complete::{self, multispace0, multispace1}, + combinator::eof, + sequence::{delimited, tuple}, + IResult, Parser, +}; use std::{ cmp::min, env, @@ -114,12 +119,52 @@ impl<'r> VkRegistryData<'r> { } } + /// Returns the Vulkan header version in the vk.xml file. fn get_header_version(registry: &Registry) -> (u16, u16, u16) { - static VK_HEADER_VERSION: Lazy = - Lazy::new(|| Regex::new(r"#define\s+VK_HEADER_VERSION\s+(\d+)\s*$").unwrap()); - static VK_HEADER_VERSION_COMPLETE: Lazy = Lazy::new(|| { - Regex::new(r"#define\s+VK_HEADER_VERSION_COMPLETE\s+VK_MAKE_API_VERSION\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*VK_HEADER_VERSION\s*\)").unwrap() - }); + fn spaced_comma(input: &str) -> IResult<&str, char> { + delimited(multispace0, complete::char(','), multispace0)(input) + } + + fn vk_header_patch(input: &str) -> IResult<&str, u16> { + let (input, _) = take_until("#define")(input)?; + delimited( + tuple(( + tag("#define"), + multispace1, + tag("VK_HEADER_VERSION"), + multispace0, + )), + complete::u16, + tuple((multispace0, eof)), + )(input) + } + + fn vk_header_major_minor(input: &str) -> IResult<&str, (u16, u16)> { + let (input, _) = take_until("#define")(input)?; + delimited( + tuple(( + tag("#define"), + multispace1, + tag("VK_HEADER_VERSION_COMPLETE"), + multispace1, + tag("VK_MAKE_API_VERSION"), + multispace0, + complete::char('('), + multispace0, + )), + tuple(( + complete::u16, + spaced_comma, + complete::u16, + spaced_comma, + complete::u16, + spaced_comma, + tag("VK_HEADER_VERSION"), + )) + .map(|(_ignored, _, major, _, minor, _, _)| (major, minor)), + tuple((multispace0, complete::char(')'), multispace0)), + )(input) + } let mut major = None; let mut minor = None; @@ -129,14 +174,17 @@ impl<'r> VkRegistryData<'r> { if let RegistryChild::Types(types) = child { for ty in types.children.iter() { if let TypesChild::Type(ty) = ty { + if ty.api.as_deref() != Some("vulkan") { + continue; + } if let TypeSpec::Code(code) = &ty.spec { - if let Some(captures) = VK_HEADER_VERSION.captures(&code.code) { - patch = Some(captures.get(1).unwrap().as_str().parse().unwrap()); - } else if let Some(captures) = - VK_HEADER_VERSION_COMPLETE.captures(&code.code) - { - major = Some(captures.get(2).unwrap().as_str().parse().unwrap()); - minor = Some(captures.get(3).unwrap().as_str().parse().unwrap()); + if let Ok((_, p)) = vk_header_patch(&code.code) { + assert!(patch.is_none()); + patch = Some(p); + } else if let Ok((_, (m, n))) = vk_header_major_minor(&code.code) { + assert!(major.is_none()); + major = Some(m); + minor = Some(n); } } } @@ -430,11 +478,12 @@ pub fn get_spirv_grammar + ?Sized>(path: &P) -> SpirvGrammar { } fn suffix_key(name: &str) -> u32 { - static VENDOR_SUFFIXES: Lazy = - Lazy::new(|| Regex::new(r"(?:AMD|GOOGLE|INTEL|NV)$").unwrap()); - #[allow(clippy::bool_to_int_with_if)] - if VENDOR_SUFFIXES.is_match(name) { + if name.ends_with("AMD") + || name.ends_with("GOOGLE") + || name.ends_with("INTEL") + || name.ends_with("NV") + { 3 } else if name.ends_with("EXT") { 2 diff --git a/vulkano/autogen/properties.rs b/vulkano/autogen/properties.rs index cee939b4..30ac7bc5 100644 --- a/vulkano/autogen/properties.rs +++ b/vulkano/autogen/properties.rs @@ -1,9 +1,15 @@ use super::{write_file, IndexMap, VkRegistryData}; use ahash::HashMap; use heck::ToSnakeCase; +use nom::{ + bytes::complete::{tag, take_until, take_while1}, + character::complete::{self, digit1}, + combinator::{all_consuming, eof}, + sequence::{delimited, tuple}, + IResult, +}; use proc_macro2::{Ident, TokenStream}; use quote::{format_ident, quote}; -use regex::Regex; use std::{collections::hash_map::Entry, fmt::Write as _}; use vk_parse::{Extension, Type, TypeMember, TypeMemberMarkup, TypeSpec}; @@ -374,11 +380,21 @@ fn sorted_structs<'a>( .values() .filter(|(ty, _)| ty.structextends.as_deref() == Some("VkPhysicalDeviceProperties2")) .collect(); - let regex = Regex::new(r"^VkPhysicalDeviceVulkan\d+Properties$").unwrap(); + + fn is_physical_device_properties(name: &str) -> bool { + tuple(( + tag::<_, &str, ()>("VkPhysicalDeviceVulkan"), + digit1, + tag("Properties"), + eof, + ))(name) + .is_ok() + } + structs.sort_unstable_by_key(|&(ty, provided_by)| { let name = ty.name.as_ref().unwrap(); ( - !regex.is_match(name), + !is_physical_device_properties(name), if let Some(version) = provided_by .iter() .find_map(|s| s.strip_prefix("VK_VERSION_")) @@ -415,7 +431,15 @@ struct Member<'a> { } fn members(ty: &Type) -> Vec { - let regex = Regex::new(r"\[([A-Za-z0-9_]+)\]\s*$").unwrap(); + fn array_len(input: &str) -> IResult<&str, &str> { + let (input, _) = take_until("[")(input)?; + all_consuming(delimited( + complete::char('['), + take_while1(|c: char| c.is_ascii_alphanumeric() || c == '_'), + complete::char(']'), + ))(input) + } + if let TypeSpec::Members(members) = &ty.spec { members .iter() @@ -436,12 +460,7 @@ fn members(ty: &Type) -> Vec { TypeMemberMarkup::Enum(len) => Some(len.as_str()), _ => None, }) - .or_else(|| { - regex - .captures(&def.code) - .and_then(|cap| cap.get(1)) - .map(|m| m.as_str()) - }); + .or_else(|| array_len(&def.code).map(|(_, len)| len).ok()); if name != Some("sType") && name != Some("pNext") { return name.map(|name| Member { name, diff --git a/vulkano/autogen/spirv_reqs.rs b/vulkano/autogen/spirv_reqs.rs index a2a456c9..c82e26b6 100644 --- a/vulkano/autogen/spirv_reqs.rs +++ b/vulkano/autogen/spirv_reqs.rs @@ -4,10 +4,15 @@ use super::{ }; use heck::ToSnakeCase; use indexmap::map::Entry; -use once_cell::sync::Lazy; +use nom::{ + bytes::complete::tag, + character::complete, + combinator::all_consuming, + sequence::{preceded, separated_pair}, + IResult, Parser, +}; use proc_macro2::TokenStream; use quote::{format_ident, quote}; -use regex::Regex; use vk_parse::SpirvExtOrCap; pub fn write(vk_data: &VkRegistryData, grammar: &SpirvGrammar) { @@ -303,9 +308,12 @@ fn spirv_extensions_members(extensions: &[&SpirvExtOrCap]) -> Vec (RequiresOneOf, Vec) { - static VK_API_VERSION: Lazy = - Lazy::new(|| Regex::new(r"^VK_(?:API_)?VERSION_(\d+)_(\d+)$").unwrap()); - static BIT: Lazy = Lazy::new(|| Regex::new(r"_BIT(?:_NV)?$").unwrap()); + fn vk_api_version(input: &str) -> IResult<&str, (u32, u32)> { + all_consuming(preceded( + tag("VK_API_VERSION_").or(tag("VK_VERSION_")), + separated_pair(complete::u32, complete::char('_'), complete::u32), + ))(input) + } let mut requires_one_of = RequiresOneOf::default(); let mut requires_properties = vec![]; @@ -314,12 +322,7 @@ fn make_requires(enables: &[vk_parse::Enable]) -> (RequiresOneOf, Vec { if version != "VK_VERSION_1_0" { - let captures = VK_API_VERSION.captures(version).unwrap(); - let major = captures.get(1).unwrap().as_str(); - let minor = captures.get(1).unwrap().as_str(); - - requires_one_of.api_version = - Some((major.parse().unwrap(), minor.parse().unwrap())); + requires_one_of.api_version = Some(vk_api_version(version).unwrap().1); } } vk_parse::Enable::Extension(extension) => { @@ -341,7 +344,10 @@ fn make_requires(enables: &[vk_parse::Enable]) -> (RequiresOneOf, Vec