Convert newtype_index to a proc macro

The `macro_rules!` implementation was becoming excessively complicated,
and difficult to modify. The new proc macro implementation should make
it much easier to add new features (e.g. skipping certain `#[derive]`s).
This commit is contained in:
Aaron Hill 2022-02-09 17:24:51 -05:00
parent 4b043faba3
commit 339bbebbc1
No known key found for this signature in database
GPG Key ID: B4087E510E98B164
4 changed files with 349 additions and 455 deletions

View File

@ -12,6 +12,8 @@ pub mod bit_set;
pub mod interval;
pub mod vec;
pub use rustc_macros::newtype_index;
/// Type size assertion. The first argument is a type and the second argument is its expected size.
#[macro_export]
macro_rules! static_assert_size {

View File

@ -49,461 +49,6 @@ impl Idx for u32 {
}
}
/// Creates a struct type `S` that can be used as an index with
/// `IndexVec` and so on.
///
/// There are two ways of interacting with these indices:
///
/// - The `From` impls are the preferred way. So you can do
/// `S::from(v)` with a `usize` or `u32`. And you can convert back
/// to an integer with `u32::from(s)`.
///
/// - Alternatively, you can use the methods `S::new(v)` and `s.index()`
/// to create/return a value.
///
/// Internally, the index uses a u32, so the index must not exceed
/// `u32::MAX`. You can also customize things like the `Debug` impl,
/// what traits are derived, and so forth via the macro.
///
/// The body of the struct is either `{ .. }` (use all defaults) or a list of
/// options handled by the private rules below:
///
/// - `derive [A, B]`: extra traits to derive. Must be the first option.
/// - `ENCODABLE = custom`: do not generate the `Encodable`/`Decodable` impls.
/// Must directly follow `derive [..]`, or come first if there is no `derive`.
/// - `MAX = <expr>`: replaces the default maximum of `0xFFFF_FF00`.
/// - `DEBUG_FORMAT = <format string>` or `DEBUG_FORMAT = custom`: the format
/// used by the generated `Debug` impl, or `custom` to generate none.
/// - `const NAME = <expr>`: defines a constant `NAME` of the index type,
/// built with `from_u32`.
#[macro_export]
#[allow_internal_unstable(step_trait, rustc_attrs, trusted_step)]
macro_rules! newtype_index {
// Implementation note: the private rules thread their state through a fixed
// sequence of bracketed groups (`@derives [..] @attrs [..] @type [..]
// @max [..] @vis [..] @debug_format [..]`) followed by the not-yet-processed
// user tokens. Each rule consumes one option from the front of those tokens
// and recurses. Rules are tried in the order they are written, so the order
// of the rules below matters.
// ---- public rules ----
// Use default constants
($(#[$attrs:meta])* $v:vis struct $name:ident { .. }) => (
$crate::newtype_index!(
// Leave out derives marker so we can use its absence to ensure it comes first
@attrs [$(#[$attrs])*]
@type [$name]
// shave off 256 indices at the end to allow space for packing these indices into enums
@max [0xFFFF_FF00]
@vis [$v]
@debug_format ["{}"]);
);
// Define any constants
($(#[$attrs:meta])* $v:vis struct $name:ident { $($tokens:tt)+ }) => (
$crate::newtype_index!(
// Leave out derives marker so we can use its absence to ensure it comes first
@attrs [$(#[$attrs])*]
@type [$name]
// shave off 256 indices at the end to allow space for packing these indices into enums
@max [0xFFFF_FF00]
@vis [$v]
@debug_format ["{}"]
$($tokens)+);
);
// ---- private rules ----
// Base case, user-defined constants (if any) have already been defined
// No user tokens are left at this point, so emit the struct and all of its impls.
(@derives [$($derives:ident,)*]
@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]) => (
$(#[$attrs])*
#[derive(Copy, PartialEq, Eq, Hash, PartialOrd, Ord, $($derives),*)]
#[rustc_layout_scalar_valid_range_end($max)]
$v struct $type {
private: u32
}
// `Clone` is written by hand as a plain copy; `Copy` is derived above.
impl Clone for $type {
#[inline]
fn clone(&self) -> Self {
*self
}
}
impl $type {
/// Maximum value the index can take, as a `u32`.
$v const MAX_AS_U32: u32 = $max;
/// Maximum value the index can take.
$v const MAX: Self = Self::from_u32($max);
/// Creates a new index from a given `usize`.
///
/// # Panics
///
/// Will panic if `value` exceeds `MAX`.
#[inline]
$v const fn from_usize(value: usize) -> Self {
assert!(value <= ($max as usize));
// SAFETY: We just checked that `value <= max`.
unsafe {
Self::from_u32_unchecked(value as u32)
}
}
/// Creates a new index from a given `u32`.
///
/// # Panics
///
/// Will panic if `value` exceeds `MAX`.
#[inline]
$v const fn from_u32(value: u32) -> Self {
assert!(value <= $max);
// SAFETY: We just checked that `value <= max`.
unsafe {
Self::from_u32_unchecked(value)
}
}
/// Creates a new index from a given `u32`.
///
/// # Safety
///
/// The provided value must be less than or equal to the maximum value for the newtype.
/// Providing a value outside this range is undefined due to layout restrictions.
///
/// Prefer using `from_u32`.
#[inline]
$v const unsafe fn from_u32_unchecked(value: u32) -> Self {
Self { private: value }
}
/// Extracts the value of this index as a `usize`.
#[inline]
$v const fn index(self) -> usize {
self.as_usize()
}
/// Extracts the value of this index as a `u32`.
#[inline]
$v const fn as_u32(self) -> u32 {
self.private
}
/// Extracts the value of this index as a `usize`.
#[inline]
$v const fn as_usize(self) -> usize {
self.as_u32() as usize
}
}
// `index + n` is routed through `from_usize`, so it panics if the sum exceeds the maximum.
impl std::ops::Add<usize> for $type {
type Output = Self;
fn add(self, other: usize) -> Self {
Self::from_usize(self.index() + other)
}
}
impl $crate::vec::Idx for $type {
#[inline]
fn new(value: usize) -> Self {
Self::from_usize(value)
}
#[inline]
fn index(self) -> usize {
self.as_usize()
}
}
// `Step` is implemented by delegating to the `usize` value of the index.
impl ::std::iter::Step for $type {
#[inline]
fn steps_between(start: &Self, end: &Self) -> Option<usize> {
<usize as ::std::iter::Step>::steps_between(
&Self::index(*start),
&Self::index(*end),
)
}
#[inline]
fn forward_checked(start: Self, u: usize) -> Option<Self> {
Self::index(start).checked_add(u).map(Self::from_usize)
}
#[inline]
fn backward_checked(start: Self, u: usize) -> Option<Self> {
Self::index(start).checked_sub(u).map(Self::from_usize)
}
}
// Safety: The implementation of `Step` upholds all invariants.
unsafe impl ::std::iter::TrustedStep for $type {}
impl From<$type> for u32 {
#[inline]
fn from(v: $type) -> u32 {
v.as_u32()
}
}
impl From<$type> for usize {
#[inline]
fn from(v: $type) -> usize {
v.as_usize()
}
}
impl From<usize> for $type {
#[inline]
fn from(value: usize) -> Self {
Self::from_usize(value)
}
}
impl From<u32> for $type {
#[inline]
fn from(value: u32) -> Self {
Self::from_u32(value)
}
}
// Whether a `Debug` impl is generated is decided by the `@handle_debug` rules below.
$crate::newtype_index!(
@handle_debug
@derives [$($derives,)*]
@type [$type]
@debug_format [$debug_format]);
);
// base case for handle_debug where format is custom. No Debug implementation is emitted.
(@handle_debug
@derives [$($_derives:ident,)*]
@type [$type:ident]
@debug_format [custom]) => ();
// base case for handle_debug, no debug overrides found, so use default
(@handle_debug
@derives []
@type [$type:ident]
@debug_format [$debug_format:tt]) => (
impl ::std::fmt::Debug for $type {
fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
write!(fmt, $debug_format, self.as_u32())
}
}
);
// Debug is requested for derive, don't generate any Debug implementation.
(@handle_debug
@derives [Debug, $($derives:ident,)*]
@type [$type:ident]
@debug_format [$debug_format:tt]) => ();
// It's not Debug, so just pop it off the front of the derives stack and check the rest.
(@handle_debug
@derives [$_derive:ident, $($derives:ident,)*]
@type [$type:ident]
@debug_format [$debug_format:tt]) => (
$crate::newtype_index!(
@handle_debug
@derives [$($derives,)*]
@type [$type]
@debug_format [$debug_format]);
);
// The rules from here up to `@serializable` match input that does not yet carry an
// `@derives` group. Each of them inserts one; the serialization impls are requested
// unless `ENCODABLE = custom` is present.
// Append comma to end of derives list if it's missing
(@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
derive [$($derives:ident),*]
$($tokens:tt)*) => (
$crate::newtype_index!(
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
derive [$($derives,)*]
$($tokens)*);
);
// By not including the @derives marker in this list nor in the default args, we can force it
// to come first if it exists. When encodable is custom, just use the derives list as-is.
(@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
derive [$($derives:ident,)+]
ENCODABLE = custom
$($tokens:tt)*) => (
$crate::newtype_index!(
@attrs [$(#[$attrs])*]
@derives [$($derives,)+]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$($tokens)*);
);
// By not including the @derives marker in this list nor in the default args, we can force it
// to come first if it exists. When encodable isn't custom, add serialization traits by default.
(@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
derive [$($derives:ident,)+]
$($tokens:tt)*) => (
$crate::newtype_index!(
@derives [$($derives,)+]
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$($tokens)*);
$crate::newtype_index!(@serializable $type);
);
// The case where no derives are added, but encodable is overridden. Don't
// derive serialization traits
(@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
ENCODABLE = custom
$($tokens:tt)*) => (
$crate::newtype_index!(
@derives []
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$($tokens)*);
);
// The case where no derives are added, add serialization derives by default
(@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
$($tokens:tt)*) => (
$crate::newtype_index!(
@derives []
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$($tokens)*);
$crate::newtype_index!(@serializable $type);
);
// Default serialization impls: the index is read and written as a plain `u32`.
(@serializable $type:ident) => (
impl<D: ::rustc_serialize::Decoder> ::rustc_serialize::Decodable<D> for $type {
fn decode(d: &mut D) -> Self {
Self::from_u32(d.read_u32())
}
}
impl<E: ::rustc_serialize::Encoder> ::rustc_serialize::Encodable<E> for $type {
fn encode(&self, e: &mut E) -> Result<(), E::Error> {
e.emit_u32(self.private)
}
}
);
// Rewrite final without comma to one that includes comma
// (this lets a trailing `MAX = ..` or `DEBUG_FORMAT = ..` match the comma-terminated
// rules further down)
(@derives [$($derives:ident,)*]
@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
$name:ident = $constant:expr) => (
$crate::newtype_index!(
@derives [$($derives,)*]
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$name = $constant,);
);
// Rewrite final const without comma to one that includes comma
(@derives [$($derives:ident,)*]
@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
$(#[doc = $doc:expr])*
const $name:ident = $constant:expr) => (
$crate::newtype_index!(
@derives [$($derives,)*]
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$(#[doc = $doc])* const $name = $constant,);
);
// Replace existing default for max
(@derives [$($derives:ident,)*]
@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$_max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
MAX = $max:expr,
$($tokens:tt)*) => (
$crate::newtype_index!(
@derives [$($derives,)*]
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$($tokens)*);
);
// Replace existing default for debug_format
(@derives [$($derives:ident,)*]
@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$_debug_format:tt]
DEBUG_FORMAT = $debug_format:tt,
$($tokens:tt)*) => (
$crate::newtype_index!(
@derives [$($derives,)*]
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$($tokens)*);
);
// Assign a user-defined constant
// The constant is emitted right away (next to the struct, not inside an `impl`), then
// the remaining tokens are processed.
(@derives [$($derives:ident,)*]
@attrs [$(#[$attrs:meta])*]
@type [$type:ident]
@max [$max:expr]
@vis [$v:vis]
@debug_format [$debug_format:tt]
$(#[doc = $doc:expr])*
const $name:ident = $constant:expr,
$($tokens:tt)*) => (
$(#[doc = $doc])*
$v const $name: $type = $type::from_u32($constant);
$crate::newtype_index!(
@derives [$($derives,)*]
@attrs [$(#[$attrs])*]
@type [$type]
@max [$max]
@vis [$v]
@debug_format [$debug_format]
$($tokens)*);
);
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct IndexVec<I: Idx, T> {
pub raw: Vec<T>,

View File

@ -1,4 +1,5 @@
#![feature(proc_macro_diagnostic)]
#![feature(allow_internal_unstable)]
#![allow(rustc::default_hash_types)]
#![recursion_limit = "128"]
@ -8,6 +9,7 @@ use proc_macro::TokenStream;
mod hash_stable;
mod lift;
mod newtype;
mod query;
mod serialize;
mod session_diagnostic;
@ -24,6 +26,27 @@ pub fn symbols(input: TokenStream) -> TokenStream {
symbols::symbols(input.into()).into()
}
/// Creates a struct type `S` that can be used as an index with
/// `IndexVec` and so on.
///
/// There are two ways of interacting with these indices:
///
/// - The `From` impls are the preferred way. So you can do
///   `S::from(v)` with a `usize` or `u32`. And you can convert back
///   to an integer with `u32::from(s)`.
///
/// - Alternatively, you can use the methods `S::new(v)` and `s.index()`
///   to create/return a value.
///
/// Internally, the index uses a u32, so the index must not exceed
/// `u32::MAX`. You can also customize things like the `Debug` impl,
/// what traits are derived, and so forth via the macro.
///
/// The struct body is either `{ .. }` (all defaults) or any sequence of the
/// following options, each optionally followed by a comma:
///
/// - `derive [Path, ...]`: additional traits to derive.
/// - `DEBUG_FORMAT = "<format string>"` or `DEBUG_FORMAT = custom`: the format
///   string for the generated `Debug` impl (default `"{}"`), or `custom` to
///   generate no `Debug` impl.
/// - `MAX = <literal>`: the maximum index value (default `0xFFFF_FF00`).
/// - `ENCODABLE = custom`: do not generate `Encodable`/`Decodable` impls.
/// - `const NAME = <expr>`: a constant of the index type, built with `from_u32`.
#[proc_macro]
#[allow_internal_unstable(step_trait, rustc_attrs, trusted_step)]
pub fn newtype_index(input: TokenStream) -> TokenStream {
    // `newtype::newtype` already returns a `proc_macro::TokenStream`, so no
    // conversion is needed here.
    newtype::newtype(input)
}
decl_derive!([HashStable, attributes(stable_hasher)] => hash_stable::hash_stable_derive);
decl_derive!(
[HashStable_Generic, attributes(stable_hasher)] =>

View File

@ -0,0 +1,324 @@
use proc_macro2::{Span, TokenStream};
use quote::quote;
use syn::parse::*;
use syn::punctuated::Punctuated;
use syn::*;
mod kw {
    // Option names that may appear inside the braces of a `newtype_index!` struct.
    syn::custom_keyword!(derive);
    syn::custom_keyword!(MAX);
    syn::custom_keyword!(DEBUG_FORMAT);
    syn::custom_keyword!(ENCODABLE);

    // Value accepted by `DEBUG_FORMAT = custom` and `ENCODABLE = custom`.
    syn::custom_keyword!(custom);
}
/// How the `Debug` impl of the generated index type is produced.
#[derive(Debug)]
enum DebugFormat {
    /// Use the specified format string in the generated `Debug` impl.
    /// By default, this is "{}".
    Format(String),
    /// The user will provide a custom `Debug` impl, so we shouldn't generate
    /// one.
    Custom,
}
/// Wrapper around the expansion of one `newtype_index!` invocation.
///
/// We parse the input and emit the output in a single step (in the `Parse`
/// impl), so there is no intermediate representation of the parsed options.
/// The single field stores the final macro output.
struct Newtype(TokenStream);
impl Parse for Newtype {
fn parse(input: ParseStream<'_>) -> Result<Self> {
let attrs = input.call(Attribute::parse_outer)?;
let vis: Visibility = input.parse()?;
input.parse::<Token![struct]>()?;
let name: Ident = input.parse()?;
let body;
braced!(body in input);
// Any additional `#[derive]` macro paths to apply
let mut derive_paths: Option<Vec<Path>> = None;
let mut debug_format: Option<DebugFormat> = None;
let mut max = None;
let mut consts = Vec::new();
let mut encodable = true;
// Parse an optional trailing comma
let try_comma = || -> Result<()> {
if body.lookahead1().peek(Token![,]) {
body.parse::<Token![,]>()?;
}
Ok(())
};
if body.lookahead1().peek(Token![..]) {
body.parse::<Token![..]>()?;
} else {
loop {
if body.lookahead1().peek(kw::derive) {
body.parse::<kw::derive>()?;
let derives;
bracketed!(derives in body);
let derives: Punctuated<Path, Token![,]> =
derives.parse_terminated(Path::parse)?;
try_comma()?;
if let Some(old) = derive_paths.replace(derives.into_iter().collect()) {
panic!("Specified multiple derives: {:?}", old);
}
continue;
}
if body.lookahead1().peek(kw::DEBUG_FORMAT) {
body.parse::<kw::DEBUG_FORMAT>()?;
body.parse::<Token![=]>()?;
if body.lookahead1().peek(kw::custom) {
body.parse::<kw::custom>()?;
if let Some(old) = debug_format.replace(DebugFormat::Custom) {
panic!("Specified multiple debug format options: {:?}", old);
}
} else {
let format_str: LitStr = body.parse()?;
if let Some(old) =
debug_format.replace(DebugFormat::Format(format_str.value()))
{
panic!("Specified multiple debug format options: {:?}", old);
}
}
try_comma()?;
continue;
}
if body.lookahead1().peek(kw::MAX) {
body.parse::<kw::MAX>()?;
body.parse::<Token![=]>()?;
let val: Lit = body.parse()?;
try_comma()?;
if let Some(old) = max.replace(val) {
panic!("Specified multiple MAX: {:?}", old);
}
continue;
}
if body.lookahead1().peek(kw::ENCODABLE) {
body.parse::<kw::ENCODABLE>()?;
body.parse::<Token![=]>()?;
body.parse::<kw::custom>()?;
try_comma()?;
encodable = false;
continue;
}
// We've parsed everything that the user provided, so we're done
if body.is_empty() {
break;
}
// Otherwise, we are parsng a user-defined constant
let const_attrs = body.call(Attribute::parse_outer)?;
body.parse::<Token![const]>()?;
let const_name: Ident = body.parse()?;
body.parse::<Token![=]>()?;
let const_val: Expr = body.parse()?;
try_comma()?;
consts.push(quote! { #(#const_attrs)* #vis const #const_name: #name = #name::from_u32(#const_val); });
}
}
let derive_paths = derive_paths.unwrap_or_else(Vec::new);
let debug_format = debug_format.unwrap_or(DebugFormat::Format("{}".to_string()));
// shave off 256 indices at the end to allow space for packing these indices into enums
let max = max.unwrap_or_else(|| Lit::Int(LitInt::new("0xFFFF_FF00", Span::call_site())));
let encodable_impls = if encodable {
quote! {
impl<D: ::rustc_serialize::Decoder> ::rustc_serialize::Decodable<D> for #name {
fn decode(d: &mut D) -> Self {
Self::from_u32(d.read_u32())
}
}
impl<E: ::rustc_serialize::Encoder> ::rustc_serialize::Encodable<E> for #name {
fn encode(&self, e: &mut E) -> Result<(), E::Error> {
e.emit_u32(self.private)
}
}
}
} else {
quote! {}
};
let debug_impl = match debug_format {
DebugFormat::Custom => quote! {},
DebugFormat::Format(format) => {
quote! {
impl ::std::fmt::Debug for #name {
fn fmt(&self, fmt: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result {
write!(fmt, #format, self.as_u32())
}
}
}
}
};
Ok(Self(quote! {
#(#attrs)*
#[derive(Copy, PartialEq, Eq, Hash, PartialOrd, Ord, #(#derive_paths),*)]
#[rustc_layout_scalar_valid_range_end(#max)]
#vis struct #name {
private: u32,
}
#(#consts)*
impl Clone for #name {
#[inline]
fn clone(&self) -> Self {
*self
}
}
impl #name {
/// Maximum value the index can take, as a `u32`.
#vis const MAX_AS_U32: u32 = #max;
/// Maximum value the index can take.
#vis const MAX: Self = Self::from_u32(#max);
/// Creates a new index from a given `usize`.
///
/// # Panics
///
/// Will panic if `value` exceeds `MAX`.
#[inline]
#vis const fn from_usize(value: usize) -> Self {
assert!(value <= (#max as usize));
// SAFETY: We just checked that `value <= max`.
unsafe {
Self::from_u32_unchecked(value as u32)
}
}
/// Creates a new index from a given `u32`.
///
/// # Panics
///
/// Will panic if `value` exceeds `MAX`.
#[inline]
#vis const fn from_u32(value: u32) -> Self {
assert!(value <= #max);
// SAFETY: We just checked that `value <= max`.
unsafe {
Self::from_u32_unchecked(value)
}
}
/// Creates a new index from a given `u32`.
///
/// # Safety
///
/// The provided value must be less than or equal to the maximum value for the newtype.
/// Providing a value outside this range is undefined due to layout restrictions.
///
/// Prefer using `from_u32`.
#[inline]
#vis const unsafe fn from_u32_unchecked(value: u32) -> Self {
Self { private: value }
}
/// Extracts the value of this index as a `usize`.
#[inline]
#vis const fn index(self) -> usize {
self.as_usize()
}
/// Extracts the value of this index as a `u32`.
#[inline]
#vis const fn as_u32(self) -> u32 {
self.private
}
/// Extracts the value of this index as a `usize`.
#[inline]
#vis const fn as_usize(self) -> usize {
self.as_u32() as usize
}
}
impl std::ops::Add<usize> for #name {
type Output = Self;
fn add(self, other: usize) -> Self {
Self::from_usize(self.index() + other)
}
}
impl rustc_index::vec::Idx for #name {
#[inline]
fn new(value: usize) -> Self {
Self::from_usize(value)
}
#[inline]
fn index(self) -> usize {
self.as_usize()
}
}
impl ::std::iter::Step for #name {
#[inline]
fn steps_between(start: &Self, end: &Self) -> Option<usize> {
<usize as ::std::iter::Step>::steps_between(
&Self::index(*start),
&Self::index(*end),
)
}
#[inline]
fn forward_checked(start: Self, u: usize) -> Option<Self> {
Self::index(start).checked_add(u).map(Self::from_usize)
}
#[inline]
fn backward_checked(start: Self, u: usize) -> Option<Self> {
Self::index(start).checked_sub(u).map(Self::from_usize)
}
}
// Safety: The implementation of `Step` upholds all invariants.
unsafe impl ::std::iter::TrustedStep for #name {}
impl From<#name> for u32 {
#[inline]
fn from(v: #name) -> u32 {
v.as_u32()
}
}
impl From<#name> for usize {
#[inline]
fn from(v: #name) -> usize {
v.as_usize()
}
}
impl From<usize> for #name {
#[inline]
fn from(value: usize) -> Self {
Self::from_usize(value)
}
}
impl From<u32> for #name {
#[inline]
fn from(value: u32) -> Self {
Self::from_u32(value)
}
}
#encodable_impls
#debug_impl
}))
}
}
pub fn newtype(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
let input = parse_macro_input!(input as Newtype);
input.0.into()
}