diff --git a/CHANGELOG.md b/CHANGELOG.md
index 78075b4a0..787cb76a4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -81,8 +81,41 @@ for message in compilation_info
 By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410)
 
-### New features
+#### 64 bit integer atomic support in shaders.
+Add support for 64 bit integer atomic operations in shaders.
+
+Add the following flags to `wgpu_types::Features`:
+
+- `SHADER_INT64_ATOMIC_ALL_OPS` enables all atomic operations on `atomic<i64>` and
+  `atomic<u64>` values.
+
+- `SHADER_INT64_ATOMIC_MIN_MAX` is a subset of the above, enabling only
+  `AtomicFunction::Min` and `AtomicFunction::Max` operations on `atomic<i64>` and
+  `atomic<u64>` values in the `Storage` address space. These are the only 64-bit
+  atomic operations available on Metal as of 3.1.
+
+Add corresponding flags to `naga::valid::Capabilities`. These are supported by the
+WGSL front end, and all Naga backends.
+
+Platform support:
+
+- On Direct3D 12, in `D3D12_FEATURE_DATA_D3D12_OPTIONS9`, if
+  `AtomicInt64OnTypedResourceSupported` and `AtomicInt64OnGroupSharedSupported` are
+  both available, then both wgpu features described above are available.
+
+- On Metal, `SHADER_INT64_ATOMIC_MIN_MAX` is available on Apple9 hardware, and on
+  hardware that advertises both Apple8 and Mac2 support. This also requires Metal
+  Shading Language 2.4 or later. Metal does not yet support the more general
+  `SHADER_INT64_ATOMIC_ALL_OPS`.
+
+- On Vulkan, if the `VK_KHR_shader_atomic_int64` extension is available with both the
+  `shader_buffer_int64_atomics` and `shader_shared_int64_atomics` features, then both
+  wgpu features described above are available.
+
+By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383)
+
+### New features
 
 #### Vulkan
 
 - Added a `PipelineCache` resource to allow using Vulkan pipeline caches.
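Relating to the 64-bit atomics entry above, here is a minimal sketch of how an application might opt into the new features at device creation. This is an editor's illustration, not part of the patch: `request_int64_atomic_device` is a hypothetical helper, `SHADER_INT64` is the pre-existing feature flag for 64-bit scalars in shaders, and the exact `DeviceDescriptor` fields and `request_device` signature are assumed to match the wgpu API around this release.

// Sketch: request the strongest 64-bit atomic feature the adapter offers.
async fn request_int64_atomic_device(
    adapter: &wgpu::Adapter,
) -> Result<(wgpu::Device, wgpu::Queue), wgpu::RequestDeviceError> {
    let supported = adapter.features();
    let mut required_features = wgpu::Features::empty();
    if supported.contains(wgpu::Features::SHADER_INT64_ATOMIC_ALL_OPS) {
        // Vulkan and Direct3D 12 adapters that report full 64-bit atomic support.
        required_features |=
            wgpu::Features::SHADER_INT64 | wgpu::Features::SHADER_INT64_ATOMIC_ALL_OPS;
    } else if supported.contains(wgpu::Features::SHADER_INT64_ATOMIC_MIN_MAX) {
        // Metal 3.1-class hardware: only storage-buffer atomicMin/atomicMax.
        required_features |=
            wgpu::Features::SHADER_INT64 | wgpu::Features::SHADER_INT64_ATOMIC_MIN_MAX;
    }
    adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: Some("int64-atomics device"),
                required_features,
                ..Default::default()
            },
            None, // no API trace
        )
        .await
}

A shader module created on such a device can then declare `atomic<u64>` / `atomic<i64>` storage variables, as in the naga snapshot tests added further down in this patch.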
By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319) diff --git a/naga/src/back/dot/mod.rs b/naga/src/back/dot/mod.rs index 9a7702b3f..dffd5234b 100644 --- a/naga/src/back/dot/mod.rs +++ b/naga/src/back/dot/mod.rs @@ -244,7 +244,9 @@ impl StatementGraph { value, result, } => { - self.emits.push((id, result)); + if let Some(result) = result { + self.emits.push((id, result)); + } self.dependencies.push((id, pointer, "pointer")); self.dependencies.push((id, value, "value")); if let crate::AtomicFunction::Exchange { compare: Some(cmp) } = *fun { diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index 7138c2513..caca38254 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -2368,11 +2368,13 @@ impl<'a, W: Write> Writer<'a, W> { result, } => { write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - let res_ty = ctx.resolve_type(result, &self.module.types); - self.write_value_type(res_ty)?; - write!(self.out, " {res_name} = ")?; - self.named_expressions.insert(result, res_name); + if let Some(result) = result { + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + let res_ty = ctx.resolve_type(result, &self.module.types); + self.write_value_type(res_ty)?; + write!(self.out, " {res_name} = ")?; + self.named_expressions.insert(result, res_name); + } let fun_str = fun.to_glsl(); write!(self.out, "atomic{fun_str}(")?; diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index b4db0bcd7..dea37b6b2 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -1919,11 +1919,20 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { result, } => { write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - match func_ctx.info[result].ty { - proc::TypeResolution::Handle(handle) => self.write_type(module, handle)?, - proc::TypeResolution::Value(ref value) => { - self.write_value_type(module, value)? + let res_name = match result { + None => None, + Some(result) => { + let name = format!("{}{}", back::BAKE_PREFIX, result.index()); + match func_ctx.info[result].ty { + proc::TypeResolution::Handle(handle) => { + self.write_type(module, handle)? + } + proc::TypeResolution::Value(ref value) => { + self.write_value_type(module, value)? + } + }; + write!(self.out, " {name}; ")?; + Some((result, name)) } }; @@ -1934,7 +1943,6 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { .unwrap(); let fun_str = fun.to_hlsl_suffix(); - write!(self.out, " {res_name}; ")?; match pointer_space { crate::AddressSpace::WorkGroup => { write!(self.out, "Interlocked{fun_str}(")?; @@ -1970,8 +1978,16 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { _ => {} } self.write_expr(module, value, func_ctx)?; - writeln!(self.out, ", {res_name});")?; - self.named_expressions.insert(result, res_name); + + // The `original_value` out parameter is optional for all the + // `Interlocked` functions we generate other than + // `InterlockedExchange`. + if let Some((result, name)) = res_name { + write!(self.out, ", {name}")?; + self.named_expressions.insert(result, name); + } + + writeln!(self.out, ");")?; } Statement::WorkGroupUniformLoad { pointer, result } => { self.write_barrier(crate::Barrier::WORK_GROUP, level)?; diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index d80d012ad..37e0b98d7 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -1,6 +1,9 @@ /*! Backend for [MSL][msl] (Metal Shading Language). 
+This backend does not support the [`SHADER_INT64_ATOMIC_ALL_OPS`][all-atom] +capability. + ## Binding model Metal's bindings are flat per resource. Since there isn't an obvious mapping @@ -24,6 +27,8 @@ For the result type, if it's a structure, we re-compose it with a temporary valu holding the result. [msl]: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf +[all-atom]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + */ use crate::{arena::Handle, proc::index, valid::ModuleInfo}; @@ -661,21 +666,3 @@ fn test_error_size() { use std::mem::size_of; assert_eq!(size_of::(), 32); } - -impl crate::AtomicFunction { - fn to_msl(self) -> Result<&'static str, Error> { - Ok(match self { - Self::Add => "fetch_add", - Self::Subtract => "fetch_sub", - Self::And => "fetch_and", - Self::InclusiveOr => "fetch_or", - Self::ExclusiveOr => "fetch_xor", - Self::Min => "fetch_min", - Self::Max => "fetch_max", - Self::Exchange { compare: None } => "exchange", - Self::Exchange { compare: Some(_) } => Err(Error::FeatureNotImplemented( - "atomic CompareExchange".to_string(), - ))?, - }) - } -} diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index ba2876713..009b57701 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -3058,11 +3058,22 @@ impl Writer { value, result, } => { + // This backend supports `SHADER_INT64_ATOMIC_MIN_MAX` but not + // `SHADER_INT64_ATOMIC_ALL_OPS`, so we can assume that if `result` is + // `Some`, we are not operating on a 64-bit value, and that if we are + // operating on a 64-bit value, `result` is `None`. write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - self.start_baking_expression(result, &context.expression, &res_name)?; - self.named_expressions.insert(result, res_name); - let fun_str = fun.to_msl()?; + let fun_str = if let Some(result) = result { + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + self.start_baking_expression(result, &context.expression, &res_name)?; + self.named_expressions.insert(result, res_name); + fun.to_msl()? + } else if context.expression.resolve_type(value).scalar_width() == Some(8) { + fun.to_msl_64_bit()? + } else { + fun.to_msl()? 
+ }; + self.put_atomic_operation(pointer, fun_str, value, &context.expression)?; // done writeln!(self.out, ";")?; @@ -5914,3 +5925,31 @@ fn test_stack_size() { } } } + +impl crate::AtomicFunction { + fn to_msl(self) -> Result<&'static str, Error> { + Ok(match self { + Self::Add => "fetch_add", + Self::Subtract => "fetch_sub", + Self::And => "fetch_and", + Self::InclusiveOr => "fetch_or", + Self::ExclusiveOr => "fetch_xor", + Self::Min => "fetch_min", + Self::Max => "fetch_max", + Self::Exchange { compare: None } => "exchange", + Self::Exchange { compare: Some(_) } => Err(Error::FeatureNotImplemented( + "atomic CompareExchange".to_string(), + ))?, + }) + } + + fn to_msl_64_bit(self) -> Result<&'static str, Error> { + Ok(match self { + Self::Min => "min", + Self::Max => "max", + _ => Err(Error::FeatureNotImplemented( + "64-bit atomic operation other than min/max".to_string(), + ))?, + }) + } +} diff --git a/naga/src/back/pipeline_constants.rs b/naga/src/back/pipeline_constants.rs index 4d976e366..2686a08a2 100644 --- a/naga/src/back/pipeline_constants.rs +++ b/naga/src/back/pipeline_constants.rs @@ -617,7 +617,9 @@ fn adjust_stmt(new_pos: &[Handle], stmt: &mut Statement) { } => { adjust(pointer); adjust(value); - adjust(result); + if let Some(ref mut result) = *result { + adjust(result); + } match *fun { crate::AtomicFunction::Exchange { compare: Some(ref mut compare), diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 5e6dd0ab8..ad7514ae9 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -2423,9 +2423,15 @@ impl<'w> BlockContext<'w> { result, } => { let id = self.gen_id(); - let result_type_id = self.get_expression_type_id(&self.fun_info[result].ty); + // Compare-and-exchange operations produce a struct result, + // so use `result`'s type if it is available. For no-result + // operations, fall back to `value`'s type. + let result_type_id = + self.get_expression_type_id(&self.fun_info[result.unwrap_or(value)].ty); - self.cached[result] = id; + if let Some(result) = result { + self.cached[result] = id; + } let pointer_id = match self.write_expression_pointer(pointer, &mut block, None)? 
{ diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 73a16c273..4b1aa3026 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -878,6 +878,9 @@ impl Writer { crate::TypeInner::RayQuery => { self.require_any("Ray Query", &[spirv::Capability::RayQueryKHR])?; } + crate::TypeInner::Atomic(crate::Scalar { width: 8, kind: _ }) => { + self.require_any("64 bit integer atomics", &[spirv::Capability::Int64Atomics])?; + } _ => {} } Ok(()) diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index 7c2887850..8b61dbd2c 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -754,9 +754,11 @@ impl Writer { result, } => { write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - self.start_named_expr(module, result, func_ctx, &res_name)?; - self.named_expressions.insert(result, res_name); + if let Some(result) = result { + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + self.start_named_expr(module, result, func_ctx, &res_name)?; + self.named_expressions.insert(result, res_name); + } let fun_str = fun.to_wgsl(); write!(self.out, "atomic{fun_str}(")?; diff --git a/naga/src/compact/statements.rs b/naga/src/compact/statements.rs index a124281bc..ba3e19f5b 100644 --- a/naga/src/compact/statements.rs +++ b/naga/src/compact/statements.rs @@ -75,7 +75,9 @@ impl FunctionTracer<'_> { self.expressions_used.insert(pointer); self.trace_atomic_function(fun); self.expressions_used.insert(value); - self.expressions_used.insert(result); + if let Some(result) = result { + self.expressions_used.insert(result); + } } St::WorkGroupUniformLoad { pointer, result } => { self.expressions_used.insert(pointer); @@ -255,7 +257,9 @@ impl FunctionMap { adjust(pointer); self.adjust_atomic_function(fun); adjust(value); - adjust(result); + if let Some(ref mut result) = *result { + adjust(result); + } } St::WorkGroupUniformLoad { ref mut pointer, diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index 480f77134..0301ea425 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -63,6 +63,7 @@ pub const SUPPORTED_CAPABILITIES: &[spirv::Capability] = &[ spirv::Capability::Int8, spirv::Capability::Int16, spirv::Capability::Int64, + spirv::Capability::Int64Atomics, spirv::Capability::Float16, spirv::Capability::Float64, spirv::Capability::Geometry, @@ -4028,7 +4029,7 @@ impl> Frontend { pointer: p_lexp_handle, fun: crate::AtomicFunction::Add, value: one_lexp_handle, - result: r_lexp_handle, + result: Some(r_lexp_handle), }; block.push(stmt, span); } diff --git a/naga/src/front/type_gen.rs b/naga/src/front/type_gen.rs index 34730c1db..1cd9f7f37 100644 --- a/naga/src/front/type_gen.rs +++ b/naga/src/front/type_gen.rs @@ -291,10 +291,10 @@ impl crate::Module { name: Some("exchanged".to_string()), ty: bool_ty, binding: None, - offset: 4, + offset: scalar.width as u32, }, ], - span: 8, + span: scalar.width as u32 * 2, }, } } diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index e7cce1772..7c5954d06 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -1491,6 +1491,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { function, arguments, &mut ctx.as_expression(block, &mut emitter), + true, )?; block.extend(emitter.finish(&ctx.function.expressions)); return Ok(()); @@ -1747,7 +1748,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { ref arguments, } => { let handle = self - .call(span, 
function, arguments, ctx)? + .call(span, function, arguments, ctx, false)? .ok_or(Error::FunctionReturnsVoid(function.span))?; return Ok(Typed::Plain(handle)); } @@ -1941,6 +1942,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { function: &ast::Ident<'source>, arguments: &[Handle>], ctx: &mut ExpressionContext<'source, '_, '_>, + is_statement: bool, ) -> Result>, Error<'source>> { match ctx.globals.get(function.name) { Some(&LoweredGlobalDecl::Type(ty)) => { @@ -2086,7 +2088,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { self.subgroup_gather_helper(span, mode, arguments, ctx)?, )); } else if let Some(fun) = crate::AtomicFunction::map(function.name) { - return Ok(Some(self.atomic_helper(span, fun, arguments, ctx)?)); + return self.atomic_helper(span, fun, arguments, is_statement, ctx); } else { match function.name { "select" => { @@ -2168,7 +2170,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { compare: Some(compare), }, value, - result, + result: Some(result), }, span, ); @@ -2459,25 +2461,38 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { span: Span, fun: crate::AtomicFunction, args: &[Handle>], + is_statement: bool, ctx: &mut ExpressionContext<'source, '_, '_>, - ) -> Result, Error<'source>> { + ) -> Result>, Error<'source>> { let mut args = ctx.prepare_args(args, 2, span); let pointer = self.atomic_pointer(args.next()?, ctx)?; - - let value = args.next()?; - let value = self.expression(value, ctx)?; - let ty = ctx.register_type(value)?; - + let value = self.expression(args.next()?, ctx)?; + let value_inner = resolve_inner!(ctx, value); args.finish()?; - let result = ctx.interrupt_emitter( - crate::Expression::AtomicResult { - ty, - comparison: false, - }, - span, - )?; + // If we don't use the return value of a 64-bit `min` or `max` + // operation, generate a no-result form of the `Atomic` statement, so + // that we can pass validation with only `SHADER_INT64_ATOMIC_MIN_MAX` + // whenever possible. + let is_64_bit_min_max = + matches!(fun, crate::AtomicFunction::Min | crate::AtomicFunction::Max) + && matches!( + *value_inner, + crate::TypeInner::Scalar(crate::Scalar { width: 8, .. }) + ); + let result = if is_64_bit_min_max && is_statement { + None + } else { + let ty = ctx.register_type(value)?; + Some(ctx.interrupt_emitter( + crate::Expression::AtomicResult { + ty, + comparison: false, + }, + span, + )?) + }; let rctx = ctx.runtime_expression_ctx(span)?; rctx.block.push( crate::Statement::Atomic { diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 2ae0eb00c..b283244c6 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -1612,8 +1612,29 @@ pub enum Expression { }, /// Result of calling another function. CallResult(Handle), + /// Result of an atomic operation. + /// + /// This expression must be referred to by the [`result`] field of exactly one + /// [`Atomic`][stmt] statement somewhere in the same function. Let `T` be the + /// scalar type contained by the [`Atomic`][type] value that the statement + /// operates on. + /// + /// If `comparison` is `false`, then `ty` must be the scalar type `T`. + /// + /// If `comparison` is `true`, then `ty` must be a [`Struct`] with two members: + /// + /// - A member named `old_value`, whose type is `T`, and + /// + /// - A member named `exchanged`, of type [`BOOL`]. 
+ /// + /// [`result`]: Statement::Atomic::result + /// [stmt]: Statement::Atomic + /// [type]: TypeInner::Atomic + /// [`Struct`]: TypeInner::Struct + /// [`BOOL`]: Scalar::BOOL AtomicResult { ty: Handle, comparison: bool }, + /// Result of a [`WorkGroupUniformLoad`] statement. /// /// [`WorkGroupUniformLoad`]: Statement::WorkGroupUniformLoad @@ -1890,15 +1911,66 @@ pub enum Statement { /// Atomic function. Atomic { /// Pointer to an atomic value. + /// + /// This must be a [`Pointer`] to an [`Atomic`] value. The atomic's + /// scalar type may be [`I32`] or [`U32`]. + /// + /// If [`SHADER_INT64_ATOMIC_MIN_MAX`] or [`SHADER_INT64_ATOMIC_ALL_OPS`] are + /// enabled, this may also be [`I64`] or [`U64`]. + /// + /// [`Pointer`]: TypeInner::Pointer + /// [`Atomic`]: TypeInner::Atomic + /// [`I32`]: Scalar::I32 + /// [`U32`]: Scalar::U32 + /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX + /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + /// [`I64`]: Scalar::I64 + /// [`U64`]: Scalar::U64 pointer: Handle, - /// Function to run on the atomic. + + /// Function to run on the atomic value. + /// + /// If [`pointer`] refers to a 64-bit atomic value, then: + /// + /// - The [`SHADER_INT64_ATOMIC_ALL_OPS`] capability allows any [`AtomicFunction`] + /// value here. + /// + /// - The [`SHADER_INT64_ATOMIC_MIN_MAX`] capability allows + /// [`AtomicFunction::Min`] and [`AtomicFunction::Max`] here. + /// + /// - If neither of those capabilities are present, then 64-bit scalar + /// atomics are not allowed. + /// + /// [`pointer`]: Statement::Atomic::pointer + /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX + /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS fun: AtomicFunction, + /// Value to use in the function. + /// + /// This must be a scalar of the same type as [`pointer`]'s atomic's scalar type. + /// + /// [`pointer`]: Statement::Atomic::pointer value: Handle, + /// [`AtomicResult`] expression representing this function's result. /// + /// If [`fun`] is [`Exchange { compare: None }`], this must be `Some`, + /// as otherwise that operation would be equivalent to a simple [`Store`] + /// to the atomic. + /// + /// Otherwise, this may be `None` if the return value of the operation is not needed. + /// + /// If `pointer` refers to a 64-bit atomic value, [`SHADER_INT64_ATOMIC_MIN_MAX`] + /// is enabled, and [`SHADER_INT64_ATOMIC_ALL_OPS`] is not, this must be `None`. + /// /// [`AtomicResult`]: crate::Expression::AtomicResult - result: Handle, + /// [`fun`]: Statement::Atomic::fun + /// [`Store`]: Statement::Store + /// [`Exchange { compare: None }`]: AtomicFunction::Exchange + /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX + /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + result: Option>, }, /// Load uniformly from a uniform pointer in the workgroup address space. 
/// diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index c6e225990..89bceae06 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -1,7 +1,4 @@ -use super::{ - compose::validate_compose, validate_atomic_compare_exchange_struct, FunctionInfo, ModuleInfo, - ShaderStages, TypeFlags, -}; +use super::{compose::validate_compose, FunctionInfo, ModuleInfo, ShaderStages, TypeFlags}; use crate::arena::UniqueArena; use crate::{ @@ -114,8 +111,6 @@ pub enum ExpressionError { WrongArgumentCount(crate::MathFunction), #[error("Argument [{1}] to {0:?} as expression {2:?} has an invalid type.")] InvalidArgumentType(crate::MathFunction, u32, Handle), - #[error("Atomic result type can't be {0:?}")] - InvalidAtomicResultType(Handle), #[error( "workgroupUniformLoad result type can't be {0:?}. It can only be a constructible type." )] @@ -1582,30 +1577,11 @@ impl super::Validator { ShaderStages::all() } E::CallResult(function) => mod_info.functions[function.index()].available_stages, - E::AtomicResult { ty, comparison } => { - let scalar_predicate = |ty: &crate::TypeInner| match ty { - &crate::TypeInner::Scalar( - scalar @ Sc { - kind: crate::ScalarKind::Uint | crate::ScalarKind::Sint, - .. - }, - ) => self.check_width(scalar).is_ok(), - _ => false, - }; - let good = match &module.types[ty].inner { - ty if !comparison => scalar_predicate(ty), - &crate::TypeInner::Struct { ref members, .. } if comparison => { - validate_atomic_compare_exchange_struct( - &module.types, - members, - scalar_predicate, - ) - } - _ => false, - }; - if !good { - return Err(ExpressionError::InvalidAtomicResultType(ty)); - } + E::AtomicResult { .. } => { + // These expressions are validated when we check the `Atomic` statement + // that refers to them, because we have all the information we need at + // that point. The checks driven by `Validator::needs_visit` ensure + // that this expression is indeed visited by one `Atomic` statement. ShaderStages::all() } E::WorkGroupUniformLoadResult { ty } => { diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index d92cda87f..ee80e4d8e 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -43,10 +43,22 @@ pub enum CallError { pub enum AtomicError { #[error("Pointer {0:?} to atomic is invalid.")] InvalidPointer(Handle), + #[error("Address space {0:?} does not support 64bit atomics.")] + InvalidAddressSpace(crate::AddressSpace), #[error("Operand {0:?} has invalid type.")] InvalidOperand(Handle), + #[error("Result expression {0:?} is not an `AtomicResult` expression")] + InvalidResultExpression(Handle), + #[error("Result expression {0:?} is marked as an `exchange`")] + ResultExpressionExchange(Handle), + #[error("Result expression {0:?} is not marked as an `exchange`")] + ResultExpressionNotExchange(Handle), #[error("Result type for {0:?} doesn't match the statement")] ResultTypeMismatch(Handle), + #[error("Exchange operations must return a value")] + MissingReturnValue, + #[error("Capability {0:?} is required")] + MissingCapability(super::Capabilities), #[error("Result expression {0:?} is populated by multiple `Atomic` statements")] ResultAlreadyPopulated(Handle), } @@ -350,79 +362,189 @@ impl super::Validator { pointer: Handle, fun: &crate::AtomicFunction, value: Handle, - result: Handle, + result: Option>, + span: crate::Span, context: &BlockContext, ) -> Result<(), WithSpan> { + // The `pointer` operand must be a pointer to an atomic value. 
let pointer_inner = context.resolve_type(pointer, &self.valid_expression_set)?; - let ptr_scalar = match *pointer_inner { - crate::TypeInner::Pointer { base, .. } => match context.types[base].inner { - crate::TypeInner::Atomic(scalar) => scalar, - ref other => { - log::error!("Atomic pointer to type {:?}", other); - return Err(AtomicError::InvalidPointer(pointer) - .with_span_handle(pointer, context.expressions) - .into_other()); - } - }, - ref other => { - log::error!("Atomic on type {:?}", other); - return Err(AtomicError::InvalidPointer(pointer) - .with_span_handle(pointer, context.expressions) - .into_other()); - } + let crate::TypeInner::Pointer { + base: pointer_base, + space: pointer_space, + } = *pointer_inner + else { + log::error!("Atomic operation on type {:?}", *pointer_inner); + return Err(AtomicError::InvalidPointer(pointer) + .with_span_handle(pointer, context.expressions) + .into_other()); + }; + let crate::TypeInner::Atomic(pointer_scalar) = context.types[pointer_base].inner else { + log::error!( + "Atomic pointer to type {:?}", + context.types[pointer_base].inner + ); + return Err(AtomicError::InvalidPointer(pointer) + .with_span_handle(pointer, context.expressions) + .into_other()); }; + // The `value` operand must be a scalar of the same type as the atomic. let value_inner = context.resolve_type(value, &self.valid_expression_set)?; - match *value_inner { - crate::TypeInner::Scalar(scalar) if scalar == ptr_scalar => {} - ref other => { - log::error!("Atomic operand type {:?}", other); - return Err(AtomicError::InvalidOperand(value) + let crate::TypeInner::Scalar(value_scalar) = *value_inner else { + log::error!("Atomic operand type {:?}", *value_inner); + return Err(AtomicError::InvalidOperand(value) + .with_span_handle(value, context.expressions) + .into_other()); + }; + if pointer_scalar != value_scalar { + log::error!("Atomic operand type {:?}", *value_inner); + return Err(AtomicError::InvalidOperand(value) + .with_span_handle(value, context.expressions) + .into_other()); + } + + // Check for the special restrictions on 64-bit atomic operations. + // + // We don't need to consider other widths here: this function has already checked + // that `pointer`'s type is an `Atomic`, and `validate_type` has already checked + // that that `Atomic` type has a permitted scalar width. + if pointer_scalar.width == 8 { + // `Capabilities::SHADER_INT64_ATOMIC_ALL_OPS` enables all sorts of 64-bit + // atomic operations. + if self + .capabilities + .contains(super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS) + { + // okay + } else { + // `Capabilities::SHADER_INT64_ATOMIC_MIN_MAX` allows `Min` and + // `Max` on operations in `Storage`, without a return value. + if matches!( + *fun, + crate::AtomicFunction::Min | crate::AtomicFunction::Max + ) && matches!(pointer_space, crate::AddressSpace::Storage { .. }) + && result.is_none() + { + if !self + .capabilities + .contains(super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX) + { + log::error!("Int64 min-max atomic operations are not supported"); + return Err(AtomicError::MissingCapability( + super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX, + ) + .with_span_handle(value, context.expressions) + .into_other()); + } + } else { + // Otherwise, we require the full 64-bit atomic capability. 
+ log::error!("Int64 atomic operations are not supported"); + return Err(AtomicError::MissingCapability( + super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, + ) .with_span_handle(value, context.expressions) .into_other()); + } } } - if let crate::AtomicFunction::Exchange { compare: Some(cmp) } = *fun { - if context.resolve_type(cmp, &self.valid_expression_set)? != value_inner { - log::error!("Atomic exchange comparison has a different type from the value"); - return Err(AtomicError::InvalidOperand(cmp) - .with_span_handle(cmp, context.expressions) - .into_other()); - } - } + // The result expression must be appropriate to the operation. + match result { + Some(result) => { + // The `result` handle must refer to an `AtomicResult` expression. + let crate::Expression::AtomicResult { + ty: result_ty, + comparison, + } = context.expressions[result] + else { + return Err(AtomicError::InvalidResultExpression(result) + .with_span_handle(result, context.expressions) + .into_other()); + }; - self.emit_expression(result, context)?; - match context.expressions[result] { - crate::Expression::AtomicResult { ty, comparison } - if { - let scalar_predicate = - |ty: &crate::TypeInner| *ty == crate::TypeInner::Scalar(ptr_scalar); - match &context.types[ty].inner { - ty if !comparison => scalar_predicate(ty), - &crate::TypeInner::Struct { ref members, .. } if comparison => { - validate_atomic_compare_exchange_struct( - context.types, - members, - scalar_predicate, - ) - } - _ => false, - } - } => - { + // Note that this expression has been visited by the proper kind + // of statement. if !self.needs_visit.remove(result.index()) { return Err(AtomicError::ResultAlreadyPopulated(result) .with_span_handle(result, context.expressions) .into_other()); } + + // The constraints on the result type depend on the atomic function. + if let crate::AtomicFunction::Exchange { + compare: Some(compare), + } = *fun + { + // The comparison value must be a scalar of the same type as the + // atomic we're operating on. + let compare_inner = + context.resolve_type(compare, &self.valid_expression_set)?; + if !compare_inner.equivalent(value_inner, context.types) { + log::error!( + "Atomic exchange comparison has a different type from the value" + ); + return Err(AtomicError::InvalidOperand(compare) + .with_span_handle(compare, context.expressions) + .into_other()); + } + + // The result expression must be an `__atomic_compare_exchange_result` + // struct whose `old_value` member is of the same type as the atomic + // we're operating on. + let crate::TypeInner::Struct { ref members, .. } = + context.types[result_ty].inner + else { + return Err(AtomicError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()); + }; + if !validate_atomic_compare_exchange_struct( + context.types, + members, + |ty: &crate::TypeInner| *ty == crate::TypeInner::Scalar(pointer_scalar), + ) { + return Err(AtomicError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + + // The result expression must be for a comparison operation. + if !comparison { + return Err(AtomicError::ResultExpressionNotExchange(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + } else { + // The result expression must be a scalar of the same type as the + // atomic we're operating on. 
+ let result_inner = &context.types[result_ty].inner; + if !result_inner.equivalent(value_inner, context.types) { + return Err(AtomicError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + + // The result expression must not be for a comparison. + if comparison { + return Err(AtomicError::ResultExpressionExchange(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + } + self.emit_expression(result, context)?; } - _ => { - return Err(AtomicError::ResultTypeMismatch(result) - .with_span_handle(result, context.expressions) - .into_other()) + + None => { + // Exchange operations must always produce a value. + if let crate::AtomicFunction::Exchange { compare: None } = *fun { + log::error!("Atomic exchange's value is unused"); + return Err(AtomicError::MissingReturnValue + .with_span_static(span, "atomic exchange operation") + .into_other()); + } } } + Ok(()) } fn validate_subgroup_operation( @@ -1017,7 +1139,7 @@ impl super::Validator { value, result, } => { - self.validate_atomic(pointer, fun, value, result, context)?; + self.validate_atomic(pointer, fun, value, result, span, context)?; } S::WorkGroupUniformLoad { pointer, result } => { stages &= super::ShaderStages::COMPUTE; diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs index 8f7820405..297b67dff 100644 --- a/naga/src/valid/handles.rs +++ b/naga/src/valid/handles.rs @@ -530,7 +530,9 @@ impl super::Validator { crate::AtomicFunction::Exchange { compare } => validate_expr_opt(compare)?, }; validate_expr(value)?; - validate_expr(result)?; + if let Some(result) = result { + validate_expr(result)?; + } Ok(()) } crate::Statement::WorkGroupUniformLoad { pointer, result } => { diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index d86c23c1e..113dc0cd3 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -127,6 +127,18 @@ bitflags::bitflags! { const SUBGROUP = 0x10000; /// Support for subgroup barriers. const SUBGROUP_BARRIER = 0x20000; + /// Support for [`AtomicFunction::Min`] and [`AtomicFunction::Max`] on + /// 64-bit integers in the [`Storage`] address space, when the return + /// value is not used. + /// + /// This is the only 64-bit atomic functionality available on Metal 3.1. + /// + /// [`AtomicFunction::Min`]: crate::AtomicFunction::Min + /// [`AtomicFunction::Max`]: crate::AtomicFunction::Max + /// [`Storage`]: crate::AddressSpace::Storage + const SHADER_INT64_ATOMIC_MIN_MAX = 0x40000; + /// Support for all atomic operations on 64-bit integers. 
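Stepping back from the validator internals for a moment: the practical effect of the rules above is that a 64-bit `atomicMax`/`atomicMin` whose result is discarded lowers to `Statement::Atomic { result: None, .. }` and therefore passes validation with only `SHADER_INT64_ATOMIC_MIN_MAX`. A sketch of exercising that through the public naga API (editor's illustration, not part of the patch; assumes naga is built with the `wgsl-in` feature):

use naga::valid::{Capabilities, ValidationFlags, Validator};

// WGSL mirroring the min-max snapshot test: the result of atomicMax is unused.
const SHADER: &str = "
    @group(0) @binding(0)
    var<storage, read_write> counter: atomic<u64>;

    @compute @workgroup_size(1)
    fn cs_main() {
        atomicMax(&counter, 1lu);
    }
";

fn main() {
    let module = naga::front::wgsl::parse_str(SHADER).expect("front end accepts 64-bit atomics");
    // Grant only the min-max subset; SHADER_INT64 is still needed for the
    // 64-bit scalar literal itself.
    let caps = Capabilities::SHADER_INT64 | Capabilities::SHADER_INT64_ATOMIC_MIN_MAX;
    Validator::new(ValidationFlags::all(), caps)
        .validate(&module)
        .expect("no-result 64-bit min/max should not require SHADER_INT64_ATOMIC_ALL_OPS");
}

Binding the result instead (`let old = atomicMax(...);`) would produce `result: Some(_)` and fail under these capabilities, which is exactly the distinction the Metal backend relies on.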
+ const SHADER_INT64_ATOMIC_ALL_OPS = 0x80000; } } diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index ff33e37cb..32d5d58f1 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -360,16 +360,28 @@ impl super::Validator { ) } Ti::Atomic(crate::Scalar { kind, width }) => { - let good = match kind { + match kind { crate::ScalarKind::Bool | crate::ScalarKind::Float | crate::ScalarKind::AbstractInt - | crate::ScalarKind::AbstractFloat => false, - crate::ScalarKind::Sint | crate::ScalarKind::Uint => width == 4, + | crate::ScalarKind::AbstractFloat => { + return Err(TypeError::InvalidAtomicWidth(kind, width)) + } + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + if width == 8 { + if !self.capabilities.intersects( + Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + | Capabilities::SHADER_INT64_ATOMIC_MIN_MAX, + ) { + return Err(TypeError::MissingCapability( + Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, + )); + } + } else if width != 4 { + return Err(TypeError::InvalidAtomicWidth(kind, width)); + } + } }; - if !good { - return Err(TypeError::InvalidAtomicWidth(kind, width)); - } TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::HOST_SHAREABLE, Alignment::from_width(width), diff --git a/naga/tests/in/atomicCompareExchange-int64.param.ron b/naga/tests/in/atomicCompareExchange-int64.param.ron new file mode 100644 index 000000000..ba6291cb8 --- /dev/null +++ b/naga/tests/in/atomicCompareExchange-int64.param.ron @@ -0,0 +1,15 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicCompareExchange-int64.wgsl b/naga/tests/in/atomicCompareExchange-int64.wgsl new file mode 100644 index 000000000..84f93880b --- /dev/null +++ b/naga/tests/in/atomicCompareExchange-int64.wgsl @@ -0,0 +1,34 @@ +const SIZE: u32 = 128u; + +@group(0) @binding(0) +var arr_i64: array, SIZE>; +@group(0) @binding(1) +var arr_u64: array, SIZE>; + +@compute @workgroup_size(1) +fn test_atomic_compare_exchange_i64() { + for(var i = 0u; i < SIZE; i++) { + var old : i64 = atomicLoad(&arr_i64[i]); + var exchanged = false; + while(!exchanged) { + let new_ : i64 = bitcast(old + 10li); + let result = atomicCompareExchangeWeak(&arr_i64[i], old, new_); + old = result.old_value; + exchanged = result.exchanged; + } + } +} + +@compute @workgroup_size(1) +fn test_atomic_compare_exchange_u64() { + for(var i = 0u; i < SIZE; i++) { + var old : u64 = atomicLoad(&arr_u64[i]); + var exchanged = false; + while(!exchanged) { + let new_ : u64 = bitcast(old + 10lu); + let result = atomicCompareExchangeWeak(&arr_u64[i], old, new_); + old = result.old_value; + exchanged = result.exchanged; + } + } +} diff --git a/naga/tests/in/atomicOps-int64-min-max.param.ron b/naga/tests/in/atomicOps-int64-min-max.param.ron new file mode 100644 index 000000000..11b4b0d73 --- /dev/null +++ b/naga/tests/in/atomicOps-int64-min-max.param.ron @@ -0,0 +1,23 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), + msl: ( + lang_version: (2, 
4), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicOps-int64-min-max.wgsl b/naga/tests/in/atomicOps-int64-min-max.wgsl new file mode 100644 index 000000000..94e6aa686 --- /dev/null +++ b/naga/tests/in/atomicOps-int64-min-max.wgsl @@ -0,0 +1,27 @@ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicMax(&storage_atomic_scalar, 1lu); + atomicMax(&storage_atomic_arr[1], 1lu); + atomicMax(&storage_struct.atomic_scalar, 1lu); + atomicMax(&storage_struct.atomic_arr[1], 1lu); + + workgroupBarrier(); + + atomicMin(&storage_atomic_scalar, 1lu); + atomicMin(&storage_atomic_arr[1], 1lu); + atomicMin(&storage_struct.atomic_scalar, 1lu); + atomicMin(&storage_struct.atomic_arr[1], 1lu); +} diff --git a/naga/tests/in/atomicOps-int64.param.ron b/naga/tests/in/atomicOps-int64.param.ron new file mode 100644 index 000000000..ba6291cb8 --- /dev/null +++ b/naga/tests/in/atomicOps-int64.param.ron @@ -0,0 +1,15 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicOps-int64.wgsl b/naga/tests/in/atomicOps-int64.wgsl new file mode 100644 index 000000000..42857d2fa --- /dev/null +++ b/naga/tests/in/atomicOps-int64.wgsl @@ -0,0 +1,141 @@ +// This test covers the cross product of: +// +// * All int64 atomic operations. +// * On all applicable scopes (storage read-write, workgroup). +// * For all shapes of modeling atomic data. 
+ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +var workgroup_atomic_scalar: atomic; +var workgroup_atomic_arr: array, 2>; +var workgroup_struct: Struct; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicStore(&storage_atomic_scalar, 1lu); + atomicStore(&storage_atomic_arr[1], 1li); + atomicStore(&storage_struct.atomic_scalar, 1lu); + atomicStore(&storage_struct.atomic_arr[1], 1li); + atomicStore(&workgroup_atomic_scalar, 1lu); + atomicStore(&workgroup_atomic_arr[1], 1li); + atomicStore(&workgroup_struct.atomic_scalar, 1lu); + atomicStore(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + let l0 = atomicLoad(&storage_atomic_scalar); + let l1 = atomicLoad(&storage_atomic_arr[1]); + let l2 = atomicLoad(&storage_struct.atomic_scalar); + let l3 = atomicLoad(&storage_struct.atomic_arr[1]); + let l4 = atomicLoad(&workgroup_atomic_scalar); + let l5 = atomicLoad(&workgroup_atomic_arr[1]); + let l6 = atomicLoad(&workgroup_struct.atomic_scalar); + let l7 = atomicLoad(&workgroup_struct.atomic_arr[1]); + + workgroupBarrier(); + + atomicAdd(&storage_atomic_scalar, 1lu); + atomicAdd(&storage_atomic_arr[1], 1li); + atomicAdd(&storage_struct.atomic_scalar, 1lu); + atomicAdd(&storage_struct.atomic_arr[1], 1li); + atomicAdd(&workgroup_atomic_scalar, 1lu); + atomicAdd(&workgroup_atomic_arr[1], 1li); + atomicAdd(&workgroup_struct.atomic_scalar, 1lu); + atomicAdd(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicSub(&storage_atomic_scalar, 1lu); + atomicSub(&storage_atomic_arr[1], 1li); + atomicSub(&storage_struct.atomic_scalar, 1lu); + atomicSub(&storage_struct.atomic_arr[1], 1li); + atomicSub(&workgroup_atomic_scalar, 1lu); + atomicSub(&workgroup_atomic_arr[1], 1li); + atomicSub(&workgroup_struct.atomic_scalar, 1lu); + atomicSub(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicMax(&storage_atomic_scalar, 1lu); + atomicMax(&storage_atomic_arr[1], 1li); + atomicMax(&storage_struct.atomic_scalar, 1lu); + atomicMax(&storage_struct.atomic_arr[1], 1li); + atomicMax(&workgroup_atomic_scalar, 1lu); + atomicMax(&workgroup_atomic_arr[1], 1li); + atomicMax(&workgroup_struct.atomic_scalar, 1lu); + atomicMax(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicMin(&storage_atomic_scalar, 1lu); + atomicMin(&storage_atomic_arr[1], 1li); + atomicMin(&storage_struct.atomic_scalar, 1lu); + atomicMin(&storage_struct.atomic_arr[1], 1li); + atomicMin(&workgroup_atomic_scalar, 1lu); + atomicMin(&workgroup_atomic_arr[1], 1li); + atomicMin(&workgroup_struct.atomic_scalar, 1lu); + atomicMin(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicAnd(&storage_atomic_scalar, 1lu); + atomicAnd(&storage_atomic_arr[1], 1li); + atomicAnd(&storage_struct.atomic_scalar, 1lu); + atomicAnd(&storage_struct.atomic_arr[1], 1li); + atomicAnd(&workgroup_atomic_scalar, 1lu); + atomicAnd(&workgroup_atomic_arr[1], 1li); + atomicAnd(&workgroup_struct.atomic_scalar, 1lu); + atomicAnd(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicOr(&storage_atomic_scalar, 1lu); + atomicOr(&storage_atomic_arr[1], 1li); + atomicOr(&storage_struct.atomic_scalar, 1lu); + atomicOr(&storage_struct.atomic_arr[1], 1li); + atomicOr(&workgroup_atomic_scalar, 1lu); + atomicOr(&workgroup_atomic_arr[1], 1li); + 
atomicOr(&workgroup_struct.atomic_scalar, 1lu); + atomicOr(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicXor(&storage_atomic_scalar, 1lu); + atomicXor(&storage_atomic_arr[1], 1li); + atomicXor(&storage_struct.atomic_scalar, 1lu); + atomicXor(&storage_struct.atomic_arr[1], 1li); + atomicXor(&workgroup_atomic_scalar, 1lu); + atomicXor(&workgroup_atomic_arr[1], 1li); + atomicXor(&workgroup_struct.atomic_scalar, 1lu); + atomicXor(&workgroup_struct.atomic_arr[1], 1li); + + atomicExchange(&storage_atomic_scalar, 1lu); + atomicExchange(&storage_atomic_arr[1], 1li); + atomicExchange(&storage_struct.atomic_scalar, 1lu); + atomicExchange(&storage_struct.atomic_arr[1], 1li); + atomicExchange(&workgroup_atomic_scalar, 1lu); + atomicExchange(&workgroup_atomic_arr[1], 1li); + atomicExchange(&workgroup_struct.atomic_scalar, 1lu); + atomicExchange(&workgroup_struct.atomic_arr[1], 1li); + + // // TODO: https://github.com/gpuweb/gpuweb/issues/2021 + // atomicCompareExchangeWeak(&storage_atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&storage_atomic_arr[1], 1li); + // atomicCompareExchangeWeak(&storage_struct.atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&storage_struct.atomic_arr[1], 1li); + // atomicCompareExchangeWeak(&workgroup_atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&workgroup_atomic_arr[1], 1li); + // atomicCompareExchangeWeak(&workgroup_struct.atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&workgroup_struct.atomic_arr[1], 1li); +} diff --git a/naga/tests/in/int64.param.ron b/naga/tests/in/int64.param.ron index 15348b905..0e76c83e4 100644 --- a/naga/tests/in/int64.param.ron +++ b/naga/tests/in/int64.param.ron @@ -2,6 +2,7 @@ god_mode: true, spv: ( version: (1, 0), + capabilities: [ Int64 ], ), hlsl: ( shader_model: V6_0, diff --git a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl new file mode 100644 index 000000000..8c52e5b3b --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl @@ -0,0 +1,30 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +struct Struct { + uint64_t atomic_scalar; + uint64_t atomic_arr[2]; +}; + +RWByteAddressBuffer storage_atomic_scalar : register(u0); +RWByteAddressBuffer storage_atomic_arr : register(u1); +RWByteAddressBuffer storage_struct : register(u2); + +[numthreads(2, 1, 1)] +void cs_main(uint3 id : SV_GroupThreadID) +{ + storage_atomic_scalar.InterlockedMax(0, 1uL); + storage_atomic_arr.InterlockedMax(8, 1uL); + storage_struct.InterlockedMax(0, 1uL); + storage_struct.InterlockedMax(8+8, 1uL); + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.InterlockedMin(0, 1uL); + storage_atomic_arr.InterlockedMin(8, 1uL); + storage_struct.InterlockedMin(0, 1uL); + storage_struct.InterlockedMin(8+8, 1uL); + return; +} diff --git a/naga/tests/out/hlsl/atomicOps-int64-min-max.ron b/naga/tests/out/hlsl/atomicOps-int64-min-max.ron new file mode 100644 index 000000000..67a903551 --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64-min-max.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"cs_main", + target_profile:"cs_6_6", + ), + ], +) diff --git a/naga/tests/out/hlsl/atomicOps-int64.hlsl b/naga/tests/out/hlsl/atomicOps-int64.hlsl new file mode 100644 index 000000000..973cf0730 --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64.hlsl @@ -0,0 +1,118 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + 
uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +struct Struct { + uint64_t atomic_scalar; + int64_t atomic_arr[2]; +}; + +RWByteAddressBuffer storage_atomic_scalar : register(u0); +RWByteAddressBuffer storage_atomic_arr : register(u1); +RWByteAddressBuffer storage_struct : register(u2); +groupshared uint64_t workgroup_atomic_scalar; +groupshared int64_t workgroup_atomic_arr[2]; +groupshared Struct workgroup_struct; + +[numthreads(2, 1, 1)] +void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_GroupThreadID) +{ + if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + workgroup_atomic_scalar = (uint64_t)0; + workgroup_atomic_arr = (int64_t[2])0; + workgroup_struct = (Struct)0; + } + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.Store(0, 1uL); + storage_atomic_arr.Store(8, 1L); + storage_struct.Store(0, 1uL); + storage_struct.Store(8+8, 1L); + workgroup_atomic_scalar = 1uL; + workgroup_atomic_arr[1] = 1L; + workgroup_struct.atomic_scalar = 1uL; + workgroup_struct.atomic_arr[1] = 1L; + GroupMemoryBarrierWithGroupSync(); + uint64_t l0_ = storage_atomic_scalar.Load(0); + int64_t l1_ = storage_atomic_arr.Load(8); + uint64_t l2_ = storage_struct.Load(0); + int64_t l3_ = storage_struct.Load(8+8); + uint64_t l4_ = workgroup_atomic_scalar; + int64_t l5_ = workgroup_atomic_arr[1]; + uint64_t l6_ = workgroup_struct.atomic_scalar; + int64_t l7_ = workgroup_struct.atomic_arr[1]; + GroupMemoryBarrierWithGroupSync(); + uint64_t _e51; storage_atomic_scalar.InterlockedAdd(0, 1uL, _e51); + int64_t _e55; storage_atomic_arr.InterlockedAdd(8, 1L, _e55); + uint64_t _e59; storage_struct.InterlockedAdd(0, 1uL, _e59); + int64_t _e64; storage_struct.InterlockedAdd(8+8, 1L, _e64); + uint64_t _e67; InterlockedAdd(workgroup_atomic_scalar, 1uL, _e67); + int64_t _e71; InterlockedAdd(workgroup_atomic_arr[1], 1L, _e71); + uint64_t _e75; InterlockedAdd(workgroup_struct.atomic_scalar, 1uL, _e75); + int64_t _e80; InterlockedAdd(workgroup_struct.atomic_arr[1], 1L, _e80); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e83; storage_atomic_scalar.InterlockedAdd(0, -1uL, _e83); + int64_t _e87; storage_atomic_arr.InterlockedAdd(8, -1L, _e87); + uint64_t _e91; storage_struct.InterlockedAdd(0, -1uL, _e91); + int64_t _e96; storage_struct.InterlockedAdd(8+8, -1L, _e96); + uint64_t _e99; InterlockedAdd(workgroup_atomic_scalar, -1uL, _e99); + int64_t _e103; InterlockedAdd(workgroup_atomic_arr[1], -1L, _e103); + uint64_t _e107; InterlockedAdd(workgroup_struct.atomic_scalar, -1uL, _e107); + int64_t _e112; InterlockedAdd(workgroup_struct.atomic_arr[1], -1L, _e112); + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.InterlockedMax(0, 1uL); + storage_atomic_arr.InterlockedMax(8, 1L); + storage_struct.InterlockedMax(0, 1uL); + storage_struct.InterlockedMax(8+8, 1L); + InterlockedMax(workgroup_atomic_scalar, 1uL); + InterlockedMax(workgroup_atomic_arr[1], 1L); + InterlockedMax(workgroup_struct.atomic_scalar, 1uL); + InterlockedMax(workgroup_struct.atomic_arr[1], 1L); + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.InterlockedMin(0, 1uL); + storage_atomic_arr.InterlockedMin(8, 1L); + storage_struct.InterlockedMin(0, 1uL); + storage_struct.InterlockedMin(8+8, 1L); + InterlockedMin(workgroup_atomic_scalar, 1uL); + InterlockedMin(workgroup_atomic_arr[1], 1L); + InterlockedMin(workgroup_struct.atomic_scalar, 1uL); + InterlockedMin(workgroup_struct.atomic_arr[1], 1L); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e163; storage_atomic_scalar.InterlockedAnd(0, 1uL, 
_e163); + int64_t _e167; storage_atomic_arr.InterlockedAnd(8, 1L, _e167); + uint64_t _e171; storage_struct.InterlockedAnd(0, 1uL, _e171); + int64_t _e176; storage_struct.InterlockedAnd(8+8, 1L, _e176); + uint64_t _e179; InterlockedAnd(workgroup_atomic_scalar, 1uL, _e179); + int64_t _e183; InterlockedAnd(workgroup_atomic_arr[1], 1L, _e183); + uint64_t _e187; InterlockedAnd(workgroup_struct.atomic_scalar, 1uL, _e187); + int64_t _e192; InterlockedAnd(workgroup_struct.atomic_arr[1], 1L, _e192); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e195; storage_atomic_scalar.InterlockedOr(0, 1uL, _e195); + int64_t _e199; storage_atomic_arr.InterlockedOr(8, 1L, _e199); + uint64_t _e203; storage_struct.InterlockedOr(0, 1uL, _e203); + int64_t _e208; storage_struct.InterlockedOr(8+8, 1L, _e208); + uint64_t _e211; InterlockedOr(workgroup_atomic_scalar, 1uL, _e211); + int64_t _e215; InterlockedOr(workgroup_atomic_arr[1], 1L, _e215); + uint64_t _e219; InterlockedOr(workgroup_struct.atomic_scalar, 1uL, _e219); + int64_t _e224; InterlockedOr(workgroup_struct.atomic_arr[1], 1L, _e224); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e227; storage_atomic_scalar.InterlockedXor(0, 1uL, _e227); + int64_t _e231; storage_atomic_arr.InterlockedXor(8, 1L, _e231); + uint64_t _e235; storage_struct.InterlockedXor(0, 1uL, _e235); + int64_t _e240; storage_struct.InterlockedXor(8+8, 1L, _e240); + uint64_t _e243; InterlockedXor(workgroup_atomic_scalar, 1uL, _e243); + int64_t _e247; InterlockedXor(workgroup_atomic_arr[1], 1L, _e247); + uint64_t _e251; InterlockedXor(workgroup_struct.atomic_scalar, 1uL, _e251); + int64_t _e256; InterlockedXor(workgroup_struct.atomic_arr[1], 1L, _e256); + uint64_t _e259; storage_atomic_scalar.InterlockedExchange(0, 1uL, _e259); + int64_t _e263; storage_atomic_arr.InterlockedExchange(8, 1L, _e263); + uint64_t _e267; storage_struct.InterlockedExchange(0, 1uL, _e267); + int64_t _e272; storage_struct.InterlockedExchange(8+8, 1L, _e272); + uint64_t _e275; InterlockedExchange(workgroup_atomic_scalar, 1uL, _e275); + int64_t _e279; InterlockedExchange(workgroup_atomic_arr[1], 1L, _e279); + uint64_t _e283; InterlockedExchange(workgroup_struct.atomic_scalar, 1uL, _e283); + int64_t _e288; InterlockedExchange(workgroup_struct.atomic_arr[1], 1L, _e288); + return; +} diff --git a/naga/tests/out/hlsl/atomicOps-int64.ron b/naga/tests/out/hlsl/atomicOps-int64.ron new file mode 100644 index 000000000..67a903551 --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"cs_main", + target_profile:"cs_6_6", + ), + ], +) diff --git a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron index 8c889382d..9d4e82fd8 100644 --- a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron +++ b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron @@ -116,7 +116,7 @@ compare: Some(3), ), value: 4, - result: 5, + result: Some(5), ), Return( value: None, diff --git a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron index 8c889382d..9d4e82fd8 100644 --- a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron +++ b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron @@ -116,7 +116,7 @@ compare: Some(3), ), value: 4, - result: 5, + result: Some(5), ), Return( value: None, diff --git a/naga/tests/out/msl/atomicOps-int64-min-max.msl 
b/naga/tests/out/msl/atomicOps-int64-min-max.msl new file mode 100644 index 000000000..a5dd1c97f --- /dev/null +++ b/naga/tests/out/msl/atomicOps-int64-min-max.msl @@ -0,0 +1,33 @@ +// language: metal2.4 +#include +#include + +using metal::uint; + +struct type_1 { + metal::atomic_ulong inner[2]; +}; +struct Struct { + metal::atomic_ulong atomic_scalar; + type_1 atomic_arr; +}; + +struct cs_mainInput { +}; +kernel void cs_main( + metal::uint3 id [[thread_position_in_threadgroup]] +, device metal::atomic_ulong& storage_atomic_scalar [[user(fake0)]] +, device type_1& storage_atomic_arr [[user(fake0)]] +, device Struct& storage_struct [[user(fake0)]] +) { + metal::atomic_max_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::atomic_max_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + metal::atomic_min_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::atomic_min_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + return; +} diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm new file mode 100644 index 000000000..f174ad3b3 --- /dev/null +++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm @@ -0,0 +1,205 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 125 +OpCapability Shader +OpCapability Int64Atomics +OpCapability Int64 +OpExtension "SPV_KHR_storage_buffer_storage_class" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64" +OpEntryPoint GLCompute %77 "test_atomic_compare_exchange_u64" +OpExecutionMode %19 LocalSize 1 1 1 +OpExecutionMode %77 LocalSize 1 1 1 +OpDecorate %5 ArrayStride 8 +OpDecorate %8 ArrayStride 8 +OpMemberDecorate %10 0 Offset 0 +OpMemberDecorate %10 1 Offset 8 +OpMemberDecorate %11 0 Offset 0 +OpMemberDecorate %11 1 Offset 8 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 0 +OpDecorate %13 Block +OpMemberDecorate %13 0 Offset 0 +OpDecorate %15 DescriptorSet 0 +OpDecorate %15 Binding 1 +OpDecorate %16 Block +OpMemberDecorate %16 0 Offset 0 +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%4 = OpTypeInt 64 1 +%6 = OpConstant %3 128 +%5 = OpTypeArray %4 %6 +%7 = OpTypeInt 64 0 +%8 = OpTypeArray %7 %6 +%9 = OpTypeBool +%10 = OpTypeStruct %4 %9 +%11 = OpTypeStruct %7 %9 +%13 = OpTypeStruct %5 +%14 = OpTypePointer StorageBuffer %13 +%12 = OpVariable %14 StorageBuffer +%16 = OpTypeStruct %8 +%17 = OpTypePointer StorageBuffer %16 +%15 = OpVariable %17 StorageBuffer +%20 = OpTypeFunction %2 +%21 = OpTypePointer StorageBuffer %5 +%22 = OpConstant %3 0 +%24 = OpConstantFalse %9 +%25 = OpConstant %4 10 +%26 = OpConstant %3 1 +%28 = OpTypePointer Function %3 +%30 = OpTypePointer Function %4 +%31 = OpConstantNull %4 +%33 = OpTypePointer Function %9 +%34 = OpConstantNull %9 +%47 = OpTypePointer StorageBuffer %4 +%51 = OpTypeInt 32 1 +%50 = OpConstant %51 1 +%52 = OpConstant %3 64 +%78 = OpTypePointer StorageBuffer %8 +%80 = OpConstant %7 10 +%83 = OpTypePointer Function %7 +%84 
= OpConstantNull %7 +%86 = OpConstantNull %9 +%99 = OpTypePointer StorageBuffer %7 +%19 = OpFunction %2 None %20 +%18 = OpLabel +%27 = OpVariable %28 Function %22 +%29 = OpVariable %30 Function %31 +%32 = OpVariable %33 Function %34 +%23 = OpAccessChain %21 %12 %22 +OpBranch %35 +%35 = OpLabel +OpBranch %36 +%36 = OpLabel +OpLoopMerge %37 %39 None +OpBranch %38 +%38 = OpLabel +%40 = OpLoad %3 %27 +%41 = OpULessThan %9 %40 %6 +OpSelectionMerge %42 None +OpBranchConditional %41 %42 %43 +%43 = OpLabel +OpBranch %37 +%42 = OpLabel +OpBranch %44 +%44 = OpLabel +%46 = OpLoad %3 %27 +%48 = OpAccessChain %47 %23 %46 +%49 = OpAtomicLoad %4 %48 %50 %52 +OpStore %29 %49 +OpStore %32 %24 +OpBranch %53 +%53 = OpLabel +OpLoopMerge %54 %56 None +OpBranch %55 +%55 = OpLabel +%57 = OpLoad %9 %32 +%58 = OpLogicalNot %9 %57 +OpSelectionMerge %59 None +OpBranchConditional %58 %59 %60 +%60 = OpLabel +OpBranch %54 +%59 = OpLabel +OpBranch %61 +%61 = OpLabel +%63 = OpLoad %4 %29 +%64 = OpIAdd %4 %63 %25 +%66 = OpLoad %3 %27 +%67 = OpLoad %4 %29 +%69 = OpAccessChain %47 %23 %66 +%70 = OpAtomicCompareExchange %4 %69 %50 %52 %52 %64 %67 +%71 = OpIEqual %9 %70 %67 +%68 = OpCompositeConstruct %10 %70 %71 +%72 = OpCompositeExtract %4 %68 0 +OpStore %29 %72 +%73 = OpCompositeExtract %9 %68 1 +OpStore %32 %73 +OpBranch %62 +%62 = OpLabel +OpBranch %56 +%56 = OpLabel +OpBranch %53 +%54 = OpLabel +OpBranch %45 +%45 = OpLabel +OpBranch %39 +%39 = OpLabel +%74 = OpLoad %3 %27 +%75 = OpIAdd %3 %74 %26 +OpStore %27 %75 +OpBranch %36 +%37 = OpLabel +OpReturn +OpFunctionEnd +%77 = OpFunction %2 None %20 +%76 = OpLabel +%81 = OpVariable %28 Function %22 +%82 = OpVariable %83 Function %84 +%85 = OpVariable %33 Function %86 +%79 = OpAccessChain %78 %15 %22 +OpBranch %87 +%87 = OpLabel +OpBranch %88 +%88 = OpLabel +OpLoopMerge %89 %91 None +OpBranch %90 +%90 = OpLabel +%92 = OpLoad %3 %81 +%93 = OpULessThan %9 %92 %6 +OpSelectionMerge %94 None +OpBranchConditional %93 %94 %95 +%95 = OpLabel +OpBranch %89 +%94 = OpLabel +OpBranch %96 +%96 = OpLabel +%98 = OpLoad %3 %81 +%100 = OpAccessChain %99 %79 %98 +%101 = OpAtomicLoad %7 %100 %50 %52 +OpStore %82 %101 +OpStore %85 %24 +OpBranch %102 +%102 = OpLabel +OpLoopMerge %103 %105 None +OpBranch %104 +%104 = OpLabel +%106 = OpLoad %9 %85 +%107 = OpLogicalNot %9 %106 +OpSelectionMerge %108 None +OpBranchConditional %107 %108 %109 +%109 = OpLabel +OpBranch %103 +%108 = OpLabel +OpBranch %110 +%110 = OpLabel +%112 = OpLoad %7 %82 +%113 = OpIAdd %7 %112 %80 +%115 = OpLoad %3 %81 +%116 = OpLoad %7 %82 +%118 = OpAccessChain %99 %79 %115 +%119 = OpAtomicCompareExchange %7 %118 %50 %52 %52 %113 %116 +%120 = OpIEqual %9 %119 %116 +%117 = OpCompositeConstruct %11 %119 %120 +%121 = OpCompositeExtract %7 %117 0 +OpStore %82 %121 +%122 = OpCompositeExtract %9 %117 1 +OpStore %85 %122 +OpBranch %111 +%111 = OpLabel +OpBranch %105 +%105 = OpLabel +OpBranch %102 +%103 = OpLabel +OpBranch %97 +%97 = OpLabel +OpBranch %91 +%91 = OpLabel +%123 = OpLoad %3 %81 +%124 = OpIAdd %3 %123 %26 +OpStore %81 %124 +OpBranch %88 +%89 = OpLabel +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm new file mode 100644 index 000000000..aa798f546 --- /dev/null +++ b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm @@ -0,0 +1,82 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 52 +OpCapability Shader +OpCapability Int64Atomics +OpCapability Int64 +OpExtension "SPV_KHR_storage_buffer_storage_class" 
+%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %22 "cs_main" %19 +OpExecutionMode %22 LocalSize 2 1 1 +OpDecorate %4 ArrayStride 8 +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 8 +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 0 +OpDecorate %10 Block +OpMemberDecorate %10 0 Offset 0 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 1 +OpDecorate %13 Block +OpMemberDecorate %13 0 Offset 0 +OpDecorate %15 DescriptorSet 0 +OpDecorate %15 Binding 2 +OpDecorate %16 Block +OpMemberDecorate %16 0 Offset 0 +OpDecorate %19 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeInt 64 0 +%6 = OpTypeInt 32 0 +%5 = OpConstant %6 2 +%4 = OpTypeArray %3 %5 +%7 = OpTypeStruct %3 %4 +%8 = OpTypeVector %6 3 +%10 = OpTypeStruct %3 +%11 = OpTypePointer StorageBuffer %10 +%9 = OpVariable %11 StorageBuffer +%13 = OpTypeStruct %4 +%14 = OpTypePointer StorageBuffer %13 +%12 = OpVariable %14 StorageBuffer +%16 = OpTypeStruct %7 +%17 = OpTypePointer StorageBuffer %16 +%15 = OpVariable %17 StorageBuffer +%20 = OpTypePointer Input %8 +%19 = OpVariable %20 Input +%23 = OpTypeFunction %2 +%24 = OpTypePointer StorageBuffer %3 +%25 = OpConstant %6 0 +%27 = OpTypePointer StorageBuffer %4 +%29 = OpTypePointer StorageBuffer %7 +%31 = OpConstant %3 1 +%35 = OpTypeInt 32 1 +%34 = OpConstant %35 1 +%36 = OpConstant %6 64 +%38 = OpConstant %6 1 +%44 = OpConstant %6 264 +%22 = OpFunction %2 None %23 +%18 = OpLabel +%21 = OpLoad %8 %19 +%26 = OpAccessChain %24 %9 %25 +%28 = OpAccessChain %27 %12 %25 +%30 = OpAccessChain %29 %15 %25 +OpBranch %32 +%32 = OpLabel +%33 = OpAtomicUMax %3 %26 %34 %36 %31 +%39 = OpAccessChain %24 %28 %38 +%37 = OpAtomicUMax %3 %39 %34 %36 %31 +%41 = OpAccessChain %24 %30 %25 +%40 = OpAtomicUMax %3 %41 %34 %36 %31 +%43 = OpAccessChain %24 %30 %38 %38 +%42 = OpAtomicUMax %3 %43 %34 %36 %31 +OpControlBarrier %5 %5 %44 +%45 = OpAtomicUMin %3 %26 %34 %36 %31 +%47 = OpAccessChain %24 %28 %38 +%46 = OpAtomicUMin %3 %47 %34 %36 %31 +%49 = OpAccessChain %24 %30 %25 +%48 = OpAtomicUMin %3 %49 %34 %36 %31 +%51 = OpAccessChain %24 %30 %38 %38 +%50 = OpAtomicUMin %3 %51 %34 %36 %31 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/atomicOps-int64.spvasm b/naga/tests/out/spv/atomicOps-int64.spvasm new file mode 100644 index 000000000..943107f50 --- /dev/null +++ b/naga/tests/out/spv/atomicOps-int64.spvasm @@ -0,0 +1,246 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 193 +OpCapability Shader +OpCapability Int64Atomics +OpCapability Int64 +OpExtension "SPV_KHR_storage_buffer_storage_class" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %29 "cs_main" %26 +OpExecutionMode %29 LocalSize 2 1 1 +OpDecorate %5 ArrayStride 8 +OpMemberDecorate %8 0 Offset 0 +OpMemberDecorate %8 1 Offset 8 +OpDecorate %10 DescriptorSet 0 +OpDecorate %10 Binding 0 +OpDecorate %11 Block +OpMemberDecorate %11 0 Offset 0 +OpDecorate %13 DescriptorSet 0 +OpDecorate %13 Binding 1 +OpDecorate %14 Block +OpMemberDecorate %14 0 Offset 0 +OpDecorate %16 DescriptorSet 0 +OpDecorate %16 Binding 2 +OpDecorate %17 Block +OpMemberDecorate %17 0 Offset 0 +OpDecorate %26 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeInt 64 0 +%4 = OpTypeInt 64 1 +%7 = OpTypeInt 32 0 +%6 = OpConstant %7 2 +%5 = OpTypeArray %4 %6 +%8 = OpTypeStruct %3 %5 +%9 = OpTypeVector %7 3 +%11 = OpTypeStruct %3 +%12 = OpTypePointer StorageBuffer %11 +%10 = OpVariable %12 StorageBuffer +%14 = OpTypeStruct %5 +%15 = 
OpTypePointer StorageBuffer %14 +%13 = OpVariable %15 StorageBuffer +%17 = OpTypeStruct %8 +%18 = OpTypePointer StorageBuffer %17 +%16 = OpVariable %18 StorageBuffer +%20 = OpTypePointer Workgroup %3 +%19 = OpVariable %20 Workgroup +%22 = OpTypePointer Workgroup %5 +%21 = OpVariable %22 Workgroup +%24 = OpTypePointer Workgroup %8 +%23 = OpVariable %24 Workgroup +%27 = OpTypePointer Input %9 +%26 = OpVariable %27 Input +%30 = OpTypeFunction %2 +%31 = OpTypePointer StorageBuffer %3 +%32 = OpConstant %7 0 +%34 = OpTypePointer StorageBuffer %5 +%36 = OpTypePointer StorageBuffer %8 +%38 = OpConstant %3 1 +%39 = OpConstant %4 1 +%41 = OpConstantNull %3 +%42 = OpConstantNull %5 +%43 = OpConstantNull %8 +%44 = OpConstantNull %9 +%46 = OpTypeBool +%45 = OpTypeVector %46 3 +%51 = OpConstant %7 264 +%54 = OpTypeInt 32 1 +%53 = OpConstant %54 1 +%55 = OpConstant %7 64 +%56 = OpTypePointer StorageBuffer %4 +%57 = OpConstant %7 1 +%61 = OpConstant %54 2 +%62 = OpConstant %7 256 +%63 = OpTypePointer Workgroup %4 +%29 = OpFunction %2 None %30 +%25 = OpLabel +%28 = OpLoad %9 %26 +%33 = OpAccessChain %31 %10 %32 +%35 = OpAccessChain %34 %13 %32 +%37 = OpAccessChain %36 %16 %32 +OpBranch %40 +%40 = OpLabel +%47 = OpIEqual %45 %28 %44 +%48 = OpAll %46 %47 +OpSelectionMerge %49 None +OpBranchConditional %48 %50 %49 +%50 = OpLabel +OpStore %19 %41 +OpStore %21 %42 +OpStore %23 %43 +OpBranch %49 +%49 = OpLabel +OpControlBarrier %6 %6 %51 +OpBranch %52 +%52 = OpLabel +OpAtomicStore %33 %53 %55 %38 +%58 = OpAccessChain %56 %35 %57 +OpAtomicStore %58 %53 %55 %39 +%59 = OpAccessChain %31 %37 %32 +OpAtomicStore %59 %53 %55 %38 +%60 = OpAccessChain %56 %37 %57 %57 +OpAtomicStore %60 %53 %55 %39 +OpAtomicStore %19 %61 %62 %38 +%64 = OpAccessChain %63 %21 %57 +OpAtomicStore %64 %61 %62 %39 +%65 = OpAccessChain %20 %23 %32 +OpAtomicStore %65 %61 %62 %38 +%66 = OpAccessChain %63 %23 %57 %57 +OpAtomicStore %66 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%67 = OpAtomicLoad %3 %33 %53 %55 +%68 = OpAccessChain %56 %35 %57 +%69 = OpAtomicLoad %4 %68 %53 %55 +%70 = OpAccessChain %31 %37 %32 +%71 = OpAtomicLoad %3 %70 %53 %55 +%72 = OpAccessChain %56 %37 %57 %57 +%73 = OpAtomicLoad %4 %72 %53 %55 +%74 = OpAtomicLoad %3 %19 %61 %62 +%75 = OpAccessChain %63 %21 %57 +%76 = OpAtomicLoad %4 %75 %61 %62 +%77 = OpAccessChain %20 %23 %32 +%78 = OpAtomicLoad %3 %77 %61 %62 +%79 = OpAccessChain %63 %23 %57 %57 +%80 = OpAtomicLoad %4 %79 %61 %62 +OpControlBarrier %6 %6 %51 +%81 = OpAtomicIAdd %3 %33 %53 %55 %38 +%83 = OpAccessChain %56 %35 %57 +%82 = OpAtomicIAdd %4 %83 %53 %55 %39 +%85 = OpAccessChain %31 %37 %32 +%84 = OpAtomicIAdd %3 %85 %53 %55 %38 +%87 = OpAccessChain %56 %37 %57 %57 +%86 = OpAtomicIAdd %4 %87 %53 %55 %39 +%88 = OpAtomicIAdd %3 %19 %61 %62 %38 +%90 = OpAccessChain %63 %21 %57 +%89 = OpAtomicIAdd %4 %90 %61 %62 %39 +%92 = OpAccessChain %20 %23 %32 +%91 = OpAtomicIAdd %3 %92 %61 %62 %38 +%94 = OpAccessChain %63 %23 %57 %57 +%93 = OpAtomicIAdd %4 %94 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%95 = OpAtomicISub %3 %33 %53 %55 %38 +%97 = OpAccessChain %56 %35 %57 +%96 = OpAtomicISub %4 %97 %53 %55 %39 +%99 = OpAccessChain %31 %37 %32 +%98 = OpAtomicISub %3 %99 %53 %55 %38 +%101 = OpAccessChain %56 %37 %57 %57 +%100 = OpAtomicISub %4 %101 %53 %55 %39 +%102 = OpAtomicISub %3 %19 %61 %62 %38 +%104 = OpAccessChain %63 %21 %57 +%103 = OpAtomicISub %4 %104 %61 %62 %39 +%106 = OpAccessChain %20 %23 %32 +%105 = OpAtomicISub %3 %106 %61 %62 %38 +%108 = OpAccessChain %63 %23 %57 %57 +%107 = OpAtomicISub %4 %108 %61 %62 %39 
+OpControlBarrier %6 %6 %51 +%109 = OpAtomicUMax %3 %33 %53 %55 %38 +%111 = OpAccessChain %56 %35 %57 +%110 = OpAtomicSMax %4 %111 %53 %55 %39 +%113 = OpAccessChain %31 %37 %32 +%112 = OpAtomicUMax %3 %113 %53 %55 %38 +%115 = OpAccessChain %56 %37 %57 %57 +%114 = OpAtomicSMax %4 %115 %53 %55 %39 +%116 = OpAtomicUMax %3 %19 %61 %62 %38 +%118 = OpAccessChain %63 %21 %57 +%117 = OpAtomicSMax %4 %118 %61 %62 %39 +%120 = OpAccessChain %20 %23 %32 +%119 = OpAtomicUMax %3 %120 %61 %62 %38 +%122 = OpAccessChain %63 %23 %57 %57 +%121 = OpAtomicSMax %4 %122 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%123 = OpAtomicUMin %3 %33 %53 %55 %38 +%125 = OpAccessChain %56 %35 %57 +%124 = OpAtomicSMin %4 %125 %53 %55 %39 +%127 = OpAccessChain %31 %37 %32 +%126 = OpAtomicUMin %3 %127 %53 %55 %38 +%129 = OpAccessChain %56 %37 %57 %57 +%128 = OpAtomicSMin %4 %129 %53 %55 %39 +%130 = OpAtomicUMin %3 %19 %61 %62 %38 +%132 = OpAccessChain %63 %21 %57 +%131 = OpAtomicSMin %4 %132 %61 %62 %39 +%134 = OpAccessChain %20 %23 %32 +%133 = OpAtomicUMin %3 %134 %61 %62 %38 +%136 = OpAccessChain %63 %23 %57 %57 +%135 = OpAtomicSMin %4 %136 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%137 = OpAtomicAnd %3 %33 %53 %55 %38 +%139 = OpAccessChain %56 %35 %57 +%138 = OpAtomicAnd %4 %139 %53 %55 %39 +%141 = OpAccessChain %31 %37 %32 +%140 = OpAtomicAnd %3 %141 %53 %55 %38 +%143 = OpAccessChain %56 %37 %57 %57 +%142 = OpAtomicAnd %4 %143 %53 %55 %39 +%144 = OpAtomicAnd %3 %19 %61 %62 %38 +%146 = OpAccessChain %63 %21 %57 +%145 = OpAtomicAnd %4 %146 %61 %62 %39 +%148 = OpAccessChain %20 %23 %32 +%147 = OpAtomicAnd %3 %148 %61 %62 %38 +%150 = OpAccessChain %63 %23 %57 %57 +%149 = OpAtomicAnd %4 %150 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%151 = OpAtomicOr %3 %33 %53 %55 %38 +%153 = OpAccessChain %56 %35 %57 +%152 = OpAtomicOr %4 %153 %53 %55 %39 +%155 = OpAccessChain %31 %37 %32 +%154 = OpAtomicOr %3 %155 %53 %55 %38 +%157 = OpAccessChain %56 %37 %57 %57 +%156 = OpAtomicOr %4 %157 %53 %55 %39 +%158 = OpAtomicOr %3 %19 %61 %62 %38 +%160 = OpAccessChain %63 %21 %57 +%159 = OpAtomicOr %4 %160 %61 %62 %39 +%162 = OpAccessChain %20 %23 %32 +%161 = OpAtomicOr %3 %162 %61 %62 %38 +%164 = OpAccessChain %63 %23 %57 %57 +%163 = OpAtomicOr %4 %164 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%165 = OpAtomicXor %3 %33 %53 %55 %38 +%167 = OpAccessChain %56 %35 %57 +%166 = OpAtomicXor %4 %167 %53 %55 %39 +%169 = OpAccessChain %31 %37 %32 +%168 = OpAtomicXor %3 %169 %53 %55 %38 +%171 = OpAccessChain %56 %37 %57 %57 +%170 = OpAtomicXor %4 %171 %53 %55 %39 +%172 = OpAtomicXor %3 %19 %61 %62 %38 +%174 = OpAccessChain %63 %21 %57 +%173 = OpAtomicXor %4 %174 %61 %62 %39 +%176 = OpAccessChain %20 %23 %32 +%175 = OpAtomicXor %3 %176 %61 %62 %38 +%178 = OpAccessChain %63 %23 %57 %57 +%177 = OpAtomicXor %4 %178 %61 %62 %39 +%179 = OpAtomicExchange %3 %33 %53 %55 %38 +%181 = OpAccessChain %56 %35 %57 +%180 = OpAtomicExchange %4 %181 %53 %55 %39 +%183 = OpAccessChain %31 %37 %32 +%182 = OpAtomicExchange %3 %183 %53 %55 %38 +%185 = OpAccessChain %56 %37 %57 %57 +%184 = OpAtomicExchange %4 %185 %53 %55 %39 +%186 = OpAtomicExchange %3 %19 %61 %62 %38 +%188 = OpAccessChain %63 %21 %57 +%187 = OpAtomicExchange %4 %188 %61 %62 %39 +%190 = OpAccessChain %20 %23 %32 +%189 = OpAtomicExchange %3 %190 %61 %62 %38 +%192 = OpAccessChain %63 %23 %57 %57 +%191 = OpAtomicExchange %4 %192 %61 %62 %39 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/wgsl/atomicCompareExchange-int64.wgsl b/naga/tests/out/wgsl/atomicCompareExchange-int64.wgsl new file 
mode 100644 index 000000000..07cce9a89 --- /dev/null +++ b/naga/tests/out/wgsl/atomicCompareExchange-int64.wgsl @@ -0,0 +1,90 @@ +const SIZE: u32 = 128u; + +@group(0) @binding(0) +var<storage, read_write> arr_i64_: array<atomic<i64>, 128>; +@group(0) @binding(1) +var<storage, read_write> arr_u64_: array<atomic<u64>, 128>; + +@compute @workgroup_size(1, 1, 1) +fn test_atomic_compare_exchange_i64_() { + var i: u32 = 0u; + var old: i64; + var exchanged: bool; + + loop { + let _e2 = i; + if (_e2 < SIZE) { + } else { + break; + } + { + let _e6 = i; + let _e8 = atomicLoad((&arr_i64_[_e6])); + old = _e8; + exchanged = false; + loop { + let _e12 = exchanged; + if !(_e12) { + } else { + break; + } + { + let _e14 = old; + let new_ = bitcast<i64>((_e14 + 10li)); + let _e19 = i; + let _e21 = old; + let _e22 = atomicCompareExchangeWeak((&arr_i64_[_e19]), _e21, new_); + old = _e22.old_value; + exchanged = _e22.exchanged; + } + } + } + continuing { + let _e26 = i; + i = (_e26 + 1u); + } + } + return; +} + +@compute @workgroup_size(1, 1, 1) +fn test_atomic_compare_exchange_u64_() { + var i_1: u32 = 0u; + var old_1: u64; + var exchanged_1: bool; + + loop { + let _e2 = i_1; + if (_e2 < SIZE) { + } else { + break; + } + { + let _e6 = i_1; + let _e8 = atomicLoad((&arr_u64_[_e6])); + old_1 = _e8; + exchanged_1 = false; + loop { + let _e12 = exchanged_1; + if !(_e12) { + } else { + break; + } + { + let _e14 = old_1; + let new_1 = bitcast<u64>((_e14 + 10lu)); + let _e19 = i_1; + let _e21 = old_1; + let _e22 = atomicCompareExchangeWeak((&arr_u64_[_e19]), _e21, new_1); + old_1 = _e22.old_value; + exchanged_1 = _e22.exchanged; + } + } + } + continuing { + let _e26 = i_1; + i_1 = (_e26 + 1u); + } + } + return; +} diff --git a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl new file mode 100644 index 000000000..37bbb680f --- /dev/null +++ b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl @@ -0,0 +1,25 @@ +struct Struct { + atomic_scalar: atomic<u64>, + atomic_arr: array<atomic<u64>, 2>, +} + +@group(0) @binding(0) +var<storage, read_write> storage_atomic_scalar: atomic<u64>; +@group(0) @binding(1) +var<storage, read_write> storage_atomic_arr: array<atomic<u64>, 2>; +@group(0) @binding(2) +var<storage, read_write> storage_struct: Struct; + +@compute @workgroup_size(2, 1, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) { + atomicMax((&storage_atomic_scalar), 1lu); + atomicMax((&storage_atomic_arr[1]), 1lu); + atomicMax((&storage_struct.atomic_scalar), 1lu); + atomicMax((&storage_struct.atomic_arr[1]), 1lu); + workgroupBarrier(); + atomicMin((&storage_atomic_scalar), 1lu); + atomicMin((&storage_atomic_arr[1]), 1lu); + atomicMin((&storage_struct.atomic_scalar), 1lu); + atomicMin((&storage_struct.atomic_arr[1]), 1lu); + return; +} diff --git a/naga/tests/out/wgsl/atomicOps-int64.wgsl b/naga/tests/out/wgsl/atomicOps-int64.wgsl new file mode 100644 index 000000000..364108c64 --- /dev/null +++ b/naga/tests/out/wgsl/atomicOps-int64.wgsl @@ -0,0 +1,107 @@ +struct Struct { + atomic_scalar: atomic<u64>, + atomic_arr: array<atomic<i64>, 2>, +} + +@group(0) @binding(0) +var<storage, read_write> storage_atomic_scalar: atomic<u64>; +@group(0) @binding(1) +var<storage, read_write> storage_atomic_arr: array<atomic<i64>, 2>; +@group(0) @binding(2) +var<storage, read_write> storage_struct: Struct; +var<workgroup> workgroup_atomic_scalar: atomic<u64>; +var<workgroup> workgroup_atomic_arr: array<atomic<i64>, 2>; +var<workgroup> workgroup_struct: Struct; + +@compute @workgroup_size(2, 1, 1) +fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) { + atomicStore((&storage_atomic_scalar), 1lu); + atomicStore((&storage_atomic_arr[1]), 1li); + atomicStore((&storage_struct.atomic_scalar), 1lu); + atomicStore((&storage_struct.atomic_arr[1]), 1li); + atomicStore((&workgroup_atomic_scalar), 1lu); + 
atomicStore((&workgroup_atomic_arr[1]), 1li); + atomicStore((&workgroup_struct.atomic_scalar), 1lu); + atomicStore((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let l0_ = atomicLoad((&storage_atomic_scalar)); + let l1_ = atomicLoad((&storage_atomic_arr[1])); + let l2_ = atomicLoad((&storage_struct.atomic_scalar)); + let l3_ = atomicLoad((&storage_struct.atomic_arr[1])); + let l4_ = atomicLoad((&workgroup_atomic_scalar)); + let l5_ = atomicLoad((&workgroup_atomic_arr[1])); + let l6_ = atomicLoad((&workgroup_struct.atomic_scalar)); + let l7_ = atomicLoad((&workgroup_struct.atomic_arr[1])); + workgroupBarrier(); + let _e51 = atomicAdd((&storage_atomic_scalar), 1lu); + let _e55 = atomicAdd((&storage_atomic_arr[1]), 1li); + let _e59 = atomicAdd((&storage_struct.atomic_scalar), 1lu); + let _e64 = atomicAdd((&storage_struct.atomic_arr[1]), 1li); + let _e67 = atomicAdd((&workgroup_atomic_scalar), 1lu); + let _e71 = atomicAdd((&workgroup_atomic_arr[1]), 1li); + let _e75 = atomicAdd((&workgroup_struct.atomic_scalar), 1lu); + let _e80 = atomicAdd((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e83 = atomicSub((&storage_atomic_scalar), 1lu); + let _e87 = atomicSub((&storage_atomic_arr[1]), 1li); + let _e91 = atomicSub((&storage_struct.atomic_scalar), 1lu); + let _e96 = atomicSub((&storage_struct.atomic_arr[1]), 1li); + let _e99 = atomicSub((&workgroup_atomic_scalar), 1lu); + let _e103 = atomicSub((&workgroup_atomic_arr[1]), 1li); + let _e107 = atomicSub((&workgroup_struct.atomic_scalar), 1lu); + let _e112 = atomicSub((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + atomicMax((&storage_atomic_scalar), 1lu); + atomicMax((&storage_atomic_arr[1]), 1li); + atomicMax((&storage_struct.atomic_scalar), 1lu); + atomicMax((&storage_struct.atomic_arr[1]), 1li); + atomicMax((&workgroup_atomic_scalar), 1lu); + atomicMax((&workgroup_atomic_arr[1]), 1li); + atomicMax((&workgroup_struct.atomic_scalar), 1lu); + atomicMax((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + atomicMin((&storage_atomic_scalar), 1lu); + atomicMin((&storage_atomic_arr[1]), 1li); + atomicMin((&storage_struct.atomic_scalar), 1lu); + atomicMin((&storage_struct.atomic_arr[1]), 1li); + atomicMin((&workgroup_atomic_scalar), 1lu); + atomicMin((&workgroup_atomic_arr[1]), 1li); + atomicMin((&workgroup_struct.atomic_scalar), 1lu); + atomicMin((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e163 = atomicAnd((&storage_atomic_scalar), 1lu); + let _e167 = atomicAnd((&storage_atomic_arr[1]), 1li); + let _e171 = atomicAnd((&storage_struct.atomic_scalar), 1lu); + let _e176 = atomicAnd((&storage_struct.atomic_arr[1]), 1li); + let _e179 = atomicAnd((&workgroup_atomic_scalar), 1lu); + let _e183 = atomicAnd((&workgroup_atomic_arr[1]), 1li); + let _e187 = atomicAnd((&workgroup_struct.atomic_scalar), 1lu); + let _e192 = atomicAnd((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e195 = atomicOr((&storage_atomic_scalar), 1lu); + let _e199 = atomicOr((&storage_atomic_arr[1]), 1li); + let _e203 = atomicOr((&storage_struct.atomic_scalar), 1lu); + let _e208 = atomicOr((&storage_struct.atomic_arr[1]), 1li); + let _e211 = atomicOr((&workgroup_atomic_scalar), 1lu); + let _e215 = atomicOr((&workgroup_atomic_arr[1]), 1li); + let _e219 = atomicOr((&workgroup_struct.atomic_scalar), 1lu); + let _e224 = atomicOr((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e227 = atomicXor((&storage_atomic_scalar), 1lu); + let _e231 = 
atomicXor((&storage_atomic_arr[1]), 1li); + let _e235 = atomicXor((&storage_struct.atomic_scalar), 1lu); + let _e240 = atomicXor((&storage_struct.atomic_arr[1]), 1li); + let _e243 = atomicXor((&workgroup_atomic_scalar), 1lu); + let _e247 = atomicXor((&workgroup_atomic_arr[1]), 1li); + let _e251 = atomicXor((&workgroup_struct.atomic_scalar), 1lu); + let _e256 = atomicXor((&workgroup_struct.atomic_arr[1]), 1li); + let _e259 = atomicExchange((&storage_atomic_scalar), 1lu); + let _e263 = atomicExchange((&storage_atomic_arr[1]), 1li); + let _e267 = atomicExchange((&storage_struct.atomic_scalar), 1lu); + let _e272 = atomicExchange((&storage_struct.atomic_arr[1]), 1li); + let _e275 = atomicExchange((&workgroup_atomic_scalar), 1lu); + let _e279 = atomicExchange((&workgroup_atomic_arr[1]), 1li); + let _e283 = atomicExchange((&workgroup_struct.atomic_scalar), 1lu); + let _e288 = atomicExchange((&workgroup_struct.atomic_arr[1]), 1li); + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 826337515..be8eb6a17 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -759,6 +759,18 @@ fn convert_wgsl() { "padding", Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, ), + ( + "atomicOps-int64", + Targets::SPIRV | Targets::HLSL | Targets::WGSL, + ), + ( + "atomicOps-int64-min-max", + Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, + ), + ( + "atomicCompareExchange-int64", + Targets::SPIRV | Targets::WGSL, + ), ("pointers", Targets::SPIRV | Targets::WGSL), ( "control-flow", diff --git a/naga/tests/validation.rs b/naga/tests/validation.rs index 7491fd262..f64b40884 100644 --- a/naga/tests/validation.rs +++ b/naga/tests/validation.rs @@ -75,7 +75,7 @@ fn populate_atomic_result() { pointer: ex_global, fun: naga::AtomicFunction::Add, value: ex_42, - result: ex_result, + result: Some(ex_result), }, span, ); diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index 2716caabd..f5c2d4c96 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -155,6 +155,12 @@ impl ShaderTest { self } + fn output_type(mut self, output_type: String) -> Self { + self.output_type = output_type; + + self + } + /// Add another set of possible outputs. If any of the given /// output values are seen it's considered a success (i.e. this is OR, not AND). 
/// diff --git a/tests/tests/shader/numeric_builtins.rs b/tests/tests/shader/numeric_builtins.rs index 999d9dfb0..f6cb0bb39 100644 --- a/tests/tests/shader/numeric_builtins.rs +++ b/tests/tests/shader/numeric_builtins.rs @@ -52,6 +52,105 @@ static NUMERIC_BUILTINS: GpuTestConfiguration = GpuTestConfiguration::new() ) }); +fn create_int64_atomic_min_max_test() -> Vec<ShaderTest> { + let mut tests = Vec::new(); + + let test = ShaderTest::new( + "atomicMax".into(), + "value: u64".into(), + "atomicMin(&output, 0lu); atomicMax(&output, 2lu);".into(), + &[0], + &[2], + ) + .output_type("atomic<u64>".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicMin".into(), + "value: u64".into(), + "atomicMax(&output, 100lu); atomicMin(&output, 4lu);".into(), + &[0], + &[4], + ) + .output_type("atomic<u64>".into()); + + tests.push(test); + + tests +} + +#[gpu_test] +static INT64_ATOMIC_MIN_MAX: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgt::Features::SHADER_INT64 | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_int64_atomic_min_max_test(), + ) + }); + +fn create_int64_atomic_all_ops_test() -> Vec<ShaderTest> { + let mut tests = Vec::new(); + + let test = ShaderTest::new( + "atomicAdd".into(), + "value: u64".into(), + "atomicStore(&output, 0lu); atomicAdd(&output, 1lu); atomicAdd(&output, 1lu);".into(), + &[0], + &[2], + ) + .output_type("atomic<u64>".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicAnd".into(), + "value: u64".into(), + "atomicStore(&output, 31lu); atomicAnd(&output, 30lu); atomicAnd(&output, 3lu);".into(), + &[0], + &[2], + ) + .output_type("atomic<u64>".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicOr".into(), + "value: u64".into(), + "atomicStore(&output, 0lu); atomicOr(&output, 3lu); atomicOr(&output, 6lu);".into(), + &[0], + &[7], + ) + .output_type("atomic<u64>".into()); + + tests.push(test); + + tests +} + +#[gpu_test] +static INT64_ATOMIC_ALL_OPS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgt::Features::SHADER_INT64 | wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_int64_atomic_all_ops_test(), + ) + }); + // See https://github.com/gfx-rs/wgpu/issues/5276 /* fn create_int64_polyfill_test() -> Vec<ShaderTest> { diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a2f0bf31d..e52f611f8 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -491,6 +491,16 @@ pub fn create_validator( Caps::SHADER_INT64, features.contains(wgt::Features::SHADER_INT64), ); + caps.set( + Caps::SHADER_INT64_ATOMIC_MIN_MAX, + features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX | wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS, + ), + ); + caps.set( + Caps::SHADER_INT64_ATOMIC_ALL_OPS, + features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS), + ); caps.set( Caps::MULTISAMPLED_SHADING, downlevel.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 650330061..a81f15fc3 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -354,6 +354,25 @@ impl 
super::Adapter { && features1.WaveOps != 0, ); + let atomic_int64_on_typed_resource_supported = { + let mut features9: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS9 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + 37, // D3D12_FEATURE_D3D12_OPTIONS9 + &mut features9 as *mut _ as *mut _, + mem::size_of::<crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS9>() as _, + ) + }; + hr == 0 + && features9.AtomicInt64OnGroupSharedSupported != 0 + && features9.AtomicInt64OnTypedResourceSupported != 0 + }; + features.set( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + atomic_int64_on_typed_resource_supported, + ); + // float32-filterable should always be available on d3d12 features.set(wgt::Features::FLOAT32_FILTERABLE, true); diff --git a/wgpu-hal/src/dx12/types.rs b/wgpu-hal/src/dx12/types.rs index 17b608b84..57a0d94a8 100644 --- a/wgpu-hal/src/dx12/types.rs +++ b/wgpu-hal/src/dx12/types.rs @@ -42,6 +42,24 @@ winapi::STRUCT! { } } +winapi::ENUM! { + enum D3D12_WAVE_MMA_TIER { + D3D12_WAVE_MMA_TIER_NOT_SUPPORTED = 0, + D3D12_WAVE_MMA_TIER_1_0 = 10, + } +} + +winapi::STRUCT! { + struct D3D12_FEATURE_DATA_D3D12_OPTIONS9 { + MeshShaderPipelineStatsSupported: winapi::shared::minwindef::BOOL, + MeshShaderSupportsFullRangeRenderTargetArrayIndex: winapi::shared::minwindef::BOOL, + AtomicInt64OnTypedResourceSupported: winapi::shared::minwindef::BOOL, + AtomicInt64OnGroupSharedSupported: winapi::shared::minwindef::BOOL, + DerivativesInMeshAndAmplificationShadersSupported: winapi::shared::minwindef::BOOL, + WaveMMATier: D3D12_WAVE_MMA_TIER, + } +} + winapi::ENUM! { enum D3D_SHADER_MODEL { D3D_SHADER_MODEL_NONE = 0, diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 0ffe37f5e..33de70f71 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -821,6 +821,11 @@ impl super::PrivateCapabilities { int64: family_check && (device.supports_family(MTLGPUFamily::Apple3) || device.supports_family(MTLGPUFamily::Metal3)), + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=6 + int64_atomics: family_check + && ((device.supports_family(MTLGPUFamily::Apple8) + && device.supports_family(MTLGPUFamily::Mac2)) + || device.supports_family(MTLGPUFamily::Apple9)), } } @@ -896,6 +901,10 @@ impl super::PrivateCapabilities { features.set( F::SHADER_INT64, self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, ); + features.set( + F::SHADER_INT64_ATOMIC_MIN_MAX, + self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4, + ); features.set( F::ADDRESS_MODE_CLAMP_TO_BORDER, diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 1867d7de4..b944bb6e9 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -272,6 +272,7 @@ struct PrivateCapabilities { timestamp_query_support: TimestampQuerySupport, supports_simd_scoped_operations: bool, int64: bool, + int64_atomics: bool, } #[derive(Clone, Debug)] diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index fe2a6f970..d3c0d4246 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -106,6 +106,9 @@ pub struct PhysicalDeviceFeatures { zero_initialize_workgroup_memory: Option<vk::PhysicalDeviceZeroInitializeWorkgroupMemoryFeatures<'static>>, + /// Features provided by `VK_KHR_shader_atomic_int64`, promoted to Vulkan 1.2. + shader_atomic_int64: Option<vk::PhysicalDeviceShaderAtomicInt64Features<'static>>, + /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3. 
subgroup_size_control: Option<vk::PhysicalDeviceSubgroupSizeControlFeatures<'static>>, } @@ -151,6 +154,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.ray_query { info = info.push_next(feature); } + if let Some(ref mut feature) = self.shader_atomic_int64 { + info = info.push_next(feature); + } if let Some(ref mut feature) = self.subgroup_size_control { info = info.push_next(feature); } @@ -419,6 +425,19 @@ impl PhysicalDeviceFeatures { } else { None }, + shader_atomic_int64: if device_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&khr::shader_atomic_int64::NAME) + { + Some( + vk::PhysicalDeviceShaderAtomicInt64Features::default() + .shader_buffer_int64_atomics(requested_features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS + | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + )), + ) + } else { + None + }, subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&ext::subgroup_size_control::NAME) { @@ -559,6 +578,14 @@ impl PhysicalDeviceFeatures { features.set(F::SHADER_INT64, self.core.shader_int64 != 0); features.set(F::SHADER_I16, self.core.shader_int16 != 0); + if let Some(ref shader_atomic_int64) = self.shader_atomic_int64 { + features.set( + F::SHADER_INT64_ATOMIC_ALL_OPS | F::SHADER_INT64_ATOMIC_MIN_MAX, + shader_atomic_int64.shader_buffer_int64_atomics != 0 + && shader_atomic_int64.shader_shared_int64_atomics != 0, + ); + } + //if caps.supports_extension(khr::sampler_mirror_clamp_to_edge::NAME) { //if caps.supports_extension(ext::sampler_filter_minmax::NAME) { features.set( @@ -964,6 +991,13 @@ impl PhysicalDeviceProperties { extensions.push(ext::texture_compression_astc_hdr::NAME); } + // Require `VK_KHR_shader_atomic_int64` if the associated feature was requested + if requested_features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + ) { + extensions.push(khr::shader_atomic_int64::NAME); + } + extensions } @@ -1681,6 +1715,13 @@ impl super::Adapter { capabilities.push(spv::Capability::Int64); } + if features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS + | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + ) { + capabilities.push(spv::Capability::Int64Atomics); + } + let mut flags = spv::WriterFlags::empty(); flags.set( spv::WriterFlags::DEBUG, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 943d8eb75..7fd29e0de 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -923,6 +923,23 @@ bitflags::bitflags! { /// - DX12 /// - Metal const PIPELINE_CACHE = 1 << 59; + /// Allows shaders to use i64 and u64 atomic min and max. + /// + /// Supported platforms: + /// - Vulkan (with VK_KHR_shader_atomic_int64) + /// - DX12 (with SM 6.6+) + /// - Metal (with MSL 2.4+) + /// + /// This is a native only feature. + const SHADER_INT64_ATOMIC_MIN_MAX = 1 << 60; + /// Allows shaders to use all i64 and u64 atomic operations. + /// + /// Supported platforms: + /// - Vulkan (with VK_KHR_shader_atomic_int64) + /// - DX12 (with SM 6.6+) + /// + /// This is a native only feature. + const SHADER_INT64_ATOMIC_ALL_OPS = 1 << 61; } }
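
Both flags above are opt-in, native-only features. A minimal usage sketch, not part of this diff: it assumes a wgpu 0.20-era API surface, and the `adapter` handle, shader, and function names below are illustrative only.

```rust
// Sketch only (not from this PR): how an application might opt in to the new
// 64-bit atomic feature flags and compile a WGSL shader that uses them.
const SHADER: &str = r#"
@group(0) @binding(0)
var<storage, read_write> counter: atomic<u64>;

@compute @workgroup_size(1)
fn main() {
    // Min and max are covered by SHADER_INT64_ATOMIC_MIN_MAX; the remaining
    // operations (add, and, or, xor, exchange, ...) also need SHADER_INT64_ATOMIC_ALL_OPS.
    atomicMax(&counter, 1lu);
}
"#;

async fn request_int64_atomic_device(
    adapter: &wgpu::Adapter,
) -> Option<(wgpu::Device, wgpu::Queue)> {
    let needed = wgpu::Features::SHADER_INT64 | wgpu::Features::SHADER_INT64_ATOMIC_MIN_MAX;
    if !adapter.features().contains(needed) {
        // The backend or hardware does not expose 64-bit atomic min/max.
        return None;
    }
    let (device, queue) = adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: Some("int64-atomics device"),
                required_features: needed,
                required_limits: wgpu::Limits::downlevel_defaults(),
            },
            None,
        )
        .await
        .ok()?;
    // Shader validation accepts `atomic<u64>` and the 64-bit `atomicMax` call
    // only because the corresponding features were requested above.
    let _module = device.create_shader_module(wgpu::ShaderModuleDescriptor {
        label: Some("int64-atomics shader"),
        source: wgpu::ShaderSource::Wgsl(SHADER.into()),
    });
    Some((device, queue))
}
```

Because Metal only reports `SHADER_INT64_ATOMIC_MIN_MAX` (see the Metal adapter change above), also requesting `SHADER_INT64_ATOMIC_ALL_OPS` there would make device creation fail, so an application should gate the two flags separately.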