From abba12ae4e5488b08d9e189fc37dab5e1755b443 Mon Sep 17 00:00:00 2001 From: Atlas Dostal Date: Sat, 8 Jun 2024 17:21:25 -0700 Subject: [PATCH] Add support for 64 bit integer atomic operations in shaders. Add the following flags to `wgpu_types::Features`: - `SHADER_INT64_ATOMIC_ALL_OPS` enables all atomic operations on `atomic<i64>` and `atomic<u64>` values. - `SHADER_INT64_ATOMIC_MIN_MAX` is a subset of the above, enabling only `AtomicFunction::Min` and `AtomicFunction::Max` operations on `atomic<i64>` and `atomic<u64>` values in the `Storage` address space. These are the only 64-bit atomic operations available on Metal as of 3.1. Add corresponding flags to `naga::valid::Capabilities`. These are supported by the WGSL front end, and all Naga backends. Platform support: - On Direct3d 12, in `D3D12_FEATURE_DATA_D3D12_OPTIONS9`, if `AtomicInt64OnTypedResourceSupported` and `AtomicInt64OnGroupSharedSupported` are both available, then both wgpu features described above are available. - On Metal, `SHADER_INT64_ATOMIC_MIN_MAX` is available on Apple9 hardware, and on hardware that advertises both Apple8 and Mac2 support. This also requires Metal Shading Language 2.4 or later. Metal does not yet support the more general `SHADER_INT64_ATOMIC_ALL_OPS`. - On Vulkan, if the `VK_KHR_shader_atomic_int64` extension is available with both the `shader_buffer_int64_atomics` and `shader_shared_int64_atomics` features, then both wgpu features described above are available. 
--- CHANGELOG.md | 35 ++- naga/src/back/dot/mod.rs | 4 +- naga/src/back/glsl/mod.rs | 12 +- naga/src/back/hlsl/writer.rs | 32 ++- naga/src/back/msl/mod.rs | 23 +- naga/src/back/msl/writer.rs | 47 +++- naga/src/back/pipeline_constants.rs | 4 +- naga/src/back/spv/block.rs | 10 +- naga/src/back/spv/writer.rs | 3 + naga/src/back/wgsl/writer.rs | 8 +- naga/src/compact/statements.rs | 8 +- naga/src/front/spv/mod.rs | 3 +- naga/src/front/type_gen.rs | 4 +- naga/src/front/wgsl/lower/mod.rs | 47 ++-- naga/src/lib.rs | 76 +++++- naga/src/valid/expression.rs | 36 +-- naga/src/valid/function.rs | 230 ++++++++++++---- naga/src/valid/handles.rs | 4 +- naga/src/valid/mod.rs | 12 + naga/src/valid/type.rs | 24 +- .../in/atomicCompareExchange-int64.param.ron | 15 ++ .../tests/in/atomicCompareExchange-int64.wgsl | 34 +++ .../in/atomicOps-int64-min-max.param.ron | 23 ++ naga/tests/in/atomicOps-int64-min-max.wgsl | 27 ++ naga/tests/in/atomicOps-int64.param.ron | 15 ++ naga/tests/in/atomicOps-int64.wgsl | 141 ++++++++++ naga/tests/in/int64.param.ron | 1 + .../out/hlsl/atomicOps-int64-min-max.hlsl | 30 +++ .../out/hlsl/atomicOps-int64-min-max.ron | 12 + naga/tests/out/hlsl/atomicOps-int64.hlsl | 118 +++++++++ naga/tests/out/hlsl/atomicOps-int64.ron | 12 + ...ides-atomicCompareExchangeWeak.compact.ron | 2 +- .../overrides-atomicCompareExchangeWeak.ron | 2 +- .../tests/out/msl/atomicOps-int64-min-max.msl | 33 +++ .../spv/atomicCompareExchange-int64.spvasm | 205 +++++++++++++++ .../out/spv/atomicOps-int64-min-max.spvasm | 82 ++++++ naga/tests/out/spv/atomicOps-int64.spvasm | 246 ++++++++++++++++++ .../out/wgsl/atomicCompareExchange-int64.wgsl | 90 +++++++ .../out/wgsl/atomicOps-int64-min-max.wgsl | 25 ++ naga/tests/out/wgsl/atomicOps-int64.wgsl | 107 ++++++++ naga/tests/snapshots.rs | 12 + naga/tests/validation.rs | 2 +- tests/tests/shader/mod.rs | 6 + tests/tests/shader/numeric_builtins.rs | 99 +++++++ wgpu-core/src/device/mod.rs | 10 + wgpu-hal/src/dx12/adapter.rs | 19 ++ 
wgpu-hal/src/dx12/types.rs | 18 ++ wgpu-hal/src/metal/adapter.rs | 9 + wgpu-hal/src/metal/mod.rs | 1 + wgpu-hal/src/vulkan/adapter.rs | 41 +++ wgpu-types/src/lib.rs | 17 ++ 51 files changed, 1916 insertions(+), 160 deletions(-) create mode 100644 naga/tests/in/atomicCompareExchange-int64.param.ron create mode 100644 naga/tests/in/atomicCompareExchange-int64.wgsl create mode 100644 naga/tests/in/atomicOps-int64-min-max.param.ron create mode 100644 naga/tests/in/atomicOps-int64-min-max.wgsl create mode 100644 naga/tests/in/atomicOps-int64.param.ron create mode 100644 naga/tests/in/atomicOps-int64.wgsl create mode 100644 naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl create mode 100644 naga/tests/out/hlsl/atomicOps-int64-min-max.ron create mode 100644 naga/tests/out/hlsl/atomicOps-int64.hlsl create mode 100644 naga/tests/out/hlsl/atomicOps-int64.ron create mode 100644 naga/tests/out/msl/atomicOps-int64-min-max.msl create mode 100644 naga/tests/out/spv/atomicCompareExchange-int64.spvasm create mode 100644 naga/tests/out/spv/atomicOps-int64-min-max.spvasm create mode 100644 naga/tests/out/spv/atomicOps-int64.spvasm create mode 100644 naga/tests/out/wgsl/atomicCompareExchange-int64.wgsl create mode 100644 naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl create mode 100644 naga/tests/out/wgsl/atomicOps-int64.wgsl diff --git a/CHANGELOG.md b/CHANGELOG.md index 78075b4a0..787cb76a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -81,8 +81,41 @@ for message in compilation_info By @stefnotch in [#5410](https://github.com/gfx-rs/wgpu/pull/5410) -### New features +#### 64 bit integer atomic support in shaders. +Add support for 64 bit integer atomic operations in shaders. + +Add the following flags to `wgpu_types::Features`: + +- `SHADER_INT64_ATOMIC_ALL_OPS` enables all atomic operations on `atomic<i64>` and + `atomic<u64>` values. 
+ + - `SHADER_INT64_ATOMIC_MIN_MAX` is a subset of the above, enabling only + `AtomicFunction::Min` and `AtomicFunction::Max` operations on `atomic<i64>` and + `atomic<u64>` values in the `Storage` address space. These are the only 64-bit + atomic operations available on Metal as of 3.1. + +Add corresponding flags to `naga::valid::Capabilities`. These are supported by the +WGSL front end, and all Naga backends. + +Platform support: + +- On Direct3d 12, in `D3D12_FEATURE_DATA_D3D12_OPTIONS9`, if + `AtomicInt64OnTypedResourceSupported` and `AtomicInt64OnGroupSharedSupported` are + both available, then both wgpu features described above are available. + +- On Metal, `SHADER_INT64_ATOMIC_MIN_MAX` is available on Apple9 hardware, and on + hardware that advertises both Apple8 and Mac2 support. This also requires Metal + Shading Language 2.4 or later. Metal does not yet support the more general + `SHADER_INT64_ATOMIC_ALL_OPS`. + +- On Vulkan, if the `VK_KHR_shader_atomic_int64` extension is available with both the + `shader_buffer_int64_atomics` and `shader_shared_int64_atomics` features, then both + wgpu features described above are available. + +By @atlv24 in [#5383](https://github.com/gfx-rs/wgpu/pull/5383) + +### New features #### Vulkan - Added a `PipelineCache` resource to allow using Vulkan pipeline caches. 
By @DJMcNab in [#5319](https://github.com/gfx-rs/wgpu/pull/5319) diff --git a/naga/src/back/dot/mod.rs b/naga/src/back/dot/mod.rs index 9a7702b3f..dffd5234b 100644 --- a/naga/src/back/dot/mod.rs +++ b/naga/src/back/dot/mod.rs @@ -244,7 +244,9 @@ impl StatementGraph { value, result, } => { - self.emits.push((id, result)); + if let Some(result) = result { + self.emits.push((id, result)); + } self.dependencies.push((id, pointer, "pointer")); self.dependencies.push((id, value, "value")); if let crate::AtomicFunction::Exchange { compare: Some(cmp) } = *fun { diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index 7138c2513..caca38254 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -2368,11 +2368,13 @@ impl<'a, W: Write> Writer<'a, W> { result, } => { write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - let res_ty = ctx.resolve_type(result, &self.module.types); - self.write_value_type(res_ty)?; - write!(self.out, " {res_name} = ")?; - self.named_expressions.insert(result, res_name); + if let Some(result) = result { + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + let res_ty = ctx.resolve_type(result, &self.module.types); + self.write_value_type(res_ty)?; + write!(self.out, " {res_name} = ")?; + self.named_expressions.insert(result, res_name); + } let fun_str = fun.to_glsl(); write!(self.out, "atomic{fun_str}(")?; diff --git a/naga/src/back/hlsl/writer.rs b/naga/src/back/hlsl/writer.rs index b4db0bcd7..dea37b6b2 100644 --- a/naga/src/back/hlsl/writer.rs +++ b/naga/src/back/hlsl/writer.rs @@ -1919,11 +1919,20 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { result, } => { write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - match func_ctx.info[result].ty { - proc::TypeResolution::Handle(handle) => self.write_type(module, handle)?, - proc::TypeResolution::Value(ref value) => { - self.write_value_type(module, 
value)? + let res_name = match result { + None => None, + Some(result) => { + let name = format!("{}{}", back::BAKE_PREFIX, result.index()); + match func_ctx.info[result].ty { + proc::TypeResolution::Handle(handle) => { + self.write_type(module, handle)? + } + proc::TypeResolution::Value(ref value) => { + self.write_value_type(module, value)? + } + }; + write!(self.out, " {name}; ")?; + Some((result, name)) } }; @@ -1934,7 +1943,6 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { .unwrap(); let fun_str = fun.to_hlsl_suffix(); - write!(self.out, " {res_name}; ")?; match pointer_space { crate::AddressSpace::WorkGroup => { write!(self.out, "Interlocked{fun_str}(")?; @@ -1970,8 +1978,16 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> { _ => {} } self.write_expr(module, value, func_ctx)?; - writeln!(self.out, ", {res_name});")?; - self.named_expressions.insert(result, res_name); + + // The `original_value` out parameter is optional for all the + // `Interlocked` functions we generate other than + // `InterlockedExchange`. + if let Some((result, name)) = res_name { + write!(self.out, ", {name}")?; + self.named_expressions.insert(result, name); + } + + writeln!(self.out, ");")?; } Statement::WorkGroupUniformLoad { pointer, result } => { self.write_barrier(crate::Barrier::WORK_GROUP, level)?; diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index d80d012ad..37e0b98d7 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -1,6 +1,9 @@ /*! Backend for [MSL][msl] (Metal Shading Language). +This backend does not support the [`SHADER_INT64_ATOMIC_ALL_OPS`][all-atom] +capability. + ## Binding model Metal's bindings are flat per resource. Since there isn't an obvious mapping @@ -24,6 +27,8 @@ For the result type, if it's a structure, we re-compose it with a temporary valu holding the result. 
[msl]: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf +[all-atom]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + */ use crate::{arena::Handle, proc::index, valid::ModuleInfo}; @@ -661,21 +666,3 @@ fn test_error_size() { use std::mem::size_of; assert_eq!(size_of::(), 32); } - -impl crate::AtomicFunction { - fn to_msl(self) -> Result<&'static str, Error> { - Ok(match self { - Self::Add => "fetch_add", - Self::Subtract => "fetch_sub", - Self::And => "fetch_and", - Self::InclusiveOr => "fetch_or", - Self::ExclusiveOr => "fetch_xor", - Self::Min => "fetch_min", - Self::Max => "fetch_max", - Self::Exchange { compare: None } => "exchange", - Self::Exchange { compare: Some(_) } => Err(Error::FeatureNotImplemented( - "atomic CompareExchange".to_string(), - ))?, - }) - } -} diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index ba2876713..009b57701 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -3058,11 +3058,22 @@ impl Writer { value, result, } => { + // This backend supports `SHADER_INT64_ATOMIC_MIN_MAX` but not + // `SHADER_INT64_ATOMIC_ALL_OPS`, so we can assume that if `result` is + // `Some`, we are not operating on a 64-bit value, and that if we are + // operating on a 64-bit value, `result` is `None`. write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - self.start_baking_expression(result, &context.expression, &res_name)?; - self.named_expressions.insert(result, res_name); - let fun_str = fun.to_msl()?; + let fun_str = if let Some(result) = result { + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + self.start_baking_expression(result, &context.expression, &res_name)?; + self.named_expressions.insert(result, res_name); + fun.to_msl()? + } else if context.expression.resolve_type(value).scalar_width() == Some(8) { + fun.to_msl_64_bit()? + } else { + fun.to_msl()? 
+ }; + self.put_atomic_operation(pointer, fun_str, value, &context.expression)?; // done writeln!(self.out, ";")?; @@ -5914,3 +5925,31 @@ fn test_stack_size() { } } } + +impl crate::AtomicFunction { + fn to_msl(self) -> Result<&'static str, Error> { + Ok(match self { + Self::Add => "fetch_add", + Self::Subtract => "fetch_sub", + Self::And => "fetch_and", + Self::InclusiveOr => "fetch_or", + Self::ExclusiveOr => "fetch_xor", + Self::Min => "fetch_min", + Self::Max => "fetch_max", + Self::Exchange { compare: None } => "exchange", + Self::Exchange { compare: Some(_) } => Err(Error::FeatureNotImplemented( + "atomic CompareExchange".to_string(), + ))?, + }) + } + + fn to_msl_64_bit(self) -> Result<&'static str, Error> { + Ok(match self { + Self::Min => "min", + Self::Max => "max", + _ => Err(Error::FeatureNotImplemented( + "64-bit atomic operation other than min/max".to_string(), + ))?, + }) + } +} diff --git a/naga/src/back/pipeline_constants.rs b/naga/src/back/pipeline_constants.rs index 4d976e366..2686a08a2 100644 --- a/naga/src/back/pipeline_constants.rs +++ b/naga/src/back/pipeline_constants.rs @@ -617,7 +617,9 @@ fn adjust_stmt(new_pos: &[Handle], stmt: &mut Statement) { } => { adjust(pointer); adjust(value); - adjust(result); + if let Some(ref mut result) = *result { + adjust(result); + } match *fun { crate::AtomicFunction::Exchange { compare: Some(ref mut compare), diff --git a/naga/src/back/spv/block.rs b/naga/src/back/spv/block.rs index 5e6dd0ab8..ad7514ae9 100644 --- a/naga/src/back/spv/block.rs +++ b/naga/src/back/spv/block.rs @@ -2423,9 +2423,15 @@ impl<'w> BlockContext<'w> { result, } => { let id = self.gen_id(); - let result_type_id = self.get_expression_type_id(&self.fun_info[result].ty); + // Compare-and-exchange operations produce a struct result, + // so use `result`'s type if it is available. For no-result + // operations, fall back to `value`'s type. 
+ let result_type_id = + self.get_expression_type_id(&self.fun_info[result.unwrap_or(value)].ty); - self.cached[result] = id; + if let Some(result) = result { + self.cached[result] = id; + } let pointer_id = match self.write_expression_pointer(pointer, &mut block, None)? { diff --git a/naga/src/back/spv/writer.rs b/naga/src/back/spv/writer.rs index 73a16c273..4b1aa3026 100644 --- a/naga/src/back/spv/writer.rs +++ b/naga/src/back/spv/writer.rs @@ -878,6 +878,9 @@ impl Writer { crate::TypeInner::RayQuery => { self.require_any("Ray Query", &[spirv::Capability::RayQueryKHR])?; } + crate::TypeInner::Atomic(crate::Scalar { width: 8, kind: _ }) => { + self.require_any("64 bit integer atomics", &[spirv::Capability::Int64Atomics])?; + } _ => {} } Ok(()) diff --git a/naga/src/back/wgsl/writer.rs b/naga/src/back/wgsl/writer.rs index 7c2887850..8b61dbd2c 100644 --- a/naga/src/back/wgsl/writer.rs +++ b/naga/src/back/wgsl/writer.rs @@ -754,9 +754,11 @@ impl Writer { result, } => { write!(self.out, "{level}")?; - let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); - self.start_named_expr(module, result, func_ctx, &res_name)?; - self.named_expressions.insert(result, res_name); + if let Some(result) = result { + let res_name = format!("{}{}", back::BAKE_PREFIX, result.index()); + self.start_named_expr(module, result, func_ctx, &res_name)?; + self.named_expressions.insert(result, res_name); + } let fun_str = fun.to_wgsl(); write!(self.out, "atomic{fun_str}(")?; diff --git a/naga/src/compact/statements.rs b/naga/src/compact/statements.rs index a124281bc..ba3e19f5b 100644 --- a/naga/src/compact/statements.rs +++ b/naga/src/compact/statements.rs @@ -75,7 +75,9 @@ impl FunctionTracer<'_> { self.expressions_used.insert(pointer); self.trace_atomic_function(fun); self.expressions_used.insert(value); - self.expressions_used.insert(result); + if let Some(result) = result { + self.expressions_used.insert(result); + } } St::WorkGroupUniformLoad { pointer, result } => { 
self.expressions_used.insert(pointer); @@ -255,7 +257,9 @@ impl FunctionMap { adjust(pointer); self.adjust_atomic_function(fun); adjust(value); - adjust(result); + if let Some(ref mut result) = *result { + adjust(result); + } } St::WorkGroupUniformLoad { ref mut pointer, diff --git a/naga/src/front/spv/mod.rs b/naga/src/front/spv/mod.rs index 480f77134..0301ea425 100644 --- a/naga/src/front/spv/mod.rs +++ b/naga/src/front/spv/mod.rs @@ -63,6 +63,7 @@ pub const SUPPORTED_CAPABILITIES: &[spirv::Capability] = &[ spirv::Capability::Int8, spirv::Capability::Int16, spirv::Capability::Int64, + spirv::Capability::Int64Atomics, spirv::Capability::Float16, spirv::Capability::Float64, spirv::Capability::Geometry, @@ -4028,7 +4029,7 @@ impl> Frontend { pointer: p_lexp_handle, fun: crate::AtomicFunction::Add, value: one_lexp_handle, - result: r_lexp_handle, + result: Some(r_lexp_handle), }; block.push(stmt, span); } diff --git a/naga/src/front/type_gen.rs b/naga/src/front/type_gen.rs index 34730c1db..1cd9f7f37 100644 --- a/naga/src/front/type_gen.rs +++ b/naga/src/front/type_gen.rs @@ -291,10 +291,10 @@ impl crate::Module { name: Some("exchanged".to_string()), ty: bool_ty, binding: None, - offset: 4, + offset: scalar.width as u32, }, ], - span: 8, + span: scalar.width as u32 * 2, }, } } diff --git a/naga/src/front/wgsl/lower/mod.rs b/naga/src/front/wgsl/lower/mod.rs index e7cce1772..7c5954d06 100644 --- a/naga/src/front/wgsl/lower/mod.rs +++ b/naga/src/front/wgsl/lower/mod.rs @@ -1491,6 +1491,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { function, arguments, &mut ctx.as_expression(block, &mut emitter), + true, )?; block.extend(emitter.finish(&ctx.function.expressions)); return Ok(()); @@ -1747,7 +1748,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { ref arguments, } => { let handle = self - .call(span, function, arguments, ctx)? + .call(span, function, arguments, ctx, false)? 
.ok_or(Error::FunctionReturnsVoid(function.span))?; return Ok(Typed::Plain(handle)); } @@ -1941,6 +1942,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { function: &ast::Ident<'source>, arguments: &[Handle>], ctx: &mut ExpressionContext<'source, '_, '_>, + is_statement: bool, ) -> Result>, Error<'source>> { match ctx.globals.get(function.name) { Some(&LoweredGlobalDecl::Type(ty)) => { @@ -2086,7 +2088,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { self.subgroup_gather_helper(span, mode, arguments, ctx)?, )); } else if let Some(fun) = crate::AtomicFunction::map(function.name) { - return Ok(Some(self.atomic_helper(span, fun, arguments, ctx)?)); + return self.atomic_helper(span, fun, arguments, is_statement, ctx); } else { match function.name { "select" => { @@ -2168,7 +2170,7 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { compare: Some(compare), }, value, - result, + result: Some(result), }, span, ); @@ -2459,25 +2461,38 @@ impl<'source, 'temp> Lowerer<'source, 'temp> { span: Span, fun: crate::AtomicFunction, args: &[Handle>], + is_statement: bool, ctx: &mut ExpressionContext<'source, '_, '_>, - ) -> Result, Error<'source>> { + ) -> Result>, Error<'source>> { let mut args = ctx.prepare_args(args, 2, span); let pointer = self.atomic_pointer(args.next()?, ctx)?; - - let value = args.next()?; - let value = self.expression(value, ctx)?; - let ty = ctx.register_type(value)?; - + let value = self.expression(args.next()?, ctx)?; + let value_inner = resolve_inner!(ctx, value); args.finish()?; - let result = ctx.interrupt_emitter( - crate::Expression::AtomicResult { - ty, - comparison: false, - }, - span, - )?; + // If we don't use the return value of a 64-bit `min` or `max` + // operation, generate a no-result form of the `Atomic` statement, so + // that we can pass validation with only `SHADER_INT64_ATOMIC_MIN_MAX` + // whenever possible. 
+ let is_64_bit_min_max = + matches!(fun, crate::AtomicFunction::Min | crate::AtomicFunction::Max) + && matches!( + *value_inner, + crate::TypeInner::Scalar(crate::Scalar { width: 8, .. }) + ); + let result = if is_64_bit_min_max && is_statement { + None + } else { + let ty = ctx.register_type(value)?; + Some(ctx.interrupt_emitter( + crate::Expression::AtomicResult { + ty, + comparison: false, + }, + span, + )?) + }; let rctx = ctx.runtime_expression_ctx(span)?; rctx.block.push( crate::Statement::Atomic { diff --git a/naga/src/lib.rs b/naga/src/lib.rs index 2ae0eb00c..b283244c6 100644 --- a/naga/src/lib.rs +++ b/naga/src/lib.rs @@ -1612,8 +1612,29 @@ pub enum Expression { }, /// Result of calling another function. CallResult(Handle), + /// Result of an atomic operation. + /// + /// This expression must be referred to by the [`result`] field of exactly one + /// [`Atomic`][stmt] statement somewhere in the same function. Let `T` be the + /// scalar type contained by the [`Atomic`][type] value that the statement + /// operates on. + /// + /// If `comparison` is `false`, then `ty` must be the scalar type `T`. + /// + /// If `comparison` is `true`, then `ty` must be a [`Struct`] with two members: + /// + /// - A member named `old_value`, whose type is `T`, and + /// + /// - A member named `exchanged`, of type [`BOOL`]. + /// + /// [`result`]: Statement::Atomic::result + /// [stmt]: Statement::Atomic + /// [type]: TypeInner::Atomic + /// [`Struct`]: TypeInner::Struct + /// [`BOOL`]: Scalar::BOOL AtomicResult { ty: Handle, comparison: bool }, + /// Result of a [`WorkGroupUniformLoad`] statement. /// /// [`WorkGroupUniformLoad`]: Statement::WorkGroupUniformLoad @@ -1890,15 +1911,66 @@ pub enum Statement { /// Atomic function. Atomic { /// Pointer to an atomic value. + /// + /// This must be a [`Pointer`] to an [`Atomic`] value. The atomic's + /// scalar type may be [`I32`] or [`U32`]. 
+ /// + /// If [`SHADER_INT64_ATOMIC_MIN_MAX`] or [`SHADER_INT64_ATOMIC_ALL_OPS`] are + /// enabled, this may also be [`I64`] or [`U64`]. + /// + /// [`Pointer`]: TypeInner::Pointer + /// [`Atomic`]: TypeInner::Atomic + /// [`I32`]: Scalar::I32 + /// [`U32`]: Scalar::U32 + /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX + /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + /// [`I64`]: Scalar::I64 + /// [`U64`]: Scalar::U64 pointer: Handle, - /// Function to run on the atomic. + + /// Function to run on the atomic value. + /// + /// If [`pointer`] refers to a 64-bit atomic value, then: + /// + /// - The [`SHADER_INT64_ATOMIC_ALL_OPS`] capability allows any [`AtomicFunction`] + /// value here. + /// + /// - The [`SHADER_INT64_ATOMIC_MIN_MAX`] capability allows + /// [`AtomicFunction::Min`] and [`AtomicFunction::Max`] here. + /// + /// - If neither of those capabilities are present, then 64-bit scalar + /// atomics are not allowed. + /// + /// [`pointer`]: Statement::Atomic::pointer + /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX + /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS fun: AtomicFunction, + /// Value to use in the function. + /// + /// This must be a scalar of the same type as [`pointer`]'s atomic's scalar type. + /// + /// [`pointer`]: Statement::Atomic::pointer value: Handle, + /// [`AtomicResult`] expression representing this function's result. /// + /// If [`fun`] is [`Exchange { compare: None }`], this must be `Some`, + /// as otherwise that operation would be equivalent to a simple [`Store`] + /// to the atomic. + /// + /// Otherwise, this may be `None` if the return value of the operation is not needed. + /// + /// If `pointer` refers to a 64-bit atomic value, [`SHADER_INT64_ATOMIC_MIN_MAX`] + /// is enabled, and [`SHADER_INT64_ATOMIC_ALL_OPS`] is not, this must be `None`. 
+ /// /// [`AtomicResult`]: crate::Expression::AtomicResult - result: Handle, + /// [`fun`]: Statement::Atomic::fun + /// [`Store`]: Statement::Store + /// [`Exchange { compare: None }`]: AtomicFunction::Exchange + /// [`SHADER_INT64_ATOMIC_MIN_MAX`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX + /// [`SHADER_INT64_ATOMIC_ALL_OPS`]: crate::valid::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + result: Option>, }, /// Load uniformly from a uniform pointer in the workgroup address space. /// diff --git a/naga/src/valid/expression.rs b/naga/src/valid/expression.rs index c6e225990..89bceae06 100644 --- a/naga/src/valid/expression.rs +++ b/naga/src/valid/expression.rs @@ -1,7 +1,4 @@ -use super::{ - compose::validate_compose, validate_atomic_compare_exchange_struct, FunctionInfo, ModuleInfo, - ShaderStages, TypeFlags, -}; +use super::{compose::validate_compose, FunctionInfo, ModuleInfo, ShaderStages, TypeFlags}; use crate::arena::UniqueArena; use crate::{ @@ -114,8 +111,6 @@ pub enum ExpressionError { WrongArgumentCount(crate::MathFunction), #[error("Argument [{1}] to {0:?} as expression {2:?} has an invalid type.")] InvalidArgumentType(crate::MathFunction, u32, Handle), - #[error("Atomic result type can't be {0:?}")] - InvalidAtomicResultType(Handle), #[error( "workgroupUniformLoad result type can't be {0:?}. It can only be a constructible type." )] @@ -1582,30 +1577,11 @@ impl super::Validator { ShaderStages::all() } E::CallResult(function) => mod_info.functions[function.index()].available_stages, - E::AtomicResult { ty, comparison } => { - let scalar_predicate = |ty: &crate::TypeInner| match ty { - &crate::TypeInner::Scalar( - scalar @ Sc { - kind: crate::ScalarKind::Uint | crate::ScalarKind::Sint, - .. - }, - ) => self.check_width(scalar).is_ok(), - _ => false, - }; - let good = match &module.types[ty].inner { - ty if !comparison => scalar_predicate(ty), - &crate::TypeInner::Struct { ref members, .. 
} if comparison => { - validate_atomic_compare_exchange_struct( - &module.types, - members, - scalar_predicate, - ) - } - _ => false, - }; - if !good { - return Err(ExpressionError::InvalidAtomicResultType(ty)); - } + E::AtomicResult { .. } => { + // These expressions are validated when we check the `Atomic` statement + // that refers to them, because we have all the information we need at + // that point. The checks driven by `Validator::needs_visit` ensure + // that this expression is indeed visited by one `Atomic` statement. ShaderStages::all() } E::WorkGroupUniformLoadResult { ty } => { diff --git a/naga/src/valid/function.rs b/naga/src/valid/function.rs index d92cda87f..ee80e4d8e 100644 --- a/naga/src/valid/function.rs +++ b/naga/src/valid/function.rs @@ -43,10 +43,22 @@ pub enum CallError { pub enum AtomicError { #[error("Pointer {0:?} to atomic is invalid.")] InvalidPointer(Handle), + #[error("Address space {0:?} does not support 64bit atomics.")] + InvalidAddressSpace(crate::AddressSpace), #[error("Operand {0:?} has invalid type.")] InvalidOperand(Handle), + #[error("Result expression {0:?} is not an `AtomicResult` expression")] + InvalidResultExpression(Handle), + #[error("Result expression {0:?} is marked as an `exchange`")] + ResultExpressionExchange(Handle), + #[error("Result expression {0:?} is not marked as an `exchange`")] + ResultExpressionNotExchange(Handle), #[error("Result type for {0:?} doesn't match the statement")] ResultTypeMismatch(Handle), + #[error("Exchange operations must return a value")] + MissingReturnValue, + #[error("Capability {0:?} is required")] + MissingCapability(super::Capabilities), #[error("Result expression {0:?} is populated by multiple `Atomic` statements")] ResultAlreadyPopulated(Handle), } @@ -350,79 +362,189 @@ impl super::Validator { pointer: Handle, fun: &crate::AtomicFunction, value: Handle, - result: Handle, + result: Option>, + span: crate::Span, context: &BlockContext, ) -> Result<(), WithSpan> { + // The 
`pointer` operand must be a pointer to an atomic value. let pointer_inner = context.resolve_type(pointer, &self.valid_expression_set)?; - let ptr_scalar = match *pointer_inner { - crate::TypeInner::Pointer { base, .. } => match context.types[base].inner { - crate::TypeInner::Atomic(scalar) => scalar, - ref other => { - log::error!("Atomic pointer to type {:?}", other); - return Err(AtomicError::InvalidPointer(pointer) - .with_span_handle(pointer, context.expressions) - .into_other()); - } - }, - ref other => { - log::error!("Atomic on type {:?}", other); - return Err(AtomicError::InvalidPointer(pointer) - .with_span_handle(pointer, context.expressions) - .into_other()); - } + let crate::TypeInner::Pointer { + base: pointer_base, + space: pointer_space, + } = *pointer_inner + else { + log::error!("Atomic operation on type {:?}", *pointer_inner); + return Err(AtomicError::InvalidPointer(pointer) + .with_span_handle(pointer, context.expressions) + .into_other()); + }; + let crate::TypeInner::Atomic(pointer_scalar) = context.types[pointer_base].inner else { + log::error!( + "Atomic pointer to type {:?}", + context.types[pointer_base].inner + ); + return Err(AtomicError::InvalidPointer(pointer) + .with_span_handle(pointer, context.expressions) + .into_other()); }; + // The `value` operand must be a scalar of the same type as the atomic. 
let value_inner = context.resolve_type(value, &self.valid_expression_set)?; - match *value_inner { - crate::TypeInner::Scalar(scalar) if scalar == ptr_scalar => {} - ref other => { - log::error!("Atomic operand type {:?}", other); - return Err(AtomicError::InvalidOperand(value) + let crate::TypeInner::Scalar(value_scalar) = *value_inner else { + log::error!("Atomic operand type {:?}", *value_inner); + return Err(AtomicError::InvalidOperand(value) + .with_span_handle(value, context.expressions) + .into_other()); + }; + if pointer_scalar != value_scalar { + log::error!("Atomic operand type {:?}", *value_inner); + return Err(AtomicError::InvalidOperand(value) + .with_span_handle(value, context.expressions) + .into_other()); + } + + // Check for the special restrictions on 64-bit atomic operations. + // + // We don't need to consider other widths here: this function has already checked + // that `pointer`'s type is an `Atomic`, and `validate_type` has already checked + // that that `Atomic` type has a permitted scalar width. + if pointer_scalar.width == 8 { + // `Capabilities::SHADER_INT64_ATOMIC_ALL_OPS` enables all sorts of 64-bit + // atomic operations. + if self + .capabilities + .contains(super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS) + { + // okay + } else { + // `Capabilities::SHADER_INT64_ATOMIC_MIN_MAX` allows `Min` and + // `Max` on operations in `Storage`, without a return value. + if matches!( + *fun, + crate::AtomicFunction::Min | crate::AtomicFunction::Max + ) && matches!(pointer_space, crate::AddressSpace::Storage { .. }) + && result.is_none() + { + if !self + .capabilities + .contains(super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX) + { + log::error!("Int64 min-max atomic operations are not supported"); + return Err(AtomicError::MissingCapability( + super::Capabilities::SHADER_INT64_ATOMIC_MIN_MAX, + ) + .with_span_handle(value, context.expressions) + .into_other()); + } + } else { + // Otherwise, we require the full 64-bit atomic capability. 
+ log::error!("Int64 atomic operations are not supported"); + return Err(AtomicError::MissingCapability( + super::Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, + ) .with_span_handle(value, context.expressions) .into_other()); + } } } - if let crate::AtomicFunction::Exchange { compare: Some(cmp) } = *fun { - if context.resolve_type(cmp, &self.valid_expression_set)? != value_inner { - log::error!("Atomic exchange comparison has a different type from the value"); - return Err(AtomicError::InvalidOperand(cmp) - .with_span_handle(cmp, context.expressions) - .into_other()); - } - } + // The result expression must be appropriate to the operation. + match result { + Some(result) => { + // The `result` handle must refer to an `AtomicResult` expression. + let crate::Expression::AtomicResult { + ty: result_ty, + comparison, + } = context.expressions[result] + else { + return Err(AtomicError::InvalidResultExpression(result) + .with_span_handle(result, context.expressions) + .into_other()); + }; - self.emit_expression(result, context)?; - match context.expressions[result] { - crate::Expression::AtomicResult { ty, comparison } - if { - let scalar_predicate = - |ty: &crate::TypeInner| *ty == crate::TypeInner::Scalar(ptr_scalar); - match &context.types[ty].inner { - ty if !comparison => scalar_predicate(ty), - &crate::TypeInner::Struct { ref members, .. } if comparison => { - validate_atomic_compare_exchange_struct( - context.types, - members, - scalar_predicate, - ) - } - _ => false, - } - } => - { + // Note that this expression has been visited by the proper kind + // of statement. if !self.needs_visit.remove(result.index()) { return Err(AtomicError::ResultAlreadyPopulated(result) .with_span_handle(result, context.expressions) .into_other()); } + + // The constraints on the result type depend on the atomic function. 
+ if let crate::AtomicFunction::Exchange { + compare: Some(compare), + } = *fun + { + // The comparison value must be a scalar of the same type as the + // atomic we're operating on. + let compare_inner = + context.resolve_type(compare, &self.valid_expression_set)?; + if !compare_inner.equivalent(value_inner, context.types) { + log::error!( + "Atomic exchange comparison has a different type from the value" + ); + return Err(AtomicError::InvalidOperand(compare) + .with_span_handle(compare, context.expressions) + .into_other()); + } + + // The result expression must be an `__atomic_compare_exchange_result` + // struct whose `old_value` member is of the same type as the atomic + // we're operating on. + let crate::TypeInner::Struct { ref members, .. } = + context.types[result_ty].inner + else { + return Err(AtomicError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()); + }; + if !validate_atomic_compare_exchange_struct( + context.types, + members, + |ty: &crate::TypeInner| *ty == crate::TypeInner::Scalar(pointer_scalar), + ) { + return Err(AtomicError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + + // The result expression must be for a comparison operation. + if !comparison { + return Err(AtomicError::ResultExpressionNotExchange(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + } else { + // The result expression must be a scalar of the same type as the + // atomic we're operating on. + let result_inner = &context.types[result_ty].inner; + if !result_inner.equivalent(value_inner, context.types) { + return Err(AtomicError::ResultTypeMismatch(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + + // The result expression must not be for a comparison. 
+ if comparison { + return Err(AtomicError::ResultExpressionExchange(result) + .with_span_handle(result, context.expressions) + .into_other()); + } + } + self.emit_expression(result, context)?; } - _ => { - return Err(AtomicError::ResultTypeMismatch(result) - .with_span_handle(result, context.expressions) - .into_other()) + + None => { + // Exchange operations must always produce a value. + if let crate::AtomicFunction::Exchange { compare: None } = *fun { + log::error!("Atomic exchange's value is unused"); + return Err(AtomicError::MissingReturnValue + .with_span_static(span, "atomic exchange operation") + .into_other()); + } } } + Ok(()) } fn validate_subgroup_operation( @@ -1017,7 +1139,7 @@ impl super::Validator { value, result, } => { - self.validate_atomic(pointer, fun, value, result, context)?; + self.validate_atomic(pointer, fun, value, result, span, context)?; } S::WorkGroupUniformLoad { pointer, result } => { stages &= super::ShaderStages::COMPUTE; diff --git a/naga/src/valid/handles.rs b/naga/src/valid/handles.rs index 8f7820405..297b67dff 100644 --- a/naga/src/valid/handles.rs +++ b/naga/src/valid/handles.rs @@ -530,7 +530,9 @@ impl super::Validator { crate::AtomicFunction::Exchange { compare } => validate_expr_opt(compare)?, }; validate_expr(value)?; - validate_expr(result)?; + if let Some(result) = result { + validate_expr(result)?; + } Ok(()) } crate::Statement::WorkGroupUniformLoad { pointer, result } => { diff --git a/naga/src/valid/mod.rs b/naga/src/valid/mod.rs index d86c23c1e..113dc0cd3 100644 --- a/naga/src/valid/mod.rs +++ b/naga/src/valid/mod.rs @@ -127,6 +127,18 @@ bitflags::bitflags! { const SUBGROUP = 0x10000; /// Support for subgroup barriers. const SUBGROUP_BARRIER = 0x20000; + /// Support for [`AtomicFunction::Min`] and [`AtomicFunction::Max`] on + /// 64-bit integers in the [`Storage`] address space, when the return + /// value is not used. + /// + /// This is the only 64-bit atomic functionality available on Metal 3.1. 
+ /// + /// [`AtomicFunction::Min`]: crate::AtomicFunction::Min + /// [`AtomicFunction::Max`]: crate::AtomicFunction::Max + /// [`Storage`]: crate::AddressSpace::Storage + const SHADER_INT64_ATOMIC_MIN_MAX = 0x40000; + /// Support for all atomic operations on 64-bit integers. + const SHADER_INT64_ATOMIC_ALL_OPS = 0x80000; } } diff --git a/naga/src/valid/type.rs b/naga/src/valid/type.rs index ff33e37cb..32d5d58f1 100644 --- a/naga/src/valid/type.rs +++ b/naga/src/valid/type.rs @@ -360,16 +360,28 @@ impl super::Validator { ) } Ti::Atomic(crate::Scalar { kind, width }) => { - let good = match kind { + match kind { crate::ScalarKind::Bool | crate::ScalarKind::Float | crate::ScalarKind::AbstractInt - | crate::ScalarKind::AbstractFloat => false, - crate::ScalarKind::Sint | crate::ScalarKind::Uint => width == 4, + | crate::ScalarKind::AbstractFloat => { + return Err(TypeError::InvalidAtomicWidth(kind, width)) + } + crate::ScalarKind::Sint | crate::ScalarKind::Uint => { + if width == 8 { + if !self.capabilities.intersects( + Capabilities::SHADER_INT64_ATOMIC_ALL_OPS + | Capabilities::SHADER_INT64_ATOMIC_MIN_MAX, + ) { + return Err(TypeError::MissingCapability( + Capabilities::SHADER_INT64_ATOMIC_ALL_OPS, + )); + } + } else if width != 4 { + return Err(TypeError::InvalidAtomicWidth(kind, width)); + } + } }; - if !good { - return Err(TypeError::InvalidAtomicWidth(kind, width)); - } TypeInfo::new( TypeFlags::DATA | TypeFlags::SIZED | TypeFlags::HOST_SHAREABLE, Alignment::from_width(width), diff --git a/naga/tests/in/atomicCompareExchange-int64.param.ron b/naga/tests/in/atomicCompareExchange-int64.param.ron new file mode 100644 index 000000000..ba6291cb8 --- /dev/null +++ b/naga/tests/in/atomicCompareExchange-int64.param.ron @@ -0,0 +1,15 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + 
push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicCompareExchange-int64.wgsl b/naga/tests/in/atomicCompareExchange-int64.wgsl new file mode 100644 index 000000000..84f93880b --- /dev/null +++ b/naga/tests/in/atomicCompareExchange-int64.wgsl @@ -0,0 +1,34 @@ +const SIZE: u32 = 128u; + +@group(0) @binding(0) +var arr_i64: array, SIZE>; +@group(0) @binding(1) +var arr_u64: array, SIZE>; + +@compute @workgroup_size(1) +fn test_atomic_compare_exchange_i64() { + for(var i = 0u; i < SIZE; i++) { + var old : i64 = atomicLoad(&arr_i64[i]); + var exchanged = false; + while(!exchanged) { + let new_ : i64 = bitcast(old + 10li); + let result = atomicCompareExchangeWeak(&arr_i64[i], old, new_); + old = result.old_value; + exchanged = result.exchanged; + } + } +} + +@compute @workgroup_size(1) +fn test_atomic_compare_exchange_u64() { + for(var i = 0u; i < SIZE; i++) { + var old : u64 = atomicLoad(&arr_u64[i]); + var exchanged = false; + while(!exchanged) { + let new_ : u64 = bitcast(old + 10lu); + let result = atomicCompareExchangeWeak(&arr_u64[i], old, new_); + old = result.old_value; + exchanged = result.exchanged; + } + } +} diff --git a/naga/tests/in/atomicOps-int64-min-max.param.ron b/naga/tests/in/atomicOps-int64-min-max.param.ron new file mode 100644 index 000000000..11b4b0d73 --- /dev/null +++ b/naga/tests/in/atomicOps-int64-min-max.param.ron @@ -0,0 +1,23 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), + msl: ( + lang_version: (2, 4), + per_entry_point_map: {}, + inline_samplers: [], + spirv_cross_compatibility: false, + fake_missing_bindings: true, + zero_initialize_workgroup_memory: true, + ), 
+) diff --git a/naga/tests/in/atomicOps-int64-min-max.wgsl b/naga/tests/in/atomicOps-int64-min-max.wgsl new file mode 100644 index 000000000..94e6aa686 --- /dev/null +++ b/naga/tests/in/atomicOps-int64-min-max.wgsl @@ -0,0 +1,27 @@ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicMax(&storage_atomic_scalar, 1lu); + atomicMax(&storage_atomic_arr[1], 1lu); + atomicMax(&storage_struct.atomic_scalar, 1lu); + atomicMax(&storage_struct.atomic_arr[1], 1lu); + + workgroupBarrier(); + + atomicMin(&storage_atomic_scalar, 1lu); + atomicMin(&storage_atomic_arr[1], 1lu); + atomicMin(&storage_struct.atomic_scalar, 1lu); + atomicMin(&storage_struct.atomic_arr[1], 1lu); +} diff --git a/naga/tests/in/atomicOps-int64.param.ron b/naga/tests/in/atomicOps-int64.param.ron new file mode 100644 index 000000000..ba6291cb8 --- /dev/null +++ b/naga/tests/in/atomicOps-int64.param.ron @@ -0,0 +1,15 @@ +( + god_mode: true, + spv: ( + version: (1, 0), + capabilities: [ Int64, Int64Atomics ], + ), + hlsl: ( + shader_model: V6_6, + binding_map: {}, + fake_missing_bindings: true, + special_constants_binding: Some((space: 1, register: 0)), + push_constants_target: Some((space: 0, register: 0)), + zero_initialize_workgroup_memory: true, + ), +) diff --git a/naga/tests/in/atomicOps-int64.wgsl b/naga/tests/in/atomicOps-int64.wgsl new file mode 100644 index 000000000..42857d2fa --- /dev/null +++ b/naga/tests/in/atomicOps-int64.wgsl @@ -0,0 +1,141 @@ +// This test covers the cross product of: +// +// * All int64 atomic operations. +// * On all applicable scopes (storage read-write, workgroup). +// * For all shapes of modeling atomic data. 
+ +struct Struct { + atomic_scalar: atomic, + atomic_arr: array, 2>, +} + +@group(0) @binding(0) +var storage_atomic_scalar: atomic; +@group(0) @binding(1) +var storage_atomic_arr: array, 2>; +@group(0) @binding(2) +var storage_struct: Struct; + +var workgroup_atomic_scalar: atomic; +var workgroup_atomic_arr: array, 2>; +var workgroup_struct: Struct; + +@compute +@workgroup_size(2) +fn cs_main(@builtin(local_invocation_id) id: vec3) { + atomicStore(&storage_atomic_scalar, 1lu); + atomicStore(&storage_atomic_arr[1], 1li); + atomicStore(&storage_struct.atomic_scalar, 1lu); + atomicStore(&storage_struct.atomic_arr[1], 1li); + atomicStore(&workgroup_atomic_scalar, 1lu); + atomicStore(&workgroup_atomic_arr[1], 1li); + atomicStore(&workgroup_struct.atomic_scalar, 1lu); + atomicStore(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + let l0 = atomicLoad(&storage_atomic_scalar); + let l1 = atomicLoad(&storage_atomic_arr[1]); + let l2 = atomicLoad(&storage_struct.atomic_scalar); + let l3 = atomicLoad(&storage_struct.atomic_arr[1]); + let l4 = atomicLoad(&workgroup_atomic_scalar); + let l5 = atomicLoad(&workgroup_atomic_arr[1]); + let l6 = atomicLoad(&workgroup_struct.atomic_scalar); + let l7 = atomicLoad(&workgroup_struct.atomic_arr[1]); + + workgroupBarrier(); + + atomicAdd(&storage_atomic_scalar, 1lu); + atomicAdd(&storage_atomic_arr[1], 1li); + atomicAdd(&storage_struct.atomic_scalar, 1lu); + atomicAdd(&storage_struct.atomic_arr[1], 1li); + atomicAdd(&workgroup_atomic_scalar, 1lu); + atomicAdd(&workgroup_atomic_arr[1], 1li); + atomicAdd(&workgroup_struct.atomic_scalar, 1lu); + atomicAdd(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicSub(&storage_atomic_scalar, 1lu); + atomicSub(&storage_atomic_arr[1], 1li); + atomicSub(&storage_struct.atomic_scalar, 1lu); + atomicSub(&storage_struct.atomic_arr[1], 1li); + atomicSub(&workgroup_atomic_scalar, 1lu); + atomicSub(&workgroup_atomic_arr[1], 1li); + 
atomicSub(&workgroup_struct.atomic_scalar, 1lu); + atomicSub(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicMax(&storage_atomic_scalar, 1lu); + atomicMax(&storage_atomic_arr[1], 1li); + atomicMax(&storage_struct.atomic_scalar, 1lu); + atomicMax(&storage_struct.atomic_arr[1], 1li); + atomicMax(&workgroup_atomic_scalar, 1lu); + atomicMax(&workgroup_atomic_arr[1], 1li); + atomicMax(&workgroup_struct.atomic_scalar, 1lu); + atomicMax(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicMin(&storage_atomic_scalar, 1lu); + atomicMin(&storage_atomic_arr[1], 1li); + atomicMin(&storage_struct.atomic_scalar, 1lu); + atomicMin(&storage_struct.atomic_arr[1], 1li); + atomicMin(&workgroup_atomic_scalar, 1lu); + atomicMin(&workgroup_atomic_arr[1], 1li); + atomicMin(&workgroup_struct.atomic_scalar, 1lu); + atomicMin(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicAnd(&storage_atomic_scalar, 1lu); + atomicAnd(&storage_atomic_arr[1], 1li); + atomicAnd(&storage_struct.atomic_scalar, 1lu); + atomicAnd(&storage_struct.atomic_arr[1], 1li); + atomicAnd(&workgroup_atomic_scalar, 1lu); + atomicAnd(&workgroup_atomic_arr[1], 1li); + atomicAnd(&workgroup_struct.atomic_scalar, 1lu); + atomicAnd(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicOr(&storage_atomic_scalar, 1lu); + atomicOr(&storage_atomic_arr[1], 1li); + atomicOr(&storage_struct.atomic_scalar, 1lu); + atomicOr(&storage_struct.atomic_arr[1], 1li); + atomicOr(&workgroup_atomic_scalar, 1lu); + atomicOr(&workgroup_atomic_arr[1], 1li); + atomicOr(&workgroup_struct.atomic_scalar, 1lu); + atomicOr(&workgroup_struct.atomic_arr[1], 1li); + + workgroupBarrier(); + + atomicXor(&storage_atomic_scalar, 1lu); + atomicXor(&storage_atomic_arr[1], 1li); + atomicXor(&storage_struct.atomic_scalar, 1lu); + atomicXor(&storage_struct.atomic_arr[1], 1li); + atomicXor(&workgroup_atomic_scalar, 1lu); + atomicXor(&workgroup_atomic_arr[1], 1li); + 
atomicXor(&workgroup_struct.atomic_scalar, 1lu); + atomicXor(&workgroup_struct.atomic_arr[1], 1li); + + atomicExchange(&storage_atomic_scalar, 1lu); + atomicExchange(&storage_atomic_arr[1], 1li); + atomicExchange(&storage_struct.atomic_scalar, 1lu); + atomicExchange(&storage_struct.atomic_arr[1], 1li); + atomicExchange(&workgroup_atomic_scalar, 1lu); + atomicExchange(&workgroup_atomic_arr[1], 1li); + atomicExchange(&workgroup_struct.atomic_scalar, 1lu); + atomicExchange(&workgroup_struct.atomic_arr[1], 1li); + + // // TODO: https://github.com/gpuweb/gpuweb/issues/2021 + // atomicCompareExchangeWeak(&storage_atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&storage_atomic_arr[1], 1li); + // atomicCompareExchangeWeak(&storage_struct.atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&storage_struct.atomic_arr[1], 1li); + // atomicCompareExchangeWeak(&workgroup_atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&workgroup_atomic_arr[1], 1li); + // atomicCompareExchangeWeak(&workgroup_struct.atomic_scalar, 1lu); + // atomicCompareExchangeWeak(&workgroup_struct.atomic_arr[1], 1li); +} diff --git a/naga/tests/in/int64.param.ron b/naga/tests/in/int64.param.ron index 15348b905..0e76c83e4 100644 --- a/naga/tests/in/int64.param.ron +++ b/naga/tests/in/int64.param.ron @@ -2,6 +2,7 @@ god_mode: true, spv: ( version: (1, 0), + capabilities: [ Int64 ], ), hlsl: ( shader_model: V6_0, diff --git a/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl new file mode 100644 index 000000000..8c52e5b3b --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64-min-max.hlsl @@ -0,0 +1,30 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +struct Struct { + uint64_t atomic_scalar; + uint64_t atomic_arr[2]; +}; + +RWByteAddressBuffer storage_atomic_scalar : register(u0); +RWByteAddressBuffer storage_atomic_arr : register(u1); +RWByteAddressBuffer 
storage_struct : register(u2); + +[numthreads(2, 1, 1)] +void cs_main(uint3 id : SV_GroupThreadID) +{ + storage_atomic_scalar.InterlockedMax(0, 1uL); + storage_atomic_arr.InterlockedMax(8, 1uL); + storage_struct.InterlockedMax(0, 1uL); + storage_struct.InterlockedMax(8+8, 1uL); + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.InterlockedMin(0, 1uL); + storage_atomic_arr.InterlockedMin(8, 1uL); + storage_struct.InterlockedMin(0, 1uL); + storage_struct.InterlockedMin(8+8, 1uL); + return; +} diff --git a/naga/tests/out/hlsl/atomicOps-int64-min-max.ron b/naga/tests/out/hlsl/atomicOps-int64-min-max.ron new file mode 100644 index 000000000..67a903551 --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64-min-max.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"cs_main", + target_profile:"cs_6_6", + ), + ], +) diff --git a/naga/tests/out/hlsl/atomicOps-int64.hlsl b/naga/tests/out/hlsl/atomicOps-int64.hlsl new file mode 100644 index 000000000..973cf0730 --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64.hlsl @@ -0,0 +1,118 @@ +struct NagaConstants { + int first_vertex; + int first_instance; + uint other; +}; +ConstantBuffer _NagaConstants: register(b0, space1); + +struct Struct { + uint64_t atomic_scalar; + int64_t atomic_arr[2]; +}; + +RWByteAddressBuffer storage_atomic_scalar : register(u0); +RWByteAddressBuffer storage_atomic_arr : register(u1); +RWByteAddressBuffer storage_struct : register(u2); +groupshared uint64_t workgroup_atomic_scalar; +groupshared int64_t workgroup_atomic_arr[2]; +groupshared Struct workgroup_struct; + +[numthreads(2, 1, 1)] +void cs_main(uint3 id : SV_GroupThreadID, uint3 __local_invocation_id : SV_GroupThreadID) +{ + if (all(__local_invocation_id == uint3(0u, 0u, 0u))) { + workgroup_atomic_scalar = (uint64_t)0; + workgroup_atomic_arr = (int64_t[2])0; + workgroup_struct = (Struct)0; + } + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.Store(0, 1uL); + 
storage_atomic_arr.Store(8, 1L); + storage_struct.Store(0, 1uL); + storage_struct.Store(8+8, 1L); + workgroup_atomic_scalar = 1uL; + workgroup_atomic_arr[1] = 1L; + workgroup_struct.atomic_scalar = 1uL; + workgroup_struct.atomic_arr[1] = 1L; + GroupMemoryBarrierWithGroupSync(); + uint64_t l0_ = storage_atomic_scalar.Load(0); + int64_t l1_ = storage_atomic_arr.Load(8); + uint64_t l2_ = storage_struct.Load(0); + int64_t l3_ = storage_struct.Load(8+8); + uint64_t l4_ = workgroup_atomic_scalar; + int64_t l5_ = workgroup_atomic_arr[1]; + uint64_t l6_ = workgroup_struct.atomic_scalar; + int64_t l7_ = workgroup_struct.atomic_arr[1]; + GroupMemoryBarrierWithGroupSync(); + uint64_t _e51; storage_atomic_scalar.InterlockedAdd(0, 1uL, _e51); + int64_t _e55; storage_atomic_arr.InterlockedAdd(8, 1L, _e55); + uint64_t _e59; storage_struct.InterlockedAdd(0, 1uL, _e59); + int64_t _e64; storage_struct.InterlockedAdd(8+8, 1L, _e64); + uint64_t _e67; InterlockedAdd(workgroup_atomic_scalar, 1uL, _e67); + int64_t _e71; InterlockedAdd(workgroup_atomic_arr[1], 1L, _e71); + uint64_t _e75; InterlockedAdd(workgroup_struct.atomic_scalar, 1uL, _e75); + int64_t _e80; InterlockedAdd(workgroup_struct.atomic_arr[1], 1L, _e80); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e83; storage_atomic_scalar.InterlockedAdd(0, -1uL, _e83); + int64_t _e87; storage_atomic_arr.InterlockedAdd(8, -1L, _e87); + uint64_t _e91; storage_struct.InterlockedAdd(0, -1uL, _e91); + int64_t _e96; storage_struct.InterlockedAdd(8+8, -1L, _e96); + uint64_t _e99; InterlockedAdd(workgroup_atomic_scalar, -1uL, _e99); + int64_t _e103; InterlockedAdd(workgroup_atomic_arr[1], -1L, _e103); + uint64_t _e107; InterlockedAdd(workgroup_struct.atomic_scalar, -1uL, _e107); + int64_t _e112; InterlockedAdd(workgroup_struct.atomic_arr[1], -1L, _e112); + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.InterlockedMax(0, 1uL); + storage_atomic_arr.InterlockedMax(8, 1L); + storage_struct.InterlockedMax(0, 1uL); + 
storage_struct.InterlockedMax(8+8, 1L); + InterlockedMax(workgroup_atomic_scalar, 1uL); + InterlockedMax(workgroup_atomic_arr[1], 1L); + InterlockedMax(workgroup_struct.atomic_scalar, 1uL); + InterlockedMax(workgroup_struct.atomic_arr[1], 1L); + GroupMemoryBarrierWithGroupSync(); + storage_atomic_scalar.InterlockedMin(0, 1uL); + storage_atomic_arr.InterlockedMin(8, 1L); + storage_struct.InterlockedMin(0, 1uL); + storage_struct.InterlockedMin(8+8, 1L); + InterlockedMin(workgroup_atomic_scalar, 1uL); + InterlockedMin(workgroup_atomic_arr[1], 1L); + InterlockedMin(workgroup_struct.atomic_scalar, 1uL); + InterlockedMin(workgroup_struct.atomic_arr[1], 1L); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e163; storage_atomic_scalar.InterlockedAnd(0, 1uL, _e163); + int64_t _e167; storage_atomic_arr.InterlockedAnd(8, 1L, _e167); + uint64_t _e171; storage_struct.InterlockedAnd(0, 1uL, _e171); + int64_t _e176; storage_struct.InterlockedAnd(8+8, 1L, _e176); + uint64_t _e179; InterlockedAnd(workgroup_atomic_scalar, 1uL, _e179); + int64_t _e183; InterlockedAnd(workgroup_atomic_arr[1], 1L, _e183); + uint64_t _e187; InterlockedAnd(workgroup_struct.atomic_scalar, 1uL, _e187); + int64_t _e192; InterlockedAnd(workgroup_struct.atomic_arr[1], 1L, _e192); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e195; storage_atomic_scalar.InterlockedOr(0, 1uL, _e195); + int64_t _e199; storage_atomic_arr.InterlockedOr(8, 1L, _e199); + uint64_t _e203; storage_struct.InterlockedOr(0, 1uL, _e203); + int64_t _e208; storage_struct.InterlockedOr(8+8, 1L, _e208); + uint64_t _e211; InterlockedOr(workgroup_atomic_scalar, 1uL, _e211); + int64_t _e215; InterlockedOr(workgroup_atomic_arr[1], 1L, _e215); + uint64_t _e219; InterlockedOr(workgroup_struct.atomic_scalar, 1uL, _e219); + int64_t _e224; InterlockedOr(workgroup_struct.atomic_arr[1], 1L, _e224); + GroupMemoryBarrierWithGroupSync(); + uint64_t _e227; storage_atomic_scalar.InterlockedXor(0, 1uL, _e227); + int64_t _e231; 
storage_atomic_arr.InterlockedXor(8, 1L, _e231); + uint64_t _e235; storage_struct.InterlockedXor(0, 1uL, _e235); + int64_t _e240; storage_struct.InterlockedXor(8+8, 1L, _e240); + uint64_t _e243; InterlockedXor(workgroup_atomic_scalar, 1uL, _e243); + int64_t _e247; InterlockedXor(workgroup_atomic_arr[1], 1L, _e247); + uint64_t _e251; InterlockedXor(workgroup_struct.atomic_scalar, 1uL, _e251); + int64_t _e256; InterlockedXor(workgroup_struct.atomic_arr[1], 1L, _e256); + uint64_t _e259; storage_atomic_scalar.InterlockedExchange(0, 1uL, _e259); + int64_t _e263; storage_atomic_arr.InterlockedExchange(8, 1L, _e263); + uint64_t _e267; storage_struct.InterlockedExchange(0, 1uL, _e267); + int64_t _e272; storage_struct.InterlockedExchange(8+8, 1L, _e272); + uint64_t _e275; InterlockedExchange(workgroup_atomic_scalar, 1uL, _e275); + int64_t _e279; InterlockedExchange(workgroup_atomic_arr[1], 1L, _e279); + uint64_t _e283; InterlockedExchange(workgroup_struct.atomic_scalar, 1uL, _e283); + int64_t _e288; InterlockedExchange(workgroup_struct.atomic_arr[1], 1L, _e288); + return; +} diff --git a/naga/tests/out/hlsl/atomicOps-int64.ron b/naga/tests/out/hlsl/atomicOps-int64.ron new file mode 100644 index 000000000..67a903551 --- /dev/null +++ b/naga/tests/out/hlsl/atomicOps-int64.ron @@ -0,0 +1,12 @@ +( + vertex:[ + ], + fragment:[ + ], + compute:[ + ( + entry_point:"cs_main", + target_profile:"cs_6_6", + ), + ], +) diff --git a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron index 8c889382d..9d4e82fd8 100644 --- a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron +++ b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.compact.ron @@ -116,7 +116,7 @@ compare: Some(3), ), value: 4, - result: 5, + result: Some(5), ), Return( value: None, diff --git a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron index 
8c889382d..9d4e82fd8 100644 --- a/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron +++ b/naga/tests/out/ir/overrides-atomicCompareExchangeWeak.ron @@ -116,7 +116,7 @@ compare: Some(3), ), value: 4, - result: 5, + result: Some(5), ), Return( value: None, diff --git a/naga/tests/out/msl/atomicOps-int64-min-max.msl b/naga/tests/out/msl/atomicOps-int64-min-max.msl new file mode 100644 index 000000000..a5dd1c97f --- /dev/null +++ b/naga/tests/out/msl/atomicOps-int64-min-max.msl @@ -0,0 +1,33 @@ +// language: metal2.4 +#include +#include + +using metal::uint; + +struct type_1 { + metal::atomic_ulong inner[2]; +}; +struct Struct { + metal::atomic_ulong atomic_scalar; + type_1 atomic_arr; +}; + +struct cs_mainInput { +}; +kernel void cs_main( + metal::uint3 id [[thread_position_in_threadgroup]] +, device metal::atomic_ulong& storage_atomic_scalar [[user(fake0)]] +, device type_1& storage_atomic_arr [[user(fake0)]] +, device Struct& storage_struct [[user(fake0)]] +) { + metal::atomic_max_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_max_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::atomic_max_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_max_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::threadgroup_barrier(metal::mem_flags::mem_threadgroup); + metal::atomic_min_explicit(&storage_atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_min_explicit(&storage_atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + metal::atomic_min_explicit(&storage_struct.atomic_scalar, 1uL, metal::memory_order_relaxed); + metal::atomic_min_explicit(&storage_struct.atomic_arr.inner[1], 1uL, metal::memory_order_relaxed); + return; +} diff --git a/naga/tests/out/spv/atomicCompareExchange-int64.spvasm b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm new file mode 100644 index 000000000..f174ad3b3 --- 
/dev/null +++ b/naga/tests/out/spv/atomicCompareExchange-int64.spvasm @@ -0,0 +1,205 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 125 +OpCapability Shader +OpCapability Int64Atomics +OpCapability Int64 +OpExtension "SPV_KHR_storage_buffer_storage_class" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %19 "test_atomic_compare_exchange_i64" +OpEntryPoint GLCompute %77 "test_atomic_compare_exchange_u64" +OpExecutionMode %19 LocalSize 1 1 1 +OpExecutionMode %77 LocalSize 1 1 1 +OpDecorate %5 ArrayStride 8 +OpDecorate %8 ArrayStride 8 +OpMemberDecorate %10 0 Offset 0 +OpMemberDecorate %10 1 Offset 8 +OpMemberDecorate %11 0 Offset 0 +OpMemberDecorate %11 1 Offset 8 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 0 +OpDecorate %13 Block +OpMemberDecorate %13 0 Offset 0 +OpDecorate %15 DescriptorSet 0 +OpDecorate %15 Binding 1 +OpDecorate %16 Block +OpMemberDecorate %16 0 Offset 0 +%2 = OpTypeVoid +%3 = OpTypeInt 32 0 +%4 = OpTypeInt 64 1 +%6 = OpConstant %3 128 +%5 = OpTypeArray %4 %6 +%7 = OpTypeInt 64 0 +%8 = OpTypeArray %7 %6 +%9 = OpTypeBool +%10 = OpTypeStruct %4 %9 +%11 = OpTypeStruct %7 %9 +%13 = OpTypeStruct %5 +%14 = OpTypePointer StorageBuffer %13 +%12 = OpVariable %14 StorageBuffer +%16 = OpTypeStruct %8 +%17 = OpTypePointer StorageBuffer %16 +%15 = OpVariable %17 StorageBuffer +%20 = OpTypeFunction %2 +%21 = OpTypePointer StorageBuffer %5 +%22 = OpConstant %3 0 +%24 = OpConstantFalse %9 +%25 = OpConstant %4 10 +%26 = OpConstant %3 1 +%28 = OpTypePointer Function %3 +%30 = OpTypePointer Function %4 +%31 = OpConstantNull %4 +%33 = OpTypePointer Function %9 +%34 = OpConstantNull %9 +%47 = OpTypePointer StorageBuffer %4 +%51 = OpTypeInt 32 1 +%50 = OpConstant %51 1 +%52 = OpConstant %3 64 +%78 = OpTypePointer StorageBuffer %8 +%80 = OpConstant %7 10 +%83 = OpTypePointer Function %7 +%84 = OpConstantNull %7 +%86 = OpConstantNull %9 +%99 = OpTypePointer StorageBuffer %7 +%19 = OpFunction %2 None %20 
+%18 = OpLabel +%27 = OpVariable %28 Function %22 +%29 = OpVariable %30 Function %31 +%32 = OpVariable %33 Function %34 +%23 = OpAccessChain %21 %12 %22 +OpBranch %35 +%35 = OpLabel +OpBranch %36 +%36 = OpLabel +OpLoopMerge %37 %39 None +OpBranch %38 +%38 = OpLabel +%40 = OpLoad %3 %27 +%41 = OpULessThan %9 %40 %6 +OpSelectionMerge %42 None +OpBranchConditional %41 %42 %43 +%43 = OpLabel +OpBranch %37 +%42 = OpLabel +OpBranch %44 +%44 = OpLabel +%46 = OpLoad %3 %27 +%48 = OpAccessChain %47 %23 %46 +%49 = OpAtomicLoad %4 %48 %50 %52 +OpStore %29 %49 +OpStore %32 %24 +OpBranch %53 +%53 = OpLabel +OpLoopMerge %54 %56 None +OpBranch %55 +%55 = OpLabel +%57 = OpLoad %9 %32 +%58 = OpLogicalNot %9 %57 +OpSelectionMerge %59 None +OpBranchConditional %58 %59 %60 +%60 = OpLabel +OpBranch %54 +%59 = OpLabel +OpBranch %61 +%61 = OpLabel +%63 = OpLoad %4 %29 +%64 = OpIAdd %4 %63 %25 +%66 = OpLoad %3 %27 +%67 = OpLoad %4 %29 +%69 = OpAccessChain %47 %23 %66 +%70 = OpAtomicCompareExchange %4 %69 %50 %52 %52 %64 %67 +%71 = OpIEqual %9 %70 %67 +%68 = OpCompositeConstruct %10 %70 %71 +%72 = OpCompositeExtract %4 %68 0 +OpStore %29 %72 +%73 = OpCompositeExtract %9 %68 1 +OpStore %32 %73 +OpBranch %62 +%62 = OpLabel +OpBranch %56 +%56 = OpLabel +OpBranch %53 +%54 = OpLabel +OpBranch %45 +%45 = OpLabel +OpBranch %39 +%39 = OpLabel +%74 = OpLoad %3 %27 +%75 = OpIAdd %3 %74 %26 +OpStore %27 %75 +OpBranch %36 +%37 = OpLabel +OpReturn +OpFunctionEnd +%77 = OpFunction %2 None %20 +%76 = OpLabel +%81 = OpVariable %28 Function %22 +%82 = OpVariable %83 Function %84 +%85 = OpVariable %33 Function %86 +%79 = OpAccessChain %78 %15 %22 +OpBranch %87 +%87 = OpLabel +OpBranch %88 +%88 = OpLabel +OpLoopMerge %89 %91 None +OpBranch %90 +%90 = OpLabel +%92 = OpLoad %3 %81 +%93 = OpULessThan %9 %92 %6 +OpSelectionMerge %94 None +OpBranchConditional %93 %94 %95 +%95 = OpLabel +OpBranch %89 +%94 = OpLabel +OpBranch %96 +%96 = OpLabel +%98 = OpLoad %3 %81 +%100 = OpAccessChain %99 %79 %98 +%101 = 
OpAtomicLoad %7 %100 %50 %52 +OpStore %82 %101 +OpStore %85 %24 +OpBranch %102 +%102 = OpLabel +OpLoopMerge %103 %105 None +OpBranch %104 +%104 = OpLabel +%106 = OpLoad %9 %85 +%107 = OpLogicalNot %9 %106 +OpSelectionMerge %108 None +OpBranchConditional %107 %108 %109 +%109 = OpLabel +OpBranch %103 +%108 = OpLabel +OpBranch %110 +%110 = OpLabel +%112 = OpLoad %7 %82 +%113 = OpIAdd %7 %112 %80 +%115 = OpLoad %3 %81 +%116 = OpLoad %7 %82 +%118 = OpAccessChain %99 %79 %115 +%119 = OpAtomicCompareExchange %7 %118 %50 %52 %52 %113 %116 +%120 = OpIEqual %9 %119 %116 +%117 = OpCompositeConstruct %11 %119 %120 +%121 = OpCompositeExtract %7 %117 0 +OpStore %82 %121 +%122 = OpCompositeExtract %9 %117 1 +OpStore %85 %122 +OpBranch %111 +%111 = OpLabel +OpBranch %105 +%105 = OpLabel +OpBranch %102 +%103 = OpLabel +OpBranch %97 +%97 = OpLabel +OpBranch %91 +%91 = OpLabel +%123 = OpLoad %3 %81 +%124 = OpIAdd %3 %123 %26 +OpStore %81 %124 +OpBranch %88 +%89 = OpLabel +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/atomicOps-int64-min-max.spvasm b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm new file mode 100644 index 000000000..aa798f546 --- /dev/null +++ b/naga/tests/out/spv/atomicOps-int64-min-max.spvasm @@ -0,0 +1,82 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 52 +OpCapability Shader +OpCapability Int64Atomics +OpCapability Int64 +OpExtension "SPV_KHR_storage_buffer_storage_class" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %22 "cs_main" %19 +OpExecutionMode %22 LocalSize 2 1 1 +OpDecorate %4 ArrayStride 8 +OpMemberDecorate %7 0 Offset 0 +OpMemberDecorate %7 1 Offset 8 +OpDecorate %9 DescriptorSet 0 +OpDecorate %9 Binding 0 +OpDecorate %10 Block +OpMemberDecorate %10 0 Offset 0 +OpDecorate %12 DescriptorSet 0 +OpDecorate %12 Binding 1 +OpDecorate %13 Block +OpMemberDecorate %13 0 Offset 0 +OpDecorate %15 DescriptorSet 0 +OpDecorate %15 Binding 2 +OpDecorate %16 Block 
+OpMemberDecorate %16 0 Offset 0 +OpDecorate %19 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeInt 64 0 +%6 = OpTypeInt 32 0 +%5 = OpConstant %6 2 +%4 = OpTypeArray %3 %5 +%7 = OpTypeStruct %3 %4 +%8 = OpTypeVector %6 3 +%10 = OpTypeStruct %3 +%11 = OpTypePointer StorageBuffer %10 +%9 = OpVariable %11 StorageBuffer +%13 = OpTypeStruct %4 +%14 = OpTypePointer StorageBuffer %13 +%12 = OpVariable %14 StorageBuffer +%16 = OpTypeStruct %7 +%17 = OpTypePointer StorageBuffer %16 +%15 = OpVariable %17 StorageBuffer +%20 = OpTypePointer Input %8 +%19 = OpVariable %20 Input +%23 = OpTypeFunction %2 +%24 = OpTypePointer StorageBuffer %3 +%25 = OpConstant %6 0 +%27 = OpTypePointer StorageBuffer %4 +%29 = OpTypePointer StorageBuffer %7 +%31 = OpConstant %3 1 +%35 = OpTypeInt 32 1 +%34 = OpConstant %35 1 +%36 = OpConstant %6 64 +%38 = OpConstant %6 1 +%44 = OpConstant %6 264 +%22 = OpFunction %2 None %23 +%18 = OpLabel +%21 = OpLoad %8 %19 +%26 = OpAccessChain %24 %9 %25 +%28 = OpAccessChain %27 %12 %25 +%30 = OpAccessChain %29 %15 %25 +OpBranch %32 +%32 = OpLabel +%33 = OpAtomicUMax %3 %26 %34 %36 %31 +%39 = OpAccessChain %24 %28 %38 +%37 = OpAtomicUMax %3 %39 %34 %36 %31 +%41 = OpAccessChain %24 %30 %25 +%40 = OpAtomicUMax %3 %41 %34 %36 %31 +%43 = OpAccessChain %24 %30 %38 %38 +%42 = OpAtomicUMax %3 %43 %34 %36 %31 +OpControlBarrier %5 %5 %44 +%45 = OpAtomicUMin %3 %26 %34 %36 %31 +%47 = OpAccessChain %24 %28 %38 +%46 = OpAtomicUMin %3 %47 %34 %36 %31 +%49 = OpAccessChain %24 %30 %25 +%48 = OpAtomicUMin %3 %49 %34 %36 %31 +%51 = OpAccessChain %24 %30 %38 %38 +%50 = OpAtomicUMin %3 %51 %34 %36 %31 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git a/naga/tests/out/spv/atomicOps-int64.spvasm b/naga/tests/out/spv/atomicOps-int64.spvasm new file mode 100644 index 000000000..943107f50 --- /dev/null +++ b/naga/tests/out/spv/atomicOps-int64.spvasm @@ -0,0 +1,246 @@ +; SPIR-V +; Version: 1.0 +; Generator: rspirv +; Bound: 193 +OpCapability Shader +OpCapability 
Int64Atomics +OpCapability Int64 +OpExtension "SPV_KHR_storage_buffer_storage_class" +%1 = OpExtInstImport "GLSL.std.450" +OpMemoryModel Logical GLSL450 +OpEntryPoint GLCompute %29 "cs_main" %26 +OpExecutionMode %29 LocalSize 2 1 1 +OpDecorate %5 ArrayStride 8 +OpMemberDecorate %8 0 Offset 0 +OpMemberDecorate %8 1 Offset 8 +OpDecorate %10 DescriptorSet 0 +OpDecorate %10 Binding 0 +OpDecorate %11 Block +OpMemberDecorate %11 0 Offset 0 +OpDecorate %13 DescriptorSet 0 +OpDecorate %13 Binding 1 +OpDecorate %14 Block +OpMemberDecorate %14 0 Offset 0 +OpDecorate %16 DescriptorSet 0 +OpDecorate %16 Binding 2 +OpDecorate %17 Block +OpMemberDecorate %17 0 Offset 0 +OpDecorate %26 BuiltIn LocalInvocationId +%2 = OpTypeVoid +%3 = OpTypeInt 64 0 +%4 = OpTypeInt 64 1 +%7 = OpTypeInt 32 0 +%6 = OpConstant %7 2 +%5 = OpTypeArray %4 %6 +%8 = OpTypeStruct %3 %5 +%9 = OpTypeVector %7 3 +%11 = OpTypeStruct %3 +%12 = OpTypePointer StorageBuffer %11 +%10 = OpVariable %12 StorageBuffer +%14 = OpTypeStruct %5 +%15 = OpTypePointer StorageBuffer %14 +%13 = OpVariable %15 StorageBuffer +%17 = OpTypeStruct %8 +%18 = OpTypePointer StorageBuffer %17 +%16 = OpVariable %18 StorageBuffer +%20 = OpTypePointer Workgroup %3 +%19 = OpVariable %20 Workgroup +%22 = OpTypePointer Workgroup %5 +%21 = OpVariable %22 Workgroup +%24 = OpTypePointer Workgroup %8 +%23 = OpVariable %24 Workgroup +%27 = OpTypePointer Input %9 +%26 = OpVariable %27 Input +%30 = OpTypeFunction %2 +%31 = OpTypePointer StorageBuffer %3 +%32 = OpConstant %7 0 +%34 = OpTypePointer StorageBuffer %5 +%36 = OpTypePointer StorageBuffer %8 +%38 = OpConstant %3 1 +%39 = OpConstant %4 1 +%41 = OpConstantNull %3 +%42 = OpConstantNull %5 +%43 = OpConstantNull %8 +%44 = OpConstantNull %9 +%46 = OpTypeBool +%45 = OpTypeVector %46 3 +%51 = OpConstant %7 264 +%54 = OpTypeInt 32 1 +%53 = OpConstant %54 1 +%55 = OpConstant %7 64 +%56 = OpTypePointer StorageBuffer %4 +%57 = OpConstant %7 1 +%61 = OpConstant %54 2 +%62 = OpConstant %7 256 +%63 = 
OpTypePointer Workgroup %4 +%29 = OpFunction %2 None %30 +%25 = OpLabel +%28 = OpLoad %9 %26 +%33 = OpAccessChain %31 %10 %32 +%35 = OpAccessChain %34 %13 %32 +%37 = OpAccessChain %36 %16 %32 +OpBranch %40 +%40 = OpLabel +%47 = OpIEqual %45 %28 %44 +%48 = OpAll %46 %47 +OpSelectionMerge %49 None +OpBranchConditional %48 %50 %49 +%50 = OpLabel +OpStore %19 %41 +OpStore %21 %42 +OpStore %23 %43 +OpBranch %49 +%49 = OpLabel +OpControlBarrier %6 %6 %51 +OpBranch %52 +%52 = OpLabel +OpAtomicStore %33 %53 %55 %38 +%58 = OpAccessChain %56 %35 %57 +OpAtomicStore %58 %53 %55 %39 +%59 = OpAccessChain %31 %37 %32 +OpAtomicStore %59 %53 %55 %38 +%60 = OpAccessChain %56 %37 %57 %57 +OpAtomicStore %60 %53 %55 %39 +OpAtomicStore %19 %61 %62 %38 +%64 = OpAccessChain %63 %21 %57 +OpAtomicStore %64 %61 %62 %39 +%65 = OpAccessChain %20 %23 %32 +OpAtomicStore %65 %61 %62 %38 +%66 = OpAccessChain %63 %23 %57 %57 +OpAtomicStore %66 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%67 = OpAtomicLoad %3 %33 %53 %55 +%68 = OpAccessChain %56 %35 %57 +%69 = OpAtomicLoad %4 %68 %53 %55 +%70 = OpAccessChain %31 %37 %32 +%71 = OpAtomicLoad %3 %70 %53 %55 +%72 = OpAccessChain %56 %37 %57 %57 +%73 = OpAtomicLoad %4 %72 %53 %55 +%74 = OpAtomicLoad %3 %19 %61 %62 +%75 = OpAccessChain %63 %21 %57 +%76 = OpAtomicLoad %4 %75 %61 %62 +%77 = OpAccessChain %20 %23 %32 +%78 = OpAtomicLoad %3 %77 %61 %62 +%79 = OpAccessChain %63 %23 %57 %57 +%80 = OpAtomicLoad %4 %79 %61 %62 +OpControlBarrier %6 %6 %51 +%81 = OpAtomicIAdd %3 %33 %53 %55 %38 +%83 = OpAccessChain %56 %35 %57 +%82 = OpAtomicIAdd %4 %83 %53 %55 %39 +%85 = OpAccessChain %31 %37 %32 +%84 = OpAtomicIAdd %3 %85 %53 %55 %38 +%87 = OpAccessChain %56 %37 %57 %57 +%86 = OpAtomicIAdd %4 %87 %53 %55 %39 +%88 = OpAtomicIAdd %3 %19 %61 %62 %38 +%90 = OpAccessChain %63 %21 %57 +%89 = OpAtomicIAdd %4 %90 %61 %62 %39 +%92 = OpAccessChain %20 %23 %32 +%91 = OpAtomicIAdd %3 %92 %61 %62 %38 +%94 = OpAccessChain %63 %23 %57 %57 +%93 = OpAtomicIAdd %4 %94 %61 %62 %39 
+OpControlBarrier %6 %6 %51 +%95 = OpAtomicISub %3 %33 %53 %55 %38 +%97 = OpAccessChain %56 %35 %57 +%96 = OpAtomicISub %4 %97 %53 %55 %39 +%99 = OpAccessChain %31 %37 %32 +%98 = OpAtomicISub %3 %99 %53 %55 %38 +%101 = OpAccessChain %56 %37 %57 %57 +%100 = OpAtomicISub %4 %101 %53 %55 %39 +%102 = OpAtomicISub %3 %19 %61 %62 %38 +%104 = OpAccessChain %63 %21 %57 +%103 = OpAtomicISub %4 %104 %61 %62 %39 +%106 = OpAccessChain %20 %23 %32 +%105 = OpAtomicISub %3 %106 %61 %62 %38 +%108 = OpAccessChain %63 %23 %57 %57 +%107 = OpAtomicISub %4 %108 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%109 = OpAtomicUMax %3 %33 %53 %55 %38 +%111 = OpAccessChain %56 %35 %57 +%110 = OpAtomicSMax %4 %111 %53 %55 %39 +%113 = OpAccessChain %31 %37 %32 +%112 = OpAtomicUMax %3 %113 %53 %55 %38 +%115 = OpAccessChain %56 %37 %57 %57 +%114 = OpAtomicSMax %4 %115 %53 %55 %39 +%116 = OpAtomicUMax %3 %19 %61 %62 %38 +%118 = OpAccessChain %63 %21 %57 +%117 = OpAtomicSMax %4 %118 %61 %62 %39 +%120 = OpAccessChain %20 %23 %32 +%119 = OpAtomicUMax %3 %120 %61 %62 %38 +%122 = OpAccessChain %63 %23 %57 %57 +%121 = OpAtomicSMax %4 %122 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%123 = OpAtomicUMin %3 %33 %53 %55 %38 +%125 = OpAccessChain %56 %35 %57 +%124 = OpAtomicSMin %4 %125 %53 %55 %39 +%127 = OpAccessChain %31 %37 %32 +%126 = OpAtomicUMin %3 %127 %53 %55 %38 +%129 = OpAccessChain %56 %37 %57 %57 +%128 = OpAtomicSMin %4 %129 %53 %55 %39 +%130 = OpAtomicUMin %3 %19 %61 %62 %38 +%132 = OpAccessChain %63 %21 %57 +%131 = OpAtomicSMin %4 %132 %61 %62 %39 +%134 = OpAccessChain %20 %23 %32 +%133 = OpAtomicUMin %3 %134 %61 %62 %38 +%136 = OpAccessChain %63 %23 %57 %57 +%135 = OpAtomicSMin %4 %136 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%137 = OpAtomicAnd %3 %33 %53 %55 %38 +%139 = OpAccessChain %56 %35 %57 +%138 = OpAtomicAnd %4 %139 %53 %55 %39 +%141 = OpAccessChain %31 %37 %32 +%140 = OpAtomicAnd %3 %141 %53 %55 %38 +%143 = OpAccessChain %56 %37 %57 %57 +%142 = OpAtomicAnd %4 %143 %53 %55 %39 +%144 = 
OpAtomicAnd %3 %19 %61 %62 %38 +%146 = OpAccessChain %63 %21 %57 +%145 = OpAtomicAnd %4 %146 %61 %62 %39 +%148 = OpAccessChain %20 %23 %32 +%147 = OpAtomicAnd %3 %148 %61 %62 %38 +%150 = OpAccessChain %63 %23 %57 %57 +%149 = OpAtomicAnd %4 %150 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%151 = OpAtomicOr %3 %33 %53 %55 %38 +%153 = OpAccessChain %56 %35 %57 +%152 = OpAtomicOr %4 %153 %53 %55 %39 +%155 = OpAccessChain %31 %37 %32 +%154 = OpAtomicOr %3 %155 %53 %55 %38 +%157 = OpAccessChain %56 %37 %57 %57 +%156 = OpAtomicOr %4 %157 %53 %55 %39 +%158 = OpAtomicOr %3 %19 %61 %62 %38 +%160 = OpAccessChain %63 %21 %57 +%159 = OpAtomicOr %4 %160 %61 %62 %39 +%162 = OpAccessChain %20 %23 %32 +%161 = OpAtomicOr %3 %162 %61 %62 %38 +%164 = OpAccessChain %63 %23 %57 %57 +%163 = OpAtomicOr %4 %164 %61 %62 %39 +OpControlBarrier %6 %6 %51 +%165 = OpAtomicXor %3 %33 %53 %55 %38 +%167 = OpAccessChain %56 %35 %57 +%166 = OpAtomicXor %4 %167 %53 %55 %39 +%169 = OpAccessChain %31 %37 %32 +%168 = OpAtomicXor %3 %169 %53 %55 %38 +%171 = OpAccessChain %56 %37 %57 %57 +%170 = OpAtomicXor %4 %171 %53 %55 %39 +%172 = OpAtomicXor %3 %19 %61 %62 %38 +%174 = OpAccessChain %63 %21 %57 +%173 = OpAtomicXor %4 %174 %61 %62 %39 +%176 = OpAccessChain %20 %23 %32 +%175 = OpAtomicXor %3 %176 %61 %62 %38 +%178 = OpAccessChain %63 %23 %57 %57 +%177 = OpAtomicXor %4 %178 %61 %62 %39 +%179 = OpAtomicExchange %3 %33 %53 %55 %38 +%181 = OpAccessChain %56 %35 %57 +%180 = OpAtomicExchange %4 %181 %53 %55 %39 +%183 = OpAccessChain %31 %37 %32 +%182 = OpAtomicExchange %3 %183 %53 %55 %38 +%185 = OpAccessChain %56 %37 %57 %57 +%184 = OpAtomicExchange %4 %185 %53 %55 %39 +%186 = OpAtomicExchange %3 %19 %61 %62 %38 +%188 = OpAccessChain %63 %21 %57 +%187 = OpAtomicExchange %4 %188 %61 %62 %39 +%190 = OpAccessChain %20 %23 %32 +%189 = OpAtomicExchange %3 %190 %61 %62 %38 +%192 = OpAccessChain %63 %23 %57 %57 +%191 = OpAtomicExchange %4 %192 %61 %62 %39 +OpReturn +OpFunctionEnd \ No newline at end of file diff --git 
a/naga/tests/out/wgsl/atomicCompareExchange-int64.wgsl b/naga/tests/out/wgsl/atomicCompareExchange-int64.wgsl
new file mode 100644
index 000000000..07cce9a89
--- /dev/null
+++ b/naga/tests/out/wgsl/atomicCompareExchange-int64.wgsl
@@ -0,0 +1,90 @@
+const SIZE: u32 = 128u;
+
+@group(0) @binding(0)
+var<storage, read_write> arr_i64_: array<atomic<i64>, 128>;
+@group(0) @binding(1)
+var<storage, read_write> arr_u64_: array<atomic<u64>, 128>;
+
+@compute @workgroup_size(1, 1, 1)
+fn test_atomic_compare_exchange_i64_() {
+    var i: u32 = 0u;
+    var old: i64;
+    var exchanged: bool;
+
+    loop {
+        let _e2 = i;
+        if (_e2 < SIZE) {
+        } else {
+            break;
+        }
+        {
+            let _e6 = i;
+            let _e8 = atomicLoad((&arr_i64_[_e6]));
+            old = _e8;
+            exchanged = false;
+            loop {
+                let _e12 = exchanged;
+                if !(_e12) {
+                } else {
+                    break;
+                }
+                {
+                    let _e14 = old;
+                    let new_ = bitcast<i64>((_e14 + 10li));
+                    let _e19 = i;
+                    let _e21 = old;
+                    let _e22 = atomicCompareExchangeWeak((&arr_i64_[_e19]), _e21, new_);
+                    old = _e22.old_value;
+                    exchanged = _e22.exchanged;
+                }
+            }
+        }
+        continuing {
+            let _e26 = i;
+            i = (_e26 + 1u);
+        }
+    }
+    return;
+}
+
+@compute @workgroup_size(1, 1, 1)
+fn test_atomic_compare_exchange_u64_() {
+    var i_1: u32 = 0u;
+    var old_1: u64;
+    var exchanged_1: bool;
+
+    loop {
+        let _e2 = i_1;
+        if (_e2 < SIZE) {
+        } else {
+            break;
+        }
+        {
+            let _e6 = i_1;
+            let _e8 = atomicLoad((&arr_u64_[_e6]));
+            old_1 = _e8;
+            exchanged_1 = false;
+            loop {
+                let _e12 = exchanged_1;
+                if !(_e12) {
+                } else {
+                    break;
+                }
+                {
+                    let _e14 = old_1;
+                    let new_1 = bitcast<u64>((_e14 + 10lu));
+                    let _e19 = i_1;
+                    let _e21 = old_1;
+                    let _e22 = atomicCompareExchangeWeak((&arr_u64_[_e19]), _e21, new_1);
+                    old_1 = _e22.old_value;
+                    exchanged_1 = _e22.exchanged;
+                }
+            }
+        }
+        continuing {
+            let _e26 = i_1;
+            i_1 = (_e26 + 1u);
+        }
+    }
+    return;
+}
diff --git a/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
new file mode 100644
index 000000000..37bbb680f
--- /dev/null
+++ b/naga/tests/out/wgsl/atomicOps-int64-min-max.wgsl
@@ -0,0
+1,25 @@
+struct Struct {
+    atomic_scalar: atomic<u64>,
+    atomic_arr: array<atomic<u64>, 2>,
+}
+
+@group(0) @binding(0)
+var<storage, read_write> storage_atomic_scalar: atomic<u64>;
+@group(0) @binding(1)
+var<storage, read_write> storage_atomic_arr: array<atomic<u64>, 2>;
+@group(0) @binding(2)
+var<storage, read_write> storage_struct: Struct;
+
+@compute @workgroup_size(2, 1, 1)
+fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
+    atomicMax((&storage_atomic_scalar), 1lu);
+    atomicMax((&storage_atomic_arr[1]), 1lu);
+    atomicMax((&storage_struct.atomic_scalar), 1lu);
+    atomicMax((&storage_struct.atomic_arr[1]), 1lu);
+    workgroupBarrier();
+    atomicMin((&storage_atomic_scalar), 1lu);
+    atomicMin((&storage_atomic_arr[1]), 1lu);
+    atomicMin((&storage_struct.atomic_scalar), 1lu);
+    atomicMin((&storage_struct.atomic_arr[1]), 1lu);
+    return;
+}
diff --git a/naga/tests/out/wgsl/atomicOps-int64.wgsl b/naga/tests/out/wgsl/atomicOps-int64.wgsl
new file mode 100644
index 000000000..364108c64
--- /dev/null
+++ b/naga/tests/out/wgsl/atomicOps-int64.wgsl
@@ -0,0 +1,107 @@
+struct Struct {
+    atomic_scalar: atomic<u64>,
+    atomic_arr: array<atomic<i64>, 2>,
+}
+
+@group(0) @binding(0)
+var<storage, read_write> storage_atomic_scalar: atomic<u64>;
+@group(0) @binding(1)
+var<storage, read_write> storage_atomic_arr: array<atomic<i64>, 2>;
+@group(0) @binding(2)
+var<storage, read_write> storage_struct: Struct;
+var<workgroup> workgroup_atomic_scalar: atomic<u64>;
+var<workgroup> workgroup_atomic_arr: array<atomic<i64>, 2>;
+var<workgroup> workgroup_struct: Struct;
+
+@compute @workgroup_size(2, 1, 1)
+fn cs_main(@builtin(local_invocation_id) id: vec3<u32>) {
+    atomicStore((&storage_atomic_scalar), 1lu);
+    atomicStore((&storage_atomic_arr[1]), 1li);
+    atomicStore((&storage_struct.atomic_scalar), 1lu);
+    atomicStore((&storage_struct.atomic_arr[1]), 1li);
+    atomicStore((&workgroup_atomic_scalar), 1lu);
+    atomicStore((&workgroup_atomic_arr[1]), 1li);
+    atomicStore((&workgroup_struct.atomic_scalar), 1lu);
+    atomicStore((&workgroup_struct.atomic_arr[1]), 1li);
+    workgroupBarrier();
+    let l0_ = atomicLoad((&storage_atomic_scalar));
+    let l1_ = atomicLoad((&storage_atomic_arr[1]));
+    let l2_ =
atomicLoad((&storage_struct.atomic_scalar)); + let l3_ = atomicLoad((&storage_struct.atomic_arr[1])); + let l4_ = atomicLoad((&workgroup_atomic_scalar)); + let l5_ = atomicLoad((&workgroup_atomic_arr[1])); + let l6_ = atomicLoad((&workgroup_struct.atomic_scalar)); + let l7_ = atomicLoad((&workgroup_struct.atomic_arr[1])); + workgroupBarrier(); + let _e51 = atomicAdd((&storage_atomic_scalar), 1lu); + let _e55 = atomicAdd((&storage_atomic_arr[1]), 1li); + let _e59 = atomicAdd((&storage_struct.atomic_scalar), 1lu); + let _e64 = atomicAdd((&storage_struct.atomic_arr[1]), 1li); + let _e67 = atomicAdd((&workgroup_atomic_scalar), 1lu); + let _e71 = atomicAdd((&workgroup_atomic_arr[1]), 1li); + let _e75 = atomicAdd((&workgroup_struct.atomic_scalar), 1lu); + let _e80 = atomicAdd((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e83 = atomicSub((&storage_atomic_scalar), 1lu); + let _e87 = atomicSub((&storage_atomic_arr[1]), 1li); + let _e91 = atomicSub((&storage_struct.atomic_scalar), 1lu); + let _e96 = atomicSub((&storage_struct.atomic_arr[1]), 1li); + let _e99 = atomicSub((&workgroup_atomic_scalar), 1lu); + let _e103 = atomicSub((&workgroup_atomic_arr[1]), 1li); + let _e107 = atomicSub((&workgroup_struct.atomic_scalar), 1lu); + let _e112 = atomicSub((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + atomicMax((&storage_atomic_scalar), 1lu); + atomicMax((&storage_atomic_arr[1]), 1li); + atomicMax((&storage_struct.atomic_scalar), 1lu); + atomicMax((&storage_struct.atomic_arr[1]), 1li); + atomicMax((&workgroup_atomic_scalar), 1lu); + atomicMax((&workgroup_atomic_arr[1]), 1li); + atomicMax((&workgroup_struct.atomic_scalar), 1lu); + atomicMax((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + atomicMin((&storage_atomic_scalar), 1lu); + atomicMin((&storage_atomic_arr[1]), 1li); + atomicMin((&storage_struct.atomic_scalar), 1lu); + atomicMin((&storage_struct.atomic_arr[1]), 1li); + atomicMin((&workgroup_atomic_scalar), 1lu); + 
atomicMin((&workgroup_atomic_arr[1]), 1li); + atomicMin((&workgroup_struct.atomic_scalar), 1lu); + atomicMin((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e163 = atomicAnd((&storage_atomic_scalar), 1lu); + let _e167 = atomicAnd((&storage_atomic_arr[1]), 1li); + let _e171 = atomicAnd((&storage_struct.atomic_scalar), 1lu); + let _e176 = atomicAnd((&storage_struct.atomic_arr[1]), 1li); + let _e179 = atomicAnd((&workgroup_atomic_scalar), 1lu); + let _e183 = atomicAnd((&workgroup_atomic_arr[1]), 1li); + let _e187 = atomicAnd((&workgroup_struct.atomic_scalar), 1lu); + let _e192 = atomicAnd((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e195 = atomicOr((&storage_atomic_scalar), 1lu); + let _e199 = atomicOr((&storage_atomic_arr[1]), 1li); + let _e203 = atomicOr((&storage_struct.atomic_scalar), 1lu); + let _e208 = atomicOr((&storage_struct.atomic_arr[1]), 1li); + let _e211 = atomicOr((&workgroup_atomic_scalar), 1lu); + let _e215 = atomicOr((&workgroup_atomic_arr[1]), 1li); + let _e219 = atomicOr((&workgroup_struct.atomic_scalar), 1lu); + let _e224 = atomicOr((&workgroup_struct.atomic_arr[1]), 1li); + workgroupBarrier(); + let _e227 = atomicXor((&storage_atomic_scalar), 1lu); + let _e231 = atomicXor((&storage_atomic_arr[1]), 1li); + let _e235 = atomicXor((&storage_struct.atomic_scalar), 1lu); + let _e240 = atomicXor((&storage_struct.atomic_arr[1]), 1li); + let _e243 = atomicXor((&workgroup_atomic_scalar), 1lu); + let _e247 = atomicXor((&workgroup_atomic_arr[1]), 1li); + let _e251 = atomicXor((&workgroup_struct.atomic_scalar), 1lu); + let _e256 = atomicXor((&workgroup_struct.atomic_arr[1]), 1li); + let _e259 = atomicExchange((&storage_atomic_scalar), 1lu); + let _e263 = atomicExchange((&storage_atomic_arr[1]), 1li); + let _e267 = atomicExchange((&storage_struct.atomic_scalar), 1lu); + let _e272 = atomicExchange((&storage_struct.atomic_arr[1]), 1li); + let _e275 = atomicExchange((&workgroup_atomic_scalar), 1lu); + let _e279 = 
atomicExchange((&workgroup_atomic_arr[1]), 1li); + let _e283 = atomicExchange((&workgroup_struct.atomic_scalar), 1lu); + let _e288 = atomicExchange((&workgroup_struct.atomic_arr[1]), 1li); + return; +} diff --git a/naga/tests/snapshots.rs b/naga/tests/snapshots.rs index 826337515..be8eb6a17 100644 --- a/naga/tests/snapshots.rs +++ b/naga/tests/snapshots.rs @@ -759,6 +759,18 @@ fn convert_wgsl() { "padding", Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::HLSL | Targets::WGSL, ), + ( + "atomicOps-int64", + Targets::SPIRV | Targets::HLSL | Targets::WGSL, + ), + ( + "atomicOps-int64-min-max", + Targets::SPIRV | Targets::METAL | Targets::HLSL | Targets::WGSL, + ), + ( + "atomicCompareExchange-int64", + Targets::SPIRV | Targets::WGSL, + ), ("pointers", Targets::SPIRV | Targets::WGSL), ( "control-flow", diff --git a/naga/tests/validation.rs b/naga/tests/validation.rs index 7491fd262..f64b40884 100644 --- a/naga/tests/validation.rs +++ b/naga/tests/validation.rs @@ -75,7 +75,7 @@ fn populate_atomic_result() { pointer: ex_global, fun: naga::AtomicFunction::Add, value: ex_42, - result: ex_result, + result: Some(ex_result), }, span, ); diff --git a/tests/tests/shader/mod.rs b/tests/tests/shader/mod.rs index 2716caabd..f5c2d4c96 100644 --- a/tests/tests/shader/mod.rs +++ b/tests/tests/shader/mod.rs @@ -155,6 +155,12 @@ impl ShaderTest { self } + fn output_type(mut self, output_type: String) -> Self { + self.output_type = output_type; + + self + } + /// Add another set of possible outputs. If any of the given /// output values are seen it's considered a success (i.e. this is OR, not AND). 
/// diff --git a/tests/tests/shader/numeric_builtins.rs b/tests/tests/shader/numeric_builtins.rs index 999d9dfb0..f6cb0bb39 100644 --- a/tests/tests/shader/numeric_builtins.rs +++ b/tests/tests/shader/numeric_builtins.rs @@ -52,6 +52,105 @@ static NUMERIC_BUILTINS: GpuTestConfiguration = GpuTestConfiguration::new() ) }); +fn create_int64_atomic_min_max_test() -> Vec { + let mut tests = Vec::new(); + + let test = ShaderTest::new( + "atomicMax".into(), + "value: u64".into(), + "atomicMin(&output, 0lu); atomicMax(&output, 2lu);".into(), + &[0], + &[2], + ) + .output_type("atomic".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicMin".into(), + "value: u64".into(), + "atomicMax(&output, 100lu); atomicMin(&output, 4lu);".into(), + &[0], + &[4], + ) + .output_type("atomic".into()); + + tests.push(test); + + tests +} + +#[gpu_test] +static INT64_ATOMIC_MIN_MAX: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgt::Features::SHADER_INT64 | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_int64_atomic_min_max_test(), + ) + }); + +fn create_int64_atomic_all_ops_test() -> Vec { + let mut tests = Vec::new(); + + let test = ShaderTest::new( + "atomicAdd".into(), + "value: u64".into(), + "atomicStore(&output, 0lu); atomicAdd(&output, 1lu); atomicAdd(&output, 1lu);".into(), + &[0], + &[2], + ) + .output_type("atomic".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicAnd".into(), + "value: u64".into(), + "atomicStore(&output, 31lu); atomicAnd(&output, 30lu); atomicAnd(&output, 3lu);".into(), + &[0], + &[2], + ) + .output_type("atomic".into()); + + tests.push(test); + + let test = ShaderTest::new( + "atomicOr".into(), + "value: u64".into(), + "atomicStore(&output, 0lu); atomicOr(&output, 3lu); 
atomicOr(&output, 6lu);".into(), + &[0], + &[7], + ) + .output_type("atomic".into()); + + tests.push(test); + + tests +} + +#[gpu_test] +static INT64_ATOMIC_ALL_OPS: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .features(wgt::Features::SHADER_INT64 | wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS) + .downlevel_flags(DownlevelFlags::COMPUTE_SHADERS) + .limits(Limits::downlevel_defaults()), + ) + .run_async(|ctx| { + shader_input_output_test( + ctx, + InputStorageType::Storage, + create_int64_atomic_all_ops_test(), + ) + }); + // See https://github.com/gfx-rs/wgpu/issues/5276 /* fn create_int64_polyfill_test() -> Vec { diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a2f0bf31d..e52f611f8 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -491,6 +491,16 @@ pub fn create_validator( Caps::SHADER_INT64, features.contains(wgt::Features::SHADER_INT64), ); + caps.set( + Caps::SHADER_INT64_ATOMIC_MIN_MAX, + features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX | wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS, + ), + ); + caps.set( + Caps::SHADER_INT64_ATOMIC_ALL_OPS, + features.contains(wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS), + ); caps.set( Caps::MULTISAMPLED_SHADING, downlevel.contains(wgt::DownlevelFlags::MULTISAMPLED_SHADING), diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 650330061..a81f15fc3 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -354,6 +354,25 @@ impl super::Adapter { && features1.WaveOps != 0, ); + let atomic_int64_on_typed_resource_supported = { + let mut features9: crate::dx12::types::D3D12_FEATURE_DATA_D3D12_OPTIONS9 = + unsafe { mem::zeroed() }; + let hr = unsafe { + device.CheckFeatureSupport( + 37, // D3D12_FEATURE_D3D12_OPTIONS9 + &mut features9 as *mut _ as *mut _, + mem::size_of::() as _, + ) + }; + hr == 0 + && features9.AtomicInt64OnGroupSharedSupported != 0 + && 
features9.AtomicInt64OnTypedResourceSupported != 0 + }; + features.set( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + atomic_int64_on_typed_resource_supported, + ); + // float32-filterable should always be available on d3d12 features.set(wgt::Features::FLOAT32_FILTERABLE, true); diff --git a/wgpu-hal/src/dx12/types.rs b/wgpu-hal/src/dx12/types.rs index 17b608b84..57a0d94a8 100644 --- a/wgpu-hal/src/dx12/types.rs +++ b/wgpu-hal/src/dx12/types.rs @@ -42,6 +42,24 @@ winapi::STRUCT! { } } +winapi::ENUM! { + enum D3D12_WAVE_MMA_TIER { + D3D12_WAVE_MMA_TIER_NOT_SUPPORTED = 0, + D3D12_WAVE_MMA_TIER_1_0 = 10, + } +} + +winapi::STRUCT! { + struct D3D12_FEATURE_DATA_D3D12_OPTIONS9 { + MeshShaderPipelineStatsSupported: winapi::shared::minwindef::BOOL, + MeshShaderSupportsFullRangeRenderTargetArrayIndex: winapi::shared::minwindef::BOOL, + AtomicInt64OnTypedResourceSupported: winapi::shared::minwindef::BOOL, + AtomicInt64OnGroupSharedSupported: winapi::shared::minwindef::BOOL, + DerivativesInMeshAndAmplificationShadersSupported: winapi::shared::minwindef::BOOL, + WaveMMATier: D3D12_WAVE_MMA_TIER, + } +} + winapi::ENUM! 
{ enum D3D_SHADER_MODEL { D3D_SHADER_MODEL_NONE = 0, diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 0ffe37f5e..33de70f71 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -821,6 +821,11 @@ impl super::PrivateCapabilities { int64: family_check && (device.supports_family(MTLGPUFamily::Apple3) || device.supports_family(MTLGPUFamily::Metal3)), + // https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf#page=6 + int64_atomics: family_check + && ((device.supports_family(MTLGPUFamily::Apple8) + && device.supports_family(MTLGPUFamily::Mac2)) + || device.supports_family(MTLGPUFamily::Apple9)), } } @@ -896,6 +901,10 @@ impl super::PrivateCapabilities { F::SHADER_INT64, self.int64 && self.msl_version >= MTLLanguageVersion::V2_3, ); + features.set( + F::SHADER_INT64_ATOMIC_MIN_MAX, + self.int64_atomics && self.msl_version >= MTLLanguageVersion::V2_4, + ); features.set( F::ADDRESS_MODE_CLAMP_TO_BORDER, diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 1867d7de4..b944bb6e9 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -272,6 +272,7 @@ struct PrivateCapabilities { timestamp_query_support: TimestampQuerySupport, supports_simd_scoped_operations: bool, int64: bool, + int64_atomics: bool, } #[derive(Clone, Debug)] diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index fe2a6f970..d3c0d4246 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -106,6 +106,9 @@ pub struct PhysicalDeviceFeatures { zero_initialize_workgroup_memory: Option>, + /// Features provided by `VK_KHR_shader_atomic_int64`, promoted to Vulkan 1.2. + shader_atomic_int64: Option>, + /// Features provided by `VK_EXT_subgroup_size_control`, promoted to Vulkan 1.3. 
subgroup_size_control: Option>, } @@ -151,6 +154,9 @@ impl PhysicalDeviceFeatures { if let Some(ref mut feature) = self.ray_query { info = info.push_next(feature); } + if let Some(ref mut feature) = self.shader_atomic_int64 { + info = info.push_next(feature); + } if let Some(ref mut feature) = self.subgroup_size_control { info = info.push_next(feature); } @@ -419,6 +425,19 @@ impl PhysicalDeviceFeatures { } else { None }, + shader_atomic_int64: if device_api_version >= vk::API_VERSION_1_2 + || enabled_extensions.contains(&khr::shader_atomic_int64::NAME) + { + Some( + vk::PhysicalDeviceShaderAtomicInt64Features::default() + .shader_buffer_int64_atomics(requested_features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS + | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + )), + ) + } else { + None + }, subgroup_size_control: if device_api_version >= vk::API_VERSION_1_3 || enabled_extensions.contains(&ext::subgroup_size_control::NAME) { @@ -559,6 +578,14 @@ impl PhysicalDeviceFeatures { features.set(F::SHADER_INT64, self.core.shader_int64 != 0); features.set(F::SHADER_I16, self.core.shader_int16 != 0); + if let Some(ref shader_atomic_int64) = self.shader_atomic_int64 { + features.set( + F::SHADER_INT64_ATOMIC_ALL_OPS | F::SHADER_INT64_ATOMIC_MIN_MAX, + shader_atomic_int64.shader_buffer_int64_atomics != 0 + && shader_atomic_int64.shader_shared_int64_atomics != 0, + ); + } + //if caps.supports_extension(khr::sampler_mirror_clamp_to_edge::NAME) { //if caps.supports_extension(ext::sampler_filter_minmax::NAME) { features.set( @@ -964,6 +991,13 @@ impl PhysicalDeviceProperties { extensions.push(ext::texture_compression_astc_hdr::NAME); } + // Require `VK_KHR_shader_atomic_int64` if the associated feature was requested + if requested_features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + ) { + extensions.push(khr::shader_atomic_int64::NAME); + } + extensions } @@ -1681,6 +1715,13 @@ impl super::Adapter { 
capabilities.push(spv::Capability::Int64); } + if features.intersects( + wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS + | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, + ) { + capabilities.push(spv::Capability::Int64Atomics); + } + let mut flags = spv::WriterFlags::empty(); flags.set( spv::WriterFlags::DEBUG, diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 943d8eb75..7fd29e0de 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -923,6 +923,23 @@ bitflags::bitflags! { /// - DX12 /// - Metal const PIPELINE_CACHE = 1 << 59; + /// Allows shaders to use i64 and u64 atomic min and max. + /// + /// Supported platforms: + /// - Vulkan (with VK_KHR_shader_atomic_int64) + /// - DX12 (with SM 6.6+) + /// - Metal (with MSL 2.4+) + /// + /// This is a native only feature. + const SHADER_INT64_ATOMIC_MIN_MAX = 1 << 60; + /// Allows shaders to use all i64 and u64 atomic operations. + /// + /// Supported platforms: + /// - Vulkan (with VK_KHR_shader_atomic_int64) + /// - DX12 (with SM 6.6+) + /// + /// This is a native only feature. + const SHADER_INT64_ATOMIC_ALL_OPS = 1 << 61; } }