Add shader I64 and U64 support (#5154)

Co-authored-by: Connor Fitzgerald <connorwadefitzgerald@gmail.com>
vero 2024-03-12 04:34:06 -07:00 committed by GitHub
parent 3107f5e148
commit 4e6f873da5
40 changed files with 2183 additions and 164 deletions


@@ -108,6 +108,7 @@ By @cwfitzgerald in [#5325](https://github.com/gfx-rs/wgpu/pull/5325).
- As with other instance flags, this flag can be changed in calls to `InstanceFlags::with_env` with the new `WGPU_GPU_BASED_VALIDATION` environment variable.
By @ErichDonGubler in [#5146](https://github.com/gfx-rs/wgpu/pull/5146), [#5046](https://github.com/gfx-rs/wgpu/pull/5046).
- Signed and unsigned 64 bit integer support in shaders. By @rodolphito and @cwfitzgerald in [#5154](https://github.com/gfx-rs/wgpu/pull/5154)
- `wgpu::Instance` can now report which `wgpu::Backends` are available based on the build configuration. By @wumpf [#5167](https://github.com/gfx-rs/wgpu/pull/5167)
```diff
-wgpu::Instance::any_backend_feature_enabled()
```
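
The new 64-bit integer support is gated on a device feature. A minimal sketch of opting in from the wgpu side (hedged: this assumes the `wgpu::Features::SHADER_INT64` flag this PR introduces and the wgpu 0.19-era `request_device` signature):

```rust
async fn request_int64_device(
    adapter: &wgpu::Adapter,
) -> Result<(wgpu::Device, wgpu::Queue), wgpu::RequestDeviceError> {
    adapter
        .request_device(
            &wgpu::DeviceDescriptor {
                label: Some("int64-capable device"),
                // Device creation fails if the adapter does not support it.
                required_features: wgpu::Features::SHADER_INT64,
                required_limits: wgpu::Limits::default(),
            },
            None, // no API trace directory
        )
        .await
}
```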


@@ -2456,6 +2456,9 @@ impl<'a, W: Write> Writer<'a, W> {
crate::Literal::I64(_) => {
return Err(Error::Custom("GLSL has no 64-bit integer type".into()));
}
crate::Literal::U64(_) => {
return Err(Error::Custom("GLSL has no 64-bit integer type".into()));
}
crate::Literal::AbstractInt(_) | crate::Literal::AbstractFloat(_) => {
return Err(Error::Custom(
"Abstract types should not appear in IR presented to backends".into(),


@@ -21,8 +21,16 @@ impl crate::Scalar {
/// <https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-scalar>
pub(super) const fn to_hlsl_str(self) -> Result<&'static str, Error> {
match self.kind {
crate::ScalarKind::Sint => Ok("int"),
crate::ScalarKind::Uint => Ok("uint"),
crate::ScalarKind::Sint => match self.width {
4 => Ok("int"),
8 => Ok("int64_t"),
_ => Err(Error::UnsupportedScalar(self)),
},
crate::ScalarKind::Uint => match self.width {
4 => Ok("uint"),
8 => Ok("uint64_t"),
_ => Err(Error::UnsupportedScalar(self)),
},
crate::ScalarKind::Float => match self.width {
2 => Ok("half"),
4 => Ok("float"),
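
For reference, the resulting name mapping: `width == 4` keeps the classic `int`/`uint`, `width == 8` uses DXC's fixed-width `int64_t`/`uint64_t`, and anything else is rejected. A hypothetical in-module check (hedged; `to_hlsl_str` is `pub(super)`, so this would live next to the function):

```rust
#[test]
fn int64_hlsl_names() {
    use crate::{Scalar, ScalarKind};
    assert!(matches!(
        Scalar { kind: ScalarKind::Sint, width: 8 }.to_hlsl_str(),
        Ok("int64_t")
    ));
    assert!(matches!(
        Scalar { kind: ScalarKind::Uint, width: 8 }.to_hlsl_str(),
        Ok("uint64_t")
    ));
    // 3-byte integers were never valid and still are not.
    assert!(Scalar { kind: ScalarKind::Sint, width: 3 }.to_hlsl_str().is_err());
}
```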


@@ -32,6 +32,16 @@ The [`temp_access_chain`] field is a member of [`Writer`] solely to
allow re-use of the `Vec`'s dynamic allocation. Its value is no longer
needed once HLSL for the access has been generated.
Note about DXC and Load/Store functions:

DXC's HLSL has generic [`Load` and `Store`] functions for [`ByteAddressBuffer`] and
[`RWByteAddressBuffer`]. These are not available in FXC's HLSL, so we use them
only for types that are themselves DXC-only, notably 64-bit and 16-bit types.

FXC's HLSL instead has the functions Load, Load2, Load3, and Load4, and likewise
Store, Store2, Store3, and Store4. These load or store a vector of length 1, 2,
3, or 4. We use those for 32-bit types, bitcasting to the correct type if necessary.
[`Storage`]: crate::AddressSpace::Storage
[`ByteAddressBuffer`]: https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-byteaddressbuffer
[`RWByteAddressBuffer`]: https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer
@@ -42,6 +52,7 @@ needed once HLSL for the access has been generated.
[`Writer::temp_access_chain`]: super::Writer::temp_access_chain
[`temp_access_chain`]: super::Writer::temp_access_chain
[`Writer`]: super::Writer
[`Load` and `Store`]: https://github.com/microsoft/DirectXShaderCompiler/wiki/ByteAddressBuffer-Load-Store-Additions
*/
use super::{super::FunctionCtx, BackendResult, Error};
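
A condensed sketch of the dispatch this note describes, with the generated HLSL shapes in comments (a simplified stand-in for the writer code below, not the actual implementation):

```rust
/// 32-bit scalars use FXC-compatible untyped `Load` plus a bitcast;
/// 64-bit scalars use DXC's templated `Load<T>`, which returns `T` directly.
fn scalar_load_hlsl(var: &str, width: u8, hlsl_ty: &str, offset: u32) -> String {
    if width == 4 {
        // e.g. `asint(buf.Load(16))`
        let cast = match hlsl_ty {
            "int" => "asint",
            "float" => "asfloat",
            _ => "asuint",
        };
        format!("{cast}({var}.Load({offset}))")
    } else {
        // e.g. `buf.Load<int64_t>(16)` -- DXC only
        format!("{var}.Load<{hlsl_ty}>({offset})")
    }
}
```
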
@@ -161,20 +172,39 @@ impl<W: fmt::Write> super::Writer<'_, W> {
// working around the borrow checker in `self.write_expr`
let chain = mem::take(&mut self.temp_access_chain);
let var_name = &self.names[&NameKey::GlobalVariable(var_handle)];
// See note about DXC and Load/Store in the module's documentation.
if scalar.width == 4 {
let cast = scalar.kind.to_hlsl_cast();
write!(self.out, "{cast}({var_name}.Load(")?;
} else {
let ty = scalar.to_hlsl_str()?;
write!(self.out, "{var_name}.Load<{ty}>(")?;
};
self.write_storage_address(module, &chain, func_ctx)?;
write!(self.out, "))")?;
write!(self.out, ")")?;
if scalar.width == 4 {
write!(self.out, ")")?;
}
self.temp_access_chain = chain;
}
crate::TypeInner::Vector { size, scalar } => {
// working around the borrow checker in `self.write_expr`
let chain = mem::take(&mut self.temp_access_chain);
let var_name = &self.names[&NameKey::GlobalVariable(var_handle)];
let size = size as u8;
// See note about DXC and Load/Store in the module's documentation.
if scalar.width == 4 {
let cast = scalar.kind.to_hlsl_cast();
write!(self.out, "{}({}.Load{}(", cast, var_name, size as u8)?;
write!(self.out, "{cast}({var_name}.Load{size}(")?;
} else {
let ty = scalar.to_hlsl_str()?;
write!(self.out, "{var_name}.Load<{ty}{size}>(")?;
};
self.write_storage_address(module, &chain, func_ctx)?;
write!(self.out, "))")?;
write!(self.out, ")")?;
if scalar.width == 4 {
write!(self.out, ")")?;
}
self.temp_access_chain = chain;
}
crate::TypeInner::Matrix {
@@ -288,26 +318,44 @@ impl<W: fmt::Write> super::Writer<'_, W> {
}
};
match *ty_resolution.inner_with(&module.types) {
crate::TypeInner::Scalar(_) => {
crate::TypeInner::Scalar(scalar) => {
// working around the borrow checker in `self.write_expr`
let chain = mem::take(&mut self.temp_access_chain);
let var_name = &self.names[&NameKey::GlobalVariable(var_handle)];
// See note about DXC and Load/Store in the module's documentation.
if scalar.width == 4 {
write!(self.out, "{level}{var_name}.Store(")?;
self.write_storage_address(module, &chain, func_ctx)?;
write!(self.out, ", asuint(")?;
self.write_store_value(module, &value, func_ctx)?;
writeln!(self.out, "));")?;
} else {
write!(self.out, "{level}{var_name}.Store(")?;
self.write_storage_address(module, &chain, func_ctx)?;
write!(self.out, ", ")?;
self.write_store_value(module, &value, func_ctx)?;
writeln!(self.out, ");")?;
}
self.temp_access_chain = chain;
}
crate::TypeInner::Vector { size, .. } => {
crate::TypeInner::Vector { size, scalar } => {
// working around the borrow checker in `self.write_expr`
let chain = mem::take(&mut self.temp_access_chain);
let var_name = &self.names[&NameKey::GlobalVariable(var_handle)];
// See note about DXC and Load/Store in the module's documentation.
if scalar.width == 4 {
write!(self.out, "{}{}.Store{}(", level, var_name, size as u8)?;
self.write_storage_address(module, &chain, func_ctx)?;
write!(self.out, ", asuint(")?;
self.write_store_value(module, &value, func_ctx)?;
writeln!(self.out, "));")?;
} else {
write!(self.out, "{}{}.Store(", level, var_name)?;
self.write_storage_address(module, &chain, func_ctx)?;
write!(self.out, ", ")?;
self.write_store_value(module, &value, func_ctx)?;
writeln!(self.out, ");")?;
}
self.temp_access_chain = chain;
}
crate::TypeInner::Matrix {


@@ -2022,6 +2022,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
crate::Literal::F32(value) => write!(self.out, "{value:?}")?,
crate::Literal::U32(value) => write!(self.out, "{}u", value)?,
crate::Literal::I32(value) => write!(self.out, "{}", value)?,
crate::Literal::U64(value) => write!(self.out, "{}uL", value)?,
crate::Literal::I64(value) => write!(self.out, "{}L", value)?,
crate::Literal::Bool(value) => write!(self.out, "{}", value)?,
crate::Literal::AbstractInt(_) | crate::Literal::AbstractFloat(_) => {
@@ -2551,7 +2552,7 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
convert,
} => {
let inner = func_ctx.resolve_type(expr, &module.types);
match convert {
let close_paren = match convert {
Some(dst_width) => {
let scalar = crate::Scalar {
kind,
@@ -2584,14 +2585,22 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
)));
}
};
true
}
None => {
if inner.scalar_width() == Some(64) {
false
} else {
write!(self.out, "{}(", kind.to_hlsl_cast(),)?;
true
}
}
};
self.write_expr(module, expr, func_ctx)?;
if close_paren {
write!(self.out, ")")?;
}
}
Expression::Math {
fun,
arg,
@@ -2862,9 +2871,15 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
}
write!(self.out, ")")?
}
// These overloads are only missing on FXC, so this is only needed for 32-bit types,
// as non-32-bit types are DXC-only.
Function::MissingIntOverload(fun_name) => {
let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar_kind();
if let Some(ScalarKind::Sint) = scalar_kind {
let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar();
if let Some(crate::Scalar {
kind: ScalarKind::Sint,
width: 4,
}) = scalar_kind
{
write!(self.out, "asint({fun_name}(asuint(")?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, ")))")?;
@@ -2874,9 +2889,15 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
write!(self.out, ")")?;
}
}
// These overloads are only missing on FXC, so this is only needed for 32-bit types,
// as non-32-bit types are DXC-only.
Function::MissingIntReturnType(fun_name) => {
let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar_kind();
if let Some(ScalarKind::Sint) = scalar_kind {
let scalar_kind = func_ctx.resolve_type(arg, &module.types).scalar();
if let Some(crate::Scalar {
kind: ScalarKind::Sint,
width: 4,
}) = scalar_kind
{
write!(self.out, "asint({fun_name}(")?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, "))")?;
@@ -2895,23 +2916,38 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
crate::VectorSize::Quad => ".xxxx",
};
if let ScalarKind::Uint = scalar.kind {
write!(self.out, "min((32u){s}, firstbitlow(")?;
let scalar_width_bits = scalar.width * 8;
if scalar.kind == ScalarKind::Uint || scalar.width != 4 {
write!(
self.out,
"min(({scalar_width_bits}u){s}, firstbitlow("
)?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, "))")?;
} else {
write!(self.out, "asint(min((32u){s}, firstbitlow(")?;
// This is only needed for the FXC path, on 32-bit signed integers.
write!(
self.out,
"asint(min(({scalar_width_bits}u){s}, firstbitlow("
)?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, ")))")?;
}
}
TypeInner::Scalar(scalar) => {
if let ScalarKind::Uint = scalar.kind {
write!(self.out, "min(32u, firstbitlow(")?;
let scalar_width_bits = scalar.width * 8;
if scalar.kind == ScalarKind::Uint || scalar.width != 4 {
write!(self.out, "min({scalar_width_bits}u, firstbitlow(")?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, "))")?;
} else {
write!(self.out, "asint(min(32u, firstbitlow(")?;
// This is only needed for the FXC path, on 32-bit signed integers.
write!(
self.out,
"asint(min({scalar_width_bits}u, firstbitlow("
)?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, ")))")?;
}
@@ -2930,30 +2966,47 @@ impl<'a, W: fmt::Write> super::Writer<'a, W> {
crate::VectorSize::Quad => ".xxxx",
};
if let ScalarKind::Uint = scalar.kind {
write!(self.out, "((31u){s} - firstbithigh(")?;
// scalar width - 1
let constant = scalar.width * 8 - 1;
if scalar.kind == ScalarKind::Uint {
write!(self.out, "(({constant}u){s} - firstbithigh(")?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, "))")?;
} else {
let conversion_func = match scalar.width {
4 => "asint",
_ => "",
};
write!(self.out, "(")?;
self.write_expr(module, arg, func_ctx)?;
write!(
self.out,
" < (0){s} ? (0){s} : (31){s} - asint(firstbithigh("
" < (0){s} ? (0){s} : ({constant}){s} - {conversion_func}(firstbithigh("
)?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, ")))")?;
}
}
TypeInner::Scalar(scalar) => {
// scalar width - 1
let constant = scalar.width * 8 - 1;
if let ScalarKind::Uint = scalar.kind {
write!(self.out, "(31u - firstbithigh(")?;
write!(self.out, "({constant}u - firstbithigh(")?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, "))")?;
} else {
let conversion_func = match scalar.width {
4 => "asint",
_ => "",
};
write!(self.out, "(")?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, " < 0 ? 0 : 31 - asint(firstbithigh(")?;
write!(
self.out,
" < 0 ? 0 : {constant} - {conversion_func}(firstbithigh("
)?;
self.write_expr(module, arg, func_ctx)?;
write!(self.out, ")))")?;
}


@@ -121,8 +121,8 @@ pub enum Error {
UnsupportedCall(String),
#[error("feature '{0}' is not implemented yet")]
FeatureNotImplemented(String),
#[error("module is not valid")]
Validation,
#[error("internal naga error: module should not have validated: {0}")]
GenericValidation(String),
#[error("BuiltIn {0:?} is not supported")]
UnsupportedBuiltIn(crate::BuiltIn),
#[error("capability {0:?} is not supported")]
@@ -306,13 +306,10 @@ impl Options {
},
})
}
LocationMode::Uniform => {
log::error!(
LocationMode::Uniform => Err(Error::GenericValidation(format!(
"Unexpected Binding::Location({}) for the Uniform mode",
location
);
Err(Error::Validation)
}
))),
},
}
}


@@ -319,7 +319,7 @@ pub struct Writer<W> {
}
impl crate::Scalar {
const fn to_msl_name(self) -> &'static str {
fn to_msl_name(self) -> &'static str {
use crate::ScalarKind as Sk;
match self {
Self {
@@ -328,12 +328,20 @@ impl crate::Scalar {
} => "float",
Self {
kind: Sk::Sint,
width: _,
width: 4,
} => "int",
Self {
kind: Sk::Uint,
width: _,
width: 4,
} => "uint",
Self {
kind: Sk::Sint,
width: 8,
} => "long",
Self {
kind: Sk::Uint,
width: 8,
} => "ulong",
Self {
kind: Sk::Bool,
width: _,
@@ -341,7 +349,8 @@ impl crate::Scalar {
Self {
kind: Sk::AbstractInt | Sk::AbstractFloat,
width: _,
} => unreachable!(),
} => unreachable!("Found Abstract scalar kind"),
_ => unreachable!("Unsupported scalar kind: {:?}", self),
}
}
}
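
In summary, the MSL scalar names are now width-dependent for integers, and an unsupported width panics instead of silently emitting `int`/`uint`. A simplified mirror of the integer arms (illustrative only):

```rust
fn msl_int_name(signed: bool, width: u8) -> Option<&'static str> {
    match (signed, width) {
        (true, 4) => Some("int"),
        (false, 4) => Some("uint"),
        (true, 8) => Some("long"),   // Metal's 64-bit signed integer
        (false, 8) => Some("ulong"), // Metal's 64-bit unsigned integer
        _ => None,
    }
}
```
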
@@ -735,7 +744,11 @@ impl<W: Write> Writer<W> {
crate::TypeInner::Vector { size, .. } => {
put_numeric_type(&mut self.out, crate::Scalar::U32, &[size])?
}
_ => return Err(Error::Validation),
_ => {
return Err(Error::GenericValidation(
"Invalid type for image coordinate".into(),
))
}
};
write!(self.out, "(")?;
@@ -1068,13 +1081,17 @@ impl<W: Write> Writer<W> {
let (offset, array_ty) = match context.module.types[global.ty].inner {
crate::TypeInner::Struct { ref members, .. } => match members.last() {
Some(&crate::StructMember { offset, ty, .. }) => (offset, ty),
None => return Err(Error::Validation),
None => return Err(Error::GenericValidation("Struct has no members".into())),
},
crate::TypeInner::Array {
size: crate::ArraySize::Dynamic,
..
} => (0, global.ty),
_ => return Err(Error::Validation),
ref ty => {
return Err(Error::GenericValidation(format!(
"Expected type with dynamic array, got {ty:?}"
)))
}
};
let (size, stride) = match context.module.types[array_ty].inner {
@@ -1084,7 +1101,11 @@ impl<W: Write> Writer<W> {
.size(context.module.to_ctx()),
stride,
),
_ => return Err(Error::Validation),
ref ty => {
return Err(Error::GenericValidation(format!(
"Expected array type, got {ty:?}"
)))
}
};
// When the stride length is larger than the size, the final element's stride of
@@ -1273,6 +1294,9 @@ impl<W: Write> Writer<W> {
crate::Literal::I32(value) => {
write!(self.out, "{value}")?;
}
crate::Literal::U64(value) => {
write!(self.out, "{value}uL")?;
}
crate::Literal::I64(value) => {
write!(self.out, "{value}L")?;
}
@@ -1280,7 +1304,9 @@ impl<W: Write> Writer<W> {
write!(self.out, "{value}")?;
}
crate::Literal::AbstractInt(_) | crate::Literal::AbstractFloat(_) => {
return Err(Error::Validation);
return Err(Error::GenericValidation(
"Unsupported abstract literal".into(),
));
}
},
crate::Expression::Constant(handle) => {
@@ -1342,7 +1368,11 @@ impl<W: Write> Writer<W> {
crate::Expression::Splat { size, value } => {
let scalar = match *get_expr_ty(ctx, value).inner_with(&module.types) {
crate::TypeInner::Scalar(scalar) => scalar,
_ => return Err(Error::Validation),
ref ty => {
return Err(Error::GenericValidation(format!(
"Expected splat value type must be a scalar, got {ty:?}",
)))
}
};
put_numeric_type(&mut self.out, scalar, &[size])?;
write!(self.out, "(")?;
@@ -1672,7 +1702,11 @@ impl<W: Write> Writer<W> {
self.put_expression(condition, context, true)?;
write!(self.out, ")")?;
}
_ => return Err(Error::Validation),
ref ty => {
return Err(Error::GenericValidation(format!(
"Expected select condition to be a non-bool type, got {ty:?}",
)))
}
},
crate::Expression::Derivative { axis, expr, .. } => {
use crate::DerivativeAxis as Axis;
@@ -1836,15 +1870,23 @@ impl<W: Write> Writer<W> {
self.put_expression(arg1.unwrap(), context, false)?;
write!(self.out, ")")?;
} else if fun == Mf::FindLsb {
let scalar = context.resolve_type(arg).scalar().unwrap();
let constant = scalar.width * 8 + 1;
write!(self.out, "((({NAMESPACE}::ctz(")?;
self.put_expression(arg, context, true)?;
write!(self.out, ") + 1) % 33) - 1)")?;
write!(self.out, ") + 1) % {constant}) - 1)")?;
} else if fun == Mf::FindMsb {
let inner = context.resolve_type(arg);
let scalar = inner.scalar().unwrap();
let constant = scalar.width * 8 - 1;
write!(self.out, "{NAMESPACE}::select(31 - {NAMESPACE}::clz(")?;
write!(
self.out,
"{NAMESPACE}::select({constant} - {NAMESPACE}::clz("
)?;
if let Some(crate::ScalarKind::Sint) = inner.scalar_kind() {
if scalar.kind == crate::ScalarKind::Sint {
write!(self.out, "{NAMESPACE}::select(")?;
self.put_expression(arg, context, true)?;
write!(self.out, ", ~")?;
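
Both arms lean on Metal's bit-count semantics: per the MSL spec, `ctz` and `clz` return the type's size in bits when no bit is set, while WGSL's `firstTrailingBit`/`firstLeadingBit` must return -1. That is why `FindLsb` uses `width * 8 + 1` as the modulus; checked numerically:

```rust
fn main() {
    // Behavior of `((ctz(x) + 1) % (bits + 1)) - 1` on a 64-bit input:
    let bits: i64 = 64; // metal::ctz(0) == bit width of the type
    assert_eq!(((bits + 1) % (bits + 1)) - 1, -1); // "not found" maps to -1
    // For a found bit, e.g. ctz == 3, positions pass through unchanged:
    assert_eq!(((3 + 1) % (bits + 1)) - 1, 3);
}
```
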
@@ -1862,18 +1904,12 @@ impl<W: Write> Writer<W> {
match *inner {
crate::TypeInner::Vector { size, scalar } => {
let size = back::vector_size_str(size);
if let crate::ScalarKind::Sint = scalar.kind {
write!(self.out, "int{size}")?;
} else {
write!(self.out, "uint{size}")?;
}
let name = scalar.to_msl_name();
write!(self.out, "{name}{size}")?;
}
crate::TypeInner::Scalar(scalar) => {
if let crate::ScalarKind::Sint = scalar.kind {
write!(self.out, "int")?;
} else {
write!(self.out, "uint")?;
}
let name = scalar.to_msl_name();
write!(self.out, "{name}")?;
}
_ => (),
}
@@ -1966,14 +2002,8 @@ impl<W: Write> Writer<W> {
kind,
width: convert.unwrap_or(src.width),
};
let is_bool_cast =
kind == crate::ScalarKind::Bool || src.kind == crate::ScalarKind::Bool;
let op = match convert {
Some(w) if w == src.width || is_bool_cast => "static_cast",
Some(8) if kind == crate::ScalarKind::Float => {
return Err(Error::CapabilityNotSupported(valid::Capabilities::FLOAT64))
}
Some(_) => return Err(Error::Validation),
Some(_) => "static_cast",
None => "as_type",
};
write!(self.out, "{op}<")?;
@@ -2001,7 +2031,11 @@ impl<W: Write> Writer<W> {
self.put_expression(expr, context, true)?;
write!(self.out, ")")?;
}
_ => return Err(Error::Validation),
ref ty => {
return Err(Error::GenericValidation(format!(
"Unsupported type for As: {ty:?}"
)))
}
},
// has to be a named expression
crate::Expression::CallResult(_)
@@ -2016,11 +2050,19 @@ impl<W: Write> Writer<W> {
crate::Expression::AccessIndex { base, .. } => {
match context.function.expressions[base] {
crate::Expression::GlobalVariable(handle) => handle,
_ => return Err(Error::Validation),
ref ex => {
return Err(Error::GenericValidation(format!(
"Expected global variable in AccessIndex, got {ex:?}"
)))
}
}
}
crate::Expression::GlobalVariable(handle) => handle,
_ => return Err(Error::Validation),
ref ex => {
return Err(Error::GenericValidation(format!(
"Unexpected expression in ArrayLength, got {ex:?}"
)))
}
};
if !is_scoped {
@@ -2186,10 +2228,12 @@ impl<W: Write> Writer<W> {
match length {
index::IndexableLength::Known(value) => write!(self.out, "{value}")?,
index::IndexableLength::Dynamic => {
let global = context
.function
.originating_global(base)
.ok_or(Error::Validation)?;
let global =
context.function.originating_global(base).ok_or_else(|| {
Error::GenericValidation(
"Could not find originating global".into(),
)
})?;
write!(self.out, "1 + ")?;
self.put_dynamic_array_max_index(global, context)?
}
@@ -2346,10 +2390,9 @@ impl<W: Write> Writer<W> {
write!(self.out, "{}u", limit - 1)?;
}
index::IndexableLength::Dynamic => {
let global = context
.function
.originating_global(base)
.ok_or(Error::Validation)?;
let global = context.function.originating_global(base).ok_or_else(|| {
Error::GenericValidation("Could not find originating global".into())
})?;
self.put_dynamic_array_max_index(global, context)?;
}
}
@@ -3958,7 +4001,9 @@ impl<W: Write> Writer<W> {
binding: None,
first_time: true,
};
let binding = binding.ok_or(Error::Validation)?;
let binding = binding.ok_or_else(|| {
Error::GenericValidation("Expected binding, got None".into())
})?;
if let crate::Binding::BuiltIn(crate::BuiltIn::PointSize) = *binding {
has_point_size = true;


@@ -944,8 +944,7 @@ impl<'w> BlockContext<'w> {
)),
Mf::CountTrailingZeros => {
let uint_id = match *arg_ty {
crate::TypeInner::Vector { size, mut scalar } => {
scalar.kind = crate::ScalarKind::Uint;
crate::TypeInner::Vector { size, scalar } => {
let ty = LocalType::Value {
vector_size: Some(size),
scalar,
@@ -956,15 +955,15 @@ impl<'w> BlockContext<'w> {
self.temp_list.clear();
self.temp_list.resize(
size as _,
self.writer.get_constant_scalar_with(32, scalar)?,
self.writer
.get_constant_scalar_with(scalar.width * 8, scalar)?,
);
self.writer.get_constant_composite(ty, &self.temp_list)
}
crate::TypeInner::Scalar(mut scalar) => {
scalar.kind = crate::ScalarKind::Uint;
self.writer.get_constant_scalar_with(32, scalar)?
}
crate::TypeInner::Scalar(scalar) => self
.writer
.get_constant_scalar_with(scalar.width * 8, scalar)?,
_ => unreachable!(),
};
@@ -986,9 +985,8 @@ impl<'w> BlockContext<'w> {
))
}
Mf::CountLeadingZeros => {
let (int_type_id, int_id) = match *arg_ty {
crate::TypeInner::Vector { size, mut scalar } => {
scalar.kind = crate::ScalarKind::Sint;
let (int_type_id, int_id, width) = match *arg_ty {
crate::TypeInner::Vector { size, scalar } => {
let ty = LocalType::Value {
vector_size: Some(size),
scalar,
@@ -999,32 +997,41 @@ impl<'w> BlockContext<'w> {
self.temp_list.clear();
self.temp_list.resize(
size as _,
self.writer.get_constant_scalar_with(31, scalar)?,
self.writer
.get_constant_scalar_with(scalar.width * 8 - 1, scalar)?,
);
(
self.get_type_id(ty),
self.writer.get_constant_composite(ty, &self.temp_list),
scalar.width,
)
}
crate::TypeInner::Scalar(mut scalar) => {
scalar.kind = crate::ScalarKind::Sint;
(
crate::TypeInner::Scalar(scalar) => (
self.get_type_id(LookupType::Local(LocalType::Value {
vector_size: None,
scalar,
pointer_space: None,
})),
self.writer.get_constant_scalar_with(31, scalar)?,
)
}
self.writer
.get_constant_scalar_with(scalar.width * 8 - 1, scalar)?,
scalar.width,
),
_ => unreachable!(),
};
if width != 4 {
unreachable!("This is validated out until a polyfill is implemented. https://github.com/gfx-rs/wgpu/issues/5276");
};
let msb_id = self.gen_id();
block.body.push(Instruction::ext_inst(
self.writer.gl450_ext_inst_id,
spirv::GLOp::FindUMsb,
if width != 4 {
spirv::GLOp::FindILsb
} else {
spirv::GLOp::FindUMsb
},
int_type_id,
msb_id,
&[arg0_id],
@@ -1176,11 +1183,18 @@ impl<'w> BlockContext<'w> {
))
}
Mf::FindLsb => MathOp::Ext(spirv::GLOp::FindILsb),
Mf::FindMsb => MathOp::Ext(match arg_scalar_kind {
Mf::FindMsb => {
if arg_ty.scalar_width() == Some(32) {
let thing = match arg_scalar_kind {
Some(crate::ScalarKind::Uint) => spirv::GLOp::FindUMsb,
Some(crate::ScalarKind::Sint) => spirv::GLOp::FindSMsb,
other => unimplemented!("Unexpected findMSB({:?})", other),
}),
};
MathOp::Ext(thing)
} else {
unreachable!("This is validated out until a polyfill is implemented. https://github.com/gfx-rs/wgpu/issues/5276");
}
}
Mf::Pack4x8unorm => MathOp::Ext(spirv::GLOp::PackUnorm4x8),
Mf::Pack4x8snorm => MathOp::Ext(spirv::GLOp::PackSnorm4x8),
Mf::Pack2x16float => MathOp::Ext(spirv::GLOp::PackHalf2x16),
@@ -1386,6 +1400,12 @@ impl<'w> BlockContext<'w> {
(Sk::Uint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
(Sk::Uint, Sk::Sint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::SConvert)
}
(Sk::Sint, Sk::Uint, Some(dst_width)) if src_scalar.width != dst_width => {
Cast::Unary(spirv::Op::UConvert)
}
// We assume it's either an identity cast, or int-uint.
_ => Cast::Unary(spirv::Op::Bitcast),
}
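
The two new arms handle width-changing casts that also flip signedness. Per the SPIR-V spec, `OpSConvert` sign-extends and `OpUConvert` zero-extends; a CPU-side illustration of the difference:

```rust
fn main() {
    let bits: u32 = 0xFFFF_FFFF;
    // Zero-extension, as OpUConvert performs:
    assert_eq!(u64::from(bits), 0x0000_0000_FFFF_FFFF);
    // Sign-extension, as OpSConvert performs (same bits read as i32 first):
    assert_eq!(i64::from(bits as i32), -1);
}
```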


@@ -1182,6 +1182,9 @@ impl Writer {
crate::Literal::F32(value) => Instruction::constant_32bit(type_id, id, value.to_bits()),
crate::Literal::U32(value) => Instruction::constant_32bit(type_id, id, value),
crate::Literal::I32(value) => Instruction::constant_32bit(type_id, id, value as u32),
crate::Literal::U64(value) => {
Instruction::constant_64bit(type_id, id, value as u32, (value >> 32) as u32)
}
crate::Literal::I64(value) => {
Instruction::constant_64bit(type_id, id, value as u32, (value >> 32) as u32)
}
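
SPIR-V encodes literals as a sequence of 32-bit words, so `constant_64bit` splits the value into a low and a high word, low word first. The split in isolation:

```rust
fn main() {
    let value: u64 = 0x0123_4567_89AB_CDEF;
    let low = value as u32;          // 0x89AB_CDEF, emitted first
    let high = (value >> 32) as u32; // 0x0123_4567
    assert_eq!((u64::from(high) << 32) | u64::from(low), value);
}
```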


@@ -1096,16 +1096,24 @@ impl<W: Write> Writer<W> {
// value can only be expressed in WGSL using AbstractInt and
// a unary negation operator.
if value == i32::MIN {
write!(self.out, "i32(-2147483648)")?;
write!(self.out, "i32({})", value)?;
} else {
write!(self.out, "{}i", value)?;
}
}
crate::Literal::Bool(value) => write!(self.out, "{}", value)?,
crate::Literal::F64(value) => write!(self.out, "{:?}lf", value)?,
crate::Literal::I64(_) => {
return Err(Error::Custom("unsupported i64 literal".to_string()));
crate::Literal::I64(value) => {
// `-9223372036854775808li` is not valid WGSL. The most negative `i64`
// value can only be expressed in WGSL using AbstractInt and
// a unary negation operator.
if value == i64::MIN {
write!(self.out, "i64({})", value)?;
} else {
write!(self.out, "{}li", value)?;
}
}
crate::Literal::U64(value) => write!(self.out, "{:?}lu", value)?,
crate::Literal::AbstractInt(_) | crate::Literal::AbstractFloat(_) => {
return Err(Error::Custom(
"Abstract types should not appear in IR presented to backends".into(),
@@ -1828,6 +1836,14 @@ const fn scalar_kind_str(scalar: crate::Scalar) -> &'static str {
kind: Sk::Uint,
width: 4,
} => "u32",
Scalar {
kind: Sk::Sint,
width: 8,
} => "i64",
Scalar {
kind: Sk::Uint,
width: 8,
} => "u64",
Scalar {
kind: Sk::Bool,
width: 1,
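
The `i64::MIN` special case mirrors the existing `i32::MIN` one: a WGSL suffix applies to the unsigned digits before the unary minus, and `9223372036854775808li` overflows `i64` on its own. Routing the value through an `i64(...)` conversion of an AbstractInt sidesteps that:

```rust
fn main() {
    // Prints `i64(-9223372036854775808)`, which WGSL accepts because the
    // inner literal is an AbstractInt rather than a suffixed i64.
    println!("i64({})", i64::MIN);
}
```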


@@ -4876,6 +4876,11 @@ impl<I: Iterator<Item = u32>> Frontend<I> {
let low = self.next()?;
match width {
4 => crate::Literal::U32(low),
8 => {
inst.expect(5)?;
let high = self.next()?;
crate::Literal::U64(u64::from(high) << 32 | u64::from(low))
}
_ => return Err(Error::InvalidTypeWidth(width as u32)),
}
}
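
On the reading side, an 8-byte-wide `OpConstant` carries one extra operand word, hence the expected instruction length of 5; the frontend reassembles the value low word first, matching the writer's split:

```rust
fn main() {
    let (low, high): (u32, u32) = (0x89AB_CDEF, 0x0123_4567);
    let value = (u64::from(high) << 32) | u64::from(low);
    assert_eq!(value, 0x0123_4567_89AB_CDEF);
}
```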


@@ -1530,6 +1530,8 @@ impl<'source, 'temp> Lowerer<'source, 'temp> {
ast::Literal::Number(Number::F32(f)) => crate::Literal::F32(f),
ast::Literal::Number(Number::I32(i)) => crate::Literal::I32(i),
ast::Literal::Number(Number::U32(u)) => crate::Literal::U32(u),
ast::Literal::Number(Number::I64(i)) => crate::Literal::I64(i),
ast::Literal::Number(Number::U64(u)) => crate::Literal::U64(u),
ast::Literal::Number(Number::F64(f)) => crate::Literal::F64(f),
ast::Literal::Number(Number::AbstractInt(i)) => crate::Literal::AbstractInt(i),
ast::Literal::Number(Number::AbstractFloat(f)) => {


@@ -124,6 +124,14 @@ pub fn get_scalar_type(word: &str) -> Option<Scalar> {
kind: Sk::Uint,
width: 4,
}),
"i64" => Some(Scalar {
kind: Sk::Sint,
width: 8,
}),
"u64" => Some(Scalar {
kind: Sk::Uint,
width: 8,
}),
"bool" => Some(Scalar {
kind: Sk::Bool,
width: crate::BOOL_WIDTH,


@@ -12,6 +12,10 @@ pub enum Number {
I32(i32),
/// Concrete u32
U32(u32),
/// Concrete i64
I64(i64),
/// Concrete u64
U64(u64),
/// Concrete f32
F32(f32),
/// Concrete f64
@@ -31,6 +35,8 @@ enum Kind {
enum IntKind {
I32,
U32,
I64,
U64,
}
#[derive(Debug)]
@@ -270,6 +276,8 @@ fn parse(input: &str) -> (Result<Number, NumberError>, &str) {
let kind = consume_map!(bytes, [
b'i' => Kind::Int(IntKind::I32),
b'u' => Kind::Int(IntKind::U32),
b'l', b'i' => Kind::Int(IntKind::I64),
b'l', b'u' => Kind::Int(IntKind::U64),
b'h' => Kind::Float(FloatKind::F16),
b'f' => Kind::Float(FloatKind::F32),
b'l', b'f' => Kind::Float(FloatKind::F64),
@@ -416,5 +424,13 @@ fn parse_int(input: &str, kind: Option<IntKind>, radix: u32) -> Result<Number, N
Ok(num) => Ok(Number::U32(num)),
Err(e) => Err(map_err(e)),
},
Some(IntKind::I64) => match i64::from_str_radix(input, radix) {
Ok(num) => Ok(Number::I64(num)),
Err(e) => Err(map_err(e)),
},
Some(IntKind::U64) => match u64::from_str_radix(input, radix) {
Ok(num) => Ok(Number::U64(num)),
Err(e) => Err(map_err(e)),
},
}
}
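
The `consume_map!` arms give the two-byte suffixes `li` and `lu` their own entries, so they are recognized as units before the one-byte `i`/`u` suffixes could match. A stand-alone mirror of the suffix table (the real lexer consumes bytes rather than inspecting string suffixes):

```rust
fn int_suffix_kind(token: &str) -> Option<&'static str> {
    // Two-byte suffixes must be checked before the one-byte ones they end with.
    if token.ends_with("li") {
        Some("i64")
    } else if token.ends_with("lu") {
        Some("u64")
    } else if token.ends_with('i') {
        Some("i32")
    } else if token.ends_with('u') {
        Some("u32")
    } else {
        None // unsuffixed: AbstractInt
    }
}

fn main() {
    assert_eq!(int_suffix_kind("1002003004005006li"), Some("i64"));
    assert_eq!(int_suffix_kind("20lu"), Some("u64"));
    assert_eq!(int_suffix_kind("5"), None);
}
```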


@@ -17,6 +17,7 @@ fn parse_comment() {
#[test]
fn parse_types() {
parse_str("const a : i32 = 2;").unwrap();
parse_str("const a : u64 = 2lu;").unwrap();
assert!(parse_str("const a : x32 = 2;").is_err());
parse_str("var t: texture_2d<f32>;").unwrap();
parse_str("var t: texture_cube_array<i32>;").unwrap();


@@ -14,6 +14,7 @@ pub const RESERVED: &[&str] = &[
"f32",
"f16",
"i32",
"i64",
"mat2x2",
"mat2x3",
"mat2x4",
@@ -43,6 +44,7 @@ pub const RESERVED: &[&str] = &[
"texture_depth_cube_array",
"texture_depth_multisampled_2d",
"u32",
"u64",
"vec2",
"vec3",
"vec4",


@@ -885,6 +885,7 @@ pub enum Literal {
F32(f32),
U32(u32),
I32(i32),
U64(u64),
I64(i64),
Bool(bool),
AbstractInt(i64),


@@ -200,6 +200,8 @@ gen_component_wise_extractor! {
AbstractInt => AbstractInt: i64,
U32 => U32: u32,
I32 => I32: i32,
U64 => U64: u64,
I64 => I64: i64,
],
scalar_kinds: [
Float,
@@ -847,6 +849,8 @@ impl<'a> ConstantEvaluator<'a> {
Scalar::AbstractInt([e]) => Ok(Scalar::AbstractInt([e.abs()])),
Scalar::I32([e]) => Ok(Scalar::I32([e.wrapping_abs()])),
Scalar::U32([e]) => Ok(Scalar::U32([e])), // TODO: just re-use the expression, ezpz
Scalar::I64([e]) => Ok(Scalar::I64([e.wrapping_abs()])),
Scalar::U64([e]) => Ok(Scalar::U64([e])),
})
}
crate::MathFunction::Min => {
@@ -1280,7 +1284,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::U32(v) => v as i32,
Literal::F32(v) => v as i32,
Literal::Bool(v) => v as i32,
Literal::F64(_) | Literal::I64(_) => {
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
}
Literal::AbstractInt(v) => i32::try_from_abstract(v)?,
@@ -1291,18 +1295,40 @@ impl<'a> ConstantEvaluator<'a> {
Literal::U32(v) => v,
Literal::F32(v) => v as u32,
Literal::Bool(v) => v as u32,
Literal::F64(_) | Literal::I64(_) => {
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
}
Literal::AbstractInt(v) => u32::try_from_abstract(v)?,
Literal::AbstractFloat(v) => u32::try_from_abstract(v)?,
}),
Sc::I64 => Literal::I64(match literal {
Literal::I32(v) => v as i64,
Literal::U32(v) => v as i64,
Literal::F32(v) => v as i64,
Literal::Bool(v) => v as i64,
Literal::F64(v) => v as i64,
Literal::I64(v) => v,
Literal::U64(v) => v as i64,
Literal::AbstractInt(v) => i64::try_from_abstract(v)?,
Literal::AbstractFloat(v) => i64::try_from_abstract(v)?,
}),
Sc::U64 => Literal::U64(match literal {
Literal::I32(v) => v as u64,
Literal::U32(v) => v as u64,
Literal::F32(v) => v as u64,
Literal::Bool(v) => v as u64,
Literal::F64(v) => v as u64,
Literal::I64(v) => v as u64,
Literal::U64(v) => v,
Literal::AbstractInt(v) => u64::try_from_abstract(v)?,
Literal::AbstractFloat(v) => u64::try_from_abstract(v)?,
}),
Sc::F32 => Literal::F32(match literal {
Literal::I32(v) => v as f32,
Literal::U32(v) => v as f32,
Literal::F32(v) => v,
Literal::Bool(v) => v as u32 as f32,
Literal::F64(_) | Literal::I64(_) => {
Literal::F64(_) | Literal::I64(_) | Literal::U64(_) => {
return make_error();
}
Literal::AbstractInt(v) => f32::try_from_abstract(v)?,
@@ -1314,7 +1340,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::F32(v) => v as f64,
Literal::F64(v) => v,
Literal::Bool(v) => v as u32 as f64,
Literal::I64(_) => return make_error(),
Literal::I64(_) | Literal::U64(_) => return make_error(),
Literal::AbstractInt(v) => f64::try_from_abstract(v)?,
Literal::AbstractFloat(v) => f64::try_from_abstract(v)?,
}),
@@ -1325,6 +1351,7 @@ impl<'a> ConstantEvaluator<'a> {
Literal::Bool(v) => v,
Literal::F64(_)
| Literal::I64(_)
| Literal::U64(_)
| Literal::AbstractInt(_)
| Literal::AbstractFloat(_) => {
return make_error();
@@ -1915,6 +1942,21 @@ impl TryFromAbstract<i64> for u32 {
}
}
impl TryFromAbstract<i64> for u64 {
fn try_from_abstract(value: i64) -> Result<u64, ConstantEvaluatorError> {
u64::try_from(value).map_err(|_| ConstantEvaluatorError::AutomaticConversionLossy {
value: format!("{value:?}"),
to_type: "u64",
})
}
}
impl TryFromAbstract<i64> for i64 {
fn try_from_abstract(value: i64) -> Result<i64, ConstantEvaluatorError> {
Ok(value)
}
}
impl TryFromAbstract<i64> for f32 {
fn try_from_abstract(value: i64) -> Result<Self, ConstantEvaluatorError> {
let f = value as f32;
@@ -1966,6 +2008,18 @@ impl TryFromAbstract<f64> for u32 {
}
}
impl TryFromAbstract<f64> for i64 {
fn try_from_abstract(_: f64) -> Result<Self, ConstantEvaluatorError> {
Err(ConstantEvaluatorError::AutomaticConversionFloatToInt { to_type: "i64" })
}
}
impl TryFromAbstract<f64> for u64 {
fn try_from_abstract(_: f64) -> Result<Self, ConstantEvaluatorError> {
Err(ConstantEvaluatorError::AutomaticConversionFloatToInt { to_type: "u64" })
}
}
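
Taken together, the `TryFromAbstract` impls encode WGSL's automatic conversion rules for the new types: `AbstractInt` (an `i64` internally) converts to `i64` as the identity, converts to `u64` only when non-negative, and `AbstractFloat` never converts to an integer implicitly. The lossy case behaves like `u64::try_from` on a negative value:

```rust
fn main() {
    // Fails, mirroring ConstantEvaluatorError::AutomaticConversionLossy:
    assert!(u64::try_from(-5_i64).is_err());
    // Succeeds for non-negative AbstractInt values:
    assert_eq!(u64::try_from(20_i64), Ok(20));
}
```
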
#[cfg(test)]
mod tests {
use std::vec;


@@ -102,6 +102,10 @@ impl super::Scalar {
kind: crate::ScalarKind::Sint,
width: 8,
};
pub const U64: Self = Self {
kind: crate::ScalarKind::Uint,
width: 8,
};
pub const BOOL: Self = Self {
kind: crate::ScalarKind::Bool,
width: crate::BOOL_WIDTH,
@@ -156,6 +160,7 @@ impl PartialEq for crate::Literal {
(Self::F32(a), Self::F32(b)) => a.to_bits() == b.to_bits(),
(Self::U32(a), Self::U32(b)) => a == b,
(Self::I32(a), Self::I32(b)) => a == b,
(Self::U64(a), Self::U64(b)) => a == b,
(Self::I64(a), Self::I64(b)) => a == b,
(Self::Bool(a), Self::Bool(b)) => a == b,
_ => false,
@@ -186,10 +191,18 @@ impl std::hash::Hash for crate::Literal {
hasher.write_u8(4);
v.hash(hasher);
}
Self::I64(v) | Self::AbstractInt(v) => {
Self::I64(v) => {
hasher.write_u8(5);
v.hash(hasher);
}
Self::U64(v) => {
hasher.write_u8(6);
v.hash(hasher);
}
Self::AbstractInt(v) => {
hasher.write_u8(7);
v.hash(hasher);
}
}
}
}
@@ -201,6 +214,7 @@ impl crate::Literal {
(value, crate::ScalarKind::Float, 4) => Some(Self::F32(value as _)),
(value, crate::ScalarKind::Uint, 4) => Some(Self::U32(value as _)),
(value, crate::ScalarKind::Sint, 4) => Some(Self::I32(value as _)),
(value, crate::ScalarKind::Uint, 8) => Some(Self::U64(value as _)),
(value, crate::ScalarKind::Sint, 8) => Some(Self::I64(value as _)),
(1, crate::ScalarKind::Bool, 4) => Some(Self::Bool(true)),
(0, crate::ScalarKind::Bool, 4) => Some(Self::Bool(false)),
@@ -218,7 +232,7 @@ impl crate::Literal {
pub const fn width(&self) -> crate::Bytes {
match *self {
Self::F64(_) | Self::I64(_) => 8,
Self::F64(_) | Self::I64(_) | Self::U64(_) => 8,
Self::F32(_) | Self::U32(_) | Self::I32(_) => 4,
Self::Bool(_) => crate::BOOL_WIDTH,
Self::AbstractInt(_) | Self::AbstractFloat(_) => crate::ABSTRACT_WIDTH,
@@ -230,6 +244,7 @@ impl crate::Literal {
Self::F32(_) => crate::Scalar::F32,
Self::U32(_) => crate::Scalar::U32,
Self::I32(_) => crate::Scalar::I32,
Self::U64(_) => crate::Scalar::U64,
Self::I64(_) => crate::Scalar::I64,
Self::Bool(_) => crate::Scalar::BOOL,
Self::AbstractInt(_) => crate::Scalar::ABSTRACT_INT,


@@ -124,6 +124,8 @@ pub enum ExpressionError {
MissingCapabilities(super::Capabilities),
#[error(transparent)]
Literal(#[from] LiteralError),
#[error("{0:?} is not supported for Width {2} {1:?} arguments yet, see https://github.com/gfx-rs/wgpu/issues/5276")]
UnsupportedWidth(crate::MathFunction, crate::ScalarKind, crate::Bytes),
}
#[derive(Clone, Debug, thiserror::Error)]
@ -1332,28 +1334,29 @@ impl super::Validator {
_ => return Err(ExpressionError::InvalidArgumentType(fun, 0, arg)),
}
}
Mf::CountTrailingZeros
| Mf::CountLeadingZeros
// Remove once fixed https://github.com/gfx-rs/wgpu/issues/5276
Mf::CountLeadingZeros
| Mf::CountTrailingZeros
| Mf::CountOneBits
| Mf::ReverseBits
| Mf::FindLsb
| Mf::FindMsb => {
| Mf::FindMsb
| Mf::FindLsb => {
if arg1_ty.is_some() || arg2_ty.is_some() || arg3_ty.is_some() {
return Err(ExpressionError::WrongArgumentCount(fun));
}
match *arg_ty {
Ti::Scalar(Sc {
kind: Sk::Sint | Sk::Uint,
..
})
| Ti::Vector {
scalar:
Sc {
kind: Sk::Sint | Sk::Uint,
..
Ti::Scalar(scalar) | Ti::Vector { scalar, .. } => match scalar.kind {
Sk::Sint | Sk::Uint => {
if scalar.width != 4 {
return Err(ExpressionError::UnsupportedWidth(
fun,
scalar.kind,
scalar.width,
));
}
}
_ => return Err(ExpressionError::InvalidArgumentType(fun, 0, arg)),
},
..
} => {}
_ => return Err(ExpressionError::InvalidArgumentType(fun, 0, arg)),
}
}
@@ -1404,6 +1407,21 @@ impl super::Validator {
))
}
}
// Remove once fixed https://github.com/gfx-rs/wgpu/issues/5276
for &arg in [arg_ty, arg1_ty, arg2_ty, arg3_ty].iter() {
match *arg {
Ti::Scalar(scalar) | Ti::Vector { scalar, .. } => {
if scalar.width != 4 {
return Err(ExpressionError::UnsupportedWidth(
fun,
scalar.kind,
scalar.width,
));
}
}
_ => {}
}
}
}
Mf::ExtractBits => {
let (arg1_ty, arg2_ty) = match (arg1_ty, arg2_ty, arg3_ty) {
@@ -1445,6 +1463,21 @@ impl super::Validator {
))
}
}
// Remove once fixed https://github.com/gfx-rs/wgpu/issues/5276
for &arg in [arg_ty, arg1_ty, arg2_ty].iter() {
match *arg {
Ti::Scalar(scalar) | Ti::Vector { scalar, .. } => {
if scalar.width != 4 {
return Err(ExpressionError::UnsupportedWidth(
fun,
scalar.kind,
scalar.width,
));
}
}
_ => {}
}
}
}
Mf::Pack2x16unorm | Mf::Pack2x16snorm | Mf::Pack2x16float => {
if arg1_ty.is_some() || arg2_ty.is_some() || arg3_ty.is_some() {


@@ -28,7 +28,7 @@ pub use expression::{check_literal_value, LiteralError};
pub use expression::{ConstExpressionError, ExpressionError};
pub use function::{CallError, FunctionError, LocalVariableError};
pub use interface::{EntryPointError, GlobalVariableError, VaryingError};
pub use r#type::{Disalignment, TypeError, TypeFlags};
pub use r#type::{Disalignment, TypeError, TypeFlags, WidthError};
use self::handles::InvalidHandleError;
@@ -108,6 +108,8 @@ bitflags::bitflags! {
const DUAL_SOURCE_BLENDING = 0x2000;
/// Support for arrayed cube textures.
const CUBE_ARRAY_TEXTURES = 0x4000;
/// Support for 64-bit signed and unsigned integers.
const SHADER_INT64 = 0x8000;
}
}
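
Front-end users opt in through the validator. A hedged sketch against naga's public API (the paths assume the `naga::valid` module layout at the time of this commit):

```rust
use naga::valid::{Capabilities, ValidationFlags, Validator};

fn validate_with_int64(
    module: &naga::Module,
) -> Result<naga::valid::ModuleInfo, naga::WithSpan<naga::valid::ValidationError>> {
    let mut validator = Validator::new(
        ValidationFlags::all(),
        Capabilities::SHADER_INT64, // allow 64-bit integer scalars
    );
    validator.validate(module)
}
```

Without the capability, width checking reports the `MissingCapability { name: "i64"/"u64", flag: "SHADER_INT64" }` error added in the width-checking change below.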


@@ -147,9 +147,6 @@ pub enum WidthError {
flag: &'static str,
},
#[error("64-bit integers are not yet supported")]
Unsupported64Bit,
#[error("Abstract types may only appear in constant expressions")]
Abstract,
}
@@ -251,12 +248,32 @@ impl super::Validator {
scalar.width == 4
}
}
crate::ScalarKind::Sint | crate::ScalarKind::Uint => {
crate::ScalarKind::Sint => {
if scalar.width == 8 {
return Err(WidthError::Unsupported64Bit);
if !self.capabilities.contains(Capabilities::SHADER_INT64) {
return Err(WidthError::MissingCapability {
name: "i64",
flag: "SHADER_INT64",
});
}
true
} else {
scalar.width == 4
}
}
crate::ScalarKind::Uint => {
if scalar.width == 8 {
if !self.capabilities.contains(Capabilities::SHADER_INT64) {
return Err(WidthError::MissingCapability {
name: "u64",
flag: "SHADER_INT64",
});
}
true
} else {
scalar.width == 4
}
}
crate::ScalarKind::AbstractInt | crate::ScalarKind::AbstractFloat => {
return Err(WidthError::Abstract);
}


@@ -0,0 +1,22 @@
(
god_mode: true,
spv: (
version: (1, 0),
),
hlsl: (
shader_model: V6_0,
binding_map: {},
fake_missing_bindings: true,
special_constants_binding: Some((space: 1, register: 0)),
push_constants_target: Some((space: 0, register: 0)),
zero_initialize_workgroup_memory: true,
),
msl: (
lang_version: (2, 3),
per_entry_point_map: {},
inline_samplers: [],
spirv_cross_compatibility: false,
fake_missing_bindings: true,
zero_initialize_workgroup_memory: true,
),
)

naga/tests/in/int64.wgsl (new file, 141 lines)

@@ -0,0 +1,141 @@
var<private> private_variable: i64 = 1li;
const constant_variable: u64 = 20lu;
struct UniformCompatible {
// Other types
val_u32: u32,
val_i32: i32,
val_f32: f32,
// u64
val_u64: u64,
val_u64_2: vec2<u64>,
val_u64_3: vec3<u64>,
val_u64_4: vec4<u64>,
// i64
val_i64: i64,
val_i64_2: vec2<i64>,
val_i64_3: vec3<i64>,
val_i64_4: vec4<i64>,
final_value: u64,
}
struct StorageCompatible {
val_u64_array_2: array<u64, 2>,
val_i64_array_2: array<i64, 2>,
}
@group(0) @binding(0)
var<uniform> input_uniform: UniformCompatible;
@group(0) @binding(1)
var<storage> input_storage: UniformCompatible;
@group(0) @binding(2)
var<storage> input_arrays: StorageCompatible;
@group(0) @binding(3)
var<storage, read_write> output: UniformCompatible;
@group(0) @binding(4)
var<storage, read_write> output_arrays: StorageCompatible;
fn int64_function(x: i64) -> i64 {
var val: i64 = i64(constant_variable);
// A number too big for i32
val += 31li - 1002003004005006li;
// Constructing an i64 from an AbstractInt
val += val + i64(5);
// Constructing a i64 from other types and other types from u64.
val += i64(input_uniform.val_u32 + u32(val));
val += i64(input_uniform.val_i32 + i32(val));
val += i64(input_uniform.val_f32 + f32(val));
// Constructing a vec3<i64> from a i64
val += vec3<i64>(input_uniform.val_i64).z;
// Bitcasting from u64 to i64
val += bitcast<i64>(input_uniform.val_u64);
val += bitcast<vec2<i64>>(input_uniform.val_u64_2).y;
val += bitcast<vec3<i64>>(input_uniform.val_u64_3).z;
val += bitcast<vec4<i64>>(input_uniform.val_u64_4).w;
// Reading/writing to a uniform/storage buffer
output.val_i64 = input_uniform.val_i64 + input_storage.val_i64;
output.val_i64_2 = input_uniform.val_i64_2 + input_storage.val_i64_2;
output.val_i64_3 = input_uniform.val_i64_3 + input_storage.val_i64_3;
output.val_i64_4 = input_uniform.val_i64_4 + input_storage.val_i64_4;
output_arrays.val_i64_array_2 = input_arrays.val_i64_array_2;
// We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
// constants based on the size of the type, and which are arguments.
// Numeric functions
val += abs(val);
val += clamp(val, val, val);
//val += countLeadingZeros(val);
//val += countOneBits(val);
//val += countTrailingZeros(val);
val += dot(vec2(val), vec2(val));
//val += extractBits(val, 15u, 28u);
//val += firstLeadingBit(val);
//val += firstTrailingBit(val);
//val += insertBits(val, 12li, 15u, 28u);
val += max(val, val);
val += min(val, val);
//val += reverseBits(val);
val += sign(val); // only for i64
// Make sure all the variables are used.
return val;
}
fn uint64_function(x: u64) -> u64 {
var val: u64 = u64(constant_variable);
// A number too big for u32
val += 31lu + 1002003004005006lu;
// Constructing a u64 from an AbstractInt
val += val + u64(5);
// Constructing a u64 from other types and other types from u64.
val += u64(input_uniform.val_u32 + u32(val));
val += u64(input_uniform.val_i32 + i32(val));
val += u64(input_uniform.val_f32 + f32(val));
// Constructing a vec3<u64> from a u64
val += vec3<u64>(input_uniform.val_u64).z;
// Bitcasting from i64 to u64
val += bitcast<u64>(input_uniform.val_i64);
val += bitcast<vec2<u64>>(input_uniform.val_i64_2).y;
val += bitcast<vec3<u64>>(input_uniform.val_i64_3).z;
val += bitcast<vec4<u64>>(input_uniform.val_i64_4).w;
// Reading/writing to a uniform/storage buffer
output.val_u64 = input_uniform.val_u64 + input_storage.val_u64;
output.val_u64_2 = input_uniform.val_u64_2 + input_storage.val_u64_2;
output.val_u64_3 = input_uniform.val_u64_3 + input_storage.val_u64_3;
output.val_u64_4 = input_uniform.val_u64_4 + input_storage.val_u64_4;
output_arrays.val_u64_array_2 = input_arrays.val_u64_array_2;
// We make sure not to use 32 in these arguments, so it's clear in the results which are builtin
// constants based on the size of the type, and which are arguments.
// Numeric functions
val += abs(val);
val += clamp(val, val, val);
//val += countLeadingZeros(val);
//val += countOneBits(val);
//val += countTrailingZeros(val);
val += dot(vec2(val), vec2(val));
//val += extractBits(val, 15u, 28u);
//val += firstLeadingBit(val);
//val += firstTrailingBit(val);
//val += insertBits(val, 12lu, 15u, 28u);
val += max(val, val);
val += min(val, val);
//val += reverseBits(val);
// Make sure all the variables are used.
return val;
}
@compute @workgroup_size(1)
fn main() {
output.final_value = uint64_function(67lu) + bitcast<u64>(int64_function(60li));
}


@@ -0,0 +1,234 @@
struct NagaConstants {
int first_vertex;
int first_instance;
uint other;
};
ConstantBuffer<NagaConstants> _NagaConstants: register(b0, space1);
struct UniformCompatible {
uint val_u32_;
int val_i32_;
float val_f32_;
int _pad3_0;
uint64_t val_u64_;
int _pad4_0;
int _pad4_1;
uint64_t2 val_u64_2_;
int _pad5_0;
int _pad5_1;
int _pad5_2;
int _pad5_3;
uint64_t3 val_u64_3_;
int _pad6_0;
int _pad6_1;
uint64_t4 val_u64_4_;
int64_t val_i64_;
int _pad8_0;
int _pad8_1;
int64_t2 val_i64_2_;
int64_t3 val_i64_3_;
int _pad10_0;
int _pad10_1;
int64_t4 val_i64_4_;
uint64_t final_value;
int _end_pad_0;
int _end_pad_1;
int _end_pad_2;
int _end_pad_3;
int _end_pad_4;
int _end_pad_5;
};
struct StorageCompatible {
uint64_t val_u64_array_2_[2];
int64_t val_i64_array_2_[2];
};
static const uint64_t constant_variable = 20uL;
static int64_t private_variable = 1L;
cbuffer input_uniform : register(b0) { UniformCompatible input_uniform; }
ByteAddressBuffer input_storage : register(t1);
ByteAddressBuffer input_arrays : register(t2);
RWByteAddressBuffer output : register(u3);
RWByteAddressBuffer output_arrays : register(u4);
typedef int64_t ret_Constructarray2_int64_t_[2];
ret_Constructarray2_int64_t_ Constructarray2_int64_t_(int64_t arg0, int64_t arg1) {
int64_t ret[2] = { arg0, arg1 };
return ret;
}
int64_t int64_function(int64_t x)
{
int64_t val = 20L;
int64_t _expr6 = val;
val = (_expr6 + (31L - 1002003004005006L));
int64_t _expr8 = val;
int64_t _expr11 = val;
val = (_expr11 + (_expr8 + 5L));
uint _expr15 = input_uniform.val_u32_;
int64_t _expr16 = val;
int64_t _expr20 = val;
val = (_expr20 + int64_t((_expr15 + uint(_expr16))));
int _expr24 = input_uniform.val_i32_;
int64_t _expr25 = val;
int64_t _expr29 = val;
val = (_expr29 + int64_t((_expr24 + int(_expr25))));
float _expr33 = input_uniform.val_f32_;
int64_t _expr34 = val;
int64_t _expr38 = val;
val = (_expr38 + int64_t((_expr33 + float(_expr34))));
int64_t _expr42 = input_uniform.val_i64_;
int64_t _expr45 = val;
val = (_expr45 + (_expr42).xxx.z);
uint64_t _expr49 = input_uniform.val_u64_;
int64_t _expr51 = val;
val = (_expr51 + _expr49);
uint64_t2 _expr55 = input_uniform.val_u64_2_;
int64_t _expr58 = val;
val = (_expr58 + _expr55.y);
uint64_t3 _expr62 = input_uniform.val_u64_3_;
int64_t _expr65 = val;
val = (_expr65 + _expr62.z);
uint64_t4 _expr69 = input_uniform.val_u64_4_;
int64_t _expr72 = val;
val = (_expr72 + _expr69.w);
int64_t _expr78 = input_uniform.val_i64_;
int64_t _expr81 = input_storage.Load<int64_t>(128);
output.Store(128, (_expr78 + _expr81));
int64_t2 _expr87 = input_uniform.val_i64_2_;
int64_t2 _expr90 = input_storage.Load<int64_t2>(144);
output.Store(144, (_expr87 + _expr90));
int64_t3 _expr96 = input_uniform.val_i64_3_;
int64_t3 _expr99 = input_storage.Load<int64_t3>(160);
output.Store(160, (_expr96 + _expr99));
int64_t4 _expr105 = input_uniform.val_i64_4_;
int64_t4 _expr108 = input_storage.Load<int64_t4>(192);
output.Store(192, (_expr105 + _expr108));
int64_t _expr114[2] = Constructarray2_int64_t_(input_arrays.Load<int64_t>(16+0), input_arrays.Load<int64_t>(16+8));
{
int64_t _value2[2] = _expr114;
output_arrays.Store(16+0, _value2[0]);
output_arrays.Store(16+8, _value2[1]);
}
int64_t _expr115 = val;
int64_t _expr117 = val;
val = (_expr117 + abs(_expr115));
int64_t _expr119 = val;
int64_t _expr120 = val;
int64_t _expr121 = val;
int64_t _expr123 = val;
val = (_expr123 + clamp(_expr119, _expr120, _expr121));
int64_t _expr125 = val;
int64_t _expr127 = val;
int64_t _expr130 = val;
val = (_expr130 + dot((_expr125).xx, (_expr127).xx));
int64_t _expr132 = val;
int64_t _expr133 = val;
int64_t _expr135 = val;
val = (_expr135 + max(_expr132, _expr133));
int64_t _expr137 = val;
int64_t _expr138 = val;
int64_t _expr140 = val;
val = (_expr140 + min(_expr137, _expr138));
int64_t _expr142 = val;
int64_t _expr144 = val;
val = (_expr144 + sign(_expr142));
int64_t _expr146 = val;
return _expr146;
}
typedef uint64_t ret_Constructarray2_uint64_t_[2];
ret_Constructarray2_uint64_t_ Constructarray2_uint64_t_(uint64_t arg0, uint64_t arg1) {
uint64_t ret[2] = { arg0, arg1 };
return ret;
}
uint64_t uint64_function(uint64_t x_1)
{
uint64_t val_1 = 20uL;
uint64_t _expr6 = val_1;
val_1 = (_expr6 + (31uL + 1002003004005006uL));
uint64_t _expr8 = val_1;
uint64_t _expr11 = val_1;
val_1 = (_expr11 + (_expr8 + 5uL));
uint _expr15 = input_uniform.val_u32_;
uint64_t _expr16 = val_1;
uint64_t _expr20 = val_1;
val_1 = (_expr20 + uint64_t((_expr15 + uint(_expr16))));
int _expr24 = input_uniform.val_i32_;
uint64_t _expr25 = val_1;
uint64_t _expr29 = val_1;
val_1 = (_expr29 + uint64_t((_expr24 + int(_expr25))));
float _expr33 = input_uniform.val_f32_;
uint64_t _expr34 = val_1;
uint64_t _expr38 = val_1;
val_1 = (_expr38 + uint64_t((_expr33 + float(_expr34))));
uint64_t _expr42 = input_uniform.val_u64_;
uint64_t _expr45 = val_1;
val_1 = (_expr45 + (_expr42).xxx.z);
int64_t _expr49 = input_uniform.val_i64_;
uint64_t _expr51 = val_1;
val_1 = (_expr51 + _expr49);
int64_t2 _expr55 = input_uniform.val_i64_2_;
uint64_t _expr58 = val_1;
val_1 = (_expr58 + _expr55.y);
int64_t3 _expr62 = input_uniform.val_i64_3_;
uint64_t _expr65 = val_1;
val_1 = (_expr65 + _expr62.z);
int64_t4 _expr69 = input_uniform.val_i64_4_;
uint64_t _expr72 = val_1;
val_1 = (_expr72 + _expr69.w);
uint64_t _expr78 = input_uniform.val_u64_;
uint64_t _expr81 = input_storage.Load<uint64_t>(16);
output.Store(16, (_expr78 + _expr81));
uint64_t2 _expr87 = input_uniform.val_u64_2_;
uint64_t2 _expr90 = input_storage.Load<uint64_t2>(32);
output.Store(32, (_expr87 + _expr90));
uint64_t3 _expr96 = input_uniform.val_u64_3_;
uint64_t3 _expr99 = input_storage.Load<uint64_t3>(64);
output.Store(64, (_expr96 + _expr99));
uint64_t4 _expr105 = input_uniform.val_u64_4_;
uint64_t4 _expr108 = input_storage.Load<uint64_t4>(96);
output.Store(96, (_expr105 + _expr108));
uint64_t _expr114[2] = Constructarray2_uint64_t_(input_arrays.Load<uint64_t>(0+0), input_arrays.Load<uint64_t>(0+8));
{
uint64_t _value2[2] = _expr114;
output_arrays.Store(0+0, _value2[0]);
output_arrays.Store(0+8, _value2[1]);
}
uint64_t _expr115 = val_1;
uint64_t _expr117 = val_1;
val_1 = (_expr117 + abs(_expr115));
uint64_t _expr119 = val_1;
uint64_t _expr120 = val_1;
uint64_t _expr121 = val_1;
uint64_t _expr123 = val_1;
val_1 = (_expr123 + clamp(_expr119, _expr120, _expr121));
uint64_t _expr125 = val_1;
uint64_t _expr127 = val_1;
uint64_t _expr130 = val_1;
val_1 = (_expr130 + dot((_expr125).xx, (_expr127).xx));
uint64_t _expr132 = val_1;
uint64_t _expr133 = val_1;
uint64_t _expr135 = val_1;
val_1 = (_expr135 + max(_expr132, _expr133));
uint64_t _expr137 = val_1;
uint64_t _expr138 = val_1;
uint64_t _expr140 = val_1;
val_1 = (_expr140 + min(_expr137, _expr138));
uint64_t _expr142 = val_1;
return _expr142;
}
[numthreads(1, 1, 1)]
void main()
{
const uint64_t _e3 = uint64_function(67uL);
const int64_t _e5 = int64_function(60L);
output.Store(224, (_e3 + _e5));
return;
}


@@ -0,0 +1,12 @@
(
vertex:[
],
fragment:[
],
compute:[
(
entry_point:"main",
target_profile:"cs_6_0",
),
],
)


@@ -0,0 +1,213 @@
// language: metal2.3
#include <metal_stdlib>
#include <simd/simd.h>
using metal::uint;
struct UniformCompatible {
uint val_u32_;
int val_i32_;
float val_f32_;
char _pad3[4];
ulong val_u64_;
char _pad4[8];
metal::ulong2 val_u64_2_;
char _pad5[16];
metal::ulong3 val_u64_3_;
metal::ulong4 val_u64_4_;
long val_i64_;
char _pad8[8];
metal::long2 val_i64_2_;
metal::long3 val_i64_3_;
metal::long4 val_i64_4_;
ulong final_value;
};
struct type_11 {
ulong inner[2];
};
struct type_12 {
long inner[2];
};
struct StorageCompatible {
type_11 val_u64_array_2_;
type_12 val_i64_array_2_;
};
constant ulong constant_variable = 20uL;
long int64_function(
long x,
constant UniformCompatible& input_uniform,
device UniformCompatible const& input_storage,
device StorageCompatible const& input_arrays,
device UniformCompatible& output,
device StorageCompatible& output_arrays
) {
long val = 20L;
long _e6 = val;
val = _e6 + (31L - 1002003004005006L);
long _e8 = val;
long _e11 = val;
val = _e11 + (_e8 + 5L);
uint _e15 = input_uniform.val_u32_;
long _e16 = val;
long _e20 = val;
val = _e20 + static_cast<long>(_e15 + static_cast<uint>(_e16));
int _e24 = input_uniform.val_i32_;
long _e25 = val;
long _e29 = val;
val = _e29 + static_cast<long>(_e24 + static_cast<int>(_e25));
float _e33 = input_uniform.val_f32_;
long _e34 = val;
long _e38 = val;
val = _e38 + static_cast<long>(_e33 + static_cast<float>(_e34));
long _e42 = input_uniform.val_i64_;
long _e45 = val;
val = _e45 + metal::long3(_e42).z;
ulong _e49 = input_uniform.val_u64_;
long _e51 = val;
val = _e51 + as_type<long>(_e49);
metal::ulong2 _e55 = input_uniform.val_u64_2_;
long _e58 = val;
val = _e58 + as_type<metal::long2>(_e55).y;
metal::ulong3 _e62 = input_uniform.val_u64_3_;
long _e65 = val;
val = _e65 + as_type<metal::long3>(_e62).z;
metal::ulong4 _e69 = input_uniform.val_u64_4_;
long _e72 = val;
val = _e72 + as_type<metal::long4>(_e69).w;
long _e78 = input_uniform.val_i64_;
long _e81 = input_storage.val_i64_;
output.val_i64_ = _e78 + _e81;
metal::long2 _e87 = input_uniform.val_i64_2_;
metal::long2 _e90 = input_storage.val_i64_2_;
output.val_i64_2_ = _e87 + _e90;
metal::long3 _e96 = input_uniform.val_i64_3_;
metal::long3 _e99 = input_storage.val_i64_3_;
output.val_i64_3_ = _e96 + _e99;
metal::long4 _e105 = input_uniform.val_i64_4_;
metal::long4 _e108 = input_storage.val_i64_4_;
output.val_i64_4_ = _e105 + _e108;
type_12 _e114 = input_arrays.val_i64_array_2_;
output_arrays.val_i64_array_2_ = _e114;
long _e115 = val;
long _e117 = val;
val = _e117 + metal::abs(_e115);
long _e119 = val;
long _e120 = val;
long _e121 = val;
long _e123 = val;
val = _e123 + metal::clamp(_e119, _e120, _e121);
long _e125 = val;
metal::long2 _e126 = metal::long2(_e125);
long _e127 = val;
metal::long2 _e128 = metal::long2(_e127);
long _e130 = val;
val = _e130 + ( + _e126.x * _e128.x + _e126.y * _e128.y);
long _e132 = val;
long _e133 = val;
long _e135 = val;
val = _e135 + metal::max(_e132, _e133);
long _e137 = val;
long _e138 = val;
long _e140 = val;
val = _e140 + metal::min(_e137, _e138);
long _e142 = val;
long _e144 = val;
val = _e144 + metal::select(metal::select(-1, 1, (_e142 > 0)), 0, (_e142 == 0));
long _e146 = val;
return _e146;
}
ulong uint64_function(
ulong x_1,
constant UniformCompatible& input_uniform,
device UniformCompatible const& input_storage,
device StorageCompatible const& input_arrays,
device UniformCompatible& output,
device StorageCompatible& output_arrays
) {
ulong val_1 = 20uL;
ulong _e6 = val_1;
val_1 = _e6 + (31uL + 1002003004005006uL);
ulong _e8 = val_1;
ulong _e11 = val_1;
val_1 = _e11 + (_e8 + 5uL);
uint _e15 = input_uniform.val_u32_;
ulong _e16 = val_1;
ulong _e20 = val_1;
val_1 = _e20 + static_cast<ulong>(_e15 + static_cast<uint>(_e16));
int _e24 = input_uniform.val_i32_;
ulong _e25 = val_1;
ulong _e29 = val_1;
val_1 = _e29 + static_cast<ulong>(_e24 + static_cast<int>(_e25));
float _e33 = input_uniform.val_f32_;
ulong _e34 = val_1;
ulong _e38 = val_1;
val_1 = _e38 + static_cast<ulong>(_e33 + static_cast<float>(_e34));
ulong _e42 = input_uniform.val_u64_;
ulong _e45 = val_1;
val_1 = _e45 + metal::ulong3(_e42).z;
long _e49 = input_uniform.val_i64_;
ulong _e51 = val_1;
val_1 = _e51 + as_type<ulong>(_e49);
metal::long2 _e55 = input_uniform.val_i64_2_;
ulong _e58 = val_1;
val_1 = _e58 + as_type<metal::ulong2>(_e55).y;
metal::long3 _e62 = input_uniform.val_i64_3_;
ulong _e65 = val_1;
val_1 = _e65 + as_type<metal::ulong3>(_e62).z;
metal::long4 _e69 = input_uniform.val_i64_4_;
ulong _e72 = val_1;
val_1 = _e72 + as_type<metal::ulong4>(_e69).w;
ulong _e78 = input_uniform.val_u64_;
ulong _e81 = input_storage.val_u64_;
output.val_u64_ = _e78 + _e81;
metal::ulong2 _e87 = input_uniform.val_u64_2_;
metal::ulong2 _e90 = input_storage.val_u64_2_;
output.val_u64_2_ = _e87 + _e90;
metal::ulong3 _e96 = input_uniform.val_u64_3_;
metal::ulong3 _e99 = input_storage.val_u64_3_;
output.val_u64_3_ = _e96 + _e99;
metal::ulong4 _e105 = input_uniform.val_u64_4_;
metal::ulong4 _e108 = input_storage.val_u64_4_;
output.val_u64_4_ = _e105 + _e108;
type_11 _e114 = input_arrays.val_u64_array_2_;
output_arrays.val_u64_array_2_ = _e114;
ulong _e115 = val_1;
ulong _e117 = val_1;
val_1 = _e117 + metal::abs(_e115);
ulong _e119 = val_1;
ulong _e120 = val_1;
ulong _e121 = val_1;
ulong _e123 = val_1;
val_1 = _e123 + metal::clamp(_e119, _e120, _e121);
ulong _e125 = val_1;
metal::ulong2 _e126 = metal::ulong2(_e125);
ulong _e127 = val_1;
metal::ulong2 _e128 = metal::ulong2(_e127);
ulong _e130 = val_1;
val_1 = _e130 + ( + _e126.x * _e128.x + _e126.y * _e128.y);
ulong _e132 = val_1;
ulong _e133 = val_1;
ulong _e135 = val_1;
val_1 = _e135 + metal::max(_e132, _e133);
ulong _e137 = val_1;
ulong _e138 = val_1;
ulong _e140 = val_1;
val_1 = _e140 + metal::min(_e137, _e138);
ulong _e142 = val_1;
return _e142;
}
kernel void main_(
constant UniformCompatible& input_uniform [[user(fake0)]]
, device UniformCompatible const& input_storage [[user(fake0)]]
, device StorageCompatible const& input_arrays [[user(fake0)]]
, device UniformCompatible& output [[user(fake0)]]
, device StorageCompatible& output_arrays [[user(fake0)]]
) {
ulong _e3 = uint64_function(67uL, input_uniform, input_storage, input_arrays, output, output_arrays);
long _e5 = int64_function(60L, input_uniform, input_storage, input_arrays, output, output_arrays);
output.final_value = _e3 + as_type<ulong>(_e5);
return;
}


@ -0,0 +1,470 @@
; SPIR-V
; Version: 1.0
; Generator: rspirv
; Bound: 372
OpCapability Shader
OpCapability Int64
OpExtension "SPV_KHR_storage_buffer_storage_class"
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint GLCompute %356 "main"
OpExecutionMode %356 LocalSize 1 1 1
OpMemberDecorate %14 0 Offset 0
OpMemberDecorate %14 1 Offset 4
OpMemberDecorate %14 2 Offset 8
OpMemberDecorate %14 3 Offset 16
OpMemberDecorate %14 4 Offset 32
OpMemberDecorate %14 5 Offset 64
OpMemberDecorate %14 6 Offset 96
OpMemberDecorate %14 7 Offset 128
OpMemberDecorate %14 8 Offset 144
OpMemberDecorate %14 9 Offset 160
OpMemberDecorate %14 10 Offset 192
OpMemberDecorate %14 11 Offset 224
OpDecorate %15 ArrayStride 8
OpDecorate %17 ArrayStride 8
OpMemberDecorate %18 0 Offset 0
OpMemberDecorate %18 1 Offset 16
OpDecorate %23 DescriptorSet 0
OpDecorate %23 Binding 0
OpDecorate %24 Block
OpMemberDecorate %24 0 Offset 0
OpDecorate %26 NonWritable
OpDecorate %26 DescriptorSet 0
OpDecorate %26 Binding 1
OpDecorate %27 Block
OpMemberDecorate %27 0 Offset 0
OpDecorate %29 NonWritable
OpDecorate %29 DescriptorSet 0
OpDecorate %29 Binding 2
OpDecorate %30 Block
OpMemberDecorate %30 0 Offset 0
OpDecorate %32 DescriptorSet 0
OpDecorate %32 Binding 3
OpDecorate %33 Block
OpMemberDecorate %33 0 Offset 0
OpDecorate %35 DescriptorSet 0
OpDecorate %35 Binding 4
OpDecorate %36 Block
OpMemberDecorate %36 0 Offset 0
%2 = OpTypeVoid
%3 = OpTypeInt 64 1
%4 = OpTypeInt 64 0
%5 = OpTypeInt 32 0
%6 = OpTypeInt 32 1
%7 = OpTypeFloat 32
%8 = OpTypeVector %4 2
%9 = OpTypeVector %4 3
%10 = OpTypeVector %4 4
%11 = OpTypeVector %3 2
%12 = OpTypeVector %3 3
%13 = OpTypeVector %3 4
%14 = OpTypeStruct %5 %6 %7 %4 %8 %9 %10 %3 %11 %12 %13 %4
%16 = OpConstant %5 2
%15 = OpTypeArray %4 %16
%17 = OpTypeArray %3 %16
%18 = OpTypeStruct %15 %17
%19 = OpConstant %3 1
%20 = OpConstant %4 20
%22 = OpTypePointer Private %3
%21 = OpVariable %22 Private %19
%24 = OpTypeStruct %14
%25 = OpTypePointer Uniform %24
%23 = OpVariable %25 Uniform
%27 = OpTypeStruct %14
%28 = OpTypePointer StorageBuffer %27
%26 = OpVariable %28 StorageBuffer
%30 = OpTypeStruct %18
%31 = OpTypePointer StorageBuffer %30
%29 = OpVariable %31 StorageBuffer
%33 = OpTypeStruct %14
%34 = OpTypePointer StorageBuffer %33
%32 = OpVariable %34 StorageBuffer
%36 = OpTypeStruct %18
%37 = OpTypePointer StorageBuffer %36
%35 = OpVariable %37 StorageBuffer
%41 = OpTypeFunction %3 %3
%42 = OpTypePointer Uniform %14
%43 = OpConstant %5 0
%45 = OpTypePointer StorageBuffer %14
%47 = OpTypePointer StorageBuffer %18
%51 = OpConstant %3 20
%52 = OpConstant %3 31
%53 = OpConstant %3 1002003004005006
%54 = OpConstant %3 5
%56 = OpTypePointer Function %3
%65 = OpTypePointer Uniform %5
%74 = OpTypePointer Uniform %6
%75 = OpConstant %5 1
%84 = OpTypePointer Uniform %7
%93 = OpTypePointer Uniform %3
%94 = OpConstant %5 7
%101 = OpTypePointer Uniform %4
%102 = OpConstant %5 3
%108 = OpTypePointer Uniform %8
%109 = OpConstant %5 4
%116 = OpTypePointer Uniform %9
%117 = OpConstant %5 5
%124 = OpTypePointer Uniform %10
%125 = OpConstant %5 6
%132 = OpTypePointer StorageBuffer %3
%139 = OpTypePointer StorageBuffer %11
%140 = OpTypePointer Uniform %11
%141 = OpConstant %5 8
%148 = OpTypePointer StorageBuffer %12
%149 = OpTypePointer Uniform %12
%150 = OpConstant %5 9
%157 = OpTypePointer StorageBuffer %13
%158 = OpTypePointer Uniform %13
%159 = OpConstant %5 10
%166 = OpTypePointer StorageBuffer %17
%186 = OpConstantNull %3
%214 = OpTypeFunction %4 %4
%220 = OpConstant %4 31
%221 = OpConstant %4 1002003004005006
%222 = OpConstant %4 5
%224 = OpTypePointer Function %4
%286 = OpTypePointer StorageBuffer %4
%293 = OpTypePointer StorageBuffer %8
%300 = OpTypePointer StorageBuffer %9
%307 = OpTypePointer StorageBuffer %10
%314 = OpTypePointer StorageBuffer %15
%334 = OpConstantNull %4
%357 = OpTypeFunction %2
%363 = OpConstant %4 67
%364 = OpConstant %3 60
%370 = OpConstant %5 11
%40 = OpFunction %3 None %41
%39 = OpFunctionParameter %3
%38 = OpLabel
%55 = OpVariable %56 Function %51
%44 = OpAccessChain %42 %23 %43
%46 = OpAccessChain %45 %26 %43
%48 = OpAccessChain %47 %29 %43
%49 = OpAccessChain %45 %32 %43
%50 = OpAccessChain %47 %35 %43
OpBranch %57
%57 = OpLabel
%58 = OpISub %3 %52 %53
%59 = OpLoad %3 %55
%60 = OpIAdd %3 %59 %58
OpStore %55 %60
%61 = OpLoad %3 %55
%62 = OpIAdd %3 %61 %54
%63 = OpLoad %3 %55
%64 = OpIAdd %3 %63 %62
OpStore %55 %64
%66 = OpAccessChain %65 %44 %43
%67 = OpLoad %5 %66
%68 = OpLoad %3 %55
%69 = OpUConvert %5 %68
%70 = OpIAdd %5 %67 %69
%71 = OpSConvert %3 %70
%72 = OpLoad %3 %55
%73 = OpIAdd %3 %72 %71
OpStore %55 %73
%76 = OpAccessChain %74 %44 %75
%77 = OpLoad %6 %76
%78 = OpLoad %3 %55
%79 = OpSConvert %6 %78
%80 = OpIAdd %6 %77 %79
%81 = OpSConvert %3 %80
%82 = OpLoad %3 %55
%83 = OpIAdd %3 %82 %81
OpStore %55 %83
%85 = OpAccessChain %84 %44 %16
%86 = OpLoad %7 %85
%87 = OpLoad %3 %55
%88 = OpConvertSToF %7 %87
%89 = OpFAdd %7 %86 %88
%90 = OpConvertFToS %3 %89
%91 = OpLoad %3 %55
%92 = OpIAdd %3 %91 %90
OpStore %55 %92
%95 = OpAccessChain %93 %44 %94
%96 = OpLoad %3 %95
%97 = OpCompositeConstruct %12 %96 %96 %96
%98 = OpCompositeExtract %3 %97 2
%99 = OpLoad %3 %55
%100 = OpIAdd %3 %99 %98
OpStore %55 %100
%103 = OpAccessChain %101 %44 %102
%104 = OpLoad %4 %103
%105 = OpBitcast %3 %104
%106 = OpLoad %3 %55
%107 = OpIAdd %3 %106 %105
OpStore %55 %107
%110 = OpAccessChain %108 %44 %109
%111 = OpLoad %8 %110
%112 = OpBitcast %11 %111
%113 = OpCompositeExtract %3 %112 1
%114 = OpLoad %3 %55
%115 = OpIAdd %3 %114 %113
OpStore %55 %115
%118 = OpAccessChain %116 %44 %117
%119 = OpLoad %9 %118
%120 = OpBitcast %12 %119
%121 = OpCompositeExtract %3 %120 2
%122 = OpLoad %3 %55
%123 = OpIAdd %3 %122 %121
OpStore %55 %123
%126 = OpAccessChain %124 %44 %125
%127 = OpLoad %10 %126
%128 = OpBitcast %13 %127
%129 = OpCompositeExtract %3 %128 3
%130 = OpLoad %3 %55
%131 = OpIAdd %3 %130 %129
OpStore %55 %131
%133 = OpAccessChain %93 %44 %94
%134 = OpLoad %3 %133
%135 = OpAccessChain %132 %46 %94
%136 = OpLoad %3 %135
%137 = OpIAdd %3 %134 %136
%138 = OpAccessChain %132 %49 %94
OpStore %138 %137
%142 = OpAccessChain %140 %44 %141
%143 = OpLoad %11 %142
%144 = OpAccessChain %139 %46 %141
%145 = OpLoad %11 %144
%146 = OpIAdd %11 %143 %145
%147 = OpAccessChain %139 %49 %141
OpStore %147 %146
%151 = OpAccessChain %149 %44 %150
%152 = OpLoad %12 %151
%153 = OpAccessChain %148 %46 %150
%154 = OpLoad %12 %153
%155 = OpIAdd %12 %152 %154
%156 = OpAccessChain %148 %49 %150
OpStore %156 %155
%160 = OpAccessChain %158 %44 %159
%161 = OpLoad %13 %160
%162 = OpAccessChain %157 %46 %159
%163 = OpLoad %13 %162
%164 = OpIAdd %13 %161 %163
%165 = OpAccessChain %157 %49 %159
OpStore %165 %164
%167 = OpAccessChain %166 %48 %75
%168 = OpLoad %17 %167
%169 = OpAccessChain %166 %50 %75
OpStore %169 %168
%170 = OpLoad %3 %55
%171 = OpExtInst %3 %1 SAbs %170
%172 = OpLoad %3 %55
%173 = OpIAdd %3 %172 %171
OpStore %55 %173
%174 = OpLoad %3 %55
%175 = OpLoad %3 %55
%176 = OpLoad %3 %55
%178 = OpExtInst %3 %1 SMax %174 %175
%177 = OpExtInst %3 %1 SMin %178 %176
%179 = OpLoad %3 %55
%180 = OpIAdd %3 %179 %177
OpStore %55 %180
%181 = OpLoad %3 %55
%182 = OpCompositeConstruct %11 %181 %181
%183 = OpLoad %3 %55
%184 = OpCompositeConstruct %11 %183 %183
%187 = OpCompositeExtract %3 %182 0
%188 = OpCompositeExtract %3 %184 0
%189 = OpIMul %3 %187 %188
%190 = OpIAdd %3 %186 %189
%191 = OpCompositeExtract %3 %182 1
%192 = OpCompositeExtract %3 %184 1
%193 = OpIMul %3 %191 %192
%185 = OpIAdd %3 %190 %193
%194 = OpLoad %3 %55
%195 = OpIAdd %3 %194 %185
OpStore %55 %195
%196 = OpLoad %3 %55
%197 = OpLoad %3 %55
%198 = OpExtInst %3 %1 SMax %196 %197
%199 = OpLoad %3 %55
%200 = OpIAdd %3 %199 %198
OpStore %55 %200
%201 = OpLoad %3 %55
%202 = OpLoad %3 %55
%203 = OpExtInst %3 %1 SMin %201 %202
%204 = OpLoad %3 %55
%205 = OpIAdd %3 %204 %203
OpStore %55 %205
%206 = OpLoad %3 %55
%207 = OpExtInst %3 %1 SSign %206
%208 = OpLoad %3 %55
%209 = OpIAdd %3 %208 %207
OpStore %55 %209
%210 = OpLoad %3 %55
OpReturnValue %210
OpFunctionEnd
%213 = OpFunction %4 None %214
%212 = OpFunctionParameter %4
%211 = OpLabel
%223 = OpVariable %224 Function %20
%215 = OpAccessChain %42 %23 %43
%216 = OpAccessChain %45 %26 %43
%217 = OpAccessChain %47 %29 %43
%218 = OpAccessChain %45 %32 %43
%219 = OpAccessChain %47 %35 %43
OpBranch %225
%225 = OpLabel
%226 = OpIAdd %4 %220 %221
%227 = OpLoad %4 %223
%228 = OpIAdd %4 %227 %226
OpStore %223 %228
%229 = OpLoad %4 %223
%230 = OpIAdd %4 %229 %222
%231 = OpLoad %4 %223
%232 = OpIAdd %4 %231 %230
OpStore %223 %232
%233 = OpAccessChain %65 %215 %43
%234 = OpLoad %5 %233
%235 = OpLoad %4 %223
%236 = OpUConvert %5 %235
%237 = OpIAdd %5 %234 %236
%238 = OpUConvert %4 %237
%239 = OpLoad %4 %223
%240 = OpIAdd %4 %239 %238
OpStore %223 %240
%241 = OpAccessChain %74 %215 %75
%242 = OpLoad %6 %241
%243 = OpLoad %4 %223
%244 = OpSConvert %6 %243
%245 = OpIAdd %6 %242 %244
%246 = OpUConvert %4 %245
%247 = OpLoad %4 %223
%248 = OpIAdd %4 %247 %246
OpStore %223 %248
%249 = OpAccessChain %84 %215 %16
%250 = OpLoad %7 %249
%251 = OpLoad %4 %223
%252 = OpConvertUToF %7 %251
%253 = OpFAdd %7 %250 %252
%254 = OpConvertFToU %4 %253
%255 = OpLoad %4 %223
%256 = OpIAdd %4 %255 %254
OpStore %223 %256
%257 = OpAccessChain %101 %215 %102
%258 = OpLoad %4 %257
%259 = OpCompositeConstruct %9 %258 %258 %258
%260 = OpCompositeExtract %4 %259 2
%261 = OpLoad %4 %223
%262 = OpIAdd %4 %261 %260
OpStore %223 %262
%263 = OpAccessChain %93 %215 %94
%264 = OpLoad %3 %263
%265 = OpBitcast %4 %264
%266 = OpLoad %4 %223
%267 = OpIAdd %4 %266 %265
OpStore %223 %267
%268 = OpAccessChain %140 %215 %141
%269 = OpLoad %11 %268
%270 = OpBitcast %8 %269
%271 = OpCompositeExtract %4 %270 1
%272 = OpLoad %4 %223
%273 = OpIAdd %4 %272 %271
OpStore %223 %273
%274 = OpAccessChain %149 %215 %150
%275 = OpLoad %12 %274
%276 = OpBitcast %9 %275
%277 = OpCompositeExtract %4 %276 2
%278 = OpLoad %4 %223
%279 = OpIAdd %4 %278 %277
OpStore %223 %279
%280 = OpAccessChain %158 %215 %159
%281 = OpLoad %13 %280
%282 = OpBitcast %10 %281
%283 = OpCompositeExtract %4 %282 3
%284 = OpLoad %4 %223
%285 = OpIAdd %4 %284 %283
OpStore %223 %285
%287 = OpAccessChain %101 %215 %102
%288 = OpLoad %4 %287
%289 = OpAccessChain %286 %216 %102
%290 = OpLoad %4 %289
%291 = OpIAdd %4 %288 %290
%292 = OpAccessChain %286 %218 %102
OpStore %292 %291
%294 = OpAccessChain %108 %215 %109
%295 = OpLoad %8 %294
%296 = OpAccessChain %293 %216 %109
%297 = OpLoad %8 %296
%298 = OpIAdd %8 %295 %297
%299 = OpAccessChain %293 %218 %109
OpStore %299 %298
%301 = OpAccessChain %116 %215 %117
%302 = OpLoad %9 %301
%303 = OpAccessChain %300 %216 %117
%304 = OpLoad %9 %303
%305 = OpIAdd %9 %302 %304
%306 = OpAccessChain %300 %218 %117
OpStore %306 %305
%308 = OpAccessChain %124 %215 %125
%309 = OpLoad %10 %308
%310 = OpAccessChain %307 %216 %125
%311 = OpLoad %10 %310
%312 = OpIAdd %10 %309 %311
%313 = OpAccessChain %307 %218 %125
OpStore %313 %312
%315 = OpAccessChain %314 %217 %43
%316 = OpLoad %15 %315
%317 = OpAccessChain %314 %219 %43
OpStore %317 %316
%318 = OpLoad %4 %223
%319 = OpCopyObject %4 %318
%320 = OpLoad %4 %223
%321 = OpIAdd %4 %320 %319
OpStore %223 %321
%322 = OpLoad %4 %223
%323 = OpLoad %4 %223
%324 = OpLoad %4 %223
%326 = OpExtInst %4 %1 UMax %322 %323
%325 = OpExtInst %4 %1 UMin %326 %324
%327 = OpLoad %4 %223
%328 = OpIAdd %4 %327 %325
OpStore %223 %328
%329 = OpLoad %4 %223
%330 = OpCompositeConstruct %8 %329 %329
%331 = OpLoad %4 %223
%332 = OpCompositeConstruct %8 %331 %331
%335 = OpCompositeExtract %4 %330 0
%336 = OpCompositeExtract %4 %332 0
%337 = OpIMul %4 %335 %336
%338 = OpIAdd %4 %334 %337
%339 = OpCompositeExtract %4 %330 1
%340 = OpCompositeExtract %4 %332 1
%341 = OpIMul %4 %339 %340
%333 = OpIAdd %4 %338 %341
%342 = OpLoad %4 %223
%343 = OpIAdd %4 %342 %333
OpStore %223 %343
%344 = OpLoad %4 %223
%345 = OpLoad %4 %223
%346 = OpExtInst %4 %1 UMax %344 %345
%347 = OpLoad %4 %223
%348 = OpIAdd %4 %347 %346
OpStore %223 %348
%349 = OpLoad %4 %223
%350 = OpLoad %4 %223
%351 = OpExtInst %4 %1 UMin %349 %350
%352 = OpLoad %4 %223
%353 = OpIAdd %4 %352 %351
OpStore %223 %353
%354 = OpLoad %4 %223
OpReturnValue %354
OpFunctionEnd
%356 = OpFunction %2 None %357
%355 = OpLabel
%358 = OpAccessChain %42 %23 %43
%359 = OpAccessChain %45 %26 %43
%360 = OpAccessChain %47 %29 %43
%361 = OpAccessChain %45 %32 %43
%362 = OpAccessChain %47 %35 %43
OpBranch %365
%365 = OpLabel
%366 = OpFunctionCall %4 %213 %363
%367 = OpFunctionCall %3 %40 %364
%368 = OpBitcast %4 %367
%369 = OpIAdd %4 %366 %368
%371 = OpAccessChain %286 %361 %370
OpStore %371 %369
OpReturn
OpFunctionEnd

View File

@ -0,0 +1,190 @@
struct UniformCompatible {
val_u32_: u32,
val_i32_: i32,
val_f32_: f32,
val_u64_: u64,
val_u64_2_: vec2<u64>,
val_u64_3_: vec3<u64>,
val_u64_4_: vec4<u64>,
val_i64_: i64,
val_i64_2_: vec2<i64>,
val_i64_3_: vec3<i64>,
val_i64_4_: vec4<i64>,
final_value: u64,
}
struct StorageCompatible {
val_u64_array_2_: array<u64, 2>,
val_i64_array_2_: array<i64, 2>,
}
const constant_variable: u64 = 20lu;
var<private> private_variable: i64 = 1li;
@group(0) @binding(0)
var<uniform> input_uniform: UniformCompatible;
@group(0) @binding(1)
var<storage> input_storage: UniformCompatible;
@group(0) @binding(2)
var<storage> input_arrays: StorageCompatible;
@group(0) @binding(3)
var<storage, read_write> output: UniformCompatible;
@group(0) @binding(4)
var<storage, read_write> output_arrays: StorageCompatible;
fn int64_function(x: i64) -> i64 {
var val: i64 = 20li;
let _e6 = val;
val = (_e6 + (31li - 1002003004005006li));
let _e8 = val;
let _e11 = val;
val = (_e11 + (_e8 + 5li));
let _e15 = input_uniform.val_u32_;
let _e16 = val;
let _e20 = val;
val = (_e20 + i64((_e15 + u32(_e16))));
let _e24 = input_uniform.val_i32_;
let _e25 = val;
let _e29 = val;
val = (_e29 + i64((_e24 + i32(_e25))));
let _e33 = input_uniform.val_f32_;
let _e34 = val;
let _e38 = val;
val = (_e38 + i64((_e33 + f32(_e34))));
let _e42 = input_uniform.val_i64_;
let _e45 = val;
val = (_e45 + vec3(_e42).z);
let _e49 = input_uniform.val_u64_;
let _e51 = val;
val = (_e51 + bitcast<i64>(_e49));
let _e55 = input_uniform.val_u64_2_;
let _e58 = val;
val = (_e58 + bitcast<vec2<i64>>(_e55).y);
let _e62 = input_uniform.val_u64_3_;
let _e65 = val;
val = (_e65 + bitcast<vec3<i64>>(_e62).z);
let _e69 = input_uniform.val_u64_4_;
let _e72 = val;
val = (_e72 + bitcast<vec4<i64>>(_e69).w);
let _e78 = input_uniform.val_i64_;
let _e81 = input_storage.val_i64_;
output.val_i64_ = (_e78 + _e81);
let _e87 = input_uniform.val_i64_2_;
let _e90 = input_storage.val_i64_2_;
output.val_i64_2_ = (_e87 + _e90);
let _e96 = input_uniform.val_i64_3_;
let _e99 = input_storage.val_i64_3_;
output.val_i64_3_ = (_e96 + _e99);
let _e105 = input_uniform.val_i64_4_;
let _e108 = input_storage.val_i64_4_;
output.val_i64_4_ = (_e105 + _e108);
let _e114 = input_arrays.val_i64_array_2_;
output_arrays.val_i64_array_2_ = _e114;
let _e115 = val;
let _e117 = val;
val = (_e117 + abs(_e115));
let _e119 = val;
let _e120 = val;
let _e121 = val;
let _e123 = val;
val = (_e123 + clamp(_e119, _e120, _e121));
let _e125 = val;
let _e127 = val;
let _e130 = val;
val = (_e130 + dot(vec2(_e125), vec2(_e127)));
let _e132 = val;
let _e133 = val;
let _e135 = val;
val = (_e135 + max(_e132, _e133));
let _e137 = val;
let _e138 = val;
let _e140 = val;
val = (_e140 + min(_e137, _e138));
let _e142 = val;
let _e144 = val;
val = (_e144 + sign(_e142));
let _e146 = val;
return _e146;
}
fn uint64_function(x_1: u64) -> u64 {
var val_1: u64 = 20lu;
let _e6 = val_1;
val_1 = (_e6 + (31lu + 1002003004005006lu));
let _e8 = val_1;
let _e11 = val_1;
val_1 = (_e11 + (_e8 + 5lu));
let _e15 = input_uniform.val_u32_;
let _e16 = val_1;
let _e20 = val_1;
val_1 = (_e20 + u64((_e15 + u32(_e16))));
let _e24 = input_uniform.val_i32_;
let _e25 = val_1;
let _e29 = val_1;
val_1 = (_e29 + u64((_e24 + i32(_e25))));
let _e33 = input_uniform.val_f32_;
let _e34 = val_1;
let _e38 = val_1;
val_1 = (_e38 + u64((_e33 + f32(_e34))));
let _e42 = input_uniform.val_u64_;
let _e45 = val_1;
val_1 = (_e45 + vec3(_e42).z);
let _e49 = input_uniform.val_i64_;
let _e51 = val_1;
val_1 = (_e51 + bitcast<u64>(_e49));
let _e55 = input_uniform.val_i64_2_;
let _e58 = val_1;
val_1 = (_e58 + bitcast<vec2<u64>>(_e55).y);
let _e62 = input_uniform.val_i64_3_;
let _e65 = val_1;
val_1 = (_e65 + bitcast<vec3<u64>>(_e62).z);
let _e69 = input_uniform.val_i64_4_;
let _e72 = val_1;
val_1 = (_e72 + bitcast<vec4<u64>>(_e69).w);
let _e78 = input_uniform.val_u64_;
let _e81 = input_storage.val_u64_;
output.val_u64_ = (_e78 + _e81);
let _e87 = input_uniform.val_u64_2_;
let _e90 = input_storage.val_u64_2_;
output.val_u64_2_ = (_e87 + _e90);
let _e96 = input_uniform.val_u64_3_;
let _e99 = input_storage.val_u64_3_;
output.val_u64_3_ = (_e96 + _e99);
let _e105 = input_uniform.val_u64_4_;
let _e108 = input_storage.val_u64_4_;
output.val_u64_4_ = (_e105 + _e108);
let _e114 = input_arrays.val_u64_array_2_;
output_arrays.val_u64_array_2_ = _e114;
let _e115 = val_1;
let _e117 = val_1;
val_1 = (_e117 + abs(_e115));
let _e119 = val_1;
let _e120 = val_1;
let _e121 = val_1;
let _e123 = val_1;
val_1 = (_e123 + clamp(_e119, _e120, _e121));
let _e125 = val_1;
let _e127 = val_1;
let _e130 = val_1;
val_1 = (_e130 + dot(vec2(_e125), vec2(_e127)));
let _e132 = val_1;
let _e133 = val_1;
let _e135 = val_1;
val_1 = (_e135 + max(_e132, _e133));
let _e137 = val_1;
let _e138 = val_1;
let _e140 = val_1;
val_1 = (_e140 + min(_e137, _e138));
let _e142 = val_1;
return _e142;
}
@compute @workgroup_size(1, 1, 1)
fn main() {
let _e3 = uint64_function(67lu);
let _e5 = int64_function(60li);
output.final_value = (_e3 + bitcast<u64>(_e5));
return;
}

View File

@ -807,6 +807,10 @@ fn convert_wgsl() {
"abstract-types-operators",
Targets::SPIRV | Targets::METAL | Targets::GLSL | Targets::WGSL,
),
(
"int64",
Targets::SPIRV | Targets::HLSL | Targets::WGSL | Targets::METAL,
),
];
for &(name, targets) in inputs.iter() {

View File

@ -176,3 +176,35 @@ fn storage_image_formats() {
"#,
);
}
#[test]
fn float64() {
require(
&[Ca::Float64],
r#"
fn f(x: f64) -> f64 {
return x;
}
"#,
);
}
#[test]
fn int64() {
require(
&[Ca::Int64],
r#"
fn f(x: i64) -> i64 {
return x;
}
"#,
);
require(
&[Ca::Int64],
r#"
fn f(x: u64) -> u64 {
return x;
}
"#,
);
}

View File

@ -870,7 +870,27 @@ fn matrix_constructor_inferred() {
macro_rules! check_one_validation {
( $source:expr, $pattern:pat $( if $guard:expr )? ) => {
let source = $source;
let error = validation_error($source);
let error = validation_error($source, naga::valid::Capabilities::default());
#[allow(clippy::redundant_pattern_matching)]
if ! matches!(&error, $pattern $( if $guard )? ) {
eprintln!("validation error does not match pattern:\n\
source code: {}\n\
\n\
actual result:\n\
{:#?}\n\
\n\
expected match for pattern:\n\
{}",
&source,
error,
stringify!($pattern));
$( eprintln!("if {}", stringify!($guard)); )?
panic!("validation error does not match pattern");
}
};
( $source:expr, $pattern:pat $( if $guard:expr )?, $capabilities:expr ) => {
let source = $source;
let error = validation_error($source, $capabilities);
#[allow(clippy::redundant_pattern_matching)]
if ! matches!(&error, $pattern $( if $guard )? ) {
eprintln!("validation error does not match pattern:\n\
@ -901,14 +921,27 @@ macro_rules! check_validation {
check_one_validation!($source, $pattern);
)*
};
( $( $source:literal ),* : $pattern:pat, $capabilities:expr ) => {
$(
check_one_validation!($source, $pattern, $capabilities);
)*
};
( $( $source:literal ),* : $pattern:pat if $guard:expr ) => {
$(
check_one_validation!($source, $pattern if $guard);
)*
};
( $( $source:literal ),* : $pattern:pat if $guard:expr, $capabilities:expr ) => {
$(
check_one_validation!($source, $pattern if $guard, $capabilities);
)*
}
}
fn validation_error(source: &str) -> Result<naga::valid::ModuleInfo, naga::valid::ValidationError> {
fn validation_error(
source: &str,
caps: naga::valid::Capabilities,
) -> Result<naga::valid::ModuleInfo, naga::valid::ValidationError> {
let module = match naga::front::wgsl::parse_str(source) {
Ok(module) => module,
Err(err) => {
@ -916,14 +949,23 @@ fn validation_error(source: &str) -> Result<naga::valid::ModuleInfo, naga::valid
panic!("{}", err.emit_to_string(source));
}
};
naga::valid::Validator::new(
naga::valid::ValidationFlags::all(),
naga::valid::Capabilities::default(),
)
naga::valid::Validator::new(naga::valid::ValidationFlags::all(), caps)
.validate(&module)
.map_err(|e| e.into_inner()) // TODO: Add tests for spans, too?
}
#[test]
fn int64_capability() {
check_validation! {
"var input: u64;",
"var input: i64;":
Err(naga::valid::ValidationError::Type {
source: naga::valid::TypeError::WidthError(naga::valid::WidthError::MissingCapability {flag: "SHADER_INT64",..}),
..
})
}
}
#[test]
fn invalid_arrays() {
check_validation! {
@ -936,6 +978,16 @@ fn invalid_arrays() {
})
}
check_validation! {
"var<uniform> input: array<u64, 2>;",
"var<uniform> input: array<vec2<u32>, 2>;":
Err(naga::valid::ValidationError::GlobalVariable {
source: naga::valid::GlobalVariableError::Alignment(naga::AddressSpace::Uniform,_,_),
..
}),
naga::valid::Capabilities::SHADER_INT64
}
check_validation! {
r#"
fn main() -> f32 {

View File

@ -21,7 +21,7 @@ fn create_numeric_builtin_test() -> Vec<ShaderTest> {
for &(input, low, high, output) in clamp_values {
let mut test = ShaderTest::new(
format!("clamp({input}, 0.0, 10.0) == {output:?})"),
format!("clamp({input}, {low}, {high}) == {output:?}"),
String::from("value: f32, low: f32, high: f32"),
String::from("output[0] = bitcast<u32>(clamp(input.value, input.low, input.high));"),
&[input, low, high],
@ -51,3 +51,112 @@ static NUMERIC_BUILTINS: GpuTestConfiguration = GpuTestConfiguration::new()
create_numeric_builtin_test(),
)
});
// The int64 polyfill tests below are disabled pending
// https://github.com/gfx-rs/wgpu/issues/5276.
/*
fn create_int64_polyfill_test() -> Vec<ShaderTest> {
let mut tests = Vec::new();
let u64_clz_values: &[(u64, u32)] = &[
(u64::MAX, 0),
(1, 63),
(2, 62),
(3, 62),
(1 << 63, 0),
(1 << 62, 1),
(0, 64),
];
for &(input, output) in u64_clz_values {
let test = ShaderTest::new(
format!("countLeadingZeros({input}lu) == {output:?}"),
String::from("value: u64"),
String::from("output[0] = u32(countLeadingZeros(input.value));"),
&[input],
&[output],
);
tests.push(test);
}
let i64_clz_values: &[(i64, u32)] = &[
(i64::MAX, 1),
(i64::MIN, 0),
(1, 63),
(1 << 62, 1),
(-1 << 62, 0),
(0, 64),
(-1, 0),
];
for &(input, output) in i64_clz_values {
let test = ShaderTest::new(
format!("countLeadingZeros({input}li) == {output:?}"),
String::from("value: i64"),
String::from("output[0] = u32(countLeadingZeros(input.value));"),
&[input],
&[output],
);
tests.push(test);
}
let u64_flb_values: &[(u64, u32)] = &[
(u64::MAX, 63),
(1, 0),
(2, 1),
(3, 1),
(1 << 63, 63),
(1 << 62, 62),
(0, u32::MAX),
];
for &(input, output) in u64_flb_values {
let test = ShaderTest::new(
format!("firstLeadingBit({input}lu) == {output:?}"),
String::from("value: u64"),
String::from("output[0] = u32(firstLeadingBit(input.value));"),
&[input],
&[output],
);
tests.push(test);
}
let i64_flb_values: &[(i64, u32)] = &[
(i64::MAX, 62),
(i64::MIN, 62),
(1, 0),
(1 << 62, 62),
(-1 << 62, 61),
(0, u32::MAX),
(-1, u32::MAX),
];
for &(input, output) in i64_flb_values {
let test = ShaderTest::new(
format!("firstLeadingBit({input}li) == {output:?}"),
String::from("value: i64"),
String::from("output[0] = u32(firstLeadingBit(input.value));"),
&[input],
&[output],
);
tests.push(test);
}
tests
}
#[gpu_test]
static INT64_POLYFILL: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(ctx, InputStorageType::Storage, create_int64_polyfill_test())
});
*/

View File

@ -253,6 +253,108 @@ fn create_struct_layout_tests(storage_type: InputStorageType) -> Vec<ShaderTest>
tests
}
fn create_64bit_struct_layout_tests() -> Vec<ShaderTest> {
let input_values: Vec<_> = (0..(MAX_BUFFER_SIZE as u32 / 4)).collect();
let mut tests = Vec::new();
// 64 bit alignment tests
for ty in ["u64", "i64"] {
let members = format!("scalar: {ty},");
let direct = String::from(
"\
output[0] = u32(bitcast<u64>(input.scalar) & 0xFFFFFFFF);
output[1] = u32((bitcast<u64>(input.scalar) >> 32) & 0xFFFFFFFF);
",
);
tests.push(ShaderTest::new(
format!("{ty} alignment"),
members,
direct,
&input_values,
&[0, 1],
));
}
// Nested struct and array test.
//
// This tries to exploit all the weird edge cases of the struct layout algorithm.
// We don't go as all-out as the other nested struct test because
// all our primitives are twice as wide and we have only so much buffer to spare.
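// Layout sketch (assuming WGSL rules where AlignOf(u64) = 8 and
// AlignOf(vec3<u64>) = 32, giving the array a stride of 32): `scalar`
// lands at byte offset 0, `scalar32` at 8, and `member` is padded up to
// offset 32 with elements at 32 and 64, which is where the expected u32
// output indices (0-1, 2, 8-13, 16-21) come from.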
{
let header = String::from(
"struct Inner { scalar: u64, scalar32: u32, member: array<vec3<u64>, 2> }",
);
let members = String::from("inner: Inner");
let direct = String::from(
"\
output[0] = u32(bitcast<u64>(input.inner.scalar) & 0xFFFFFFFF);
output[1] = u32((bitcast<u64>(input.inner.scalar) >> 32) & 0xFFFFFFFF);
output[2] = bitcast<u32>(input.inner.scalar32);
for (var index = 0u; index < 2u; index += 1u) {
for (var component = 0u; component < 3u; component += 1u) {
output[3 + index * 6 + component * 2] = u32(bitcast<u64>(input.inner.member[index][component]) & 0xFFFFFFFF);
output[4 + index * 6 + component * 2] = u32((bitcast<u64>(input.inner.member[index][component]) >> 32) & 0xFFFFFFFF);
}
}
",
);
tests.push(
ShaderTest::new(
String::from("nested struct and array"),
members,
direct,
&input_values,
&[
0, 1, // inner.scalar
2, // inner.scalar32
8, 9, 10, 11, 12, 13, // inner.member[0]
16, 17, 18, 19, 20, 21, // inner.member[1]
],
)
.header(header),
);
}
{
let header = String::from("struct Inner { scalar32: u32, scalar: u64, scalar32_2: u32 }");
let members = String::from("inner: Inner, vector: vec3<i64>");
let direct = String::from(
"\
output[0] = bitcast<u32>(input.inner.scalar32);
output[1] = u32(bitcast<u64>(input.inner.scalar) & 0xFFFFFFFF);
output[2] = u32((bitcast<u64>(input.inner.scalar) >> 32) & 0xFFFFFFFF);
output[3] = bitcast<u32>(input.inner.scalar32_2);
output[4] = u32(bitcast<u64>(input.vector.x) & 0xFFFFFFFF);
output[5] = u32((bitcast<u64>(input.vector.x) >> 32) & 0xFFFFFFFF);
output[6] = u32(bitcast<u64>(input.vector.y) & 0xFFFFFFFF);
output[7] = u32((bitcast<u64>(input.vector.y) >> 32) & 0xFFFFFFFF);
output[8] = u32(bitcast<u64>(input.vector.z) & 0xFFFFFFFF);
output[9] = u32((bitcast<u64>(input.vector.z) >> 32) & 0xFFFFFFFF);
",
);
tests.push(
ShaderTest::new(
String::from("nested struct and array"),
members,
direct,
&input_values,
&[
0, // inner.scalar32
2, 3, // inner.scalar
4, // inner.scalar32_2
8, 9, 10, 11, 12, 13, // vector
],
)
.header(header),
);
}
tests
}
#[gpu_test]
static UNIFORM_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
@ -306,3 +408,54 @@ static PUSH_CONSTANT_INPUT: GpuTestConfiguration = GpuTestConfiguration::new()
create_struct_layout_tests(InputStorageType::PushConstant),
)
});
#[gpu_test]
static UNIFORM_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
#[gpu_test]
static STORAGE_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits::downlevel_defaults()),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::Storage,
create_64bit_struct_layout_tests(),
)
});
#[gpu_test]
static PUSH_CONSTANT_INPUT_INT64: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.features(Features::SHADER_INT64 | Features::PUSH_CONSTANTS)
.downlevel_flags(DownlevelFlags::COMPUTE_SHADERS)
.limits(Limits {
max_push_constant_size: MAX_BUFFER_SIZE as u32,
..Limits::downlevel_defaults()
}),
)
.run_async(|ctx| {
shader_input_output_test(
ctx,
InputStorageType::PushConstant,
create_64bit_struct_layout_tests(),
)
});

View File

@ -1511,6 +1511,10 @@ impl<A: HalApi> Device<A> {
self.features
.contains(wgt::Features::SHADER_EARLY_DEPTH_TEST),
);
caps.set(
Caps::SHADER_INT64,
self.features.contains(wgt::Features::SHADER_INT64),
);
caps.set(
Caps::MULTISAMPLED_SHADING,
self.downlevel

View File

@ -295,6 +295,22 @@ impl super::Adapter {
bgra8unorm_storage_supported,
);
// We must be using DXC, because `uint64_t` was added in Shader Model 6.0
// and FXC only supports up to Shader Model 5.1.
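// Compiler support alone is not enough, though: the device must also
// report D3D12_FEATURE_DATA_D3D12_OPTIONS1::Int64ShaderOps, checked below.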
let int64_shader_ops_supported = dxc_container.is_some() && {
let mut features1: d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1 =
unsafe { mem::zeroed() };
let hr = unsafe {
device.CheckFeatureSupport(
d3d12_ty::D3D12_FEATURE_D3D12_OPTIONS1,
&mut features1 as *mut _ as *mut _,
mem::size_of::<d3d12_ty::D3D12_FEATURE_DATA_D3D12_OPTIONS1>() as _,
)
};
hr == 0 && features1.Int64ShaderOps != 0
};
features.set(wgt::Features::SHADER_INT64, int64_shader_ops_supported);
// float32-filterable should always be available on d3d12
features.set(wgt::Features::FLOAT32_FILTERABLE, true);

View File

@ -211,7 +211,7 @@ mod dxc {
Err(crate::PipelineError::Linkage(
stage_bit,
format!(
"DXC compile error: {:?}",
"DXC compile error: {}",
get_error_string_from_dxc_result(&dxc_container.library, &e.0)
.unwrap_or_default()
),

View File

@ -878,6 +878,10 @@ impl super::PrivateCapabilities {
{
features.insert(F::STORAGE_RESOURCE_BINDING_ARRAY);
}
features.set(
F::SHADER_INT64,
self.msl_version >= MTLLanguageVersion::V2_3,
);
features.set(
F::ADDRESS_MODE_CLAMP_TO_BORDER,

View File

@ -189,7 +189,7 @@ impl PhysicalDeviceFeatures {
//.shader_clip_distance(requested_features.contains(wgt::Features::SHADER_CLIP_DISTANCE))
//.shader_cull_distance(requested_features.contains(wgt::Features::SHADER_CULL_DISTANCE))
.shader_float64(requested_features.contains(wgt::Features::SHADER_F64))
//.shader_int64(requested_features.contains(wgt::Features::SHADER_INT64))
.shader_int64(requested_features.contains(wgt::Features::SHADER_INT64))
.shader_int16(requested_features.contains(wgt::Features::SHADER_I16))
//.shader_resource_residency(requested_features.contains(wgt::Features::SHADER_RESOURCE_RESIDENCY))
.geometry_shader(requested_features.contains(wgt::Features::SHADER_PRIMITIVE_INDEX))
@ -469,7 +469,7 @@ impl PhysicalDeviceFeatures {
//if self.core.shader_clip_distance != 0 {
//if self.core.shader_cull_distance != 0 {
features.set(F::SHADER_F64, self.core.shader_float64 != 0);
//if self.core.shader_int64 != 0 {
features.set(F::SHADER_INT64, self.core.shader_int64 != 0);
features.set(F::SHADER_I16, self.core.shader_int16 != 0);
//if caps.supports_extension(vk::KhrSamplerMirrorClampToEdgeFn::name()) {
@ -1454,6 +1454,10 @@ impl super::Adapter {
capabilities.push(spv::Capability::RayQueryKHR);
}
if features.contains(wgt::Features::SHADER_INT64) {
capabilities.push(spv::Capability::Int64);
}
let mut flags = spv::WriterFlags::empty();
flags.set(
spv::WriterFlags::DEBUG,

View File

@ -371,7 +371,7 @@ bitflags::bitflags! {
/// Allows shaders to acquire the FP16 ability
///
/// Note: this is not supported in `naga` yet—only through `spirv-passthrough` right now.
/// Note: this is not supported in `naga` yet, only through `spirv-passthrough` right now.
///
/// Supported Platforms:
/// - Vulkan
@ -874,6 +874,15 @@ bitflags::bitflags! {
/// - Vulkan (with dualSrcBlend)
/// - DX12
const DUAL_SOURCE_BLENDING = 1 << 54;
/// Allows shaders to use i64 and u64.
///
/// Supported platforms:
/// - Vulkan
/// - DX12 (DXC only)
/// - Metal (with MSL 2.3+)
///
/// This is a native only feature.
const SHADER_INT64 = 1 << 55;
}
}
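
For illustration, here is a minimal sketch (not part of this commit) of how an application would opt into the new feature at device creation; `adapter` is assumed to be a `wgpu::Adapter` already obtained from an instance:

```rust
// Request SHADER_INT64 so WGSL shaders may use i64/u64 values.
// request_device fails if the adapter does not report the feature.
let (device, queue) = adapter
    .request_device(
        &wgpu::DeviceDescriptor {
            label: Some("int64-capable device"),
            required_features: wgpu::Features::SHADER_INT64,
            required_limits: wgpu::Limits::downlevel_defaults(),
        },
        None,
    )
    .await
    .expect("SHADER_INT64 not available on this adapter");
```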