From 5a583b1fb7367346f6847678eafc37b45686e801 Mon Sep 17 00:00:00 2001
From: Daniel McNab <36049421+DJMcNab@users.noreply.github.com>
Date: Thu, 27 Mar 2025 15:55:14 +0000
Subject: [PATCH] Polyfill unpackUnorm4x8 and friends on unsupported GLSL
 versions (#7408)

---
 CHANGELOG.md                                  |   1 +
 naga/src/back/glsl/mod.rs                     | 167 ++++++++++++++++--
 naga/tests/in/wgsl/bits.wgsl                  |   2 +
 naga/tests/in/wgsl/bits_downlevel.toml        |   4 +
 naga/tests/in/wgsl/bits_downlevel.wgsl        |  76 ++++++++
 naga/tests/in/wgsl/bits_downlevel_webgl.toml  |   4 +
 naga/tests/in/wgsl/bits_downlevel_webgl.wgsl  |  75 ++++++++
 .../glsl/bits_downlevel.main.Fragment.glsl    |  28 +++
 .../bits_downlevel_webgl.main.Fragment.glsl   |  32 ++++
 9 files changed, 379 insertions(+), 10 deletions(-)
 create mode 100644 naga/tests/in/wgsl/bits_downlevel.toml
 create mode 100644 naga/tests/in/wgsl/bits_downlevel.wgsl
 create mode 100644 naga/tests/in/wgsl/bits_downlevel_webgl.toml
 create mode 100644 naga/tests/in/wgsl/bits_downlevel_webgl.wgsl
 create mode 100644 naga/tests/out/glsl/bits_downlevel.main.Fragment.glsl
 create mode 100644 naga/tests/out/glsl/bits_downlevel_webgl.main.Fragment.glsl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ae7324cc3..d081addaf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -193,6 +193,7 @@ By @wumpf in [#7144](https://github.com/gfx-rs/wgpu/pull/7144)
 - Support for generating the candidate intersections from AABB geometry, and confirming the hits. By @kvark in [#7047](https://github.com/gfx-rs/wgpu/pull/7047).
 - Make naga::back::spv::Function::to_words write the OpFunctionEnd instruction in itself, instead of making another call after it. By @junjunjd in [#7156](https://github.com/gfx-rs/wgpu/pull/7156).
 - Add support for texture memory barriers. By @Devon7925 in [#7173](https://github.com/gfx-rs/wgpu/pull/7173).
+- Add polyfills for `unpackSnorm4x8`, `unpackUnorm4x8`, `unpackSnorm2x16`, `unpackUnorm2x16` for GLSL versions they aren't supported in. By @DJMcNab in [#7408](https://github.com/gfx-rs/wgpu/pull/7408).
 
 ### Changes
 
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index 403cefd56..256fceb5e 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -230,6 +230,30 @@ impl Version {
     fn supports_derivative_control(&self) -> bool {
         *self >= Version::Desktop(450)
     }
+
+    // For supports_pack_unpack_4x8, supports_pack_unpack_snorm_2x16, supports_pack_unpack_unorm_2x16
+    // see:
+    // https://registry.khronos.org/OpenGL-Refpages/gl4/html/unpackUnorm.xhtml
+    // https://registry.khronos.org/OpenGL-Refpages/es3/html/unpackUnorm.xhtml
+    // https://registry.khronos.org/OpenGL-Refpages/gl4/html/packUnorm.xhtml
+    // https://registry.khronos.org/OpenGL-Refpages/es3/html/packUnorm.xhtml
+    fn supports_pack_unpack_4x8(&self) -> bool {
+        *self >= Version::Desktop(400) || *self >= Version::new_gles(310)
+    }
+    fn supports_pack_unpack_snorm_2x16(&self) -> bool {
+        *self >= Version::Desktop(420) || *self >= Version::new_gles(300)
+    }
+    fn supports_pack_unpack_unorm_2x16(&self) -> bool {
+        *self >= Version::Desktop(400) || *self >= Version::new_gles(300)
+    }
+
+    // https://registry.khronos.org/OpenGL-Refpages/gl4/html/unpackHalf2x16.xhtml
+    // https://registry.khronos.org/OpenGL-Refpages/gl4/html/packHalf2x16.xhtml
+    // https://registry.khronos.org/OpenGL-Refpages/es3/html/unpackHalf2x16.xhtml
+    // https://registry.khronos.org/OpenGL-Refpages/es3/html/packHalf2x16.xhtml
+    fn supports_pack_unpack_half_2x16(&self) -> bool {
+        *self >= Version::Desktop(420) || *self >= Version::new_gles(300)
+    }
 }
 
 impl PartialOrd for Version {
@@ -1369,6 +1393,31 @@ impl<'a, W: Write> Writer<'a, W> {
                     | crate::MathFunction::QuantizeToF16 => {
                         self.need_bake_expressions.insert(arg);
                     }
+                    /* crate::MathFunction::Pack4x8unorm | */
+                    crate::MathFunction::Unpack4x8snorm
+                        if !self.options.version.supports_pack_unpack_4x8() =>
+                    {
+                        // We have a fallback if the platform doesn't natively support these
+                        self.need_bake_expressions.insert(arg);
+                    }
+                    /* crate::MathFunction::Pack4x8unorm | */
+                    crate::MathFunction::Unpack4x8unorm
+                        if !self.options.version.supports_pack_unpack_4x8() =>
+                    {
+                        self.need_bake_expressions.insert(arg);
+                    }
+                    /* crate::MathFunction::Pack2x16snorm |  */
+                    crate::MathFunction::Unpack2x16snorm
+                        if !self.options.version.supports_pack_unpack_snorm_2x16() =>
+                    {
+                        self.need_bake_expressions.insert(arg);
+                    }
+                    /* crate::MathFunction::Pack2x16unorm | */
+                    crate::MathFunction::Unpack2x16unorm
+                        if !self.options.version.supports_pack_unpack_unorm_2x16() =>
+                    {
+                        self.need_bake_expressions.insert(arg);
+                    }
                     crate::MathFunction::ExtractBits => {
                         // Only argument 1 is re-used.
                         self.need_bake_expressions.insert(arg1.unwrap());
@@ -3756,11 +3805,43 @@ impl<'a, W: Write> Writer<'a, W> {
                     Mf::FirstTrailingBit => "findLSB",
                     Mf::FirstLeadingBit => "findMSB",
                     // data packing
-                    Mf::Pack4x8snorm => "packSnorm4x8",
-                    Mf::Pack4x8unorm => "packUnorm4x8",
-                    Mf::Pack2x16snorm => "packSnorm2x16",
-                    Mf::Pack2x16unorm => "packUnorm2x16",
-                    Mf::Pack2x16float => "packHalf2x16",
+                    Mf::Pack4x8snorm => {
+                        if self.options.version.supports_pack_unpack_4x8() {
+                            "packSnorm4x8"
+                        } else {
+                            // polyfill should go here. Needs a corresponding entry in `need_bake_expression`
+                            return Err(Error::UnsupportedExternal("packSnorm4x8".into()));
+                        }
+                    }
+                    Mf::Pack4x8unorm => {
+                        if self.options.version.supports_pack_unpack_4x8() {
+                            "packUnorm4x8"
+                        } else {
+                            return Err(Error::UnsupportedExternal("packUnorm4x8".to_owned()));
+                        }
+                    }
+                    Mf::Pack2x16snorm => {
+                        if self.options.version.supports_pack_unpack_snorm_2x16() {
+                            "packSnorm2x16"
+                        } else {
+                            return Err(Error::UnsupportedExternal("packSnorm2x16".to_owned()));
+                        }
+                    }
+                    Mf::Pack2x16unorm => {
+                        if self.options.version.supports_pack_unpack_unorm_2x16() {
+                            "packUnorm2x16"
+                        } else {
+                            return Err(Error::UnsupportedExternal("packUnorm2x16".to_owned()));
+                        }
+                    }
+                    Mf::Pack2x16float => {
+                        if self.options.version.supports_pack_unpack_half_2x16() {
+                            "packHalf2x16"
+                        } else {
+                            return Err(Error::UnsupportedExternal("packHalf2x16".to_owned()));
+                        }
+                    }
+
                     fun @ (Mf::Pack4xI8 | Mf::Pack4xU8) => {
                         let was_signed = match fun {
                             Mf::Pack4xI8 => true,
@@ -3787,11 +3868,77 @@ impl<'a, W: Write> Writer<'a, W> {
                         return Ok(());
                     }
                     // data unpacking
-                    Mf::Unpack4x8snorm => "unpackSnorm4x8",
-                    Mf::Unpack4x8unorm => "unpackUnorm4x8",
-                    Mf::Unpack2x16snorm => "unpackSnorm2x16",
-                    Mf::Unpack2x16unorm => "unpackUnorm2x16",
-                    Mf::Unpack2x16float => "unpackHalf2x16",
+                    Mf::Unpack2x16float => {
+                        if self.options.version.supports_pack_unpack_half_2x16() {
+                            "unpackHalf2x16"
+                        } else {
+                            return Err(Error::UnsupportedExternal("unpackHalf2x16".into()));
+                        }
+                    }
+                    Mf::Unpack2x16snorm => {
+                        if self.options.version.supports_pack_unpack_snorm_2x16() {
+                            "unpackSnorm2x16"
+                        } else {
+                            let scale = 32767;
+
+                            write!(self.out, "(vec2(ivec2(")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " << 16, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, ") >> 16) / {scale}.0)")?;
+                            return Ok(());
+                        }
+                    }
+                    Mf::Unpack2x16unorm => {
+                        if self.options.version.supports_pack_unpack_unorm_2x16() {
+                            "unpackUnorm2x16"
+                        } else {
+                            let scale = 65535;
+
+                            write!(self.out, "(vec2(")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " & 0xFFFFu, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " >> 16) / {scale}.0)")?;
+                            return Ok(());
+                        }
+                    }
+                    Mf::Unpack4x8snorm => {
+                        if self.options.version.supports_pack_unpack_4x8() {
+                            "unpackSnorm4x8"
+                        } else {
+                            let scale = 127;
+
+                            write!(self.out, "(vec4(ivec4(")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " << 24, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " << 16, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " << 8, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, ") >> 24) / {scale}.0)")?;
+                            return Ok(());
+                        }
+                    }
+                    Mf::Unpack4x8unorm => {
+                        if self.options.version.supports_pack_unpack_4x8() {
+                            "unpackUnorm4x8"
+                        } else {
+                            let scale = 255;
+
+                            write!(self.out, "(vec4(")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " & 0xFFu, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " >> 8 & 0xFFu, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " >> 16 & 0xFFu, ")?;
+                            self.write_expr(arg, ctx)?;
+                            write!(self.out, " >> 24) / {scale}.0)")?;
+                            return Ok(());
+                        }
+                    }
                     fun @ (Mf::Unpack4xI8 | Mf::Unpack4xU8) => {
                         let sign_prefix = match fun {
                             Mf::Unpack4xI8 => 'i',
diff --git a/naga/tests/in/wgsl/bits.wgsl b/naga/tests/in/wgsl/bits.wgsl
index 077572faa..b5cfbaf1c 100644
--- a/naga/tests/in/wgsl/bits.wgsl
+++ b/naga/tests/in/wgsl/bits.wgsl
@@ -1,3 +1,5 @@
+// Keep in sync with `bits_downlevel` and `bits_downlevel_webgl`
+
 @compute @workgroup_size(1)
 fn main() {
     var i = 0;
diff --git a/naga/tests/in/wgsl/bits_downlevel.toml b/naga/tests/in/wgsl/bits_downlevel.toml
new file mode 100644
index 000000000..f4ae479ec
--- /dev/null
+++ b/naga/tests/in/wgsl/bits_downlevel.toml
@@ -0,0 +1,4 @@
+targets = "GLSL"
+
+[glsl]
+version.Desktop = 330
diff --git a/naga/tests/in/wgsl/bits_downlevel.wgsl b/naga/tests/in/wgsl/bits_downlevel.wgsl
new file mode 100644
index 000000000..038218776
--- /dev/null
+++ b/naga/tests/in/wgsl/bits_downlevel.wgsl
@@ -0,0 +1,76 @@
+// Keep in sync with bits.wgsl
+
+@fragment
+fn main() {
+    var i = 0;
+    var i2 = vec2<i32>(0);
+    var i3 = vec3<i32>(0);
+    var i4 = vec4<i32>(0);
+    var u = 0u;
+    var u2 = vec2<u32>(0u);
+    var u3 = vec3<u32>(0u);
+    var u4 = vec4<u32>(0u);
+    var f2 = vec2<f32>(0.0);
+    var f4 = vec4<f32>(0.0);
+    // No polyfill for these yet
+    // u = pack4x8snorm(f4);
+    // u = pack4x8unorm(f4);
+    // u = pack2x16snorm(f2);
+    // u = pack2x16unorm(f2);
+    // u = pack2x16float(f2);
+    u = pack4xI8(i4);
+    u = pack4xU8(u4);
+    f4 = unpack4x8snorm(u);
+    f4 = unpack4x8unorm(u);
+    f2 = unpack2x16snorm(u);
+    f2 = unpack2x16unorm(u);
+    // No polyfill for this yet
+    // f2 = unpack2x16float(u);
+    // Polyfill for this is broken in downlevel
+    // i4 = unpack4xI8(u);
+    // u4 = unpack4xU8(u);
+    // Implementation is broken on downlevel
+    // i = insertBits(i, i, 5u, 10u);
+    // i2 = insertBits(i2, i2, 5u, 10u);
+    // i3 = insertBits(i3, i3, 5u, 10u);
+    // i4 = insertBits(i4, i4, 5u, 10u);
+    // u = insertBits(u, u, 5u, 10u);
+    // u2 = insertBits(u2, u2, 5u, 10u);
+    // u3 = insertBits(u3, u3, 5u, 10u);
+    // u4 = insertBits(u4, u4, 5u, 10u);
+    // Implementation is broken on downlevel
+    // i = extractBits(i, 5u, 10u);
+    // i2 = extractBits(i2, 5u, 10u);
+    // i3 = extractBits(i3, 5u, 10u);
+    // i4 = extractBits(i4, 5u, 10u);
+    // u = extractBits(u, 5u, 10u);
+    // u2 = extractBits(u2, 5u, 10u);
+    // u3 = extractBits(u3, 5u, 10u);
+    // u4 = extractBits(u4, 5u, 10u);
+    // Implementation is broken on downlevel
+    // i = firstTrailingBit(i);
+    // u2 = firstTrailingBit(u2);
+    // i3 = firstLeadingBit(i3);
+    // u3 = firstLeadingBit(u3);
+    // Implementation is broken on downlevel
+    // i = firstLeadingBit(i);
+    // u = firstLeadingBit(u);
+    // Implementation is broken on downlevel
+    // i = countOneBits(i);
+    // i2 = countOneBits(i2);
+    // i3 = countOneBits(i3);
+    // i4 = countOneBits(i4);
+    // u = countOneBits(u);
+    // u2 = countOneBits(u2);
+    // u3 = countOneBits(u3);
+    // u4 = countOneBits(u4);
+    // Implementation is broken on downlevel
+    // i = reverseBits(i);
+    // i2 = reverseBits(i2);
+    // i3 = reverseBits(i3);
+    // i4 = reverseBits(i4);
+    // u = reverseBits(u);
+    // u2 = reverseBits(u2);
+    // u3 = reverseBits(u3);
+    // u4 = reverseBits(u4);
+}
diff --git a/naga/tests/in/wgsl/bits_downlevel_webgl.toml b/naga/tests/in/wgsl/bits_downlevel_webgl.toml
new file mode 100644
index 000000000..cbf2ceca0
--- /dev/null
+++ b/naga/tests/in/wgsl/bits_downlevel_webgl.toml
@@ -0,0 +1,4 @@
+targets = "GLSL"
+
+[glsl]
+version.Embedded = { is_webgl = true, version = 300 }
diff --git a/naga/tests/in/wgsl/bits_downlevel_webgl.wgsl b/naga/tests/in/wgsl/bits_downlevel_webgl.wgsl
new file mode 100644
index 000000000..88eea72e8
--- /dev/null
+++ b/naga/tests/in/wgsl/bits_downlevel_webgl.wgsl
@@ -0,0 +1,75 @@
+// Keep in sync with bits.wgsl
+
+@fragment
+fn main() {
+    var i = 0;
+    var i2 = vec2<i32>(0);
+    var i3 = vec3<i32>(0);
+    var i4 = vec4<i32>(0);
+    var u = 0u;
+    var u2 = vec2<u32>(0u);
+    var u3 = vec3<u32>(0u);
+    var u4 = vec4<u32>(0u);
+    var f2 = vec2<f32>(0.0);
+    var f4 = vec4<f32>(0.0);
+    // No polyfill for these yet
+    // u = pack4x8snorm(f4);
+    // u = pack4x8unorm(f4);
+    // u = pack2x16snorm(f2);
+    // u = pack2x16unorm(f2);
+    // u = pack2x16float(f2);
+    u = pack4xI8(i4);
+    u = pack4xU8(u4);
+    f4 = unpack4x8snorm(u);
+    f4 = unpack4x8unorm(u);
+    f2 = unpack2x16snorm(u);
+    f2 = unpack2x16unorm(u);
+    // No polyfill for this yet
+    // f2 = unpack2x16float(u);
+    // Polyfill for this is broken in downlevel
+    // i4 = unpack4xI8(u);
+    // u4 = unpack4xU8(u);
+    // Implementation is broken on downlevel
+    // i = insertBits(i, i, 5u, 10u);
+    // i2 = insertBits(i2, i2, 5u, 10u);
+    // i3 = insertBits(i3, i3, 5u, 10u);
+    // i4 = insertBits(i4, i4, 5u, 10u);
+    // u = insertBits(u, u, 5u, 10u);
+    // u2 = insertBits(u2, u2, 5u, 10u);
+    // u3 = insertBits(u3, u3, 5u, 10u);
+    // u4 = insertBits(u4, u4, 5u, 10u);
+    // Implementation is broken on downlevel
+    // i = extractBits(i, 5u, 10u);
+    // i2 = extractBits(i2, 5u, 10u);
+    // i3 = extractBits(i3, 5u, 10u);
+    // i4 = extractBits(i4, 5u, 10u);
+    // u = extractBits(u, 5u, 10u);
+    // u2 = extractBits(u2, 5u, 10u);
+    // u3 = extractBits(u3, 5u, 10u);
+    // u4 = extractBits(u4, 5u, 10u);
+    // Implementation is broken on downlevel
+    // i = firstTrailingBit(i);
+    // u2 = firstTrailingBit(u2);
+    // Implementation is broken on downlevel
+    // i3 = firstLeadingBit(i3);
+    // u3 = firstLeadingBit(u3);
+    // i = firstLeadingBit(i);
+    // u = firstLeadingBit(u);
+    // Implementation is broken on downlevel
+    // i = countOneBits(i);
+    // i2 = countOneBits(i2);
+    // i3 = countOneBits(i3);
+    // i4 = countOneBits(i4);
+    // u = countOneBits(u);
+    // u2 = countOneBits(u2);
+    // u3 = countOneBits(u3);
+    // u4 = countOneBits(u4);
+    // i = reverseBits(i);
+    // i2 = reverseBits(i2);
+    // i3 = reverseBits(i3);
+    // i4 = reverseBits(i4);
+    // u = reverseBits(u);
+    // u2 = reverseBits(u2);
+    // u3 = reverseBits(u3);
+    // u4 = reverseBits(u4);
+}
diff --git a/naga/tests/out/glsl/bits_downlevel.main.Fragment.glsl b/naga/tests/out/glsl/bits_downlevel.main.Fragment.glsl
new file mode 100644
index 000000000..64de8d95b
--- /dev/null
+++ b/naga/tests/out/glsl/bits_downlevel.main.Fragment.glsl
@@ -0,0 +1,28 @@
+#version 330 core
+
+void main() {
+    int i = 0;
+    ivec2 i2_ = ivec2(0);
+    ivec3 i3_ = ivec3(0);
+    ivec4 i4_ = ivec4(0);
+    uint u = 0u;
+    uvec2 u2_ = uvec2(0u);
+    uvec3 u3_ = uvec3(0u);
+    uvec4 u4_ = uvec4(0u);
+    vec2 f2_ = vec2(0.0);
+    vec4 f4_ = vec4(0.0);
+    ivec4 _e23 = i4_;
+    u = uint((_e23[0] & 0xFF) | ((_e23[1] & 0xFF) << 8) | ((_e23[2] & 0xFF) << 16) | ((_e23[3] & 0xFF) << 24));
+    uvec4 _e25 = u4_;
+    u = (_e25[0] & 0xFFu) | ((_e25[1] & 0xFFu) << 8) | ((_e25[2] & 0xFFu) << 16) | ((_e25[3] & 0xFFu) << 24);
+    uint _e27 = u;
+    f4_ = (vec4(ivec4(_e27 << 24, _e27 << 16, _e27 << 8, _e27) >> 24) / 127.0);
+    uint _e29 = u;
+    f4_ = (vec4(_e29 & 0xFFu, _e29 >> 8 & 0xFFu, _e29 >> 16 & 0xFFu, _e29 >> 24) / 255.0);
+    uint _e31 = u;
+    f2_ = (vec2(ivec2(_e31 << 16, _e31) >> 16) / 32767.0);
+    uint _e33 = u;
+    f2_ = (vec2(_e33 & 0xFFFFu, _e33 >> 16) / 65535.0);
+    return;
+}
+
diff --git a/naga/tests/out/glsl/bits_downlevel_webgl.main.Fragment.glsl b/naga/tests/out/glsl/bits_downlevel_webgl.main.Fragment.glsl
new file mode 100644
index 000000000..d5b60c525
--- /dev/null
+++ b/naga/tests/out/glsl/bits_downlevel_webgl.main.Fragment.glsl
@@ -0,0 +1,32 @@
+#version 300 es
+
+precision highp float;
+precision highp int;
+
+
+void main() {
+    int i = 0;
+    ivec2 i2_ = ivec2(0);
+    ivec3 i3_ = ivec3(0);
+    ivec4 i4_ = ivec4(0);
+    uint u = 0u;
+    uvec2 u2_ = uvec2(0u);
+    uvec3 u3_ = uvec3(0u);
+    uvec4 u4_ = uvec4(0u);
+    vec2 f2_ = vec2(0.0);
+    vec4 f4_ = vec4(0.0);
+    ivec4 _e23 = i4_;
+    u = uint((_e23[0] & 0xFF) | ((_e23[1] & 0xFF) << 8) | ((_e23[2] & 0xFF) << 16) | ((_e23[3] & 0xFF) << 24));
+    uvec4 _e25 = u4_;
+    u = (_e25[0] & 0xFFu) | ((_e25[1] & 0xFFu) << 8) | ((_e25[2] & 0xFFu) << 16) | ((_e25[3] & 0xFFu) << 24);
+    uint _e27 = u;
+    f4_ = (vec4(ivec4(_e27 << 24, _e27 << 16, _e27 << 8, _e27) >> 24) / 127.0);
+    uint _e29 = u;
+    f4_ = (vec4(_e29 & 0xFFu, _e29 >> 8 & 0xFFu, _e29 >> 16 & 0xFFu, _e29 >> 24) / 255.0);
+    uint _e31 = u;
+    f2_ = unpackSnorm2x16(_e31);
+    uint _e33 = u;
+    f2_ = unpackUnorm2x16(_e33);
+    return;
+}
+