From 1a3a12f505ba9433ef101ea1a86479c5c55fa348 Mon Sep 17 00:00:00 2001
From: Ashley Hauck <github@khyperia.com>
Date: Tue, 10 Aug 2021 13:54:44 +0200
Subject: [PATCH] Implement some float packing/unpacking instructions (#709)

* Implement some float packing/unpacking instructions

* Rename f32xN to vecN in packing functions
---
 crates/spirv-std/src/float.rs | 232 ++++++++++++++++++++++++++++++++++
 tests/ui/lang/f32/packing.rs  |  65 ++++++++++
 2 files changed, 297 insertions(+)
 create mode 100644 tests/ui/lang/f32/packing.rs
diff --git a/crates/spirv-std/src/float.rs b/crates/spirv-std/src/float.rs
index 14985489d3..3ab2d0aa18 100644
--- a/crates/spirv-std/src/float.rs
+++ b/crates/spirv-std/src/float.rs
@@ -1,3 +1,5 @@
+use crate::vector::Vector;
+
 /// Abstract trait representing a SPIR-V floating point type.
 pub unsafe trait Float: num_traits::Float + crate::scalar::Scalar + Default {
     const WIDTH: usize;
@@ -10,3 +12,233 @@ unsafe impl Float for f32 {
 unsafe impl Float for f64 {
     const WIDTH: usize = 64;
 }
+
+/// Converts two f32 values (floats) into two f16 values (halfs), packed into a single u32: the
+/// low 16 bits hold the first f16, and the high 16 bits hold the second f16.
+#[spirv_std_macros::gpu_only]
+pub fn vec2_to_f16x2(vec: impl Vector<f32, 2>) -> u32 {
+    let result;
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%uint = OpTypeInt 32 0",
+            "%vec = OpLoad _ {vec}",
+            // 58 = GLSL.std.450 instruction number of PackHalf2x16
+            "{result} = OpExtInst %uint %glsl 58 %vec",
+            vec = in(reg) &vec,
+            result = out(reg) result,
+        );
+    }
+    result
+}
+
+/// Converts two f16 values (halfs) packed in a u32 into two f32 values (floats), returned as a
+/// 2-component vector. The low 16 bits are the first f16 and the high 16 bits the second f16.
+#[spirv_std_macros::gpu_only]
+pub fn f16x2_to_vec2<V: Vector<f32, 2>>(int: u32) -> V {
+    let mut result = Default::default();
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%float = OpTypeFloat 32",
+            "%vec2 = OpTypeVector %float 2",
+            // 62 = GLSL.std.450 instruction number of UnpackHalf2x16
+            "%result = OpExtInst %vec2 %glsl 62 {int}",
+            "OpStore {result} %result",
+            int = in(reg) int,
+            result = in(reg) &mut result,
+        );
+    }
+    result
+}
+
+// No concrete vector type is guaranteed to exist here (cfg(feature = "glam") might not be
+// enabled), so define a minimal private two-component f32 vector for the scalar helpers.
+#[cfg_attr(target_arch = "spirv", repr(simd))]
+// sometimes dead because on cpu, the `gpu_only` macro replaces the function bodies
+#[allow(dead_code)]
+#[derive(Default)]
+struct F32x2 {
+    x: f32,
+    y: f32,
+}
+unsafe impl Vector<f32, 2> for F32x2 {}
+
+/// Converts an f32 (float) into an f16 (half), returned in the low 16 bits of a u32 because GPU
+/// support for u16 is not universal. The high 16 bits are zero (the packed second lane is 0.0).
+#[spirv_std_macros::gpu_only]
+pub fn f32_to_f16(float: f32) -> u32 {
+    vec2_to_f16x2(F32x2 { x: float, y: 0.0 })
+}
+
+/// Converts an f16 (half) into an f32 (float). The parameter is a u32, due to GPU support for u16
+/// not being universal - the upper 16 bits are ignored. Like `f32_to_f16`, this is available
+/// regardless of whether the `glam` feature is enabled.
+#[spirv_std_macros::gpu_only]
+pub fn f16_to_f32(packed: u32) -> f32 {
+    f16x2_to_vec2::<F32x2>(packed).x
+}
+
+/// Packs the four f32 components of a vec4 into 4 8-bit signed integers stored in one u32. See
+/// [PackSnorm4x8](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for exact
+/// semantics.
+#[spirv_std_macros::gpu_only]
+pub fn vec4_to_u8x4_snorm(vec: impl Vector<f32, 4>) -> u32 {
+    let result;
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%uint = OpTypeInt 32 0",
+            "%vec = OpLoad _ {vec}",
+            // 54 = GLSL.std.450 instruction number of PackSnorm4x8
+            "{result} = OpExtInst %uint %glsl 54 %vec",
+            vec = in(reg) &vec,
+            result = out(reg) result,
+        );
+    }
+    result
+}
+
+/// Packs the four f32 components of a vec4 into 4 8-bit unsigned integers stored in one u32. See
+/// [PackUnorm4x8](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for exact
+/// semantics.
+#[spirv_std_macros::gpu_only]
+pub fn vec4_to_u8x4_unorm(vec: impl Vector<f32, 4>) -> u32 {
+    let result;
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%uint = OpTypeInt 32 0",
+            "%vec = OpLoad _ {vec}",
+            // 55 = GLSL.std.450 instruction number of PackUnorm4x8
+            "{result} = OpExtInst %uint %glsl 55 %vec",
+            vec = in(reg) &vec,
+            result = out(reg) result,
+        );
+    }
+    result
+}
+
+/// Packs the two f32 components of a vec2 into 2 16-bit signed integers stored in one u32. See
+/// [PackSnorm2x16](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for exact
+/// semantics.
+#[spirv_std_macros::gpu_only]
+pub fn vec2_to_u16x2_snorm(vec: impl Vector<f32, 2>) -> u32 {
+    let result;
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%uint = OpTypeInt 32 0",
+            "%vec = OpLoad _ {vec}",
+            // 56 = GLSL.std.450 instruction number of PackSnorm2x16
+            "{result} = OpExtInst %uint %glsl 56 %vec",
+            vec = in(reg) &vec,
+            result = out(reg) result,
+        );
+    }
+    result
+}
+
+/// Packs the two f32 components of a vec2 into 2 16-bit unsigned integers stored in one u32. See
+/// [PackUnorm2x16](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for exact
+/// semantics.
+#[spirv_std_macros::gpu_only]
+pub fn vec2_to_u16x2_unorm(vec: impl Vector<f32, 2>) -> u32 {
+    let result;
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%uint = OpTypeInt 32 0",
+            "%vec = OpLoad _ {vec}",
+            // 57 = GLSL.std.450 instruction number of PackUnorm2x16
+            "{result} = OpExtInst %uint %glsl 57 %vec",
+            vec = in(reg) &vec,
+            result = out(reg) result,
+        );
+    }
+    result
+}
+
+/// Unpacks the 4 8-bit signed integers held in a u32 into the f32 components of a vec4. See
+/// [UnpackSnorm4x8](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for exact
+/// semantics.
+#[spirv_std_macros::gpu_only]
+pub fn u8x4_to_vec4_snorm<V: Vector<f32, 4>>(int: u32) -> V {
+    let mut result = Default::default();
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%float = OpTypeFloat 32",
+            "%vec4 = OpTypeVector %float 4",
+            // 63 = GLSL.std.450 instruction number of UnpackSnorm4x8
+            "%result = OpExtInst %vec4 %glsl 63 {int}",
+            "OpStore {result} %result",
+            int = in(reg) int,
+            result = in(reg) &mut result,
+        );
+    }
+    result
+}
+
+/// Unpacks the 4 8-bit unsigned integers held in a u32 into the f32 components of a vec4. See
+/// [UnpackUnorm4x8](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for exact
+/// semantics.
+#[spirv_std_macros::gpu_only]
+pub fn u8x4_to_vec4_unorm<V: Vector<f32, 4>>(int: u32) -> V {
+    let mut result = Default::default();
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%float = OpTypeFloat 32",
+            "%vec4 = OpTypeVector %float 4",
+            // 64 = GLSL.std.450 instruction number of UnpackUnorm4x8
+            "%result = OpExtInst %vec4 %glsl 64 {int}",
+            "OpStore {result} %result",
+            int = in(reg) int,
+            result = in(reg) &mut result,
+        );
+    }
+    result
+}
+
+/// Unpacks the 2 16-bit signed integers held in a u32 into the f32 components of a vec2. See
+/// [UnpackSnorm2x16](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for
+/// exact semantics.
+#[spirv_std_macros::gpu_only]
+pub fn u16x2_to_vec2_snorm<V: Vector<f32, 2>>(int: u32) -> V {
+    let mut result = Default::default();
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%float = OpTypeFloat 32",
+            "%vec2 = OpTypeVector %float 2",
+            // 60 = GLSL.std.450 instruction number of UnpackSnorm2x16
+            "%result = OpExtInst %vec2 %glsl 60 {int}",
+            "OpStore {result} %result",
+            int = in(reg) int,
+            result = in(reg) &mut result,
+        );
+    }
+    result
+}
+
+/// Unpacks the 2 16-bit unsigned integers held in a u32 into the f32 components of a vec2. See
+/// [UnpackUnorm2x16](https://www.khronos.org/registry/SPIR-V/specs/1.0/GLSL.std.450.html) for
+/// exact semantics.
+#[spirv_std_macros::gpu_only]
+pub fn u16x2_to_vec2_unorm<V: Vector<f32, 2>>(int: u32) -> V {
+    let mut result = Default::default();
+    unsafe {
+        asm!(
+            "%glsl = OpExtInstImport \"GLSL.std.450\"",
+            "%float = OpTypeFloat 32",
+            "%vec2 = OpTypeVector %float 2",
+            // 61 = GLSL.std.450 instruction number of UnpackUnorm2x16
+            "%result = OpExtInst %vec2 %glsl 61 {int}",
+            "OpStore {result} %result",
+            int = in(reg) int,
+            result = in(reg) &mut result,
+        );
+    }
+    result
+}
diff --git a/tests/ui/lang/f32/packing.rs b/tests/ui/lang/f32/packing.rs
new file mode 100644
index 0000000000..d44d2183b0
--- /dev/null
+++ b/tests/ui/lang/f32/packing.rs
@@ -0,0 +1,65 @@
+// Test that each packing/unpacking function in `spirv_std::float` builds in a fragment shader.
+// build-pass
+
+use spirv_std::float::*;
+use spirv_std::glam::{Vec2, Vec4};
+
+#[spirv(fragment)]
+pub fn test_vec2_to_f16x2(i: Vec2, o: &mut u32) {
+    *o = vec2_to_f16x2(i);
+}
+
+#[spirv(fragment)]
+pub fn test_f16x2_to_vec2(i: u32, o: &mut Vec2) {
+    *o = f16x2_to_vec2(i);
+}
+
+#[spirv(fragment)]
+pub fn test_f32_to_f16(i: f32, o: &mut u32) {
+    *o = f32_to_f16(i);
+}
+
+#[spirv(fragment)]
+pub fn test_f16_to_f32(i: u32, o: &mut f32) {
+    *o = f16_to_f32(i);
+}
+
+#[spirv(fragment)]
+pub fn test_vec4_to_u8x4_snorm(i: Vec4, o: &mut u32) {
+    *o = vec4_to_u8x4_snorm(i);
+}
+
+#[spirv(fragment)]
+pub fn test_vec4_to_u8x4_unorm(i: Vec4, o: &mut u32) {
+    *o = vec4_to_u8x4_unorm(i);
+}
+
+#[spirv(fragment)]
+pub fn test_vec2_to_u16x2_snorm(i: Vec2, o: &mut u32) {
+    *o = vec2_to_u16x2_snorm(i);
+}
+
+#[spirv(fragment)]
+pub fn test_vec2_to_u16x2_unorm(i: Vec2, o: &mut u32) {
+    *o = vec2_to_u16x2_unorm(i);
+}
+
+#[spirv(fragment)]
+pub fn test_u8x4_to_vec4_snorm(i: u32, o: &mut Vec4) {
+    *o = u8x4_to_vec4_snorm(i);
+}
+
+#[spirv(fragment)]
+pub fn test_u8x4_to_vec4_unorm(i: u32, o: &mut Vec4) {
+    *o = u8x4_to_vec4_unorm(i);
+}
+
+#[spirv(fragment)]
+pub fn test_u16x2_to_vec2_snorm(i: u32, o: &mut Vec2) {
+    *o = u16x2_to_vec2_snorm(i);
+}
+
+#[spirv(fragment)]
+pub fn test_u16x2_to_vec2_unorm(i: u32, o: &mut Vec2) {
+    *o = u16x2_to_vec2_unorm(i);
+}