From 0b827769476b40f03477cdb9b3cf947fd0e70e2d Mon Sep 17 00:00:00 2001 From: Jim Blandy Date: Thu, 14 Nov 2024 11:30:19 -0800 Subject: [PATCH] [naga msl-out] Avoid UB by making all loops bounded. In MSL output, avoid undefined behavior due to unbounded loops by adding an unpredictable, never-actually-taken `break` to the bottom of each loop body, rather than adding an unpredictable, never-actually-taken branch over each loop. This will probably have more of a performance impact, because it affects each iteration of the loop, but unlike branching over the loop, which leaves infinite loops (and thus undefined behavior) in the output, this actually ensures that no loop presented to Metal is unbounded, so that there is no undefined behavior present that the optimizer could use to make unwelcome inferences. Fixes #6528. --- naga/src/back/msl/writer.rs | 96 ++++++++++---------- naga/tests/out/msl/atomicCompareExchange.msl | 14 ++- naga/tests/out/msl/boids.msl | 5 +- naga/tests/out/msl/break-if.msl | 14 ++- naga/tests/out/msl/collatz.msl | 5 +- naga/tests/out/msl/control-flow.msl | 20 ++-- naga/tests/out/msl/do-while.msl | 5 +- naga/tests/out/msl/overrides-ray-query.msl | 5 +- naga/tests/out/msl/ray-query.msl | 5 +- naga/tests/out/msl/shadow.msl | 8 +- 10 files changed, 98 insertions(+), 79 deletions(-) diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 282e28fe1..ed91aa248 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -383,10 +383,10 @@ pub struct Writer { /// padding inserted **before** them (i.e. between fields at index - 1 and index) struct_member_pads: FastHashSet<(Handle, u32)>, - /// Name of the loop reachability macro. + /// Name of the force-bounded-loop macro. /// - /// See `emit_loop_reachable_macro` for details. - loop_reachable_macro_name: String, + /// See `emit_force_bounded_loop_macro` for details. + force_bounded_loop_macro_name: String, } impl crate::Scalar { @@ -682,7 +682,7 @@ impl Writer { #[cfg(test)] put_block_stack_pointers: Default::default(), struct_member_pads: FastHashSet::default(), - loop_reachable_macro_name: String::default(), + force_bounded_loop_macro_name: String::default(), } } @@ -693,12 +693,13 @@ impl Writer { self.out } - /// Define a macro to invoke before loops, to defeat MSL infinite loop - /// reasoning. + /// Define a macro to invoke at the bottom of each loop body, to + /// defeat MSL infinite loop reasoning. /// /// If we haven't done so already, emit the definition of a preprocessor - /// macro to be invoked before each loop in the generated MSL, to ensure - /// that the MSL compiler's optimizations do not remove bounds checks. + /// macro to be invoked at the end of each loop body in the generated MSL, + /// to ensure that the MSL compiler's optimizations do not remove bounds + /// checks. /// /// Only the first call to this function for a given module actually causes /// the macro definition to be written. Subsequent loops can simply use the @@ -764,52 +765,51 @@ impl Writer { /// nicely, after having stolen data from elsewhere in the GPU address /// space. /// - /// Ideally, Naga would prevent UB entirely via some means that persuades - /// the MSL compiler that no loop Naga generates is infinite. One approach - /// would be to add inline assembly to each loop that is annotated as - /// potentially branching out of the loop, but which in fact generates no - /// instructions. Unfortunately, inline assembly is not handled correctly by - /// some Metal device drivers. Further experimentation hasn't produced a - /// satisfactory approach. + /// To avoid UB, Naga must persuade the MSL compiler that no loop Naga + /// generates is infinite. One approach would be to add inline assembly to + /// each loop that is annotated as potentially branching out of the loop, + /// but which in fact generates no instructions. Unfortunately, inline + /// assembly is not handled correctly by some Metal device drivers. /// - /// Instead, we accept that the MSL compiler may determine that some loops - /// are infinite, and focus instead on preventing the range analysis from - /// being affected. We transform *every* loop into something like this: + /// Instead, we add the following code to the bottom of every loop: /// /// ```ignore - /// if (volatile bool unpredictable = true; unpredictable) - /// while (true) { } + /// if (volatile bool unpredictable = false; unpredictable) + /// break; /// ``` /// - /// Since the `volatile` qualifier prevents the compiler from assuming that - /// the `if` condition is true, it cannot be sure the infinite loop is - /// reached, and thus it cannot assume the entire structure is unreachable. - /// This prevents the range analysis impact described above. + /// Although the `if` condition will always be false in any real execution, + /// the `volatile` qualifier prevents the compiler from assuming this. Thus, + /// it must assume that the `break` might be reached, and hence that the + /// loop is not unbounded. This prevents the range analysis impact described + /// above. /// /// Unfortunately, what makes this a kludge, not a hack, is that this /// solution leaves the GPU executing a pointless conditional branch, at - /// runtime, before each loop. There's no part of the system that has a - /// global enough view to be sure that `unpredictable` is true, and remove - /// it from the code. + /// runtime, in every iteration of the loop. There's no part of the system + /// that has a global enough view to be sure that `unpredictable` is true, + /// and remove it from the code. Adding the branch also affects + /// optimization: for example, it's impossible to unroll this loop. This + /// transformation has been observed to significantly hurt performance. /// /// To make our output a bit more legible, we pull the condition out into a /// preprocessor macro defined at the top of the module. /// - /// This approach is also used by Chromium WebGPU's Dawn shader compiler, as of - /// . - fn emit_loop_reachable_macro(&mut self) -> BackendResult { - if !self.loop_reachable_macro_name.is_empty() { + /// This approach is also used by Chromium WebGPU's Dawn shader compiler: + /// + fn emit_force_bounded_loop_macro(&mut self) -> BackendResult { + if !self.force_bounded_loop_macro_name.is_empty() { return Ok(()); } - self.loop_reachable_macro_name = self.namer.call("LOOP_IS_REACHABLE"); - let loop_reachable_volatile_name = self.namer.call("unpredictable_jump_over_loop"); + self.force_bounded_loop_macro_name = self.namer.call("LOOP_IS_BOUNDED"); + let loop_bounded_volatile_name = self.namer.call("unpredictable_break_from_loop"); writeln!( self.out, - "#define {} if (volatile bool {} = true; {})", - self.loop_reachable_macro_name, - loop_reachable_volatile_name, - loop_reachable_volatile_name, + "#define {} {{ volatile bool {} = false; if ({}) break; }}", + self.force_bounded_loop_macro_name, + loop_bounded_volatile_name, + loop_bounded_volatile_name, )?; Ok(()) @@ -3045,15 +3045,10 @@ impl Writer { ref continuing, break_if, } => { - self.emit_loop_reachable_macro()?; if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!( - self.out, - "{level}{} while(true) {{", - self.loop_reachable_macro_name, - )?; + writeln!(self.out, "{level}while(true) {{",)?; let lif = level.next(); let lcontinuing = lif.next(); writeln!(self.out, "{lif}if (!{gate_name}) {{")?; @@ -3068,13 +3063,16 @@ impl Writer { writeln!(self.out, "{lif}}}")?; writeln!(self.out, "{lif}{gate_name} = false;")?; } else { - writeln!( - self.out, - "{level}{} while(true) {{", - self.loop_reachable_macro_name, - )?; + writeln!(self.out, "{level}while(true) {{",)?; } self.put_block(level.next(), body, context)?; + self.emit_force_bounded_loop_macro()?; + writeln!( + self.out, + "{}{}", + level.next(), + self.force_bounded_loop_macro_name + )?; writeln!(self.out, "{level}}}")?; } crate::Statement::Break => { @@ -3553,7 +3551,7 @@ impl Writer { &[CLAMPED_LOD_LOAD_PREFIX], &mut self.names, ); - self.loop_reachable_macro_name.clear(); + self.force_bounded_loop_macro_name.clear(); self.struct_member_pads.clear(); writeln!( diff --git a/naga/tests/out/msl/atomicCompareExchange.msl b/naga/tests/out/msl/atomicCompareExchange.msl index 800b5b201..6655fad7e 100644 --- a/naga/tests/out/msl/atomicCompareExchange.msl +++ b/naga/tests/out/msl/atomicCompareExchange.msl @@ -76,9 +76,8 @@ kernel void test_atomic_compare_exchange_i32_( uint i = 0u; int old = {}; bool exchanged = {}; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { uint _e27 = i; i = _e27 + 1u; @@ -94,7 +93,7 @@ kernel void test_atomic_compare_exchange_i32_( int _e8 = metal::atomic_load_explicit(&arr_i32_.inner[_e6], metal::memory_order_relaxed); old = _e8; exchanged = false; - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e12 = exchanged; if (!(_e12)) { } else { @@ -109,8 +108,11 @@ kernel void test_atomic_compare_exchange_i32_( old = _e23.old_value; exchanged = _e23.exchanged; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } } + LOOP_IS_BOUNDED } return; } @@ -123,7 +125,7 @@ kernel void test_atomic_compare_exchange_u32_( uint old_1 = {}; bool exchanged_1 = {}; bool loop_init_1 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_1) { uint _e27 = i_1; i_1 = _e27 + 1u; @@ -139,7 +141,7 @@ kernel void test_atomic_compare_exchange_u32_( uint _e8 = metal::atomic_load_explicit(&arr_u32_.inner[_e6], metal::memory_order_relaxed); old_1 = _e8; exchanged_1 = false; - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e12 = exchanged_1; if (!(_e12)) { } else { @@ -154,8 +156,10 @@ kernel void test_atomic_compare_exchange_u32_( old_1 = _e23.old_value; exchanged_1 = _e23.exchanged; } + LOOP_IS_BOUNDED } } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/boids.msl b/naga/tests/out/msl/boids.msl index 0dd520ac7..07acd7cf6 100644 --- a/naga/tests/out/msl/boids.msl +++ b/naga/tests/out/msl/boids.msl @@ -55,9 +55,8 @@ kernel void main_( vPos = _e8; metal::float2 _e14 = particlesSrc.particles[index].vel; vVel = _e14; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { uint _e91 = i; i = _e91 + 1u; @@ -106,6 +105,8 @@ kernel void main_( int _e88 = cVelCount; cVelCount = _e88 + 1; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } int _e94 = cMassCount; if (_e94 > 0) { diff --git a/naga/tests/out/msl/break-if.msl b/naga/tests/out/msl/break-if.msl index 3684f7222..4d3397234 100644 --- a/naga/tests/out/msl/break-if.msl +++ b/naga/tests/out/msl/break-if.msl @@ -7,15 +7,16 @@ using metal::uint; void breakIfEmpty( ) { -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { if (true) { break; } } loop_init = false; +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } @@ -26,7 +27,7 @@ void breakIfEmptyBody( bool b = {}; bool c = {}; bool loop_init_1 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_1) { b = a; bool _e2 = b; @@ -37,6 +38,7 @@ void breakIfEmptyBody( } } loop_init_1 = false; + LOOP_IS_BOUNDED } return; } @@ -47,7 +49,7 @@ void breakIf( bool d = {}; bool e = {}; bool loop_init_2 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_2) { bool _e5 = e; if (a_1 == e) { @@ -58,6 +60,7 @@ void breakIf( d = a_1; bool _e2 = d; e = a_1 != _e2; + LOOP_IS_BOUNDED } return; } @@ -66,7 +69,7 @@ void breakIfSeparateVariable( ) { uint counter = 0u; bool loop_init_3 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_3) { uint _e5 = counter; if (counter == 5u) { @@ -76,6 +79,7 @@ void breakIfSeparateVariable( loop_init_3 = false; uint _e3 = counter; counter = _e3 + 1u; + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/collatz.msl b/naga/tests/out/msl/collatz.msl index 1ae910de6..e282d13ab 100644 --- a/naga/tests/out/msl/collatz.msl +++ b/naga/tests/out/msl/collatz.msl @@ -19,8 +19,7 @@ uint collatz_iterations( uint n = {}; uint i = 0u; n = n_base; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { uint _e4 = n; if (_e4 > 1u) { } else { @@ -38,6 +37,8 @@ uint collatz_iterations( uint _e20 = i; i = _e20 + 1u; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } uint _e23 = i; return _e23; diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index dbf75163a..1b35249f3 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -31,8 +31,7 @@ void switch_case_break( void loop_switch_continue( int x ) { -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x) { case 1: { continue; @@ -41,6 +40,8 @@ void loop_switch_continue( break; } } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } @@ -50,7 +51,7 @@ void loop_switch_continue_nesting( int y, int z ) { - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x_1) { case 1: { continue; @@ -61,7 +62,7 @@ void loop_switch_continue_nesting( continue; } default: { - LOOP_IS_REACHABLE while(true) { + while(true) { switch(z) { case 1: { continue; @@ -70,6 +71,7 @@ void loop_switch_continue_nesting( break; } } + LOOP_IS_BOUNDED } break; } @@ -85,8 +87,9 @@ void loop_switch_continue_nesting( continue; } } + LOOP_IS_BOUNDED } - LOOP_IS_REACHABLE while(true) { + while(true) { switch(y) { case 1: default: { @@ -98,6 +101,7 @@ void loop_switch_continue_nesting( break; } } + LOOP_IS_BOUNDED } return; } @@ -109,7 +113,7 @@ void loop_switch_omit_continue_variable_checks( int w ) { int pos_1 = 0; - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x_2) { case 1: { pos_1 = 1; @@ -119,8 +123,9 @@ void loop_switch_omit_continue_variable_checks( break; } } + LOOP_IS_BOUNDED } - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x_2) { case 1: { break; @@ -149,6 +154,7 @@ void loop_switch_omit_continue_variable_checks( break; } } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/do-while.msl b/naga/tests/out/msl/do-while.msl index b093da1dc..2a883304d 100644 --- a/naga/tests/out/msl/do-while.msl +++ b/naga/tests/out/msl/do-while.msl @@ -8,9 +8,8 @@ using metal::uint; void fb1_( thread bool& cond ) { -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { bool _e1 = cond; if (!(cond)) { @@ -19,6 +18,8 @@ void fb1_( } loop_init = false; continue; +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/overrides-ray-query.msl b/naga/tests/out/msl/overrides-ray-query.msl index f2ad45c98..3aa0ee035 100644 --- a/naga/tests/out/msl/overrides-ray-query.msl +++ b/naga/tests/out/msl/overrides-ray-query.msl @@ -33,14 +33,15 @@ kernel void main_( rq.intersector.force_opacity((desc.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (desc.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((desc.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(desc.origin, desc.dir, desc.tmin, desc.tmax), acc_struct, desc.cull_mask); rq.ready = true; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e31 = rq.ready; rq.ready = false; if (_e31) { } else { break; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index 129ad108a..e5e44d0b5 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -53,14 +53,15 @@ RayIntersection query_loop( rq.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((_e8.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq.ready = true; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e9 = rq.ready; rq.ready = false; if (_e9) { } else { break; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform}; } diff --git a/naga/tests/out/msl/shadow.msl b/naga/tests/out/msl/shadow.msl index f8aeef9d4..18cc84211 100644 --- a/naga/tests/out/msl/shadow.msl +++ b/naga/tests/out/msl/shadow.msl @@ -100,9 +100,8 @@ fragment fs_mainOutput fs_main( metal::float3 color = c_ambient; uint i = 0u; metal::float3 normal_1 = metal::normalize(in.world_normal); -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { uint _e40 = i; i = _e40 + 1u; @@ -124,6 +123,8 @@ fragment fs_mainOutput fs_main( metal::float3 _e37 = color; color = _e37 + ((_e23 * diffuse) * light.color.xyz); } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } metal::float3 _e42 = color; metal::float4 _e47 = u_entity.color; @@ -152,7 +153,7 @@ fragment fs_main_without_storageOutput fs_main_without_storage( uint i_1 = 0u; metal::float3 normal_2 = metal::normalize(in_1.world_normal); bool loop_init_1 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_1) { uint _e40 = i_1; i_1 = _e40 + 1u; @@ -174,6 +175,7 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 _e37 = color_1; color_1 = _e37 + ((_e23 * diffuse_1) * light_1.color.xyz); } + LOOP_IS_BOUNDED } metal::float3 _e42 = color_1; metal::float4 _e47 = u_entity.color;