diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 282e28fe1..ed91aa248 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -383,10 +383,10 @@ pub struct Writer { /// padding inserted **before** them (i.e. between fields at index - 1 and index) struct_member_pads: FastHashSet<(Handle, u32)>, - /// Name of the loop reachability macro. + /// Name of the force-bounded-loop macro. /// - /// See `emit_loop_reachable_macro` for details. - loop_reachable_macro_name: String, + /// See `emit_force_bounded_loop_macro` for details. + force_bounded_loop_macro_name: String, } impl crate::Scalar { @@ -682,7 +682,7 @@ impl Writer { #[cfg(test)] put_block_stack_pointers: Default::default(), struct_member_pads: FastHashSet::default(), - loop_reachable_macro_name: String::default(), + force_bounded_loop_macro_name: String::default(), } } @@ -693,12 +693,13 @@ impl Writer { self.out } - /// Define a macro to invoke before loops, to defeat MSL infinite loop - /// reasoning. + /// Define a macro to invoke at the bottom of each loop body, to + /// defeat MSL infinite loop reasoning. /// /// If we haven't done so already, emit the definition of a preprocessor - /// macro to be invoked before each loop in the generated MSL, to ensure - /// that the MSL compiler's optimizations do not remove bounds checks. + /// macro to be invoked at the end of each loop body in the generated MSL, + /// to ensure that the MSL compiler's optimizations do not remove bounds + /// checks. /// /// Only the first call to this function for a given module actually causes /// the macro definition to be written. Subsequent loops can simply use the @@ -764,52 +765,51 @@ impl Writer { /// nicely, after having stolen data from elsewhere in the GPU address /// space. /// - /// Ideally, Naga would prevent UB entirely via some means that persuades - /// the MSL compiler that no loop Naga generates is infinite. One approach - /// would be to add inline assembly to each loop that is annotated as - /// potentially branching out of the loop, but which in fact generates no - /// instructions. Unfortunately, inline assembly is not handled correctly by - /// some Metal device drivers. Further experimentation hasn't produced a - /// satisfactory approach. + /// To avoid UB, Naga must persuade the MSL compiler that no loop Naga + /// generates is infinite. One approach would be to add inline assembly to + /// each loop that is annotated as potentially branching out of the loop, + /// but which in fact generates no instructions. Unfortunately, inline + /// assembly is not handled correctly by some Metal device drivers. /// - /// Instead, we accept that the MSL compiler may determine that some loops - /// are infinite, and focus instead on preventing the range analysis from - /// being affected. We transform *every* loop into something like this: + /// Instead, we add the following code to the bottom of every loop: /// /// ```ignore - /// if (volatile bool unpredictable = true; unpredictable) - /// while (true) { } + /// if (volatile bool unpredictable = false; unpredictable) + /// break; /// ``` /// - /// Since the `volatile` qualifier prevents the compiler from assuming that - /// the `if` condition is true, it cannot be sure the infinite loop is - /// reached, and thus it cannot assume the entire structure is unreachable. - /// This prevents the range analysis impact described above. + /// Although the `if` condition will always be false in any real execution, + /// the `volatile` qualifier prevents the compiler from assuming this. Thus, + /// it must assume that the `break` might be reached, and hence that the + /// loop is not unbounded. This prevents the range analysis impact described + /// above. /// /// Unfortunately, what makes this a kludge, not a hack, is that this /// solution leaves the GPU executing a pointless conditional branch, at - /// runtime, before each loop. There's no part of the system that has a - /// global enough view to be sure that `unpredictable` is true, and remove - /// it from the code. + /// runtime, in every iteration of the loop. There's no part of the system + /// that has a global enough view to be sure that `unpredictable` is true, + /// and remove it from the code. Adding the branch also affects + /// optimization: for example, it's impossible to unroll this loop. This + /// transformation has been observed to significantly hurt performance. /// /// To make our output a bit more legible, we pull the condition out into a /// preprocessor macro defined at the top of the module. /// - /// This approach is also used by Chromium WebGPU's Dawn shader compiler, as of - /// . - fn emit_loop_reachable_macro(&mut self) -> BackendResult { - if !self.loop_reachable_macro_name.is_empty() { + /// This approach is also used by Chromium WebGPU's Dawn shader compiler: + /// + fn emit_force_bounded_loop_macro(&mut self) -> BackendResult { + if !self.force_bounded_loop_macro_name.is_empty() { return Ok(()); } - self.loop_reachable_macro_name = self.namer.call("LOOP_IS_REACHABLE"); - let loop_reachable_volatile_name = self.namer.call("unpredictable_jump_over_loop"); + self.force_bounded_loop_macro_name = self.namer.call("LOOP_IS_BOUNDED"); + let loop_bounded_volatile_name = self.namer.call("unpredictable_break_from_loop"); writeln!( self.out, - "#define {} if (volatile bool {} = true; {})", - self.loop_reachable_macro_name, - loop_reachable_volatile_name, - loop_reachable_volatile_name, + "#define {} {{ volatile bool {} = false; if ({}) break; }}", + self.force_bounded_loop_macro_name, + loop_bounded_volatile_name, + loop_bounded_volatile_name, )?; Ok(()) @@ -3045,15 +3045,10 @@ impl Writer { ref continuing, break_if, } => { - self.emit_loop_reachable_macro()?; if !continuing.is_empty() || break_if.is_some() { let gate_name = self.namer.call("loop_init"); writeln!(self.out, "{level}bool {gate_name} = true;")?; - writeln!( - self.out, - "{level}{} while(true) {{", - self.loop_reachable_macro_name, - )?; + writeln!(self.out, "{level}while(true) {{",)?; let lif = level.next(); let lcontinuing = lif.next(); writeln!(self.out, "{lif}if (!{gate_name}) {{")?; @@ -3068,13 +3063,16 @@ impl Writer { writeln!(self.out, "{lif}}}")?; writeln!(self.out, "{lif}{gate_name} = false;")?; } else { - writeln!( - self.out, - "{level}{} while(true) {{", - self.loop_reachable_macro_name, - )?; + writeln!(self.out, "{level}while(true) {{",)?; } self.put_block(level.next(), body, context)?; + self.emit_force_bounded_loop_macro()?; + writeln!( + self.out, + "{}{}", + level.next(), + self.force_bounded_loop_macro_name + )?; writeln!(self.out, "{level}}}")?; } crate::Statement::Break => { @@ -3553,7 +3551,7 @@ impl Writer { &[CLAMPED_LOD_LOAD_PREFIX], &mut self.names, ); - self.loop_reachable_macro_name.clear(); + self.force_bounded_loop_macro_name.clear(); self.struct_member_pads.clear(); writeln!( diff --git a/naga/tests/out/msl/atomicCompareExchange.msl b/naga/tests/out/msl/atomicCompareExchange.msl index 800b5b201..6655fad7e 100644 --- a/naga/tests/out/msl/atomicCompareExchange.msl +++ b/naga/tests/out/msl/atomicCompareExchange.msl @@ -76,9 +76,8 @@ kernel void test_atomic_compare_exchange_i32_( uint i = 0u; int old = {}; bool exchanged = {}; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { uint _e27 = i; i = _e27 + 1u; @@ -94,7 +93,7 @@ kernel void test_atomic_compare_exchange_i32_( int _e8 = metal::atomic_load_explicit(&arr_i32_.inner[_e6], metal::memory_order_relaxed); old = _e8; exchanged = false; - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e12 = exchanged; if (!(_e12)) { } else { @@ -109,8 +108,11 @@ kernel void test_atomic_compare_exchange_i32_( old = _e23.old_value; exchanged = _e23.exchanged; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } } + LOOP_IS_BOUNDED } return; } @@ -123,7 +125,7 @@ kernel void test_atomic_compare_exchange_u32_( uint old_1 = {}; bool exchanged_1 = {}; bool loop_init_1 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_1) { uint _e27 = i_1; i_1 = _e27 + 1u; @@ -139,7 +141,7 @@ kernel void test_atomic_compare_exchange_u32_( uint _e8 = metal::atomic_load_explicit(&arr_u32_.inner[_e6], metal::memory_order_relaxed); old_1 = _e8; exchanged_1 = false; - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e12 = exchanged_1; if (!(_e12)) { } else { @@ -154,8 +156,10 @@ kernel void test_atomic_compare_exchange_u32_( old_1 = _e23.old_value; exchanged_1 = _e23.exchanged; } + LOOP_IS_BOUNDED } } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/boids.msl b/naga/tests/out/msl/boids.msl index 0dd520ac7..07acd7cf6 100644 --- a/naga/tests/out/msl/boids.msl +++ b/naga/tests/out/msl/boids.msl @@ -55,9 +55,8 @@ kernel void main_( vPos = _e8; metal::float2 _e14 = particlesSrc.particles[index].vel; vVel = _e14; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { uint _e91 = i; i = _e91 + 1u; @@ -106,6 +105,8 @@ kernel void main_( int _e88 = cVelCount; cVelCount = _e88 + 1; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } int _e94 = cMassCount; if (_e94 > 0) { diff --git a/naga/tests/out/msl/break-if.msl b/naga/tests/out/msl/break-if.msl index 3684f7222..4d3397234 100644 --- a/naga/tests/out/msl/break-if.msl +++ b/naga/tests/out/msl/break-if.msl @@ -7,15 +7,16 @@ using metal::uint; void breakIfEmpty( ) { -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { if (true) { break; } } loop_init = false; +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } @@ -26,7 +27,7 @@ void breakIfEmptyBody( bool b = {}; bool c = {}; bool loop_init_1 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_1) { b = a; bool _e2 = b; @@ -37,6 +38,7 @@ void breakIfEmptyBody( } } loop_init_1 = false; + LOOP_IS_BOUNDED } return; } @@ -47,7 +49,7 @@ void breakIf( bool d = {}; bool e = {}; bool loop_init_2 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_2) { bool _e5 = e; if (a_1 == e) { @@ -58,6 +60,7 @@ void breakIf( d = a_1; bool _e2 = d; e = a_1 != _e2; + LOOP_IS_BOUNDED } return; } @@ -66,7 +69,7 @@ void breakIfSeparateVariable( ) { uint counter = 0u; bool loop_init_3 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_3) { uint _e5 = counter; if (counter == 5u) { @@ -76,6 +79,7 @@ void breakIfSeparateVariable( loop_init_3 = false; uint _e3 = counter; counter = _e3 + 1u; + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/collatz.msl b/naga/tests/out/msl/collatz.msl index 1ae910de6..e282d13ab 100644 --- a/naga/tests/out/msl/collatz.msl +++ b/naga/tests/out/msl/collatz.msl @@ -19,8 +19,7 @@ uint collatz_iterations( uint n = {}; uint i = 0u; n = n_base; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { uint _e4 = n; if (_e4 > 1u) { } else { @@ -38,6 +37,8 @@ uint collatz_iterations( uint _e20 = i; i = _e20 + 1u; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } uint _e23 = i; return _e23; diff --git a/naga/tests/out/msl/control-flow.msl b/naga/tests/out/msl/control-flow.msl index dbf75163a..1b35249f3 100644 --- a/naga/tests/out/msl/control-flow.msl +++ b/naga/tests/out/msl/control-flow.msl @@ -31,8 +31,7 @@ void switch_case_break( void loop_switch_continue( int x ) { -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x) { case 1: { continue; @@ -41,6 +40,8 @@ void loop_switch_continue( break; } } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } @@ -50,7 +51,7 @@ void loop_switch_continue_nesting( int y, int z ) { - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x_1) { case 1: { continue; @@ -61,7 +62,7 @@ void loop_switch_continue_nesting( continue; } default: { - LOOP_IS_REACHABLE while(true) { + while(true) { switch(z) { case 1: { continue; @@ -70,6 +71,7 @@ void loop_switch_continue_nesting( break; } } + LOOP_IS_BOUNDED } break; } @@ -85,8 +87,9 @@ void loop_switch_continue_nesting( continue; } } + LOOP_IS_BOUNDED } - LOOP_IS_REACHABLE while(true) { + while(true) { switch(y) { case 1: default: { @@ -98,6 +101,7 @@ void loop_switch_continue_nesting( break; } } + LOOP_IS_BOUNDED } return; } @@ -109,7 +113,7 @@ void loop_switch_omit_continue_variable_checks( int w ) { int pos_1 = 0; - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x_2) { case 1: { pos_1 = 1; @@ -119,8 +123,9 @@ void loop_switch_omit_continue_variable_checks( break; } } + LOOP_IS_BOUNDED } - LOOP_IS_REACHABLE while(true) { + while(true) { switch(x_2) { case 1: { break; @@ -149,6 +154,7 @@ void loop_switch_omit_continue_variable_checks( break; } } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/do-while.msl b/naga/tests/out/msl/do-while.msl index b093da1dc..2a883304d 100644 --- a/naga/tests/out/msl/do-while.msl +++ b/naga/tests/out/msl/do-while.msl @@ -8,9 +8,8 @@ using metal::uint; void fb1_( thread bool& cond ) { -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { bool _e1 = cond; if (!(cond)) { @@ -19,6 +18,8 @@ void fb1_( } loop_init = false; continue; +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/overrides-ray-query.msl b/naga/tests/out/msl/overrides-ray-query.msl index f2ad45c98..3aa0ee035 100644 --- a/naga/tests/out/msl/overrides-ray-query.msl +++ b/naga/tests/out/msl/overrides-ray-query.msl @@ -33,14 +33,15 @@ kernel void main_( rq.intersector.force_opacity((desc.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (desc.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((desc.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(desc.origin, desc.dir, desc.tmin, desc.tmax), acc_struct, desc.cull_mask); rq.ready = true; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e31 = rq.ready; rq.ready = false; if (_e31) { } else { break; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return; } diff --git a/naga/tests/out/msl/ray-query.msl b/naga/tests/out/msl/ray-query.msl index 129ad108a..e5e44d0b5 100644 --- a/naga/tests/out/msl/ray-query.msl +++ b/naga/tests/out/msl/ray-query.msl @@ -53,14 +53,15 @@ RayIntersection query_loop( rq.intersector.force_opacity((_e8.flags & 1) != 0 ? metal::raytracing::forced_opacity::opaque : (_e8.flags & 2) != 0 ? metal::raytracing::forced_opacity::non_opaque : metal::raytracing::forced_opacity::none); rq.intersector.accept_any_intersection((_e8.flags & 4) != 0); rq.intersection = rq.intersector.intersect(metal::raytracing::ray(_e8.origin, _e8.dir, _e8.tmin, _e8.tmax), acs, _e8.cull_mask); rq.ready = true; -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) - LOOP_IS_REACHABLE while(true) { + while(true) { bool _e9 = rq.ready; rq.ready = false; if (_e9) { } else { break; } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } return RayIntersection {_map_intersection_type(rq.intersection.type), rq.intersection.distance, rq.intersection.user_instance_id, rq.intersection.instance_id, {}, rq.intersection.geometry_id, rq.intersection.primitive_id, rq.intersection.triangle_barycentric_coord, rq.intersection.triangle_front_facing, {}, rq.intersection.object_to_world_transform, rq.intersection.world_to_object_transform}; } diff --git a/naga/tests/out/msl/shadow.msl b/naga/tests/out/msl/shadow.msl index f8aeef9d4..18cc84211 100644 --- a/naga/tests/out/msl/shadow.msl +++ b/naga/tests/out/msl/shadow.msl @@ -100,9 +100,8 @@ fragment fs_mainOutput fs_main( metal::float3 color = c_ambient; uint i = 0u; metal::float3 normal_1 = metal::normalize(in.world_normal); -#define LOOP_IS_REACHABLE if (volatile bool unpredictable_jump_over_loop = true; unpredictable_jump_over_loop) bool loop_init = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init) { uint _e40 = i; i = _e40 + 1u; @@ -124,6 +123,8 @@ fragment fs_mainOutput fs_main( metal::float3 _e37 = color; color = _e37 + ((_e23 * diffuse) * light.color.xyz); } +#define LOOP_IS_BOUNDED { volatile bool unpredictable_break_from_loop = false; if (unpredictable_break_from_loop) break; } + LOOP_IS_BOUNDED } metal::float3 _e42 = color; metal::float4 _e47 = u_entity.color; @@ -152,7 +153,7 @@ fragment fs_main_without_storageOutput fs_main_without_storage( uint i_1 = 0u; metal::float3 normal_2 = metal::normalize(in_1.world_normal); bool loop_init_1 = true; - LOOP_IS_REACHABLE while(true) { + while(true) { if (!loop_init_1) { uint _e40 = i_1; i_1 = _e40 + 1u; @@ -174,6 +175,7 @@ fragment fs_main_without_storageOutput fs_main_without_storage( metal::float3 _e37 = color_1; color_1 = _e37 + ((_e23 * diffuse_1) * light_1.color.xyz); } + LOOP_IS_BOUNDED } metal::float3 _e42 = color_1; metal::float4 _e47 = u_entity.color;