Add profiling spans to provide more information about profile gaps

This commit is contained in:
Connor Fitzgerald 2021-10-03 00:12:00 -04:00 committed by Dzmitry Malyshau
parent d8fd9dd5c7
commit c3d906856f
15 changed files with 266 additions and 144 deletions

View File

@ -128,35 +128,65 @@ impl<A: hal::Api> NonReferencedResources<A> {
}
unsafe fn clean(&mut self, device: &A::Device) {
for raw in self.buffers.drain(..) {
device.destroy_buffer(raw);
if !self.buffers.is_empty() {
profiling::scope!("destroy_buffers");
for raw in self.buffers.drain(..) {
device.destroy_buffer(raw);
}
}
for raw in self.textures.drain(..) {
device.destroy_texture(raw);
if !self.textures.is_empty() {
profiling::scope!("destroy_textures");
for raw in self.textures.drain(..) {
device.destroy_texture(raw);
}
}
for (_, raw) in self.texture_views.drain(..) {
device.destroy_texture_view(raw);
if !self.texture_views.is_empty() {
profiling::scope!("destroy_texture_views");
for (_, raw) in self.texture_views.drain(..) {
device.destroy_texture_view(raw);
}
}
for raw in self.samplers.drain(..) {
device.destroy_sampler(raw);
if !self.samplers.is_empty() {
profiling::scope!("destroy_samplers");
for raw in self.samplers.drain(..) {
device.destroy_sampler(raw);
}
}
for raw in self.bind_groups.drain(..) {
device.destroy_bind_group(raw);
if !self.bind_groups.is_empty() {
profiling::scope!("destroy_bind_groups");
for raw in self.bind_groups.drain(..) {
device.destroy_bind_group(raw);
}
}
for raw in self.compute_pipes.drain(..) {
device.destroy_compute_pipeline(raw);
if !self.compute_pipes.is_empty() {
profiling::scope!("destroy_compute_pipelines");
for raw in self.compute_pipes.drain(..) {
device.destroy_compute_pipeline(raw);
}
}
for raw in self.render_pipes.drain(..) {
device.destroy_render_pipeline(raw);
if !self.render_pipes.is_empty() {
profiling::scope!("destroy_render_pipelines");
for raw in self.render_pipes.drain(..) {
device.destroy_render_pipeline(raw);
}
}
for raw in self.bind_group_layouts.drain(..) {
device.destroy_bind_group_layout(raw);
if !self.bind_group_layouts.is_empty() {
profiling::scope!("destroy_bind_group_layouts");
for raw in self.bind_group_layouts.drain(..) {
device.destroy_bind_group_layout(raw);
}
}
for raw in self.pipeline_layouts.drain(..) {
device.destroy_pipeline_layout(raw);
if !self.pipeline_layouts.is_empty() {
profiling::scope!("destroy_pipeline_layouts");
for raw in self.pipeline_layouts.drain(..) {
device.destroy_pipeline_layout(raw);
}
}
for raw in self.query_sets.drain(..) {
device.destroy_query_set(raw);
if !self.query_sets.is_empty() {
profiling::scope!("destroy_query_sets");
for raw in self.query_sets.drain(..) {
device.destroy_query_set(raw);
}
}
}
}
@ -290,7 +320,7 @@ impl<A: hal::Api> LifetimeTracker<A> {
}
pub fn cleanup(&mut self, device: &A::Device) {
profiling::scope!("cleanup");
profiling::scope!("cleanup", "LifetimeTracker");
unsafe {
self.free_resources.clean(device);
}

View File

@ -2658,6 +2658,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
surface_id: id::SurfaceId,
adapter_id: id::AdapterId,
) -> Result<TextureFormat, instance::GetSurfacePreferredFormatError> {
profiling::scope!("surface_get_preferred_format");
let hub = A::hub(self);
let mut token = Token::root();

View File

@ -269,7 +269,11 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
let stage = device.prepare_stage(data_size)?;
unsafe { stage.write(&device.raw, 0, data) }.map_err(DeviceError::from)?;
unsafe {
profiling::scope!("copy");
stage.write(&device.raw, 0, data)
}
.map_err(DeviceError::from)?;
let mut trackers = device.trackers.lock();
let (dst, transition) = trackers
@ -445,9 +449,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let mapping = unsafe { device.raw.map_buffer(&stage.buffer, 0..stage_size) }
.map_err(DeviceError::from)?;
unsafe {
profiling::scope!("copy");
if stage_bytes_per_row == bytes_per_row {
profiling::scope!("copy aligned");
// Fast path if the data is already being aligned optimally.
ptr::copy_nonoverlapping(
data.as_ptr().offset(data_layout.offset as isize),
@ -455,6 +458,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
stage_size as usize,
);
} else {
profiling::scope!("copy chunked");
// Copy row by row into the optimal alignment.
let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
for layer in 0..size.depth_or_array_layers {

View File

@ -193,6 +193,7 @@ impl Surface {
let suf = A::get_surface(self);
let caps = unsafe {
profiling::scope!("surface_capabilities");
adapter
.raw
.adapter
@ -517,6 +518,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
if let Some(ref inst) = *instance_field {
let hub = HalApi::hub(self);
if let Some(id_backend) = inputs.find(backend) {
profiling::scope!("enumerating", backend_info);
for raw in unsafe {inst.enumerate_adapters()} {
let adapter = Adapter::new(raw);
log::info!("Adapter {} {:?}", backend_info, adapter.raw.info);

View File

@ -22,6 +22,7 @@ renderdoc = ["libloading", "renderdoc-sys"]
[dependencies]
bitflags = "1.0"
parking_lot = "0.11"
profiling = { version = "1", default-features = false }
raw-window-handle = "0.3"
thiserror = "1"

View File

@ -45,20 +45,25 @@ impl super::Adapter {
instance_flags: crate::InstanceFlags,
) -> Option<crate::ExposedAdapter<super::Api>> {
// Create the device so that we can get the capabilities.
let device = match library.create_device(adapter, native::FeatureLevel::L11_0) {
Ok(pair) => match pair.into_result() {
Ok(device) => device,
let device = {
profiling::scope!("ID3D12Device::create_device");
match library.create_device(adapter, native::FeatureLevel::L11_0) {
Ok(pair) => match pair.into_result() {
Ok(device) => device,
Err(err) => {
log::warn!("Device creation failed: {}", err);
return None;
}
},
Err(err) => {
log::warn!("Device creation failed: {}", err);
log::warn!("Device creation function is not found: {:?}", err);
return None;
}
},
Err(err) => {
log::warn!("Device creation function is not found: {:?}", err);
return None;
}
};
profiling::scope!("feature queries");
// We have found a possible adapter.
// Acquire the device information.
let mut desc: dxgi1_2::DXGI_ADAPTER_DESC2 = unsafe { mem::zeroed() };
@ -264,15 +269,17 @@ impl crate::Adapter<super::Api> for super::Adapter {
features: wgt::Features,
_limits: &wgt::Limits,
) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
let queue = self
.device
.create_command_queue(
native::CmdListType::Direct,
native::Priority::Normal,
native::CommandQueueFlags::empty(),
0,
)
.into_device_result("Queue creation")?;
let queue = {
profiling::scope!("ID3D12Device::CreateCommandQueue");
self.device
.create_command_queue(
native::CmdListType::Direct,
native::Priority::Normal,
native::CommandQueueFlags::empty(),
0,
)
.into_device_result("Queue creation")?
};
let device = super::Device::new(
self.device,

View File

@ -767,9 +767,14 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
};
self.temp.barriers.push(barrier);
}
list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr());
if !self.temp.barriers.is_empty() {
profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier");
list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr());
}
for resolve in self.pass.resolves.iter() {
profiling::scope!("ID3D12GraphicsCommandList::ResolveSubresource");
list.ResolveSubresource(
resolve.dst.0.as_mut_ptr(),
resolve.dst.1,
@ -784,7 +789,10 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
let transition = barrier.u.Transition_mut();
mem::swap(&mut transition.StateBefore, &mut transition.StateAfter);
}
list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr());
if !self.temp.barriers.is_empty() {
profiling::scope!("ID3D12GraphicsCommandList::ResourceBarrier");
list.ResourceBarrier(self.temp.barriers.len() as u32, self.temp.barriers.as_ptr());
}
}
self.end_pass();

View File

@ -41,14 +41,17 @@ impl GeneralHeap {
ty: native::DescriptorHeapType,
total_handles: u64,
) -> Result<Self, crate::DeviceError> {
let raw = device
.create_descriptor_heap(
total_handles as u32,
ty,
native::DescriptorHeapFlags::SHADER_VISIBLE,
0,
)
.into_device_result("Descriptor heap creation")?;
let raw = {
profiling::scope!("ID3D12Device::CreateDescriptorHeap");
device
.create_descriptor_heap(
total_handles as u32,
ty,
native::DescriptorHeapFlags::SHADER_VISIBLE,
0,
)
.into_device_result("Descriptor heap creation")?
};
Ok(Self {
raw,

View File

@ -22,6 +22,7 @@ impl super::Device {
) -> Result<Self, crate::DeviceError> {
let mut idle_fence = native::Fence::null();
let hr = unsafe {
profiling::scope!("ID3D12Device::CreateFence");
raw.CreateFence(
0,
d3d12::D3D12_FENCE_FLAG_NONE,
@ -60,6 +61,7 @@ impl super::Device {
VisibleNodeMask: 0,
};
profiling::scope!("Zero Buffer Allocation");
raw.CreateCommittedResource(
&heap_properties,
d3d12::D3D12_HEAP_FLAG_NONE,
@ -71,7 +73,7 @@ impl super::Device {
)
.into_device_result("Zero buffer creation")?;
//Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED`
// Note: without `D3D12_HEAP_FLAG_CREATE_NOT_ZEROED`
// this resource is zeroed by default.
};
@ -182,9 +184,12 @@ impl super::Device {
//TODO: reuse the writer
let mut source = String::new();
let mut writer = hlsl::Writer::new(&mut source, &layout.naga_options);
let reflection_info = writer
.write(module, &stage.module.naga.info)
.map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {:?}", e)))?;
let reflection_info = {
profiling::scope!("naga::back::hlsl::write");
writer
.write(module, &stage.module.naga.info)
.map_err(|e| crate::PipelineError::Linkage(stage_bit, format!("HLSL: {:?}", e)))?
};
let full_stage = format!(
"{}_{}\0",
@ -226,6 +231,7 @@ impl super::Device {
};
let hr = unsafe {
profiling::scope!("d3dcompiler::D3DCompile");
d3dcompiler::D3DCompile(
source.as_ptr() as *const _,
source.len(),
@ -1332,11 +1338,14 @@ impl crate::Device<super::Api> for super::Device {
};
let mut raw = native::PipelineState::null();
let hr = self.raw.CreateGraphicsPipelineState(
&raw_desc,
&d3d12::ID3D12PipelineState::uuidof(),
raw.mut_void(),
);
let hr = {
profiling::scope!("ID3D12Device::CreateGraphicsPipelineState");
self.raw.CreateGraphicsPipelineState(
&raw_desc,
&d3d12::ID3D12PipelineState::uuidof(),
raw.mut_void(),
)
};
blob_vs.destroy();
if !blob_fs.is_null() {
@ -1368,13 +1377,16 @@ impl crate::Device<super::Api> for super::Device {
) -> Result<super::ComputePipeline, crate::PipelineError> {
let blob_cs = self.load_shader(&desc.stage, desc.layout, naga::ShaderStage::Compute)?;
let pair = self.raw.create_compute_pipeline_state(
desc.layout.shared.signature,
native::Shader::from_blob(blob_cs),
0,
native::CachedPSO::null(),
native::PipelineStateFlags::empty(),
);
let pair = {
profiling::scope!("ID3D12Device::CreateComputePipelineState");
self.raw.create_compute_pipeline_state(
desc.layout.shared.signature,
native::Shader::from_blob(blob_cs),
0,
native::CachedPSO::null(),
native::PipelineStateFlags::empty(),
)
};
blob_cs.destroy();

View File

@ -183,6 +183,7 @@ impl crate::Instance<super::Api> for super::Instance {
for cur_index in 0.. {
let raw = match factory6 {
Some(factory) => {
profiling::scope!("IDXGIFactory6::EnumAdapterByGpuPreference");
let mut adapter2 = native::WeakPtr::<dxgi1_2::IDXGIAdapter2>::null();
let hr = factory.EnumAdapterByGpuPreference(
cur_index,
@ -202,6 +203,7 @@ impl crate::Instance<super::Api> for super::Instance {
adapter2
}
None => {
profiling::scope!("IDXGIFactory1::EnumAdapters1");
let mut adapter1 = native::WeakPtr::<dxgi::IDXGIAdapter1>::null();
let hr = self
.factory

View File

@ -615,14 +615,17 @@ impl crate::Surface<Api> for Surface {
SwapEffect: dxgi::DXGI_SWAP_EFFECT_FLIP_DISCARD,
};
let hr = self.factory.CreateSwapChainForHwnd(
device.present_queue.as_mut_ptr() as *mut _,
self.wnd_handle,
&raw_desc,
ptr::null(),
ptr::null_mut(),
swap_chain1.mut_void() as *mut *mut _,
);
let hr = {
profiling::scope!("IDXGIFactory4::CreateSwapChainForHwnd");
self.factory.CreateSwapChainForHwnd(
device.present_queue.as_mut_ptr() as *mut _,
self.wnd_handle,
&raw_desc,
ptr::null(),
ptr::null_mut(),
swap_chain1.mut_void() as *mut *mut _,
)
};
if let Err(err) = hr.into_result() {
log::error!("SwapChain creation error: {}", err);
@ -724,7 +727,10 @@ impl crate::Queue<Api> for Queue {
self.temp_lists.push(cmd_buf.raw.as_list());
}
self.raw.execute_command_lists(&self.temp_lists);
{
profiling::scope!("ID3D12CommandQueue::ExecuteCommandLists");
self.raw.execute_command_lists(&self.temp_lists);
}
if let Some((fence, value)) = signal_fence {
self.raw
@ -746,6 +752,8 @@ impl crate::Queue<Api> for Queue {
wgt::PresentMode::Fifo => (1, 0),
wgt::PresentMode::Mailbox => (1, 0),
};
profiling::scope!("IDXGISwapchain3::Present");
sc.raw.Present(interval, flags);
Ok(())

View File

@ -960,10 +960,12 @@ impl super::Adapter {
family_index: u32,
queue_index: u32,
) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
let mem_properties = self
.instance
.raw
.get_physical_device_memory_properties(self.raw);
let mem_properties = {
profiling::scope!("vkGetPhysicalDeviceMemoryProperties");
self.instance
.raw
.get_physical_device_memory_properties(self.raw)
};
let memory_types =
&mem_properties.memory_types[..mem_properties.memory_type_count as usize];
let valid_ash_memory_types = memory_types.iter().enumerate().fold(0, |u, (i, mem)| {
@ -1051,7 +1053,10 @@ impl super::Adapter {
};
log::info!("Private capabilities: {:?}", self.private_caps);
let raw_queue = raw_device.get_device_queue(family_index, queue_index);
let raw_queue = {
profiling::scope!("vkGetDeviceQueue");
raw_device.get_device_queue(family_index, queue_index)
};
let shared = Arc::new(super::DeviceShared {
raw: raw_device,
@ -1166,7 +1171,10 @@ impl crate::Adapter<super::Api> for super::Adapter {
let info = enabled_phd_features
.add_to_device_create_builder(pre_info)
.build();
let raw_device = self.instance.raw.create_device(self.raw, &info, None)?;
let raw_device = {
profiling::scope!("vkCreateDevice");
self.instance.raw.create_device(self.raw, &info, None)?
};
self.device_from_raw(
raw_device,
@ -1247,27 +1255,33 @@ impl crate::Adapter<super::Api> for super::Adapter {
}
let queue_family_index = 0; //TODO
match surface.functor.get_physical_device_surface_support(
self.raw,
queue_family_index,
surface.raw,
) {
Ok(true) => (),
Ok(false) => return None,
Err(e) => {
log::error!("get_physical_device_surface_support: {}", e);
return None;
{
profiling::scope!("vkGetPhysicalDeviceSurfaceSupportKHR");
match surface.functor.get_physical_device_surface_support(
self.raw,
queue_family_index,
surface.raw,
) {
Ok(true) => (),
Ok(false) => return None,
Err(e) => {
log::error!("get_physical_device_surface_support: {}", e);
return None;
}
}
}
let caps = match surface
.functor
.get_physical_device_surface_capabilities(self.raw, surface.raw)
{
Ok(caps) => caps,
Err(e) => {
log::error!("get_physical_device_surface_capabilities: {}", e);
return None;
let caps = {
profiling::scope!("vkGetPhysicalDeviceSurfaceCapabilitiesKHR");
match surface
.functor
.get_physical_device_surface_capabilities(self.raw, surface.raw)
{
Ok(caps) => caps,
Err(e) => {
log::error!("get_physical_device_surface_capabilities: {}", e);
return None;
}
}
};
@ -1302,25 +1316,31 @@ impl crate::Adapter<super::Api> for super::Adapter {
depth_or_array_layers: caps.max_image_array_layers,
};
let raw_present_modes = match surface
.functor
.get_physical_device_surface_present_modes(self.raw, surface.raw)
{
Ok(present_modes) => present_modes,
Err(e) => {
log::error!("get_physical_device_surface_present_modes: {}", e);
Vec::new()
let raw_present_modes = {
profiling::scope!("vkGetPhysicalDeviceSurfacePresentModesKHR");
match surface
.functor
.get_physical_device_surface_present_modes(self.raw, surface.raw)
{
Ok(present_modes) => present_modes,
Err(e) => {
log::error!("get_physical_device_surface_present_modes: {}", e);
Vec::new()
}
}
};
let raw_surface_formats = match surface
.functor
.get_physical_device_surface_formats(self.raw, surface.raw)
{
Ok(formats) => formats,
Err(e) => {
log::error!("get_physical_device_surface_formats: {}", e);
Vec::new()
let raw_surface_formats = {
profiling::scope!("vkGetPhysicalDeviceSurfaceFormatsKHR");
match surface
.functor
.get_physical_device_surface_formats(self.raw, surface.raw)
{
Ok(formats) => formats,
Err(e) => {
log::error!("get_physical_device_surface_formats: {}", e);
Vec::new()
}
}
};

View File

@ -478,6 +478,7 @@ impl super::Device {
config: &crate::SurfaceConfiguration,
provided_old_swapchain: Option<super::Swapchain>,
) -> Result<super::Swapchain, crate::SurfaceError> {
profiling::scope!("Device::create_swapchain");
let functor = khr::Swapchain::new(&surface.instance.raw, &self.shared.raw);
let old_swapchain = match provided_old_swapchain {
@ -504,7 +505,10 @@ impl super::Device {
.clipped(true)
.old_swapchain(old_swapchain);
let result = functor.create_swapchain(&info, None);
let result = {
profiling::scope!("vkCreateSwapchainKHR");
functor.create_swapchain(&info, None)
};
// doing this before bailing out with error
if old_swapchain != vk::SwapchainKHR::null() {
@ -575,7 +579,10 @@ impl super::Device {
.flags(vk::ShaderModuleCreateFlags::empty())
.code(spv);
let raw = unsafe { self.shared.raw.create_shader_module(&vk_info, None)? };
let raw = unsafe {
profiling::scope!("vkCreateShaderModule");
self.shared.raw.create_shader_module(&vk_info, None)?
};
Ok(raw)
}
@ -609,12 +616,15 @@ impl super::Device {
} else {
&self.naga_options
};
let spv = naga::back::spv::write_vec(
&naga_shader.module,
&naga_shader.info,
options,
Some(&pipeline_options),
)
let spv = {
profiling::scope!("naga::spv::write_vec");
naga::back::spv::write_vec(
&naga_shader.module,
&naga_shader.info,
options,
Some(&pipeline_options),
)
}
.map_err(|e| crate::PipelineError::Linkage(stage_flags, format!("{}", e)))?;
self.create_shader_module_impl(&spv)?
}
@ -1146,7 +1156,10 @@ impl crate::Device<super::Api> for super::Device {
.set_layouts(&vk_set_layouts)
.push_constant_ranges(&vk_push_constant_ranges);
let raw = self.shared.raw.create_pipeline_layout(&vk_info, None)?;
let raw = {
profiling::scope!("vkCreatePipelineLayout");
self.shared.raw.create_pipeline_layout(&vk_info, None)?
};
if let Some(label) = desc.label {
self.shared
@ -1494,11 +1507,13 @@ impl crate::Device<super::Api> for super::Device {
.build()
}];
let mut raw_vec = self
.shared
.raw
.create_graphics_pipelines(vk::PipelineCache::null(), &vk_infos, None)
.map_err(|(_, e)| crate::DeviceError::from(e))?;
let mut raw_vec = {
profiling::scope!("vkCreateGraphicsPipelines");
self.shared
.raw
.create_graphics_pipelines(vk::PipelineCache::null(), &vk_infos, None)
.map_err(|(_, e)| crate::DeviceError::from(e))?
};
let raw = raw_vec.pop().unwrap();
if let Some(label) = desc.label {
@ -1536,11 +1551,13 @@ impl crate::Device<super::Api> for super::Device {
.build()
}];
let mut raw_vec = self
.shared
.raw
.create_compute_pipelines(vk::PipelineCache::null(), &vk_infos, None)
.map_err(|(_, e)| crate::DeviceError::from(e))?;
let mut raw_vec = {
profiling::scope!("vkCreateComputePipelines");
self.shared
.raw
.create_compute_pipelines(vk::PipelineCache::null(), &vk_infos, None)
.map_err(|(_, e)| crate::DeviceError::from(e))?
};
let raw = raw_vec.pop().unwrap();
if let Some(label) = desc.label {

View File

@ -108,7 +108,11 @@ unsafe extern "system" fn debug_utils_messenger_callback(
impl super::Swapchain {
unsafe fn release_resources(self, device: &ash::Device) -> Self {
let _ = device.device_wait_idle();
profiling::scope!("Swapchain::release_resources");
{
profiling::scope!("vkDeviceWaitIdle");
let _ = device.device_wait_idle();
};
device.destroy_fence(self.fence, None);
self
}

View File

@ -607,6 +607,7 @@ impl crate::Queue<Api> for Queue {
};
vk_info = vk_info.signal_semaphores(&semaphores[..signal_count]);
profiling::scope!("vkQueueSubmit");
self.device
.raw
.queue_submit(self.raw, &[vk_info.build()], fence_raw)?;
@ -632,14 +633,16 @@ impl crate::Queue<Api> for Queue {
self.relay_active = false;
}
let suboptimal = self
.swapchain_fn
.queue_present(self.raw, &vk_info)
.map_err(|error| match error {
vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
_ => crate::DeviceError::from(error).into(),
})?;
let suboptimal = {
profiling::scope!("vkQueuePresentKHR");
self.swapchain_fn
.queue_present(self.raw, &vk_info)
.map_err(|error| match error {
vk::Result::ERROR_OUT_OF_DATE_KHR => crate::SurfaceError::Outdated,
vk::Result::ERROR_SURFACE_LOST_KHR => crate::SurfaceError::Lost,
_ => crate::DeviceError::from(error).into(),
})?
};
if suboptimal {
log::warn!("Suboptimal present of frame {}", texture.index);
}