// This example showcases how you can most effectively update a resource asynchronously, such that // your rendering or any other tasks can use the resource without any latency at the same time as // it's being updated. // // There are two kinds of resources that are updated asynchronously here: // // - A uniform buffer, which needs to be updated every frame. // - A large texture, which needs to be updated partially at the request of the user. // // For the first, since the data needs to be updated every frame, we have to use one buffer per // frame in flight. The swapchain most commonly has multiple images that are all processed at the // same time, therefore writing the same buffer during each frame in flight would result in one of // two things: either you would have to synchronize the writes from the host and reads from the // device such that only one of the images in the swapchain is actually processed at any point in // time (bad), or a race condition (bad). Therefore we are left with no choice but to use a // different buffer for each frame in flight. This is best suited to very small pieces of data that // change rapidly, and where the data of one frame doesn't depend on data from a previous one. // // For the second, since this texture is rather large, we can't afford to overwrite the entire // texture every time a part of it needs to be updated. Also, we don't need as many textures as // there are frames in flight since the texture doesn't need to be updated every frame, but we // still need at least two textures. That way we can write one of the textures at the same time as // reading the other, swapping them after the write is done such that the newly updated one is read // and the now out-of-date one can be written to next time, known as *eventual consistency*. // // In an eventually consistent system, a number of *replicas* are used, all of which represent the // same data but their consistency is not strict. A replica might be out-of-date for some time // before *reaching convergence*, hence becoming consistent, eventually. use glam::f32::Mat4; use rand::Rng; use std::{ error::Error, slice, sync::{ atomic::{AtomicBool, Ordering}, mpsc, Arc, }, thread, time::{SystemTime, UNIX_EPOCH}, }; use vulkano::{ buffer::{Buffer, BufferContents, BufferCreateInfo, BufferUsage}, command_buffer::RenderPassBeginInfo, descriptor_set::{ allocator::StandardDescriptorSetAllocator, DescriptorSet, WriteDescriptorSet, }, device::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, Queue, QueueCreateInfo, QueueFlags, }, format::Format, image::{ sampler::{Sampler, SamplerCreateInfo}, view::ImageView, Image, ImageAspects, ImageCreateInfo, ImageSubresourceLayers, ImageType, ImageUsage, }, instance::{Instance, InstanceCreateFlags, InstanceCreateInfo}, memory::allocator::{AllocationCreateInfo, DeviceLayout, MemoryTypeFilter}, pipeline::{ graphics::{ color_blend::{ColorBlendAttachmentState, ColorBlendState}, input_assembly::{InputAssemblyState, PrimitiveTopology}, multisample::MultisampleState, rasterization::RasterizationState, vertex_input::{Vertex, VertexDefinition}, viewport::{Viewport, ViewportState}, GraphicsPipelineCreateInfo, }, layout::PipelineDescriptorSetLayoutCreateInfo, DynamicState, GraphicsPipeline, Pipeline, PipelineBindPoint, PipelineLayout, PipelineShaderStageCreateInfo, }, render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, swapchain::{Surface, Swapchain, SwapchainCreateInfo}, sync::Sharing, DeviceSize, Validated, VulkanError, VulkanLibrary, }; use vulkano_taskgraph::{ command_buffer::{ BufferImageCopy, ClearColorImageInfo, CopyBufferToImageInfo, RecordingCommandBuffer, }, graph::{CompileInfo, ExecutableTaskGraph, ExecuteError, TaskGraph}, resource::{AccessType, Flight, HostAccessType, ImageLayoutType, Resources}, resource_map, Id, QueueFamilyType, Task, TaskContext, TaskResult, }; use winit::{ application::ApplicationHandler, event::{ElementState, KeyEvent, WindowEvent}, event_loop::{ActiveEventLoop, EventLoop}, keyboard::{Key, NamedKey}, window::{Window, WindowId}, }; const TRANSFER_GRANULARITY: u32 = 4096; const MAX_FRAMES_IN_FLIGHT: u32 = 2; fn main() -> Result<(), impl Error> { let event_loop = EventLoop::new().unwrap(); let mut app = App::new(&event_loop); println!("\nPress space to update part of the texture"); event_loop.run_app(&mut app) } struct App { instance: Arc, device: Arc, graphics_family_index: u32, transfer_family_index: u32, graphics_queue: Arc, resources: Arc, graphics_flight_id: Id, vertex_buffer_id: Id, uniform_buffer_ids: [Id; MAX_FRAMES_IN_FLIGHT as usize], texture_ids: [Id; 2], current_texture_index: Arc, channel: mpsc::Sender<()>, rcx: Option, } struct RenderContext { window: Arc, swapchain_id: Id, render_pass: Arc, framebuffers: Vec>, viewport: Viewport, recreate_swapchain: bool, task_graph: ExecutableTaskGraph, virtual_swapchain_id: Id, virtual_texture_id: Id, virtual_uniform_buffer_id: Id, } impl App { fn new(event_loop: &EventLoop<()>) -> Self { let library = VulkanLibrary::new().unwrap(); let required_extensions = Surface::required_extensions(event_loop).unwrap(); let instance = Instance::new( library, InstanceCreateInfo { flags: InstanceCreateFlags::ENUMERATE_PORTABILITY, enabled_extensions: required_extensions, ..Default::default() }, ) .unwrap(); let device_extensions = DeviceExtensions { khr_swapchain: true, ..DeviceExtensions::empty() }; let (physical_device, graphics_family_index) = instance .enumerate_physical_devices() .unwrap() .filter(|p| p.supported_extensions().contains(&device_extensions)) .filter_map(|p| { p.queue_family_properties() .iter() .enumerate() .position(|(i, q)| { q.queue_flags.intersects(QueueFlags::GRAPHICS) && p.presentation_support(i as u32, event_loop).unwrap() }) .map(|i| (p, i as u32)) }) .min_by_key(|(p, _)| match p.properties().device_type { PhysicalDeviceType::DiscreteGpu => 0, PhysicalDeviceType::IntegratedGpu => 1, PhysicalDeviceType::VirtualGpu => 2, PhysicalDeviceType::Cpu => 3, PhysicalDeviceType::Other => 4, _ => 5, }) .unwrap(); println!( "Using device: {} (type: {:?})", physical_device.properties().device_name, physical_device.properties().device_type, ); // Since we are going to be updating the texture on a separate thread asynchronously from // the execution of graphics commands, it would make sense to also do the transfer on a // dedicated transfer queue, if such a queue family exists. That way, the graphics queue is // not blocked during the transfers either and the two tasks are truly asynchronous. // // For this, we need to find the queue family with the fewest queue flags set, since if the // queue family has more flags than `TRANSFER | SPARSE_BINDING`, that means it is not // dedicated to transfer operations. let transfer_family_index = physical_device .queue_family_properties() .iter() .enumerate() .filter(|(_, q)| { q.queue_flags.intersects(QueueFlags::TRANSFER) // Queue families dedicated to transfers are not required to support partial // transfers of images, reported by a minimum granularity of [0, 0, 0]. If you need // to do partial transfers of images like we do in this example, you therefore have // to make sure the queue family supports that. && q.min_image_transfer_granularity != [0; 3] // Unlike queue families for graphics and/or compute, queue families dedicated to // transfers don't have to support image transfers of arbitrary granularity. // Therefore, if you are going to use one, you have to either make sure the // granularity is granular enough for your needs, or you have to align your // transfer offsets and extents to this granularity. Our minimum granularity is // 4096 which should be more than coarse enough so we just check that it is. && q.min_image_transfer_granularity[0..2] .iter() .all(|&g| TRANSFER_GRANULARITY % g == 0) }) .min_by_key(|(_, q)| q.queue_flags.count()) .unwrap() .0 as u32; let (device, mut queues) = { let mut queue_create_infos = vec![QueueCreateInfo { queue_family_index: graphics_family_index, ..Default::default() }]; // It's possible that the physical device doesn't have any queue families supporting // transfers other than the graphics and/or compute queue family. In that case we must // make sure we don't request the same queue family twice. if transfer_family_index != graphics_family_index { queue_create_infos.push(QueueCreateInfo { queue_family_index: transfer_family_index, ..Default::default() }); } else { let queue_family_properties = &physical_device.queue_family_properties()[graphics_family_index as usize]; // Even if we can't get an async transfer queue family, it's still better to use // different queues on the same queue family. This way, at least the threads on the // host don't have to lock the same queue when submitting. if queue_family_properties.queue_count > 1 { queue_create_infos[0].queues.push(0.5); } } Device::new( physical_device, DeviceCreateInfo { enabled_extensions: device_extensions, queue_create_infos, ..Default::default() }, ) .unwrap() }; let graphics_queue = queues.next().unwrap(); // If we didn't get a dedicated transfer queue, fall back to the graphics queue for // transfers. let transfer_queue = queues.next().unwrap_or_else(|| graphics_queue.clone()); println!( "Using queue family {graphics_family_index} for graphics and queue family \ {transfer_family_index} for transfers", ); let resources = Resources::new(&device, &Default::default()); let graphics_flight_id = resources.create_flight(MAX_FRAMES_IN_FLIGHT).unwrap(); let transfer_flight_id = resources.create_flight(1).unwrap(); let vertices = [ MyVertex { position: [-0.5, -0.5], }, MyVertex { position: [-0.5, 0.5], }, MyVertex { position: [0.5, -0.5], }, MyVertex { position: [0.5, 0.5], }, ]; let vertex_buffer_id = resources .create_buffer( BufferCreateInfo { usage: BufferUsage::VERTEX_BUFFER, ..Default::default() }, AllocationCreateInfo { memory_type_filter: MemoryTypeFilter::PREFER_DEVICE | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, DeviceLayout::for_value(vertices.as_slice()).unwrap(), ) .unwrap(); // Create a pool of uniform buffers, one per frame in flight. This way we always have an // available buffer to write during each frame while reusing them as much as possible. let uniform_buffer_ids = [(); MAX_FRAMES_IN_FLIGHT as usize].map(|_| { resources .create_buffer( BufferCreateInfo { usage: BufferUsage::UNIFORM_BUFFER, ..Default::default() }, AllocationCreateInfo { memory_type_filter: MemoryTypeFilter::PREFER_DEVICE | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, DeviceLayout::new_sized::(), ) .unwrap() }); // Create two textures, where at any point in time one is used exclusively for reading and // one is used exclusively for writing, swapping the two after each update. let texture_ids = [(); 2].map(|_| { resources .create_image( ImageCreateInfo { image_type: ImageType::Dim2d, format: Format::R8G8B8A8_UNORM, extent: [TRANSFER_GRANULARITY * 2, TRANSFER_GRANULARITY * 2, 1], usage: ImageUsage::TRANSFER_DST | ImageUsage::SAMPLED, sharing: if graphics_family_index != transfer_family_index { Sharing::Concurrent( [graphics_family_index, transfer_family_index] .into_iter() .collect(), ) } else { Sharing::Exclusive }, ..Default::default() }, AllocationCreateInfo::default(), ) .unwrap() }); // The index of the currently most up-to-date texture. The worker thread swaps the index // after every finished write, which is always done to the, at that point in time, unused // texture. let current_texture_index = Arc::new(AtomicBool::new(false)); // Initialize the resources. unsafe { vulkano_taskgraph::execute( &graphics_queue, &resources, graphics_flight_id, |cbf, tcx| { tcx.write_buffer::<[MyVertex]>(vertex_buffer_id, ..)? .copy_from_slice(&vertices); for &texture_id in &texture_ids { cbf.clear_color_image(&ClearColorImageInfo { image: texture_id, ..Default::default() })?; } Ok(()) }, [(vertex_buffer_id, HostAccessType::Write)], [], [ ( texture_ids[0], AccessType::ClearTransferWrite, ImageLayoutType::Optimal, ), ( texture_ids[1], AccessType::ClearTransferWrite, ImageLayoutType::Optimal, ), ], ) } .unwrap(); // Start the worker thread. let (channel, receiver) = mpsc::channel(); run_worker( receiver, graphics_family_index, transfer_family_index, transfer_queue.clone(), resources.clone(), graphics_flight_id, transfer_flight_id, texture_ids, current_texture_index.clone(), ); App { instance, device, graphics_family_index, transfer_family_index, graphics_queue, resources, graphics_flight_id, vertex_buffer_id, uniform_buffer_ids, texture_ids, current_texture_index, channel, rcx: None, } } } impl ApplicationHandler for App { fn resumed(&mut self, event_loop: &ActiveEventLoop) { let window = Arc::new( event_loop .create_window(Window::default_attributes()) .unwrap(), ); let surface = Surface::from_window(self.instance.clone(), window.clone()).unwrap(); let window_size = window.inner_size(); let swapchain_format; let swapchain_id = { let surface_capabilities = self .device .physical_device() .surface_capabilities(&surface, Default::default()) .unwrap(); (swapchain_format, _) = self .device .physical_device() .surface_formats(&surface, Default::default()) .unwrap()[0]; self.resources .create_swapchain( self.graphics_flight_id, surface, SwapchainCreateInfo { min_image_count: surface_capabilities.min_image_count.max(3), image_format: swapchain_format, image_extent: window_size.into(), image_usage: ImageUsage::COLOR_ATTACHMENT, composite_alpha: surface_capabilities .supported_composite_alpha .into_iter() .next() .unwrap(), ..Default::default() }, ) .unwrap() }; let render_pass = vulkano::single_pass_renderpass!( self.device.clone(), attachments: { color: { format: swapchain_format, samples: 1, load_op: Clear, store_op: Store, }, }, pass: { color: [color], depth_stencil: {}, }, ) .unwrap(); let framebuffers = window_size_dependent_setup(&self.resources, swapchain_id, &render_pass); let pipeline = { let vs = vs::load(self.device.clone()) .unwrap() .entry_point("main") .unwrap(); let fs = fs::load(self.device.clone()) .unwrap() .entry_point("main") .unwrap(); let vertex_input_state = MyVertex::per_vertex().definition(&vs).unwrap(); let stages = [ PipelineShaderStageCreateInfo::new(vs), PipelineShaderStageCreateInfo::new(fs), ]; let layout = PipelineLayout::new( self.device.clone(), PipelineDescriptorSetLayoutCreateInfo::from_stages(&stages) .into_pipeline_layout_create_info(self.device.clone()) .unwrap(), ) .unwrap(); let subpass = Subpass::from(render_pass.clone(), 0).unwrap(); GraphicsPipeline::new( self.device.clone(), None, GraphicsPipelineCreateInfo { stages: stages.into_iter().collect(), vertex_input_state: Some(vertex_input_state), input_assembly_state: Some(InputAssemblyState { topology: PrimitiveTopology::TriangleStrip, ..Default::default() }), viewport_state: Some(ViewportState::default()), rasterization_state: Some(RasterizationState::default()), multisample_state: Some(MultisampleState::default()), color_blend_state: Some(ColorBlendState::with_attachment_states( subpass.num_color_attachments(), ColorBlendAttachmentState::default(), )), dynamic_state: [DynamicState::Viewport].into_iter().collect(), subpass: Some(subpass.into()), ..GraphicsPipelineCreateInfo::layout(layout) }, ) .unwrap() }; let viewport = Viewport { offset: [0.0, 0.0], extent: window_size.into(), depth_range: 0.0..=1.0, }; let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new( self.device.clone(), Default::default(), )); // A byproduct of always using the same set of uniform buffers is that we can also create // one descriptor set for each, reusing them in the same way as the buffers. let uniform_buffer_sets = self.uniform_buffer_ids.map(|buffer_id| { let buffer_state = self.resources.buffer(buffer_id).unwrap(); let buffer = buffer_state.buffer(); DescriptorSet::new( descriptor_set_allocator.clone(), pipeline.layout().set_layouts()[0].clone(), [WriteDescriptorSet::buffer(0, buffer.clone().into())], [], ) .unwrap() }); // Create the descriptor sets for sampling the textures. let sampler = Sampler::new( self.device.clone(), SamplerCreateInfo::simple_repeat_linear(), ) .unwrap(); let sampler_sets = self.texture_ids.map(|texture_id| { let texture_state = self.resources.image(texture_id).unwrap(); let texture = texture_state.image(); DescriptorSet::new( descriptor_set_allocator.clone(), pipeline.layout().set_layouts()[1].clone(), [ WriteDescriptorSet::sampler(0, sampler.clone()), WriteDescriptorSet::image_view( 1, ImageView::new_default(texture.clone()).unwrap(), ), ], [], ) .unwrap() }); let mut task_graph = TaskGraph::new(&self.resources, 1, 4); let virtual_swapchain_id = task_graph.add_swapchain(&SwapchainCreateInfo::default()); let virtual_texture_id = task_graph.add_image(&ImageCreateInfo { sharing: if self.graphics_family_index != self.transfer_family_index { Sharing::Concurrent( [self.graphics_family_index, self.transfer_family_index] .into_iter() .collect(), ) } else { Sharing::Exclusive }, ..Default::default() }); let virtual_uniform_buffer_id = task_graph.add_buffer(&BufferCreateInfo::default()); task_graph.add_host_buffer_access(virtual_uniform_buffer_id, HostAccessType::Write); task_graph .create_task_node( "Render", QueueFamilyType::Graphics, RenderTask { swapchain_id: virtual_swapchain_id, vertex_buffer_id: self.vertex_buffer_id, current_texture_index: self.current_texture_index.clone(), pipeline, uniform_buffer_id: virtual_uniform_buffer_id, uniform_buffer_sets, sampler_sets, }, ) .image_access( virtual_swapchain_id.current_image_id(), AccessType::ColorAttachmentWrite, ImageLayoutType::Optimal, ) .buffer_access(self.vertex_buffer_id, AccessType::VertexAttributeRead) .image_access( virtual_texture_id, AccessType::FragmentShaderSampledRead, ImageLayoutType::Optimal, ) .buffer_access( virtual_uniform_buffer_id, AccessType::VertexShaderUniformRead, ); let task_graph = unsafe { task_graph.compile(&CompileInfo { queues: &[&self.graphics_queue], present_queue: Some(&self.graphics_queue), flight_id: self.graphics_flight_id, ..Default::default() }) } .unwrap(); self.rcx = Some(RenderContext { window, swapchain_id, render_pass, framebuffers, viewport, recreate_swapchain: false, task_graph, virtual_swapchain_id, virtual_texture_id, virtual_uniform_buffer_id, }); } fn window_event( &mut self, event_loop: &ActiveEventLoop, _window_id: WindowId, event: WindowEvent, ) { let rcx = self.rcx.as_mut().unwrap(); match event { WindowEvent::CloseRequested => { event_loop.exit(); } WindowEvent::Resized(_) => { rcx.recreate_swapchain = true; } WindowEvent::KeyboardInput { event: KeyEvent { logical_key: Key::Named(NamedKey::Space), state: ElementState::Released, .. }, .. } => { self.channel.send(()).unwrap(); } WindowEvent::RedrawRequested => { let window_size = rcx.window.inner_size(); if window_size.width == 0 || window_size.height == 0 { return; } let flight = self.resources.flight(self.graphics_flight_id).unwrap(); if rcx.recreate_swapchain { rcx.swapchain_id = self .resources .recreate_swapchain(rcx.swapchain_id, |create_info| SwapchainCreateInfo { image_extent: window_size.into(), ..create_info }) .expect("failed to recreate swapchain"); rcx.framebuffers = window_size_dependent_setup( &self.resources, rcx.swapchain_id, &rcx.render_pass, ); rcx.viewport.extent = window_size.into(); rcx.recreate_swapchain = false; } let frame_index = flight.current_frame_index(); let texture_index = self.current_texture_index.load(Ordering::Relaxed); let resource_map = resource_map!( &rcx.task_graph, rcx.virtual_swapchain_id => rcx.swapchain_id, rcx.virtual_texture_id => self.texture_ids[texture_index as usize], rcx.virtual_uniform_buffer_id => self.uniform_buffer_ids[frame_index as usize], ) .unwrap(); flight.wait(None).unwrap(); match unsafe { rcx.task_graph .execute(resource_map, rcx, || rcx.window.pre_present_notify()) } { Ok(()) => {} Err(ExecuteError::Swapchain { error: Validated::Error(VulkanError::OutOfDate), .. }) => { rcx.recreate_swapchain = true; } Err(e) => { panic!("failed to execute next frame: {e:?}"); } } } _ => {} } } fn about_to_wait(&mut self, _event_loop: &ActiveEventLoop) { let rcx = self.rcx.as_mut().unwrap(); rcx.window.request_redraw(); } } #[derive(Clone, Copy, BufferContents, Vertex)] #[repr(C)] struct MyVertex { #[format(R32G32_SFLOAT)] position: [f32; 2], } mod vs { vulkano_shaders::shader! { ty: "vertex", src: r" #version 450 layout(location = 0) in vec2 position; layout(location = 0) out vec2 tex_coords; layout(set = 0, binding = 0) uniform Data { mat4 transform; }; void main() { gl_Position = vec4(transform * vec4(position, 0.0, 1.0)); tex_coords = position + vec2(0.5); } ", } } mod fs { vulkano_shaders::shader! { ty: "fragment", src: r" #version 450 layout(location = 0) in vec2 tex_coords; layout(location = 0) out vec4 f_color; layout(set = 1, binding = 0) uniform sampler s; layout(set = 1, binding = 1) uniform texture2D tex; void main() { f_color = texture(sampler2D(tex, s), tex_coords); } ", } } struct RenderTask { swapchain_id: Id, vertex_buffer_id: Id, current_texture_index: Arc, pipeline: Arc, uniform_buffer_id: Id, uniform_buffer_sets: [Arc; MAX_FRAMES_IN_FLIGHT as usize], sampler_sets: [Arc; 2], } impl Task for RenderTask { type World = RenderContext; unsafe fn execute( &self, cbf: &mut RecordingCommandBuffer<'_>, tcx: &mut TaskContext<'_>, rcx: &Self::World, ) -> TaskResult { let frame_index = tcx.current_frame_index(); let swapchain_state = tcx.swapchain(self.swapchain_id)?; let image_index = swapchain_state.current_image_index().unwrap(); // Write to the uniform buffer designated for this frame. *tcx.write_buffer(self.uniform_buffer_id, ..)? = vs::Data { transform: { const DURATION: f64 = 5.0; let elapsed = SystemTime::now() .duration_since(UNIX_EPOCH) .unwrap() .as_secs_f64(); let remainder = elapsed.rem_euclid(DURATION); let delta = (remainder / DURATION) as f32; let angle = delta * std::f32::consts::PI * 2.0; Mat4::from_rotation_z(angle).to_cols_array_2d() }, }; cbf.as_raw().begin_render_pass( &RenderPassBeginInfo { clear_values: vec![Some([0.0, 0.0, 0.0, 1.0].into())], ..RenderPassBeginInfo::framebuffer(rcx.framebuffers[image_index as usize].clone()) }, &Default::default(), )?; cbf.set_viewport(0, slice::from_ref(&rcx.viewport))?; cbf.bind_pipeline_graphics(&self.pipeline)?; cbf.as_raw().bind_descriptor_sets( PipelineBindPoint::Graphics, self.pipeline.layout(), 0, &[ // Bind the uniform buffer designated for this frame. self.uniform_buffer_sets[frame_index as usize].as_raw(), // Bind the currently most up-to-date texture. self.sampler_sets[self.current_texture_index.load(Ordering::Relaxed) as usize] .as_raw(), ], &[], )?; cbf.bind_vertex_buffers(0, &[self.vertex_buffer_id], &[0], &[], &[])?; unsafe { cbf.draw(4, 1, 0, 0) }?; cbf.as_raw().end_render_pass(&Default::default())?; cbf.destroy_objects(rcx.framebuffers.iter().cloned()); cbf.destroy_objects(self.uniform_buffer_sets.iter().cloned()); cbf.destroy_objects(self.sampler_sets.iter().cloned()); Ok(()) } } #[allow(clippy::too_many_arguments)] fn run_worker( channel: mpsc::Receiver<()>, graphics_family_index: u32, transfer_family_index: u32, transfer_queue: Arc, resources: Arc, graphics_flight_id: Id, transfer_flight_id: Id, texture_ids: [Id; 2], current_texture_index: Arc, ) { // We are going to be updating one of 4 corners of the texture at any point in time. For that, // we will use a staging buffer and initiate a copy. However, since our texture is eventually // consistent and there are 2 replicas, that means that every time we update one of the // replicas the other replica is going to be behind by one update. Therefore we actually need 2 // staging buffers as well: one for the update that happened to the currently up-to-date // texture (at `current_index`) and one for the update that is about to happen to the currently // out-of-date texture (at `!current_index`), so that we can apply both the current and the // upcoming update to the out-of-date texture. Then the out-of-date texture is the current // up-to-date texture and vice-versa, cycle repeating. let staging_buffer_ids = [(); 2].map(|_| { resources .create_buffer( BufferCreateInfo { usage: BufferUsage::TRANSFER_SRC, ..Default::default() }, AllocationCreateInfo { memory_type_filter: MemoryTypeFilter::PREFER_HOST | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, DeviceLayout::from_size_alignment( TRANSFER_GRANULARITY as DeviceSize * TRANSFER_GRANULARITY as DeviceSize * 4, 1, ) .unwrap(), ) .unwrap() }); let mut task_graph = TaskGraph::new(&resources, 1, 3); let virtual_front_staging_buffer_id = task_graph.add_buffer(&BufferCreateInfo::default()); let virtual_back_staging_buffer_id = task_graph.add_buffer(&BufferCreateInfo::default()); let virtual_texture_id = task_graph.add_image(&ImageCreateInfo { sharing: if graphics_family_index != transfer_family_index { Sharing::Concurrent( [graphics_family_index, transfer_family_index] .into_iter() .collect(), ) } else { Sharing::Exclusive }, ..Default::default() }); task_graph.add_host_buffer_access(virtual_front_staging_buffer_id, HostAccessType::Write); task_graph .create_task_node( "Image Upload", QueueFamilyType::Transfer, UploadTask { front_staging_buffer_id: virtual_front_staging_buffer_id, back_staging_buffer_id: virtual_back_staging_buffer_id, texture_id: virtual_texture_id, }, ) .buffer_access( virtual_front_staging_buffer_id, AccessType::CopyTransferRead, ) .buffer_access(virtual_back_staging_buffer_id, AccessType::CopyTransferRead) .image_access( virtual_texture_id, AccessType::CopyTransferWrite, ImageLayoutType::Optimal, ); let task_graph = unsafe { task_graph.compile(&CompileInfo { queues: &[&transfer_queue], flight_id: transfer_flight_id, ..Default::default() }) } .unwrap(); thread::spawn(move || { let mut current_corner = 0; let mut last_frame = 0; // The worker thread is awakened by sending a signal through the channel. In a real program // you would likely send some actual data over the channel, instructing the worker what to // do, but our work is hard-coded. while let Ok(()) = channel.recv() { let graphics_flight = resources.flight(graphics_flight_id).unwrap(); // We swap the texture index to use after a write, but there is no guarantee that other // tasks have actually moved on to using the new texture. What could happen then, if // the writes being done are quicker than rendering a frame (or any other task reading // the same resource), is the following: // // 1. Task A starts reading texture 0 // 2. Task B writes texture 1, swapping the index // 3. Task B writes texture 0, swapping the index // 4. Task A stops reading texture 0 // // This is known as the A/B/A problem. In this case it results in a data race, since // task A (rendering, in our case) is still reading texture 0 while task B (our worker) // has already started writing the very same texture. // // To solve this issue, we keep track of the frame counter before swapping the texture // index and ensure that any further write only happens after a frame was reached which // makes it impossible for any readers to be stuck on the old index, by waiting on the // frame to finish on the rendering thread. graphics_flight.wait_for_frame(last_frame, None).unwrap(); let current_index = current_texture_index.load(Ordering::Relaxed); let resource_map = resource_map!( &task_graph, virtual_front_staging_buffer_id => staging_buffer_ids[current_index as usize], virtual_back_staging_buffer_id => staging_buffer_ids[!current_index as usize], // Write to the texture that's currently not in use for rendering. virtual_texture_id => texture_ids[!current_index as usize], ) .unwrap(); unsafe { task_graph.execute(resource_map, ¤t_corner, || {}) }.unwrap(); // Block the thread until the transfer finishes. resources .flight(transfer_flight_id) .unwrap() .wait(None) .unwrap(); last_frame = graphics_flight.current_frame(); // Swap the texture used for rendering to the newly updated one. // // NOTE: We are relying on the fact that this thread is the only one doing stores. current_texture_index.store(!current_index, Ordering::Relaxed); current_corner += 1; } }); } struct UploadTask { front_staging_buffer_id: Id, back_staging_buffer_id: Id, texture_id: Id, } impl Task for UploadTask { type World = usize; unsafe fn execute( &self, cbf: &mut RecordingCommandBuffer<'_>, tcx: &mut TaskContext<'_>, ¤t_corner: &Self::World, ) -> TaskResult { const CORNER_OFFSETS: [[u32; 3]; 4] = [ [0, 0, 0], [TRANSFER_GRANULARITY, 0, 0], [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 0], [0, TRANSFER_GRANULARITY, 0], ]; let mut rng = rand::thread_rng(); // We simulate some work for the worker to indulge in. In a real program this would likely // be some kind of I/O, for example reading from disk (think loading the next level in a // level-based game, loading the next chunk of terrain in an open-world game, etc.) or // downloading images or other data from the internet. // // NOTE: The size of these textures is exceedingly large on purpose, so that you can feel // that the update is in fact asynchronous due to the latency of the updates while the // rendering continues without any. let color = [rng.gen(), rng.gen(), rng.gen(), u8::MAX]; tcx.write_buffer::<[_]>(self.front_staging_buffer_id, ..)? .fill(color); cbf.copy_buffer_to_image(&CopyBufferToImageInfo { src_buffer: self.front_staging_buffer_id, dst_image: self.texture_id, regions: &[BufferImageCopy { image_subresource: ImageSubresourceLayers { aspects: ImageAspects::COLOR, mip_level: 0, array_layers: 0..1, }, image_offset: CORNER_OFFSETS[current_corner % 4], image_extent: [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 1], ..Default::default() }], ..Default::default() })?; if current_corner > 0 { cbf.copy_buffer_to_image(&CopyBufferToImageInfo { src_buffer: self.back_staging_buffer_id, dst_image: self.texture_id, regions: &[BufferImageCopy { image_subresource: ImageSubresourceLayers { aspects: ImageAspects::COLOR, mip_level: 0, array_layers: 0..1, }, image_offset: CORNER_OFFSETS[(current_corner - 1) % 4], image_extent: [TRANSFER_GRANULARITY, TRANSFER_GRANULARITY, 1], ..Default::default() }], ..Default::default() })?; } Ok(()) } } /// This function is called once during initialization, then again whenever the window is resized. fn window_size_dependent_setup( resources: &Resources, swapchain_id: Id, render_pass: &Arc, ) -> Vec> { let swapchain_state = resources.swapchain(swapchain_id).unwrap(); let images = swapchain_state.images(); images .iter() .map(|image| { let view = ImageView::new_default(image.clone()).unwrap(); Framebuffer::new( render_pass.clone(), FramebufferCreateInfo { attachments: vec![view], ..Default::default() }, ) .unwrap() }) .collect::>() }