// Indirect draw example // // Indirect draw calls allow us to issue a draw without needing to know the number of vertices // until later when the draw is executed by the GPU. // // This is used in situations where vertices are being generated on the GPU, such as a GPU particle // simulation, and the exact number of output vertices cannot be known until the compute shader has // run. // // In this example the compute shader is trivial and the number of vertices does not change. // However is does demonstrate that each compute instance atomically updates the vertex counter // before filling the vertex buffer. // // For an explanation of how the rendering of the triangles takes place see the `triangle.rs` // example. use std::{error::Error, sync::Arc}; use vulkano::{ buffer::{ allocator::{SubbufferAllocator, SubbufferAllocatorCreateInfo}, BufferContents, BufferUsage, }, command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, DrawIndirectCommand, RenderPassBeginInfo, }, descriptor_set::{ allocator::StandardDescriptorSetAllocator, DescriptorSet, WriteDescriptorSet, }, device::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, Queue, QueueCreateInfo, QueueFlags, }, image::{view::ImageView, Image, ImageUsage}, instance::{Instance, InstanceCreateFlags, InstanceCreateInfo}, memory::allocator::{MemoryTypeFilter, StandardMemoryAllocator}, pipeline::{ compute::ComputePipelineCreateInfo, graphics::{ color_blend::{ColorBlendAttachmentState, ColorBlendState}, input_assembly::InputAssemblyState, multisample::MultisampleState, rasterization::RasterizationState, vertex_input::{Vertex, VertexDefinition}, viewport::{Viewport, ViewportState}, GraphicsPipelineCreateInfo, }, layout::PipelineDescriptorSetLayoutCreateInfo, ComputePipeline, DynamicState, GraphicsPipeline, Pipeline, PipelineBindPoint, PipelineLayout, PipelineShaderStageCreateInfo, }, render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, single_pass_renderpass, swapchain::{ acquire_next_image, Surface, Swapchain, SwapchainCreateInfo, SwapchainPresentInfo, }, sync::{self, GpuFuture}, Validated, VulkanError, VulkanLibrary, }; use winit::{ application::ApplicationHandler, event::WindowEvent, event_loop::{ActiveEventLoop, EventLoop}, window::{Window, WindowId}, }; fn main() -> Result<(), impl Error> { let event_loop = EventLoop::new().unwrap(); let mut app = App::new(&event_loop); event_loop.run_app(&mut app) } struct App { instance: Arc, device: Arc, queue: Arc, descriptor_set_allocator: Arc, command_buffer_allocator: Arc, indirect_buffer_allocator: SubbufferAllocator, vertex_buffer_allocator: SubbufferAllocator, compute_pipeline: Arc, rcx: Option, } struct RenderContext { window: Arc, swapchain: Arc, render_pass: Arc, framebuffers: Vec>, pipeline: Arc, viewport: Viewport, recreate_swapchain: bool, previous_frame_end: Option>, } impl App { fn new(event_loop: &EventLoop<()>) -> Self { let library = VulkanLibrary::new().unwrap(); let required_extensions = Surface::required_extensions(event_loop).unwrap(); let instance = Instance::new( library, InstanceCreateInfo { flags: InstanceCreateFlags::ENUMERATE_PORTABILITY, enabled_extensions: required_extensions, ..Default::default() }, ) .unwrap(); let device_extensions = DeviceExtensions { khr_swapchain: true, khr_storage_buffer_storage_class: true, ..DeviceExtensions::empty() }; let (physical_device, queue_family_index) = instance .enumerate_physical_devices() .unwrap() .filter(|p| p.supported_extensions().contains(&device_extensions)) .filter_map(|p| { p.queue_family_properties() .iter() .enumerate() .position(|(i, q)| { q.queue_flags.intersects(QueueFlags::GRAPHICS) && p.presentation_support(i as u32, event_loop).unwrap() }) .map(|i| (p, i as u32)) }) .min_by_key(|(p, _)| match p.properties().device_type { PhysicalDeviceType::DiscreteGpu => 0, PhysicalDeviceType::IntegratedGpu => 1, PhysicalDeviceType::VirtualGpu => 2, PhysicalDeviceType::Cpu => 3, PhysicalDeviceType::Other => 4, _ => 5, }) .unwrap(); println!( "Using device: {} (type: {:?})", physical_device.properties().device_name, physical_device.properties().device_type, ); let (device, mut queues) = Device::new( physical_device, DeviceCreateInfo { enabled_extensions: device_extensions, queue_create_infos: vec![QueueCreateInfo { queue_family_index, ..Default::default() }], ..Default::default() }, ) .unwrap(); let queue = queues.next().unwrap(); let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new( device.clone(), Default::default(), )); let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new( device.clone(), Default::default(), )); // Each frame we generate a new set of vertices and each frame we need a new // `DrawIndirectCommand` struct to set the number of vertices to draw. let indirect_buffer_allocator = SubbufferAllocator::new( memory_allocator.clone(), SubbufferAllocatorCreateInfo { buffer_usage: BufferUsage::INDIRECT_BUFFER | BufferUsage::STORAGE_BUFFER, memory_type_filter: MemoryTypeFilter::PREFER_DEVICE | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, ); let vertex_buffer_allocator = SubbufferAllocator::new( memory_allocator, SubbufferAllocatorCreateInfo { buffer_usage: BufferUsage::STORAGE_BUFFER | BufferUsage::VERTEX_BUFFER, memory_type_filter: MemoryTypeFilter::PREFER_DEVICE | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, ); // A simple compute shader that generates vertices. It has two buffers bound: the first is // where we output the vertices, the second is the `IndirectDrawArgs` struct we passed the // `draw_indirect` so we can set the number to vertices to draw. mod cs { vulkano_shaders::shader! { ty: "compute", src: r" #version 450 layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in; layout(set = 0, binding = 0) buffer Output { vec2 pos[]; } triangles; layout(set = 0, binding = 1) buffer IndirectDrawArgs { uint vertices; uint unused0; uint unused1; uint unused2; }; void main() { uint idx = gl_GlobalInvocationID.x; // Each invocation of the compute shader is going to increment the counter, // so we need to use atomic operations for safety. The previous value of // the counter is returned so that gives us the offset into the vertex // buffer this thread can write it's vertices into. uint offset = atomicAdd(vertices, 6); vec2 center = vec2(-0.8, -0.8) + idx * vec2(0.1, 0.1); triangles.pos[offset + 0] = center + vec2(0.0, 0.0375); triangles.pos[offset + 1] = center + vec2(0.025, -0.01725); triangles.pos[offset + 2] = center + vec2(-0.025, -0.01725); triangles.pos[offset + 3] = center + vec2(0.0, -0.0375); triangles.pos[offset + 4] = center + vec2(0.025, 0.01725); triangles.pos[offset + 5] = center + vec2(-0.025, 0.01725); } ", } } let compute_pipeline = { let cs = cs::load(device.clone()) .unwrap() .entry_point("main") .unwrap(); let stage = PipelineShaderStageCreateInfo::new(cs); let layout = PipelineLayout::new( device.clone(), PipelineDescriptorSetLayoutCreateInfo::from_stages([&stage]) .into_pipeline_layout_create_info(device.clone()) .unwrap(), ) .unwrap(); ComputePipeline::new( device.clone(), None, ComputePipelineCreateInfo::stage_layout(stage, layout), ) .unwrap() }; App { instance, device, queue, descriptor_set_allocator, command_buffer_allocator, indirect_buffer_allocator, vertex_buffer_allocator, compute_pipeline, rcx: None, } } } impl ApplicationHandler for App { fn resumed(&mut self, event_loop: &ActiveEventLoop) { let window = Arc::new( event_loop .create_window(Window::default_attributes()) .unwrap(), ); let surface = Surface::from_window(self.instance.clone(), window.clone()).unwrap(); let window_size = window.inner_size(); let (swapchain, images) = { let surface_capabilities = self .device .physical_device() .surface_capabilities(&surface, Default::default()) .unwrap(); let (image_format, _) = self .device .physical_device() .surface_formats(&surface, Default::default()) .unwrap()[0]; Swapchain::new( self.device.clone(), surface, SwapchainCreateInfo { min_image_count: surface_capabilities.min_image_count.max(2), image_format, image_extent: window_size.into(), image_usage: ImageUsage::COLOR_ATTACHMENT, composite_alpha: surface_capabilities .supported_composite_alpha .into_iter() .next() .unwrap(), ..Default::default() }, ) .unwrap() }; let render_pass = single_pass_renderpass!( self.device.clone(), attachments: { color: { format: swapchain.image_format(), samples: 1, load_op: Clear, store_op: Store, }, }, pass: { color: [color], depth_stencil: {}, }, ) .unwrap(); let framebuffers = window_size_dependent_setup(&images, &render_pass); mod vs { vulkano_shaders::shader! { ty: "vertex", src: r" #version 450 // The triangle vertex positions. layout(location = 0) in vec2 position; void main() { gl_Position = vec4(position, 0.0, 1.0); } ", } } mod fs { vulkano_shaders::shader! { ty: "fragment", src: r" #version 450 layout(location = 0) out vec4 f_color; void main() { f_color = vec4(1.0, 0.0, 0.0, 1.0); } ", } } let pipeline = { let vs = vs::load(self.device.clone()) .unwrap() .entry_point("main") .unwrap(); let fs = fs::load(self.device.clone()) .unwrap() .entry_point("main") .unwrap(); let vertex_input_state = MyVertex::per_vertex().definition(&vs).unwrap(); let stages = [ PipelineShaderStageCreateInfo::new(vs), PipelineShaderStageCreateInfo::new(fs), ]; let layout = PipelineLayout::new( self.device.clone(), PipelineDescriptorSetLayoutCreateInfo::from_stages(&stages) .into_pipeline_layout_create_info(self.device.clone()) .unwrap(), ) .unwrap(); let subpass = Subpass::from(render_pass.clone(), 0).unwrap(); GraphicsPipeline::new( self.device.clone(), None, GraphicsPipelineCreateInfo { stages: stages.into_iter().collect(), vertex_input_state: Some(vertex_input_state), input_assembly_state: Some(InputAssemblyState::default()), viewport_state: Some(ViewportState::default()), rasterization_state: Some(RasterizationState::default()), multisample_state: Some(MultisampleState::default()), color_blend_state: Some(ColorBlendState::with_attachment_states( subpass.num_color_attachments(), ColorBlendAttachmentState::default(), )), dynamic_state: [DynamicState::Viewport].into_iter().collect(), subpass: Some(subpass.into()), ..GraphicsPipelineCreateInfo::layout(layout) }, ) .unwrap() }; let viewport = Viewport { offset: [0.0, 0.0], extent: window_size.into(), depth_range: 0.0..=1.0, }; let previous_frame_end = Some(sync::now(self.device.clone()).boxed()); self.rcx = Some(RenderContext { window, swapchain, render_pass, framebuffers, pipeline, viewport, recreate_swapchain: false, previous_frame_end, }); } fn window_event( &mut self, event_loop: &ActiveEventLoop, _window_id: WindowId, event: WindowEvent, ) { let rcx = self.rcx.as_mut().unwrap(); match event { WindowEvent::CloseRequested => { event_loop.exit(); } WindowEvent::Resized(_) => { rcx.recreate_swapchain = true; } WindowEvent::RedrawRequested => { let window_size = rcx.window.inner_size(); if window_size.width == 0 || window_size.height == 0 { return; } rcx.previous_frame_end.as_mut().unwrap().cleanup_finished(); if rcx.recreate_swapchain { let (new_swapchain, new_images) = rcx .swapchain .recreate(SwapchainCreateInfo { image_extent: window_size.into(), ..rcx.swapchain.create_info() }) .expect("failed to recreate swapchain"); rcx.swapchain = new_swapchain; rcx.framebuffers = window_size_dependent_setup(&new_images, &rcx.render_pass); rcx.viewport.extent = window_size.into(); rcx.recreate_swapchain = false; } let (image_index, suboptimal, acquire_future) = match acquire_next_image( rcx.swapchain.clone(), None, ) .map_err(Validated::unwrap) { Ok(r) => r, Err(VulkanError::OutOfDate) => { rcx.recreate_swapchain = true; return; } Err(e) => panic!("failed to acquire next image: {e}"), }; if suboptimal { rcx.recreate_swapchain = true; } // Allocate a buffer to hold the arguments for this frame's draw call. The compute // shader will only update `vertex_count`, so set the other parameters correctly // here. let indirect_commands = [DrawIndirectCommand { vertex_count: 0, instance_count: 1, first_vertex: 0, first_instance: 0, }]; let indirect_buffer = self .indirect_buffer_allocator .allocate_slice(indirect_commands.len() as _) .unwrap(); indirect_buffer .write() .unwrap() .copy_from_slice(&indirect_commands); // Allocate a buffer to hold this frame's vertices. This needs to be large enough // to hold the worst case number of vertices generated by the compute shader. let iter = (0..(6 * 16)).map(|_| MyVertex { position: [0.0; 2] }); let vertices = self .vertex_buffer_allocator .allocate_slice(iter.len() as _) .unwrap(); for (o, i) in vertices.write().unwrap().iter_mut().zip(iter) { *o = i; } // Pass the two buffers to the compute shader. let layout = &self.compute_pipeline.layout().set_layouts()[0]; let cs_descriptor_set = DescriptorSet::new( self.descriptor_set_allocator.clone(), layout.clone(), [ WriteDescriptorSet::buffer(0, vertices.clone()), WriteDescriptorSet::buffer(1, indirect_buffer.clone()), ], [], ) .unwrap(); let mut builder = AutoCommandBufferBuilder::primary( self.command_buffer_allocator.clone(), self.queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) .unwrap(); // First in the command buffer we dispatch the compute shader to generate the // vertices and fill out the draw call arguments. builder .bind_pipeline_compute(self.compute_pipeline.clone()) .unwrap() .bind_descriptor_sets( PipelineBindPoint::Compute, self.compute_pipeline.layout().clone(), 0, cs_descriptor_set, ) .unwrap(); unsafe { builder.dispatch([1, 1, 1]) }.unwrap(); builder .begin_render_pass( RenderPassBeginInfo { clear_values: vec![Some([0.0, 0.0, 1.0, 1.0].into())], ..RenderPassBeginInfo::framebuffer( rcx.framebuffers[image_index as usize].clone(), ) }, Default::default(), ) .unwrap() .set_viewport(0, [rcx.viewport.clone()].into_iter().collect()) .unwrap() .bind_pipeline_graphics(rcx.pipeline.clone()) .unwrap() .bind_vertex_buffers(0, vertices) .unwrap(); // The indirect draw call is placed in the command buffer with a reference to // the buffer that will contain the arguments for the draw. unsafe { builder.draw_indirect(indirect_buffer) }.unwrap(); builder.end_render_pass(Default::default()).unwrap(); let command_buffer = builder.build().unwrap(); let future = rcx .previous_frame_end .take() .unwrap() .join(acquire_future) .then_execute(self.queue.clone(), command_buffer) .unwrap() .then_swapchain_present( self.queue.clone(), SwapchainPresentInfo::swapchain_image_index( rcx.swapchain.clone(), image_index, ), ) .then_signal_fence_and_flush(); match future.map_err(Validated::unwrap) { Ok(future) => { rcx.previous_frame_end = Some(future.boxed()); } Err(VulkanError::OutOfDate) => { rcx.recreate_swapchain = true; rcx.previous_frame_end = Some(sync::now(self.device.clone()).boxed()); } Err(e) => { println!("failed to flush future: {e}"); rcx.previous_frame_end = Some(sync::now(self.device.clone()).boxed()); } } } _ => {} } } fn about_to_wait(&mut self, _event_loop: &ActiveEventLoop) { let rcx = self.rcx.as_mut().unwrap(); rcx.window.request_redraw(); } } // `MyVertex` is the vertex type that will be output from the compute shader and be input to the // vertex shader. #[derive(BufferContents, Vertex)] #[repr(C)] struct MyVertex { #[format(R32G32_SFLOAT)] position: [f32; 2], } /// This function is called once during initialization, then again whenever the window is resized. fn window_size_dependent_setup( images: &[Arc], render_pass: &Arc, ) -> Vec> { images .iter() .map(|image| { let view = ImageView::new_default(image.clone()).unwrap(); Framebuffer::new( render_pass.clone(), FramebufferCreateInfo { attachments: vec![view], ..Default::default() }, ) .unwrap() }) .collect::>() }