// Copyright (c) 2019 The vulkano developers // Licensed under the Apache License, Version 2.0 // or the MIT // license , // at your option. All files in the project carrying such // notice may not be copied, modified, or distributed except // according to those terms. // Indirect draw example // // Indirect draw calls allow us to issue a draw without needing to know the number of vertices // until later when the draw is executed by the GPU. // // This is used in situations where vertices are being generated on the GPU, such as a GPU particle // simulation, and the exact number of output vertices cannot be known until the compute shader has // run. // // In this example the compute shader is trivial and the number of vertices does not change. // However is does demonstrate that each compute instance atomically updates the vertex counter // before filling the vertex buffer. // // For an explanation of how the rendering of the triangles takes place see the `triangle.rs` // example. use std::sync::Arc; use vulkano::{ buffer::{ allocator::{SubbufferAllocator, SubbufferAllocatorCreateInfo}, BufferContents, BufferUsage, }, command_buffer::{ allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage, DrawIndirectCommand, RenderPassBeginInfo, }, descriptor_set::{ allocator::StandardDescriptorSetAllocator, PersistentDescriptorSet, WriteDescriptorSet, }, device::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, QueueFlags, }, image::{view::ImageView, Image, ImageUsage}, instance::{Instance, InstanceCreateFlags, InstanceCreateInfo}, memory::allocator::{MemoryTypeFilter, StandardMemoryAllocator}, pipeline::{ compute::ComputePipelineCreateInfo, graphics::{ color_blend::{ColorBlendAttachmentState, ColorBlendState}, input_assembly::InputAssemblyState, multisample::MultisampleState, rasterization::RasterizationState, vertex_input::{Vertex, VertexDefinition}, viewport::{Viewport, ViewportState}, GraphicsPipelineCreateInfo, }, layout::PipelineDescriptorSetLayoutCreateInfo, ComputePipeline, DynamicState, GraphicsPipeline, Pipeline, PipelineBindPoint, PipelineLayout, PipelineShaderStageCreateInfo, }, render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, single_pass_renderpass, swapchain::{ acquire_next_image, Surface, Swapchain, SwapchainCreateInfo, SwapchainPresentInfo, }, sync::{self, GpuFuture}, Validated, VulkanError, VulkanLibrary, }; use winit::{ event::{Event, WindowEvent}, event_loop::{ControlFlow, EventLoop}, window::WindowBuilder, }; fn main() { let event_loop = EventLoop::new(); let library = VulkanLibrary::new().unwrap(); let required_extensions = Surface::required_extensions(&event_loop); let instance = Instance::new( library, InstanceCreateInfo { flags: InstanceCreateFlags::ENUMERATE_PORTABILITY, enabled_extensions: required_extensions, ..Default::default() }, ) .unwrap(); let window = Arc::new(WindowBuilder::new().build(&event_loop).unwrap()); let surface = Surface::from_window(instance.clone(), window.clone()).unwrap(); let device_extensions = DeviceExtensions { khr_swapchain: true, khr_storage_buffer_storage_class: true, ..DeviceExtensions::empty() }; let (physical_device, queue_family_index) = instance .enumerate_physical_devices() .unwrap() .filter(|p| p.supported_extensions().contains(&device_extensions)) .filter_map(|p| { p.queue_family_properties() .iter() .enumerate() .position(|(i, q)| { q.queue_flags.intersects(QueueFlags::GRAPHICS) && p.surface_support(i as u32, &surface).unwrap_or(false) }) .map(|i| (p, i as u32)) }) .min_by_key(|(p, _)| match p.properties().device_type { PhysicalDeviceType::DiscreteGpu => 0, PhysicalDeviceType::IntegratedGpu => 1, PhysicalDeviceType::VirtualGpu => 2, PhysicalDeviceType::Cpu => 3, PhysicalDeviceType::Other => 4, _ => 5, }) .unwrap(); println!( "Using device: {} (type: {:?})", physical_device.properties().device_name, physical_device.properties().device_type, ); let (device, mut queues) = Device::new( physical_device, DeviceCreateInfo { enabled_extensions: device_extensions, queue_create_infos: vec![QueueCreateInfo { queue_family_index, ..Default::default() }], ..Default::default() }, ) .unwrap(); let queue = queues.next().unwrap(); let (mut swapchain, images) = { let surface_capabilities = device .physical_device() .surface_capabilities(&surface, Default::default()) .unwrap(); let image_format = device .physical_device() .surface_formats(&surface, Default::default()) .unwrap()[0] .0; Swapchain::new( device.clone(), surface, SwapchainCreateInfo { min_image_count: surface_capabilities.min_image_count.max(2), image_format, image_extent: window.inner_size().into(), image_usage: ImageUsage::COLOR_ATTACHMENT, composite_alpha: surface_capabilities .supported_composite_alpha .into_iter() .next() .unwrap(), ..Default::default() }, ) .unwrap() }; mod vs { vulkano_shaders::shader! { ty: "vertex", src: r" #version 450 // The triangle vertex positions. layout(location = 0) in vec2 position; void main() { gl_Position = vec4(position, 0.0, 1.0); } ", } } mod fs { vulkano_shaders::shader! { ty: "fragment", src: r#" #version 450 layout(location = 0) out vec4 f_color; void main() { f_color = vec4(1.0, 0.0, 0.0, 1.0); } "#, } } // A simple compute shader that generates vertices. It has two buffers bound: the first is // where we output the vertices, the second is the `IndirectDrawArgs` struct we passed the // `draw_indirect` so we can set the number to vertices to draw. mod cs { vulkano_shaders::shader! { ty: "compute", src: r" #version 450 layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in; layout(set = 0, binding = 0) buffer Output { vec2 pos[]; } triangles; layout(set = 0, binding = 1) buffer IndirectDrawArgs { uint vertices; uint unused0; uint unused1; uint unused2; }; void main() { uint idx = gl_GlobalInvocationID.x; // Each invocation of the compute shader is going to increment the counter, so // we need to use atomic operations for safety. The previous value of the // counter is returned so that gives us the offset into the vertex buffer this // thread can write it's vertices into. uint offset = atomicAdd(vertices, 6); vec2 center = vec2(-0.8, -0.8) + idx * vec2(0.1, 0.1); triangles.pos[offset + 0] = center + vec2(0.0, 0.0375); triangles.pos[offset + 1] = center + vec2(0.025, -0.01725); triangles.pos[offset + 2] = center + vec2(-0.025, -0.01725); triangles.pos[offset + 3] = center + vec2(0.0, -0.0375); triangles.pos[offset + 4] = center + vec2(0.025, 0.01725); triangles.pos[offset + 5] = center + vec2(-0.025, 0.01725); } ", } } let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); // Each frame we generate a new set of vertices and each frame we need a new // DrawIndirectCommand struct to set the number of vertices to draw. let indirect_args_pool = SubbufferAllocator::new( memory_allocator.clone(), SubbufferAllocatorCreateInfo { buffer_usage: BufferUsage::INDIRECT_BUFFER | BufferUsage::STORAGE_BUFFER, memory_type_filter: MemoryTypeFilter::PREFER_DEVICE | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, ); let vertex_pool = SubbufferAllocator::new( memory_allocator, SubbufferAllocatorCreateInfo { buffer_usage: BufferUsage::STORAGE_BUFFER | BufferUsage::VERTEX_BUFFER, memory_type_filter: MemoryTypeFilter::PREFER_DEVICE | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE, ..Default::default() }, ); let compute_pipeline = { let cs = cs::load(device.clone()) .unwrap() .entry_point("main") .unwrap(); let stage = PipelineShaderStageCreateInfo::new(cs); let layout = PipelineLayout::new( device.clone(), PipelineDescriptorSetLayoutCreateInfo::from_stages([&stage]) .into_pipeline_layout_create_info(device.clone()) .unwrap(), ) .unwrap(); ComputePipeline::new( device.clone(), None, ComputePipelineCreateInfo::stage_layout(stage, layout), ) .unwrap() }; let render_pass = single_pass_renderpass!( device.clone(), attachments: { color: { format: swapchain.image_format(), samples: 1, load_op: Clear, store_op: Store, }, }, pass: { color: [color], depth_stencil: {}, }, ) .unwrap(); // `Vertex` is the vertex type that will be output from the compute shader and be input to the // vertex shader. #[derive(BufferContents, Vertex)] #[repr(C)] struct Vertex { #[format(R32G32_SFLOAT)] position: [f32; 2], } let render_pipeline = { let vs = vs::load(device.clone()) .unwrap() .entry_point("main") .unwrap(); let fs = fs::load(device.clone()) .unwrap() .entry_point("main") .unwrap(); let vertex_input_state = Vertex::per_vertex() .definition(&vs.info().input_interface) .unwrap(); let stages = [ PipelineShaderStageCreateInfo::new(vs), PipelineShaderStageCreateInfo::new(fs), ]; let layout = PipelineLayout::new( device.clone(), PipelineDescriptorSetLayoutCreateInfo::from_stages(&stages) .into_pipeline_layout_create_info(device.clone()) .unwrap(), ) .unwrap(); let subpass = Subpass::from(render_pass.clone(), 0).unwrap(); GraphicsPipeline::new( device.clone(), None, GraphicsPipelineCreateInfo { stages: stages.into_iter().collect(), vertex_input_state: Some(vertex_input_state), input_assembly_state: Some(InputAssemblyState::default()), viewport_state: Some(ViewportState::default()), rasterization_state: Some(RasterizationState::default()), multisample_state: Some(MultisampleState::default()), color_blend_state: Some(ColorBlendState::with_attachment_states( subpass.num_color_attachments(), ColorBlendAttachmentState::default(), )), dynamic_state: [DynamicState::Viewport].into_iter().collect(), subpass: Some(subpass.into()), ..GraphicsPipelineCreateInfo::layout(layout) }, ) .unwrap() }; let mut viewport = Viewport { offset: [0.0, 0.0], extent: [0.0, 0.0], depth_range: 0.0..=1.0, }; let mut framebuffers = window_size_dependent_setup(&images, render_pass.clone(), &mut viewport); let mut recreate_swapchain = false; let mut previous_frame_end = Some(sync::now(device.clone()).boxed()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone(), Default::default()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone(), Default::default()); event_loop.run(move |event, _, control_flow| { match event { Event::WindowEvent { event: WindowEvent::CloseRequested, .. } => { *control_flow = ControlFlow::Exit; } Event::WindowEvent { event: WindowEvent::Resized(_), .. } => { recreate_swapchain = true; } Event::RedrawEventsCleared => { let image_extent: [u32; 2] = window.inner_size().into(); if image_extent.contains(&0) { return; } previous_frame_end.as_mut().unwrap().cleanup_finished(); if recreate_swapchain { let (new_swapchain, new_images) = swapchain .recreate(SwapchainCreateInfo { image_extent, ..swapchain.create_info() }) .expect("failed to recreate swapchain"); swapchain = new_swapchain; framebuffers = window_size_dependent_setup( &new_images, render_pass.clone(), &mut viewport, ); recreate_swapchain = false; } let (image_index, suboptimal, acquire_future) = match acquire_next_image(swapchain.clone(), None).map_err(Validated::unwrap) { Ok(r) => r, Err(VulkanError::OutOfDate) => { recreate_swapchain = true; return; } Err(e) => panic!("failed to acquire next image: {e}"), }; if suboptimal { recreate_swapchain = true; } // Allocate a buffer to hold the arguments for this frame's draw call. The compute // shader will only update `vertex_count`, so set the other parameters correctly // here. let indirect_commands = [DrawIndirectCommand { vertex_count: 0, instance_count: 1, first_vertex: 0, first_instance: 0, }]; let indirect_buffer = indirect_args_pool .allocate_slice(indirect_commands.len() as _) .unwrap(); indirect_buffer .write() .unwrap() .copy_from_slice(&indirect_commands); // Allocate a buffer to hold this frame's vertices. This needs to be large enough // to hold the worst case number of vertices generated by the compute shader. let iter = (0..(6 * 16)).map(|_| Vertex { position: [0.0; 2] }); let vertices = vertex_pool.allocate_slice(iter.len() as _).unwrap(); for (o, i) in vertices.write().unwrap().iter_mut().zip(iter) { *o = i; } // Pass the two buffers to the compute shader. let layout = compute_pipeline.layout().set_layouts().get(0).unwrap(); let cs_desciptor_set = PersistentDescriptorSet::new( &descriptor_set_allocator, layout.clone(), [ WriteDescriptorSet::buffer(0, vertices.clone()), WriteDescriptorSet::buffer(1, indirect_buffer.clone()), ], [], ) .unwrap(); let mut builder = AutoCommandBufferBuilder::primary( &command_buffer_allocator, queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) .unwrap(); // First in the command buffer we dispatch the compute shader to generate the // vertices and fill out the draw call arguments. builder .bind_pipeline_compute(compute_pipeline.clone()) .unwrap() .bind_descriptor_sets( PipelineBindPoint::Compute, compute_pipeline.layout().clone(), 0, cs_desciptor_set, ) .unwrap() .dispatch([1, 1, 1]) .unwrap() .begin_render_pass( RenderPassBeginInfo { clear_values: vec![Some([0.0, 0.0, 1.0, 1.0].into())], ..RenderPassBeginInfo::framebuffer( framebuffers[image_index as usize].clone(), ) }, Default::default(), ) .unwrap() .set_viewport(0, [viewport.clone()].into_iter().collect()) .unwrap() .bind_pipeline_graphics(render_pipeline.clone()) .unwrap() .bind_vertex_buffers(0, vertices) .unwrap() // The indirect draw call is placed in the command buffer with a reference to // the buffer that will contain the arguments for the draw. .draw_indirect(indirect_buffer) .unwrap() .end_render_pass(Default::default()) .unwrap(); let command_buffer = builder.build().unwrap(); let future = previous_frame_end .take() .unwrap() .join(acquire_future) .then_execute(queue.clone(), command_buffer) .unwrap() .then_swapchain_present( queue.clone(), SwapchainPresentInfo::swapchain_image_index(swapchain.clone(), image_index), ) .then_signal_fence_and_flush(); match future.map_err(Validated::unwrap) { Ok(future) => { previous_frame_end = Some(future.boxed()); } Err(VulkanError::OutOfDate) => { recreate_swapchain = true; previous_frame_end = Some(sync::now(device.clone()).boxed()); } Err(e) => { println!("failed to flush future: {e}"); previous_frame_end = Some(sync::now(device.clone()).boxed()); } } } _ => (), } }); } /// This function is called once during initialization, then again whenever the window is resized. fn window_size_dependent_setup( images: &[Arc], render_pass: Arc, viewport: &mut Viewport, ) -> Vec> { let extent = images[0].extent(); viewport.extent = [extent[0] as f32, extent[1] as f32]; images .iter() .map(|image| { let view = ImageView::new_default(image.clone()).unwrap(); Framebuffer::new( render_pass.clone(), FramebufferCreateInfo { attachments: vec![view], ..Default::default() }, ) .unwrap() }) .collect::>() }