vulkano/examples/src/bin/indirect.rs

510 lines
18 KiB
Rust
Raw Normal View History

// Copyright (c) 2019 The vulkano developers
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
// at your option. All files in the project carrying such
// notice may not be copied, modified, or distributed except
// according to those terms.
// Indirect draw example
//
// Indirect draw calls allow us to issue a draw without needing to know the number of vertices
// until later when the draw is executed by the GPU.
//
// This is used in situations where vertices are being generated on the GPU, such as a GPU
// particle simulation, and the exact number of output vertices cannot be known until
// the compute shader has run.
//
// In this example the compute shader is trivial and the number of vertices does not change.
// However is does demonstrate that each compute instance atomically updates the vertex
// counter before filling the vertex buffer.
//
// For an explanation of how the rendering of the triangles takes place see the `triangle.rs`
// example.
//
use bytemuck::{Pod, Zeroable};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuBufferPool},
command_buffer::{
allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage,
DrawIndirectCommand, RenderPassBeginInfo, SubpassContents,
},
descriptor_set::{
allocator::StandardDescriptorSetAllocator, PersistentDescriptorSet, WriteDescriptorSet,
},
device::{
physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo,
},
image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage},
impl_vertex,
instance::{Instance, InstanceCreateInfo},
2022-10-26 14:25:01 +00:00
memory::allocator::{MemoryUsage, StandardMemoryAllocator},
pipeline::{
graphics::{
input_assembly::InputAssemblyState,
vertex_input::BuffersDefinition,
viewport::{Viewport, ViewportState},
},
ComputePipeline, GraphicsPipeline, Pipeline, PipelineBindPoint,
},
render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass},
single_pass_renderpass,
swapchain::{
acquire_next_image, AcquireError, Swapchain, SwapchainCreateInfo, SwapchainCreationError,
SwapchainPresentInfo,
},
sync::{self, FlushError, GpuFuture},
VulkanLibrary,
};
use vulkano_win::VkSurfaceBuild;
use winit::{
event::{Event, WindowEvent},
event_loop::{ControlFlow, EventLoop},
window::{Window, WindowBuilder},
};
// # Vertex Types
// `Vertex` is the vertex type that will be output from the compute shader and be input to the vertex shader.
2021-11-24 14:19:57 +00:00
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
struct Vertex {
position: [f32; 2],
}
impl_vertex!(Vertex, position);
fn main() {
let library = VulkanLibrary::new().unwrap();
let required_extensions = vulkano_win::required_extensions(&library);
let instance = Instance::new(
library,
InstanceCreateInfo {
enabled_extensions: required_extensions,
// Enable enumerating devices that use non-conformant vulkan implementations. (ex. MoltenVK)
enumerate_portability: true,
..Default::default()
},
)
.unwrap();
let event_loop = EventLoop::new();
let surface = WindowBuilder::new()
.build_vk_surface(&event_loop, instance.clone())
.unwrap();
let device_extensions = DeviceExtensions {
khr_swapchain: true,
khr_storage_buffer_storage_class: true,
..DeviceExtensions::empty()
};
let (physical_device, queue_family_index) = instance
.enumerate_physical_devices()
.unwrap()
.filter(|p| p.supported_extensions().contains(&device_extensions))
.filter_map(|p| {
p.queue_family_properties()
.iter()
.enumerate()
.position(|(i, q)| {
q.queue_flags.graphics && p.surface_support(i as u32, &surface).unwrap_or(false)
})
.map(|i| (p, i as u32))
})
.min_by_key(|(p, _)| match p.properties().device_type {
PhysicalDeviceType::DiscreteGpu => 0,
PhysicalDeviceType::IntegratedGpu => 1,
PhysicalDeviceType::VirtualGpu => 2,
PhysicalDeviceType::Cpu => 3,
PhysicalDeviceType::Other => 4,
_ => 5,
})
.unwrap();
println!(
"Using device: {} (type: {:?})",
physical_device.properties().device_name,
physical_device.properties().device_type,
);
let (device, mut queues) = Device::new(
physical_device,
DeviceCreateInfo {
enabled_extensions: device_extensions,
queue_create_infos: vec![QueueCreateInfo {
queue_family_index,
..Default::default()
}],
..Default::default()
},
)
.unwrap();
let queue = queues.next().unwrap();
let (mut swapchain, images) = {
let surface_capabilities = device
.physical_device()
.surface_capabilities(&surface, Default::default())
.unwrap();
let image_format = Some(
device
.physical_device()
.surface_formats(&surface, Default::default())
.unwrap()[0]
.0,
);
let window = surface.object().unwrap().downcast_ref::<Window>().unwrap();
Swapchain::new(
device.clone(),
surface.clone(),
SwapchainCreateInfo {
min_image_count: surface_capabilities.min_image_count,
image_format,
image_extent: window.inner_size().into(),
image_usage: ImageUsage {
color_attachment: true,
..ImageUsage::empty()
},
composite_alpha: surface_capabilities
.supported_composite_alpha
.iter()
.next()
.unwrap(),
..Default::default()
},
)
.unwrap()
};
mod vs {
vulkano_shaders::shader! {
ty: "vertex",
src: "
#version 450
// The triangle vertex positions.
layout(location = 0) in vec2 position;
void main() {
gl_Position = vec4(position, 0.0, 1.0);
}
"
}
}
mod fs {
vulkano_shaders::shader! {
ty: "fragment",
src: "
#version 450
layout(location = 0) out vec4 f_color;
void main() {
f_color = vec4(1.0, 0.0, 0.0, 1.0);
}
"
}
}
// A simple compute shader that generates vertices. It has two buffers bound: the first is where we output the vertices, the second
// is the IndirectDrawArgs struct we passed the draw_indirect so we can set the number to vertices to draw
mod cs {
vulkano_shaders::shader! {
ty: "compute",
src: "
#version 450
layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in;
layout(set = 0, binding = 0) buffer Output {
vec2 pos[];
} triangles;
layout(set = 0, binding = 1) buffer IndirectDrawArgs {
uint vertices;
uint unused0;
uint unused1;
uint unused2;
};
void main() {
uint idx = gl_GlobalInvocationID.x;
// each thread of compute shader is going to increment the counter, so we need to use atomic
// operations for safety. The previous value of the counter is returned so that gives us
// the offset into the vertex buffer this thread can write it's vertices into.
uint offset = atomicAdd(vertices, 6);
vec2 center = vec2(-0.8, -0.8) + idx * vec2(0.1, 0.1);
triangles.pos[offset + 0] = center + vec2(0.0, 0.0375);
triangles.pos[offset + 1] = center + vec2(0.025, -0.01725);
triangles.pos[offset + 2] = center + vec2(-0.025, -0.01725);
triangles.pos[offset + 3] = center + vec2(0.0, -0.0375);
triangles.pos[offset + 4] = center + vec2(0.025, 0.01725);
triangles.pos[offset + 5] = center + vec2(-0.025, 0.01725);
}
"
}
}
let vs = vs::load(device.clone()).unwrap();
let fs = fs::load(device.clone()).unwrap();
let cs = cs::load(device.clone()).unwrap();
2022-10-26 14:25:01 +00:00
let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
// Each frame we generate a new set of vertices and each frame we need a new DrawIndirectCommand struct to
// set the number of vertices to draw
let indirect_args_pool: CpuBufferPool<DrawIndirectCommand> = CpuBufferPool::new(
2022-10-26 14:25:01 +00:00
memory_allocator.clone(),
BufferUsage {
indirect_buffer: true,
storage_buffer: true,
..BufferUsage::empty()
},
2022-10-26 14:25:01 +00:00
MemoryUsage::Upload,
);
let vertex_pool: CpuBufferPool<Vertex> = CpuBufferPool::new(
2022-10-26 14:25:01 +00:00
memory_allocator,
BufferUsage {
storage_buffer: true,
vertex_buffer: true,
..BufferUsage::empty()
},
2022-10-26 14:25:01 +00:00
MemoryUsage::Upload,
);
let compute_pipeline = ComputePipeline::new(
device.clone(),
cs.entry_point("main").unwrap(),
&(),
None,
|_| {},
)
.unwrap();
let render_pass = single_pass_renderpass!(
device.clone(),
attachments: {
color: {
load: Clear,
store: Store,
format: swapchain.image_format(),
samples: 1,
}
},
pass: {
color: [color],
depth_stencil: {}
}
)
.unwrap();
let render_pipeline = GraphicsPipeline::start()
.vertex_input_state(BuffersDefinition::new().vertex::<Vertex>())
.vertex_shader(vs.entry_point("main").unwrap(), ())
.input_assembly_state(InputAssemblyState::new())
.viewport_state(ViewportState::viewport_dynamic_scissor_irrelevant())
.fragment_shader(fs.entry_point("main").unwrap(), ())
.render_pass(Subpass::from(render_pass.clone(), 0).unwrap())
.build(device.clone())
.unwrap();
let mut viewport = Viewport {
origin: [0.0, 0.0],
dimensions: [0.0, 0.0],
depth_range: 0.0..1.0,
};
let mut framebuffers = window_size_dependent_setup(&images, render_pass.clone(), &mut viewport);
let mut recreate_swapchain = false;
let mut previous_frame_end = Some(sync::now(device.clone()).boxed());
let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone());
let command_buffer_allocator =
StandardCommandBufferAllocator::new(device.clone(), Default::default());
event_loop.run(move |event, _, control_flow| {
match event {
Event::WindowEvent {
event: WindowEvent::CloseRequested,
..
} => {
*control_flow = ControlFlow::Exit;
}
Event::WindowEvent {
event: WindowEvent::Resized(_),
..
} => {
recreate_swapchain = true;
}
Event::RedrawEventsCleared => {
let window = surface.object().unwrap().downcast_ref::<Window>().unwrap();
let dimensions = window.inner_size();
if dimensions.width == 0 || dimensions.height == 0 {
return;
}
previous_frame_end.as_mut().unwrap().cleanup_finished();
if recreate_swapchain {
let (new_swapchain, new_images) =
match swapchain.recreate(SwapchainCreateInfo {
image_extent: dimensions.into(),
..swapchain.create_info()
}) {
Ok(r) => r,
Err(SwapchainCreationError::ImageExtentNotSupported { .. }) => return,
Err(e) => panic!("Failed to recreate swapchain: {:?}", e),
};
swapchain = new_swapchain;
framebuffers = window_size_dependent_setup(
&new_images,
render_pass.clone(),
&mut viewport,
);
recreate_swapchain = false;
}
2022-09-24 06:45:06 +00:00
let (image_index, suboptimal, acquire_future) =
match acquire_next_image(swapchain.clone(), None) {
Ok(r) => r,
Err(AcquireError::OutOfDate) => {
recreate_swapchain = true;
return;
}
Err(e) => panic!("Failed to acquire next image: {:?}", e),
};
if suboptimal {
recreate_swapchain = true;
}
// Allocate a GPU buffer to hold the arguments for this frames draw call. The compute
// shader will only update vertex_count, so set the other parameters correctly here.
let indirect_commands = [DrawIndirectCommand {
vertex_count: 0,
instance_count: 1,
first_vertex: 0,
first_instance: 0,
}];
let indirect_buffer = indirect_args_pool.from_iter(indirect_commands).unwrap();
// Allocate a GPU buffer to hold this frames vertices. This needs to be large enough to hold
// the worst case number of vertices generated by the compute shader
let vertices = vertex_pool
.from_iter((0..(6 * 16)).map(|_| Vertex { position: [0.0; 2] }))
.unwrap();
// Pass the two buffers to the compute shader
let layout = compute_pipeline.layout().set_layouts().get(0).unwrap();
let cs_desciptor_set = PersistentDescriptorSet::new(
&descriptor_set_allocator,
layout.clone(),
[
WriteDescriptorSet::buffer(0, vertices.clone()),
WriteDescriptorSet::buffer(1, indirect_buffer.clone()),
],
)
.unwrap();
let mut builder = AutoCommandBufferBuilder::primary(
&command_buffer_allocator,
queue.queue_family_index(),
CommandBufferUsage::OneTimeSubmit,
)
.unwrap();
// First in the command buffer we dispatch the compute shader to generate the vertices and fill out the draw
// call arguments
builder
.bind_pipeline_compute(compute_pipeline.clone())
.bind_descriptor_sets(
PipelineBindPoint::Compute,
compute_pipeline.layout().clone(),
0,
cs_desciptor_set,
)
.dispatch([1, 1, 1])
.unwrap()
.begin_render_pass(
RenderPassBeginInfo {
clear_values: vec![Some([0.0, 0.0, 1.0, 1.0].into())],
2022-09-24 06:45:06 +00:00
..RenderPassBeginInfo::framebuffer(
framebuffers[image_index as usize].clone(),
)
},
SubpassContents::Inline,
)
.unwrap()
// The indirect draw call is placed in the command buffer with a reference to the GPU buffer that will
// contain the arguments when the draw is executed on the GPU
.set_viewport(0, [viewport.clone()])
.bind_pipeline_graphics(render_pipeline.clone())
.bind_vertex_buffers(0, vertices)
.draw_indirect(indirect_buffer)
.unwrap()
.end_render_pass()
.unwrap();
let command_buffer = builder.build().unwrap();
let future = previous_frame_end
.take()
.unwrap()
.join(acquire_future)
.then_execute(queue.clone(), command_buffer)
.unwrap()
.then_swapchain_present(
queue.clone(),
2022-09-24 06:45:06 +00:00
SwapchainPresentInfo::swapchain_image_index(swapchain.clone(), image_index),
)
.then_signal_fence_and_flush();
match future {
Ok(future) => {
previous_frame_end = Some(future.boxed());
}
Err(FlushError::OutOfDate) => {
recreate_swapchain = true;
previous_frame_end = Some(sync::now(device.clone()).boxed());
}
Err(e) => {
println!("Failed to flush future: {:?}", e);
previous_frame_end = Some(sync::now(device.clone()).boxed());
}
}
}
_ => (),
}
});
}
/// This method is called once during initialization, then again whenever the window is resized
fn window_size_dependent_setup(
images: &[Arc<SwapchainImage>],
render_pass: Arc<RenderPass>,
viewport: &mut Viewport,
) -> Vec<Arc<Framebuffer>> {
let dimensions = images[0].dimensions().width_height();
viewport.dimensions = [dimensions[0] as f32, dimensions[1] as f32];
images
.iter()
.map(|image| {
let view = ImageView::new_default(image.clone()).unwrap();
Framebuffer::new(
render_pass.clone(),
FramebufferCreateInfo {
attachments: vec![view],
..Default::default()
},
)
.unwrap()
})
.collect::<Vec<_>>()
}