diff --git a/examples/mesh-shader/Cargo.toml b/examples/mesh-shader/Cargo.toml new file mode 100644 index 00000000..e460ef23 --- /dev/null +++ b/examples/mesh-shader/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "mesh-shader" +version = "0.0.0" +edition = "2021" +publish = false + +[[bin]] +name = "mesh-shader" +path = "main.rs" +test = false +bench = false +doc = false + +[dependencies] +vulkano = { workspace = true, features = ["macros"] } +vulkano-shaders = { workspace = true } +winit = { workspace = true } diff --git a/examples/mesh-shader/frag.glsl b/examples/mesh-shader/frag.glsl new file mode 100644 index 00000000..2dd47aaf --- /dev/null +++ b/examples/mesh-shader/frag.glsl @@ -0,0 +1,9 @@ +#version 450 + +layout(location = 0) in vec4 in_color; + +layout(location = 0) out vec4 f_color; + +void main() { + f_color = in_color; +} diff --git a/examples/mesh-shader/main.rs b/examples/mesh-shader/main.rs new file mode 100644 index 00000000..43c88091 --- /dev/null +++ b/examples/mesh-shader/main.rs @@ -0,0 +1,503 @@ +// Welcome to the mesh shader example! +// +// This is a simple, modified version of the `instancing.rs` example that demonstrates how to use mesh shaders to +// generate geometry, that looks identical to the instancing example. We expect you to be familiar with both +// instancing and compute shaders before approaching mesh shaders, due to their high complexity. +// +// This example is intentionally kept simple and does not follow the recommended pattern by which one should emit +// vertices and indices. This pattern should best match what the hardware likes, and thus is unique to each vendor. 
+// +// See these presentation slides for an overview of mesh shaders and best practices: +// https://vulkan.org/user/pages/09.events/vulkanised-2023/vulkanised_mesh_best_practices_2023.02.09-1.pdf +// Presentation: https://www.youtube.com/watch?v=g9FoZcEQlbA + +use std::{error::Error, sync::Arc}; +use vulkano::{ + buffer::{Buffer, BufferContents, BufferCreateInfo, BufferUsage}, + command_buffer::{ + allocator::StandardCommandBufferAllocator, CommandBufferBeginInfo, CommandBufferLevel, + CommandBufferUsage, RecordingCommandBuffer, RenderPassBeginInfo, + }, + descriptor_set::{ + allocator::StandardDescriptorSetAllocator, DescriptorSet, WriteDescriptorSet, + }, + device::{ + physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, Features, + QueueCreateInfo, QueueFlags, + }, + image::{view::ImageView, Image, ImageUsage}, + instance::{Instance, InstanceCreateFlags, InstanceCreateInfo}, + memory::allocator::{AllocationCreateInfo, MemoryTypeFilter, StandardMemoryAllocator}, + padded::Padded, + pipeline::{ + graphics::{ + color_blend::{ColorBlendAttachmentState, ColorBlendState}, + multisample::MultisampleState, + rasterization::RasterizationState, + viewport::{Viewport, ViewportState}, + GraphicsPipelineCreateInfo, + }, + layout::PipelineDescriptorSetLayoutCreateInfo, + DynamicState, GraphicsPipeline, Pipeline, PipelineBindPoint, PipelineLayout, + PipelineShaderStageCreateInfo, + }, + render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, + single_pass_renderpass, + swapchain::{ + acquire_next_image, Surface, Swapchain, SwapchainCreateInfo, SwapchainPresentInfo, + }, + sync::{self, GpuFuture}, + DeviceSize, Validated, VulkanError, VulkanLibrary, +}; +use winit::{ + event::{Event, WindowEvent}, + event_loop::{ControlFlow, EventLoop}, + window::WindowBuilder, +}; + +/// The vertex type that will be used to describe the triangle's geometry. 
+#[derive(BufferContents)]
+#[repr(C)]
+struct TriangleVertex {
+    position: [f32; 2],
+}
+
+/// The vertex type that describes the unique data per instance.
+type InstanceData = mesh::Instance;
+
+mod mesh {
+    vulkano_shaders::shader! {
+        ty: "mesh",
+        path: "mesh.glsl",
+        vulkan_version: "1.2",
+    }
+}
+
+mod fs {
+    vulkano_shaders::shader! {
+        ty: "fragment",
+        path: "frag.glsl",
+    }
+}
+
+fn main() -> Result<(), impl Error> {
+    let event_loop = EventLoop::new().unwrap();
+
+    let library = VulkanLibrary::new().unwrap();
+    let required_extensions = Surface::required_extensions(&event_loop).unwrap();
+    let instance = Instance::new(
+        library,
+        InstanceCreateInfo {
+            flags: InstanceCreateFlags::ENUMERATE_PORTABILITY,
+            enabled_extensions: required_extensions,
+            ..Default::default()
+        },
+    )
+    .unwrap();
+
+    let window = Arc::new(WindowBuilder::new().build(&event_loop).unwrap());
+    let surface = Surface::from_window(instance.clone(), window.clone()).unwrap();
+
+    let device_extensions = DeviceExtensions {
+        khr_swapchain: true,
+        ext_mesh_shader: true,
+        ..DeviceExtensions::empty()
+    };
+    let (physical_device, queue_family_index) = instance
+        .enumerate_physical_devices()
+        .unwrap()
+        .filter(|p| p.supported_extensions().contains(&device_extensions))
+        .filter_map(|p| {
+            p.queue_family_properties()
+                .iter()
+                .enumerate()
+                .position(|(i, q)| {
+                    q.queue_flags.intersects(QueueFlags::GRAPHICS)
+                        && p.surface_support(i as u32, &surface).unwrap_or(false)
+                })
+                .map(|i| (p, i as u32))
+        })
+        .min_by_key(|(p, _)| match p.properties().device_type {
+            PhysicalDeviceType::DiscreteGpu => 0,
+            PhysicalDeviceType::IntegratedGpu => 1,
+            PhysicalDeviceType::VirtualGpu => 2,
+            PhysicalDeviceType::Cpu => 3,
+            PhysicalDeviceType::Other => 4,
+            _ => 5,
+        })
+        .unwrap();
+
+    println!(
+        "Using device: {} (type: {:?})",
+        physical_device.properties().device_name,
+        physical_device.properties().device_type,
+    );
+
+    let (device, mut queues) = Device::new(
+        physical_device,
+        DeviceCreateInfo {
+            enabled_extensions: device_extensions,
+            enabled_features: Features {
+                mesh_shader: true,
+                ..Features::default()
+            },
+            queue_create_infos: vec![QueueCreateInfo {
+                queue_family_index,
+                ..Default::default()
+            }],
+            ..Default::default()
+        },
+    )
+    .unwrap();
+
+    let queue = queues.next().unwrap();
+
+    let (mut swapchain, images) = {
+        let surface_capabilities = device
+            .physical_device()
+            .surface_capabilities(&surface, Default::default())
+            .unwrap();
+        let image_format = device
+            .physical_device()
+            .surface_formats(&surface, Default::default())
+            .unwrap()[0]
+            .0;
+
+        Swapchain::new(
+            device.clone(),
+            surface,
+            SwapchainCreateInfo {
+                min_image_count: surface_capabilities.min_image_count.max(2),
+                image_format,
+                image_extent: window.inner_size().into(),
+                image_usage: ImageUsage::COLOR_ATTACHMENT,
+                composite_alpha: surface_capabilities
+                    .supported_composite_alpha
+                    .into_iter()
+                    .next()
+                    .unwrap(),
+                ..Default::default()
+            },
+        )
+        .unwrap()
+    };
+
+    let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
+    let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new(
+        device.clone(),
+        Default::default(),
+    ));
+
+    // We now create a buffer that will store the shape of our triangle. This triangle is identical
+    // to the one in the `triangle.rs` example.
+    let vertices = [
+        TriangleVertex {
+            position: [-0.5, -0.25],
+        },
+        TriangleVertex {
+            position: [0.0, 0.5],
+        },
+        TriangleVertex {
+            position: [0.25, -0.1],
+        },
+    ];
+    let vertex_buffer = Buffer::from_iter(
+        memory_allocator.clone(),
+        BufferCreateInfo {
+            usage: BufferUsage::STORAGE_BUFFER,
+            ..Default::default()
+        },
+        AllocationCreateInfo {
+            memory_type_filter: MemoryTypeFilter::PREFER_DEVICE
+                | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE,
+            ..Default::default()
+        },
+        vertices,
+    )
+    .unwrap();
+
+    // Now we create another buffer that will store the unique data per instance. For this example,
+    // we'll have the instances form a 10x10 grid that slowly gets larger.
+    let rows = 10;
+    let cols = 10;
+    let instances = {
+        let n_instances = rows * cols;
+        let mut data = Vec::new();
+        for c in 0..cols {
+            for r in 0..rows {
+                let half_cell_w = 0.5 / cols as f32;
+                let half_cell_h = 0.5 / rows as f32;
+                let x = half_cell_w + (c as f32 / cols as f32) * 2.0 - 1.0;
+                let y = half_cell_h + (r as f32 / rows as f32) * 2.0 - 1.0;
+                let position_offset = [x, y];
+                let scale = (2.0 / rows as f32) * (c * rows + r) as f32 / n_instances as f32;
+                data.push(InstanceData {
+                    position_offset,
+                    scale,
+                });
+            }
+        }
+        data
+    };
+    let instance_buffer = Buffer::new_unsized::<mesh::InstanceBuffer>(
+        memory_allocator,
+        BufferCreateInfo {
+            usage: BufferUsage::STORAGE_BUFFER,
+            ..Default::default()
+        },
+        AllocationCreateInfo {
+            memory_type_filter: MemoryTypeFilter::PREFER_DEVICE
+                | MemoryTypeFilter::HOST_SEQUENTIAL_WRITE,
+            ..Default::default()
+        },
+        instances.len() as DeviceSize,
+    )
+    .unwrap();
+    {
+        let mut guard = instance_buffer.write().unwrap();
+        for (i, instance) in instances.iter().enumerate() {
+            guard.instance[i] = Padded(*instance);
+        }
+    }
+
+    let render_pass = single_pass_renderpass!(
+        device.clone(),
+        attachments: {
+            color: {
+                format: swapchain.image_format(),
+                samples: 1,
+                load_op: Clear,
+                store_op: Store,
+            },
+        },
+        pass: {
+            color: [color],
+            depth_stencil: {},
+        },
+    )
+    .unwrap();
+
+    let pipeline = {
+        let mesh = mesh::load(device.clone())
+            .unwrap()
+            .entry_point("main")
+            .unwrap();
+        let fs = fs::load(device.clone())
+            .unwrap()
+            .entry_point("main")
+            .unwrap();
+        let stages = [
+            PipelineShaderStageCreateInfo::new(mesh),
+            PipelineShaderStageCreateInfo::new(fs),
+        ];
+        let layout = PipelineLayout::new(
+            device.clone(),
+            PipelineDescriptorSetLayoutCreateInfo::from_stages(&stages)
+                .into_pipeline_layout_create_info(device.clone())
+                .unwrap(),
+        )
+        .unwrap();
+        let subpass = Subpass::from(render_pass.clone(), 0).unwrap();
+
+        GraphicsPipeline::new(
+            device.clone(),
+            None,
+            GraphicsPipelineCreateInfo {
+                stages: stages.into_iter().collect(),
+                viewport_state: Some(ViewportState::default()),
+                rasterization_state: Some(RasterizationState::default()),
+                multisample_state: Some(MultisampleState::default()),
+                color_blend_state: Some(ColorBlendState::with_attachment_states(
+                    subpass.num_color_attachments(),
+                    ColorBlendAttachmentState::default(),
+                )),
+                dynamic_state: [DynamicState::Viewport].into_iter().collect(),
+                subpass: Some(subpass.into()),
+                ..GraphicsPipelineCreateInfo::layout(layout)
+            },
+        )
+        .unwrap()
+    };
+
+    let descriptor_set = DescriptorSet::new(
+        descriptor_set_allocator,
+        pipeline.layout().set_layouts()[0].clone(),
+        [
+            WriteDescriptorSet::buffer(0, vertex_buffer.clone()),
+            WriteDescriptorSet::buffer(1, instance_buffer.clone()),
+        ],
+        [],
+    )
+    .unwrap();
+
+    let mut viewport = Viewport {
+        offset: [0.0, 0.0],
+        extent: [0.0, 0.0],
+        depth_range: 0.0..=1.0,
+    };
+    let mut framebuffers = window_size_dependent_setup(&images, render_pass.clone(), &mut viewport);
+    let mut recreate_swapchain = false;
+    let mut previous_frame_end = Some(sync::now(device.clone()).boxed());
+
+    let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new(
+        device.clone(),
+        Default::default(),
+    ));
+
+    event_loop.run(move |event, elwt| {
+        elwt.set_control_flow(ControlFlow::Poll);
+
+        match event {
+            Event::WindowEvent {
+                event: WindowEvent::CloseRequested,
+                ..
+            } => {
+                elwt.exit();
+            }
+            Event::WindowEvent {
+                event: WindowEvent::Resized(_),
+                ..
+            } => {
+                recreate_swapchain = true;
+            }
+            Event::WindowEvent {
+                event: WindowEvent::RedrawRequested,
+                ..
+            } => {
+                let image_extent: [u32; 2] = window.inner_size().into();
+
+                if image_extent.contains(&0) {
+                    return;
+                }
+
+                previous_frame_end.as_mut().unwrap().cleanup_finished();
+
+                if recreate_swapchain {
+                    let (new_swapchain, new_images) = swapchain
+                        .recreate(SwapchainCreateInfo {
+                            image_extent,
+                            ..swapchain.create_info()
+                        })
+                        .expect("failed to recreate swapchain");
+
+                    swapchain = new_swapchain;
+                    framebuffers = window_size_dependent_setup(
+                        &new_images,
+                        render_pass.clone(),
+                        &mut viewport,
+                    );
+                    recreate_swapchain = false;
+                }
+
+                let (image_index, suboptimal, acquire_future) =
+                    match acquire_next_image(swapchain.clone(), None).map_err(Validated::unwrap) {
+                        Ok(r) => r,
+                        Err(VulkanError::OutOfDate) => {
+                            recreate_swapchain = true;
+                            return;
+                        }
+                        Err(e) => panic!("failed to acquire next image: {e}"),
+                    };
+
+                if suboptimal {
+                    recreate_swapchain = true;
+                }
+
+                let mut builder = RecordingCommandBuffer::new(
+                    command_buffer_allocator.clone(),
+                    queue.queue_family_index(),
+                    CommandBufferLevel::Primary,
+                    CommandBufferBeginInfo {
+                        usage: CommandBufferUsage::OneTimeSubmit,
+                        ..Default::default()
+                    },
+                )
+                .unwrap();
+
+                builder
+                    .begin_render_pass(
+                        RenderPassBeginInfo {
+                            clear_values: vec![Some([0.0, 0.0, 1.0, 1.0].into())],
+                            ..RenderPassBeginInfo::framebuffer(
+                                framebuffers[image_index as usize].clone(),
+                            )
+                        },
+                        Default::default(),
+                    )
+                    .unwrap()
+                    .set_viewport(0, [viewport.clone()].into_iter().collect())
+                    .unwrap()
+                    .bind_pipeline_graphics(pipeline.clone())
+                    .unwrap()
+                    // Instead of binding vertex attributes, bind buffers as descriptor sets
+                    .bind_descriptor_sets(
+                        PipelineBindPoint::Graphics,
+                        pipeline.layout().clone(),
+                        0,
+                        descriptor_set.clone(),
+                    )
+                    .unwrap();
+
+                unsafe { // SAFETY: one workgroup per instance; shader reads stay in bounds.
+                    builder.draw_mesh_tasks([cols, rows, 1]).unwrap();
+                }
+
+                builder.end_render_pass(Default::default()).unwrap();
+
+                let command_buffer = builder.end().unwrap();
+                let future = previous_frame_end
+                    .take()
+                    .unwrap()
+                    .join(acquire_future)
+                    .then_execute(queue.clone(), command_buffer)
+                    .unwrap()
+                    .then_swapchain_present(
+                        queue.clone(),
+                        SwapchainPresentInfo::swapchain_image_index(swapchain.clone(), image_index),
+                    )
+                    .then_signal_fence_and_flush();
+
+                match future.map_err(Validated::unwrap) {
+                    Ok(future) => {
+                        previous_frame_end = Some(future.boxed());
+                    }
+                    Err(VulkanError::OutOfDate) => {
+                        recreate_swapchain = true;
+                        previous_frame_end = Some(sync::now(device.clone()).boxed());
+                    }
+                    Err(e) => {
+                        println!("failed to flush future: {e}");
+                        previous_frame_end = Some(sync::now(device.clone()).boxed());
+                    }
+                }
+            }
+            Event::AboutToWait => window.request_redraw(),
+            _ => (),
+        }
+    })
+}
+
+/// This function is called once during initialization, then again whenever the window is resized.
+fn window_size_dependent_setup(
+    images: &[Arc<Image>],
+    render_pass: Arc<RenderPass>,
+    viewport: &mut Viewport,
+) -> Vec<Arc<Framebuffer>> {
+    let extent = images[0].extent();
+    viewport.extent = [extent[0] as f32, extent[1] as f32];
+
+    images
+        .iter()
+        .map(|image| {
+            let view = ImageView::new_default(image.clone()).unwrap();
+            Framebuffer::new(
+                render_pass.clone(),
+                FramebufferCreateInfo {
+                    attachments: vec![view],
+                    ..Default::default()
+                },
+            )
+            .unwrap()
+        })
+        .collect::<Vec<_>>()
+}
diff --git a/examples/mesh-shader/mesh.glsl b/examples/mesh-shader/mesh.glsl
new file mode 100644
index 00000000..2ef27d08
--- /dev/null
+++ b/examples/mesh-shader/mesh.glsl
@@ -0,0 +1,97 @@
+#version 450
+#extension GL_EXT_mesh_shader : require
+
+// In mesh shaders you have to load all data manually from storage buffers, which are declared just like uniform
+// buffers, but using the `buffer` keyword. You may not use:
+// * `in`: Unlike vertex shaders, Mesh shaders do not have an input assembly (IA) stage that pulls data from buffers
+// and forwards them to the vertex shaders as `in` inputs.
+// * `uniform`: Uniform buffers have to be of constant size, but as our buffers may have a varying amount of data,
+// they have to be storage buffers instead.
+//
+// The triangle vertex positions.
+layout(set = 0, binding = 0) buffer VertexBuffer {
+    vec2 position[];
+} buffer_vertex;
+
+// The per-instance data.
+struct Instance {
+    vec2 position_offset;
+    float scale;
+};
+
+layout(set = 0, binding = 1) buffer InstanceBuffer {
+    Instance instance[];
+} buffer_instance;
+
+// This declaration specifies the workgroup size of the mesh shader, similarly to compute shaders
+layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+// This declares the type of primitive you want to emit, typically triangles, as well as maximum amount of vertices
+// and primitives you may emit. Primitives may only be in lists, i.e. triangle_strip or triangle_fan are not allowed.
+layout(triangles, max_vertices = 3, max_primitives = 1) out;
+
+// As mesh shaders may emit multiple vertices, all outputs have to be an array. See below, when vertices are emitted.
+layout(location = 0) out vec4 out_color[];
+
+const uint rows = 10;
+const uint cols = 10;
+const uint n_instances = rows * cols;
+
+void main() {
+    vec2 position_offset;
+    float scale;
+    vec4 color;
+
+    // There are two main use-cases for mesh shaders, switch between them here.
+    // They should both draw the same triangles, but with different colors.
+    const bool LOAD_FROM_INSTANCE_BUFFER = false;
+
+    if (LOAD_FROM_INSTANCE_BUFFER) {
+        // Use-case 1: load instance data from buffers, similarly to doing an instanced draw
+        // color triangles red
+        color = vec4(1.0, 0.0, 0.0, 1.0);
+
+        Instance instance = buffer_instance.instance[gl_GlobalInvocationID.x * rows + gl_GlobalInvocationID.y];
+        position_offset = instance.position_offset;
+        scale = instance.scale;
+
+    } else {
+        // Use-case 2: generate the geometry dynamically in the mesh shader
+        // color triangles green
+        color = vec4(0.0, 1.0, 0.0, 1.0);
+
+        uint c = gl_GlobalInvocationID.x;
+        uint r = gl_GlobalInvocationID.y;
+
+        // the same algo for generating the triangle data as in the instanced example
+        float half_cell_w = 0.5 / float(cols);
+        float half_cell_h = 0.5 / float(rows);
+        float x = half_cell_w + (c / float(cols)) * 2.0 - 1.0;
+        float y = half_cell_h + (r / float(rows)) * 2.0 - 1.0;
+        position_offset = vec2(x, y);
+        scale = (2.0 / float(rows)) * (c * float(rows) + r) / n_instances;
+    }
+
+    // Dynamically set the amount of vertices and triangles that you would like to emit; they must not exceed what was
+    // declared above. From the `OpSetMeshOutputsEXT` spec:
+    // The arguments are taken from the first invocation in each workgroup. Behavior is undefined if any invocation
+    // executes this instruction more than once or under non-uniform control flow. Behavior is undefined if there is
+    // any control flow path to an output write that is not preceded by this instruction.
+    SetMeshOutputsEXT(
+        3, // vertices
+        1 // triangles = indices / 3
+    );
+
+    // emit vertex data
+    for (uint i = 0; i < 3; i++) {
+        // As we may emit multiple vertices, all outputs are arrays. You index into them using a unique vertex index
+        // within your work group. In this example the work group has the size (1, 1, 1), so each invocation can
+        // simply use the indices [0-2]. With larger work groups you will have to use the `gl_LocalInvocationID` to
+        // compute indices and make sure they are unique, so results don't get overridden by other invocations.
+        out_color[i] = color;
+        // just like setting gl_Position in the vertex shader
+        gl_MeshVerticesEXT[i].gl_Position = vec4(buffer_vertex.position[i] * scale + position_offset, 0.0, 1.0);
+    }
+
+    // emit triangle indices
+    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);
+}