Mesh shading example (#2437)

* mesh-shader-triangle example: copied from instancing example

* mesh-shader-triangle example: move shaders to separate files

* mesh-shader example: rename example

* mesh-shader example: implement mesh shader generating geometry

* mesh-shader example: fix instance data indexing partially, still has struct alignment issues

* mesh-shader example: fixed instance buffer alignment issues

* remove unnecessary things

Co-authored-by: marc0246 <40955683+marc0246@users.noreply.github.com>

* mesh-shader example: cargo fmt

* mesh-shader example: rename shaders to end in .glsl

* mesh-shader example: added color out variable, docs

* mesh-shader example: rename shader again

* mesh-shader example: reformat shader code

* mesh-shader example: cargo fmt with nightly

---------

Co-authored-by: Firestar99 <4696087-firestar99@users.noreply.gitlab.com>
Co-authored-by: marc0246 <40955683+marc0246@users.noreply.github.com>
This commit is contained in:
Firestar99 2024-01-02 14:46:27 +01:00 committed by GitHub
parent ff1c71fa33
commit 7788847b04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 626 additions and 0 deletions

View File

@ -0,0 +1,17 @@
# Manifest of the mesh shader example.
[package]
name = "mesh-shader"
version = "0.0.0"
edition = "2021"
# Examples are not meant to be published to a registry.
publish = false
# The example is a single binary built from `main.rs`. Tests, benchmarks and documentation are disabled for this
# target.
[[bin]]
name = "mesh-shader"
path = "main.rs"
test = false
bench = false
doc = false
# All dependency versions are inherited from the workspace.
[dependencies]
# The `macros` feature is enabled explicitly; presumably it provides `#[derive(BufferContents)]` used in `main.rs`
# (TODO confirm against vulkano's feature list).
vulkano = { workspace = true, features = ["macros"] }
# Provides the `shader!` macro that compiles the GLSL files of this example.
vulkano-shaders = { workspace = true }
# Window creation and the event loop.
winit = { workspace = true }

View File

@ -0,0 +1,9 @@
#version 450
// Fragment shader of the mesh shader example: it passes the color it receives through unchanged.
//
// The color received at location 0. The mesh shader writes it per vertex to its own output at location 0.
layout(location = 0) in vec4 in_color;
// The final color written to the color attachment at location 0.
layout(location = 0) out vec4 f_color;
// Writes the incoming color to the output unchanged.
void main() {
f_color = in_color;
}

View File

@ -0,0 +1,503 @@
// Welcome to the mesh shader example!
//
// This is a simple, modified version of the `instancing.rs` example that demonstrates how to use mesh shaders to
// generate geometry that looks identical to the output of the instancing example. We expect you to be familiar with
// both instancing and compute shaders before approaching mesh shaders, due to their high complexity.
//
// This example is intentionally kept simple and does not follow the recommended pattern by which one should emit
// vertices and indices. This pattern should best match what the hardware likes, and thus is unique to each vendor.
//
// See these presentation slides for an overview of mesh shaders and best practices:
// https://vulkan.org/user/pages/09.events/vulkanised-2023/vulkanised_mesh_best_practices_2023.02.09-1.pdf
// Presentation: https://www.youtube.com/watch?v=g9FoZcEQlbA
use std::{error::Error, sync::Arc};
use vulkano::{
buffer::{Buffer, BufferContents, BufferCreateInfo, BufferUsage},
command_buffer::{
allocator::StandardCommandBufferAllocator, CommandBufferBeginInfo, CommandBufferLevel,
CommandBufferUsage, RecordingCommandBuffer, RenderPassBeginInfo,
},
descriptor_set::{
allocator::StandardDescriptorSetAllocator, DescriptorSet, WriteDescriptorSet,
},
device::{
physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, Features,
QueueCreateInfo, QueueFlags,
},
image::{view::ImageView, Image, ImageUsage},
instance::{Instance, InstanceCreateFlags, InstanceCreateInfo},
memory::allocator::{AllocationCreateInfo, MemoryTypeFilter, StandardMemoryAllocator},
padded::Padded,
pipeline::{
graphics::{
color_blend::{ColorBlendAttachmentState, ColorBlendState},
multisample::MultisampleState,
rasterization::RasterizationState,
viewport::{Viewport, ViewportState},
GraphicsPipelineCreateInfo,
},
layout::PipelineDescriptorSetLayoutCreateInfo,
DynamicState, GraphicsPipeline, Pipeline, PipelineBindPoint, PipelineLayout,
PipelineShaderStageCreateInfo,
},
render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass},
single_pass_renderpass,
swapchain::{
acquire_next_image, Surface, Swapchain, SwapchainCreateInfo, SwapchainPresentInfo,
},
sync::{self, GpuFuture},
DeviceSize, Validated, VulkanError, VulkanLibrary,
};
use winit::{
event::{Event, WindowEvent},
event_loop::{ControlFlow, EventLoop},
window::WindowBuilder,
};
/// The vertex type that will be used to describe the triangle's geometry.
///
/// The vertices are not bound as vertex attributes. They are uploaded into a storage buffer which is bound to
/// binding 0 of descriptor set 0, where `mesh.glsl` declares the `vec2 position[]` array of its `VertexBuffer`
/// block.
#[derive(BufferContents)]
#[repr(C)]
struct TriangleVertex {
// The 2D position of the vertex, before the per-instance scale and offset are applied by the mesh shader.
position: [f32; 2],
}
/// The type that describes the unique data per instance.
///
/// This is an alias for the `Instance` struct that the `shader!` macro in the `mesh` module generates from the
/// struct of the same name in `mesh.glsl`. This file fills in its `position_offset` and `scale` fields.
type InstanceData = mesh::Instance;
/// The mesh shader, compiled from `mesh.glsl` by the `shader!` macro.
///
/// Besides `load`, this file uses the generated `Instance` and `InstanceBuffer` types, which mirror the
/// declarations of the same names in the shader.
mod mesh {
vulkano_shaders::shader! {
ty: "mesh",
path: "mesh.glsl",
// NOTE(review): presumably the mesh shader stage requires compiling for a newer Vulkan version than the
// macro's default -- confirm against the vulkano-shaders documentation.
vulkan_version: "1.2",
}
}
/// The fragment shader, compiled from `frag.glsl` by the `shader!` macro.
mod fs {
vulkano_shaders::shader! {
ty: "fragment",
path: "frag.glsl",
}
}
/// Entry point of the example.
///
/// Sets up a Vulkan instance, a window and a device with the `ext_mesh_shader` extension enabled, uploads the
/// triangle and instance data into storage buffers, builds a graphics pipeline consisting of a mesh shader and a
/// fragment shader, and then runs the event loop, drawing a grid of triangles on every redraw.
///
/// Setup failures are not propagated: they panic through `unwrap`/`expect`. The returned value is whatever
/// `EventLoop::run` returns.
fn main() -> Result<(), impl Error> {
let event_loop = EventLoop::new().unwrap();
let library = VulkanLibrary::new().unwrap();
// The instance extensions that are needed to create a surface for this event loop.
let required_extensions = Surface::required_extensions(&event_loop).unwrap();
let instance = Instance::new(
library,
InstanceCreateInfo {
flags: InstanceCreateFlags::ENUMERATE_PORTABILITY,
enabled_extensions: required_extensions,
..Default::default()
},
)
.unwrap();
let window = Arc::new(WindowBuilder::new().build(&event_loop).unwrap());
let surface = Surface::from_window(instance.clone(), window.clone()).unwrap();
// Besides the swapchain extension, the device has to support the mesh shader extension.
let device_extensions = DeviceExtensions {
khr_swapchain: true,
ext_mesh_shader: true,
..DeviceExtensions::empty()
};
// Pick a physical device that supports all required extensions and has a queue family that supports graphics
// operations and can present to our surface. Among those, prefer discrete GPUs over integrated ones, and so on
// down the list below.
let (physical_device, queue_family_index) = instance
.enumerate_physical_devices()
.unwrap()
.filter(|p| p.supported_extensions().contains(&device_extensions))
.filter_map(|p| {
p.queue_family_properties()
.iter()
.enumerate()
.position(|(i, q)| {
q.queue_flags.intersects(QueueFlags::GRAPHICS)
&& p.surface_support(i as u32, &surface).unwrap_or(false)
})
.map(|i| (p, i as u32))
})
.min_by_key(|(p, _)| match p.properties().device_type {
PhysicalDeviceType::DiscreteGpu => 0,
PhysicalDeviceType::IntegratedGpu => 1,
PhysicalDeviceType::VirtualGpu => 2,
PhysicalDeviceType::Cpu => 3,
PhysicalDeviceType::Other => 4,
_ => 5,
})
.unwrap();
println!(
"Using device: {} (type: {:?})",
physical_device.properties().device_name,
physical_device.properties().device_type,
);
// Create the device with a single queue. The `mesh_shader` feature is enabled in addition to the extension.
let (device, mut queues) = Device::new(
physical_device,
DeviceCreateInfo {
enabled_extensions: device_extensions,
enabled_features: Features {
mesh_shader: true,
..Features::default()
},
queue_create_infos: vec![QueueCreateInfo {
queue_family_index,
..Default::default()
}],
..Default::default()
},
)
.unwrap();
let queue = queues.next().unwrap();
// Create the swapchain, using the first surface format and the first composite alpha mode that are reported
// as supported, and the current inner size of the window as the image extent.
let (mut swapchain, images) = {
let surface_capabilities = device
.physical_device()
.surface_capabilities(&surface, Default::default())
.unwrap();
let image_format = device
.physical_device()
.surface_formats(&surface, Default::default())
.unwrap()[0]
.0;
Swapchain::new(
device.clone(),
surface,
SwapchainCreateInfo {
min_image_count: surface_capabilities.min_image_count.max(2),
image_format,
image_extent: window.inner_size().into(),
image_usage: ImageUsage::COLOR_ATTACHMENT,
composite_alpha: surface_capabilities
.supported_composite_alpha
.into_iter()
.next()
.unwrap(),
..Default::default()
},
)
.unwrap()
};
let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new(
device.clone(),
Default::default(),
));
// We now create a buffer that will store the shape of our triangle. This triangle is identical
// to the one in the `triangle.rs` example.
let vertices = [
TriangleVertex {
position: [-0.5, -0.25],
},
TriangleVertex {
position: [0.0, 0.5],
},
TriangleVertex {
position: [0.25, -0.1],
},
];
// Note the `STORAGE_BUFFER` usage: the mesh shader reads the vertices from a storage buffer itself.
let vertex_buffer = Buffer::from_iter(
memory_allocator.clone(),
BufferCreateInfo {
usage: BufferUsage::STORAGE_BUFFER,
..Default::default()
},
AllocationCreateInfo {
memory_type_filter: MemoryTypeFilter::PREFER_DEVICE
| MemoryTypeFilter::HOST_SEQUENTIAL_WRITE,
..Default::default()
},
vertices,
)
.unwrap();
// Now we create another buffer that will store the unique data per instance. For this example,
// we'll have the instances form a 10x10 grid in which the triangles get larger from one cell to the next.
// `rows` and `cols` are duplicated as constants in `mesh.glsl` and have to be kept in sync with them.
let rows = 10;
let cols = 10;
let instances = {
let n_instances = rows * cols;
let mut data = Vec::new();
// The instances are pushed column by column, so the instance of cell (c, r) ends up at index
// `c * rows + r`.
for c in 0..cols {
for r in 0..rows {
let half_cell_w = 0.5 / cols as f32;
let half_cell_h = 0.5 / rows as f32;
let x = half_cell_w + (c as f32 / cols as f32) * 2.0 - 1.0;
let y = half_cell_h + (r as f32 / rows as f32) * 2.0 - 1.0;
let position_offset = [x, y];
// The scale grows linearly with the index of the instance; the first instance has a scale of 0.
let scale = (2.0 / rows as f32) * (c * rows + r) as f32 / n_instances as f32;
data.push(InstanceData {
position_offset,
scale,
});
}
}
data
};
// `mesh::InstanceBuffer` is generated from the buffer block of the same name in the shader. It is unsized, so
// the number of elements of its array is passed explicitly.
let instance_buffer = Buffer::new_unsized::<mesh::InstanceBuffer>(
memory_allocator,
BufferCreateInfo {
usage: BufferUsage::STORAGE_BUFFER,
..Default::default()
},
AllocationCreateInfo {
memory_type_filter: MemoryTypeFilter::PREFER_DEVICE
| MemoryTypeFilter::HOST_SEQUENTIAL_WRITE,
..Default::default()
},
instances.len() as DeviceSize,
)
.unwrap();
// Copy the instances into the buffer. The elements of the generated array are wrapped in `Padded`;
// presumably the padding makes the host-side element size match the array stride used by the shader.
{
let mut guard = instance_buffer.write().unwrap();
for (i, instance) in instances.iter().enumerate() {
guard.instance[i] = Padded(*instance);
}
}
// A render pass with a single subpass and a single color attachment, which is cleared when the pass begins
// and stored when it ends. There is no depth/stencil attachment.
let render_pass = single_pass_renderpass!(
device.clone(),
attachments: {
color: {
format: swapchain.image_format(),
samples: 1,
load_op: Clear,
store_op: Store,
},
},
pass: {
color: [color],
depth_stencil: {},
},
)
.unwrap();
let pipeline = {
let mesh = mesh::load(device.clone())
.unwrap()
.entry_point("main")
.unwrap();
let fs = fs::load(device.clone())
.unwrap()
.entry_point("main")
.unwrap();
// The pipeline has no vertex shader: the mesh shader and the fragment shader are the only stages.
let stages = [
PipelineShaderStageCreateInfo::new(mesh),
PipelineShaderStageCreateInfo::new(fs),
];
// Derive the pipeline layout, including the descriptor set layout of the two storage buffers, from the
// shader stages.
let layout = PipelineLayout::new(
device.clone(),
PipelineDescriptorSetLayoutCreateInfo::from_stages(&stages)
.into_pipeline_layout_create_info(device.clone())
.unwrap(),
)
.unwrap();
let subpass = Subpass::from(render_pass.clone(), 0).unwrap();
// Note that neither a vertex input state nor an input assembly state is specified here.
GraphicsPipeline::new(
device.clone(),
None,
GraphicsPipelineCreateInfo {
stages: stages.into_iter().collect(),
viewport_state: Some(ViewportState::default()),
rasterization_state: Some(RasterizationState::default()),
multisample_state: Some(MultisampleState::default()),
color_blend_state: Some(ColorBlendState::with_attachment_states(
subpass.num_color_attachments(),
ColorBlendAttachmentState::default(),
)),
// The viewport is set while recording the command buffer, see `set_viewport` below.
dynamic_state: [DynamicState::Viewport].into_iter().collect(),
subpass: Some(subpass.into()),
..GraphicsPipelineCreateInfo::layout(layout)
},
)
.unwrap()
};
// Bind the vertex buffer to binding 0 and the instance buffer to binding 1 of descriptor set 0, matching the
// `VertexBuffer` and `InstanceBuffer` declarations in `mesh.glsl`.
let descriptor_set = DescriptorSet::new(
descriptor_set_allocator,
pipeline.layout().set_layouts()[0].clone(),
[
WriteDescriptorSet::buffer(0, vertex_buffer.clone()),
WriteDescriptorSet::buffer(1, instance_buffer.clone()),
],
[],
)
.unwrap();
// The extent of the viewport is filled in by `window_size_dependent_setup`.
let mut viewport = Viewport {
offset: [0.0, 0.0],
extent: [0.0, 0.0],
depth_range: 0.0..=1.0,
};
let mut framebuffers = window_size_dependent_setup(&images, render_pass.clone(), &mut viewport);
// Set whenever the swapchain has to be recreated before the next frame is drawn.
let mut recreate_swapchain = false;
// The future of the most recently submitted frame; the next frame is chained onto it.
let mut previous_frame_end = Some(sync::now(device.clone()).boxed());
let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new(
device.clone(),
Default::default(),
));
event_loop.run(move |event, elwt| {
elwt.set_control_flow(ControlFlow::Poll);
match event {
Event::WindowEvent {
event: WindowEvent::CloseRequested,
..
} => {
elwt.exit();
}
Event::WindowEvent {
event: WindowEvent::Resized(_),
..
} => {
// The swapchain images no longer match the window; recreate the swapchain on the next redraw.
recreate_swapchain = true;
}
Event::WindowEvent {
event: WindowEvent::RedrawRequested,
..
} => {
// Do not draw the frame when one of the window dimensions is zero.
let image_extent: [u32; 2] = window.inner_size().into();
if image_extent.contains(&0) {
return;
}
// Give the previous frame's future the chance to release resources that are no longer in use.
previous_frame_end.as_mut().unwrap().cleanup_finished();
if recreate_swapchain {
// Recreate the swapchain with the new extent, keeping all other parameters, and rebuild
// everything that depends on the swapchain images.
let (new_swapchain, new_images) = swapchain
.recreate(SwapchainCreateInfo {
image_extent,
..swapchain.create_info()
})
.expect("failed to recreate swapchain");
swapchain = new_swapchain;
framebuffers = window_size_dependent_setup(
&new_images,
render_pass.clone(),
&mut viewport,
);
recreate_swapchain = false;
}
// Acquire the image to draw to. If the swapchain is out of date, skip this frame and recreate the
// swapchain on the next redraw.
let (image_index, suboptimal, acquire_future) =
match acquire_next_image(swapchain.clone(), None).map_err(Validated::unwrap) {
Ok(r) => r,
Err(VulkanError::OutOfDate) => {
recreate_swapchain = true;
return;
}
Err(e) => panic!("failed to acquire next image: {e}"),
};
// A suboptimal image is still drawn to, but the swapchain is recreated before the next frame.
if suboptimal {
recreate_swapchain = true;
}
let mut builder = RecordingCommandBuffer::new(
command_buffer_allocator.clone(),
queue.queue_family_index(),
CommandBufferLevel::Primary,
CommandBufferBeginInfo {
usage: CommandBufferUsage::OneTimeSubmit,
..Default::default()
},
)
.unwrap();
builder
.begin_render_pass(
RenderPassBeginInfo {
// Clear the color attachment to opaque blue.
clear_values: vec![Some([0.0, 0.0, 1.0, 1.0].into())],
..RenderPassBeginInfo::framebuffer(
framebuffers[image_index as usize].clone(),
)
},
Default::default(),
)
.unwrap()
.set_viewport(0, [viewport.clone()].into_iter().collect())
.unwrap()
.bind_pipeline_graphics(pipeline.clone())
.unwrap()
// Instead of binding vertex attributes, bind buffers as descriptor sets
.bind_descriptor_sets(
PipelineBindPoint::Graphics,
pipeline.layout().clone(),
0,
descriptor_set.clone(),
)
.unwrap();
// Launch one mesh shader workgroup per grid cell: x is the column and y is the row. Each workgroup
// emits a single triangle.
// NOTE(review): `draw_mesh_tasks` is an unsafe function; the requirements it places on the caller
// are not visible in this file -- confirm against the vulkano documentation.
unsafe {
builder.draw_mesh_tasks([cols, rows, 1]).unwrap();
}
builder.end_render_pass(Default::default()).unwrap();
let command_buffer = builder.end().unwrap();
// Chain the frame onto the previous one: once the previous frame and the image acquisition are
// done, execute the command buffer, present the image, then signal a fence and flush.
let future = previous_frame_end
.take()
.unwrap()
.join(acquire_future)
.then_execute(queue.clone(), command_buffer)
.unwrap()
.then_swapchain_present(
queue.clone(),
SwapchainPresentInfo::swapchain_image_index(swapchain.clone(), image_index),
)
.then_signal_fence_and_flush();
match future.map_err(Validated::unwrap) {
Ok(future) => {
previous_frame_end = Some(future.boxed());
}
// The swapchain went out of date; recreate it and start over with a fresh future.
Err(VulkanError::OutOfDate) => {
recreate_swapchain = true;
previous_frame_end = Some(sync::now(device.clone()).boxed());
}
// Any other error is only reported; rendering continues with a fresh future.
Err(e) => {
println!("failed to flush future: {e}");
previous_frame_end = Some(sync::now(device.clone()).boxed());
}
}
}
// Render continuously: request the next redraw as soon as all pending events have been processed.
Event::AboutToWait => window.request_redraw(),
_ => (),
}
})
}
/// Builds one framebuffer per swapchain image and updates the viewport to cover the images.
///
/// This runs once during initialization and again every time the swapchain is recreated, e.g. after the window
/// has been resized.
///
/// * `images` - the swapchain images; the extent of the first one determines the viewport extent.
/// * `render_pass` - the render pass that the framebuffers are created for.
/// * `viewport` - the viewport whose `extent` is overwritten with the extent of the images.
///
/// Returns the framebuffers in the same order as `images`.
///
/// # Panics
///
/// Panics if `images` is empty, or if creating an image view or a framebuffer fails.
fn window_size_dependent_setup(
    images: &[Arc<Image>],
    render_pass: Arc<RenderPass>,
    viewport: &mut Viewport,
) -> Vec<Arc<Framebuffer>> {
    let image_extent = images[0].extent();
    let width = image_extent[0] as f32;
    let height = image_extent[1] as f32;
    viewport.extent = [width, height];

    let mut framebuffers = Vec::with_capacity(images.len());
    for image in images {
        // Each framebuffer has the default view of its swapchain image as its only attachment.
        let attachment = ImageView::new_default(image.clone()).unwrap();
        let create_info = FramebufferCreateInfo {
            attachments: vec![attachment],
            ..Default::default()
        };
        let framebuffer = Framebuffer::new(render_pass.clone(), create_info).unwrap();
        framebuffers.push(framebuffer);
    }
    framebuffers
}

View File

@ -0,0 +1,97 @@
#version 450
#extension GL_EXT_mesh_shader : require
// In mesh shaders you have to load all data manually from storage buffers, which are declared just like uniform
// buffers, but using the `buffer` keyword. You may not use:
// * `in`: Unlike vertex shaders, mesh shaders do not have an input assembly (IA) stage that pulls data from buffers
// and forwards it to the shader as `in` inputs.
// * `uniform`: Uniform buffers have to be of constant size, but as our buffers may have a varying amount of data,
// they have to be storage buffers instead.
//
// The triangle vertex positions. Bound by `main.rs` to binding 0 of descriptor set 0.
layout(set = 0, binding = 0) buffer VertexBuffer {
vec2 position[];
} buffer_vertex;
// The per-instance data. `main.rs` uses the Rust type generated from this struct to fill the instance buffer.
struct Instance {
// The position of the instance, added to the scaled vertex positions.
vec2 position_offset;
// The factor that the vertex positions are multiplied by.
float scale;
};
// The instance buffer. Bound by `main.rs` to binding 1 of descriptor set 0.
layout(set = 0, binding = 1) buffer InstanceBuffer {
Instance instance[];
} buffer_instance;
// This declaration specifies the workgroup size of the mesh shader, similarly to compute shaders.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
// This declares the type of primitive you want to emit, typically triangles, as well as the maximum number of
// vertices and primitives you may emit. Primitives may only be emitted as lists, i.e. triangle strips and triangle
// fans are not allowed.
layout(triangles, max_vertices = 3, max_primitives = 1) out;
// As mesh shaders may emit multiple vertices, all outputs have to be arrays. See below, where the vertices are
// emitted.
layout(location = 0) out vec4 out_color[];
// The dimensions of the grid of triangles. These have to be kept in sync with `rows` and `cols` in `main.rs`, which
// determine the number of workgroups that are launched and the number of instances in the instance buffer.
const uint rows = 10;
const uint cols = 10;
const uint n_instances = rows * cols;
// Mesh shader entry point. One workgroup consisting of a single invocation is launched per grid cell, with
// `gl_GlobalInvocationID.x` being the column and `gl_GlobalInvocationID.y` being the row of the cell. Each invocation
// emits exactly one triangle: three vertices (position and color) and one set of triangle indices.
void main() {
    vec2 position_offset;
    float scale;
    vec4 color;

    // There are two main use-cases for mesh shaders, switch between them here.
    // They should both draw the same triangles, but with different colors.
    const bool LOAD_FROM_INSTANCE_BUFFER = false;
    if (LOAD_FROM_INSTANCE_BUFFER) {
        // Use-case 1: load instance data from buffers, similarly to doing an instanced draw
        // color triangles red
        color = vec4(1.0, 0.0, 0.0, 1.0);

        // The host fills the instance buffer column by column, i.e. the instance of the cell (c, r) is stored at
        // index `c * rows + r`. Index the buffer the same way, so that every invocation loads the instance of its
        // own cell and the lookup stays within bounds even if `rows` and `cols` differ.
        uint c = gl_GlobalInvocationID.x;
        uint r = gl_GlobalInvocationID.y;
        Instance instance = buffer_instance.instance[c * rows + r];
        position_offset = instance.position_offset;
        scale = instance.scale;
    } else {
        // Use-case 2: generate the geometry dynamically in the mesh shader
        // color triangles green
        color = vec4(0.0, 1.0, 0.0, 1.0);

        uint c = gl_GlobalInvocationID.x;
        uint r = gl_GlobalInvocationID.y;

        // the same algo for generating the triangle data as in the instanced example
        float half_cell_w = 0.5 / float(cols);
        float half_cell_h = 0.5 / float(rows);
        float x = half_cell_w + (c / float(cols)) * 2.0 - 1.0;
        float y = half_cell_h + (r / float(rows)) * 2.0 - 1.0;
        position_offset = vec2(x, y);
        scale = (2.0 / float(rows)) * (c * float(rows) + r) / n_instances;
    }

    // Dynamically set the number of vertices and triangles that you would like to emit; they must not exceed the
    // maximums that were declared above. From the `OpSetMeshOutputsEXT` spec:
    // The arguments are taken from the first invocation in each workgroup. Behavior is undefined if any invocation
    // executes this instruction more than once or under non-uniform control flow. Behavior is undefined if there is
    // any control flow path to an output write that is not preceded by this instruction.
    SetMeshOutputsEXT(
        3, // vertices
        1 // triangles = indices / 3
    );

    // emit vertex data
    for (uint i = 0; i < 3; i++) {
        // As we may emit multiple vertices, all outputs are arrays. You index into them using a unique vertex index
        // within your work group. In this example the work group has the size (1, 1, 1), so each invocation can
        // simply use the indices [0-2]. With larger work groups you will have to use the `gl_LocalInvocationID` to
        // compute indices and make sure they are unique, so results don't get overridden by other invocations.
        out_color[i] = color;

        // just like setting gl_Position in the vertex shader
        gl_MeshVerticesEXT[i].gl_Position = vec4(buffer_vertex.position[i] * scale + position_offset, 0.0, 1.0);
    }

    // emit triangle indices
    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);
}