Compare commits

...

1 Commits

Author SHA1 Message Date
Kevin Trogant
c254ffee6b start work on meshlet dag algorithm
All checks were successful
Ubuntu Cross to Win64 / Cross Compile with ming64 (1.4.0, ubuntu-latest) (push) Successful in 1m40s
2024-07-20 14:15:04 +02:00
7 changed files with 284 additions and 143 deletions

View File

@ -7,8 +7,6 @@ struct mesh_data {
struct meshlet_data { struct meshlet_data {
vec4 bounds; // xyz, radius vec4 bounds; // xyz, radius
vec4 cone_apex; // xyz, unused
vec4 cone_axis_cutoff; // xyz, cutoff = w
uint mesh_index; uint mesh_index;
uint first_index; uint first_index;
uint base_vertex; uint base_vertex;
@ -44,17 +42,10 @@ void main() {
meshlet_data meshlet = meshlets[meshlet_idx]; meshlet_data meshlet = meshlets[meshlet_idx];
mesh_data mesh = meshes[meshlet.mesh_index]; mesh_data mesh = meshes[meshlet.mesh_index];
vec3 cone_axis = (mesh.model * vec4(meshlet.cone_axis_cutoff.xyz, 0)).xyz; uint draw_idx = atomicAdd(draw_count, 1);
vec3 cone_apex = (mesh.model * vec4(meshlet.cone_apex.xyz, 1)).xyz; draw_commands[draw_idx].count = meshlet.index_count;
float cone_cutoff = meshlet.cone_axis_cutoff.w; draw_commands[draw_idx].instance_count = 1;
draw_commands[draw_idx].first_index = meshlet.first_index;
draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex);
if (dot(normalize(cone_apex - camera_pos), cone_axis) < cone_cutoff) { draw_commands[draw_idx].base_instance = 0;
uint draw_idx = atomicAdd(draw_count, 1);
draw_commands[draw_idx].count = meshlet.index_count;
draw_commands[draw_idx].instance_count = 1;
draw_commands[draw_idx].first_index = meshlet.first_index;
draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex);
draw_commands[draw_idx].base_instance = 0;
}
} }

View File

@ -112,7 +112,7 @@ struct file_picker {
} }
/* Returns true if a new file was selected */ /* Returns true if a new file was selected */
bool RunFlat() { bool Run() {
bool opened=false; bool opened=false;
if (ImGui::Begin("File Selection", &m_open)) { if (ImGui::Begin("File Selection", &m_open)) {
ImGui::InputTextWithHint("Path", ImGui::InputTextWithHint("Path",
@ -125,6 +125,7 @@ struct file_picker {
strcpy(m_input_buf[m_active_input], m_picked); strcpy(m_input_buf[m_active_input], m_picked);
opened = true; opened = true;
} }
ImGui::Checkbox("Flat", &m_flat);
ImGui::End(); ImGui::End();
} }
return opened; return opened;
@ -138,6 +139,7 @@ struct file_picker {
return m_picked != nullptr; return m_picked != nullptr;
} }
bool m_flat = true;
const char *m_picked=nullptr; const char *m_picked=nullptr;
bool m_open = true; bool m_open = true;
char m_input_buf[2][260]; char m_input_buf[2][260];
@ -191,15 +193,22 @@ int main() {
meshlet_renderer ren; meshlet_renderer ren;
ren.Initialize(); ren.Initialize();
bool flat = true;
uint32_t root_idx;
while (!glfwWindowShouldClose(window)) { while (!glfwWindowShouldClose(window)) {
glfwPollEvents(); glfwPollEvents();
ImGui_ImplOpenGL3_NewFrame(); ImGui_ImplOpenGL3_NewFrame();
ImGui_ImplGlfw_NewFrame(); ImGui_ImplGlfw_NewFrame();
ImGui::NewFrame(); ImGui::NewFrame();
if (picker.RunFlat()) { if (picker.Run()) {
gen.LoadObj(picker.GetPicked()); gen.LoadObj(picker.GetPicked());
gen.RunFlat(); flat = picker.m_flat;
if (flat)
gen.RunFlat();
else
gen.RunHierarchical(0,&root_idx);
} }
ren.SettingMenu(); ren.SettingMenu();
@ -212,7 +221,8 @@ int main() {
if (picker.HasPickedFile()) { if (picker.HasPickedFile()) {
ren.m_aspect = (float)display_w / (float)display_h; ren.m_aspect = (float)display_w / (float)display_h;
ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets); if (flat)
ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets);
} }
ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());

View File

@ -5,6 +5,9 @@
#include <tiny_obj_loader.h> #include <tiny_obj_loader.h>
#pragma warning(pop) #pragma warning(pop)
#include <unordered_map>
#include <vector>
#include "meshlet_generator.hpp" #include "meshlet_generator.hpp"
#include <runtime/mem_arena.h> #include <runtime/mem_arena.h>
@ -118,13 +121,13 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices, size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
MESHLET_VERTICES, L0_MESHLET_VERTICES,
MESHLET_TRIANGLES); L0_MESHLET_TRIANGLES);
meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets); meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
unsigned int *meshlet_vertices = unsigned int *meshlet_vertices =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *MESHLET_VERTICES); RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *L0_MESHLET_VERTICES);
unsigned char *meshlet_triangles = unsigned char *meshlet_triangles =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *MESHLET_INDICES); RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *L0_MESHLET_INDICES);
size_t meshlet_count = meshopt_buildMeshlets(meshlets, size_t meshlet_count = meshopt_buildMeshlets(meshlets,
meshlet_vertices, meshlet_vertices,
@ -134,8 +137,8 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
&m_meshes[mesh_idx].vertices[0].vx, &m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices, m_meshes[mesh_idx].num_vertices,
sizeof(vertex), sizeof(vertex),
MESHLET_VERTICES, L0_MESHLET_VERTICES,
MESHLET_TRIANGLES, L0_MESHLET_TRIANGLES,
cone_weight); cone_weight);
m_meshlets = new meshlet[meshlet_count]; m_meshlets = new meshlet[meshlet_count];
@ -155,24 +158,26 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
m_meshes[mesh_idx].num_vertices, m_meshes[mesh_idx].num_vertices,
sizeof(vertex)); sizeof(vertex));
m_meshlets[i].vertices = new vertex[MESHLET_VERTICES]; m_meshlets[i].vertices = new vertex[L0_MESHLET_VERTICES];
m_meshlets[i].indices = new uint8_t[MESHLET_INDICES]; m_meshlets[i].indices = new uint16_t[L0_MESHLET_INDICES];
m_meshlets[i].num_vertices = meshlets[i].vertex_count; m_meshlets[i].num_vertices = meshlets[i].vertex_count;
m_meshlets[i].num_indices = meshlets[i].triangle_count * 3; m_meshlets[i].num_indices = meshlets[i].triangle_count * 3;
memcpy(m_meshlets[i].center, bounds.center, sizeof(bounds.center)); memcpy(m_meshlets[i].center, bounds.center, sizeof(bounds.center));
m_meshlets[i].radius = bounds.radius; m_meshlets[i].radius = bounds.radius;
memcpy(m_meshlets[i].cone_axis, bounds.cone_axis, sizeof(bounds.cone_axis));
m_meshlets[i].cone_cutoff = bounds.cone_cutoff;
memcpy(m_meshlets[i].cone_apex, bounds.cone_apex, sizeof(bounds.cone_apex));
for (unsigned int vert_idx = 0; vert_idx < meshlets[i].vertex_count; ++vert_idx) { for (unsigned int vert_idx = 0; vert_idx < meshlets[i].vertex_count; ++vert_idx) {
unsigned int vert = meshlet_vertices[meshlets[i].vertex_offset + vert_idx]; unsigned int vert = meshlet_vertices[meshlets[i].vertex_offset + vert_idx];
m_meshlets[i].vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert]; m_meshlets[i].vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert];
} }
memcpy(m_meshlets[i].indices, for (unsigned int tri_idx = 0; tri_idx < meshlets[i].triangle_count; ++tri_idx) {
meshlet_triangles + meshlets[i].triangle_offset, m_meshlets[i].indices[tri_idx * 3 + 0] =
meshlets[i].triangle_count * 3); (uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 0];
m_meshlets[i].indices[tri_idx * 3 + 1] =
(uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 1];
m_meshlets[i].indices[tri_idx * 3 + 2] =
(uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 2];
}
m_meshlets[i].num_children = 0u; m_meshlets[i].num_children = 0u;
} }
@ -184,49 +189,81 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices, // Convert meshopt meshlets into our meshlets, each with its own vertex and index buffer
MESHLET_VERTICES, std::vector<meshlet> meshlets;
MESHLET_TRIANGLES); {
meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets); size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
unsigned int *meshlet_vertices = L0_MESHLET_VERTICES,
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *MESHLET_VERTICES); L0_MESHLET_TRIANGLES);
unsigned char *meshlet_triangles = meshopt_Meshlet *meshopt_meshlets =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *MESHLET_INDICES); RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
meshopt_Bounds *meshlet_bounds = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Bounds, max_meshlets); unsigned int *meshlet_vertices =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *L0_MESHLET_VERTICES);
unsigned char *meshlet_triangles =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *L0_MESHLET_INDICES);
uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshlets, uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshopt_meshlets,
meshlet_vertices, meshlet_vertices,
meshlet_triangles, meshlet_triangles,
m_meshes[mesh_idx].indices, m_meshes[mesh_idx].indices,
m_meshes[mesh_idx].num_indices, m_meshes[mesh_idx].num_indices,
&m_meshes[mesh_idx].vertices[0].vx, &m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices, m_meshes[mesh_idx].num_vertices,
sizeof(vertex), sizeof(vertex),
MESHLET_VERTICES, L0_MESHLET_VERTICES,
MESHLET_TRIANGLES, L0_MESHLET_TRIANGLES,
cone_weight); cone_weight);
for (size_t i = 0; i < meshlet_count; ++i) { for (size_t i = 0; i < meshlet_count; ++i) {
meshopt_optimizeMeshlet(&meshlet_vertices[meshlets[i].vertex_offset], meshopt_optimizeMeshlet(&meshlet_vertices[meshopt_meshlets[i].vertex_offset],
&meshlet_triangles[meshlets[i].triangle_offset], &meshlet_triangles[meshopt_meshlets[i].triangle_offset],
meshlets[i].triangle_count, meshopt_meshlets[i].triangle_count,
meshlets[i].vertex_count); meshopt_meshlets[i].vertex_count);
meshlet_bounds[i] = meshopt_Bounds bounds = meshopt_computeMeshletBounds(
meshopt_computeMeshletBounds(&meshlet_vertices[meshlets[i].vertex_offset], &meshlet_vertices[meshopt_meshlets[i].vertex_offset],
&meshlet_triangles[meshlets[i].triangle_offset], &meshlet_triangles[meshopt_meshlets[i].triangle_offset],
meshlets[i].triangle_count, meshopt_meshlets[i].triangle_count,
&m_meshes[mesh_idx].vertices[0].vx, &m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices, m_meshes[mesh_idx].num_vertices,
sizeof(vertex)); sizeof(vertex));
meshlet meshlet;
meshlet.vertices = new vertex[L0_MESHLET_VERTICES];
meshlet.indices = new uint16_t[L0_MESHLET_INDICES];
meshlet.num_vertices = meshopt_meshlets[i].vertex_count;
meshlet.num_indices = meshopt_meshlets[i].triangle_count * 3;
memcpy(meshlet.center, bounds.center, sizeof(bounds.center));
meshlet.radius = bounds.radius;
for (unsigned int vert_idx = 0; vert_idx < meshopt_meshlets[i].vertex_count;
++vert_idx) {
unsigned int vert = meshlet_vertices[meshopt_meshlets[i].vertex_offset + vert_idx];
meshlet.vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert];
}
for (unsigned int tri_idx = 0; tri_idx < meshopt_meshlets[i].triangle_count;
++tri_idx) {
meshlet.indices[tri_idx * 3 + 0] = (uint16_t)
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 0];
meshlet.indices[tri_idx * 3 + 1] = (uint16_t)
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 1];
meshlet.indices[tri_idx * 3 + 2] = (uint16_t)
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 2];
}
meshlet.num_children = 0u;
meshlets.push_back(meshlet);
}
} }
// We now have a flat list of meshlets -> the highest lod ones // We now have a flat list of meshlets -> the highest lod ones
// We now combine (up to 8) meshlets into one to generate the next hierarchy level // We now combine (up to 8) meshlets into one to generate the next hierarchy level
// Repeat until we only have 1 meshlet left // Repeat until we only have 1 meshlet left
std::vector<uint32_t> unprocessed; std::vector<uint32_t> unprocessed;
unprocessed.reserve(meshlet_count); unprocessed.reserve(meshlets.size());
for (uint32_t i = 0; i < meshlet_count; ++i) { for (uint32_t i = 0; i < meshlets.size(); ++i) {
unprocessed.push_back(i); unprocessed.push_back(i);
} }
std::vector<uint32_t> next_level; std::vector<uint32_t> next_level;
@ -241,11 +278,11 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
unsigned int closest_count = 0u; unsigned int closest_count = 0u;
float first_center[3]; float first_center[3];
memcpy(first_center, meshlet_bounds[first].center, sizeof(float) * 3); memcpy(first_center, meshlets[first].center, sizeof(float) * 3);
for (uint32_t i = 0; i < unprocessed.size(); ++i) { for (uint32_t i = 0; i < unprocessed.size(); ++i) {
float center[3]; float center[3];
memcpy(center, meshlet_bounds[unprocessed[i]].center, sizeof(float) * 3); memcpy(center, meshlets[unprocessed[i]].center, sizeof(float) * 3);
float dist = sqrtf((center[0] - first_center[0]) * (center[0] - first_center[0]) + float dist = sqrtf((center[0] - first_center[0]) * (center[0] - first_center[0]) +
(center[1] - first_center[1]) * (center[1] - first_center[1]) + (center[1] - first_center[1]) * (center[1] - first_center[1]) +
@ -255,7 +292,7 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
// Check if we are closer than one of the other candidates // Check if we are closer than one of the other candidates
for (unsigned int j = 0; j < closest_count; ++j) { for (unsigned int j = 0; j < closest_count; ++j) {
uint32_t highest_idx = UINT_MAX; uint32_t highest_idx = UINT_MAX;
float highest_dist = dist; float highest_dist = dist;
if (dist < distances[j]) { if (dist < distances[j]) {
if (distances[j] > highest_dist) { if (distances[j] > highest_dist) {
highest_dist = distances[j]; highest_dist = distances[j];
@ -263,14 +300,14 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
} }
} }
if (highest_idx < RT_ARRAY_COUNT(closest)) { if (highest_idx < RT_ARRAY_COUNT(closest)) {
const uint32_t replaced = highest_idx; const uint32_t replaced = closest[highest_idx];
distances[j] = dist; distances[j] = dist;
closest[j] = i; closest[j] = unprocessed[i];
unprocessed.push_back(replaced); unprocessed.push_back(replaced);
} }
} }
} else { } else {
closest[closest_count] = i; closest[closest_count] = unprocessed[i];
distances[closest_count] = dist; distances[closest_count] = dist;
closest_count++; closest_count++;
unprocessed.erase(unprocessed.begin() + i); unprocessed.erase(unprocessed.begin() + i);
@ -279,6 +316,81 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
// Combine into a new meshlet // Combine into a new meshlet
// vertex *vertices = new vertex[MESHLET_VERTICES * 8]; // vertex *vertices = new vertex[MESHLET_VERTICES * 8];
vertex *in_vertices = new vertex[L0_MESHLET_VERTICES * 8];
uint32_t *in_indices = new uint32_t[L0_MESHLET_INDICES * 8];
memcpy(in_vertices,
meshlets[first].vertices,
sizeof(vertex) * meshlets[first].num_vertices);
size_t at_vert = meshlets[first].num_vertices;
for (unsigned int i = 0; i < meshlets[first].num_indices; ++i)
in_indices[i] = static_cast<uint32_t>(meshlets[first].indices[i]);
size_t at_idx = meshlets[first].num_indices;
for (unsigned int i = 0; i < closest_count; ++i) {
memcpy(&in_vertices[at_vert],
meshlets[closest[i]].vertices,
sizeof(vertex) * meshlets[closest[i]].num_vertices);
at_vert += meshlets[closest[i]].num_vertices;
for (unsigned int j = 0; j < meshlets[closest[i]].num_indices; ++j)
in_indices[at_idx + j] = static_cast<uint32_t>(meshlets[closest[i]].indices[j]);
at_idx += meshlets[closest[i]].num_indices;
}
const float target_error =
0.10f; // Acept 10% error. We are simplifying meshlets, the extents are not very large
const size_t target_index_count = L0_MESHLET_INDICES;
const unsigned int simplify_options = meshopt_SimplifyLockBorder;
const float threshold = 1.f / 8.f;
std::vector<unsigned int> lod(UINT16_MAX);
float lod_error = 0.f;
size_t out_index_count = meshopt_simplify(&lod[0],
in_indices,
at_idx,
&in_vertices[0].vx,
at_vert,
sizeof(vertex),
target_index_count,
target_error,
simplify_options,
&lod_error);
lod.resize(out_index_count);
if (out_index_count < UINT16_MAX) {
rtLog("EXP", "Yay");
} else {
rtReportError("EXP", "Nay");
}
std::unordered_map<unsigned int, uint16_t> index_remap;
uint16_t *index_buffer = new uint16_t[out_index_count];
std::vector<vertex> merged_vertices;
merged_vertices.reserve(out_index_count);
for (size_t i = 0; i < out_index_count; ++i) {
if (index_remap.find(lod[i]) != index_remap.end()) {
index_buffer[i] = index_remap[lod[i]];
} else {
uint16_t index = (uint16_t)merged_vertices.size();
merged_vertices.push_back(in_vertices[lod[i]]);
index_buffer[i] = index;
index_remap[lod[i]] = index;
}
}
meshlet merged = {};
merged.num_children = closest_count + 1;
merged.children[0] = first;
memcpy(merged.children, closest, sizeof(uint32_t) * closest_count);
merged.indices = index_buffer;
merged.num_indices = (uint32_t)out_index_count;
merged.vertices = new vertex[merged_vertices.size()];
merged.num_vertices = (uint32_t)merged_vertices.size();
memcpy(merged.vertices, merged_vertices.data(), sizeof(vertex) * merged_vertices.size());
// Determine center and bounds
meshlets.push_back(merged);
next_level.push_back((unsigned int)meshlets.size() - 1);
} }
return RT_SUCCESS; return RT_SUCCESS;
} }

View File

@ -3,9 +3,9 @@
#include "runtime/runtime.h" #include "runtime/runtime.h"
constexpr size_t MESHLET_VERTICES = 64; constexpr size_t L0_MESHLET_VERTICES = 64;
constexpr size_t MESHLET_TRIANGLES = 124; constexpr size_t L0_MESHLET_TRIANGLES = 124;
constexpr size_t MESHLET_INDICES = MESHLET_TRIANGLES * 3; constexpr size_t L0_MESHLET_INDICES = L0_MESHLET_TRIANGLES * 3;
struct vertex { struct vertex {
float vx, vy, vz; float vx, vy, vz;
@ -13,18 +13,13 @@ struct vertex {
struct meshlet { struct meshlet {
vertex *vertices; vertex *vertices;
uint8_t *indices; uint16_t *indices;
uint32_t num_vertices; uint32_t num_vertices;
uint32_t num_indices; uint32_t num_indices;
float center[3]; float center[3];
float radius; float radius;
// Normal cone for backface culling
float cone_apex[3];
float cone_axis[3];
float cone_cutoff;
// child indices // child indices
uint32_t children[8]; uint32_t children[8];
uint32_t num_children; uint32_t num_children;

View File

@ -108,8 +108,6 @@ struct gpu_mesh_data {
struct gpu_meshlet_data { struct gpu_meshlet_data {
float bounds[4]; // xyz, radius float bounds[4]; // xyz, radius
float cone_apex[4]; // xyz, unused
float cone_axis_cutoff[4];
uint32_t mesh_index; uint32_t mesh_index;
uint32_t first_index; uint32_t first_index;
uint32_t base_vertex; uint32_t base_vertex;
@ -176,9 +174,58 @@ rt_result meshlet_renderer::Initialize() {
m_single_meshlet_shader = shader::CompileSource(_single_vert, _single_frag); m_single_meshlet_shader = shader::CompileSource(_single_vert, _single_frag);
m_flat_cull_shader = shader::CompileFile(_flat_cull); m_flat_cull_shader = shader::CompileFile(_flat_cull);
m_meshlet_shader = shader::CompileSource(_meshlet_vert, _meshlet_frag); m_meshlet_shader = shader::CompileSource(_meshlet_vert, _meshlet_frag);
InitFlat();
return RT_SUCCESS; return RT_SUCCESS;
} }
void OnModelLoaded(meshlet_generator *gen);
void meshlet_renderer::InitFlat(void) {
const size_t MAX_MESHLETS = 65556;
GLuint buffers[6];
glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers);
m_flat.vbo = buffers[0];
m_flat.ebo = buffers[1];
m_flat.meshlet_ssbo = buffers[2];
m_flat.draw_ssbo = buffers[3];
m_flat.cull_ssbo = buffers[4];
m_flat.mesh_ssbo = buffers[5];
// Create the vao
glGenVertexArrays(1, &m_flat.vao);
glBindVertexArray(m_flat.vao);
glBindBuffer(GL_ARRAY_BUFFER, m_flat.vbo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_flat.ebo);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0);
glBindVertexArray(0);
// Reserve space
glNamedBufferStorage(m_flat.vbo,
MAX_MESHLETS * sizeof(vertex) * L0_MESHLET_VERTICES,
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.ebo,
MAX_MESHLETS * sizeof(uint16_t) * L0_MESHLET_INDICES,
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.meshlet_ssbo,
MAX_MESHLETS * sizeof(gpu_meshlet_data),
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.draw_ssbo,
MAX_MESHLETS * sizeof(draw_elements_indirect_command),
nullptr,
0);
glNamedBufferStorage(m_flat.cull_ssbo, sizeof(cull_output), nullptr, GL_MAP_READ_BIT | GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.mesh_ssbo, sizeof(gpu_mesh_data), nullptr, GL_DYNAMIC_STORAGE_BIT);
}
void meshlet_renderer::RenderFlat(const meshlet *meshlets, unsigned int count) { void meshlet_renderer::RenderFlat(const meshlet *meshlets, unsigned int count) {
if (m_settings.separate_rendering) { if (m_settings.separate_rendering) {
SeparateRendering(meshlets, count); SeparateRendering(meshlets, count);
@ -193,88 +240,56 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co
// Do compute "culling" (generate drawindirect) into 1 ssbo // Do compute "culling" (generate drawindirect) into 1 ssbo
// DrawIndirect // DrawIndirect
GLuint vbo, ebo, meshlet_ssbo, draw_ssbo, cull_ssbo, mesh_ssbo;
GLuint buffers[6];
glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers);
vbo = buffers[0];
ebo = buffers[1];
meshlet_ssbo = buffers[2];
draw_ssbo = buffers[3];
cull_ssbo = buffers[4];
mesh_ssbo = buffers[5];
// Store vertices and indices at idx * MESHLET_VERTICES/MESHLET_INDICES // Store vertices and indices at idx * MESHLET_VERTICES/MESHLET_INDICES
glNamedBufferStorage(vbo,
count * sizeof(vertex) * MESHLET_VERTICES,
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(ebo, count * MESHLET_INDICES, nullptr, GL_DYNAMIC_STORAGE_BIT);
for (unsigned int i = 0; i < count; ++i) { for (unsigned int i = 0; i < count; ++i) {
glNamedBufferSubData(vbo, glNamedBufferSubData(m_flat.vbo,
i * sizeof(vertex) * MESHLET_VERTICES, i * sizeof(vertex) * L0_MESHLET_VERTICES,
meshlets[i].num_vertices * sizeof(vertex), meshlets[i].num_vertices * sizeof(vertex),
meshlets[i].vertices); meshlets[i].vertices);
glNamedBufferSubData(ebo, glNamedBufferSubData(m_flat.ebo,
i * MESHLET_INDICES, i * L0_MESHLET_INDICES * sizeof(uint16_t),
meshlets[i].num_indices, meshlets[i].num_indices * sizeof(uint16_t),
meshlets[i].indices); meshlets[i].indices);
} }
// Store meshlet information // Store meshlet information
glNamedBufferStorage(meshlet_ssbo,
count * sizeof(gpu_meshlet_data),
nullptr,
GL_DYNAMIC_STORAGE_BIT);
for (unsigned int i = 0; i < count; ++i) { for (unsigned int i = 0; i < count; ++i) {
gpu_meshlet_data meshlet; gpu_meshlet_data meshlet;
// Only have one right now // Only have one right now
meshlet.mesh_index = 0; meshlet.mesh_index = 0;
meshlet.first_index = i * MESHLET_INDICES; meshlet.first_index = i * L0_MESHLET_INDICES;
meshlet.base_vertex = i * MESHLET_VERTICES; meshlet.base_vertex = i * L0_MESHLET_VERTICES;
meshlet.index_count = meshlets[i].num_indices; meshlet.index_count = meshlets[i].num_indices;
memcpy(meshlet.bounds, meshlets[i].center, 3 * sizeof(float)); memcpy(meshlet.bounds, meshlets[i].center, 3 * sizeof(float));
meshlet.bounds[3] = meshlets[i].radius; meshlet.bounds[3] = meshlets[i].radius;
memcpy(meshlet.cone_apex, meshlets[i].cone_apex, 3 * sizeof(float)); glNamedBufferSubData(m_flat.meshlet_ssbo,
meshlet.cone_apex[3] = 0.f;
memcpy(meshlet.cone_axis_cutoff, meshlets[i].cone_axis, sizeof(meshlets[i].cone_axis));
glNamedBufferSubData(meshlet_ssbo,
i * sizeof(gpu_meshlet_data), i * sizeof(gpu_meshlet_data),
sizeof(gpu_meshlet_data), sizeof(gpu_meshlet_data),
&meshlet); &meshlet);
} }
// Reserve space for the draw commands
glNamedBufferStorage(draw_ssbo, count * sizeof(draw_elements_indirect_command), nullptr, 0);
// Prepare culling output // Prepare culling output
cull_output cull_output = {0}; cull_output cull_output = {0};
glNamedBufferStorage(cull_ssbo, sizeof(cull_output), &cull_output, GL_MAP_READ_BIT); glNamedBufferSubData(m_flat.cull_ssbo, 0, sizeof(cull_output), &cull_output);
// Prepare mesh data // Prepare mesh data
gpu_mesh_data mesh_data; gpu_mesh_data mesh_data;
mesh_data.model = glm ::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) * mesh_data.model = glm ::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) *
glm::scale(glm::vec3(m_settings.scale)); glm::scale(glm::vec3(m_settings.scale));
glNamedBufferStorage(mesh_ssbo, sizeof(mesh_data), &mesh_data, GL_DYNAMIC_STORAGE_BIT); glNamedBufferSubData(m_flat.mesh_ssbo, 0, sizeof(mesh_data), &mesh_data);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
// Do culling. TODO: Get number of draws back // Do culling. TODO: Get number of draws back
m_flat_cull_shader.Use(); m_flat_cull_shader.Use();
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, meshlet_ssbo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_flat.meshlet_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, draw_ssbo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_flat.draw_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cull_ssbo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_flat.cull_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, mesh_ssbo); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_flat.mesh_ssbo);
glUniform3fv(0, 1, m_settings.eye); glUniform3fv(0, 1, m_settings.eye);
glDispatchCompute(count, 1, 1); glDispatchCompute(count, 1, 1);
// Create the vao
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0);
// DrawIndirect // DrawIndirect
GLuint mvploc = glGetUniformLocation(m_meshlet_shader.m_prog, "mvp"); GLuint mvploc = glGetUniformLocation(m_meshlet_shader.m_prog, "mvp");
@ -295,21 +310,20 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co
m_meshlet_shader.Use(); m_meshlet_shader.Use();
glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]); glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, draw_ssbo); glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_flat.draw_ssbo);
glMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT); glMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT);
{ {
void *out = glMapNamedBuffer(cull_ssbo, GL_READ_ONLY); void *out = glMapNamedBuffer(m_flat.cull_ssbo, GL_READ_ONLY);
memcpy(&cull_output, out, sizeof(cull_output)); memcpy(&cull_output, out, sizeof(cull_output));
glUnmapNamedBuffer(cull_ssbo); glUnmapNamedBuffer(m_flat.cull_ssbo);
} }
glBindVertexArray(m_flat.vao);
glMultiDrawElementsIndirect(GL_TRIANGLES, glMultiDrawElementsIndirect(GL_TRIANGLES,
GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT,
nullptr, nullptr,
cull_output.draw_count, cull_output.draw_count,
sizeof(draw_elements_indirect_command)); sizeof(draw_elements_indirect_command));
glDeleteBuffers(RT_ARRAY_COUNT(buffers), buffers);
} }
void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int count) { void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int count) {
@ -320,9 +334,9 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c
glBindVertexArray(vao); glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo); glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * MESHLET_VERTICES, nullptr, GL_STREAM_DRAW); glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * L0_MESHLET_VERTICES, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, MESHLET_INDICES, nullptr, GL_STREAM_DRAW); glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint16_t) * L0_MESHLET_INDICES, nullptr, GL_STREAM_DRAW);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0); glEnableVertexAttribArray(0);
@ -361,11 +375,11 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c
meshlets[i].vertices); meshlets[i].vertices);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,
0, 0,
sizeof(uint8_t) * meshlets[i].num_indices, sizeof(uint16_t) * meshlets[i].num_indices,
meshlets[i].indices); meshlets[i].indices);
glUniform3fv(colorloc, 1, colors[i % 5]); glUniform3fv(colorloc, 1, colors[i % 5]);
glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_BYTE, nullptr); glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_SHORT, nullptr);
} }
glDeleteVertexArrays(1, &vao); glDeleteVertexArrays(1, &vao);

View File

@ -24,6 +24,7 @@ struct shader {
}; };
struct meshlet_renderer { struct meshlet_renderer {
public:
struct settings { struct settings {
bool separate_rendering = true; bool separate_rendering = true;
float scale = 1.f; float scale = 1.f;
@ -33,6 +34,18 @@ struct meshlet_renderer {
float fov; float fov;
}; };
private:
// GL object names used by the flat (non-hierarchical) indirect-draw path.
// They are created in InitFlat() and used by DrawIndirectFlat().
struct flat_state {
GLuint vao;          // vertex array; bound before the multi-draw-indirect call
GLuint vbo;          // vertex data; one L0_MESHLET_VERTICES-sized slot per meshlet
GLuint ebo;          // 16 bit index data; one L0_MESHLET_INDICES-sized slot per meshlet
GLuint meshlet_ssbo; // gpu_meshlet_data array; shader storage binding 1
GLuint draw_ssbo;    // shader storage binding 2; also bound as the draw-indirect buffer
GLuint cull_ssbo;    // cull_output; shader storage binding 3, mapped to read draw_count
GLuint mesh_ssbo;    // gpu_mesh_data; shader storage binding 4
};
public:
rt_result Initialize(); rt_result Initialize();
void RenderFlat(const meshlet *meshlets, unsigned int count); void RenderFlat(const meshlet *meshlets, unsigned int count);
@ -40,16 +53,21 @@ struct meshlet_renderer {
void SettingMenu(); void SettingMenu();
private: private:
void InitFlat(void);
void SeparateRendering(const meshlet *meshlets, unsigned int count); void SeparateRendering(const meshlet *meshlets, unsigned int count);
void DrawIndirectFlat(const meshlet *meshlets, unsigned int count); void DrawIndirectFlat(const meshlet *meshlets, unsigned int count);
public: public:
settings m_settings; settings m_settings;
float m_aspect;
private:
flat_state m_flat;
shader m_single_meshlet_shader; shader m_single_meshlet_shader;
shader m_flat_cull_shader; shader m_flat_cull_shader;
shader m_meshlet_shader; shader m_meshlet_shader;
float m_aspect;
}; };
#endif #endif