From c254ffee6b9d4d6529baa4bffaf654e1ee637ddb Mon Sep 17 00:00:00 2001 From: Kevin Trogant Date: Sat, 20 Jul 2024 14:15:04 +0200 Subject: [PATCH] start work on meshlet dag algorithm --- src/experimental/meshlets/flat_cull.glsl | 21 +- .../meshlets/hierarchical_cull.glsl | 1 + src/experimental/meshlets/main.cpp | 18 +- .../meshlets/meshlet_generator.cpp | 220 +++++++++++++----- .../meshlets/meshlet_generator.hpp | 13 +- .../meshlets/meshlet_renderer.cpp | 134 ++++++----- .../meshlets/meshlet_renderer.hpp | 20 +- 7 files changed, 284 insertions(+), 143 deletions(-) diff --git a/src/experimental/meshlets/flat_cull.glsl b/src/experimental/meshlets/flat_cull.glsl index 5f0a329..9955d32 100644 --- a/src/experimental/meshlets/flat_cull.glsl +++ b/src/experimental/meshlets/flat_cull.glsl @@ -7,8 +7,6 @@ struct mesh_data { struct meshlet_data { vec4 bounds; // xyz, radius - vec4 cone_apex; // xyz, unused - vec4 cone_axis_cutoff; // xyz, cutoff = w uint mesh_index; uint first_index; uint base_vertex; @@ -44,17 +42,10 @@ void main() { meshlet_data meshlet = meshlets[meshlet_idx]; mesh_data mesh = meshes[meshlet.mesh_index]; - vec3 cone_axis = (mesh.model * vec4(meshlet.cone_axis_cutoff.xyz, 0)).xyz; - vec3 cone_apex = (mesh.model * vec4(meshlet.cone_apex.xyz, 1)).xyz; - float cone_cutoff = meshlet.cone_axis_cutoff.w; - - - if (dot(normalize(cone_apex - camera_pos), cone_axis) < cone_cutoff) { - uint draw_idx = atomicAdd(draw_count, 1); - draw_commands[draw_idx].count = meshlet.index_count; - draw_commands[draw_idx].instance_count = 1; - draw_commands[draw_idx].first_index = meshlet.first_index; - draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex); - draw_commands[draw_idx].base_instance = 0; - } + uint draw_idx = atomicAdd(draw_count, 1); + draw_commands[draw_idx].count = meshlet.index_count; + draw_commands[draw_idx].instance_count = 1; + draw_commands[draw_idx].first_index = meshlet.first_index; + draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex); + 
draw_commands[draw_idx].base_instance = 0; } \ No newline at end of file diff --git a/src/experimental/meshlets/hierarchical_cull.glsl b/src/experimental/meshlets/hierarchical_cull.glsl index e69de29..991aa1a 100644 --- a/src/experimental/meshlets/hierarchical_cull.glsl +++ b/src/experimental/meshlets/hierarchical_cull.glsl @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/experimental/meshlets/main.cpp b/src/experimental/meshlets/main.cpp index a07ece7..76347ab 100644 --- a/src/experimental/meshlets/main.cpp +++ b/src/experimental/meshlets/main.cpp @@ -112,7 +112,7 @@ struct file_picker { } /* Returns true if a new file was selected */ - bool RunFlat() { + bool Run() { bool opened=false; if (ImGui::Begin("File Selection", &m_open)) { ImGui::InputTextWithHint("Path", @@ -125,6 +125,7 @@ struct file_picker { strcpy(m_input_buf[m_active_input], m_picked); opened = true; } + ImGui::Checkbox("Flat", &m_flat); ImGui::End(); } return opened; @@ -138,6 +139,7 @@ struct file_picker { return m_picked != nullptr; } + bool m_flat = true; const char *m_picked=nullptr; bool m_open = true; char m_input_buf[2][260]; @@ -191,15 +193,22 @@ int main() { meshlet_renderer ren; ren.Initialize(); + bool flat = true; + uint32_t root_idx; + while (!glfwWindowShouldClose(window)) { glfwPollEvents(); ImGui_ImplOpenGL3_NewFrame(); ImGui_ImplGlfw_NewFrame(); ImGui::NewFrame(); - if (picker.RunFlat()) { + if (picker.Run()) { gen.LoadObj(picker.GetPicked()); - gen.RunFlat(); + flat = picker.m_flat; + if (flat) + gen.RunFlat(); + else + gen.RunHierarchical(0,&root_idx); } ren.SettingMenu(); @@ -212,7 +221,8 @@ int main() { if (picker.HasPickedFile()) { ren.m_aspect = (float)display_w / (float)display_h; - ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets); + if (flat) + ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets); } ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData()); diff --git a/src/experimental/meshlets/meshlet_generator.cpp 
b/src/experimental/meshlets/meshlet_generator.cpp index c98d0de..8c79eee 100644 --- a/src/experimental/meshlets/meshlet_generator.cpp +++ b/src/experimental/meshlets/meshlet_generator.cpp @@ -5,6 +5,9 @@ #include #pragma warning(pop) +#include +#include + #include "meshlet_generator.hpp" #include @@ -118,13 +121,13 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) { rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices, - MESHLET_VERTICES, - MESHLET_TRIANGLES); + L0_MESHLET_VERTICES, + L0_MESHLET_TRIANGLES); meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets); unsigned int *meshlet_vertices = - RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *MESHLET_VERTICES); + RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *L0_MESHLET_VERTICES); unsigned char *meshlet_triangles = - RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *MESHLET_INDICES); + RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *L0_MESHLET_INDICES); size_t meshlet_count = meshopt_buildMeshlets(meshlets, meshlet_vertices, @@ -134,8 +137,8 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) { &m_meshes[mesh_idx].vertices[0].vx, m_meshes[mesh_idx].num_vertices, sizeof(vertex), - MESHLET_VERTICES, - MESHLET_TRIANGLES, + L0_MESHLET_VERTICES, + L0_MESHLET_TRIANGLES, cone_weight); m_meshlets = new meshlet[meshlet_count]; @@ -155,24 +158,26 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) { m_meshes[mesh_idx].num_vertices, sizeof(vertex)); - m_meshlets[i].vertices = new vertex[MESHLET_VERTICES]; - m_meshlets[i].indices = new uint8_t[MESHLET_INDICES]; + m_meshlets[i].vertices = new vertex[L0_MESHLET_VERTICES]; + m_meshlets[i].indices = new uint16_t[L0_MESHLET_INDICES]; m_meshlets[i].num_vertices = meshlets[i].vertex_count; m_meshlets[i].num_indices = meshlets[i].triangle_count * 3; memcpy(m_meshlets[i].center, bounds.center, 
sizeof(bounds.center)); m_meshlets[i].radius = bounds.radius; - memcpy(m_meshlets[i].cone_axis, bounds.cone_axis, sizeof(bounds.cone_axis)); - m_meshlets[i].cone_cutoff = bounds.cone_cutoff; - memcpy(m_meshlets[i].cone_apex, bounds.cone_apex, sizeof(bounds.cone_apex)); for (unsigned int vert_idx = 0; vert_idx < meshlets[i].vertex_count; ++vert_idx) { unsigned int vert = meshlet_vertices[meshlets[i].vertex_offset + vert_idx]; m_meshlets[i].vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert]; } - memcpy(m_meshlets[i].indices, - meshlet_triangles + meshlets[i].triangle_offset, - meshlets[i].triangle_count * 3); + for (unsigned int tri_idx = 0; tri_idx < meshlets[i].triangle_count; ++tri_idx) { + m_meshlets[i].indices[tri_idx * 3 + 0] = + (uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 0]; + m_meshlets[i].indices[tri_idx * 3 + 1] = + (uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 1]; + m_meshlets[i].indices[tri_idx * 3 + 2] = + (uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 2]; + } m_meshlets[i].num_children = 0u; } @@ -184,49 +189,81 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); - size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices, - MESHLET_VERTICES, - MESHLET_TRIANGLES); - meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets); - unsigned int *meshlet_vertices = - RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *MESHLET_VERTICES); - unsigned char *meshlet_triangles = - RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *MESHLET_INDICES); - meshopt_Bounds *meshlet_bounds = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Bounds, max_meshlets); + // Convert meshopt meshlets into our meshlets, each with its own vertex and index buffer + std::vector meshlets; + { + size_t max_meshlets = 
meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices, + L0_MESHLET_VERTICES, + L0_MESHLET_TRIANGLES); + meshopt_Meshlet *meshopt_meshlets = + RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets); + unsigned int *meshlet_vertices = + RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *L0_MESHLET_VERTICES); + unsigned char *meshlet_triangles = + RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *L0_MESHLET_INDICES); - uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshlets, - meshlet_vertices, - meshlet_triangles, - m_meshes[mesh_idx].indices, - m_meshes[mesh_idx].num_indices, - &m_meshes[mesh_idx].vertices[0].vx, - m_meshes[mesh_idx].num_vertices, - sizeof(vertex), - MESHLET_VERTICES, - MESHLET_TRIANGLES, - cone_weight); + uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshopt_meshlets, + meshlet_vertices, + meshlet_triangles, + m_meshes[mesh_idx].indices, + m_meshes[mesh_idx].num_indices, + &m_meshes[mesh_idx].vertices[0].vx, + m_meshes[mesh_idx].num_vertices, + sizeof(vertex), + L0_MESHLET_VERTICES, + L0_MESHLET_TRIANGLES, + cone_weight); - for (size_t i = 0; i < meshlet_count; ++i) { - meshopt_optimizeMeshlet(&meshlet_vertices[meshlets[i].vertex_offset], - &meshlet_triangles[meshlets[i].triangle_offset], - meshlets[i].triangle_count, - meshlets[i].vertex_count); + for (size_t i = 0; i < meshlet_count; ++i) { + meshopt_optimizeMeshlet(&meshlet_vertices[meshopt_meshlets[i].vertex_offset], + &meshlet_triangles[meshopt_meshlets[i].triangle_offset], + meshopt_meshlets[i].triangle_count, + meshopt_meshlets[i].vertex_count); - meshlet_bounds[i] = - meshopt_computeMeshletBounds(&meshlet_vertices[meshlets[i].vertex_offset], - &meshlet_triangles[meshlets[i].triangle_offset], - meshlets[i].triangle_count, - &m_meshes[mesh_idx].vertices[0].vx, - m_meshes[mesh_idx].num_vertices, - sizeof(vertex)); + meshopt_Bounds bounds = meshopt_computeMeshletBounds( + &meshlet_vertices[meshopt_meshlets[i].vertex_offset], + 
&meshlet_triangles[meshopt_meshlets[i].triangle_offset], + meshopt_meshlets[i].triangle_count, + &m_meshes[mesh_idx].vertices[0].vx, + m_meshes[mesh_idx].num_vertices, + sizeof(vertex)); + + meshlet meshlet; + meshlet.vertices = new vertex[L0_MESHLET_VERTICES]; + meshlet.indices = new uint16_t[L0_MESHLET_INDICES]; + meshlet.num_vertices = meshopt_meshlets[i].vertex_count; + meshlet.num_indices = meshopt_meshlets[i].triangle_count * 3; + memcpy(meshlet.center, bounds.center, sizeof(bounds.center)); + meshlet.radius = bounds.radius; + + for (unsigned int vert_idx = 0; vert_idx < meshopt_meshlets[i].vertex_count; + ++vert_idx) { + unsigned int vert = meshlet_vertices[meshopt_meshlets[i].vertex_offset + vert_idx]; + meshlet.vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert]; + } + + for (unsigned int tri_idx = 0; tri_idx < meshopt_meshlets[i].triangle_count; + ++tri_idx) { + meshlet.indices[tri_idx * 3 + 0] = (uint16_t) + meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 0]; + meshlet.indices[tri_idx * 3 + 1] = (uint16_t) + meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 1]; + meshlet.indices[tri_idx * 3 + 2] = (uint16_t) + meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 2]; + } + + meshlet.num_children = 0u; + + meshlets.push_back(meshlet); + } } // We now have a flat list of meshlets -> the highest lod ones // We now combine (up to 8) meshlets into one to generate the next hierarchy level // Repeat until we only have 1 meshlet left std::vector unprocessed; - unprocessed.reserve(meshlet_count); - for (uint32_t i = 0; i < meshlet_count; ++i) { + unprocessed.reserve(meshlets.size()); + for (uint32_t i = 0; i < meshlets.size(); ++i) { unprocessed.push_back(i); } std::vector next_level; @@ -241,11 +278,11 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro unsigned int closest_count = 0u; float first_center[3]; - memcpy(first_center, meshlet_bounds[first].center, 
sizeof(float) * 3); + memcpy(first_center, meshlets[first].center, sizeof(float) * 3); for (uint32_t i = 0; i < unprocessed.size(); ++i) { float center[3]; - memcpy(center, meshlet_bounds[unprocessed[i]].center, sizeof(float) * 3); + memcpy(center, meshlets[unprocessed[i]].center, sizeof(float) * 3); float dist = sqrtf((center[0] - first_center[0]) * (center[0] - first_center[0]) + (center[1] - first_center[1]) * (center[1] - first_center[1]) + @@ -255,7 +292,7 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro // Check if we are closer than one of the other candidates for (unsigned int j = 0; j < closest_count; ++j) { uint32_t highest_idx = UINT_MAX; - float highest_dist = dist; + float highest_dist = dist; if (dist < distances[j]) { if (distances[j] > highest_dist) { highest_dist = distances[j]; @@ -263,14 +300,14 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro } } if (highest_idx < RT_ARRAY_COUNT(closest)) { - const uint32_t replaced = highest_idx; + const uint32_t replaced = closest[highest_idx]; distances[j] = dist; - closest[j] = i; + closest[j] = unprocessed[i]; unprocessed.push_back(replaced); } } } else { - closest[closest_count] = i; + closest[closest_count] = unprocessed[i]; distances[closest_count] = dist; closest_count++; unprocessed.erase(unprocessed.begin() + i); @@ -279,6 +316,81 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro // Combine into a new meshlet // vertex *vertices = new vertex[MESHLET_VERTICES * 8]; + + vertex *in_vertices = new vertex[L0_MESHLET_VERTICES * 8]; + uint32_t *in_indices = new uint32_t[L0_MESHLET_INDICES * 8]; + memcpy(in_vertices, + meshlets[first].vertices, + sizeof(vertex) * meshlets[first].num_vertices); + size_t at_vert = meshlets[first].num_vertices; + for (unsigned int i = 0; i < meshlets[first].num_indices; ++i) + in_indices[i] = static_cast(meshlets[first].indices[i]); + size_t at_idx = 
meshlets[first].num_indices; + for (unsigned int i = 0; i < closest_count; ++i) { + memcpy(&in_vertices[at_vert], + meshlets[closest[i]].vertices, + sizeof(vertex) * meshlets[closest[i]].num_vertices); + + for (unsigned int j = 0; j < meshlets[closest[i]].num_indices; ++j) + in_indices[at_idx + j] = (uint32_t)at_vert + static_cast(meshlets[closest[i]].indices[j]); /* meshlet-local index rebased onto this meshlet's slice of in_vertices */ + at_idx += meshlets[closest[i]].num_indices; + at_vert += meshlets[closest[i]].num_vertices; /* advance the vertex base only after the indices above have used it */ + } + + const float target_error = + 0.10f; // Accept 10% error. We are simplifying meshlets, the extents are not very large + const size_t target_index_count = L0_MESHLET_INDICES; + const unsigned int simplify_options = meshopt_SimplifyLockBorder; + const float threshold = 1.f / 8.f; + std::vector lod(UINT16_MAX); + float lod_error = 0.f; + size_t out_index_count = meshopt_simplify(&lod[0], + in_indices, + at_idx, + &in_vertices[0].vx, + at_vert, + sizeof(vertex), + target_index_count, + target_error, + simplify_options, + &lod_error); + lod.resize(out_index_count); + if (out_index_count < UINT16_MAX) { + rtLog("EXP", "Yay"); + } else { + rtReportError("EXP", "Nay"); + } + + std::unordered_map index_remap; + uint16_t *index_buffer = new uint16_t[out_index_count]; + std::vector merged_vertices; + merged_vertices.reserve(out_index_count); + for (size_t i = 0; i < out_index_count; ++i) { + if (index_remap.find(lod[i]) != index_remap.end()) { + index_buffer[i] = index_remap[lod[i]]; + } else { + uint16_t index = (uint16_t)merged_vertices.size(); + merged_vertices.push_back(in_vertices[lod[i]]); + index_buffer[i] = index; + index_remap[lod[i]] = index; + } + } + + meshlet merged = {}; + merged.num_children = closest_count + 1; + merged.children[0] = first; + memcpy(&merged.children[1], closest, sizeof(uint32_t) * closest_count); /* append neighbours after 'first' instead of overwriting slot 0; NOTE(review): children has 8 slots - confirm closest_count <= 7 */ + merged.indices = index_buffer; + merged.num_indices = (uint32_t)out_index_count; + merged.vertices = new vertex[merged_vertices.size()]; + merged.num_vertices = (uint32_t)merged_vertices.size(); +
memcpy(merged.vertices, merged_vertices.data(), sizeof(vertex) * merged_vertices.size()); + + // Determine center and bounds + + + meshlets.push_back(merged); + next_level.push_back((unsigned int)meshlets.size() - 1); } return RT_SUCCESS; } \ No newline at end of file diff --git a/src/experimental/meshlets/meshlet_generator.hpp b/src/experimental/meshlets/meshlet_generator.hpp index 467a7e7..a9c2c33 100644 --- a/src/experimental/meshlets/meshlet_generator.hpp +++ b/src/experimental/meshlets/meshlet_generator.hpp @@ -3,9 +3,9 @@ #include "runtime/runtime.h" -constexpr size_t MESHLET_VERTICES = 64; -constexpr size_t MESHLET_TRIANGLES = 124; -constexpr size_t MESHLET_INDICES = MESHLET_TRIANGLES * 3; +constexpr size_t L0_MESHLET_VERTICES = 64; +constexpr size_t L0_MESHLET_TRIANGLES = 124; +constexpr size_t L0_MESHLET_INDICES = L0_MESHLET_TRIANGLES * 3; struct vertex { float vx, vy, vz; @@ -13,18 +13,13 @@ struct vertex { struct meshlet { vertex *vertices; - uint8_t *indices; + uint16_t *indices; uint32_t num_vertices; uint32_t num_indices; float center[3]; float radius; - // Normal cone for backface culling - float cone_apex[3]; - float cone_axis[3]; - float cone_cutoff; - // child indices uint32_t children[8]; uint32_t num_children; diff --git a/src/experimental/meshlets/meshlet_renderer.cpp b/src/experimental/meshlets/meshlet_renderer.cpp index 27620da..b57e5cd 100644 --- a/src/experimental/meshlets/meshlet_renderer.cpp +++ b/src/experimental/meshlets/meshlet_renderer.cpp @@ -108,8 +108,6 @@ struct gpu_mesh_data { struct gpu_meshlet_data { float bounds[4]; // xyz, radius - float cone_apex[4]; // xyz, unused - float cone_axis_cutoff[4]; uint32_t mesh_index; uint32_t first_index; uint32_t base_vertex; @@ -176,9 +174,58 @@ rt_result meshlet_renderer::Initialize() { m_single_meshlet_shader = shader::CompileSource(_single_vert, _single_frag); m_flat_cull_shader = shader::CompileFile(_flat_cull); m_meshlet_shader = shader::CompileSource(_meshlet_vert, _meshlet_frag); + + 
InitFlat(); + return RT_SUCCESS; } +void OnModelLoaded(meshlet_generator *gen); + +void meshlet_renderer::InitFlat(void) { + const size_t MAX_MESHLETS = 65556; + + GLuint buffers[6]; + glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers); + m_flat.vbo = buffers[0]; + m_flat.ebo = buffers[1]; + m_flat.meshlet_ssbo = buffers[2]; + m_flat.draw_ssbo = buffers[3]; + m_flat.cull_ssbo = buffers[4]; + m_flat.mesh_ssbo = buffers[5]; + + // Create the vao + glGenVertexArrays(1, &m_flat.vao); + glBindVertexArray(m_flat.vao); + glBindBuffer(GL_ARRAY_BUFFER, m_flat.vbo); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_flat.ebo); + glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr); + glEnableVertexAttribArray(0); + glBindVertexArray(0); + + // Reserve space + glNamedBufferStorage(m_flat.vbo, + MAX_MESHLETS * sizeof(vertex) * L0_MESHLET_VERTICES, + nullptr, + GL_DYNAMIC_STORAGE_BIT); + glNamedBufferStorage(m_flat.ebo, + MAX_MESHLETS * sizeof(uint16_t) * L0_MESHLET_INDICES, + nullptr, + GL_DYNAMIC_STORAGE_BIT); + glNamedBufferStorage(m_flat.meshlet_ssbo, + MAX_MESHLETS * sizeof(gpu_meshlet_data), + nullptr, + GL_DYNAMIC_STORAGE_BIT); + glNamedBufferStorage(m_flat.draw_ssbo, + MAX_MESHLETS * sizeof(draw_elements_indirect_command), + nullptr, + 0); + glNamedBufferStorage(m_flat.cull_ssbo, sizeof(cull_output), nullptr, GL_MAP_READ_BIT | GL_DYNAMIC_STORAGE_BIT); + + glNamedBufferStorage(m_flat.mesh_ssbo, sizeof(gpu_mesh_data), nullptr, GL_DYNAMIC_STORAGE_BIT); +} + + void meshlet_renderer::RenderFlat(const meshlet *meshlets, unsigned int count) { if (m_settings.separate_rendering) { SeparateRendering(meshlets, count); @@ -193,88 +240,56 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co // Do compute "culling" (generate drawindirect) into 1 ssbo // DrawIndirect - GLuint vbo, ebo, meshlet_ssbo, draw_ssbo, cull_ssbo, mesh_ssbo; - GLuint buffers[6]; - glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers); - vbo = buffers[0]; - ebo = buffers[1]; 
- meshlet_ssbo = buffers[2]; - draw_ssbo = buffers[3]; - cull_ssbo = buffers[4]; - mesh_ssbo = buffers[5]; - // Store vertices and indices at idx * MESHLET_VERTICES/MESHLET_INDICES - glNamedBufferStorage(vbo, - count * sizeof(vertex) * MESHLET_VERTICES, - nullptr, - GL_DYNAMIC_STORAGE_BIT); - glNamedBufferStorage(ebo, count * MESHLET_INDICES, nullptr, GL_DYNAMIC_STORAGE_BIT); + for (unsigned int i = 0; i < count; ++i) { - glNamedBufferSubData(vbo, - i * sizeof(vertex) * MESHLET_VERTICES, + glNamedBufferSubData(m_flat.vbo, + i * sizeof(vertex) * L0_MESHLET_VERTICES, meshlets[i].num_vertices * sizeof(vertex), meshlets[i].vertices); - glNamedBufferSubData(ebo, - i * MESHLET_INDICES, - meshlets[i].num_indices, + glNamedBufferSubData(m_flat.ebo, + i * L0_MESHLET_INDICES * sizeof(uint16_t), + meshlets[i].num_indices * sizeof(uint16_t), meshlets[i].indices); } // Store meshlet information - glNamedBufferStorage(meshlet_ssbo, - count * sizeof(gpu_meshlet_data), - nullptr, - GL_DYNAMIC_STORAGE_BIT); for (unsigned int i = 0; i < count; ++i) { gpu_meshlet_data meshlet; // Only have one right now meshlet.mesh_index = 0; - meshlet.first_index = i * MESHLET_INDICES; - meshlet.base_vertex = i * MESHLET_VERTICES; + meshlet.first_index = i * L0_MESHLET_INDICES; + meshlet.base_vertex = i * L0_MESHLET_VERTICES; meshlet.index_count = meshlets[i].num_indices; memcpy(meshlet.bounds, meshlets[i].center, 3 * sizeof(float)); meshlet.bounds[3] = meshlets[i].radius; - memcpy(meshlet.cone_apex, meshlets[i].cone_apex, 3 * sizeof(float)); - meshlet.cone_apex[3] = 0.f; - memcpy(meshlet.cone_axis_cutoff, meshlets[i].cone_axis, sizeof(meshlets[i].cone_axis)); - glNamedBufferSubData(meshlet_ssbo, + glNamedBufferSubData(m_flat.meshlet_ssbo, i * sizeof(gpu_meshlet_data), sizeof(gpu_meshlet_data), &meshlet); } - // Reserve space for the draw commands - glNamedBufferStorage(draw_ssbo, count * sizeof(draw_elements_indirect_command), nullptr, 0); - // Prepare culling output cull_output cull_output = {0}; 
- glNamedBufferStorage(cull_ssbo, sizeof(cull_output), &cull_output, GL_MAP_READ_BIT); - + glNamedBufferSubData(m_flat.cull_ssbo, 0, sizeof(cull_output), &cull_output); + // Prepare mesh data gpu_mesh_data mesh_data; mesh_data.model = glm ::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) * glm::scale(glm::vec3(m_settings.scale)); - glNamedBufferStorage(mesh_ssbo, sizeof(mesh_data), &mesh_data, GL_DYNAMIC_STORAGE_BIT); + glNamedBufferSubData(m_flat.mesh_ssbo, 0, sizeof(mesh_data), &mesh_data); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); // Do culling. TODO: Get number of draws back m_flat_cull_shader.Use(); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, meshlet_ssbo); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, draw_ssbo); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cull_ssbo); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, mesh_ssbo); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_flat.meshlet_ssbo); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_flat.draw_ssbo); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_flat.cull_ssbo); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_flat.mesh_ssbo); glUniform3fv(0, 1, m_settings.eye); glDispatchCompute(count, 1, 1); - // Create the vao - GLuint vao; - glGenVertexArrays(1, &vao); - glBindVertexArray(vao); - glBindBuffer(GL_ARRAY_BUFFER, vbo); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); - glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr); - glEnableVertexAttribArray(0); // DrawIndirect GLuint mvploc = glGetUniformLocation(m_meshlet_shader.m_prog, "mvp"); @@ -295,21 +310,20 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co m_meshlet_shader.Use(); glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]); - glBindBuffer(GL_DRAW_INDIRECT_BUFFER, draw_ssbo); + glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_flat.draw_ssbo); glMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT); { - void *out = glMapNamedBuffer(cull_ssbo, GL_READ_ONLY); + void 
*out = glMapNamedBuffer(m_flat.cull_ssbo, GL_READ_ONLY); memcpy(&cull_output, out, sizeof(cull_output)); - glUnmapNamedBuffer(cull_ssbo); + glUnmapNamedBuffer(m_flat.cull_ssbo); } + glBindVertexArray(m_flat.vao); glMultiDrawElementsIndirect(GL_TRIANGLES, - GL_UNSIGNED_BYTE, + GL_UNSIGNED_SHORT, nullptr, cull_output.draw_count, sizeof(draw_elements_indirect_command)); - - glDeleteBuffers(RT_ARRAY_COUNT(buffers), buffers); } void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int count) { @@ -320,9 +334,9 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c glBindVertexArray(vao); glBindBuffer(GL_ARRAY_BUFFER, vbo); - glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * MESHLET_VERTICES, nullptr, GL_STREAM_DRAW); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * L0_MESHLET_VERTICES, nullptr, GL_STREAM_DRAW); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, MESHLET_INDICES, nullptr, GL_STREAM_DRAW); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint16_t) * L0_MESHLET_INDICES, nullptr, GL_STREAM_DRAW); glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr); glEnableVertexAttribArray(0); @@ -361,11 +375,11 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c meshlets[i].vertices); glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, - sizeof(uint8_t) * meshlets[i].num_indices, + sizeof(uint16_t) * meshlets[i].num_indices, meshlets[i].indices); glUniform3fv(colorloc, 1, colors[i % 5]); - glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_BYTE, nullptr); + glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_SHORT, nullptr); } glDeleteVertexArrays(1, &vao); diff --git a/src/experimental/meshlets/meshlet_renderer.hpp b/src/experimental/meshlets/meshlet_renderer.hpp index 8381a64..953f948 100644 --- a/src/experimental/meshlets/meshlet_renderer.hpp +++ b/src/experimental/meshlets/meshlet_renderer.hpp @@ -24,6 +24,7 @@ struct 
shader { }; struct meshlet_renderer { + public: struct settings { bool separate_rendering = true; float scale = 1.f; @@ -33,6 +34,18 @@ struct meshlet_renderer { float fov; }; + private: + struct flat_state { + GLuint vao; + GLuint vbo; + GLuint ebo; + GLuint meshlet_ssbo; + GLuint draw_ssbo; + GLuint cull_ssbo; + GLuint mesh_ssbo; + }; + + public: rt_result Initialize(); void RenderFlat(const meshlet *meshlets, unsigned int count); @@ -40,16 +53,21 @@ struct meshlet_renderer { void SettingMenu(); private: + void InitFlat(void); + void SeparateRendering(const meshlet *meshlets, unsigned int count); void DrawIndirectFlat(const meshlet *meshlets, unsigned int count); public: settings m_settings; + float m_aspect; + + private: + flat_state m_flat; shader m_single_meshlet_shader; shader m_flat_cull_shader; shader m_meshlet_shader; - float m_aspect; }; #endif