start work on meshlet dag algorithm
All checks were successful
Ubuntu Cross to Win64 / Cross Compile with ming64 (1.4.0, ubuntu-latest) (push) Successful in 1m40s

This commit is contained in:
Kevin Trogant 2024-07-20 14:15:04 +02:00
parent 9f94ac40ee
commit c254ffee6b
7 changed files with 284 additions and 143 deletions

View File

@ -7,8 +7,6 @@ struct mesh_data {
struct meshlet_data {
vec4 bounds; // xyz, radius
vec4 cone_apex; // xyz, unused
vec4 cone_axis_cutoff; // xyz, cutoff = w
uint mesh_index;
uint first_index;
uint base_vertex;
@ -44,17 +42,10 @@ void main() {
meshlet_data meshlet = meshlets[meshlet_idx];
mesh_data mesh = meshes[meshlet.mesh_index];
vec3 cone_axis = (mesh.model * vec4(meshlet.cone_axis_cutoff.xyz, 0)).xyz;
vec3 cone_apex = (mesh.model * vec4(meshlet.cone_apex.xyz, 1)).xyz;
float cone_cutoff = meshlet.cone_axis_cutoff.w;
if (dot(normalize(cone_apex - camera_pos), cone_axis) < cone_cutoff) {
uint draw_idx = atomicAdd(draw_count, 1);
draw_commands[draw_idx].count = meshlet.index_count;
draw_commands[draw_idx].instance_count = 1;
draw_commands[draw_idx].first_index = meshlet.first_index;
draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex);
draw_commands[draw_idx].base_instance = 0;
}
uint draw_idx = atomicAdd(draw_count, 1);
draw_commands[draw_idx].count = meshlet.index_count;
draw_commands[draw_idx].instance_count = 1;
draw_commands[draw_idx].first_index = meshlet.first_index;
draw_commands[draw_idx].base_vertex = int(meshlet.base_vertex);
draw_commands[draw_idx].base_instance = 0;
}

View File

@ -112,7 +112,7 @@ struct file_picker {
}
/* Returns true if a new file was selected */
bool RunFlat() {
bool Run() {
bool opened=false;
if (ImGui::Begin("File Selection", &m_open)) {
ImGui::InputTextWithHint("Path",
@ -125,6 +125,7 @@ struct file_picker {
strcpy(m_input_buf[m_active_input], m_picked);
opened = true;
}
ImGui::Checkbox("Flat", &m_flat);
ImGui::End();
}
return opened;
@ -138,6 +139,7 @@ struct file_picker {
return m_picked != nullptr;
}
bool m_flat = true;
const char *m_picked=nullptr;
bool m_open = true;
char m_input_buf[2][260];
@ -191,15 +193,22 @@ int main() {
meshlet_renderer ren;
ren.Initialize();
bool flat = true;
uint32_t root_idx;
while (!glfwWindowShouldClose(window)) {
glfwPollEvents();
ImGui_ImplOpenGL3_NewFrame();
ImGui_ImplGlfw_NewFrame();
ImGui::NewFrame();
if (picker.RunFlat()) {
if (picker.Run()) {
gen.LoadObj(picker.GetPicked());
gen.RunFlat();
flat = picker.m_flat;
if (flat)
gen.RunFlat();
else
gen.RunHierarchical(0,&root_idx);
}
ren.SettingMenu();
@ -212,7 +221,8 @@ int main() {
if (picker.HasPickedFile()) {
ren.m_aspect = (float)display_w / (float)display_h;
ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets);
if (flat)
ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets);
}
ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());

View File

@ -5,6 +5,9 @@
#include <tiny_obj_loader.h>
#pragma warning(pop)
#include <unordered_map>
#include <vector>
#include "meshlet_generator.hpp"
#include <runtime/mem_arena.h>
@ -118,13 +121,13 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
MESHLET_VERTICES,
MESHLET_TRIANGLES);
L0_MESHLET_VERTICES,
L0_MESHLET_TRIANGLES);
meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
unsigned int *meshlet_vertices =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *MESHLET_VERTICES);
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *L0_MESHLET_VERTICES);
unsigned char *meshlet_triangles =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *MESHLET_INDICES);
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *L0_MESHLET_INDICES);
size_t meshlet_count = meshopt_buildMeshlets(meshlets,
meshlet_vertices,
@ -134,8 +137,8 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
&m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices,
sizeof(vertex),
MESHLET_VERTICES,
MESHLET_TRIANGLES,
L0_MESHLET_VERTICES,
L0_MESHLET_TRIANGLES,
cone_weight);
m_meshlets = new meshlet[meshlet_count];
@ -155,24 +158,26 @@ rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
m_meshes[mesh_idx].num_vertices,
sizeof(vertex));
m_meshlets[i].vertices = new vertex[MESHLET_VERTICES];
m_meshlets[i].indices = new uint8_t[MESHLET_INDICES];
m_meshlets[i].vertices = new vertex[L0_MESHLET_VERTICES];
m_meshlets[i].indices = new uint16_t[L0_MESHLET_INDICES];
m_meshlets[i].num_vertices = meshlets[i].vertex_count;
m_meshlets[i].num_indices = meshlets[i].triangle_count * 3;
memcpy(m_meshlets[i].center, bounds.center, sizeof(bounds.center));
m_meshlets[i].radius = bounds.radius;
memcpy(m_meshlets[i].cone_axis, bounds.cone_axis, sizeof(bounds.cone_axis));
m_meshlets[i].cone_cutoff = bounds.cone_cutoff;
memcpy(m_meshlets[i].cone_apex, bounds.cone_apex, sizeof(bounds.cone_apex));
for (unsigned int vert_idx = 0; vert_idx < meshlets[i].vertex_count; ++vert_idx) {
unsigned int vert = meshlet_vertices[meshlets[i].vertex_offset + vert_idx];
m_meshlets[i].vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert];
}
memcpy(m_meshlets[i].indices,
meshlet_triangles + meshlets[i].triangle_offset,
meshlets[i].triangle_count * 3);
for (unsigned int tri_idx = 0; tri_idx < meshlets[i].triangle_count; ++tri_idx) {
m_meshlets[i].indices[tri_idx * 3 + 0] =
(uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 0];
m_meshlets[i].indices[tri_idx * 3 + 1] =
(uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 1];
m_meshlets[i].indices[tri_idx * 3 + 2] =
(uint16_t)meshlet_triangles[meshlets[i].triangle_offset + 3 * tri_idx + 2];
}
m_meshlets[i].num_children = 0u;
}
@ -184,49 +189,81 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
MESHLET_VERTICES,
MESHLET_TRIANGLES);
meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
unsigned int *meshlet_vertices =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *MESHLET_VERTICES);
unsigned char *meshlet_triangles =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *MESHLET_INDICES);
meshopt_Bounds *meshlet_bounds = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Bounds, max_meshlets);
// Convert meshopt meshlets into our meshlets, each with its own vertex and index buffer
std::vector<meshlet> meshlets;
{
size_t max_meshlets = meshopt_buildMeshletsBound(m_meshes[mesh_idx].num_indices,
L0_MESHLET_VERTICES,
L0_MESHLET_TRIANGLES);
meshopt_Meshlet *meshopt_meshlets =
RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
unsigned int *meshlet_vertices =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets *L0_MESHLET_VERTICES);
unsigned char *meshlet_triangles =
RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets *L0_MESHLET_INDICES);
uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshlets,
meshlet_vertices,
meshlet_triangles,
m_meshes[mesh_idx].indices,
m_meshes[mesh_idx].num_indices,
&m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices,
sizeof(vertex),
MESHLET_VERTICES,
MESHLET_TRIANGLES,
cone_weight);
uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshopt_meshlets,
meshlet_vertices,
meshlet_triangles,
m_meshes[mesh_idx].indices,
m_meshes[mesh_idx].num_indices,
&m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices,
sizeof(vertex),
L0_MESHLET_VERTICES,
L0_MESHLET_TRIANGLES,
cone_weight);
for (size_t i = 0; i < meshlet_count; ++i) {
meshopt_optimizeMeshlet(&meshlet_vertices[meshlets[i].vertex_offset],
&meshlet_triangles[meshlets[i].triangle_offset],
meshlets[i].triangle_count,
meshlets[i].vertex_count);
for (size_t i = 0; i < meshlet_count; ++i) {
meshopt_optimizeMeshlet(&meshlet_vertices[meshopt_meshlets[i].vertex_offset],
&meshlet_triangles[meshopt_meshlets[i].triangle_offset],
meshopt_meshlets[i].triangle_count,
meshopt_meshlets[i].vertex_count);
meshlet_bounds[i] =
meshopt_computeMeshletBounds(&meshlet_vertices[meshlets[i].vertex_offset],
&meshlet_triangles[meshlets[i].triangle_offset],
meshlets[i].triangle_count,
&m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices,
sizeof(vertex));
meshopt_Bounds bounds = meshopt_computeMeshletBounds(
&meshlet_vertices[meshopt_meshlets[i].vertex_offset],
&meshlet_triangles[meshopt_meshlets[i].triangle_offset],
meshopt_meshlets[i].triangle_count,
&m_meshes[mesh_idx].vertices[0].vx,
m_meshes[mesh_idx].num_vertices,
sizeof(vertex));
meshlet meshlet;
meshlet.vertices = new vertex[L0_MESHLET_VERTICES];
meshlet.indices = new uint16_t[L0_MESHLET_INDICES];
meshlet.num_vertices = meshopt_meshlets[i].vertex_count;
meshlet.num_indices = meshopt_meshlets[i].triangle_count * 3;
memcpy(meshlet.center, bounds.center, sizeof(bounds.center));
meshlet.radius = bounds.radius;
for (unsigned int vert_idx = 0; vert_idx < meshopt_meshlets[i].vertex_count;
++vert_idx) {
unsigned int vert = meshlet_vertices[meshopt_meshlets[i].vertex_offset + vert_idx];
meshlet.vertices[vert_idx] = m_meshes[mesh_idx].vertices[vert];
}
for (unsigned int tri_idx = 0; tri_idx < meshopt_meshlets[i].triangle_count;
++tri_idx) {
meshlet.indices[tri_idx * 3 + 0] = (uint16_t)
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 0];
meshlet.indices[tri_idx * 3 + 1] = (uint16_t)
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 1];
meshlet.indices[tri_idx * 3 + 2] = (uint16_t)
meshlet_triangles[meshopt_meshlets[i].triangle_offset + 3 * tri_idx + 2];
}
meshlet.num_children = 0u;
meshlets.push_back(meshlet);
}
}
// We now have a flat list of meshlets -> the highest lod ones
// We now combine (up to 8) meshlets into one to generate the next hierarchy level
// Repeat until we only have 1 meshlet left
std::vector<uint32_t> unprocessed;
unprocessed.reserve(meshlet_count);
for (uint32_t i = 0; i < meshlet_count; ++i) {
unprocessed.reserve(meshlets.size());
for (uint32_t i = 0; i < meshlets.size(); ++i) {
unprocessed.push_back(i);
}
std::vector<uint32_t> next_level;
@ -241,11 +278,11 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
unsigned int closest_count = 0u;
float first_center[3];
memcpy(first_center, meshlet_bounds[first].center, sizeof(float) * 3);
memcpy(first_center, meshlets[first].center, sizeof(float) * 3);
for (uint32_t i = 0; i < unprocessed.size(); ++i) {
float center[3];
memcpy(center, meshlet_bounds[unprocessed[i]].center, sizeof(float) * 3);
memcpy(center, meshlets[unprocessed[i]].center, sizeof(float) * 3);
float dist = sqrtf((center[0] - first_center[0]) * (center[0] - first_center[0]) +
(center[1] - first_center[1]) * (center[1] - first_center[1]) +
@ -255,7 +292,7 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
// Check if we are closer than one of the other candidates
for (unsigned int j = 0; j < closest_count; ++j) {
uint32_t highest_idx = UINT_MAX;
float highest_dist = dist;
float highest_dist = dist;
if (dist < distances[j]) {
if (distances[j] > highest_dist) {
highest_dist = distances[j];
@ -263,14 +300,14 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
}
}
if (highest_idx < RT_ARRAY_COUNT(closest)) {
const uint32_t replaced = highest_idx;
const uint32_t replaced = closest[highest_idx];
distances[j] = dist;
closest[j] = i;
closest[j] = unprocessed[i];
unprocessed.push_back(replaced);
}
}
} else {
closest[closest_count] = i;
closest[closest_count] = unprocessed[i];
distances[closest_count] = dist;
closest_count++;
unprocessed.erase(unprocessed.begin() + i);
@ -279,6 +316,81 @@ rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_ro
// Combine into a new meshlet
// vertex *vertices = new vertex[MESHLET_VERTICES * 8];
vertex *in_vertices = new vertex[L0_MESHLET_VERTICES * 8];
uint32_t *in_indices = new uint32_t[L0_MESHLET_INDICES * 8];
memcpy(in_vertices,
meshlets[first].vertices,
sizeof(vertex) * meshlets[first].num_vertices);
size_t at_vert = meshlets[first].num_vertices;
for (unsigned int i = 0; i < meshlets[first].num_indices; ++i)
in_indices[i] = static_cast<uint32_t>(meshlets[first].indices[i]);
size_t at_idx = meshlets[first].num_indices;
for (unsigned int i = 0; i < closest_count; ++i) {
memcpy(&in_vertices[at_vert],
meshlets[closest[i]].vertices,
sizeof(vertex) * meshlets[closest[i]].num_vertices);
at_vert += meshlets[closest[i]].num_vertices;
for (unsigned int j = 0; j < meshlets[closest[i]].num_indices; ++j)
in_indices[at_idx + j] = static_cast<uint32_t>(meshlets[closest[i]].indices[j]);
at_idx += meshlets[closest[i]].num_indices;
}
const float target_error =
0.10f; // Accept 10% error. We are simplifying meshlets, the extents are not very large
const size_t target_index_count = L0_MESHLET_INDICES;
const unsigned int simplify_options = meshopt_SimplifyLockBorder;
const float threshold = 1.f / 8.f;
std::vector<unsigned int> lod(UINT16_MAX);
float lod_error = 0.f;
size_t out_index_count = meshopt_simplify(&lod[0],
in_indices,
at_idx,
&in_vertices[0].vx,
at_vert,
sizeof(vertex),
target_index_count,
target_error,
simplify_options,
&lod_error);
lod.resize(out_index_count);
if (out_index_count < UINT16_MAX) {
rtLog("EXP", "Yay");
} else {
rtReportError("EXP", "Nay");
}
std::unordered_map<unsigned int, uint16_t> index_remap;
uint16_t *index_buffer = new uint16_t[out_index_count];
std::vector<vertex> merged_vertices;
merged_vertices.reserve(out_index_count);
for (size_t i = 0; i < out_index_count; ++i) {
if (index_remap.find(lod[i]) != index_remap.end()) {
index_buffer[i] = index_remap[lod[i]];
} else {
uint16_t index = (uint16_t)merged_vertices.size();
merged_vertices.push_back(in_vertices[lod[i]]);
index_buffer[i] = index;
index_remap[lod[i]] = index;
}
}
meshlet merged = {};
merged.num_children = closest_count + 1;
merged.children[0] = first;
memcpy(merged.children, closest, sizeof(uint32_t) * closest_count);
merged.indices = index_buffer;
merged.num_indices = (uint32_t)out_index_count;
merged.vertices = new vertex[merged_vertices.size()];
merged.num_vertices = (uint32_t)merged_vertices.size();
memcpy(merged.vertices, merged_vertices.data(), sizeof(vertex) * merged_vertices.size());
// Determine center and bounds
meshlets.push_back(merged);
next_level.push_back((unsigned int)meshlets.size() - 1);
}
return RT_SUCCESS;
}

View File

@ -3,9 +3,9 @@
#include "runtime/runtime.h"
constexpr size_t MESHLET_VERTICES = 64;
constexpr size_t MESHLET_TRIANGLES = 124;
constexpr size_t MESHLET_INDICES = MESHLET_TRIANGLES * 3;
constexpr size_t L0_MESHLET_VERTICES = 64;
constexpr size_t L0_MESHLET_TRIANGLES = 124;
constexpr size_t L0_MESHLET_INDICES = L0_MESHLET_TRIANGLES * 3;
struct vertex {
float vx, vy, vz;
@ -13,18 +13,13 @@ struct vertex {
struct meshlet {
vertex *vertices;
uint8_t *indices;
uint16_t *indices;
uint32_t num_vertices;
uint32_t num_indices;
float center[3];
float radius;
// Normal cone for backface culling
float cone_apex[3];
float cone_axis[3];
float cone_cutoff;
// child indices
uint32_t children[8];
uint32_t num_children;

View File

@ -108,8 +108,6 @@ struct gpu_mesh_data {
struct gpu_meshlet_data {
float bounds[4]; // xyz, radius
float cone_apex[4]; // xyz, unused
float cone_axis_cutoff[4];
uint32_t mesh_index;
uint32_t first_index;
uint32_t base_vertex;
@ -176,9 +174,58 @@ rt_result meshlet_renderer::Initialize() {
m_single_meshlet_shader = shader::CompileSource(_single_vert, _single_frag);
m_flat_cull_shader = shader::CompileFile(_flat_cull);
m_meshlet_shader = shader::CompileSource(_meshlet_vert, _meshlet_frag);
InitFlat();
return RT_SUCCESS;
}
void OnModelLoaded(meshlet_generator *gen);
void meshlet_renderer::InitFlat(void) {
const size_t MAX_MESHLETS = 65556;
GLuint buffers[6];
glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers);
m_flat.vbo = buffers[0];
m_flat.ebo = buffers[1];
m_flat.meshlet_ssbo = buffers[2];
m_flat.draw_ssbo = buffers[3];
m_flat.cull_ssbo = buffers[4];
m_flat.mesh_ssbo = buffers[5];
// Create the vao
glGenVertexArrays(1, &m_flat.vao);
glBindVertexArray(m_flat.vao);
glBindBuffer(GL_ARRAY_BUFFER, m_flat.vbo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_flat.ebo);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0);
glBindVertexArray(0);
// Reserve space
glNamedBufferStorage(m_flat.vbo,
MAX_MESHLETS * sizeof(vertex) * L0_MESHLET_VERTICES,
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.ebo,
MAX_MESHLETS * sizeof(uint16_t) * L0_MESHLET_INDICES,
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.meshlet_ssbo,
MAX_MESHLETS * sizeof(gpu_meshlet_data),
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.draw_ssbo,
MAX_MESHLETS * sizeof(draw_elements_indirect_command),
nullptr,
0);
glNamedBufferStorage(m_flat.cull_ssbo, sizeof(cull_output), nullptr, GL_MAP_READ_BIT | GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(m_flat.mesh_ssbo, sizeof(gpu_mesh_data), nullptr, GL_DYNAMIC_STORAGE_BIT);
}
void meshlet_renderer::RenderFlat(const meshlet *meshlets, unsigned int count) {
if (m_settings.separate_rendering) {
SeparateRendering(meshlets, count);
@ -193,88 +240,56 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co
// Do compute "culling" (generate drawindirect) into 1 ssbo
// DrawIndirect
GLuint vbo, ebo, meshlet_ssbo, draw_ssbo, cull_ssbo, mesh_ssbo;
GLuint buffers[6];
glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers);
vbo = buffers[0];
ebo = buffers[1];
meshlet_ssbo = buffers[2];
draw_ssbo = buffers[3];
cull_ssbo = buffers[4];
mesh_ssbo = buffers[5];
// Store vertices and indices at idx * MESHLET_VERTICES/MESHLET_INDICES
glNamedBufferStorage(vbo,
count * sizeof(vertex) * MESHLET_VERTICES,
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(ebo, count * MESHLET_INDICES, nullptr, GL_DYNAMIC_STORAGE_BIT);
for (unsigned int i = 0; i < count; ++i) {
glNamedBufferSubData(vbo,
i * sizeof(vertex) * MESHLET_VERTICES,
glNamedBufferSubData(m_flat.vbo,
i * sizeof(vertex) * L0_MESHLET_VERTICES,
meshlets[i].num_vertices * sizeof(vertex),
meshlets[i].vertices);
glNamedBufferSubData(ebo,
i * MESHLET_INDICES,
meshlets[i].num_indices,
glNamedBufferSubData(m_flat.ebo,
i * L0_MESHLET_INDICES * sizeof(uint16_t),
meshlets[i].num_indices * sizeof(uint16_t),
meshlets[i].indices);
}
// Store meshlet information
glNamedBufferStorage(meshlet_ssbo,
count * sizeof(gpu_meshlet_data),
nullptr,
GL_DYNAMIC_STORAGE_BIT);
for (unsigned int i = 0; i < count; ++i) {
gpu_meshlet_data meshlet;
// Only have one right now
meshlet.mesh_index = 0;
meshlet.first_index = i * MESHLET_INDICES;
meshlet.base_vertex = i * MESHLET_VERTICES;
meshlet.first_index = i * L0_MESHLET_INDICES;
meshlet.base_vertex = i * L0_MESHLET_VERTICES;
meshlet.index_count = meshlets[i].num_indices;
memcpy(meshlet.bounds, meshlets[i].center, 3 * sizeof(float));
meshlet.bounds[3] = meshlets[i].radius;
memcpy(meshlet.cone_apex, meshlets[i].cone_apex, 3 * sizeof(float));
meshlet.cone_apex[3] = 0.f;
memcpy(meshlet.cone_axis_cutoff, meshlets[i].cone_axis, sizeof(meshlets[i].cone_axis));
glNamedBufferSubData(meshlet_ssbo,
glNamedBufferSubData(m_flat.meshlet_ssbo,
i * sizeof(gpu_meshlet_data),
sizeof(gpu_meshlet_data),
&meshlet);
}
// Reserve space for the draw commands
glNamedBufferStorage(draw_ssbo, count * sizeof(draw_elements_indirect_command), nullptr, 0);
// Prepare culling output
cull_output cull_output = {0};
glNamedBufferStorage(cull_ssbo, sizeof(cull_output), &cull_output, GL_MAP_READ_BIT);
glNamedBufferSubData(m_flat.cull_ssbo, 0, sizeof(cull_output), &cull_output);
// Prepare mesh data
gpu_mesh_data mesh_data;
mesh_data.model = glm ::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) *
glm::scale(glm::vec3(m_settings.scale));
glNamedBufferStorage(mesh_ssbo, sizeof(mesh_data), &mesh_data, GL_DYNAMIC_STORAGE_BIT);
glNamedBufferSubData(m_flat.mesh_ssbo, 0, sizeof(mesh_data), &mesh_data);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
// Do culling. TODO: Get number of draws back
m_flat_cull_shader.Use();
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, meshlet_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, draw_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cull_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, mesh_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_flat.meshlet_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_flat.draw_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_flat.cull_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_flat.mesh_ssbo);
glUniform3fv(0, 1, m_settings.eye);
glDispatchCompute(count, 1, 1);
// Create the vao
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0);
// DrawIndirect
GLuint mvploc = glGetUniformLocation(m_meshlet_shader.m_prog, "mvp");
@ -295,21 +310,20 @@ void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int co
m_meshlet_shader.Use();
glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, draw_ssbo);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_flat.draw_ssbo);
glMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT);
{
void *out = glMapNamedBuffer(cull_ssbo, GL_READ_ONLY);
void *out = glMapNamedBuffer(m_flat.cull_ssbo, GL_READ_ONLY);
memcpy(&cull_output, out, sizeof(cull_output));
glUnmapNamedBuffer(cull_ssbo);
glUnmapNamedBuffer(m_flat.cull_ssbo);
}
glBindVertexArray(m_flat.vao);
glMultiDrawElementsIndirect(GL_TRIANGLES,
GL_UNSIGNED_BYTE,
GL_UNSIGNED_SHORT,
nullptr,
cull_output.draw_count,
sizeof(draw_elements_indirect_command));
glDeleteBuffers(RT_ARRAY_COUNT(buffers), buffers);
}
void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int count) {
@ -320,9 +334,9 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * MESHLET_VERTICES, nullptr, GL_STREAM_DRAW);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * L0_MESHLET_VERTICES, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, MESHLET_INDICES, nullptr, GL_STREAM_DRAW);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(uint16_t) * L0_MESHLET_INDICES, nullptr, GL_STREAM_DRAW);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0);
@ -361,11 +375,11 @@ void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int c
meshlets[i].vertices);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,
0,
sizeof(uint8_t) * meshlets[i].num_indices,
sizeof(uint16_t) * meshlets[i].num_indices,
meshlets[i].indices);
glUniform3fv(colorloc, 1, colors[i % 5]);
glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_BYTE, nullptr);
glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_SHORT, nullptr);
}
glDeleteVertexArrays(1, &vao);

View File

@ -24,6 +24,7 @@ struct shader {
};
struct meshlet_renderer {
public:
struct settings {
bool separate_rendering = true;
float scale = 1.f;
@ -33,6 +34,18 @@ struct meshlet_renderer {
float fov;
};
private:
// GL object names used by the flat rendering path.
// Every handle defaults to 0 (the name GL reserves for "no object"), so the state is
// well-defined even before InitFlat() has assigned the real names.
struct flat_state {
    GLuint vao = 0;          // vertex array object binding vbo/ebo to attribute 0
    GLuint vbo = 0;          // vertex data, one fixed-size slice per meshlet
    GLuint ebo = 0;          // 16-bit index data, one fixed-size slice per meshlet
    GLuint meshlet_ssbo = 0; // per-meshlet gpu_meshlet_data records
    GLuint draw_ssbo = 0;    // indirect draw commands
    GLuint cull_ssbo = 0;    // cull_output record
    GLuint mesh_ssbo = 0;    // gpu_mesh_data record
};
public:
rt_result Initialize();
void RenderFlat(const meshlet *meshlets, unsigned int count);
@ -40,16 +53,21 @@ struct meshlet_renderer {
void SettingMenu();
private:
void InitFlat(void);
void SeparateRendering(const meshlet *meshlets, unsigned int count);
void DrawIndirectFlat(const meshlet *meshlets, unsigned int count);
public:
settings m_settings;
float m_aspect;
private:
flat_state m_flat;
shader m_single_meshlet_shader;
shader m_flat_cull_shader;
shader m_meshlet_shader;
float m_aspect;
};
#endif