WIP meshlets experiment

This commit is contained in:
Kevin Trogant 2024-07-15 16:34:39 +02:00
parent 1b4a17f01a
commit 09c14a8809
39 changed files with 116919 additions and 9 deletions

105414
assets/stanford-bunny.obj Normal file

File diff suppressed because it is too large Load Diff

1833
contrib/glad/glad.c Normal file

File diff suppressed because it is too large Load Diff

3694
contrib/glad/glad.h Normal file

File diff suppressed because it is too large Load Diff

311
contrib/glad/khrplatform.h Normal file
View File

@ -0,0 +1,311 @@
#ifndef __khrplatform_h_
#define __khrplatform_h_
/*
** Copyright (c) 2008-2018 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
/* Khronos platform-specific types and definitions.
*
* The master copy of khrplatform.h is maintained in the Khronos EGL
* Registry repository at https://github.com/KhronosGroup/EGL-Registry
* The last semantic modification to khrplatform.h was at commit ID:
* 67a3e0864c2d75ea5287b9f3d2eb74a745936692
*
* Adopters may modify this file to suit their platform. Adopters are
* encouraged to submit platform specific modifications to the Khronos
* group so that they can be included in future versions of this file.
* Please submit changes by filing pull requests or issues on
* the EGL Registry repository linked above.
*
*
* See the Implementer's Guidelines for information about where this file
* should be located on your system and for more details of its use:
* http://www.khronos.org/registry/implementers_guide.pdf
*
* This file should be included as
* #include <KHR/khrplatform.h>
* by Khronos client API header files that use its types and defines.
*
* The types in khrplatform.h should only be used to define API-specific types.
*
* Types defined in khrplatform.h:
* khronos_int8_t signed 8 bit
* khronos_uint8_t unsigned 8 bit
* khronos_int16_t signed 16 bit
* khronos_uint16_t unsigned 16 bit
* khronos_int32_t signed 32 bit
* khronos_uint32_t unsigned 32 bit
* khronos_int64_t signed 64 bit
* khronos_uint64_t unsigned 64 bit
* khronos_intptr_t signed same number of bits as a pointer
* khronos_uintptr_t unsigned same number of bits as a pointer
* khronos_ssize_t signed size
* khronos_usize_t unsigned size
* khronos_float_t signed 32 bit floating point
* khronos_time_ns_t unsigned 64 bit time in nanoseconds
* khronos_utime_nanoseconds_t unsigned time interval or absolute time in
* nanoseconds
* khronos_stime_nanoseconds_t signed time interval in nanoseconds
* khronos_boolean_enum_t enumerated boolean type. This should
* only be used as a base type when a client API's boolean type is
* an enum. Client APIs which use an integer or other type for
* booleans cannot use this as the base type for their boolean.
*
* Tokens defined in khrplatform.h:
*
* KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values.
*
* KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0.
* KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0.
*
* Calling convention macros defined in this file:
* KHRONOS_APICALL
* KHRONOS_APIENTRY
* KHRONOS_APIATTRIBUTES
*
* These may be used in function prototypes as:
*
* KHRONOS_APICALL void KHRONOS_APIENTRY funcname(
* int arg1,
* int arg2) KHRONOS_APIATTRIBUTES;
*/
#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
# define KHRONOS_STATIC 1
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APICALL
*-------------------------------------------------------------------------
* This precedes the return type of the function in the function prototype.
*/
#if defined(KHRONOS_STATIC)
/* If the preprocessor constant KHRONOS_STATIC is defined, make the
* header compatible with static linking. */
# define KHRONOS_APICALL
#elif defined(_WIN32)
# define KHRONOS_APICALL __declspec(dllimport)
#elif defined (__SYMBIAN32__)
# define KHRONOS_APICALL IMPORT_C
#elif defined(__ANDROID__)
# define KHRONOS_APICALL __attribute__((visibility("default")))
#else
# define KHRONOS_APICALL
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APIENTRY
*-------------------------------------------------------------------------
* This follows the return type of the function and precedes the function
* name in the function prototype.
*/
#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
/* Win32 but not WinCE */
# define KHRONOS_APIENTRY __stdcall
#else
# define KHRONOS_APIENTRY
#endif
/*-------------------------------------------------------------------------
* Definition of KHRONOS_APIATTRIBUTES
*-------------------------------------------------------------------------
* This follows the closing parenthesis of the function prototype arguments.
*/
#if defined (__ARMCC_2__)
#define KHRONOS_APIATTRIBUTES __softfp
#else
#define KHRONOS_APIATTRIBUTES
#endif
/*-------------------------------------------------------------------------
* basic type definitions
*-----------------------------------------------------------------------*/
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__)
/*
* Using <stdint.h>
*/
#include <stdint.h>
typedef int32_t khronos_int32_t;
typedef uint32_t khronos_uint32_t;
typedef int64_t khronos_int64_t;
typedef uint64_t khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
/*
* To support platforms where unsigned long cannot be used interchangeably with
* intptr_t (e.g. CHERI-extended ISAs), we can use the stdint.h intptr_t.
* Ideally, we could just use (u)intptr_t everywhere, but this could result in
* ABI breakage if khronos_uintptr_t is changed from unsigned long to
* unsigned long long or similar (this results in different C++ name mangling).
* To avoid changes for existing platforms, we restrict usage of intptr_t to
* platforms where the size of a pointer is larger than the size of long.
*/
#if defined(__SIZEOF_LONG__) && defined(__SIZEOF_POINTER__)
#if __SIZEOF_POINTER__ > __SIZEOF_LONG__
#define KHRONOS_USE_INTPTR_T
#endif
#endif
#elif defined(__VMS ) || defined(__sgi)
/*
* Using <inttypes.h>
*/
#include <inttypes.h>
typedef int32_t khronos_int32_t;
typedef uint32_t khronos_uint32_t;
typedef int64_t khronos_int64_t;
typedef uint64_t khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#elif defined(_WIN32) && !defined(__SCITECH_SNAP__)
/*
* Win32
*/
typedef __int32 khronos_int32_t;
typedef unsigned __int32 khronos_uint32_t;
typedef __int64 khronos_int64_t;
typedef unsigned __int64 khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#elif defined(__sun__) || defined(__digital__)
/*
* Sun or Digital
*/
typedef int khronos_int32_t;
typedef unsigned int khronos_uint32_t;
#if defined(__arch64__) || defined(_LP64)
typedef long int khronos_int64_t;
typedef unsigned long int khronos_uint64_t;
#else
typedef long long int khronos_int64_t;
typedef unsigned long long int khronos_uint64_t;
#endif /* __arch64__ */
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#elif 0
/*
* Hypothetical platform with no float or int64 support
*/
typedef int khronos_int32_t;
typedef unsigned int khronos_uint32_t;
#define KHRONOS_SUPPORT_INT64 0
#define KHRONOS_SUPPORT_FLOAT 0
#else
/*
* Generic fallback
*/
#include <stdint.h>
typedef int32_t khronos_int32_t;
typedef uint32_t khronos_uint32_t;
typedef int64_t khronos_int64_t;
typedef uint64_t khronos_uint64_t;
#define KHRONOS_SUPPORT_INT64 1
#define KHRONOS_SUPPORT_FLOAT 1
#endif
/*
* Types that are (so far) the same on all platforms
*/
typedef signed char khronos_int8_t;
typedef unsigned char khronos_uint8_t;
typedef signed short int khronos_int16_t;
typedef unsigned short int khronos_uint16_t;
/*
* Types that differ between LLP64 and LP64 architectures - in LLP64,
* pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
* to be the only LLP64 architecture in current use.
*/
#ifdef KHRONOS_USE_INTPTR_T
typedef intptr_t khronos_intptr_t;
typedef uintptr_t khronos_uintptr_t;
#elif defined(_WIN64)
typedef signed long long int khronos_intptr_t;
typedef unsigned long long int khronos_uintptr_t;
#else
typedef signed long int khronos_intptr_t;
typedef unsigned long int khronos_uintptr_t;
#endif
#if defined(_WIN64)
typedef signed long long int khronos_ssize_t;
typedef unsigned long long int khronos_usize_t;
#else
typedef signed long int khronos_ssize_t;
typedef unsigned long int khronos_usize_t;
#endif
#if KHRONOS_SUPPORT_FLOAT
/*
* Float type
*/
typedef float khronos_float_t;
#endif
#if KHRONOS_SUPPORT_INT64
/* Time types
*
* These types can be used to represent a time interval in nanoseconds or
* an absolute Unadjusted System Time. Unadjusted System Time is the number
* of nanoseconds since some arbitrary system event (e.g. since the last
* time the system booted). The Unadjusted System Time is an unsigned
* 64 bit value that wraps back to 0 every 584 years. Time intervals
* may be either signed or unsigned.
*/
typedef khronos_uint64_t khronos_utime_nanoseconds_t;
typedef khronos_int64_t khronos_stime_nanoseconds_t;
#endif
/*
* Dummy value used to pad enum types to 32 bits.
*/
#ifndef KHRONOS_MAX_ENUM
#define KHRONOS_MAX_ENUM 0x7FFFFFFF
#endif
/*
* Enumerated boolean type
*
* Values other than zero should be considered to be true. Therefore
* comparisons should not be made against KHRONOS_TRUE.
*/
typedef enum {
KHRONOS_FALSE = 0,
KHRONOS_TRUE = 1,
KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM
} khronos_boolean_enum_t;
#endif /* __khrplatform_h_ */

3499
contrib/tiny_obj_loader.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -58,6 +58,10 @@ thread_dep = dependency('threads')
m_dep = compiler.find_library('m', required : false)
vk_dep = dependency('vulkan', required : false)
# Subprojects installed via wraps
meshoptimizer_proj = subproject('meshoptimizer', default_options: ['warning_level=0', 'werror=false'] )
meshoptimizer_dep = meshoptimizer_proj.get_variable('meshoptimizer_dep')
windowing_dep = []
if get_option('use_xlib')
windowing_dep = dependency('x11', required : true)

View File

@ -9,7 +9,7 @@
#include <stdbool.h>
RT_CVAR_I(rt_Fullscreen, "Show window in fullscreen mode. [0/1] Default: 0", 0);
RT_CVAR_I(rt_Fullscreen, "Show window in fullscreen mode. [0/1] Default: 0", 1);
RT_CVAR_I(rt_WindowWidth, "Window width. Default: 1024", 1024);
RT_CVAR_I(rt_WindowHeight, "Window height. Default: 768", 768);
@ -96,7 +96,18 @@ RT_DLLEXPORT int rtWin32Entry(HINSTANCE hInstance,
return 1;
}
rt_renderer_init_info renderer_info = {.hWnd = wnd, .hInstance = hInstance};
unsigned int window_width, window_height;
{
RECT r;
GetClientRect(wnd, &r);
window_width = r.right;
window_height = r.bottom;
}
rt_renderer_init_info renderer_info = {.hWnd = wnd,
.hInstance = hInstance,
.width = window_width,
.height = window_height,
.is_fullscreen = rt_Fullscreen.i};
if (rtLoadRenderer() != RT_SUCCESS) {
return 1;
}

View File

@ -0,0 +1,60 @@
#version 460 core
layout(local_size_x = 1) in;
struct mesh_data {
mat4 model;
};
struct meshlet_data {
vec4 bounds; // xyz, radius
vec4 cone_apex; // xyz, unused
vec4 cone_axis_cutoff; // xyz, cutoff = w
uint mesh_index;
uint first_index;
uint base_vertex;
uint index_count;
};
struct draw_indirect_command {
uint count;
uint instance_count;
uint first_index;
int base_vertex;
uint base_instance;
};
layout(std430, binding = 1) readonly buffer meshletsSSBO {
meshlet_data meshlets[];
};
layout(std430, binding = 2) writeonly buffer drawSSBO {
draw_indirect_command draw_commands[];
};
layout(std430, binding = 3) buffer outputSSBO {
uint draw_count;
};
layout(std430, binding = 4) readonly buffer meshSSBO {
mesh_data meshes[];
};
// World space camera position
layout(location = 0) uniform vec3 camera_pos;
// One invocation per meshlet: transforms the meshlet's normal cone with the
// owning mesh's model matrix and, when the cone test passes, appends an
// indirect draw command for the meshlet to draw_commands.
void main() {
    uint idx = gl_GlobalInvocationID.x;
    meshlet_data m = meshlets[idx];
    mat4 model = meshes[m.mesh_index].model;

    // w = 0 transforms the axis as a direction, w = 1 transforms the apex as a point.
    vec3 axis_ws = (model * vec4(m.cone_axis_cutoff.xyz, 0)).xyz;
    vec3 apex_ws = (model * vec4(m.cone_apex.xyz, 1)).xyz;
    float cutoff = m.cone_axis_cutoff.w;

    vec3 view_dir = normalize(apex_ws - camera_pos);

    // Written as a negated '<' so that the outcome for NaN inputs stays "no draw".
    if (!(dot(view_dir, axis_ws) < cutoff)) {
        return;
    }

    // Reserve the next free slot in the command list.
    uint slot = atomicAdd(draw_count, 1);
    draw_commands[slot] = draw_indirect_command(m.index_count,
                                                1u,
                                                m.first_index,
                                                int(m.base_vertex),
                                                0u);
}

View File

@ -0,0 +1,224 @@
#include <glad/glad.h>
#include <GLFW/glfw3.h>
#include <runtime/runtime.h>
#include <imgui.h>
#include <imgui_impl_glfw.h>
#include <imgui_impl_opengl3.h>
#include "meshlet_generator.hpp"
#include "meshlet_renderer.hpp"
/* GLFW error callback (installed via glfwSetErrorCallback in main).
 * Forwards the error number and description to rtReportError under the "GLFW" subsystem. */
static void GlfwErrorCallback(int errnum, const char *desc) {
    rtReportError("GLFW", "Error %d: %s", errnum, desc);
}
/* Label logged for the 'source' argument of a GL debug message. */
static const char *GLDebugSourceLabel(GLenum source) {
    switch (source) {
    case GL_DEBUG_SOURCE_API:
        return "Source: API";
    case GL_DEBUG_SOURCE_WINDOW_SYSTEM:
        return "Source: Window System";
    case GL_DEBUG_SOURCE_SHADER_COMPILER:
        return "Source: Shader Compiler";
    case GL_DEBUG_SOURCE_THIRD_PARTY:
        return "Source: Third Party";
    case GL_DEBUG_SOURCE_APPLICATION:
        return "Source: Application";
    case GL_DEBUG_SOURCE_OTHER:
        return "Source: Other";
    default:
        return "<undefined>";
    }
}

/* Label logged for the 'type' argument of a GL debug message. */
static const char *GLDebugTypeLabel(GLenum type) {
    switch (type) {
    case GL_DEBUG_TYPE_ERROR:
        return "Type: Error";
    case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR:
        return "Type: Deprecated Behaviour";
    case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR:
        return "Type: Undefined Behaviour";
    case GL_DEBUG_TYPE_PORTABILITY:
        return "Type: Portability";
    case GL_DEBUG_TYPE_PERFORMANCE:
        return "Type: Performance";
    case GL_DEBUG_TYPE_MARKER:
        return "Type: Marker";
    case GL_DEBUG_TYPE_PUSH_GROUP:
        return "Type: Push Group";
    case GL_DEBUG_TYPE_POP_GROUP:
        return "Type: Pop Group";
    case GL_DEBUG_TYPE_OTHER:
        return "Type: Other";
    default:
        return "<undefined>";
    }
}

/* Label logged for the 'severity' argument of a GL debug message. */
static const char *GLDebugSeverityLabel(GLenum severity) {
    switch (severity) {
    case GL_DEBUG_SEVERITY_HIGH:
        return "Severity: high";
    case GL_DEBUG_SEVERITY_MEDIUM:
        return "Severity: medium";
    case GL_DEBUG_SEVERITY_LOW:
        return "Severity: low";
    case GL_DEBUG_SEVERITY_NOTIFICATION:
        return "Severity: notification";
    default:
        return "<undefined>";
    }
}

/* OpenGL debug-output callback (installed via glDebugMessageCallback in main).
 * Logs the message id and text followed by its source, type and severity, and
 * triggers RT_DEBUGBREAK for messages of type GL_DEBUG_TYPE_ERROR.
 * The 'length' and 'userparam' arguments are not used. */
static void GLDebugCallback(GLenum source,
                            GLenum type,
                            GLuint id,
                            GLenum severity,
                            GLsizei length,
                            const GLchar *message,
                            const void *userparam) {
    /* These message ids are dropped without being logged. */
    switch (id) {
    case 131169:
    case 131185:
    case 131218:
    case 131204:
        return;
    default:
        break;
    }

    rtLog("gl", "---------------");
    rtLog("gl", "Debug message (%u): %s", id, message);
    rtLog("gl", "%s", GLDebugSourceLabel(source));
    rtLog("gl", "%s", GLDebugTypeLabel(type));
    rtLog("gl", "%s", GLDebugSeverityLabel(severity));

    if (type == GL_DEBUG_TYPE_ERROR)
        RT_DEBUGBREAK;
}
/* Small ImGui window that lets the user type the path of an OBJ file.
 *
 * Two text buffers are used: once a path is picked, m_picked points at the
 * buffer that held it and text editing continues in the other buffer, so the
 * picked string is not modified while the user types the next path. */
struct file_picker {
    file_picker() {
        memset(m_input_buf, 0, sizeof(m_input_buf));
        strcpy(m_input_buf[0], "assets/stanford-bunny.obj");
    }

    /* Draws the window. Returns true if a new file was selected */
    bool RunFlat() {
        bool opened = false;
        if (ImGui::Begin("File Selection", &m_open)) {
            ImGui::InputTextWithHint("Path",
                                     "Path to the OBJ file",
                                     &m_input_buf[m_active_input][0],
                                     RT_ARRAY_COUNT(m_input_buf[0]));
            if (ImGui::Button("Open")) {
                m_picked       = m_input_buf[m_active_input];
                /* Switch editing to the other buffer and seed it with the picked path. */
                m_active_input = (m_active_input + 1) & 1;
                strcpy(m_input_buf[m_active_input], m_picked);
                opened = true;
            }
        }
        /* End() must be paired with every Begin(), even when Begin() returned
         * false (window collapsed or clipped). */
        ImGui::End();
        return opened;
    }

    /* Returns the most recently picked path. Must only be called after a file was picked. */
    RT_INLINE const char *GetPicked() {
        RT_VERIFY(m_picked);
        return m_picked;
    }

    /* True once "Open" has been pressed at least once. */
    RT_INLINE bool HasPickedFile() const {
        return m_picked != nullptr;
    }

    const char *m_picked = nullptr; /* Points into m_input_buf; nullptr until a file is picked */
    bool m_open          = true;    /* Passed to ImGui::Begin as the window's open flag */
    char m_input_buf[2][260];
    int m_active_input = 0;         /* Index of the buffer currently bound to the text field */
};
int main() {
if (rtInitRuntime() != RT_SUCCESS)
return -1;
glfwSetErrorCallback(GlfwErrorCallback);
if (!glfwInit())
return -1;
glfwWindowHint(GLFW_CLIENT_API, GLFW_OPENGL_API);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 4);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 6);
glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT, GLFW_TRUE);
GLFWwindow *window = glfwCreateWindow(1280, 720, "MESHLETS!", NULL, NULL);
if (!window)
return -1;
glfwMakeContextCurrent(window);
if (!gladLoadGL())
return -1;
glfwSwapInterval(1);
int flags;
glGetIntegerv(GL_CONTEXT_FLAGS, &flags);
if (flags & GL_CONTEXT_FLAG_DEBUG_BIT) {
glEnable(GL_DEBUG_OUTPUT);
glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS);
glDebugMessageCallback(GLDebugCallback, nullptr);
glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, nullptr, GL_TRUE);
}
// Setup Dear ImGui context
IMGUI_CHECKVERSION();
ImGui::CreateContext();
ImGuiIO &io = ImGui::GetIO();
(void)io;
io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard; // Enable Keyboard Controls
io.ConfigFlags |= ImGuiConfigFlags_NavEnableGamepad; // Enable Gamepad Controls
ImGui::StyleColorsDark();
ImGui_ImplGlfw_InitForOpenGL(window, true);
ImGui_ImplOpenGL3_Init("#version 130");
file_picker picker;
meshlet_generator gen;
meshlet_renderer ren;
ren.Initialize();
while (!glfwWindowShouldClose(window)) {
glfwPollEvents();
ImGui_ImplOpenGL3_NewFrame();
ImGui_ImplGlfw_NewFrame();
ImGui::NewFrame();
if (picker.RunFlat()) {
gen.LoadObj(picker.GetPicked());
gen.RunFlat();
}
ren.SettingMenu();
ImGui::Render();
int display_w, display_h;
glfwGetFramebufferSize(window, &display_w, &display_h);
glViewport(0, 0, display_w, display_h);
glClearColor(0.f, 0.f, 0.f, 1.f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
if (picker.HasPickedFile()) {
ren.m_aspect = (float)display_w / (float)display_h;
ren.RenderFlat(gen.m_meshlets, gen.m_num_meshlets);
}
ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
glfwSwapBuffers(window);
}
glfwDestroyWindow(window);
glfwTerminate();
rtShutdownRuntime();
return 0;
}

View File

@ -0,0 +1,284 @@
#include <meshoptimizer.h>
#pragma warning(push)
#pragma warning(disable : 4530)
#define TINYOBJLOADER_IMPLEMENTATION
#include <tiny_obj_loader.h>
#pragma warning(pop)
#include "meshlet_generator.hpp"
#include <runtime/mem_arena.h>
// Creates an empty generator: no meshes loaded and no meshlets generated.
meshlet_generator::meshlet_generator()
    : m_meshes(nullptr), m_num_meshes(0u), m_meshlets(nullptr), m_num_meshlets(0u) {
}
// Frees all mesh and meshlet storage by calling Release().
meshlet_generator::~meshlet_generator() {
    Release();
}
// Frees every loaded mesh and every generated meshlet and resets the
// generator to its empty state. Safe to call repeatedly.
//
// All of these buffers are allocated with new[] (see LoadObj and RunFlat),
// so they must be released with delete[].
void meshlet_generator::Release() {
    for (uint32_t i = 0; i < m_num_meshes; ++i) {
        delete[] m_meshes[i].vertices;
        delete[] m_meshes[i].indices;
    }
    delete[] m_meshes;
    m_meshes     = nullptr;
    m_num_meshes = 0;

    for (uint32_t i = 0; i < m_num_meshlets; ++i) {
        delete[] m_meshlets[i].indices;
        delete[] m_meshlets[i].vertices;
    }
    delete[] m_meshlets;
    m_meshlets     = nullptr;
    m_num_meshlets = 0;
}
// Loads an OBJ file and stores one indexed mesh (positions only) per shape.
//
// path: file to load; faces are triangulated by the OBJ reader.
//
// Returns RT_SUCCESS on success. If the file cannot be parsed the error is
// reported, RT_UNKNOWN_ERROR is returned and the generator keeps whatever it
// held before. On success any previously loaded meshes and generated meshlets
// are released and replaced.
rt_result meshlet_generator::LoadObj(const char *path) {
    tinyobj::ObjReaderConfig config;
    config.mtl_search_path = "./";
    config.triangulate     = true;

    tinyobj::ObjReader reader;
    if (!reader.ParseFromFile(path, config)) {
        if (!reader.Error().empty()) {
            rtReportError("OBJ", "Error: %s", reader.Error().c_str());
        }
        return RT_UNKNOWN_ERROR;
    }

    // The new file parsed fine: drop the data of the previous load so it is
    // not leaked when m_meshes is overwritten below.
    Release();

    // NOTE(review): the arena obtained here is never handed back in this
    // function - confirm whether the runtime expects a matching return call.
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);

    auto &attrib = reader.GetAttrib();
    auto &shapes = reader.GetShapes();
    // auto &materials = reader.GetMaterials();

    mesh_data *meshes = new mesh_data[shapes.size()];
    RT_VERIFY(meshes);

    for (size_t shape_idx = 0; shape_idx < shapes.size(); ++shape_idx) {
        const auto &src = shapes[shape_idx].mesh;

        rt_temp_arena rewind = rtBeginTempArena(temp.arena);

        // With triangulation enabled every face contributes three indices.
        // Rounding down keeps the count a multiple of three, which the
        // meshoptimizer index functions require.
        size_t num_indices = src.indices.size();
        num_indices -= num_indices % 3;

        // Expand to one vertex per index; deduplication happens below.
        vertex *unindexed_vertices = RT_ARENA_PUSH_ARRAY(temp.arena, vertex, num_indices);
        for (size_t i = 0; i < num_indices; ++i) {
            const tinyobj::index_t idx = src.indices[i];
            unindexed_vertices[i].vx   = attrib.vertices[3 * idx.vertex_index];
            unindexed_vertices[i].vy   = attrib.vertices[3 * idx.vertex_index + 1];
            unindexed_vertices[i].vz   = attrib.vertices[3 * idx.vertex_index + 2];
        }

        // Generate a non-redundant index buffer
        unsigned int *remap = RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, num_indices);
        size_t num_vertices = meshopt_generateVertexRemap(remap,
                                                          nullptr,
                                                          num_indices,
                                                          unindexed_vertices,
                                                          num_indices,
                                                          sizeof(vertex));

        meshes[shape_idx].num_indices  = (uint32_t)num_indices;
        meshes[shape_idx].indices      = new uint32_t[num_indices];
        meshes[shape_idx].num_vertices = (uint32_t)num_vertices;
        meshes[shape_idx].vertices     = new vertex[num_vertices];

        meshopt_remapIndexBuffer(meshes[shape_idx].indices, nullptr, num_indices, remap);
        meshopt_remapVertexBuffer(meshes[shape_idx].vertices,
                                  unindexed_vertices,
                                  num_indices,
                                  sizeof(vertex),
                                  remap);

        // Discard this shape's scratch allocations.
        rtEndTempArena(rewind);
    }

    m_num_meshes = (uint32_t)shapes.size();
    m_meshes     = meshes;
    return RT_SUCCESS;
}
// Builds a flat list of meshlets for one loaded mesh and stores it in
// m_meshlets / m_num_meshlets, replacing any previously generated list.
//
// mesh_idx: index into m_meshes.
//
// Returns RT_SUCCESS, or RT_UNKNOWN_ERROR if mesh_idx does not refer to a
// loaded mesh (in which case the current meshlet list is left untouched).
rt_result meshlet_generator::RunFlat(uint32_t mesh_idx) {
    if (mesh_idx >= m_num_meshes)
        return RT_UNKNOWN_ERROR;

    const mesh_data &mesh   = m_meshes[mesh_idx];
    const float cone_weight = 0.5f;

    // Free the result of an earlier run; otherwise it would leak when
    // m_meshlets is overwritten below.
    for (uint32_t i = 0; i < m_num_meshlets; ++i) {
        delete[] m_meshlets[i].indices;
        delete[] m_meshlets[i].vertices;
    }
    delete[] m_meshlets;
    m_meshlets     = nullptr;
    m_num_meshlets = 0;

    // NOTE(review): the arena obtained here is never handed back in this
    // function - confirm whether the runtime expects a matching return call.
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);

    size_t max_meshlets =
        meshopt_buildMeshletsBound(mesh.num_indices, MESHLET_VERTICES, MESHLET_TRIANGLES);

    meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
    unsigned int *meshlet_vertices =
        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets * MESHLET_VERTICES);
    unsigned char *meshlet_triangles =
        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets * MESHLET_INDICES);

    size_t meshlet_count = meshopt_buildMeshlets(meshlets,
                                                 meshlet_vertices,
                                                 meshlet_triangles,
                                                 mesh.indices,
                                                 mesh.num_indices,
                                                 &mesh.vertices[0].vx,
                                                 mesh.num_vertices,
                                                 sizeof(vertex),
                                                 MESHLET_VERTICES,
                                                 MESHLET_TRIANGLES,
                                                 cone_weight);

    m_meshlets     = new meshlet[meshlet_count];
    m_num_meshlets = (uint32_t)meshlet_count;

    for (size_t i = 0; i < meshlet_count; ++i) {
        const meshopt_Meshlet &src = meshlets[i];
        meshlet &dst               = m_meshlets[i];

        meshopt_optimizeMeshlet(&meshlet_vertices[src.vertex_offset],
                                &meshlet_triangles[src.triangle_offset],
                                src.triangle_count,
                                src.vertex_count);

        meshopt_Bounds bounds = meshopt_computeMeshletBounds(&meshlet_vertices[src.vertex_offset],
                                                             &meshlet_triangles[src.triangle_offset],
                                                             src.triangle_count,
                                                             &mesh.vertices[0].vx,
                                                             mesh.num_vertices,
                                                             sizeof(vertex));

        // Every meshlet gets full-size arrays; value-initialise them so the
        // part beyond num_vertices / num_indices has defined contents.
        dst.vertices     = new vertex[MESHLET_VERTICES]();
        dst.indices      = new uint8_t[MESHLET_INDICES]();
        dst.num_vertices = src.vertex_count;
        dst.num_indices  = src.triangle_count * 3;

        memcpy(dst.center, bounds.center, sizeof(bounds.center));
        dst.radius = bounds.radius;
        memcpy(dst.cone_axis, bounds.cone_axis, sizeof(bounds.cone_axis));
        dst.cone_cutoff = bounds.cone_cutoff;
        memcpy(dst.cone_apex, bounds.cone_apex, sizeof(bounds.cone_apex));

        // Resolve the meshlet's vertex references into a private vertex copy.
        for (unsigned int vert_idx = 0; vert_idx < src.vertex_count; ++vert_idx) {
            unsigned int vert      = meshlet_vertices[src.vertex_offset + vert_idx];
            dst.vertices[vert_idx] = mesh.vertices[vert];
        }

        // The 8 bit indices are local to the meshlet's own vertex array.
        memcpy(dst.indices, meshlet_triangles + src.triangle_offset, src.triangle_count * 3);

        dst.num_children = 0u;
    }

    return RT_SUCCESS;
}
// Work-in-progress: builds the finest-level meshlets for one mesh and groups
// them into clusters of up to 8 spatially close meshlets (one seed plus its
// 7 nearest neighbours by bounding-sphere center). Merging a cluster into a
// parent meshlet is not implemented yet.
//
// mesh_idx: index into m_meshes.
// out_root: intended to receive the root of the hierarchy.
//           NOTE(review): nothing is written to it yet.
//
// Returns RT_SUCCESS, or RT_UNKNOWN_ERROR if mesh_idx does not refer to a
// loaded mesh.
rt_result meshlet_generator::RunHierarchical(uint32_t mesh_idx, uint32_t *out_root) {
    (void)out_root;

    if (mesh_idx >= m_num_meshes)
        return RT_UNKNOWN_ERROR;

    const mesh_data &mesh   = m_meshes[mesh_idx];
    const float cone_weight = 0.25f;

    // NOTE(review): the arena obtained here is never handed back in this
    // function - confirm whether the runtime expects a matching return call.
    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);

    size_t max_meshlets =
        meshopt_buildMeshletsBound(mesh.num_indices, MESHLET_VERTICES, MESHLET_TRIANGLES);

    meshopt_Meshlet *meshlets = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Meshlet, max_meshlets);
    unsigned int *meshlet_vertices =
        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned int, max_meshlets * MESHLET_VERTICES);
    unsigned char *meshlet_triangles =
        RT_ARENA_PUSH_ARRAY(temp.arena, unsigned char, max_meshlets * MESHLET_INDICES);
    meshopt_Bounds *meshlet_bounds = RT_ARENA_PUSH_ARRAY(temp.arena, meshopt_Bounds, max_meshlets);

    uint32_t meshlet_count = (uint32_t)meshopt_buildMeshlets(meshlets,
                                                             meshlet_vertices,
                                                             meshlet_triangles,
                                                             mesh.indices,
                                                             mesh.num_indices,
                                                             &mesh.vertices[0].vx,
                                                             mesh.num_vertices,
                                                             sizeof(vertex),
                                                             MESHLET_VERTICES,
                                                             MESHLET_TRIANGLES,
                                                             cone_weight);

    for (size_t i = 0; i < meshlet_count; ++i) {
        meshopt_optimizeMeshlet(&meshlet_vertices[meshlets[i].vertex_offset],
                                &meshlet_triangles[meshlets[i].triangle_offset],
                                meshlets[i].triangle_count,
                                meshlets[i].vertex_count);
        meshlet_bounds[i] =
            meshopt_computeMeshletBounds(&meshlet_vertices[meshlets[i].vertex_offset],
                                         &meshlet_triangles[meshlets[i].triangle_offset],
                                         meshlets[i].triangle_count,
                                         &mesh.vertices[0].vx,
                                         mesh.num_vertices,
                                         sizeof(vertex));
    }

    // We now have a flat list of meshlets -> the highest lod ones
    // We now combine (up to 8) meshlets into one to generate the next hierarchy level
    // Repeat until we only have 1 meshlet left
    constexpr unsigned int MAX_NEIGHBOURS = 7;

    std::vector<uint32_t> unprocessed;
    unprocessed.reserve(meshlet_count);
    for (uint32_t i = 0; i < meshlet_count; ++i) {
        unprocessed.push_back(i);
    }

    std::vector<uint32_t> next_level;
    next_level.reserve((unprocessed.size() + 7) / 8);
    (void)next_level; // Filled once cluster merging is implemented.

    while (!unprocessed.empty()) {
        const uint32_t first = unprocessed.back();
        unprocessed.pop_back();
        const float *first_center = meshlet_bounds[first].center;

        // Find the 7 closest center points. Candidates are tracked as slots
        // (positions inside 'unprocessed'); the vector is not modified while
        // it is being scanned. Squared distances order the same as distances.
        size_t closest_slot[MAX_NEIGHBOURS];
        float closest_dist_sq[MAX_NEIGHBOURS];
        unsigned int closest_count = 0u;

        for (size_t slot = 0; slot < unprocessed.size(); ++slot) {
            const float *center = meshlet_bounds[unprocessed[slot]].center;
            const float dx      = center[0] - first_center[0];
            const float dy      = center[1] - first_center[1];
            const float dz      = center[2] - first_center[2];
            const float dist_sq = dx * dx + dy * dy + dz * dz;

            if (closest_count < MAX_NEIGHBOURS) {
                closest_slot[closest_count]    = slot;
                closest_dist_sq[closest_count] = dist_sq;
                ++closest_count;
                continue;
            }

            // The candidate set is full: replace the farthest candidate if
            // this meshlet is closer than it.
            unsigned int farthest = 0u;
            for (unsigned int j = 1; j < closest_count; ++j) {
                if (closest_dist_sq[j] > closest_dist_sq[farthest])
                    farthest = j;
            }
            if (dist_sq < closest_dist_sq[farthest]) {
                closest_slot[farthest]    = slot;
                closest_dist_sq[farthest] = dist_sq;
            }
        }

        // Sort the chosen slots in descending order (insertion sort, at most
        // 7 entries) so they can be removed back-to-front below.
        for (unsigned int a = 1; a < closest_count; ++a) {
            const size_t key = closest_slot[a];
            unsigned int b   = a;
            while (b > 0 && closest_slot[b - 1] < key) {
                closest_slot[b] = closest_slot[b - 1];
                --b;
            }
            closest_slot[b] = key;
        }

        // Move the chosen meshlets out of the work list. Because slots are
        // processed from highest to lowest, the element swapped in from the
        // back is never one of the remaining chosen slots.
        uint32_t group[1 + MAX_NEIGHBOURS];
        unsigned int group_size = 0u;
        group[group_size++]     = first;
        for (unsigned int k = 0; k < closest_count; ++k) {
            const size_t slot   = closest_slot[k];
            group[group_size++] = unprocessed[slot];
            unprocessed[slot]   = unprocessed.back();
            unprocessed.pop_back();
        }

        // Combine into a new meshlet
        // vertex *vertices = new vertex[MESHLET_VERTICES * 8];
        (void)group;
        (void)group_size;
    }

    return RT_SUCCESS;
}

View File

@ -0,0 +1,59 @@
#ifndef RT_EXP_MESHLET_GENERATOR_H
#define RT_EXP_MESHLET_GENERATOR_H
#include "runtime/runtime.h"
constexpr size_t MESHLET_VERTICES = 64;
constexpr size_t MESHLET_TRIANGLES = 124;
constexpr size_t MESHLET_INDICES = MESHLET_TRIANGLES * 3;
// Position-only vertex. Passed to meshoptimizer with sizeof(vertex) as the
// stride and &vx as the address of the first position component.
struct vertex {
    float vx, vy, vz;
};
// One generated meshlet. The vertex and index arrays are heap arrays owned by
// the meshlet_generator that produced the meshlet.
struct meshlet {
    // Private copy of the meshlet's vertices; num_vertices entries are valid.
    vertex *vertices;
    // Triangle list of 8 bit indices into 'vertices'; num_indices entries are valid.
    uint8_t *indices;
    uint32_t num_vertices;
    uint32_t num_indices;
    // Bounding sphere, copied from meshopt_Bounds.
    float center[3];
    float radius;
    // Normal cone for backface culling
    float cone_apex[3];
    float cone_axis[3];
    float cone_cutoff;
    // child indices
    // NOTE(review): the flat generator only sets num_children to 0; 'children'
    // is presumably reserved for the hierarchical path - confirm.
    uint32_t children[8];
    uint32_t num_children;
};
// Loads OBJ meshes and splits them into meshlets.
//
// The generator owns the heap arrays reachable through m_meshes and
// m_meshlets and frees them in Release() / the destructor. Copying is
// disabled because a copy would share those arrays and free them twice.
struct meshlet_generator {
    // One indexed triangle mesh loaded from an OBJ shape.
    struct mesh_data {
        vertex *vertices;
        uint32_t *indices;
        uint32_t num_vertices;
        uint32_t num_indices;
    };

    meshlet_generator();
    ~meshlet_generator();

    meshlet_generator(const meshlet_generator &)            = delete;
    meshlet_generator &operator=(const meshlet_generator &) = delete;

    // Frees all meshes and meshlets and resets the counts to zero.
    void Release();

    // Loads the OBJ file at 'path' into m_meshes (one entry per shape).
    rt_result LoadObj(const char *path);

    // Generates a flat meshlet list for mesh 'mesh_idx' into m_meshlets.
    rt_result RunFlat(uint32_t mesh_idx = 0);

    // Hierarchical variant of the generator for mesh 'mesh_idx'.
    rt_result RunHierarchical(uint32_t mesh_idx, uint32_t *out_root);

    mesh_data *m_meshes;
    uint32_t m_num_meshes;
    meshlet *m_meshlets;
    uint32_t m_num_meshlets;
};
#endif

View File

@ -0,0 +1,389 @@
#include "meshlet_renderer.hpp"
#include <imgui.h>
#include <stdio.h>
#define GLM_ENABLE_EXPERIMENTAL
#include <glm/glm.hpp>
#include <glm/gtx/transform.hpp>
#include <glm/gtc/quaternion.hpp>
// Compiles a vertex and a fragment shader from source and links them into a
// program.
//
// Returns a shader whose m_prog is the linked program, or a zero-initialised
// shader (m_prog == 0) if compilation or linking fails. The info log of the
// failing stage is written to the "MESHLETS" log, and every GL object created
// up to that point is deleted again.
shader shader::CompileSource(const char *vert_src, const char *frag_src) {
    shader sh    = {0};
    GLint status = GL_FALSE;
    char log[512];

    GLuint vert = glCreateShader(GL_VERTEX_SHADER);
    glShaderSource(vert, 1, &vert_src, NULL);
    glCompileShader(vert);
    glGetShaderiv(vert, GL_COMPILE_STATUS, &status);
    if (status != GL_TRUE) {
        glGetShaderInfoLog(vert, sizeof(log), NULL, log);
        rtLog("MESHLETS", "VERT: %s", log);
        glDeleteShader(vert);
        return sh;
    }

    GLuint frag = glCreateShader(GL_FRAGMENT_SHADER);
    glShaderSource(frag, 1, &frag_src, NULL);
    glCompileShader(frag);
    glGetShaderiv(frag, GL_COMPILE_STATUS, &status);
    if (status != GL_TRUE) {
        glGetShaderInfoLog(frag, sizeof(log), NULL, log);
        rtLog("MESHLETS", "FRAG: %s", log);
        glDeleteShader(frag);
        glDeleteShader(vert);
        return sh;
    }

    GLuint prog = glCreateProgram();
    glAttachShader(prog, vert);
    glAttachShader(prog, frag);
    glLinkProgram(prog);
    glGetProgramiv(prog, GL_LINK_STATUS, &status);

    // The shader objects are not needed after the link attempt. While they are
    // attached they are only flagged for deletion and go away with the program.
    glDeleteShader(vert);
    glDeleteShader(frag);

    if (status != GL_TRUE) {
        glGetProgramInfoLog(prog, sizeof(log), NULL, log);
        rtLog("MESHLETS", "PROG: %s", log);
        glDeleteProgram(prog);
        return sh;
    }

    sh.m_prog = prog;
    return sh;
}
// Compiles a compute shader from source and links it into a program.
//
// Returns a shader whose m_prog is the linked program, or a zero-initialised
// shader (m_prog == 0) if compilation or linking fails. The info log of the
// failing stage is written to the "MESHLETS" log, and every GL object created
// up to that point is deleted again.
shader shader::CompileSource(const char *compute_src) {
    shader sh    = {0};
    GLint status = GL_FALSE;
    char log[512];

    GLuint comp = glCreateShader(GL_COMPUTE_SHADER);
    glShaderSource(comp, 1, &compute_src, NULL);
    glCompileShader(comp);
    glGetShaderiv(comp, GL_COMPILE_STATUS, &status);
    if (status != GL_TRUE) {
        glGetShaderInfoLog(comp, sizeof(log), NULL, log);
        rtLog("MESHLETS", "COMP: %s", log);
        glDeleteShader(comp);
        return sh;
    }

    GLuint prog = glCreateProgram();
    glAttachShader(prog, comp);
    glLinkProgram(prog);
    glGetProgramiv(prog, GL_LINK_STATUS, &status);

    // The shader object is not needed after the link attempt. While attached it
    // is only flagged for deletion and goes away with the program.
    glDeleteShader(comp);

    if (status != GL_TRUE) {
        glGetProgramInfoLog(prog, sizeof(log), NULL, log);
        rtLog("MESHLETS", "PROG: %s", log);
        glDeleteProgram(prog);
        return sh;
    }

    sh.m_prog = prog;
    return sh;
}
// Reads a compute shader from a file and compiles it with CompileSource().
//
// compute_path: path of the GLSL source file.
//
// Returns the compiled shader, or a zero-initialised shader (m_prog == 0) if
// the file cannot be opened or measured, or if compilation fails.
shader shader::CompileFile(const char *compute_path) {
    shader sh = {0};

    FILE *f = fopen(compute_path, "rb");
    if (!f) {
        rtLog("MESHLETS", "COMP: failed to open %s", compute_path);
        return sh;
    }

    // Determine the file size; ftell reports -1 on failure.
    long fsz = -1;
    if (fseek(f, 0, SEEK_END) == 0)
        fsz = ftell(f);
    if (fsz < 0 || fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return sh;
    }

    char *buf            = new char[(size_t)fsz + 1];
    const size_t num_read = fread(buf, 1, (size_t)fsz, f);
    fclose(f);

    // Terminate after what was actually read so a short read can never expose
    // uninitialised bytes to the GLSL compiler.
    buf[num_read] = 0;

    sh = CompileSource(buf);

    // Allocated with new[], so it has to be released with delete[].
    delete[] buf;
    return sh;
}
// CPU-side layout of one mesh entry; same single-matrix layout as `mesh_data`
// in the flat cull compute shader.
struct gpu_mesh_data {
    glm::mat4 model;
};
// CPU-side layout of one meshlet entry; field order matches `meshlet_data`
// in the flat cull compute shader (three vec4s followed by four uints).
struct gpu_meshlet_data {
    float bounds[4];    // xyz, radius
    float cone_apex[4]; // xyz, unused
    float cone_axis_cutoff[4]; // xyz, cutoff = w
    uint32_t mesh_index;
    uint32_t first_index;
    uint32_t base_vertex;
    uint32_t index_count;
};
// One indirect draw record; field order matches `draw_indirect_command`
// in the flat cull compute shader.
struct draw_elements_indirect_command {
    uint32_t count;
    uint32_t instanceCount;
    uint32_t firstIndex;
    int32_t baseVertex;
    uint32_t baseInstance;
};
// Same single-counter layout as the compute shader's output buffer, whose
// draw_count is incremented once per emitted draw command.
struct cull_output {
    uint32_t draw_count;
};
// Vertex shader compiled into m_single_meshlet_shader: transforms positions by `mvp`.
static const char *_single_vert = "#version 460\n"
                                  "layout (location = 0) in vec3 vertpos;\n"
                                  "uniform mat4 mvp;\n"
                                  "void main() {\n"
                                  " gl_Position = mvp * vec4(vertpos, 1.0);\n"
                                  "}\n";
// Fragment shader compiled into m_single_meshlet_shader: outputs the uniform colour `meshletcol`.
static const char *_single_frag = "#version 460\n"
                                  "out vec3 fragcol;\n"
                                  "uniform vec3 meshletcol;\n"
                                  "void main(){\n"
                                  " fragcol = meshletcol;\n"
                                  "}\n";
// Path of the compute shader source that is loaded with shader::CompileFile().
static const char *_flat_cull = "src/experimental/meshlets/flat_cull.glsl";
// Vertex shader compiled into m_meshlet_shader: picks one of five colours per
// draw using gl_DrawID.
static const char *_meshlet_vert = "#version 460\n"
                                   "layout (location = 0) in vec3 vertpos;\n"
                                   "uniform mat4 mvp;\n"
                                   "out vec3 meshletcol;\n"
                                   "vec3 colors[5] = vec3[5](\n"
                                   " vec3(129.f / 255.f, 132.f / 255.f, 121.f / 255.f),\n"
                                   " vec3(181.f / 255.f, 203.f / 255.f, 183.f / 255.f),\n"
                                   " vec3(210.f / 255.f, 228.f / 255.f, 196.f / 255.f),\n"
                                   " vec3(228.f / 255.f, 233.f / 255.f, 178.f / 255.f),\n"
                                   " vec3(231.f / 255.f, 224.f / 255.f, 139 / 255.f));\n"
                                   "void main() {\n"
                                   " gl_Position = mvp * vec4(vertpos, 1.0);\n"
                                   " meshletcol = colors[gl_DrawID % 5];\n"
                                   "}\n";
// Fragment shader compiled into m_meshlet_shader: outputs the colour chosen
// by the vertex shader.
static const char *_meshlet_frag = "#version 460\n"
                                   "out vec3 fragcol;\n"
                                   "in vec3 meshletcol;\n"
                                   "void main(){\n"
                                   " fragcol = meshletcol;\n"
                                   "}\n";
// Sets the default camera (eye at (0, 0.5, 0.5) looking at the origin with a
// 45 degree field of view) and compiles the three shader programs used by the
// renderer. Always returns RT_SUCCESS; shader failures are only visible through
// shader::IsValid().
rt_result meshlet_renderer::Initialize() {
    const float default_eye[3] = {0.f, 0.5f, 0.5f};
    for (int axis = 0; axis < 3; ++axis) {
        m_settings.eye[axis] = default_eye[axis];
    }
    m_settings.fov = glm::radians(45.f);
    memset(m_settings.target, 0, sizeof(m_settings.target));

    // Program objects are created in the same order as before.
    m_single_meshlet_shader = shader::CompileSource(_single_vert, _single_frag);
    m_flat_cull_shader = shader::CompileFile(_flat_cull);
    m_meshlet_shader = shader::CompileSource(_meshlet_vert, _meshlet_frag);
    return RT_SUCCESS;
}
// Renders `count` meshlets using the path selected in the settings: one draw
// call per meshlet, or a single indirect multi-draw.
void meshlet_renderer::RenderFlat(const meshlet *meshlets, unsigned int count) {
    if (!m_settings.separate_rendering) {
        DrawIndirectFlat(meshlets, count);
        return;
    }
    SeparateRendering(meshlets, count);
}
void meshlet_renderer::DrawIndirectFlat(const meshlet *meshlets, unsigned int count) {
// Store all meshlets in one vertex- and index buffer
// Store meshlet metadata in one ssbo
// Do compute "culling" (generate drawindirect) into 1 ssbo
// DrawIndirect
GLuint vbo, ebo, meshlet_ssbo, draw_ssbo, cull_ssbo, mesh_ssbo;
GLuint buffers[6];
glCreateBuffers(RT_ARRAY_COUNT(buffers), buffers);
vbo = buffers[0];
ebo = buffers[1];
meshlet_ssbo = buffers[2];
draw_ssbo = buffers[3];
cull_ssbo = buffers[4];
mesh_ssbo = buffers[5];
// Store vertices and indices at idx * MESHLET_VERTICES/MESHLET_INDICES
glNamedBufferStorage(vbo,
count * sizeof(vertex) * MESHLET_VERTICES,
nullptr,
GL_DYNAMIC_STORAGE_BIT);
glNamedBufferStorage(ebo, count * MESHLET_INDICES, nullptr, GL_DYNAMIC_STORAGE_BIT);
for (unsigned int i = 0; i < count; ++i) {
glNamedBufferSubData(vbo,
i * sizeof(vertex) * MESHLET_VERTICES,
meshlets[i].num_vertices * sizeof(vertex),
meshlets[i].vertices);
glNamedBufferSubData(ebo,
i * MESHLET_INDICES,
meshlets[i].num_indices,
meshlets[i].indices);
}
// Store meshlet information
glNamedBufferStorage(meshlet_ssbo,
count * sizeof(gpu_meshlet_data),
nullptr,
GL_DYNAMIC_STORAGE_BIT);
for (unsigned int i = 0; i < count; ++i) {
gpu_meshlet_data meshlet;
// Only have one right now
meshlet.mesh_index = 0;
meshlet.first_index = i * MESHLET_INDICES;
meshlet.base_vertex = i * MESHLET_VERTICES;
meshlet.index_count = meshlets[i].num_indices;
memcpy(meshlet.bounds, meshlets[i].center, 3 * sizeof(float));
meshlet.bounds[3] = meshlets[i].radius;
memcpy(meshlet.cone_apex, meshlets[i].cone_apex, 3 * sizeof(float));
meshlet.cone_apex[3] = 0.f;
memcpy(meshlet.cone_axis_cutoff, meshlets[i].cone_axis, sizeof(meshlets[i].cone_axis));
glNamedBufferSubData(meshlet_ssbo,
i * sizeof(gpu_meshlet_data),
sizeof(gpu_meshlet_data),
&meshlet);
}
// Reserve space for the draw commands
glNamedBufferStorage(draw_ssbo, count * sizeof(draw_elements_indirect_command), nullptr, 0);
// Prepare culling output
cull_output cull_output = {0};
glNamedBufferStorage(cull_ssbo, sizeof(cull_output), &cull_output, GL_MAP_READ_BIT);
// Prepare mesh data
gpu_mesh_data mesh_data;
mesh_data.model = glm ::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) *
glm::scale(glm::vec3(m_settings.scale));
glNamedBufferStorage(mesh_ssbo, sizeof(mesh_data), &mesh_data, GL_DYNAMIC_STORAGE_BIT);
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
// Do culling. TODO: Get number of draws back
m_flat_cull_shader.Use();
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, meshlet_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, draw_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, cull_ssbo);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, mesh_ssbo);
glUniform3fv(0, 1, m_settings.eye);
glDispatchCompute(count, 1, 1);
// Create the vao
GLuint vao;
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0);
// DrawIndirect
GLuint mvploc = glGetUniformLocation(m_meshlet_shader.m_prog, "mvp");
glm::mat4 model = glm::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) *
glm::scale(glm::vec3(m_settings.scale));
glm::mat4 view =
glm::lookAt(glm::vec3(m_settings.eye[0], m_settings.eye[1], m_settings.eye[2]),
glm::vec3(m_settings.target[0], m_settings.target[1], m_settings.target[2]),
glm::vec3(0, 1, 0));
glm::mat4 proj = glm::perspective(m_settings.fov, m_aspect, 0.01f, 100.f);
glm::mat4 mvp = proj * view * model;
glDisable(GL_CULL_FACE);
glCullFace(GL_BACK);
glEnable(GL_DEPTH_TEST);
m_meshlet_shader.Use();
glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, draw_ssbo);
glMemoryBarrier(GL_COMMAND_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT);
{
void *out = glMapNamedBuffer(cull_ssbo, GL_READ_ONLY);
memcpy(&cull_output, out, sizeof(cull_output));
glUnmapNamedBuffer(cull_ssbo);
}
glMultiDrawElementsIndirect(GL_TRIANGLES,
GL_UNSIGNED_BYTE,
nullptr,
cull_output.draw_count,
sizeof(draw_elements_indirect_command));
glDeleteBuffers(RT_ARRAY_COUNT(buffers), buffers);
}
void meshlet_renderer::SeparateRendering(const meshlet *meshlets, unsigned int count) {
GLuint vao, vbo, ebo;
glGenBuffers(1, &vbo);
glGenBuffers(1, &ebo);
glGenVertexArrays(1, &vao);
glBindVertexArray(vao);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertex) * MESHLET_VERTICES, nullptr, GL_STREAM_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, MESHLET_INDICES, nullptr, GL_STREAM_DRAW);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, sizeof(vertex), nullptr);
glEnableVertexAttribArray(0);
m_single_meshlet_shader.Use();
float colors[5][3] = {
{129.f / 255.f, 132.f / 255.f, 121.f / 255.f},
{181.f / 255.f, 203.f / 255.f, 183.f / 255.f},
{210.f / 255.f, 228.f / 255.f, 196.f / 255.f},
{228.f / 255.f, 233.f / 255.f, 178.f / 255.f},
{231.f / 255.f, 224.f / 255.f, 139 / 255.f},
};
GLuint colorloc = glGetUniformLocation(m_single_meshlet_shader.m_prog, "meshletcol");
GLuint mvploc = glGetUniformLocation(m_single_meshlet_shader.m_prog, "mvp");
glm::mat4 model = glm::rotate(m_settings.rotation, glm::vec3(0, 1, 0)) *
glm::scale(glm::vec3(m_settings.scale));
glm::mat4 view =
glm::lookAt(glm::vec3(m_settings.eye[0], m_settings.eye[1], m_settings.eye[2]),
glm::vec3(m_settings.target[0], m_settings.target[1], m_settings.target[2]),
glm::vec3(0, 1, 0));
glm::mat4 proj = glm::perspective(m_settings.fov, m_aspect, 0.01f, 100.f);
glm::mat4 mvp = proj * view * model;
glUniformMatrix4fv(mvploc, 1, GL_FALSE, &mvp[0][0]);
glDisable(GL_CULL_FACE);
glCullFace(GL_BACK);
glEnable(GL_DEPTH_TEST);
for (unsigned int i = 0; i < count; ++i) {
glBufferSubData(GL_ARRAY_BUFFER,
0,
sizeof(vertex) * meshlets[i].num_vertices,
meshlets[i].vertices);
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER,
0,
sizeof(uint8_t) * meshlets[i].num_indices,
meshlets[i].indices);
glUniform3fv(colorloc, 1, colors[i % 5]);
glDrawElements(GL_TRIANGLES, meshlets[i].num_indices, GL_UNSIGNED_BYTE, nullptr);
}
glDeleteVertexArrays(1, &vao);
glDeleteBuffers(1, &vbo);
glDeleteBuffers(1, &ebo);
}
// Draws the ImGui "Settings" window that edits m_settings (render path, model
// scale/rotation and camera parameters).
void meshlet_renderer::SettingMenu() {
    settings *sett = &m_settings;
    if (ImGui::Begin("Settings")) {
        ImGui::Checkbox("Separate Rendering", &sett->separate_rendering);
        ImGui::SeparatorText("Model");
        ImGui::SliderFloat("Scale", &sett->scale, 1.f, 100.f);
        ImGui::SliderAngle("Rotation", &sett->rotation, 0.f, 360.f);
        ImGui::SeparatorText("Camera");
        ImGui::SliderAngle("Fov", &sett->fov, 30.f, 180.f);
        ImGui::InputFloat3("Eye", sett->eye);
        ImGui::InputFloat3("Target", sett->target);
    }
    // ImGui requires End() for every Begin(), even when Begin() returned false
    // (collapsed or clipped window).
    ImGui::End();
}

View File

@ -0,0 +1,55 @@
#ifndef RT_EXP_MESHLET_RENDERER_H
#define RT_EXP_MESHLET_RENDERER_H
#include "runtime/runtime.h"
#include "meshlet_generator.hpp"
#include <glad/glad.h>
// Thin wrapper around an OpenGL program object name.
struct shader {
// Builds a program from vertex and fragment shader source strings.
static shader CompileSource(const char *vert_src, const char *frag_src);
// Builds a compute program from a source string.
static shader CompileSource(const char *compute_src);
// Builds a compute program from the contents of the file at compute_path.
static shader CompileFile(const char *compute_path);
// True if the wrapper holds a program (the Compile* functions leave m_prog at 0 on failure).
RT_INLINE bool IsValid() const {
return m_prog != 0;
}
// Makes this program current via glUseProgram.
RT_INLINE void Use() const {
glUseProgram(m_prog);
}
// GL program name; 0 means "no program". Not initialized by default construction.
GLuint m_prog;
};
// Experimental renderer that draws a flat list of meshlets, either with one
// draw call per meshlet or with a compute-culled indirect multi-draw.
struct meshlet_renderer {
    // User-tweakable state, editable through SettingMenu().
    struct settings {
        // true: one draw call per meshlet, false: indirect multi-draw
        bool separate_rendering = true;
        // Uniform model scale
        float scale = 1.f;
        // Model rotation around the y axis, in radians
        float rotation = 0.f;
        // Camera position; same default that Initialize() sets
        float eye[3] = {0.f, 0.5f, 0.5f};
        // Point the camera looks at
        float target[3] = {0.f, 0.f, 0.f};
        // Vertical field of view in radians; 45 degrees, as set by Initialize()
        float fov = 0.78539816f;
    };

    // Sets the default camera and compiles the shaders.
    rt_result Initialize();
    // Renders `count` meshlets with the path selected in m_settings.
    void RenderFlat(const meshlet *meshlets, unsigned int count);
    // Draws the ImGui settings window.
    void SettingMenu();

  private:
    void SeparateRendering(const meshlet *meshlets, unsigned int count);
    void DrawIndirectFlat(const meshlet *meshlets, unsigned int count);

  public:
    settings m_settings;
    // Shader handles start out invalid (program 0) until Initialize() runs.
    shader m_single_meshlet_shader = {0};
    shader m_flat_cull_shader      = {0};
    shader m_meshlet_shader        = {0};
    // Viewport aspect ratio used for the projection matrix; it is not written
    // by any member function, so it gets a usable default here.
    float m_aspect = 1.f;
};
#endif

View File

@ -0,0 +1,18 @@
# Third-party dependencies of the meshlet experiment, built as meson subprojects.
# glfw is linked statically and built with sanitizers disabled.
glfw_proj = subproject('glfw', default_options: ['default_library=static', 'b_sanitize=none'])
glfw_dep = glfw_proj.get_variable('glfw_dep')
imgui_proj = subproject('imgui')
imgui_dep = imgui_proj.get_variable('imgui_dep')
glm_proj = subproject('glm')
glm_dep = glm_proj.get_variable('glm_dep')
# Stand-alone test application. The GLSL files are listed as extra_files only;
# they are not compiled by the build.
executable('meshlet_experiment',
'main.cpp',
'meshlet_generator.hpp',
'meshlet_generator.cpp',
'meshlet_renderer.hpp',
'meshlet_renderer.cpp',
contrib_dir / 'glad/glad.c',
extra_files: ['flat_cull.glsl', 'hierarchical_cull.glsl'],
include_directories: [engine_incdir, contrib_incdir],
dependencies: [m_dep, meshoptimizer_dep, glfw_dep, imgui_dep, glm_dep],
link_with: runtime_lib)

View File

@ -0,0 +1 @@
# Experimental, work-in-progress targets.
subdir('meshlets')

View File

@ -2,4 +2,7 @@ subdir('runtime')
subdir('asset_compiler')
subdir('app_framework')
subdir('renderer/common')
subdir('renderer/dx11')
subdir('experimental')

View File

@ -0,0 +1,82 @@
#include "runtime/config.h"
#include "runtime/runtime.h"
#include "render_mesh.h"
#include <stdlib.h>
/* Size of the CPU-side meshlet vertex pool, in megabytes (converted with RT_MB). */
RT_CVAR_SZ(rt_MeshletVertexBufferPoolSize,
"Amount of memory to allocate for meshlet vertex data in megabytes. Default: 512",
512);
/* Size of the CPU-side meshlet index pool, in megabytes (converted with RT_MB). */
RT_CVAR_SZ(rt_MeshletPrimitiveIndexBufferPoolSize,
"Amount of memory to allocate for meshlet primitive index data in megabytes. Default: 512",
512);
/* We manage the pools in chunks with RT_MESHLET_MAX_VERTICES/RT_MESHLET_MAX_INDICES size.
* A free chunk contains a pointer to the next free chunk.
*
*
* This is _not_ the management of GPU side buffers!
*/
/* Header overlaid on the first bytes of an unused chunk; links the free chunks
 * into a singly linked list. */
typedef struct rt_free_chunk {
/* Next unused chunk, or NULL for the last one */
struct rt_free_chunk *next;
} rt_free_chunk;
/* A pool of equally sized chunks carved out of one malloc'ed block. */
typedef struct {
/* Head of the free list */
rt_free_chunk *first_free;
/* Start of the backing allocation; NULL if the pool could not be created */
void *base;
/* Number of chunks in the backing allocation */
size_t num_chunks;
/* Size of one chunk in bytes */
size_t chunk_size;
/* For statistics */
size_t used;
} rt_pool;
static rt_pool _vertex_pool;
static rt_pool _index_pool;
/* Allocates `total_size` bytes and splits them into chunks that are linked
 * into the pool's free list.
 *
 * `chunk_size` is rounded up to a multiple of the pointer size so that the
 * free-list header stored inside every unused chunk is properly aligned; the
 * effective size is reported in the returned pool's chunk_size.
 *
 * On failure (allocation failed, or total_size is too small for a single
 * chunk) the returned pool is all zero, i.e. pool.base == NULL. */
static rt_pool CreatePool(size_t total_size, size_t chunk_size) {
    RT_ASSERT(chunk_size >= sizeof(rt_free_chunk), "Invalid chunk size");
    rt_pool pool = {0};

    /* Every free chunk stores a pointer in its first bytes; keep all chunk
     * starts aligned for that store. */
    const size_t alignment = sizeof(rt_free_chunk);
    chunk_size             = (chunk_size + alignment - 1) / alignment * alignment;

    const size_t num_chunks = total_size / chunk_size;
    if (num_chunks == 0) {
        rtLog("RENCOM", "Meshlet pool is too small to hold a single chunk.");
        return pool;
    }

    char *base = malloc(total_size);
    if (!base) {
        rtLog("RENCOM", "Meshlet pool allocation failed.");
        return pool;
    }

    /* Thread all chunks into the free list, in address order. */
    for (size_t i = 0; i < num_chunks; ++i) {
        rt_free_chunk *chunk = (rt_free_chunk *)(base + i * chunk_size);
        chunk->next =
            (i + 1 < num_chunks) ? (rt_free_chunk *)(base + (i + 1) * chunk_size) : NULL;
    }

    pool.base       = base;
    pool.first_free = (rt_free_chunk *)base;
    pool.num_chunks = num_chunks;
    pool.chunk_size = chunk_size;
    pool.used       = 0;
    return pool;
}
/* Registers the pool size cvars and creates the vertex and index pools.
 * Returns RT_OUT_OF_MEMORY if either pool could not be created; in that case
 * no pool memory stays allocated. */
rt_result InitMeshletPools(void) {
    rtRegisterCVAR(&rt_MeshletVertexBufferPoolSize);
    rtRegisterCVAR(&rt_MeshletPrimitiveIndexBufferPoolSize);

    const size_t vertex_chunk_size = sizeof(rt_vertex) * RT_MESHLET_MAX_VERTICES;
    _vertex_pool = CreatePool(RT_MB(rt_MeshletVertexBufferPoolSize.sz), vertex_chunk_size);
    if (_vertex_pool.base == NULL) {
        return RT_OUT_OF_MEMORY;
    }

    _index_pool =
        CreatePool(RT_MB(rt_MeshletPrimitiveIndexBufferPoolSize.sz), RT_MESHLET_MAX_INDICES);
    if (_index_pool.base != NULL) {
        return RT_SUCCESS;
    }

    /* The index pool failed: undo the vertex pool allocation. */
    free(_vertex_pool.base);
    _vertex_pool.base = NULL;
    return RT_OUT_OF_MEMORY;
}
void ShutdownMeshletPools(void) {
}

View File

@ -0,0 +1,10 @@
# Renderer code shared between backends: mesh/meshlet streaming and the meshlet pools.
common_renderer_lib = static_library('common_renderer',
'../common/renderer_api.h',
'../common/render_mesh.h',
'../common/meshlet_pools.c',
'../common/render_mesh.c',
dependencies: [m_dep, thread_dep],
link_with: runtime_lib,
include_directories: [engine_incdir, contrib_incdir],
install: false)

View File

@ -0,0 +1,394 @@
#include "render_mesh.h"
#include "runtime/config.h"
#include "runtime/ds.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Number of rt_mesh_container slots; rtInitMeshStreaming rejects values that are not a power of two. */
RT_CVAR_SZ(rt_MeshPoolSlots, "Number of slots available for meshes. Default: 1024", 1024);
/* Number of rt_meshlet_container slots. */
RT_CVAR_SZ(rt_MeshletPoolSlots, "Number of slots available for meshlets. Default: 4096", 4096);
/* Capacity of both the stream queue and the release queue. */
RT_CVAR_SZ(rt_MeshletStreamQueueSize, "Number of slots in the streaming queue. Default: 256", 256);
/* Upper bound for the time rtMeshStreamingNewFrame spends per frame.
 * NOTE(review): the description says 4ms but the default is 0.04f; if the unit is
 * seconds this is 40ms - confirm which of the two is intended. */
RT_CVAR_F(rt_MeshletStreamTimeslice,
"Timeslice available for streaming meshlets. Default: 4ms",
0.04f);
/* Number of meshlets RecursiveReleaseMeshlets releases directly before it starts
 * deferring releases to the release queue. */
RT_CVAR_I(rt_MeshletDirectReleaseCount,
"Number of meshlets that will get released immediately. Default: 1",
1);
/* Slot of the mesh pool: a render mesh plus its bookkeeping. */
typedef struct rt_mesh_container {
/* Number of outstanding rtGetRenderMesh references */
_Alignas(4) uint32_t refcount;
/* Next slot in the free list (only meaningful while the slot is unused) */
struct rt_mesh_container *next_free;
rt_render_mesh mesh;
} rt_mesh_container;
/* Slot of the meshlet pool. RecursiveReleaseMeshlets recovers the container from a
 * pointer to its `meshlet` member via offsetof. */
typedef struct rt_meshlet_container {
/* Next slot in the free list (only meaningful while the slot is unused) */
struct rt_meshlet_container *next_free;
rt_render_meshlet meshlet;
} rt_meshlet_container;
/* Entry of the release queue: a meshlet slot whose release was deferred to a later frame. */
typedef struct {
rt_meshlet_container *meshlet;
} rt_meshlet_release_queue_entry;
/* Entry of the stream queue: a pending meshlet load. */
typedef struct {
/* Resource to load */
rt_resource_id meshlet_resource;
/* Slot that receives the loaded meshlet */
rt_meshlet_container *destination;
} rt_meshlet_stream_queue_entry;
/* Frame id passed to the most recent rtMeshStreamingNewFrame call */
static unsigned int _current_frame_id = 0;
/* Mesh slots and the head of their free list */
static rt_mesh_container *_mesh_container;
static rt_mesh_container *_first_free_mesh;
/* Meshlet slots and the head of their free list */
static rt_meshlet_container *_meshlet_container;
static rt_meshlet_container *_first_free_meshlet;
/* Min-heap of released mesh slot indices, keyed by the frame id of the release */
static rt_minheap _reclaim_heap;
static void *_reclaim_heap_memory;
/* Maps mesh resource ids to mesh slot indices */
static rt_hashtable _mesh_lut;
static void *_mesh_lut_memory;
/* Held while the mesh slots, their free list, the lookup table or the reclaim heap are modified */
static rt_mutex *_mesh_lock;
/* Pending meshlet loads */
static rt_queue _stream_queue;
static void *_stream_queue_memory;
/* Deferred meshlet releases */
static rt_queue _release_queue;
static void *_release_queue_memory;
/* Held while the meshlet free list or the queues are accessed */
static rt_mutex *_meshlet_lock;
/* Implemented in meshlet_pools.c */
extern rt_result InitMeshletPools(void);
extern void ShutdownMeshletPools(void);
/* Frees every allocation made by rtInitMeshStreaming and resets the pointers,
 * so that a later shutdown or re-initialization cannot free them twice. */
static void FreeMeshStreamingMemory(void) {
    free(_release_queue_memory);
    free(_stream_queue_memory);
    free(_meshlet_container);
    free(_mesh_lut_memory);
    free(_reclaim_heap_memory);
    free(_mesh_container);
    _release_queue_memory = NULL;
    _stream_queue_memory  = NULL;
    _meshlet_container    = NULL;
    _mesh_lut_memory      = NULL;
    _reclaim_heap_memory  = NULL;
    _mesh_container       = NULL;
    _first_free_mesh      = NULL;
    _first_free_meshlet   = NULL;
}

/* Registers the streaming cvars and allocates the mesh and meshlet slots, the
 * reclaim heap, the mesh lookup table, both queues, the mutexes and the
 * meshlet pools.
 *
 * Returns RT_INVALID_VALUE for unusable cvar values, RT_OUT_OF_MEMORY if an
 * allocation fails, or the error reported by InitMeshletPools. Nothing stays
 * allocated on failure. */
rt_result rtInitMeshStreaming(void) {
    rtRegisterCVAR(&rt_MeshPoolSlots);
    rtRegisterCVAR(&rt_MeshletPoolSlots);
    rtRegisterCVAR(&rt_MeshletStreamQueueSize);
    rtRegisterCVAR(&rt_MeshletStreamTimeslice);
    rtRegisterCVAR(&rt_MeshletDirectReleaseCount);

    if (rt_MeshPoolSlots.sz == 0 || !RT_IS_POWER_OF_TWO(rt_MeshPoolSlots.sz)) {
        rtReportError("RENCOM", "rt_MeshPoolSlots must be a power of two.");
        return RT_INVALID_VALUE;
    }
    if (rt_MeshletPoolSlots.sz == 0 || rt_MeshletStreamQueueSize.sz == 0) {
        rtReportError("RENCOM",
                      "rt_MeshletPoolSlots and rt_MeshletStreamQueueSize must not be zero.");
        return RT_INVALID_VALUE;
    }
    if (rt_MeshletDirectReleaseCount.i <= 0) {
        rtLog("RENCOM", "rt_MeshletDirectReleaseCount must at least be one.");
        rt_MeshletDirectReleaseCount.i = 1;
    }

    const size_t mesh_slots    = rt_MeshPoolSlots.sz;
    const size_t meshlet_slots = rt_MeshletPoolSlots.sz;
    const size_t queue_slots   = rt_MeshletStreamQueueSize.sz;

    /* Allocate everything first so that there is a single failure exit. */
    _mesh_container       = calloc(mesh_slots, sizeof(rt_mesh_container));
    _reclaim_heap_memory  = malloc((sizeof(int) + sizeof(size_t)) * mesh_slots);
    _mesh_lut_memory      = malloc(RT_HASH_TABLE_MEMORY_REQUIRED(2 * mesh_slots));
    _meshlet_container    = calloc(meshlet_slots, sizeof(rt_meshlet_container));
    _stream_queue_memory  = calloc(queue_slots, sizeof(rt_meshlet_stream_queue_entry));
    _release_queue_memory = calloc(queue_slots, sizeof(rt_meshlet_release_queue_entry));
    if (!_mesh_container || !_reclaim_heap_memory || !_mesh_lut_memory || !_meshlet_container ||
        !_stream_queue_memory || !_release_queue_memory) {
        FreeMeshStreamingMemory();
        return RT_OUT_OF_MEMORY;
    }

    /* Link all mesh slots into the free list; calloc left the last next_free NULL. */
    for (size_t i = 0; i + 1 < mesh_slots; ++i) {
        _mesh_container[i].next_free = &_mesh_container[i + 1];
    }
    _first_free_mesh = _mesh_container;

    /* Same for the meshlet slots, using the meshlet slot count. */
    for (size_t i = 0; i + 1 < meshlet_slots; ++i) {
        _meshlet_container[i].next_free = &_meshlet_container[i + 1];
    }
    _first_free_meshlet = _meshlet_container;

    /* Heap memory layout: mesh_slots int keys, followed by mesh_slots size_t values. */
    _reclaim_heap = rtCreateMinheap((int *)_reclaim_heap_memory,
                                    (int *)_reclaim_heap_memory + mesh_slots,
                                    sizeof(size_t),
                                    mesh_slots,
                                    0);
    _mesh_lut     = rtCreateHashtable(2 * mesh_slots, _mesh_lut_memory, NULL, NULL);
    _stream_queue = rtCreateQueue(queue_slots,
                                  _stream_queue_memory,
                                  sizeof(rt_meshlet_stream_queue_entry));
    _release_queue = rtCreateQueue(queue_slots,
                                   _release_queue_memory,
                                   sizeof(rt_meshlet_release_queue_entry));

    /* NOTE(review): the results of rtCreateMutex are not checked; confirm
     * whether it can fail. */
    _mesh_lock    = rtCreateMutex();
    _meshlet_lock = rtCreateMutex();

    rt_result res = InitMeshletPools();
    if (res != RT_SUCCESS) {
        ShutdownMeshletPools();
        FreeMeshStreamingMemory();
        return res;
    }
    return RT_SUCCESS;
}
void rtShutdownMeshStreaming(void) {
free(_mesh_container);
free(_meshlet_container);
free(_stream_queue_memory);
free(_release_queue_memory);
free(_reclaim_heap_memory);
free(_mesh_lut_memory);
ShutdownMeshletPools();
}
/* Pops a slot from the meshlet free list and marks it as loading.
 * Returns NULL if no slot is available. */
static rt_meshlet_container *AllocMeshletContainer(void) {
    rt_meshlet_container *slot;

    rtLockMutex(_meshlet_lock);
    slot = _first_free_meshlet;
    if (slot != NULL) {
        _first_free_meshlet = slot->next_free;
        slot->meshlet.state = RT_RENDER_MESHLET_STATE_LOADING;
    }
    rtUnlockMutex(_meshlet_lock);

    return slot;
}
/* Loads the meshlet resource `meshlet_id` into a temporary buffer and checks
 * that it really is a meshlet.
 *
 * Returns RT_INVALID_VALUE if the resource has no size or the wrong type,
 * RT_OUT_OF_MEMORY if no temporary memory is available, or the error reported
 * by rtGetResource.
 *
 * TODO: the loaded data is not copied into `destination` yet, and the meshlet
 * state is not advanced here. */
static rt_result LoadMeshlet(rt_resource_id meshlet_id, rt_meshlet_container *destination) {
    (void)destination; /* Unused until the copy into the container is implemented */

    size_t resource_size = rtGetResourceSize(meshlet_id);
    if (!resource_size)
        return RT_INVALID_VALUE;

    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena)
        return RT_OUT_OF_MEMORY;

    void *resource_buffer = rtArenaPush(temp.arena, resource_size);
    if (!resource_buffer) {
        rtLog("RENCOM",
              "Unable to allocate %zu bytes for loading meshlet %llx",
              resource_size,
              meshlet_id);
        rtReturnTemporaryArena(temp);
        return RT_OUT_OF_MEMORY;
    }

    rt_result res = rtGetResource(meshlet_id, resource_buffer);
    if (res != RT_SUCCESS) {
        /* The buffer contents are not valid; do not inspect them. */
        rtLog("RENCOM", "Failed to get meshlet resource %llx", meshlet_id);
    } else {
        const rt_resource *meshlet_resource = resource_buffer;
        if (!RT_VERIFY(meshlet_resource->type == RT_RESOURCE_MESHLET)) {
            rtLog("RENCOM", "Resource %llx is not a meshlet.", meshlet_id);
            res = RT_INVALID_VALUE;
        }
    }

    rtReturnTemporaryArena(temp);
    return res;
}
/* Marks the meshlet as invalid and pushes its slot back onto the free list. */
static void ReleaseMeshlet(rt_meshlet_container *container) {
    rt_render_meshlet *released = &container->meshlet;
    /* TODO: Release vertex and index buffers */
    released->state = RT_RENDER_MESHLET_STATE_INVALID;

    rtLockMutex(_meshlet_lock);
    rt_meshlet_container *previous_head = _first_free_meshlet;
    _first_free_meshlet                 = container;
    container->next_free                = previous_head;
    rtUnlockMutex(_meshlet_lock);
}
/* Releases `meshlet` and all of its loaded children, children first.
 *
 * The first rt_MeshletDirectReleaseCount meshlets are released immediately.
 * Further ones are pushed onto the release queue, or released immediately if
 * the queue is full.
 *
 * `release_count` is the number of meshlets already released directly; the
 * updated count is returned. */
static int RecursiveReleaseMeshlets(rt_render_meshlet *meshlet, int release_count) {
    if (!meshlet)
        return release_count;

    /* Never read past the end of the children array. */
    unsigned int child_count = meshlet->static_data.child_count;
    if (child_count > RT_MAX_MESHLET_CHILDREN)
        child_count = RT_MAX_MESHLET_CHILDREN;

    for (unsigned int i = 0; i < child_count; ++i) {
        /* Children that are not loaded are NULL. */
        rt_render_meshlet *child = meshlet->children[i];
        if (!child)
            continue;
        /* The child's slot is about to be recycled; do not keep a pointer to it. */
        meshlet->children[i] = NULL;
        release_count        = RecursiveReleaseMeshlets(child, release_count);
    }

    /* The meshlet is embedded in its container; recover the container. */
    rt_meshlet_container *container =
        (rt_meshlet_container *)((char *)meshlet - offsetof(rt_meshlet_container, meshlet));

    rtLockMutex(_meshlet_lock);
    if (release_count >= rt_MeshletDirectReleaseCount.i && !rtQueueIsFull(&_release_queue)) {
        /* Enqueue for later */
        rt_meshlet_release_queue_entry entry = {.meshlet = container};
        rtQueuePush(&_release_queue, &entry);
        rtUnlockMutex(_meshlet_lock);
        return release_count;
    }
    /* ReleaseMeshlet takes the lock itself. */
    rtUnlockMutex(_meshlet_lock);
    ReleaseMeshlet(container);
    return release_count + 1;
}
rt_render_mesh *rtGetRenderMesh(rt_resource_id mesh_id) {
size_t mesh_resource_size = rtGetResourceSize(mesh_id);
if (!mesh_resource_size) {
rtLog("RENCOM", "Tried to load invalid mesh %llx");
return NULL;
}
rtLockMutex(_mesh_lock);
/* Check if this mesh is already loaded */
size_t mesh_index = 0;
if ((mesh_index = rtHashtableLookup(&_mesh_lut, mesh_id, SIZE_MAX)) != SIZE_MAX) {
rt_mesh_container *container = &_mesh_container[mesh_id];
container->refcount++;
rtUnlockMutex(_mesh_lock);
return &container->mesh;
}
if (!_first_free_mesh) {
/* Try to reclaim used space */
if (rtMinheapIsEmpty(&_reclaim_heap)) {
rtLog("RENCOM", "Tried to load new mesh %llx, but no space is available.", mesh_id);
rtUnlockMutex(_mesh_lock);
return NULL;
}
size_t reuse_idx;
rtMinheapPop(&_reclaim_heap, &reuse_idx);
RT_ASSERT(_mesh_container[reuse_idx].refcount == 0,
"In-use mesh container found in reclaim heap.");
_mesh_container[reuse_idx].next_free = NULL;
_first_free_mesh = &_mesh_container[reuse_idx];
/* Free all associated meshlets */
RT_VERIFY(_mesh_container[reuse_idx].mesh.root);
int num_released = RecursiveReleaseMeshlets(_mesh_container[reuse_idx].mesh.root, 0);
RT_ASSERT(num_released >= 1, "At least one meshlet should have been released.");
}
rt_mesh_container *container = _first_free_mesh;
_first_free_mesh = container->next_free;
mesh_index = container - _mesh_container;
rtHashtableInsert(&_mesh_lut, mesh_id, mesh_index);
rtUnlockMutex(_mesh_lock);
container->refcount = 1;
container->next_free = NULL;
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena)
return NULL;
void *resource_buffer = rtArenaPush(temp.arena, mesh_resource_size);
if (!resource_buffer) {
rtLog("RENCOM",
"Tried to load new mesh %llx, but no memory for storing the mesh resource is "
"available.",
mesh_id);
rtLockMutex(_mesh_lock);
container->refcount = 0;
container->next_free = _first_free_mesh;
_first_free_mesh = container;
rtHashtableRemove(&_mesh_lut, mesh_id);
rtUnlockMutex(_mesh_lock);
return NULL;
}
if (rtGetResource(mesh_id, resource_buffer) != RT_SUCCESS) {
rtLog("RENCOM", "Failed to load mesh %llx", mesh_id);
rtUnlockMutex(_mesh_lock);
return NULL;
}
rt_resource *resource = resource_buffer;
RT_VERIFY(resource->type == RT_RESOURCE_MESH);
memcpy(&container->mesh.static_data, resource->data, sizeof(rt_render_mesh_static_data));
rtReturnTemporaryArena(temp);
resource = NULL;
resource_buffer = NULL;
/* Enqueue the root load */
rt_meshlet_stream_queue_entry root_load = {0};
root_load.destination = AllocMeshletContainer();
root_load.meshlet_resource = container->mesh.static_data.root_resource;
if (!root_load.destination) {
/* Oh no, we can't load this */
rtLog(
"RENCOM",
"Tried to load new mesh %llx, but no space for loading the root meshlet is available.",
mesh_id);
rtLockMutex(_mesh_lock);
container->refcount = 0;
container->next_free = _first_free_mesh;
_first_free_mesh = container;
rtHashtableRemove(&_mesh_lut, mesh_id);
rtUnlockMutex(_mesh_lock);
return NULL;
}
container->mesh.root = &root_load.destination->meshlet;
rtQueuePush(&_stream_queue, &root_load);
return &container->mesh;
}
/* Drops one reference to the mesh `mesh_id`. Unknown ids and meshes without
 * outstanding references are ignored.
 *
 * When the last reference is dropped, the mesh's meshlets are released, its
 * lookup table entry is removed and its slot is returned to the free list.
 *
 * The slot is not pushed onto the reclaim heap as well: a slot on the free
 * list can be handed out again at any time, so a heap entry for it would later
 * identify an in-use mesh as reclaimable. */
void rtReleaseRenderMesh(rt_resource_id mesh_id) {
    rtLockMutex(_mesh_lock);

    size_t mesh_index = rtHashtableLookup(&_mesh_lut, mesh_id, SIZE_MAX);
    if (mesh_index == SIZE_MAX) {
        rtUnlockMutex(_mesh_lock);
        return;
    }

    rt_mesh_container *container = &_mesh_container[mesh_index];
    if (container->refcount == 0) {
        /* Unbalanced release; decrementing would wrap the unsigned counter. */
        rtLog("RENCOM", "Tried to release mesh %llx, which has no references.", mesh_id);
        rtUnlockMutex(_mesh_lock);
        return;
    }

    if ((--container->refcount) == 0) {
        if (RT_VERIFY(container->mesh.root)) {
            RecursiveReleaseMeshlets(container->mesh.root, 0);
            container->mesh.root = NULL;
        }
        /* The meshlets are gone; lookups must not hand out this slot anymore. */
        rtHashtableRemove(&_mesh_lut, mesh_id);
        container->next_free = _first_free_mesh;
        _first_free_mesh     = container;
    }

    rtUnlockMutex(_mesh_lock);
}
/* Per-frame update of the streaming system: records the frame id and then
 * processes deferred meshlet releases and pending meshlet loads until both
 * queues are empty or the time budget rt_MeshletStreamTimeslice is used up.
 *
 * NOTE(review): the budget is interpreted as seconds here; the cvar text says
 * "4ms" while its default is 0.04f - confirm the intended unit. If the wall
 * clock cannot be read, the queues are drained completely. */
void rtMeshStreamingNewFrame(unsigned int frame_id) {
    _current_frame_id = frame_id;

    struct timespec start;
    const int have_clock = (timespec_get(&start, TIME_UTC) == TIME_UTC);

    float elapsed = 0.f;
    while (elapsed < rt_MeshletStreamTimeslice.f) {
        rt_meshlet_release_queue_entry release = {NULL};
        rt_meshlet_stream_queue_entry stream   = {0};
        bool have_release = false, have_stream = false;

        /* Take at most one entry from each queue per iteration */
        rtLockMutex(_meshlet_lock);
        if (!rtQueueIsEmpty(&_release_queue)) {
            rtQueuePop(&_release_queue, &release);
            have_release = true;
        }
        if (!rtQueueIsEmpty(&_stream_queue)) {
            rtQueuePop(&_stream_queue, &stream);
            have_stream = true;
        }
        rtUnlockMutex(_meshlet_lock);

        if (!have_stream && !have_release)
            break;

        if (have_release && release.meshlet) {
            ReleaseMeshlet(release.meshlet);
        }
        if (have_stream && stream.destination) {
            if (LoadMeshlet(stream.meshlet_resource, stream.destination) != RT_SUCCESS) {
                stream.destination->meshlet.state = RT_RENDER_MESHLET_STATE_LOAD_FAILED;
            }
        }

        /* Advance the clock; without this the budget would never be enforced. */
        if (have_clock) {
            struct timespec now;
            if (timespec_get(&now, TIME_UTC) == TIME_UTC) {
                elapsed = (float)(now.tv_sec - start.tv_sec) +
                          (float)(now.tv_nsec - start.tv_nsec) * 1e-9f;
            }
        }
    }
}

View File

@ -0,0 +1,120 @@
#ifndef RT_RENCOM_RENDER_MESH_H
#define RT_RENCOM_RENDER_MESH_H
/* Meshes
* A mesh is a tree of small meshlets: Chunks of up to 64 vertices/126 triangles.
* Each tree node contains the meshlet data and (optionally) children.
* A node is a simplified version of all its children.
*
* A meshlet contains a primitive index buffer that references the meshes
*/
#include <stdint.h>
#include "runtime/resources.h"
#ifdef __cplusplus
extern "C" {
#endif
#define RT_MAX_MESHLET_CHILDREN 8
/* These numbers match the limits recommended for NVIDIA's Turing mesh shaders.
* See: https://developer.nvidia.com/blog/introduction-turing-mesh-shaders/ */
#define RT_MESHLET_MAX_VERTICES 64
#define RT_MESHLET_MAX_PRIMITIVES 126
#define RT_MESHLET_MAX_INDICES (RT_MESHLET_MAX_PRIMITIVES * 3)
/* Vertex layout of meshlet vertex data; every attribute is stored as 16 bit values.
 * NOTE(review): the encoding of the components (normalized integers, half floats, ...)
 * is not visible in this header - confirm with the asset pipeline. */
typedef struct {
uint16_t position[3];
uint16_t normal[3];
uint16_t tangent[3];
uint16_t texcoord[2];
} rt_vertex;
/* Meshlet data as passed to the GPU.
 * The three bit fields share one 32 bit word (21 + 8 + 3 bits).
 * NOTE(review): child_count is 3 bits wide and can only hold 0..7, but
 * RT_MAX_MESHLET_CHILDREN is 8 and `children` has 8 entries - a meshlet with 8 children
 * cannot be represented. */
typedef struct {
/* Offset into the shared index buffer */
uint32_t base_index : 21;
uint32_t triangle_count : 8;
uint32_t child_count : 3;
/* Bounding sphere for culling */
float center[3];
float radius;
/* Indices of the child nodes in the meshlet buffer */
uint32_t children[8];
} rt_gpu_meshlet;
/* Static data stored in the meshlet resource */
typedef struct {
/* Bounding sphere for culling */
float center[3];
float radius;
/* Number of triangles = index_count / 3 */
uint32_t triangle_count;
/* Number of vertices used by the meshlet */
uint32_t vertex_count;
/* Number of valid entries in child_resources */
uint32_t child_count;
/* Resource ids of the child meshlets */
rt_resource_id child_resources[RT_MAX_MESHLET_CHILDREN];
} rt_render_meshlet_static_data;
/* Lifecycle of a meshlet slot */
typedef enum {
/* Slot is unused or was released */
RT_RENDER_MESHLET_STATE_INVALID,
/* Slot was allocated and a load is pending */
RT_RENDER_MESHLET_STATE_LOADING,
RT_RENDER_MESHLET_STATE_LOADED,
/* The load of the meshlet resource failed */
RT_RENDER_MESHLET_STATE_LOAD_FAILED,
} rt_render_meshlet_state;
/* CPU-side node of the meshlet tree */
typedef struct rt_render_meshlet {
rt_render_meshlet_static_data static_data;
/* Non-null for loaded children */
struct rt_render_meshlet *children[RT_MAX_MESHLET_CHILDREN];
/* Meshlet index buffer. */
uint32_t *indices;
/* Meshlet vertex buffer. */
rt_vertex *vertices;
/* Current load state of this node */
rt_render_meshlet_state state;
} rt_render_meshlet;
/* Static data stored in the mesh resource */
typedef struct {
uint32_t meshlet_count;
uint32_t total_index_count;
uint32_t total_vertex_count;
/* Resource id of the root meshlet; its load is queued when the mesh is first requested */
rt_resource_id root_resource;
} rt_render_mesh_static_data;
/* A mesh: static data plus the root of its meshlet tree */
typedef struct {
rt_render_mesh_static_data static_data;
/* Root meshlet of the tree */
rt_render_meshlet *root;
} rt_render_mesh;
/* Allocates the mesh and meshlet storage used by the streaming system.
 * Returns RT_SUCCESS, or an error code if a configuration value is invalid or
 * memory could not be allocated. */
rt_result rtInitMeshStreaming(void);

/* Frees the storage allocated by rtInitMeshStreaming. */
void rtShutdownMeshStreaming(void);

/* Per-frame update: records the current frame id and processes queued meshlet
 * releases and loads. */
void rtMeshStreamingNewFrame(unsigned int frame_id);

/* Loads a mesh from a resource file.
 * Allocates storage for the mesh data and queues the load of the root meshlet,
 * to make a (much) simplified version available for rendering as early as
 * possible.
 *
 * If the mesh is already loaded, a shared reference count is incremented instead.
 *
 * Returns NULL if the mesh could not be loaded.
 */
rt_render_mesh *rtGetRenderMesh(rt_resource_id mesh_id);

/* Decrements a mesh's reference counter */
void rtReleaseRenderMesh(rt_resource_id mesh_id);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -21,6 +21,10 @@ typedef struct {
struct _XDisplay *display;
unsigned long window;
#endif
unsigned int width;
unsigned int height;
int is_fullscreen;
} rt_renderer_init_info;
typedef rt_result rt_renderer_init_fn(const rt_renderer_init_info *info);

View File

@ -0,0 +1,196 @@
#include "device.hpp"
#include "runtime/config.h"
// Name of the adapter to use; it is compared against DXGI_ADAPTER_DESC1::Description.
// An empty name selects the default adapter.
// NOTE(review): no rtRegisterCVAR call for this variable is visible in this file - confirm
// that it is registered somewhere.
static RT_CVAR_S(rt_Dx11AdapterName, "Name of the desired adapter (GPU). Default: Empty.", "");
// Creates an uninitialized device wrapper. All plain data members get defined
// values, so they can be read before Initialize() has run.
rt_dx11_device::rt_dx11_device()
    : m_monitor_refresh_rate(0.f), m_tearing_supported(FALSE), m_is_initialized(false) {
}
// Nothing to do explicitly; the ComPtr members release their interfaces themselves.
rt_dx11_device::~rt_dx11_device() {
}
// Creates the DXGI factory, selects an adapter, creates the D3D11 device with
// its immediate context, and creates a flip-model swap chain for info->hWnd.
//
// The adapter named by rt_Dx11AdapterName is used if it exists; otherwise the
// default adapter (index 0) is used.
//
// Returns RT_SUCCESS, or RT_UNKNOWN_ERROR if any of the required objects could
// not be created (the failure is reported via rtReportError).
rt_result rt_dx11_device::Initialize(const rt_renderer_init_info *info) {
    // Create the necessary objects (Device, SwapChain, Immediate Context)
    HRESULT hr = S_OK;
    ComPtr<IDXGIFactory1> factory;
    if (FAILED(hr = CreateDXGIFactory1(IID_PPV_ARGS(&factory)))) {
        rtReportError("DX11", "CreateDXGIFactory1 failed with %d", hr);
        return RT_UNKNOWN_ERROR;
    }
    if (FAILED(hr = factory.As(&m_dxgi_factory))) {
        rtReportError("DX11", "Failed to retrieve IDXGIFactory4 with %d", hr);
        return RT_UNKNOWN_ERROR;
    }

    // Retrieve the selected adapter
    {
        ComPtr<IDXGIAdapter1> adapter;
        WCHAR w_name[128];
        memset(w_name, 0, sizeof(w_name));
        bool have_name = true;
        if (rtUTF8ToWStr(rt_Dx11AdapterName.s, w_name, RT_ARRAY_COUNT(w_name)) != RT_SUCCESS) {
            rtLog("DX11",
                  "The provided adapter name does not fit into the statically sized array.");
            have_name = false;
        }
        // Guarantee termination for the string comparison below
        w_name[RT_ARRAY_COUNT(w_name) - 1] = 0;

        // An empty name can never match an adapter; go straight to the default then.
        if (have_name && w_name[0] != 0) {
            // Taking the address of a ComPtr releases the interface it holds,
            // so the adapter must not be released manually between iterations.
            for (UINT i = 0; m_dxgi_factory->EnumAdapters1(i, &adapter) == S_OK; ++i) {
                DXGI_ADAPTER_DESC1 desc;
                if (FAILED(adapter->GetDesc1(&desc)))
                    continue;
                // Compare as strings: bytes behind the terminator are irrelevant.
                if (wcscmp(desc.Description, w_name) == 0) {
                    if (FAILED(hr = adapter.As(&m_adapter))) {
                        rtReportError("DX11", "Failed to retrieve IDXGIAdapter3 with %d", hr);
                        return RT_UNKNOWN_ERROR;
                    }
                    break;
                }
            }
        }

        if (!m_adapter.Get()) {
            if (FAILED(m_dxgi_factory->EnumAdapters1(0, &adapter))) {
                rtReportError("DX11", "Failed to retrieve the default adapter.");
                return RT_UNKNOWN_ERROR;
            }
            if (FAILED(hr = adapter.As(&m_adapter))) {
                rtReportError("DX11", "Failed to retrieve IDXGIAdapter3 with %d", hr);
                return RT_UNKNOWN_ERROR;
            }
        }
    }

    {
        // Get monitor properties
        ComPtr<IDXGIOutput> output;
        HMONITOR wnd_mon = MonitorFromWindow(info->hWnd, MONITOR_DEFAULTTOPRIMARY);
        if (wnd_mon) {
            for (UINT i = 0; m_adapter->EnumOutputs(i, &output) == S_OK; ++i) {
                DXGI_OUTPUT_DESC desc = {};
                output->GetDesc(&desc);
                if (desc.Monitor == wnd_mon) {
                    GetDisplayInfo(output.Get());
                    break;
                }
            }
        } else {
            rtLog("DX11", "MonitorFromWindow returned a non-valid monitor.");
            if (!FAILED(m_adapter->EnumOutputs(0, &output))) {
                GetDisplayInfo(output.Get());
            }
        }
    }

    UINT device_flags = D3D11_CREATE_DEVICE_BGRA_SUPPORT;
#ifdef RT_DEBUG
    device_flags |= D3D11_CREATE_DEVICE_DEBUGGABLE | D3D11_CREATE_DEVICE_DEBUG;
#endif

    // Check for tearing support, which is a requirement for variable refresh displays
    if (FAILED(m_dxgi_factory->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING,
                                                   &m_tearing_supported,
                                                   sizeof(m_tearing_supported))))
        m_tearing_supported = false;

    D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1};
    ComPtr<ID3D11Device> device;
    ComPtr<ID3D11DeviceContext> context;
    // When an explicit adapter is passed, the driver type has to be
    // D3D_DRIVER_TYPE_UNKNOWN; any other type makes the call fail with E_INVALIDARG.
    if (FAILED(hr = D3D11CreateDevice(m_adapter.Get(),
                                      D3D_DRIVER_TYPE_UNKNOWN,
                                      NULL,
                                      device_flags,
                                      feature_levels,
                                      RT_ARRAY_COUNT(feature_levels),
                                      D3D11_SDK_VERSION,
                                      &device,
                                      nullptr,
                                      &context))) {
        rtLog("DX11",
              "D3D11CreateDevice failed with %d. Retrying without DEBUGGABLE flag...",
              hr);
        device_flags &= ~D3D11_CREATE_DEVICE_DEBUGGABLE;
        if (FAILED(hr = D3D11CreateDevice(m_adapter.Get(),
                                          D3D_DRIVER_TYPE_UNKNOWN,
                                          NULL,
                                          device_flags,
                                          feature_levels,
                                          RT_ARRAY_COUNT(feature_levels),
                                          D3D11_SDK_VERSION,
                                          &device,
                                          nullptr,
                                          &context))) {
            rtReportError("DX11", "D3D11CreateDevice failed with %d", hr);
            return RT_UNKNOWN_ERROR;
        }
    }
    if (FAILED(hr = device.As(&m_device))) {
        rtReportError("DX11", "Failed to retrieve ID3D11Device5 with %d", hr);
        return RT_UNKNOWN_ERROR;
    }
    if (FAILED(hr = context.As(&m_context))) {
        rtReportError("DX11", "Failed to retrieve ID3D11DeviceContext3 with %d", hr);
        return RT_UNKNOWN_ERROR;
    }

    if (info->is_fullscreen) {
        // Don't allow Alt+Enter to switch the window to fullscreen exclusive
        m_dxgi_factory->MakeWindowAssociation(info->hWnd, DXGI_MWA_NO_ALT_ENTER);
    }

    UINT swap_chain_flags = 0;
    if (m_tearing_supported)
        swap_chain_flags |= DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;

    // TODO(Kevin): HDR Support
    DXGI_SWAP_CHAIN_DESC1 swap_chain_desc = {};
    swap_chain_desc.Width                 = info->width;
    swap_chain_desc.Height                = info->height;
    swap_chain_desc.Format                = DXGI_FORMAT_B8G8R8A8_UNORM;
    swap_chain_desc.Stereo                = FALSE;
    swap_chain_desc.SampleDesc.Count      = 1;
    swap_chain_desc.BufferUsage           = DXGI_USAGE_RENDER_TARGET_OUTPUT;
    swap_chain_desc.BufferCount           = 2;
    swap_chain_desc.SwapEffect            = DXGI_SWAP_EFFECT_FLIP_DISCARD;
    swap_chain_desc.Scaling               = DXGI_SCALING_STRETCH;
    swap_chain_desc.AlphaMode             = DXGI_ALPHA_MODE_IGNORE;
    swap_chain_desc.Flags                 = swap_chain_flags;
    if (FAILED(hr = m_dxgi_factory->CreateSwapChainForHwnd(m_device.Get(),
                                                           info->hWnd,
                                                           &swap_chain_desc,
                                                           nullptr,
                                                           nullptr,
                                                           &m_swap_chain))) {
        rtReportError("DX11", "CreateSwapChainForHwnd failed with %d", hr);
        return RT_UNKNOWN_ERROR;
    }

    m_is_initialized = true;
    return RT_SUCCESS;
}
void rt_dx11_device::Shutdown(void) {
m_device.Reset();
m_is_initialized = false;
}
// Stub: the given output is not queried yet. A fixed value is stored in
// m_monitor_refresh_rate instead.
// NOTE(review): the stored value is 1/60, i.e. it looks like a frame period
// in seconds rather than a rate in Hz - confirm the intended unit.
void rt_dx11_device::GetDisplayInfo(IDXGIOutput *output) {
    // TODO Not implemented yet
    const float assumed_value = 1.f / 60.f;
    m_monitor_refresh_rate    = assumed_value;
}

View File

@ -0,0 +1,51 @@
#ifndef RT_DX11_DEVICE_HPP
#define RT_DX11_DEVICE_HPP
#ifndef __cplusplus
#error This file must only be used from C++ code
#endif
#include "renderer/common/renderer_api.h"
#include "runtime/runtime.h"
#include <d3d11_4.h>
#include <dxgi1_5.h>
#include <wrl.h>
// Smart pointer for COM Objects
template <class T> using ComPtr = Microsoft::WRL::ComPtr<T>;
// Singleton wrapper around the D3D11 device, its immediate context and the
// DXGI objects (factory, adapter, swap chain) used by the DX11 renderer.
// Access it through GetInstance(); construction and copying are not public.
class rt_dx11_device {
public:
// Returns the process-wide instance. The object is a function-local static,
// so it is constructed on the first call.
static RT_INLINE rt_dx11_device *GetInstance() {
static rt_dx11_device dev;
return &dev;
}
// Creates the D3D11 device and the swap chain for the window described by
// info. Returns RT_SUCCESS on success, an error code otherwise.
rt_result Initialize(const rt_renderer_init_info *info);
// Drops the device and marks the instance as uninitialized.
void Shutdown(void);
private:
// Only GetInstance() may create the object.
rt_dx11_device();
~rt_dx11_device();
// Non-copyable: there is exactly one device.
rt_dx11_device(const rt_dx11_device &) = delete;
rt_dx11_device &operator=(const rt_dx11_device &) = delete;
// Fills in display related members for the given output. Currently a stub
// that stores a fixed value in m_monitor_refresh_rate.
void GetDisplayInfo(IDXGIOutput *output);
public:
// NOTE(review): presumably the sentinel stored in m_monitor_refresh_rate for
// variable refresh rate displays - it is not used in the visible code; confirm.
static constexpr float VARIABLE_REFRESH_RATE = 0.f;
// Written by GetDisplayInfo().
float m_monitor_refresh_rate;
// Result of the DXGI_FEATURE_PRESENT_ALLOW_TEARING query made in Initialize();
// forced to false when the query fails. BOOL because CheckFeatureSupport
// writes a Win32 BOOL.
BOOL m_tearing_supported;
private:
// Set to true at the end of a successful Initialize(), false by Shutdown().
bool m_is_initialized;
// COM objects owned by the device. Members are destroyed in reverse order of
// declaration, so keep this order in mind when adding new ones.
ComPtr<IDXGIFactory5> m_dxgi_factory;
ComPtr<IDXGISwapChain1> m_swap_chain;
ComPtr<IDXGIAdapter3> m_adapter;
ComPtr<ID3D11DeviceContext3> m_context;
ComPtr<ID3D11Device5> m_device;
};
#endif

View File

@ -1,13 +1,22 @@
#include "renderer/common/render_mesh.h"
#include "renderer/common/renderer_api.h"
#include "device.hpp"
// Initializes the DX11 backend.
//
// First brings up the device singleton (device, context, swap chain) and
// then the mesh streaming system of the common renderer code. The first
// failing step aborts initialization and its error code is returned.
//
// A leftover `return RT_SUCCESS;` at the top of the function made both
// initialization steps unreachable; it has been removed.
rt_result Dx11Init(const rt_renderer_init_info *info) {
    rt_result res = rt_dx11_device::GetInstance()->Initialize(info);
    if (res != RT_SUCCESS)
        return res;
    res = rtInitMeshStreaming();
    return res;
}
void Dx11Shutdown(void) {
rtShutdownMeshStreaming();
rt_dx11_device::GetInstance()->Shutdown();
}
// Called by the application to retrieve the renderer api
extern "C" RT_DLLEXPORT rt_renderer_api rtLoadRendererImpl(void) {
rt_renderer_api api = {

View File

@ -1,13 +1,15 @@
if get_option('build_dx11')
dx11_dep = declare_dependency(link_args: ['-ld3d11', '-ldxgi', '-lwinmm', '-ldxguid'])
dx11_renderer_lib = library('rtdx11',
'../common/renderer_api.h',
# Dx11 specific files
'device.hpp',
'device.cpp',
'init.cpp',
dependencies: [m_dep, windowing_dep, dx11_dep, thread_dep],
dependencies: [m_dep, dx11_dep, thread_dep],
include_directories: [engine_incdir, contrib_incdir],
link_with: runtime_lib,
link_with: [runtime_lib, common_renderer_lib],
cpp_pch: 'pch/dx11_pch.h',
override_options: ['b_sanitize=none'],
install: true)

View File

@ -1,2 +1,12 @@
#include <d3d11.h>
#include <d3d11_1.h>
#include <d3d11_2.h>
#include <d3d11_3.h>
#include <d3d11_4.h>
#include <dxgi.h>
#include <dxgi1_2.h>
#include <dxgi1_3.h>
#include <dxgi1_4.h>
#include <dxgi1_5.h>
#include "runtime/runtime.h"

View File

@ -96,6 +96,31 @@ RT_DLLEXPORT uint64_t rtHashtableLookup(const rt_hashtable *hashtable,
RT_DLLEXPORT void rtHashtableRemove(rt_hashtable *hashtable, uint64_t key);
/* A ringbuffer based queue. The queue is NOT threadsafe.
 *
 * The queue does not own its storage; it operates on the memory handed to
 * rtCreateQueue. One slot is always kept free to tell "full" from "empty"
 * (see rtQueueIsFull), so at most capacity - 1 elements can be stored. */
typedef struct {
/* Caller provided storage for capacity elements of entry_size bytes each */
void *entries;
/* Number of slots in entries */
uint64_t capacity;
/* Index of the slot that is read by the next pop */
uint64_t head;
/* Index of the slot that is written by the next push */
uint64_t tail;
/* Size of one element in bytes */
uint64_t entry_size;
} rt_queue;
/* Creates an empty queue on top of caller provided memory.
 * Nothing is allocated; memory is used as-is and must be large enough for
 * capacity elements of entry_size bytes. */
RT_DLLEXPORT rt_queue rtCreateQueue(uint64_t capacity, void *memory, uint64_t entry_size);
/* Returns non-zero if the queue contains no elements, i.e. the read and
 * write positions coincide. */
static RT_INLINE int rtQueueIsEmpty(const rt_queue *queue) {
    const uint64_t read_idx  = queue->head;
    const uint64_t write_idx = queue->tail;
    return read_idx == write_idx;
}
/* Returns non-zero if no further element can be pushed.
 *
 * The queue is full when advancing the write position by one would make it
 * equal to the read position; one slot therefore always stays unused.
 * A queue without any slots (capacity == 0) is reported as full instead of
 * evaluating a modulo by zero, which is undefined behaviour. */
static RT_INLINE int rtQueueIsFull(const rt_queue *queue) {
    if (queue->capacity == 0)
        return 1;
    return ((queue->tail + 1) % queue->capacity) == queue->head;
}
/* Copies entry_size bytes from element to the back of the queue.
 * Returns RT_OUT_OF_MEMORY if the queue is full, RT_SUCCESS otherwise. */
RT_DLLEXPORT rt_result rtQueuePush(rt_queue *queue, const void *element);
/* Copies the front element (entry_size bytes) to dst and removes it.
 * Returns RT_INVALID_VALUE if the queue is empty, RT_SUCCESS otherwise. */
RT_DLLEXPORT rt_result rtQueuePop(rt_queue *queue, void *dst);
#ifdef __cplusplus
}
#endif

33
src/runtime/ds_queue.c Normal file
View File

@ -0,0 +1,33 @@
#include "ds.h"
#include <string.h>
#define QUEUE_AT(_pq, _idx) ((char *)((_pq)->entries) + ((_pq)->entry_size * (_idx)))
/* Builds an empty queue that stores its elements in the given memory.
 * No allocation happens here; the caller keeps ownership of memory. */
RT_DLLEXPORT rt_queue rtCreateQueue(uint64_t capacity, void *memory, uint64_t entry_size) {
    rt_queue queue;
    queue.entries    = memory;
    queue.capacity   = capacity;
    queue.entry_size = entry_size;
    /* head == tail means the queue is empty */
    queue.head = 0;
    queue.tail = 0;
    return queue;
}
/* Appends a copy of element to the queue.
 * Returns RT_OUT_OF_MEMORY if there is no free slot, RT_SUCCESS otherwise. */
RT_DLLEXPORT rt_result rtQueuePush(rt_queue *queue, const void *element) {
    if (!rtQueueIsFull(queue)) {
        const uint64_t slot = queue->tail;
        memcpy(QUEUE_AT(queue, slot), element, queue->entry_size);
        /* Advance the write position, wrapping around at the end */
        queue->tail = (slot + 1) % queue->capacity;
        return RT_SUCCESS;
    }
    return RT_OUT_OF_MEMORY;
}
/* Removes the oldest element from the queue and copies it to dst.
 * Returns RT_INVALID_VALUE if the queue is empty, RT_SUCCESS otherwise. */
RT_DLLEXPORT rt_result rtQueuePop(rt_queue *queue, void *dst) {
    if (!rtQueueIsEmpty(queue)) {
        const uint64_t slot = queue->head;
        memcpy(dst, QUEUE_AT(queue, slot), queue->entry_size);
        /* Advance the read position, wrapping around at the end */
        queue->head = (slot + 1) % queue->capacity;
        return RT_SUCCESS;
    }
    return RT_INVALID_VALUE;
}

View File

@ -34,12 +34,18 @@ extern rt_result InitAIO(void);
extern void ShutdownAIO(void);
extern rt_result InitResourceManager(void);
extern void ShutdownResourceManager(void);
extern rt_result InitTiming(void);
RT_DLLEXPORT rt_result rtInitRuntime(void) {
SetMainThreadId();
RegisterRuntimeCVars();
rt_result res;
if ((res = InitTiming()) != RT_SUCCESS) {
rtReportError("TIMING", "Init failed.");
return res;
}
if ((res = InitBufferManager()) != RT_SUCCESS) {
rtReportError("BUFFERMGR", "Init failed.");
return res;

View File

@ -20,6 +20,7 @@ runtime_lib = library('rt',
'runtime.h',
'threading.h',
'threading_helpers.hpp',
'timing.h',
'aio.c',
'assert.c',
@ -28,6 +29,7 @@ runtime_lib = library('rt',
'config.c',
'ds_hashtable.c',
'ds_minheap.c',
'ds_queue.c',
'dynamic_libs.c',
'error_report.c',
'file_tab.c',
@ -44,6 +46,7 @@ runtime_lib = library('rt',
'threading_rwlock.c',
'threading_semaphore.c',
'threading_thread.c',
'timing.c',
# Contrib Sources
contrib_dir / 'xxhash/xxhash.c',

View File

@ -3,7 +3,7 @@
/* Resource system interface
*
* To differentiate the two ideas, we call processed assets "resources"
* and the source files "assets".
*
* For example a .pipeline file is an asset, while a compiled pipeline in
@ -39,6 +39,10 @@ typedef enum {
RT_RESOURCE_EFFECT,
RT_RESOURCE_MESH,
RT_RESOURCE_MESHLET,
RT_RESOURCE_TYPE_count,
} rt_resource_type;

36
src/runtime/timing.c Normal file
View File

@ -0,0 +1,36 @@
#include "timing.h"
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
static uint64_t _QPC_freq = 0u;
rt_result InitTiming(void) {
LARGE_INTEGER qpc_freq;
if (!QueryPerformanceFrequency(&qpc_freq)) {
return RT_UNKNOWN_ERROR;
}
_QPC_freq = (uint64_t)qpc_freq.QuadPart;
double resolution = 1e6 * 1.0 / (double)_QPC_freq;
rtLog("TIMING",
"QPC Frequency: %llu ticks per second Resolution: %.2lf us",
_QPC_freq, resolution);
return RT_SUCCESS;
}
/* Returns the current performance counter value together with the counter
 * frequency cached by InitTiming. */
RT_DLLEXPORT rt_timestamp rtTimeNow(void) {
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);

    rt_timestamp now;
    now.ticks            = counter.QuadPart;
    now.ticks_per_second = _QPC_freq;
    return now;
}
/* Returns the time from a to b in seconds. Each timestamp is converted to
 * seconds with its own frequency before the difference is taken. */
RT_DLLEXPORT rt_time_delta rtTimeBetween(rt_timestamp a, rt_timestamp b) {
    const double start_seconds = (double)a.ticks / (double)a.ticks_per_second;
    const double end_seconds   = (double)b.ticks / (double)b.ticks_per_second;
    return end_seconds - start_seconds;
}
#endif

19
src/runtime/timing.h Normal file
View File

@ -0,0 +1,19 @@
#ifndef RT_TIMING_H
#define RT_TIMING_H
/* Access to high precision timers */
#include "runtime.h"
/* A point in time, expressed in ticks of a high precision counter.
 * Use rtTimeBetween to turn two timestamps into a duration. */
typedef struct {
/* Raw counter value at the time the timestamp was taken */
uint64_t ticks;
/* Frequency of the counter, used to convert ticks to seconds */
uint64_t ticks_per_second;
} rt_timestamp;
/* Time between two timestamps in seconds */
typedef double rt_time_delta;
/* The functions are implemented in C. Without C linkage, C++ translation
 * units that include this header would reference mangled names and fail to
 * link against the runtime. */
#ifdef __cplusplus
extern "C" {
#endif

/* Returns a timestamp for the current point in time. */
RT_DLLEXPORT rt_timestamp rtTimeNow(void);

/* Returns the time from a to b in seconds (b - a). The result is negative
 * if b was taken before a. */
RT_DLLEXPORT rt_time_delta rtTimeBetween(rt_timestamp a, rt_timestamp b);

#ifdef __cplusplus
}
#endif
#endif

13
subprojects/glfw.wrap Normal file
View File

@ -0,0 +1,13 @@
[wrap-file]
directory = glfw-3.3.10
source_url = https://github.com/glfw/glfw/archive/refs/tags/3.3.10.tar.gz
source_filename = glfw-3.3.10.tar.gz
source_hash = 4ff18a3377da465386374d8127e7b7349b685288cb8e17122f7e1179f73769d5
patch_filename = glfw_3.3.10-1_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/glfw_3.3.10-1/get_patch
patch_hash = 3567f96c2576a5fc8c9cafd9059f919d7da404f6c22450c6c2ce3f9938909b8b
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/glfw_3.3.10-1/glfw-3.3.10.tar.gz
wrapdb_version = 3.3.10-1
[provide]
glfw3 = glfw_dep

13
subprojects/glm.wrap Normal file
View File

@ -0,0 +1,13 @@
[wrap-file]
directory = glm-1.0.1
source_url = https://github.com/g-truc/glm/archive/refs/tags/1.0.1.tar.gz
source_filename = glm-1.0.1.tar.gz
source_hash = 9f3174561fd26904b23f0db5e560971cbf9b3cbda0b280f04d5c379d03bf234c
patch_filename = glm_1.0.1-1_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/glm_1.0.1-1/get_patch
patch_hash = 25679275e26bc4c36bb617d1b4a52197039402af828d2a4bf67b3c0260a5df6a
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/glm_1.0.1-1/glm-1.0.1.tar.gz
wrapdb_version = 1.0.1-1
[provide]
glm = glm_dep

13
subprojects/imgui.wrap Normal file
View File

@ -0,0 +1,13 @@
[wrap-file]
directory = imgui-1.89.9
source_url = https://github.com/ocornut/imgui/archive/refs/tags/v1.89.9.tar.gz
source_filename = imgui-1.89.9.tar.gz
source_hash = 1acc27a778b71d859878121a3f7b287cd81c29d720893d2b2bf74455bf9d52d6
patch_filename = imgui_1.89.9-1_patch.zip
patch_url = https://wrapdb.mesonbuild.com/v2/imgui_1.89.9-1/get_patch
patch_hash = 9b21290c597d76bf8d4eeb3f9ffa024b11d9ea6c61e91d648ccc90b42843d584
source_fallback_url = https://github.com/mesonbuild/wrapdb/releases/download/imgui_1.89.9-1/imgui-1.89.9.tar.gz
wrapdb_version = 1.89.9-1
[provide]
imgui = imgui_dep

View File

@ -0,0 +1,8 @@
; Meson wrap for meshoptimizer, fetched from git and built with its CMake
; build files.
[wrap-git]
url = https://github.com/zeux/meshoptimizer.git
; Release tag to check out
revision = v0.21
; Shallow clone, only the tagged commit is fetched
depth = 1
method = cmake
; NOTE(review): the provided dependency name carries the version suffix, so
; build files have to request 'meshoptimizer-0.21' - confirm this is intended.
[provide]
meshoptimizer-0.21 = meshoptimizer_dep

View File

@ -1,4 +1,4 @@
test_link_libs = [runtime_lib, gfx_lib]
test_link_libs = [runtime_lib]
if get_option('default_library') == 'static'
test_link_libs += null_renderer_lib
endif