Loading and caching effects

This commit is contained in:
Kevin Trogant 2024-05-14 14:07:04 +02:00
parent 5d988d15b7
commit 656b21d1ef
23 changed files with 883 additions and 114 deletions

View File

@ -2,8 +2,10 @@
#include "runtime/resources.h" #include "runtime/resources.h"
#include "runtime/threading.h" #include "runtime/threading.h"
#include "gfx/builtin_objects.h"
#include "gfx/gfx.h" #include "gfx/gfx.h"
#include "gfx/renderer_api.h" #include "gfx/renderer_api.h"
#include "gfx/effect.h"
#include "asset_compiler/asset_compiler.h" #include "asset_compiler/asset_compiler.h"
@ -13,10 +15,33 @@ void RegisterCVars(void) {
static rt_render_graph *_graph; static rt_render_graph *_graph;
static rt_result ForwardPassExecute(rt_command_buffer_handle cmdbuf, static rt_result ForwardPassExecute(uint32_t pass_id,
rt_command_buffer_handle cmdbuf,
const rt_render_view *views, const rt_render_view *views,
uint32_t view_count, uint32_t view_count,
void *userdata) { void *userdata) {
RT_ASSERT(view_count == 1, "Expected a single view for the main camera.");
rt_render_view view = *views;
RT_VERIFY(rtDoViewTypesMatchExact(view, &g_builtin_render_object_types.render_mesh, 1));
rt_render_list meshes = view.lists[0];
if (meshes.length == 0)
return RT_SUCCESS;
rt_pipeline_handle bound_pipeline =
(RT_GET_RENDER_LIST_ELEMENT(meshes, rt_render_mesh, 0)).pipeline;
g_renderer.CmdBindPipeline(cmdbuf, bound_pipeline);
for (size_t mesh_idx = 0; mesh_idx < meshes.length; ++mesh_idx) {
rt_render_mesh mesh = RT_GET_RENDER_LIST_ELEMENT(meshes, rt_render_mesh, mesh_idx);
if (RT_COMPARE_RENDER_HANDLES(bound_pipeline, mesh.pipeline, !=)) {
bound_pipeline = mesh.pipeline;
g_renderer.CmdBindPipeline(cmdbuf, bound_pipeline);
}
g_renderer.CmdBindVertexBuffers(cmdbuf, 0, 1, &mesh.vbo, NULL);
g_renderer.CmdDraw(cmdbuf, 0, mesh.vertex_count);
}
return RT_SUCCESS; return RT_SUCCESS;
} }
@ -27,6 +52,8 @@ void Init(void) {
rtWaitForAssetProcessing(); rtWaitForAssetProcessing();
rtRegisterBuiltinRenderObjectTypes();
rt_render_graph_builder builder = g_renderer.CreateRenderGraphBuilder(); rt_render_graph_builder builder = g_renderer.CreateRenderGraphBuilder();
rt_attachment_info backbuffer = { rt_attachment_info backbuffer = {
.name = "backbuffer", .name = "backbuffer",
@ -55,6 +82,21 @@ void Init(void) {
} }
g_renderer.DestroyRenderGraphBuilder(&builder); g_renderer.DestroyRenderGraphBuilder(&builder);
const rt_effect *effect;
if (rtLoadEffect(rtGetResourceID("assets/shader/static_object.effect"), &effect) !=
RT_SUCCESS) {
rtReportError("GAME", "Oh noo...");
}
const rt_effect *effect2;
if (rtLoadEffect(rtGetResourceID("assets/shader/static_object.effect"), &effect2) !=
RT_SUCCESS) {
rtReportError("GAME", "Oh noo...");
}
rtReleaseEffect(effect);
rtReleaseEffect(effect2);
} }
/* Called after exiting the main-loop and before the runtime starts its shutdown */ /* Called after exiting the main-loop and before the runtime starts its shutdown */
@ -63,6 +105,9 @@ void Shutdown(void) {
rtShutdownAssetCompiler(); rtShutdownAssetCompiler();
} }
// Question: How do we move data from update to render?
// This is where we could fill the render views, but that would
// mean double/triple buffering the views.
void Update(unsigned int frame_id) { void Update(unsigned int frame_id) {
RT_UNUSED(frame_id); RT_UNUSED(frame_id);
} }

View File

@ -14,6 +14,7 @@ extern "C" {
#endif #endif
typedef struct { typedef struct {
rt_pipeline_handle pipeline;
rt_buffer_handle vbo; rt_buffer_handle vbo;
rt_buffer_handle ibo; rt_buffer_handle ibo;
uint32_t vertex_count; uint32_t vertex_count;

View File

@ -1,5 +1,88 @@
#include "effect.h" #include "effect.h"
#include "runtime/config.h"
#include "runtime/ds.h"
#include "runtime/handles.h"
#include "runtime/hashing.h" #include "runtime/hashing.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include "runtime/atomics.h"
#include <stdlib.h>
#include <string.h>
RT_CVAR_SZ(rt_EffectCacheSize, "The number of slots in the effect cache. Default: 1024", 1024);
/* One entry of the effect cache: the resource the entry was loaded from,
 * the runtime effect itself and a reference count. */
typedef struct {
/* Resource id of the effect stored in this slot */
rt_resource_id resource;
/* The cached effect. rtReleaseEffect() recovers the slot from a pointer to this member. */
rt_effect effect;
/* Number of outstanding references, modified with the 32 bit atomic helpers.
 * A slot is queued for reclamation when this drops to 0. */
_Alignas(4) unsigned int refcount;
} rt_effect_cache_slot;
/* We use a hashtable to find previously loaded effects.
 * To reclaim unreferenced slots when we need to, we use a minheap.
 * The minheap implements an LRU list: slots are pushed with the value of a
 * global running "usage counter" as their key. The counter is incremented
 * whenever an effect is loaded, so the smallest key belongs to the slot that
 * was released the longest time ago.
 */
typedef struct {
/* Array of rt_EffectCacheSize slots, located inside "memory" */
rt_effect_cache_slot *slots;
/* Maps resource ids to slot indices */
rt_hashtable lut;
/* Indices of released slots, keyed by the usage counter at release time */
rt_minheap reclaim_heap;
/* Linearly allocate slots until we reach capacity */
size_t next_free;
/* Used to track "time" since an effect was loaded */
_Alignas(4) int usage_counter;
/* Single allocation that backs the hashtable, the heap and the slots */
void *memory;
/* Read-locked for lookups, write-locked for every modification of the cache */
rt_rwlock lock;
} rt_effect_cache;
static rt_effect_cache _cache;
/* Allocates and initializes the global effect cache.
 *
 * One heap allocation backs, in this order: the lookup hashtable, the keys of
 * the reclaim heap, the values of the reclaim heap and the slot array.
 *
 * Returns:
 *  - RT_SUCCESS
 *  - RT_INVALID_VALUE, if rt_EffectCacheSize is not a power of two
 *  - RT_UNKNOWN_ERROR, if the cache lock could not be created
 *  - RT_OUT_OF_MEMORY, if the backing allocation failed */
rt_result InitEffectCache(void) {
    const size_t slot_count = rt_EffectCacheSize.sz;
    if (!RT_IS_POWER_OF_TWO(slot_count)) {
        rtReportError(
            "GFX",
            "The value of \"rt_EffectCacheSize\" must be a power of two.\nConfigured: %zu.",
            slot_count);
        return RT_INVALID_VALUE;
    }

    rt_create_rwlock_result lock_res = rtCreateRWLock();
    if (!lock_res.ok)
        return RT_UNKNOWN_ERROR;
    _cache.lock = lock_res.lock;

    /* The lookup table gets twice as many slots as the cache has entries, to keep
     * performance up. The same count must be used for sizing the memory and for
     * creating the table; otherwise the extra memory is reserved but never used. */
    const size_t lut_slots   = 2 * slot_count;
    const size_t lut_bytes   = RT_HASH_TABLE_MEMORY_REQUIRED(lut_slots);
    const size_t key_bytes   = sizeof(int) * slot_count;    /* heap keys */
    const size_t value_bytes = sizeof(size_t) * slot_count; /* heap values */
    const size_t slot_bytes  = sizeof(rt_effect_cache_slot) * slot_count;

    _cache.memory = malloc(lut_bytes + key_bytes + value_bytes + slot_bytes);
    if (!_cache.memory) {
        rtDestroyRWLock(&_cache.lock);
        return RT_OUT_OF_MEMORY;
    }

    _cache.lut = rtCreateHashtable(lut_slots, _cache.memory, NULL, NULL);

    int *keys      = (int *)((char *)_cache.memory + lut_bytes);
    size_t *values = (size_t *)(keys + slot_count);
    _cache.reclaim_heap = rtCreateMinheap(keys, values, sizeof(size_t), slot_count, 0);

    _cache.usage_counter = 0;
    _cache.next_free     = 0;

    /* Zeroed slots have a refcount of 0 and an effect without passes */
    _cache.slots = (rt_effect_cache_slot *)(values + slot_count);
    memset(_cache.slots, 0, slot_bytes);

    return RT_SUCCESS;
}
/* Releases the memory and the lock owned by the effect cache.
 *
 * Does nothing if the cache holds no memory, which is the state before
 * InitEffectCache() succeeded and after a previous shutdown.
 *
 * NOTE(review): pipelines of effects that are still cached are not destroyed
 * here - confirm that the renderer shutdown takes care of them. */
void ShutdownEffectCache(void) {
    if (!_cache.memory)
        return;

    free(_cache.memory);
    /* Everything below pointed into the freed allocation */
    _cache.memory    = NULL;
    _cache.slots     = NULL;
    _cache.next_free = 0;

    rtDestroyRWLock(&_cache.lock);
}
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) { RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) {
uint32_t id = rtHashBytes32(name, len); uint32_t id = rtHashBytes32(name, len);
@ -14,3 +97,163 @@ RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len) {
id = ~id; id = ~id;
return id; return id;
} }
/* Destroys the pipeline of every pass of the effect, in pass order.
 * The effect structure itself is left untouched. */
static void ReleaseEffect(rt_effect *effect) {
    unsigned int pass_index = 0;
    while (pass_index < effect->pass_count) {
        g_renderer.DestroyPipeline(effect->passes[pass_index].pipeline);
        pass_index += 1;
    }
}
/* Reserves a cache slot for the effect with the given resource id and
 * registers the id in the lookup table. Called by rtLoadEffect() while it
 * holds the write lock of the cache.
 *
 * Never-used slots are handed out first. Once those are exhausted, the least
 * recently released slot is taken from the reclaim heap; the effect that
 * previously occupied it is released.
 *
 * Returns the index of the reserved slot, which then has a refcount of 1,
 * or SIZE_MAX if no slot is available or the lookup table insert failed. */
static size_t ReserveSlot(rt_resource_id id) {
    if (_cache.next_free < rt_EffectCacheSize.sz) {
        size_t slot = _cache.next_free;
        RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use.");
        if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
            rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
            /* next_free was not advanced, so the untouched slot stays available.
             * It must not go onto the reclaim heap: it never held an effect. */
            return SIZE_MAX;
        }
        _cache.next_free            = slot + 1;
        _cache.slots[slot].resource = id;
        _cache.slots[slot].refcount = 1;
        return slot;
    }

    while (!rtMinheapIsEmpty(&_cache.reclaim_heap)) {
        size_t slot;
        rtMinheapPop(&_cache.reclaim_heap, &slot);

        /* A released slot stays in the lookup table, so a later load can pick it
         * up again while its heap entry still exists. Such entries are stale;
         * the slot is queued again once it is released the next time. */
        if (_cache.slots[slot].refcount != 0)
            continue;

        rt_resource_id old_id = _cache.slots[slot].resource;
        RT_ASSERT(old_id != RT_INVALID_RESOURCE_ID, "The slot should contain an old effect.");

        ReleaseEffect(&_cache.slots[slot].effect);
        /* The pipelines are gone; make sure they can not be destroyed a second time */
        _cache.slots[slot].effect.pass_count = 0;

        /* Only drop the mapping of the old id if it still refers to this slot */
        if (rtHashtableLookup(&_cache.lut, old_id, UINT64_MAX) == (uint64_t)slot)
            rtHashtableRemove(&_cache.lut, old_id);

        if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) {
            rtLog("GFX", "Failed to insert effect %x into the lookup table.", id);
            /* Hand the (now empty) slot back for a later attempt */
            rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
            return SIZE_MAX;
        }
        _cache.slots[slot].resource = id;
        _cache.slots[slot].refcount = 1;
        return slot;
    }

    rtLog("GFX",
          "Could not insert effect %x into the cache, because the effect cache is full.",
          id);
    return SIZE_MAX;
}
/* Reads the resource with the given id into storage taken from the arena.
 *
 * On return, *p_out points to the storage if the allocation succeeded,
 * independent of the result of reading the resource.
 *
 * Returns:
 *  - RT_INVALID_VALUE, if the resource size is reported as 0
 *  - RT_OUT_OF_MEMORY, if the arena could not provide the storage
 *  - otherwise the result of rtGetResource() */
static rt_result LoadResource(rt_resource_id id, void **p_out, rt_arena *arena) {
    const size_t byte_count = rtGetResourceSize(id);
    if (byte_count == 0) {
        rtLog("GFX", "ID %x is not a valid resource.", id);
        return RT_INVALID_VALUE;
    }

    void *storage = rtArenaPush(arena, byte_count);
    if (storage) {
        *p_out = storage;
        rt_result load_result = rtGetResource(id, storage);
        return load_result;
    }

    rtLog("GFX", "Failed to allocate %zu bytes of temporary storage.", byte_count);
    return RT_OUT_OF_MEMORY;
}
/* Loads the effect resource with the given id and compiles the pipeline of
 * each of its passes into *effect.
 *
 * All resource data is read into a temporary arena that is returned before
 * this function exits. On failure, every pipeline that was compiled up to that
 * point is destroyed again and the effect is left with a pass count of 0.
 *
 * Returns:
 *  - RT_SUCCESS
 *  - RT_OUT_OF_MEMORY, if no temporary arena or temporary storage was available
 *  - RT_INVALID_VALUE, if id is not an effect, a pass does not reference a
 *    pipeline, or the effect declares more passes than an rt_effect can hold
 *  - RT_UNKNOWN_ERROR, if a pipeline failed to compile
 *  - errors returned by rtGetResource() */
static rt_result LoadEffect(rt_resource_id id, rt_effect *effect) {
    rt_result res;
    void *resource_mem                = NULL;
    const rt_resource *resource       = NULL;
    const rt_effect_info *effect_info = NULL;

    /* pass_count tracks the number of passes that own a compiled pipeline,
     * so the effect is safe to release at any point. */
    effect->pass_count = 0;

    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena) {
        rtLog("GFX", "Could not get a temporary arena.");
        return RT_OUT_OF_MEMORY;
    }

    res = LoadResource(id, &resource_mem, temp.arena);
    if (res != RT_SUCCESS)
        goto fail;
    resource = resource_mem;
    if (resource->type != RT_RESOURCE_EFFECT) {
        rtLog("GFX", "Resource %x does not refer to an effect resource.", id);
        res = RT_INVALID_VALUE;
        goto fail;
    }

    effect_info = resource->data;
    if (effect_info->pass_count > RT_MAX_SUBRESOURCES) {
        rtLog("GFX",
              "Effect %x declares %u passes, the maximum is %u.",
              id,
              (unsigned int)effect_info->pass_count,
              (unsigned int)RT_MAX_SUBRESOURCES);
        res = RT_INVALID_VALUE;
        goto fail;
    }

    for (unsigned int i = 0; i < effect_info->pass_count; ++i) {
        void *pipeline_mem = NULL;
        res = LoadResource(effect_info->passes[i].pipeline, &pipeline_mem, temp.arena);
        if (res != RT_SUCCESS)
            goto fail;

        const rt_resource *pipeline_resource = pipeline_mem;
        if (pipeline_resource->type != RT_RESOURCE_PIPELINE) {
            rtLog("GFX",
                  "Resource %x does not refer to a pipeline resource.",
                  effect_info->passes[i].pipeline);
            res = RT_INVALID_VALUE;
            goto fail;
        }

        rt_pipeline_info *pipeline_info = pipeline_resource->data;
        rt_pipeline_handle pipeline     = g_renderer.CompilePipeline(pipeline_info);
        if (!RT_IS_HANDLE_VALID(pipeline)) {
            rtLog("GFX",
                  "Failed to compile the pipeline of pass %u (%x).",
                  i,
                  effect_info->passes[i].pass_id);
            res = RT_UNKNOWN_ERROR;
            goto fail;
        }

        effect->passes[i].pass_id  = effect_info->passes[i].pass_id;
        effect->passes[i].pipeline = pipeline;
        effect->pass_count         = i + 1;
    }

    rtReturnTemporaryArena(temp);
    return RT_SUCCESS;

fail:
    /* Destroys exactly the pipelines compiled so far */
    ReleaseEffect(effect);
    effect->pass_count = 0;
    rtReturnTemporaryArena(temp);
    return res;
}
/* Returns the effect for the given resource id, loading it if it is not cached.
 *
 * A cache hit only takes the read lock. A miss takes the write lock, reserves
 * a slot and loads the effect into it. Every successful call adds one
 * reference to the effect.
 *
 * On failure *effect is set to NULL and the cache keeps no trace of the id,
 * so a later call attempts the load again.
 *
 * Returns:
 *  - RT_SUCCESS
 *  - RT_OUT_OF_MEMORY, if no cache slot could be reserved
 *  - errors returned by LoadEffect() */
RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect) {
    *effect = NULL;
    rtAtomic32Inc(&_cache.usage_counter);

    /* Check if the effect is already loaded */
    rtLockRead(&_cache.lock);
    uint64_t cached = rtHashtableLookup(&_cache.lut, id, UINT64_MAX);
    if (cached != UINT64_MAX) {
        RT_ASSERT(_cache.slots[cached].resource == id, "Got the wrong effect");
        rtAtomic32Inc(&_cache.slots[cached].refcount);
        *effect = &_cache.slots[cached].effect;
        rtUnlockRead(&_cache.lock);
        return RT_SUCCESS;
    }
    rtUnlockRead(&_cache.lock);

    /* Load the effect */
    rtLockWrite(&_cache.lock);

    /* Another thread may have loaded the effect between the two locks.
     * The write lock is exclusive, so the entry can be used right away. */
    cached = rtHashtableLookup(&_cache.lut, id, UINT64_MAX);
    if (cached != UINT64_MAX) {
        RT_ASSERT(_cache.slots[cached].resource == id, "Got the wrong effect");
        rtAtomic32Inc(&_cache.slots[cached].refcount);
        *effect = &_cache.slots[cached].effect;
        rtUnlockWrite(&_cache.lock);
        return RT_SUCCESS;
    }

    size_t slot = ReserveSlot(id);
    if (slot == SIZE_MAX) {
        rtUnlockWrite(&_cache.lock);
        return RT_OUT_OF_MEMORY;
    }

    rt_result res = LoadEffect(id, &_cache.slots[slot].effect);
    if (res != RT_SUCCESS) {
        /* Undo the reservation. Otherwise the id would stay in the lookup table
         * and later loads would "succeed" with an effect that was never set up. */
        rtHashtableRemove(&_cache.lut, id);
        /* The contents of a failed effect are not trusted */
        _cache.slots[slot].effect.pass_count = 0;
        _cache.slots[slot].refcount          = 0;
        if (slot + 1 == _cache.next_free) {
            /* The slot is the most recent linear allocation, simply give it back */
            _cache.slots[slot].resource = RT_INVALID_RESOURCE_ID;
            _cache.next_free            = slot;
        } else {
            /* The slot keeps its resource id, because the reclaim path expects
             * queued slots to name the effect they held. */
            rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot);
        }
        rtUnlockWrite(&_cache.lock);
        return res;
    }

    *effect = &_cache.slots[slot].effect;
    rtUnlockWrite(&_cache.lock);
    return RT_SUCCESS;
}
/* Drops one reference to an effect returned by rtLoadEffect().
 *
 * The effect stays in the cache. When the last reference is dropped, its slot
 * is queued on the reclaim heap, keyed by the current usage counter, so that
 * it can be reused once the cache runs out of never-used slots. */
RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect) {
    RT_VERIFY(effect);
    /* The effect is embedded in its cache slot, step back to the start of the slot */
    rt_effect_cache_slot *slot =
        (rt_effect_cache_slot *)((char *)effect - offsetof(rt_effect_cache_slot, effect));
    if (rtAtomic32Dec(&slot->refcount) != 0)
        return;

    rtLockWrite(&_cache.lock);
    /* New references are only handed out while the cache lock is held. A load
     * may have picked the slot up again before we got the write lock; in that
     * case the slot is in use and is queued when that reference is released. */
    if (slot->refcount == 0) {
        size_t slot_index = (size_t)(slot - _cache.slots);
        rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot_index);
    }
    rtUnlockWrite(&_cache.lock);
}

View File

@ -7,8 +7,11 @@
*/ */
#include "gfx.h" #include "gfx.h"
#include "renderer_api.h"
#include "runtime/resources.h" #include "runtime/resources.h"
/* *** Resource types *** */
typedef struct rt_pipeline_info_s { typedef struct rt_pipeline_info_s {
rt_resource_id vertex_shader; rt_resource_id vertex_shader;
rt_resource_id fragment_shader; rt_resource_id fragment_shader;
@ -28,7 +31,30 @@ typedef struct {
rt_effect_pass_info passes[RT_MAX_SUBRESOURCES]; rt_effect_pass_info passes[RT_MAX_SUBRESOURCES];
} rt_effect_info; } rt_effect_info;
/* *** Runtime types *** */
/* Runtime data of one pass of an effect */
typedef struct {
/* Id of the render pass this entry belongs to */
uint32_t pass_id;
/* Compiled pipeline used by the effect in that pass */
rt_pipeline_handle pipeline;
} rt_effect_pass;
/* A loaded effect: a list of passes with their compiled pipelines.
 * Obtained from rtLoadEffect() and handed back with rtReleaseEffect(). */
typedef struct {
/* Number of valid entries in passes */
uint32_t pass_count;
rt_effect_pass passes[RT_MAX_SUBRESOURCES];
} rt_effect;
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len); RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len);
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len); RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len);
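/* Load an effect from a resource file, or return the cached instance if the
 * effect was loaded before. Every successful call must be matched by a call
 * to rtReleaseEffect().
 * Returns:
 * - RT_SUCCESS
 * - RT_OUT_OF_MEMORY, if temporary memory allocations failed or the effect cache is full
 * - RT_INVALID_VALUE, if id does not refer to an effect resource.
 * - RT_UNKNOWN_ERROR, if a pipeline failed to compile
 * - errors returned by rtGetResource() */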
RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect);
RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect);
#endif #endif

View File

@ -23,7 +23,7 @@ static bool _renderer_loaded = false;
RT_DLLEXPORT RT_DLLEXPORT
RT_CVAR_S(rt_Renderer, RT_CVAR_S(rt_Renderer,
"Select the render backend. Available options: [vk, null], Default: vk", "Select the render backend. Available options: [vk, dx11, null], Default: vk",
"dx11"); "dx11");
extern rt_cvar rt_RenderViewArenaSize; extern rt_cvar rt_RenderViewArenaSize;
@ -59,7 +59,8 @@ extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builde
extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *); extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *);
extern void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph, extern void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id, uint32_t pass_id,
rt_render_view view); rt_render_view view,
unsigned int frame_id);
extern void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph); extern void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph);
extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle, extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
@ -81,10 +82,12 @@ extern void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle, uint32_t, uint
extern rt_result InitRenderLists(void); extern rt_result InitRenderLists(void);
extern void ShutdownRenderLists(void); extern void ShutdownRenderLists(void);
extern void ResetRenderLists(void); extern void ResetRenderLists(unsigned int frame_id);
extern rt_result InitRenderViews(void); extern rt_result InitRenderViews(void);
extern void ShutdownRenderViews(void); extern void ShutdownRenderViews(void);
extern void ResetRenderViews(void); extern void ResetRenderViews(unsigned int frame_id);
extern rt_result InitEffectCache(void);
extern void ShutdownEffectCache(void);
static bool LoadRenderer(void) { static bool LoadRenderer(void) {
@ -196,10 +199,14 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) {
if ((result = InitRenderViews()) != RT_SUCCESS) if ((result = InitRenderViews()) != RT_SUCCESS)
return result; return result;
if ((result = InitEffectCache()) != RT_SUCCESS)
return result;
return result; return result;
} }
RT_DLLEXPORT void rtShutdownGFX(void) { RT_DLLEXPORT void rtShutdownGFX(void) {
ShutdownEffectCache();
ShutdownRenderViews(); ShutdownRenderViews();
ShutdownRenderLists(); ShutdownRenderLists();
g_renderer.Shutdown(); g_renderer.Shutdown();
@ -211,6 +218,6 @@ RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id) {
RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id) { RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id) {
g_renderer.EndFrame(frame_id); g_renderer.EndFrame(frame_id);
ResetRenderLists(); ResetRenderLists(frame_id);
ResetRenderViews(); ResetRenderViews(frame_id);
} }

View File

@ -1,8 +1,9 @@
#include "render_list.h" #include "render_list.h"
#include "renderer_api.h"
#include "runtime/threading.h"
#include "runtime/mem_arena.h"
#include "runtime/config.h" #include "runtime/config.h"
#include "runtime/mem_arena.h"
#include "runtime/threading.h"
#include <string.h> #include <string.h>
@ -20,33 +21,49 @@ typedef struct rt_list_pool_s {
struct rt_list_pool_s *next; struct rt_list_pool_s *next;
} rt_list_pool; } rt_list_pool;
typedef struct {
rt_mutex *lock;
rt_list_pool *first_free;
rt_arena arena;
unsigned int access_frame_id;
} rt_frame_lists;
#define DEFAULT_LIST_CAPACITY RT_KB(1) #define DEFAULT_LIST_CAPACITY RT_KB(1)
static rt_render_object_type_data _types[RT_MAX_RENDER_OBJECT_TYPE + 1]; static rt_render_object_type_data _types[RT_MAX_RENDER_OBJECT_TYPE + 1];
static unsigned int _type_count = 0; static unsigned int _type_count = 0;
static rt_rwlock _type_lock; static rt_rwlock _type_lock;
static rt_arena _list_arena; static rt_frame_lists _frame_lists[4];
static rt_list_pool *_first_free_list; static unsigned int _max_frames_in_flight;
static rt_mutex *_list_lock;
rt_result InitRenderLists(void) { rt_result InitRenderLists(void) {
rt_create_rwlock_result lock_res = rtCreateRWLock(); rt_create_rwlock_result lock_res = rtCreateRWLock();
if (!lock_res.ok) if (!lock_res.ok)
return RT_UNKNOWN_ERROR; return RT_UNKNOWN_ERROR;
_type_lock = lock_res.lock; _type_lock = lock_res.lock;
rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderListPoolSize.i);
if (!arena_res.ok) {
rtDestroyRWLock(&_type_lock);
return RT_OUT_OF_MEMORY;
}
_list_arena = arena_res.arena;
_list_lock = rtCreateMutex(); _max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
if (!_list_lock) { RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frame_lists),
rtReleaseArena(&_list_arena); "Invalid maxium number of in-flight frames.");
rtDestroyRWLock(&_type_lock);
for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderListPoolSize.i);
if (!arena_res.ok) {
rtDestroyRWLock(&_type_lock);
return RT_OUT_OF_MEMORY;
}
_frame_lists[i].arena = arena_res.arena;
_frame_lists[i].lock = rtCreateMutex();
if (!_frame_lists[i].lock) {
rtReleaseArena(&_frame_lists[i].arena);
rtDestroyRWLock(&_type_lock);
}
_frame_lists[i].first_free = NULL;
_frame_lists[i].access_frame_id = 0;
} }
return RT_SUCCESS; return RT_SUCCESS;
@ -54,11 +71,13 @@ rt_result InitRenderLists(void) {
void ShutdownRenderLists(void) { void ShutdownRenderLists(void) {
rtDestroyRWLock(&_type_lock); rtDestroyRWLock(&_type_lock);
rtDestroyMutex(_list_lock); for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rtReleaseArena(&_list_arena); rtDestroyMutex(_frame_lists[i].lock);
rtReleaseArena(&_frame_lists[i].arena);
}
} }
RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size, RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
const char *debug_name) { const char *debug_name) {
if (_type_count == RT_MAX_RENDER_OBJECT_TYPE) { if (_type_count == RT_MAX_RENDER_OBJECT_TYPE) {
rtReportError("GFX", "Too many render object types (max is %u)", RT_MAX_RENDER_OBJECT_TYPE); rtReportError("GFX", "Too many render object types (max is %u)", RT_MAX_RENDER_OBJECT_TYPE);
@ -66,8 +85,8 @@ RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_siz
} }
rtLockWrite(&_type_lock); rtLockWrite(&_type_lock);
rt_render_object_type type = (rt_render_object_type)++_type_count; rt_render_object_type type = (rt_render_object_type)++_type_count;
_types[_type_count].size = object_size; _types[_type_count].size = object_size;
_types[_type_count].name = debug_name; _types[_type_count].name = debug_name;
if (debug_name) if (debug_name)
rtLog("GFX", rtLog("GFX",
"Registered render object type %s; object size: %zu. Type: %u", "Registered render object type %s; object size: %zu. Type: %u",
@ -83,7 +102,6 @@ RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_siz
return type; return type;
} }
RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type) { RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type) {
size_t size = 0; size_t size = 0;
rtLockRead(&_type_lock); rtLockRead(&_type_lock);
@ -102,66 +120,84 @@ RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_type ty
return name; return name;
} }
static rt_create_render_list_result CreateNewList(rt_render_object_type type, size_t capacity) { static rt_create_render_list_result
CreateNewList(rt_render_object_type type, unsigned int frame_id, size_t capacity) {
rt_create_render_list_result res = {.ok = false}; rt_create_render_list_result res = {.ok = false};
rtLockMutex(_list_lock); unsigned int slot = frame_id % _max_frames_in_flight;
rtLockMutex(_frame_lists[slot].lock);
if (!_first_free_list || _first_free_list->capacity < capacity) { /* Allocate a new list */ _frame_lists[slot].access_frame_id = frame_id;
rt_list_pool *pool =
rtArenaPush(&_list_arena, sizeof(rt_list_pool) + capacity); if (!_frame_lists[slot].first_free ||
_frame_lists[slot].first_free->capacity < capacity) { /* Allocate a new list */
rt_list_pool *pool = rtArenaPush(&_frame_lists[slot].arena,
sizeof(rt_list_pool) + sizeof(unsigned int) + capacity);
if (!pool) { if (!pool) {
rtReportError("GFX", rtReportError("GFX",
"Out of render list pool space! Configured space: %d kiB", "Out of render list pool space! Configured space: %d kiB",
rt_RenderListPoolSize.i / 1024); rt_RenderListPoolSize.i / 1024);
goto out; goto out;
} }
pool->capacity = capacity; pool->capacity = capacity;
pool->next = _first_free_list; pool->next = _frame_lists[slot].first_free;
_first_free_list = pool; _frame_lists[slot].first_free = pool;
} }
rt_render_list list; rt_render_list list;
list.data = (char *)_first_free_list + sizeof(rt_list_pool); unsigned int *frame_id_store =
list.type = type; (unsigned int *)((char *)_frame_lists[slot].first_free + sizeof(rt_list_pool));
list.length = 0; *frame_id_store = frame_id;
res.ok = true; list.data = (char *)_frame_lists[slot].first_free + sizeof(rt_list_pool) + sizeof(unsigned int);
res.list = list; list.type = type;
_first_free_list = _first_free_list->next; list.length = 0;
res.ok = true;
res.list = list;
_frame_lists[slot].first_free = _frame_lists[slot].first_free->next;
out: out:
rtUnlockMutex(_list_lock); rtUnlockMutex(_frame_lists[slot].lock);
return res; return res;
} }
RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type) { RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type,
return CreateNewList(type, DEFAULT_LIST_CAPACITY); unsigned int frame_id) {
return CreateNewList(type, frame_id, DEFAULT_LIST_CAPACITY);
} }
void ResetRenderLists(void) { void ResetRenderLists(unsigned int frame_id) {
rtLockMutex(_list_lock); unsigned int slot = frame_id % _max_frames_in_flight;
_first_free_list = NULL; RT_ASSERT(_frame_lists[slot].access_frame_id == frame_id ||
rtArenaClear(&_list_arena); _frame_lists[slot].access_frame_id == 0,
rtUnlockMutex(_list_lock); "Frame id mismatch");
rtLockMutex(_frame_lists[slot].lock);
_frame_lists[slot].first_free = NULL;
_frame_lists[slot].access_frame_id = 0;
rtArenaClear(&_frame_lists[slot].arena);
rtUnlockMutex(_frame_lists[slot].lock);
} }
RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object) { RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object) {
size_t object_size = rtGetRenderObjectSize(list->type); size_t object_size = rtGetRenderObjectSize(list->type);
rt_list_pool *pool = (rt_list_pool *)((char *)list->data - sizeof(rt_list_pool)); rt_list_pool *pool =
size_t list_capacity = pool->capacity / object_size; (rt_list_pool *)((char *)list->data - sizeof(rt_list_pool) - sizeof(unsigned int));
unsigned int frame_id = *(unsigned int *)((char *)list->data - sizeof(unsigned int));
size_t list_capacity = pool->capacity / object_size;
if (list->length == list_capacity) { if (list->length == list_capacity) {
/* "Grow" the list */ /* "Grow" the list */
rt_create_render_list_result list_res = CreateNewList(list->type, pool->capacity * 2); rt_create_render_list_result list_res =
CreateNewList(list->type, frame_id, pool->capacity * 2);
if (!list_res.ok) if (!list_res.ok)
return false; return false;
memcpy(list_res.list.data, list->data, list->length * object_size); memcpy(list_res.list.data, list->data, list->length * object_size);
rtLockMutex(_list_lock); unsigned int slot = frame_id % _max_frames_in_flight;
pool->next = _first_free_list; rtLockMutex(_frame_lists[slot].lock);
_first_free_list = pool; pool->next = _frame_lists[slot].first_free;
rtUnlockMutex(_list_lock); _frame_lists[slot].first_free = pool;
rtUnlockMutex(_frame_lists[slot].lock);
list_res.list.length = list->length; list_res.list.length = list->length;
*list = list_res.list; *list = list_res.list;
} }
char *dst = (char *)list->data + list->length * object_size; char *dst = (char *)list->data + list->length * object_size;

View File

@ -58,7 +58,7 @@ typedef struct {
/* Create a render list for a particular object type. /* Create a render list for a particular object type.
* *
* Render Lists have a lifetime of one frame. */ * Render Lists have a lifetime of one frame. */
RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type); RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type, unsigned int frame_id);
/* Append a render object to a list. The object must be of the correct type. */ /* Append a render object to a list. The object must be of the correct type. */
RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object); RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object);

View File

@ -9,34 +9,50 @@ RT_CVAR_I(rt_RenderViewArenaSize,
"Size of the memory arena used for allocating render views. Default: 1 MB", "Size of the memory arena used for allocating render views. Default: 1 MB",
RT_MB(1)); RT_MB(1));
static rt_arena _view_arena; typedef struct {
static rt_mutex *_view_lock; rt_arena arena;
rt_mutex *lock;
uint32_t frame_id;
} rt_frame_views;
static rt_frame_views _frames[4];
static unsigned int _max_frames_in_flight;
rt_result InitRenderViews(void) { rt_result InitRenderViews(void) {
rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderViewArenaSize.i); _max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
if (!arena_res.ok) RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frames),
return RT_OUT_OF_MEMORY; "Invalid maximum number of in-flight frames.");
_view_arena = arena_res.arena;
_view_lock = rtCreateMutex(); for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
if (!_view_lock) { rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderViewArenaSize.i);
rtReleaseArena(&_view_arena); if (!arena_res.ok)
return RT_UNKNOWN_ERROR; return RT_OUT_OF_MEMORY;
_frames[i].arena = arena_res.arena;
_frames[i].lock = rtCreateMutex();
if (!_frames[i].lock) {
rtReleaseArena(&_frames[i].arena);
return RT_UNKNOWN_ERROR;
}
_frames[i].frame_id = 0;
} }
return RT_SUCCESS; return RT_SUCCESS;
} }
void ShutdownRenderViews(void) { void ShutdownRenderViews(void) {
rtDestroyMutex(_view_lock); for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
rtReleaseArena(&_view_arena); rtDestroyMutex(_frames[i].lock);
rtReleaseArena(&_frames[i].arena);
}
} }
void ResetRenderViews(void) { void ResetRenderViews(unsigned int frame_id) {
rtArenaClear(&_view_arena); unsigned int slot = frame_id % _max_frames_in_flight;
rtArenaClear(&_frames[slot].arena);
} }
RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types, RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
uint32_t type_count) { uint32_t type_count,
unsigned int frame_id) {
#ifdef RT_DEBUG #ifdef RT_DEBUG
for (uint32_t i = 0; i < type_count - 1; ++i) { for (uint32_t i = 0; i < type_count - 1; ++i) {
for (uint32_t j = i + 1; j < type_count; ++j) { for (uint32_t j = i + 1; j < type_count; ++j) {
@ -45,10 +61,13 @@ RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_obj
} }
#endif #endif
unsigned int slot = frame_id % _max_frames_in_flight;
size_t size = type_count * (sizeof(rt_render_list) + sizeof(rt_render_list)); size_t size = type_count * (sizeof(rt_render_list) + sizeof(rt_render_list));
rtLockMutex(_view_lock); rtLockMutex(_frames[slot].lock);
void *storage = rtArenaPush(&_view_arena, size); void *storage = rtArenaPush(&_frames[slot].arena, size);
rtUnlockMutex(_view_lock); _frames[slot].frame_id = frame_id;
rtUnlockMutex(_frames[slot].lock);
if (!storage) { if (!storage) {
return (rt_create_render_view_result){ return (rt_create_render_view_result){
.ok = false, .ok = false,
@ -59,6 +78,20 @@ RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_obj
view.lists = storage; view.lists = storage;
view.list_types = (rt_render_object_type *)(view.lists + type_count); view.list_types = (rt_render_object_type *)(view.lists + type_count);
view.list_count = type_count; view.list_count = type_count;
view.type_mask = 0;
for (uint32_t i = 0; i < type_count; ++i) {
rt_create_render_list_result list_res = rtCreateRenderList(types[i], frame_id);
if (!list_res.ok) {
return (rt_create_render_view_result){
.ok = false,
};
}
view.lists[i] = list_res.list;
view.list_types[i] = types[i];
view.type_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]);
}
return (rt_create_render_view_result){.ok = true, .view = view}; return (rt_create_render_view_result){.ok = true, .view = view};
} }
@ -72,6 +105,18 @@ rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const
} }
RT_DLLEXPORT void RT_DLLEXPORT void
rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id) { rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id, unsigned int frame_id) {
g_renderer.SubmitRenderView(render_graph, pass_id, view); g_renderer.SubmitRenderView(render_graph, pass_id, view, frame_id);
}
RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
const rt_render_object_type *types,
uint32_t type_count) {
if (view.list_count != type_count)
return 0;
for (uint32_t i = 0; i < type_count; ++i) {
if (view.list_types[i] != types[i])
return 0;
}
return 1;
} }

View File

@ -12,6 +12,7 @@ typedef struct {
rt_render_list *lists; rt_render_list *lists;
rt_render_object_type *list_types; rt_render_object_type *list_types;
uint32_t list_count; uint32_t list_count;
rt_render_object_type_mask type_mask;
} rt_render_view; } rt_render_view;
typedef struct { typedef struct {
@ -24,13 +25,36 @@ extern "C" {
#endif #endif
RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types, RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types,
uint32_t type_count); uint32_t type_count,
unsigned int frame_id);
RT_DLLEXPORT bool RT_DLLEXPORT bool
rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object); rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object);
RT_DLLEXPORT void RT_DLLEXPORT void rtSubmitRenderView(rt_render_view view,
rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id); rt_render_graph *render_graph,
uint32_t pass_id,
unsigned int frame_id);
/* Checks if the view contains exactly the given types in the given order */
RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view,
const rt_render_object_type *types,
uint32_t type_count);
/* Checks if the set of types in the view is equal to the set of the given
 * types. Only the type masks are compared, so the order is irrelevant. */
RT_INLINE static int
rtDoViewTypesMatch(rt_render_view view, const rt_render_object_type *types, uint32_t type_count) {
    rt_render_object_type_mask in_mask = 0;
    uint32_t type_index = 0;
    while (type_index != type_count) {
        in_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[type_index]);
        type_index += 1;
    }
    int masks_equal = (view.type_mask == in_mask);
    return masks_equal;
}
/* Checks if the view contains at least one of the types set in type_mask.
 * Returns 1 if it does, 0 otherwise.
 *
 * The intersection is compared against 0 instead of being cast to int,
 * because the cast could drop bits of a mask type that is wider than int
 * and thereby report "no match" for a non-empty intersection. */
RT_INLINE static int rtDoesViewContainTypes(rt_render_view view,
                                            rt_render_object_type_mask type_mask) {
    return (view.type_mask & type_mask) != 0;
}
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -36,6 +36,8 @@ RT_RENDER_BACKEND_HANDLE(rt_buffer_handle);
#undef RT_RENDER_BACKEND_HANDLE #undef RT_RENDER_BACKEND_HANDLE
#define RT_COMPARE_RENDER_HANDLES(_A, _B, _Comp) ((*(uint32_t *)&(_A)) _Comp (*(uint32_t *)&(_B)))
/* Init data for the renderer */ /* Init data for the renderer */
#ifdef _WIN32 #ifdef _WIN32
@ -243,7 +245,8 @@ typedef struct {
} rt_pass_info; } rt_pass_info;
typedef struct rt_render_graph_s rt_render_graph; typedef struct rt_render_graph_s rt_render_graph;
typedef rt_result rt_execute_render_pass_fn(rt_command_buffer_handle cmdbuf, typedef rt_result rt_execute_render_pass_fn(uint32_t pass_id,
rt_command_buffer_handle cmdbuf,
const rt_render_view *views, const rt_render_view *views,
unsigned int view_count, unsigned int view_count,
void *userdata); void *userdata);
@ -298,7 +301,7 @@ typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void);
typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder); typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder);
typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph); typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph);
typedef void typedef void
rt_submit_render_view_fn(rt_render_graph *render_graph, uint32_t pass_id, rt_render_view view); rt_submit_render_view_fn(rt_render_graph *render_graph, uint32_t pass_id, rt_render_view view, unsigned int frame_id);
typedef void rt_reset_render_graph_fn(rt_render_graph *graph); typedef void rt_reset_render_graph_fn(rt_render_graph *graph);
typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf, typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,

View File

@ -671,12 +671,15 @@ static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj,
return execution_levels; return execution_levels;
} }
static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order, const uint32_t *execution_levels) { static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj,
const uint32_t *order,
size_t required_size = sizeof(rt_render_graph); const uint32_t *execution_levels) {
size_t runtime_data_size = obj->platform_cbs.GetRuntimeDataSize();
size_t required_size = sizeof(rt_render_graph);
required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle); required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle); required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle);
required_size += obj->pass_count * sizeof(rt_render_pass); required_size += obj->pass_count * sizeof(rt_render_pass);
required_size += obj->pass_count * runtime_data_size;
size_t pass_attachment_size = 0; size_t pass_attachment_size = 0;
@ -699,7 +702,8 @@ static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, cons
(rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count); (rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count);
graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count); graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count);
char *attachment_storage = (char *)(graph->passes + obj->pass_count); char *attachment_storage = (char *)(graph->passes + obj->pass_count);
char *names = attachment_storage + pass_attachment_size; char *runtime_data = attachment_storage + pass_attachment_size;
char *names = runtime_data + runtime_data_size * obj->pass_count;
char *next_name = names; char *next_name = names;
graph->render_target_count = obj->phys_render_target_count; graph->render_target_count = obj->phys_render_target_count;
@ -771,6 +775,8 @@ static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, cons
} }
} }
graph->passes[i].runtime_data = (void *)(runtime_data + i * runtime_data_size);
graph->passes[i].name = next_name; graph->passes[i].name = next_name;
next_name += namelen + 1; next_name += namelen + 1;
memcpy((char *)graph->passes[i].name, obj->passes[passidx].name, namelen + 1); memcpy((char *)graph->passes[i].name, obj->passes[passidx].name, namelen + 1);

View File

@ -17,14 +17,14 @@ typedef struct {
typedef rt_render_target_handle typedef rt_render_target_handle
rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info); rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info);
typedef int rt_rgb_require_explicit_synchronization_fn(void); typedef int rt_rgb_require_explicit_synchronization_fn(void);
typedef size_t rt_rgb_get_runtime_data_size_fn(void);
typedef struct { typedef struct {
rt_rgb_create_render_target_fn *CreateRenderTarget; rt_rgb_create_render_target_fn *CreateRenderTarget;
rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization; rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization;
rt_rgb_get_runtime_data_size_fn *GetRuntimeDataSize;
} rt_render_graph_builder_platform_callbacks; } rt_render_graph_builder_platform_callbacks;
#define RT_MAX_SUBMITTED_VIEWS_PER_PASS 32
typedef struct { typedef struct {
uint32_t flags; uint32_t flags;
@ -55,10 +55,8 @@ typedef struct {
rt_execute_render_pass_fn *Execute; rt_execute_render_pass_fn *Execute;
void *user_data; void *user_data;
/* Runtime data. */ /* Allocated by the backend, used during runtime */
rt_render_view submitted_views[RT_MAX_SUBMITTED_VIEWS_PER_PASS]; void *runtime_data;
uint32_t submitted_view_count;
/* These refer to the semaphores array */ /* These refer to the semaphores array */
uint32_t first_wait; uint32_t first_wait;

View File

@ -108,6 +108,8 @@ extern rt_result InitRenderTargetManagement();
extern void ShutdownRenderTargetManagement(); extern void ShutdownRenderTargetManagement();
extern rt_result InitBufferManagement(); extern rt_result InitBufferManagement();
extern void ShutdownBufferManagement(); extern void ShutdownBufferManagement();
extern rt_result InitPipelineManagement();
extern void ShutdownPipelineManagement();
extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) {
constexpr D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0}; constexpr D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0};
@ -185,6 +187,9 @@ extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info)
if (res != RT_SUCCESS) if (res != RT_SUCCESS)
return res; return res;
res = InitBufferManagement(); res = InitBufferManagement();
if (res != RT_SUCCESS)
return res;
res = InitPipelineManagement();
if (res != RT_SUCCESS) if (res != RT_SUCCESS)
return res; return res;
@ -192,6 +197,7 @@ extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info)
} }
extern "C" void RT_RENDERER_API_FN(Shutdown)(void) { extern "C" void RT_RENDERER_API_FN(Shutdown)(void) {
ShutdownPipelineManagement();
ShutdownBufferManagement(); ShutdownBufferManagement();
ShutdownRenderTargetManagement(); ShutdownRenderTargetManagement();
ShutdownCommandBufferManagement(); ShutdownCommandBufferManagement();

View File

@ -5,6 +5,14 @@
#include "device_objects.hpp" #include "device_objects.hpp"
#include "gpu.hpp" #include "gpu.hpp"
/* Upper bound for the number of views that can be queued on a single pass.
 * SubmitRenderView rejects further submissions once this many are stored. */
static constexpr unsigned int MAX_SUBMITTED_VIEWS_PER_PASS = 4;
/* Backend-private per-pass state, stored in the block the render graph builder
 * reserves for every pass (its size is reported through GetRuntimeDataSize). */
struct rt_pass_runtime_data {
/* Views submitted for this pass; only the first view_count entries are valid. */
rt_render_view views[MAX_SUBMITTED_VIEWS_PER_PASS];
/* Number of valid entries in views. Set back to 0 by ResetRenderGraph. */
uint32_t view_count;
/* Frame id passed with the most recent submission. SubmitRenderView treats 0 as
 * "no submission yet".
 * NOTE(review): ResetRenderGraph as shown resets view_count only; confirm whether
 * this field must be cleared there too, otherwise the frame id assert in
 * SubmitRenderView looks like it would trip on the first submit of the next frame. */
unsigned int views_frame_id;
};
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) { static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) {
return rtCreateRenderTarget({.format = rtinfo->format, return rtCreateRenderTarget({.format = rtinfo->format,
.width = rtinfo->width, .width = rtinfo->width,
@ -16,10 +24,15 @@ static int RequireExplicitSynchronization() {
return 0; return 0;
} }
/* Platform callback: number of bytes the render graph builder has to reserve
 * per pass for this backend's runtime data. */
static size_t GetRuntimeDataSize() {
    constexpr size_t bytes_per_pass = sizeof(rt_pass_runtime_data);
    return bytes_per_pass;
}
extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) { extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
rt_render_graph_builder_platform_callbacks cbs{}; rt_render_graph_builder_platform_callbacks cbs{};
cbs.CreateRenderTarget = CreateRenderTarget; cbs.CreateRenderTarget = CreateRenderTarget;
cbs.RequireExplicitSynchronization = RequireExplicitSynchronization; cbs.RequireExplicitSynchronization = RequireExplicitSynchronization;
cbs.GetRuntimeDataSize = GetRuntimeDataSize;
return rtCreateRenderGraphBuilder(&cbs); return rtCreateRenderGraphBuilder(&cbs);
} }
@ -29,23 +42,31 @@ extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_bu
extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph, extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id, uint32_t pass_id,
rt_render_view view) { rt_render_view view,
unsigned int frame_id) {
for (uint32_t i = 0; i < render_graph->pass_count; ++i) { for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
if (render_graph->passes[i].id == pass_id) { if (render_graph->passes[i].id == pass_id) {
rt_render_pass *pass = &render_graph->passes[i]; rt_render_pass *pass = &render_graph->passes[i];
if (!RT_VERIFY(pass->submitted_view_count < RT_MAX_SUBMITTED_VIEWS_PER_PASS)) rt_pass_runtime_data *runtime_data =
reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
RT_ASSERT(runtime_data->views_frame_id == frame_id || runtime_data->views_frame_id == 0,
"Tried to submit a view for a not-current frame.");
if (!RT_VERIFY(runtime_data->view_count < MAX_SUBMITTED_VIEWS_PER_PASS))
return; return;
pass->submitted_views[pass->submitted_view_count++] = view; runtime_data->views[runtime_data->view_count++] = view;
runtime_data->views_frame_id = frame_id;
} }
} }
} }
extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) { extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) {
for (uint32_t i = 0; i < graph->pass_count; ++i) { for (uint32_t i = 0; i < graph->pass_count; ++i) {
rt_pass_runtime_data *runtime_data =
reinterpret_cast<rt_pass_runtime_data *>(graph->passes[i].runtime_data);
#ifdef RT_DEBUG #ifdef RT_DEBUG
memset(graph->passes[i].submitted_views, 0, sizeof(graph->passes[i].submitted_views)); memset(runtime_data->views, 0, sizeof(runtime_data->views));
#endif #endif
graph->passes[i].submitted_view_count = 0; runtime_data->view_count = 0;
} }
} }
@ -99,7 +120,14 @@ static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdb
cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv); cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);
rt_result res = RT_VERIFY(pass->Execute)(cmdbuf_handle, nullptr, 0, pass->user_data); auto runtime_data = reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
RT_VERIFY(runtime_data);
rt_result res = RT_VERIFY(pass->Execute)(pass->id,
cmdbuf_handle,
runtime_data->views,
runtime_data->view_count,
pass->user_data);
if (cmd->annotation) { if (cmd->annotation) {
cmd->annotation->EndEvent(); cmd->annotation->EndEvent();

View File

@ -131,10 +131,12 @@ rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph)
void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph, void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
uint32_t pass_id, uint32_t pass_id,
rt_render_view view) { rt_render_view view,
unsigned int frame_id) {
RT_UNUSED(render_graph); RT_UNUSED(render_graph);
RT_UNUSED(pass_id); RT_UNUSED(pass_id);
RT_UNUSED(view); RT_UNUSED(view);
RT_UNUSED(frame_id);
} }
void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) { void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) {

View File

@ -7,13 +7,37 @@
/* Increment and decrement return the new value */ /* Increment and decrement return the new value */
#define rtAtomic32Inc(pa) _InterlockedIncrement((volatile LONG *)(pa)) #define rtAtomic32Inc(pa) _InterlockedIncrement((volatile long *)(pa))
#define rtAtomic64Inc(pa) _InterlockedIncrement64((volatile LONG64 *)(pa)) #define rtAtomic64Inc(pa) _InterlockedIncrement64((volatile __int64 *)(pa))
#define rtAtomic32Dec(pa) _InterlockedDecrement((volatile LONG *)(pa)) #define rtAtomic32Dec(pa) _InterlockedDecrement((volatile long *)(pa))
#define rtAtomic64Dec(pa) _InterlockedDecrement64((volatile LONG64 *)(pa)) #define rtAtomic64Dec(pa) _InterlockedDecrement64((volatile __int64 *)(pa))
#define rtAtomic32FetchAdd(pa, value) _InterlockedExchangeAdd((volatile LONG *)(pa), (LONG)(value)) #define rtAtomic32FetchAdd(pa, value) _InterlockedExchangeAdd((volatile long *)(pa), (LONG)(value))
#define rtAtomic64FetchAdd(pa, value) _InterlockedExchangeAdd64((volatile LONG64 *)(pa), (LONG)(value)) #define rtAtomic64FetchAdd(pa, value) \
_InterlockedExchangeAdd64((volatile __int64 *)(pa), (LONG)(value))
/* 32 bit exchange and compare-and-swap.
 *
 * Exchange stores _NewVal in *_pDest; CAS stores _NewVal in *_pDest only if the
 * current value equals _Compare. Both evaluate to the value that was stored in
 * *_pDest before the operation.
 * The Acq/Rel suffixed versions request acquire/release ordering where the
 * target offers dedicated intrinsics for it. */
#if defined(_M_ARM) || defined(_M_ARM64)
#define rtAtomic32ExchangeAcq(_pDest, _NewVal)                                                     \
    _InterlockedExchange_acq((volatile long *)(_pDest), (_NewVal))
#define rtAtomic32ExchangeRel(_pDest, _NewVal)                                                     \
    _InterlockedExchange_rel((volatile long *)(_pDest), (_NewVal))
#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare)                                                \
    _InterlockedCompareExchange_acq((volatile long *)(_pDest), (_NewVal), (_Compare))
#define rtAtomic32CASRel(_pDest, _NewVal, _Compare)                                                \
    _InterlockedCompareExchange_rel((volatile long *)(_pDest), (_NewVal), (_Compare))
#else
/* x64/86 does not have acquire/release versions of these */
#define rtAtomic32ExchangeAcq(_pDest, _NewVal)                                                     \
    _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
#define rtAtomic32ExchangeRel(_pDest, _NewVal)                                                     \
    _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare)                                                \
    _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
#define rtAtomic32CASRel(_pDest, _NewVal, _Compare)                                                \
    _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
#endif
#define rtAtomic32Exchange(_pDest, _NewVal)                                                        \
    _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
#define rtAtomic32CAS(_pDest, _NewVal, _Compare)                                                   \
    _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
#elif defined(__GNUC__) || defined(__clang__) #elif defined(__GNUC__) || defined(__clang__)
@ -23,7 +47,12 @@
#define rtAtomic64Dec(pa) __atomic_sub_fetch((pa), 1LL, __ATOMIC_SEQ_CST) #define rtAtomic64Dec(pa) __atomic_sub_fetch((pa), 1LL, __ATOMIC_SEQ_CST)
#define rtAtomic32FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST) #define rtAtomic32FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
#define rtAtomic64FetchAdd(pa, value) _-atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST) #define rtAtomic64FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
/* TODO Linux versions of compare exchange
https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
*/
#endif #endif
#endif #endif

View File

@ -52,6 +52,50 @@ static RT_INLINE int rtMinheapIsEmpty(rt_minheap *minheap) {
return minheap->size == 0; return minheap->size == 0;
} }
/* Callback used to obtain more memory when the hashtable grows.
 *
 * current / current_size describe the memory block currently used by the table,
 * new_size is the requested size in bytes and userdata is the pointer that was
 * passed to rtCreateHashtable.
 * This function is expected to copy the contents of current (current_size bytes) to the new memory.
 * It returns the (possibly unchanged) address of the block, or NULL on failure. */
typedef void *rt_hashtable_grow_memory_fn(void *current, size_t current_size, size_t new_size, void *userdata);
/* A hashtable.
*
* Maps 64 bit keys to 64 bit values.
* The hashtable is _not_ thread safe.
*
* Keys and values live in one memory block: capacity keys followed by
* capacity values.
*/
typedef struct {
/* Key slots. The implementation reserves two key values as markers for
 * empty and removed slots. */
uint64_t *keys;
/* Value slots, values[i] belongs to keys[i]. */
uint64_t *values;
/* Number of slots; must be a power of two. */
uint64_t capacity;
/* Counter maintained by rtHashtableInsert to decide when to grow.
 * rtHashtableRemove does not decrease it. */
uint64_t used_slots;
/* Passed unchanged to GrowMemoryCallback. */
void *grow_memory_userdata;
/* May be NULL, in which case the table never grows. */
rt_hashtable_grow_memory_fn *GrowMemoryCallback;
} rt_hashtable;
/* Number of bytes of memory a hashtable with _capacity slots needs. */
#define RT_HASH_TABLE_MEMORY_REQUIRED(_capacity) (2 * sizeof(uint64_t) * (_capacity))
/* Default implementation for allocating from an arena.
 * userdata must point to the rt_arena to allocate from.
 * Returns NULL if the arena cannot provide the requested memory. */
RT_DLLEXPORT void *
rtHashtableGrowMemoryFromArena(void *current, size_t current_size, size_t new_size, void *userdata);
/* Creates a hashtable that uses the given memory.
* Capacity must be a power of two.
* memory must provide RT_HASH_TABLE_MEMORY_REQUIRED(capacity) bytes.
* If grow_memory_cb is NULL, the hashtable will not support growth above the initial capacity.
* userdata is handed to grow_memory_cb on every call. */
RT_DLLEXPORT rt_hashtable rtCreateHashtable(uint64_t capacity,
void *memory,
rt_hashtable_grow_memory_fn *grow_memory_cb,
void *userdata);
/* Associates value with key, growing the table through the grow callback if one was provided.
* The keys 0 and 1 are stored as UINT64_MAX and UINT64_MAX - 1 respectively,
* because the table uses 0 and 1 internally. Each of these pairs therefore shares one entry.
* Returns
* RT_OUT_OF_MEMORY if the hashtable is full (or growing it failed) or
* RT_SUCCESS.
* A failed internal rehash is reported as RT_UNKNOWN_ERROR. */
RT_DLLEXPORT rt_result rtHashtableInsert(rt_hashtable *hashtable, uint64_t key, uint64_t value);
/* Returns the value stored for key.
* Returns default_value, if the key is not present in the hashtable */
RT_DLLEXPORT uint64_t rtHashtableLookup(const rt_hashtable *hashtable,
uint64_t key,
uint64_t default_value);
/* Removes key from the hashtable. Does nothing if the key is not present. */
RT_DLLEXPORT void rtHashtableRemove(rt_hashtable *hashtable, uint64_t key);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

157
src/runtime/ds_hashtable.c Normal file
View File

@ -0,0 +1,157 @@
#include "ds.h"
#include "mem_arena.h"
#include <string.h>
/* Marker stored in a key slot that has never been used. */
#define RT_EMPTY_KEY 0ull
/* Marker stored in a key slot whose entry was removed (or moved away). */
#define RT_TOMBSTONE 1ull

/* Maps a user supplied key to the key that is actually stored.
 * The two marker values cannot be stored as keys, so 0 becomes UINT64_MAX and
 * 1 becomes UINT64_MAX - 1. All other keys are returned unchanged. */
static uint64_t FixKey(uint64_t key) {
    const int collides_with_marker = key <= RT_TOMBSTONE;
    return collides_with_marker ? UINT64_MAX - key : key;
}
/* Sets up a hashtable on top of caller provided memory.
 * The first `capacity` 64 bit words of `memory` become the key slots and are
 * cleared (marking every slot as empty); the following `capacity` words become
 * the value slots and are left untouched. */
RT_DLLEXPORT rt_hashtable rtCreateHashtable(uint64_t capacity,
                                            void *memory,
                                            rt_hashtable_grow_memory_fn *grow_memory_cb,
                                            void *userdata) {
    RT_ASSERT(RT_IS_POWER_OF_TWO(capacity), "Hashtable capacity must be a power of two.");

    uint64_t *key_storage = memory;
    memset(key_storage, 0, capacity * sizeof(uint64_t));

    rt_hashtable table;
    table.keys                 = key_storage;
    table.values               = key_storage + capacity;
    table.capacity             = capacity;
    table.used_slots           = 0;
    table.grow_memory_userdata = userdata;
    table.GrowMemoryCallback   = grow_memory_cb;
    return table;
}
/* Doubles the capacity of the hashtable and re-places all stored keys.
 *
 * Memory is requested from the grow callback, which must preserve the current
 * contents. The entries are then redistributed in place: an entry that has to
 * move leaves a tombstone behind, so probe chains that pass through its old
 * slot remain intact.
 *
 * Returns RT_OUT_OF_MEMORY if the callback could not provide memory (the table
 * is unchanged in that case), RT_UNKNOWN_ERROR if an entry could not be placed
 * and RT_SUCCESS otherwise. */
static rt_result Rehash(rt_hashtable *hashtable) {
    const uint64_t old_capacity = hashtable->capacity;
    const uint64_t new_capacity = 2 * old_capacity;

    void *newmem = hashtable->GrowMemoryCallback(hashtable->keys,
                                                 2 * sizeof(uint64_t) * old_capacity,
                                                 2 * sizeof(uint64_t) * new_capacity,
                                                 hashtable->grow_memory_userdata);
    if (!newmem)
        return RT_OUT_OF_MEMORY;

    uint64_t *keys   = newmem;
    uint64_t *values = keys + new_capacity;

    /* The block still has the old layout: old_capacity keys directly followed
     * by old_capacity values. Move the values to the start of the new value
     * array first, so that values[i] keeps belonging to keys[i]. Only then may
     * the upper half of the enlarged key array be cleared, because that is
     * exactly where the old values were stored. */
    memcpy(values, keys + old_capacity, sizeof(uint64_t) * old_capacity);
    memset(keys + old_capacity, 0, sizeof(uint64_t) * old_capacity);

    hashtable->keys     = keys;
    hashtable->values   = values;
    hashtable->capacity = new_capacity;

    const uint64_t mod = new_capacity - 1;
    for (uint64_t i = 0; i < old_capacity; ++i) {
        const uint64_t key = keys[i];
        if (key <= RT_TOMBSTONE)
            continue;

        /* Probe linearly from the key's home slot in the enlarged table. */
        uint64_t offset = 0;
        for (; offset < new_capacity; ++offset) {
            const uint64_t new_slot = (key + offset) & mod;
            if (keys[new_slot] == key || keys[new_slot] <= RT_TOMBSTONE) {
                if (new_slot != i) {
                    keys[new_slot]   = key;
                    values[new_slot] = values[i];
                    keys[i]          = RT_TOMBSTONE;
                }
                break;
            }
        }
        if (offset == new_capacity) {
            rtReportError("CORE", "Rehashing failed. This is a bug.");
            return RT_UNKNOWN_ERROR;
        }
    }
    return RT_SUCCESS;
}
/* Associates value with key.
 *
 * If the key is already stored, its value is replaced. Otherwise the entry is
 * placed in the first free slot (removed or never used) of the key's probe
 * sequence. If a grow callback is set and more than half of the slots are
 * accounted as used, the table is grown before inserting.
 *
 * Returns RT_SUCCESS, the error reported by the growth step, or
 * RT_OUT_OF_MEMORY if no free slot is left. */
RT_DLLEXPORT rt_result rtHashtableInsert(rt_hashtable *hashtable, uint64_t key, uint64_t value) {
    key = FixKey(key);
    if (hashtable->GrowMemoryCallback && (hashtable->used_slots > (hashtable->capacity >> 1))) {
        rt_result res = Rehash(hashtable);
        if (res != RT_SUCCESS)
            return res;
    }

    uint64_t *keys          = hashtable->keys;
    const uint64_t capacity = hashtable->capacity;
    const uint64_t mod      = capacity - 1;

    /* capacity is never a valid index and serves as "no free slot seen yet". */
    uint64_t free_slot = capacity;
    for (uint64_t offset = 0; offset < capacity; ++offset) {
        const uint64_t index  = (key + offset) & mod;
        const uint64_t stored = keys[index];
        if (stored == key) {
            /* Updating an existing entry does not occupy an additional slot. */
            hashtable->values[index] = value;
            return RT_SUCCESS;
        }
        if (stored == RT_TOMBSTONE) {
            /* Remember the slot, but keep probing: the key may still be stored
             * further along the chain, and writing it here as well would create
             * a second, stale entry. */
            if (free_slot == capacity)
                free_slot = index;
        } else if (stored == RT_EMPTY_KEY) {
            /* A never used slot ends the probe chain: the key is not stored. */
            if (free_slot == capacity)
                free_slot = index;
            break;
        }
    }

    if (free_slot == capacity)
        return RT_OUT_OF_MEMORY;

    keys[free_slot]              = key;
    hashtable->values[free_slot] = value;
    ++hashtable->used_slots;
    return RT_SUCCESS;
}
/* Returns the value stored for key, or default_value if the key is not stored.
 * Probing starts at the key's home slot, skips over other keys and removed
 * entries and gives up at the first never used slot (or after visiting every
 * slot once). */
RT_DLLEXPORT uint64_t rtHashtableLookup(const rt_hashtable *hashtable,
                                        uint64_t key,
                                        uint64_t default_value) {
    const uint64_t wanted     = FixKey(key);
    const uint64_t slot_count = hashtable->capacity;
    const uint64_t mask       = slot_count - 1;

    for (uint64_t probe = 0; probe < slot_count; ++probe) {
        const uint64_t slot   = (wanted + probe) & mask;
        const uint64_t stored = hashtable->keys[slot];
        if (stored == wanted)
            return hashtable->values[slot];
        if (stored == RT_EMPTY_KEY)
            return default_value;
    }
    return default_value;
}
/* Removes key from the hashtable by overwriting its slot with a tombstone.
 * The whole probe chain up to the first never used slot is walked, so every
 * slot holding the key is cleared. The used_slots counter is not modified. */
RT_DLLEXPORT void rtHashtableRemove(rt_hashtable *hashtable, uint64_t key) {
    const uint64_t wanted     = FixKey(key);
    uint64_t *slots           = hashtable->keys;
    const uint64_t slot_count = hashtable->capacity;
    const uint64_t mask       = slot_count - 1;

    for (uint64_t probe = 0; probe < slot_count; ++probe) {
        uint64_t *slot = &slots[(wanted + probe) & mask];
        if (*slot == RT_EMPTY_KEY)
            return;
        if (*slot == wanted)
            *slot = RT_TOMBSTONE;
    }
}
/* Grow callback that takes its memory from the rt_arena passed as userdata.
 *
 * If the current block is the most recent allocation of the arena, the arena
 * is extended by the missing bytes and the block stays where it is. Otherwise a
 * new block of new_size bytes is pushed and the current contents are copied
 * into it.
 * Returns the address of the grown block or NULL if the arena push failed. */
RT_DLLEXPORT void *rtHashtableGrowMemoryFromArena(void *current,
                                                  size_t current_size,
                                                  size_t new_size,
                                                  void *userdata) {
    rt_arena *arena          = userdata;
    ptrdiff_t current_offset = (uint8_t *)current - (uint8_t *)arena->base;

    if (current_offset + current_size != arena->at) {
        /* Something else was allocated after the block: relocate it. */
        void *relocated = rtArenaPush(arena, new_size);
        if (relocated)
            memcpy(relocated, current, current_size);
        return relocated;
    }

    /* The block ends at the arena's current position, so pushing the
     * difference extends it in place. */
    return rtArenaPush(arena, new_size - current_size) ? current : NULL;
}

View File

@ -26,6 +26,7 @@ runtime_lib = library('rt',
'buffer_manager.c', 'buffer_manager.c',
'compression.c', 'compression.c',
'config.c', 'config.c',
'ds_hashtable.c',
'ds_minheap.c', 'ds_minheap.c',
'dynamic_libs.c', 'dynamic_libs.c',
'error_report.c', 'error_report.c',

View File

@ -203,6 +203,9 @@ static RT_INLINE uint32_t rtNextPowerOfTwo32(uint32_t v) {
return v; return v;
} }
/* Checks that exactly one bit is set */
#define RT_IS_POWER_OF_TWO(_n) ((_n) && !((_n) & ((_n)-1)))
/* Runtime init. Initializes basic systems. /* Runtime init. Initializes basic systems.
* You need to call this, even if you build a CLI only app. */ * You need to call this, even if you build a CLI only app. */
RT_DLLEXPORT rt_result rtInitRuntime(void); RT_DLLEXPORT rt_result rtInitRuntime(void);

View File

@ -81,6 +81,20 @@ RT_DLLEXPORT void rtSignalSemaphore(rt_semaphore *sem);
RT_DLLEXPORT void rtWaitOnSemaphore(rt_semaphore *sem); RT_DLLEXPORT void rtWaitOnSemaphore(rt_semaphore *sem);
/* Spinlock */
typedef struct {
volatile int lock;
} rt_spinlock;
static RT_INLINE rt_spinlock rtCreateSpinlock(void) {
rt_spinlock l = {0};
return l;
}
RT_DLLEXPORT void rtLockSpinlock(rt_spinlock *lock);
RT_DLLEXPORT void rtUnlockSpinlock(rt_spinlock *lock);
/* Threads */ /* Threads */
typedef struct rt_thread_s rt_thread; typedef struct rt_thread_s rt_thread;

View File

@ -0,0 +1,13 @@
#include "atomics.h"
#include "threading.h"
/* Acquires the spinlock, busy-waiting until it is available.
 * The lock is taken by atomically changing lock->lock from 0 to 1; the
 * compare-and-swap yields the previous value, so a result of 0 means this
 * caller performed the change. */
RT_DLLEXPORT void rtLockSpinlock(rt_spinlock *lock) {
    while (rtAtomic32CASAcq(&lock->lock, 1, 0) != 0) {
        /* Still held by someone else, try again. */
    }
}
/* Releases the spinlock by atomically storing 0 in lock->lock.
 * The previous value returned by the exchange is not needed. */
RT_DLLEXPORT void rtUnlockSpinlock(rt_spinlock *lock) {
    (void)rtAtomic32ExchangeRel(&lock->lock, 0);
}

View File

@ -1,6 +1,8 @@
#include <stdio.h> #include <stdio.h>
#include "runtime/config.h" #include "runtime/config.h"
#include "runtime/ds.h"
#include "runtime/mem_arena.h"
#include "runtime/runtime.h" #include "runtime/runtime.h"
#include "gfx/gfx.h" #include "gfx/gfx.h"
@ -69,7 +71,7 @@ static rt_result PushRenderList(void) {
} dummy_type; } dummy_type;
rt_render_object_type type = rtRegisterRenderObjectType(sizeof(dummy_type), "DummyType"); rt_render_object_type type = rtRegisterRenderObjectType(sizeof(dummy_type), "DummyType");
rt_create_render_list_result list_res = rtCreateRenderList(type); rt_create_render_list_result list_res = rtCreateRenderList(type, 43);
if (!list_res.ok) { if (!list_res.ok) {
return RT_INVALID_VALUE; return RT_INVALID_VALUE;
} }
@ -92,7 +94,7 @@ static rt_result PushLongRenderList(void) {
} dummy_type; } dummy_type;
rt_render_object_type type = rtRegisterRenderObjectType(sizeof(dummy_type), "DummyType"); rt_render_object_type type = rtRegisterRenderObjectType(sizeof(dummy_type), "DummyType");
rt_create_render_list_result list_res = rtCreateRenderList(type); rt_create_render_list_result list_res = rtCreateRenderList(type, 43);
if (!list_res.ok) if (!list_res.ok)
return RT_INVALID_VALUE; return RT_INVALID_VALUE;
rt_render_list list = list_res.list; rt_render_list list = list_res.list;
@ -119,6 +121,41 @@ static rt_result PushLongRenderList(void) {
return RT_SUCCESS; return RT_SUCCESS;
} }
/* Test case: basic hashtable insert/lookup, with and without growth.
 *
 * Besides checking every key directly after its insertion, all keys are looked
 * up again once the table is completely filled. Only this second pass can
 * detect entries that get lost or corrupted by later insertions or by growing
 * the table. */
static rt_result HashTableBasics(void) {
    /* Fixed size table on stack memory (no grow callback): fill every slot. */
    {
        uint64_t mem[128];
        rt_hashtable ht = rtCreateHashtable(64, mem, NULL, NULL);
        for (uint64_t i = 0; i < 64; ++i) {
            if (rtHashtableInsert(&ht, i, i) != RT_SUCCESS)
                return RT_UNKNOWN_ERROR;
            uint64_t found = rtHashtableLookup(&ht, i, UINT64_MAX);
            if (found != i)
                return RT_INVALID_VALUE;
        }
        for (uint64_t i = 0; i < 64; ++i) {
            if (rtHashtableLookup(&ht, i, UINT64_MAX) != i)
                return RT_INVALID_VALUE;
        }
    }

    /* Growable table backed by an arena. Insert grows the table as soon as more
     * than half of the slots are in use, so 64 insertions into 64 slots force
     * the table to grow. */
    {
        rt_create_arena_result arena_res = rtCreateArena(NULL, RT_KB(4));
        if (!arena_res.ok)
            return RT_OUT_OF_MEMORY;
        rt_arena arena = arena_res.arena;

        /* Collect the outcome instead of returning early, so that the arena is
         * released on the failure paths as well. */
        rt_result result = RT_SUCCESS;
        void *mem        = rtArenaPush(&arena, RT_HASH_TABLE_MEMORY_REQUIRED(64));
        if (!mem)
            result = RT_OUT_OF_MEMORY;

        if (result == RT_SUCCESS) {
            rt_hashtable ht = rtCreateHashtable(64, mem, rtHashtableGrowMemoryFromArena, &arena);
            for (uint64_t i = 0; i < 64 && result == RT_SUCCESS; ++i) {
                if (rtHashtableInsert(&ht, 256 + i, i) != RT_SUCCESS)
                    result = RT_UNKNOWN_ERROR;
                else if (rtHashtableLookup(&ht, 256 + i, UINT64_MAX) != i)
                    result = RT_INVALID_VALUE;
            }
            /* Entries inserted before the table grew must have kept their values. */
            for (uint64_t i = 0; i < 64 && result == RT_SUCCESS; ++i) {
                if (rtHashtableLookup(&ht, 256 + i, UINT64_MAX) != i)
                    result = RT_INVALID_VALUE;
            }
        }

        rtReleaseArena(&arena);
        if (result != RT_SUCCESS)
            return result;
    }
    return RT_SUCCESS;
}
/* Scaffolding /* Scaffolding
* *
* Run all the test cases, output if they passed or failed. * Run all the test cases, output if they passed or failed.
@ -169,7 +206,8 @@ static rt_test_fixture *_test_fixtures[] = {TEST_FIXTURE_LIST};
static rt_test_case _test_cases[] = {TEST_CASE(RelPtrTest), static rt_test_case _test_cases[] = {TEST_CASE(RelPtrTest),
TEST_CASE(NegRelPtrTest), TEST_CASE(NegRelPtrTest),
TEST_CASE_FIXTURE(PushRenderList, render_list_fixture), TEST_CASE_FIXTURE(PushRenderList, render_list_fixture),
TEST_CASE_FIXTURE(PushLongRenderList, render_list_fixture)}; TEST_CASE_FIXTURE(PushLongRenderList, render_list_fixture),
TEST_CASE(HashTableBasics)};
int main() { int main() {
int out = 0; int out = 0;