From 656b21d1ef1a8a8dada53784515721b694b5ce19 Mon Sep 17 00:00:00 2001 From: Kevin Trogant Date: Tue, 14 May 2024 14:07:04 +0200 Subject: [PATCH] Loading and caching effects --- src/game/main.c | 47 ++++- src/gfx/builtin_objects.h | 1 + src/gfx/effect.c | 243 ++++++++++++++++++++++ src/gfx/effect.h | 26 +++ src/gfx/gfx_main.c | 19 +- src/gfx/render_list.c | 142 ++++++++----- src/gfx/render_list.h | 2 +- src/gfx/render_view.c | 87 ++++++-- src/gfx/render_view.h | 30 ++- src/gfx/renderer_api.h | 7 +- src/renderer/common/common_render_graph.c | 14 +- src/renderer/common/common_render_graph.h | 10 +- src/renderer/dx11/init.cpp | 6 + src/renderer/dx11/render_graph.cpp | 40 +++- src/renderer/null/null.c | 4 +- src/runtime/atomics.h | 43 +++- src/runtime/ds.h | 44 ++++ src/runtime/ds_hashtable.c | 157 ++++++++++++++ src/runtime/meson.build | 1 + src/runtime/runtime.h | 3 + src/runtime/threading.h | 14 ++ src/runtime/threading_spinlock.c | 13 ++ tests/rttest.c | 44 +++- 23 files changed, 883 insertions(+), 114 deletions(-) create mode 100644 src/runtime/ds_hashtable.c create mode 100644 src/runtime/threading_spinlock.c diff --git a/src/game/main.c b/src/game/main.c index 064f226..aac6973 100644 --- a/src/game/main.c +++ b/src/game/main.c @@ -2,8 +2,10 @@ #include "runtime/resources.h" #include "runtime/threading.h" +#include "gfx/builtin_objects.h" #include "gfx/gfx.h" #include "gfx/renderer_api.h" +#include "gfx/effect.h" #include "asset_compiler/asset_compiler.h" @@ -13,10 +15,33 @@ void RegisterCVars(void) { static rt_render_graph *_graph; -static rt_result ForwardPassExecute(rt_command_buffer_handle cmdbuf, +static rt_result ForwardPassExecute(uint32_t pass_id, + rt_command_buffer_handle cmdbuf, const rt_render_view *views, uint32_t view_count, void *userdata) { + RT_ASSERT(view_count == 1, "Expected a single view for the main camera."); + rt_render_view view = *views; + RT_VERIFY(rtDoViewTypesMatchExact(view, &g_builtin_render_object_types.render_mesh, 1)); + + rt_render_list meshes = view.lists[0]; + if (meshes.length == 0) + return RT_SUCCESS; + + rt_pipeline_handle bound_pipeline = + (RT_GET_RENDER_LIST_ELEMENT(meshes, rt_render_mesh, 0)).pipeline; + g_renderer.CmdBindPipeline(cmdbuf, bound_pipeline); + + for (size_t mesh_idx = 0; mesh_idx < meshes.length; ++mesh_idx) { + rt_render_mesh mesh = RT_GET_RENDER_LIST_ELEMENT(meshes, rt_render_mesh, mesh_idx); + if (RT_COMPARE_RENDER_HANDLES(bound_pipeline, mesh.pipeline, !=)) { + bound_pipeline = mesh.pipeline; + g_renderer.CmdBindPipeline(cmdbuf, bound_pipeline); + } + g_renderer.CmdBindVertexBuffers(cmdbuf, 0, 1, &mesh.vbo, NULL); + g_renderer.CmdDraw(cmdbuf, 0, mesh.vertex_count); + } + return RT_SUCCESS; } @@ -27,6 +52,8 @@ void Init(void) { rtWaitForAssetProcessing(); + rtRegisterBuiltinRenderObjectTypes(); + rt_render_graph_builder builder = g_renderer.CreateRenderGraphBuilder(); rt_attachment_info backbuffer = { .name = "backbuffer", @@ -55,6 +82,21 @@ void Init(void) { } g_renderer.DestroyRenderGraphBuilder(&builder); + + const rt_effect *effect; + if (rtLoadEffect(rtGetResourceID("assets/shader/static_object.effect"), &effect) != + RT_SUCCESS) { + rtReportError("GAME", "Oh noo..."); + } + + const rt_effect *effect2; + if (rtLoadEffect(rtGetResourceID("assets/shader/static_object.effect"), &effect2) != + RT_SUCCESS) { + rtReportError("GAME", "Oh noo..."); + } + + rtReleaseEffect(effect); + rtReleaseEffect(effect2); } /* Called after exiting the main-loop and before the runtime starts its shutdown */ @@ -63,6 +105,9 @@ void 
Shutdown(void) {
     rtShutdownAssetCompiler();
 }
 
+// Question: How do we move data from update to render?
+// This is where we could fill the render views, but that would
+// mean double/triple buffering the views
 void Update(unsigned int frame_id) {
     RT_UNUSED(frame_id);
 }
diff --git a/src/gfx/builtin_objects.h b/src/gfx/builtin_objects.h
index eda231c..617618c 100644
--- a/src/gfx/builtin_objects.h
+++ b/src/gfx/builtin_objects.h
@@ -14,6 +14,7 @@ extern "C" {
 #endif
 
 typedef struct {
+    rt_pipeline_handle pipeline;
     rt_buffer_handle vbo;
     rt_buffer_handle ibo;
     uint32_t vertex_count;
diff --git a/src/gfx/effect.c b/src/gfx/effect.c
index 0b7dbc3..f657c41 100644
--- a/src/gfx/effect.c
+++ b/src/gfx/effect.c
@@ -1,5 +1,88 @@
 #include "effect.h"
+
+#include "runtime/config.h"
+#include "runtime/ds.h"
+#include "runtime/handles.h"
 #include "runtime/hashing.h"
+#include "runtime/mem_arena.h"
+#include "runtime/threading.h"
+#include "runtime/atomics.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+RT_CVAR_SZ(rt_EffectCacheSize, "The number of slots in the effect cache. Default: 1024", 1024);
+
+typedef struct {
+    rt_resource_id resource;
+    rt_effect effect;
+    _Alignas(4) unsigned int refcount;
+} rt_effect_cache_slot;
+
+/* We use a hashtable to find previously loaded effects.
+ * To reclaim unreferenced slots when we need to, we use a minheap.
+ * The minheap implements an LRU list. To track usage, we use a global running "usage counter",
+ * incremented whenever an effect is loaded.
+ */
+typedef struct {
+    rt_effect_cache_slot *slots;
+    rt_hashtable lut;
+    rt_minheap reclaim_heap;
+
+    /* Linearly allocate slots until we reach capacity */
+    size_t next_free;
+
+    /* Used to track "time" since an effect was loaded */
+    _Alignas(4) int usage_counter;
+
+    void *memory;
+
+    rt_rwlock lock;
+} rt_effect_cache;
+
+static rt_effect_cache _cache;
+
+rt_result InitEffectCache(void) {
+    if (!RT_IS_POWER_OF_TWO(rt_EffectCacheSize.sz)) {
+        rtReportError(
+            "GFX",
+            "The value of \"rt_EffectCacheSize\" must be a power of two.\nConfigured: %zu.",
+            rt_EffectCacheSize.sz);
+        return RT_INVALID_VALUE;
+    }
+
+    rt_create_rwlock_result lock_res = rtCreateRWLock();
+    if (!lock_res.ok)
+        return RT_UNKNOWN_ERROR;
+    _cache.lock = lock_res.lock;
+
+    size_t mem_required = sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz +
+                          RT_HASH_TABLE_MEMORY_REQUIRED(
+                              2 * rt_EffectCacheSize.sz) + /* double to keep performance up */
+                          sizeof(int) * rt_EffectCacheSize.sz +   /* heap keys */
+                          sizeof(size_t) * rt_EffectCacheSize.sz; /* heap values */
+    _cache.memory = malloc(mem_required);
+    if (!_cache.memory) {
+        rtDestroyRWLock(&_cache.lock);
+        return RT_OUT_OF_MEMORY;
+    }
+    /* The lookup table memory was sized for twice the slot count above, so create it with that
+     * capacity to keep the load factor low. */
+    _cache.lut = rtCreateHashtable(2 * rt_EffectCacheSize.sz, _cache.memory, NULL, NULL);
+
+    int *keys =
+        (int *)((char *)_cache.memory + RT_HASH_TABLE_MEMORY_REQUIRED(2 * rt_EffectCacheSize.sz));
+    size_t *values = (size_t *)(keys + rt_EffectCacheSize.sz);
+    _cache.reclaim_heap = rtCreateMinheap(keys, values, sizeof(size_t), rt_EffectCacheSize.sz, 0);
+    _cache.usage_counter = 0;
+
+    _cache.slots = (rt_effect_cache_slot *)(values + rt_EffectCacheSize.sz);
+    memset(_cache.slots, 0, sizeof(rt_effect_cache_slot) * rt_EffectCacheSize.sz);
+
+    return RT_SUCCESS;
+}
+
+void ShutdownEffectCache(void) {
+    free(_cache.memory);
+}
 
 RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) {
     uint32_t id = rtHashBytes32(name, len);
@@ -14,3 +97,163 @@ RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len) {
     id = ~id;
     return id;
 }
+
+static void 
ReleaseEffect(rt_effect *effect) { + for (unsigned int i = 0; i < effect->pass_count; ++i) { + g_renderer.DestroyPipeline(effect->passes[i].pipeline); + } +} + +/* Returns the index of the reserved slot */ +static size_t ReserveSlot(rt_resource_id id) { + if (_cache.next_free < rt_EffectCacheSize.sz) { + size_t slot = _cache.next_free++; + RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use."); + _cache.slots[slot].refcount = 1; + + if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) { + rtLog("GFX", "Failed to insert effect %x into the lookup table.", id); + _cache.slots[slot].refcount = 0; + rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot); + return SIZE_MAX; + } + _cache.slots[slot].resource = id; + return slot; + } else if (!rtMinheapIsEmpty(&_cache.reclaim_heap)) { + size_t slot; + rtMinheapPop(&_cache.reclaim_heap, &slot); + RT_ASSERT(_cache.slots[slot].refcount == 0, "Got a slot that is still in use."); + _cache.slots[slot].refcount = 1; + + rt_resource_id old_id = _cache.slots[slot].resource; + RT_ASSERT(old_id != RT_INVALID_RESOURCE_ID, "The slot should contain an old effect."); + ReleaseEffect(&_cache.slots[slot].effect); + rtHashtableRemove(&_cache.lut, old_id); + + if (rtHashtableInsert(&_cache.lut, id, slot) != RT_SUCCESS) { + rtLog("GFX", "Failed to insert effect %x into the lookup table.", id); + _cache.slots[slot].refcount = 0; + rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot); + return SIZE_MAX; + } + _cache.slots[slot].resource = id; + return slot; + } else { + rtLog("GFX", + "Could not insert effect %x into the cache, because the effect cache is full.", + id); + return SIZE_MAX; + } +} + +/* Load resource to memory allocated on the given arena */ +static rt_result LoadResource(rt_resource_id id, void **p_out, rt_arena *arena) { + size_t size = rtGetResourceSize(id); + if (!size) { + rtLog("GFX", "ID %x is not a valid resource.", id); + return RT_INVALID_VALUE; + } + void *dst = rtArenaPush(arena, size); + if (!dst) { + rtLog("GFX", "Failed to allocate %zu bytes of temporary storage.", size); + return RT_OUT_OF_MEMORY; + } + *p_out = dst; + return rtGetResource(id, dst); +} + +static rt_result LoadEffect(rt_resource_id id, rt_effect *effect) { + rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); + if (!temp.arena) { + rtLog("GFX", "Could not get a temporary arena."); + return RT_OUT_OF_MEMORY; + } + const rt_resource *resource = NULL; + rt_result res = LoadResource(id, &resource, temp.arena); + if (res != RT_SUCCESS) { + rtReturnTemporaryArena(temp); + return res; + } + if (resource->type != RT_RESOURCE_EFFECT) { + rtReturnTemporaryArena(temp); + rtLog("GFX", "Resource %x does not refer to an effect resource.", id); + return RT_INVALID_VALUE; + } + const rt_effect_info *effect_info = resource->data; + effect->pass_count = effect_info->pass_count; + + for (unsigned int i = 0; i < effect_info->pass_count; ++i) { + rt_resource *pipeline_resource = NULL; + res = LoadResource(effect_info->passes[i].pipeline, &pipeline_resource, temp.arena); + if (res != RT_SUCCESS) { + rtReturnTemporaryArena(temp); + return res; + } + if (pipeline_resource->type != RT_RESOURCE_PIPELINE) { + rtReturnTemporaryArena(temp); + rtLog("GFX", "Resource %x does not refer to a pipeline resource.", id); + return RT_INVALID_VALUE; + } + rt_pipeline_info *pipeline_info = pipeline_resource->data; + rt_pipeline_handle pipeline = g_renderer.CompilePipeline(pipeline_info); + if (!RT_IS_HANDLE_VALID(pipeline)) { + rtReturnTemporaryArena(temp); + 
rtLog("GFX", + "Failed to compile the pipeline of pass %d (%x).", + i, + effect_info->passes[i].pass_id); + return RT_UNKNOWN_ERROR; + } + effect->passes[i].pass_id = effect_info->passes[i].pass_id; + effect->passes[i].pipeline = pipeline; + } + + rtReturnTemporaryArena(temp); + + return RT_SUCCESS; +} + +RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect) { + rtAtomic32Inc(&_cache.usage_counter); + + /* Check if the effect is already loaded */ + rtLockRead(&_cache.lock); + uint64_t slot = rtHashtableLookup(&_cache.lut, id, UINT64_MAX); + if (slot != UINT64_MAX) { + + RT_ASSERT(_cache.slots[slot].resource == id, "Got the wrong effect"); + rtAtomic32Inc(&_cache.slots[slot].refcount); + *effect = &_cache.slots[slot].effect; + rtUnlockRead(&_cache.lock); + return RT_SUCCESS; + } + rtUnlockRead(&_cache.lock); + + /* Load the effect */ + rtLockWrite(&_cache.lock); + if (rtHashtableLookup(&_cache.lut, id, UINT64_MAX) != UINT64_MAX) { + /* Another thread was faster than we, just retry */ + rtUnlockWrite(&_cache.lock); + return rtLoadEffect(id, effect); + } + slot = ReserveSlot(id); + if (slot == SIZE_MAX) { + rtUnlockWrite(&_cache.lock); + return RT_OUT_OF_MEMORY; + } + rt_result res = LoadEffect(id, &_cache.slots[slot].effect); + rtUnlockWrite(&_cache.lock); + *effect = &_cache.slots[slot].effect; + return res; +} + +RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect) { + RT_VERIFY(effect); + rt_effect_cache_slot *slot = (rt_effect_cache_slot *)((char *)effect - offsetof(rt_effect_cache_slot, effect)); + if (rtAtomic32Dec(&slot->refcount) == 0) { + rtLockWrite(&_cache.lock); + size_t slot_index = (size_t)(slot - _cache.slots); + rtMinheapPush(&_cache.reclaim_heap, _cache.usage_counter, &slot_index); + rtUnlockWrite(&_cache.lock); + } +} diff --git a/src/gfx/effect.h b/src/gfx/effect.h index b5006e3..d3154d5 100644 --- a/src/gfx/effect.h +++ b/src/gfx/effect.h @@ -7,8 +7,11 @@ */ #include "gfx.h" +#include "renderer_api.h" #include "runtime/resources.h" +/* *** Resource types *** */ + typedef struct rt_pipeline_info_s { rt_resource_id vertex_shader; rt_resource_id fragment_shader; @@ -28,7 +31,30 @@ typedef struct { rt_effect_pass_info passes[RT_MAX_SUBRESOURCES]; } rt_effect_info; +/* *** Runtime types *** */ + +typedef struct { + uint32_t pass_id; + rt_pipeline_handle pipeline; +} rt_effect_pass; + +typedef struct { + uint32_t pass_count; + rt_effect_pass passes[RT_MAX_SUBRESOURCES]; +} rt_effect; + RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len); RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len); +/* Load an effect from a resource file. + * Returns: + * - RT_SUCCESS + * - RT_OUT_OF_MEMORY, if temporary memory allocations failed + * - RT_INVALID_VALUE, if id does not refer to an effect resource. + * - RT_UNKNOWN_ERROR, if a pipeline failed to compile + * - errors returned by rtGetResource() */ +RT_DLLEXPORT rt_result rtLoadEffect(rt_resource_id id, const rt_effect **effect); + +RT_DLLEXPORT void rtReleaseEffect(const rt_effect *effect); + #endif diff --git a/src/gfx/gfx_main.c b/src/gfx/gfx_main.c index 1ef0464..4ecb388 100644 --- a/src/gfx/gfx_main.c +++ b/src/gfx/gfx_main.c @@ -23,7 +23,7 @@ static bool _renderer_loaded = false; RT_DLLEXPORT RT_CVAR_S(rt_Renderer, - "Select the render backend. Available options: [vk, null], Default: vk", + "Select the render backend. 
Available options: [vk, dx11, null], Default: vk", "dx11"); extern rt_cvar rt_RenderViewArenaSize; @@ -59,7 +59,8 @@ extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builde extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *); extern void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph, uint32_t pass_id, - rt_render_view view); + rt_render_view view, + unsigned int frame_id); extern void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph); extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle, @@ -81,10 +82,12 @@ extern void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle, uint32_t, uint extern rt_result InitRenderLists(void); extern void ShutdownRenderLists(void); -extern void ResetRenderLists(void); +extern void ResetRenderLists(unsigned int frame_id); extern rt_result InitRenderViews(void); extern void ShutdownRenderViews(void); -extern void ResetRenderViews(void); +extern void ResetRenderViews(unsigned int frame_id); +extern rt_result InitEffectCache(void); +extern void ShutdownEffectCache(void); static bool LoadRenderer(void) { @@ -196,10 +199,14 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) { if ((result = InitRenderViews()) != RT_SUCCESS) return result; + if ((result = InitEffectCache()) != RT_SUCCESS) + return result; + return result; } RT_DLLEXPORT void rtShutdownGFX(void) { + ShutdownEffectCache(); ShutdownRenderViews(); ShutdownRenderLists(); g_renderer.Shutdown(); @@ -211,6 +218,6 @@ RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id) { RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id) { g_renderer.EndFrame(frame_id); - ResetRenderLists(); - ResetRenderViews(); + ResetRenderLists(frame_id); + ResetRenderViews(frame_id); } diff --git a/src/gfx/render_list.c b/src/gfx/render_list.c index 99f53c8..46ee069 100644 --- a/src/gfx/render_list.c +++ b/src/gfx/render_list.c @@ -1,8 +1,9 @@ #include "render_list.h" +#include "renderer_api.h" -#include "runtime/threading.h" -#include "runtime/mem_arena.h" #include "runtime/config.h" +#include "runtime/mem_arena.h" +#include "runtime/threading.h" #include @@ -20,33 +21,49 @@ typedef struct rt_list_pool_s { struct rt_list_pool_s *next; } rt_list_pool; +typedef struct { + rt_mutex *lock; + rt_list_pool *first_free; + rt_arena arena; + + unsigned int access_frame_id; +} rt_frame_lists; + #define DEFAULT_LIST_CAPACITY RT_KB(1) static rt_render_object_type_data _types[RT_MAX_RENDER_OBJECT_TYPE + 1]; static unsigned int _type_count = 0; static rt_rwlock _type_lock; -static rt_arena _list_arena; -static rt_list_pool *_first_free_list; -static rt_mutex *_list_lock; +static rt_frame_lists _frame_lists[4]; +static unsigned int _max_frames_in_flight; rt_result InitRenderLists(void) { rt_create_rwlock_result lock_res = rtCreateRWLock(); if (!lock_res.ok) return RT_UNKNOWN_ERROR; _type_lock = lock_res.lock; - - rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderListPoolSize.i); - if (!arena_res.ok) { - rtDestroyRWLock(&_type_lock); - return RT_OUT_OF_MEMORY; - } - _list_arena = arena_res.arena; - _list_lock = rtCreateMutex(); - if (!_list_lock) { - rtReleaseArena(&_list_arena); - rtDestroyRWLock(&_type_lock); + _max_frames_in_flight = g_renderer.GetMaxFramesInFlight(); + RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frame_lists), + "Invalid maxium number of in-flight frames."); + + for (unsigned int i = 0; i < _max_frames_in_flight; ++i) { + rt_create_arena_result 
arena_res = rtCreateArena(NULL, (size_t)rt_RenderListPoolSize.i);
+        if (!arena_res.ok) {
+            rtDestroyRWLock(&_type_lock);
+            return RT_OUT_OF_MEMORY;
+        }
+        _frame_lists[i].arena = arena_res.arena;
+
+        _frame_lists[i].lock = rtCreateMutex();
+        if (!_frame_lists[i].lock) {
+            rtReleaseArena(&_frame_lists[i].arena);
+            rtDestroyRWLock(&_type_lock);
+            return RT_UNKNOWN_ERROR;
+        }
+
+        _frame_lists[i].first_free = NULL;
+        _frame_lists[i].access_frame_id = 0;
+    }
 
     return RT_SUCCESS;
@@ -54,11 +71,13 @@ rt_result InitRenderLists(void) {
 
 void ShutdownRenderLists(void) {
     rtDestroyRWLock(&_type_lock);
-    rtDestroyMutex(_list_lock);
-    rtReleaseArena(&_list_arena);
+    for (unsigned int i = 0; i < _max_frames_in_flight; ++i) {
+        rtDestroyMutex(_frame_lists[i].lock);
+        rtReleaseArena(&_frame_lists[i].arena);
+    }
 }
 
-RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
+RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_size,
                                                               const char *debug_name) {
     if (_type_count == RT_MAX_RENDER_OBJECT_TYPE) {
         rtReportError("GFX", "Too many render object types (max is %u)", RT_MAX_RENDER_OBJECT_TYPE);
     }
     rtLockWrite(&_type_lock);
     rt_render_object_type type = (rt_render_object_type)++_type_count;
-    _types[_type_count].size = object_size;
-    _types[_type_count].name = debug_name;
+    _types[_type_count].size = object_size;
+    _types[_type_count].name = debug_name;
     if (debug_name)
         rtLog("GFX",
               "Registered render object type %s; object size: %zu. Type: %u",
@@ -83,7 +102,6 @@ RT_DLLEXPORT rt_render_object_type rtRegisterRenderObjectType(size_t object_siz
     return type;
 }
 
-
 RT_DLLEXPORT size_t rtGetRenderObjectSize(rt_render_object_type type) {
     size_t size = 0;
     rtLockRead(&_type_lock);
@@ -102,66 +120,84 @@ RT_DLLEXPORT const char *rtGetRenderObjectTypeDebugName(rt_render_object_ty
     return name;
 }
 
-static rt_create_render_list_result CreateNewList(rt_render_object_type type, size_t capacity) {
+static rt_create_render_list_result
+CreateNewList(rt_render_object_type type, unsigned int frame_id, size_t capacity) {
     rt_create_render_list_result res = {.ok = false};
-    rtLockMutex(_list_lock);
+    unsigned int slot = frame_id % _max_frames_in_flight;
+    rtLockMutex(_frame_lists[slot].lock);
 
-    if (!_first_free_list || _first_free_list->capacity < capacity) { /* Allocate a new list */
-        rt_list_pool *pool =
-            rtArenaPush(&_list_arena, sizeof(rt_list_pool) + capacity);
+    _frame_lists[slot].access_frame_id = frame_id;
+
+    if (!_frame_lists[slot].first_free ||
+        _frame_lists[slot].first_free->capacity < capacity) { /* Allocate a new list */
+        rt_list_pool *pool = rtArenaPush(&_frame_lists[slot].arena,
+                                         sizeof(rt_list_pool) + sizeof(unsigned int) + capacity);
        if (!pool) {
            rtReportError("GFX",
                          "Out of render list pool space! 
Configured space: %d kiB", rt_RenderListPoolSize.i / 1024); goto out; } - pool->capacity = capacity; - pool->next = _first_free_list; - _first_free_list = pool; + pool->capacity = capacity; + pool->next = _frame_lists[slot].first_free; + _frame_lists[slot].first_free = pool; } rt_render_list list; - list.data = (char *)_first_free_list + sizeof(rt_list_pool); - list.type = type; - list.length = 0; - res.ok = true; - res.list = list; - _first_free_list = _first_free_list->next; + unsigned int *frame_id_store = + (unsigned int *)((char *)_frame_lists[slot].first_free + sizeof(rt_list_pool)); + *frame_id_store = frame_id; + list.data = (char *)_frame_lists[slot].first_free + sizeof(rt_list_pool) + sizeof(unsigned int); + list.type = type; + list.length = 0; + res.ok = true; + res.list = list; + _frame_lists[slot].first_free = _frame_lists[slot].first_free->next; out: - rtUnlockMutex(_list_lock); + rtUnlockMutex(_frame_lists[slot].lock); return res; } -RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type) { - return CreateNewList(type, DEFAULT_LIST_CAPACITY); +RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type, + unsigned int frame_id) { + return CreateNewList(type, frame_id, DEFAULT_LIST_CAPACITY); } -void ResetRenderLists(void) { - rtLockMutex(_list_lock); - _first_free_list = NULL; - rtArenaClear(&_list_arena); - rtUnlockMutex(_list_lock); +void ResetRenderLists(unsigned int frame_id) { + unsigned int slot = frame_id % _max_frames_in_flight; + RT_ASSERT(_frame_lists[slot].access_frame_id == frame_id || + _frame_lists[slot].access_frame_id == 0, + "Frame id mismatch"); + rtLockMutex(_frame_lists[slot].lock); + _frame_lists[slot].first_free = NULL; + _frame_lists[slot].access_frame_id = 0; + rtArenaClear(&_frame_lists[slot].arena); + rtUnlockMutex(_frame_lists[slot].lock); } RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object) { size_t object_size = rtGetRenderObjectSize(list->type); - rt_list_pool *pool = (rt_list_pool *)((char *)list->data - sizeof(rt_list_pool)); - size_t list_capacity = pool->capacity / object_size; + rt_list_pool *pool = + (rt_list_pool *)((char *)list->data - sizeof(rt_list_pool) - sizeof(unsigned int)); + unsigned int frame_id = *(unsigned int *)((char *)list->data - sizeof(unsigned int)); + size_t list_capacity = pool->capacity / object_size; if (list->length == list_capacity) { /* "Grow" the list */ - rt_create_render_list_result list_res = CreateNewList(list->type, pool->capacity * 2); + rt_create_render_list_result list_res = + CreateNewList(list->type, frame_id, pool->capacity * 2); if (!list_res.ok) return false; memcpy(list_res.list.data, list->data, list->length * object_size); - - rtLockMutex(_list_lock); - pool->next = _first_free_list; - _first_free_list = pool; - rtUnlockMutex(_list_lock); + + unsigned int slot = frame_id % _max_frames_in_flight; + rtLockMutex(_frame_lists[slot].lock); + pool->next = _frame_lists[slot].first_free; + _frame_lists[slot].first_free = pool; + rtUnlockMutex(_frame_lists[slot].lock); list_res.list.length = list->length; - *list = list_res.list; + *list = list_res.list; } char *dst = (char *)list->data + list->length * object_size; diff --git a/src/gfx/render_list.h b/src/gfx/render_list.h index eb5d3ed..3c594ff 100644 --- a/src/gfx/render_list.h +++ b/src/gfx/render_list.h @@ -58,7 +58,7 @@ typedef struct { /* Create a render list for a particular object type. * * Render Lists have a lifetime of one frame. 
*/ -RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type); +RT_DLLEXPORT rt_create_render_list_result rtCreateRenderList(rt_render_object_type type, unsigned int frame_id); /* Append a render object to a list. The object must be of the correct type. */ RT_DLLEXPORT bool rtPushRenderListEntry(rt_render_list *list, const void *object); diff --git a/src/gfx/render_view.c b/src/gfx/render_view.c index d476aa0..5fb2958 100644 --- a/src/gfx/render_view.c +++ b/src/gfx/render_view.c @@ -9,34 +9,50 @@ RT_CVAR_I(rt_RenderViewArenaSize, "Size of the memory arena used for allocating render views. Default: 1 MB", RT_MB(1)); -static rt_arena _view_arena; -static rt_mutex *_view_lock; +typedef struct { + rt_arena arena; + rt_mutex *lock; + uint32_t frame_id; +} rt_frame_views; + +static rt_frame_views _frames[4]; +static unsigned int _max_frames_in_flight; rt_result InitRenderViews(void) { - rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderViewArenaSize.i); - if (!arena_res.ok) - return RT_OUT_OF_MEMORY; - _view_arena = arena_res.arena; - _view_lock = rtCreateMutex(); - if (!_view_lock) { - rtReleaseArena(&_view_arena); - return RT_UNKNOWN_ERROR; + _max_frames_in_flight = g_renderer.GetMaxFramesInFlight(); + RT_ASSERT(_max_frames_in_flight > 0 && _max_frames_in_flight < RT_ARRAY_COUNT(_frames), + "Invalid maximum number of in-flight frames."); + + for (unsigned int i = 0; i < _max_frames_in_flight; ++i) { + rt_create_arena_result arena_res = rtCreateArena(NULL, (size_t)rt_RenderViewArenaSize.i); + if (!arena_res.ok) + return RT_OUT_OF_MEMORY; + _frames[i].arena = arena_res.arena; + _frames[i].lock = rtCreateMutex(); + if (!_frames[i].lock) { + rtReleaseArena(&_frames[i].arena); + return RT_UNKNOWN_ERROR; + } + _frames[i].frame_id = 0; } return RT_SUCCESS; } void ShutdownRenderViews(void) { - rtDestroyMutex(_view_lock); - rtReleaseArena(&_view_arena); + for (unsigned int i = 0; i < _max_frames_in_flight; ++i) { + rtDestroyMutex(_frames[i].lock); + rtReleaseArena(&_frames[i].arena); + } } -void ResetRenderViews(void) { - rtArenaClear(&_view_arena); +void ResetRenderViews(unsigned int frame_id) { + unsigned int slot = frame_id % _max_frames_in_flight; + rtArenaClear(&_frames[slot].arena); } RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types, - uint32_t type_count) { - + uint32_t type_count, + unsigned int frame_id) { #ifdef RT_DEBUG for (uint32_t i = 0; i < type_count - 1; ++i) { for (uint32_t j = i + 1; j < type_count; ++j) { @@ -45,10 +61,13 @@ RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_obj } #endif + unsigned int slot = frame_id % _max_frames_in_flight; + size_t size = type_count * (sizeof(rt_render_list) + sizeof(rt_render_list)); - rtLockMutex(_view_lock); - void *storage = rtArenaPush(&_view_arena, size); - rtUnlockMutex(_view_lock); + rtLockMutex(_frames[slot].lock); + void *storage = rtArenaPush(&_frames[slot].arena, size); + _frames[slot].frame_id = frame_id; + rtUnlockMutex(_frames[slot].lock); if (!storage) { return (rt_create_render_view_result){ .ok = false, @@ -59,6 +78,20 @@ RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_obj view.lists = storage; view.list_types = (rt_render_object_type *)(view.lists + type_count); view.list_count = type_count; + view.type_mask = 0; + + for (uint32_t i = 0; i < type_count; ++i) { + rt_create_render_list_result list_res = rtCreateRenderList(types[i], frame_id); + if (!list_res.ok) { + return 
(rt_create_render_view_result){ + .ok = false, + }; + } + view.lists[i] = list_res.list; + view.list_types[i] = types[i]; + view.type_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]); + } + return (rt_create_render_view_result){.ok = true, .view = view}; } @@ -72,6 +105,18 @@ rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const } RT_DLLEXPORT void -rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id) { - g_renderer.SubmitRenderView(render_graph, pass_id, view); +rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id, unsigned int frame_id) { + g_renderer.SubmitRenderView(render_graph, pass_id, view, frame_id); +} + +RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view, + const rt_render_object_type *types, + uint32_t type_count) { + if (view.list_count != type_count) + return 0; + for (uint32_t i = 0; i < type_count; ++i) { + if (view.list_types[i] != types[i]) + return 0; + } + return 1; } \ No newline at end of file diff --git a/src/gfx/render_view.h b/src/gfx/render_view.h index 883a43a..dbf6c95 100644 --- a/src/gfx/render_view.h +++ b/src/gfx/render_view.h @@ -12,6 +12,7 @@ typedef struct { rt_render_list *lists; rt_render_object_type *list_types; uint32_t list_count; + rt_render_object_type_mask type_mask; } rt_render_view; typedef struct { @@ -24,13 +25,36 @@ extern "C" { #endif RT_DLLEXPORT rt_create_render_view_result rtCreateRenderView(const rt_render_object_type *types, - uint32_t type_count); + uint32_t type_count, + unsigned int frame_id); RT_DLLEXPORT bool rtPushRenderObjectToView(rt_render_view *view, rt_render_object_type type, const void *object); -RT_DLLEXPORT void -rtSubmitRenderView(rt_render_view view, rt_render_graph *render_graph, uint32_t pass_id); +RT_DLLEXPORT void rtSubmitRenderView(rt_render_view view, + rt_render_graph *render_graph, + uint32_t pass_id, + unsigned int frame_id); + +/* Checks if the view contains exactly the given types in the given order */ +RT_DLLEXPORT int rtDoViewTypesMatchExact(rt_render_view view, + const rt_render_object_type *types, + uint32_t type_count); + +/* Checks if the view contains exactly the given types, in any order */ +RT_INLINE static int +rtDoViewTypesMatch(rt_render_view view, const rt_render_object_type *types, uint32_t type_count) { + rt_render_object_type_mask in_mask = 0; + for (uint32_t i = 0; i < type_count; ++i) { + in_mask |= RT_RENDER_OBJECT_TYPE_BIT(types[i]); + } + return view.type_mask == in_mask; +} + +RT_INLINE static int rtDoesViewContainTypes(rt_render_view view, + rt_render_object_type_mask type_mask) { + return (int)(view.type_mask & type_mask); +} #ifdef __cplusplus } diff --git a/src/gfx/renderer_api.h b/src/gfx/renderer_api.h index 4404557..c7bdab7 100644 --- a/src/gfx/renderer_api.h +++ b/src/gfx/renderer_api.h @@ -36,6 +36,8 @@ RT_RENDER_BACKEND_HANDLE(rt_buffer_handle); #undef RT_RENDER_BACKEND_HANDLE +#define RT_COMPARE_RENDER_HANDLES(_A, _B, _Comp) ((*(uint32_t *)&(_A)) _Comp (*(uint32_t *)&(_B))) + /* Init data for the renderer */ #ifdef _WIN32 @@ -243,7 +245,8 @@ typedef struct { } rt_pass_info; typedef struct rt_render_graph_s rt_render_graph; -typedef rt_result rt_execute_render_pass_fn(rt_command_buffer_handle cmdbuf, +typedef rt_result rt_execute_render_pass_fn(uint32_t pass_id, + rt_command_buffer_handle cmdbuf, const rt_render_view *views, unsigned int view_count, void *userdata); @@ -298,7 +301,7 @@ typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void); typedef void 
rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder); typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph); typedef void -rt_submit_render_view_fn(rt_render_graph *render_graph, uint32_t pass_id, rt_render_view view); +rt_submit_render_view_fn(rt_render_graph *render_graph, uint32_t pass_id, rt_render_view view, unsigned int frame_id); typedef void rt_reset_render_graph_fn(rt_render_graph *graph); typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf, diff --git a/src/renderer/common/common_render_graph.c b/src/renderer/common/common_render_graph.c index 0b2d3f1..0899a81 100644 --- a/src/renderer/common/common_render_graph.c +++ b/src/renderer/common/common_render_graph.c @@ -671,12 +671,15 @@ static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj, return execution_levels; } -static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order, const uint32_t *execution_levels) { - - size_t required_size = sizeof(rt_render_graph); +static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, + const uint32_t *order, + const uint32_t *execution_levels) { + size_t runtime_data_size = obj->platform_cbs.GetRuntimeDataSize(); + size_t required_size = sizeof(rt_render_graph); required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle); required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle); required_size += obj->pass_count * sizeof(rt_render_pass); + required_size += obj->pass_count * runtime_data_size; size_t pass_attachment_size = 0; @@ -699,7 +702,8 @@ static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, cons (rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count); graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count); char *attachment_storage = (char *)(graph->passes + obj->pass_count); - char *names = attachment_storage + pass_attachment_size; + char *runtime_data = attachment_storage + pass_attachment_size; + char *names = runtime_data + runtime_data_size * obj->pass_count; char *next_name = names; graph->render_target_count = obj->phys_render_target_count; @@ -771,6 +775,8 @@ static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, cons } } + graph->passes[i].runtime_data = (void *)(runtime_data + i * runtime_data_size); + graph->passes[i].name = next_name; next_name += namelen + 1; memcpy((char *)graph->passes[i].name, obj->passes[passidx].name, namelen + 1); diff --git a/src/renderer/common/common_render_graph.h b/src/renderer/common/common_render_graph.h index fe3b117..4c4614f 100644 --- a/src/renderer/common/common_render_graph.h +++ b/src/renderer/common/common_render_graph.h @@ -17,14 +17,14 @@ typedef struct { typedef rt_render_target_handle rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info); typedef int rt_rgb_require_explicit_synchronization_fn(void); +typedef size_t rt_rgb_get_runtime_data_size_fn(void); typedef struct { rt_rgb_create_render_target_fn *CreateRenderTarget; rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization; + rt_rgb_get_runtime_data_size_fn *GetRuntimeDataSize; } rt_render_graph_builder_platform_callbacks; -#define RT_MAX_SUBMITTED_VIEWS_PER_PASS 32 - typedef struct { uint32_t flags; @@ -55,10 +55,8 @@ typedef struct { rt_execute_render_pass_fn *Execute; void *user_data; - /* Runtime data. 
*/ - rt_render_view submitted_views[RT_MAX_SUBMITTED_VIEWS_PER_PASS]; - uint32_t submitted_view_count; - + /* Allocated by the backend, used during runtime */ + void *runtime_data; /* These refer to the semaphores array */ uint32_t first_wait; diff --git a/src/renderer/dx11/init.cpp b/src/renderer/dx11/init.cpp index a07c878..22de56c 100644 --- a/src/renderer/dx11/init.cpp +++ b/src/renderer/dx11/init.cpp @@ -108,6 +108,8 @@ extern rt_result InitRenderTargetManagement(); extern void ShutdownRenderTargetManagement(); extern rt_result InitBufferManagement(); extern void ShutdownBufferManagement(); +extern rt_result InitPipelineManagement(); +extern void ShutdownPipelineManagement(); extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) { constexpr D3D_FEATURE_LEVEL feature_levels[] = {D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0}; @@ -185,6 +187,9 @@ extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) if (res != RT_SUCCESS) return res; res = InitBufferManagement(); + if (res != RT_SUCCESS) + return res; + res = InitPipelineManagement(); if (res != RT_SUCCESS) return res; @@ -192,6 +197,7 @@ extern "C" rt_result RT_RENDERER_API_FN(Init)(const rt_renderer_init_info *info) } extern "C" void RT_RENDERER_API_FN(Shutdown)(void) { + ShutdownPipelineManagement(); ShutdownBufferManagement(); ShutdownRenderTargetManagement(); ShutdownCommandBufferManagement(); diff --git a/src/renderer/dx11/render_graph.cpp b/src/renderer/dx11/render_graph.cpp index cdcddb2..54a2ce4 100644 --- a/src/renderer/dx11/render_graph.cpp +++ b/src/renderer/dx11/render_graph.cpp @@ -5,6 +5,14 @@ #include "device_objects.hpp" #include "gpu.hpp" +static constexpr unsigned int MAX_SUBMITTED_VIEWS_PER_PASS = 4; + +struct rt_pass_runtime_data { + rt_render_view views[MAX_SUBMITTED_VIEWS_PER_PASS]; + uint32_t view_count; + unsigned int views_frame_id; +}; + static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) { return rtCreateRenderTarget({.format = rtinfo->format, .width = rtinfo->width, @@ -16,10 +24,15 @@ static int RequireExplicitSynchronization() { return 0; } +static size_t GetRuntimeDataSize() { + return sizeof(rt_pass_runtime_data); +} + extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) { rt_render_graph_builder_platform_callbacks cbs{}; cbs.CreateRenderTarget = CreateRenderTarget; cbs.RequireExplicitSynchronization = RequireExplicitSynchronization; + cbs.GetRuntimeDataSize = GetRuntimeDataSize; return rtCreateRenderGraphBuilder(&cbs); } @@ -29,23 +42,31 @@ extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_bu extern "C" void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph, uint32_t pass_id, - rt_render_view view) { + rt_render_view view, + unsigned int frame_id) { for (uint32_t i = 0; i < render_graph->pass_count; ++i) { if (render_graph->passes[i].id == pass_id) { rt_render_pass *pass = &render_graph->passes[i]; - if (!RT_VERIFY(pass->submitted_view_count < RT_MAX_SUBMITTED_VIEWS_PER_PASS)) + rt_pass_runtime_data *runtime_data = + reinterpret_cast(pass->runtime_data); + RT_ASSERT(runtime_data->views_frame_id == frame_id || runtime_data->views_frame_id == 0, + "Tried to submit a view for a not-current frame."); + if (!RT_VERIFY(runtime_data->view_count < MAX_SUBMITTED_VIEWS_PER_PASS)) return; - pass->submitted_views[pass->submitted_view_count++] = view; + runtime_data->views[runtime_data->view_count++] = view; + runtime_data->views_frame_id = 
frame_id;
         }
     }
 }
 
 extern "C" void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) {
     for (uint32_t i = 0; i < graph->pass_count; ++i) {
+        rt_pass_runtime_data *runtime_data =
+            reinterpret_cast<rt_pass_runtime_data *>(graph->passes[i].runtime_data);
 #ifdef RT_DEBUG
-        memset(graph->passes[i].submitted_views, 0, sizeof(graph->passes[i].submitted_views));
+        memset(runtime_data->views, 0, sizeof(runtime_data->views));
 #endif
-        graph->passes[i].submitted_view_count = 0;
+        runtime_data->view_count = 0;
     }
 }
 
@@ -99,7 +120,14 @@ static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdb
     cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);
 
-    rt_result res = RT_VERIFY(pass->Execute)(cmdbuf_handle, nullptr, 0, pass->user_data);
+    auto runtime_data = reinterpret_cast<rt_pass_runtime_data *>(pass->runtime_data);
+    RT_VERIFY(runtime_data);
+
+    rt_result res = RT_VERIFY(pass->Execute)(pass->id,
+                                             cmdbuf_handle,
+                                             runtime_data->views,
+                                             runtime_data->view_count,
+                                             pass->user_data);
 
     if (cmd->annotation) {
         cmd->annotation->EndEvent();
diff --git a/src/renderer/null/null.c b/src/renderer/null/null.c
index fdc6bc6..a21e5c0 100644
--- a/src/renderer/null/null.c
+++ b/src/renderer/null/null.c
@@ -131,10 +131,12 @@ rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph)
 
 void RT_RENDERER_API_FN(SubmitRenderView)(rt_render_graph *render_graph,
                                           uint32_t pass_id,
-                                          rt_render_view view) {
+                                          rt_render_view view,
+                                          unsigned int frame_id) {
     RT_UNUSED(render_graph);
     RT_UNUSED(pass_id);
     RT_UNUSED(view);
+    RT_UNUSED(frame_id);
 }
 
 void RT_RENDERER_API_FN(ResetRenderGraph)(rt_render_graph *graph) {
diff --git a/src/runtime/atomics.h b/src/runtime/atomics.h
index 1d63d3e..22e391a 100644
--- a/src/runtime/atomics.h
+++ b/src/runtime/atomics.h
@@ -7,13 +7,37 @@
 
 /* Increment and decrement return the new value */
 
-#define rtAtomic32Inc(pa) _InterlockedIncrement((volatile LONG *)(pa))
-#define rtAtomic64Inc(pa) _InterlockedIncrement64((volatile LONG64 *)(pa))
-#define rtAtomic32Dec(pa) _InterlockedDecrement((volatile LONG *)(pa))
-#define rtAtomic64Dec(pa) _InterlockedDecrement64((volatile LONG64 *)(pa))
+#define rtAtomic32Inc(pa) _InterlockedIncrement((volatile long *)(pa))
+#define rtAtomic64Inc(pa) _InterlockedIncrement64((volatile __int64 *)(pa))
+#define rtAtomic32Dec(pa) _InterlockedDecrement((volatile long *)(pa))
+#define rtAtomic64Dec(pa) _InterlockedDecrement64((volatile __int64 *)(pa))
 
-#define rtAtomic32FetchAdd(pa, value) _InterlockedExchangeAdd((volatile LONG *)(pa), (LONG)(value))
-#define rtAtomic64FetchAdd(pa, value) _InterlockedExchangeAdd64((volatile LONG64 *)(pa), (LONG)(value))
+#define rtAtomic32FetchAdd(pa, value) _InterlockedExchangeAdd((volatile long *)(pa), (LONG)(value))
+#define rtAtomic64FetchAdd(pa, value) \
+    _InterlockedExchangeAdd64((volatile __int64 *)(pa), (LONG)(value))
+
+#if defined(_M_ARM) || defined(_M_ARM64)
+#define rtAtomic32ExchangeAcq(_pDest, _NewVal) \
+    _InterlockedExchange_acq((volatile long *)(_pDest), (_NewVal))
+#define rtAtomic32ExchangeRel(_pDest, _NewVal) \
+    _InterlockedExchange_rel((volatile long *)(_pDest), (_NewVal))
+#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
+    _InterlockedCompareExchange_acq((volatile long *)(_pDest), (_NewVal), (_Compare))
+#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
+    _InterlockedCompareExchange_rel((volatile long *)(_pDest), (_NewVal), (_Compare))
+#else
+/* x86/x64 does not have acquire/release versions of these */
+#define rtAtomic32ExchangeAcq(_pDest, _NewVal) \
+    _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
+#define rtAtomic32ExchangeRel(_pDest, _NewVal) \
+    _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
+#define rtAtomic32CASAcq(_pDest, _NewVal, _Compare) \
+    _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
+#define rtAtomic32CASRel(_pDest, _NewVal, _Compare) \
+    _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
+#endif
+#define rtAtomic32Exchange(_pDest, _NewVal) \
+    _InterlockedExchange((volatile long *)(_pDest), (_NewVal))
+#define rtAtomic32CAS(_pDest, _NewVal, _Compare) \
+    _InterlockedCompareExchange((volatile long *)(_pDest), (_NewVal), (_Compare))
 
 #elif defined(__GNUC__) || defined(__clang__)
 
@@ -23,7 +47,12 @@
 #define rtAtomic64Dec(pa) __atomic_sub_fetch((pa), 1LL, __ATOMIC_SEQ_CST)
 
 #define rtAtomic32FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
-#define rtAtomic64FetchAdd(pa, value) _-atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
+#define rtAtomic64FetchAdd(pa, value) __atomic_fetch_add((pa), (value), __ATOMIC_SEQ_CST)
+
+/* TODO Linux versions of compare exchange
+https://gcc.gnu.org/onlinedocs/gcc/_005f_005fatomic-Builtins.html
+*/
+
 #endif
 
 #endif
diff --git a/src/runtime/ds.h b/src/runtime/ds.h
index 1bab093..97b1259 100644
--- a/src/runtime/ds.h
+++ b/src/runtime/ds.h
@@ -52,6 +52,50 @@ static RT_INLINE int rtMinheapIsEmpty(rt_minheap *minheap) {
     return minheap->size == 0;
 }
 
+/* This function is expected to copy the contents of current (current_size bytes) to the new memory. */
+typedef void *rt_hashtable_grow_memory_fn(void *current, size_t current_size, size_t new_size, void *userdata);
+
+/* A hashtable.
+ *
+ * Maps 64 bit keys to 64 bit values.
+ * The hashtable is _not_ thread safe.
+ */
+typedef struct {
+    uint64_t *keys;
+    uint64_t *values;
+
+    uint64_t capacity;
+    uint64_t used_slots;
+
+    void *grow_memory_userdata;
+    rt_hashtable_grow_memory_fn *GrowMemoryCallback;
+} rt_hashtable;
+
+#define RT_HASH_TABLE_MEMORY_REQUIRED(_capacity) (2 * sizeof(uint64_t) * (_capacity))
+
+/* Default implementation for allocating from an arena */
+RT_DLLEXPORT void *
+rtHashtableGrowMemoryFromArena(void *current, size_t current_size, size_t new_size, void *userdata);
+
+/* Capacity must be a power of two.
+ * If grow_memory_cb is NULL, the hashtable will not support growth above the initial capacity. */
+RT_DLLEXPORT rt_hashtable rtCreateHashtable(uint64_t capacity,
+                                            void *memory,
+                                            rt_hashtable_grow_memory_fn *grow_memory_cb,
+                                            void *userdata);
+
+/* Returns
+ * RT_OUT_OF_MEMORY if the hashtable is full or
+ * RT_SUCCESS. */
+RT_DLLEXPORT rt_result rtHashtableInsert(rt_hashtable *hashtable, uint64_t key, uint64_t value);
+
+/* Returns default_value, if the key is not present in the hashtable */
+RT_DLLEXPORT uint64_t rtHashtableLookup(const rt_hashtable *hashtable,
+                                        uint64_t key,
+                                        uint64_t default_value);
+
+RT_DLLEXPORT void rtHashtableRemove(rt_hashtable *hashtable, uint64_t key);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/runtime/ds_hashtable.c b/src/runtime/ds_hashtable.c
new file mode 100644
index 0000000..f04499e
--- /dev/null
+++ b/src/runtime/ds_hashtable.c
@@ -0,0 +1,157 @@
+#include "ds.h"
+#include "mem_arena.h"
+
+#include <string.h>
+
+#define RT_EMPTY_KEY 0ull
+#define RT_TOMBSTONE 1ull
+
+static uint64_t FixKey(uint64_t key) {
+    if (key < RT_TOMBSTONE + 1)
+        key = UINT64_MAX - key;
+    return key;
+}
+
+RT_DLLEXPORT rt_hashtable rtCreateHashtable(uint64_t capacity,
+                                            void *memory,
+                                            rt_hashtable_grow_memory_fn *grow_memory_cb,
+                                            void *userdata) {
+    RT_ASSERT(RT_IS_POWER_OF_TWO(capacity), "Hashtable capacity must be a power of two.");
+    memset(memory, 0, sizeof(uint64_t) * capacity);
+    rt_hashtable hashtable = {
+        .keys = memory,
+        .values = (uint64_t *)memory + capacity,
+        .capacity = capacity,
+        .used_slots = 0,
+        .GrowMemoryCallback = grow_memory_cb,
+        .grow_memory_userdata = userdata,
+    };
+    return hashtable;
+}
+
+static rt_result Rehash(rt_hashtable *hashtable) {
+    void *newmem = hashtable->GrowMemoryCallback(hashtable->keys,
+                                                 2 * sizeof(uint64_t) * hashtable->capacity,
+                                                 4 * sizeof(uint64_t) * hashtable->capacity,
+                                                 hashtable->grow_memory_userdata);
+    if (!newmem)
+        return RT_OUT_OF_MEMORY;
+    uint64_t old_capacity = hashtable->capacity;
+    hashtable->capacity *= 2;
+    hashtable->keys = newmem;
+    hashtable->values = hashtable->keys + hashtable->capacity;
+
+    /* The old values sit right after the old keys, i.e. in what is now the second half of
+     * the grown key array. Move them to the new value array before clearing the new key slots. */
+    memcpy(hashtable->values, hashtable->keys + old_capacity, sizeof(uint64_t) * old_capacity);
+    memset(hashtable->keys + old_capacity, 0, sizeof(uint64_t) * old_capacity);
+
+    uint64_t *keys = hashtable->keys;
+    uint64_t capacity = hashtable->capacity;
+    uint64_t mod = capacity - 1;
+    for (uint64_t i = 0; i < old_capacity; ++i) {
+        if (keys[i] > RT_TOMBSTONE) {
+            uint64_t key = keys[i];
+            uint64_t offset = 0;
+            while (offset < capacity) {
+                uint64_t new_slot = (key + offset) & mod;
+                if (keys[new_slot] == key || keys[new_slot] <= RT_TOMBSTONE) {
+                    if (new_slot != i) {
+                        keys[new_slot] = key;
+                        hashtable->values[new_slot] = hashtable->values[i];
+                        keys[i] = RT_TOMBSTONE;
+                    }
+                    break;
+                }
+                ++offset;
+            }
+            if (offset == capacity) {
+                rtReportError("CORE", "Rehashing failed. 
This is a bug."); + return RT_UNKNOWN_ERROR; + } + } + } + return RT_SUCCESS; +} + +RT_DLLEXPORT rt_result rtHashtableInsert(rt_hashtable *hashtable, uint64_t key, uint64_t value) { + key = FixKey(key); + + if (hashtable->GrowMemoryCallback && (hashtable->used_slots > (hashtable->capacity >> 1))) { + rt_result res = Rehash(hashtable); + if (res != RT_SUCCESS) + return res; + } + + uint64_t *keys = hashtable->keys; + uint64_t capacity = hashtable->capacity; + uint64_t mod = capacity - 1; + uint64_t offset = 0; + while (offset < capacity) { + uint64_t index = (key + offset) & mod; + ++offset; + + if (keys[index] == key || keys[index] <= RT_TOMBSTONE) { + keys[index] = key; + hashtable->values[index] = value; + ++hashtable->used_slots; + return RT_SUCCESS; + } + } + return RT_OUT_OF_MEMORY; +} + +RT_DLLEXPORT uint64_t rtHashtableLookup(const rt_hashtable *hashtable, + uint64_t key, + uint64_t default_value) { + key = FixKey(key); + + uint64_t capacity = hashtable->capacity; + uint64_t mod = capacity - 1; + uint64_t offset = 0; + while (offset < capacity) { + uint64_t index = (key + offset) & mod; + ++offset; + + if (hashtable->keys[index] == key) { + return hashtable->values[index]; + } else if (hashtable->keys[index] == RT_EMPTY_KEY) { + break; + } + } + return default_value; +} + +RT_DLLEXPORT void rtHashtableRemove(rt_hashtable *hashtable, uint64_t key) { + key = FixKey(key); + uint64_t *keys = hashtable->keys; + uint64_t capacity = hashtable->capacity; + uint64_t mod = capacity - 1; + uint64_t offset = 0; + while (offset < capacity) { + uint64_t index = (key + offset) & mod; + ++offset; + + if (keys[index] == key) { + keys[index] = RT_TOMBSTONE; + } else if (keys[index] == RT_EMPTY_KEY) { + break; + } + } +} + +RT_DLLEXPORT void *rtHashtableGrowMemoryFromArena(void *current, + size_t current_size, + size_t new_size, + void *userdata) { + rt_arena *arena = userdata; + ptrdiff_t current_offset = (uint8_t *)current - (uint8_t *)arena->base; + if (current_offset + current_size == arena->at) { + /* The current allocation is the one at the end of the arena, so we can simply grow and be + * done */ + if (!rtArenaPush(arena, new_size - current_size)) + return NULL; + return current; + } else { + void *newmem = rtArenaPush(arena, new_size); + if (newmem) + memcpy(newmem, current, current_size); + return newmem; + } +} diff --git a/src/runtime/meson.build b/src/runtime/meson.build index 0668b33..f127119 100644 --- a/src/runtime/meson.build +++ b/src/runtime/meson.build @@ -26,6 +26,7 @@ runtime_lib = library('rt', 'buffer_manager.c', 'compression.c', 'config.c', + 'ds_hashtable.c', 'ds_minheap.c', 'dynamic_libs.c', 'error_report.c', diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index bf198aa..6ccad0a 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -203,6 +203,9 @@ static RT_INLINE uint32_t rtNextPowerOfTwo32(uint32_t v) { return v; } +/* Checks that exactly one bit is set */ +#define RT_IS_POWER_OF_TWO(_n) ((_n) && !((_n) & ((_n)-1))) + /* Runtime init. Initializes basic systems. * You need to call this, even if you build a CLI only app. 
*/ RT_DLLEXPORT rt_result rtInitRuntime(void); diff --git a/src/runtime/threading.h b/src/runtime/threading.h index 722a0d9..df3abd9 100644 --- a/src/runtime/threading.h +++ b/src/runtime/threading.h @@ -81,6 +81,20 @@ RT_DLLEXPORT void rtSignalSemaphore(rt_semaphore *sem); RT_DLLEXPORT void rtWaitOnSemaphore(rt_semaphore *sem); +/* Spinlock */ +typedef struct { + volatile int lock; +} rt_spinlock; + +static RT_INLINE rt_spinlock rtCreateSpinlock(void) { + rt_spinlock l = {0}; + return l; +} + +RT_DLLEXPORT void rtLockSpinlock(rt_spinlock *lock); + +RT_DLLEXPORT void rtUnlockSpinlock(rt_spinlock *lock); + /* Threads */ typedef struct rt_thread_s rt_thread; diff --git a/src/runtime/threading_spinlock.c b/src/runtime/threading_spinlock.c new file mode 100644 index 0000000..b8ad342 --- /dev/null +++ b/src/runtime/threading_spinlock.c @@ -0,0 +1,13 @@ +#include "atomics.h" +#include "threading.h" + +RT_DLLEXPORT void rtLockSpinlock(rt_spinlock *lock) { + int prev; + do { + prev = rtAtomic32CASAcq(&lock->lock, 1, 0); + } while (prev != 0); +} + +RT_DLLEXPORT void rtUnlockSpinlock(rt_spinlock *lock) { + rtAtomic32ExchangeRel(&lock->lock, 0); +} diff --git a/tests/rttest.c b/tests/rttest.c index 483df2c..a01d26c 100644 --- a/tests/rttest.c +++ b/tests/rttest.c @@ -1,6 +1,8 @@ #include #include "runtime/config.h" +#include "runtime/ds.h" +#include "runtime/mem_arena.h" #include "runtime/runtime.h" #include "gfx/gfx.h" @@ -69,7 +71,7 @@ static rt_result PushRenderList(void) { } dummy_type; rt_render_object_type type = rtRegisterRenderObjectType(sizeof(dummy_type), "DummyType"); - rt_create_render_list_result list_res = rtCreateRenderList(type); + rt_create_render_list_result list_res = rtCreateRenderList(type, 43); if (!list_res.ok) { return RT_INVALID_VALUE; } @@ -92,7 +94,7 @@ static rt_result PushLongRenderList(void) { } dummy_type; rt_render_object_type type = rtRegisterRenderObjectType(sizeof(dummy_type), "DummyType"); - rt_create_render_list_result list_res = rtCreateRenderList(type); + rt_create_render_list_result list_res = rtCreateRenderList(type, 43); if (!list_res.ok) return RT_INVALID_VALUE; rt_render_list list = list_res.list; @@ -119,6 +121,41 @@ static rt_result PushLongRenderList(void) { return RT_SUCCESS; } +static rt_result HashTableBasics(void) { + { + uint64_t mem[128]; + rt_hashtable ht = rtCreateHashtable(64, mem, NULL, NULL); + + for (uint64_t i = 0; i < 64; ++i) { + if (rtHashtableInsert(&ht, i, i) != RT_SUCCESS) + return RT_UNKNOWN_ERROR; + uint64_t found = rtHashtableLookup(&ht, i, UINT64_MAX); + if (found != i) + return RT_INVALID_VALUE; + } + } + + { + rt_create_arena_result arena_res = rtCreateArena(NULL, RT_KB(4)); + if (!arena_res.ok) + return RT_OUT_OF_MEMORY; + rt_arena arena = arena_res.arena; + void *mem = rtArenaPush(&arena, RT_HASH_TABLE_MEMORY_REQUIRED(64)); + if (!mem) + return RT_OUT_OF_MEMORY; + rt_hashtable ht = rtCreateHashtable(64, mem, rtHashtableGrowMemoryFromArena, &arena); + for (uint64_t i = 0; i < 64; ++i) { + if (rtHashtableInsert(&ht, 256+i, i) != RT_SUCCESS) + return RT_UNKNOWN_ERROR; + uint64_t found = rtHashtableLookup(&ht, 256+i, UINT64_MAX); + if (found != i) + return RT_INVALID_VALUE; + } + rtReleaseArena(&arena); + } + return RT_SUCCESS; +} + /* Scaffolding * * Run all the test cases, output if they passed or failed. 
@@ -169,7 +206,8 @@ static rt_test_fixture *_test_fixtures[] = {TEST_FIXTURE_LIST}; static rt_test_case _test_cases[] = {TEST_CASE(RelPtrTest), TEST_CASE(NegRelPtrTest), TEST_CASE_FIXTURE(PushRenderList, render_list_fixture), - TEST_CASE_FIXTURE(PushLongRenderList, render_list_fixture)}; + TEST_CASE_FIXTURE(PushLongRenderList, render_list_fixture), + TEST_CASE(HashTableBasics)}; int main() { int out = 0;