From 388b747a046634c3f29193e6a984a77dddaa4ec6 Mon Sep 17 00:00:00 2001 From: Kevin Trogant Date: Thu, 18 Apr 2024 17:06:11 +0200 Subject: [PATCH] Executing the simplest possible render graph --- src/asset_compiler/asset_compiler.c | 7 +- src/asset_compiler/effect_processor.c | 2 +- src/asset_compiler/framegraph_processor.c | 2 + src/game/main.c | 52 +- src/gfx/effect.c | 16 + src/gfx/effect.h | 5 +- src/gfx/gfx.h | 128 --- src/gfx/gfx_framegraph.c | 938 ---------------------- src/gfx/gfx_main.c | 80 +- src/gfx/meson.build | 2 +- src/gfx/renderer_api.h | 57 +- src/renderer/common/common_render_graph.c | 832 +++++++++++++++++++ src/renderer/common/common_render_graph.h | 90 +++ src/renderer/dx11/command_buffers.cpp | 38 +- src/renderer/dx11/commands.cpp | 67 ++ src/renderer/dx11/device_objects.hpp | 13 + src/renderer/dx11/helpers.cpp | 3 + src/renderer/dx11/meson.build | 6 + src/renderer/dx11/render_graph.cpp | 149 ++++ src/renderer/dx11/render_targets.cpp | 83 +- src/renderer/null/meson.build | 2 +- src/renderer/null/null.c | 83 +- src/renderer/vk/commands.c | 4 +- src/renderer/vk/frame.c | 2 +- src/renderer/vk/meson.build | 4 + src/renderer/vk/render_graph.c | 505 +----------- src/renderer/vk/render_targets.c | 16 +- src/renderer/vk/render_targets.h | 7 + src/runtime/mem_arena.h | 8 +- src/runtime/resource_manager.c | 7 + src/runtime/runtime.h | 8 +- 31 files changed, 1448 insertions(+), 1768 deletions(-) create mode 100644 src/gfx/effect.c delete mode 100644 src/gfx/gfx_framegraph.c create mode 100644 src/renderer/common/common_render_graph.c create mode 100644 src/renderer/common/common_render_graph.h create mode 100644 src/renderer/dx11/render_graph.cpp diff --git a/src/asset_compiler/asset_compiler.c b/src/asset_compiler/asset_compiler.c index 0cd9420..6872cbd 100644 --- a/src/asset_compiler/asset_compiler.c +++ b/src/asset_compiler/asset_compiler.c @@ -64,11 +64,12 @@ static rt_asset_db _asset_db; static rt_processing_queue _processing_queue; extern 
RT_ASSET_PROCESSOR_FN(EffectProcessor); -extern RT_ASSET_PROCESSOR_FN(FramegraphProcessor); +/* extern RT_ASSET_PROCESSOR_FN(FramegraphProcessor);*/ static rt_asset_processor _processors[] = { - { .file_ext = ".effect", .proc = EffectProcessor}, - {.file_ext = ".framegraph", .proc = FramegraphProcessor}, + {.file_ext = ".effect", .proc = EffectProcessor}, + /* + {.file_ext = ".framegraph", .proc = FramegraphProcessor},*/ }; static void ProcessorThreadEntry(void *); diff --git a/src/asset_compiler/effect_processor.c b/src/asset_compiler/effect_processor.c index a220bed..b626a53 100644 --- a/src/asset_compiler/effect_processor.c +++ b/src/asset_compiler/effect_processor.c @@ -36,7 +36,7 @@ typedef struct { typedef struct { unsigned int pass_count; rt_parsed_pipeline_data pipelines[RT_MAX_SUBRESOURCES]; - rt_render_pass_id pass_ids[RT_MAX_SUBRESOURCES]; + uint32_t pass_ids[RT_MAX_SUBRESOURCES]; } rt_parsed_effect_data; enum { diff --git a/src/asset_compiler/framegraph_processor.c b/src/asset_compiler/framegraph_processor.c index 7b463a1..be7d2fe 100644 --- a/src/asset_compiler/framegraph_processor.c +++ b/src/asset_compiler/framegraph_processor.c @@ -7,6 +7,7 @@ #include #include +#if 0 static int RenderTargetExists(const rt_framegraph_info *framegraph, rt_render_target_id id) { const rt_render_target_info *render_targets = rtResolveConstRelptr(&framegraph->render_targets); for (uint32_t i = 0; i < framegraph->render_target_count; ++i) { @@ -658,3 +659,4 @@ out: rtReleaseBuffer(asset.buffer, asset.size); return result; } +#endif \ No newline at end of file diff --git a/src/game/main.c b/src/game/main.c index 2ffdeaa..62fcbad 100644 --- a/src/game/main.c +++ b/src/game/main.c @@ -11,9 +11,14 @@ void RegisterCVars(void) { rtRegisterAssetCompilerCVars(); } -static rt_framegraph *_framegraph; +static rt_render_graph *_graph; -static rt_render_target_handle _rt; +static rt_result ForwardPassExecute(rt_command_buffer_handle cmdbuf, + const rt_render_list *lists, + uint32_t 
list_count, + void *userdata) { + return RT_SUCCESS; +} /* Called after the runtime has finished its initialization and before entering the main-loop*/ void Init(void) { @@ -22,7 +27,31 @@ void Init(void) { rtWaitForAssetProcessing(); - _rt = g_renderer.GetSwapchainRenderTarget(); + rt_render_graph_builder builder = g_renderer.CreateRenderGraphBuilder(); + rt_attachment_info backbuffer = { + .name = "backbuffer", + .format = RT_PIXEL_FORMAT_SWAPCHAIN, + .width = RT_RENDER_TARGET_SIZE_SWAPCHAIN, + .height = RT_RENDER_TARGET_SIZE_SWAPCHAIN, + .samples = 1, + .layers = 1, + }; + builder.AddRenderTarget(builder.obj, &backbuffer); + + rt_pass_info forward = {.name = "forward", + .flags = RT_PASS_FLAG_EXECUTE_ALWAYS | RT_PASS_FLAG_GRAPHICS}; + builder.AddRenderPass(builder.obj, &forward); + builder.AddColorOutput(builder.obj, + "forward", + "backbuffer", + RT_PASS_LOAD_MODE_CLEAR, + RT_PASS_WRITE_MODE_STORE, + (rt_color){.r = 1.f, .g = 0.f, .b = 1.f, .a = 1.f}); + builder.SetBackbuffer(builder.obj, "backbuffer"); + builder.BindRenderPass(builder.obj, "forward", ForwardPassExecute, NULL); + builder.Build(builder.obj, &_graph); + + g_renderer.DestroyRenderGraphBuilder(&builder); } /* Called after exiting the main-loop and before the runtime starts its shutdown */ @@ -36,20 +65,5 @@ void Update(unsigned int frame_id) { } void Render(unsigned int frame_id) { - rt_alloc_command_buffer_info info = {RT_GRAPHICS_QUEUE}; - rt_command_buffer_handle cmd; - g_renderer.AllocCommandBuffers(1, &info, &cmd); - - rt_cmd_begin_pass_info pass_info = { - .color_buffer_count = 1, - .color_buffers = {_rt}, - .color_buffer_loads = {RT_PASS_LOAD_MODE_CLEAR}, - .color_buffer_clear_values = {{.color = {1.f, 0.f, 0.f, 1.f}}}, - .name = "testme", - }; - g_renderer.CmdBeginPass(cmd, &pass_info); - g_renderer.CmdEndPass(cmd); - - rt_submit_command_buffers_info submit = {.command_buffer_count = 1, .command_buffers = &cmd}; - g_renderer.SubmitCommandBuffers(RT_GRAPHICS_QUEUE, &submit); + 
g_renderer.ExecuteRenderGraph(_graph); } \ No newline at end of file diff --git a/src/gfx/effect.c b/src/gfx/effect.c new file mode 100644 index 0000000..0b7dbc3 --- /dev/null +++ b/src/gfx/effect.c @@ -0,0 +1,16 @@ +#include "effect.h" +#include "runtime/hashing.h" + +RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) { + uint32_t id = rtHashBytes32(name, len); + if (id == 0) + id = ~id; + return id; +} + +RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len) { + uint32_t id = rtHashBytes32(name, len); + if (id == 0) + id = ~id; + return id; +} diff --git a/src/gfx/effect.h b/src/gfx/effect.h index 2b698f6..b5006e3 100644 --- a/src/gfx/effect.h +++ b/src/gfx/effect.h @@ -19,7 +19,7 @@ typedef struct rt_pipeline_info_s { typedef struct { /* Id of the render pass during which this effect pass is run. */ - rt_render_pass_id pass_id; + uint32_t pass_id; rt_resource_id pipeline; } rt_effect_pass_info; @@ -28,4 +28,7 @@ typedef struct { rt_effect_pass_info passes[RT_MAX_SUBRESOURCES]; } rt_effect_info; +RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len); +RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len); + #endif diff --git a/src/gfx/gfx.h b/src/gfx/gfx.h index 2e58217..b02ed67 100644 --- a/src/gfx/gfx.h +++ b/src/gfx/gfx.h @@ -81,134 +81,6 @@ RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id); RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id); -/* ********************************************************************* - * Framegraph API - * - * The framegraph is used to organize and schedule the work for a frame. - * *********************************************************************/ - -/* Special value for the .width and .height fields of rt_render_target_info - * to indicate that these should be set to the width or height of the swapchain, respectively. 
*/ -#define RT_RENDER_TARGET_SIZE_SWAPCHAIN 0 -/* 32 bit string hashes */ -typedef uint32_t rt_render_target_id; -typedef uint32_t rt_render_pass_id; - -typedef struct { - rt_render_target_id id; - rt_pixel_format format; - uint32_t width; - uint32_t height; - uint32_t sample_count; - - /* For debug purposes, can be 0 */ - rt_relptr name; - uint32_t name_len; -} rt_render_target_info; - -typedef enum { - RT_RENDER_TARGET_READ_SAMPLED, - RT_RENDER_TARGET_READ_DIRECT, - - RT_RENDER_TARGET_READ_count, -} rt_render_target_read_mode; - -typedef struct { - rt_render_target_id render_target; - rt_render_target_read_mode mode; -} rt_render_target_read; - -typedef enum { - /* Clears the render target with the clear value before executing the pass */ - RT_RENDER_TARGET_WRITE_CLEAR = 0x01, - - /* Discards the written values after the pass has finished executing */ - RT_RENDER_TARGET_WRITE_DISCARD = 0x02, -} rt_render_target_write_flags; - -typedef struct { - rt_render_target_id render_target; - union { - rt_color color; - struct { - float depth; - int32_t stencil; - } depth_stencil; - } clear; - rt_render_target_write_flags flags; -} rt_render_target_write; - -typedef enum { - RT_RENDER_PASS_TYPE_GRAPHICS, - RT_RENDER_PASS_TYPE_COMPUTE, -} rt_render_pass_type; - -typedef struct { - rt_render_pass_id id; - - /* For debug purposes, can be 0 */ - rt_relptr name; - uint32_t name_len; - - rt_render_pass_type type; - /* list of rt_render_target_reads */ - rt_relptr read_render_targets; - /* list of rt_render_target_writes */ - rt_relptr write_render_targets; - uint32_t read_render_target_count; - uint32_t write_render_target_count; -} rt_render_pass_info; - -typedef struct { - rt_relptr render_targets; - rt_relptr render_passes; - uint32_t render_target_count; - uint32_t render_pass_count; - rt_render_target_id backbuffer; - rt_relptr names; - uint32_t names_size; -} rt_framegraph_info; - -typedef void rt_render_pass_prepare_fn(rt_render_pass_id id, - const rt_render_target_write 
*writes, - uint32_t write_count, - const rt_render_target_read *reads, - uint32_t read_count); -typedef void rt_render_pass_execute_fn(rt_render_pass_id id, - const rt_render_target_write *writes, - uint32_t write_count, - const rt_render_target_read *reads, - uint32_t read_count); -typedef void rt_render_pass_finalize_fn(rt_render_pass_id id, - const rt_render_target_write *writes, - uint32_t write_count, - const rt_render_target_read *reads, - uint32_t read_count); - -typedef struct { - rt_render_pass_prepare_fn *Prepare; - rt_render_pass_execute_fn *Execute; - rt_render_pass_finalize_fn *Finalize; -} rt_render_pass_bind_fns; - -typedef struct rt_framegraph_s rt_framegraph; - -RT_DLLEXPORT rt_framegraph *rtCreateFramegraph(const rt_framegraph_info *info); - -RT_DLLEXPORT void rtDestroyFramegraph(rt_framegraph *framegraph); - -RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph, - rt_render_pass_id pass, - const rt_render_pass_bind_fns *bind_fns); - -RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id); - -/* Utility to turn a string into a usable render target id. */ -RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len); - -/* Utility to turn a string into a usable render pass id. */ -RT_DLLEXPORT rt_render_pass_id rtCalculateRenderPassID(const char *name, size_t len); - #ifdef __cplusplus } #endif diff --git a/src/gfx/gfx_framegraph.c b/src/gfx/gfx_framegraph.c deleted file mode 100644 index 7100361..0000000 --- a/src/gfx/gfx_framegraph.c +++ /dev/null @@ -1,938 +0,0 @@ -#include "gfx.h" -#include "renderer_api.h" - -#include "runtime/config.h" -#include "runtime/handles.h" -#include "runtime/hashing.h" -#include "runtime/mem_arena.h" -#include "runtime/threading.h" - -#include -#include -#include - -RT_CVAR_I(rt_MaxFramegraphs, "Maximum number of framegraphs. 
Default 16", 16); - -#define RT_FRAMEGRAPH_MAX_PASSES 32 -#define RT_FRAMEGRAPH_MAX_RENDER_TARGETS 32 -#define RT_RENDERPASS_MAX_READS 8 -#define RT_RENDERPASS_MAX_WRITES 8 - -typedef struct { - rt_render_target_id id; - rt_pixel_format format; - const char *name; - unsigned int width; - unsigned int height; - unsigned int sample_count; - rt_gpu_semaphore_handle semaphores[3]; - rt_render_target_handle api_render_target; -} rt_render_target; - -typedef struct { - rt_render_pass_id id; - rt_render_pass_type type; - const char *name; - int execution_level; - - bool reads_swapchain; - bool writes_swapchain; - - unsigned int read_count; - unsigned int write_count; - rt_render_pass_bind_fns bound_fns; - rt_render_target_read reads[RT_RENDERPASS_MAX_READS]; - rt_render_target_write writes[RT_RENDERPASS_MAX_WRITES]; -} rt_render_pass; - -struct rt_framegraph_s { - uint32_t pass_count; - uint32_t render_target_count; - - rt_framegraph *next_free; - - rt_render_pass passes[RT_FRAMEGRAPH_MAX_PASSES]; - - rt_render_target render_targets[RT_FRAMEGRAPH_MAX_RENDER_TARGETS]; -}; - -static rt_framegraph *_framegraphs; -static rt_framegraph *_first_free; -static rt_mutex *_free_list_lock; - -#define NAMES_CAPACITY 512 -static char _name_buffer[512]; -static char *_name_next; -static rt_mutex *_name_lock; - -static void ReturnFrameGraph(rt_framegraph *framegraph) { - rtLockMutex(_free_list_lock); - framegraph->next_free = _first_free; - _first_free = framegraph; - rtUnlockMutex(_free_list_lock); -} - -rt_result InitFramegraphManager(void) { - _free_list_lock = rtCreateMutex(); - if (!_free_list_lock) - return RT_UNKNOWN_ERROR; - _name_lock = rtCreateMutex(); - if (!_name_lock) { - rtDestroyMutex(_free_list_lock); - return RT_UNKNOWN_ERROR; - } - _framegraphs = calloc((size_t)rt_MaxFramegraphs.i, sizeof(rt_framegraph)); - if (!_framegraphs) - return RT_OUT_OF_MEMORY; - for (int i = 0; i < rt_MaxFramegraphs.i; ++i) - _framegraphs[i].next_free = (i < rt_MaxFramegraphs.i - 1) ? 
&_framegraphs[i + 1] : NULL; - _first_free = &_framegraphs[0]; - _name_next = &_name_buffer[0]; - return RT_SUCCESS; -} - -void ShutdownFramegraphManager(void) { - free(_framegraphs); - rtDestroyMutex(_free_list_lock); - rtDestroyMutex(_name_lock); -} - -typedef struct { - unsigned int dependency_count; - int execution_level; -} rt_pass_construct; - -static int CompareRenderPassExecutionLevels(const void *a, const void *b) { - const rt_render_pass *pass_a = a, *pass_b = b; - return pass_a->execution_level - pass_b->execution_level; -} - -static bool -CreateRenderPasses(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) { - uint32_t render_pass_count = info->render_pass_count; - - bool result = false; - - /* Pass A depends on pass B, if: - * B preceeds A in the list of render passes AND - * B writes to a render target that A reads from. */ - bool *dependency_matrix = - rtArenaPushZero(arena, render_pass_count * render_pass_count * sizeof(bool)); - if (!dependency_matrix) { - rtLog("GFX", - "Not enough memory to allocate a %ux%u dependency matrix.", - render_pass_count, - render_pass_count); - goto out; - } - /* Checks if pass "dependent_idx" depends on pass "dependency_idx" */ -#define PASS_DEPENDS(dependent_idx, dependency_idx) \ - dependency_matrix[(dependency_idx)*render_pass_count + (dependent_idx)] - - rt_pass_construct *construct_passes = - RT_ARENA_PUSH_ARRAY_ZERO(arena, rt_pass_construct, render_pass_count); - if (!construct_passes) { - rtLog("GFX", - "Not enough memory to allocate construction information for %u passes.", - render_pass_count); - goto out; - } - - const rt_render_pass_info *pass_info = rtResolveConstRelptr(&info->render_passes); - for (uint32_t i = 0; i < render_pass_count; ++i) { - construct_passes[i].execution_level = -1; /* not scheduled yet */ - const rt_render_target_write *writes_i = - rtResolveConstRelptr(&pass_info[i].write_render_targets); - for (uint32_t j = i + 1; j < render_pass_count; ++j) { - const 
rt_render_target_read *reads_j = - rtResolveConstRelptr(&pass_info[j].read_render_targets); - bool depends = false; - for (uint32_t read_idx = 0; read_idx < pass_info[j].read_render_target_count; - ++read_idx) { - for (uint32_t write_idx = 0; write_idx < pass_info[i].write_render_target_count; - ++write_idx) { - if (writes_i[write_idx].render_target == reads_j[read_idx].render_target) - depends = true; - } - } - PASS_DEPENDS(j, i) = depends; - if (depends) - ++construct_passes[j].dependency_count; - } - } - - /* Pass A can be executed concurrently with pass B if: - * 1. A and B don't write to the same render target AND - * 2. A's dependencies and B's dependencies have finished executing. */ - - /* We can have at most render_pass_count execution levels */ - uint32_t *level_passes = RT_ARENA_PUSH_ARRAY_ZERO(arena, uint32_t, render_pass_count); - if (!level_passes) { - rtLog("GFX", "Failed to allocate a temporary array for constructing execution levels."); - goto out; - } - uint32_t unscheduled_passes = render_pass_count; - for (int level = 0; level < (int)render_pass_count; ++level) { - unsigned int level_pass_count = 0; - for (uint32_t i = 0; i < render_pass_count; ++i) { - if (construct_passes[i].execution_level == -1 && - construct_passes[i].dependency_count == 0) { - - /* Check that no writes conflict */ - bool write_conflict = false; - const rt_render_target_write *writes_i = - rtResolveConstRelptr(&pass_info[i].write_render_targets); - for (unsigned int j = 0; j < level_pass_count; ++j) { - uint32_t pass_idx = level_passes[i]; - const rt_render_target_write *pass_writes = - rtResolveConstRelptr(&pass_info[pass_idx].write_render_targets); - for (uint32_t k = 0; k < pass_info[i].write_render_target_count; ++k) { - for (uint32_t l = 0; l < pass_info[pass_idx].write_render_target_count; - ++l) { - if (writes_i[k].render_target == pass_writes[l].render_target) { - write_conflict = true; - break; - } - } - if (write_conflict) - break; - } - if (write_conflict) - 
break; - } - if (!write_conflict) { - RT_ASSERT(level_pass_count < render_pass_count, ""); - level_passes[level_pass_count++] = i; - construct_passes[i].execution_level = level; - } - } - } - if (level_pass_count == 0) { - rtLog("GFX", "Failed to compute a valid schedule for the provided framegraph."); - goto out; - } - /* level passes now contains the passes we can execute concurrently. - * Decrement dependency count for all passes that depend on a pass in this level */ - - for (uint32_t i = 0; i < level_pass_count; ++i) { - for (uint32_t j = 0; j < render_pass_count; ++j) { - if (PASS_DEPENDS(j, level_passes[i])) - --construct_passes[j].dependency_count; - } - } - - unscheduled_passes -= level_pass_count; - if (unscheduled_passes == 0) - break; - } - RT_ASSERT(unscheduled_passes == 0, "Did not schedule all passes"); - /* Construct passes now contains the "execution level" for each pass. - * We execute passes in that order, those with the same execution level can be executed - * concurrently. 
*/ - - graph->pass_count = render_pass_count; - for (uint32_t i = 0; i < render_pass_count; ++i) { - graph->passes[i].execution_level = construct_passes[i].execution_level; - const rt_render_target_write *writes = - rtResolveConstRelptr(&pass_info[i].write_render_targets); - const rt_render_target_read *reads = - rtResolveConstRelptr(&pass_info[i].read_render_targets); - memcpy(graph->passes[i].writes, - writes, - pass_info[i].write_render_target_count * sizeof(rt_render_target_write)); - memcpy(graph->passes[i].reads, - reads, - pass_info[i].read_render_target_count * sizeof(rt_render_target_read)); - graph->passes[i].write_count = pass_info[i].write_render_target_count; - graph->passes[i].read_count = pass_info[i].read_render_target_count; - graph->passes[i].id = pass_info[i].id; - graph->passes[i].type = pass_info[i].type; - graph->passes[i].name = NULL; - - graph->passes[i].reads_swapchain = false; - graph->passes[i].writes_swapchain = false; - const rt_render_target_info *rts = rtResolveConstRelptr(&info->render_targets); - for (unsigned int j = 0; j < graph->passes[i].read_count; ++j) { - rt_render_target_id rt = graph->passes[i].reads[j].render_target; - for (unsigned int k = 0; k < info->render_target_count; ++k) { - if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN && - rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN && - rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) { - graph->passes[i].reads_swapchain = true; - } - } - } - for (unsigned int j = 0; j < graph->passes[i].write_count; ++j) { - rt_render_target_id rt = graph->passes[i].writes[j].render_target; - for (unsigned int k = 0; k < info->render_target_count; ++k) { - if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN && - rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN && - rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) { - graph->passes[i].writes_swapchain = true; - } - } - } - - const char *name = rtResolveConstRelptr(&pass_info[i].name); - if (name) { - size_t 
name_strlen = strlen(name); - if (name_strlen + 1 == pass_info[i].name_len) { - rtLockMutex(_name_lock); - ptrdiff_t name_off = _name_next - _name_buffer; - if ((name_off + pass_info[i].name_len) < NAMES_CAPACITY) { - char *dst_name = _name_next; - memcpy(dst_name, name, pass_info[i].name_len); - _name_next += pass_info[i].name_len; - graph->passes[i].name = dst_name; - } else { - rtLog("GFX", "Ran out of storage for debug name %s", name); - } - rtUnlockMutex(_name_lock); - } else { - rtLog("GFX", "Declared name-length for pass %u does not match strlen()"); - } - } - } - - /* Sort by execution level */ - qsort(graph->passes, - render_pass_count, - sizeof(rt_render_pass), - CompareRenderPassExecutionLevels); - result = true; -out: - return result; -#undef PASS_DEPENDS -} - -static bool -CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) { - bool result = false; - - unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight(); - - /* TODO(Kevin): determine aliasing opportunities */ - const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets); - for (uint32_t i = 0; i < info->render_target_count; ++i) { - graph->render_targets[i].id = render_targets[i].id; - graph->render_targets[i].format = render_targets[i].format; - graph->render_targets[i].width = render_targets[i].width; - graph->render_targets[i].height = render_targets[i].height; - graph->render_targets[i].sample_count = render_targets[i].sample_count; - graph->render_targets[i].name = NULL; - - const char *name = rtResolveConstRelptr(&render_targets[i].name); - if (name) { - size_t name_strlen = strlen(name); - if (name_strlen + 1 == render_targets[i].name_len) { - rtLockMutex(_name_lock); - ptrdiff_t name_off = _name_next - _name_buffer; - if ((name_off + render_targets[i].name_len) < NAMES_CAPACITY) { - char *dst_name = _name_next; - memcpy(dst_name, name, render_targets[i].name_len); - _name_next += render_targets[i].name_len; - 
graph->render_targets[i].name = dst_name; - } else { - rtLog("GFX", "Ran out of storage for debug name %s", name); - } - rtUnlockMutex(_name_lock); - } else { - rtLog("GFX", "Declared name-length for render-target %u does not match strlen()"); - } - } - - for (unsigned int j = 0; j < max_frames_in_flight; ++j) { - char sem_name[128]; - rtSPrint(sem_name, 128, "%s - Semaphore (%u)", (name) ? name : "Unnamed RT", j); - rt_gpu_semaphore_info sem_info = { - .initial_value = 0, - .name = sem_name, - }; - g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphores[j]); - } - - if (graph->render_targets[i].width != RT_RENDER_TARGET_SIZE_SWAPCHAIN || - graph->render_targets[i].height != RT_RENDER_TARGET_SIZE_SWAPCHAIN || - graph->render_targets[i].format != RT_PIXEL_FORMAT_SWAPCHAIN) { - - graph->render_targets[i].api_render_target = - g_renderer.CreateRenderTarget(&render_targets[i]); - } else { - graph->render_targets[i].api_render_target = g_renderer.GetSwapchainRenderTarget(); - } - if (!RT_IS_HANDLE_VALID(graph->render_targets[i].api_render_target)) { - rtReportError("GFX", "Failed to create render target %u of framegraph.", i); - for (uint32_t j = 0; j < i; ++j) - g_renderer.DestroyRenderTarget(graph->render_targets[j].api_render_target); - goto out; - } - } - graph->render_target_count = info->render_target_count; - - result = true; -out: - return result; -} - -static bool ValidateInfo(const rt_framegraph_info *info) { - if (info->render_pass_count > RT_FRAMEGRAPH_MAX_PASSES) { - rtReportError("GFX", - "Framegraph has too many passes: %u (maximum allowed is %u)", - info->render_pass_count, - RT_FRAMEGRAPH_MAX_PASSES); - return false; - } - if (info->render_target_count > RT_FRAMEGRAPH_MAX_RENDER_TARGETS) { - rtReportError("GFX", - "Framegraph has too many render targets: %u (maximum allowed is %u)", - info->render_target_count, - RT_FRAMEGRAPH_MAX_RENDER_TARGETS); - return false; - } - - const rt_render_target_info *render_targets = 
rtResolveConstRelptr(&info->render_targets); - for (uint32_t i = 0; i < info->render_target_count; ++i) { - if (render_targets[i].id == 0) { - rtReportError("GFX", "Framegraph render target %u has invalid id 0", i); - return false; - } else if ((render_targets[i].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN || - render_targets[i].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) && - (render_targets[i].width != render_targets[i].height)) { - rtReportError("GFX", - "Framegraph render target %u: If width or height is set to " - "SWAPCHAIN, both values must be set to SWAPCHAIN.", - i); - return false; - } else if (render_targets[i].format >= RT_PIXEL_FORMAT_count) { - rtReportError("GFX", - "Framegraph render target %u format is outside the allowed range.", - i); - return false; - } - } - - const rt_render_pass_info *passes = rtResolveConstRelptr(&info->render_passes); - for (uint32_t i = 0; i < info->render_pass_count; ++i) { - if (passes[i].id == 0) { - rtReportError("GFX", "Framegraph pass %u has invalid id 0", i); - return false; - } else if (passes[i].read_render_target_count > RT_RENDERPASS_MAX_READS) { - rtReportError( - "GFX", - "Framegraph pass %u reads too many rendertargets: %u (maximum allowed is %u)", - i, - passes[i].read_render_target_count, - RT_RENDERPASS_MAX_READS); - return false; - } else if (passes[i].write_render_target_count > RT_RENDERPASS_MAX_WRITES) { - rtReportError( - "GFX", - "Framegraph pass %u writes too many rendertargets: %u (maximum allowed is %u)", - i, - passes[i].write_render_target_count, - RT_RENDERPASS_MAX_WRITES); - return false; - } - } - - return true; -} - -RT_DLLEXPORT rt_framegraph *rtCreateFramegraph(const rt_framegraph_info *info) { - if (!ValidateInfo(info)) { - return NULL; - } - - rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); - if (!temp.arena) { - rtReportError("GFX", "Failed to acquire a temporary arena for constructing a framegraph"); - return NULL; - } - - rt_framegraph *graph = NULL; - /* Acquire a unused framegraph */ 
- rtLockMutex(_free_list_lock); - graph = _first_free; - if (graph) - _first_free = graph->next_free; - rtUnlockMutex(_free_list_lock); - if (!graph) - goto out; - memset(graph, 0, sizeof(*graph)); - - if (!CreateRenderPasses(graph, info, temp.arena)) { - ReturnFrameGraph(graph); - graph = NULL; - goto out; - } - - if (!CreateRenderTargets(graph, info, temp.arena)) { - ReturnFrameGraph(graph); - graph = NULL; - goto out; - } - -out: - rtReturnTemporaryArena(temp); - return graph; -} - -RT_DLLEXPORT void rtDestroyFramegraph(rt_framegraph *framegraph) { - ReturnFrameGraph(framegraph); -} - -RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph, - rt_render_pass_id id, - const rt_render_pass_bind_fns *bind_fns) { - for (uint32_t i = 0; i < framegraph->pass_count; ++i) { - if (framegraph->passes[i].id == id) { - if (framegraph->passes[i].bound_fns.Execute) - rtLog("GFX", "Rebound pass %x to new functions", id); - framegraph->passes[i].bound_fns = *bind_fns; - return; - } - } - rtLog("GFX", "Tried to bind functions to unknown render pass %x", id); -} - -static bool IsDepthFormat(rt_pixel_format format) { - return format == RT_PIXEL_FORMAT_DEPTH32 || format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8; -} - -static rt_render_target *GetRenderTarget(rt_framegraph *framegraph, rt_render_target_id id) { - for (uint32_t i = 0; i < framegraph->render_target_count; ++i) { - if (framegraph->render_targets[i].id == id) - return &framegraph->render_targets[i]; - } - return NULL; -} - -static void -BeginGraphicsPass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer_handle cmdbuf) { - const rt_render_target_write *writes = framegraph->passes[pass_idx].writes; - const rt_render_target_read *reads = framegraph->passes[pass_idx].reads; - uint32_t write_count = framegraph->passes[pass_idx].write_count; - uint32_t read_count = framegraph->passes[pass_idx].read_count; - - /* Convert reads and writes into the pass begin info for the renderer */ - rt_cmd_begin_pass_info 
begin_info; - memset(&begin_info, 0, sizeof(begin_info)); - begin_info.name = framegraph->passes[pass_idx].name; - - /* All written render targets need to have the same size */ - if (write_count > 0) { - rt_render_target *rt = GetRenderTarget(framegraph, writes[0].render_target); - RT_ASSERT(rt != NULL, "Invalid render target in pass write."); - begin_info.render_area = (rt_rect2i){ - .offset = {{0, 0}}, - .size = {{.x = rt->width, .y = rt->height}}, - }; - } - - for (uint32_t i = 0; i < write_count; ++i) { - rt_render_target *rt = GetRenderTarget(framegraph, writes[i].render_target); - RT_ASSERT(rt != NULL, "Invalid render target in pass write."); - - g_renderer.CmdTransitionRenderTarget(cmdbuf, - rt->api_render_target, - RT_RENDER_TARGET_STATE_ATTACHMENT); - - if (!IsDepthFormat(rt->format)) { - /* Add as color buffer */ - uint32_t cbidx = begin_info.color_buffer_count; - RT_ASSERT(cbidx < 4, "Maximum of 4 colorbuffers per pass exceeded."); - begin_info.color_buffers[cbidx] = rt->api_render_target; - if ((writes[i].flags & RT_RENDER_TARGET_WRITE_CLEAR) != 0) { - begin_info.color_buffer_loads[cbidx] = RT_PASS_LOAD_MODE_CLEAR; - begin_info.color_buffer_clear_values[cbidx].color = writes[i].clear.color; - } else { - begin_info.color_buffer_loads[cbidx] = RT_PASS_LOAD_MODE_LOAD; - } - if ((writes[i].flags & RT_RENDER_TARGET_WRITE_DISCARD) != 0) { - begin_info.color_buffer_writes[cbidx] = RT_PASS_WRITE_MODE_DISCARD; - } else { - begin_info.color_buffer_writes[cbidx] = RT_PASS_WRITE_MODE_STORE; - } - ++begin_info.color_buffer_count; - } else { - /* Add as depth buffer*/ - RT_ASSERT(!RT_IS_HANDLE_VALID(begin_info.depth_stencil_buffer), - "Only one depth/stencil buffer can be set!"); - begin_info.depth_stencil_buffer = rt->api_render_target; - if ((writes[i].flags & RT_RENDER_TARGET_WRITE_CLEAR) != 0) { - begin_info.depth_stencil_buffer_load = RT_PASS_LOAD_MODE_CLEAR; - begin_info.depth_stencil_buffer_clear_value.depth_stencil.depth = - 
writes[i].clear.depth_stencil.depth; - begin_info.depth_stencil_buffer_clear_value.depth_stencil.stencil = - writes[i].clear.depth_stencil.stencil; - } else { - begin_info.depth_stencil_buffer_load = RT_PASS_LOAD_MODE_LOAD; - } - if ((writes[i].flags & RT_RENDER_TARGET_WRITE_DISCARD) != 0) { - begin_info.depth_stencil_buffer_write = RT_PASS_WRITE_MODE_DISCARD; - } else { - begin_info.depth_stencil_buffer_write = RT_PASS_WRITE_MODE_STORE; - } - } - } - - for (uint32_t i = 0; i < read_count; ++i) { - rt_render_target *rt = GetRenderTarget(framegraph, reads[i].render_target); - RT_ASSERT(rt != NULL, "Invalid render target in pass read."); - /* We need to transition the render target */ - - switch (reads[i].mode) { - case RT_RENDER_TARGET_READ_SAMPLED: - g_renderer.CmdTransitionRenderTarget(cmdbuf, - rt->api_render_target, - RT_RENDER_TARGET_STATE_SAMPLED_IMAGE); - break; - case RT_RENDER_TARGET_READ_DIRECT: - g_renderer.CmdTransitionRenderTarget(cmdbuf, - rt->api_render_target, - RT_RENDER_TARGET_STATE_STORAGE_IMAGE); - break; - default: - RT_ASSERT(0, "Invalid render target read mode"); - } - } - - g_renderer.CmdBeginPass(cmdbuf, &begin_info); -} - -static void -BeginComputePass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer_handle cmdbuf) { - const rt_render_target_write *writes = framegraph->passes[pass_idx].writes; - const rt_render_target_read *reads = framegraph->passes[pass_idx].reads; - uint32_t write_count = framegraph->passes[pass_idx].write_count; - uint32_t read_count = framegraph->passes[pass_idx].read_count; - - for (uint32_t i = 0; i < write_count; ++i) { - rt_render_target *rt = GetRenderTarget(framegraph, writes[i].render_target); - RT_ASSERT(rt != NULL, "Invalid render target in pass write."); - g_renderer.CmdTransitionRenderTarget(cmdbuf, - rt->api_render_target, - RT_RENDER_TARGET_STATE_STORAGE_IMAGE); - } - - for (uint32_t i = 0; i < read_count; ++i) { - rt_render_target *rt = GetRenderTarget(framegraph, reads[i].render_target); - 
RT_ASSERT(rt != NULL, "Invalid render target in pass read."); - /* We need to transition the render target */ - - switch (reads[i].mode) { - case RT_RENDER_TARGET_READ_SAMPLED: - g_renderer.CmdTransitionRenderTarget(cmdbuf, - rt->api_render_target, - RT_RENDER_TARGET_STATE_SAMPLED_IMAGE); - break; - case RT_RENDER_TARGET_READ_DIRECT: - g_renderer.CmdTransitionRenderTarget(cmdbuf, - rt->api_render_target, - RT_RENDER_TARGET_STATE_STORAGE_IMAGE); - break; - default: - RT_ASSERT(0, "Invalid render target read mode"); - } - } -} - -RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id) { - int execution_level = framegraph->passes[0].execution_level; - uint32_t level_start = 0; - - rt_gpu_semaphore_handle swapchain_available = g_renderer.GetSwapchainAvailableSemaphore(); - rt_gpu_semaphore_handle render_finished = g_renderer.GetRenderFinishedSemaphore(); - unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight(); - unsigned int frame_index = frame_id % max_frames_in_flight; - - rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); - if (!temp.arena) { - rtLog("GFX", "Unable to execute framegraph because no temporary arena is available."); - return; - } - - /* Find the last pass that writes to the swapchain */ - uint32_t last_swapchain_write = framegraph->pass_count - 1; - for (uint32_t i = framegraph->pass_count - 1; i > 0; --i) { - if (framegraph->passes[i].writes_swapchain) { - last_swapchain_write = i; - break; - } - } - /* Find the first pass that reads the swapchain */ - uint32_t first_swapchain_read = 0; - for (uint32_t i = 0; framegraph->pass_count; ++i) { - if (framegraph->passes[i].reads_swapchain || framegraph->passes[i].writes_swapchain) { - first_swapchain_read = i; - break; - } - } - - /* Overflows after ~4.871x10^7 years */ - uint64_t signal_value_base = (uint64_t)frame_id * 100; - - for (uint32_t i = 0; i <= framegraph->pass_count && level_start < framegraph->pass_count; ++i) { - if ((i == framegraph->pass_count) 
|| - (framegraph->passes[i].execution_level > execution_level)) { - - rt_temp_arena level_temp = rtBeginTempArena(temp.arena); - - rt_gpu_semaphore_handle *graphics_wait_semaphores = NULL; - rt_gpu_semaphore_handle *graphics_signal_semaphores = NULL; - uint64_t *graphics_wait_values = NULL; - uint64_t *graphics_signal_values = NULL; - rt_command_buffer_handle *graphics_command_buffers = NULL; - uint32_t graphics_command_buffer_count = 0; - uint32_t graphics_signal_semaphore_count = 0; - uint32_t graphics_wait_semaphore_count = 0; - - rt_gpu_semaphore_handle *compute_wait_semaphores = NULL; - rt_gpu_semaphore_handle *compute_signal_semaphores = NULL; - rt_command_buffer_handle *compute_command_buffers = NULL; - uint64_t *compute_wait_values = NULL; - uint64_t *compute_signal_values = NULL; - uint32_t compute_command_buffer_count = 0; - uint32_t compute_signal_semaphore_count = 0; - uint32_t compute_wait_semaphore_count = 0; - - /* Determine necessary array sizes */ - for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) { - bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL && - framegraph->passes[pass_idx].bound_fns.Execute != NULL && - framegraph->passes[pass_idx].bound_fns.Finalize != NULL; - if (!pass_bound) { - rtLog("GFX", - "Framegraph pass %u (%x) is not bound to any function.", - pass_idx, - framegraph->passes[pass_idx].id); - continue; - } - bool is_graphics_pass = - framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS; - if (is_graphics_pass) { - graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count + - framegraph->passes[pass_idx].write_count; - graphics_signal_semaphore_count += framegraph->passes[pass_idx].write_count; - if (framegraph->passes[pass_idx].reads_swapchain || - pass_idx == first_swapchain_read) - graphics_wait_semaphore_count += 1; - if (framegraph->passes[pass_idx].writes_swapchain || - pass_idx == last_swapchain_write) - graphics_signal_semaphore_count += 1; - 
++graphics_command_buffer_count; - } else { - compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count + - framegraph->passes[pass_idx].write_count; - compute_signal_semaphore_count += framegraph->passes[pass_idx].write_count; - if (framegraph->passes[pass_idx].reads_swapchain || - pass_idx == first_swapchain_read) - compute_wait_semaphore_count += 1; - if (framegraph->passes[pass_idx].writes_swapchain || - pass_idx == last_swapchain_write) - compute_signal_semaphore_count += 1; - ++compute_command_buffer_count; - } - } - - graphics_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, - rt_gpu_semaphore_handle, - graphics_wait_semaphore_count); - graphics_wait_values = - RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, graphics_wait_semaphore_count); - graphics_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, - rt_gpu_semaphore_handle, - graphics_signal_semaphore_count); - graphics_signal_values = - RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, graphics_signal_semaphore_count); - graphics_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena, - rt_command_buffer_handle, - graphics_command_buffer_count); - graphics_signal_semaphore_count = 0; - graphics_wait_semaphore_count = 0; - graphics_command_buffer_count = 0; - - compute_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, - rt_gpu_semaphore_handle, - compute_wait_semaphore_count); - compute_wait_values = - RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_wait_semaphore_count); - compute_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena, - rt_gpu_semaphore_handle, - compute_signal_semaphore_count); - compute_signal_values = - RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_signal_semaphore_count); - compute_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena, - rt_command_buffer_handle, - compute_command_buffer_count); - compute_signal_semaphore_count = 0; - compute_wait_semaphore_count = 0; - compute_command_buffer_count = 0; - - /* Dispatch all passes 
in the current execution level */ - for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) { - bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL && - framegraph->passes[pass_idx].bound_fns.Execute != NULL && - framegraph->passes[pass_idx].bound_fns.Finalize != NULL; - if (!pass_bound) - continue; - - // rt_render_pass_id id = framegraph->passes[pass_idx].id; - const rt_render_target_write *writes = framegraph->passes[pass_idx].writes; - const rt_render_target_read *reads = framegraph->passes[pass_idx].reads; - uint32_t write_count = framegraph->passes[pass_idx].write_count; - uint32_t read_count = framegraph->passes[pass_idx].read_count; - - /* TODO(Kevin): Every one of these should be a job-dispatch*/ - - bool is_graphics_pass = - framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS; - rt_command_buffer_handle cmdbuf; - rt_alloc_command_buffer_info cmdbuf_alloc = { - .target_queue = is_graphics_pass ? RT_GRAPHICS_QUEUE : RT_COMPUTE_QUEUE, - }; - if (g_renderer.AllocCommandBuffers(1, &cmdbuf_alloc, &cmdbuf) != RT_SUCCESS) { - rtLog("GFX", - "Failed to allocate a command buffer for framegraph pass %u (%x)", - pass_idx, - framegraph->passes[pass_idx].id); - continue; - } - - if (is_graphics_pass) { - BeginGraphicsPass(framegraph, pass_idx, cmdbuf); - } else { - BeginComputePass(framegraph, pass_idx, cmdbuf); - } - - /* - framegraph->passes[pass_idx].bound_fns.Prepare(id, - writes, - write_count, - reads, - read_count); - framegraph->passes[pass_idx].bound_fns.Execute(id, - writes, - write_count, - reads, - read_count); - framegraph->passes[pass_idx].bound_fns.Finalize(id, - writes, - write_count, - reads, - read_count); - */ - if (is_graphics_pass) { - g_renderer.CmdEndPass(cmdbuf); - } - for (uint32_t j = 0; j < write_count; j++) { - rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target); - g_renderer.CmdFlushRenderTargetWrite(cmdbuf, rt->api_render_target); - } - - rt_gpu_semaphore_handle 
*wait_semaphores = NULL, *signal_semaphores = NULL; - uint64_t *wait_values = NULL, *signal_values = NULL; - rt_command_buffer_handle *command_buffers = NULL; - uint32_t *wait_count = NULL, *signal_count = 0; - uint32_t *command_buffer_count = NULL; - if (is_graphics_pass) { - wait_semaphores = graphics_wait_semaphores; - signal_semaphores = graphics_signal_semaphores; - wait_values = graphics_wait_values; - signal_values = graphics_signal_values; - command_buffers = graphics_command_buffers; - wait_count = &graphics_wait_semaphore_count; - signal_count = &graphics_signal_semaphore_count; - command_buffer_count = &graphics_command_buffer_count; - } else { - wait_semaphores = compute_wait_semaphores; - signal_semaphores = compute_signal_semaphores; - wait_values = compute_wait_values; - signal_values = compute_signal_values; - command_buffers = compute_command_buffers; - wait_count = &compute_wait_semaphore_count; - signal_count = &compute_signal_semaphore_count; - command_buffer_count = &compute_command_buffer_count; - } - - for (uint32_t j = 0; j < read_count; ++j) { - rt_render_target *rt = GetRenderTarget(framegraph, reads[j].render_target); - wait_semaphores[*wait_count] = rt->semaphores[frame_index]; - wait_values[*wait_count] = signal_value_base + execution_level; - *wait_count += 1; - } - for (uint32_t j = 0; j < write_count; ++j) { - rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target); - signal_semaphores[*signal_count] = rt->semaphores[frame_index]; - signal_values[*signal_count] = signal_value_base + execution_level + 1; - *signal_count += 1; - - if (signal_value_base >= 200) { - wait_semaphores[*wait_count] = rt->semaphores[frame_index]; - wait_values[*wait_count] = signal_value_base - 200 + execution_level + 1; - *wait_count += 1; - } - } - if (pass_idx == first_swapchain_read) { - wait_semaphores[*wait_count] = swapchain_available; - wait_values[*wait_count] = 0; - *wait_count += 1; - } - if (pass_idx == last_swapchain_write) { 
- signal_semaphores[*signal_count] = render_finished; - signal_values[*signal_count] = 0; - *signal_count += 1; - } - command_buffers[*command_buffer_count] = cmdbuf; - *command_buffer_count += 1; - } - - if (graphics_command_buffer_count > 0) { - rt_submit_command_buffers_info submit = { - .command_buffers = graphics_command_buffers, - .command_buffer_count = graphics_command_buffer_count, - .signal_semaphores = graphics_signal_semaphores, - .signal_values = graphics_signal_values, - .signal_semaphore_count = graphics_signal_semaphore_count, - .wait_semaphores = graphics_wait_semaphores, - .wait_values = graphics_wait_values, - .wait_semaphore_count = graphics_wait_semaphore_count, - }; - g_renderer.SubmitCommandBuffers(RT_GRAPHICS_QUEUE, &submit); - } - - if (compute_command_buffer_count > 0) { - rt_submit_command_buffers_info submit = { - .command_buffers = compute_command_buffers, - .command_buffer_count = compute_command_buffer_count, - .signal_semaphores = compute_signal_semaphores, - .signal_values = compute_signal_values, - .signal_semaphore_count = compute_signal_semaphore_count, - .wait_semaphores = compute_wait_semaphores, - .wait_values = compute_wait_values, - .wait_semaphore_count = compute_wait_semaphore_count, - }; - g_renderer.SubmitCommandBuffers(RT_COMPUTE_QUEUE, &submit); - } - - /* Start next level */ - level_start = i; - if (i < framegraph->pass_count) - execution_level = framegraph->passes[i].execution_level; - } - } -} - -RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len) { - rt_render_target_id id = rtHashBytes32(name, len); - if (id == 0) - id = ~id; - return id; -} - -RT_DLLEXPORT rt_render_pass_id rtCalculateRenderPassID(const char *name, size_t len) { - rt_render_pass_id id = rtHashBytes32(name, len); - if (id == 0) - id = ~id; - return id; -} diff --git a/src/gfx/gfx_main.c b/src/gfx/gfx_main.c index c72bddf..6c5f4da 100644 --- a/src/gfx/gfx_main.c +++ b/src/gfx/gfx_main.c @@ -15,7 +15,6 @@ */ 
rt_renderer_api g_renderer; -extern rt_cvar rt_MaxFramegraphs; #ifndef RT_STATIC_LIB static rt_dynlib _renderer_lib; @@ -36,10 +35,6 @@ extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int); extern void RT_RENDERER_API_FN(EndFrame)(unsigned int); extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *); extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle); -extern rt_render_target_handle - RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *); -extern rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void); -extern void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle); extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t, const rt_alloc_command_buffer_info *, rt_command_buffer_handle *); @@ -58,6 +53,7 @@ extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *); extern rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void); extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *); +extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *); extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle, const rt_cmd_begin_pass_info *); @@ -67,10 +63,15 @@ extern void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_hand rt_render_target_state); extern void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle, rt_render_target_handle); +extern void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle, rt_pipeline_handle); +extern void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle, + uint32_t, + uint32_t, + const rt_buffer_handle *, + const uint64_t *); +extern void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle, uint32_t, uint32_t); #endif -extern rt_result InitFramegraphManager(void); -extern void ShutdownFramegraphManager(void); extern rt_result InitRenderLists(void); extern void 
ShutdownRenderLists(void); extern void ResetRenderLists(void); @@ -101,24 +102,20 @@ static bool LoadRenderer(void) { RETRIEVE_SYMBOL(EndFrame, rt_end_frame_fn); RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn); RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn); - RETRIEVE_SYMBOL(CreateRenderTarget, rt_create_render_target_fn); - RETRIEVE_SYMBOL(GetSwapchainRenderTarget, rt_get_swapchain_render_target_fn); - RETRIEVE_SYMBOL(DestroyRenderTarget, rt_destroy_render_target_fn); RETRIEVE_SYMBOL(AllocCommandBuffers, rt_alloc_command_buffers_fn); RETRIEVE_SYMBOL(SubmitCommandBuffers, rt_submit_command_buffers_fn); - RETRIEVE_SYMBOL(CreateSemaphores, rt_create_gpu_semaphores_fn); - RETRIEVE_SYMBOL(DestroySemaphores, rt_destroy_gpu_semaphores_fn); - RETRIEVE_SYMBOL(GetSemaphoreValue, rt_get_gpu_semaphore_value_fn); - RETRIEVE_SYMBOL(GetSwapchainAvailableSemaphore, rt_get_swapchain_available_semaphore_fn); - RETRIEVE_SYMBOL(GetRenderFinishedSemaphore, rt_get_render_finished_semaphore_fn); RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn); RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn); RETRIEVE_SYMBOL(CreateRenderGrapbuilder, rt_create_render_graph_builder_fn); RETRIEVE_SYMBOL(DestroyRenderGraphBuilder, rt_destroy_render_graph_builder_fn); + RETRIEVE_SYMBOL(ExecuteRenderGraph, rt_execute_render_graph_fn); RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn); RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn); RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn); RETRIEVE_SYMBOL(CmdFlushRenderTargetWrite, rt_cmd_flush_render_target_write_fn); + RETRIEVE_SYMBOL(CmdBindPipeline, rt_cmd_bind_pipeline_fn); + RETRIEVE_SYMBOL(CmdBindVertexBuffers, rt_cmd_bind_vertex_buffers_fn); + RETRIEVE_SYMBOL(CmdDraw, rt_cmd_draw_fn); } else { rtReportError("GFX", "Unsupported renderer backend: (%s) %s", @@ -128,32 +125,28 @@ static bool LoadRenderer(void) { } #undef RETRIEVE_SYMBOL #else - g_renderer.RegisterCVars = &rtRenRegisterCVars; - 
g_renderer.Init = &rtRenInit; - g_renderer.Shutdown = &rtRenShutdown; - g_renderer.GetMaxFramesInFlight = &rtRenGetMaxFramesInFlight; - g_renderer.BeginFrame = &rtRenBeginFrame; - g_renderer.EndFrame = &rtRenEndFrame; - g_renderer.CompilePipeline = &rtRenCompilePipeline; - g_renderer.DestroyPipeline = &rtRenDestroyPipeline; - g_renderer.CreateRenderTarget = &rtRenCreateRenderTarget; - g_renderer.GetSwapchainRenderTarget = &rtRenGetSwapchainRenderTarget; - g_renderer.DestroyRenderTarget = &rtRenDestroyRenderTarget; - g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers; - g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers; - g_renderer.CreateSemaphores = &rtRenCreateSemaphores; - g_renderer.DestroySemaphores = &rtRenDestroySemaphores; - g_renderer.GetSemaphoreValue = &rtRenGetSemaphoreValue; - g_renderer.GetSwapchainAvailableSemaphore = &rtRenGetSwapchainAvailableSemaphore; - g_renderer.GetRenderFinishedSemaphore = &rtRenGetRenderFinishedSemaphore; - g_renderer.CreateBuffers = &rtRenCreateBuffers; - g_renderer.DestroyBuffers = &rtRenDestroyBuffers; - g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder; - g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder; - g_renderer.CmdBeginPass = &rtRenCmdBeginPass; - g_renderer.CmdEndPass = &rtRenCmdEndPass; - g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget; - g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite; + g_renderer.RegisterCVars = &rtRenRegisterCVars; + g_renderer.Init = &rtRenInit; + g_renderer.Shutdown = &rtRenShutdown; + g_renderer.GetMaxFramesInFlight = &rtRenGetMaxFramesInFlight; + g_renderer.BeginFrame = &rtRenBeginFrame; + g_renderer.EndFrame = &rtRenEndFrame; + g_renderer.CompilePipeline = &rtRenCompilePipeline; + g_renderer.DestroyPipeline = &rtRenDestroyPipeline; + g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers; + g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers; + g_renderer.CreateBuffers = 
&rtRenCreateBuffers; + g_renderer.DestroyBuffers = &rtRenDestroyBuffers; + g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder; + g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder; + g_renderer.ExecuteRenderGraph = &rtRenExecuteRenderGraph; + g_renderer.CmdBeginPass = &rtRenCmdBeginPass; + g_renderer.CmdEndPass = &rtRenCmdEndPass; + g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget; + g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite; + g_renderer.CmdBindPipeline = &rtRenCmdBindPipeline; + g_renderer.CmdBindVertexBuffers = &rtRenCmdBindVertexBuffers; + g_renderer.CmdDraw = &rtRenCmdDraw; #endif return true; } @@ -169,7 +162,6 @@ RT_DLLEXPORT void rtRegisterRendererCVars(void) { RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) { rtRegisterCVAR(&rt_Renderer); - rtRegisterCVAR(&rt_MaxFramegraphs); if (!_renderer_loaded) { if (!LoadRenderer()) @@ -182,9 +174,6 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) { if ((result = g_renderer.Init(renderer_info)) != RT_SUCCESS) return result; - if ((result = InitFramegraphManager()) != RT_SUCCESS) - return result; - if ((result = InitRenderLists()) != RT_SUCCESS) return result; @@ -193,7 +182,6 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) { RT_DLLEXPORT void rtShutdownGFX(void) { ShutdownRenderLists(); - ShutdownFramegraphManager(); g_renderer.Shutdown(); } diff --git a/src/gfx/meson.build b/src/gfx/meson.build index 237b400..ade295e 100644 --- a/src/gfx/meson.build +++ b/src/gfx/meson.build @@ -8,7 +8,7 @@ gfx_lib = library('rtgfx', 'render_list.h', 'builtin_objects.c', - 'gfx_framegraph.c', + 'effect.c', 'gfx_main.c', 'render_list.c', # Contrib Sources diff --git a/src/gfx/renderer_api.h b/src/gfx/renderer_api.h index a5cc3ac..04896f7 100644 --- a/src/gfx/renderer_api.h +++ b/src/gfx/renderer_api.h @@ -6,6 +6,7 @@ #include #include "gfx.h" +#include "render_list.h" 
#include "runtime/resources.h" #include "runtime/rt_math.h" @@ -174,6 +175,11 @@ typedef union { } depth_stencil; } rt_pass_clear_value; +typedef struct { + float depth; + int32_t stencil; +} rt_depth_stencil_value; + typedef struct { rt_render_target_handle color_buffers[4]; rt_pass_load_mode color_buffer_loads[4]; @@ -204,11 +210,12 @@ typedef enum { RT_RENDER_TARGET_STATE_STORAGE_IMAGE, } rt_render_target_state; +#define RT_RENDER_TARGET_SIZE_SWAPCHAIN 0 + /* Renderer API */ typedef struct rt_pipeline_info_s rt_pipeline_info; - typedef struct { const char *name; rt_pixel_format format; @@ -234,6 +241,9 @@ typedef struct { uint32_t flags; } rt_pass_info; +typedef struct rt_render_graph_s rt_render_graph; +typedef rt_result rt_execute_render_pass_fn(rt_command_buffer_handle cmdbuf, const rt_render_list *render_lists, unsigned int render_list_count, void *userdata); + typedef struct { void *obj; @@ -242,12 +252,17 @@ typedef struct { void (*SetBackbuffer)(void *obj, const char *rt_name); void (*AddRenderPass)(void *obj, const rt_pass_info *info); - void (*AddColorOutput)(void *obj, const char *pass_name, const char *rt_name); + void (*AddColorOutput)(void *obj, const char *pass_name, const char *rt_name, rt_pass_load_mode load, rt_pass_write_mode write, rt_color clear_color); void (*AddSampledInput)(void *obj, const char *pass_name, const char *rt_name); - void (*SetDepthStencilAttachment)(void *obj, const char *pass_name, const char *rt_name); - + void (*SetDepthStencilAttachment)(void *obj, + const char *pass_name, + const char *rt_name, + rt_pass_load_mode load, + rt_pass_write_mode write, + rt_depth_stencil_value clear_value); + void (*BindRenderPass)(void *obj, const char *pass_name, rt_execute_render_pass_fn *execute_fn, void *userdata); - rt_result (*Build)(void *obj); + rt_result (*Build)(void *obj, rt_render_graph **p_render_graph); } rt_render_graph_builder; @@ -259,27 +274,18 @@ typedef void rt_begin_frame_fn(unsigned int frame_id); typedef void 
rt_end_frame_fn(unsigned int frame_id); typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info); typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle); -typedef rt_render_target_handle rt_create_render_target_fn(const rt_render_target_info *info); -typedef rt_render_target_handle rt_get_swapchain_render_target_fn(void); -typedef void rt_destroy_render_target_fn(rt_render_target_handle handle); typedef rt_result rt_alloc_command_buffers_fn(uint32_t count, const rt_alloc_command_buffer_info *info, rt_command_buffer_handle *p_command_buffers); typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue, const rt_submit_command_buffers_info *info); -typedef rt_result rt_create_gpu_semaphores_fn(uint32_t count, - const rt_gpu_semaphore_info *info, - rt_gpu_semaphore_handle *p_semaphores); -typedef void rt_destroy_gpu_semaphores_fn(uint32_t count, rt_gpu_semaphore_handle *semaphores); -typedef uint64_t rt_get_gpu_semaphore_value_fn(rt_gpu_semaphore_handle semaphore); -typedef rt_gpu_semaphore_handle rt_get_swapchain_available_semaphore_fn(void); -typedef rt_gpu_semaphore_handle rt_get_render_finished_semaphore_fn(void); typedef rt_result rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers); typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers); typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void); typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder); +typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph); typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf, const rt_cmd_begin_pass_info *info); @@ -289,6 +295,13 @@ typedef void rt_cmd_transition_render_target_fn(rt_command_buffer_handle cmdbuf, rt_render_target_state new_state); typedef void rt_cmd_flush_render_target_write_fn(rt_command_buffer_handle cmdbuf, rt_render_target_handle render_target); +typedef void 
rt_cmd_bind_pipeline_fn(rt_command_buffer_handle cmd, rt_pipeline_handle pipeline); +typedef void rt_cmd_bind_vertex_buffers_fn(rt_command_buffer_handle cmd, + uint32_t first_binding, + uint32_t count, + const rt_buffer_handle *buffers, + const uint64_t *offsets); +typedef void rt_cmd_draw_fn(rt_command_buffer_handle cmdbuf, uint32_t first_vertex, uint32_t vertex_count); typedef struct { rt_register_renderer_cvars_fn *RegisterCVars; @@ -299,28 +312,24 @@ typedef struct { rt_end_frame_fn *EndFrame; rt_compile_pipeline_fn *CompilePipeline; rt_destroy_pipeline_fn *DestroyPipeline; - rt_create_render_target_fn *CreateRenderTarget; - rt_get_swapchain_render_target_fn *GetSwapchainRenderTarget; - rt_destroy_render_target_fn *DestroyRenderTarget; rt_alloc_command_buffers_fn *AllocCommandBuffers; rt_submit_command_buffers_fn *SubmitCommandBuffers; - rt_create_gpu_semaphores_fn *CreateSemaphores; - rt_destroy_gpu_semaphores_fn *DestroySemaphores; - rt_get_gpu_semaphore_value_fn *GetSemaphoreValue; - rt_get_swapchain_available_semaphore_fn *GetSwapchainAvailableSemaphore; - rt_get_render_finished_semaphore_fn *GetRenderFinishedSemaphore; rt_create_buffers_fn *CreateBuffers; rt_destroy_buffers_fn *DestroyBuffers; - + /*render graph functions*/ rt_create_render_graph_builder_fn *CreateRenderGraphBuilder; rt_destroy_render_graph_builder_fn *DestroyRenderGraphBuilder; + rt_execute_render_graph_fn *ExecuteRenderGraph; /* Command Buffer Functions */ rt_cmd_begin_pass_fn *CmdBeginPass; rt_cmd_end_pass_fn *CmdEndPass; rt_cmd_transition_render_target_fn *CmdTransitionRenderTarget; rt_cmd_flush_render_target_write_fn *CmdFlushRenderTargetWrite; + rt_cmd_bind_pipeline_fn *CmdBindPipeline; + rt_cmd_bind_vertex_buffers_fn *CmdBindVertexBuffers; + rt_cmd_draw_fn *CmdDraw; } rt_renderer_api; #define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name diff --git a/src/renderer/common/common_render_graph.c b/src/renderer/common/common_render_graph.c new file mode 100644 index 0000000..25773bd 
--- /dev/null +++ b/src/renderer/common/common_render_graph.c @@ -0,0 +1,832 @@ +#include +#include +#include + +#include "gfx/effect.h" +#include "gfx/renderer_api.h" +#include "runtime/buffer_manager.h" +#include "runtime/handles.h" +#include "runtime/mem_arena.h" + +#include "common_render_graph.h" + +#define MAX_COLOR_ATTACHMENTS_PER_PASS 8 +#define MAX_SAMPLED_INPUTS_PER_PASS 8 + +typedef struct rt_render_target_build_info { + const char *name; + rt_pixel_format format; + unsigned int width; + unsigned int height; + + unsigned int samples; + unsigned int layers; + + uint32_t first_usage; + uint32_t last_usage; +} rt_render_target_build_info; + +typedef struct rt_pass_build_info { + const char *name; + uint32_t flags; + + void *userdata; + rt_execute_render_pass_fn *Execute; + + uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS]; + rt_color color_attachment_clear_values[MAX_COLOR_ATTACHMENTS_PER_PASS]; + rt_pass_load_mode color_attachment_loads[MAX_COLOR_ATTACHMENTS_PER_PASS]; + rt_pass_write_mode color_attachment_writes[MAX_COLOR_ATTACHMENTS_PER_PASS]; + uint32_t color_attachment_count; + + uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS]; + uint32_t sampled_input_count; + + uint32_t depth_stencil_attachment; + rt_depth_stencil_value depth_stencil_clear_value; + rt_pass_load_mode depth_stencil_load; + rt_pass_write_mode depth_stencil_write; + + uint32_t *dependencies; + uint32_t dependency_count; +} rt_pass_build_info; + +typedef struct { + uint32_t signaled_by; + uint32_t waited_on_by; +} rt_sync_point_build_info; + +typedef struct rt_render_graph_builder_obj { + rt_arena arena; + + rt_render_target_build_info *render_targets; + uint32_t render_target_count; + uint32_t render_target_capacity; + + rt_pass_build_info *passes; + uint32_t pass_count; + uint32_t pass_capacity; + + rt_physical_render_target_info *phys_render_targets; + uint32_t phys_render_target_count; + + rt_sync_point_build_info *sync_points; + uint32_t sync_point_count; + + uint32_t 
backbuffer; + + rt_render_graph_builder_platform_callbacks platform_cbs; +} rt_render_graph_builder_obj; + +/* **************************************************************************** + * + * BUILDER CODE + * + * ****************************************************************************/ + +static void AddRenderTarget(void *_obj, const rt_attachment_info *info) { + rt_render_graph_builder_obj *obj = _obj; + + if (obj->render_target_count == obj->render_target_capacity) { + uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32; + rt_render_target_build_info *tmp = + RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_render_target_build_info, new_cap); + if (obj->render_target_capacity) + memcpy(tmp, + obj->render_targets, + sizeof(rt_render_target_build_info) * obj->render_target_capacity); + obj->render_targets = tmp; + obj->render_target_capacity = new_cap; + } + + char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1); + strcpy(name, info->name); + obj->render_targets[obj->render_target_count].name = name; + obj->render_targets[obj->render_target_count].format = info->format; + obj->render_targets[obj->render_target_count].width = info->width; + obj->render_targets[obj->render_target_count].height = info->height; + obj->render_targets[obj->render_target_count].samples = info->samples; + obj->render_targets[obj->render_target_count].layers = info->layers; + obj->render_targets[obj->render_target_count].first_usage = 0; + obj->render_targets[obj->render_target_count].last_usage = 0; + ++obj->render_target_count; +} + +static void SetBackbuffer(void *_obj, const char *rt_name) { + rt_render_graph_builder_obj *obj = _obj; + for (uint32_t i = 0; i < obj->render_target_count; ++i) { + if (strcmp(obj->render_targets[i].name, rt_name) == 0) { + obj->backbuffer = i; + return; + } + } + rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name); +} + +static void AddRenderPass(void *_obj, const rt_pass_info *info) { + 
rt_render_graph_builder_obj *obj = _obj; + + if (obj->pass_count == obj->pass_capacity) { + uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32; + rt_pass_build_info *tmp = + RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_pass_build_info, new_cap); + if (obj->pass_capacity) + memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity); + obj->passes = tmp; + obj->pass_capacity = new_cap; + } + + char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1); + strcpy(name, info->name); + obj->passes[obj->pass_count].name = name; + obj->passes[obj->pass_count].flags = info->flags; + obj->passes[obj->pass_count].color_attachment_count = 0; + obj->passes[obj->pass_count].sampled_input_count = 0; + obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX; + obj->passes[obj->pass_count].dependencies = NULL; + obj->passes[obj->pass_count].dependency_count = 0; + + ++obj->pass_count; +} + +static void AddColorOutput(void *_obj, + const char *pass_name, + const char *rt_name, + rt_pass_load_mode load, + rt_pass_write_mode write, + rt_color clear_color) { + uint32_t rt_index = UINT_MAX; + + rt_render_graph_builder_obj *obj = _obj; + for (uint32_t i = 0; i < obj->render_target_count; ++i) { + if (strcmp(obj->render_targets[i].name, rt_name) == 0) { + rt_index = i; + break; + } + } + if (rt_index == UINT_MAX) { + rtLog("ren", + "Tried to add unknown render target %s as color output to %s", + rt_name, + pass_name); + return; + } + + for (uint32_t i = 0; i < obj->pass_count; ++i) { + if (strcmp(obj->passes[i].name, pass_name) == 0) { + if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) { + rtLog("ren", "Too many color attachments in pass %s", pass_name); + } + obj->passes[i].color_attachment_clear_values[obj->passes[i].color_attachment_count] = + clear_color; + obj->passes[i].color_attachment_loads[obj->passes[i].color_attachment_count] = load; + 
obj->passes[i].color_attachment_writes[obj->passes[i].color_attachment_count] = write; + obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index; + return; + } + } + rtLog("ren", + "Tried to add render target %s as color output to unknown render target %s", + rt_name, + pass_name); +} + +static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) { + uint32_t rt_index = UINT_MAX; + + rt_render_graph_builder_obj *obj = _obj; + for (uint32_t i = 0; i < obj->render_target_count; ++i) { + if (strcmp(obj->render_targets[i].name, rt_name) == 0) { + rt_index = i; + break; + } + } + if (rt_index == UINT_MAX) { + rtLog("ren", + "Tried to add unknown render target %s as color output to %s", + rt_name, + pass_name); + return; + } + + for (uint32_t i = 0; i < obj->pass_count; ++i) { + if (strcmp(obj->passes[i].name, pass_name) == 0) { + if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) { + rtLog("ren", "Too many sampled inputs in pass %s", pass_name); + } + obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index; + return; + } + } + rtLog("ren", + "Tried to add render target %s as sampled input to unknown render target %s", + rt_name, + pass_name); +} + +static void SetDepthStencilAttachment(void *_obj, + const char *pass_name, + const char *rt_name, + rt_pass_load_mode load, + rt_pass_write_mode write, + rt_depth_stencil_value clear_value) { + uint32_t rt_index = UINT_MAX; + + rt_render_graph_builder_obj *obj = _obj; + for (uint32_t i = 0; i < obj->render_target_count; ++i) { + if (strcmp(obj->render_targets[i].name, rt_name) == 0) { + rt_index = i; + break; + } + } + if (rt_index == UINT_MAX) { + rtLog("ren", + "Tried to add unknown render target %s as depth stencil attachment to %s", + rt_name, + pass_name); + return; + } + + for (uint32_t i = 0; i < obj->pass_count; ++i) { + if (strcmp(obj->passes[i].name, pass_name) == 0) { + obj->passes[i].depth_stencil_attachment = rt_index; 
+ obj->passes[i].depth_stencil_clear_value = clear_value; + obj->passes[i].depth_stencil_load = load; + obj->passes[i].depth_stencil_write = write; + return; + } + } + rtLog("ren", + "Tried to add render target %s as depth stencil attachment to unknown render target %s", + rt_name, + pass_name); +}
+
+/* Attaches the execute callback and its userdata to the pass named pass_name.
+ * Logs and does nothing if no pass with that name has been added. */
+static void BindRenderPass(void *_obj,
+                           const char *pass_name,
+                           rt_execute_render_pass_fn *execute_fn,
+                           void *userdata) {
+    rt_render_graph_builder_obj *obj = _obj;
+    for (uint32_t i = 0; i < obj->pass_count; ++i) {
+        if (strcmp(obj->passes[i].name, pass_name) == 0) {
+            obj->passes[i].Execute = execute_fn;
+            obj->passes[i].userdata = userdata;
+            return;
+        }
+    }
+    rtLog("ren", "Tried to bind unknown render pass %s.", pass_name);
+}
+
+/* Result of FindWriters: how many pass indices were newly appended to the
+ * order array and how many already-present ones were moved to its end. */
+typedef struct {
+    uint32_t added;
+    uint32_t moved;
+} rt_find_writers_result;
+
+/* Collects every pass that writes render target rt_index (as depth/stencil or
+ * as a color attachment) into p_passes.
+ *
+ * p_passes[0 .. append_at) is the order built so far. A writer that is not in
+ * it yet is appended; a writer that is already present is removed from its
+ * slot and re-inserted at the current end. append_at is a by-value copy, so
+ * the caller has to advance its own cursor by the returned `added` count. */
+static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
+                                          uint32_t rt_index,
+                                          uint32_t append_at,
+                                          uint32_t *p_passes) {
+    rt_find_writers_result res = {0, 0};
+    for (uint32_t i = 0; i < obj->pass_count; ++i) {
+        bool writes_rt = false;
+        if (obj->passes[i].depth_stencil_attachment == rt_index) {
+            writes_rt = true;
+        } else {
+            for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j) {
+                if (obj->passes[i].color_attachments[j] == rt_index) {
+                    writes_rt = true;
+                }
+            }
+        }
+
+        if (!writes_rt)
+            continue;
+
+        /* Look for pass i in the part of the order that is already filled. */
+        uint32_t lower_index = UINT32_MAX;
+        for (uint32_t j = 0; j < append_at; ++j) {
+            if (p_passes[j] == i) {
+                lower_index = j;
+                break;
+            }
+        }
+
+        if (lower_index == UINT32_MAX) {
+            p_passes[append_at++] = i;
+            res.added++;
+        } else {
+            /* Close the gap left by the old slot, then put the pass at the end. */
+            memmove(&p_passes[lower_index],
+                    &p_passes[lower_index + 1],
+                    (append_at - lower_index - 1) * sizeof(uint32_t));
+            p_passes[append_at - 1] = i;
+            res.moved++;
+        }
+    }
+    return res;
+}
+
+/* Recursive step of the naive ordering: places the writers of search_rt into
+ * p_order and then recurses into the sampled inputs of every pass that was
+ * moved or added by that step. Returns the new fill level of p_order.
+ * NOTE(review): there is no cycle detection visible here; a cyclic graph would
+ * presumably recurse without end - confirm graphs are validated elsewhere. */
+static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
+                                    uint32_t search_rt,
+                                    uint32_t append_at,
+                                    uint32_t *p_order) {
+    rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order);
+    uint32_t new_append = append_at + writers.added;
+    /* Moved passes sit directly below the old fill level ... */
+    for (uint32_t i = 0; i < writers.moved; ++i) {
+        uint32_t pass_idx = p_order[append_at - writers.moved + i];
+        const rt_pass_build_info *pass = &obj->passes[pass_idx];
+        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
+            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
+        }
+    }
+    /* ... and added passes directly above it. */
+    for (uint32_t i = 0; i < writers.added; ++i) {
+        uint32_t pass_idx = p_order[append_at + i];
+        const rt_pass_build_info *pass = &obj->passes[pass_idx];
+        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
+            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
+        }
+    }
+    return new_append;
+}
+
+/* Builds an initial execution order by walking backwards from the backbuffer.
+ *
+ * On success *p_order points to an arena-allocated array (capacity
+ * obj->pass_count) and *p_count holds the number of entries actually written,
+ * i.e. only the passes reached from the backbuffer. Returns RT_OUT_OF_MEMORY
+ * if the array cannot be allocated. */
+static rt_result
+CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
+    uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
+    if (!order)
+        return RT_OUT_OF_MEMORY;
+    uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);
+
+    /* Now the pass writing the backbuffer is first, we need to reverse the order */
+    for (uint32_t i = 0; i < count / 2; ++i) {
+        uint32_t t = order[i];
+        order[i] = order[count - i - 1];
+        order[count - i - 1] = t;
+    }
+    *p_order = order;
+    *p_count = count;
+    return RT_SUCCESS;
+}
+
+/* Turns the naive order into the final schedule.
+ *
+ * Returns an arena-allocated array of pass_count pass indices, or NULL if the
+ * allocation fails. naive_order is consumed: every selected entry is removed
+ * from it in place. */
+static uint32_t *
+ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
+    /* Our goal is to calculate a schedule that:
+     * A) Does not break the dependency chain
+     * B) Has the maximum amount of overlap, i.e. keeps the GPU busy.
+     * This means that if pass A depends on pass B, we want to have as many passes in between as
+     * possible, to reduce the likelihood of stalls caused by A waiting for B to finish. */
+    uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
+    if (!schedule)
+        return NULL;
+    uint32_t scheduled_count = 0;
+
+    while (scheduled_count < pass_count) {
+        /* The number of passes remaining in naive_order */
+        uint32_t unscheduled_count = pass_count - scheduled_count;
+
+        /* It is always valid to use the front */
+        uint32_t selected_idx = 0;
+        uint32_t selected_score = 0;
+        for (uint32_t i = 0; i < unscheduled_count; ++i) {
+            /* Check if any dependency is not scheduled yet */
+            uint32_t pass_idx = naive_order[i];
+            const rt_pass_build_info *pass = &obj->passes[pass_idx];
+            uint32_t score = 0;
+            bool is_valid = true;
+
+            if (pass->dependency_count) {
+                for (uint32_t j = 0; j < unscheduled_count; ++j) {
+                    uint32_t pass2_idx = naive_order[j];
+                    for (uint32_t k = 0; k < pass->dependency_count; ++k) {
+                        if (pass->dependencies[k] == pass2_idx) {
+                            is_valid = false;
+                            break;
+                        }
+                    }
+                    if (!is_valid)
+                        break;
+                }
+                if (!is_valid)
+                    continue;
+
+                /* Score = summed distance (in schedule slots) to each dependency;
+                 * a larger distance means more work between producer and consumer. */
+                for (uint32_t j = 0; j < pass->dependency_count; ++j) {
+                    for (uint32_t k = 0; k < scheduled_count; ++k) {
+                        if (schedule[k] == pass->dependencies[j]) {
+                            score += scheduled_count - k;
+                            break;
+                        }
+                    }
+                }
+
+            } else {
+                /* Passes without dependencies get the highest possible score. */
+                score = UINT32_MAX;
+            }
+
+            /* Strict comparison: on a tie the earlier candidate stays selected. */
+            if (score > selected_score) {
+                selected_score = score;
+                selected_idx = i;
+            }
+        }
+
+        /* Emit the winner and remove it from the list of unscheduled passes. */
+        schedule[scheduled_count++] = naive_order[selected_idx];
+        memmove(&naive_order[selected_idx],
+                &naive_order[selected_idx + 1],
+                (unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
+    }
+    return schedule;
+}
+
+/* Fills pass->dependencies / pass->dependency_count for every pass: each pass
+ * that writes (as color or depth/stencil attachment) a render target that this
+ * pass samples is recorded as a dependency. The same writer can be recorded
+ * more than once if it matches several sampled inputs.
+ * Returns RT_OUT_OF_MEMORY if the arena cannot supply the arrays.
+ * NOTE(review): dependency_count is only incremented here, never reset -
+ * presumably the pass structs start out zeroed; confirm against AddRenderPass. */
+static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
+    /* NOTE(Kevin): This has a lot of overlap with FindWriters, so maybe it's possible to combine
+     * the two */
+    for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
+        rt_pass_build_info *pass = &obj->passes[pass_idx];
+        /* Initial guess: one writer per sampled input. */
+        uint32_t dependency_capacity = pass->sampled_input_count;
+        if (dependency_capacity) {
+            pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity);
+            if (!pass->dependencies)
+                return RT_OUT_OF_MEMORY;
+        }
+        for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) {
+            uint32_t rt_index = pass->sampled_inputs[input_idx];
+            for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) {
+                const rt_pass_build_info *candidate = &obj->passes[candidate_idx];
+                bool is_dependency = false;
+                if (candidate->depth_stencil_attachment == rt_index)
+                    is_dependency = true;
+                for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) {
+                    if (candidate->color_attachments[j] == rt_index)
+                        is_dependency = true;
+                }
+
+                if (!is_dependency)
+                    continue;
+
+                if (pass->dependency_count == dependency_capacity) {
+                    /* The dependencies are still on top of the arena, so we can just grow that
+                     * array. This relies on consecutive arena pushes being contiguous. */
+                    if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity))
+                        return RT_OUT_OF_MEMORY;
+                    dependency_capacity *= 2;
+                }
+                pass->dependencies[pass->dependency_count++] = candidate_idx;
+            }
+        }
+    }
+    return RT_SUCCESS;
+}
+
+static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj, + uint32_t pass_count, + const uint32_t *schedule) { + for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) { + rt_render_target_build_info *rt = &obj->render_targets[rt_idx]; + rt->first_usage = UINT32_MAX; + rt->last_usage = 0; + for (uint32_t sched_idx = 0; sched_idx < pass_count; ++sched_idx) { + uint32_t pass_idx = schedule[sched_idx]; + const rt_pass_build_info *pass = &obj->passes[pass_idx]; + bool usage = pass->depth_stencil_attachment == rt_idx; + if (!usage) { + for (unsigned int i = 0; i < pass->color_attachment_count; ++i) { + if (pass->color_attachments[i] == rt_idx) + usage = true; + } + } + if (!usage) { + for (unsigned int i = 0; i < pass->sampled_input_count; ++i) { + if (pass->sampled_inputs[i] == rt_idx) + usage = true; + } + } + if (usage) { + if (sched_idx < rt->first_usage) +
rt->first_usage = sched_idx; + if (sched_idx > rt->last_usage) + rt->last_usage = sched_idx; + } + } + } +}
+
+/* Merges logical render targets into as few physical render targets as
+ * possible.
+ *
+ * Two targets can share one physical target if their [first_usage, last_usage]
+ * ranges do not overlap and width, height, samples, layers and format are
+ * identical. Merging is repeated greedily until no pair can be merged.
+ *
+ * On success obj->phys_render_targets / obj->phys_render_target_count describe
+ * the physical targets, every attachment / sampled-input index stored in the
+ * passes refers to a physical target, and obj->backbuffer is the physical
+ * index of the backbuffer. Returns RT_OUT_OF_MEMORY if the arena runs out. */
+static rt_result GreedyMergeRenderTargets(rt_render_graph_builder_obj *obj) {
+    typedef struct {
+        rt_physical_render_target_info info;
+        int alive;
+        int backbuffer;
+        uint32_t first_usage;
+        uint32_t last_usage;
+    } merged_rts;
+
+    /* Every merge retires two candidates and creates one, so at most
+     * render_target_count - 1 merged entries are ever appended. */
+    merged_rts *merged = RT_ARENA_PUSH_ARRAY(&obj->arena, merged_rts, 2 * obj->render_target_count);
+    if (!merged) {
+        return RT_OUT_OF_MEMORY;
+    }
+    uint32_t candidate_count = obj->render_target_count;
+    for (uint32_t i = 0; i < candidate_count; ++i) {
+        merged[i].alive = 1;
+        merged[i].backbuffer = (i == obj->backbuffer);
+        merged[i].info.format = obj->render_targets[i].format;
+        merged[i].info.width = obj->render_targets[i].width;
+        merged[i].info.height = obj->render_targets[i].height;
+        merged[i].info.layers = obj->render_targets[i].layers;
+        merged[i].info.name = obj->render_targets[i].name;
+        merged[i].info.samples = obj->render_targets[i].samples;
+        merged[i].first_usage = obj->render_targets[i].first_usage;
+        merged[i].last_usage = obj->render_targets[i].last_usage;
+    }
+
+    /* rt_mapping[logical index] -> index into merged[] (later: physical index) */
+    uint32_t *rt_mapping =
+        RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->render_target_count);
+    if (!rt_mapping)
+        return RT_OUT_OF_MEMORY;
+    for (uint32_t i = 0; i < obj->render_target_count; ++i)
+        rt_mapping[i] = i;
+
+    bool did_merge;
+    do {
+        did_merge = false;
+        /* `first + 1 < candidate_count` instead of `first < candidate_count - 1`:
+         * the latter wraps around when there are no render targets at all. */
+        for (uint32_t first = 0; first + 1 < candidate_count; ++first) {
+            if (!merged[first].alive)
+                continue;
+            for (uint32_t second = first + 1; second < candidate_count; ++second) {
+                if (!merged[second].alive)
+                    continue;
+
+                /* Lifetimes must be disjoint */
+                if (!((merged[first].last_usage < merged[second].first_usage) ||
+                      (merged[second].last_usage < merged[first].first_usage)))
+                    continue;
+
+                /* ... and the targets must be physically compatible */
+                if (!(merged[first].info.width == merged[second].info.width &&
+                      merged[first].info.height == merged[second].info.height &&
+                      merged[first].info.samples == merged[second].info.samples &&
+                      merged[first].info.layers == merged[second].info.layers &&
+                      merged[first].info.format == merged[second].info.format))
+                    continue;
+
+                merged[first].alive = 0;
+                merged[second].alive = 0;
+
+                merged_rts combined = {
+                    .alive = 1,
+                    .backbuffer = merged[first].backbuffer || merged[second].backbuffer,
+                    .first_usage = RT_MIN(merged[first].first_usage, merged[second].first_usage),
+                    .last_usage = RT_MAX(merged[first].last_usage, merged[second].last_usage),
+                    .info = merged[first].info,
+                };
+                /* "<first>+<second>": both names, the '+' and the terminator */
+                char *combined_name = rtArenaPush(&obj->arena,
+                                                  strlen(merged[first].info.name) +
+                                                      strlen(merged[second].info.name) + 2);
+                if (!combined_name)
+                    return RT_OUT_OF_MEMORY;
+                strcpy(combined_name, merged[first].info.name);
+                strcat(combined_name, "+");
+                strcat(combined_name, merged[second].info.name);
+                combined.info.name = combined_name;
+
+                /* Update mappings. If index < render_target_count, then it refers to a
+                 * logical render target. If not, it refers to a merged render target */
+                if (first < obj->render_target_count) {
+                    rt_mapping[first] = candidate_count;
+                } else {
+                    // Find mappings that refer to this index and update them
+                    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
+                        if (rt_mapping[i] == first)
+                            rt_mapping[i] = candidate_count;
+                    }
+                }
+                if (second < obj->render_target_count) {
+                    rt_mapping[second] = candidate_count;
+                } else {
+                    // Find mappings that refer to this index and update them
+                    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
+                        if (rt_mapping[i] == second)
+                            rt_mapping[i] = candidate_count;
+                    }
+                }
+
+                RT_ASSERT(candidate_count < 2 * obj->render_target_count, "");
+                merged[candidate_count++] = combined;
+                did_merge = true;
+                break;
+            }
+            if (did_merge)
+                break;
+        }
+    } while (did_merge);
+
+    uint32_t phys_count = 0;
+    for (uint32_t i = 0; i < candidate_count; ++i) {
+        if (merged[i].alive)
+            ++phys_count;
+    }
+    obj->phys_render_targets =
+        RT_ARENA_PUSH_ARRAY(&obj->arena, rt_physical_render_target_info, phys_count);
+    if (!obj->phys_render_targets)
+        return RT_OUT_OF_MEMORY;
+    obj->phys_render_target_count = 0;
+    for (uint32_t i = 0; i < candidate_count; ++i) {
+        if (merged[i].alive) {
+            uint32_t index = obj->phys_render_target_count;
+            /* This is the one and only place where the backbuffer index is
+             * translated to a physical index. */
+            if (merged[i].backbuffer)
+                obj->backbuffer = index;
+            obj->phys_render_targets[obj->phys_render_target_count++] = merged[i].info;
+
+            /* Update the mapping table. index <= i, so an entry rewritten here can
+             * never be matched again by a later iteration. */
+            for (uint32_t j = 0; j < obj->render_target_count; ++j) {
+                if (rt_mapping[j] == i)
+                    rt_mapping[j] = index;
+            }
+        }
+    }
+
+    /* Update pass render target references */
+    for (uint32_t i = 0; i < obj->pass_count; ++i) {
+        if (obj->passes[i].depth_stencil_attachment < UINT_MAX)
+            obj->passes[i].depth_stencil_attachment =
+                rt_mapping[obj->passes[i].depth_stencil_attachment];
+        for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j)
+            obj->passes[i].color_attachments[j] = rt_mapping[obj->passes[i].color_attachments[j]];
+        for (uint32_t j = 0; j < obj->passes[i].sampled_input_count; ++j)
+            obj->passes[i].sampled_inputs[j] = rt_mapping[obj->passes[i].sampled_inputs[j]];
+    }
+    /* obj->backbuffer already holds a physical index (set in the loop above).
+     * It must not be pushed through rt_mapping a second time: rt_mapping is
+     * indexed by logical render target, so that would select the wrong target
+     * as soon as any merge happened. */
+
+    return RT_SUCCESS;
+}
+
+/* Placeholder for backends that need explicit GPU synchronization.
+ * Always asserts and reports RT_UNKNOWN_ERROR. */
+static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
+    RT_ASSERT(false, "Not implemented yet");
+    return RT_UNKNOWN_ERROR;
+}
+
+/* Allocates and fills the runtime render graph.
+ *
+ * obj        - builder state after GreedyMergeRenderTargets
+ * pass_count - number of entries in order (scheduled passes only; can be less
+ *              than obj->pass_count, see CreateNaiveOrder)
+ * order      - indices into obj->passes, in execution order
+ *
+ * The graph is a single rtAllocBuffer block laid out as
+ *   rt_render_graph | render target handles | semaphores | passes |
+ *   per-pass attachment arrays | pass names
+ * Returns NULL if the buffer cannot be allocated. */
+static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj,
+                                          uint32_t pass_count,
+                                          const uint32_t *order) {
+
+    size_t required_size = sizeof(rt_render_graph);
+    required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
+    required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle);
+    required_size += pass_count * sizeof(rt_render_pass);
+
+    size_t pass_attachment_size = 0;
+
+    for (uint32_t i = 0; i < pass_count; ++i) {
+        const rt_pass_build_info *src = &obj->passes[order[i]];
+        required_size += strlen(src->name) + 1;
+        pass_attachment_size += src->color_attachment_count *
+                                (sizeof(rt_render_target_handle) + sizeof(rt_color) +
+                                 sizeof(rt_pass_load_mode) + sizeof(rt_pass_write_mode));
+        pass_attachment_size += src->sampled_input_count * sizeof(rt_render_target_handle);
+    }
+    required_size += pass_attachment_size;
+
+    rt_render_graph *graph = rtAllocBuffer(required_size);
+    if (!graph)
+        return NULL;
+    memset(graph, 0, required_size);
+    graph->render_targets = (rt_render_target_handle *)(graph + 1);
+    graph->semaphores =
+        (rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count);
+    graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count);
+    char *attachment_storage = (char *)(graph->passes + pass_count);
+    char *names = attachment_storage + pass_attachment_size;
+    char *next_name = names;
+
+    graph->render_target_count = obj->phys_render_target_count;
+    graph->semaphore_count = obj->sync_point_count;
+    graph->pass_count = pass_count;
+
+    for (uint32_t i = 0; i < obj->phys_render_target_count; ++i) {
+        graph->render_targets[i] =
+            obj->platform_cbs.CreateRenderTarget(&obj->phys_render_targets[i]);
+    }
+
+    for (uint32_t i = 0; i < obj->sync_point_count; ++i) {
+        // TODO
+        RT_NOT_IMPLEMENTED;
+    }
+
+    for (uint32_t i = 0; i < pass_count; ++i) {
+        /* i is the slot in the schedule; everything read from the builder has to
+         * come from the pass that was scheduled into that slot. */
+        const rt_pass_build_info *src = &obj->passes[order[i]];
+        rt_render_pass *dst = &graph->passes[i];
+        size_t namelen = strlen(src->name);
+
+        dst->Execute = RT_VERIFY(src->Execute);
+        dst->user_data = src->userdata;
+        dst->flags = src->flags;
+        dst->id = rtCalculateRenderPassID(src->name, namelen);
+        dst->first_signal = 0;
+        dst->signal_count = 0;
+        dst->first_wait = 0;
+        dst->wait_count = 0;
+        dst->execution_level = i;
+
+        dst->depth_stencil = (src->depth_stencil_attachment != UINT_MAX)
+                                 ? graph->render_targets[src->depth_stencil_attachment]
+                                 : (rt_render_target_handle)RT_INVALID_HANDLE;
+        dst->depth_stencil_clear_value = src->depth_stencil_clear_value;
+        dst->depth_stencil_load = src->depth_stencil_load;
+        dst->depth_stencil_write = src->depth_stencil_write;
+
+        dst->color_output_count = src->color_attachment_count;
+        if (dst->color_output_count) {
+            /* Carve the four parallel per-attachment arrays out of the shared storage */
+            dst->color_outputs = (rt_render_target_handle *)attachment_storage;
+            attachment_storage += sizeof(rt_render_target_handle) * dst->color_output_count;
+            dst->color_clear_values = (rt_color *)attachment_storage;
+            attachment_storage += sizeof(rt_color) * dst->color_output_count;
+            dst->color_loads = (rt_pass_load_mode *)attachment_storage;
+            attachment_storage += sizeof(rt_pass_load_mode) * dst->color_output_count;
+            dst->color_writes = (rt_pass_write_mode *)attachment_storage;
+            attachment_storage += sizeof(rt_pass_write_mode) * dst->color_output_count;
+
+            for (uint32_t j = 0; j < dst->color_output_count; ++j) {
+                dst->color_outputs[j] = graph->render_targets[src->color_attachments[j]];
+                dst->color_clear_values[j] = src->color_attachment_clear_values[j];
+                dst->color_loads[j] = src->color_attachment_loads[j];
+                dst->color_writes[j] = src->color_attachment_writes[j];
+            }
+        }
+
+        dst->sampled_input_count = src->sampled_input_count;
+        if (dst->sampled_input_count) {
+            dst->sampled_inputs = (rt_render_target_handle *)attachment_storage;
+            attachment_storage += sizeof(rt_render_target_handle) * dst->sampled_input_count;
+
+            for (uint32_t j = 0; j < dst->sampled_input_count; ++j) {
+                dst->sampled_inputs[j] = graph->render_targets[src->sampled_inputs[j]];
+            }
+        }
+
+        memcpy(next_name, src->name, namelen + 1);
+        dst->name = next_name;
+        next_name += namelen + 1;
+    }
+
+    graph->backbuffer_index = obj->backbuffer;
+
+    return graph;
+}
+
+/* Builder entry point: orders the passes, determines their dependencies,
+ * schedules them, merges render targets and creates the runtime graph.
+ * On success *p_graph receives the new graph. Returns the first error of any
+ * step, or RT_UNKNOWN_ERROR if the graph itself could not be allocated. */
+static rt_result Build(void *_obj, rt_render_graph **p_graph) {
+    rt_render_graph_builder_obj *obj = _obj;
+    uint32_t *naive_order;
+    uint32_t pass_count;
+    rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
+    if (res != RT_SUCCESS)
+        return res;
+
+    res = DeterminePassDependencies(obj);
+    if (res != RT_SUCCESS)
+        return res;
+
+    uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
+    if (!optimized_order)
+        return RT_OUT_OF_MEMORY;
+
+    DetermineRenderTargetUsage(obj, pass_count, optimized_order);
+    res = GreedyMergeRenderTargets(obj);
+    if (res != RT_SUCCESS)
+        return res;
+
+    if (obj->platform_cbs.RequireExplicitSynchronization()) {
+        res = CreateSynchronizationPoints(obj);
+        if (res != RT_SUCCESS)
+            return res;
+    } else {
+        obj->sync_point_count = 0;
+    }
+
+    /* optimized_order holds pass_count entries, not obj->pass_count */
+    *p_graph = CreateRenderGraph(obj, pass_count, optimized_order);
+    return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
+}
+
+rt_render_graph_builder +rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs) { + // TODO: This is totally temporary. 
We do NOT want malloc anywhere in non-init functions + rt_render_graph_builder_obj *obj = malloc(sizeof(*obj)); + RT_ASSERT(obj, "Failed to allocate the builder object."); + memset(obj, 0, sizeof(*obj)); + rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16)); + RT_ASSERT(arena_res.ok, ""); + obj->arena = arena_res.arena; + obj->platform_cbs = *platform_cbs; + + return (rt_render_graph_builder){ + .obj = obj, + .AddRenderTarget = AddRenderTarget, + .SetBackbuffer = SetBackbuffer, + .AddRenderPass = AddRenderPass, + .AddColorOutput = AddColorOutput, + .AddSampledInput = AddSampledInput, + .SetDepthStencilAttachment = SetDepthStencilAttachment, + .BindRenderPass = BindRenderPass, + .Build = Build, + }; +} + +void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder) { + rt_render_graph_builder_obj *obj = builder->obj; + rtReleaseArena(&obj->arena); + free(obj); + memset(builder, 0, sizeof(*builder)); +} diff --git a/src/renderer/common/common_render_graph.h b/src/renderer/common/common_render_graph.h new file mode 100644 index 0000000..b880a60 --- /dev/null +++ b/src/renderer/common/common_render_graph.h @@ -0,0 +1,90 @@ +#ifndef RT_RENDERER_COMMON_RENDER_GRAPH_H +#define RT_RENDERER_COMMON_RENDER_GRAPH_H + +#include "gfx/renderer_api.h" +#include "runtime/mem_arena.h" + +typedef struct { + const char *name; + rt_pixel_format format; + unsigned int width; + unsigned int height; + + unsigned int samples; + unsigned int layers; + +} rt_physical_render_target_info; + +typedef rt_render_target_handle +rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info); +typedef int rt_rgb_require_explicit_synchronization_fn(void); + +typedef struct { + rt_rgb_create_render_target_fn *CreateRenderTarget; + rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization; +} rt_render_graph_builder_platform_callbacks; + +typedef struct { + uint32_t flags; + + /* Used for cheap referencing */ + uint32_t id; + + /* Used for debug 
output */ + const char *name; + + /* Render targets */ + rt_render_target_handle *color_outputs; + rt_color *color_clear_values; + rt_pass_load_mode *color_loads; + rt_pass_write_mode *color_writes; + uint32_t color_output_count; + rt_render_target_handle depth_stencil; + rt_depth_stencil_value depth_stencil_clear_value; + rt_pass_load_mode depth_stencil_load; + rt_pass_write_mode depth_stencil_write; + rt_render_target_handle *sampled_inputs; + uint32_t sampled_input_count; + + /* Used for parallelisation on the CPU-side. All passes with execution level N can + * be recorded in parallel, after passes with level N-1 have finished. */ + uint32_t execution_level; + + /* GFX layer function for executing the pass */ + rt_execute_render_pass_fn *Execute; + void *user_data; + + /* These refer to the semaphores array */ + uint32_t first_wait; + uint32_t wait_count; + uint32_t first_signal; + uint32_t signal_count; +} rt_render_pass; + +struct rt_render_graph_s { + rt_render_target_handle *render_targets; + uint32_t render_target_count; + + rt_gpu_semaphore_handle *semaphores; + uint32_t semaphore_count; + + rt_render_pass *passes; + uint32_t pass_count; + + uint32_t backbuffer_index; +}; + +#ifdef __cplusplus +extern "C" { +#endif + +rt_render_graph_builder +rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs); + +void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder); + +#ifdef __cplusplus +} +#endif + +#endif \ No newline at end of file diff --git a/src/renderer/dx11/command_buffers.cpp b/src/renderer/dx11/command_buffers.cpp index b63821a..3a573dc 100644 --- a/src/renderer/dx11/command_buffers.cpp +++ b/src/renderer/dx11/command_buffers.cpp @@ -46,10 +46,7 @@ void ShutdownCommandBufferManagement() { _buffers = nullptr; } -extern "C" rt_result -RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, - const rt_alloc_command_buffer_info *, - rt_command_buffer_handle *p_command_buffers) { +rt_result 
rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles) { for (uint32_t i = 0; i < count; ++i) { rtLockMutex(_lock); rt_command_buffer *slot = _first_free; @@ -61,7 +58,7 @@ RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, rtLog("dx11", "Failed to allocate a command buffer slot."); rtLockMutex(_lock); for (uint32_t j = 0; j < i; ++j) { - rt_command_buffer *s = &_buffers[p_command_buffers[j].index]; + rt_command_buffer *s = &_buffers[p_handles[j].index]; s->next_free = _first_free; _first_free = s; } @@ -74,7 +71,7 @@ RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, rtLog("dx11", "Failed to create a deferred context."); auto lock_guard = rtAutoLock(_lock); for (uint32_t j = 0; j < i; ++j) { - rt_command_buffer *s = &_buffers[p_command_buffers[j].index]; + rt_command_buffer *s = &_buffers[p_handles[j].index]; s->next_free = _first_free; _first_free = s; } @@ -91,23 +88,22 @@ RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, slot->context->ClearState(); } - slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION; - const uint32_t index = (uint32_t)(slot - _buffers); - p_command_buffers[i].version = slot->version; - p_command_buffers[i].index = index; + slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION; + const uint32_t index = (uint32_t)(slot - _buffers); + p_handles[i].version = slot->version; + p_handles[i].index = index; } return RT_SUCCESS; } -extern "C" rt_result -RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const rt_submit_command_buffers_info *info) { +rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles) { // TODO: Handle semaphores // Submit the command lists to the gpu - for (uint32_t i = 0; i < info->command_buffer_count; ++i) { - rt_command_buffer *cmdbuf = &_buffers[info->command_buffers[i].index]; - if (cmdbuf->version != info->command_buffers[i].version) { + for (uint32_t i = 0; i < count; ++i) { + rt_command_buffer *cmdbuf = 
&_buffers[handles[i].index]; + if (cmdbuf->version != handles[i].version) { rtLog("dx11", "Tried to submit an invalid command buffer (version mismatch)"); return RT_INVALID_VALUE; } @@ -137,4 +133,16 @@ rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle) { if (handle.version != _buffers[handle.index].version) return nullptr; return &_buffers[handle.index]; +} + +extern "C" rt_result +RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, + const rt_alloc_command_buffer_info *, + rt_command_buffer_handle *p_command_buffers) { + return rtAllocCommandBuffers(count, p_command_buffers); +} + +extern "C" rt_result +RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const rt_submit_command_buffers_info *info) { + return rtSubmitCommandBuffers(info->command_buffer_count, info->command_buffers); } \ No newline at end of file diff --git a/src/renderer/dx11/commands.cpp b/src/renderer/dx11/commands.cpp index 78c99d0..8f68772 100644 --- a/src/renderer/dx11/commands.cpp +++ b/src/renderer/dx11/commands.cpp @@ -2,6 +2,7 @@ #include #include "gfx/renderer_api.h" +#include "runtime/mem_arena.h" #include "device_objects.hpp" #include "gpu.hpp" @@ -85,3 +86,69 @@ RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdhandle return; RT_UNUSED(render_target); } + +extern "C" void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle, + rt_pipeline_handle pipeline_handle) { + rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle); + if (!RT_VERIFY(cmd)) + return; + rt_pipeline *pipeline = rtGetPipeline(pipeline_handle); + + if (pipeline->IsComputePipeline()) { + rtReportError("dx11", + "Attempted to bind a compute pipeline via CmdBindPipeline. 
Use " + "CmdBindComputePipeline instead."); + return; + } + + cmd->context->IASetInputLayout(pipeline->input_layout); + cmd->context->VSSetShader(pipeline->vertex_shader, nullptr, 0); + cmd->context->PSSetShader(pipeline->pixel_shader, nullptr, 0); +} + +extern "C" void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle, + uint32_t first_binding, + uint32_t count, + const rt_buffer_handle *buffers, + const uint64_t *_offsets) { + rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle); + if (!RT_VERIFY(cmd)) + return; + rt_temp_arena temp = rtGetTemporaryArena(NULL, 0); + if (!temp.arena) + return; + + ID3D11Buffer **vbos = RT_ARENA_PUSH_ARRAY(temp.arena, ID3D11Buffer *, count); + UINT *offsets = nullptr; + + if (!vbos) + goto out; + + if (_offsets) { + offsets = RT_ARENA_PUSH_ARRAY(temp.arena, UINT, count); + if (!offsets) + goto out; + for (uint32_t i = 0; i < count; ++i) { + offsets[i] = static_cast(_offsets[i]); + } + } + + for (uint32_t i = 0; i < count; ++i) { + rt_buffer *buffer = rtGetBuffer(buffers[i]); + RT_ASSERT(buffer->type == RT_BUFFER_TYPE_VERTEX, "Buffer must be a vertex buffer"); + vbos[i] = buffer->buffer; + } + + cmd->context->IASetVertexBuffers(first_binding, count, vbos, nullptr, offsets); + +out: + rtReturnTemporaryArena(temp); +} + +extern "C" void +RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle, uint32_t first, uint32_t count) { + rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle); + if (!RT_VERIFY(cmd)) + return; + cmd->context->Draw(count, first); +} diff --git a/src/renderer/dx11/device_objects.hpp b/src/renderer/dx11/device_objects.hpp index b5c2ec0..5d1d527 100644 --- a/src/renderer/dx11/device_objects.hpp +++ b/src/renderer/dx11/device_objects.hpp @@ -73,6 +73,19 @@ struct rt_pipeline { } }; +struct rt_render_target_create_info { + rt_pixel_format format; + uint32_t width; + uint32_t height; + const char *name; +}; + +rt_render_target_handle rtCreateRenderTarget(const 
rt_render_target_create_info &info); +void rtDestroyRenderTarget(rt_render_target_handle handle); + +rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles); +rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles); + rt_render_target *rtGetRenderTarget(rt_render_target_handle handle); rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle); rt_buffer *rtGetBuffer(rt_buffer_handle handle); diff --git a/src/renderer/dx11/helpers.cpp b/src/renderer/dx11/helpers.cpp index f27b6ae..8264d9c 100644 --- a/src/renderer/dx11/helpers.cpp +++ b/src/renderer/dx11/helpers.cpp @@ -26,6 +26,9 @@ DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format) { case RT_PIXEL_FORMAT_DEPTH32: return DXGI_FORMAT_D32_FLOAT; + case RT_PIXEL_FORMAT_SWAPCHAIN: + return DXGI_FORMAT_B8G8R8A8_UNORM; + default: return DXGI_FORMAT_UNKNOWN; } diff --git a/src/renderer/dx11/meson.build b/src/renderer/dx11/meson.build index 6f888d1..07488a3 100644 --- a/src/renderer/dx11/meson.build +++ b/src/renderer/dx11/meson.build @@ -5,14 +5,20 @@ if get_option('build_dx11') # Project Sources 'device_objects.hpp', 'gpu.hpp', + + '../common/common_render_graph.h', 'buffers.cpp', 'commands.cpp', 'command_buffers.cpp', 'helpers.cpp', 'init.cpp', + 'pipelines.cpp', + 'render_graph.cpp', 'render_targets.cpp', + '../common/common_render_graph.c', + dependencies : [m_dep, windowing_dep, dx11_dep], include_directories : [engine_incdir, contrib_incdir], link_with : [runtime_lib], diff --git a/src/renderer/dx11/render_graph.cpp b/src/renderer/dx11/render_graph.cpp new file mode 100644 index 0000000..8b19151 --- /dev/null +++ b/src/renderer/dx11/render_graph.cpp @@ -0,0 +1,149 @@ +#include "gfx/renderer_api.h" +#include "renderer/common/common_render_graph.h" + +#include "device_objects.hpp" +#include "gpu.hpp" + +static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) { + return rtCreateRenderTarget({.format 
= rtinfo->format, + .width = rtinfo->width, + .height = rtinfo->height, + .name = rtinfo->name}); +}
+
+/* DX11 needs no explicit synchronization points in the render graph. */
+static int RequireExplicitSynchronization() {
+    return 0;
+}
+
+/* Renderer API: creates a render graph builder wired to the DX11 callbacks. */
+extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
+    rt_render_graph_builder_platform_callbacks cbs{};
+    cbs.CreateRenderTarget             = CreateRenderTarget;
+    cbs.RequireExplicitSynchronization = RequireExplicitSynchronization;
+    return rtCreateRenderGraphBuilder(&cbs);
+}
+
+/* Renderer API: destroys a builder created by CreateRenderGraphBuilder. */
+extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
+    rtDestroyRenderGraphBuilder(builder);
+}
+
+/* Records one render pass into the given command buffer: clears and binds the
+ * pass' color / depth-stencil targets and then calls the pass' Execute
+ * callback.
+ *
+ * Returns RT_INVALID_VALUE if the command buffer or a color target handle is
+ * invalid or if the pass has more color outputs than D3D11 can bind at once,
+ * otherwise the result of the Execute callback. */
+static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdbuf_handle) {
+    rt_command_buffer *cmd = rtGetCommandBuffer(cmdbuf_handle);
+    if (!RT_VERIFY(cmd))
+        return RT_INVALID_VALUE;
+
+    /* The rtv array below is indexed by color output, so reject anything that
+     * does not fit before touching it. */
+    constexpr uint32_t max_color_outputs = D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT;
+    if (pass->color_output_count > max_color_outputs) {
+        rtLog("dx11", "Too many color outputs in pass %s", pass->name);
+        return RT_INVALID_VALUE;
+    }
+
+    /* Only close the debug event if it was actually opened, and close it on
+     * every exit path. */
+    bool event_open = false;
+    if (cmd->annotation) {
+        WCHAR wname[128];
+        /* NOTE(review): sizeof(wname) is a byte count; confirm rtUTF8ToWStr does
+         * not expect the number of WCHARs here. */
+        if (rtUTF8ToWStr(pass->name, wname, sizeof(wname)) == RT_SUCCESS) {
+            cmd->annotation->BeginEvent(wname);
+            event_open = true;
+        }
+    }
+    auto end_event = [&]() {
+        if (event_open)
+            cmd->annotation->EndEvent();
+    };
+
+    // Setup rtvs
+    ID3D11RenderTargetView *rtvs[max_color_outputs] = {};
+    ID3D11DepthStencilView *dsv                     = nullptr;
+
+    for (uint32_t i = 0; i < pass->color_output_count; ++i) {
+        rt_render_target *rt = rtGetRenderTarget(pass->color_outputs[i]);
+        if (!RT_VERIFY(rt)) {
+            end_event();
+            return RT_INVALID_VALUE;
+        }
+        RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
+        rtvs[i] = rt->rtv;
+
+        if (pass->color_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
+            FLOAT color[4] = {
+                pass->color_clear_values[i].r,
+                pass->color_clear_values[i].g,
+                pass->color_clear_values[i].b,
+                pass->color_clear_values[i].a,
+            };
+            cmd->context->ClearRenderTargetView(rt->rtv, color);
+        }
+    }
+
+    /* A pass without depth-stencil attachment is expected to yield nullptr here */
+    rt_render_target *dsvrt = rtGetRenderTarget(pass->depth_stencil);
+    if (dsvrt) {
+        RT_ASSERT(dsvrt->IsDepthStencilTarget(),
+                  "Need to provide a valid depth stencil render target");
+        dsv = dsvrt->dsv;
+
+        if (pass->depth_stencil_load == RT_PASS_LOAD_MODE_CLEAR)
+            cmd->context->ClearDepthStencilView(
+                dsv,
+                (dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
+                                               : D3D11_CLEAR_DEPTH,
+                pass->depth_stencil_clear_value.depth,
+                static_cast<UINT8>(pass->depth_stencil_clear_value.stencil));
+    }
+
+    cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);
+
+    rt_result res = RT_VERIFY(pass->Execute)(cmdbuf_handle, nullptr, 0, pass->user_data);
+
+    end_event();
+
+    return res;
+}
+
+/* Returns true if the graph's backbuffer texture matches the swap chain buffer
+ * in size, sample count and format, i.e. if CopyResource can be used. */
+static bool IsCopyResourcePossible(const rt_render_target *backbuffer) {
+    DXGI_SWAP_CHAIN_DESC scd;
+    g_gpu.swap_chain.swap_chain->GetDesc(&scd);
+
+    D3D11_TEXTURE2D_DESC td;
+    backbuffer->texture->GetDesc(&td);
+
+    // This is more strict than necessary, because the formats could also be from the same group
+    return scd.BufferDesc.Width == td.Width && scd.BufferDesc.Height == td.Height &&
+           scd.SampleDesc.Count == td.SampleDesc.Count && scd.BufferDesc.Format == td.Format;
+}
+
+/* Renderer API: executes every pass of the graph in order (one command buffer
+ * per pass), submits the command buffers and copies the graph's backbuffer
+ * into the swap chain buffer.
+ *
+ * Returns the first error of command buffer allocation, pass execution or
+ * submission; RT_OUT_OF_MEMORY if temporary memory is unavailable;
+ * RT_INVALID_VALUE if the backbuffer render target is invalid;
+ * RT_UNKNOWN_ERROR if the swap chain buffer cannot be retrieved. */
+extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
+    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
+    if (!temp.arena)
+        return RT_OUT_OF_MEMORY;
+
+    // Alloc a command buffer for every pass
+    rt_command_buffer_handle *cmdbufs =
+        RT_ARENA_PUSH_ARRAY(temp.arena, rt_command_buffer_handle, render_graph->pass_count);
+    if (!cmdbufs && render_graph->pass_count > 0) {
+        rtReturnTemporaryArena(temp);
+        return RT_OUT_OF_MEMORY;
+    }
+    rt_result res = rtAllocCommandBuffers(render_graph->pass_count, cmdbufs);
+    if (res != RT_SUCCESS) {
+        rtReturnTemporaryArena(temp);
+        return res;
+    }
+
+    for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
+        rt_render_pass *pass = &render_graph->passes[i];
+
+        res = ExecutePass(pass, cmdbufs[i]);
+        if (res != RT_SUCCESS)
+            break;
+    }
+
+    if (res == RT_SUCCESS) {
+        res = rtSubmitCommandBuffers(render_graph->pass_count, cmdbufs);
+    }
+
+    // Copy backbuffer to swapchain
+    rt_render_target *backbuffer =
+        rtGetRenderTarget(render_graph->render_targets[render_graph->backbuffer_index]);
+    if (!backbuffer) {
+        rtReturnTemporaryArena(temp);
+        return RT_INVALID_VALUE;
+    }
+
+    ID3D11Texture2D *frame_buffer = nullptr;
+    if (FAILED(g_gpu.swap_chain.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
+        rtReportError("dx11", "Failed to retrieve the backbuffer.");
+        rtReturnTemporaryArena(temp);
+        return RT_UNKNOWN_ERROR;
+    }
+
+    if (IsCopyResourcePossible(backbuffer)) {
+        g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
+    } else {
+        RT_NOT_IMPLEMENTED;
+    }
+
+    /* GetBuffer hands out an owning reference; without this Release every
+     * executed graph leaks one reference to the swap chain buffer. */
+    frame_buffer->Release();
+
+    rtReturnTemporaryArena(temp);
+    return res;
+}
\ No newline at end of file diff --git a/src/renderer/dx11/render_targets.cpp b/src/renderer/dx11/render_targets.cpp index 24c6bad..5ca9678 100644 --- a/src/renderer/dx11/render_targets.cpp +++ b/src/renderer/dx11/render_targets.cpp @@ -55,8 +55,7 @@ void ShutdownRenderTargetManagement() { rtDestroyMutex(_lock); } -extern "C" rt_render_target_handle -RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { +rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info) { rt_render_target *slot = nullptr; { auto lock_guard = rtAutoLock(_lock); @@ -71,23 +70,35 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { return RT_INVALID_HANDLE; } - slot->format = info->format; + slot->format = info.format; - if (!rtIsDepthFormat(info->format)) { + uint32_t swapchain_width = 0, swapchain_height = 0; + if (info.width == RT_RENDER_TARGET_SIZE_SWAPCHAIN || + info.height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) { + + DXGI_SWAP_CHAIN_DESC desc; + g_gpu.swap_chain.swap_chain->GetDesc(&desc); + swapchain_width = desc.BufferDesc.Width; + swapchain_height = desc.BufferDesc.Height; + } + + if (!rtIsDepthFormat(info.format)) { D3D11_TEXTURE2D_DESC tex_desc = {}; - tex_desc.Width = info->width; - tex_desc.Height = info->height; - tex_desc.MipLevels = 1; - tex_desc.ArraySize = 1; - tex_desc.Format = rtConvertPixelFormat(info->format); - tex_desc.SampleDesc.Count = 1; - tex_desc.SampleDesc.Quality = 0; - tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write - tex_desc.BindFlags = D3D11_BIND_RENDER_TARGET | 
D3D11_BIND_SHADER_RESOURCE; - tex_desc.CPUAccessFlags = 0; // none - tex_desc.MiscFlags = 0; + tex_desc.Width = + (info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width; + tex_desc.Height = + (info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height; + tex_desc.MipLevels = 1; + tex_desc.ArraySize = 1; + tex_desc.Format = rtConvertPixelFormat(info.format); + tex_desc.SampleDesc.Count = 1; + tex_desc.SampleDesc.Quality = 0; + tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write + tex_desc.BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE; + tex_desc.CPUAccessFlags = 0; // none + tex_desc.MiscFlags = 0; if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) { - rtLog("dx11", "Failed to create backing texture for render target %s", info->name); + rtLog("dx11", "Failed to create backing texture for render target %s", info.name); auto lg = rtAutoLock(_lock); slot->next_free = _first_free; _first_free = slot; @@ -95,14 +106,14 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { } D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {}; - rtv_desc.Format = rtConvertPixelFormat(info->format); + rtv_desc.Format = rtConvertPixelFormat(info.format); rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D; rtv_desc.Texture2D.MipSlice = 0; if (FAILED(g_gpu.device->CreateRenderTargetView(slot->texture, &rtv_desc, &slot->rtv))) { slot->texture->Release(); rtLog("dx11", "Failed to create the render target view for render target %s", - info->name); + info.name); auto lg = rtAutoLock(_lock); slot->next_free = _first_free; _first_free = slot; @@ -114,19 +125,21 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { return {.version = slot->version, .index = index}; } else { D3D11_TEXTURE2D_DESC tex_desc = {}; - tex_desc.Width = info->width; - tex_desc.Height = info->height; - tex_desc.MipLevels = 1; - tex_desc.ArraySize = 1; - tex_desc.Format = 
rtConvertPixelFormat(info->format); - tex_desc.SampleDesc.Count = 1; - tex_desc.SampleDesc.Quality = 0; - tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write - tex_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE; - tex_desc.CPUAccessFlags = 0; // none - tex_desc.MiscFlags = 0; + tex_desc.Width = + (info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width; + tex_desc.Height = + (info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height; + tex_desc.MipLevels = 1; + tex_desc.ArraySize = 1; + tex_desc.Format = rtConvertPixelFormat(info.format); + tex_desc.SampleDesc.Count = 1; + tex_desc.SampleDesc.Quality = 0; + tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write + tex_desc.BindFlags = D3D11_BIND_DEPTH_STENCIL | D3D11_BIND_SHADER_RESOURCE; + tex_desc.CPUAccessFlags = 0; // none + tex_desc.MiscFlags = 0; if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) { - rtLog("dx11", "Failed to create backing texture for render target %s", info->name); + rtLog("dx11", "Failed to create backing texture for render target %s", info.name); auto lg = rtAutoLock(_lock); slot->next_free = _first_free; _first_free = slot; @@ -134,7 +147,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { } D3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc = {}; - dsv_desc.Format = rtConvertPixelFormat(info->format); + dsv_desc.Format = rtConvertPixelFormat(info.format); dsv_desc.Flags = 0; dsv_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D; dsv_desc.Texture2D.MipSlice = 0; @@ -142,7 +155,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { slot->texture->Release(); rtLog("dx11", "Failed to create the depth stencil view for render target %s", - info->name); + info.name); auto lg = rtAutoLock(_lock); slot->next_free = _first_free; _first_free = slot; @@ -155,11 +168,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { } } -extern 
"C" rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void) { - return {1, 1}; -} - -extern "C" void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle handle) { +void rtDestroyRenderTarget(rt_render_target_handle handle) { RT_UNUSED(handle); } diff --git a/src/renderer/null/meson.build b/src/renderer/null/meson.build index 1e06f75..0c701da 100644 --- a/src/renderer/null/meson.build +++ b/src/renderer/null/meson.build @@ -1,6 +1,6 @@ null_renderer_lib = library('rtnull', 'null.c', - # Project Sources + '../common/common_render_graph.c', include_directories : engine_incdir, link_with : runtime_lib, install : true) diff --git a/src/renderer/null/null.c b/src/renderer/null/null.c index c4cb024..20e3747 100644 --- a/src/renderer/null/null.c +++ b/src/renderer/null/null.c @@ -4,6 +4,8 @@ #include "gfx/renderer_api.h" #include "runtime/runtime.h" +#include "../common/common_render_graph.h" + #define RETURN_HANDLE_STUB2(type, initial) \ static unsigned int s_next = (initial); \ return (type) { .index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX, .version = 1 } @@ -51,19 +53,6 @@ void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) { RT_UNUSED(handle); } -rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { - RT_UNUSED(info); - RETURN_HANDLE_STUB2(rt_render_target_handle, 2); -} - -rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void) { - return (rt_render_target_handle){.index = 1, .version = 1}; -} - -void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle handle) { - RT_UNUSED(handle); -} - rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count, const rt_alloc_command_buffer_info *info, rt_command_buffer_handle *p_command_buffers) { @@ -79,34 +68,6 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue, return RT_SUCCESS; } -rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count, - const 
rt_gpu_semaphore_info *info, - rt_gpu_semaphore_handle *p_semaphores) { - RT_UNUSED(info); - RETURN_HANDLE_ARRAY_STUB2(p_semaphores, count, 3) - return RT_SUCCESS; -} - -void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) { - RT_UNUSED(count); - RT_UNUSED(semaphores); -} - -/* NOTE(Kevin): It might become necessary to actually track the value, to correctly simulate gpu - * behaviour */ -uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle sem) { - RT_UNUSED(sem); - return 0; -} - -rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) { - return (rt_gpu_semaphore_handle){.index = 1, .version = 1}; -} - -rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) { - return (rt_gpu_semaphore_handle){.index = 2, .version = 1}; -} - rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers) { @@ -144,13 +105,41 @@ void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdb RT_UNUSED(render_target); } +static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *info) { + RETURN_HANDLE_STUB(rt_render_target_handle); +} + +static int RequireExplicitSync(void) { + return 0; +} + rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) { - rt_render_graph_builder b = { - .obj = NULL, - }; - return b; + rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = CreateRenderTarget, + .RequireExplicitSynchronization = + RequireExplicitSync}; + return rtCreateRenderGraphBuilder(&cbs); } void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) { - RT_UNUSED(builder); -} \ No newline at end of file + rtDestroyRenderGraphBuilder(builder); +} + +rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) { + RT_UNUSED(render_graph); + return RT_SUCCESS; +} + +void 
RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle, + rt_pipeline_handle pipeline_handle) { +} + +void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle, + uint32_t first_binding, + uint32_t count, + const rt_buffer_handle *buffers, + const uint64_t *_offsets) { +} +void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle, + uint32_t first, + uint32_t count) { +} diff --git a/src/renderer/vk/commands.c b/src/renderer/vk/commands.c index 9368688..c426dc2 100644 --- a/src/renderer/vk/commands.c +++ b/src/renderer/vk/commands.c @@ -448,7 +448,7 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb rt_render_target_state new_state) { GET_CMDBUF(cmdbuf, cmdbuf_handle) uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight; - if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) { + if (render_target.index == rtGetSwapchainRenderTarget().index) { image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index; } @@ -468,7 +468,7 @@ void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdb rt_render_target_handle render_target) { GET_CMDBUF(cmdbuf, cmdbuf_handle) uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight; - if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) { + if (render_target.index == rtGetSwapchainRenderTarget().index) { image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index; } rt_render_target *rt = rtGetRenderTarget(render_target); diff --git a/src/renderer/vk/frame.c b/src/renderer/vk/frame.c index 94e664b..f0cf2a9 100644 --- a/src/renderer/vk/frame.c +++ b/src/renderer/vk/frame.c @@ -44,7 +44,7 @@ void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) { } /* Update the swapchain render target */ - rt_render_target_handle swap_rt_handle = g_renderer.GetSwapchainRenderTarget(); + rt_render_target_handle swap_rt_handle = 
rtGetSwapchainRenderTarget(); rt_render_target *swap_rt = rtGetRenderTarget(swap_rt_handle); swap_rt->states[frame->swapchain_image_index] = RT_RENDER_TARGET_STATE_INVALID; } diff --git a/src/renderer/vk/meson.build b/src/renderer/vk/meson.build index d77e5df..baff687 100644 --- a/src/renderer/vk/meson.build +++ b/src/renderer/vk/meson.build @@ -17,6 +17,8 @@ if vk_dep.found() 'swapchain.h', 'transfers.h', + '../common/common_render_graph.h', + 'buffers.c', 'command_buffers.c', 'commands.c', @@ -32,6 +34,8 @@ if vk_dep.found() 'simple_sync_impl.cpp', + '../common/common_render_graph.c', + # Contrib Sources '../../../contrib/volk/volk.h', '../../../contrib/volk/volk.c', diff --git a/src/renderer/vk/render_graph.c b/src/renderer/vk/render_graph.c index 9451b0e..7921f35 100644 --- a/src/renderer/vk/render_graph.c +++ b/src/renderer/vk/render_graph.c @@ -3,502 +3,25 @@ #include "gfx/renderer_api.h" #include "runtime/mem_arena.h" -#include -#include -#include +#include "../common/common_render_graph.h" +#include "render_targets.h" -/* **************************************************************************** - * - * BUILDER CODE - * - * ****************************************************************************/ - -#define MAX_COLOR_ATTACHMENTS_PER_PASS 8 -#define MAX_SAMPLED_INPUTS_PER_PASS 8 - -typedef struct { - const char *name; - rt_pixel_format format; - unsigned int width; - unsigned int height; - - unsigned int samples; - unsigned int layers; - - uint32_t first_usage; - uint32_t last_usage; -} rt_render_target_build_info; - -typedef struct { - const char *name; - uint32_t flags; - - uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS]; - uint32_t color_attachment_count; - - uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS]; - uint32_t sampled_input_count; - - uint32_t depth_stencil_attachment; - - uint32_t *dependencies; - uint32_t dependency_count; -} rt_pass_build_info; - -typedef struct { - rt_arena arena; - - rt_render_target_build_info 
*render_targets; - uint32_t render_target_count; - uint32_t render_target_capacity; - - rt_pass_build_info *passes; - uint32_t pass_count; - uint32_t pass_capacity; - - uint32_t backbuffer; -} rt_render_graph_builder_obj; - -static void AddRenderTarget(void *_obj, const rt_attachment_info *info) { - rt_render_graph_builder_obj *obj = _obj; - - if (obj->render_target_count == obj->render_target_capacity) { - uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32; - rt_render_target_build_info *tmp = - RT_ARENA_PUSH_ARRAY(&obj->arena, rt_render_target_build_info, new_cap); - if (obj->render_target_capacity) - memcpy(tmp, - obj->render_targets, - sizeof(rt_render_target_build_info) * obj->render_target_capacity); - obj->render_targets = tmp; - obj->render_target_capacity = new_cap; - } - - char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1); - strcpy(name, info->name); - obj->render_targets[obj->render_target_count].name = name; - obj->render_targets[obj->render_target_count].format = info->format; - obj->render_targets[obj->render_target_count].width = info->width; - obj->render_targets[obj->render_target_count].height = info->height; - obj->render_targets[obj->render_target_count].samples = info->samples; - obj->render_targets[obj->render_target_count].layers = info->layers; - obj->render_targets[obj->render_target_count].first_usage = 0; - obj->render_targets[obj->render_target_count].last_usage = 0; - ++obj->render_target_count; -} - -static void SetBackbuffer(void *_obj, const char *rt_name) { - rt_render_graph_builder_obj *obj = _obj; - for (uint32_t i = 0; i < obj->render_target_count; ++i) { - if (strcmp(obj->render_targets[i].name, rt_name) == 0) { - obj->backbuffer = i; - return; - } - } - rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name); -} - -static void AddRenderPass(void *_obj, const rt_pass_info *info) { - rt_render_graph_builder_obj *obj = _obj; - - if (obj->pass_count == 
obj->pass_capacity) { - uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32; - rt_pass_build_info *tmp = RT_ARENA_PUSH_ARRAY(&obj->arena, rt_pass_build_info, new_cap); - if (obj->pass_capacity) - memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity); - obj->passes = tmp; - obj->pass_capacity = new_cap; - } - - char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1); - strcpy(name, info->name); - obj->passes[obj->pass_count].name = name; - obj->passes[obj->pass_count].flags = info->flags; - obj->passes[obj->pass_count].color_attachment_count = 0; - obj->passes[obj->pass_count].sampled_input_count = 0; - obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX; - obj->passes[obj->pass_count].dependencies = NULL; - obj->passes[obj->pass_count].dependency_count = 0; - - ++obj->pass_count; -} - -static void AddColorOutput(void *_obj, const char *pass_name, const char *rt_name) { - uint32_t rt_index = UINT_MAX; - - rt_render_graph_builder_obj *obj = _obj; - for (uint32_t i = 0; i < obj->render_target_count; ++i) { - if (strcmp(obj->render_targets[i].name, rt_name) == 0) { - rt_index = i; - break; - } - } - if (rt_index == UINT_MAX) { - rtLog("vk", - "Tried to add unknown render target %s as color output to %s", - rt_name, - pass_name); - return; - } - - for (uint32_t i = 0; i < obj->pass_count; ++i) { - if (strcmp(obj->passes[i].name, pass_name) == 0) { - if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) { - rtLog("vk", "Too many color attachments in pass %s", pass_name); - } - obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index; - return; - } - } - rtLog("vk", - "Tried to add render target %s as color output to unknown render target %s", - rt_name, - pass_name); -} - -static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) { - uint32_t rt_index = UINT_MAX; - - rt_render_graph_builder_obj *obj = _obj; - for (uint32_t i = 0; i < 
obj->render_target_count; ++i) { - if (strcmp(obj->render_targets[i].name, rt_name) == 0) { - rt_index = i; - break; - } - } - if (rt_index == UINT_MAX) { - rtLog("vk", - "Tried to add unknown render target %s as color output to %s", - rt_name, - pass_name); - return; - } - - for (uint32_t i = 0; i < obj->pass_count; ++i) { - if (strcmp(obj->passes[i].name, pass_name) == 0) { - if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) { - rtLog("vk", "Too many sampled inputs in pass %s", pass_name); - } - obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index; - return; - } - } - rtLog("vk", - "Tried to add render target %s as sampled input to unknown render target %s", - rt_name, - pass_name); -} - -static void SetDepthStencilAttachment(void *_obj, const char *pass_name, const char *rt_name) { - uint32_t rt_index = UINT_MAX; - - rt_render_graph_builder_obj *obj = _obj; - for (uint32_t i = 0; i < obj->render_target_count; ++i) { - if (strcmp(obj->render_targets[i].name, rt_name) == 0) { - rt_index = i; - break; - } - } - if (rt_index == UINT_MAX) { - rtLog("vk", - "Tried to add unknown render target %s as depth stencil attachment to %s", - rt_name, - pass_name); - return; - } - - for (uint32_t i = 0; i < obj->pass_count; ++i) { - if (strcmp(obj->passes[i].name, pass_name) == 0) { - obj->passes[i].depth_stencil_attachment = rt_index; - return; - } - } - rtLog("vk", - "Tried to add render target %s as depth stencil attachment to unknown render target %s", - rt_name, - pass_name); -} - -typedef struct { - uint32_t added; - uint32_t moved; -} rt_find_writers_result; - -static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj, - uint32_t rt_index, - uint32_t append_at, - uint32_t *p_passes) { - rt_find_writers_result res = {0, 0}; - for (uint32_t i = 0; i < obj->pass_count; ++i) { - bool writes_rt = false; - if (obj->passes[i].depth_stencil_attachment == rt_index) { - writes_rt = true; - } else { - for (uint32_t j = 
0; j < obj->passes[i].color_attachment_count; ++j) { - if (obj->passes[i].color_attachments[j] == rt_index) { - writes_rt = true; - } - } - } - - if (!writes_rt) - continue; - - uint32_t lower_index = UINT32_MAX; - for (uint32_t j = 0; j < append_at; ++j) { - if (p_passes[j] == i) { - lower_index = j; - break; - } - } - - if (lower_index == UINT32_MAX) { - p_passes[append_at++] = i; - res.added++; - } else { - memmove(&p_passes[lower_index], - &p_passes[lower_index + 1], - (append_at - lower_index - 1) * sizeof(uint32_t)); - p_passes[append_at - 1] = i; - res.moved++; - } - } - return res; -} - -static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj, - uint32_t search_rt, - uint32_t append_at, - uint32_t *p_order) { - rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order); - uint32_t new_append = append_at + writers.added; - for (uint32_t i = 0; i < writers.moved; ++i) { - uint32_t pass_idx = p_order[append_at - writers.moved + i]; - const rt_pass_build_info *pass = &obj->passes[pass_idx]; - for (uint32_t j = 0; j < pass->sampled_input_count; ++j) { - new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order); - } - } - for (uint32_t i = 0; i < writers.added; ++i) { - uint32_t pass_idx = p_order[append_at + i]; - const rt_pass_build_info *pass = &obj->passes[pass_idx]; - for (uint32_t j = 0; j < pass->sampled_input_count; ++j) { - new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order); - } - } - return new_append; -} - -static rt_result -CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) { - uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count); - if (!order) - return RT_OUT_OF_MEMORY; - uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order); - - /* Now the pass writing the backbuffer is first, we need to revert the order */ - for (uint32_t i = 0; i < count / 2; ++i) { - uint32_t t = order[i]; - 
order[i] = order[count - i - 1]; - order[count - i - 1] = t; - } - *p_order = order; - *p_count = count; - return RT_SUCCESS; -} - -static uint32_t * -ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) { - /* Our goal is to calculate a schedule that: - * A) Does not break the dependency chain - * B) Has the maximum amount of overlap, i.e. keeps the GPU busy. - * This means that if pass A depends on pass B, we want to have as much passes inbetween as - * possible, to reduce the likelyhood of stalls caused by B waiting for A to finish. */ - uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count); - if (!schedule) - return NULL; - uint32_t scheduled_count = 0; - - while (scheduled_count < pass_count) { - /* The number of passes remaining in naive_order */ - uint32_t unscheduled_count = pass_count - scheduled_count; - - /* It is always valid to use the front */ - uint32_t selected_idx = 0; - uint32_t selected_score = 0; - for (uint32_t i = 0; i < unscheduled_count; ++i) { - /* Check if any dependency is not scheduled yet */ - uint32_t pass_idx = naive_order[i]; - const rt_pass_build_info *pass = &obj->passes[pass_idx]; - uint32_t score = 0; - bool is_valid = true; - - if (pass->dependency_count) { - for (uint32_t j = 0; j < unscheduled_count; ++j) { - uint32_t pass2_idx = naive_order[j]; - for (uint32_t k = 0; k < pass->dependency_count; ++k) { - if (pass->dependencies[k] == pass2_idx) { - is_valid = false; - break; - } - } - if (!is_valid) - break; - } - if (!is_valid) - continue; - - for (uint32_t j = 0; j < pass->dependency_count; ++j) { - for (uint32_t k = 0; k < scheduled_count; ++k) { - if (schedule[k] == pass->dependencies[j]) { - score += scheduled_count - k; - break; - } - } - } - - } else { - score = UINT32_MAX; - } - - if (score > selected_score) { - selected_score = score; - selected_idx = i; - } - } - - schedule[scheduled_count++] = naive_order[selected_idx]; - memmove(&naive_order[selected_idx], 
- &naive_order[selected_idx + 1], - (unscheduled_count - selected_idx - 1) * sizeof(uint32_t)); - } - return schedule; -} - -static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) { - /* NOTE(Kevin): This has a lot of overlap with FindWriters, so maybe its possible to combine - * the two */ - for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) { - rt_pass_build_info *pass = &obj->passes[pass_idx]; - uint32_t dependency_capacity = pass->sampled_input_count; - if (dependency_capacity) { - pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity); - if (!pass->dependencies) - return RT_OUT_OF_MEMORY; - } - for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) { - uint32_t rt_index = pass->sampled_inputs[input_idx]; - for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) { - const rt_pass_build_info *candidate = &obj->passes[candidate_idx]; - bool is_dependency = false; - if (candidate->depth_stencil_attachment == rt_index) - is_dependency = true; - for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) { - if (candidate->color_attachments[j] == rt_index) - is_dependency = true; - } - - if (!is_dependency) - continue; - - if (pass->dependency_count == dependency_capacity) { - /* The dependencies are still on top of the arena, so we can just grow that - * array */ - if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity)) - return RT_OUT_OF_MEMORY; - dependency_capacity *= 2; - } - pass->dependencies[pass->dependency_count++] = candidate_idx; - } - } - } - return RT_SUCCESS; -} - -static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj, - uint32_t pass_count, - const uint32_t *schedule) { - for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) { - rt_render_target_build_info *rt = &obj->render_targets[rt_idx]; - rt->first_usage = UINT32_MAX; - rt->last_usage = 0; - for (uint32_t sched_idx = 0; 
sched_idx < pass_count; ++sched_idx) { - uint32_t pass_idx = schedule[sched_idx]; - const rt_pass_build_info *pass = &obj->passes[pass_idx]; - bool usage = pass->depth_stencil_attachment == rt_idx; - if (!usage) { - for (unsigned int i = 0; i < pass->color_attachment_count; ++i) { - if (pass->color_attachments[i] == rt_idx) - usage = true; - } - } - if (!usage) { - for (unsigned int i = 0; i < pass->sampled_input_count; ++i) { - if (pass->sampled_inputs[i] == rt_idx) - usage = true; - } - } - if (usage) { - if (sched_idx < rt->first_usage) - rt->first_usage = sched_idx; - if (sched_idx > rt->last_usage) - rt->last_usage = sched_idx; - } - } - } -} - -static rt_result Build(void *_obj) { - rt_render_graph_builder_obj *obj = _obj; - uint32_t *naive_order; - uint32_t pass_count; - rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count); - if (res != RT_SUCCESS) - return res; - - res = DeterminePassDependencies(obj); - if (res != RT_SUCCESS) - return res; - - uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order); - if (!optimized_order) - return RT_OUT_OF_MEMORY; - - /* Next steps: - * Determine first & last usage for every render-target - * For every pair of render-targets, note if they could be merged: - - Identical format - - Non-overlapping usage */ - DetermineRenderTargetUsage(obj, pass_count, optimized_order); - - - return RT_SUCCESS; +static int RequireExplicitSynchronization(void) { + return 1; } rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) { - // TODO: This is totally temporary. 
We do NOT want malloc anywhere in non-init functions - rt_render_graph_builder_obj *obj = malloc(sizeof(*obj)); - RT_ASSERT(obj, "Failed to allocate the builder object."); - memset(obj, 0, sizeof(*obj)); - rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16)); - RT_ASSERT(arena_res.ok, ""); - obj->arena = arena_res.arena; - - return (rt_render_graph_builder){ - .obj = obj, - .AddRenderTarget = AddRenderTarget, - .SetBackbuffer = SetBackbuffer, - .AddRenderPass = AddRenderPass, - .AddColorOutput = AddColorOutput, - .AddSampledInput = AddSampledInput, - .SetDepthStencilAttachment = SetDepthStencilAttachment, - .Build = Build, - }; + rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = rtCreateRenderTarget, + .RequireExplicitSynchronization = + RequireExplicitSynchronization}; + return rtCreateRenderGraphBuilder(&cbs); } void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) { - rt_render_graph_builder_obj *obj = builder->obj; - rtReleaseArena(&obj->arena); - free(obj); - memset(builder, 0, sizeof(*builder)); + rtDestroyRenderGraphBuilder(builder); } + +rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) { + RT_NOT_IMPLEMENTED; + return RT_UNKNOWN_ERROR; +} \ No newline at end of file diff --git a/src/renderer/vk/render_targets.c b/src/renderer/vk/render_targets.c index ec57e36..6c7e1e9 100644 --- a/src/renderer/vk/render_targets.c +++ b/src/renderer/vk/render_targets.c @@ -203,7 +203,7 @@ void ShutdownRenderTargetManagement(void) { _first_free = NULL; } -rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) { +rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info) { rt_render_target_handle handle = {0}; rtLockWrite(&_lock); @@ -222,7 +222,7 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t */ rtUnlockWrite(&_lock); - const char *name = rtResolveConstRelptr(&info->name); 
+ const char *name = info->name; slot->render_target.match_swapchain = 0; slot->render_target.image_count = g_swapchain.image_count; @@ -258,7 +258,7 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT; slot->render_target.aspect = VK_IMAGE_ASPECT_COLOR_BIT; } - slot->render_target.sample_count = rtSampleCountToFlags(info->sample_count); + slot->render_target.sample_count = rtSampleCountToFlags(info->samples); if (!CreateImageAndView(slot->render_target.extent, slot->render_target.format, slot->render_target.sample_count, @@ -282,11 +282,7 @@ out: return handle; } -rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void) { - return _swapchain_handle; -} - -void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle handle) { +void rtDestroyRenderTarget(rt_render_target_handle handle) { if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i) return; rtLockWrite(&_lock); @@ -310,6 +306,10 @@ rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) { return res; } +rt_render_target_handle rtGetSwapchainRenderTarget(void) { + return _swapchain_handle; +} + void rtUpdateSwapchainRenderTarget(void) { RT_ASSERT(_swapchain_handle.index != 0, "Invalid swap chain render target!"); rt_render_target_slot *slot = &_render_targets[_swapchain_handle.index]; diff --git a/src/renderer/vk/render_targets.h b/src/renderer/vk/render_targets.h index 45ee039..09edaeb 100644 --- a/src/renderer/vk/render_targets.h +++ b/src/renderer/vk/render_targets.h @@ -4,6 +4,9 @@ #include "gpu.h" #include "gfx/renderer_api.h" +#include "../common/common_render_graph.h" + + /* Must match RT_VK_MAX_SWAPCHAIN_IMAGES */ #define RT_VK_RENDER_TARGET_MAX_IMAGES 3 @@ -26,7 +29,11 @@ typedef struct { rt_render_target_match_swapchain_flags match_swapchain; } rt_render_target; +rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info); +void 
rtDestroyRenderTarget(rt_render_target_handle handle); + rt_render_target *rtGetRenderTarget(rt_render_target_handle handle); +rt_render_target_handle rtGetSwapchainRenderTarget(void); /* Update the render target that represents the swap chain */ void rtUpdateSwapchainRenderTarget(void); diff --git a/src/runtime/mem_arena.h b/src/runtime/mem_arena.h index 4b65005..c98bfd6 100644 --- a/src/runtime/mem_arena.h +++ b/src/runtime/mem_arena.h @@ -69,11 +69,11 @@ RT_INLINE void rtReturnTemporaryArena(rt_temp_arena tmp) { } /* Helper macros */ -#define RT_ARENA_PUSH_STRUCT(_Arena, _Type) rtArenaPush((_Arena), sizeof(_Type)) -#define RT_ARENA_PUSH_STRUCT_ZERO(_Arena, _Type) rtArenaPushZero((_Arena), sizeof(_Type)) +#define RT_ARENA_PUSH_STRUCT(_Arena, _Type) ((_Type *)rtArenaPush((_Arena), sizeof(_Type))) /* outer parentheses keep -> and [] bound to the cast result, not to the void pointer */ +#define RT_ARENA_PUSH_STRUCT_ZERO(_Arena, _Type) ((_Type *)rtArenaPushZero((_Arena), sizeof(_Type))) #define RT_ARENA_POP_STRUCT(_Arena, _Type) rtArenaPop((_Arena), sizeof(_Type)) -#define RT_ARENA_PUSH_ARRAY(_Arena, _Type, _N) rtArenaPush((_Arena), sizeof(_Type) * (_N)) -#define RT_ARENA_PUSH_ARRAY_ZERO(_Arena, _Type, _N) rtArenaPushZero((_Arena), sizeof(_Type) * (_N)) +#define RT_ARENA_PUSH_ARRAY(_Arena, _Type, _N) ((_Type *)rtArenaPush((_Arena), sizeof(_Type) * (_N))) +#define RT_ARENA_PUSH_ARRAY_ZERO(_Arena, _Type, _N) ((_Type *)rtArenaPushZero((_Arena), sizeof(_Type) * (_N))) #define RT_ARENA_POP_ARRAY(_Arena, _Type, _N) rtArenaPop((_Arena), sizeof(_Type) * (_N) #ifdef __cplusplus diff --git a/src/runtime/resource_manager.c b/src/runtime/resource_manager.c index ca72639..0dbfbcf 100644 --- a/src/runtime/resource_manager.c +++ b/src/runtime/resource_manager.c @@ -101,6 +101,7 @@ static size_t GetResourceDataSize(const rt_resource *resource) { case RT_RESOURCE_PIPELINE: return sizeof(rt_pipeline_info); case RT_RESOURCE_FRAMEGRAPH: { + #if 0 const rt_framegraph_info *info = resource->data; size_t size = sizeof(*info) + sizeof(rt_render_target_info) * info->render_target_count +
sizeof(rt_render_pass_info) * info->render_pass_count + info->names_size; @@ -110,6 +111,7 @@ static size_t GetResourceDataSize(const rt_resource *resource) { passes[i].write_render_target_count * sizeof(rt_render_target_write); } return size; + #endif } break; case RT_RESOURCE_EFFECT: { return sizeof(rt_effect_info); @@ -134,6 +136,7 @@ static void CopyResourceData(const rt_resource *resource, void *dest) { memcpy(dest, resource->data, sizeof(rt_pipeline_info)); break; case RT_RESOURCE_FRAMEGRAPH: { + #if 0 const rt_framegraph_info *info = resource->data; rt_framegraph_info *dest_info = dest; memcpy(dest_info, info, sizeof(*info)); @@ -202,6 +205,8 @@ static void CopyResourceData(const rt_resource *resource, void *dest) { if (src_name) rtSetRelptr(&passes_dest[i].name, names_begin + (src_name - src_names)); } + + #endif } break; case RT_RESOURCE_EFFECT: { memcpy(dest, resource->data, sizeof(rt_effect_info)); @@ -984,6 +989,7 @@ RT_DLLEXPORT void rDebugLogResource(rt_resource_id id, const rt_resource *resour rtLog("RESMGR", " compute shader: %llx", pipeline->compute_shader); } break; case RT_RESOURCE_FRAMEGRAPH: { + #if 0 static const char *format_str[RT_PIXEL_FORMAT_count] = { "", @@ -1061,6 +1067,7 @@ RT_DLLEXPORT void rDebugLogResource(rt_resource_id id, const rt_resource *resour writes[j].clear.depth_stencil.stencil); } } + #endif } break; case RT_RESOURCE_EFFECT: { const rt_effect_info *effect = resource->data; diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 956e178..98765be 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -32,6 +32,9 @@ extern "C" { #define RT_RESTRICT_VALUE_TO_BOUNDS(v, lower, upper) \ (((v) < (lower)) ? (lower) : (((v) > (upper)) ? 
(upper) : (v))) +#define RT_MIN(a, b) (((a) < (b))?(a):(b)) +#define RT_MAX(a, b) (((a) > (b))?(a):(b)) + #define RT_KB(n) ((n)*1024U) #define RT_MB(n) ((n)*1024U * 1024U) #define RT_GB(n) ((n)*1024U * 1024U * 1024U) @@ -127,7 +130,7 @@ RT_DLLEXPORT int rtAssertHandler(const char *expr, const char *msg, const char * // Asserts if p is "false", evaluates to p // NOTE that this will evaluate p multiple times! #define RT_VERIFY(p) \ - ((!p) ? (RT_DEBUGBREAK, rtAssertHandler(#p, "Verify failed", __FILE__, __LINE__), p) : p) + ((!(p)) ? (rtAssertHandler(#p, "Verify failed", __FILE__, __LINE__), (p)) : (p)) /* p is parenthesized so a compound expression such as a == b is negated as a whole */ #else #define RT_ASSERT(x, msg) RT_UNUSED(x) @@ -135,6 +138,9 @@ RT_DLLEXPORT int rtAssertHandler(const char *expr, const char *msg, const char * #define RT_VERIFY(p) (p) #endif +/* Makes it easier to search for unimplemented functions */ +#define RT_NOT_IMPLEMENTED RT_ASSERT_ALWAYS_EVAL(0, "Not implemented.") + enum { RT_INVALID_UNICODE = RT_CUSTOM_ERROR_START, RT_INSUFFICIENT_BUFFER,