From c7e5bb8a31c92bd48dfbe7ac90ec9d99b4dea62a Mon Sep 17 00:00:00 2001
From: Kevin Trogant
Date: Tue, 2 Apr 2024 19:43:02 +0200
Subject: [PATCH] Render Graph prep

---
 src/gfx/gfx.h                  |   1 +
 src/gfx/gfx_main.c             |   8 +
 src/gfx/renderer_api.h         |  55 ++++
 src/renderer/vk/meson.build    |   1 +
 src/renderer/vk/render_graph.c | 597 +++++++++++++++++++++++++++++++++
 5 files changed, 662 insertions(+)
 create mode 100644 src/renderer/vk/render_graph.c

diff --git a/src/gfx/gfx.h b/src/gfx/gfx.h
index 612c956..8decd98 100644
--- a/src/gfx/gfx.h
+++ b/src/gfx/gfx.h
@@ -155,6 +155,7 @@ typedef struct {
     rt_relptr render_passes;
     uint32_t render_target_count;
     uint32_t render_pass_count;
+    rt_render_target_id backbuffer;
     rt_relptr names;
     uint32_t names_size;
 } rt_framegraph_info;
diff --git a/src/gfx/gfx_main.c b/src/gfx/gfx_main.c
index fcebcfb..22fd29e 100644
--- a/src/gfx/gfx_main.c
+++ b/src/gfx/gfx_main.c
@@ -55,6 +55,10 @@ extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(vo
 extern rt_result
 RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *);
 extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);
+
+extern rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void);
+extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *);
+
 extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
                                              const rt_cmd_begin_pass_info *);
 extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle);
@@ -109,6 +113,8 @@ static bool LoadRenderer(void) {
         RETRIEVE_SYMBOL(GetRenderFinishedSemaphore, rt_get_render_finished_semaphore_fn);
         RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
         RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
+        RETRIEVE_SYMBOL(CreateRenderGraphBuilder, rt_create_render_graph_builder_fn);
+        RETRIEVE_SYMBOL(DestroyRenderGraphBuilder, rt_destroy_render_graph_builder_fn);
         RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
         RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
         RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
@@ -142,6 +148,8 @@ static bool LoadRenderer(void) {
     g_renderer.GetRenderFinishedSemaphore = &rtRenGetRenderFinishedSemaphore;
     g_renderer.CreateBuffers = &rtRenCreateBuffers;
     g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
+    g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder;
+    g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder;
     g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
     g_renderer.CmdEndPass = &rtRenCmdEndPass;
     g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
diff --git a/src/gfx/renderer_api.h b/src/gfx/renderer_api.h
index 89a4389..c7dce10 100644
--- a/src/gfx/renderer_api.h
+++ b/src/gfx/renderer_api.h
@@ -207,6 +207,54 @@ typedef enum {
 
 typedef struct rt_pipeline_info_s rt_pipeline_info;
 
+
+/* Describes a render target that is registered with a render graph builder. */
+typedef struct {
+    const char *name;
+    rt_pixel_format format;
+    unsigned int width;
+    unsigned int height;
+
+    unsigned int samples;
+    unsigned int layers;
+} rt_attachment_info;
+
+enum {
+    /* Bit 0 contains the type: 0 -> graphics, 1 -> compute */
+    RT_PASS_FLAG_GRAPHICS = 0x0000,
+    RT_PASS_FLAG_COMPUTE = 0x0001,
+    RT_PASS_FLAG_TYPE_MASK = RT_PASS_FLAG_COMPUTE | RT_PASS_FLAG_GRAPHICS,
+
+    /* Always execute the pass, even if no objects will be rendered. */
+    RT_PASS_FLAG_EXECUTE_ALWAYS = 0x0002,
+};
+
+/* Describes a render pass; flags is a combination of RT_PASS_FLAG_* values. */
+typedef struct {
+    const char *name;
+    uint32_t flags;
+} rt_pass_info;
+
+/* Interface for describing a render graph. obj is the opaque builder state and must be
+ * passed as the first argument of every function. Render targets and passes are
+ * referenced by the names they were registered with. */
+typedef struct {
+    void *obj;
+
+    void (*AddRenderTarget)(void *obj, const rt_attachment_info *info);
+
+    void (*SetBackbuffer)(void *obj, const char *rt_name);
+
+    void (*AddRenderPass)(void *obj, const rt_pass_info *info);
+    void (*AddColorOutput)(void *obj, const char *pass_name, const char *rt_name);
+    void (*AddSampledInput)(void *obj, const char *pass_name, const char *rt_name);
+    void (*SetDepthStencilAttachment)(void *obj, const char *pass_name, const char *rt_name);
+
+
+    rt_result (*Build)(void *obj);
+} rt_render_graph_builder;
+
+
 typedef void rt_register_renderer_cvars_fn(void);
 typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
 typedef void rt_shutdown_renderer_fn(void);
@@ -234,6 +282,9 @@ typedef rt_result rt_create_buffers_fn(uint32_t count,
                                        const rt_buffer_info *info,
                                        rt_buffer_handle *p_buffers);
 typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
+typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void);
+typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder);
+
 typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
                                   const rt_cmd_begin_pass_info *info);
 typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf);
@@ -264,6 +315,10 @@ typedef struct {
     rt_get_render_finished_semaphore_fn *GetRenderFinishedSemaphore;
     rt_create_buffers_fn *CreateBuffers;
     rt_destroy_buffers_fn *DestroyBuffers;
+
+    /* Render Graph Functions */
+    rt_create_render_graph_builder_fn *CreateRenderGraphBuilder;
+    rt_destroy_render_graph_builder_fn *DestroyRenderGraphBuilder;
 
     /* Command Buffer Functions */
     rt_cmd_begin_pass_fn *CmdBeginPass;
diff --git a/src/renderer/vk/meson.build b/src/renderer/vk/meson.build
index a267a87..d77e5df 100644
--- a/src/renderer/vk/meson.build
+++ b/src/renderer/vk/meson.build
@@ -25,6 +25,7 @@ if vk_dep.found()
     'helper.c',
     'init.c',
     'pipelines.c',
+    'render_graph.c',
     'render_targets.c',
     'swapchain.c',
     'transfers.c',
diff --git a/src/renderer/vk/render_graph.c b/src/renderer/vk/render_graph.c
new file mode 100644
index 0000000..9451b0e
--- /dev/null
+++ b/src/renderer/vk/render_graph.c
@@ -0,0 +1,597 @@
+#include "gpu.h"
+
+#include "gfx/renderer_api.h"
+#include "runtime/mem_arena.h"
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* ****************************************************************************
+ *
+ *     BUILDER CODE
+ *
+ * ****************************************************************************/
+
+#define MAX_COLOR_ATTACHMENTS_PER_PASS 8
+#define MAX_SAMPLED_INPUTS_PER_PASS 8
+
+/* Builder-side copy of a render target description. first_usage and last_usage are
+ * written by DetermineRenderTargetUsage() as indices into the pass schedule. */
+typedef struct {
+    const char *name;
+    rt_pixel_format format;
+    unsigned int width;
+    unsigned int height;
+
+    unsigned int samples;
+    unsigned int layers;
+
+    uint32_t first_usage;
+    uint32_t last_usage;
+} rt_render_target_build_info;
+
+/* Builder-side description of a pass. color_attachments, sampled_inputs and
+ * depth_stencil_attachment hold render target indices (UINT_MAX = no depth stencil
+ * attachment); dependencies holds pass indices, see DeterminePassDependencies(). */
+typedef struct {
+    const char *name;
+    uint32_t flags;
+
+    uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS];
+    uint32_t color_attachment_count;
+
+    uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS];
+    uint32_t sampled_input_count;
+
+    uint32_t depth_stencil_attachment;
+
+    uint32_t *dependencies;
+    uint32_t dependency_count;
+} rt_pass_build_info;
+
+/* State behind rt_render_graph_builder::obj. Arrays and names are allocated from arena. */
+typedef struct {
+    rt_arena arena;
+
+    rt_render_target_build_info *render_targets;
+    uint32_t render_target_count;
+    uint32_t render_target_capacity;
+
+    rt_pass_build_info *passes;
+    uint32_t pass_count;
+    uint32_t pass_capacity;
+
+    uint32_t backbuffer;
+} rt_render_graph_builder_obj;
+
+/* Returns the index of the render target called name, or UINT_MAX if it is unknown. */
+static uint32_t FindRenderTarget(const rt_render_graph_builder_obj *obj, const char *name) {
+    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
+        if (strcmp(obj->render_targets[i].name, name) == 0)
+            return i;
+    }
+    return UINT_MAX;
+}
+
+/* Returns the index of the first pass called name, or UINT_MAX if it is unknown. */
+static uint32_t FindPass(const rt_render_graph_builder_obj *obj, const char *name) {
+    for (uint32_t i = 0; i < obj->pass_count; ++i) {
+        if (strcmp(obj->passes[i].name, name) == 0)
+            return i;
+    }
+    return UINT_MAX;
+}
+
+/* Duplicates str into the builder arena. Returns NULL if the arena allocation fails. */
+static char *CopyName(rt_render_graph_builder_obj *obj, const char *str) {
+    size_t size = strlen(str) + 1;
+    char *copy = rtArenaPush(&obj->arena, size);
+    if (copy)
+        memcpy(copy, str, size);
+    return copy;
+}
+
+/* Registers a render target under a copy of info->name. If the arena runs out of
+ * memory the failure is logged and the render target is not added. */
+static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
+    rt_render_graph_builder_obj *obj = _obj;
+
+    if (obj->render_target_count == obj->render_target_capacity) {
+        /* Grow by allocating a larger array from the arena and copying the old one. */
+        uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32;
+        rt_render_target_build_info *tmp =
+            RT_ARENA_PUSH_ARRAY(&obj->arena, rt_render_target_build_info, new_cap);
+        if (!tmp) {
+            rtLog("vk", "Out of memory while adding render target %s", info->name);
+            return;
+        }
+        if (obj->render_target_capacity)
+            memcpy(tmp,
+                   obj->render_targets,
+                   sizeof(rt_render_target_build_info) * obj->render_target_capacity);
+        obj->render_targets = tmp;
+        obj->render_target_capacity = new_cap;
+    }
+
+    char *name = CopyName(obj, info->name);
+    if (!name) {
+        rtLog("vk", "Out of memory while adding render target %s", info->name);
+        return;
+    }
+
+    rt_render_target_build_info *rt = &obj->render_targets[obj->render_target_count++];
+    rt->name = name;
+    rt->format = info->format;
+    rt->width = info->width;
+    rt->height = info->height;
+    rt->samples = info->samples;
+    rt->layers = info->layers;
+    rt->first_usage = 0;
+    rt->last_usage = 0;
+}
+
+/* Selects the render target from which Build() starts looking for contributing passes.
+ * Unknown names are logged and leave the current selection unchanged. */
+static void SetBackbuffer(void *_obj, const char *rt_name) {
+    rt_render_graph_builder_obj *obj = _obj;
+    uint32_t rt_index = FindRenderTarget(obj, rt_name);
+    if (rt_index == UINT_MAX) {
+        rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name);
+        return;
+    }
+    obj->backbuffer = rt_index;
+}
+
+/* Registers a pass without any attachments under a copy of info->name. If the arena
+ * runs out of memory the failure is logged and the pass is not added. */
+static void AddRenderPass(void *_obj, const rt_pass_info *info) {
+    rt_render_graph_builder_obj *obj = _obj;
+
+    if (obj->pass_count == obj->pass_capacity) {
+        uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32;
+        rt_pass_build_info *tmp = RT_ARENA_PUSH_ARRAY(&obj->arena, rt_pass_build_info, new_cap);
+        if (!tmp) {
+            rtLog("vk", "Out of memory while adding render pass %s", info->name);
+            return;
+        }
+        if (obj->pass_capacity)
+            memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity);
+        obj->passes = tmp;
+        obj->pass_capacity = new_cap;
+    }
+
+    char *name = CopyName(obj, info->name);
+    if (!name) {
+        rtLog("vk", "Out of memory while adding render pass %s", info->name);
+        return;
+    }
+
+    rt_pass_build_info *pass = &obj->passes[obj->pass_count++];
+    pass->name = name;
+    pass->flags = info->flags;
+    pass->color_attachment_count = 0;
+    pass->sampled_input_count = 0;
+    pass->depth_stencil_attachment = UINT_MAX;
+    pass->dependencies = NULL;
+    pass->dependency_count = 0;
+}
+
+/* Appends rt_name to the color outputs of pass_name. Unknown names and passes that
+ * already have MAX_COLOR_ATTACHMENTS_PER_PASS outputs are logged and ignored. */
+static void AddColorOutput(void *_obj, const char *pass_name, const char *rt_name) {
+    rt_render_graph_builder_obj *obj = _obj;
+
+    uint32_t rt_index = FindRenderTarget(obj, rt_name);
+    if (rt_index == UINT_MAX) {
+        rtLog("vk",
+              "Tried to add unknown render target %s as color output to %s",
+              rt_name,
+              pass_name);
+        return;
+    }
+
+    uint32_t pass_index = FindPass(obj, pass_name);
+    if (pass_index == UINT_MAX) {
+        rtLog("vk",
+              "Tried to add render target %s as color output to unknown render pass %s",
+              rt_name,
+              pass_name);
+        return;
+    }
+
+    rt_pass_build_info *pass = &obj->passes[pass_index];
+    if (pass->color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) {
+        /* Appending would write past the end of color_attachments. */
+        rtLog("vk", "Too many color attachments in pass %s", pass_name);
+        return;
+    }
+    pass->color_attachments[pass->color_attachment_count++] = rt_index;
+}
+
+/* Appends rt_name to the sampled inputs of pass_name. Unknown names and passes that
+ * already have MAX_SAMPLED_INPUTS_PER_PASS inputs are logged and ignored. */
+static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) {
+    rt_render_graph_builder_obj *obj = _obj;
+
+    uint32_t rt_index = FindRenderTarget(obj, rt_name);
+    if (rt_index == UINT_MAX) {
+        rtLog("vk",
+              "Tried to add unknown render target %s as sampled input to %s",
+              rt_name,
+              pass_name);
+        return;
+    }
+
+    uint32_t pass_index = FindPass(obj, pass_name);
+    if (pass_index == UINT_MAX) {
+        rtLog("vk",
+              "Tried to add render target %s as sampled input to unknown render pass %s",
+              rt_name,
+              pass_name);
+        return;
+    }
+
+    rt_pass_build_info *pass = &obj->passes[pass_index];
+    if (pass->sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) {
+        /* Appending would write past the end of sampled_inputs. */
+        rtLog("vk", "Too many sampled inputs in pass %s", pass_name);
+        return;
+    }
+    pass->sampled_inputs[pass->sampled_input_count++] = rt_index;
+}
+
+/* Makes rt_name the depth stencil attachment of pass_name, replacing any previous one.
+ * Unknown render target or pass names are logged and ignored. */
+static void SetDepthStencilAttachment(void *_obj, const char *pass_name, const char *rt_name) {
+    rt_render_graph_builder_obj *obj = _obj;
+
+    uint32_t rt_index = FindRenderTarget(obj, rt_name);
+    if (rt_index == UINT_MAX) {
+        rtLog("vk",
+              "Tried to add unknown render target %s as depth stencil attachment to %s",
+              rt_name,
+              pass_name);
+        return;
+    }
+
+    uint32_t pass_index = FindPass(obj, pass_name);
+    if (pass_index == UINT_MAX) {
+        rtLog("vk",
+              "Tried to add render target %s as depth stencil attachment to unknown render pass %s",
+              rt_name,
+              pass_name);
+        return;
+    }
+
+    obj->passes[pass_index].depth_stencil_attachment = rt_index;
+}
+
+/* Result of FindWriters(): how many passes were appended and how many were moved. */
+typedef struct {
+    uint32_t added;
+    uint32_t moved;
+} rt_find_writers_result;
+
+/* Collects the passes that write rt_index (as color output or depth stencil attachment)
+ * into p_passes, which already holds append_at entries. A writer that is not in the
+ * list yet is appended (added); a writer that is already in the list is moved to its
+ * end (moved). Afterwards the moved and added passes occupy the last moved + added
+ * entries of the list, which is append_at + added entries long. */
+static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
+                                          uint32_t rt_index,
+                                          uint32_t append_at,
+                                          uint32_t *p_passes) {
+    rt_find_writers_result res = {0, 0};
+    for (uint32_t i = 0; i < obj->pass_count; ++i) {
+        bool writes_rt = false;
+        if (obj->passes[i].depth_stencil_attachment == rt_index) {
+            writes_rt = true;
+        } else {
+            for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j) {
+                if (obj->passes[i].color_attachments[j] == rt_index) {
+                    writes_rt = true;
+                }
+            }
+        }
+
+        if (!writes_rt)
+            continue;
+
+        /* Check whether the pass is already part of the list. */
+        uint32_t lower_index = UINT32_MAX;
+        for (uint32_t j = 0; j < append_at; ++j) {
+            if (p_passes[j] == i) {
+                lower_index = j;
+                break;
+            }
+        }
+
+        if (lower_index == UINT32_MAX) {
+            p_passes[append_at++] = i;
+            res.added++;
+        } else {
+            /* Close the gap left by the pass and re-insert it at the end of the list. */
+            memmove(&p_passes[lower_index],
+                    &p_passes[lower_index + 1],
+                    (append_at - lower_index - 1) * sizeof(uint32_t));
+            p_passes[append_at - 1] = i;
+            res.moved++;
+        }
+    }
+    return res;
+}
+
+/* Puts the writers of search_rt at the end of p_order (which holds append_at entries)
+ * and then recurses into every render target sampled by those writers. Returns the new
+ * number of entries in p_order.
+ * NOTE(review): a dependency cycle between passes looks like it would recurse without
+ * terminating - confirm that callers cannot construct one. */
+static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
+                                    uint32_t search_rt,
+                                    uint32_t append_at,
+                                    uint32_t *p_order) {
+    rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order);
+    uint32_t new_append = append_at + writers.added;
+    for (uint32_t i = 0; i < writers.moved; ++i) {
+        uint32_t pass_idx = p_order[append_at - writers.moved + i];
+        const rt_pass_build_info *pass = &obj->passes[pass_idx];
+        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
+            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
+        }
+    }
+    for (uint32_t i = 0; i < writers.added; ++i) {
+        uint32_t pass_idx = p_order[append_at + i];
+        const rt_pass_build_info *pass = &obj->passes[pass_idx];
+        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
+            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
+        }
+    }
+    return new_append;
+}
+
+/* Allocates the naive pass order: all passes that (transitively) contribute to the
+ * backbuffer, reversed because the search starts at the backbuffer. Other passes are
+ * left out, so *p_count may be smaller than obj->pass_count. */
+static rt_result
+CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
+    uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
+    if (!order)
+        return RT_OUT_OF_MEMORY;
+    uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);
+
+    /* Now the pass writing the backbuffer is first, we need to reverse the order */
+    for (uint32_t i = 0; i < count / 2; ++i) {
+        uint32_t t = order[i];
+        order[i] = order[count - i - 1];
+        order[count - i - 1] = t;
+    }
+    *p_order = order;
+    *p_count = count;
+    return RT_SUCCESS;
+}
+
+/* Returns a newly allocated schedule of the pass_count passes in naive_order, or NULL if
+ * the allocation fails. naive_order is consumed: scheduled entries are removed from it.
+ * Reads pass->dependencies, so DeterminePassDependencies() has to run first. */
+static uint32_t *
+ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
+    /* Our goal is to calculate a schedule that:
+     *  A) Does not break the dependency chain
+     *  B) Has the maximum amount of overlap, i.e. keeps the GPU busy.
+     * This means that if pass A depends on pass B, we want to have as many passes in between as
+     * possible, to reduce the likelihood of stalls caused by A waiting for B to finish. */
+    uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
+    if (!schedule)
+        return NULL;
+    uint32_t scheduled_count = 0;
+
+    while (scheduled_count < pass_count) {
+        /* The number of passes remaining in naive_order */
+        uint32_t unscheduled_count = pass_count - scheduled_count;
+
+        /* It is always valid to use the front */
+        uint32_t selected_idx = 0;
+        uint32_t selected_score = 0;
+        for (uint32_t i = 0; i < unscheduled_count; ++i) {
+            /* Check if any dependency is not scheduled yet */
+            uint32_t pass_idx = naive_order[i];
+            const rt_pass_build_info *pass = &obj->passes[pass_idx];
+            uint32_t score = 0;
+            bool is_valid = true;
+
+            if (pass->dependency_count) {
+                for (uint32_t j = 0; j < unscheduled_count; ++j) {
+                    uint32_t pass2_idx = naive_order[j];
+                    for (uint32_t k = 0; k < pass->dependency_count; ++k) {
+                        if (pass->dependencies[k] == pass2_idx) {
+                            is_valid = false;
+                            break;
+                        }
+                    }
+                    if (!is_valid)
+                        break;
+                }
+                if (!is_valid)
+                    continue;
+
+                for (uint32_t j = 0; j < pass->dependency_count; ++j) {
+                    for (uint32_t k = 0; k < scheduled_count; ++k) {
+                        if (schedule[k] == pass->dependencies[j]) {
+                            score += scheduled_count - k;
+                            break;
+                        }
+                    }
+                }
+
+            } else {
+                score = UINT32_MAX;
+            }
+
+            if (score > selected_score) {
+                selected_score = score;
+                selected_idx = i;
+            }
+        }
+
+        schedule[scheduled_count++] = naive_order[selected_idx];
+        memmove(&naive_order[selected_idx],
+                &naive_order[selected_idx + 1],
+                (unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
+    }
+    return schedule;
+}
+
+/* Fills pass->dependencies of every pass with the indices of all passes that write one
+ * of its sampled inputs (as color output or depth stencil attachment). A pass may be
+ * listed more than once. Returns RT_OUT_OF_MEMORY if the arena is exhausted. */
+static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
+    /* NOTE(Kevin): This has a lot of overlap with FindWriters, so maybe it's possible to combine
+     * the two */
+    for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
+        rt_pass_build_info *pass = &obj->passes[pass_idx];
+        uint32_t dependency_capacity = pass->sampled_input_count;
+        if (dependency_capacity) {
+            pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity);
+            if (!pass->dependencies)
+                return RT_OUT_OF_MEMORY;
+        }
+        for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) {
+            uint32_t rt_index = pass->sampled_inputs[input_idx];
+            for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) {
+                const rt_pass_build_info *candidate = &obj->passes[candidate_idx];
+                bool is_dependency = false;
+                if (candidate->depth_stencil_attachment == rt_index)
+                    is_dependency = true;
+                for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) {
+                    if (candidate->color_attachments[j] == rt_index)
+                        is_dependency = true;
+                }
+
+                if (!is_dependency)
+                    continue;
+
+                if (pass->dependency_count == dependency_capacity) {
+                    /* The dependencies are still on top of the arena, so we can just grow that
+                     * array. NOTE(review): this assumes consecutive arena pushes are
+                     * contiguous - confirm against the arena implementation. */
+                    if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity))
+                        return RT_OUT_OF_MEMORY;
+                    dependency_capacity *= 2;
+                }
+                pass->dependencies[pass->dependency_count++] = candidate_idx;
+            }
+        }
+    }
+    return RT_SUCCESS;
+}
+
+/* Records for every render target the first and last position in schedule at which a
+ * pass uses it as depth stencil attachment, color output or sampled input. A render
+ * target that is never used ends up with first_usage == UINT32_MAX, last_usage == 0. */
+static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj,
+                                       uint32_t pass_count,
+                                       const uint32_t *schedule) {
+    for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) {
+        rt_render_target_build_info *rt = &obj->render_targets[rt_idx];
+        rt->first_usage = UINT32_MAX;
+        rt->last_usage = 0;
+        for (uint32_t sched_idx = 0; sched_idx < pass_count; ++sched_idx) {
+            uint32_t pass_idx = schedule[sched_idx];
+            const rt_pass_build_info *pass = &obj->passes[pass_idx];
+            bool usage = pass->depth_stencil_attachment == rt_idx;
+            if (!usage) {
+                for (unsigned int i = 0; i < pass->color_attachment_count; ++i) {
+                    if (pass->color_attachments[i] == rt_idx)
+                        usage = true;
+                }
+            }
+            if (!usage) {
+                for (unsigned int i = 0; i < pass->sampled_input_count; ++i) {
+                    if (pass->sampled_inputs[i] == rt_idx)
+                        usage = true;
+                }
+            }
+            if (usage) {
+                if (sched_idx < rt->first_usage)
+                    rt->first_usage = sched_idx;
+                if (sched_idx > rt->last_usage)
+                    rt->last_usage = sched_idx;
+            }
+        }
+    }
+}
+
+/* Computes the pass schedule for the passes contributing to the backbuffer and the
+ * usage range of every render target. Returns RT_SUCCESS, or the error of the step that
+ * failed. The schedule itself is not stored or returned yet. */
+static rt_result Build(void *_obj) {
+    rt_render_graph_builder_obj *obj = _obj;
+    uint32_t *naive_order;
+    uint32_t pass_count;
+    rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
+    if (res != RT_SUCCESS)
+        return res;
+
+    res = DeterminePassDependencies(obj);
+    if (res != RT_SUCCESS)
+        return res;
+
+    uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
+    if (!optimized_order)
+        return RT_OUT_OF_MEMORY;
+
+    /* Next steps:
+     * Determine first & last usage for every render-target
+     * For every pair of render-targets, note if they could be merged:
+       - Identical format
+       - Non-overlapping usage */
+    DetermineRenderTargetUsage(obj, pass_count, optimized_order);
+
+    return RT_SUCCESS;
+}
+
+/* Creates a render graph builder whose arena is created with RT_MB(16). If the builder
+ * cannot be allocated, the returned struct is zeroed (obj == NULL, no function
+ * pointers). Release the builder with DestroyRenderGraphBuilder(). */
+rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
+    /* TODO: This is totally temporary. We do NOT want malloc anywhere in non-init functions */
+    rt_render_graph_builder_obj *obj = calloc(1, sizeof(*obj));
+    RT_ASSERT(obj, "Failed to allocate the builder object.");
+    if (!obj)
+        return (rt_render_graph_builder){0};
+
+    rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16));
+    RT_ASSERT(arena_res.ok, "Failed to create the builder arena.");
+    if (!arena_res.ok) {
+        free(obj);
+        return (rt_render_graph_builder){0};
+    }
+    obj->arena = arena_res.arena;
+
+    return (rt_render_graph_builder){
+        .obj = obj,
+        .AddRenderTarget = AddRenderTarget,
+        .SetBackbuffer = SetBackbuffer,
+        .AddRenderPass = AddRenderPass,
+        .AddColorOutput = AddColorOutput,
+        .AddSampledInput = AddSampledInput,
+        .SetDepthStencilAttachment = SetDepthStencilAttachment,
+        .Build = Build,
+    };
+}
+
+/* Releases the builder's arena and object and zeroes *builder. Safe to call with a NULL
+ * builder or with a builder whose obj is NULL. */
+void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
+    if (!builder)
+        return;
+    rt_render_graph_builder_obj *obj = builder->obj;
+    if (obj) {
+        rtReleaseArena(&obj->arena);
+        free(obj);
+    }
+    memset(builder, 0, sizeof(*builder));
+}