diff --git a/src/renderer/common/common_render_graph.c b/src/renderer/common/common_render_graph.c
index 25773bd..0b2d3f1 100644
--- a/src/renderer/common/common_render_graph.c
+++ b/src/renderer/common/common_render_graph.c
@@ -78,12 +78,6 @@ typedef struct rt_render_graph_builder_obj {
     rt_render_graph_builder_platform_callbacks platform_cbs;
 } rt_render_graph_builder_obj;
 
-/* ****************************************************************************
- *
- * BUILDER CODE
- *
- * ****************************************************************************/
-
 static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
     rt_render_graph_builder_obj *obj = _obj;
 
@@ -657,7 +651,27 @@ static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
     return RT_UNKNOWN_ERROR;
 }
 
-static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order) {
+static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj,
+                                              const uint32_t *schedule) {
+    uint32_t *execution_levels = RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->pass_count);
+    if (!execution_levels)
+        return NULL;
+
+    const rt_pass_build_info *passes = obj->passes;
+    uint32_t pass_count              = obj->pass_count;
+    for (uint32_t i = 0; i < pass_count; ++i) {
+        uint32_t level    = 0;
+        uint32_t pass_idx = schedule[i];
+        for (uint32_t j = 0; j < passes[pass_idx].dependency_count; ++j) {
+            uint32_t dep_idx = passes[pass_idx].dependencies[j];
+            level            = RT_MAX(execution_levels[dep_idx] + 1, level);
+        }
+        execution_levels[pass_idx] = level;
+    }
+    return execution_levels;
+}
+
+static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order, const uint32_t *execution_levels) {
     size_t required_size = sizeof(rt_render_graph);
     required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
@@ -713,7 +727,7 @@ static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, cons
         graph->passes[i].signal_count = 0;
         graph->passes[i].first_wait   = 0;
         graph->passes[i].wait_count   = 0;
-        graph->passes[i].execution_level = i;
+        graph->passes[i].execution_level = execution_levels[passidx];
 
         graph->passes[i].depth_stencil =
             (obj->passes[i].depth_stencil_attachment != UINT_MAX)
@@ -788,6 +802,10 @@ static rt_result Build(void *_obj, rt_render_graph **p_graph) {
     if (res != RT_SUCCESS)
         return res;
 
+    uint32_t *execution_levels = DeterminePassExecutionLevels(obj, optimized_order);
+    if (!execution_levels)
+        return RT_OUT_OF_MEMORY;
+
     if (obj->platform_cbs.RequireExplicitSynchronization()) {
         res = CreateSynchronizationPoints(obj);
         if (res != RT_SUCCESS)
@@ -796,7 +814,7 @@ static rt_result Build(void *_obj, rt_render_graph **p_graph) {
         obj->sync_point_count = 0;
     }
 
-    *p_graph = CreateRenderGraph(obj, optimized_order);
+    *p_graph = CreateRenderGraph(obj, optimized_order, execution_levels);
     return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
 }
diff --git a/src/renderer/common/common_render_graph.h b/src/renderer/common/common_render_graph.h
index b880a60..95730b8 100644
--- a/src/renderer/common/common_render_graph.h
+++ b/src/renderer/common/common_render_graph.h
@@ -12,7 +12,6 @@ typedef struct {
     unsigned int samples;
     unsigned int layers;
-
 } rt_physical_render_target_info;
 
 typedef rt_render_target_handle
diff --git a/src/renderer/dx11/render_graph.cpp b/src/renderer/dx11/render_graph.cpp
index 8b19151..a77a560 100644
--- a/src/renderer/dx11/render_graph.cpp
+++ b/src/renderer/dx11/render_graph.cpp
@@ -141,9 +141,12 @@ extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *ren
     if (IsCopyResourcePossible(backbuffer)) {
        g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
     } else {
+        // NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
+        // that implements a blit.
+        // Another idea would be a compute shader that does a copy & filter, but that requires more work.
         RT_NOT_IMPLEMENTED;
     }
 
     rtReturnTemporaryArena(temp);
     return res;
-}
\ No newline at end of file
+}
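For readers skimming the diff: DeterminePassExecutionLevels assigns each pass the length of the longest dependency chain leading to it, so passes that land on the same level have no ordering constraint between each other. Below is a minimal standalone sketch of that rule; the pass_info struct, the pass names, and the hard-coded schedule are hypothetical stand-ins, not the engine's rt_pass_build_info or its scheduler.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for rt_pass_build_info; only the dependency data matters here. */
typedef struct {
    const char *name;
    uint32_t dependencies[4];
    uint32_t dependency_count;
} pass_info;

int main(void) {
    /* Example frame: shadow and gbuffer are independent, lighting needs both, post needs lighting. */
    pass_info passes[] = {
        {"shadow",   {0},    0},
        {"gbuffer",  {0},    0},
        {"lighting", {0, 1}, 2},
        {"post",     {2},    1},
    };
    uint32_t schedule[] = {0, 1, 2, 3}; /* topologically sorted, analogous to optimized_order */
    uint32_t levels[4]  = {0};

    for (uint32_t i = 0; i < 4; ++i) {
        uint32_t pass_idx = schedule[i];
        uint32_t level    = 0;
        /* Same rule as the new function: 1 + max level of all dependencies, 0 if there are none. */
        for (uint32_t j = 0; j < passes[pass_idx].dependency_count; ++j) {
            uint32_t dep_idx = passes[pass_idx].dependencies[j];
            if (levels[dep_idx] + 1 > level)
                level = levels[dep_idx] + 1;
        }
        levels[pass_idx] = level;
        printf("%s -> level %u\n", passes[pass_idx].name, level);
    }
    /* Prints: shadow and gbuffer on level 0, lighting on level 1, post on level 2. */
    return 0;
}

Because the schedule is already topologically sorted, every dependency's level is final by the time its dependents are visited, which is why a single forward pass over the schedule is sufficient.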