#include "config.h"
#include "gfx.h"
#include "handles.h"
#include "hashing.h"
#include "mem_arena.h"
#include "renderer_api.h"
#include "threading.h"

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

RT_CVAR_I(rt_MaxFramegraphs, "Maximum number of framegraphs. Default 16", 16);

#define RT_FRAMEGRAPH_MAX_PASSES 32
#define RT_FRAMEGRAPH_MAX_RENDER_TARGETS 32
#define RT_RENDERPASS_MAX_READS 8
#define RT_RENDERPASS_MAX_WRITES 8

/* A render target owned by a framegraph, backed by a renderer API handle. */
typedef struct {
    rt_render_target_id id;
    rt_pixel_format format;
    unsigned int width;
    unsigned int height;
    unsigned int sample_count;
    rt_render_target_handle api_render_target;
} rt_render_target;

/* A scheduled render pass. Passes that share an execution_level have no
 * dependencies on each other and may be dispatched concurrently. */
typedef struct {
    rt_render_pass_id id;
    int execution_level;
    unsigned int read_count;
    unsigned int write_count;
    rt_render_pass_bind_fns bound_fns;
    rt_render_target_read reads[RT_RENDERPASS_MAX_READS];
    rt_render_target_write writes[RT_RENDERPASS_MAX_WRITES];
} rt_render_pass;

struct rt_framegraph_s {
    uint32_t pass_count;
    uint32_t render_target_count;
    rt_framegraph *next_free; /* intrusive free-list link */
    rt_render_pass passes[RT_FRAMEGRAPH_MAX_PASSES];
    rt_render_target render_targets[RT_FRAMEGRAPH_MAX_RENDER_TARGETS];
};

/* Fixed pool of framegraphs; unused entries are chained through next_free.
 * All free-list mutation is guarded by _free_list_lock. */
static rt_framegraph *_framegraphs;
static rt_framegraph *_first_free;
static rt_mutex *_free_list_lock;

/* Pushes a framegraph back onto the shared free list. */
static void ReturnFrameGraph(rt_framegraph *framegraph) {
    rtLockMutex(_free_list_lock);
    framegraph->next_free = _first_free;
    _first_free = framegraph;
    rtUnlockMutex(_free_list_lock);
}

/* Allocates the framegraph pool and builds the initial free list.
 * Returns RT_SUCCESS, RT_UNKNOWN_ERROR (mutex creation failed) or
 * RT_OUT_OF_MEMORY. */
rt_result InitFramegraphManager(void) {
    _free_list_lock = rtCreateMutex();
    if (!_free_list_lock)
        return RT_UNKNOWN_ERROR;
    _framegraphs = calloc((size_t)rt_MaxFramegraphs.i, sizeof(rt_framegraph));
    if (!_framegraphs) {
        /* FIX: do not leak the mutex on the OOM path. */
        rtDestroyMutex(_free_list_lock);
        _free_list_lock = NULL;
        return RT_OUT_OF_MEMORY;
    }
    /* Chain every pool entry into the free list; the last one terminates it. */
    for (int i = 0; i < rt_MaxFramegraphs.i; ++i)
        _framegraphs[i].next_free =
            (i < rt_MaxFramegraphs.i - 1) ? &_framegraphs[i + 1] : NULL;
    _first_free = &_framegraphs[0];
    return RT_SUCCESS;
}

void ShutdownFramegraphManager(void) {
    free(_framegraphs);
    rtDestroyMutex(_free_list_lock);
    /* FIX: clear the globals so accidental use after shutdown fails loudly
     * instead of dereferencing dangling pointers. */
    _framegraphs = NULL;
    _first_free = NULL;
    _free_list_lock = NULL;
}

/* Temporary per-pass bookkeeping used while computing the schedule. */
typedef struct {
    unsigned int dependency_count; /* number of unscheduled passes this one waits on */
    int execution_level;           /* -1 while unscheduled */
} rt_pass_construct;

/* qsort comparator ordering passes by ascending execution level. */
static int CompareRenderPassExecutionLevels(const void *a, const void *b) {
    const rt_render_pass *pass_a = a, *pass_b = b;
    /* Three-way compare instead of subtraction: cannot overflow. */
    return (pass_a->execution_level > pass_b->execution_level) -
           (pass_a->execution_level < pass_b->execution_level);
}

/* Builds the dependency matrix between passes, assigns each pass an execution
 * level (level N runs only after all levels < N) and copies the pass data into
 * the graph, sorted by level. Temporary storage comes from `arena`.
 * Returns false on OOM or if no valid schedule exists (dependency cycle). */
static bool
CreateRenderPasses(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
    uint32_t render_pass_count = info->render_pass_count;
    bool result = false;

    /* Pass A depends on pass B, if:
     *   B preceeds A in the list of render passes AND
     *   B writes to a render target that A reads from. */
    bool *dependency_matrix =
        rtArenaPushZero(arena, render_pass_count * render_pass_count * sizeof(bool));
    if (!dependency_matrix) {
        rtLog("GFX",
              "Not enough memory to allocate a %ux%u dependency matrix.",
              render_pass_count,
              render_pass_count);
        goto out;
    }

/* Checks if pass "dependent_idx" depends on pass "dependency_idx" */
#define PASS_DEPENDS(dependent_idx, dependency_idx)                                                \
    dependency_matrix[(dependency_idx)*render_pass_count + (dependent_idx)]

    rt_pass_construct *construct_passes =
        RT_ARENA_PUSH_ARRAY_ZERO(arena, rt_pass_construct, render_pass_count);
    if (!construct_passes) {
        rtLog("GFX",
              "Not enough memory to allocate construction information for %u passes.",
              render_pass_count);
        goto out;
    }

    const rt_render_pass_info *pass_info = rtResolveConstRelptr(&info->render_passes);
    for (uint32_t i = 0; i < render_pass_count; ++i) {
        construct_passes[i].execution_level = -1; /* not scheduled yet */

        const rt_render_target_write *writes_i =
            rtResolveConstRelptr(&pass_info[i].write_render_targets);
        for (uint32_t j = i + 1; j < render_pass_count; ++j) {
            const rt_render_target_read *reads_j =
                rtResolveConstRelptr(&pass_info[j].read_render_targets);
            bool depends = false;
            for (uint32_t read_idx = 0; read_idx < pass_info[j].read_render_target_count;
                 ++read_idx) {
                for (uint32_t write_idx = 0; write_idx < pass_info[i].write_render_target_count;
                     ++write_idx) {
                    if (writes_i[write_idx].render_target == reads_j[read_idx].render_target)
                        depends = true;
                }
            }
            PASS_DEPENDS(j, i) = depends;
            if (depends)
                ++construct_passes[j].dependency_count;
        }
    }

    /* Pass A can be executed concurrently with pass B if:
     *  1. A and B don't write to the same render target AND
     *  2. A's dependencies and B's dependencies have finished executing. */

    /* We can have at most render_pass_count execution levels */
    uint32_t *level_passes = RT_ARENA_PUSH_ARRAY_ZERO(arena, uint32_t, render_pass_count);
    if (!level_passes) {
        rtLog("GFX", "Failed to allocate a temporary array for constructing execution levels.");
        goto out;
    }

    uint32_t unscheduled_passes = render_pass_count;
    for (int level = 0; level < (int)render_pass_count; ++level) {
        unsigned int level_pass_count = 0;
        for (uint32_t i = 0; i < render_pass_count; ++i) {
            if (construct_passes[i].execution_level == -1 &&
                construct_passes[i].dependency_count == 0) {
                /* Check that no writes conflict with passes already in this level. */
                bool write_conflict = false;
                const rt_render_target_write *writes_i =
                    rtResolveConstRelptr(&pass_info[i].write_render_targets);
                for (unsigned int j = 0; j < level_pass_count; ++j) {
                    /* FIX: compare against the j-th pass already scheduled into
                     * this level. The original read level_passes[i], indexing
                     * past the initialized range of the array. */
                    uint32_t pass_idx = level_passes[j];
                    const rt_render_target_write *pass_writes =
                        rtResolveConstRelptr(&pass_info[pass_idx].write_render_targets);
                    for (uint32_t k = 0; k < pass_info[i].write_render_target_count; ++k) {
                        for (uint32_t l = 0; l < pass_info[pass_idx].write_render_target_count;
                             ++l) {
                            if (writes_i[k].render_target == pass_writes[l].render_target) {
                                write_conflict = true;
                                break;
                            }
                        }
                        if (write_conflict)
                            break;
                    }
                    if (write_conflict)
                        break;
                }
                if (!write_conflict) {
                    RT_ASSERT(level_pass_count < render_pass_count, "");
                    level_passes[level_pass_count++] = i;
                    construct_passes[i].execution_level = level;
                }
            }
        }

        if (level_pass_count == 0) {
            /* No schedulable pass left => dependency cycle in the graph. */
            rtLog("GFX", "Failed to compute a valid schedule for the provided framegraph.");
            goto out;
        }

        /* level_passes now contains the passes we can execute concurrently.
         * Decrement dependency count for all passes that depend on a pass in
         * this level. */
        for (uint32_t i = 0; i < level_pass_count; ++i) {
            for (uint32_t j = 0; j < render_pass_count; ++j) {
                if (PASS_DEPENDS(j, level_passes[i]))
                    --construct_passes[j].dependency_count;
            }
        }

        unscheduled_passes -= level_pass_count;
        if (unscheduled_passes == 0)
            break;
    }
    RT_ASSERT(unscheduled_passes == 0, "Did not schedule all passes");

    /* construct_passes now contains the "execution level" for each pass.
     * We execute passes in that order; those with the same execution level can
     * be executed concurrently. */
    graph->pass_count = render_pass_count;
    for (uint32_t i = 0; i < render_pass_count; ++i) {
        graph->passes[i].execution_level = construct_passes[i].execution_level;

        const rt_render_target_write *writes =
            rtResolveConstRelptr(&pass_info[i].write_render_targets);
        const rt_render_target_read *reads =
            rtResolveConstRelptr(&pass_info[i].read_render_targets);
        memcpy(graph->passes[i].writes,
               writes,
               pass_info[i].write_render_target_count * sizeof(rt_render_target_write));
        memcpy(graph->passes[i].reads,
               reads,
               pass_info[i].read_render_target_count * sizeof(rt_render_target_read));
        graph->passes[i].write_count = pass_info[i].write_render_target_count;
        graph->passes[i].read_count = pass_info[i].read_render_target_count;
        graph->passes[i].id = pass_info[i].id;
    }

    /* Sort by execution level */
    qsort(graph->passes,
          render_pass_count,
          sizeof(rt_render_pass),
          CompareRenderPassExecutionLevels);

    result = true;
out:
    return result;
#undef PASS_DEPENDS
}

/* Copies the render target descriptions into the graph and creates the backend
 * render targets. On failure, destroys any targets created so far and returns
 * false. */
static bool
CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
    bool result = false;
    /* TODO(Kevin): determine aliasing opportunities */
    const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
    for (uint32_t i = 0; i < info->render_target_count; ++i) {
        graph->render_targets[i].id = render_targets[i].id;
        graph->render_targets[i].format = render_targets[i].format;
        graph->render_targets[i].width = render_targets[i].width;
        graph->render_targets[i].height = render_targets[i].height;
        graph->render_targets[i].sample_count = render_targets[i].sample_count;
        graph->render_targets[i].api_render_target =
            g_renderer.CreateRenderTarget(&render_targets[i]);
        if (!RT_IS_HANDLE_VALID(graph->render_targets[i].api_render_target)) {
            rtReportError("GFX", "Failed to create render target %u of framegraph.", i);
            for (uint32_t j = 0; j < i; ++j)
                g_renderer.DestroyRenderTarget(graph->render_targets[j].api_render_target);
            goto out;
        }
    }
    /* FIX: record the count; it was never set, leaving it at 0 after the
     * graph-acquire memset. */
    graph->render_target_count = info->render_target_count;
    result = true;
out:
    return result;
}

/* Validates counts, ids, formats and read/write limits of a framegraph
 * description. Reports an error and returns false on the first violation. */
static bool ValidateInfo(const rt_framegraph_info *info) {
    if (info->render_pass_count > RT_FRAMEGRAPH_MAX_PASSES) {
        rtReportError("GFX",
                      "Framegraph has too many passes: %u (maximum allowed is %u)",
                      info->render_pass_count,
                      RT_FRAMEGRAPH_MAX_PASSES);
        return false;
    }
    if (info->render_target_count > RT_FRAMEGRAPH_MAX_RENDER_TARGETS) {
        rtReportError("GFX",
                      "Framegraph has too many render targets: %u (maximum allowed is %u)",
                      info->render_target_count,
                      RT_FRAMEGRAPH_MAX_RENDER_TARGETS);
        return false;
    }

    const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
    for (uint32_t i = 0; i < info->render_target_count; ++i) {
        if (render_targets[i].id == 0) {
            rtReportError("GFX", "Framegraph render target %u has invalid id 0", i);
            return false;
        } else if ((render_targets[i].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
                    render_targets[i].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) &&
                   (render_targets[i].width != render_targets[i].height)) {
            rtReportError("GFX",
                          "Framegraph render target %u: If width or height is set to "
                          "SWAPCHAIN, both values must be set to SWAPCHAIN.",
                          i);
            return false;
        } else if (render_targets[i].format >= RT_PIXEL_FORMAT_count) {
            rtReportError("GFX",
                          "Framegraph render target %u format is outside the allowed range.",
                          i);
            return false;
        }
    }

    const rt_render_pass_info *passes = rtResolveConstRelptr(&info->render_passes);
    for (uint32_t i = 0; i < info->render_pass_count; ++i) {
        if (passes[i].id == 0) {
            rtReportError("GFX", "Framegraph pass %u has invalid id 0", i);
            return false;
        } else if (passes[i].read_render_target_count > RT_RENDERPASS_MAX_READS) {
            rtReportError(
                "GFX",
                "Framegraph pass %u reads too many rendertargets: %u (maximum allowed is %u)",
                i,
                passes[i].read_render_target_count,
                RT_RENDERPASS_MAX_READS);
            return false;
        } else if (passes[i].write_render_target_count > RT_RENDERPASS_MAX_WRITES) {
            rtReportError(
                "GFX",
                "Framegraph pass %u writes too many rendertargets: %u (maximum allowed is %u)",
                i,
                passes[i].write_render_target_count,
                RT_RENDERPASS_MAX_WRITES);
            return false;
        }
    }
    return true;
}

/* Creates a framegraph from a validated description. Returns NULL if the
 * description is invalid, the pool is exhausted, or construction fails.
 * Ownership: destroy the result with rtDestroyFramegraph(). */
RT_DLLEXPORT rt_framegraph *rtCreateFramegraph(const rt_framegraph_info *info) {
    if (!ValidateInfo(info)) {
        return NULL;
    }

    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena) {
        rtReportError("GFX", "Failed to acquire a temporary arena for constructing a framegraph");
        return NULL;
    }

    rt_framegraph *graph = NULL;

    /* Acquire a unused framegraph */
    rtLockMutex(_free_list_lock);
    graph = _first_free;
    if (graph)
        _first_free = graph->next_free;
    rtUnlockMutex(_free_list_lock);

    if (!graph) {
        /* FIX: report pool exhaustion instead of failing silently. */
        rtLog("GFX", "Framegraph pool exhausted (rt_MaxFramegraphs = %d).", rt_MaxFramegraphs.i);
        goto out;
    }
    memset(graph, 0, sizeof(*graph));

    if (!CreateRenderPasses(graph, info, temp.arena)) {
        ReturnFrameGraph(graph);
        graph = NULL;
        goto out;
    }

    if (!CreateRenderTargets(graph, info, temp.arena)) {
        ReturnFrameGraph(graph);
        graph = NULL;
        goto out;
    }

out:
    rtReturnTemporaryArena(temp);
    return graph;
}

/* Destroys a framegraph's backend render targets and returns the graph to the
 * pool. Safe to call with NULL. */
RT_DLLEXPORT void rtDestroyFramegraph(rt_framegraph *framegraph) {
    if (!framegraph)
        return;
    /* FIX: release the API render targets created in CreateRenderTargets();
     * previously they leaked when a framegraph was destroyed. */
    for (uint32_t i = 0; i < framegraph->render_target_count; ++i) {
        if (RT_IS_HANDLE_VALID(framegraph->render_targets[i].api_render_target))
            g_renderer.DestroyRenderTarget(framegraph->render_targets[i].api_render_target);
    }
    ReturnFrameGraph(framegraph);
}

/* Binds the Prepare/Execute/Finalize callbacks to the pass with the given id.
 * Logs (but allows) rebinding and logs unknown pass ids. */
RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph,
                                   rt_render_pass_id id,
                                   const rt_render_pass_bind_fns *bind_fns) {
    for (uint32_t i = 0; i < framegraph->pass_count; ++i) {
        if (framegraph->passes[i].id == id) {
            if (framegraph->passes[i].bound_fns.Execute)
                rtLog("GFX", "Rebound pass %x to new functions", id);
            framegraph->passes[i].bound_fns = *bind_fns;
            return;
        }
    }
    rtLog("GFX", "Tried to bind functions to unknown render pass %x", id);
}

/* Runs all bound passes in execution-level order. Unbound passes are logged
 * and skipped. Passes are already sorted by execution level. */
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) {
    if (framegraph->pass_count == 0)
        return;

    int execution_level = framegraph->passes[0].execution_level;
    uint32_t level_start = 0;

    for (uint32_t i = 0; i <= framegraph->pass_count && level_start < framegraph->pass_count;
         ++i) {
        /* The index one past the last pass closes the final level. */
        if ((i == framegraph->pass_count) ||
            (framegraph->passes[i].execution_level > execution_level)) {
            /* Dispatch all passes in the current execution level */
            for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) {
                bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL &&
                                  framegraph->passes[pass_idx].bound_fns.Execute != NULL &&
                                  framegraph->passes[pass_idx].bound_fns.Finalize != NULL;
                if (!pass_bound) {
                    rtLog("GFX",
                          "Framegraph pass %u (%x) is not bound to any function.",
                          pass_idx,
                          framegraph->passes[pass_idx].id);
                    continue;
                }

                rt_render_pass_id id = framegraph->passes[pass_idx].id;
                const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
                const rt_render_target_read *reads = framegraph->passes[pass_idx].reads;
                uint32_t write_count = framegraph->passes[pass_idx].write_count;
                uint32_t read_count = framegraph->passes[pass_idx].read_count;

                /* TODO(Kevin): Every one of these should be a job-dispatch*/
                framegraph->passes[pass_idx]
                    .bound_fns.Prepare(id, writes, write_count, reads, read_count);
                framegraph->passes[pass_idx]
                    .bound_fns.Execute(id, writes, write_count, reads, read_count);
                framegraph->passes[pass_idx]
                    .bound_fns.Finalize(id, writes, write_count, reads, read_count);
            }

            /* Start next level */
            level_start = i;
            if (i < framegraph->pass_count)
                execution_level = framegraph->passes[i].execution_level;
        }
    }
}

/* Hashes a render target name to a non-zero id (0 is the invalid id). */
RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len) {
    rt_render_target_id id = rtHashBytes32(name, len);
    if (id == 0)
        id = ~id;
    return id;
}

/* Hashes a render pass name to a non-zero id (0 is the invalid id). */
RT_DLLEXPORT rt_render_pass_id rtCalculateRenderPassID(const char *name, size_t len) {
    rt_render_pass_id id = rtHashBytes32(name, len);
    if (id == 0)
        id = ~id;
    return id;
}