/* rtengine/src/runtime/gfx_framegraph.c */
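/*
* Framegraph management: pooled framegraph allocation, dependency-based
* scheduling of render passes into concurrent "execution levels", render
* target creation through the renderer backend, pass binding, and per-frame
* execution.
*/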
#include "config.h"
#include "gfx.h"
#include "handles.h"
#include "hashing.h"
#include "mem_arena.h"
#include "renderer_api.h"
#include "threading.h"
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
RT_CVAR_I(rt_MaxFramegraphs, "Maximum number of framegraphs. Default 16", 16);
#define RT_FRAMEGRAPH_MAX_PASSES 32
#define RT_FRAMEGRAPH_MAX_RENDER_TARGETS 32
#define RT_RENDERPASS_MAX_READS 8
#define RT_RENDERPASS_MAX_WRITES 8
typedef struct {
rt_render_target_id id;
rt_pixel_format format;
unsigned int width;
unsigned int height;
unsigned int sample_count;
rt_render_target_handle api_render_target;
} rt_render_target;
typedef struct {
rt_render_pass_id id;
int execution_level;
unsigned int read_count;
unsigned int write_count;
rt_render_pass_bind_fns bound_fns;
rt_render_target_read reads[RT_RENDERPASS_MAX_READS];
rt_render_target_write writes[RT_RENDERPASS_MAX_WRITES];
} rt_render_pass;
struct rt_framegraph_s {
uint32_t pass_count;
uint32_t render_target_count;
rt_framegraph *next_free;
rt_render_pass passes[RT_FRAMEGRAPH_MAX_PASSES];
rt_render_target render_targets[RT_FRAMEGRAPH_MAX_RENDER_TARGETS];
};
static rt_framegraph *_framegraphs;
static rt_framegraph *_first_free;
static rt_mutex *_free_list_lock;
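/* Framegraphs are pooled: InitFramegraphManager allocates rt_MaxFramegraphs
* graphs up front, and unused graphs are chained through next_free, guarded
* by _free_list_lock. */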
static void ReturnFrameGraph(rt_framegraph *framegraph) {
rtLockMutex(_free_list_lock);
framegraph->next_free = _first_free;
_first_free = framegraph;
rtUnlockMutex(_free_list_lock);
}
rt_result InitFramegraphManager(void) {
_free_list_lock = rtCreateMutex();
if (!_free_list_lock)
return RT_UNKNOWN_ERROR;
_framegraphs = calloc((size_t)rt_MaxFramegraphs.i, sizeof(rt_framegraph));
if (!_framegraphs) {
/* Don't leak the free-list mutex if the pool allocation fails. */
rtDestroyMutex(_free_list_lock);
return RT_OUT_OF_MEMORY;
}
for (int i = 0; i < rt_MaxFramegraphs.i; ++i)
_framegraphs[i].next_free = (i < rt_MaxFramegraphs.i - 1) ? &_framegraphs[i + 1] : NULL;
_first_free = &_framegraphs[0];
return RT_SUCCESS;
}
void ShutdownFramegraphManager(void) {
free(_framegraphs);
rtDestroyMutex(_free_list_lock);
}
typedef struct {
unsigned int dependency_count; /* number of still-unscheduled passes this pass depends on */
int execution_level;           /* -1 while the pass has not been scheduled yet */
} rt_pass_construct;
static int CompareRenderPassExecutionLevels(const void *a, const void *b) {
const rt_render_pass *pass_a = a, *pass_b = b;
return pass_a->execution_level - pass_b->execution_level;
}
static bool
CreateRenderPasses(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
uint32_t render_pass_count = info->render_pass_count;
bool result = false;
/* Pass A depends on pass B, if:
* B precedes A in the list of render passes AND
* B writes to a render target that A reads from. */
bool *dependency_matrix =
rtArenaPushZero(arena, render_pass_count * render_pass_count * sizeof(bool));
if (!dependency_matrix) {
rtLog("GFX",
"Not enough memory to allocate a %ux%u dependency matrix.",
render_pass_count,
render_pass_count);
goto out;
}
/* Checks if pass "dependent_idx" depends on pass "dependency_idx" */
#define PASS_DEPENDS(dependent_idx, dependency_idx) \
dependency_matrix[(dependency_idx)*render_pass_count + (dependent_idx)]
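/* Illustrative example (the pass names are made up, not part of the engine):
* with passes declared in the order [gbuffer, lighting, post], where lighting
* reads a target written by gbuffer and post reads a target written by
* lighting, the loop below sets PASS_DEPENDS(1, 0) and PASS_DEPENDS(2, 1)
* and increments the dependency_count of passes 1 and 2 accordingly. */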
rt_pass_construct *construct_passes =
RT_ARENA_PUSH_ARRAY_ZERO(arena, rt_pass_construct, render_pass_count);
if (!construct_passes) {
rtLog("GFX",
"Not enough memory to allocate construction information for %u passes.",
render_pass_count);
goto out;
}
const rt_render_pass_info *pass_info = rtResolveConstRelptr(&info->render_passes);
for (uint32_t i = 0; i < render_pass_count; ++i) {
construct_passes[i].execution_level = -1; /* not scheduled yet */
const rt_render_target_write *writes_i =
rtResolveConstRelptr(&pass_info[i].write_render_targets);
for (uint32_t j = i + 1; j < render_pass_count; ++j) {
const rt_render_target_read *reads_j =
rtResolveConstRelptr(&pass_info[j].read_render_targets);
bool depends = false;
for (uint32_t read_idx = 0; read_idx < pass_info[j].read_render_target_count;
++read_idx) {
for (uint32_t write_idx = 0; write_idx < pass_info[i].write_render_target_count;
++write_idx) {
if (writes_i[write_idx].render_target == reads_j[read_idx].render_target)
depends = true;
}
}
PASS_DEPENDS(j, i) = depends;
if (depends)
++construct_passes[j].dependency_count;
}
}
/* Pass A can be executed concurrently with pass B if:
* 1. A and B don't write to the same render target AND
* 2. A's dependencies and B's dependencies have finished executing. */
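/* For the example above this yields level 0 = { gbuffer }, level 1 = { lighting },
* level 2 = { post }; two passes with no dependency relation and disjoint
* write sets would share a level instead. */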
/* We can have at most render_pass_count execution levels */
uint32_t *level_passes = RT_ARENA_PUSH_ARRAY_ZERO(arena, uint32_t, render_pass_count);
if (!level_passes) {
rtLog("GFX", "Failed to allocate a temporary array for constructing execution levels.");
goto out;
}
uint32_t unscheduled_passes = render_pass_count;
for (int level = 0; level < (int)render_pass_count; ++level) {
unsigned int level_pass_count = 0;
for (uint32_t i = 0; i < render_pass_count; ++i) {
if (construct_passes[i].execution_level == -1 &&
construct_passes[i].dependency_count == 0) {
/* Check that no writes conflict */
bool write_conflict = false;
const rt_render_target_write *writes_i =
rtResolveConstRelptr(&pass_info[i].write_render_targets);
for (unsigned int j = 0; j < level_pass_count; ++j) {
/* Compare against the passes already scheduled into this level. */
uint32_t pass_idx = level_passes[j];
const rt_render_target_write *pass_writes =
rtResolveConstRelptr(&pass_info[pass_idx].write_render_targets);
for (uint32_t k = 0; k < pass_info[i].write_render_target_count; ++k) {
for (uint32_t l = 0; l < pass_info[pass_idx].write_render_target_count;
++l) {
if (writes_i[k].render_target == pass_writes[l].render_target) {
write_conflict = true;
break;
}
}
if (write_conflict)
break;
}
if (write_conflict)
break;
}
if (!write_conflict) {
RT_ASSERT(level_pass_count < render_pass_count,
"Scheduled more passes into one level than there are passes in total.");
level_passes[level_pass_count++] = i;
construct_passes[i].execution_level = level;
}
}
}
if (level_pass_count == 0) {
rtLog("GFX", "Failed to compute a valid schedule for the provided framegraph.");
goto out;
}
/* level_passes now contains the passes we can execute concurrently.
* Decrement the dependency count of every pass that depends on a pass in this level. */
for (uint32_t i = 0; i < level_pass_count; ++i) {
for (uint32_t j = 0; j < render_pass_count; ++j) {
if (PASS_DEPENDS(j, level_passes[i]))
--construct_passes[j].dependency_count;
}
}
unscheduled_passes -= level_pass_count;
if (unscheduled_passes == 0)
break;
}
RT_ASSERT(unscheduled_passes == 0, "Did not schedule all passes");
/* construct_passes now contains the "execution level" for each pass.
* We execute passes in that order; passes with the same execution level can be
* executed concurrently. */
graph->pass_count = render_pass_count;
for (uint32_t i = 0; i < render_pass_count; ++i) {
graph->passes[i].execution_level = construct_passes[i].execution_level;
const rt_render_target_write *writes =
rtResolveConstRelptr(&pass_info[i].write_render_targets);
const rt_render_target_read *reads =
rtResolveConstRelptr(&pass_info[i].read_render_targets);
memcpy(graph->passes[i].writes,
writes,
pass_info[i].write_render_target_count * sizeof(rt_render_target_write));
memcpy(graph->passes[i].reads,
reads,
pass_info[i].read_render_target_count * sizeof(rt_render_target_read));
graph->passes[i].write_count = pass_info[i].write_render_target_count;
graph->passes[i].read_count = pass_info[i].read_render_target_count;
graph->passes[i].id = pass_info[i].id;
}
/* Sort by execution level */
qsort(graph->passes,
render_pass_count,
sizeof(rt_render_pass),
CompareRenderPassExecutionLevels);
result = true;
out:
return result;
#undef PASS_DEPENDS
}
static bool
CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
bool result = false;
/* TODO(Kevin): determine aliasing opportunities */
const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
for (uint32_t i = 0; i < info->render_target_count; ++i) {
graph->render_targets[i].id = render_targets[i].id;
graph->render_targets[i].format = render_targets[i].format;
graph->render_targets[i].width = render_targets[i].width;
graph->render_targets[i].height = render_targets[i].height;
graph->render_targets[i].sample_count = render_targets[i].sample_count;
graph->render_targets[i].api_render_target =
g_renderer.CreateRenderTarget(&render_targets[i]);
if (!RT_IS_HANDLE_VALID(graph->render_targets[i].api_render_target)) {
rtReportError("GFX", "Failed to create render target %u of framegraph.", i);
for (uint32_t j = 0; j < i; ++j)
g_renderer.DestroyRenderTarget(graph->render_targets[j].api_render_target);
goto out;
}
}
/* All targets were created successfully; record the count. */
graph->render_target_count = info->render_target_count;
result = true;
out:
return result;
}
static bool ValidateInfo(const rt_framegraph_info *info) {
if (info->render_pass_count > RT_FRAMEGRAPH_MAX_PASSES) {
rtReportError("GFX",
"Framegraph has too many passes: %u (maximum allowed is %u)",
info->render_pass_count,
RT_FRAMEGRAPH_MAX_PASSES);
return false;
}
if (info->render_target_count > RT_FRAMEGRAPH_MAX_RENDER_TARGETS) {
rtReportError("GFX",
"Framegraph has too many render targets: %u (maximum allowed is %u)",
info->render_target_count,
RT_FRAMEGRAPH_MAX_RENDER_TARGETS);
return false;
}
const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
for (uint32_t i = 0; i < info->render_target_count; ++i) {
if (render_targets[i].id == 0) {
rtReportError("GFX", "Framegraph render target %u has invalid id 0", i);
return false;
} else if ((render_targets[i].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
render_targets[i].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) &&
(render_targets[i].width != render_targets[i].height)) {
rtReportError("GFX",
"Framegraph render target %u: If width or height is set to "
"SWAPCHAIN, both values must be set to SWAPCHAIN.",
i);
return false;
} else if (render_targets[i].format >= RT_PIXEL_FORMAT_count) {
rtReportError("GFX",
"Framegraph render target %u format is outside the allowed range.",
i);
return false;
}
}
const rt_render_pass_info *passes = rtResolveConstRelptr(&info->render_passes);
for (uint32_t i = 0; i < info->render_pass_count; ++i) {
if (passes[i].id == 0) {
rtReportError("GFX", "Framegraph pass %u has invalid id 0", i);
return false;
} else if (passes[i].read_render_target_count > RT_RENDERPASS_MAX_READS) {
rtReportError(
"GFX",
"Framegraph pass %u reads too many rendertargets: %u (maximum allowed is %u)",
i,
passes[i].read_render_target_count,
RT_RENDERPASS_MAX_READS);
return false;
} else if (passes[i].write_render_target_count > RT_RENDERPASS_MAX_WRITES) {
rtReportError(
"GFX",
"Framegraph pass %u writes too many rendertargets: %u (maximum allowed is %u)",
i,
passes[i].write_render_target_count,
RT_RENDERPASS_MAX_WRITES);
return false;
}
}
return true;
}
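/* Typical usage sketch (illustrative only; the authoritative definitions of
* rt_framegraph_info and rt_render_pass_bind_fns live in gfx.h / renderer_api.h,
* and the pass name and callback names below are made up):
*
*   rt_framegraph *graph = rtCreateFramegraph(&info);
*   rt_render_pass_bind_fns fns = {.Prepare = MyPrepare, .Execute = MyExecute, .Finalize = MyFinalize};
*   rtBindRenderPass(graph, rtCalculateRenderPassID("gbuffer", strlen("gbuffer")), &fns);
*   rtExecuteFramegraph(graph);  // once per frame
*   rtDestroyFramegraph(graph);  // returns the graph to the pool
*/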
RT_DLLEXPORT rt_framegraph *rtCreateFramegraph(const rt_framegraph_info *info) {
if (!ValidateInfo(info)) {
return NULL;
}
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
if (!temp.arena) {
rtReportError("GFX", "Failed to acquire a temporary arena for constructing a framegraph");
return NULL;
}
rt_framegraph *graph = NULL;
/* Acquire an unused framegraph from the pool */
rtLockMutex(_free_list_lock);
graph = _first_free;
if (graph)
_first_free = graph->next_free;
rtUnlockMutex(_free_list_lock);
if (!graph) {
rtLog("GFX", "No free framegraph available (rt_MaxFramegraphs = %d).", rt_MaxFramegraphs.i);
goto out;
}
memset(graph, 0, sizeof(*graph));
if (!CreateRenderPasses(graph, info, temp.arena)) {
ReturnFrameGraph(graph);
graph = NULL;
goto out;
}
if (!CreateRenderTargets(graph, info, temp.arena)) {
ReturnFrameGraph(graph);
graph = NULL;
goto out;
}
out:
rtReturnTemporaryArena(temp);
return graph;
}
RT_DLLEXPORT void rtDestroyFramegraph(rt_framegraph *framegraph) {
ReturnFrameGraph(framegraph);
}
RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph,
rt_render_pass_id id,
const rt_render_pass_bind_fns *bind_fns) {
for (uint32_t i = 0; i < framegraph->pass_count; ++i) {
if (framegraph->passes[i].id == id) {
if (framegraph->passes[i].bound_fns.Execute)
rtLog("GFX", "Rebound pass %x to new functions", id);
framegraph->passes[i].bound_fns = *bind_fns;
return;
}
}
rtLog("GFX", "Tried to bind functions to unknown render pass %x", id);
}
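/* Passes were sorted by execution level in CreateRenderPasses; each contiguous
* run of equal levels forms one dispatch group below. As the TODO inside notes,
* each pass is meant to become a job dispatch eventually; for now the
* Prepare/Execute/Finalize callbacks run sequentially on the calling thread. */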
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) {
if (framegraph->pass_count == 0)
return;
int execution_level = framegraph->passes[0].execution_level;
uint32_t level_start = 0;
for (uint32_t i = 0; i <= framegraph->pass_count && level_start < framegraph->pass_count; ++i) {
if ((i == framegraph->pass_count) ||
(framegraph->passes[i].execution_level > execution_level)) {
/* Dispatch all passes in the current execution level */
for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) {
bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL &&
framegraph->passes[pass_idx].bound_fns.Execute != NULL &&
framegraph->passes[pass_idx].bound_fns.Finalize != NULL;
if (!pass_bound) {
rtLog("GFX",
"Framegraph pass %u (%x) is missing one or more bound functions; skipping it.",
pass_idx,
framegraph->passes[pass_idx].id);
continue;
}
rt_render_pass_id id = framegraph->passes[pass_idx].id;
const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
const rt_render_target_read *reads = framegraph->passes[pass_idx].reads;
uint32_t write_count = framegraph->passes[pass_idx].write_count;
uint32_t read_count = framegraph->passes[pass_idx].read_count;
/* TODO(Kevin): Every one of these should be a job dispatch. */
framegraph->passes[pass_idx].bound_fns.Prepare(id,
writes,
write_count,
reads,
read_count);
framegraph->passes[pass_idx].bound_fns.Execute(id,
writes,
write_count,
reads,
read_count);
framegraph->passes[pass_idx].bound_fns.Finalize(id,
writes,
write_count,
reads,
read_count);
}
/* Start next level */
level_start = i;
if (i < framegraph->pass_count)
execution_level = framegraph->passes[i].execution_level;
}
}
}
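/* Render target and render pass IDs are 32-bit hashes of their names. Zero is
* reserved as "invalid" (ValidateInfo rejects it), so a name that happens to
* hash to 0 is remapped to ~0. */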
RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len) {
rt_render_target_id id = rtHashBytes32(name, len);
if (id == 0)
id = ~id;
return id;
}
RT_DLLEXPORT rt_render_pass_id rtCalculateRenderPassID(const char *name, size_t len) {
rt_render_pass_id id = rtHashBytes32(name, len);
if (id == 0)
id = ~id;
return id;
}