#include "config.h"
|
|
#include "gfx.h"
|
|
#include "handles.h"
|
|
#include "hashing.h"
|
|
#include "mem_arena.h"
|
|
#include "renderer_api.h"
|
|
#include "threading.h"
|
|
|
|
#include <stdbool.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
RT_CVAR_I(rt_MaxFramegraphs, "Maximum number of framegraphs. Default 16", 16);
|
|
|
|
#define RT_FRAMEGRAPH_MAX_PASSES 32
#define RT_FRAMEGRAPH_MAX_RENDER_TARGETS 32
#define RT_RENDERPASS_MAX_READS 8
#define RT_RENDERPASS_MAX_WRITES 8

typedef struct {
    rt_render_target_id id;
    rt_pixel_format format;
    unsigned int width;
    unsigned int height;
    unsigned int sample_count;
    rt_render_target_handle api_render_target;
} rt_render_target;

typedef struct {
    rt_render_pass_id id;
    int execution_level;
    unsigned int read_count;
    unsigned int write_count;
    rt_render_pass_bind_fns bound_fns;
    rt_render_target_read reads[RT_RENDERPASS_MAX_READS];
    rt_render_target_write writes[RT_RENDERPASS_MAX_WRITES];
} rt_render_pass;

struct rt_framegraph_s {
    uint32_t pass_count;
    uint32_t render_target_count;

    rt_framegraph *next_free;

    rt_render_pass passes[RT_FRAMEGRAPH_MAX_PASSES];

    rt_render_target render_targets[RT_FRAMEGRAPH_MAX_RENDER_TARGETS];
};

static rt_framegraph *_framegraphs;
static rt_framegraph *_first_free;
static rt_mutex *_free_list_lock;

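/* Framegraphs are pooled: _framegraphs holds rt_MaxFramegraphs slots, and
 * _first_free points to a singly linked free list threaded through next_free.
 * ReturnFrameGraph pushes a graph back onto that list under _free_list_lock. */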
static void ReturnFrameGraph(rt_framegraph *framegraph) {
    rtLockMutex(_free_list_lock);
    framegraph->next_free = _first_free;
    _first_free = framegraph;
    rtUnlockMutex(_free_list_lock);
}

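/* Allocates the framegraph pool and links every slot into the free list.
 * Must run before any framegraph is created. */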
rt_result InitFramegraphManager(void) {
    _free_list_lock = rtCreateMutex();
    if (!_free_list_lock)
        return RT_UNKNOWN_ERROR;
    _framegraphs = calloc((size_t)rt_MaxFramegraphs.i, sizeof(rt_framegraph));
    if (!_framegraphs)
        return RT_OUT_OF_MEMORY;
    for (int i = 0; i < rt_MaxFramegraphs.i; ++i)
        _framegraphs[i].next_free = (i < rt_MaxFramegraphs.i - 1) ? &_framegraphs[i + 1] : NULL;
    _first_free = &_framegraphs[0];
    return RT_SUCCESS;
}

void ShutdownFramegraphManager(void) {
    free(_framegraphs);
    rtDestroyMutex(_free_list_lock);
}

typedef struct {
    unsigned int dependency_count;
    int execution_level;
} rt_pass_construct;

static int CompareRenderPassExecutionLevels(const void *a, const void *b) {
    const rt_render_pass *pass_a = a, *pass_b = b;
    return pass_a->execution_level - pass_b->execution_level;
}

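/* Builds graph->passes from the pass descriptions in "info".
 *
 * Scheduling works in two steps:
 *   1. Build a dependency matrix: pass A depends on pass B if B precedes A in
 *      the input list and B writes a render target that A reads.
 *   2. Assign each pass an "execution level": a pass joins the current level
 *      once all of its dependencies are scheduled and it does not write a
 *      render target that another pass on the same level also writes.
 *
 * Worked example (hypothetical pass names): with gbuffer -> lighting -> post,
 * where lighting reads gbuffer's outputs and post reads lighting's output,
 * the levels become { gbuffer } -> { lighting } -> { post }. A shadow pass
 * that shares no render targets with gbuffer could join level 0.
 *
 * Returns false (and logs) if a temporary allocation fails or no valid
 * schedule exists. */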
static bool
CreateRenderPasses(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
    uint32_t render_pass_count = info->render_pass_count;

    bool result = false;

    /* Pass A depends on pass B if:
     * B precedes A in the list of render passes AND
     * B writes to a render target that A reads from. */
    bool *dependency_matrix =
        rtArenaPushZero(arena, render_pass_count * render_pass_count * sizeof(bool));
    if (!dependency_matrix) {
        rtLog("GFX",
              "Not enough memory to allocate a %ux%u dependency matrix.",
              render_pass_count,
              render_pass_count);
        goto out;
    }
/* Checks if pass "dependent_idx" depends on pass "dependency_idx" */
#define PASS_DEPENDS(dependent_idx, dependency_idx)                                               \
    dependency_matrix[(dependency_idx)*render_pass_count + (dependent_idx)]

    rt_pass_construct *construct_passes =
        RT_ARENA_PUSH_ARRAY_ZERO(arena, rt_pass_construct, render_pass_count);
    if (!construct_passes) {
        rtLog("GFX",
              "Not enough memory to allocate construction information for %u passes.",
              render_pass_count);
        goto out;
    }

    const rt_render_pass_info *pass_info = rtResolveConstRelptr(&info->render_passes);
    for (uint32_t i = 0; i < render_pass_count; ++i) {
        construct_passes[i].execution_level = -1; /* not scheduled yet */
        const rt_render_target_write *writes_i =
            rtResolveConstRelptr(&pass_info[i].write_render_targets);
        for (uint32_t j = i + 1; j < render_pass_count; ++j) {
            const rt_render_target_read *reads_j =
                rtResolveConstRelptr(&pass_info[j].read_render_targets);
            bool depends = false;
            for (uint32_t read_idx = 0; read_idx < pass_info[j].read_render_target_count;
                 ++read_idx) {
                for (uint32_t write_idx = 0; write_idx < pass_info[i].write_render_target_count;
                     ++write_idx) {
                    if (writes_i[write_idx].render_target == reads_j[read_idx].render_target)
                        depends = true;
                }
            }
            PASS_DEPENDS(j, i) = depends;
            if (depends)
                ++construct_passes[j].dependency_count;
        }
    }

    /* Pass A can be executed concurrently with pass B if:
     * 1. A and B don't write to the same render target AND
     * 2. A's dependencies and B's dependencies have finished executing. */

    /* We can have at most render_pass_count execution levels */
    uint32_t *level_passes = RT_ARENA_PUSH_ARRAY_ZERO(arena, uint32_t, render_pass_count);
    if (!level_passes) {
        rtLog("GFX", "Failed to allocate a temporary array for constructing execution levels.");
        goto out;
    }
    uint32_t unscheduled_passes = render_pass_count;
    for (int level = 0; level < (int)render_pass_count; ++level) {
        unsigned int level_pass_count = 0;
        for (uint32_t i = 0; i < render_pass_count; ++i) {
            if (construct_passes[i].execution_level == -1 &&
                construct_passes[i].dependency_count == 0) {

                /* Check that no writes conflict with a pass already in this level */
                bool write_conflict = false;
                const rt_render_target_write *writes_i =
                    rtResolveConstRelptr(&pass_info[i].write_render_targets);
                for (unsigned int j = 0; j < level_pass_count; ++j) {
                    uint32_t pass_idx = level_passes[j];
                    const rt_render_target_write *pass_writes =
                        rtResolveConstRelptr(&pass_info[pass_idx].write_render_targets);
                    for (uint32_t k = 0; k < pass_info[i].write_render_target_count; ++k) {
                        for (uint32_t l = 0; l < pass_info[pass_idx].write_render_target_count;
                             ++l) {
                            if (writes_i[k].render_target == pass_writes[l].render_target) {
                                write_conflict = true;
                                break;
                            }
                        }
                        if (write_conflict)
                            break;
                    }
                    if (write_conflict)
                        break;
                }
                if (!write_conflict) {
                    RT_ASSERT(level_pass_count < render_pass_count,
                              "A level can contain at most render_pass_count passes.");
                    level_passes[level_pass_count++] = i;
                    construct_passes[i].execution_level = level;
                }
            }
        }
        if (level_pass_count == 0) {
            rtLog("GFX", "Failed to compute a valid schedule for the provided framegraph.");
            goto out;
        }
        /* level_passes now contains the passes we can execute concurrently.
         * Decrement the dependency count of all passes that depend on a pass in this level. */

        for (uint32_t i = 0; i < level_pass_count; ++i) {
            for (uint32_t j = 0; j < render_pass_count; ++j) {
                if (PASS_DEPENDS(j, level_passes[i]))
                    --construct_passes[j].dependency_count;
            }
        }

        unscheduled_passes -= level_pass_count;
        if (unscheduled_passes == 0)
            break;
    }
    RT_ASSERT(unscheduled_passes == 0, "Did not schedule all passes");
    /* construct_passes now contains the "execution level" for each pass.
     * We execute passes in that order; those with the same execution level can be executed
     * concurrently. */

    graph->pass_count = render_pass_count;
    for (uint32_t i = 0; i < render_pass_count; ++i) {
        graph->passes[i].execution_level = construct_passes[i].execution_level;
        const rt_render_target_write *writes =
            rtResolveConstRelptr(&pass_info[i].write_render_targets);
        const rt_render_target_read *reads =
            rtResolveConstRelptr(&pass_info[i].read_render_targets);
        memcpy(graph->passes[i].writes,
               writes,
               pass_info[i].write_render_target_count * sizeof(rt_render_target_write));
        memcpy(graph->passes[i].reads,
               reads,
               pass_info[i].read_render_target_count * sizeof(rt_render_target_read));
        graph->passes[i].write_count = pass_info[i].write_render_target_count;
        graph->passes[i].read_count = pass_info[i].read_render_target_count;
        graph->passes[i].id = pass_info[i].id;
    }

    /* Sort by execution level */
    qsort(graph->passes,
          render_pass_count,
          sizeof(rt_render_pass),
          CompareRenderPassExecutionLevels);
    result = true;
out:
    return result;
#undef PASS_DEPENDS
}

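/* Creates the API-side render targets described in "info" and stores them in
 * graph->render_targets. On failure, every render target created so far is
 * destroyed again and false is returned. The arena parameter is currently
 * unused; aliasing opportunities are still a TODO. */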
static bool
CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
    bool result = false;

    /* TODO(Kevin): determine aliasing opportunities */
    const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
    for (uint32_t i = 0; i < info->render_target_count; ++i) {
        graph->render_targets[i].id = render_targets[i].id;
        graph->render_targets[i].format = render_targets[i].format;
        graph->render_targets[i].width = render_targets[i].width;
        graph->render_targets[i].height = render_targets[i].height;
        graph->render_targets[i].sample_count = render_targets[i].sample_count;
        graph->render_targets[i].api_render_target =
            g_renderer.CreateRenderTarget(&render_targets[i]);
        if (!RT_IS_HANDLE_VALID(graph->render_targets[i].api_render_target)) {
            rtReportError("GFX", "Failed to create render target %u of framegraph.", i);
            for (uint32_t j = 0; j < i; ++j)
                g_renderer.DestroyRenderTarget(graph->render_targets[j].api_render_target);
            goto out;
        }
    }
    graph->render_target_count = info->render_target_count;

    result = true;
out:
    return result;
}

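/* Checks a framegraph description against the compile-time limits and rejects
 * obviously invalid data (id 0, out-of-range formats, mismatched SWAPCHAIN
 * dimensions) before any resources are allocated. */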
static bool ValidateInfo(const rt_framegraph_info *info) {
    if (info->render_pass_count > RT_FRAMEGRAPH_MAX_PASSES) {
        rtReportError("GFX",
                      "Framegraph has too many passes: %u (maximum allowed is %u)",
                      info->render_pass_count,
                      RT_FRAMEGRAPH_MAX_PASSES);
        return false;
    }
    if (info->render_target_count > RT_FRAMEGRAPH_MAX_RENDER_TARGETS) {
        rtReportError("GFX",
                      "Framegraph has too many render targets: %u (maximum allowed is %u)",
                      info->render_target_count,
                      RT_FRAMEGRAPH_MAX_RENDER_TARGETS);
        return false;
    }

    const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
    for (uint32_t i = 0; i < info->render_target_count; ++i) {
        if (render_targets[i].id == 0) {
            rtReportError("GFX", "Framegraph render target %u has invalid id 0", i);
            return false;
        } else if ((render_targets[i].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
                    render_targets[i].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) &&
                   (render_targets[i].width != render_targets[i].height)) {
            rtReportError("GFX",
                          "Framegraph render target %u: If width or height is set to "
                          "SWAPCHAIN, both values must be set to SWAPCHAIN.",
                          i);
            return false;
        } else if (render_targets[i].format >= RT_PIXEL_FORMAT_count) {
            rtReportError("GFX",
                          "Framegraph render target %u format is outside the allowed range.",
                          i);
            return false;
        }
    }

    const rt_render_pass_info *passes = rtResolveConstRelptr(&info->render_passes);
    for (uint32_t i = 0; i < info->render_pass_count; ++i) {
        if (passes[i].id == 0) {
            rtReportError("GFX", "Framegraph pass %u has invalid id 0", i);
            return false;
        } else if (passes[i].read_render_target_count > RT_RENDERPASS_MAX_READS) {
            rtReportError(
                "GFX",
                "Framegraph pass %u reads too many render targets: %u (maximum allowed is %u)",
                i,
                passes[i].read_render_target_count,
                RT_RENDERPASS_MAX_READS);
            return false;
        } else if (passes[i].write_render_target_count > RT_RENDERPASS_MAX_WRITES) {
            rtReportError(
                "GFX",
                "Framegraph pass %u writes too many render targets: %u (maximum allowed is %u)",
                i,
                passes[i].write_render_target_count,
                RT_RENDERPASS_MAX_WRITES);
            return false;
        }
    }

    return true;
}

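/* Creates a framegraph from a validated description. Returns NULL if the
 * description is invalid, no pooled framegraph slot is free, or pass/render
 * target construction fails.
 *
 * A minimal usage sketch (pass name and bound functions are hypothetical):
 *
 *     rt_framegraph *graph = rtCreateFramegraph(&info);
 *     if (graph) {
 *         rt_render_pass_bind_fns fns = {
 *             .Prepare = MyPrepare, .Execute = MyExecute, .Finalize = MyFinalize,
 *         };
 *         rtBindRenderPass(graph, rtCalculateRenderPassID("gbuffer", 7), &fns);
 *         rtExecuteFramegraph(graph);
 *         rtDestroyFramegraph(graph);
 *     }
 */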
RT_DLLEXPORT rt_framegraph *rtCreateFramegraph(const rt_framegraph_info *info) {
    if (!ValidateInfo(info)) {
        return NULL;
    }

    rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
    if (!temp.arena) {
        rtReportError("GFX", "Failed to acquire a temporary arena for constructing a framegraph");
        return NULL;
    }

    rt_framegraph *graph = NULL;
    /* Acquire an unused framegraph */
    rtLockMutex(_free_list_lock);
    graph = _first_free;
    if (graph)
        _first_free = graph->next_free;
    rtUnlockMutex(_free_list_lock);
    if (!graph)
        goto out;
    memset(graph, 0, sizeof(*graph));

    if (!CreateRenderPasses(graph, info, temp.arena)) {
        ReturnFrameGraph(graph);
        graph = NULL;
        goto out;
    }

    if (!CreateRenderTargets(graph, info, temp.arena)) {
        ReturnFrameGraph(graph);
        graph = NULL;
        goto out;
    }

out:
    rtReturnTemporaryArena(temp);
    return graph;
}

RT_DLLEXPORT void rtDestroyFramegraph(rt_framegraph *framegraph) {
    ReturnFrameGraph(framegraph);
}

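/* Binds Prepare/Execute/Finalize callbacks to the pass with the given id.
 * Rebinding an already bound pass is allowed and logged. Binding an unknown
 * id only produces a log message. */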
RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph,
                                   rt_render_pass_id id,
                                   const rt_render_pass_bind_fns *bind_fns) {
    for (uint32_t i = 0; i < framegraph->pass_count; ++i) {
        if (framegraph->passes[i].id == id) {
            if (framegraph->passes[i].bound_fns.Execute)
                rtLog("GFX", "Rebound pass %x to new functions", id);
            framegraph->passes[i].bound_fns = *bind_fns;
            return;
        }
    }
    rtLog("GFX", "Tried to bind functions to unknown render pass %x", id);
}

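/* Runs all bound passes in execution-level order. Passes within the same
 * level are independent of each other; they are currently dispatched
 * sequentially, but each dispatch is intended to become a job (see the TODO
 * below). Passes without a complete set of bound functions are skipped with a
 * log message. */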
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph) {
    int execution_level = framegraph->passes[0].execution_level;
    uint32_t level_start = 0;

    for (uint32_t i = 0; i <= framegraph->pass_count && level_start < framegraph->pass_count; ++i) {
        if ((i == framegraph->pass_count) ||
            (framegraph->passes[i].execution_level > execution_level)) {
            /* Dispatch all passes in the current execution level */
            for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) {
                bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL &&
                                  framegraph->passes[pass_idx].bound_fns.Execute != NULL &&
                                  framegraph->passes[pass_idx].bound_fns.Finalize != NULL;
                if (!pass_bound) {
                    rtLog("GFX",
                          "Framegraph pass %u (%x) does not have all of its functions bound.",
                          pass_idx,
                          framegraph->passes[pass_idx].id);
                    continue;
                }
                rt_render_pass_id id = framegraph->passes[pass_idx].id;
                const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
                const rt_render_target_read *reads = framegraph->passes[pass_idx].reads;
                uint32_t write_count = framegraph->passes[pass_idx].write_count;
                uint32_t read_count = framegraph->passes[pass_idx].read_count;

                /* TODO(Kevin): Every one of these should be a job dispatch */

                framegraph->passes[pass_idx].bound_fns.Prepare(id,
                                                               writes,
                                                               write_count,
                                                               reads,
                                                               read_count);
                framegraph->passes[pass_idx].bound_fns.Execute(id,
                                                               writes,
                                                               write_count,
                                                               reads,
                                                               read_count);
                framegraph->passes[pass_idx].bound_fns.Finalize(id,
                                                                writes,
                                                                write_count,
                                                                reads,
                                                                read_count);
            }

            /* Start next level */
            level_start = i;
            if (i < framegraph->pass_count)
                execution_level = framegraph->passes[i].execution_level;
        }
    }
}

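/* Render target and render pass ids are 32-bit hashes of their names.
 * 0 is reserved as the invalid id, so a hash of 0 is remapped to ~0. */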
RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len) {
    rt_render_target_id id = rtHashBytes32(name, len);
    if (id == 0)
        id = ~id;
    return id;
}

RT_DLLEXPORT rt_render_pass_id rtCalculateRenderPassID(const char *name, size_t len) {
    rt_render_pass_id id = rtHashBytes32(name, len);
    if (id == 0)
        id = ~id;
    return id;
}