Render Graph prep
This commit is contained in:
parent
3bc192b281
commit
c7e5bb8a31
@ -155,6 +155,7 @@ typedef struct {
|
|||||||
rt_relptr render_passes;
|
rt_relptr render_passes;
|
||||||
uint32_t render_target_count;
|
uint32_t render_target_count;
|
||||||
uint32_t render_pass_count;
|
uint32_t render_pass_count;
|
||||||
|
rt_render_target_id backbuffer;
|
||||||
rt_relptr names;
|
rt_relptr names;
|
||||||
uint32_t names_size;
|
uint32_t names_size;
|
||||||
} rt_framegraph_info;
|
} rt_framegraph_info;
|
||||||
|
@ -55,6 +55,10 @@ extern rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(vo
|
|||||||
extern rt_result
|
extern rt_result
|
||||||
RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *);
|
RT_RENDERER_API_FN(CreateBuffers)(uint32_t, const rt_buffer_info *, rt_buffer_handle *);
|
||||||
extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);
|
extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);
|
||||||
|
|
||||||
|
extern rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void);
|
||||||
|
extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *);
|
||||||
|
|
||||||
extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
|
extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
|
||||||
const rt_cmd_begin_pass_info *);
|
const rt_cmd_begin_pass_info *);
|
||||||
extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle);
|
extern void RT_RENDERER_API_FN(CmdEndPass)(rt_command_buffer_handle);
|
||||||
@ -109,6 +113,8 @@ static bool LoadRenderer(void) {
|
|||||||
RETRIEVE_SYMBOL(GetRenderFinishedSemaphore, rt_get_render_finished_semaphore_fn);
|
RETRIEVE_SYMBOL(GetRenderFinishedSemaphore, rt_get_render_finished_semaphore_fn);
|
||||||
RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
|
RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
|
||||||
RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
|
RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
|
||||||
|
/* Render graph builder entry point; the name has to be spelled like the renderer API function. */
RETRIEVE_SYMBOL(CreateRenderGraphBuilder, rt_create_render_graph_builder_fn);
|
||||||
|
RETRIEVE_SYMBOL(DestroyRenderGraphBuilder, rt_destroy_render_graph_builder_fn);
|
||||||
RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
|
RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
|
||||||
RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
|
RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
|
||||||
RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
|
RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
|
||||||
@ -142,6 +148,8 @@ static bool LoadRenderer(void) {
|
|||||||
g_renderer.GetRenderFinishedSemaphore = &rtRenGetRenderFinishedSemaphore;
|
g_renderer.GetRenderFinishedSemaphore = &rtRenGetRenderFinishedSemaphore;
|
||||||
g_renderer.CreateBuffers = &rtRenCreateBuffers;
|
g_renderer.CreateBuffers = &rtRenCreateBuffers;
|
||||||
g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
|
g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
|
||||||
|
g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder;
|
||||||
|
g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder;
|
||||||
g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
|
g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
|
||||||
g_renderer.CmdEndPass = &rtRenCmdEndPass;
|
g_renderer.CmdEndPass = &rtRenCmdEndPass;
|
||||||
g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
|
g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
|
||||||
|
@ -207,6 +207,49 @@ typedef enum {
|
|||||||
|
|
||||||
typedef struct rt_pipeline_info_s rt_pipeline_info;
|
typedef struct rt_pipeline_info_s rt_pipeline_info;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char *name;
|
||||||
|
rt_pixel_format format;
|
||||||
|
unsigned int width;
|
||||||
|
unsigned int height;
|
||||||
|
|
||||||
|
unsigned int samples;
|
||||||
|
unsigned int layers;
|
||||||
|
} rt_attachment_info;
|
||||||
|
|
||||||
|
/* Flag values stored in a pass description's `flags` field. */
enum {
    /* Bit 0 contains the type: 0 -> graphics, 1 -> compute */
    RT_PASS_FLAG_GRAPHICS  = 0,
    RT_PASS_FLAG_COMPUTE   = 1 << 0,
    RT_PASS_FLAG_TYPE_MASK = RT_PASS_FLAG_GRAPHICS | RT_PASS_FLAG_COMPUTE,

    /* Always execute the pass, even if no objects will be rendered. */
    RT_PASS_FLAG_EXECUTE_ALWAYS = 1 << 1,
};
|
||||||
|
|
||||||
|
/* Description of a render pass that is handed to the render graph builder. */
typedef struct {
    /* Name by which the pass is referenced when attaching render targets to it. */
    const char *name;
    /* Combination of RT_PASS_FLAG_* values. */
    uint32_t flags;
} rt_pass_info;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
void *obj;
|
||||||
|
|
||||||
|
void (*AddRenderTarget)(void *obj, const rt_attachment_info *info);
|
||||||
|
|
||||||
|
void (*SetBackbuffer)(void *obj, const char *rt_name);
|
||||||
|
|
||||||
|
void (*AddRenderPass)(void *obj, const rt_pass_info *info);
|
||||||
|
void (*AddColorOutput)(void *obj, const char *pass_name, const char *rt_name);
|
||||||
|
void (*AddSampledInput)(void *obj, const char *pass_name, const char *rt_name);
|
||||||
|
void (*SetDepthStencilAttachment)(void *obj, const char *pass_name, const char *rt_name);
|
||||||
|
|
||||||
|
|
||||||
|
rt_result (*Build)(void *obj);
|
||||||
|
} rt_render_graph_builder;
|
||||||
|
|
||||||
|
|
||||||
typedef void rt_register_renderer_cvars_fn(void);
|
typedef void rt_register_renderer_cvars_fn(void);
|
||||||
typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
|
typedef rt_result rt_init_renderer_fn(const rt_renderer_init_info *info);
|
||||||
typedef void rt_shutdown_renderer_fn(void);
|
typedef void rt_shutdown_renderer_fn(void);
|
||||||
@ -234,6 +277,9 @@ typedef rt_result
|
|||||||
rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
|
rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
|
||||||
typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
|
typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
|
||||||
|
|
||||||
|
typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void);
|
||||||
|
typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder);
|
||||||
|
|
||||||
typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
|
typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
|
||||||
const rt_cmd_begin_pass_info *info);
|
const rt_cmd_begin_pass_info *info);
|
||||||
typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf);
|
typedef void rt_cmd_end_pass_fn(rt_command_buffer_handle cmdbuf);
|
||||||
@ -264,6 +310,10 @@ typedef struct {
|
|||||||
rt_get_render_finished_semaphore_fn *GetRenderFinishedSemaphore;
|
rt_get_render_finished_semaphore_fn *GetRenderFinishedSemaphore;
|
||||||
rt_create_buffers_fn *CreateBuffers;
|
rt_create_buffers_fn *CreateBuffers;
|
||||||
rt_destroy_buffers_fn *DestroyBuffers;
|
rt_destroy_buffers_fn *DestroyBuffers;
|
||||||
|
|
||||||
|
/* Render Graph Functions */
|
||||||
|
rt_create_render_graph_builder_fn *CreateRenderGraphBuilder;
|
||||||
|
rt_destroy_render_graph_builder_fn *DestroyRenderGraphBuilder;
|
||||||
|
|
||||||
/* Command Buffer Functions */
|
/* Command Buffer Functions */
|
||||||
rt_cmd_begin_pass_fn *CmdBeginPass;
|
rt_cmd_begin_pass_fn *CmdBeginPass;
|
||||||
|
@ -25,6 +25,7 @@ if vk_dep.found()
|
|||||||
'helper.c',
|
'helper.c',
|
||||||
'init.c',
|
'init.c',
|
||||||
'pipelines.c',
|
'pipelines.c',
|
||||||
|
'render_graph.c',
|
||||||
'render_targets.c',
|
'render_targets.c',
|
||||||
'swapchain.c',
|
'swapchain.c',
|
||||||
'transfers.c',
|
'transfers.c',
|
||||||
|
504
src/renderer/vk/render_graph.c
Normal file
504
src/renderer/vk/render_graph.c
Normal file
@ -0,0 +1,504 @@
|
|||||||
|
#include "gpu.h"
|
||||||
|
|
||||||
|
#include "gfx/renderer_api.h"
|
||||||
|
#include "runtime/mem_arena.h"
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
/* ****************************************************************************
|
||||||
|
*
|
||||||
|
* BUILDER CODE
|
||||||
|
*
|
||||||
|
* ****************************************************************************/
|
||||||
|
|
||||||
|
#define MAX_COLOR_ATTACHMENTS_PER_PASS 8
|
||||||
|
#define MAX_SAMPLED_INPUTS_PER_PASS 8
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char *name;
|
||||||
|
rt_pixel_format format;
|
||||||
|
unsigned int width;
|
||||||
|
unsigned int height;
|
||||||
|
|
||||||
|
unsigned int samples;
|
||||||
|
unsigned int layers;
|
||||||
|
|
||||||
|
uint32_t first_usage;
|
||||||
|
uint32_t last_usage;
|
||||||
|
} rt_render_target_build_info;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const char *name;
|
||||||
|
uint32_t flags;
|
||||||
|
|
||||||
|
uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS];
|
||||||
|
uint32_t color_attachment_count;
|
||||||
|
|
||||||
|
uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS];
|
||||||
|
uint32_t sampled_input_count;
|
||||||
|
|
||||||
|
uint32_t depth_stencil_attachment;
|
||||||
|
|
||||||
|
uint32_t *dependencies;
|
||||||
|
uint32_t dependency_count;
|
||||||
|
} rt_pass_build_info;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
rt_arena arena;
|
||||||
|
|
||||||
|
rt_render_target_build_info *render_targets;
|
||||||
|
uint32_t render_target_count;
|
||||||
|
uint32_t render_target_capacity;
|
||||||
|
|
||||||
|
rt_pass_build_info *passes;
|
||||||
|
uint32_t pass_count;
|
||||||
|
uint32_t pass_capacity;
|
||||||
|
|
||||||
|
uint32_t backbuffer;
|
||||||
|
} rt_render_graph_builder_obj;
|
||||||
|
|
||||||
|
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
|
||||||
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
|
||||||
|
if (obj->render_target_count == obj->render_target_capacity) {
|
||||||
|
uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32;
|
||||||
|
rt_render_target_build_info *tmp =
|
||||||
|
RT_ARENA_PUSH_ARRAY(&obj->arena, rt_render_target_build_info, new_cap);
|
||||||
|
if (obj->render_target_capacity)
|
||||||
|
memcpy(tmp,
|
||||||
|
obj->render_targets,
|
||||||
|
sizeof(rt_render_target_build_info) * obj->render_target_capacity);
|
||||||
|
obj->render_targets = tmp;
|
||||||
|
obj->render_target_capacity = new_cap;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
|
||||||
|
strcpy(name, info->name);
|
||||||
|
obj->render_targets[obj->render_target_count].name = name;
|
||||||
|
obj->render_targets[obj->render_target_count].format = info->format;
|
||||||
|
obj->render_targets[obj->render_target_count].width = info->width;
|
||||||
|
obj->render_targets[obj->render_target_count].height = info->height;
|
||||||
|
obj->render_targets[obj->render_target_count].samples = info->samples;
|
||||||
|
obj->render_targets[obj->render_target_count].layers = info->layers;
|
||||||
|
obj->render_targets[obj->render_target_count].first_usage = 0;
|
||||||
|
obj->render_targets[obj->render_target_count].last_usage = 0;
|
||||||
|
++obj->render_target_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetBackbuffer(void *_obj, const char *rt_name) {
|
||||||
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||||
|
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||||
|
obj->backbuffer = i;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AddRenderPass(void *_obj, const rt_pass_info *info) {
|
||||||
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
|
||||||
|
if (obj->pass_count == obj->pass_capacity) {
|
||||||
|
uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32;
|
||||||
|
rt_pass_build_info *tmp = RT_ARENA_PUSH_ARRAY(&obj->arena, rt_pass_build_info, new_cap);
|
||||||
|
if (obj->pass_capacity)
|
||||||
|
memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity);
|
||||||
|
obj->passes = tmp;
|
||||||
|
obj->pass_capacity = new_cap;
|
||||||
|
}
|
||||||
|
|
||||||
|
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
|
||||||
|
strcpy(name, info->name);
|
||||||
|
obj->passes[obj->pass_count].name = name;
|
||||||
|
obj->passes[obj->pass_count].flags = info->flags;
|
||||||
|
obj->passes[obj->pass_count].color_attachment_count = 0;
|
||||||
|
obj->passes[obj->pass_count].sampled_input_count = 0;
|
||||||
|
obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX;
|
||||||
|
obj->passes[obj->pass_count].dependencies = NULL;
|
||||||
|
obj->passes[obj->pass_count].dependency_count = 0;
|
||||||
|
|
||||||
|
++obj->pass_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AddColorOutput(void *_obj, const char *pass_name, const char *rt_name) {
|
||||||
|
uint32_t rt_index = UINT_MAX;
|
||||||
|
|
||||||
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||||
|
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||||
|
rt_index = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rt_index == UINT_MAX) {
|
||||||
|
rtLog("vk",
|
||||||
|
"Tried to add unknown render target %s as color output to %s",
|
||||||
|
rt_name,
|
||||||
|
pass_name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||||
|
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||||
|
if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) {
|
||||||
|
rtLog("vk", "Too many color attachments in pass %s", pass_name);
|
||||||
|
}
|
||||||
|
obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rtLog("vk",
|
||||||
|
"Tried to add render target %s as color output to unknown render target %s",
|
||||||
|
rt_name,
|
||||||
|
pass_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) {
|
||||||
|
uint32_t rt_index = UINT_MAX;
|
||||||
|
|
||||||
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||||
|
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||||
|
rt_index = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rt_index == UINT_MAX) {
|
||||||
|
rtLog("vk",
|
||||||
|
"Tried to add unknown render target %s as color output to %s",
|
||||||
|
rt_name,
|
||||||
|
pass_name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||||
|
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||||
|
if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) {
|
||||||
|
rtLog("vk", "Too many sampled inputs in pass %s", pass_name);
|
||||||
|
}
|
||||||
|
obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rtLog("vk",
|
||||||
|
"Tried to add render target %s as sampled input to unknown render target %s",
|
||||||
|
rt_name,
|
||||||
|
pass_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetDepthStencilAttachment(void *_obj, const char *pass_name, const char *rt_name) {
|
||||||
|
uint32_t rt_index = UINT_MAX;
|
||||||
|
|
||||||
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||||
|
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||||
|
rt_index = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (rt_index == UINT_MAX) {
|
||||||
|
rtLog("vk",
|
||||||
|
"Tried to add unknown render target %s as depth stencil attachment to %s",
|
||||||
|
rt_name,
|
||||||
|
pass_name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||||
|
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||||
|
obj->passes[i].depth_stencil_attachment = rt_index;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
rtLog("vk",
|
||||||
|
"Tried to add render target %s as depth stencil attachment to unknown render target %s",
|
||||||
|
rt_name,
|
||||||
|
pass_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Result of FindWriters. */
typedef struct {
    /* Number of passes that were newly appended to the list. */
    uint32_t added;
    /* Number of passes that were already in the list and got moved to its end. */
    uint32_t moved;
} rt_find_writers_result;
|
||||||
|
|
||||||
|
/* Collects all passes that write the render target rt_index, either as depth stencil attachment
 * or as color attachment.
 *
 * p_passes holds a list of pass indices with append_at valid entries. A writer that is not yet
 * in the list is appended. A writer that is already in the list is removed from its current
 * position and re-inserted as the last entry.
 *
 * Returns how many passes were appended and how many were moved. */
static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
                                          uint32_t rt_index,
                                          uint32_t append_at,
                                          uint32_t *p_passes) {
    rt_find_writers_result result = {0, 0};

    for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
        const rt_pass_build_info *pass = &obj->passes[pass_idx];

        bool is_writer = pass->depth_stencil_attachment == rt_index;
        for (uint32_t slot = 0; !is_writer && slot < pass->color_attachment_count; ++slot)
            is_writer = pass->color_attachments[slot] == rt_index;
        if (!is_writer)
            continue;

        /* Search the current list for this pass; found_at == append_at means "not present". */
        uint32_t found_at = 0;
        while (found_at < append_at && p_passes[found_at] != pass_idx)
            ++found_at;

        if (found_at == append_at) {
            p_passes[append_at] = pass_idx;
            ++append_at;
            ++result.added;
        } else {
            /* Close the gap left by the pass and put it at the end of the list. */
            for (uint32_t k = found_at; k + 1 < append_at; ++k)
                p_passes[k] = p_passes[k + 1];
            p_passes[append_at - 1] = pass_idx;
            ++result.moved;
        }
    }

    return result;
}
|
||||||
|
|
||||||
|
/* Recursive step of the naive ordering.
 *
 * Puts the writers of search_rt into p_order (see FindWriters) and then recurses into every
 * sampled input of those writers, first for the passes FindWriters reported as moved, then for
 * the ones it reported as added.
 *
 * Returns the number of valid entries in p_order after this step. */
static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
                                    uint32_t search_rt,
                                    uint32_t append_at,
                                    uint32_t *p_order) {
    const rt_find_writers_result found = FindWriters(obj, search_rt, append_at, p_order);
    uint32_t end                       = append_at + found.added;

    /* The moved entries are expected directly in front of append_at, the added ones start at
     * append_at, so both groups form one contiguous range. */
    const uint32_t range_begin = append_at - found.moved;
    const uint32_t range_size  = found.moved + found.added;
    for (uint32_t n = 0; n < range_size; ++n) {
        /* Re-read the entry every iteration: the recursion below may reorder p_order. */
        const rt_pass_build_info *writer = &obj->passes[p_order[range_begin + n]];
        for (uint32_t input = 0; input < writer->sampled_input_count; ++input)
            end = CreateNaiveOrderRec(obj, writer->sampled_inputs[input], end, p_order);
    }

    return end;
}
|
||||||
|
|
||||||
|
static rt_result
|
||||||
|
CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
|
||||||
|
uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
|
||||||
|
if (!order)
|
||||||
|
return RT_OUT_OF_MEMORY;
|
||||||
|
uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);
|
||||||
|
|
||||||
|
/* Now the pass writing the backbuffer is first, we need to revert the order */
|
||||||
|
for (uint32_t i = 0; i < count / 2; ++i) {
|
||||||
|
uint32_t t = order[i];
|
||||||
|
order[i] = order[count - i - 1];
|
||||||
|
order[count - i - 1] = t;
|
||||||
|
}
|
||||||
|
*p_order = order;
|
||||||
|
*p_count = count;
|
||||||
|
return RT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t *
|
||||||
|
ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
|
||||||
|
/* Our goal is to calculate a schedule that:
|
||||||
|
* A) Does not break the dependency chain
|
||||||
|
* B) Has the maximum amount of overlap, i.e. keeps the GPU busy.
|
||||||
|
* This means that if pass A depends on pass B, we want to have as much passes inbetween as
|
||||||
|
* possible, to reduce the likelyhood of stalls caused by B waiting for A to finish. */
|
||||||
|
uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
|
||||||
|
if (!schedule)
|
||||||
|
return NULL;
|
||||||
|
uint32_t scheduled_count = 0;
|
||||||
|
|
||||||
|
while (scheduled_count < pass_count) {
|
||||||
|
/* The number of passes remaining in naive_order */
|
||||||
|
uint32_t unscheduled_count = pass_count - scheduled_count;
|
||||||
|
|
||||||
|
/* It is always valid to use the front */
|
||||||
|
uint32_t selected_idx = 0;
|
||||||
|
uint32_t selected_score = 0;
|
||||||
|
for (uint32_t i = 0; i < unscheduled_count; ++i) {
|
||||||
|
/* Check if any dependency is not scheduled yet */
|
||||||
|
uint32_t pass_idx = naive_order[i];
|
||||||
|
const rt_pass_build_info *pass = &obj->passes[pass_idx];
|
||||||
|
uint32_t score = 0;
|
||||||
|
bool is_valid = true;
|
||||||
|
|
||||||
|
if (pass->dependency_count) {
|
||||||
|
for (uint32_t j = 0; j < unscheduled_count; ++j) {
|
||||||
|
uint32_t pass2_idx = naive_order[j];
|
||||||
|
for (uint32_t k = 0; k < pass->dependency_count; ++k) {
|
||||||
|
if (pass->dependencies[k] == pass2_idx) {
|
||||||
|
is_valid = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!is_valid)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (!is_valid)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
for (uint32_t j = 0; j < pass->dependency_count; ++j) {
|
||||||
|
for (uint32_t k = 0; k < scheduled_count; ++k) {
|
||||||
|
if (schedule[k] == pass->dependencies[j]) {
|
||||||
|
score += scheduled_count - k;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
score = UINT32_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (score > selected_score) {
|
||||||
|
selected_score = score;
|
||||||
|
selected_idx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
schedule[scheduled_count++] = naive_order[selected_idx];
|
||||||
|
memmove(&naive_order[selected_idx],
|
||||||
|
&naive_order[selected_idx + 1],
|
||||||
|
(unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
|
||||||
|
}
|
||||||
|
return schedule;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fills the dependency list of every pass: for each sampled input of a pass, every pass that
 * writes that render target (as depth stencil or color attachment) is recorded as a dependency.
 *
 * Returns RT_OUT_OF_MEMORY if the arena runs out of space, RT_SUCCESS otherwise.
 *
 * NOTE(Kevin): This has a lot of overlap with FindWriters, so maybe it's possible to combine
 * the two. */
static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
    for (uint32_t reader_idx = 0; reader_idx < obj->pass_count; ++reader_idx) {
        rt_pass_build_info *reader = &obj->passes[reader_idx];

        /* Start with room for one dependency per sampled input. */
        uint32_t capacity = reader->sampled_input_count;
        if (capacity != 0) {
            reader->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, capacity);
            if (reader->dependencies == NULL)
                return RT_OUT_OF_MEMORY;
        }

        for (uint32_t input = 0; input < reader->sampled_input_count; ++input) {
            const uint32_t wanted_rt = reader->sampled_inputs[input];

            for (uint32_t writer_idx = 0; writer_idx < obj->pass_count; ++writer_idx) {
                const rt_pass_build_info *writer = &obj->passes[writer_idx];

                bool writes = writer->depth_stencil_attachment == wanted_rt;
                for (uint32_t c = 0; c < writer->color_attachment_count; ++c)
                    writes = writes || writer->color_attachments[c] == wanted_rt;

                if (writes) {
                    if (reader->dependency_count == capacity) {
                        /* The dependencies are still on top of the arena, so pushing more
                         * elements extends the array in place. */
                        if (RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, capacity) == NULL)
                            return RT_OUT_OF_MEMORY;
                        capacity *= 2;
                    }
                    reader->dependencies[reader->dependency_count++] = writer_idx;
                }
            }
        }
    }

    return RT_SUCCESS;
}
|
||||||
|
|
||||||
|
/* Records, for every render target, the first and last position in the schedule at which a pass
 * uses it as depth stencil attachment, color attachment or sampled input.
 *
 * A render target that no scheduled pass uses ends up with first_usage == UINT32_MAX and
 * last_usage == 0. */
static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj,
                                       uint32_t pass_count,
                                       const uint32_t *schedule) {
    for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) {
        uint32_t first = UINT32_MAX;
        uint32_t last  = 0;

        for (uint32_t pos = 0; pos < pass_count; ++pos) {
            const rt_pass_build_info *pass = &obj->passes[schedule[pos]];

            bool used = pass->depth_stencil_attachment == rt_idx;
            for (unsigned int i = 0; !used && i < pass->color_attachment_count; ++i)
                used = pass->color_attachments[i] == rt_idx;
            for (unsigned int i = 0; !used && i < pass->sampled_input_count; ++i)
                used = pass->sampled_inputs[i] == rt_idx;
            if (!used)
                continue;

            if (pos < first)
                first = pos;
            if (pos > last)
                last = pos;
        }

        obj->render_targets[rt_idx].first_usage = first;
        obj->render_targets[rt_idx].last_usage  = last;
    }
}
|
||||||
|
|
||||||
|
static rt_result Build(void *_obj) {
|
||||||
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
uint32_t *naive_order;
|
||||||
|
uint32_t pass_count;
|
||||||
|
rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
|
||||||
|
if (res != RT_SUCCESS)
|
||||||
|
return res;
|
||||||
|
|
||||||
|
res = DeterminePassDependencies(obj);
|
||||||
|
if (res != RT_SUCCESS)
|
||||||
|
return res;
|
||||||
|
|
||||||
|
uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
|
||||||
|
if (!optimized_order)
|
||||||
|
return RT_OUT_OF_MEMORY;
|
||||||
|
|
||||||
|
/* Next steps:
|
||||||
|
* Determine first & last usage for every render-target
|
||||||
|
* For every pair of render-targets, note if they could be merged:
|
||||||
|
- Identical format
|
||||||
|
- Non-overlapping usage */
|
||||||
|
DetermineRenderTargetUsage(obj, pass_count, optimized_order);
|
||||||
|
|
||||||
|
|
||||||
|
return RT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
|
||||||
|
// TODO: This is totally temporary. We do NOT want malloc anywhere in non-init functions
|
||||||
|
rt_render_graph_builder_obj *obj = malloc(sizeof(*obj));
|
||||||
|
RT_ASSERT(obj, "Failed to allocate the builder object.");
|
||||||
|
memset(obj, 0, sizeof(*obj));
|
||||||
|
rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16));
|
||||||
|
RT_ASSERT(arena_res.ok, "");
|
||||||
|
obj->arena = arena_res.arena;
|
||||||
|
|
||||||
|
return (rt_render_graph_builder){
|
||||||
|
.obj = obj,
|
||||||
|
.AddRenderTarget = AddRenderTarget,
|
||||||
|
.SetBackbuffer = SetBackbuffer,
|
||||||
|
.AddRenderPass = AddRenderPass,
|
||||||
|
.AddColorOutput = AddColorOutput,
|
||||||
|
.AddSampledInput = AddSampledInput,
|
||||||
|
.SetDepthStencilAttachment = SetDepthStencilAttachment,
|
||||||
|
.Build = Build,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Destroys a builder created by CreateRenderGraphBuilder.
 *
 * Releases the builder's arena, frees the builder state and zeroes *builder, so that stale
 * function pointers can not be called by accident. Passing NULL, or a builder that was already
 * destroyed (obj == NULL), is a no-op apart from zeroing the struct. */
void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
    if (!builder)
        return;

    rt_render_graph_builder_obj *obj = builder->obj;
    if (obj) {
        rtReleaseArena(&obj->arena);
        free(obj);
    }
    memset(builder, 0, sizeof(*builder));
}
|
Loading…
Reference in New Issue
Block a user