Executing the simplest possible render graph
This commit is contained in:
parent
df50759303
commit
388b747a04
@ -64,11 +64,12 @@ static rt_asset_db _asset_db;
|
||||
static rt_processing_queue _processing_queue;
|
||||
|
||||
extern RT_ASSET_PROCESSOR_FN(EffectProcessor);
|
||||
extern RT_ASSET_PROCESSOR_FN(FramegraphProcessor);
|
||||
/* extern RT_ASSET_PROCESSOR_FN(FramegraphProcessor);*/
|
||||
|
||||
static rt_asset_processor _processors[] = {
|
||||
{ .file_ext = ".effect", .proc = EffectProcessor},
|
||||
{.file_ext = ".framegraph", .proc = FramegraphProcessor},
|
||||
{.file_ext = ".effect", .proc = EffectProcessor},
|
||||
/*
|
||||
{.file_ext = ".framegraph", .proc = FramegraphProcessor},*/
|
||||
};
|
||||
|
||||
static void ProcessorThreadEntry(void *);
|
||||
|
@ -36,7 +36,7 @@ typedef struct {
|
||||
typedef struct {
|
||||
unsigned int pass_count;
|
||||
rt_parsed_pipeline_data pipelines[RT_MAX_SUBRESOURCES];
|
||||
rt_render_pass_id pass_ids[RT_MAX_SUBRESOURCES];
|
||||
uint32_t pass_ids[RT_MAX_SUBRESOURCES];
|
||||
} rt_parsed_effect_data;
|
||||
|
||||
enum {
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#if 0
|
||||
static int RenderTargetExists(const rt_framegraph_info *framegraph, rt_render_target_id id) {
|
||||
const rt_render_target_info *render_targets = rtResolveConstRelptr(&framegraph->render_targets);
|
||||
for (uint32_t i = 0; i < framegraph->render_target_count; ++i) {
|
||||
@ -658,3 +659,4 @@ out:
|
||||
rtReleaseBuffer(asset.buffer, asset.size);
|
||||
return result;
|
||||
}
|
||||
#endif
|
@ -11,9 +11,14 @@ void RegisterCVars(void) {
|
||||
rtRegisterAssetCompilerCVars();
|
||||
}
|
||||
|
||||
static rt_framegraph *_framegraph;
|
||||
static rt_render_graph *_graph;
|
||||
|
||||
static rt_render_target_handle _rt;
|
||||
static rt_result ForwardPassExecute(rt_command_buffer_handle cmdbuf,
|
||||
const rt_render_list *lists,
|
||||
uint32_t list_count,
|
||||
void *userdata) {
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
/* Called after the runtime has finished its initialization and before entering the main-loop*/
|
||||
void Init(void) {
|
||||
@ -22,7 +27,31 @@ void Init(void) {
|
||||
|
||||
rtWaitForAssetProcessing();
|
||||
|
||||
_rt = g_renderer.GetSwapchainRenderTarget();
|
||||
rt_render_graph_builder builder = g_renderer.CreateRenderGraphBuilder();
|
||||
rt_attachment_info backbuffer = {
|
||||
.name = "backbuffer",
|
||||
.format = RT_PIXEL_FORMAT_SWAPCHAIN,
|
||||
.width = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
|
||||
.height = RT_RENDER_TARGET_SIZE_SWAPCHAIN,
|
||||
.samples = 1,
|
||||
.layers = 1,
|
||||
};
|
||||
builder.AddRenderTarget(builder.obj, &backbuffer);
|
||||
|
||||
rt_pass_info forward = {.name = "forward",
|
||||
.flags = RT_PASS_FLAG_EXECUTE_ALWAYS | RT_PASS_FLAG_GRAPHICS};
|
||||
builder.AddRenderPass(builder.obj, &forward);
|
||||
builder.AddColorOutput(builder.obj,
|
||||
"forward",
|
||||
"backbuffer",
|
||||
RT_PASS_LOAD_MODE_CLEAR,
|
||||
RT_PASS_WRITE_MODE_STORE,
|
||||
(rt_color){.r = 1.f, .g = 0.f, .b = 1.f, .a = 1.f});
|
||||
builder.SetBackbuffer(builder.obj, "backbuffer");
|
||||
builder.BindRenderPass(builder.obj, "forward", ForwardPassExecute, NULL);
|
||||
builder.Build(builder.obj, &_graph);
|
||||
|
||||
g_renderer.DestroyRenderGraphBuilder(&builder);
|
||||
}
|
||||
|
||||
/* Called after exiting the main-loop and before the runtime starts its shutdown */
|
||||
@ -36,20 +65,5 @@ void Update(unsigned int frame_id) {
|
||||
}
|
||||
|
||||
void Render(unsigned int frame_id) {
|
||||
rt_alloc_command_buffer_info info = {RT_GRAPHICS_QUEUE};
|
||||
rt_command_buffer_handle cmd;
|
||||
g_renderer.AllocCommandBuffers(1, &info, &cmd);
|
||||
|
||||
rt_cmd_begin_pass_info pass_info = {
|
||||
.color_buffer_count = 1,
|
||||
.color_buffers = {_rt},
|
||||
.color_buffer_loads = {RT_PASS_LOAD_MODE_CLEAR},
|
||||
.color_buffer_clear_values = {{.color = {1.f, 0.f, 0.f, 1.f}}},
|
||||
.name = "testme",
|
||||
};
|
||||
g_renderer.CmdBeginPass(cmd, &pass_info);
|
||||
g_renderer.CmdEndPass(cmd);
|
||||
|
||||
rt_submit_command_buffers_info submit = {.command_buffer_count = 1, .command_buffers = &cmd};
|
||||
g_renderer.SubmitCommandBuffers(RT_GRAPHICS_QUEUE, &submit);
|
||||
g_renderer.ExecuteRenderGraph(_graph);
|
||||
}
|
16
src/gfx/effect.c
Normal file
16
src/gfx/effect.c
Normal file
@ -0,0 +1,16 @@
|
||||
#include "effect.h"
|
||||
#include "runtime/hashing.h"
|
||||
|
||||
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len) {
|
||||
uint32_t id = rtHashBytes32(name, len);
|
||||
if (id == 0)
|
||||
id = ~id;
|
||||
return id;
|
||||
}
|
||||
|
||||
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len) {
|
||||
uint32_t id = rtHashBytes32(name, len);
|
||||
if (id == 0)
|
||||
id = ~id;
|
||||
return id;
|
||||
}
|
@ -19,7 +19,7 @@ typedef struct rt_pipeline_info_s {
|
||||
|
||||
typedef struct {
|
||||
/* Id of the render pass during which this effect pass is run. */
|
||||
rt_render_pass_id pass_id;
|
||||
uint32_t pass_id;
|
||||
rt_resource_id pipeline;
|
||||
} rt_effect_pass_info;
|
||||
|
||||
@ -28,4 +28,7 @@ typedef struct {
|
||||
rt_effect_pass_info passes[RT_MAX_SUBRESOURCES];
|
||||
} rt_effect_info;
|
||||
|
||||
RT_DLLEXPORT uint32_t rtCalculateRenderTargetID(const char *name, size_t len);
|
||||
RT_DLLEXPORT uint32_t rtCalculateRenderPassID(const char *name, size_t len);
|
||||
|
||||
#endif
|
||||
|
128
src/gfx/gfx.h
128
src/gfx/gfx.h
@ -81,134 +81,6 @@ RT_DLLEXPORT void rtBeginGFXFrame(unsigned int frame_id);
|
||||
|
||||
RT_DLLEXPORT void rtEndGFXFrame(unsigned int frame_id);
|
||||
|
||||
/* *********************************************************************
|
||||
* Framegraph API
|
||||
*
|
||||
* The framegraph is used to organize and schedule the work for a frame.
|
||||
* *********************************************************************/
|
||||
|
||||
/* Special value for the .width and .height fields of rt_render_target_info
|
||||
* to indicate that these should be set to the width or height of the swapchain, respectively. */
|
||||
#define RT_RENDER_TARGET_SIZE_SWAPCHAIN 0
|
||||
/* 32 bit string hashes */
|
||||
typedef uint32_t rt_render_target_id;
|
||||
typedef uint32_t rt_render_pass_id;
|
||||
|
||||
typedef struct {
|
||||
rt_render_target_id id;
|
||||
rt_pixel_format format;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
uint32_t sample_count;
|
||||
|
||||
/* For debug purposes, can be 0 */
|
||||
rt_relptr name;
|
||||
uint32_t name_len;
|
||||
} rt_render_target_info;
|
||||
|
||||
typedef enum {
|
||||
RT_RENDER_TARGET_READ_SAMPLED,
|
||||
RT_RENDER_TARGET_READ_DIRECT,
|
||||
|
||||
RT_RENDER_TARGET_READ_count,
|
||||
} rt_render_target_read_mode;
|
||||
|
||||
typedef struct {
|
||||
rt_render_target_id render_target;
|
||||
rt_render_target_read_mode mode;
|
||||
} rt_render_target_read;
|
||||
|
||||
typedef enum {
|
||||
/* Clears the render target with the clear value before executing the pass */
|
||||
RT_RENDER_TARGET_WRITE_CLEAR = 0x01,
|
||||
|
||||
/* Discards the written values after the pass has finished executing */
|
||||
RT_RENDER_TARGET_WRITE_DISCARD = 0x02,
|
||||
} rt_render_target_write_flags;
|
||||
|
||||
typedef struct {
|
||||
rt_render_target_id render_target;
|
||||
union {
|
||||
rt_color color;
|
||||
struct {
|
||||
float depth;
|
||||
int32_t stencil;
|
||||
} depth_stencil;
|
||||
} clear;
|
||||
rt_render_target_write_flags flags;
|
||||
} rt_render_target_write;
|
||||
|
||||
typedef enum {
|
||||
RT_RENDER_PASS_TYPE_GRAPHICS,
|
||||
RT_RENDER_PASS_TYPE_COMPUTE,
|
||||
} rt_render_pass_type;
|
||||
|
||||
typedef struct {
|
||||
rt_render_pass_id id;
|
||||
|
||||
/* For debug purposes, can be 0 */
|
||||
rt_relptr name;
|
||||
uint32_t name_len;
|
||||
|
||||
rt_render_pass_type type;
|
||||
/* list of rt_render_target_reads */
|
||||
rt_relptr read_render_targets;
|
||||
/* list of rt_render_target_writes */
|
||||
rt_relptr write_render_targets;
|
||||
uint32_t read_render_target_count;
|
||||
uint32_t write_render_target_count;
|
||||
} rt_render_pass_info;
|
||||
|
||||
typedef struct {
|
||||
rt_relptr render_targets;
|
||||
rt_relptr render_passes;
|
||||
uint32_t render_target_count;
|
||||
uint32_t render_pass_count;
|
||||
rt_render_target_id backbuffer;
|
||||
rt_relptr names;
|
||||
uint32_t names_size;
|
||||
} rt_framegraph_info;
|
||||
|
||||
typedef void rt_render_pass_prepare_fn(rt_render_pass_id id,
|
||||
const rt_render_target_write *writes,
|
||||
uint32_t write_count,
|
||||
const rt_render_target_read *reads,
|
||||
uint32_t read_count);
|
||||
typedef void rt_render_pass_execute_fn(rt_render_pass_id id,
|
||||
const rt_render_target_write *writes,
|
||||
uint32_t write_count,
|
||||
const rt_render_target_read *reads,
|
||||
uint32_t read_count);
|
||||
typedef void rt_render_pass_finalize_fn(rt_render_pass_id id,
|
||||
const rt_render_target_write *writes,
|
||||
uint32_t write_count,
|
||||
const rt_render_target_read *reads,
|
||||
uint32_t read_count);
|
||||
|
||||
typedef struct {
|
||||
rt_render_pass_prepare_fn *Prepare;
|
||||
rt_render_pass_execute_fn *Execute;
|
||||
rt_render_pass_finalize_fn *Finalize;
|
||||
} rt_render_pass_bind_fns;
|
||||
|
||||
typedef struct rt_framegraph_s rt_framegraph;
|
||||
|
||||
RT_DLLEXPORT rt_framegraph *rtCreateFramegraph(const rt_framegraph_info *info);
|
||||
|
||||
RT_DLLEXPORT void rtDestroyFramegraph(rt_framegraph *framegraph);
|
||||
|
||||
RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph,
|
||||
rt_render_pass_id pass,
|
||||
const rt_render_pass_bind_fns *bind_fns);
|
||||
|
||||
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id);
|
||||
|
||||
/* Utility to turn a string into a usable render target id. */
|
||||
RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len);
|
||||
|
||||
/* Utility to turn a string into a usable render pass id. */
|
||||
RT_DLLEXPORT rt_render_pass_id rtCalculateRenderPassID(const char *name, size_t len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -1,938 +0,0 @@
|
||||
#include "gfx.h"
|
||||
#include "renderer_api.h"
|
||||
|
||||
#include "runtime/config.h"
|
||||
#include "runtime/handles.h"
|
||||
#include "runtime/hashing.h"
|
||||
#include "runtime/mem_arena.h"
|
||||
#include "runtime/threading.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
RT_CVAR_I(rt_MaxFramegraphs, "Maximum number of framegraphs. Default 16", 16);
|
||||
|
||||
#define RT_FRAMEGRAPH_MAX_PASSES 32
|
||||
#define RT_FRAMEGRAPH_MAX_RENDER_TARGETS 32
|
||||
#define RT_RENDERPASS_MAX_READS 8
|
||||
#define RT_RENDERPASS_MAX_WRITES 8
|
||||
|
||||
typedef struct {
|
||||
rt_render_target_id id;
|
||||
rt_pixel_format format;
|
||||
const char *name;
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
unsigned int sample_count;
|
||||
rt_gpu_semaphore_handle semaphores[3];
|
||||
rt_render_target_handle api_render_target;
|
||||
} rt_render_target;
|
||||
|
||||
typedef struct {
|
||||
rt_render_pass_id id;
|
||||
rt_render_pass_type type;
|
||||
const char *name;
|
||||
int execution_level;
|
||||
|
||||
bool reads_swapchain;
|
||||
bool writes_swapchain;
|
||||
|
||||
unsigned int read_count;
|
||||
unsigned int write_count;
|
||||
rt_render_pass_bind_fns bound_fns;
|
||||
rt_render_target_read reads[RT_RENDERPASS_MAX_READS];
|
||||
rt_render_target_write writes[RT_RENDERPASS_MAX_WRITES];
|
||||
} rt_render_pass;
|
||||
|
||||
struct rt_framegraph_s {
|
||||
uint32_t pass_count;
|
||||
uint32_t render_target_count;
|
||||
|
||||
rt_framegraph *next_free;
|
||||
|
||||
rt_render_pass passes[RT_FRAMEGRAPH_MAX_PASSES];
|
||||
|
||||
rt_render_target render_targets[RT_FRAMEGRAPH_MAX_RENDER_TARGETS];
|
||||
};
|
||||
|
||||
static rt_framegraph *_framegraphs;
|
||||
static rt_framegraph *_first_free;
|
||||
static rt_mutex *_free_list_lock;
|
||||
|
||||
#define NAMES_CAPACITY 512
|
||||
static char _name_buffer[512];
|
||||
static char *_name_next;
|
||||
static rt_mutex *_name_lock;
|
||||
|
||||
static void ReturnFrameGraph(rt_framegraph *framegraph) {
|
||||
rtLockMutex(_free_list_lock);
|
||||
framegraph->next_free = _first_free;
|
||||
_first_free = framegraph;
|
||||
rtUnlockMutex(_free_list_lock);
|
||||
}
|
||||
|
||||
rt_result InitFramegraphManager(void) {
|
||||
_free_list_lock = rtCreateMutex();
|
||||
if (!_free_list_lock)
|
||||
return RT_UNKNOWN_ERROR;
|
||||
_name_lock = rtCreateMutex();
|
||||
if (!_name_lock) {
|
||||
rtDestroyMutex(_free_list_lock);
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
_framegraphs = calloc((size_t)rt_MaxFramegraphs.i, sizeof(rt_framegraph));
|
||||
if (!_framegraphs)
|
||||
return RT_OUT_OF_MEMORY;
|
||||
for (int i = 0; i < rt_MaxFramegraphs.i; ++i)
|
||||
_framegraphs[i].next_free = (i < rt_MaxFramegraphs.i - 1) ? &_framegraphs[i + 1] : NULL;
|
||||
_first_free = &_framegraphs[0];
|
||||
_name_next = &_name_buffer[0];
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
void ShutdownFramegraphManager(void) {
|
||||
free(_framegraphs);
|
||||
rtDestroyMutex(_free_list_lock);
|
||||
rtDestroyMutex(_name_lock);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
unsigned int dependency_count;
|
||||
int execution_level;
|
||||
} rt_pass_construct;
|
||||
|
||||
static int CompareRenderPassExecutionLevels(const void *a, const void *b) {
|
||||
const rt_render_pass *pass_a = a, *pass_b = b;
|
||||
return pass_a->execution_level - pass_b->execution_level;
|
||||
}
|
||||
|
||||
static bool
|
||||
CreateRenderPasses(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
|
||||
uint32_t render_pass_count = info->render_pass_count;
|
||||
|
||||
bool result = false;
|
||||
|
||||
/* Pass A depends on pass B, if:
|
||||
* B preceeds A in the list of render passes AND
|
||||
* B writes to a render target that A reads from. */
|
||||
bool *dependency_matrix =
|
||||
rtArenaPushZero(arena, render_pass_count * render_pass_count * sizeof(bool));
|
||||
if (!dependency_matrix) {
|
||||
rtLog("GFX",
|
||||
"Not enough memory to allocate a %ux%u dependency matrix.",
|
||||
render_pass_count,
|
||||
render_pass_count);
|
||||
goto out;
|
||||
}
|
||||
/* Checks if pass "dependent_idx" depends on pass "dependency_idx" */
|
||||
#define PASS_DEPENDS(dependent_idx, dependency_idx) \
|
||||
dependency_matrix[(dependency_idx)*render_pass_count + (dependent_idx)]
|
||||
|
||||
rt_pass_construct *construct_passes =
|
||||
RT_ARENA_PUSH_ARRAY_ZERO(arena, rt_pass_construct, render_pass_count);
|
||||
if (!construct_passes) {
|
||||
rtLog("GFX",
|
||||
"Not enough memory to allocate construction information for %u passes.",
|
||||
render_pass_count);
|
||||
goto out;
|
||||
}
|
||||
|
||||
const rt_render_pass_info *pass_info = rtResolveConstRelptr(&info->render_passes);
|
||||
for (uint32_t i = 0; i < render_pass_count; ++i) {
|
||||
construct_passes[i].execution_level = -1; /* not scheduled yet */
|
||||
const rt_render_target_write *writes_i =
|
||||
rtResolveConstRelptr(&pass_info[i].write_render_targets);
|
||||
for (uint32_t j = i + 1; j < render_pass_count; ++j) {
|
||||
const rt_render_target_read *reads_j =
|
||||
rtResolveConstRelptr(&pass_info[j].read_render_targets);
|
||||
bool depends = false;
|
||||
for (uint32_t read_idx = 0; read_idx < pass_info[j].read_render_target_count;
|
||||
++read_idx) {
|
||||
for (uint32_t write_idx = 0; write_idx < pass_info[i].write_render_target_count;
|
||||
++write_idx) {
|
||||
if (writes_i[write_idx].render_target == reads_j[read_idx].render_target)
|
||||
depends = true;
|
||||
}
|
||||
}
|
||||
PASS_DEPENDS(j, i) = depends;
|
||||
if (depends)
|
||||
++construct_passes[j].dependency_count;
|
||||
}
|
||||
}
|
||||
|
||||
/* Pass A can be executed concurrently with pass B if:
|
||||
* 1. A and B don't write to the same render target AND
|
||||
* 2. A's dependencies and B's dependencies have finished executing. */
|
||||
|
||||
/* We can have at most render_pass_count execution levels */
|
||||
uint32_t *level_passes = RT_ARENA_PUSH_ARRAY_ZERO(arena, uint32_t, render_pass_count);
|
||||
if (!level_passes) {
|
||||
rtLog("GFX", "Failed to allocate a temporary array for constructing execution levels.");
|
||||
goto out;
|
||||
}
|
||||
uint32_t unscheduled_passes = render_pass_count;
|
||||
for (int level = 0; level < (int)render_pass_count; ++level) {
|
||||
unsigned int level_pass_count = 0;
|
||||
for (uint32_t i = 0; i < render_pass_count; ++i) {
|
||||
if (construct_passes[i].execution_level == -1 &&
|
||||
construct_passes[i].dependency_count == 0) {
|
||||
|
||||
/* Check that no writes conflict */
|
||||
bool write_conflict = false;
|
||||
const rt_render_target_write *writes_i =
|
||||
rtResolveConstRelptr(&pass_info[i].write_render_targets);
|
||||
for (unsigned int j = 0; j < level_pass_count; ++j) {
|
||||
uint32_t pass_idx = level_passes[i];
|
||||
const rt_render_target_write *pass_writes =
|
||||
rtResolveConstRelptr(&pass_info[pass_idx].write_render_targets);
|
||||
for (uint32_t k = 0; k < pass_info[i].write_render_target_count; ++k) {
|
||||
for (uint32_t l = 0; l < pass_info[pass_idx].write_render_target_count;
|
||||
++l) {
|
||||
if (writes_i[k].render_target == pass_writes[l].render_target) {
|
||||
write_conflict = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (write_conflict)
|
||||
break;
|
||||
}
|
||||
if (write_conflict)
|
||||
break;
|
||||
}
|
||||
if (!write_conflict) {
|
||||
RT_ASSERT(level_pass_count < render_pass_count, "");
|
||||
level_passes[level_pass_count++] = i;
|
||||
construct_passes[i].execution_level = level;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (level_pass_count == 0) {
|
||||
rtLog("GFX", "Failed to compute a valid schedule for the provided framegraph.");
|
||||
goto out;
|
||||
}
|
||||
/* level passes now contains the passes we can execute concurrently.
|
||||
* Decrement dependency count for all passes that depend on a pass in this level */
|
||||
|
||||
for (uint32_t i = 0; i < level_pass_count; ++i) {
|
||||
for (uint32_t j = 0; j < render_pass_count; ++j) {
|
||||
if (PASS_DEPENDS(j, level_passes[i]))
|
||||
--construct_passes[j].dependency_count;
|
||||
}
|
||||
}
|
||||
|
||||
unscheduled_passes -= level_pass_count;
|
||||
if (unscheduled_passes == 0)
|
||||
break;
|
||||
}
|
||||
RT_ASSERT(unscheduled_passes == 0, "Did not schedule all passes");
|
||||
/* Construct passes now contains the "execution level" for each pass.
|
||||
* We execute passes in that order, those with the same execution level can be executed
|
||||
* concurrently. */
|
||||
|
||||
graph->pass_count = render_pass_count;
|
||||
for (uint32_t i = 0; i < render_pass_count; ++i) {
|
||||
graph->passes[i].execution_level = construct_passes[i].execution_level;
|
||||
const rt_render_target_write *writes =
|
||||
rtResolveConstRelptr(&pass_info[i].write_render_targets);
|
||||
const rt_render_target_read *reads =
|
||||
rtResolveConstRelptr(&pass_info[i].read_render_targets);
|
||||
memcpy(graph->passes[i].writes,
|
||||
writes,
|
||||
pass_info[i].write_render_target_count * sizeof(rt_render_target_write));
|
||||
memcpy(graph->passes[i].reads,
|
||||
reads,
|
||||
pass_info[i].read_render_target_count * sizeof(rt_render_target_read));
|
||||
graph->passes[i].write_count = pass_info[i].write_render_target_count;
|
||||
graph->passes[i].read_count = pass_info[i].read_render_target_count;
|
||||
graph->passes[i].id = pass_info[i].id;
|
||||
graph->passes[i].type = pass_info[i].type;
|
||||
graph->passes[i].name = NULL;
|
||||
|
||||
graph->passes[i].reads_swapchain = false;
|
||||
graph->passes[i].writes_swapchain = false;
|
||||
const rt_render_target_info *rts = rtResolveConstRelptr(&info->render_targets);
|
||||
for (unsigned int j = 0; j < graph->passes[i].read_count; ++j) {
|
||||
rt_render_target_id rt = graph->passes[i].reads[j].render_target;
|
||||
for (unsigned int k = 0; k < info->render_target_count; ++k) {
|
||||
if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) {
|
||||
graph->passes[i].reads_swapchain = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned int j = 0; j < graph->passes[i].write_count; ++j) {
|
||||
rt_render_target_id rt = graph->passes[i].writes[j].render_target;
|
||||
for (unsigned int k = 0; k < info->render_target_count; ++k) {
|
||||
if (rts[k].id == rt && rts[k].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN &&
|
||||
rts[k].format == RT_PIXEL_FORMAT_SWAPCHAIN) {
|
||||
graph->passes[i].writes_swapchain = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char *name = rtResolveConstRelptr(&pass_info[i].name);
|
||||
if (name) {
|
||||
size_t name_strlen = strlen(name);
|
||||
if (name_strlen + 1 == pass_info[i].name_len) {
|
||||
rtLockMutex(_name_lock);
|
||||
ptrdiff_t name_off = _name_next - _name_buffer;
|
||||
if ((name_off + pass_info[i].name_len) < NAMES_CAPACITY) {
|
||||
char *dst_name = _name_next;
|
||||
memcpy(dst_name, name, pass_info[i].name_len);
|
||||
_name_next += pass_info[i].name_len;
|
||||
graph->passes[i].name = dst_name;
|
||||
} else {
|
||||
rtLog("GFX", "Ran out of storage for debug name %s", name);
|
||||
}
|
||||
rtUnlockMutex(_name_lock);
|
||||
} else {
|
||||
rtLog("GFX", "Declared name-length for pass %u does not match strlen()");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort by execution level */
|
||||
qsort(graph->passes,
|
||||
render_pass_count,
|
||||
sizeof(rt_render_pass),
|
||||
CompareRenderPassExecutionLevels);
|
||||
result = true;
|
||||
out:
|
||||
return result;
|
||||
#undef PASS_DEPENDS
|
||||
}
|
||||
|
||||
static bool
|
||||
CreateRenderTargets(rt_framegraph *graph, const rt_framegraph_info *info, rt_arena *arena) {
|
||||
bool result = false;
|
||||
|
||||
unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
|
||||
|
||||
/* TODO(Kevin): determine aliasing opportunities */
|
||||
const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
|
||||
for (uint32_t i = 0; i < info->render_target_count; ++i) {
|
||||
graph->render_targets[i].id = render_targets[i].id;
|
||||
graph->render_targets[i].format = render_targets[i].format;
|
||||
graph->render_targets[i].width = render_targets[i].width;
|
||||
graph->render_targets[i].height = render_targets[i].height;
|
||||
graph->render_targets[i].sample_count = render_targets[i].sample_count;
|
||||
graph->render_targets[i].name = NULL;
|
||||
|
||||
const char *name = rtResolveConstRelptr(&render_targets[i].name);
|
||||
if (name) {
|
||||
size_t name_strlen = strlen(name);
|
||||
if (name_strlen + 1 == render_targets[i].name_len) {
|
||||
rtLockMutex(_name_lock);
|
||||
ptrdiff_t name_off = _name_next - _name_buffer;
|
||||
if ((name_off + render_targets[i].name_len) < NAMES_CAPACITY) {
|
||||
char *dst_name = _name_next;
|
||||
memcpy(dst_name, name, render_targets[i].name_len);
|
||||
_name_next += render_targets[i].name_len;
|
||||
graph->render_targets[i].name = dst_name;
|
||||
} else {
|
||||
rtLog("GFX", "Ran out of storage for debug name %s", name);
|
||||
}
|
||||
rtUnlockMutex(_name_lock);
|
||||
} else {
|
||||
rtLog("GFX", "Declared name-length for render-target %u does not match strlen()");
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < max_frames_in_flight; ++j) {
|
||||
char sem_name[128];
|
||||
rtSPrint(sem_name, 128, "%s - Semaphore (%u)", (name) ? name : "Unnamed RT", j);
|
||||
rt_gpu_semaphore_info sem_info = {
|
||||
.initial_value = 0,
|
||||
.name = sem_name,
|
||||
};
|
||||
g_renderer.CreateSemaphores(1, &sem_info, &graph->render_targets[i].semaphores[j]);
|
||||
}
|
||||
|
||||
if (graph->render_targets[i].width != RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
|
||||
graph->render_targets[i].height != RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
|
||||
graph->render_targets[i].format != RT_PIXEL_FORMAT_SWAPCHAIN) {
|
||||
|
||||
graph->render_targets[i].api_render_target =
|
||||
g_renderer.CreateRenderTarget(&render_targets[i]);
|
||||
} else {
|
||||
graph->render_targets[i].api_render_target = g_renderer.GetSwapchainRenderTarget();
|
||||
}
|
||||
if (!RT_IS_HANDLE_VALID(graph->render_targets[i].api_render_target)) {
|
||||
rtReportError("GFX", "Failed to create render target %u of framegraph.", i);
|
||||
for (uint32_t j = 0; j < i; ++j)
|
||||
g_renderer.DestroyRenderTarget(graph->render_targets[j].api_render_target);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
graph->render_target_count = info->render_target_count;
|
||||
|
||||
result = true;
|
||||
out:
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool ValidateInfo(const rt_framegraph_info *info) {
|
||||
if (info->render_pass_count > RT_FRAMEGRAPH_MAX_PASSES) {
|
||||
rtReportError("GFX",
|
||||
"Framegraph has too many passes: %u (maximum allowed is %u)",
|
||||
info->render_pass_count,
|
||||
RT_FRAMEGRAPH_MAX_PASSES);
|
||||
return false;
|
||||
}
|
||||
if (info->render_target_count > RT_FRAMEGRAPH_MAX_RENDER_TARGETS) {
|
||||
rtReportError("GFX",
|
||||
"Framegraph has too many render targets: %u (maximum allowed is %u)",
|
||||
info->render_target_count,
|
||||
RT_FRAMEGRAPH_MAX_RENDER_TARGETS);
|
||||
return false;
|
||||
}
|
||||
|
||||
const rt_render_target_info *render_targets = rtResolveConstRelptr(&info->render_targets);
|
||||
for (uint32_t i = 0; i < info->render_target_count; ++i) {
|
||||
if (render_targets[i].id == 0) {
|
||||
rtReportError("GFX", "Framegraph render target %u has invalid id 0", i);
|
||||
return false;
|
||||
} else if ((render_targets[i].width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
|
||||
render_targets[i].height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) &&
|
||||
(render_targets[i].width != render_targets[i].height)) {
|
||||
rtReportError("GFX",
|
||||
"Framegraph render target %u: If width or height is set to "
|
||||
"SWAPCHAIN, both values must be set to SWAPCHAIN.",
|
||||
i);
|
||||
return false;
|
||||
} else if (render_targets[i].format >= RT_PIXEL_FORMAT_count) {
|
||||
rtReportError("GFX",
|
||||
"Framegraph render target %u format is outside the allowed range.",
|
||||
i);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
const rt_render_pass_info *passes = rtResolveConstRelptr(&info->render_passes);
|
||||
for (uint32_t i = 0; i < info->render_pass_count; ++i) {
|
||||
if (passes[i].id == 0) {
|
||||
rtReportError("GFX", "Framegraph pass %u has invalid id 0", i);
|
||||
return false;
|
||||
} else if (passes[i].read_render_target_count > RT_RENDERPASS_MAX_READS) {
|
||||
rtReportError(
|
||||
"GFX",
|
||||
"Framegraph pass %u reads too many rendertargets: %u (maximum allowed is %u)",
|
||||
i,
|
||||
passes[i].read_render_target_count,
|
||||
RT_RENDERPASS_MAX_READS);
|
||||
return false;
|
||||
} else if (passes[i].write_render_target_count > RT_RENDERPASS_MAX_WRITES) {
|
||||
rtReportError(
|
||||
"GFX",
|
||||
"Framegraph pass %u writes too many rendertargets: %u (maximum allowed is %u)",
|
||||
i,
|
||||
passes[i].write_render_target_count,
|
||||
RT_RENDERPASS_MAX_WRITES);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
RT_DLLEXPORT rt_framegraph *rtCreateFramegraph(const rt_framegraph_info *info) {
|
||||
if (!ValidateInfo(info)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
||||
if (!temp.arena) {
|
||||
rtReportError("GFX", "Failed to acquire a temporary arena for constructing a framegraph");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rt_framegraph *graph = NULL;
|
||||
/* Acquire a unused framegraph */
|
||||
rtLockMutex(_free_list_lock);
|
||||
graph = _first_free;
|
||||
if (graph)
|
||||
_first_free = graph->next_free;
|
||||
rtUnlockMutex(_free_list_lock);
|
||||
if (!graph)
|
||||
goto out;
|
||||
memset(graph, 0, sizeof(*graph));
|
||||
|
||||
if (!CreateRenderPasses(graph, info, temp.arena)) {
|
||||
ReturnFrameGraph(graph);
|
||||
graph = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!CreateRenderTargets(graph, info, temp.arena)) {
|
||||
ReturnFrameGraph(graph);
|
||||
graph = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
rtReturnTemporaryArena(temp);
|
||||
return graph;
|
||||
}
|
||||
|
||||
RT_DLLEXPORT void rtDestroyFramegraph(rt_framegraph *framegraph) {
|
||||
ReturnFrameGraph(framegraph);
|
||||
}
|
||||
|
||||
RT_DLLEXPORT void rtBindRenderPass(rt_framegraph *framegraph,
|
||||
rt_render_pass_id id,
|
||||
const rt_render_pass_bind_fns *bind_fns) {
|
||||
for (uint32_t i = 0; i < framegraph->pass_count; ++i) {
|
||||
if (framegraph->passes[i].id == id) {
|
||||
if (framegraph->passes[i].bound_fns.Execute)
|
||||
rtLog("GFX", "Rebound pass %x to new functions", id);
|
||||
framegraph->passes[i].bound_fns = *bind_fns;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("GFX", "Tried to bind functions to unknown render pass %x", id);
|
||||
}
|
||||
|
||||
static bool IsDepthFormat(rt_pixel_format format) {
|
||||
return format == RT_PIXEL_FORMAT_DEPTH32 || format == RT_PIXEL_FORMAT_DEPTH24_STENCIL8;
|
||||
}
|
||||
|
||||
static rt_render_target *GetRenderTarget(rt_framegraph *framegraph, rt_render_target_id id) {
|
||||
for (uint32_t i = 0; i < framegraph->render_target_count; ++i) {
|
||||
if (framegraph->render_targets[i].id == id)
|
||||
return &framegraph->render_targets[i];
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
BeginGraphicsPass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer_handle cmdbuf) {
|
||||
const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
|
||||
const rt_render_target_read *reads = framegraph->passes[pass_idx].reads;
|
||||
uint32_t write_count = framegraph->passes[pass_idx].write_count;
|
||||
uint32_t read_count = framegraph->passes[pass_idx].read_count;
|
||||
|
||||
/* Convert reads and writes into the pass begin info for the renderer */
|
||||
rt_cmd_begin_pass_info begin_info;
|
||||
memset(&begin_info, 0, sizeof(begin_info));
|
||||
begin_info.name = framegraph->passes[pass_idx].name;
|
||||
|
||||
/* All written render targets need to have the same size */
|
||||
if (write_count > 0) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, writes[0].render_target);
|
||||
RT_ASSERT(rt != NULL, "Invalid render target in pass write.");
|
||||
begin_info.render_area = (rt_rect2i){
|
||||
.offset = {{0, 0}},
|
||||
.size = {{.x = rt->width, .y = rt->height}},
|
||||
};
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < write_count; ++i) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, writes[i].render_target);
|
||||
RT_ASSERT(rt != NULL, "Invalid render target in pass write.");
|
||||
|
||||
g_renderer.CmdTransitionRenderTarget(cmdbuf,
|
||||
rt->api_render_target,
|
||||
RT_RENDER_TARGET_STATE_ATTACHMENT);
|
||||
|
||||
if (!IsDepthFormat(rt->format)) {
|
||||
/* Add as color buffer */
|
||||
uint32_t cbidx = begin_info.color_buffer_count;
|
||||
RT_ASSERT(cbidx < 4, "Maximum of 4 colorbuffers per pass exceeded.");
|
||||
begin_info.color_buffers[cbidx] = rt->api_render_target;
|
||||
if ((writes[i].flags & RT_RENDER_TARGET_WRITE_CLEAR) != 0) {
|
||||
begin_info.color_buffer_loads[cbidx] = RT_PASS_LOAD_MODE_CLEAR;
|
||||
begin_info.color_buffer_clear_values[cbidx].color = writes[i].clear.color;
|
||||
} else {
|
||||
begin_info.color_buffer_loads[cbidx] = RT_PASS_LOAD_MODE_LOAD;
|
||||
}
|
||||
if ((writes[i].flags & RT_RENDER_TARGET_WRITE_DISCARD) != 0) {
|
||||
begin_info.color_buffer_writes[cbidx] = RT_PASS_WRITE_MODE_DISCARD;
|
||||
} else {
|
||||
begin_info.color_buffer_writes[cbidx] = RT_PASS_WRITE_MODE_STORE;
|
||||
}
|
||||
++begin_info.color_buffer_count;
|
||||
} else {
|
||||
/* Add as depth buffer*/
|
||||
RT_ASSERT(!RT_IS_HANDLE_VALID(begin_info.depth_stencil_buffer),
|
||||
"Only one depth/stencil buffer can be set!");
|
||||
begin_info.depth_stencil_buffer = rt->api_render_target;
|
||||
if ((writes[i].flags & RT_RENDER_TARGET_WRITE_CLEAR) != 0) {
|
||||
begin_info.depth_stencil_buffer_load = RT_PASS_LOAD_MODE_CLEAR;
|
||||
begin_info.depth_stencil_buffer_clear_value.depth_stencil.depth =
|
||||
writes[i].clear.depth_stencil.depth;
|
||||
begin_info.depth_stencil_buffer_clear_value.depth_stencil.stencil =
|
||||
writes[i].clear.depth_stencil.stencil;
|
||||
} else {
|
||||
begin_info.depth_stencil_buffer_load = RT_PASS_LOAD_MODE_LOAD;
|
||||
}
|
||||
if ((writes[i].flags & RT_RENDER_TARGET_WRITE_DISCARD) != 0) {
|
||||
begin_info.depth_stencil_buffer_write = RT_PASS_WRITE_MODE_DISCARD;
|
||||
} else {
|
||||
begin_info.depth_stencil_buffer_write = RT_PASS_WRITE_MODE_STORE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < read_count; ++i) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, reads[i].render_target);
|
||||
RT_ASSERT(rt != NULL, "Invalid render target in pass read.");
|
||||
/* We need to transition the render target */
|
||||
|
||||
switch (reads[i].mode) {
|
||||
case RT_RENDER_TARGET_READ_SAMPLED:
|
||||
g_renderer.CmdTransitionRenderTarget(cmdbuf,
|
||||
rt->api_render_target,
|
||||
RT_RENDER_TARGET_STATE_SAMPLED_IMAGE);
|
||||
break;
|
||||
case RT_RENDER_TARGET_READ_DIRECT:
|
||||
g_renderer.CmdTransitionRenderTarget(cmdbuf,
|
||||
rt->api_render_target,
|
||||
RT_RENDER_TARGET_STATE_STORAGE_IMAGE);
|
||||
break;
|
||||
default:
|
||||
RT_ASSERT(0, "Invalid render target read mode");
|
||||
}
|
||||
}
|
||||
|
||||
g_renderer.CmdBeginPass(cmdbuf, &begin_info);
|
||||
}
|
||||
|
||||
static void
|
||||
BeginComputePass(rt_framegraph *framegraph, uint32_t pass_idx, rt_command_buffer_handle cmdbuf) {
|
||||
const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
|
||||
const rt_render_target_read *reads = framegraph->passes[pass_idx].reads;
|
||||
uint32_t write_count = framegraph->passes[pass_idx].write_count;
|
||||
uint32_t read_count = framegraph->passes[pass_idx].read_count;
|
||||
|
||||
for (uint32_t i = 0; i < write_count; ++i) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, writes[i].render_target);
|
||||
RT_ASSERT(rt != NULL, "Invalid render target in pass write.");
|
||||
g_renderer.CmdTransitionRenderTarget(cmdbuf,
|
||||
rt->api_render_target,
|
||||
RT_RENDER_TARGET_STATE_STORAGE_IMAGE);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < read_count; ++i) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, reads[i].render_target);
|
||||
RT_ASSERT(rt != NULL, "Invalid render target in pass read.");
|
||||
/* We need to transition the render target */
|
||||
|
||||
switch (reads[i].mode) {
|
||||
case RT_RENDER_TARGET_READ_SAMPLED:
|
||||
g_renderer.CmdTransitionRenderTarget(cmdbuf,
|
||||
rt->api_render_target,
|
||||
RT_RENDER_TARGET_STATE_SAMPLED_IMAGE);
|
||||
break;
|
||||
case RT_RENDER_TARGET_READ_DIRECT:
|
||||
g_renderer.CmdTransitionRenderTarget(cmdbuf,
|
||||
rt->api_render_target,
|
||||
RT_RENDER_TARGET_STATE_STORAGE_IMAGE);
|
||||
break;
|
||||
default:
|
||||
RT_ASSERT(0, "Invalid render target read mode");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RT_DLLEXPORT void rtExecuteFramegraph(rt_framegraph *framegraph, unsigned int frame_id) {
|
||||
int execution_level = framegraph->passes[0].execution_level;
|
||||
uint32_t level_start = 0;
|
||||
|
||||
rt_gpu_semaphore_handle swapchain_available = g_renderer.GetSwapchainAvailableSemaphore();
|
||||
rt_gpu_semaphore_handle render_finished = g_renderer.GetRenderFinishedSemaphore();
|
||||
unsigned int max_frames_in_flight = g_renderer.GetMaxFramesInFlight();
|
||||
unsigned int frame_index = frame_id % max_frames_in_flight;
|
||||
|
||||
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
||||
if (!temp.arena) {
|
||||
rtLog("GFX", "Unable to execute framegraph because no temporary arena is available.");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Find the last pass that writes to the swapchain */
|
||||
uint32_t last_swapchain_write = framegraph->pass_count - 1;
|
||||
for (uint32_t i = framegraph->pass_count - 1; i > 0; --i) {
|
||||
if (framegraph->passes[i].writes_swapchain) {
|
||||
last_swapchain_write = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* Find the first pass that reads the swapchain */
|
||||
uint32_t first_swapchain_read = 0;
|
||||
for (uint32_t i = 0; framegraph->pass_count; ++i) {
|
||||
if (framegraph->passes[i].reads_swapchain || framegraph->passes[i].writes_swapchain) {
|
||||
first_swapchain_read = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Overflows after ~4.871x10^7 years */
|
||||
uint64_t signal_value_base = (uint64_t)frame_id * 100;
|
||||
|
||||
for (uint32_t i = 0; i <= framegraph->pass_count && level_start < framegraph->pass_count; ++i) {
|
||||
if ((i == framegraph->pass_count) ||
|
||||
(framegraph->passes[i].execution_level > execution_level)) {
|
||||
|
||||
rt_temp_arena level_temp = rtBeginTempArena(temp.arena);
|
||||
|
||||
rt_gpu_semaphore_handle *graphics_wait_semaphores = NULL;
|
||||
rt_gpu_semaphore_handle *graphics_signal_semaphores = NULL;
|
||||
uint64_t *graphics_wait_values = NULL;
|
||||
uint64_t *graphics_signal_values = NULL;
|
||||
rt_command_buffer_handle *graphics_command_buffers = NULL;
|
||||
uint32_t graphics_command_buffer_count = 0;
|
||||
uint32_t graphics_signal_semaphore_count = 0;
|
||||
uint32_t graphics_wait_semaphore_count = 0;
|
||||
|
||||
rt_gpu_semaphore_handle *compute_wait_semaphores = NULL;
|
||||
rt_gpu_semaphore_handle *compute_signal_semaphores = NULL;
|
||||
rt_command_buffer_handle *compute_command_buffers = NULL;
|
||||
uint64_t *compute_wait_values = NULL;
|
||||
uint64_t *compute_signal_values = NULL;
|
||||
uint32_t compute_command_buffer_count = 0;
|
||||
uint32_t compute_signal_semaphore_count = 0;
|
||||
uint32_t compute_wait_semaphore_count = 0;
|
||||
|
||||
/* Determine necessary array sizes */
|
||||
for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) {
|
||||
bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL &&
|
||||
framegraph->passes[pass_idx].bound_fns.Execute != NULL &&
|
||||
framegraph->passes[pass_idx].bound_fns.Finalize != NULL;
|
||||
if (!pass_bound) {
|
||||
rtLog("GFX",
|
||||
"Framegraph pass %u (%x) is not bound to any function.",
|
||||
pass_idx,
|
||||
framegraph->passes[pass_idx].id);
|
||||
continue;
|
||||
}
|
||||
bool is_graphics_pass =
|
||||
framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS;
|
||||
if (is_graphics_pass) {
|
||||
graphics_wait_semaphore_count += framegraph->passes[pass_idx].read_count +
|
||||
framegraph->passes[pass_idx].write_count;
|
||||
graphics_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
|
||||
if (framegraph->passes[pass_idx].reads_swapchain ||
|
||||
pass_idx == first_swapchain_read)
|
||||
graphics_wait_semaphore_count += 1;
|
||||
if (framegraph->passes[pass_idx].writes_swapchain ||
|
||||
pass_idx == last_swapchain_write)
|
||||
graphics_signal_semaphore_count += 1;
|
||||
++graphics_command_buffer_count;
|
||||
} else {
|
||||
compute_wait_semaphore_count += framegraph->passes[pass_idx].read_count +
|
||||
framegraph->passes[pass_idx].write_count;
|
||||
compute_signal_semaphore_count += framegraph->passes[pass_idx].write_count;
|
||||
if (framegraph->passes[pass_idx].reads_swapchain ||
|
||||
pass_idx == first_swapchain_read)
|
||||
compute_wait_semaphore_count += 1;
|
||||
if (framegraph->passes[pass_idx].writes_swapchain ||
|
||||
pass_idx == last_swapchain_write)
|
||||
compute_signal_semaphore_count += 1;
|
||||
++compute_command_buffer_count;
|
||||
}
|
||||
}
|
||||
|
||||
graphics_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
graphics_wait_semaphore_count);
|
||||
graphics_wait_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, graphics_wait_semaphore_count);
|
||||
graphics_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
graphics_signal_semaphore_count);
|
||||
graphics_signal_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, graphics_signal_semaphore_count);
|
||||
graphics_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_command_buffer_handle,
|
||||
graphics_command_buffer_count);
|
||||
graphics_signal_semaphore_count = 0;
|
||||
graphics_wait_semaphore_count = 0;
|
||||
graphics_command_buffer_count = 0;
|
||||
|
||||
compute_wait_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
compute_wait_semaphore_count);
|
||||
compute_wait_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_wait_semaphore_count);
|
||||
compute_signal_semaphores = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_gpu_semaphore_handle,
|
||||
compute_signal_semaphore_count);
|
||||
compute_signal_values =
|
||||
RT_ARENA_PUSH_ARRAY(level_temp.arena, uint64_t, compute_signal_semaphore_count);
|
||||
compute_command_buffers = RT_ARENA_PUSH_ARRAY(level_temp.arena,
|
||||
rt_command_buffer_handle,
|
||||
compute_command_buffer_count);
|
||||
compute_signal_semaphore_count = 0;
|
||||
compute_wait_semaphore_count = 0;
|
||||
compute_command_buffer_count = 0;
|
||||
|
||||
/* Dispatch all passes in the current execution level */
|
||||
for (uint32_t pass_idx = level_start; pass_idx < i; ++pass_idx) {
|
||||
bool pass_bound = framegraph->passes[pass_idx].bound_fns.Prepare != NULL &&
|
||||
framegraph->passes[pass_idx].bound_fns.Execute != NULL &&
|
||||
framegraph->passes[pass_idx].bound_fns.Finalize != NULL;
|
||||
if (!pass_bound)
|
||||
continue;
|
||||
|
||||
// rt_render_pass_id id = framegraph->passes[pass_idx].id;
|
||||
const rt_render_target_write *writes = framegraph->passes[pass_idx].writes;
|
||||
const rt_render_target_read *reads = framegraph->passes[pass_idx].reads;
|
||||
uint32_t write_count = framegraph->passes[pass_idx].write_count;
|
||||
uint32_t read_count = framegraph->passes[pass_idx].read_count;
|
||||
|
||||
/* TODO(Kevin): Every one of these should be a job-dispatch*/
|
||||
|
||||
bool is_graphics_pass =
|
||||
framegraph->passes[pass_idx].type == RT_RENDER_PASS_TYPE_GRAPHICS;
|
||||
rt_command_buffer_handle cmdbuf;
|
||||
rt_alloc_command_buffer_info cmdbuf_alloc = {
|
||||
.target_queue = is_graphics_pass ? RT_GRAPHICS_QUEUE : RT_COMPUTE_QUEUE,
|
||||
};
|
||||
if (g_renderer.AllocCommandBuffers(1, &cmdbuf_alloc, &cmdbuf) != RT_SUCCESS) {
|
||||
rtLog("GFX",
|
||||
"Failed to allocate a command buffer for framegraph pass %u (%x)",
|
||||
pass_idx,
|
||||
framegraph->passes[pass_idx].id);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (is_graphics_pass) {
|
||||
BeginGraphicsPass(framegraph, pass_idx, cmdbuf);
|
||||
} else {
|
||||
BeginComputePass(framegraph, pass_idx, cmdbuf);
|
||||
}
|
||||
|
||||
/*
|
||||
framegraph->passes[pass_idx].bound_fns.Prepare(id,
|
||||
writes,
|
||||
write_count,
|
||||
reads,
|
||||
read_count);
|
||||
framegraph->passes[pass_idx].bound_fns.Execute(id,
|
||||
writes,
|
||||
write_count,
|
||||
reads,
|
||||
read_count);
|
||||
framegraph->passes[pass_idx].bound_fns.Finalize(id,
|
||||
writes,
|
||||
write_count,
|
||||
reads,
|
||||
read_count);
|
||||
*/
|
||||
if (is_graphics_pass) {
|
||||
g_renderer.CmdEndPass(cmdbuf);
|
||||
}
|
||||
for (uint32_t j = 0; j < write_count; j++) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target);
|
||||
g_renderer.CmdFlushRenderTargetWrite(cmdbuf, rt->api_render_target);
|
||||
}
|
||||
|
||||
rt_gpu_semaphore_handle *wait_semaphores = NULL, *signal_semaphores = NULL;
|
||||
uint64_t *wait_values = NULL, *signal_values = NULL;
|
||||
rt_command_buffer_handle *command_buffers = NULL;
|
||||
uint32_t *wait_count = NULL, *signal_count = 0;
|
||||
uint32_t *command_buffer_count = NULL;
|
||||
if (is_graphics_pass) {
|
||||
wait_semaphores = graphics_wait_semaphores;
|
||||
signal_semaphores = graphics_signal_semaphores;
|
||||
wait_values = graphics_wait_values;
|
||||
signal_values = graphics_signal_values;
|
||||
command_buffers = graphics_command_buffers;
|
||||
wait_count = &graphics_wait_semaphore_count;
|
||||
signal_count = &graphics_signal_semaphore_count;
|
||||
command_buffer_count = &graphics_command_buffer_count;
|
||||
} else {
|
||||
wait_semaphores = compute_wait_semaphores;
|
||||
signal_semaphores = compute_signal_semaphores;
|
||||
wait_values = compute_wait_values;
|
||||
signal_values = compute_signal_values;
|
||||
command_buffers = compute_command_buffers;
|
||||
wait_count = &compute_wait_semaphore_count;
|
||||
signal_count = &compute_signal_semaphore_count;
|
||||
command_buffer_count = &compute_command_buffer_count;
|
||||
}
|
||||
|
||||
for (uint32_t j = 0; j < read_count; ++j) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, reads[j].render_target);
|
||||
wait_semaphores[*wait_count] = rt->semaphores[frame_index];
|
||||
wait_values[*wait_count] = signal_value_base + execution_level;
|
||||
*wait_count += 1;
|
||||
}
|
||||
for (uint32_t j = 0; j < write_count; ++j) {
|
||||
rt_render_target *rt = GetRenderTarget(framegraph, writes[j].render_target);
|
||||
signal_semaphores[*signal_count] = rt->semaphores[frame_index];
|
||||
signal_values[*signal_count] = signal_value_base + execution_level + 1;
|
||||
*signal_count += 1;
|
||||
|
||||
if (signal_value_base >= 200) {
|
||||
wait_semaphores[*wait_count] = rt->semaphores[frame_index];
|
||||
wait_values[*wait_count] = signal_value_base - 200 + execution_level + 1;
|
||||
*wait_count += 1;
|
||||
}
|
||||
}
|
||||
if (pass_idx == first_swapchain_read) {
|
||||
wait_semaphores[*wait_count] = swapchain_available;
|
||||
wait_values[*wait_count] = 0;
|
||||
*wait_count += 1;
|
||||
}
|
||||
if (pass_idx == last_swapchain_write) {
|
||||
signal_semaphores[*signal_count] = render_finished;
|
||||
signal_values[*signal_count] = 0;
|
||||
*signal_count += 1;
|
||||
}
|
||||
command_buffers[*command_buffer_count] = cmdbuf;
|
||||
*command_buffer_count += 1;
|
||||
}
|
||||
|
||||
if (graphics_command_buffer_count > 0) {
|
||||
rt_submit_command_buffers_info submit = {
|
||||
.command_buffers = graphics_command_buffers,
|
||||
.command_buffer_count = graphics_command_buffer_count,
|
||||
.signal_semaphores = graphics_signal_semaphores,
|
||||
.signal_values = graphics_signal_values,
|
||||
.signal_semaphore_count = graphics_signal_semaphore_count,
|
||||
.wait_semaphores = graphics_wait_semaphores,
|
||||
.wait_values = graphics_wait_values,
|
||||
.wait_semaphore_count = graphics_wait_semaphore_count,
|
||||
};
|
||||
g_renderer.SubmitCommandBuffers(RT_GRAPHICS_QUEUE, &submit);
|
||||
}
|
||||
|
||||
if (compute_command_buffer_count > 0) {
|
||||
rt_submit_command_buffers_info submit = {
|
||||
.command_buffers = compute_command_buffers,
|
||||
.command_buffer_count = compute_command_buffer_count,
|
||||
.signal_semaphores = compute_signal_semaphores,
|
||||
.signal_values = compute_signal_values,
|
||||
.signal_semaphore_count = compute_signal_semaphore_count,
|
||||
.wait_semaphores = compute_wait_semaphores,
|
||||
.wait_values = compute_wait_values,
|
||||
.wait_semaphore_count = compute_wait_semaphore_count,
|
||||
};
|
||||
g_renderer.SubmitCommandBuffers(RT_COMPUTE_QUEUE, &submit);
|
||||
}
|
||||
|
||||
/* Start next level */
|
||||
level_start = i;
|
||||
if (i < framegraph->pass_count)
|
||||
execution_level = framegraph->passes[i].execution_level;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RT_DLLEXPORT rt_render_target_id rtCalculateRenderTargetID(const char *name, size_t len) {
|
||||
rt_render_target_id id = rtHashBytes32(name, len);
|
||||
if (id == 0)
|
||||
id = ~id;
|
||||
return id;
|
||||
}
|
||||
|
||||
RT_DLLEXPORT rt_render_pass_id rtCalculateRenderPassID(const char *name, size_t len) {
|
||||
rt_render_pass_id id = rtHashBytes32(name, len);
|
||||
if (id == 0)
|
||||
id = ~id;
|
||||
return id;
|
||||
}
|
@ -15,7 +15,6 @@
|
||||
*/
|
||||
|
||||
rt_renderer_api g_renderer;
|
||||
extern rt_cvar rt_MaxFramegraphs;
|
||||
|
||||
#ifndef RT_STATIC_LIB
|
||||
static rt_dynlib _renderer_lib;
|
||||
@ -36,10 +35,6 @@ extern void RT_RENDERER_API_FN(BeginFrame)(unsigned int);
|
||||
extern void RT_RENDERER_API_FN(EndFrame)(unsigned int);
|
||||
extern rt_pipeline_handle RT_RENDERER_API_FN(CompilePipeline)(const rt_pipeline_info *);
|
||||
extern void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle);
|
||||
extern rt_render_target_handle
|
||||
RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *);
|
||||
extern rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void);
|
||||
extern void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle);
|
||||
extern rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t,
|
||||
const rt_alloc_command_buffer_info *,
|
||||
rt_command_buffer_handle *);
|
||||
@ -58,6 +53,7 @@ extern void RT_RENDERER_API_FN(DestroyBuffers)(uint32_t, rt_buffer_handle *);
|
||||
|
||||
extern rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void);
|
||||
extern void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *);
|
||||
extern rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *);
|
||||
|
||||
extern void RT_RENDERER_API_FN(CmdBeginPass)(rt_command_buffer_handle,
|
||||
const rt_cmd_begin_pass_info *);
|
||||
@ -67,10 +63,15 @@ extern void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_hand
|
||||
rt_render_target_state);
|
||||
extern void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle,
|
||||
rt_render_target_handle);
|
||||
extern void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle, rt_pipeline_handle);
|
||||
extern void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle,
|
||||
uint32_t,
|
||||
uint32_t,
|
||||
const rt_buffer_handle *,
|
||||
const uint64_t *);
|
||||
extern void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle, uint32_t, uint32_t);
|
||||
#endif
|
||||
|
||||
extern rt_result InitFramegraphManager(void);
|
||||
extern void ShutdownFramegraphManager(void);
|
||||
extern rt_result InitRenderLists(void);
|
||||
extern void ShutdownRenderLists(void);
|
||||
extern void ResetRenderLists(void);
|
||||
@ -101,24 +102,20 @@ static bool LoadRenderer(void) {
|
||||
RETRIEVE_SYMBOL(EndFrame, rt_end_frame_fn);
|
||||
RETRIEVE_SYMBOL(CompilePipeline, rt_compile_pipeline_fn);
|
||||
RETRIEVE_SYMBOL(DestroyPipeline, rt_destroy_pipeline_fn);
|
||||
RETRIEVE_SYMBOL(CreateRenderTarget, rt_create_render_target_fn);
|
||||
RETRIEVE_SYMBOL(GetSwapchainRenderTarget, rt_get_swapchain_render_target_fn);
|
||||
RETRIEVE_SYMBOL(DestroyRenderTarget, rt_destroy_render_target_fn);
|
||||
RETRIEVE_SYMBOL(AllocCommandBuffers, rt_alloc_command_buffers_fn);
|
||||
RETRIEVE_SYMBOL(SubmitCommandBuffers, rt_submit_command_buffers_fn);
|
||||
RETRIEVE_SYMBOL(CreateSemaphores, rt_create_gpu_semaphores_fn);
|
||||
RETRIEVE_SYMBOL(DestroySemaphores, rt_destroy_gpu_semaphores_fn);
|
||||
RETRIEVE_SYMBOL(GetSemaphoreValue, rt_get_gpu_semaphore_value_fn);
|
||||
RETRIEVE_SYMBOL(GetSwapchainAvailableSemaphore, rt_get_swapchain_available_semaphore_fn);
|
||||
RETRIEVE_SYMBOL(GetRenderFinishedSemaphore, rt_get_render_finished_semaphore_fn);
|
||||
RETRIEVE_SYMBOL(CreateBuffers, rt_create_buffers_fn);
|
||||
RETRIEVE_SYMBOL(DestroyBuffers, rt_destroy_buffers_fn);
|
||||
RETRIEVE_SYMBOL(CreateRenderGrapbuilder, rt_create_render_graph_builder_fn);
|
||||
RETRIEVE_SYMBOL(DestroyRenderGraphBuilder, rt_destroy_render_graph_builder_fn);
|
||||
RETRIEVE_SYMBOL(ExecuteRenderGraph, rt_execute_render_graph_fn);
|
||||
RETRIEVE_SYMBOL(CmdBeginPass, rt_cmd_begin_pass_fn);
|
||||
RETRIEVE_SYMBOL(CmdEndPass, rt_cmd_end_pass_fn);
|
||||
RETRIEVE_SYMBOL(CmdTransitionRenderTarget, rt_cmd_transition_render_target_fn);
|
||||
RETRIEVE_SYMBOL(CmdFlushRenderTargetWrite, rt_cmd_flush_render_target_write_fn);
|
||||
RETRIEVE_SYMBOL(CmdBindPipeline, rt_cmd_bind_pipeline_fn);
|
||||
RETRIEVE_SYMBOL(CmdBindVertexBuffers, rt_cmd_bind_vertex_buffers_fn);
|
||||
RETRIEVE_SYMBOL(CmdDraw, rt_cmd_draw_fn);
|
||||
} else {
|
||||
rtReportError("GFX",
|
||||
"Unsupported renderer backend: (%s) %s",
|
||||
@ -136,24 +133,20 @@ static bool LoadRenderer(void) {
|
||||
g_renderer.EndFrame = &rtRenEndFrame;
|
||||
g_renderer.CompilePipeline = &rtRenCompilePipeline;
|
||||
g_renderer.DestroyPipeline = &rtRenDestroyPipeline;
|
||||
g_renderer.CreateRenderTarget = &rtRenCreateRenderTarget;
|
||||
g_renderer.GetSwapchainRenderTarget = &rtRenGetSwapchainRenderTarget;
|
||||
g_renderer.DestroyRenderTarget = &rtRenDestroyRenderTarget;
|
||||
g_renderer.AllocCommandBuffers = &rtRenAllocCommandBuffers;
|
||||
g_renderer.SubmitCommandBuffers = &rtRenSubmitCommandBuffers;
|
||||
g_renderer.CreateSemaphores = &rtRenCreateSemaphores;
|
||||
g_renderer.DestroySemaphores = &rtRenDestroySemaphores;
|
||||
g_renderer.GetSemaphoreValue = &rtRenGetSemaphoreValue;
|
||||
g_renderer.GetSwapchainAvailableSemaphore = &rtRenGetSwapchainAvailableSemaphore;
|
||||
g_renderer.GetRenderFinishedSemaphore = &rtRenGetRenderFinishedSemaphore;
|
||||
g_renderer.CreateBuffers = &rtRenCreateBuffers;
|
||||
g_renderer.DestroyBuffers = &rtRenDestroyBuffers;
|
||||
g_renderer.CreateRenderGraphBuilder = &rtRenCreateRenderGraphBuilder;
|
||||
g_renderer.DestroyRenderGraphBuilder = &rtRenDestroyRenderGraphBuilder;
|
||||
g_renderer.ExecuteRenderGraph = &rtRenExecuteRenderGraph;
|
||||
g_renderer.CmdBeginPass = &rtRenCmdBeginPass;
|
||||
g_renderer.CmdEndPass = &rtRenCmdEndPass;
|
||||
g_renderer.CmdTransitionRenderTarget = &rtRenCmdTransitionRenderTarget;
|
||||
g_renderer.CmdFlushRenderTargetWrite = &rtRenCmdFlushRenderTargetWrite;
|
||||
g_renderer.CmdBindPipeline = &rtRenCmdBindPipeline;
|
||||
g_renderer.CmdBindVertexBuffers = &rtRenCmdBindVertexBuffers;
|
||||
g_renderer.CmdDraw = &rtRenCmdDraw;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
@ -169,7 +162,6 @@ RT_DLLEXPORT void rtRegisterRendererCVars(void) {
|
||||
|
||||
RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) {
|
||||
rtRegisterCVAR(&rt_Renderer);
|
||||
rtRegisterCVAR(&rt_MaxFramegraphs);
|
||||
|
||||
if (!_renderer_loaded) {
|
||||
if (!LoadRenderer())
|
||||
@ -182,9 +174,6 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) {
|
||||
if ((result = g_renderer.Init(renderer_info)) != RT_SUCCESS)
|
||||
return result;
|
||||
|
||||
if ((result = InitFramegraphManager()) != RT_SUCCESS)
|
||||
return result;
|
||||
|
||||
if ((result = InitRenderLists()) != RT_SUCCESS)
|
||||
return result;
|
||||
|
||||
@ -193,7 +182,6 @@ RT_DLLEXPORT rt_result rtInitGFX(rt_renderer_init_info *renderer_info) {
|
||||
|
||||
RT_DLLEXPORT void rtShutdownGFX(void) {
|
||||
ShutdownRenderLists();
|
||||
ShutdownFramegraphManager();
|
||||
g_renderer.Shutdown();
|
||||
}
|
||||
|
||||
|
@ -8,7 +8,7 @@ gfx_lib = library('rtgfx',
|
||||
'render_list.h',
|
||||
|
||||
'builtin_objects.c',
|
||||
'gfx_framegraph.c',
|
||||
'effect.c',
|
||||
'gfx_main.c',
|
||||
'render_list.c',
|
||||
# Contrib Sources
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#include "gfx.h"
|
||||
#include "render_list.h"
|
||||
|
||||
#include "runtime/resources.h"
|
||||
#include "runtime/rt_math.h"
|
||||
@ -174,6 +175,11 @@ typedef union {
|
||||
} depth_stencil;
|
||||
} rt_pass_clear_value;
|
||||
|
||||
typedef struct {
|
||||
float depth;
|
||||
int32_t stencil;
|
||||
} rt_depth_stencil_value;
|
||||
|
||||
typedef struct {
|
||||
rt_render_target_handle color_buffers[4];
|
||||
rt_pass_load_mode color_buffer_loads[4];
|
||||
@ -204,11 +210,12 @@ typedef enum {
|
||||
RT_RENDER_TARGET_STATE_STORAGE_IMAGE,
|
||||
} rt_render_target_state;
|
||||
|
||||
#define RT_RENDER_TARGET_SIZE_SWAPCHAIN 0
|
||||
|
||||
/* Renderer API */
|
||||
|
||||
typedef struct rt_pipeline_info_s rt_pipeline_info;
|
||||
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
rt_pixel_format format;
|
||||
@ -234,6 +241,9 @@ typedef struct {
|
||||
uint32_t flags;
|
||||
} rt_pass_info;
|
||||
|
||||
typedef struct rt_render_graph_s rt_render_graph;
|
||||
typedef rt_result rt_execute_render_pass_fn(rt_command_buffer_handle cmdbuf, const rt_render_list *render_lists, unsigned int render_list_count, void *userdata);
|
||||
|
||||
typedef struct {
|
||||
void *obj;
|
||||
|
||||
@ -242,12 +252,17 @@ typedef struct {
|
||||
void (*SetBackbuffer)(void *obj, const char *rt_name);
|
||||
|
||||
void (*AddRenderPass)(void *obj, const rt_pass_info *info);
|
||||
void (*AddColorOutput)(void *obj, const char *pass_name, const char *rt_name);
|
||||
void (*AddColorOutput)(void *obj, const char *pass_name, const char *rt_name, rt_pass_load_mode load, rt_pass_write_mode write, rt_color clear_color);
|
||||
void (*AddSampledInput)(void *obj, const char *pass_name, const char *rt_name);
|
||||
void (*SetDepthStencilAttachment)(void *obj, const char *pass_name, const char *rt_name);
|
||||
void (*SetDepthStencilAttachment)(void *obj,
|
||||
const char *pass_name,
|
||||
const char *rt_name,
|
||||
rt_pass_load_mode load,
|
||||
rt_pass_write_mode write,
|
||||
rt_depth_stencil_value clear_value);
|
||||
void (*BindRenderPass)(void *obj, const char *pass_name, rt_execute_render_pass_fn *execute_fn, void *userdata);
|
||||
|
||||
|
||||
rt_result (*Build)(void *obj);
|
||||
rt_result (*Build)(void *obj, rt_render_graph **p_render_graph);
|
||||
} rt_render_graph_builder;
|
||||
|
||||
|
||||
@ -259,27 +274,18 @@ typedef void rt_begin_frame_fn(unsigned int frame_id);
|
||||
typedef void rt_end_frame_fn(unsigned int frame_id);
|
||||
typedef rt_pipeline_handle rt_compile_pipeline_fn(const rt_pipeline_info *info);
|
||||
typedef void rt_destroy_pipeline_fn(rt_pipeline_handle handle);
|
||||
typedef rt_render_target_handle rt_create_render_target_fn(const rt_render_target_info *info);
|
||||
typedef rt_render_target_handle rt_get_swapchain_render_target_fn(void);
|
||||
typedef void rt_destroy_render_target_fn(rt_render_target_handle handle);
|
||||
typedef rt_result rt_alloc_command_buffers_fn(uint32_t count,
|
||||
const rt_alloc_command_buffer_info *info,
|
||||
rt_command_buffer_handle *p_command_buffers);
|
||||
typedef rt_result rt_submit_command_buffers_fn(rt_gpu_queue queue,
|
||||
const rt_submit_command_buffers_info *info);
|
||||
typedef rt_result rt_create_gpu_semaphores_fn(uint32_t count,
|
||||
const rt_gpu_semaphore_info *info,
|
||||
rt_gpu_semaphore_handle *p_semaphores);
|
||||
typedef void rt_destroy_gpu_semaphores_fn(uint32_t count, rt_gpu_semaphore_handle *semaphores);
|
||||
typedef uint64_t rt_get_gpu_semaphore_value_fn(rt_gpu_semaphore_handle semaphore);
|
||||
typedef rt_gpu_semaphore_handle rt_get_swapchain_available_semaphore_fn(void);
|
||||
typedef rt_gpu_semaphore_handle rt_get_render_finished_semaphore_fn(void);
|
||||
typedef rt_result
|
||||
rt_create_buffers_fn(uint32_t count, const rt_buffer_info *info, rt_buffer_handle *p_buffers);
|
||||
typedef void rt_destroy_buffers_fn(uint32_t count, rt_buffer_handle *buffers);
|
||||
|
||||
typedef rt_render_graph_builder rt_create_render_graph_builder_fn(void);
|
||||
typedef void rt_destroy_render_graph_builder_fn(rt_render_graph_builder *builder);
|
||||
typedef rt_result rt_execute_render_graph_fn(rt_render_graph *rgraph);
|
||||
|
||||
typedef void rt_cmd_begin_pass_fn(rt_command_buffer_handle cmdbuf,
|
||||
const rt_cmd_begin_pass_info *info);
|
||||
@ -289,6 +295,13 @@ typedef void rt_cmd_transition_render_target_fn(rt_command_buffer_handle cmdbuf,
|
||||
rt_render_target_state new_state);
|
||||
typedef void rt_cmd_flush_render_target_write_fn(rt_command_buffer_handle cmdbuf,
|
||||
rt_render_target_handle render_target);
|
||||
typedef void rt_cmd_bind_pipeline_fn(rt_command_buffer_handle cmd, rt_pipeline_handle pipeline);
|
||||
typedef void rt_cmd_bind_vertex_buffers_fn(rt_command_buffer_handle cmd,
|
||||
uint32_t first_binding,
|
||||
uint32_t count,
|
||||
const rt_buffer_handle *buffers,
|
||||
const uint64_t *offsets);
|
||||
typedef void rt_cmd_draw_fn(rt_command_buffer_handle cmdbuf, uint32_t first_vertex, uint32_t vertex_count);
|
||||
|
||||
typedef struct {
|
||||
rt_register_renderer_cvars_fn *RegisterCVars;
|
||||
@ -299,28 +312,24 @@ typedef struct {
|
||||
rt_end_frame_fn *EndFrame;
|
||||
rt_compile_pipeline_fn *CompilePipeline;
|
||||
rt_destroy_pipeline_fn *DestroyPipeline;
|
||||
rt_create_render_target_fn *CreateRenderTarget;
|
||||
rt_get_swapchain_render_target_fn *GetSwapchainRenderTarget;
|
||||
rt_destroy_render_target_fn *DestroyRenderTarget;
|
||||
rt_alloc_command_buffers_fn *AllocCommandBuffers;
|
||||
rt_submit_command_buffers_fn *SubmitCommandBuffers;
|
||||
rt_create_gpu_semaphores_fn *CreateSemaphores;
|
||||
rt_destroy_gpu_semaphores_fn *DestroySemaphores;
|
||||
rt_get_gpu_semaphore_value_fn *GetSemaphoreValue;
|
||||
rt_get_swapchain_available_semaphore_fn *GetSwapchainAvailableSemaphore;
|
||||
rt_get_render_finished_semaphore_fn *GetRenderFinishedSemaphore;
|
||||
rt_create_buffers_fn *CreateBuffers;
|
||||
rt_destroy_buffers_fn *DestroyBuffers;
|
||||
|
||||
/*render graph functions*/
|
||||
rt_create_render_graph_builder_fn *CreateRenderGraphBuilder;
|
||||
rt_destroy_render_graph_builder_fn *DestroyRenderGraphBuilder;
|
||||
rt_execute_render_graph_fn *ExecuteRenderGraph;
|
||||
|
||||
/* Command Buffer Functions */
|
||||
rt_cmd_begin_pass_fn *CmdBeginPass;
|
||||
rt_cmd_end_pass_fn *CmdEndPass;
|
||||
rt_cmd_transition_render_target_fn *CmdTransitionRenderTarget;
|
||||
rt_cmd_flush_render_target_write_fn *CmdFlushRenderTargetWrite;
|
||||
rt_cmd_bind_pipeline_fn *CmdBindPipeline;
|
||||
rt_cmd_bind_vertex_buffers_fn *CmdBindVertexBuffers;
|
||||
rt_cmd_draw_fn *CmdDraw;
|
||||
} rt_renderer_api;
|
||||
|
||||
#define RT_RENDERER_API_FN(name) RT_DLLEXPORT rtRen##name
|
||||
|
832
src/renderer/common/common_render_graph.c
Normal file
832
src/renderer/common/common_render_graph.c
Normal file
@ -0,0 +1,832 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gfx/effect.h"
|
||||
#include "gfx/renderer_api.h"
|
||||
#include "runtime/buffer_manager.h"
|
||||
#include "runtime/handles.h"
|
||||
#include "runtime/mem_arena.h"
|
||||
|
||||
#include "common_render_graph.h"
|
||||
|
||||
#define MAX_COLOR_ATTACHMENTS_PER_PASS 8
|
||||
#define MAX_SAMPLED_INPUTS_PER_PASS 8
|
||||
|
||||
typedef struct rt_render_target_build_info {
|
||||
const char *name;
|
||||
rt_pixel_format format;
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
|
||||
unsigned int samples;
|
||||
unsigned int layers;
|
||||
|
||||
uint32_t first_usage;
|
||||
uint32_t last_usage;
|
||||
} rt_render_target_build_info;
|
||||
|
||||
typedef struct rt_pass_build_info {
|
||||
const char *name;
|
||||
uint32_t flags;
|
||||
|
||||
void *userdata;
|
||||
rt_execute_render_pass_fn *Execute;
|
||||
|
||||
uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS];
|
||||
rt_color color_attachment_clear_values[MAX_COLOR_ATTACHMENTS_PER_PASS];
|
||||
rt_pass_load_mode color_attachment_loads[MAX_COLOR_ATTACHMENTS_PER_PASS];
|
||||
rt_pass_write_mode color_attachment_writes[MAX_COLOR_ATTACHMENTS_PER_PASS];
|
||||
uint32_t color_attachment_count;
|
||||
|
||||
uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS];
|
||||
uint32_t sampled_input_count;
|
||||
|
||||
uint32_t depth_stencil_attachment;
|
||||
rt_depth_stencil_value depth_stencil_clear_value;
|
||||
rt_pass_load_mode depth_stencil_load;
|
||||
rt_pass_write_mode depth_stencil_write;
|
||||
|
||||
uint32_t *dependencies;
|
||||
uint32_t dependency_count;
|
||||
} rt_pass_build_info;
|
||||
|
||||
typedef struct {
|
||||
uint32_t signaled_by;
|
||||
uint32_t waited_on_by;
|
||||
} rt_sync_point_build_info;
|
||||
|
||||
typedef struct rt_render_graph_builder_obj {
|
||||
rt_arena arena;
|
||||
|
||||
rt_render_target_build_info *render_targets;
|
||||
uint32_t render_target_count;
|
||||
uint32_t render_target_capacity;
|
||||
|
||||
rt_pass_build_info *passes;
|
||||
uint32_t pass_count;
|
||||
uint32_t pass_capacity;
|
||||
|
||||
rt_physical_render_target_info *phys_render_targets;
|
||||
uint32_t phys_render_target_count;
|
||||
|
||||
rt_sync_point_build_info *sync_points;
|
||||
uint32_t sync_point_count;
|
||||
|
||||
uint32_t backbuffer;
|
||||
|
||||
rt_render_graph_builder_platform_callbacks platform_cbs;
|
||||
} rt_render_graph_builder_obj;
|
||||
|
||||
/* ****************************************************************************
|
||||
*
|
||||
* BUILDER CODE
|
||||
*
|
||||
* ****************************************************************************/
|
||||
|
||||
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
|
||||
if (obj->render_target_count == obj->render_target_capacity) {
|
||||
uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32;
|
||||
rt_render_target_build_info *tmp =
|
||||
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_render_target_build_info, new_cap);
|
||||
if (obj->render_target_capacity)
|
||||
memcpy(tmp,
|
||||
obj->render_targets,
|
||||
sizeof(rt_render_target_build_info) * obj->render_target_capacity);
|
||||
obj->render_targets = tmp;
|
||||
obj->render_target_capacity = new_cap;
|
||||
}
|
||||
|
||||
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
|
||||
strcpy(name, info->name);
|
||||
obj->render_targets[obj->render_target_count].name = name;
|
||||
obj->render_targets[obj->render_target_count].format = info->format;
|
||||
obj->render_targets[obj->render_target_count].width = info->width;
|
||||
obj->render_targets[obj->render_target_count].height = info->height;
|
||||
obj->render_targets[obj->render_target_count].samples = info->samples;
|
||||
obj->render_targets[obj->render_target_count].layers = info->layers;
|
||||
obj->render_targets[obj->render_target_count].first_usage = 0;
|
||||
obj->render_targets[obj->render_target_count].last_usage = 0;
|
||||
++obj->render_target_count;
|
||||
}
|
||||
|
||||
static void SetBackbuffer(void *_obj, const char *rt_name) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
obj->backbuffer = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name);
|
||||
}
|
||||
|
||||
static void AddRenderPass(void *_obj, const rt_pass_info *info) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
|
||||
if (obj->pass_count == obj->pass_capacity) {
|
||||
uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32;
|
||||
rt_pass_build_info *tmp =
|
||||
RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, rt_pass_build_info, new_cap);
|
||||
if (obj->pass_capacity)
|
||||
memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity);
|
||||
obj->passes = tmp;
|
||||
obj->pass_capacity = new_cap;
|
||||
}
|
||||
|
||||
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
|
||||
strcpy(name, info->name);
|
||||
obj->passes[obj->pass_count].name = name;
|
||||
obj->passes[obj->pass_count].flags = info->flags;
|
||||
obj->passes[obj->pass_count].color_attachment_count = 0;
|
||||
obj->passes[obj->pass_count].sampled_input_count = 0;
|
||||
obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX;
|
||||
obj->passes[obj->pass_count].dependencies = NULL;
|
||||
obj->passes[obj->pass_count].dependency_count = 0;
|
||||
|
||||
++obj->pass_count;
|
||||
}
|
||||
|
||||
static void AddColorOutput(void *_obj,
|
||||
const char *pass_name,
|
||||
const char *rt_name,
|
||||
rt_pass_load_mode load,
|
||||
rt_pass_write_mode write,
|
||||
rt_color clear_color) {
|
||||
uint32_t rt_index = UINT_MAX;
|
||||
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
rt_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rt_index == UINT_MAX) {
|
||||
rtLog("ren",
|
||||
"Tried to add unknown render target %s as color output to %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||
if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) {
|
||||
rtLog("ren", "Too many color attachments in pass %s", pass_name);
|
||||
}
|
||||
obj->passes[i].color_attachment_clear_values[obj->passes[i].color_attachment_count] =
|
||||
clear_color;
|
||||
obj->passes[i].color_attachment_loads[obj->passes[i].color_attachment_count] = load;
|
||||
obj->passes[i].color_attachment_writes[obj->passes[i].color_attachment_count] = write;
|
||||
obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("ren",
|
||||
"Tried to add render target %s as color output to unknown render target %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
}
|
||||
|
||||
static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) {
|
||||
uint32_t rt_index = UINT_MAX;
|
||||
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
rt_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rt_index == UINT_MAX) {
|
||||
rtLog("ren",
|
||||
"Tried to add unknown render target %s as color output to %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||
if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) {
|
||||
rtLog("ren", "Too many sampled inputs in pass %s", pass_name);
|
||||
}
|
||||
obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("ren",
|
||||
"Tried to add render target %s as sampled input to unknown render target %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
}
|
||||
|
||||
static void SetDepthStencilAttachment(void *_obj,
|
||||
const char *pass_name,
|
||||
const char *rt_name,
|
||||
rt_pass_load_mode load,
|
||||
rt_pass_write_mode write,
|
||||
rt_depth_stencil_value clear_value) {
|
||||
uint32_t rt_index = UINT_MAX;
|
||||
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
rt_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rt_index == UINT_MAX) {
|
||||
rtLog("ren",
|
||||
"Tried to add unknown render target %s as depth stencil attachment to %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||
obj->passes[i].depth_stencil_attachment = rt_index;
|
||||
obj->passes[i].depth_stencil_clear_value = clear_value;
|
||||
obj->passes[i].depth_stencil_load = load;
|
||||
obj->passes[i].depth_stencil_write = write;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("ren",
|
||||
"Tried to add render target %s as depth stencil attachment to unknown render target %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
}
|
||||
|
||||
static void BindRenderPass(void *_obj,
|
||||
const char *pass_name,
|
||||
rt_execute_render_pass_fn *execute_fn,
|
||||
void *userdata) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||
obj->passes[i].Execute = execute_fn;
|
||||
obj->passes[i].userdata = userdata;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("ren", "Tried to bind unknown render pass %s.", pass_name);
|
||||
}
|
||||
|
||||
/* Result of FindWriters(): how many writer passes were newly appended to the
 * order array, and how many already-present entries were moved to the back. */
typedef struct {
    uint32_t added;
    uint32_t moved;
} rt_find_writers_result;

/* Collects all passes that write render target rt_index (as depth-stencil or
 * color attachment) into p_passes, starting at position append_at.
 * If a writer is already in p_passes[0..append_at), it is removed from its
 * old slot and re-inserted at the back, so it ends up as late as possible in
 * this (pre-reversal) ordering. p_passes must have room for all passes. */
static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
                                          uint32_t rt_index,
                                          uint32_t append_at,
                                          uint32_t *p_passes) {
    rt_find_writers_result res = {0, 0};
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        /* A pass writes rt_index if it is its depth-stencil attachment or one
         * of its color attachments. */
        bool writes_rt = false;
        if (obj->passes[i].depth_stencil_attachment == rt_index) {
            writes_rt = true;
        } else {
            for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j) {
                if (obj->passes[i].color_attachments[j] == rt_index) {
                    writes_rt = true;
                }
            }
        }

        if (!writes_rt)
            continue;

        /* Is this pass already somewhere in the current order? */
        uint32_t lower_index = UINT32_MAX;
        for (uint32_t j = 0; j < append_at; ++j) {
            if (p_passes[j] == i) {
                lower_index = j;
                break;
            }
        }

        if (lower_index == UINT32_MAX) {
            /* Not present yet: append at the end. */
            p_passes[append_at++] = i;
            res.added++;
        } else {
            /* Present earlier: close the gap and move the pass to the last
             * occupied slot (append_at - 1). Note append_at is NOT advanced —
             * the total entry count stays the same; only positions change. */
            memmove(&p_passes[lower_index],
                    &p_passes[lower_index + 1],
                    (append_at - lower_index - 1) * sizeof(uint32_t));
            p_passes[append_at - 1] = i;
            res.moved++;
        }
    }
    return res;
}
|
||||
|
||||
static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
|
||||
uint32_t search_rt,
|
||||
uint32_t append_at,
|
||||
uint32_t *p_order) {
|
||||
rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order);
|
||||
uint32_t new_append = append_at + writers.added;
|
||||
for (uint32_t i = 0; i < writers.moved; ++i) {
|
||||
uint32_t pass_idx = p_order[append_at - writers.moved + i];
|
||||
const rt_pass_build_info *pass = &obj->passes[pass_idx];
|
||||
for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
|
||||
new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
|
||||
}
|
||||
}
|
||||
for (uint32_t i = 0; i < writers.added; ++i) {
|
||||
uint32_t pass_idx = p_order[append_at + i];
|
||||
const rt_pass_build_info *pass = &obj->passes[pass_idx];
|
||||
for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
|
||||
new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
|
||||
}
|
||||
}
|
||||
return new_append;
|
||||
}
|
||||
|
||||
static rt_result
|
||||
CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
|
||||
uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
|
||||
if (!order)
|
||||
return RT_OUT_OF_MEMORY;
|
||||
uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);
|
||||
|
||||
/* Now the pass writing the backbuffer is first, we need to revert the order */
|
||||
for (uint32_t i = 0; i < count / 2; ++i) {
|
||||
uint32_t t = order[i];
|
||||
order[i] = order[count - i - 1];
|
||||
order[count - i - 1] = t;
|
||||
}
|
||||
*p_order = order;
|
||||
*p_count = count;
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
/* Greedily re-schedules the naively ordered passes to maximize the distance
 * between dependent passes. Consumes naive_order (it is emptied in place) and
 * returns an arena-allocated schedule of pass indices, or NULL on OOM. */
static uint32_t *
ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
    /* Our goal is to calculate a schedule that:
     * A) Does not break the dependency chain
     * B) Has the maximum amount of overlap, i.e. keeps the GPU busy.
     * This means that if pass A depends on pass B, we want to have as much passes inbetween as
     * possible, to reduce the likelyhood of stalls caused by B waiting for A to finish. */
    uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
    if (!schedule)
        return NULL;
    uint32_t scheduled_count = 0;

    while (scheduled_count < pass_count) {
        /* The number of passes remaining in naive_order */
        uint32_t unscheduled_count = pass_count - scheduled_count;

        /* It is always valid to use the front
         * (NOTE(review): relies on naive_order listing producers before
         * consumers — confirm this invariant holds after earlier removals). */
        uint32_t selected_idx = 0;
        uint32_t selected_score = 0;
        for (uint32_t i = 0; i < unscheduled_count; ++i) {
            /* Check if any dependency is not scheduled yet */
            uint32_t pass_idx = naive_order[i];
            const rt_pass_build_info *pass = &obj->passes[pass_idx];
            uint32_t score = 0;
            bool is_valid = true;

            if (pass->dependency_count) {
                /* A pass is only schedulable if none of its dependencies is
                 * still waiting in naive_order. */
                for (uint32_t j = 0; j < unscheduled_count; ++j) {
                    uint32_t pass2_idx = naive_order[j];
                    for (uint32_t k = 0; k < pass->dependency_count; ++k) {
                        if (pass->dependencies[k] == pass2_idx) {
                            is_valid = false;
                            break;
                        }
                    }
                    if (!is_valid)
                        break;
                }
                if (!is_valid)
                    continue;

                /* Score: sum of distances to already-scheduled dependencies —
                 * the longer ago a dependency ran, the better. */
                for (uint32_t j = 0; j < pass->dependency_count; ++j) {
                    for (uint32_t k = 0; k < scheduled_count; ++k) {
                        if (schedule[k] == pass->dependencies[j]) {
                            score += scheduled_count - k;
                            break;
                        }
                    }
                }

            } else {
                /* Dependency-free passes can always run; prefer them maximally. */
                score = UINT32_MAX;
            }

            if (score > selected_score) {
                selected_score = score;
                selected_idx = i;
            }
        }

        /* Commit the winner and remove it from the unscheduled list. */
        schedule[scheduled_count++] = naive_order[selected_idx];
        memmove(&naive_order[selected_idx],
                &naive_order[selected_idx + 1],
                (unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
    }
    return schedule;
}
|
||||
|
||||
/* Fills each pass's dependencies array: pass P depends on pass Q if Q writes
 * (as depth-stencil or color attachment) a render target that P samples.
 * Dependency lists are arena-allocated and grown in place (see note below).
 * Returns RT_OUT_OF_MEMORY if the arena is exhausted. */
static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
    /* NOTE(Kevin): This has a lot of overlap with FindWriters, so maybe its possible to combine
     * the two */
    for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
        rt_pass_build_info *pass = &obj->passes[pass_idx];
        /* Start with one slot per sampled input; grows if a target has
         * multiple writers. */
        uint32_t dependency_capacity = pass->sampled_input_count;
        if (dependency_capacity) {
            pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity);
            if (!pass->dependencies)
                return RT_OUT_OF_MEMORY;
        }
        for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) {
            uint32_t rt_index = pass->sampled_inputs[input_idx];
            /* Find every pass that writes this sampled render target. */
            for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) {
                const rt_pass_build_info *candidate = &obj->passes[candidate_idx];
                bool is_dependency = false;
                if (candidate->depth_stencil_attachment == rt_index)
                    is_dependency = true;
                for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) {
                    if (candidate->color_attachments[j] == rt_index)
                        is_dependency = true;
                }

                if (!is_dependency)
                    continue;

                if (pass->dependency_count == dependency_capacity) {
                    /* The dependencies are still on top of the arena, so we can just grow that
                     * array */
                    if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity))
                        return RT_OUT_OF_MEMORY;
                    dependency_capacity *= 2;
                }
                pass->dependencies[pass->dependency_count++] = candidate_idx;
            }
        }
    }
    return RT_SUCCESS;
}
|
||||
|
||||
/* Computes, for every logical render target, the first and last schedule slot
 * in which it is used (as depth-stencil, color attachment, or sampled input).
 * An unused target ends up with first_usage = UINT32_MAX and last_usage = 0,
 * which GreedyMergeRenderTargets() relies on for lifetime-overlap tests. */
static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj,
                                       uint32_t pass_count,
                                       const uint32_t *schedule) {
    for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) {
        rt_render_target_build_info *rt = &obj->render_targets[rt_idx];
        rt->first_usage = UINT32_MAX;
        rt->last_usage  = 0;
        for (uint32_t sched_idx = 0; sched_idx < pass_count; ++sched_idx) {
            const rt_pass_build_info *pass = &obj->passes[schedule[sched_idx]];

            bool used = (pass->depth_stencil_attachment == rt_idx);
            for (unsigned int k = 0; !used && k < pass->color_attachment_count; ++k) {
                if (pass->color_attachments[k] == rt_idx)
                    used = true;
            }
            for (unsigned int k = 0; !used && k < pass->sampled_input_count; ++k) {
                if (pass->sampled_inputs[k] == rt_idx)
                    used = true;
            }
            if (!used)
                continue;

            if (sched_idx < rt->first_usage)
                rt->first_usage = sched_idx;
            if (sched_idx > rt->last_usage)
                rt->last_usage = sched_idx;
        }
    }
}
|
||||
|
||||
/* Greedily merges logical render targets whose lifetimes do not overlap and
 * whose physical properties (size, format, samples, layers) match, producing
 * obj->phys_render_targets. All pass attachment indices and obj->backbuffer
 * are rewritten from logical to physical indices via a mapping table.
 * Requires DetermineRenderTargetUsage() to have filled first/last_usage. */
static rt_result GreedyMergeRenderTargets(rt_render_graph_builder_obj *obj) {
    /* Merge candidate: a (possibly combined) render target plus lifetime. */
    typedef struct {
        rt_physical_render_target_info info;
        int alive;      /* still a live candidate (not consumed by a merge) */
        int backbuffer; /* true if any constituent was the backbuffer */
        uint32_t first_usage;
        uint32_t last_usage;
    } merged_rts;

    /* Each merge consumes two candidates and adds one, so 2 * count slots
     * bound the total number of candidates ever created. */
    merged_rts *merged = RT_ARENA_PUSH_ARRAY(&obj->arena, merged_rts, 2 * obj->render_target_count);
    if (!merged) {
        return RT_OUT_OF_MEMORY;
    }
    uint32_t candidate_count = obj->render_target_count;
    for (uint32_t i = 0; i < candidate_count; ++i) {
        merged[i].alive = 1;
        merged[i].backbuffer = (i == obj->backbuffer);
        merged[i].info.format = obj->render_targets[i].format;
        merged[i].info.width = obj->render_targets[i].width;
        merged[i].info.height = obj->render_targets[i].height;
        merged[i].info.layers = obj->render_targets[i].layers;
        merged[i].info.name = obj->render_targets[i].name;
        merged[i].info.samples = obj->render_targets[i].samples;
        merged[i].first_usage = obj->render_targets[i].first_usage;
        merged[i].last_usage = obj->render_targets[i].last_usage;
    }

    /* rt_mapping[logical index] -> candidate index (later: physical index). */
    uint32_t *rt_mapping =
        RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->render_target_count);
    if (!rt_mapping)
        return RT_OUT_OF_MEMORY;
    for (uint32_t i = 0; i < obj->render_target_count; ++i)
        rt_mapping[i] = i;

    /* Repeatedly merge the first compatible pair until no merge is possible. */
    bool did_merge;
    do {
        did_merge = false;
        for (uint32_t first = 0; first < candidate_count - 1; ++first) {
            if (!merged[first].alive)
                continue;
            for (uint32_t second = first + 1; second < candidate_count; ++second) {
                if (!merged[second].alive)
                    continue;

                /* Lifetimes must be disjoint... */
                if (!((merged[first].last_usage < merged[second].first_usage) ||
                      (merged[second].last_usage < merged[first].first_usage)))
                    continue;

                /* ...and the physical descriptions must match exactly. */
                if (!(merged[first].info.width == merged[second].info.width &&
                      merged[first].info.height == merged[second].info.height &&
                      merged[first].info.samples == merged[second].info.samples &&
                      merged[first].info.layers == merged[second].info.layers &&
                      merged[first].info.format == merged[second].info.format))
                    continue;

                /* Consume both candidates and append the combined one. */
                merged[first].alive = 0;
                merged[second].alive = 0;

                merged_rts combined = {
                    .alive = 1,
                    .backbuffer = merged[first].backbuffer || merged[second].backbuffer,
                    .first_usage = RT_MIN(merged[first].first_usage, merged[second].first_usage),
                    .last_usage = RT_MAX(merged[first].last_usage, merged[second].last_usage),
                    .info = merged[first].info,
                };
                /* Debug name "<first>+<second>" for the combined target. */
                char *combined_name = rtArenaPush(&obj->arena,
                                                  strlen(merged[first].info.name) +
                                                      strlen(merged[second].info.name) + 2);
                if (!combined_name)
                    return RT_OUT_OF_MEMORY;
                strcpy(combined_name, merged[first].info.name);
                strcat(combined_name, "+");
                strcat(combined_name, merged[second].info.name);
                combined.info.name = combined_name;

                /* Update mappings. If indes < render_target_count, than it refers to a
                 * logical render target. If not, it refers to a merged render target */
                if (first < obj->render_target_count) {
                    rt_mapping[first] = candidate_count;
                } else {
                    // Find mappings that refer to this index and update them
                    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
                        if (rt_mapping[i] == first)
                            rt_mapping[i] = candidate_count;
                    }
                }
                if (second < obj->render_target_count) {
                    rt_mapping[second] = candidate_count;
                } else {
                    // Find mappings that refer to this index and update them
                    for (uint32_t i = 0; i < obj->render_target_count; ++i) {
                        if (rt_mapping[i] == second)
                            rt_mapping[i] = candidate_count;
                    }
                }

                RT_ASSERT(candidate_count < 2 * obj->render_target_count, "");
                merged[candidate_count++] = combined;
                did_merge = true;
                break;
            }
            if (did_merge)
                break;
        }
    } while (did_merge);

    /* Compact the surviving candidates into the physical target array. */
    uint32_t phys_count = 0;
    for (uint32_t i = 0; i < candidate_count; ++i) {
        if (merged[i].alive)
            ++phys_count;
    }
    obj->phys_render_targets =
        RT_ARENA_PUSH_ARRAY(&obj->arena, rt_physical_render_target_info, phys_count);
    if (!obj->phys_render_targets)
        return RT_OUT_OF_MEMORY;
    obj->phys_render_target_count = 0;
    for (uint32_t i = 0; i < candidate_count; ++i) {
        if (merged[i].alive) {
            uint32_t index = obj->phys_render_target_count;
            if (merged[i].backbuffer)
                obj->backbuffer = obj->phys_render_target_count;
            obj->phys_render_targets[obj->phys_render_target_count++] = merged[i].info;

            /* Update the mapping table */
            for (uint32_t j = 0; j < obj->render_target_count; ++j) {
                if (rt_mapping[j] == i)
                    rt_mapping[j] = index;
            }
        }
    }

    /* Update pass render target references */
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        /* UINT_MAX is the "no depth-stencil" sentinel and must not be mapped. */
        if (obj->passes[i].depth_stencil_attachment < UINT_MAX)
            obj->passes[i].depth_stencil_attachment =
                rt_mapping[obj->passes[i].depth_stencil_attachment];
        for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j)
            obj->passes[i].color_attachments[j] = rt_mapping[obj->passes[i].color_attachments[j]];
        for (uint32_t j = 0; j < obj->passes[i].sampled_input_count; ++j)
            obj->passes[i].sampled_inputs[j] = rt_mapping[obj->passes[i].sampled_inputs[j]];
    }
    obj->backbuffer = rt_mapping[obj->backbuffer];

    return RT_SUCCESS;
}
|
||||
|
||||
/* Computes the sync points (semaphores) between passes for backends that
 * report RequireExplicitSynchronization(). Not implemented yet; Build() only
 * reaches this on such backends and fails cleanly via the error return. */
static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
    RT_ASSERT(false, "Not implemented yet");
    return RT_UNKNOWN_ERROR;
}
|
||||
|
||||
/* Bakes the builder state into an immutable rt_render_graph living in a
 * single allocation (header, render target handles, semaphores, pass array,
 * per-pass attachment arrays, pass names). order[] is the final schedule:
 * graph->passes[i] is the pass obj->passes[order[i]].
 * Returns NULL on allocation failure.
 *
 * FIX: all per-pass data is now read from obj->passes[passidx]. The original
 * mixed obj->passes[i] (declaration order) and obj->passes[passidx] (schedule
 * order), so whenever the optimized schedule differed from declaration order,
 * a pass was baked with another pass's attachments and clear values. */
static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order) {

    /* First pass: compute the total allocation size. */
    size_t required_size = sizeof(rt_render_graph);
    required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
    required_size += obj->sync_point_count * sizeof(rt_gpu_semaphore_handle);
    required_size += obj->pass_count * sizeof(rt_render_pass);

    size_t pass_attachment_size = 0;
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        required_size += strlen(obj->passes[i].name) + 1;
        pass_attachment_size += obj->passes[i].color_attachment_count *
                                (sizeof(rt_render_target_handle) + sizeof(rt_color) +
                                 sizeof(rt_pass_load_mode) + sizeof(rt_pass_write_mode));
        pass_attachment_size +=
            obj->passes[i].sampled_input_count * sizeof(rt_render_target_handle);
    }
    required_size += pass_attachment_size;

    rt_render_graph *graph = rtAllocBuffer(required_size);
    if (!graph)
        return NULL;
    memset(graph, 0, required_size);

    /* Carve the allocation into its sub-arrays. */
    graph->render_targets = (rt_render_target_handle *)(graph + 1);
    graph->semaphores =
        (rt_gpu_semaphore_handle *)(graph->render_targets + obj->phys_render_target_count);
    graph->passes = (rt_render_pass *)(graph->semaphores + obj->sync_point_count);
    char *attachment_storage = (char *)(graph->passes + obj->pass_count);
    char *next_name = attachment_storage + pass_attachment_size;

    graph->render_target_count = obj->phys_render_target_count;
    graph->semaphore_count = obj->sync_point_count;
    graph->pass_count = obj->pass_count;

    /* Create the physical GPU render targets via the backend callback. */
    for (uint32_t i = 0; i < obj->phys_render_target_count; ++i) {
        graph->render_targets[i] =
            obj->platform_cbs.CreateRenderTarget(&obj->phys_render_targets[i]);
    }

    for (uint32_t i = 0; i < obj->sync_point_count; ++i) {
        // TODO
        RT_NOT_IMPLEMENTED;
    }

    /* Bake the passes in schedule order. */
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        uint32_t passidx = order[i];
        const rt_pass_build_info *src = &obj->passes[passidx];
        size_t namelen = strlen(src->name);
        graph->passes[i].Execute = RT_VERIFY(src->Execute);
        graph->passes[i].user_data = src->userdata;
        graph->passes[i].flags = src->flags;
        graph->passes[i].id = rtCalculateRenderPassID(src->name, namelen);
        graph->passes[i].first_signal = 0;
        graph->passes[i].signal_count = 0;
        graph->passes[i].first_wait = 0;
        graph->passes[i].wait_count = 0;
        /* No sync point data yet -> every pass gets its own execution level. */
        graph->passes[i].execution_level = i;

        graph->passes[i].depth_stencil =
            (src->depth_stencil_attachment != UINT_MAX)
                ? graph->render_targets[src->depth_stencil_attachment]
                : (rt_render_target_handle)RT_INVALID_HANDLE;
        graph->passes[i].depth_stencil_clear_value = src->depth_stencil_clear_value;
        graph->passes[i].depth_stencil_load = src->depth_stencil_load;
        graph->passes[i].depth_stencil_write = src->depth_stencil_write;

        graph->passes[i].color_output_count = src->color_attachment_count;
        if (graph->passes[i].color_output_count) {
            /* Sub-allocate the four parallel color-attachment arrays. */
            graph->passes[i].color_outputs = (rt_render_target_handle *)attachment_storage;
            attachment_storage +=
                sizeof(rt_render_target_handle) * graph->passes[i].color_output_count;
            graph->passes[i].color_clear_values = (rt_color *)attachment_storage;
            attachment_storage += sizeof(rt_color) * graph->passes[i].color_output_count;
            graph->passes[i].color_loads = (rt_pass_load_mode *)attachment_storage;
            attachment_storage += sizeof(rt_pass_load_mode) * graph->passes[i].color_output_count;
            graph->passes[i].color_writes = (rt_pass_write_mode *)attachment_storage;
            attachment_storage += sizeof(rt_pass_write_mode) * graph->passes[i].color_output_count;

            for (uint32_t j = 0; j < graph->passes[i].color_output_count; ++j) {
                graph->passes[i].color_outputs[j] =
                    graph->render_targets[src->color_attachments[j]];
                graph->passes[i].color_clear_values[j] =
                    src->color_attachment_clear_values[j];
                graph->passes[i].color_loads[j] = src->color_attachment_loads[j];
                graph->passes[i].color_writes[j] = src->color_attachment_writes[j];
            }
        }

        graph->passes[i].sampled_input_count = src->sampled_input_count;
        if (graph->passes[i].sampled_input_count) {
            graph->passes[i].sampled_inputs = (rt_render_target_handle *)attachment_storage;
            attachment_storage +=
                sizeof(rt_render_target_handle) * graph->passes[i].sampled_input_count;

            for (uint32_t j = 0; j < graph->passes[i].sampled_input_count; ++j) {
                graph->passes[i].sampled_inputs[j] =
                    graph->render_targets[src->sampled_inputs[j]];
            }
        }

        /* Copy the pass name into the name region at the end of the block. */
        graph->passes[i].name = next_name;
        next_name += namelen + 1;
        memcpy((char *)graph->passes[i].name, src->name, namelen + 1);
    }

    graph->backbuffer_index = obj->backbuffer;

    return graph;
}
|
||||
|
||||
/* Finalizes the builder: orders the passes, resolves dependencies, merges
 * render targets and bakes everything into *p_graph.
 * All intermediate allocations come from the builder arena and are released
 * with the builder; the resulting graph owns its own allocation. */
static rt_result Build(void *_obj, rt_render_graph **p_graph) {
    rt_render_graph_builder_obj *obj = _obj;
    uint32_t *naive_order;
    uint32_t pass_count;
    /* 1. Initial dependency-respecting order, walking back from the backbuffer. */
    rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
    if (res != RT_SUCCESS)
        return res;

    /* 2. Compute explicit pass-to-pass dependencies (via sampled inputs). */
    res = DeterminePassDependencies(obj);
    if (res != RT_SUCCESS)
        return res;

    /* 3. Re-schedule for maximum overlap between dependent passes. */
    uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
    if (!optimized_order)
        return RT_OUT_OF_MEMORY;

    /* 4. Compute render target lifetimes, then alias non-overlapping targets. */
    DetermineRenderTargetUsage(obj, pass_count, optimized_order);
    res = GreedyMergeRenderTargets(obj);
    if (res != RT_SUCCESS)
        return res;

    /* 5. Sync points, only where the backend needs them explicitly.
     * (CreateSynchronizationPoints currently always fails - not implemented.) */
    if (obj->platform_cbs.RequireExplicitSynchronization()) {
        res = CreateSynchronizationPoints(obj);
        if (res != RT_SUCCESS)
            return res;
    } else {
        obj->sync_point_count = 0;
    }

    /* 6. Bake the final immutable graph. */
    *p_graph = CreateRenderGraph(obj, optimized_order);
    return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
}
|
||||
|
||||
rt_render_graph_builder
|
||||
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs) {
|
||||
// TODO: This is totally temporary. We do NOT want malloc anywhere in non-init functions
|
||||
rt_render_graph_builder_obj *obj = malloc(sizeof(*obj));
|
||||
RT_ASSERT(obj, "Failed to allocate the builder object.");
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16));
|
||||
RT_ASSERT(arena_res.ok, "");
|
||||
obj->arena = arena_res.arena;
|
||||
obj->platform_cbs = *platform_cbs;
|
||||
|
||||
return (rt_render_graph_builder){
|
||||
.obj = obj,
|
||||
.AddRenderTarget = AddRenderTarget,
|
||||
.SetBackbuffer = SetBackbuffer,
|
||||
.AddRenderPass = AddRenderPass,
|
||||
.AddColorOutput = AddColorOutput,
|
||||
.AddSampledInput = AddSampledInput,
|
||||
.SetDepthStencilAttachment = SetDepthStencilAttachment,
|
||||
.BindRenderPass = BindRenderPass,
|
||||
.Build = Build,
|
||||
};
|
||||
}
|
||||
|
||||
/* Destroys a builder: releases its arena, frees the implementation object
 * and clears the caller's handle struct. */
void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder) {
    rt_render_graph_builder_obj *impl = builder->obj;
    memset(builder, 0, sizeof *builder);
    rtReleaseArena(&impl->arena);
    free(impl);
}
|
90
src/renderer/common/common_render_graph.h
Normal file
90
src/renderer/common/common_render_graph.h
Normal file
@ -0,0 +1,90 @@
|
||||
#ifndef RT_RENDERER_COMMON_RENDER_GRAPH_H
#define RT_RENDERER_COMMON_RENDER_GRAPH_H

#include "gfx/renderer_api.h"
#include "runtime/mem_arena.h"

/* Description of a physical render target, i.e. one that actually gets
 * allocated after the builder has merged compatible logical targets. */
typedef struct {
    const char *name;
    rt_pixel_format format;
    unsigned int width;
    unsigned int height;

    unsigned int samples;
    unsigned int layers;

} rt_physical_render_target_info;

/* Backend callback: creates the GPU render target described by rt_info. */
typedef rt_render_target_handle
rt_rgb_create_render_target_fn(const rt_physical_render_target_info *rt_info);
/* Backend callback: non-zero if the backend requires explicit sync points
 * (semaphores) between passes. */
typedef int rt_rgb_require_explicit_synchronization_fn(void);

/* Platform hooks the builder needs to produce a backend-specific graph. */
typedef struct {
    rt_rgb_create_render_target_fn *CreateRenderTarget;
    rt_rgb_require_explicit_synchronization_fn *RequireExplicitSynchronization;
} rt_render_graph_builder_platform_callbacks;

/* A single baked pass inside a rt_render_graph. All pointer members point
 * into the graph's single backing allocation. */
typedef struct {
    uint32_t flags;

    /* Used for cheap referencing */
    uint32_t id;

    /* Used for debug output */
    const char *name;

    /* Render targets */
    rt_render_target_handle *color_outputs;
    rt_color *color_clear_values;
    rt_pass_load_mode *color_loads;
    rt_pass_write_mode *color_writes;
    uint32_t color_output_count;
    rt_render_target_handle depth_stencil;
    rt_depth_stencil_value depth_stencil_clear_value;
    rt_pass_load_mode depth_stencil_load;
    rt_pass_write_mode depth_stencil_write;
    rt_render_target_handle *sampled_inputs;
    uint32_t sampled_input_count;

    /* Used for parallelisation on the CPU-side. All passes with execution level N can
     * be recorded in parallel, after passes with level N-1 have finished. */
    uint32_t execution_level;

    /* GFX layer function for executing the pass */
    rt_execute_render_pass_fn *Execute;
    void *user_data;

    /* These refer to the semaphores array */
    uint32_t first_wait;
    uint32_t wait_count;
    uint32_t first_signal;
    uint32_t signal_count;
} rt_render_pass;

/* The baked render graph. All arrays live inside one allocation owned by
 * the graph object itself. */
struct rt_render_graph_s {
    rt_render_target_handle *render_targets;
    uint32_t render_target_count;

    rt_gpu_semaphore_handle *semaphores;
    uint32_t semaphore_count;

    /* Passes in execution order. */
    rt_render_pass *passes;
    uint32_t pass_count;

    /* Index into render_targets of the target presented to the screen. */
    uint32_t backbuffer_index;
};

#ifdef __cplusplus
extern "C" {
#endif

/* Creates a builder; the callback table is copied into the builder. */
rt_render_graph_builder
rtCreateRenderGraphBuilder(const rt_render_graph_builder_platform_callbacks *platform_cbs);

/* Destroys the builder and all of its intermediate (arena) storage. */
void rtDestroyRenderGraphBuilder(rt_render_graph_builder *builder);

#ifdef __cplusplus
}
#endif

#endif
|
@ -46,10 +46,7 @@ void ShutdownCommandBufferManagement() {
|
||||
_buffers = nullptr;
|
||||
}
|
||||
|
||||
extern "C" rt_result
|
||||
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
const rt_alloc_command_buffer_info *,
|
||||
rt_command_buffer_handle *p_command_buffers) {
|
||||
rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles) {
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
rtLockMutex(_lock);
|
||||
rt_command_buffer *slot = _first_free;
|
||||
@ -61,7 +58,7 @@ RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
rtLog("dx11", "Failed to allocate a command buffer slot.");
|
||||
rtLockMutex(_lock);
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
rt_command_buffer *s = &_buffers[p_command_buffers[j].index];
|
||||
rt_command_buffer *s = &_buffers[p_handles[j].index];
|
||||
s->next_free = _first_free;
|
||||
_first_free = s;
|
||||
}
|
||||
@ -74,7 +71,7 @@ RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
rtLog("dx11", "Failed to create a deferred context.");
|
||||
auto lock_guard = rtAutoLock(_lock);
|
||||
for (uint32_t j = 0; j < i; ++j) {
|
||||
rt_command_buffer *s = &_buffers[p_command_buffers[j].index];
|
||||
rt_command_buffer *s = &_buffers[p_handles[j].index];
|
||||
s->next_free = _first_free;
|
||||
_first_free = s;
|
||||
}
|
||||
@ -93,21 +90,20 @@ RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
|
||||
slot->version = (slot->version + 1) % RT_RENDER_BACKEND_HANDLE_MAX_VERSION;
|
||||
const uint32_t index = (uint32_t)(slot - _buffers);
|
||||
p_command_buffers[i].version = slot->version;
|
||||
p_command_buffers[i].index = index;
|
||||
p_handles[i].version = slot->version;
|
||||
p_handles[i].index = index;
|
||||
}
|
||||
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
extern "C" rt_result
|
||||
RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const rt_submit_command_buffers_info *info) {
|
||||
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles) {
|
||||
// TODO: Handle semaphores
|
||||
|
||||
// Submit the command lists to the gpu
|
||||
for (uint32_t i = 0; i < info->command_buffer_count; ++i) {
|
||||
rt_command_buffer *cmdbuf = &_buffers[info->command_buffers[i].index];
|
||||
if (cmdbuf->version != info->command_buffers[i].version) {
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
rt_command_buffer *cmdbuf = &_buffers[handles[i].index];
|
||||
if (cmdbuf->version != handles[i].version) {
|
||||
rtLog("dx11", "Tried to submit an invalid command buffer (version mismatch)");
|
||||
return RT_INVALID_VALUE;
|
||||
}
|
||||
@ -138,3 +134,15 @@ rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle) {
|
||||
return nullptr;
|
||||
return &_buffers[handle.index];
|
||||
}
|
||||
|
||||
extern "C" rt_result
|
||||
RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
const rt_alloc_command_buffer_info *,
|
||||
rt_command_buffer_handle *p_command_buffers) {
|
||||
return rtAllocCommandBuffers(count, p_command_buffers);
|
||||
}
|
||||
|
||||
extern "C" rt_result
|
||||
RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue, const rt_submit_command_buffers_info *info) {
|
||||
return rtSubmitCommandBuffers(info->command_buffer_count, info->command_buffers);
|
||||
}
|
@ -2,6 +2,7 @@
|
||||
#include <d3d11_1.h>
|
||||
|
||||
#include "gfx/renderer_api.h"
|
||||
#include "runtime/mem_arena.h"
|
||||
|
||||
#include "device_objects.hpp"
|
||||
#include "gpu.hpp"
|
||||
@ -85,3 +86,69 @@ RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdhandle
|
||||
return;
|
||||
RT_UNUSED(render_target);
|
||||
}
|
||||
|
||||
extern "C" void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
|
||||
rt_pipeline_handle pipeline_handle) {
|
||||
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
|
||||
if (!RT_VERIFY(cmd))
|
||||
return;
|
||||
rt_pipeline *pipeline = rtGetPipeline(pipeline_handle);
|
||||
|
||||
if (pipeline->IsComputePipeline()) {
|
||||
rtReportError("dx11",
|
||||
"Attempted to bind a compute pipeline via CmdBindPipeline. Use "
|
||||
"CmdBindComputePipeline instead.");
|
||||
return;
|
||||
}
|
||||
|
||||
cmd->context->IASetInputLayout(pipeline->input_layout);
|
||||
cmd->context->VSSetShader(pipeline->vertex_shader, nullptr, 0);
|
||||
cmd->context->PSSetShader(pipeline->pixel_shader, nullptr, 0);
|
||||
}
|
||||
|
||||
extern "C" void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
|
||||
uint32_t first_binding,
|
||||
uint32_t count,
|
||||
const rt_buffer_handle *buffers,
|
||||
const uint64_t *_offsets) {
|
||||
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
|
||||
if (!RT_VERIFY(cmd))
|
||||
return;
|
||||
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
||||
if (!temp.arena)
|
||||
return;
|
||||
|
||||
ID3D11Buffer **vbos = RT_ARENA_PUSH_ARRAY(temp.arena, ID3D11Buffer *, count);
|
||||
UINT *offsets = nullptr;
|
||||
|
||||
if (!vbos)
|
||||
goto out;
|
||||
|
||||
if (_offsets) {
|
||||
offsets = RT_ARENA_PUSH_ARRAY(temp.arena, UINT, count);
|
||||
if (!offsets)
|
||||
goto out;
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
offsets[i] = static_cast<UINT>(_offsets[i]);
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < count; ++i) {
|
||||
rt_buffer *buffer = rtGetBuffer(buffers[i]);
|
||||
RT_ASSERT(buffer->type == RT_BUFFER_TYPE_VERTEX, "Buffer must be a vertex buffer");
|
||||
vbos[i] = buffer->buffer;
|
||||
}
|
||||
|
||||
cmd->context->IASetVertexBuffers(first_binding, count, vbos, nullptr, offsets);
|
||||
|
||||
out:
|
||||
rtReturnTemporaryArena(temp);
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle, uint32_t first, uint32_t count) {
|
||||
rt_command_buffer *cmd = rtGetCommandBuffer(cmdhandle);
|
||||
if (!RT_VERIFY(cmd))
|
||||
return;
|
||||
cmd->context->Draw(count, first);
|
||||
}
|
||||
|
@ -73,6 +73,19 @@ struct rt_pipeline {
|
||||
}
|
||||
};
|
||||
|
||||
struct rt_render_target_create_info {
|
||||
rt_pixel_format format;
|
||||
uint32_t width;
|
||||
uint32_t height;
|
||||
const char *name;
|
||||
};
|
||||
|
||||
rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info);
|
||||
void rtDestroyRenderTarget(rt_render_target_handle handle);
|
||||
|
||||
rt_result rtAllocCommandBuffers(uint32_t count, rt_command_buffer_handle *p_handles);
|
||||
rt_result rtSubmitCommandBuffers(uint32_t count, const rt_command_buffer_handle *handles);
|
||||
|
||||
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
|
||||
rt_command_buffer *rtGetCommandBuffer(rt_command_buffer_handle handle);
|
||||
rt_buffer *rtGetBuffer(rt_buffer_handle handle);
|
||||
|
@ -26,6 +26,9 @@ DXGI_FORMAT rtConvertPixelFormat(rt_pixel_format format) {
|
||||
case RT_PIXEL_FORMAT_DEPTH32:
|
||||
return DXGI_FORMAT_D32_FLOAT;
|
||||
|
||||
case RT_PIXEL_FORMAT_SWAPCHAIN:
|
||||
return DXGI_FORMAT_B8G8R8A8_UNORM;
|
||||
|
||||
default:
|
||||
return DXGI_FORMAT_UNKNOWN;
|
||||
}
|
||||
|
@ -6,13 +6,19 @@ if get_option('build_dx11')
|
||||
'device_objects.hpp',
|
||||
'gpu.hpp',
|
||||
|
||||
'../common/common_render_graph.h',
|
||||
|
||||
'buffers.cpp',
|
||||
'commands.cpp',
|
||||
'command_buffers.cpp',
|
||||
'helpers.cpp',
|
||||
'init.cpp',
|
||||
'pipelines.cpp',
|
||||
'render_graph.cpp',
|
||||
'render_targets.cpp',
|
||||
|
||||
'../common/common_render_graph.c',
|
||||
|
||||
dependencies : [m_dep, windowing_dep, dx11_dep],
|
||||
include_directories : [engine_incdir, contrib_incdir],
|
||||
link_with : [runtime_lib],
|
||||
|
149
src/renderer/dx11/render_graph.cpp
Normal file
149
src/renderer/dx11/render_graph.cpp
Normal file
@ -0,0 +1,149 @@
|
||||
#include "gfx/renderer_api.h"
|
||||
#include "renderer/common/common_render_graph.h"
|
||||
|
||||
#include "device_objects.hpp"
|
||||
#include "gpu.hpp"
|
||||
|
||||
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *rtinfo) {
|
||||
return rtCreateRenderTarget({.format = rtinfo->format,
|
||||
.width = rtinfo->width,
|
||||
.height = rtinfo->height,
|
||||
.name = rtinfo->name});
|
||||
}
|
||||
|
||||
static int RequireExplicitSynchronization() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
extern "C" rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
|
||||
rt_render_graph_builder_platform_callbacks cbs{};
|
||||
cbs.CreateRenderTarget = CreateRenderTarget;
|
||||
cbs.RequireExplicitSynchronization = RequireExplicitSynchronization;
|
||||
return rtCreateRenderGraphBuilder(&cbs);
|
||||
}
|
||||
|
||||
extern "C" void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
|
||||
rtDestroyRenderGraphBuilder(builder);
|
||||
}
|
||||
|
||||
static rt_result ExecutePass(rt_render_pass *pass, rt_command_buffer_handle cmdbuf_handle) {
|
||||
rt_command_buffer *cmd = rtGetCommandBuffer(cmdbuf_handle);
|
||||
if (!RT_VERIFY(cmd))
|
||||
return RT_INVALID_VALUE;
|
||||
|
||||
if (cmd->annotation) {
|
||||
WCHAR wname[128];
|
||||
if (rtUTF8ToWStr(pass->name, wname, sizeof(wname)) == RT_SUCCESS)
|
||||
cmd->annotation->BeginEvent(wname);
|
||||
}
|
||||
|
||||
// Setup rtvs
|
||||
ID3D11RenderTargetView *rtvs[4];
|
||||
ID3D11DepthStencilView *dsv = nullptr;
|
||||
|
||||
for (uint32_t i = 0; i < pass->color_output_count; ++i) {
|
||||
rt_render_target *rt = rtGetRenderTarget(pass->color_outputs[i]);
|
||||
if (!RT_VERIFY(rt))
|
||||
return RT_INVALID_VALUE;
|
||||
RT_ASSERT(rt->IsColorRenderTarget(), "Needs to provide a valid color render target");
|
||||
rtvs[i] = rt->rtv;
|
||||
|
||||
if (pass->color_loads[i] == RT_PASS_LOAD_MODE_CLEAR) {
|
||||
FLOAT color[4] = {
|
||||
pass->color_clear_values[i].r,
|
||||
pass->color_clear_values[i].g,
|
||||
pass->color_clear_values[i].b,
|
||||
pass->color_clear_values[i].a,
|
||||
};
|
||||
cmd->context->ClearRenderTargetView(rt->rtv, color);
|
||||
}
|
||||
}
|
||||
|
||||
rt_render_target *dsvrt = rtGetRenderTarget(pass->depth_stencil);
|
||||
if (dsvrt) {
|
||||
RT_ASSERT(dsvrt->IsDepthStencilTarget(),
|
||||
"Need to provide a valid depth stencil render target");
|
||||
dsv = dsvrt->dsv;
|
||||
|
||||
if (pass->depth_stencil_load == RT_PASS_LOAD_MODE_CLEAR)
|
||||
cmd->context->ClearDepthStencilView(
|
||||
dsv,
|
||||
(dsvrt->HasStencilComponent()) ? D3D11_CLEAR_DEPTH | D3D11_CLEAR_STENCIL
|
||||
: D3D11_CLEAR_DEPTH,
|
||||
pass->depth_stencil_clear_value.depth,
|
||||
static_cast<UINT8>(pass->depth_stencil_clear_value.stencil));
|
||||
}
|
||||
|
||||
cmd->context->OMSetRenderTargets(static_cast<UINT>(pass->color_output_count), rtvs, dsv);
|
||||
|
||||
rt_result res = RT_VERIFY(pass->Execute)(cmdbuf_handle, nullptr, 0, pass->user_data);
|
||||
|
||||
if (cmd->annotation) {
|
||||
cmd->annotation->EndEvent();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static bool IsCopyResourcePossible(const rt_render_target *backbuffer) {
|
||||
DXGI_SWAP_CHAIN_DESC scd;
|
||||
g_gpu.swap_chain.swap_chain->GetDesc(&scd);
|
||||
|
||||
D3D11_TEXTURE2D_DESC td;
|
||||
backbuffer->texture->GetDesc(&td);
|
||||
|
||||
// This is more strict than necessary, because the formats could also be from the same group
|
||||
return scd.BufferDesc.Width == td.Width && scd.BufferDesc.Height == td.Height &&
|
||||
scd.SampleDesc.Count == td.SampleDesc.Count && scd.BufferDesc.Format == td.Format;
|
||||
}
|
||||
|
||||
extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
|
||||
rt_temp_arena temp = rtGetTemporaryArena(NULL, 0);
|
||||
if (!temp.arena)
|
||||
return RT_OUT_OF_MEMORY;
|
||||
|
||||
// Alloc a command buffer for every pass
|
||||
rt_command_buffer_handle *cmdbufs =
|
||||
RT_ARENA_PUSH_ARRAY(temp.arena, rt_command_buffer_handle, render_graph->pass_count);
|
||||
rt_result res = rtAllocCommandBuffers(render_graph->pass_count, cmdbufs);
|
||||
if (res != RT_SUCCESS) {
|
||||
rtReturnTemporaryArena(temp);
|
||||
return res;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < render_graph->pass_count; ++i) {
|
||||
rt_render_pass *pass = &render_graph->passes[i];
|
||||
|
||||
res = ExecutePass(pass, cmdbufs[i]);
|
||||
if (res != RT_SUCCESS)
|
||||
break;
|
||||
}
|
||||
|
||||
if (res == RT_SUCCESS) {
|
||||
res = rtSubmitCommandBuffers(render_graph->pass_count, cmdbufs);
|
||||
}
|
||||
|
||||
// Copy backbuffer to swapchain
|
||||
rt_render_target *backbuffer =
|
||||
rtGetRenderTarget(render_graph->render_targets[render_graph->backbuffer_index]);
|
||||
if (!backbuffer) {
|
||||
rtReturnTemporaryArena(temp);
|
||||
return RT_INVALID_VALUE;
|
||||
}
|
||||
|
||||
ID3D11Texture2D *frame_buffer;
|
||||
if (FAILED(g_gpu.swap_chain.swap_chain->GetBuffer(0, IID_PPV_ARGS(&frame_buffer)))) {
|
||||
rtReportError("dx11", "Failed to retrieve the backbuffer.");
|
||||
rtReturnTemporaryArena(temp);
|
||||
return RT_UNKNOWN_ERROR;
|
||||
}
|
||||
|
||||
if (IsCopyResourcePossible(backbuffer)) {
|
||||
g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
|
||||
} else {
|
||||
RT_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
rtReturnTemporaryArena(temp);
|
||||
return res;
|
||||
}
|
@ -55,8 +55,7 @@ void ShutdownRenderTargetManagement() {
|
||||
rtDestroyMutex(_lock);
|
||||
}
|
||||
|
||||
extern "C" rt_render_target_handle
|
||||
RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
rt_render_target_handle rtCreateRenderTarget(const rt_render_target_create_info &info) {
|
||||
rt_render_target *slot = nullptr;
|
||||
{
|
||||
auto lock_guard = rtAutoLock(_lock);
|
||||
@ -71,15 +70,27 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
return RT_INVALID_HANDLE;
|
||||
}
|
||||
|
||||
slot->format = info->format;
|
||||
slot->format = info.format;
|
||||
|
||||
if (!rtIsDepthFormat(info->format)) {
|
||||
uint32_t swapchain_width = 0, swapchain_height = 0;
|
||||
if (info.width == RT_RENDER_TARGET_SIZE_SWAPCHAIN ||
|
||||
info.height == RT_RENDER_TARGET_SIZE_SWAPCHAIN) {
|
||||
|
||||
DXGI_SWAP_CHAIN_DESC desc;
|
||||
g_gpu.swap_chain.swap_chain->GetDesc(&desc);
|
||||
swapchain_width = desc.BufferDesc.Width;
|
||||
swapchain_height = desc.BufferDesc.Height;
|
||||
}
|
||||
|
||||
if (!rtIsDepthFormat(info.format)) {
|
||||
D3D11_TEXTURE2D_DESC tex_desc = {};
|
||||
tex_desc.Width = info->width;
|
||||
tex_desc.Height = info->height;
|
||||
tex_desc.Width =
|
||||
(info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
|
||||
tex_desc.Height =
|
||||
(info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
|
||||
tex_desc.MipLevels = 1;
|
||||
tex_desc.ArraySize = 1;
|
||||
tex_desc.Format = rtConvertPixelFormat(info->format);
|
||||
tex_desc.Format = rtConvertPixelFormat(info.format);
|
||||
tex_desc.SampleDesc.Count = 1;
|
||||
tex_desc.SampleDesc.Quality = 0;
|
||||
tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
|
||||
@ -87,7 +98,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
tex_desc.CPUAccessFlags = 0; // none
|
||||
tex_desc.MiscFlags = 0;
|
||||
if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
|
||||
rtLog("dx11", "Failed to create backing texture for render target %s", info->name);
|
||||
rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
|
||||
auto lg = rtAutoLock(_lock);
|
||||
slot->next_free = _first_free;
|
||||
_first_free = slot;
|
||||
@ -95,14 +106,14 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
}
|
||||
|
||||
D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
|
||||
rtv_desc.Format = rtConvertPixelFormat(info->format);
|
||||
rtv_desc.Format = rtConvertPixelFormat(info.format);
|
||||
rtv_desc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
|
||||
rtv_desc.Texture2D.MipSlice = 0;
|
||||
if (FAILED(g_gpu.device->CreateRenderTargetView(slot->texture, &rtv_desc, &slot->rtv))) {
|
||||
slot->texture->Release();
|
||||
rtLog("dx11",
|
||||
"Failed to create the render target view for render target %s",
|
||||
info->name);
|
||||
info.name);
|
||||
auto lg = rtAutoLock(_lock);
|
||||
slot->next_free = _first_free;
|
||||
_first_free = slot;
|
||||
@ -114,11 +125,13 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
return {.version = slot->version, .index = index};
|
||||
} else {
|
||||
D3D11_TEXTURE2D_DESC tex_desc = {};
|
||||
tex_desc.Width = info->width;
|
||||
tex_desc.Height = info->height;
|
||||
tex_desc.Width =
|
||||
(info.width != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.width : swapchain_width;
|
||||
tex_desc.Height =
|
||||
(info.height != RT_RENDER_TARGET_SIZE_SWAPCHAIN) ? info.height : swapchain_height;
|
||||
tex_desc.MipLevels = 1;
|
||||
tex_desc.ArraySize = 1;
|
||||
tex_desc.Format = rtConvertPixelFormat(info->format);
|
||||
tex_desc.Format = rtConvertPixelFormat(info.format);
|
||||
tex_desc.SampleDesc.Count = 1;
|
||||
tex_desc.SampleDesc.Quality = 0;
|
||||
tex_desc.Usage = D3D11_USAGE_DEFAULT; // read and write
|
||||
@ -126,7 +139,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
tex_desc.CPUAccessFlags = 0; // none
|
||||
tex_desc.MiscFlags = 0;
|
||||
if (FAILED(g_gpu.device->CreateTexture2D(&tex_desc, nullptr, &slot->texture))) {
|
||||
rtLog("dx11", "Failed to create backing texture for render target %s", info->name);
|
||||
rtLog("dx11", "Failed to create backing texture for render target %s", info.name);
|
||||
auto lg = rtAutoLock(_lock);
|
||||
slot->next_free = _first_free;
|
||||
_first_free = slot;
|
||||
@ -134,7 +147,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
}
|
||||
|
||||
D3D11_DEPTH_STENCIL_VIEW_DESC dsv_desc = {};
|
||||
dsv_desc.Format = rtConvertPixelFormat(info->format);
|
||||
dsv_desc.Format = rtConvertPixelFormat(info.format);
|
||||
dsv_desc.Flags = 0;
|
||||
dsv_desc.ViewDimension = D3D11_DSV_DIMENSION_TEXTURE2D;
|
||||
dsv_desc.Texture2D.MipSlice = 0;
|
||||
@ -142,7 +155,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
slot->texture->Release();
|
||||
rtLog("dx11",
|
||||
"Failed to create the depth stencil view for render target %s",
|
||||
info->name);
|
||||
info.name);
|
||||
auto lg = rtAutoLock(_lock);
|
||||
slot->next_free = _first_free;
|
||||
_first_free = slot;
|
||||
@ -155,11 +168,7 @@ RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void) {
|
||||
return {1, 1};
|
||||
}
|
||||
|
||||
extern "C" void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle handle) {
|
||||
void rtDestroyRenderTarget(rt_render_target_handle handle) {
|
||||
RT_UNUSED(handle);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
null_renderer_lib = library('rtnull',
|
||||
'null.c',
|
||||
# Project Sources
|
||||
'../common/common_render_graph.c',
|
||||
include_directories : engine_incdir,
|
||||
link_with : runtime_lib,
|
||||
install : true)
|
||||
|
@ -4,6 +4,8 @@
|
||||
#include "gfx/renderer_api.h"
|
||||
#include "runtime/runtime.h"
|
||||
|
||||
#include "../common/common_render_graph.h"
|
||||
|
||||
#define RETURN_HANDLE_STUB2(type, initial) \
|
||||
static unsigned int s_next = (initial); \
|
||||
return (type) { .index = (s_next++) % RT_RENDER_BACKEND_HANDLE_MAX_INDEX, .version = 1 }
|
||||
@ -51,19 +53,6 @@ void RT_RENDERER_API_FN(DestroyPipeline)(rt_pipeline_handle handle) {
|
||||
RT_UNUSED(handle);
|
||||
}
|
||||
|
||||
rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
RT_UNUSED(info);
|
||||
RETURN_HANDLE_STUB2(rt_render_target_handle, 2);
|
||||
}
|
||||
|
||||
rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void) {
|
||||
return (rt_render_target_handle){.index = 1, .version = 1};
|
||||
}
|
||||
|
||||
void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle handle) {
|
||||
RT_UNUSED(handle);
|
||||
}
|
||||
|
||||
rt_result RT_RENDERER_API_FN(AllocCommandBuffers)(uint32_t count,
|
||||
const rt_alloc_command_buffer_info *info,
|
||||
rt_command_buffer_handle *p_command_buffers) {
|
||||
@ -79,34 +68,6 @@ rt_result RT_RENDERER_API_FN(SubmitCommandBuffers)(rt_gpu_queue queue,
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
rt_result RT_RENDERER_API_FN(CreateSemaphores)(uint32_t count,
|
||||
const rt_gpu_semaphore_info *info,
|
||||
rt_gpu_semaphore_handle *p_semaphores) {
|
||||
RT_UNUSED(info);
|
||||
RETURN_HANDLE_ARRAY_STUB2(p_semaphores, count, 3)
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
void RT_RENDERER_API_FN(DestroySemaphores)(uint32_t count, rt_gpu_semaphore_handle *semaphores) {
|
||||
RT_UNUSED(count);
|
||||
RT_UNUSED(semaphores);
|
||||
}
|
||||
|
||||
/* NOTE(Kevin): It might become necessary to actually track the value, to correctly simulate gpu
|
||||
* behaviour */
|
||||
uint64_t RT_RENDERER_API_FN(GetSemaphoreValue)(rt_gpu_semaphore_handle sem) {
|
||||
RT_UNUSED(sem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetSwapchainAvailableSemaphore)(void) {
|
||||
return (rt_gpu_semaphore_handle){.index = 1, .version = 1};
|
||||
}
|
||||
|
||||
rt_gpu_semaphore_handle RT_RENDERER_API_FN(GetRenderFinishedSemaphore)(void) {
|
||||
return (rt_gpu_semaphore_handle){.index = 2, .version = 1};
|
||||
}
|
||||
|
||||
rt_result RT_RENDERER_API_FN(CreateBuffers)(uint32_t count,
|
||||
const rt_buffer_info *info,
|
||||
rt_buffer_handle *p_buffers) {
|
||||
@ -144,13 +105,41 @@ void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdb
|
||||
RT_UNUSED(render_target);
|
||||
}
|
||||
|
||||
static rt_render_target_handle CreateRenderTarget(const rt_physical_render_target_info *info) {
|
||||
RETURN_HANDLE_STUB(rt_render_target_handle);
|
||||
}
|
||||
|
||||
static int RequireExplicitSync(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
|
||||
rt_render_graph_builder b = {
|
||||
.obj = NULL,
|
||||
};
|
||||
return b;
|
||||
rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = CreateRenderTarget,
|
||||
.RequireExplicitSynchronization =
|
||||
RequireExplicitSync};
|
||||
return rtCreateRenderGraphBuilder(&cbs);
|
||||
}
|
||||
|
||||
void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
|
||||
RT_UNUSED(builder);
|
||||
rtDestroyRenderGraphBuilder(builder);
|
||||
}
|
||||
|
||||
rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
|
||||
RT_UNUSED(render_graph);
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
void RT_RENDERER_API_FN(CmdBindPipeline)(rt_command_buffer_handle cmdhandle,
|
||||
rt_pipeline_handle pipeline_handle) {
|
||||
}
|
||||
|
||||
void RT_RENDERER_API_FN(CmdBindVertexBuffers)(rt_command_buffer_handle cmdhandle,
|
||||
uint32_t first_binding,
|
||||
uint32_t count,
|
||||
const rt_buffer_handle *buffers,
|
||||
const uint64_t *_offsets) {
|
||||
}
|
||||
void RT_RENDERER_API_FN(CmdDraw)(rt_command_buffer_handle cmdhandle,
|
||||
uint32_t first,
|
||||
uint32_t count) {
|
||||
}
|
@ -448,7 +448,7 @@ void RT_RENDERER_API_FN(CmdTransitionRenderTarget)(rt_command_buffer_handle cmdb
|
||||
rt_render_target_state new_state) {
|
||||
GET_CMDBUF(cmdbuf, cmdbuf_handle)
|
||||
uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
|
||||
if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) {
|
||||
if (render_target.index == rtGetSwapchainRenderTarget().index) {
|
||||
image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
|
||||
}
|
||||
|
||||
@ -468,7 +468,7 @@ void RT_RENDERER_API_FN(CmdFlushRenderTargetWrite)(rt_command_buffer_handle cmdb
|
||||
rt_render_target_handle render_target) {
|
||||
GET_CMDBUF(cmdbuf, cmdbuf_handle)
|
||||
uint32_t image_index = g_gpu.current_frame_id % g_gpu.max_frames_in_flight;
|
||||
if (render_target.index == g_renderer.GetSwapchainRenderTarget().index) {
|
||||
if (render_target.index == rtGetSwapchainRenderTarget().index) {
|
||||
image_index = rtGetFrameData(g_gpu.current_frame_id)->swapchain_image_index;
|
||||
}
|
||||
rt_render_target *rt = rtGetRenderTarget(render_target);
|
||||
|
@ -44,7 +44,7 @@ void RT_RENDERER_API_FN(BeginFrame)(unsigned int frame_id) {
|
||||
}
|
||||
|
||||
/* Update the swapchain render target */
|
||||
rt_render_target_handle swap_rt_handle = g_renderer.GetSwapchainRenderTarget();
|
||||
rt_render_target_handle swap_rt_handle = rtGetSwapchainRenderTarget();
|
||||
rt_render_target *swap_rt = rtGetRenderTarget(swap_rt_handle);
|
||||
swap_rt->states[frame->swapchain_image_index] = RT_RENDER_TARGET_STATE_INVALID;
|
||||
}
|
||||
|
@ -17,6 +17,8 @@ if vk_dep.found()
|
||||
'swapchain.h',
|
||||
'transfers.h',
|
||||
|
||||
'../common/common_render_graph.h',
|
||||
|
||||
'buffers.c',
|
||||
'command_buffers.c',
|
||||
'commands.c',
|
||||
@ -32,6 +34,8 @@ if vk_dep.found()
|
||||
|
||||
'simple_sync_impl.cpp',
|
||||
|
||||
'../common/common_render_graph.c',
|
||||
|
||||
# Contrib Sources
|
||||
'../../../contrib/volk/volk.h',
|
||||
'../../../contrib/volk/volk.c',
|
||||
|
@ -3,502 +3,25 @@
|
||||
#include "gfx/renderer_api.h"
|
||||
#include "runtime/mem_arena.h"
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../common/common_render_graph.h"
|
||||
#include "render_targets.h"
|
||||
|
||||
/* ****************************************************************************
|
||||
*
|
||||
* BUILDER CODE
|
||||
*
|
||||
* ****************************************************************************/
|
||||
|
||||
#define MAX_COLOR_ATTACHMENTS_PER_PASS 8
|
||||
#define MAX_SAMPLED_INPUTS_PER_PASS 8
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
rt_pixel_format format;
|
||||
unsigned int width;
|
||||
unsigned int height;
|
||||
|
||||
unsigned int samples;
|
||||
unsigned int layers;
|
||||
|
||||
uint32_t first_usage;
|
||||
uint32_t last_usage;
|
||||
} rt_render_target_build_info;
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
uint32_t flags;
|
||||
|
||||
uint32_t color_attachments[MAX_COLOR_ATTACHMENTS_PER_PASS];
|
||||
uint32_t color_attachment_count;
|
||||
|
||||
uint32_t sampled_inputs[MAX_SAMPLED_INPUTS_PER_PASS];
|
||||
uint32_t sampled_input_count;
|
||||
|
||||
uint32_t depth_stencil_attachment;
|
||||
|
||||
uint32_t *dependencies;
|
||||
uint32_t dependency_count;
|
||||
} rt_pass_build_info;
|
||||
|
||||
typedef struct {
|
||||
rt_arena arena;
|
||||
|
||||
rt_render_target_build_info *render_targets;
|
||||
uint32_t render_target_count;
|
||||
uint32_t render_target_capacity;
|
||||
|
||||
rt_pass_build_info *passes;
|
||||
uint32_t pass_count;
|
||||
uint32_t pass_capacity;
|
||||
|
||||
uint32_t backbuffer;
|
||||
} rt_render_graph_builder_obj;
|
||||
|
||||
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
|
||||
if (obj->render_target_count == obj->render_target_capacity) {
|
||||
uint32_t new_cap = obj->render_target_capacity ? 2 * obj->render_target_capacity : 32;
|
||||
rt_render_target_build_info *tmp =
|
||||
RT_ARENA_PUSH_ARRAY(&obj->arena, rt_render_target_build_info, new_cap);
|
||||
if (obj->render_target_capacity)
|
||||
memcpy(tmp,
|
||||
obj->render_targets,
|
||||
sizeof(rt_render_target_build_info) * obj->render_target_capacity);
|
||||
obj->render_targets = tmp;
|
||||
obj->render_target_capacity = new_cap;
|
||||
}
|
||||
|
||||
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
|
||||
strcpy(name, info->name);
|
||||
obj->render_targets[obj->render_target_count].name = name;
|
||||
obj->render_targets[obj->render_target_count].format = info->format;
|
||||
obj->render_targets[obj->render_target_count].width = info->width;
|
||||
obj->render_targets[obj->render_target_count].height = info->height;
|
||||
obj->render_targets[obj->render_target_count].samples = info->samples;
|
||||
obj->render_targets[obj->render_target_count].layers = info->layers;
|
||||
obj->render_targets[obj->render_target_count].first_usage = 0;
|
||||
obj->render_targets[obj->render_target_count].last_usage = 0;
|
||||
++obj->render_target_count;
|
||||
}
|
||||
|
||||
static void SetBackbuffer(void *_obj, const char *rt_name) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
obj->backbuffer = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("vk", "Tried to set backbuffer to unknown render target %s", rt_name);
|
||||
}
|
||||
|
||||
static void AddRenderPass(void *_obj, const rt_pass_info *info) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
|
||||
if (obj->pass_count == obj->pass_capacity) {
|
||||
uint32_t new_cap = obj->pass_capacity ? 2 * obj->pass_capacity : 32;
|
||||
rt_pass_build_info *tmp = RT_ARENA_PUSH_ARRAY(&obj->arena, rt_pass_build_info, new_cap);
|
||||
if (obj->pass_capacity)
|
||||
memcpy(tmp, obj->passes, sizeof(rt_pass_build_info) * obj->pass_capacity);
|
||||
obj->passes = tmp;
|
||||
obj->pass_capacity = new_cap;
|
||||
}
|
||||
|
||||
char *name = rtArenaPush(&obj->arena, strlen(info->name) + 1);
|
||||
strcpy(name, info->name);
|
||||
obj->passes[obj->pass_count].name = name;
|
||||
obj->passes[obj->pass_count].flags = info->flags;
|
||||
obj->passes[obj->pass_count].color_attachment_count = 0;
|
||||
obj->passes[obj->pass_count].sampled_input_count = 0;
|
||||
obj->passes[obj->pass_count].depth_stencil_attachment = UINT_MAX;
|
||||
obj->passes[obj->pass_count].dependencies = NULL;
|
||||
obj->passes[obj->pass_count].dependency_count = 0;
|
||||
|
||||
++obj->pass_count;
|
||||
}
|
||||
|
||||
static void AddColorOutput(void *_obj, const char *pass_name, const char *rt_name) {
|
||||
uint32_t rt_index = UINT_MAX;
|
||||
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
rt_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rt_index == UINT_MAX) {
|
||||
rtLog("vk",
|
||||
"Tried to add unknown render target %s as color output to %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||
if (obj->passes[i].color_attachment_count == MAX_COLOR_ATTACHMENTS_PER_PASS) {
|
||||
rtLog("vk", "Too many color attachments in pass %s", pass_name);
|
||||
}
|
||||
obj->passes[i].color_attachments[obj->passes[i].color_attachment_count++] = rt_index;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("vk",
|
||||
"Tried to add render target %s as color output to unknown render target %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
}
|
||||
|
||||
static void AddSampledInput(void *_obj, const char *pass_name, const char *rt_name) {
|
||||
uint32_t rt_index = UINT_MAX;
|
||||
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
rt_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rt_index == UINT_MAX) {
|
||||
rtLog("vk",
|
||||
"Tried to add unknown render target %s as color output to %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||
if (obj->passes[i].sampled_input_count == MAX_SAMPLED_INPUTS_PER_PASS) {
|
||||
rtLog("vk", "Too many sampled inputs in pass %s", pass_name);
|
||||
}
|
||||
obj->passes[i].sampled_inputs[obj->passes[i].sampled_input_count++] = rt_index;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("vk",
|
||||
"Tried to add render target %s as sampled input to unknown render target %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
}
|
||||
|
||||
static void SetDepthStencilAttachment(void *_obj, const char *pass_name, const char *rt_name) {
|
||||
uint32_t rt_index = UINT_MAX;
|
||||
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
for (uint32_t i = 0; i < obj->render_target_count; ++i) {
|
||||
if (strcmp(obj->render_targets[i].name, rt_name) == 0) {
|
||||
rt_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (rt_index == UINT_MAX) {
|
||||
rtLog("vk",
|
||||
"Tried to add unknown render target %s as depth stencil attachment to %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
return;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < obj->pass_count; ++i) {
|
||||
if (strcmp(obj->passes[i].name, pass_name) == 0) {
|
||||
obj->passes[i].depth_stencil_attachment = rt_index;
|
||||
return;
|
||||
}
|
||||
}
|
||||
rtLog("vk",
|
||||
"Tried to add render target %s as depth stencil attachment to unknown render target %s",
|
||||
rt_name,
|
||||
pass_name);
|
||||
}
|
||||
|
||||
/* Result of FindWriters(): how the pass order array was modified. */
typedef struct {
    uint32_t added; /* number of writer passes newly appended at append_at */
    uint32_t moved; /* number of already-listed writers moved towards the back */
} rt_find_writers_result;
|
||||
|
||||
/* Collects every pass that writes render target rt_index (as depth/stencil or
 * color attachment) into p_passes.
 *
 * p_passes[0..append_at) already holds previously discovered passes. A writer
 * not yet in the list is appended at append_at (counted in .added); a writer
 * already present is shifted to the back of the list (counted in .moved) so
 * that passes end up ordered by how late their output is consumed.
 *
 * NOTE(review): the caller is responsible for p_passes being large enough for
 * append_at + .added entries — presumably obj->pass_count; confirm at call
 * sites. */
static rt_find_writers_result FindWriters(rt_render_graph_builder_obj *obj,
                                          uint32_t rt_index,
                                          uint32_t append_at,
                                          uint32_t *p_passes) {
    rt_find_writers_result res = {0, 0};
    for (uint32_t i = 0; i < obj->pass_count; ++i) {
        /* Does pass i write rt_index? */
        bool writes_rt = false;
        if (obj->passes[i].depth_stencil_attachment == rt_index) {
            writes_rt = true;
        } else {
            for (uint32_t j = 0; j < obj->passes[i].color_attachment_count; ++j) {
                if (obj->passes[i].color_attachments[j] == rt_index) {
                    writes_rt = true;
                }
            }
        }

        if (!writes_rt)
            continue;

        /* Is pass i already somewhere in the current list? */
        uint32_t lower_index = UINT32_MAX;
        for (uint32_t j = 0; j < append_at; ++j) {
            if (p_passes[j] == i) {
                lower_index = j;
                break;
            }
        }

        if (lower_index == UINT32_MAX) {
            /* New writer: append. */
            p_passes[append_at++] = i;
            res.added++;
        } else {
            /* Known writer: close the gap at its old slot and re-insert it at
             * the current end of the list. */
            memmove(&p_passes[lower_index],
                    &p_passes[lower_index + 1],
                    (append_at - lower_index - 1) * sizeof(uint32_t));
            p_passes[append_at - 1] = i;
            res.moved++;
        }
    }
    return res;
}
|
||||
|
||||
/* Recursively builds a pass order into p_order, starting from the render
 * target search_rt and walking backwards through the passes that write it,
 * then through the render targets those passes sample.
 *
 * append_at is the current number of valid entries in p_order; the function
 * returns the new count after appending. Because consumers are visited before
 * their producers, the resulting order is reversed by the caller
 * (CreateNaiveOrder). */
static uint32_t CreateNaiveOrderRec(rt_render_graph_builder_obj *obj,
                                    uint32_t search_rt,
                                    uint32_t append_at,
                                    uint32_t *p_order) {
    rt_find_writers_result writers = FindWriters(obj, search_rt, append_at, p_order);
    uint32_t new_append = append_at + writers.added;
    /* Moved writers now live just before append_at (FindWriters shifted them
     * to the back of the existing region); recurse into their inputs first. */
    for (uint32_t i = 0; i < writers.moved; ++i) {
        uint32_t pass_idx = p_order[append_at - writers.moved + i];
        const rt_pass_build_info *pass = &obj->passes[pass_idx];
        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
        }
    }
    /* Then recurse into the inputs of the freshly appended writers. */
    for (uint32_t i = 0; i < writers.added; ++i) {
        uint32_t pass_idx = p_order[append_at + i];
        const rt_pass_build_info *pass = &obj->passes[pass_idx];
        for (uint32_t j = 0; j < pass->sampled_input_count; ++j) {
            new_append = CreateNaiveOrderRec(obj, pass->sampled_inputs[j], new_append, p_order);
        }
    }
    return new_append;
}
|
||||
|
||||
static rt_result
|
||||
CreateNaiveOrder(rt_render_graph_builder_obj *obj, uint32_t **p_order, uint32_t *p_count) {
|
||||
uint32_t *order = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, obj->pass_count);
|
||||
if (!order)
|
||||
return RT_OUT_OF_MEMORY;
|
||||
uint32_t count = CreateNaiveOrderRec(obj, obj->backbuffer, 0, order);
|
||||
|
||||
/* Now the pass writing the backbuffer is first, we need to revert the order */
|
||||
for (uint32_t i = 0; i < count / 2; ++i) {
|
||||
uint32_t t = order[i];
|
||||
order[i] = order[count - i - 1];
|
||||
order[count - i - 1] = t;
|
||||
}
|
||||
*p_order = order;
|
||||
*p_count = count;
|
||||
return RT_SUCCESS;
|
||||
}
|
||||
|
||||
/* Greedily re-schedules the naively ordered passes to maximise overlap.
 *
 * naive_order is consumed (compacted in place) as passes are moved into the
 * returned arena-allocated schedule of pass_count entries; returns NULL on
 * allocation failure. Requires pass->dependencies to be populated
 * (DeterminePassDependencies). */
static uint32_t *
ReorderPasses(rt_render_graph_builder_obj *obj, uint32_t pass_count, uint32_t *naive_order) {
    /* Our goal is to calculate a schedule that:
     * A) Does not break the dependency chain
     * B) Has the maximum amount of overlap, i.e. keeps the GPU busy.
     * This means that if pass A depends on pass B, we want to have as much passes inbetween as
     * possible, to reduce the likelyhood of stalls caused by B waiting for A to finish. */
    uint32_t *schedule = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, pass_count);
    if (!schedule)
        return NULL;
    uint32_t scheduled_count = 0;

    while (scheduled_count < pass_count) {
        /* The number of passes remaining in naive_order */
        uint32_t unscheduled_count = pass_count - scheduled_count;

        /* It is always valid to use the front */
        uint32_t selected_idx = 0;
        uint32_t selected_score = 0;
        for (uint32_t i = 0; i < unscheduled_count; ++i) {
            /* Check if any dependency is not scheduled yet */
            uint32_t pass_idx = naive_order[i];
            const rt_pass_build_info *pass = &obj->passes[pass_idx];
            uint32_t score = 0;
            bool is_valid = true;

            if (pass->dependency_count) {
                /* A pass is schedulable only if none of its dependencies are
                 * still waiting in naive_order. */
                for (uint32_t j = 0; j < unscheduled_count; ++j) {
                    uint32_t pass2_idx = naive_order[j];
                    for (uint32_t k = 0; k < pass->dependency_count; ++k) {
                        if (pass->dependencies[k] == pass2_idx) {
                            is_valid = false;
                            break;
                        }
                    }
                    if (!is_valid)
                        break;
                }
                if (!is_valid)
                    continue;

                /* Score = summed distance from each dependency's slot to the
                 * end of the schedule; larger means more breathing room. */
                for (uint32_t j = 0; j < pass->dependency_count; ++j) {
                    for (uint32_t k = 0; k < scheduled_count; ++k) {
                        if (schedule[k] == pass->dependencies[j]) {
                            score += scheduled_count - k;
                            break;
                        }
                    }
                }

            } else {
                /* No dependencies: always the best possible candidate. */
                score = UINT32_MAX;
            }

            if (score > selected_score) {
                selected_score = score;
                selected_idx = i;
            }
        }

        /* Commit the winner and compact naive_order over its slot. */
        schedule[scheduled_count++] = naive_order[selected_idx];
        memmove(&naive_order[selected_idx],
                &naive_order[selected_idx + 1],
                (unscheduled_count - selected_idx - 1) * sizeof(uint32_t));
    }
    return schedule;
}
|
||||
|
||||
/* Fills pass->dependencies / pass->dependency_count for every pass: pass A
 * depends on pass B if A samples a render target that B writes (as depth or
 * color attachment). Returns RT_OUT_OF_MEMORY on arena exhaustion.
 *
 * NOTE(review): a candidate writing several sampled inputs of the same pass
 * is recorded once per input, so the dependency list may contain duplicates —
 * presumably harmless for the scheduler's scoring; confirm. */
static rt_result DeterminePassDependencies(rt_render_graph_builder_obj *obj) {
    /* NOTE(Kevin): This has a lot of overlap with FindWriters, so maybe its possible to combine
     * the two */
    for (uint32_t pass_idx = 0; pass_idx < obj->pass_count; ++pass_idx) {
        rt_pass_build_info *pass = &obj->passes[pass_idx];
        /* Start with one slot per sampled input; grown below if needed. */
        uint32_t dependency_capacity = pass->sampled_input_count;
        if (dependency_capacity) {
            pass->dependencies = RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity);
            if (!pass->dependencies)
                return RT_OUT_OF_MEMORY;
        }
        for (uint32_t input_idx = 0; input_idx < pass->sampled_input_count; ++input_idx) {
            uint32_t rt_index = pass->sampled_inputs[input_idx];
            /* Find every pass that writes this sampled render target. */
            for (uint32_t candidate_idx = 0; candidate_idx < obj->pass_count; ++candidate_idx) {
                const rt_pass_build_info *candidate = &obj->passes[candidate_idx];
                bool is_dependency = false;
                if (candidate->depth_stencil_attachment == rt_index)
                    is_dependency = true;
                for (uint32_t j = 0; j < candidate->color_attachment_count; ++j) {
                    if (candidate->color_attachments[j] == rt_index)
                        is_dependency = true;
                }

                if (!is_dependency)
                    continue;

                if (pass->dependency_count == dependency_capacity) {
                    /* The dependencies are still on top of the arena, so we can just grow that
                     * array */
                    /* NOTE(review): this doubling relies on no other arena
                     * allocation happening between these pushes — holds for
                     * this loop as written; keep it that way. */
                    if (!RT_ARENA_PUSH_ARRAY(&obj->arena, uint32_t, dependency_capacity))
                        return RT_OUT_OF_MEMORY;
                    dependency_capacity *= 2;
                }
                pass->dependencies[pass->dependency_count++] = candidate_idx;
            }
        }
    }
    return RT_SUCCESS;
}
|
||||
|
||||
static void DetermineRenderTargetUsage(rt_render_graph_builder_obj *obj,
|
||||
uint32_t pass_count,
|
||||
const uint32_t *schedule) {
|
||||
for (uint32_t rt_idx = 0; rt_idx < obj->render_target_count; ++rt_idx) {
|
||||
rt_render_target_build_info *rt = &obj->render_targets[rt_idx];
|
||||
rt->first_usage = UINT32_MAX;
|
||||
rt->last_usage = 0;
|
||||
for (uint32_t sched_idx = 0; sched_idx < pass_count; ++sched_idx) {
|
||||
uint32_t pass_idx = schedule[sched_idx];
|
||||
const rt_pass_build_info *pass = &obj->passes[pass_idx];
|
||||
bool usage = pass->depth_stencil_attachment == rt_idx;
|
||||
if (!usage) {
|
||||
for (unsigned int i = 0; i < pass->color_attachment_count; ++i) {
|
||||
if (pass->color_attachments[i] == rt_idx)
|
||||
usage = true;
|
||||
}
|
||||
}
|
||||
if (!usage) {
|
||||
for (unsigned int i = 0; i < pass->sampled_input_count; ++i) {
|
||||
if (pass->sampled_inputs[i] == rt_idx)
|
||||
usage = true;
|
||||
}
|
||||
}
|
||||
if (usage) {
|
||||
if (sched_idx < rt->first_usage)
|
||||
rt->first_usage = sched_idx;
|
||||
if (sched_idx > rt->last_usage)
|
||||
rt->last_usage = sched_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static rt_result Build(void *_obj) {
|
||||
rt_render_graph_builder_obj *obj = _obj;
|
||||
uint32_t *naive_order;
|
||||
uint32_t pass_count;
|
||||
rt_result res = CreateNaiveOrder(obj, &naive_order, &pass_count);
|
||||
if (res != RT_SUCCESS)
|
||||
return res;
|
||||
|
||||
res = DeterminePassDependencies(obj);
|
||||
if (res != RT_SUCCESS)
|
||||
return res;
|
||||
|
||||
uint32_t *optimized_order = ReorderPasses(obj, pass_count, naive_order);
|
||||
if (!optimized_order)
|
||||
return RT_OUT_OF_MEMORY;
|
||||
|
||||
/* Next steps:
|
||||
* Determine first & last usage for every render-target
|
||||
* For every pair of render-targets, note if they could be merged:
|
||||
- Identical format
|
||||
- Non-overlapping usage */
|
||||
DetermineRenderTargetUsage(obj, pass_count, optimized_order);
|
||||
|
||||
|
||||
return RT_SUCCESS;
|
||||
/* Platform callback: this backend always requires the render graph to emit
 * explicit synchronization (barriers) between passes. */
static int RequireExplicitSynchronization(void) {
    const int explicit_sync_required = 1;
    return explicit_sync_required;
}
|
||||
|
||||
rt_render_graph_builder RT_RENDERER_API_FN(CreateRenderGraphBuilder)(void) {
|
||||
// TODO: This is totally temporary. We do NOT want malloc anywhere in non-init functions
|
||||
rt_render_graph_builder_obj *obj = malloc(sizeof(*obj));
|
||||
RT_ASSERT(obj, "Failed to allocate the builder object.");
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
rt_create_arena_result arena_res = rtCreateArena(NULL, RT_MB(16));
|
||||
RT_ASSERT(arena_res.ok, "");
|
||||
obj->arena = arena_res.arena;
|
||||
|
||||
return (rt_render_graph_builder){
|
||||
.obj = obj,
|
||||
.AddRenderTarget = AddRenderTarget,
|
||||
.SetBackbuffer = SetBackbuffer,
|
||||
.AddRenderPass = AddRenderPass,
|
||||
.AddColorOutput = AddColorOutput,
|
||||
.AddSampledInput = AddSampledInput,
|
||||
.SetDepthStencilAttachment = SetDepthStencilAttachment,
|
||||
.Build = Build,
|
||||
};
|
||||
rt_render_graph_builder_platform_callbacks cbs = {.CreateRenderTarget = rtCreateRenderTarget,
|
||||
.RequireExplicitSynchronization =
|
||||
RequireExplicitSynchronization};
|
||||
return rtCreateRenderGraphBuilder(&cbs);
|
||||
}
|
||||
|
||||
/* Destroys a builder created by CreateRenderGraphBuilder by delegating to the
 * shared implementation, which owns the builder's storage.
 * The visible block additionally ran the pre-refactor manual teardown
 * (rtReleaseArena/free/memset) before delegating — freeing state the shared
 * implementation also tears down (double teardown); the manual code is
 * removed. */
void RT_RENDERER_API_FN(DestroyRenderGraphBuilder)(rt_render_graph_builder *builder) {
    rtDestroyRenderGraphBuilder(builder);
}
|
||||
|
||||
/* Executes the given render graph on the GPU.
 * Stub: asserts in debug builds (RT_NOT_IMPLEMENTED) and reports failure. */
rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *render_graph) {
    RT_NOT_IMPLEMENTED;
    return RT_UNKNOWN_ERROR;
}
|
@ -203,7 +203,7 @@ void ShutdownRenderTargetManagement(void) {
|
||||
_first_free = NULL;
|
||||
}
|
||||
|
||||
rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_target_info *info) {
|
||||
rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info) {
|
||||
rt_render_target_handle handle = {0};
|
||||
|
||||
rtLockWrite(&_lock);
|
||||
@ -222,7 +222,7 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t
|
||||
*/
|
||||
rtUnlockWrite(&_lock);
|
||||
|
||||
const char *name = rtResolveConstRelptr(&info->name);
|
||||
const char *name = info->name;
|
||||
|
||||
slot->render_target.match_swapchain = 0;
|
||||
slot->render_target.image_count = g_swapchain.image_count;
|
||||
@ -258,7 +258,7 @@ rt_render_target_handle RT_RENDERER_API_FN(CreateRenderTarget)(const rt_render_t
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
slot->render_target.aspect = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
}
|
||||
slot->render_target.sample_count = rtSampleCountToFlags(info->sample_count);
|
||||
slot->render_target.sample_count = rtSampleCountToFlags(info->samples);
|
||||
if (!CreateImageAndView(slot->render_target.extent,
|
||||
slot->render_target.format,
|
||||
slot->render_target.sample_count,
|
||||
@ -282,11 +282,7 @@ out:
|
||||
return handle;
|
||||
}
|
||||
|
||||
rt_render_target_handle RT_RENDERER_API_FN(GetSwapchainRenderTarget)(void) {
|
||||
return _swapchain_handle;
|
||||
}
|
||||
|
||||
void RT_RENDERER_API_FN(DestroyRenderTarget)(rt_render_target_handle handle) {
|
||||
void rtDestroyRenderTarget(rt_render_target_handle handle) {
|
||||
if (handle.index >= (uint32_t)r_VkMaxRenderTargetCount.i)
|
||||
return;
|
||||
rtLockWrite(&_lock);
|
||||
@ -310,6 +306,10 @@ rt_render_target *rtGetRenderTarget(rt_render_target_handle handle) {
|
||||
return res;
|
||||
}
|
||||
|
||||
rt_render_target_handle rtGetSwapchainRenderTarget(void) {
|
||||
return _swapchain_handle;
|
||||
}
|
||||
|
||||
void rtUpdateSwapchainRenderTarget(void) {
|
||||
RT_ASSERT(_swapchain_handle.index != 0, "Invalid swap chain render target!");
|
||||
rt_render_target_slot *slot = &_render_targets[_swapchain_handle.index];
|
||||
|
@ -4,6 +4,9 @@
|
||||
#include "gpu.h"
|
||||
#include "gfx/renderer_api.h"
|
||||
|
||||
#include "../common/common_render_graph.h"
|
||||
|
||||
|
||||
/* Must match RT_VK_MAX_SWAPCHAIN_IMAGES */
|
||||
#define RT_VK_RENDER_TARGET_MAX_IMAGES 3
|
||||
|
||||
@ -26,7 +29,11 @@ typedef struct {
|
||||
rt_render_target_match_swapchain_flags match_swapchain;
|
||||
} rt_render_target;
|
||||
|
||||
rt_render_target_handle rtCreateRenderTarget(const rt_physical_render_target_info *info);
|
||||
void rtDestroyRenderTarget(rt_render_target_handle handle);
|
||||
|
||||
rt_render_target *rtGetRenderTarget(rt_render_target_handle handle);
|
||||
rt_render_target_handle rtGetSwapchainRenderTarget(void);
|
||||
|
||||
/* Update the render target that represents the swap chain */
|
||||
void rtUpdateSwapchainRenderTarget(void);
|
||||
|
@ -69,11 +69,11 @@ RT_INLINE void rtReturnTemporaryArena(rt_temp_arena tmp) {
|
||||
}
|
||||
|
||||
/* Helper macros */
|
||||
#define RT_ARENA_PUSH_STRUCT(_Arena, _Type) rtArenaPush((_Arena), sizeof(_Type))
|
||||
#define RT_ARENA_PUSH_STRUCT_ZERO(_Arena, _Type) rtArenaPushZero((_Arena), sizeof(_Type))
|
||||
#define RT_ARENA_PUSH_STRUCT(_Arena, _Type) (_Type*)rtArenaPush((_Arena), sizeof(_Type))
|
||||
#define RT_ARENA_PUSH_STRUCT_ZERO(_Arena, _Type) (_Type *)rtArenaPushZero((_Arena), sizeof(_Type))
|
||||
#define RT_ARENA_POP_STRUCT(_Arena, _Type) rtArenaPop((_Arena), sizeof(_Type))
|
||||
#define RT_ARENA_PUSH_ARRAY(_Arena, _Type, _N) rtArenaPush((_Arena), sizeof(_Type) * (_N))
|
||||
#define RT_ARENA_PUSH_ARRAY_ZERO(_Arena, _Type, _N) rtArenaPushZero((_Arena), sizeof(_Type) * (_N))
|
||||
#define RT_ARENA_PUSH_ARRAY(_Arena, _Type, _N) (_Type*)rtArenaPush((_Arena), sizeof(_Type) * (_N))
|
||||
#define RT_ARENA_PUSH_ARRAY_ZERO(_Arena, _Type, _N) (_Type*)rtArenaPushZero((_Arena), sizeof(_Type) * (_N))
|
||||
#define RT_ARENA_POP_ARRAY(_Arena, _Type, _N) rtArenaPop((_Arena), sizeof(_Type) * (_N)
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -101,6 +101,7 @@ static size_t GetResourceDataSize(const rt_resource *resource) {
|
||||
case RT_RESOURCE_PIPELINE:
|
||||
return sizeof(rt_pipeline_info);
|
||||
case RT_RESOURCE_FRAMEGRAPH: {
|
||||
#if 0
|
||||
const rt_framegraph_info *info = resource->data;
|
||||
size_t size = sizeof(*info) + sizeof(rt_render_target_info) * info->render_target_count +
|
||||
sizeof(rt_render_pass_info) * info->render_pass_count + info->names_size;
|
||||
@ -110,6 +111,7 @@ static size_t GetResourceDataSize(const rt_resource *resource) {
|
||||
passes[i].write_render_target_count * sizeof(rt_render_target_write);
|
||||
}
|
||||
return size;
|
||||
#endif
|
||||
} break;
|
||||
case RT_RESOURCE_EFFECT: {
|
||||
return sizeof(rt_effect_info);
|
||||
@ -134,6 +136,7 @@ static void CopyResourceData(const rt_resource *resource, void *dest) {
|
||||
memcpy(dest, resource->data, sizeof(rt_pipeline_info));
|
||||
break;
|
||||
case RT_RESOURCE_FRAMEGRAPH: {
|
||||
#if 0
|
||||
const rt_framegraph_info *info = resource->data;
|
||||
rt_framegraph_info *dest_info = dest;
|
||||
memcpy(dest_info, info, sizeof(*info));
|
||||
@ -202,6 +205,8 @@ static void CopyResourceData(const rt_resource *resource, void *dest) {
|
||||
if (src_name)
|
||||
rtSetRelptr(&passes_dest[i].name, names_begin + (src_name - src_names));
|
||||
}
|
||||
|
||||
#endif
|
||||
} break;
|
||||
case RT_RESOURCE_EFFECT: {
|
||||
memcpy(dest, resource->data, sizeof(rt_effect_info));
|
||||
@ -984,6 +989,7 @@ RT_DLLEXPORT void rDebugLogResource(rt_resource_id id, const rt_resource *resour
|
||||
rtLog("RESMGR", " compute shader: %llx", pipeline->compute_shader);
|
||||
} break;
|
||||
case RT_RESOURCE_FRAMEGRAPH: {
|
||||
#if 0
|
||||
static const char *format_str[RT_PIXEL_FORMAT_count] = {
|
||||
"<INVALID>",
|
||||
|
||||
@ -1061,6 +1067,7 @@ RT_DLLEXPORT void rDebugLogResource(rt_resource_id id, const rt_resource *resour
|
||||
writes[j].clear.depth_stencil.stencil);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} break;
|
||||
case RT_RESOURCE_EFFECT: {
|
||||
const rt_effect_info *effect = resource->data;
|
||||
|
@ -32,6 +32,9 @@ extern "C" {
|
||||
#define RT_RESTRICT_VALUE_TO_BOUNDS(v, lower, upper) \
|
||||
(((v) < (lower)) ? (lower) : (((v) > (upper)) ? (upper) : (v)))
|
||||
|
||||
#define RT_MIN(a, b) (((a) < (b))?(a):(b))
|
||||
#define RT_MAX(a, b) (((a) > (b))?(a):(b))
|
||||
|
||||
#define RT_KB(n) ((n)*1024U)
|
||||
#define RT_MB(n) ((n)*1024U * 1024U)
|
||||
#define RT_GB(n) ((n)*1024U * 1024U * 1024U)
|
||||
@ -127,7 +130,7 @@ RT_DLLEXPORT int rtAssertHandler(const char *expr, const char *msg, const char *
|
||||
// Asserts if p is "false", evaluates to p
|
||||
// NOTE that this will evaluate p multiple times!
|
||||
#define RT_VERIFY(p) \
|
||||
((!p) ? (RT_DEBUGBREAK, rtAssertHandler(#p, "Verify failed", __FILE__, __LINE__), p) : p)
|
||||
((!p) ? (rtAssertHandler(#p, "Verify failed", __FILE__, __LINE__), p) : p)
|
||||
|
||||
#else
|
||||
#define RT_ASSERT(x, msg) RT_UNUSED(x)
|
||||
@ -135,6 +138,9 @@ RT_DLLEXPORT int rtAssertHandler(const char *expr, const char *msg, const char *
|
||||
#define RT_VERIFY(p) (p)
|
||||
#endif
|
||||
|
||||
/* Makes it easier to search for unimplemented functions */
|
||||
#define RT_NOT_IMPLEMENTED RT_ASSERT_ALWAYS_EVAL(0, "Not implemented.")
|
||||
|
||||
enum {
|
||||
RT_INVALID_UNICODE = RT_CUSTOM_ERROR_START,
|
||||
RT_INSUFFICIENT_BUFFER,
|
||||
|
Loading…
Reference in New Issue
Block a user