Determine pass execution levels
Useful for executing render passes in parallel once we have a job system.
This commit is contained in:
parent
388b747a04
commit
6052f35485
@ -78,12 +78,6 @@ typedef struct rt_render_graph_builder_obj {
|
|||||||
rt_render_graph_builder_platform_callbacks platform_cbs;
|
rt_render_graph_builder_platform_callbacks platform_cbs;
|
||||||
} rt_render_graph_builder_obj;
|
} rt_render_graph_builder_obj;
|
||||||
|
|
||||||
/* ****************************************************************************
|
|
||||||
*
|
|
||||||
* BUILDER CODE
|
|
||||||
*
|
|
||||||
* ****************************************************************************/
|
|
||||||
|
|
||||||
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
|
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
|
||||||
rt_render_graph_builder_obj *obj = _obj;
|
rt_render_graph_builder_obj *obj = _obj;
|
||||||
|
|
||||||
@ -657,7 +651,27 @@ static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
|
|||||||
return RT_UNKNOWN_ERROR;
|
return RT_UNKNOWN_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order) {
|
static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj,
|
||||||
|
const uint32_t *schedule) {
|
||||||
|
uint32_t *execution_levels = RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->pass_count);
|
||||||
|
if (!execution_levels)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
const rt_pass_build_info *passes = obj->passes;
|
||||||
|
uint32_t pass_count = obj->pass_count;
|
||||||
|
for (uint32_t i = 0; i < pass_count; ++i) {
|
||||||
|
uint32_t level = 0;
|
||||||
|
uint32_t pass_idx = schedule[i];
|
||||||
|
for (uint32_t j = 0; j < passes[pass_idx].dependency_count; ++j) {
|
||||||
|
uint32_t dep_idx = passes[pass_idx].dependencies[j];
|
||||||
|
level = RT_MAX(execution_levels[dep_idx] + 1, level);
|
||||||
|
}
|
||||||
|
execution_levels[pass_idx] = level;
|
||||||
|
}
|
||||||
|
return execution_levels;
|
||||||
|
}
|
||||||
|
|
||||||
|
static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order, const uint32_t *execution_levels) {
|
||||||
|
|
||||||
size_t required_size = sizeof(rt_render_graph);
|
size_t required_size = sizeof(rt_render_graph);
|
||||||
required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
|
required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
|
||||||
@ -713,7 +727,7 @@ static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, cons
|
|||||||
graph->passes[i].signal_count = 0;
|
graph->passes[i].signal_count = 0;
|
||||||
graph->passes[i].first_wait = 0;
|
graph->passes[i].first_wait = 0;
|
||||||
graph->passes[i].wait_count = 0;
|
graph->passes[i].wait_count = 0;
|
||||||
graph->passes[i].execution_level = i;
|
graph->passes[i].execution_level = execution_levels[passidx];
|
||||||
|
|
||||||
graph->passes[i].depth_stencil =
|
graph->passes[i].depth_stencil =
|
||||||
(obj->passes[i].depth_stencil_attachment != UINT_MAX)
|
(obj->passes[i].depth_stencil_attachment != UINT_MAX)
|
||||||
@ -788,6 +802,10 @@ static rt_result Build(void *_obj, rt_render_graph **p_graph) {
|
|||||||
if (res != RT_SUCCESS)
|
if (res != RT_SUCCESS)
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
|
uint32_t *execution_levels = DeterminePassExecutionLevels(obj, optimized_order);
|
||||||
|
if (!execution_levels)
|
||||||
|
return RT_OUT_OF_MEMORY;
|
||||||
|
|
||||||
if (obj->platform_cbs.RequireExplicitSynchronization()) {
|
if (obj->platform_cbs.RequireExplicitSynchronization()) {
|
||||||
res = CreateSynchronizationPoints(obj);
|
res = CreateSynchronizationPoints(obj);
|
||||||
if (res != RT_SUCCESS)
|
if (res != RT_SUCCESS)
|
||||||
@ -796,7 +814,7 @@ static rt_result Build(void *_obj, rt_render_graph **p_graph) {
|
|||||||
obj->sync_point_count = 0;
|
obj->sync_point_count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
*p_graph = CreateRenderGraph(obj, optimized_order);
|
*p_graph = CreateRenderGraph(obj, optimized_order, execution_levels);
|
||||||
return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
|
return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,7 +12,6 @@ typedef struct {
|
|||||||
|
|
||||||
unsigned int samples;
|
unsigned int samples;
|
||||||
unsigned int layers;
|
unsigned int layers;
|
||||||
|
|
||||||
} rt_physical_render_target_info;
|
} rt_physical_render_target_info;
|
||||||
|
|
||||||
typedef rt_render_target_handle
|
typedef rt_render_target_handle
|
||||||
|
@ -141,9 +141,12 @@ extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *ren
|
|||||||
if (IsCopyResourcePossible(backbuffer)) {
|
if (IsCopyResourcePossible(backbuffer)) {
|
||||||
g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
|
g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
|
||||||
} else {
|
} else {
|
||||||
|
// NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
|
||||||
|
// that implements a blit.
|
||||||
|
// Another idea would be a compute shader that does a copy&filter but that requires more work
|
||||||
RT_NOT_IMPLEMENTED;
|
RT_NOT_IMPLEMENTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
rtReturnTemporaryArena(temp);
|
rtReturnTemporaryArena(temp);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user