Determine pass execution levels
Useful for executing render passes in parallel once we have a job system.
This commit is contained in:
		
							parent
							
								
									388b747a04
								
							
						
					
					
						commit
						6052f35485
					
				@ -78,12 +78,6 @@ typedef struct rt_render_graph_builder_obj {
 | 
				
			|||||||
    rt_render_graph_builder_platform_callbacks platform_cbs;
 | 
					    rt_render_graph_builder_platform_callbacks platform_cbs;
 | 
				
			||||||
} rt_render_graph_builder_obj;
 | 
					} rt_render_graph_builder_obj;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* ****************************************************************************
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 *                           BUILDER CODE
 | 
					 | 
				
			||||||
 *
 | 
					 | 
				
			||||||
 * ****************************************************************************/
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
 | 
					static void AddRenderTarget(void *_obj, const rt_attachment_info *info) {
 | 
				
			||||||
    rt_render_graph_builder_obj *obj = _obj;
 | 
					    rt_render_graph_builder_obj *obj = _obj;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -657,7 +651,27 @@ static rt_result CreateSynchronizationPoints(rt_render_graph_builder_obj *obj) {
 | 
				
			|||||||
    return RT_UNKNOWN_ERROR;
 | 
					    return RT_UNKNOWN_ERROR;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order) {
 | 
					/* Assigns an execution level to every render pass.
					 *
					 * Passes are visited in the order given by `schedule`. Each pass receives
					 * the RT_MAX over (level of dependency + 1) for all of its dependencies,
					 * starting from 0, so a pass without dependencies ends up at level 0.
					 *
					 * The returned array is indexed by pass index (not by schedule position),
					 * holds obj->pass_count entries and is pushed onto obj->arena.
					 * Returns NULL if that arena push fails.
					 *
					 * NOTE(review): the result is only meaningful if every dependency of a
					 * pass appears in `schedule` before the pass itself -- confirm with the
					 * caller. Presumably RT_ARENA_PUSH_ARRAY_ZERO zero-fills the array, so an
					 * unvisited dependency would read as level 0 -- verify. */
					static uint32_t *DeterminePassExecutionLevels(rt_render_graph_builder_obj *obj,
					                                              const uint32_t *schedule) {
					    uint32_t *levels = RT_ARENA_PUSH_ARRAY_ZERO(&obj->arena, uint32_t, obj->pass_count);
					    if (levels == NULL)
					        return NULL;

					    const uint32_t total = obj->pass_count;
					    for (uint32_t slot = 0; slot < total; ++slot) {
					        const uint32_t current         = schedule[slot];
					        const rt_pass_build_info *pass = &obj->passes[current];

					        /* Deepest level required by any dependency seen so far. */
					        uint32_t deepest = 0;
					        for (uint32_t d = 0; d < pass->dependency_count; ++d) {
					            const uint32_t dependency = pass->dependencies[d];
					            deepest                   = RT_MAX(levels[dependency] + 1, deepest);
					        }

					        levels[current] = deepest;
					    }

					    return levels;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, const uint32_t *order, const uint32_t *execution_levels) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    size_t required_size = sizeof(rt_render_graph);
 | 
					    size_t required_size = sizeof(rt_render_graph);
 | 
				
			||||||
    required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
 | 
					    required_size += obj->phys_render_target_count * sizeof(rt_render_target_handle);
 | 
				
			||||||
@ -713,7 +727,7 @@ static rt_render_graph *CreateRenderGraph(rt_render_graph_builder_obj *obj, cons
 | 
				
			|||||||
        graph->passes[i].signal_count = 0;
 | 
					        graph->passes[i].signal_count = 0;
 | 
				
			||||||
        graph->passes[i].first_wait   = 0;
 | 
					        graph->passes[i].first_wait   = 0;
 | 
				
			||||||
        graph->passes[i].wait_count   = 0;
 | 
					        graph->passes[i].wait_count   = 0;
 | 
				
			||||||
        graph->passes[i].execution_level = i;
 | 
					        graph->passes[i].execution_level = execution_levels[passidx];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        graph->passes[i].depth_stencil =
 | 
					        graph->passes[i].depth_stencil =
 | 
				
			||||||
            (obj->passes[i].depth_stencil_attachment != UINT_MAX)
 | 
					            (obj->passes[i].depth_stencil_attachment != UINT_MAX)
 | 
				
			||||||
@ -788,6 +802,10 @@ static rt_result Build(void *_obj, rt_render_graph **p_graph) {
 | 
				
			|||||||
    if (res != RT_SUCCESS)
 | 
					    if (res != RT_SUCCESS)
 | 
				
			||||||
        return res;
 | 
					        return res;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    uint32_t *execution_levels = DeterminePassExecutionLevels(obj, optimized_order);
 | 
				
			||||||
 | 
					    if (!execution_levels)
 | 
				
			||||||
 | 
					        return RT_OUT_OF_MEMORY;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (obj->platform_cbs.RequireExplicitSynchronization()) {
 | 
					    if (obj->platform_cbs.RequireExplicitSynchronization()) {
 | 
				
			||||||
        res = CreateSynchronizationPoints(obj);
 | 
					        res = CreateSynchronizationPoints(obj);
 | 
				
			||||||
        if (res != RT_SUCCESS)
 | 
					        if (res != RT_SUCCESS)
 | 
				
			||||||
@ -796,7 +814,7 @@ static rt_result Build(void *_obj, rt_render_graph **p_graph) {
 | 
				
			|||||||
        obj->sync_point_count = 0;
 | 
					        obj->sync_point_count = 0;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    *p_graph = CreateRenderGraph(obj, optimized_order);
 | 
					    *p_graph = CreateRenderGraph(obj, optimized_order, execution_levels);
 | 
				
			||||||
    return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
 | 
					    return *p_graph ? RT_SUCCESS : RT_UNKNOWN_ERROR;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -12,7 +12,6 @@ typedef struct {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    unsigned int samples;
 | 
					    unsigned int samples;
 | 
				
			||||||
    unsigned int layers;
 | 
					    unsigned int layers;
 | 
				
			||||||
 | 
					 | 
				
			||||||
} rt_physical_render_target_info;
 | 
					} rt_physical_render_target_info;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef rt_render_target_handle
 | 
					typedef rt_render_target_handle
 | 
				
			||||||
 | 
				
			|||||||
@ -141,9 +141,12 @@ extern "C" rt_result RT_RENDERER_API_FN(ExecuteRenderGraph)(rt_render_graph *ren
 | 
				
			|||||||
    if (IsCopyResourcePossible(backbuffer)) {
 | 
					    if (IsCopyResourcePossible(backbuffer)) {
 | 
				
			||||||
        g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
 | 
					        g_gpu.device_context->CopyResource(frame_buffer, backbuffer->texture);
 | 
				
			||||||
    } else {
 | 
					    } else {
 | 
				
			||||||
 | 
					        // NOTE(Kevin): The most flexible solution would probably be a fullscreen tri draw
 | 
				
			||||||
 | 
					        // that implements a blit.
 | 
				
			||||||
 | 
					        // Another idea would be a compute shader that does a copy&filter but that requires more work
 | 
				
			||||||
        RT_NOT_IMPLEMENTED;
 | 
					        RT_NOT_IMPLEMENTED;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    rtReturnTemporaryArena(temp);
 | 
					    rtReturnTemporaryArena(temp);
 | 
				
			||||||
    return res;
 | 
					    return res;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user